mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-23 08:00:37 +00:00
Compare commits
46 Commits
problame/c
...
on_demand_
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2e09a27fa9 | ||
|
|
abf412c21d | ||
|
|
9abea54e51 | ||
|
|
d93dee9532 | ||
|
|
7d5fa3d4ee | ||
|
|
27991a4052 | ||
|
|
1f83f9114b | ||
|
|
0a8c5e1214 | ||
|
|
78938d1b59 | ||
|
|
fcab61bdcd | ||
|
|
9e3ead3689 | ||
|
|
8dc069037b | ||
|
|
0a363c3dce | ||
|
|
aeca15008c | ||
|
|
43846b72fa | ||
|
|
cb060548fb | ||
|
|
bae793ffcd | ||
|
|
26b5fcdc50 | ||
|
|
97582178cb | ||
|
|
842be0ba74 | ||
|
|
982b376ea2 | ||
|
|
e158df4e86 | ||
|
|
723c0971e8 | ||
|
|
c8f67eed8f | ||
|
|
2d885ac07a | ||
|
|
89c5e80b3f | ||
|
|
93ec7503e0 | ||
|
|
7d7d1f354b | ||
|
|
16c200d6d9 | ||
|
|
3dbd34aa78 | ||
|
|
fa3fc73c1b | ||
|
|
ac5815b594 | ||
|
|
30583cb626 | ||
|
|
c1a51416db | ||
|
|
8eab7009c1 | ||
|
|
11cf16e3f3 | ||
|
|
af6f63617e | ||
|
|
e287f36a05 | ||
|
|
cbcd4058ed | ||
|
|
e86fef05dd | ||
|
|
a1323231bc | ||
|
|
06e840b884 | ||
|
|
cf11c8ab6a | ||
|
|
04f99a87bf | ||
|
|
fd12dd942f | ||
|
|
ebddda5b7f |
7
.github/actionlint.yml
vendored
7
.github/actionlint.yml
vendored
@@ -7,6 +7,13 @@ self-hosted-runner:
|
||||
- small-arm64
|
||||
- us-east-2
|
||||
config-variables:
|
||||
- AZURE_DEV_CLIENT_ID
|
||||
- AZURE_DEV_REGISTRY_NAME
|
||||
- AZURE_DEV_SUBSCRIPTION_ID
|
||||
- AZURE_PROD_CLIENT_ID
|
||||
- AZURE_PROD_REGISTRY_NAME
|
||||
- AZURE_PROD_SUBSCRIPTION_ID
|
||||
- AZURE_TENANT_ID
|
||||
- BENCHMARK_PROJECT_ID_PUB
|
||||
- BENCHMARK_PROJECT_ID_SUB
|
||||
- REMOTE_STORAGE_AZURE_CONTAINER
|
||||
|
||||
19
.github/workflows/_build-and-test-locally.yml
vendored
19
.github/workflows/_build-and-test-locally.yml
vendored
@@ -62,7 +62,7 @@ jobs:
|
||||
#
|
||||
git config --global --add safe.directory ${{ github.workspace }}
|
||||
git config --global --add safe.directory ${GITHUB_WORKSPACE}
|
||||
for r in 14 15 16; do
|
||||
for r in 14 15 16 17; do
|
||||
git config --global --add safe.directory "${{ github.workspace }}/vendor/postgres-v$r"
|
||||
git config --global --add safe.directory "${GITHUB_WORKSPACE}/vendor/postgres-v$r"
|
||||
done
|
||||
@@ -83,6 +83,10 @@ jobs:
|
||||
id: pg_v16_rev
|
||||
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Set pg 17 revision for caching
|
||||
id: pg_v17_rev
|
||||
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v17) >> $GITHUB_OUTPUT
|
||||
|
||||
# Set some environment variables used by all the steps.
|
||||
#
|
||||
# CARGO_FLAGS is extra options to pass to "cargo build", "cargo test" etc.
|
||||
@@ -136,6 +140,13 @@ jobs:
|
||||
path: pg_install/v16
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
|
||||
|
||||
- name: Cache postgres v17 build
|
||||
id: cache_pg_17
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: pg_install/v17
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v17_rev.outputs.pg_rev }}-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
|
||||
|
||||
- name: Build postgres v14
|
||||
if: steps.cache_pg_14.outputs.cache-hit != 'true'
|
||||
run: mold -run make postgres-v14 -j$(nproc)
|
||||
@@ -148,6 +159,10 @@ jobs:
|
||||
if: steps.cache_pg_16.outputs.cache-hit != 'true'
|
||||
run: mold -run make postgres-v16 -j$(nproc)
|
||||
|
||||
- name: Build postgres v17
|
||||
if: steps.cache_pg_17.outputs.cache-hit != 'true'
|
||||
run: mold -run make postgres-v17 -j$(nproc)
|
||||
|
||||
- name: Build neon extensions
|
||||
run: mold -run make neon-pg-ext -j$(nproc)
|
||||
|
||||
@@ -210,7 +225,7 @@ jobs:
|
||||
run: |
|
||||
PQ_LIB_DIR=$(pwd)/pg_install/v16/lib
|
||||
export PQ_LIB_DIR
|
||||
LD_LIBRARY_PATH=$(pwd)/pg_install/v16/lib
|
||||
LD_LIBRARY_PATH=$(pwd)/pg_install/v17/lib
|
||||
export LD_LIBRARY_PATH
|
||||
|
||||
#nextest does not yet support running doctests
|
||||
|
||||
56
.github/workflows/_push-to-acr.yml
vendored
Normal file
56
.github/workflows/_push-to-acr.yml
vendored
Normal file
@@ -0,0 +1,56 @@
|
||||
name: Push images to ACR
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
client_id:
|
||||
description: Client ID of Azure managed identity or Entra app
|
||||
required: true
|
||||
type: string
|
||||
image_tag:
|
||||
description: Tag for the container image
|
||||
required: true
|
||||
type: string
|
||||
images:
|
||||
description: Images to push
|
||||
required: true
|
||||
type: string
|
||||
registry_name:
|
||||
description: Name of the container registry
|
||||
required: true
|
||||
type: string
|
||||
subscription_id:
|
||||
description: Azure subscription ID
|
||||
required: true
|
||||
type: string
|
||||
tenant_id:
|
||||
description: Azure tenant ID
|
||||
required: true
|
||||
type: string
|
||||
|
||||
jobs:
|
||||
push-to-acr:
|
||||
runs-on: ubuntu-22.04
|
||||
permissions:
|
||||
contents: read # This is required for actions/checkout
|
||||
id-token: write # This is required for Azure Login to work.
|
||||
|
||||
steps:
|
||||
- name: Azure login
|
||||
uses: azure/login@6c251865b4e6290e7b78be643ea2d005bc51f69a # @v2.1.1
|
||||
with:
|
||||
client-id: ${{ inputs.client_id }}
|
||||
subscription-id: ${{ inputs.subscription_id }}
|
||||
tenant-id: ${{ inputs.tenant_id }}
|
||||
|
||||
- name: Login to ACR
|
||||
run: |
|
||||
az acr login --name=${{ inputs.registry_name }}
|
||||
|
||||
- name: Copy docker images to ACR ${{ inputs.registry_name }}
|
||||
run: |
|
||||
images='${{ inputs.images }}'
|
||||
for image in ${images}; do
|
||||
docker buildx imagetools create \
|
||||
-t ${{ inputs.registry_name }}.azurecr.io/neondatabase/${image}:${{ inputs.image_tag }} \
|
||||
neondatabase/${image}:${{ inputs.image_tag }}
|
||||
done
|
||||
76
.github/workflows/build_and_test.yml
vendored
76
.github/workflows/build_and_test.yml
vendored
@@ -211,7 +211,7 @@ jobs:
|
||||
build-tag: ${{ needs.tag.outputs.build-tag }}
|
||||
build-type: ${{ matrix.build-type }}
|
||||
# Run tests on all Postgres versions in release builds and only on the latest version in debug builds
|
||||
pg-versions: ${{ matrix.build-type == 'release' && '["v14", "v15", "v16"]' || '["v16"]' }}
|
||||
pg-versions: ${{ matrix.build-type == 'release' && '["v14", "v15", "v16", "v17"]' || '["v17"]' }}
|
||||
secrets: inherit
|
||||
|
||||
# Keep `benchmarks` job outside of `build-and-test-locally` workflow to make job failures non-blocking
|
||||
@@ -548,7 +548,7 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
version: [ v14, v15, v16 ]
|
||||
version: [ v14, v15, v16, v17 ]
|
||||
arch: [ x64, arm64 ]
|
||||
|
||||
runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}
|
||||
@@ -627,7 +627,7 @@ jobs:
|
||||
|
||||
- name: Build compute-tools image
|
||||
# compute-tools are Postgres independent, so build it only once
|
||||
if: matrix.version == 'v16'
|
||||
if: matrix.version == 'v17'
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
target: compute-tools-image
|
||||
@@ -649,7 +649,7 @@ jobs:
|
||||
|
||||
strategy:
|
||||
matrix:
|
||||
version: [ v14, v15, v16 ]
|
||||
version: [ v14, v15, v16, v17 ]
|
||||
|
||||
steps:
|
||||
- uses: docker/login-action@v3
|
||||
@@ -671,7 +671,7 @@ jobs:
|
||||
neondatabase/neon-test-extensions-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }}-arm64
|
||||
|
||||
- name: Create multi-arch compute-tools image
|
||||
if: matrix.version == 'v16'
|
||||
if: matrix.version == 'v17'
|
||||
run: |
|
||||
docker buildx imagetools create -t neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }} \
|
||||
neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}-x64 \
|
||||
@@ -689,7 +689,7 @@ jobs:
|
||||
neondatabase/compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }}
|
||||
|
||||
- name: Push multi-arch compute-tools image to ECR
|
||||
if: matrix.version == 'v16'
|
||||
if: matrix.version == 'v17'
|
||||
run: |
|
||||
docker buildx imagetools create -t 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{ needs.tag.outputs.build-tag }} \
|
||||
neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}
|
||||
@@ -700,7 +700,7 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
version: [ v14, v15, v16 ]
|
||||
version: [ v14, v15, v16, v17 ]
|
||||
env:
|
||||
VM_BUILDER_VERSION: v0.29.3
|
||||
|
||||
@@ -794,14 +794,11 @@ jobs:
|
||||
docker compose -f ./docker-compose/docker-compose.yml down
|
||||
|
||||
promote-images:
|
||||
permissions:
|
||||
contents: read # This is required for actions/checkout
|
||||
id-token: write # This is required for Azure Login to work.
|
||||
needs: [ check-permissions, tag, test-images, vm-compute-node-image ]
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
env:
|
||||
VERSIONS: v14 v15 v16
|
||||
VERSIONS: v14 v15 v16 v17
|
||||
|
||||
steps:
|
||||
- uses: docker/login-action@v3
|
||||
@@ -823,28 +820,6 @@ jobs:
|
||||
neondatabase/vm-compute-node-${version}:${{ needs.tag.outputs.build-tag }}
|
||||
done
|
||||
|
||||
- name: Azure login
|
||||
if: github.ref_name == 'main'
|
||||
uses: azure/login@6c251865b4e6290e7b78be643ea2d005bc51f69a # @v2.1.1
|
||||
with:
|
||||
client-id: ${{ secrets.AZURE_DEV_CLIENT_ID }}
|
||||
tenant-id: ${{ secrets.AZURE_TENANT_ID }}
|
||||
subscription-id: ${{ secrets.AZURE_DEV_SUBSCRIPTION_ID }}
|
||||
|
||||
- name: Login to ACR
|
||||
if: github.ref_name == 'main'
|
||||
run: |
|
||||
az acr login --name=neoneastus2
|
||||
|
||||
- name: Copy docker images to ACR-dev
|
||||
if: github.ref_name == 'main'
|
||||
run: |
|
||||
for image in neon compute-tools {vm-,}compute-node-{v14,v15,v16}; do
|
||||
docker buildx imagetools create \
|
||||
-t neoneastus2.azurecr.io/neondatabase/${image}:${{ needs.tag.outputs.build-tag }} \
|
||||
neondatabase/${image}:${{ needs.tag.outputs.build-tag }}
|
||||
done
|
||||
|
||||
- name: Add latest tag to images
|
||||
if: github.ref_name == 'main'
|
||||
run: |
|
||||
@@ -864,7 +839,7 @@ jobs:
|
||||
done
|
||||
done
|
||||
docker buildx imagetools create -t neondatabase/neon-test-extensions-v16:latest \
|
||||
neondatabase/neon-test-extensions-v16:${{ needs.tag.outputs.build-tag }}
|
||||
neondatabase/neon-test-extensions-v16:${{ needs.tag.outputs.build-tag }}
|
||||
|
||||
- name: Login to prod ECR
|
||||
uses: docker/login-action@v3
|
||||
@@ -877,11 +852,35 @@ jobs:
|
||||
- name: Copy all images to prod ECR
|
||||
if: github.ref_name == 'release'|| github.ref_name == 'release-proxy'
|
||||
run: |
|
||||
for image in neon compute-tools {vm-,}compute-node-{v14,v15,v16}; do
|
||||
for image in neon compute-tools {vm-,}compute-node-{v14,v15,v16,v17}; do
|
||||
docker buildx imagetools create -t 093970136003.dkr.ecr.eu-central-1.amazonaws.com/${image}:${{ needs.tag.outputs.build-tag }} \
|
||||
369495373322.dkr.ecr.eu-central-1.amazonaws.com/${image}:${{ needs.tag.outputs.build-tag }}
|
||||
done
|
||||
|
||||
push-to-acr-dev:
|
||||
if: github.ref_name == 'main'
|
||||
needs: [ tag, promote-images ]
|
||||
uses: ./.github/workflows/_push-to-acr.yml
|
||||
with:
|
||||
client_id: ${{ vars.AZURE_DEV_CLIENT_ID }}
|
||||
image_tag: ${{ needs.tag.outputs.build-tag }}
|
||||
images: neon compute-tools vm-compute-node-v14 vm-compute-node-v15 vm-compute-node-v16 vm-compute-node-v17 compute-node-v14 compute-node-v15 compute-node-v16 compute-node-v17
|
||||
registry_name: ${{ vars.AZURE_DEV_REGISTRY_NAME }}
|
||||
subscription_id: ${{ vars.AZURE_DEV_SUBSCRIPTION_ID }}
|
||||
tenant_id: ${{ vars.AZURE_TENANT_ID }}
|
||||
|
||||
push-to-acr-prod:
|
||||
if: github.ref_name == 'release'|| github.ref_name == 'release-proxy'
|
||||
needs: [ tag, promote-images ]
|
||||
uses: ./.github/workflows/_push-to-acr.yml
|
||||
with:
|
||||
client_id: ${{ vars.AZURE_PROD_CLIENT_ID }}
|
||||
image_tag: ${{ needs.tag.outputs.build-tag }}
|
||||
images: neon compute-tools vm-compute-node-v14 vm-compute-node-v15 vm-compute-node-v16 vm-compute-node-v17 compute-node-v14 compute-node-v15 compute-node-v16 compute-node-v17
|
||||
registry_name: ${{ vars.AZURE_PROD_REGISTRY_NAME }}
|
||||
subscription_id: ${{ vars.AZURE_PROD_SUBSCRIPTION_ID }}
|
||||
tenant_id: ${{ vars.AZURE_TENANT_ID }}
|
||||
|
||||
trigger-custom-extensions-build-and-wait:
|
||||
needs: [ check-permissions, tag ]
|
||||
runs-on: ubuntu-22.04
|
||||
@@ -957,8 +956,8 @@ jobs:
|
||||
exit 1
|
||||
|
||||
deploy:
|
||||
needs: [ check-permissions, promote-images, tag, build-and-test-locally, trigger-custom-extensions-build-and-wait ]
|
||||
if: github.ref_name == 'main' || github.ref_name == 'release'|| github.ref_name == 'release-proxy'
|
||||
needs: [ check-permissions, promote-images, tag, build-and-test-locally, trigger-custom-extensions-build-and-wait, push-to-acr-dev, push-to-acr-prod ]
|
||||
if: (github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy') && !failure() && !cancelled()
|
||||
|
||||
runs-on: [ self-hosted, small ]
|
||||
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
|
||||
@@ -972,7 +971,7 @@ jobs:
|
||||
#
|
||||
git config --global --add safe.directory ${{ github.workspace }}
|
||||
git config --global --add safe.directory ${GITHUB_WORKSPACE}
|
||||
for r in 14 15 16; do
|
||||
for r in 14 15 16 17; do
|
||||
git config --global --add safe.directory "${{ github.workspace }}/vendor/postgres-v$r"
|
||||
git config --global --add safe.directory "${GITHUB_WORKSPACE}/vendor/postgres-v$r"
|
||||
done
|
||||
@@ -1118,6 +1117,7 @@ jobs:
|
||||
|
||||
files_to_promote+=("s3://${BUCKET}/${s3_key}")
|
||||
|
||||
# TODO Add v17
|
||||
for pg_version in v14 v15 v16; do
|
||||
# We run less tests for debug builds, so we don't need to promote them
|
||||
if [ "${build_type}" == "debug" ] && { [ "${arch}" == "ARM64" ] || [ "${pg_version}" != "v16" ] ; }; then
|
||||
|
||||
34
.github/workflows/label-for-external-users.yml
vendored
34
.github/workflows/label-for-external-users.yml
vendored
@@ -7,6 +7,11 @@ on:
|
||||
pull_request_target:
|
||||
types:
|
||||
- opened
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
github-actor:
|
||||
description: 'GitHub username. If empty, the username of the current user will be used'
|
||||
required: false
|
||||
|
||||
# No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job.
|
||||
permissions: {}
|
||||
@@ -26,12 +31,31 @@ jobs:
|
||||
id: check-user
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
|
||||
ACTOR: ${{ inputs.github-actor || github.actor }}
|
||||
run: |
|
||||
if gh api -H "Accept: application/vnd.github+json" -H "X-GitHub-Api-Version: 2022-11-28" "/orgs/${GITHUB_REPOSITORY_OWNER}/members/${GITHUB_ACTOR}"; then
|
||||
is_member=true
|
||||
else
|
||||
is_member=false
|
||||
fi
|
||||
expected_error="User does not exist or is not a member of the organization"
|
||||
output_file=output.txt
|
||||
|
||||
for i in $(seq 1 10); do
|
||||
if gh api "/orgs/${GITHUB_REPOSITORY_OWNER}/members/${ACTOR}" \
|
||||
-H "Accept: application/vnd.github+json" \
|
||||
-H "X-GitHub-Api-Version: 2022-11-28" > ${output_file}; then
|
||||
|
||||
is_member=true
|
||||
break
|
||||
elif grep -q "${expected_error}" ${output_file}; then
|
||||
is_member=false
|
||||
break
|
||||
elif [ $i -eq 10 ]; then
|
||||
title="Failed to get memmbership status for ${ACTOR}"
|
||||
message="The latest GitHub API error message: '$(cat ${output_file})'"
|
||||
echo "::error file=.github/workflows/label-for-external-users.yml,title=${title}::${message}"
|
||||
|
||||
exit 1
|
||||
fi
|
||||
|
||||
sleep 1
|
||||
done
|
||||
|
||||
echo "is-member=${is_member}" | tee -a ${GITHUB_OUTPUT}
|
||||
|
||||
|
||||
15
.github/workflows/neon_extra_builds.yml
vendored
15
.github/workflows/neon_extra_builds.yml
vendored
@@ -72,6 +72,10 @@ jobs:
|
||||
id: pg_v16_rev
|
||||
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Set pg 17 revision for caching
|
||||
id: pg_v17_rev
|
||||
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v17) >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Cache postgres v14 build
|
||||
id: cache_pg_14
|
||||
uses: actions/cache@v4
|
||||
@@ -93,6 +97,13 @@ jobs:
|
||||
path: pg_install/v16
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
|
||||
- name: Cache postgres v17 build
|
||||
id: cache_pg_17
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: pg_install/v17
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v17_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
|
||||
- name: Set extra env for macOS
|
||||
run: |
|
||||
echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
|
||||
@@ -120,6 +131,10 @@ jobs:
|
||||
if: steps.cache_pg_16.outputs.cache-hit != 'true'
|
||||
run: make postgres-v16 -j$(sysctl -n hw.ncpu)
|
||||
|
||||
- name: Build postgres v17
|
||||
if: steps.cache_pg_17.outputs.cache-hit != 'true'
|
||||
run: make postgres-v17 -j$(sysctl -n hw.ncpu)
|
||||
|
||||
- name: Build neon extensions
|
||||
run: make neon-pg-ext -j$(sysctl -n hw.ncpu)
|
||||
|
||||
|
||||
4
.gitmodules
vendored
4
.gitmodules
vendored
@@ -10,3 +10,7 @@
|
||||
path = vendor/postgres-v16
|
||||
url = https://github.com/neondatabase/postgres.git
|
||||
branch = REL_16_STABLE_neon
|
||||
[submodule "vendor/postgres-v17"]
|
||||
path = vendor/postgres-v17
|
||||
url = https://github.com/neondatabase/postgres.git
|
||||
branch = REL_17_STABLE_neon
|
||||
|
||||
150
Cargo.lock
generated
150
Cargo.lock
generated
@@ -915,25 +915,22 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "bindgen"
|
||||
version = "0.65.1"
|
||||
version = "0.70.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cfdf7b466f9a4903edc73f95d6d2bcd5baf8ae620638762244d3f60143643cc5"
|
||||
checksum = "f49d8fed880d473ea71efb9bf597651e77201bdd4893efe54c9e5d65ae04ce6f"
|
||||
dependencies = [
|
||||
"bitflags 1.3.2",
|
||||
"bitflags 2.4.1",
|
||||
"cexpr",
|
||||
"clang-sys",
|
||||
"lazy_static",
|
||||
"lazycell",
|
||||
"itertools 0.12.1",
|
||||
"log",
|
||||
"peeking_take_while",
|
||||
"prettyplease 0.2.6",
|
||||
"prettyplease 0.2.17",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"regex",
|
||||
"rustc-hash",
|
||||
"shlex",
|
||||
"syn 2.0.52",
|
||||
"which",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1192,9 +1189,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "comfy-table"
|
||||
version = "6.1.4"
|
||||
version = "7.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6e7b787b0dc42e8111badfdbe4c3059158ccb2db8780352fa1b01e8ccf45cc4d"
|
||||
checksum = "b34115915337defe99b2aff5c2ce6771e5fbc4079f4b506301f5cf394c8452f7"
|
||||
dependencies = [
|
||||
"crossterm",
|
||||
"strum",
|
||||
@@ -1249,7 +1246,7 @@ dependencies = [
|
||||
"tokio-postgres",
|
||||
"tokio-stream",
|
||||
"tokio-util",
|
||||
"toml_edit 0.19.10",
|
||||
"toml_edit",
|
||||
"tracing",
|
||||
"tracing-opentelemetry",
|
||||
"tracing-subscriber",
|
||||
@@ -1363,8 +1360,8 @@ dependencies = [
|
||||
"tokio",
|
||||
"tokio-postgres",
|
||||
"tokio-util",
|
||||
"toml 0.7.4",
|
||||
"toml_edit 0.19.10",
|
||||
"toml",
|
||||
"toml_edit",
|
||||
"tracing",
|
||||
"url",
|
||||
"utils",
|
||||
@@ -1488,25 +1485,22 @@ checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345"
|
||||
|
||||
[[package]]
|
||||
name = "crossterm"
|
||||
version = "0.25.0"
|
||||
version = "0.27.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e64e6c0fbe2c17357405f7c758c1ef960fce08bdfb2c03d88d2a18d7e09c4b67"
|
||||
checksum = "f476fe445d41c9e991fd07515a6f463074b782242ccf4a5b7b1d1012e70824df"
|
||||
dependencies = [
|
||||
"bitflags 1.3.2",
|
||||
"bitflags 2.4.1",
|
||||
"crossterm_winapi",
|
||||
"libc",
|
||||
"mio",
|
||||
"parking_lot 0.12.1",
|
||||
"signal-hook",
|
||||
"signal-hook-mio",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossterm_winapi"
|
||||
version = "0.9.0"
|
||||
version = "0.9.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2ae1b35a484aa10e07fe0638d02301c5ad24de82d310ccbd2f3693da5f09bf1c"
|
||||
checksum = "acdd7c62a3665c7f6830a51635d9ac9b23ed385797f70a83bb8bafe9c572ab2b"
|
||||
dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
@@ -2949,12 +2943,6 @@ dependencies = [
|
||||
"spin 0.5.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lazycell"
|
||||
version = "1.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.150"
|
||||
@@ -3153,7 +3141,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fd01039851e82f8799046eabbb354056283fb265c8ec0996af940f4e85a380ff"
|
||||
dependencies = [
|
||||
"serde",
|
||||
"toml 0.8.14",
|
||||
"toml",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3669,7 +3657,7 @@ dependencies = [
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
"toml_edit 0.19.10",
|
||||
"toml_edit",
|
||||
"utils",
|
||||
"workspace_hack",
|
||||
]
|
||||
@@ -3756,7 +3744,7 @@ dependencies = [
|
||||
"tokio-stream",
|
||||
"tokio-tar",
|
||||
"tokio-util",
|
||||
"toml_edit 0.19.10",
|
||||
"toml_edit",
|
||||
"tracing",
|
||||
"twox-hash",
|
||||
"url",
|
||||
@@ -3919,8 +3907,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "parquet"
|
||||
version = "51.0.0"
|
||||
source = "git+https://github.com/apache/arrow-rs?branch=master#2534976a564be3d2d56312dc88fb1b6ed4cef829"
|
||||
version = "53.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f0fbf928021131daaa57d334ca8e3904fe9ae22f73c56244fc7db9b04eedc3d8"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"bytes",
|
||||
@@ -3939,8 +3928,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "parquet_derive"
|
||||
version = "51.0.0"
|
||||
source = "git+https://github.com/apache/arrow-rs?branch=master#2534976a564be3d2d56312dc88fb1b6ed4cef829"
|
||||
version = "53.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "86e9fcfae007533a06b580429a3f7e07cb833ec8aa37c041c16563e7918f057e"
|
||||
dependencies = [
|
||||
"parquet",
|
||||
"proc-macro2",
|
||||
@@ -3977,12 +3967,6 @@ dependencies = [
|
||||
"sha2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "peeking_take_while"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099"
|
||||
|
||||
[[package]]
|
||||
name = "pem"
|
||||
version = "3.0.3"
|
||||
@@ -4136,7 +4120,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "postgres"
|
||||
version = "0.19.4"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=20031d7a9ee1addeae6e0968e3899ae6bf01cee2#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"fallible-iterator",
|
||||
@@ -4149,7 +4133,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "postgres-protocol"
|
||||
version = "0.6.4"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=20031d7a9ee1addeae6e0968e3899ae6bf01cee2#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
|
||||
dependencies = [
|
||||
"base64 0.20.0",
|
||||
"byteorder",
|
||||
@@ -4168,7 +4152,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "postgres-types"
|
||||
version = "0.2.4"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=20031d7a9ee1addeae6e0968e3899ae6bf01cee2#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"fallible-iterator",
|
||||
@@ -4280,9 +4264,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "prettyplease"
|
||||
version = "0.2.6"
|
||||
version = "0.2.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3b69d39aab54d069e7f2fe8cb970493e7834601ca2d8c65fd7bbd183578080d1"
|
||||
checksum = "8d3928fb5db768cb86f891ff014f0144589297e3c6a1aba6ed7cecfdace270c7"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"syn 2.0.52",
|
||||
@@ -4827,7 +4811,7 @@ dependencies = [
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"tokio-util",
|
||||
"toml_edit 0.19.10",
|
||||
"toml_edit",
|
||||
"tracing",
|
||||
"utils",
|
||||
]
|
||||
@@ -5337,7 +5321,7 @@ dependencies = [
|
||||
"tokio-stream",
|
||||
"tokio-tar",
|
||||
"tokio-util",
|
||||
"toml_edit 0.19.10",
|
||||
"toml_edit",
|
||||
"tracing",
|
||||
"tracing-subscriber",
|
||||
"url",
|
||||
@@ -5746,17 +5730,6 @@ dependencies = [
|
||||
"signal-hook-registry",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "signal-hook-mio"
|
||||
version = "0.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "29ad2e15f37ec9a6cc544097b78a1ec90001e9f71b81338ca39f430adaca99af"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"mio",
|
||||
"signal-hook",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "signal-hook-registry"
|
||||
version = "1.4.1"
|
||||
@@ -6069,21 +6042,21 @@ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
|
||||
|
||||
[[package]]
|
||||
name = "strum"
|
||||
version = "0.24.1"
|
||||
version = "0.26.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f"
|
||||
checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06"
|
||||
|
||||
[[package]]
|
||||
name = "strum_macros"
|
||||
version = "0.24.3"
|
||||
version = "0.26.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59"
|
||||
checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be"
|
||||
dependencies = [
|
||||
"heck 0.4.1",
|
||||
"heck 0.5.0",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"rustversion",
|
||||
"syn 1.0.109",
|
||||
"syn 2.0.52",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -6094,8 +6067,9 @@ checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc"
|
||||
|
||||
[[package]]
|
||||
name = "svg_fmt"
|
||||
version = "0.4.2"
|
||||
source = "git+https://github.com/nical/rust_debug?rev=28a7d96eecff2f28e75b1ea09f2d499a60d0e3b4#28a7d96eecff2f28e75b1ea09f2d499a60d0e3b4"
|
||||
version = "0.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "20e16a0f46cf5fd675563ef54f26e83e20f2366bcf027bcb3cc3ed2b98aaf2ca"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
@@ -6423,7 +6397,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "tokio-postgres"
|
||||
version = "0.7.7"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=20031d7a9ee1addeae6e0968e3899ae6bf01cee2#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"byteorder",
|
||||
@@ -6534,18 +6508,6 @@ dependencies = [
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "toml"
|
||||
version = "0.7.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d6135d499e69981f9ff0ef2167955a5333c35e36f6937d382974566b3d5b94ec"
|
||||
dependencies = [
|
||||
"serde",
|
||||
"serde_spanned",
|
||||
"toml_datetime",
|
||||
"toml_edit 0.19.10",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "toml"
|
||||
version = "0.8.14"
|
||||
@@ -6555,7 +6517,7 @@ dependencies = [
|
||||
"serde",
|
||||
"serde_spanned",
|
||||
"toml_datetime",
|
||||
"toml_edit 0.22.14",
|
||||
"toml_edit",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -6567,19 +6529,6 @@ dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "toml_edit"
|
||||
version = "0.19.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2380d56e8670370eee6566b0bfd4265f65b3f432e8c6d85623f728d4fa31f739"
|
||||
dependencies = [
|
||||
"indexmap 1.9.3",
|
||||
"serde",
|
||||
"serde_spanned",
|
||||
"toml_datetime",
|
||||
"winnow 0.4.6",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "toml_edit"
|
||||
version = "0.22.14"
|
||||
@@ -6590,7 +6539,7 @@ dependencies = [
|
||||
"serde",
|
||||
"serde_spanned",
|
||||
"toml_datetime",
|
||||
"winnow 0.6.13",
|
||||
"winnow",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -7003,7 +6952,7 @@ dependencies = [
|
||||
"tokio-stream",
|
||||
"tokio-tar",
|
||||
"tokio-util",
|
||||
"toml_edit 0.19.10",
|
||||
"toml_edit",
|
||||
"tracing",
|
||||
"tracing-error",
|
||||
"tracing-subscriber",
|
||||
@@ -7549,15 +7498,6 @@ version = "0.52.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8"
|
||||
|
||||
[[package]]
|
||||
name = "winnow"
|
||||
version = "0.4.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "61de7bac303dc551fe038e2b3cef0f571087a47571ea6e79a87692ac99b99699"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winnow"
|
||||
version = "0.6.13"
|
||||
@@ -7627,6 +7567,7 @@ dependencies = [
|
||||
"hyper 0.14.26",
|
||||
"indexmap 1.9.3",
|
||||
"itertools 0.10.5",
|
||||
"itertools 0.12.1",
|
||||
"lazy_static",
|
||||
"libc",
|
||||
"log",
|
||||
@@ -7664,6 +7605,7 @@ dependencies = [
|
||||
"tokio",
|
||||
"tokio-rustls 0.24.0",
|
||||
"tokio-util",
|
||||
"toml_edit",
|
||||
"tonic",
|
||||
"tower",
|
||||
"tracing",
|
||||
|
||||
44
Cargo.toml
44
Cargo.toml
@@ -64,7 +64,7 @@ aws-types = "1.2.0"
|
||||
axum = { version = "0.6.20", features = ["ws"] }
|
||||
base64 = "0.13.0"
|
||||
bincode = "1.3"
|
||||
bindgen = "0.65"
|
||||
bindgen = "0.70"
|
||||
bit_field = "0.10.2"
|
||||
bstr = "1.0"
|
||||
byteorder = "1.4"
|
||||
@@ -73,7 +73,7 @@ camino = "1.1.6"
|
||||
cfg-if = "1.0.0"
|
||||
chrono = { version = "0.4", default-features = false, features = ["clock"] }
|
||||
clap = { version = "4.0", features = ["derive"] }
|
||||
comfy-table = "6.1"
|
||||
comfy-table = "7.1"
|
||||
const_format = "0.2"
|
||||
crc32c = "0.6"
|
||||
crossbeam-deque = "0.8.5"
|
||||
@@ -123,8 +123,8 @@ opentelemetry = "0.20.0"
|
||||
opentelemetry-otlp = { version = "0.13.0", default-features=false, features = ["http-proto", "trace", "http", "reqwest-client"] }
|
||||
opentelemetry-semantic-conventions = "0.12.0"
|
||||
parking_lot = "0.12"
|
||||
parquet = { version = "51.0.0", default-features = false, features = ["zstd"] }
|
||||
parquet_derive = "51.0.0"
|
||||
parquet = { version = "53", default-features = false, features = ["zstd"] }
|
||||
parquet_derive = "53"
|
||||
pbkdf2 = { version = "0.12.1", features = ["simple", "std"] }
|
||||
pin-project-lite = "0.2"
|
||||
procfs = "0.16"
|
||||
@@ -158,11 +158,10 @@ signal-hook = "0.3"
|
||||
smallvec = "1.11"
|
||||
smol_str = { version = "0.2.0", features = ["serde"] }
|
||||
socket2 = "0.5"
|
||||
strum = "0.24"
|
||||
strum_macros = "0.24"
|
||||
strum = "0.26"
|
||||
strum_macros = "0.26"
|
||||
"subtle" = "2.5.0"
|
||||
# Our PR https://github.com/nical/rust_debug/pull/4 has been merged but no new version released yet
|
||||
svg_fmt = { git = "https://github.com/nical/rust_debug", rev = "28a7d96eecff2f28e75b1ea09f2d499a60d0e3b4" }
|
||||
svg_fmt = "0.4.3"
|
||||
sync_wrapper = "0.1.2"
|
||||
tar = "0.4"
|
||||
task-local-extensions = "0.1.4"
|
||||
@@ -178,8 +177,8 @@ tokio-rustls = "0.25"
|
||||
tokio-stream = "0.1"
|
||||
tokio-tar = "0.3"
|
||||
tokio-util = { version = "0.7.10", features = ["io", "rt"] }
|
||||
toml = "0.7"
|
||||
toml_edit = "0.19"
|
||||
toml = "0.8"
|
||||
toml_edit = "0.22"
|
||||
tonic = {version = "0.9", features = ["tls", "tls-roots"]}
|
||||
tower-service = "0.3.2"
|
||||
tracing = "0.1"
|
||||
@@ -202,10 +201,21 @@ env_logger = "0.10"
|
||||
log = "0.4"
|
||||
|
||||
## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed
|
||||
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
|
||||
postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
|
||||
postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
|
||||
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
|
||||
|
||||
# We want to use the 'neon' branch for these, but there's currently one
|
||||
# incompatible change on the branch. See:
|
||||
#
|
||||
# - PR #8076 which contained changes that depended on the new changes in
|
||||
# the rust-postgres crate, and
|
||||
# - PR #8654 which reverted those changes and made the code in proxy incompatible
|
||||
# with the tip of the 'neon' branch again.
|
||||
#
|
||||
# When those proxy changes are re-applied (see PR #8747), we can switch using
|
||||
# the tip of the 'neon' branch again.
|
||||
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "20031d7a9ee1addeae6e0968e3899ae6bf01cee2" }
|
||||
postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "20031d7a9ee1addeae6e0968e3899ae6bf01cee2" }
|
||||
postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "20031d7a9ee1addeae6e0968e3899ae6bf01cee2" }
|
||||
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "20031d7a9ee1addeae6e0968e3899ae6bf01cee2" }
|
||||
|
||||
## Local libraries
|
||||
compute_api = { version = "0.1", path = "./libs/compute_api/" }
|
||||
@@ -242,11 +252,7 @@ tonic-build = "0.9"
|
||||
[patch.crates-io]
|
||||
|
||||
# Needed to get `tokio-postgres-rustls` to depend on our fork.
|
||||
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
|
||||
|
||||
# bug fixes for UUID
|
||||
parquet = { git = "https://github.com/apache/arrow-rs", branch = "master" }
|
||||
parquet_derive = { git = "https://github.com/apache/arrow-rs", branch = "master" }
|
||||
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "20031d7a9ee1addeae6e0968e3899ae6bf01cee2" }
|
||||
|
||||
################# Binary contents sections
|
||||
|
||||
|
||||
13
Dockerfile
13
Dockerfile
@@ -5,6 +5,8 @@
|
||||
ARG REPOSITORY=neondatabase
|
||||
ARG IMAGE=build-tools
|
||||
ARG TAG=pinned
|
||||
ARG DEFAULT_PG_VERSION=17
|
||||
ARG STABLE_PG_VERSION=16
|
||||
|
||||
# Build Postgres
|
||||
FROM $REPOSITORY/$IMAGE:$TAG AS pg-build
|
||||
@@ -13,6 +15,7 @@ WORKDIR /home/nonroot
|
||||
COPY --chown=nonroot vendor/postgres-v14 vendor/postgres-v14
|
||||
COPY --chown=nonroot vendor/postgres-v15 vendor/postgres-v15
|
||||
COPY --chown=nonroot vendor/postgres-v16 vendor/postgres-v16
|
||||
COPY --chown=nonroot vendor/postgres-v17 vendor/postgres-v17
|
||||
COPY --chown=nonroot pgxn pgxn
|
||||
COPY --chown=nonroot Makefile Makefile
|
||||
COPY --chown=nonroot scripts/ninstall.sh scripts/ninstall.sh
|
||||
@@ -28,16 +31,19 @@ FROM $REPOSITORY/$IMAGE:$TAG AS build
|
||||
WORKDIR /home/nonroot
|
||||
ARG GIT_VERSION=local
|
||||
ARG BUILD_TAG
|
||||
ARG STABLE_PG_VERSION
|
||||
|
||||
COPY --from=pg-build /home/nonroot/pg_install/v14/include/postgresql/server pg_install/v14/include/postgresql/server
|
||||
COPY --from=pg-build /home/nonroot/pg_install/v15/include/postgresql/server pg_install/v15/include/postgresql/server
|
||||
COPY --from=pg-build /home/nonroot/pg_install/v16/include/postgresql/server pg_install/v16/include/postgresql/server
|
||||
COPY --from=pg-build /home/nonroot/pg_install/v17/include/postgresql/server pg_install/v17/include/postgresql/server
|
||||
COPY --from=pg-build /home/nonroot/pg_install/v16/lib pg_install/v16/lib
|
||||
COPY --from=pg-build /home/nonroot/pg_install/v17/lib pg_install/v17/lib
|
||||
COPY --chown=nonroot . .
|
||||
|
||||
ARG ADDITIONAL_RUSTFLAGS
|
||||
RUN set -e \
|
||||
&& PQ_LIB_DIR=$(pwd)/pg_install/v16/lib RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment ${ADDITIONAL_RUSTFLAGS}" cargo build \
|
||||
&& PQ_LIB_DIR=$(pwd)/pg_install/v${STABLE_PG_VERSION}/lib RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment ${ADDITIONAL_RUSTFLAGS}" cargo build \
|
||||
--bin pg_sni_router \
|
||||
--bin pageserver \
|
||||
--bin pagectl \
|
||||
@@ -52,6 +58,7 @@ RUN set -e \
|
||||
# Build final image
|
||||
#
|
||||
FROM debian:bullseye-slim
|
||||
ARG DEFAULT_PG_VERSION
|
||||
WORKDIR /data
|
||||
|
||||
RUN set -e \
|
||||
@@ -77,6 +84,7 @@ COPY --from=build --chown=neon:neon /home/nonroot/target/release/storage_scrubbe
|
||||
COPY --from=pg-build /home/nonroot/pg_install/v14 /usr/local/v14/
|
||||
COPY --from=pg-build /home/nonroot/pg_install/v15 /usr/local/v15/
|
||||
COPY --from=pg-build /home/nonroot/pg_install/v16 /usr/local/v16/
|
||||
COPY --from=pg-build /home/nonroot/pg_install/v17 /usr/local/v17/
|
||||
COPY --from=pg-build /home/nonroot/postgres_install.tar.gz /data/
|
||||
|
||||
# By default, pageserver uses `.neon/` working directory in WORKDIR, so create one and fill it with the dummy config.
|
||||
@@ -87,12 +95,13 @@ RUN mkdir -p /data/.neon/ && \
|
||||
"pg_distrib_dir='/usr/local/'\n" \
|
||||
"listen_pg_addr='0.0.0.0:6400'\n" \
|
||||
"listen_http_addr='0.0.0.0:9898'\n" \
|
||||
"availability_zone='local'\n" \
|
||||
> /data/.neon/pageserver.toml && \
|
||||
chown -R neon:neon /data/.neon
|
||||
|
||||
# When running a binary that links with libpq, default to using our most recent postgres version. Binaries
|
||||
# that want a particular postgres version will select it explicitly: this is just a default.
|
||||
ENV LD_LIBRARY_PATH=/usr/local/v16/lib
|
||||
ENV LD_LIBRARY_PATH=/usr/local/v${DEFAULT_PG_VERSION}/lib
|
||||
|
||||
|
||||
VOLUME ["/data"]
|
||||
|
||||
@@ -192,7 +192,7 @@ WORKDIR /home/nonroot
|
||||
|
||||
# Rust
|
||||
# Please keep the version of llvm (installed above) in sync with rust llvm (`rustc --version --verbose | grep LLVM`)
|
||||
ENV RUSTC_VERSION=1.80.1
|
||||
ENV RUSTC_VERSION=1.81.0
|
||||
ENV RUSTUP_HOME="/home/nonroot/.rustup"
|
||||
ENV PATH="/home/nonroot/.cargo/bin:${PATH}"
|
||||
ARG RUSTFILT_VERSION=0.2.1
|
||||
@@ -207,7 +207,7 @@ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux
|
||||
export PATH="$HOME/.cargo/bin:$PATH" && \
|
||||
. "$HOME/.cargo/env" && \
|
||||
cargo --version && rustup --version && \
|
||||
rustup component add llvm-tools-preview rustfmt clippy && \
|
||||
rustup component add llvm-tools rustfmt clippy && \
|
||||
cargo install rustfilt --version ${RUSTFILT_VERSION} && \
|
||||
cargo install cargo-hakari --version ${CARGO_HAKARI_VERSION} && \
|
||||
cargo install cargo-deny --locked --version ${CARGO_DENY_VERSION} && \
|
||||
|
||||
@@ -79,6 +79,7 @@ RUN cd postgres && \
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS postgis-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
RUN apt update && \
|
||||
apt install -y cmake gdal-bin libboost-dev libboost-thread-dev libboost-filesystem-dev \
|
||||
@@ -87,7 +88,11 @@ RUN apt update && \
|
||||
protobuf-c-compiler xsltproc
|
||||
|
||||
# SFCGAL > 1.3 requires CGAL > 5.2, Bullseye's libcgal-dev is 5.2
|
||||
RUN wget https://gitlab.com/Oslandia/SFCGAL/-/archive/v1.3.10/SFCGAL-v1.3.10.tar.gz -O SFCGAL.tar.gz && \
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
mkdir -p /sfcgal && \
|
||||
echo "Postgis doensn't yet support PG17 (needs 3.4.3, if not higher)" && exit 0;; \
|
||||
esac && \
|
||||
wget https://gitlab.com/Oslandia/SFCGAL/-/archive/v1.3.10/SFCGAL-v1.3.10.tar.gz -O SFCGAL.tar.gz && \
|
||||
echo "4e39b3b2adada6254a7bdba6d297bb28e1a9835a9f879b74f37e2dab70203232 SFCGAL.tar.gz" | sha256sum --check && \
|
||||
mkdir sfcgal-src && cd sfcgal-src && tar xzf ../SFCGAL.tar.gz --strip-components=1 -C . && \
|
||||
cmake -DCMAKE_BUILD_TYPE=Release . && make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
@@ -96,7 +101,10 @@ RUN wget https://gitlab.com/Oslandia/SFCGAL/-/archive/v1.3.10/SFCGAL-v1.3.10.tar
|
||||
|
||||
ENV PATH="/usr/local/pgsql/bin:$PATH"
|
||||
|
||||
RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.3.tar.gz -O postgis.tar.gz && \
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "Postgis doensn't yet support PG17 (needs 3.4.3, if not higher)" && exit 0;; \
|
||||
esac && \
|
||||
wget https://download.osgeo.org/postgis/source/postgis-3.3.3.tar.gz -O postgis.tar.gz && \
|
||||
echo "74eb356e3f85f14233791013360881b6748f78081cc688ff9d6f0f673a762d13 postgis.tar.gz" | sha256sum --check && \
|
||||
mkdir postgis-src && cd postgis-src && tar xzf ../postgis.tar.gz --strip-components=1 -C . && \
|
||||
find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt &&\
|
||||
@@ -122,7 +130,10 @@ RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.3.tar.gz -O postg
|
||||
cp /usr/local/pgsql/share/extension/address_standardizer.control /extensions/postgis && \
|
||||
cp /usr/local/pgsql/share/extension/address_standardizer_data_us.control /extensions/postgis
|
||||
|
||||
RUN wget https://github.com/pgRouting/pgrouting/archive/v3.4.2.tar.gz -O pgrouting.tar.gz && \
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
|
||||
esac && \
|
||||
wget https://github.com/pgRouting/pgrouting/archive/v3.4.2.tar.gz -O pgrouting.tar.gz && \
|
||||
echo "cac297c07d34460887c4f3b522b35c470138760fe358e351ad1db4edb6ee306e pgrouting.tar.gz" | sha256sum --check && \
|
||||
mkdir pgrouting-src && cd pgrouting-src && tar xzf ../pgrouting.tar.gz --strip-components=1 -C . && \
|
||||
mkdir build && cd build && \
|
||||
@@ -142,12 +153,19 @@ RUN wget https://github.com/pgRouting/pgrouting/archive/v3.4.2.tar.gz -O pgrouti
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS plv8-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
RUN apt update && \
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
|
||||
esac && \
|
||||
apt update && \
|
||||
apt install -y ninja-build python3-dev libncurses5 binutils clang
|
||||
|
||||
RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.10.tar.gz -O plv8.tar.gz && \
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
|
||||
esac && \
|
||||
wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.10.tar.gz -O plv8.tar.gz && \
|
||||
echo "7096c3290928561f0d4901b7a52794295dc47f6303102fae3f8e42dd575ad97d plv8.tar.gz" | sha256sum --check && \
|
||||
mkdir plv8-src && cd plv8-src && tar xzf ../plv8.tar.gz --strip-components=1 -C . && \
|
||||
# generate and copy upgrade scripts
|
||||
@@ -172,9 +190,13 @@ RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.10.tar.gz -O plv8.t
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS h3-pg-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
RUN case "$(uname -m)" in \
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
|
||||
esac && \
|
||||
case "$(uname -m)" in \
|
||||
"x86_64") \
|
||||
export CMAKE_CHECKSUM=739d372726cb23129d57a539ce1432453448816e345e1545f6127296926b6754 \
|
||||
;; \
|
||||
@@ -192,7 +214,11 @@ RUN case "$(uname -m)" in \
|
||||
&& /tmp/cmake-install.sh --skip-license --prefix=/usr/local/ \
|
||||
&& rm /tmp/cmake-install.sh
|
||||
|
||||
RUN wget https://github.com/uber/h3/archive/refs/tags/v4.1.0.tar.gz -O h3.tar.gz && \
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
mkdir -p /h3/usr/ && \
|
||||
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
|
||||
esac && \
|
||||
wget https://github.com/uber/h3/archive/refs/tags/v4.1.0.tar.gz -O h3.tar.gz && \
|
||||
echo "ec99f1f5974846bde64f4513cf8d2ea1b8d172d2218ab41803bf6a63532272bc h3.tar.gz" | sha256sum --check && \
|
||||
mkdir h3-src && cd h3-src && tar xzf ../h3.tar.gz --strip-components=1 -C . && \
|
||||
mkdir build && cd build && \
|
||||
@@ -202,7 +228,10 @@ RUN wget https://github.com/uber/h3/archive/refs/tags/v4.1.0.tar.gz -O h3.tar.gz
|
||||
cp -R /h3/usr / && \
|
||||
rm -rf build
|
||||
|
||||
RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.1.3.tar.gz -O h3-pg.tar.gz && \
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
|
||||
esac && \
|
||||
wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.1.3.tar.gz -O h3-pg.tar.gz && \
|
||||
echo "5c17f09a820859ffe949f847bebf1be98511fb8f1bd86f94932512c00479e324 h3-pg.tar.gz" | sha256sum --check && \
|
||||
mkdir h3-pg-src && cd h3-pg-src && tar xzf ../h3-pg.tar.gz --strip-components=1 -C . && \
|
||||
export PATH="/usr/local/pgsql/bin:$PATH" && \
|
||||
@@ -218,9 +247,13 @@ RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.1.3.tar.gz -O h3
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS unit-pg-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
RUN wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.7.tar.gz -O postgresql-unit.tar.gz && \
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
|
||||
esac && \
|
||||
wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.7.tar.gz -O postgresql-unit.tar.gz && \
|
||||
echo "411d05beeb97e5a4abf17572bfcfbb5a68d98d1018918feff995f6ee3bb03e79 postgresql-unit.tar.gz" | sha256sum --check && \
|
||||
mkdir postgresql-unit-src && cd postgresql-unit-src && tar xzf ../postgresql-unit.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
@@ -239,6 +272,7 @@ RUN wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.7.tar.gz -
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS vector-pg-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
COPY patches/pgvector.patch /pgvector.patch
|
||||
@@ -246,7 +280,10 @@ COPY patches/pgvector.patch /pgvector.patch
|
||||
# By default, pgvector Makefile uses `-march=native`. We don't want that,
|
||||
# because we build the images on different machines than where we run them.
|
||||
# Pass OPTFLAGS="" to remove it.
|
||||
RUN wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.7.2.tar.gz -O pgvector.tar.gz && \
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
|
||||
esac && \
|
||||
wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.7.2.tar.gz -O pgvector.tar.gz && \
|
||||
echo "617fba855c9bcb41a2a9bc78a78567fd2e147c72afd5bf9d37b31b9591632b30 pgvector.tar.gz" | sha256sum --check && \
|
||||
mkdir pgvector-src && cd pgvector-src && tar xzf ../pgvector.tar.gz --strip-components=1 -C . && \
|
||||
patch -p1 < /pgvector.patch && \
|
||||
@@ -261,10 +298,14 @@ RUN wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.7.2.tar.gz -O
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS pgjwt-pg-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
# 9742dab1b2f297ad3811120db7b21451bca2d3c9 made on 13/11/2021
|
||||
RUN wget https://github.com/michelp/pgjwt/archive/9742dab1b2f297ad3811120db7b21451bca2d3c9.tar.gz -O pgjwt.tar.gz && \
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
|
||||
esac && \
|
||||
wget https://github.com/michelp/pgjwt/archive/9742dab1b2f297ad3811120db7b21451bca2d3c9.tar.gz -O pgjwt.tar.gz && \
|
||||
echo "cfdefb15007286f67d3d45510f04a6a7a495004be5b3aecb12cda667e774203f pgjwt.tar.gz" | sha256sum --check && \
|
||||
mkdir pgjwt-src && cd pgjwt-src && tar xzf ../pgjwt.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
@@ -277,9 +318,13 @@ RUN wget https://github.com/michelp/pgjwt/archive/9742dab1b2f297ad3811120db7b214
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS hypopg-pg-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
RUN wget https://github.com/HypoPG/hypopg/archive/refs/tags/1.4.0.tar.gz -O hypopg.tar.gz && \
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
|
||||
esac && \
|
||||
wget https://github.com/HypoPG/hypopg/archive/refs/tags/1.4.0.tar.gz -O hypopg.tar.gz && \
|
||||
echo "0821011743083226fc9b813c1f2ef5897a91901b57b6bea85a78e466187c6819 hypopg.tar.gz" | sha256sum --check && \
|
||||
mkdir hypopg-src && cd hypopg-src && tar xzf ../hypopg.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
@@ -293,9 +338,13 @@ RUN wget https://github.com/HypoPG/hypopg/archive/refs/tags/1.4.0.tar.gz -O hypo
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS pg-hashids-pg-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
RUN wget https://github.com/iCyberon/pg_hashids/archive/refs/tags/v1.2.1.tar.gz -O pg_hashids.tar.gz && \
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
|
||||
esac && \
|
||||
wget https://github.com/iCyberon/pg_hashids/archive/refs/tags/v1.2.1.tar.gz -O pg_hashids.tar.gz && \
|
||||
echo "74576b992d9277c92196dd8d816baa2cc2d8046fe102f3dcd7f3c3febed6822a pg_hashids.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_hashids-src && cd pg_hashids-src && tar xzf ../pg_hashids.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
|
||||
@@ -309,11 +358,15 @@ RUN wget https://github.com/iCyberon/pg_hashids/archive/refs/tags/v1.2.1.tar.gz
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS rum-pg-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
COPY patches/rum.patch /rum.patch
|
||||
|
||||
RUN wget https://github.com/postgrespro/rum/archive/refs/tags/1.3.13.tar.gz -O rum.tar.gz && \
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
|
||||
esac && \
|
||||
wget https://github.com/postgrespro/rum/archive/refs/tags/1.3.13.tar.gz -O rum.tar.gz && \
|
||||
echo "6ab370532c965568df6210bd844ac6ba649f53055e48243525b0b7e5c4d69a7d rum.tar.gz" | sha256sum --check && \
|
||||
mkdir rum-src && cd rum-src && tar xzf ../rum.tar.gz --strip-components=1 -C . && \
|
||||
patch -p1 < /rum.patch && \
|
||||
@@ -328,9 +381,13 @@ RUN wget https://github.com/postgrespro/rum/archive/refs/tags/1.3.13.tar.gz -O r
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS pgtap-pg-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
RUN wget https://github.com/theory/pgtap/archive/refs/tags/v1.2.0.tar.gz -O pgtap.tar.gz && \
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
|
||||
esac && \
|
||||
wget https://github.com/theory/pgtap/archive/refs/tags/v1.2.0.tar.gz -O pgtap.tar.gz && \
|
||||
echo "9c7c3de67ea41638e14f06da5da57bac6f5bd03fea05c165a0ec862205a5c052 pgtap.tar.gz" | sha256sum --check && \
|
||||
mkdir pgtap-src && cd pgtap-src && tar xzf ../pgtap.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
@@ -344,9 +401,13 @@ RUN wget https://github.com/theory/pgtap/archive/refs/tags/v1.2.0.tar.gz -O pgta
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS ip4r-pg-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
RUN wget https://github.com/RhodiumToad/ip4r/archive/refs/tags/2.4.2.tar.gz -O ip4r.tar.gz && \
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
|
||||
esac && \
|
||||
wget https://github.com/RhodiumToad/ip4r/archive/refs/tags/2.4.2.tar.gz -O ip4r.tar.gz && \
|
||||
echo "0f7b1f159974f49a47842a8ab6751aecca1ed1142b6d5e38d81b064b2ead1b4b ip4r.tar.gz" | sha256sum --check && \
|
||||
mkdir ip4r-src && cd ip4r-src && tar xzf ../ip4r.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
@@ -360,9 +421,13 @@ RUN wget https://github.com/RhodiumToad/ip4r/archive/refs/tags/2.4.2.tar.gz -O i
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS prefix-pg-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
RUN wget https://github.com/dimitri/prefix/archive/refs/tags/v1.2.10.tar.gz -O prefix.tar.gz && \
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
|
||||
esac && \
|
||||
wget https://github.com/dimitri/prefix/archive/refs/tags/v1.2.10.tar.gz -O prefix.tar.gz && \
|
||||
echo "4342f251432a5f6fb05b8597139d3ccde8dcf87e8ca1498e7ee931ca057a8575 prefix.tar.gz" | sha256sum --check && \
|
||||
mkdir prefix-src && cd prefix-src && tar xzf ../prefix.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
@@ -376,9 +441,13 @@ RUN wget https://github.com/dimitri/prefix/archive/refs/tags/v1.2.10.tar.gz -O p
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS hll-pg-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
RUN wget https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.18.tar.gz -O hll.tar.gz && \
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
|
||||
esac && \
|
||||
wget https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.18.tar.gz -O hll.tar.gz && \
|
||||
echo "e2f55a6f4c4ab95ee4f1b4a2b73280258c5136b161fe9d059559556079694f0e hll.tar.gz" | sha256sum --check && \
|
||||
mkdir hll-src && cd hll-src && tar xzf ../hll.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
@@ -392,9 +461,13 @@ RUN wget https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.18.tar
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS plpgsql-check-pg-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
RUN wget https://github.com/okbob/plpgsql_check/archive/refs/tags/v2.5.3.tar.gz -O plpgsql_check.tar.gz && \
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
|
||||
esac && \
|
||||
wget https://github.com/okbob/plpgsql_check/archive/refs/tags/v2.5.3.tar.gz -O plpgsql_check.tar.gz && \
|
||||
echo "6631ec3e7fb3769eaaf56e3dfedb829aa761abf163d13dba354b4c218508e1c0 plpgsql_check.tar.gz" | sha256sum --check && \
|
||||
mkdir plpgsql_check-src && cd plpgsql_check-src && tar xzf ../plpgsql_check.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
|
||||
@@ -413,7 +486,10 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
ARG PG_VERSION
|
||||
ENV PATH="/usr/local/pgsql/bin:$PATH"
|
||||
|
||||
RUN case "${PG_VERSION}" in \
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
|
||||
esac && \
|
||||
case "${PG_VERSION}" in \
|
||||
"v14" | "v15") \
|
||||
export TIMESCALEDB_VERSION=2.10.1 \
|
||||
export TIMESCALEDB_CHECKSUM=6fca72a6ed0f6d32d2b3523951ede73dc5f9b0077b38450a029a5f411fdb8c73 \
|
||||
@@ -446,7 +522,10 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
ARG PG_VERSION
|
||||
ENV PATH="/usr/local/pgsql/bin:$PATH"
|
||||
|
||||
RUN case "${PG_VERSION}" in \
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
|
||||
esac && \
|
||||
case "${PG_VERSION}" in \
|
||||
"v14") \
|
||||
export PG_HINT_PLAN_VERSION=14_1_4_1 \
|
||||
export PG_HINT_PLAN_CHECKSUM=c3501becf70ead27f70626bce80ea401ceac6a77e2083ee5f3ff1f1444ec1ad1 \
|
||||
@@ -459,6 +538,9 @@ RUN case "${PG_VERSION}" in \
|
||||
export PG_HINT_PLAN_VERSION=16_1_6_0 \
|
||||
export PG_HINT_PLAN_CHECKSUM=fc85a9212e7d2819d4ae4ac75817481101833c3cfa9f0fe1f980984e12347d00 \
|
||||
;; \
|
||||
"v17") \
|
||||
echo "TODO: PG17 pg_hint_plan support" && exit 0 \
|
||||
;; \
|
||||
*) \
|
||||
echo "Export the valid PG_HINT_PLAN_VERSION variable" && exit 1 \
|
||||
;; \
|
||||
@@ -478,10 +560,14 @@ RUN case "${PG_VERSION}" in \
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS pg-cron-pg-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
ENV PATH="/usr/local/pgsql/bin/:$PATH"
|
||||
RUN wget https://github.com/citusdata/pg_cron/archive/refs/tags/v1.6.0.tar.gz -O pg_cron.tar.gz && \
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
|
||||
esac && \
|
||||
wget https://github.com/citusdata/pg_cron/archive/refs/tags/v1.6.0.tar.gz -O pg_cron.tar.gz && \
|
||||
echo "383a627867d730222c272bfd25cd5e151c578d73f696d32910c7db8c665cc7db pg_cron.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_cron-src && cd pg_cron-src && tar xzf ../pg_cron.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
@@ -495,9 +581,13 @@ RUN wget https://github.com/citusdata/pg_cron/archive/refs/tags/v1.6.0.tar.gz -O
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS rdkit-pg-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
RUN apt-get update && \
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
|
||||
esac && \
|
||||
apt-get update && \
|
||||
apt-get install -y \
|
||||
cmake \
|
||||
libboost-iostreams1.74-dev \
|
||||
@@ -507,7 +597,10 @@ RUN apt-get update && \
|
||||
libeigen3-dev
|
||||
|
||||
ENV PATH="/usr/local/pgsql/bin/:/usr/local/pgsql/:$PATH"
|
||||
RUN wget https://github.com/rdkit/rdkit/archive/refs/tags/Release_2023_03_3.tar.gz -O rdkit.tar.gz && \
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
|
||||
esac && \
|
||||
wget https://github.com/rdkit/rdkit/archive/refs/tags/Release_2023_03_3.tar.gz -O rdkit.tar.gz && \
|
||||
echo "bdbf9a2e6988526bfeb8c56ce3cdfe2998d60ac289078e2215374288185e8c8d rdkit.tar.gz" | sha256sum --check && \
|
||||
mkdir rdkit-src && cd rdkit-src && tar xzf ../rdkit.tar.gz --strip-components=1 -C . && \
|
||||
cmake \
|
||||
@@ -544,10 +637,14 @@ RUN wget https://github.com/rdkit/rdkit/archive/refs/tags/Release_2023_03_3.tar.
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS pg-uuidv7-pg-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
ENV PATH="/usr/local/pgsql/bin/:$PATH"
|
||||
RUN wget https://github.com/fboulnois/pg_uuidv7/archive/refs/tags/v1.0.1.tar.gz -O pg_uuidv7.tar.gz && \
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
|
||||
esac && \
|
||||
wget https://github.com/fboulnois/pg_uuidv7/archive/refs/tags/v1.0.1.tar.gz -O pg_uuidv7.tar.gz && \
|
||||
echo "0d0759ab01b7fb23851ecffb0bce27822e1868a4a5819bfd276101c716637a7a pg_uuidv7.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_uuidv7-src && cd pg_uuidv7-src && tar xzf ../pg_uuidv7.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
@@ -561,10 +658,14 @@ RUN wget https://github.com/fboulnois/pg_uuidv7/archive/refs/tags/v1.0.1.tar.gz
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS pg-roaringbitmap-pg-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
ENV PATH="/usr/local/pgsql/bin/:$PATH"
|
||||
RUN wget https://github.com/ChenHuajun/pg_roaringbitmap/archive/refs/tags/v0.5.4.tar.gz -O pg_roaringbitmap.tar.gz && \
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 extensions is not supported yet by pg_roaringbitmap. Quit" && exit 0;; \
|
||||
esac && \
|
||||
wget https://github.com/ChenHuajun/pg_roaringbitmap/archive/refs/tags/v0.5.4.tar.gz -O pg_roaringbitmap.tar.gz && \
|
||||
echo "b75201efcb1c2d1b014ec4ae6a22769cc7a224e6e406a587f5784a37b6b5a2aa pg_roaringbitmap.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_roaringbitmap-src && cd pg_roaringbitmap-src && tar xzf ../pg_roaringbitmap.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
@@ -578,10 +679,14 @@ RUN wget https://github.com/ChenHuajun/pg_roaringbitmap/archive/refs/tags/v0.5.4
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS pg-semver-pg-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
ENV PATH="/usr/local/pgsql/bin/:$PATH"
|
||||
RUN wget https://github.com/theory/pg-semver/archive/refs/tags/v0.32.1.tar.gz -O pg_semver.tar.gz && \
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 is not supported yet by pg_semver. Quit" && exit 0;; \
|
||||
esac && \
|
||||
wget https://github.com/theory/pg-semver/archive/refs/tags/v0.32.1.tar.gz -O pg_semver.tar.gz && \
|
||||
echo "fbdaf7512026d62eec03fad8687c15ed509b6ba395bff140acd63d2e4fbe25d7 pg_semver.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_semver-src && cd pg_semver-src && tar xzf ../pg_semver.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
@@ -620,10 +725,14 @@ RUN case "${PG_VERSION}" in \
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS pg-anon-pg-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
ENV PATH="/usr/local/pgsql/bin/:$PATH"
|
||||
RUN wget https://github.com/neondatabase/postgresql_anonymizer/archive/refs/tags/neon_1.1.1.tar.gz -O pg_anon.tar.gz && \
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "postgresql_anonymizer does not yet support PG17" && exit 0;; \
|
||||
esac && \
|
||||
wget https://github.com/neondatabase/postgresql_anonymizer/archive/refs/tags/neon_1.1.1.tar.gz -O pg_anon.tar.gz && \
|
||||
echo "321ea8d5c1648880aafde850a2c576e4a9e7b9933a34ce272efc839328999fa9 pg_anon.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_anon-src && cd pg_anon-src && tar xzf ../pg_anon.tar.gz --strip-components=1 -C . && \
|
||||
find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt &&\
|
||||
@@ -641,6 +750,7 @@ RUN wget https://github.com/neondatabase/postgresql_anonymizer/archive/refs/tag
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS rust-extensions-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
RUN apt-get update && \
|
||||
@@ -651,9 +761,11 @@ ENV HOME=/home/nonroot
|
||||
ENV PATH="/home/nonroot/.cargo/bin:/usr/local/pgsql/bin/:$PATH"
|
||||
USER nonroot
|
||||
WORKDIR /home/nonroot
|
||||
ARG PG_VERSION
|
||||
|
||||
RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && \
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 is not supported yet by pgrx. Quit" && exit 0;; \
|
||||
esac && \
|
||||
curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && \
|
||||
chmod +x rustup-init && \
|
||||
./rustup-init -y --no-modify-path --profile minimal --default-toolchain stable && \
|
||||
rm rustup-init && \
|
||||
@@ -672,7 +784,10 @@ USER root
|
||||
FROM rust-extensions-build AS pg-jsonschema-pg-build
|
||||
ARG PG_VERSION
|
||||
|
||||
RUN wget https://github.com/supabase/pg_jsonschema/archive/refs/tags/v0.3.1.tar.gz -O pg_jsonschema.tar.gz && \
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "pg_jsonschema does not yet have a release that supports pg17" && exit 0;; \
|
||||
esac && \
|
||||
wget https://github.com/supabase/pg_jsonschema/archive/refs/tags/v0.3.1.tar.gz -O pg_jsonschema.tar.gz && \
|
||||
echo "61df3db1ed83cf24f6aa39c826f8818bfa4f0bd33b587fd6b2b1747985642297 pg_jsonschema.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_jsonschema-src && cd pg_jsonschema-src && tar xzf ../pg_jsonschema.tar.gz --strip-components=1 -C . && \
|
||||
# see commit 252b3685a27a0f4c31a0f91e983c6314838e89e8
|
||||
@@ -694,7 +809,10 @@ RUN wget https://github.com/supabase/pg_jsonschema/archive/refs/tags/v0.3.1.tar.
|
||||
FROM rust-extensions-build AS pg-graphql-pg-build
|
||||
ARG PG_VERSION
|
||||
|
||||
RUN wget https://github.com/supabase/pg_graphql/archive/refs/tags/v1.5.7.tar.gz -O pg_graphql.tar.gz && \
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "pg_graphql does not yet have a release that supports pg17 as of now" && exit 0;; \
|
||||
esac && \
|
||||
wget https://github.com/supabase/pg_graphql/archive/refs/tags/v1.5.7.tar.gz -O pg_graphql.tar.gz && \
|
||||
echo "2b3e567a5b31019cb97ae0e33263c1bcc28580be5a444ac4c8ece5c4be2aea41 pg_graphql.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_graphql-src && cd pg_graphql-src && tar xzf ../pg_graphql.tar.gz --strip-components=1 -C . && \
|
||||
sed -i 's/pgrx = "=0.11.3"/pgrx = { version = "0.11.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
@@ -714,7 +832,10 @@ FROM rust-extensions-build AS pg-tiktoken-pg-build
|
||||
ARG PG_VERSION
|
||||
|
||||
# 26806147b17b60763039c6a6878884c41a262318 made on 26/09/2023
|
||||
RUN wget https://github.com/kelvich/pg_tiktoken/archive/26806147b17b60763039c6a6878884c41a262318.tar.gz -O pg_tiktoken.tar.gz && \
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "pg_tiktoken does not have versions, nor support for pg17" && exit 0;; \
|
||||
esac && \
|
||||
wget https://github.com/kelvich/pg_tiktoken/archive/26806147b17b60763039c6a6878884c41a262318.tar.gz -O pg_tiktoken.tar.gz && \
|
||||
echo "e64e55aaa38c259512d3e27c572da22c4637418cf124caba904cd50944e5004e pg_tiktoken.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_tiktoken-src && cd pg_tiktoken-src && tar xzf ../pg_tiktoken.tar.gz --strip-components=1 -C . && \
|
||||
# TODO update pgrx version in the pg_tiktoken repo and remove this line
|
||||
@@ -733,7 +854,10 @@ RUN wget https://github.com/kelvich/pg_tiktoken/archive/26806147b17b60763039c6a6
|
||||
FROM rust-extensions-build AS pg-pgx-ulid-build
|
||||
ARG PG_VERSION
|
||||
|
||||
RUN wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.1.5.tar.gz -O pgx_ulid.tar.gz && \
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "pgx_ulid does not support pg17 as of the latest version (0.1.5)" && exit 0;; \
|
||||
esac && \
|
||||
wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.1.5.tar.gz -O pgx_ulid.tar.gz && \
|
||||
echo "9d1659a2da65af0133d5451c454de31b37364e3502087dadf579f790bc8bef17 pgx_ulid.tar.gz" | sha256sum --check && \
|
||||
mkdir pgx_ulid-src && cd pgx_ulid-src && tar xzf ../pgx_ulid.tar.gz --strip-components=1 -C . && \
|
||||
sed -i 's/pgrx = "^0.11.2"/pgrx = { version = "=0.11.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
@@ -748,10 +872,14 @@ RUN wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.1.5.tar.gz -
|
||||
#########################################################################################
|
||||
|
||||
FROM build-deps AS wal2json-pg-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
ENV PATH="/usr/local/pgsql/bin/:$PATH"
|
||||
RUN wget https://github.com/eulerto/wal2json/archive/refs/tags/wal2json_2_5.tar.gz && \
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "We'll need to update wal2json to 2.6+ for pg17 support" && exit 0;; \
|
||||
esac && \
|
||||
wget https://github.com/eulerto/wal2json/archive/refs/tags/wal2json_2_5.tar.gz && \
|
||||
echo "b516653575541cf221b99cf3f8be9b6821f6dbcfc125675c85f35090f824f00e wal2json_2_5.tar.gz" | sha256sum --check && \
|
||||
mkdir wal2json-src && cd wal2json-src && tar xzf ../wal2json_2_5.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
@@ -764,10 +892,14 @@ RUN wget https://github.com/eulerto/wal2json/archive/refs/tags/wal2json_2_5.tar.
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS pg-ivm-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
ENV PATH="/usr/local/pgsql/bin/:$PATH"
|
||||
RUN wget https://github.com/sraoss/pg_ivm/archive/refs/tags/v1.7.tar.gz -O pg_ivm.tar.gz && \
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "We'll need to update pg_ivm to 1.9+ for pg17 support" && exit 0;; \
|
||||
esac && \
|
||||
wget https://github.com/sraoss/pg_ivm/archive/refs/tags/v1.7.tar.gz -O pg_ivm.tar.gz && \
|
||||
echo "ebfde04f99203c7be4b0e873f91104090e2e83e5429c32ac242d00f334224d5e pg_ivm.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_ivm-src && cd pg_ivm-src && tar xzf ../pg_ivm.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
@@ -781,10 +913,14 @@ RUN wget https://github.com/sraoss/pg_ivm/archive/refs/tags/v1.7.tar.gz -O pg_iv
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS pg-partman-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
ENV PATH="/usr/local/pgsql/bin/:$PATH"
|
||||
RUN wget https://github.com/pgpartman/pg_partman/archive/refs/tags/v5.0.1.tar.gz -O pg_partman.tar.gz && \
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "pg_partman doesn't support PG17 yet" && exit 0;; \
|
||||
esac && \
|
||||
wget https://github.com/pgpartman/pg_partman/archive/refs/tags/v5.0.1.tar.gz -O pg_partman.tar.gz && \
|
||||
echo "75b541733a9659a6c90dbd40fccb904a630a32880a6e3044d0c4c5f4c8a65525 pg_partman.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_partman-src && cd pg_partman-src && tar xzf ../pg_partman.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
@@ -854,8 +990,8 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) \
|
||||
case "${PG_VERSION}" in \
|
||||
"v14" | "v15") \
|
||||
;; \
|
||||
"v16") \
|
||||
echo "Skipping HNSW for PostgreSQL 16" && exit 0 \
|
||||
"v16" | "v17") \
|
||||
echo "Skipping HNSW for PostgreSQL ${PG_VERSION}" && exit 0 \
|
||||
;; \
|
||||
*) \
|
||||
echo "unexpected PostgreSQL version" && exit 1 \
|
||||
@@ -899,7 +1035,7 @@ FROM neon-pg-ext-build AS postgres-cleanup-layer
|
||||
COPY --from=neon-pg-ext-build /usr/local/pgsql /usr/local/pgsql
|
||||
|
||||
# Remove binaries from /bin/ that we won't use (or would manually copy & install otherwise)
|
||||
RUN cd /usr/local/pgsql/bin && rm ecpg raster2pgsql shp2pgsql pgtopo_export pgtopo_import pgsql2shp
|
||||
RUN cd /usr/local/pgsql/bin && rm -f ecpg raster2pgsql shp2pgsql pgtopo_export pgtopo_import pgsql2shp
|
||||
|
||||
# Remove headers that we won't need anymore - we've completed installation of all extensions
|
||||
RUN rm -r /usr/local/pgsql/include
|
||||
@@ -918,7 +1054,10 @@ RUN rm /usr/local/pgsql/lib/lib*.a
|
||||
|
||||
FROM neon-pg-ext-build AS neon-pg-ext-test
|
||||
ARG PG_VERSION
|
||||
RUN mkdir /ext-src
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
|
||||
esac && \
|
||||
mkdir /ext-src
|
||||
|
||||
#COPY --from=postgis-build /postgis.tar.gz /ext-src/
|
||||
#COPY --from=postgis-build /sfcgal/* /usr
|
||||
@@ -956,18 +1095,39 @@ COPY --from=pg-anon-pg-build /pg_anon.tar.gz /ext-src
|
||||
COPY patches/pg_anon.patch /ext-src
|
||||
COPY --from=pg-ivm-build /pg_ivm.tar.gz /ext-src
|
||||
COPY --from=pg-partman-build /pg_partman.tar.gz /ext-src
|
||||
RUN cd /ext-src/ && for f in *.tar.gz; \
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
|
||||
esac && \
|
||||
cd /ext-src/ && for f in *.tar.gz; \
|
||||
do echo $f; dname=$(echo $f | sed 's/\.tar.*//')-src; \
|
||||
rm -rf $dname; mkdir $dname; tar xzf $f --strip-components=1 -C $dname \
|
||||
|| exit 1; rm -f $f; done
|
||||
RUN cd /ext-src/pgvector-src && patch -p1 <../pgvector.patch
|
||||
RUN cd /ext-src/rum-src && patch -p1 <../rum.patch
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
|
||||
esac && \
|
||||
cd /ext-src/rum-src && patch -p1 <../rum.patch
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
|
||||
esac && \
|
||||
cd /ext-src/pgvector-src && patch -p1 <../pgvector.patch
|
||||
# cmake is required for the h3 test
|
||||
RUN apt-get update && apt-get install -y cmake
|
||||
RUN cd /ext-src/pg_hint_plan-src && patch -p1 < /ext-src/pg_hint_plan.patch
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
|
||||
esac && \
|
||||
apt-get update && apt-get install -y cmake
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
|
||||
esac && \
|
||||
cd /ext-src/pg_hint_plan-src && patch -p1 < /ext-src/pg_hint_plan.patch
|
||||
COPY --chmod=755 docker-compose/run-tests.sh /run-tests.sh
|
||||
RUN patch -p1 </ext-src/pg_anon.patch
|
||||
RUN patch -p1 </ext-src/pg_cron.patch
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
|
||||
esac && \
|
||||
patch -p1 </ext-src/pg_anon.patch
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
|
||||
esac && \
|
||||
patch -p1 </ext-src/pg_cron.patch
|
||||
ENV PATH=/usr/local/pgsql/bin:$PATH
|
||||
ENV PGHOST=compute
|
||||
ENV PGPORT=55433
|
||||
|
||||
56
Makefile
56
Makefile
@@ -119,6 +119,8 @@ $(POSTGRES_INSTALL_DIR)/build/%/config.status:
|
||||
# I'm not sure why it wouldn't work, but this is the only place (apart from
|
||||
# the "build-all-versions" entry points) where direct mention of PostgreSQL
|
||||
# versions is used.
|
||||
.PHONY: postgres-configure-v17
|
||||
postgres-configure-v17: $(POSTGRES_INSTALL_DIR)/build/v17/config.status
|
||||
.PHONY: postgres-configure-v16
|
||||
postgres-configure-v16: $(POSTGRES_INSTALL_DIR)/build/v16/config.status
|
||||
.PHONY: postgres-configure-v15
|
||||
@@ -215,29 +217,31 @@ neon-pg-clean-ext-%:
|
||||
# they depend on openssl and other libraries that are not included in our
|
||||
# Rust build.
|
||||
.PHONY: walproposer-lib
|
||||
walproposer-lib: neon-pg-ext-v16
|
||||
walproposer-lib: neon-pg-ext-v17
|
||||
+@echo "Compiling walproposer-lib"
|
||||
mkdir -p $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
|
||||
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v16/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
|
||||
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
|
||||
-C $(POSTGRES_INSTALL_DIR)/build/walproposer-lib \
|
||||
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile walproposer-lib
|
||||
cp $(POSTGRES_INSTALL_DIR)/v16/lib/libpgport.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
|
||||
cp $(POSTGRES_INSTALL_DIR)/v16/lib/libpgcommon.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
|
||||
ifeq ($(UNAME_S),Linux)
|
||||
cp $(POSTGRES_INSTALL_DIR)/v17/lib/libpgport.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
|
||||
cp $(POSTGRES_INSTALL_DIR)/v17/lib/libpgcommon.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
|
||||
$(AR) d $(POSTGRES_INSTALL_DIR)/build/walproposer-lib/libpgport.a \
|
||||
pg_strong_random.o
|
||||
$(AR) d $(POSTGRES_INSTALL_DIR)/build/walproposer-lib/libpgcommon.a \
|
||||
pg_crc32c.o \
|
||||
hmac_openssl.o \
|
||||
checksum_helper.o \
|
||||
cryptohash_openssl.o \
|
||||
scram-common.o \
|
||||
hmac_openssl.o \
|
||||
md5_common.o \
|
||||
checksum_helper.o
|
||||
parse_manifest.o \
|
||||
scram-common.o
|
||||
ifeq ($(UNAME_S),Linux)
|
||||
$(AR) d $(POSTGRES_INSTALL_DIR)/build/walproposer-lib/libpgcommon.a \
|
||||
pg_crc32c.o
|
||||
endif
|
||||
|
||||
.PHONY: walproposer-lib-clean
|
||||
walproposer-lib-clean:
|
||||
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v16/bin/pg_config \
|
||||
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config \
|
||||
-C $(POSTGRES_INSTALL_DIR)/build/walproposer-lib \
|
||||
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile clean
|
||||
|
||||
@@ -245,38 +249,44 @@ walproposer-lib-clean:
|
||||
neon-pg-ext: \
|
||||
neon-pg-ext-v14 \
|
||||
neon-pg-ext-v15 \
|
||||
neon-pg-ext-v16
|
||||
neon-pg-ext-v16 \
|
||||
neon-pg-ext-v17
|
||||
|
||||
.PHONY: neon-pg-clean-ext
|
||||
neon-pg-clean-ext: \
|
||||
neon-pg-clean-ext-v14 \
|
||||
neon-pg-clean-ext-v15 \
|
||||
neon-pg-clean-ext-v16
|
||||
neon-pg-clean-ext-v16 \
|
||||
neon-pg-clean-ext-v17
|
||||
|
||||
# shorthand to build all Postgres versions
|
||||
.PHONY: postgres
|
||||
postgres: \
|
||||
postgres-v14 \
|
||||
postgres-v15 \
|
||||
postgres-v16
|
||||
postgres-v16 \
|
||||
postgres-v17
|
||||
|
||||
.PHONY: postgres-headers
|
||||
postgres-headers: \
|
||||
postgres-headers-v14 \
|
||||
postgres-headers-v15 \
|
||||
postgres-headers-v16
|
||||
postgres-headers-v16 \
|
||||
postgres-headers-v17
|
||||
|
||||
.PHONY: postgres-clean
|
||||
postgres-clean: \
|
||||
postgres-clean-v14 \
|
||||
postgres-clean-v15 \
|
||||
postgres-clean-v16
|
||||
postgres-clean-v16 \
|
||||
postgres-clean-v17
|
||||
|
||||
.PHONY: postgres-check
|
||||
postgres-check: \
|
||||
postgres-check-v14 \
|
||||
postgres-check-v15 \
|
||||
postgres-check-v16
|
||||
postgres-check-v16 \
|
||||
postgres-check-v17
|
||||
|
||||
# This doesn't remove the effects of 'configure'.
|
||||
.PHONY: clean
|
||||
@@ -321,13 +331,13 @@ postgres-%-pgindent: postgres-%-pg-bsd-indent postgres-%-typedefs.list
|
||||
rm -f pg*.BAK
|
||||
|
||||
# Indent pxgn/neon.
|
||||
.PHONY: pgindent
|
||||
neon-pgindent: postgres-v16-pg-bsd-indent neon-pg-ext-v16
|
||||
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v16/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
|
||||
FIND_TYPEDEF=$(ROOT_PROJECT_DIR)/vendor/postgres-v16/src/tools/find_typedef \
|
||||
INDENT=$(POSTGRES_INSTALL_DIR)/build/v16/src/tools/pg_bsd_indent/pg_bsd_indent \
|
||||
PGINDENT_SCRIPT=$(ROOT_PROJECT_DIR)/vendor/postgres-v16/src/tools/pgindent/pgindent \
|
||||
-C $(POSTGRES_INSTALL_DIR)/build/neon-v16 \
|
||||
.PHONY: neon-pgindent
|
||||
neon-pgindent: postgres-v17-pg-bsd-indent neon-pg-ext-v17
|
||||
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
|
||||
FIND_TYPEDEF=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/find_typedef \
|
||||
INDENT=$(POSTGRES_INSTALL_DIR)/build/v17/src/tools/pg_bsd_indent/pg_bsd_indent \
|
||||
PGINDENT_SCRIPT=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/pgindent/pgindent \
|
||||
-C $(POSTGRES_INSTALL_DIR)/build/neon-v17 \
|
||||
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile pgindent
|
||||
|
||||
|
||||
|
||||
@@ -64,6 +64,12 @@ brew install protobuf openssl flex bison icu4c pkg-config
|
||||
echo 'export PATH="$(brew --prefix openssl)/bin:$PATH"' >> ~/.zshrc
|
||||
```
|
||||
|
||||
If you get errors about missing `m4` you may have to install it manually:
|
||||
```
|
||||
brew install m4
|
||||
brew link --force m4
|
||||
```
|
||||
|
||||
2. [Install Rust](https://www.rust-lang.org/tools/install)
|
||||
```
|
||||
# recommended approach from https://www.rust-lang.org/tools/install
|
||||
|
||||
@@ -1052,26 +1052,19 @@ impl ComputeNode {
|
||||
let pg_process = self.start_postgres(pspec.storage_auth_token.clone())?;
|
||||
|
||||
let config_time = Utc::now();
|
||||
if pspec.spec.mode == ComputeMode::Primary {
|
||||
if !pspec.spec.skip_pg_catalog_updates {
|
||||
let pgdata_path = Path::new(&self.pgdata);
|
||||
// temporarily reset max_cluster_size in config
|
||||
// to avoid the possibility of hitting the limit, while we are applying config:
|
||||
// creating new extensions, roles, etc...
|
||||
config::with_compute_ctl_tmp_override(
|
||||
pgdata_path,
|
||||
"neon.max_cluster_size=-1",
|
||||
|| {
|
||||
self.pg_reload_conf()?;
|
||||
|
||||
self.apply_config(&compute_state)?;
|
||||
|
||||
Ok(())
|
||||
},
|
||||
)?;
|
||||
if pspec.spec.mode == ComputeMode::Primary && !pspec.spec.skip_pg_catalog_updates {
|
||||
let pgdata_path = Path::new(&self.pgdata);
|
||||
// temporarily reset max_cluster_size in config
|
||||
// to avoid the possibility of hitting the limit, while we are applying config:
|
||||
// creating new extensions, roles, etc...
|
||||
config::with_compute_ctl_tmp_override(pgdata_path, "neon.max_cluster_size=-1", || {
|
||||
self.pg_reload_conf()?;
|
||||
}
|
||||
self.post_apply_config()?;
|
||||
|
||||
self.apply_config(&compute_state)?;
|
||||
|
||||
Ok(())
|
||||
})?;
|
||||
self.pg_reload_conf()?;
|
||||
}
|
||||
|
||||
let startup_end_time = Utc::now();
|
||||
|
||||
@@ -124,6 +124,7 @@ fn parse_pg_version(human_version: &str) -> &str {
|
||||
"14" => return "v14",
|
||||
"15" => return "v15",
|
||||
"16" => return "v16",
|
||||
"17" => return "v17",
|
||||
_ => {}
|
||||
},
|
||||
_ => {}
|
||||
|
||||
@@ -22,9 +22,10 @@ use compute_api::spec::{Database, GenericOption, GenericOptions, PgIdent, Role};
|
||||
|
||||
const POSTGRES_WAIT_TIMEOUT: Duration = Duration::from_millis(60 * 1000); // milliseconds
|
||||
|
||||
/// Escape a string for including it in a SQL literal. Wrapping the result
|
||||
/// with `E'{}'` or `'{}'` is not required, as it returns a ready-to-use
|
||||
/// SQL string literal, e.g. `'db'''` or `E'db\\'`.
|
||||
/// Escape a string for including it in a SQL literal.
|
||||
///
|
||||
/// Wrapping the result with `E'{}'` or `'{}'` is not required,
|
||||
/// as it returns a ready-to-use SQL string literal, e.g. `'db'''` or `E'db\\'`.
|
||||
/// See <https://github.com/postgres/postgres/blob/da98d005cdbcd45af563d0c4ac86d0e9772cd15f/src/backend/utils/adt/quote.c#L47>
|
||||
/// for the original implementation.
|
||||
pub fn escape_literal(s: &str) -> String {
|
||||
|
||||
@@ -640,6 +640,8 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local
|
||||
}
|
||||
Some(("branch", branch_match)) => {
|
||||
let tenant_id = get_tenant_id(branch_match, env)?;
|
||||
let new_timeline_id =
|
||||
parse_timeline_id(branch_match)?.unwrap_or(TimelineId::generate());
|
||||
let new_branch_name = branch_match
|
||||
.get_one::<String>("branch-name")
|
||||
.ok_or_else(|| anyhow!("No branch name provided"))?;
|
||||
@@ -658,7 +660,6 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local
|
||||
.map(|lsn_str| Lsn::from_str(lsn_str))
|
||||
.transpose()
|
||||
.context("Failed to parse ancestor start Lsn from the request")?;
|
||||
let new_timeline_id = TimelineId::generate();
|
||||
let storage_controller = StorageController::from_env(env);
|
||||
let create_req = TimelineCreateRequest {
|
||||
new_timeline_id,
|
||||
@@ -1570,7 +1571,6 @@ fn cli() -> Command {
|
||||
.value_parser(value_parser!(PathBuf))
|
||||
.value_name("config")
|
||||
)
|
||||
.arg(pg_version_arg.clone())
|
||||
.arg(force_arg)
|
||||
)
|
||||
.subcommand(
|
||||
@@ -1583,6 +1583,7 @@ fn cli() -> Command {
|
||||
.subcommand(Command::new("branch")
|
||||
.about("Create a new timeline, using another timeline as a base, copying its data")
|
||||
.arg(tenant_id_arg.clone())
|
||||
.arg(timeline_id_arg.clone())
|
||||
.arg(branch_name_arg.clone())
|
||||
.arg(Arg::new("ancestor-branch-name").long("ancestor-branch-name")
|
||||
.help("Use last Lsn of another timeline (and its data) as base when creating the new timeline. The timeline gets resolved by its branch name.").required(false))
|
||||
|
||||
@@ -342,7 +342,7 @@ impl LocalEnv {
|
||||
|
||||
#[allow(clippy::manual_range_patterns)]
|
||||
match pg_version {
|
||||
14 | 15 | 16 => Ok(path.join(format!("v{pg_version}"))),
|
||||
14 | 15 | 16 | 17 => Ok(path.join(format!("v{pg_version}"))),
|
||||
_ => bail!("Unsupported postgres version: {}", pg_version),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -75,14 +75,14 @@ impl PageServerNode {
|
||||
}
|
||||
}
|
||||
|
||||
fn pageserver_make_identity_toml(&self, node_id: NodeId) -> toml_edit::Document {
|
||||
toml_edit::Document::from_str(&format!("id={node_id}")).unwrap()
|
||||
fn pageserver_make_identity_toml(&self, node_id: NodeId) -> toml_edit::DocumentMut {
|
||||
toml_edit::DocumentMut::from_str(&format!("id={node_id}")).unwrap()
|
||||
}
|
||||
|
||||
fn pageserver_init_make_toml(
|
||||
&self,
|
||||
conf: NeonLocalInitPageserverConf,
|
||||
) -> anyhow::Result<toml_edit::Document> {
|
||||
) -> anyhow::Result<toml_edit::DocumentMut> {
|
||||
assert_eq!(&PageServerConf::from(&conf), &self.conf, "during neon_local init, we derive the runtime state of ps conf (self.conf) from the --config flag fully");
|
||||
|
||||
// TODO(christian): instead of what we do here, create a pageserver_api::config::ConfigToml (PR #7656)
|
||||
@@ -137,9 +137,9 @@ impl PageServerNode {
|
||||
|
||||
// Turn `overrides` into a toml document.
|
||||
// TODO: above code is legacy code, it should be refactored to use toml_edit directly.
|
||||
let mut config_toml = toml_edit::Document::new();
|
||||
let mut config_toml = toml_edit::DocumentMut::new();
|
||||
for fragment_str in overrides {
|
||||
let fragment = toml_edit::Document::from_str(&fragment_str)
|
||||
let fragment = toml_edit::DocumentMut::from_str(&fragment_str)
|
||||
.expect("all fragments in `overrides` are valid toml documents, this function controls that");
|
||||
for (key, item) in fragment.iter() {
|
||||
config_toml.insert(key, item.clone());
|
||||
|
||||
@@ -28,6 +28,7 @@ use utils::{
|
||||
auth::{encode_from_key_file, Claims, Scope},
|
||||
id::{NodeId, TenantId},
|
||||
};
|
||||
use whoami::username;
|
||||
|
||||
pub struct StorageController {
|
||||
env: LocalEnv,
|
||||
@@ -183,7 +184,7 @@ impl StorageController {
|
||||
/// to other versions if that one isn't found. Some automated tests create circumstances
|
||||
/// where only one version is available in pg_distrib_dir, such as `test_remote_extensions`.
|
||||
async fn get_pg_dir(&self, dir_name: &str) -> anyhow::Result<Utf8PathBuf> {
|
||||
let prefer_versions = [STORAGE_CONTROLLER_POSTGRES_VERSION, 15, 14];
|
||||
let prefer_versions = [STORAGE_CONTROLLER_POSTGRES_VERSION, 16, 15, 14];
|
||||
|
||||
for v in prefer_versions {
|
||||
let path = Utf8PathBuf::from_path_buf(self.env.pg_dir(v, dir_name)?).unwrap();
|
||||
@@ -211,7 +212,16 @@ impl StorageController {
|
||||
/// Readiness check for our postgres process
|
||||
async fn pg_isready(&self, pg_bin_dir: &Utf8Path, postgres_port: u16) -> anyhow::Result<bool> {
|
||||
let bin_path = pg_bin_dir.join("pg_isready");
|
||||
let args = ["-h", "localhost", "-p", &format!("{}", postgres_port)];
|
||||
let args = [
|
||||
"-h",
|
||||
"localhost",
|
||||
"-U",
|
||||
&username(),
|
||||
"-d",
|
||||
DB_NAME,
|
||||
"-p",
|
||||
&format!("{}", postgres_port),
|
||||
];
|
||||
let exitcode = Command::new(bin_path).args(args).spawn()?.wait().await?;
|
||||
|
||||
Ok(exitcode.success())
|
||||
@@ -225,7 +235,11 @@ impl StorageController {
|
||||
///
|
||||
/// Returns the database url
|
||||
pub async fn setup_database(&self, postgres_port: u16) -> anyhow::Result<String> {
|
||||
let database_url = format!("postgresql://localhost:{}/{DB_NAME}", postgres_port);
|
||||
let database_url = format!(
|
||||
"postgresql://{}@localhost:{}/{DB_NAME}",
|
||||
&username(),
|
||||
postgres_port
|
||||
);
|
||||
|
||||
let pg_bin_dir = self.get_pg_bin_dir().await?;
|
||||
let createdb_path = pg_bin_dir.join("createdb");
|
||||
@@ -235,6 +249,10 @@ impl StorageController {
|
||||
"localhost",
|
||||
"-p",
|
||||
&format!("{}", postgres_port),
|
||||
"-U",
|
||||
&username(),
|
||||
"-O",
|
||||
&username(),
|
||||
DB_NAME,
|
||||
])
|
||||
.output()
|
||||
@@ -271,7 +289,7 @@ impl StorageController {
|
||||
// But tokio-postgres fork doesn't have this upstream commit:
|
||||
// https://github.com/sfackler/rust-postgres/commit/cb609be758f3fb5af537f04b584a2ee0cebd5e79
|
||||
// => we should rebase our fork => TODO https://github.com/neondatabase/neon/issues/8399
|
||||
.user(&whoami::username())
|
||||
.user(&username())
|
||||
.dbname(DB_NAME)
|
||||
.connect(tokio_postgres::NoTls)
|
||||
.await
|
||||
@@ -328,6 +346,12 @@ impl StorageController {
|
||||
let pg_log_path = pg_data_path.join("postgres.log");
|
||||
|
||||
if !tokio::fs::try_exists(&pg_data_path).await? {
|
||||
let initdb_args = ["-D", pg_data_path.as_ref(), "--username", &username()];
|
||||
tracing::info!(
|
||||
"Initializing storage controller database with args: {:?}",
|
||||
initdb_args
|
||||
);
|
||||
|
||||
// Initialize empty database
|
||||
let initdb_path = pg_bin_dir.join("initdb");
|
||||
let mut child = Command::new(&initdb_path)
|
||||
@@ -335,7 +359,7 @@ impl StorageController {
|
||||
("LD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
|
||||
("DYLD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
|
||||
])
|
||||
.args(["-D", pg_data_path.as_ref()])
|
||||
.args(initdb_args)
|
||||
.spawn()
|
||||
.expect("Failed to spawn initdb");
|
||||
let status = child.wait().await?;
|
||||
@@ -364,8 +388,14 @@ impl StorageController {
|
||||
pg_data_path.as_ref(),
|
||||
"-l",
|
||||
pg_log_path.as_ref(),
|
||||
"-U",
|
||||
&username(),
|
||||
"start",
|
||||
];
|
||||
tracing::info!(
|
||||
"Starting storage controller database with args: {:?}",
|
||||
db_start_args
|
||||
);
|
||||
|
||||
background_process::start_process(
|
||||
"storage_controller_db",
|
||||
|
||||
@@ -4,8 +4,8 @@ use std::{str::FromStr, time::Duration};
|
||||
use clap::{Parser, Subcommand};
|
||||
use pageserver_api::{
|
||||
controller_api::{
|
||||
NodeAvailabilityWrapper, NodeDescribeResponse, ShardSchedulingPolicy, TenantCreateRequest,
|
||||
TenantDescribeResponse, TenantPolicyRequest,
|
||||
NodeAvailabilityWrapper, NodeDescribeResponse, NodeShardResponse, ShardSchedulingPolicy,
|
||||
TenantCreateRequest, TenantDescribeResponse, TenantPolicyRequest,
|
||||
},
|
||||
models::{
|
||||
EvictionPolicy, EvictionPolicyLayerAccessThreshold, LocationConfigSecondary,
|
||||
@@ -80,7 +80,10 @@ enum Command {
|
||||
/// List nodes known to the storage controller
|
||||
Nodes {},
|
||||
/// List tenants known to the storage controller
|
||||
Tenants {},
|
||||
Tenants {
|
||||
/// If this field is set, it will list the tenants on a specific node
|
||||
node_id: Option<NodeId>,
|
||||
},
|
||||
/// Create a new tenant in the storage controller, and by extension on pageservers.
|
||||
TenantCreate {
|
||||
#[arg(long)]
|
||||
@@ -336,7 +339,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
listen_pg_port,
|
||||
listen_http_addr,
|
||||
listen_http_port,
|
||||
availability_zone_id: Some(availability_zone_id),
|
||||
availability_zone_id,
|
||||
}),
|
||||
)
|
||||
.await?;
|
||||
@@ -403,7 +406,41 @@ async fn main() -> anyhow::Result<()> {
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
Command::Tenants {} => {
|
||||
Command::Tenants {
|
||||
node_id: Some(node_id),
|
||||
} => {
|
||||
let describe_response = storcon_client
|
||||
.dispatch::<(), NodeShardResponse>(
|
||||
Method::GET,
|
||||
format!("control/v1/node/{node_id}/shards"),
|
||||
None,
|
||||
)
|
||||
.await?;
|
||||
let shards = describe_response.shards;
|
||||
let mut table = comfy_table::Table::new();
|
||||
table.set_header([
|
||||
"Shard",
|
||||
"Intended Primary/Secondary",
|
||||
"Observed Primary/Secondary",
|
||||
]);
|
||||
for shard in shards {
|
||||
table.add_row([
|
||||
format!("{}", shard.tenant_shard_id),
|
||||
match shard.is_intended_secondary {
|
||||
None => "".to_string(),
|
||||
Some(true) => "Secondary".to_string(),
|
||||
Some(false) => "Primary".to_string(),
|
||||
},
|
||||
match shard.is_observed_secondary {
|
||||
None => "".to_string(),
|
||||
Some(true) => "Secondary".to_string(),
|
||||
Some(false) => "Primary".to_string(),
|
||||
},
|
||||
]);
|
||||
}
|
||||
println!("{table}");
|
||||
}
|
||||
Command::Tenants { node_id: None } => {
|
||||
let mut resp = storcon_client
|
||||
.dispatch::<(), Vec<TenantDescribeResponse>>(
|
||||
Method::GET,
|
||||
|
||||
@@ -68,6 +68,7 @@ macro_rules! register_uint_gauge {
|
||||
static INTERNAL_REGISTRY: Lazy<Registry> = Lazy::new(Registry::new);
|
||||
|
||||
/// Register a collector in the internal registry. MUST be called before the first call to `gather()`.
|
||||
///
|
||||
/// Otherwise, we can have a deadlock in the `gather()` call, trying to register a new collector
|
||||
/// while holding the lock.
|
||||
pub fn register_internal(c: Box<dyn Collector>) -> prometheus::Result<()> {
|
||||
|
||||
@@ -104,7 +104,9 @@ pub struct ConfigToml {
|
||||
pub image_compression: ImageCompressionAlgorithm,
|
||||
pub ephemeral_bytes_per_memory_kb: usize,
|
||||
pub l0_flush: Option<crate::models::L0FlushConfig>,
|
||||
pub compact_level0_phase1_value_access: CompactL0Phase1ValueAccess,
|
||||
#[serde(skip_serializing)]
|
||||
// TODO(https://github.com/neondatabase/neon/issues/8184): remove after this field is removed from all pageserver.toml's
|
||||
pub compact_level0_phase1_value_access: serde::de::IgnoredAny,
|
||||
pub virtual_file_direct_io: crate::models::virtual_file::DirectIoMode,
|
||||
pub io_buffer_alignment: usize,
|
||||
}
|
||||
@@ -209,40 +211,6 @@ pub enum GetImpl {
|
||||
#[serde(transparent)]
|
||||
pub struct MaxVectoredReadBytes(pub NonZeroUsize);
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)]
|
||||
#[serde(tag = "mode", rename_all = "kebab-case", deny_unknown_fields)]
|
||||
pub enum CompactL0Phase1ValueAccess {
|
||||
/// The old way.
|
||||
PageCachedBlobIo,
|
||||
/// The new way.
|
||||
StreamingKmerge {
|
||||
/// If set, we run both the old way and the new way, validate that
|
||||
/// they are identical (=> [`CompactL0BypassPageCacheValidation`]),
|
||||
/// and if the validation fails,
|
||||
/// - in tests: fail them with a panic or
|
||||
/// - in prod, log a rate-limited warning and use the old way's results.
|
||||
///
|
||||
/// If not set, we only run the new way and trust its results.
|
||||
validate: Option<CompactL0BypassPageCacheValidation>,
|
||||
},
|
||||
}
|
||||
|
||||
/// See [`CompactL0Phase1ValueAccess::StreamingKmerge`].
|
||||
#[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)]
|
||||
#[serde(rename_all = "kebab-case")]
|
||||
pub enum CompactL0BypassPageCacheValidation {
|
||||
/// Validate that the series of (key, lsn) pairs are the same.
|
||||
KeyLsn,
|
||||
/// Validate that the entire output of old and new way is identical.
|
||||
KeyLsnValue,
|
||||
}
|
||||
|
||||
impl Default for CompactL0Phase1ValueAccess {
|
||||
fn default() -> Self {
|
||||
CompactL0Phase1ValueAccess::StreamingKmerge { validate: None }
|
||||
}
|
||||
}
|
||||
|
||||
/// A tenant's calcuated configuration, which is the result of merging a
|
||||
/// tenant's TenantConfOpt with the global TenantConf from PageServerConf.
|
||||
///
|
||||
@@ -449,7 +417,7 @@ impl Default for ConfigToml {
|
||||
image_compression: (DEFAULT_IMAGE_COMPRESSION),
|
||||
ephemeral_bytes_per_memory_kb: (DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB),
|
||||
l0_flush: None,
|
||||
compact_level0_phase1_value_access: CompactL0Phase1ValueAccess::default(),
|
||||
compact_level0_phase1_value_access: Default::default(),
|
||||
virtual_file_direct_io: crate::models::virtual_file::DirectIoMode::default(),
|
||||
|
||||
io_buffer_alignment: DEFAULT_IO_BUFFER_ALIGNMENT,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use std::collections::HashSet;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::str::FromStr;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
@@ -57,7 +57,7 @@ pub struct NodeRegisterRequest {
|
||||
pub listen_http_addr: String,
|
||||
pub listen_http_port: u16,
|
||||
|
||||
pub availability_zone_id: Option<String>,
|
||||
pub availability_zone_id: String,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
@@ -74,6 +74,17 @@ pub struct TenantPolicyRequest {
|
||||
pub scheduling: Option<ShardSchedulingPolicy>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct ShardsPreferredAzsRequest {
|
||||
#[serde(flatten)]
|
||||
pub preferred_az_ids: HashMap<TenantShardId, String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct ShardsPreferredAzsResponse {
|
||||
pub updated: Vec<TenantShardId>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub struct TenantLocateResponseShard {
|
||||
pub shard_id: TenantShardId,
|
||||
@@ -101,6 +112,21 @@ pub struct TenantDescribeResponse {
|
||||
pub config: TenantConfig,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub struct NodeShardResponse {
|
||||
pub node_id: NodeId,
|
||||
pub shards: Vec<NodeShard>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub struct NodeShard {
|
||||
pub tenant_shard_id: TenantShardId,
|
||||
/// Whether the shard is observed secondary on a specific node. True = yes, False = no, None = not on this node.
|
||||
pub is_observed_secondary: Option<bool>,
|
||||
/// Whether the shard is intended to be a secondary on a specific node. True = yes, False = no, None = not on this node.
|
||||
pub is_intended_secondary: Option<bool>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct NodeDescribeResponse {
|
||||
pub id: NodeId,
|
||||
@@ -132,8 +158,12 @@ pub struct TenantDescribeResponseShard {
|
||||
pub is_splitting: bool,
|
||||
|
||||
pub scheduling_policy: ShardSchedulingPolicy,
|
||||
|
||||
pub preferred_az_id: Option<String>,
|
||||
}
|
||||
|
||||
/// Migration request for a given tenant shard to a given node.
|
||||
///
|
||||
/// Explicitly migrating a particular shard is a low level operation
|
||||
/// TODO: higher level "Reschedule tenant" operation where the request
|
||||
/// specifies some constraints, e.g. asking it to get off particular node(s)
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
use anyhow::{bail, Result};
|
||||
use byteorder::{ByteOrder, BE};
|
||||
use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
|
||||
use postgres_ffi::Oid;
|
||||
use postgres_ffi::RepOriginId;
|
||||
use postgres_ffi::{Oid, TransactionId};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::{fmt, ops::Range};
|
||||
|
||||
@@ -263,15 +263,6 @@ impl Key {
|
||||
field5: u8::MAX,
|
||||
field6: u32::MAX,
|
||||
};
|
||||
/// A key slightly smaller than [`Key::MAX`] for use in layer key ranges to avoid them to be confused with L0 layers
|
||||
pub const NON_L0_MAX: Key = Key {
|
||||
field1: u8::MAX,
|
||||
field2: u32::MAX,
|
||||
field3: u32::MAX,
|
||||
field4: u32::MAX,
|
||||
field5: u8::MAX,
|
||||
field6: u32::MAX - 1,
|
||||
};
|
||||
|
||||
pub fn from_hex(s: &str) -> Result<Self> {
|
||||
if s.len() != 36 {
|
||||
@@ -359,7 +350,17 @@ impl Key {
|
||||
// 02 00000000 00000000 00000000 00 00000000
|
||||
//
|
||||
// TwoPhaseFile:
|
||||
// 02 00000000 00000000 00000000 00 XID
|
||||
//
|
||||
// 02 00000000 00000000 00XXXXXX XX XXXXXXXX
|
||||
//
|
||||
// \______XID_________/
|
||||
//
|
||||
// The 64-bit XID is stored a little awkwardly in field6, field5 and
|
||||
// field4. PostgreSQL v16 and below only stored a 32-bit XID, which
|
||||
// fit completely in field6, but starting with PostgreSQL v17, a full
|
||||
// 64-bit XID is used. Most pageserver code that accesses
|
||||
// TwoPhaseFiles now deals with 64-bit XIDs even on v16, the high bits
|
||||
// are just unused.
|
||||
//
|
||||
// ControlFile:
|
||||
// 03 00000000 00000000 00000000 00 00000000
|
||||
@@ -591,35 +592,36 @@ pub const TWOPHASEDIR_KEY: Key = Key {
|
||||
};
|
||||
|
||||
#[inline(always)]
|
||||
pub fn twophase_file_key(xid: TransactionId) -> Key {
|
||||
pub fn twophase_file_key(xid: u64) -> Key {
|
||||
Key {
|
||||
field1: 0x02,
|
||||
field2: 0,
|
||||
field3: 0,
|
||||
field4: 0,
|
||||
field5: 0,
|
||||
field6: xid,
|
||||
field4: ((xid & 0xFFFFFF0000000000) >> 40) as u32,
|
||||
field5: ((xid & 0x000000FF00000000) >> 32) as u8,
|
||||
field6: (xid & 0x00000000FFFFFFFF) as u32,
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn twophase_key_range(xid: TransactionId) -> Range<Key> {
|
||||
pub fn twophase_key_range(xid: u64) -> Range<Key> {
|
||||
// 64-bit XIDs really should not overflow
|
||||
let (next_xid, overflowed) = xid.overflowing_add(1);
|
||||
|
||||
Key {
|
||||
field1: 0x02,
|
||||
field2: 0,
|
||||
field3: 0,
|
||||
field4: 0,
|
||||
field5: 0,
|
||||
field6: xid,
|
||||
field4: ((xid & 0xFFFFFF0000000000) >> 40) as u32,
|
||||
field5: ((xid & 0x000000FF00000000) >> 32) as u8,
|
||||
field6: (xid & 0x00000000FFFFFFFF) as u32,
|
||||
}..Key {
|
||||
field1: 0x02,
|
||||
field2: 0,
|
||||
field3: 0,
|
||||
field4: 0,
|
||||
field5: u8::from(overflowed),
|
||||
field6: next_xid,
|
||||
field3: u32::from(overflowed),
|
||||
field4: ((next_xid & 0xFFFFFF0000000000) >> 40) as u32,
|
||||
field5: ((next_xid & 0x000000FF00000000) >> 32) as u8,
|
||||
field6: (next_xid & 0x00000000FFFFFFFF) as u32,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -62,7 +62,7 @@ use bytes::{Buf, BufMut, Bytes, BytesMut};
|
||||
serde::Serialize,
|
||||
serde::Deserialize,
|
||||
strum_macros::Display,
|
||||
strum_macros::EnumVariantNames,
|
||||
strum_macros::VariantNames,
|
||||
strum_macros::AsRefStr,
|
||||
strum_macros::IntoStaticStr,
|
||||
)]
|
||||
@@ -305,8 +305,10 @@ pub struct TenantConfig {
|
||||
pub lsn_lease_length_for_ts: Option<String>,
|
||||
}
|
||||
|
||||
/// The policy for the aux file storage. It can be switched through `switch_aux_file_policy`
|
||||
/// tenant config. When the first aux file written, the policy will be persisted in the
|
||||
/// The policy for the aux file storage.
|
||||
///
|
||||
/// It can be switched through `switch_aux_file_policy` tenant config.
|
||||
/// When the first aux file written, the policy will be persisted in the
|
||||
/// `index_part.json` file and has a limited migration path.
|
||||
///
|
||||
/// Currently, we only allow the following migration path:
|
||||
@@ -896,7 +898,9 @@ pub struct WalRedoManagerStatus {
|
||||
pub process: Option<WalRedoManagerProcessStatus>,
|
||||
}
|
||||
|
||||
/// The progress of a secondary tenant is mostly useful when doing a long running download: e.g. initiating
|
||||
/// The progress of a secondary tenant.
|
||||
///
|
||||
/// It is mostly useful when doing a long running download: e.g. initiating
|
||||
/// a download job, timing out while waiting for it to run, and then inspecting this status to understand
|
||||
/// what's happening.
|
||||
#[derive(Default, Debug, Serialize, Deserialize, Clone)]
|
||||
|
||||
@@ -89,8 +89,19 @@ impl PageserverUtilization {
|
||||
|
||||
/// If a node is currently hosting more work than it can comfortably handle. This does not indicate that
|
||||
/// it will fail, but it is a strong signal that more work should not be added unless there is no alternative.
|
||||
///
|
||||
/// When a node is overloaded, we may override soft affinity preferences and do things like scheduling
|
||||
/// into a node in a less desirable AZ, if all the nodes in the preferred AZ are overloaded.
|
||||
pub fn is_overloaded(score: RawScore) -> bool {
|
||||
score >= Self::UTILIZATION_FULL
|
||||
// Why the factor of two? This is unscientific but reflects behavior of real systems:
|
||||
// - In terms of shard counts, a node's preferred max count is a soft limit intended to keep
|
||||
// startup and housekeeping jobs nice and responsive. We can go to double this limit if needed
|
||||
// until some more nodes are deployed.
|
||||
// - In terms of disk space, the node's utilization heuristic assumes every tenant needs to
|
||||
// hold its biggest timeline fully on disk, which is tends to be an over estimate when
|
||||
// some tenants are very idle and have dropped layers from disk. In practice going up to
|
||||
// double is generally better than giving up and scheduling in a sub-optimal AZ.
|
||||
score >= 2 * Self::UTILIZATION_FULL
|
||||
}
|
||||
|
||||
pub fn adjust_shard_count_max(&mut self, shard_count: u32) {
|
||||
|
||||
@@ -69,8 +69,10 @@ impl QueryError {
|
||||
}
|
||||
|
||||
/// Returns true if the given error is a normal consequence of a network issue,
|
||||
/// or the client closing the connection. These errors can happen during normal
|
||||
/// operations, and don't indicate a bug in our code.
|
||||
/// or the client closing the connection.
|
||||
///
|
||||
/// These errors can happen during normal operations,
|
||||
/// and don't indicate a bug in our code.
|
||||
pub fn is_expected_io_error(e: &io::Error) -> bool {
|
||||
use io::ErrorKind::*;
|
||||
matches!(
|
||||
@@ -79,17 +81,16 @@ pub fn is_expected_io_error(e: &io::Error) -> bool {
|
||||
)
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
pub trait Handler<IO> {
|
||||
/// Handle single query.
|
||||
/// postgres_backend will issue ReadyForQuery after calling this (this
|
||||
/// might be not what we want after CopyData streaming, but currently we don't
|
||||
/// care). It will also flush out the output buffer.
|
||||
async fn process_query(
|
||||
fn process_query(
|
||||
&mut self,
|
||||
pgb: &mut PostgresBackend<IO>,
|
||||
query_string: &str,
|
||||
) -> Result<(), QueryError>;
|
||||
) -> impl Future<Output = Result<(), QueryError>>;
|
||||
|
||||
/// Called on startup packet receival, allows to process params.
|
||||
///
|
||||
|
||||
@@ -23,7 +23,6 @@ async fn make_tcp_pair() -> (TcpStream, TcpStream) {
|
||||
|
||||
struct TestHandler {}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl<IO: AsyncRead + AsyncWrite + Unpin + Send> Handler<IO> for TestHandler {
|
||||
// return single col 'hey' for any query
|
||||
async fn process_query(
|
||||
|
||||
@@ -7,6 +7,7 @@ use std::fmt;
|
||||
use url::Host;
|
||||
|
||||
/// Parses a string of format either `host:port` or `host` into a corresponding pair.
|
||||
///
|
||||
/// The `host` part should be a correct `url::Host`, while `port` (if present) should be
|
||||
/// a valid decimal u16 of digits only.
|
||||
pub fn parse_host_port<S: AsRef<str>>(host_port: S) -> Result<(Host, Option<u16>), anyhow::Error> {
|
||||
|
||||
@@ -14,7 +14,7 @@ impl ParseCallbacks for PostgresFfiCallbacks {
|
||||
fn include_file(&self, filename: &str) {
|
||||
// This does the equivalent of passing bindgen::CargoCallbacks
|
||||
// to the builder .parse_callbacks() method.
|
||||
let cargo_callbacks = bindgen::CargoCallbacks;
|
||||
let cargo_callbacks = bindgen::CargoCallbacks::new();
|
||||
cargo_callbacks.include_file(filename)
|
||||
}
|
||||
|
||||
@@ -56,7 +56,7 @@ fn main() -> anyhow::Result<()> {
|
||||
PathBuf::from("pg_install")
|
||||
};
|
||||
|
||||
for pg_version in &["v14", "v15", "v16"] {
|
||||
for pg_version in &["v14", "v15", "v16", "v17"] {
|
||||
let mut pg_install_dir_versioned = pg_install_dir.join(pg_version);
|
||||
if pg_install_dir_versioned.is_relative() {
|
||||
let cwd = env::current_dir().context("Failed to get current_dir")?;
|
||||
@@ -121,6 +121,7 @@ fn main() -> anyhow::Result<()> {
|
||||
.allowlist_type("XLogPageHeaderData")
|
||||
.allowlist_type("XLogLongPageHeaderData")
|
||||
.allowlist_var("XLOG_PAGE_MAGIC")
|
||||
.allowlist_var("PG_MAJORVERSION_NUM")
|
||||
.allowlist_var("PG_CONTROL_FILE_SIZE")
|
||||
.allowlist_var("PG_CONTROLFILEDATA_OFFSETOF_CRC")
|
||||
.allowlist_type("PageHeaderData")
|
||||
|
||||
@@ -44,6 +44,9 @@ macro_rules! postgres_ffi {
|
||||
// Re-export some symbols from bindings
|
||||
pub use bindings::DBState_DB_SHUTDOWNED;
|
||||
pub use bindings::{CheckPoint, ControlFileData, XLogRecord};
|
||||
|
||||
pub const ZERO_CHECKPOINT: bytes::Bytes =
|
||||
bytes::Bytes::from_static(&[0u8; xlog_utils::SIZEOF_CHECKPOINT]);
|
||||
}
|
||||
};
|
||||
}
|
||||
@@ -54,6 +57,7 @@ macro_rules! for_all_postgres_versions {
|
||||
$macro!(v14);
|
||||
$macro!(v15);
|
||||
$macro!(v16);
|
||||
$macro!(v17);
|
||||
};
|
||||
}
|
||||
|
||||
@@ -88,6 +92,7 @@ macro_rules! dispatch_pgversion {
|
||||
14 : v14,
|
||||
15 : v15,
|
||||
16 : v16,
|
||||
17 : v17,
|
||||
]
|
||||
)
|
||||
};
|
||||
@@ -106,6 +111,110 @@ macro_rules! dispatch_pgversion {
|
||||
};
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! enum_pgversion_dispatch {
|
||||
($name:expr, $typ:ident, $bind:ident, $code:block) => {
|
||||
enum_pgversion_dispatch!(
|
||||
name = $name,
|
||||
bind = $bind,
|
||||
typ = $typ,
|
||||
code = $code,
|
||||
pgversions = [
|
||||
V14 : v14,
|
||||
V15 : v15,
|
||||
V16 : v16,
|
||||
V17 : v17,
|
||||
]
|
||||
)
|
||||
};
|
||||
(name = $name:expr,
|
||||
bind = $bind:ident,
|
||||
typ = $typ:ident,
|
||||
code = $code:block,
|
||||
pgversions = [$($variant:ident : $md:ident),+ $(,)?]) => {
|
||||
match $name {
|
||||
$(
|
||||
self::$typ::$variant($bind) => {
|
||||
use $crate::$md as pgv;
|
||||
$code
|
||||
}
|
||||
),+,
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! enum_pgversion {
|
||||
{$name:ident, pgv :: $t:ident} => {
|
||||
enum_pgversion!{
|
||||
name = $name,
|
||||
typ = $t,
|
||||
pgversions = [
|
||||
V14 : v14,
|
||||
V15 : v15,
|
||||
V16 : v16,
|
||||
V17 : v17,
|
||||
]
|
||||
}
|
||||
};
|
||||
{$name:ident, pgv :: $p:ident :: $t:ident} => {
|
||||
enum_pgversion!{
|
||||
name = $name,
|
||||
path = $p,
|
||||
typ = $t,
|
||||
pgversions = [
|
||||
V14 : v14,
|
||||
V15 : v15,
|
||||
V16 : v16,
|
||||
V17 : v17,
|
||||
]
|
||||
}
|
||||
};
|
||||
{name = $name:ident,
|
||||
typ = $t:ident,
|
||||
pgversions = [$($variant:ident : $md:ident),+ $(,)?]} => {
|
||||
pub enum $name {
|
||||
$($variant ( $crate::$md::$t )),+
|
||||
}
|
||||
impl self::$name {
|
||||
pub fn pg_version(&self) -> u32 {
|
||||
enum_pgversion_dispatch!(self, $name, _ign, {
|
||||
pgv::bindings::PG_MAJORVERSION_NUM
|
||||
})
|
||||
}
|
||||
}
|
||||
$(
|
||||
impl Into<self::$name> for $crate::$md::$t {
|
||||
fn into(self) -> self::$name {
|
||||
self::$name::$variant (self)
|
||||
}
|
||||
}
|
||||
)+
|
||||
};
|
||||
{name = $name:ident,
|
||||
path = $p:ident,
|
||||
typ = $t:ident,
|
||||
pgversions = [$($variant:ident : $md:ident),+ $(,)?]} => {
|
||||
pub enum $name {
|
||||
$($variant ($crate::$md::$p::$t)),+
|
||||
}
|
||||
impl $name {
|
||||
pub fn pg_version(&self) -> u32 {
|
||||
enum_pgversion_dispatch!(self, $name, _ign, {
|
||||
pgv::bindings::PG_MAJORVERSION_NUM
|
||||
})
|
||||
}
|
||||
}
|
||||
$(
|
||||
impl Into<$name> for $crate::$md::$p::$t {
|
||||
fn into(self) -> $name {
|
||||
$name::$variant (self)
|
||||
}
|
||||
}
|
||||
)+
|
||||
};
|
||||
}
|
||||
|
||||
pub mod pg_constants;
|
||||
pub mod relfile_utils;
|
||||
|
||||
|
||||
@@ -152,6 +152,9 @@ pub const XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED: u8 = (1 << 0) as u8;
|
||||
pub const XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED: u8 = (1 << 1) as u8;
|
||||
pub const XLH_DELETE_ALL_VISIBLE_CLEARED: u8 = (1 << 0) as u8;
|
||||
|
||||
// From heapam_xlog.h
|
||||
pub const XLOG_HEAP2_REWRITE: u8 = 0x00;
|
||||
|
||||
// From replication/message.h
|
||||
pub const XLOG_LOGICAL_MESSAGE: u8 = 0x00;
|
||||
|
||||
@@ -219,15 +222,20 @@ pub const INVALID_TRANSACTION_ID: u32 = 0;
|
||||
pub const FIRST_BOOTSTRAP_OBJECT_ID: u32 = 12000;
|
||||
pub const FIRST_NORMAL_OBJECT_ID: u32 = 16384;
|
||||
|
||||
/* pg_control.h */
|
||||
pub const XLOG_CHECKPOINT_SHUTDOWN: u8 = 0x00;
|
||||
pub const XLOG_CHECKPOINT_ONLINE: u8 = 0x10;
|
||||
pub const XLP_FIRST_IS_CONTRECORD: u16 = 0x0001;
|
||||
pub const XLP_LONG_HEADER: u16 = 0x0002;
|
||||
pub const XLOG_PARAMETER_CHANGE: u8 = 0x60;
|
||||
pub const XLOG_END_OF_RECOVERY: u8 = 0x90;
|
||||
|
||||
/* From xlog.h */
|
||||
pub const XLOG_REPLORIGIN_SET: u8 = 0x00;
|
||||
pub const XLOG_REPLORIGIN_DROP: u8 = 0x10;
|
||||
|
||||
/* xlog_internal.h */
|
||||
pub const XLP_FIRST_IS_CONTRECORD: u16 = 0x0001;
|
||||
pub const XLP_LONG_HEADER: u16 = 0x0002;
|
||||
|
||||
/* From replication/slot.h */
|
||||
pub const REPL_SLOT_ON_DISK_OFFSETOF_RESTART_LSN: usize = 4*4 /* offset of `slotdata` in ReplicationSlotOnDisk */
|
||||
+ 64 /* NameData */ + 4*4;
|
||||
@@ -245,33 +253,6 @@ pub const VM_HEAPBLOCKS_PER_PAGE: u32 =
|
||||
/* From origin.c */
|
||||
pub const REPLICATION_STATE_MAGIC: u32 = 0x1257DADE;
|
||||
|
||||
// List of subdirectories inside pgdata.
|
||||
// Copied from src/bin/initdb/initdb.c
|
||||
pub const PGDATA_SUBDIRS: [&str; 22] = [
|
||||
"global",
|
||||
"pg_wal/archive_status",
|
||||
"pg_commit_ts",
|
||||
"pg_dynshmem",
|
||||
"pg_notify",
|
||||
"pg_serial",
|
||||
"pg_snapshots",
|
||||
"pg_subtrans",
|
||||
"pg_twophase",
|
||||
"pg_multixact",
|
||||
"pg_multixact/members",
|
||||
"pg_multixact/offsets",
|
||||
"base",
|
||||
"base/1",
|
||||
"pg_replslot",
|
||||
"pg_tblspc",
|
||||
"pg_stat",
|
||||
"pg_stat_tmp",
|
||||
"pg_xact",
|
||||
"pg_logical",
|
||||
"pg_logical/snapshots",
|
||||
"pg_logical/mappings",
|
||||
];
|
||||
|
||||
// Don't include postgresql.conf as it is inconvenient on node start:
|
||||
// we need postgresql.conf before basebackup to synchronize safekeepers
|
||||
// so no point in overwriting it during backup restore. Rest of the files
|
||||
|
||||
@@ -5,6 +5,33 @@ pub const BKPIMAGE_IS_COMPRESSED: u8 = 0x02; /* page image is compressed */
|
||||
pub const BKPIMAGE_APPLY: u8 = 0x04; /* page image should be restored during replay */
|
||||
pub const SIZEOF_RELMAPFILE: usize = 512; /* sizeof(RelMapFile) in relmapper.c */
|
||||
|
||||
// List of subdirectories inside pgdata.
|
||||
// Copied from src/bin/initdb/initdb.c
|
||||
pub const PGDATA_SUBDIRS: [&str; 22] = [
|
||||
"global",
|
||||
"pg_wal/archive_status",
|
||||
"pg_commit_ts",
|
||||
"pg_dynshmem",
|
||||
"pg_notify",
|
||||
"pg_serial",
|
||||
"pg_snapshots",
|
||||
"pg_subtrans",
|
||||
"pg_twophase",
|
||||
"pg_multixact",
|
||||
"pg_multixact/members",
|
||||
"pg_multixact/offsets",
|
||||
"base",
|
||||
"base/1",
|
||||
"pg_replslot",
|
||||
"pg_tblspc",
|
||||
"pg_stat",
|
||||
"pg_stat_tmp",
|
||||
"pg_xact",
|
||||
"pg_logical",
|
||||
"pg_logical/snapshots",
|
||||
"pg_logical/mappings",
|
||||
];
|
||||
|
||||
pub fn bkpimg_is_compressed(bimg_info: u8) -> bool {
|
||||
(bimg_info & BKPIMAGE_IS_COMPRESSED) != 0
|
||||
}
|
||||
|
||||
@@ -11,6 +11,8 @@ pub const BKPIMAGE_COMPRESS_ZSTD: u8 = 0x10; /* page image is compressed */
|
||||
|
||||
pub const SIZEOF_RELMAPFILE: usize = 512; /* sizeof(RelMapFile) in relmapper.c */
|
||||
|
||||
pub use super::super::v14::bindings::PGDATA_SUBDIRS;
|
||||
|
||||
pub fn bkpimg_is_compressed(bimg_info: u8) -> bool {
|
||||
const ANY_COMPRESS_FLAG: u8 = BKPIMAGE_COMPRESS_PGLZ | BKPIMAGE_COMPRESS_LZ4 | BKPIMAGE_COMPRESS_ZSTD;
|
||||
|
||||
|
||||
@@ -11,6 +11,8 @@ pub const BKPIMAGE_COMPRESS_ZSTD: u8 = 0x10; /* page image is compressed */
|
||||
|
||||
pub const SIZEOF_RELMAPFILE: usize = 524; /* sizeof(RelMapFile) in relmapper.c */
|
||||
|
||||
pub use super::super::v14::bindings::PGDATA_SUBDIRS;
|
||||
|
||||
pub fn bkpimg_is_compressed(bimg_info: u8) -> bool {
|
||||
const ANY_COMPRESS_FLAG: u8 = BKPIMAGE_COMPRESS_PGLZ | BKPIMAGE_COMPRESS_LZ4 | BKPIMAGE_COMPRESS_ZSTD;
|
||||
|
||||
|
||||
55
libs/postgres_ffi/src/pg_constants_v17.rs
Normal file
55
libs/postgres_ffi/src/pg_constants_v17.rs
Normal file
@@ -0,0 +1,55 @@
|
||||
pub const XACT_XINFO_HAS_DROPPED_STATS: u32 = 1u32 << 8;
|
||||
|
||||
pub const XLOG_DBASE_CREATE_FILE_COPY: u8 = 0x00;
|
||||
pub const XLOG_DBASE_CREATE_WAL_LOG: u8 = 0x10;
|
||||
pub const XLOG_DBASE_DROP: u8 = 0x20;
|
||||
|
||||
pub const BKPIMAGE_APPLY: u8 = 0x02; /* page image should be restored during replay */
|
||||
pub const BKPIMAGE_COMPRESS_PGLZ: u8 = 0x04; /* page image is compressed */
|
||||
pub const BKPIMAGE_COMPRESS_LZ4: u8 = 0x08; /* page image is compressed */
|
||||
pub const BKPIMAGE_COMPRESS_ZSTD: u8 = 0x10; /* page image is compressed */
|
||||
|
||||
pub const SIZEOF_RELMAPFILE: usize = 524; /* sizeof(RelMapFile) in relmapper.c */
|
||||
|
||||
// List of subdirectories inside pgdata.
|
||||
// Copied from src/bin/initdb/initdb.c
|
||||
pub const PGDATA_SUBDIRS: [&str; 23] = [
|
||||
"global",
|
||||
"pg_wal/archive_status",
|
||||
"pg_wal/summaries",
|
||||
"pg_commit_ts",
|
||||
"pg_dynshmem",
|
||||
"pg_notify",
|
||||
"pg_serial",
|
||||
"pg_snapshots",
|
||||
"pg_subtrans",
|
||||
"pg_twophase",
|
||||
"pg_multixact",
|
||||
"pg_multixact/members",
|
||||
"pg_multixact/offsets",
|
||||
"base",
|
||||
"base/1",
|
||||
"pg_replslot",
|
||||
"pg_tblspc",
|
||||
"pg_stat",
|
||||
"pg_stat_tmp",
|
||||
"pg_xact",
|
||||
"pg_logical",
|
||||
"pg_logical/snapshots",
|
||||
"pg_logical/mappings",
|
||||
];
|
||||
|
||||
pub fn bkpimg_is_compressed(bimg_info: u8) -> bool {
|
||||
const ANY_COMPRESS_FLAG: u8 = BKPIMAGE_COMPRESS_PGLZ | BKPIMAGE_COMPRESS_LZ4 | BKPIMAGE_COMPRESS_ZSTD;
|
||||
|
||||
(bimg_info & ANY_COMPRESS_FLAG) != 0
|
||||
}
|
||||
|
||||
|
||||
pub const XLOG_HEAP2_PRUNE_ON_ACCESS: u8 = 0x10;
|
||||
pub const XLOG_HEAP2_PRUNE_VACUUM_SCAN: u8 = 0x20;
|
||||
pub const XLOG_HEAP2_PRUNE_VACUUM_CLEANUP: u8 = 0x30;
|
||||
|
||||
|
||||
pub const XLOG_OVERWRITE_CONTRECORD: u8 = 0xD0;
|
||||
pub const XLOG_CHECKPOINT_REDO: u8 = 0xE0;
|
||||
@@ -53,7 +53,7 @@ impl Conf {
|
||||
|
||||
#[allow(clippy::manual_range_patterns)]
|
||||
match self.pg_version {
|
||||
14 | 15 | 16 => Ok(path.join(format!("v{}", self.pg_version))),
|
||||
14 | 15 | 16 | 17 => Ok(path.join(format!("v{}", self.pg_version))),
|
||||
_ => bail!("Unsupported postgres version: {}", self.pg_version),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -185,7 +185,7 @@ mod tests {
|
||||
use super::*;
|
||||
|
||||
fn parse(input: &str) -> anyhow::Result<RemoteStorageConfig> {
|
||||
let toml = input.parse::<toml_edit::Document>().unwrap();
|
||||
let toml = input.parse::<toml_edit::DocumentMut>().unwrap();
|
||||
RemoteStorageConfig::from_toml(toml.as_item())
|
||||
}
|
||||
|
||||
|
||||
@@ -45,6 +45,8 @@ pub use azure_core::Etag;
|
||||
|
||||
pub use error::{DownloadError, TimeTravelError, TimeoutOrCancel};
|
||||
|
||||
/// Default concurrency limit for S3 operations
|
||||
///
|
||||
/// Currently, sync happens with AWS S3, that has two limits on requests per second:
|
||||
/// ~200 RPS for IAM services
|
||||
/// <https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/UsingWithRDS.IAMDBAuth.html>
|
||||
@@ -300,7 +302,9 @@ pub trait RemoteStorage: Send + Sync + 'static {
|
||||
) -> Result<(), TimeTravelError>;
|
||||
}
|
||||
|
||||
/// DownloadStream is sensitive to the timeout and cancellation used with the original
|
||||
/// Data part of an ongoing [`Download`].
|
||||
///
|
||||
/// `DownloadStream` is sensitive to the timeout and cancellation used with the original
|
||||
/// [`RemoteStorage::download`] request. The type yields `std::io::Result<Bytes>` to be compatible
|
||||
/// with `tokio::io::copy_buf`.
|
||||
// This has 'static because safekeepers do not use cancellation tokens (yet)
|
||||
|
||||
@@ -60,3 +60,16 @@ pub struct TimelineCopyRequest {
|
||||
pub target_timeline_id: TimelineId,
|
||||
pub until_lsn: Lsn,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
pub struct TimelineTermBumpRequest {
|
||||
/// bump to
|
||||
pub term: Option<u64>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
pub struct TimelineTermBumpResponse {
|
||||
// before the request
|
||||
pub previous_term: u64,
|
||||
pub current_term: u64,
|
||||
}
|
||||
|
||||
@@ -5,9 +5,10 @@
|
||||
mod calculation;
|
||||
pub mod svg;
|
||||
|
||||
/// StorageModel is the input to the synthetic size calculation. It represents
|
||||
/// a tree of timelines, with just the information that's needed for the
|
||||
/// calculation. This doesn't track timeline names or where each timeline
|
||||
/// StorageModel is the input to the synthetic size calculation.
|
||||
///
|
||||
/// It represents a tree of timelines, with just the information that's needed
|
||||
/// for the calculation. This doesn't track timeline names or where each timeline
|
||||
/// begins and ends, for example. Instead, it consists of "points of interest"
|
||||
/// on the timelines. A point of interest could be the timeline start or end point,
|
||||
/// the oldest point on a timeline that needs to be retained because of PITR
|
||||
|
||||
@@ -5,8 +5,10 @@ use std::{
|
||||
|
||||
use metrics::IntCounter;
|
||||
|
||||
/// Circuit breakers are for operations that are expensive and fallible: if they fail repeatedly,
|
||||
/// we will stop attempting them for some period of time, to avoid denial-of-service from retries, and
|
||||
/// Circuit breakers are for operations that are expensive and fallible.
|
||||
///
|
||||
/// If a circuit breaker fails repeatedly, we will stop attempting it for some
|
||||
/// period of time, to avoid denial-of-service from retries, and
|
||||
/// to mitigate the log spam from repeated failures.
|
||||
pub struct CircuitBreaker {
|
||||
/// An identifier that enables us to log useful errors when a circuit is broken
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use std::os::fd::AsRawFd;
|
||||
use std::{
|
||||
borrow::Cow,
|
||||
fs::{self, File},
|
||||
@@ -203,6 +204,27 @@ pub fn overwrite(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Syncs the filesystem for the given file descriptor.
|
||||
#[cfg_attr(target_os = "macos", allow(unused_variables))]
|
||||
pub fn syncfs(fd: impl AsRawFd) -> anyhow::Result<()> {
|
||||
// Linux guarantees durability for syncfs.
|
||||
// POSIX doesn't have syncfs, and further does not actually guarantee durability of sync().
|
||||
#[cfg(target_os = "linux")]
|
||||
{
|
||||
use anyhow::Context;
|
||||
nix::unistd::syncfs(fd.as_raw_fd()).context("syncfs")?;
|
||||
}
|
||||
#[cfg(target_os = "macos")]
|
||||
{
|
||||
// macOS is not a production platform for Neon, don't even bother.
|
||||
}
|
||||
#[cfg(not(any(target_os = "linux", target_os = "macos")))]
|
||||
{
|
||||
compile_error!("Unsupported OS");
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
|
||||
@@ -249,8 +249,10 @@ macro_rules! id_newtype {
|
||||
};
|
||||
}
|
||||
|
||||
/// Neon timeline IDs are different from PostgreSQL timeline
|
||||
/// IDs. They serve a similar purpose though: they differentiate
|
||||
/// Neon timeline ID.
|
||||
///
|
||||
/// They are different from PostgreSQL timeline
|
||||
/// IDs, but serve a similar purpose: they differentiate
|
||||
/// between different "histories" of the same cluster. However,
|
||||
/// PostgreSQL timeline IDs are a bit cumbersome, because they are only
|
||||
/// 32-bits wide, and they must be in ascending order in any given
|
||||
|
||||
@@ -100,7 +100,9 @@ pub enum LockFileRead {
|
||||
}
|
||||
|
||||
/// Open & try to lock the lock file at the given `path`, returning a [handle][`LockFileRead`] to
|
||||
/// inspect its content. It is not an `Err(...)` if the file does not exist or is already locked.
|
||||
/// inspect its content.
|
||||
///
|
||||
/// It is not an `Err(...)` if the file does not exist or is already locked.
|
||||
/// Check the [`LockFileRead`] variants for details.
|
||||
pub fn read_and_hold_lock_file(path: &Utf8Path) -> anyhow::Result<LockFileRead> {
|
||||
let res = fs::OpenOptions::new().read(true).open(path);
|
||||
|
||||
@@ -3,11 +3,9 @@ use std::str::FromStr;
|
||||
use anyhow::Context;
|
||||
use metrics::{IntCounter, IntCounterVec};
|
||||
use once_cell::sync::Lazy;
|
||||
use strum_macros::{EnumString, EnumVariantNames};
|
||||
use strum_macros::{EnumString, VariantNames};
|
||||
|
||||
#[derive(
|
||||
EnumString, strum_macros::Display, EnumVariantNames, Eq, PartialEq, Debug, Clone, Copy,
|
||||
)]
|
||||
#[derive(EnumString, strum_macros::Display, VariantNames, Eq, PartialEq, Debug, Clone, Copy)]
|
||||
#[strum(serialize_all = "snake_case")]
|
||||
pub enum LogFormat {
|
||||
Plain,
|
||||
@@ -190,7 +188,7 @@ impl Drop for TracingPanicHookGuard {
|
||||
}
|
||||
|
||||
/// Named symbol for our panic hook, which logs the panic.
|
||||
fn tracing_panic_hook(info: &std::panic::PanicInfo) {
|
||||
fn tracing_panic_hook(info: &std::panic::PanicHookInfo) {
|
||||
// following rust 1.66.1 std implementation:
|
||||
// https://github.com/rust-lang/rust/blob/90743e7298aca107ddaa0c202a4d3604e29bfeb6/library/std/src/panicking.rs#L235-L288
|
||||
let location = info.location();
|
||||
|
||||
@@ -8,6 +8,7 @@ use tracing::{trace, warn};
|
||||
use crate::lsn::Lsn;
|
||||
|
||||
/// Feedback pageserver sends to safekeeper and safekeeper resends to compute.
|
||||
///
|
||||
/// Serialized in custom flexible key/value format. In replication protocol, it
|
||||
/// is marked with NEON_STATUS_UPDATE_TAG_BYTE to differentiate from postgres
|
||||
/// Standby status update / Hot standby feedback messages.
|
||||
|
||||
@@ -65,6 +65,8 @@ impl<T> Poison<T> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Armed pointer to a [`Poison`].
|
||||
///
|
||||
/// Use [`Self::data`] and [`Self::data_mut`] to access the wrapped state.
|
||||
/// Once modifications are done, use [`Self::disarm`].
|
||||
/// If [`Guard`] gets dropped instead of calling [`Self::disarm`], the state is poisoned
|
||||
|
||||
@@ -13,10 +13,11 @@ pub struct ShardNumber(pub u8);
|
||||
#[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Copy, Serialize, Deserialize, Debug, Hash)]
|
||||
pub struct ShardCount(pub u8);
|
||||
|
||||
/// Combination of ShardNumber and ShardCount. For use within the context of a particular tenant,
|
||||
/// when we need to know which shard we're dealing with, but do not need to know the full
|
||||
/// ShardIdentity (because we won't be doing any page->shard mapping), and do not need to know
|
||||
/// the fully qualified TenantShardId.
|
||||
/// Combination of ShardNumber and ShardCount.
|
||||
///
|
||||
/// For use within the context of a particular tenant, when we need to know which shard we're
|
||||
/// dealing with, but do not need to know the full ShardIdentity (because we won't be doing
|
||||
/// any page->shard mapping), and do not need to know the fully qualified TenantShardId.
|
||||
#[derive(Eq, PartialEq, PartialOrd, Ord, Clone, Copy, Hash)]
|
||||
pub struct ShardIndex {
|
||||
pub shard_number: ShardNumber,
|
||||
|
||||
@@ -49,12 +49,11 @@ use std::sync::{RwLock, RwLockWriteGuard};
|
||||
|
||||
use tokio::sync::watch;
|
||||
|
||||
///
|
||||
/// Rcu allows multiple readers to read and hold onto a value without blocking
|
||||
/// (for very long). Storing to the Rcu updates the value, making new readers
|
||||
/// immediately see the new value, but it also waits for all current readers to
|
||||
/// finish.
|
||||
/// (for very long).
|
||||
///
|
||||
/// Storing to the Rcu updates the value, making new readers immediately see
|
||||
/// the new value, but it also waits for all current readers to finish.
|
||||
pub struct Rcu<V> {
|
||||
inner: RwLock<RcuInner<V>>,
|
||||
}
|
||||
|
||||
@@ -5,7 +5,9 @@ use std::sync::{
|
||||
use tokio::sync::Semaphore;
|
||||
|
||||
/// Custom design like [`tokio::sync::OnceCell`] but using [`OwnedSemaphorePermit`] instead of
|
||||
/// `SemaphorePermit`, allowing use of `take` which does not require holding an outer mutex guard
|
||||
/// `SemaphorePermit`.
|
||||
///
|
||||
/// Allows use of `take` which does not require holding an outer mutex guard
|
||||
/// for the duration of initialization.
|
||||
///
|
||||
/// Has no unsafe, builds upon [`tokio::sync::Semaphore`] and [`std::sync::Mutex`].
|
||||
|
||||
@@ -10,7 +10,7 @@ pub fn deserialize_item<T>(item: &toml_edit::Item) -> Result<T, Error>
|
||||
where
|
||||
T: serde::de::DeserializeOwned,
|
||||
{
|
||||
let document: toml_edit::Document = match item {
|
||||
let document: toml_edit::DocumentMut = match item {
|
||||
toml_edit::Item::Table(toml) => toml.clone().into(),
|
||||
toml_edit::Item::Value(toml_edit::Value::InlineTable(toml)) => {
|
||||
toml.clone().into_table().into()
|
||||
|
||||
@@ -7,6 +7,7 @@ pub enum VecMapOrdering {
|
||||
}
|
||||
|
||||
/// Ordered map datastructure implemented in a Vec.
|
||||
///
|
||||
/// Append only - can only add keys that are larger than the
|
||||
/// current max key.
|
||||
/// Ordering can be adjusted using [`VecMapOrdering`]
|
||||
|
||||
@@ -6,9 +6,10 @@ pub enum YieldingLoopError {
|
||||
Cancelled,
|
||||
}
|
||||
|
||||
/// Helper for long synchronous loops, e.g. over all tenants in the system. Periodically
|
||||
/// yields to avoid blocking the executor, and after resuming checks the provided
|
||||
/// cancellation token to drop out promptly on shutdown.
|
||||
/// Helper for long synchronous loops, e.g. over all tenants in the system.
|
||||
///
|
||||
/// Periodically yields to avoid blocking the executor, and after resuming
|
||||
/// checks the provided cancellation token to drop out promptly on shutdown.
|
||||
#[inline(always)]
|
||||
pub async fn yielding_loop<I, T, F>(
|
||||
interval: usize,
|
||||
@@ -23,7 +24,7 @@ where
|
||||
for (i, item) in iter.enumerate() {
|
||||
visitor(item);
|
||||
|
||||
if i + 1 % interval == 0 {
|
||||
if (i + 1) % interval == 0 {
|
||||
tokio::task::yield_now().await;
|
||||
if cancel.is_cancelled() {
|
||||
return Err(YieldingLoopError::Cancelled);
|
||||
|
||||
@@ -4,7 +4,8 @@
|
||||
use std::{env, path::PathBuf, process::Command};
|
||||
|
||||
use anyhow::{anyhow, Context};
|
||||
use bindgen::CargoCallbacks;
|
||||
|
||||
const WALPROPOSER_PG_VERSION: &str = "v17";
|
||||
|
||||
fn main() -> anyhow::Result<()> {
|
||||
// Tell cargo to invalidate the built crate whenever the wrapper changes
|
||||
@@ -37,7 +38,10 @@ fn main() -> anyhow::Result<()> {
|
||||
// Rebuild crate when libwalproposer.a changes
|
||||
println!("cargo:rerun-if-changed={walproposer_lib_search_str}/libwalproposer.a");
|
||||
|
||||
let pg_config_bin = pg_install_abs.join("v16").join("bin").join("pg_config");
|
||||
let pg_config_bin = pg_install_abs
|
||||
.join(WALPROPOSER_PG_VERSION)
|
||||
.join("bin")
|
||||
.join("pg_config");
|
||||
let inc_server_path: String = if pg_config_bin.exists() {
|
||||
let output = Command::new(pg_config_bin)
|
||||
.arg("--includedir-server")
|
||||
@@ -54,7 +58,7 @@ fn main() -> anyhow::Result<()> {
|
||||
.into()
|
||||
} else {
|
||||
let server_path = pg_install_abs
|
||||
.join("v16")
|
||||
.join(WALPROPOSER_PG_VERSION)
|
||||
.join("include")
|
||||
.join("postgresql")
|
||||
.join("server")
|
||||
@@ -64,16 +68,25 @@ fn main() -> anyhow::Result<()> {
|
||||
.map_err(|s| anyhow!("Bad postgres server path {s:?}"))?
|
||||
};
|
||||
|
||||
let unwind_abi_functions = [
|
||||
"log_internal",
|
||||
"recovery_download",
|
||||
"start_streaming",
|
||||
"finish_sync_safekeepers",
|
||||
"wait_event_set",
|
||||
"WalProposerStart",
|
||||
];
|
||||
|
||||
// The bindgen::Builder is the main entry point
|
||||
// to bindgen, and lets you build up options for
|
||||
// the resulting bindings.
|
||||
let bindings = bindgen::Builder::default()
|
||||
let mut builder = bindgen::Builder::default()
|
||||
// The input header we would like to generate
|
||||
// bindings for.
|
||||
.header("bindgen_deps.h")
|
||||
// Tell cargo to invalidate the built crate whenever any of the
|
||||
// included header files changed.
|
||||
.parse_callbacks(Box::new(CargoCallbacks))
|
||||
.parse_callbacks(Box::new(bindgen::CargoCallbacks::new()))
|
||||
.allowlist_type("WalProposer")
|
||||
.allowlist_type("WalProposerConfig")
|
||||
.allowlist_type("walproposer_api")
|
||||
@@ -105,7 +118,12 @@ fn main() -> anyhow::Result<()> {
|
||||
.allowlist_var("WL_SOCKET_MASK")
|
||||
.clang_arg("-DWALPROPOSER_LIB")
|
||||
.clang_arg(format!("-I{pgxn_neon}"))
|
||||
.clang_arg(format!("-I{inc_server_path}"))
|
||||
.clang_arg(format!("-I{inc_server_path}"));
|
||||
|
||||
for name in unwind_abi_functions {
|
||||
builder = builder.override_abi(bindgen::Abi::CUnwind, name);
|
||||
}
|
||||
let bindings = builder
|
||||
// Finish the builder and generate the bindings.
|
||||
.generate()
|
||||
// Unwrap the Result and panic on failure.
|
||||
|
||||
@@ -33,7 +33,7 @@ extern "C" fn get_shmem_state(wp: *mut WalProposer) -> *mut WalproposerShmemStat
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" fn start_streaming(wp: *mut WalProposer, startpos: XLogRecPtr) {
|
||||
extern "C-unwind" fn start_streaming(wp: *mut WalProposer, startpos: XLogRecPtr) {
|
||||
unsafe {
|
||||
let callback_data = (*(*wp).config).callback_data;
|
||||
let api = callback_data as *mut Box<dyn ApiImpl>;
|
||||
@@ -187,7 +187,7 @@ extern "C" fn conn_blocking_write(
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" fn recovery_download(wp: *mut WalProposer, sk: *mut Safekeeper) -> bool {
|
||||
extern "C-unwind" fn recovery_download(wp: *mut WalProposer, sk: *mut Safekeeper) -> bool {
|
||||
unsafe {
|
||||
let callback_data = (*(*(*sk).wp).config).callback_data;
|
||||
let api = callback_data as *mut Box<dyn ApiImpl>;
|
||||
@@ -272,7 +272,7 @@ extern "C" fn rm_safekeeper_event_set(sk: *mut Safekeeper) {
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" fn wait_event_set(
|
||||
extern "C-unwind" fn wait_event_set(
|
||||
wp: *mut WalProposer,
|
||||
timeout: ::std::os::raw::c_long,
|
||||
event_sk: *mut *mut Safekeeper,
|
||||
@@ -324,7 +324,7 @@ extern "C" fn get_redo_start_lsn(wp: *mut WalProposer) -> XLogRecPtr {
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" fn finish_sync_safekeepers(wp: *mut WalProposer, lsn: XLogRecPtr) {
|
||||
extern "C-unwind" fn finish_sync_safekeepers(wp: *mut WalProposer, lsn: XLogRecPtr) {
|
||||
unsafe {
|
||||
let callback_data = (*(*wp).config).callback_data;
|
||||
let api = callback_data as *mut Box<dyn ApiImpl>;
|
||||
@@ -340,7 +340,7 @@ extern "C" fn process_safekeeper_feedback(wp: *mut WalProposer, sk: *mut Safekee
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" fn log_internal(
|
||||
extern "C-unwind" fn log_internal(
|
||||
wp: *mut WalProposer,
|
||||
level: ::std::os::raw::c_int,
|
||||
line: *const ::std::os::raw::c_char,
|
||||
|
||||
@@ -1,2 +1,20 @@
|
||||
pub mod mgmt_api;
|
||||
pub mod page_service;
|
||||
|
||||
/// For timeline_block_unblock_gc, distinguish the two different operations. This could be a bool.
|
||||
// If file structure is per-kind not per-feature then where to put this?
|
||||
#[derive(Clone, Copy)]
|
||||
pub enum BlockUnblock {
|
||||
Block,
|
||||
Unblock,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for BlockUnblock {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let s = match self {
|
||||
BlockUnblock::Block => "block",
|
||||
BlockUnblock::Unblock => "unblock",
|
||||
};
|
||||
f.write_str(s)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,6 +12,8 @@ use utils::{
|
||||
|
||||
pub use reqwest::Body as ReqwestBody;
|
||||
|
||||
use crate::BlockUnblock;
|
||||
|
||||
pub mod util;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
@@ -454,6 +456,20 @@ impl Client {
|
||||
.map_err(Error::ReceiveBody)
|
||||
}
|
||||
|
||||
pub async fn timeline_block_unblock_gc(
|
||||
&self,
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
dir: BlockUnblock,
|
||||
) -> Result<()> {
|
||||
let uri = format!(
|
||||
"{}/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/{dir}_gc",
|
||||
self.mgmt_api_endpoint,
|
||||
);
|
||||
|
||||
self.request(Method::POST, &uri, ()).await.map(|_| ())
|
||||
}
|
||||
|
||||
pub async fn tenant_reset(&self, tenant_shard_id: TenantShardId) -> Result<()> {
|
||||
let uri = format!(
|
||||
"{}/v1/tenant/{}/reset",
|
||||
|
||||
@@ -79,16 +79,24 @@ pub(crate) fn parse_filename(name: &str) -> Option<LayerFile> {
|
||||
return None;
|
||||
}
|
||||
let keys: Vec<&str> = split[0].split('-').collect();
|
||||
let mut lsns: Vec<&str> = split[1].split('-').collect();
|
||||
let is_delta = if lsns.len() == 1 {
|
||||
lsns.push(lsns[0]);
|
||||
let lsn_and_opt_generation: Vec<&str> = split[1].split('v').collect();
|
||||
let lsns: Vec<&str> = lsn_and_opt_generation[0].split('-').collect();
|
||||
let the_lsns: [&str; 2];
|
||||
|
||||
/*
|
||||
* Generations add a -vX-XXXXXX postfix, which causes issues when we try to
|
||||
* parse 'vX' as an LSN.
|
||||
*/
|
||||
let is_delta = if lsns.len() == 1 || lsns[1].is_empty() {
|
||||
the_lsns = [lsns[0], lsns[0]];
|
||||
false
|
||||
} else {
|
||||
the_lsns = [lsns[0], lsns[1]];
|
||||
true
|
||||
};
|
||||
|
||||
let key_range = Key::from_hex(keys[0]).unwrap()..Key::from_hex(keys[1]).unwrap();
|
||||
let lsn_range = Lsn::from_hex(lsns[0]).unwrap()..Lsn::from_hex(lsns[1]).unwrap();
|
||||
let lsn_range = Lsn::from_hex(the_lsns[0]).unwrap()..Lsn::from_hex(the_lsns[1]).unwrap();
|
||||
let holes = Vec::new();
|
||||
Some(LayerFile {
|
||||
key_range,
|
||||
|
||||
@@ -174,7 +174,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
println!("specified prefix '{}' failed validation", cmd.prefix);
|
||||
return Ok(());
|
||||
};
|
||||
let toml_document = toml_edit::Document::from_str(&cmd.config_toml_str)?;
|
||||
let toml_document = toml_edit::DocumentMut::from_str(&cmd.config_toml_str)?;
|
||||
let toml_item = toml_document
|
||||
.get("remote_storage")
|
||||
.expect("need remote_storage");
|
||||
|
||||
@@ -30,9 +30,8 @@ use pageserver_api::reltag::{RelTag, SlruKind};
|
||||
|
||||
use postgres_ffi::dispatch_pgversion;
|
||||
use postgres_ffi::pg_constants::{DEFAULTTABLESPACE_OID, GLOBALTABLESPACE_OID};
|
||||
use postgres_ffi::pg_constants::{PGDATA_SPECIAL_FILES, PGDATA_SUBDIRS, PG_HBA};
|
||||
use postgres_ffi::pg_constants::{PGDATA_SPECIAL_FILES, PG_HBA};
|
||||
use postgres_ffi::relfile_utils::{INIT_FORKNUM, MAIN_FORKNUM};
|
||||
use postgres_ffi::TransactionId;
|
||||
use postgres_ffi::XLogFileName;
|
||||
use postgres_ffi::PG_TLI;
|
||||
use postgres_ffi::{BLCKSZ, RELSEG_SIZE, WAL_SEGMENT_SIZE};
|
||||
@@ -255,8 +254,11 @@ where
|
||||
|
||||
let lazy_slru_download = self.timeline.get_lazy_slru_download() && !self.full_backup;
|
||||
|
||||
let pgversion = self.timeline.pg_version;
|
||||
let subdirs = dispatch_pgversion!(pgversion, &pgv::bindings::PGDATA_SUBDIRS[..]);
|
||||
|
||||
// Create pgdata subdirs structure
|
||||
for dir in PGDATA_SUBDIRS.iter() {
|
||||
for dir in subdirs.iter() {
|
||||
let header = new_tar_header_dir(dir)?;
|
||||
self.ar
|
||||
.append(&header, &mut io::empty())
|
||||
@@ -606,7 +608,7 @@ where
|
||||
//
|
||||
// Extract twophase state files
|
||||
//
|
||||
async fn add_twophase_file(&mut self, xid: TransactionId) -> Result<(), BasebackupError> {
|
||||
async fn add_twophase_file(&mut self, xid: u64) -> Result<(), BasebackupError> {
|
||||
let img = self
|
||||
.timeline
|
||||
.get_twophase_file(xid, self.lsn, self.ctx)
|
||||
@@ -617,7 +619,11 @@ where
|
||||
buf.extend_from_slice(&img[..]);
|
||||
let crc = crc32c::crc32c(&img[..]);
|
||||
buf.put_u32_le(crc);
|
||||
let path = format!("pg_twophase/{:>08X}", xid);
|
||||
let path = if self.timeline.pg_version < 17 {
|
||||
format!("pg_twophase/{:>08X}", xid)
|
||||
} else {
|
||||
format!("pg_twophase/{:>016X}", xid)
|
||||
};
|
||||
let header = new_tar_header(&path, buf.len() as u64)?;
|
||||
self.ar
|
||||
.append(&header, &buf[..])
|
||||
|
||||
@@ -37,6 +37,7 @@ use pageserver::{
|
||||
virtual_file,
|
||||
};
|
||||
use postgres_backend::AuthType;
|
||||
use utils::crashsafe::syncfs;
|
||||
use utils::failpoint_support;
|
||||
use utils::logging::TracingErrorLayerEnablement;
|
||||
use utils::{
|
||||
@@ -125,7 +126,6 @@ fn main() -> anyhow::Result<()> {
|
||||
// after setting up logging, log the effective IO engine choice and read path implementations
|
||||
info!(?conf.virtual_file_io_engine, "starting with virtual_file IO engine");
|
||||
info!(?conf.virtual_file_direct_io, "starting with virtual_file Direct IO settings");
|
||||
info!(?conf.compact_level0_phase1_value_access, "starting with setting for compact_level0_phase1_value_access");
|
||||
info!(?conf.io_buffer_alignment, "starting with setting for IO buffer alignment");
|
||||
|
||||
// The tenants directory contains all the pageserver local disk state.
|
||||
@@ -156,23 +156,7 @@ fn main() -> anyhow::Result<()> {
|
||||
};
|
||||
|
||||
let started = Instant::now();
|
||||
// Linux guarantees durability for syncfs.
|
||||
// POSIX doesn't have syncfs, and further does not actually guarantee durability of sync().
|
||||
#[cfg(target_os = "linux")]
|
||||
{
|
||||
use std::os::fd::AsRawFd;
|
||||
nix::unistd::syncfs(dirfd.as_raw_fd()).context("syncfs")?;
|
||||
}
|
||||
#[cfg(target_os = "macos")]
|
||||
{
|
||||
// macOS is not a production platform for Neon, don't even bother.
|
||||
drop(dirfd);
|
||||
}
|
||||
#[cfg(not(any(target_os = "linux", target_os = "macos")))]
|
||||
{
|
||||
compile_error!("Unsupported OS");
|
||||
}
|
||||
|
||||
syncfs(dirfd)?;
|
||||
let elapsed = started.elapsed();
|
||||
info!(
|
||||
elapsed_ms = elapsed.as_millis(),
|
||||
|
||||
@@ -174,16 +174,14 @@ pub struct PageServerConf {
|
||||
|
||||
pub l0_flush: crate::l0_flush::L0FlushConfig,
|
||||
|
||||
/// This flag is temporary and will be removed after gradual rollout.
|
||||
/// See <https://github.com/neondatabase/neon/issues/8184>.
|
||||
pub compact_level0_phase1_value_access: pageserver_api::config::CompactL0Phase1ValueAccess,
|
||||
|
||||
/// Direct IO settings
|
||||
pub virtual_file_direct_io: virtual_file::DirectIoMode,
|
||||
|
||||
pub io_buffer_alignment: usize,
|
||||
}
|
||||
|
||||
/// Token for authentication to safekeepers
|
||||
///
|
||||
/// We do not want to store this in a PageServerConf because the latter may be logged
|
||||
/// and/or serialized at a whim, while the token is secret. Currently this token is the
|
||||
/// same for accessing all tenants/timelines, but may become per-tenant/per-timeline in
|
||||
@@ -283,7 +281,7 @@ impl PageServerConf {
|
||||
|
||||
#[allow(clippy::manual_range_patterns)]
|
||||
match pg_version {
|
||||
14 | 15 | 16 => Ok(path.join(format!("v{pg_version}"))),
|
||||
14 | 15 | 16 | 17 => Ok(path.join(format!("v{pg_version}"))),
|
||||
_ => bail!("Unsupported postgres version: {}", pg_version),
|
||||
}
|
||||
}
|
||||
@@ -338,7 +336,7 @@ impl PageServerConf {
|
||||
max_vectored_read_bytes,
|
||||
image_compression,
|
||||
ephemeral_bytes_per_memory_kb,
|
||||
compact_level0_phase1_value_access,
|
||||
compact_level0_phase1_value_access: _,
|
||||
l0_flush,
|
||||
virtual_file_direct_io,
|
||||
concurrent_tenant_warmup,
|
||||
@@ -383,7 +381,6 @@ impl PageServerConf {
|
||||
max_vectored_read_bytes,
|
||||
image_compression,
|
||||
ephemeral_bytes_per_memory_kb,
|
||||
compact_level0_phase1_value_access,
|
||||
virtual_file_direct_io,
|
||||
io_buffer_alignment,
|
||||
|
||||
@@ -561,6 +558,16 @@ mod tests {
|
||||
.expect("parse_and_validate");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_compactl0_phase1_access_mode_is_ignored_silently() {
|
||||
let input = indoc::indoc! {r#"
|
||||
[compact_level0_phase1_value_access]
|
||||
mode = "streaming-kmerge"
|
||||
validate = "key-lsn-value"
|
||||
"#};
|
||||
toml_edit::de::from_str::<pageserver_api::config::ConfigToml>(input).unwrap();
|
||||
}
|
||||
|
||||
/// If there's a typo in the pageserver config, we'd rather catch that typo
|
||||
/// and fail pageserver startup than silently ignoring the typo, leaving whoever
|
||||
/// made it in the believe that their config change is effective.
|
||||
@@ -637,14 +644,5 @@ mod tests {
|
||||
// some_invalid_field = 23
|
||||
// "#}
|
||||
// );
|
||||
|
||||
test!(
|
||||
compact_level0_phase1_value_access,
|
||||
indoc! {r#"
|
||||
[compact_level0_phase1_value_access]
|
||||
mode = "streaming-kmerge"
|
||||
some_invalid_field = 23
|
||||
"#}
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
//! This module defines `RequestContext`, a structure that we use throughout
|
||||
//! the pageserver to propagate high-level context from places
|
||||
//! that _originate_ activity down to the shared code paths at the
|
||||
//! heart of the pageserver. It's inspired by Golang's `context.Context`.
|
||||
//! Defines [`RequestContext`].
|
||||
//!
|
||||
//! It is a structure that we use throughout the pageserver to propagate
|
||||
//! high-level context from places that _originate_ activity down to the
|
||||
//! shared code paths at the heart of the pageserver. It's inspired by
|
||||
//! Golang's `context.Context`.
|
||||
//!
|
||||
//! For example, in `Timeline::get(page_nr, lsn)` we need to answer the following questions:
|
||||
//! 1. What high-level activity ([`TaskKind`]) needs this page?
|
||||
|
||||
@@ -141,10 +141,24 @@ impl ControlPlaneGenerationsApi for ControlPlaneClient {
|
||||
m.other
|
||||
);
|
||||
|
||||
let az_id = m
|
||||
.other
|
||||
.get("availability_zone_id")
|
||||
.and_then(|jv| jv.as_str().map(|str| str.to_owned()));
|
||||
let az_id = {
|
||||
let az_id_from_metadata = m
|
||||
.other
|
||||
.get("availability_zone_id")
|
||||
.and_then(|jv| jv.as_str().map(|str| str.to_owned()));
|
||||
|
||||
match az_id_from_metadata {
|
||||
Some(az_id) => Some(az_id),
|
||||
None => {
|
||||
tracing::warn!("metadata.json does not contain an 'availability_zone_id' field");
|
||||
conf.availability_zone.clone()
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
if az_id.is_none() {
|
||||
panic!("Availablity zone id could not be inferred from metadata.json or pageserver config");
|
||||
}
|
||||
|
||||
Some(NodeRegisterRequest {
|
||||
node_id: conf.id,
|
||||
@@ -152,7 +166,7 @@ impl ControlPlaneGenerationsApi for ControlPlaneClient {
|
||||
listen_pg_port: m.postgres_port,
|
||||
listen_http_addr: m.http_host,
|
||||
listen_http_port: m.http_port,
|
||||
availability_zone_id: az_id,
|
||||
availability_zone_id: az_id.expect("Checked above"),
|
||||
})
|
||||
}
|
||||
Err(e) => {
|
||||
|
||||
@@ -580,9 +580,11 @@ async fn import_file(
|
||||
import_slru(modification, slru, file_path, reader, len, ctx).await?;
|
||||
debug!("imported multixact members slru");
|
||||
} else if file_path.starts_with("pg_twophase") {
|
||||
let xid = u32::from_str_radix(file_name.as_ref(), 16)?;
|
||||
|
||||
let bytes = read_all_bytes(reader).await?;
|
||||
|
||||
// In PostgreSQL v17, this is a 64-bit FullTransactionid. In previous versions,
|
||||
// it's a 32-bit TransactionId, which fits in u64 anyway.
|
||||
let xid = u64::from_str_radix(file_name.as_ref(), 16)?;
|
||||
modification
|
||||
.put_twophase_file(xid, Bytes::copy_from_slice(&bytes[..]), ctx)
|
||||
.await?;
|
||||
|
||||
@@ -9,7 +9,7 @@ use metrics::{
|
||||
use once_cell::sync::Lazy;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use strum::{EnumCount, VariantNames};
|
||||
use strum_macros::{EnumVariantNames, IntoStaticStr};
|
||||
use strum_macros::{IntoStaticStr, VariantNames};
|
||||
use tracing::warn;
|
||||
use utils::id::TimelineId;
|
||||
|
||||
@@ -27,7 +27,7 @@ const CRITICAL_OP_BUCKETS: &[f64] = &[
|
||||
];
|
||||
|
||||
// Metrics collected on operations on the storage repository.
|
||||
#[derive(Debug, EnumVariantNames, IntoStaticStr)]
|
||||
#[derive(Debug, VariantNames, IntoStaticStr)]
|
||||
#[strum(serialize_all = "kebab_case")]
|
||||
pub(crate) enum StorageTimeOperation {
|
||||
#[strum(serialize = "layer flush")]
|
||||
|
||||
@@ -1199,7 +1199,6 @@ impl PageServerHandler {
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl<IO> postgres_backend::Handler<IO> for PageServerHandler
|
||||
where
|
||||
IO: AsyncRead + AsyncWrite + Send + Sync + Unpin,
|
||||
|
||||
@@ -633,7 +633,7 @@ impl Timeline {
|
||||
|
||||
pub(crate) async fn get_twophase_file(
|
||||
&self,
|
||||
xid: TransactionId,
|
||||
xid: u64,
|
||||
lsn: Lsn,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<Bytes, PageReconstructError> {
|
||||
@@ -646,11 +646,19 @@ impl Timeline {
|
||||
&self,
|
||||
lsn: Lsn,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<HashSet<TransactionId>, PageReconstructError> {
|
||||
) -> Result<HashSet<u64>, PageReconstructError> {
|
||||
// fetch directory entry
|
||||
let buf = self.get(TWOPHASEDIR_KEY, lsn, ctx).await?;
|
||||
|
||||
Ok(TwoPhaseDirectory::des(&buf)?.xids)
|
||||
if self.pg_version >= 17 {
|
||||
Ok(TwoPhaseDirectoryV17::des(&buf)?.xids)
|
||||
} else {
|
||||
Ok(TwoPhaseDirectory::des(&buf)?
|
||||
.xids
|
||||
.iter()
|
||||
.map(|x| u64::from(*x))
|
||||
.collect())
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn get_control_file(
|
||||
@@ -902,9 +910,13 @@ impl Timeline {
|
||||
|
||||
// Then pg_twophase
|
||||
result.add_key(TWOPHASEDIR_KEY);
|
||||
let buf = self.get(TWOPHASEDIR_KEY, lsn, ctx).await?;
|
||||
let twophase_dir = TwoPhaseDirectory::des(&buf)?;
|
||||
let mut xids: Vec<TransactionId> = twophase_dir.xids.iter().cloned().collect();
|
||||
|
||||
let mut xids: Vec<u64> = self
|
||||
.list_twophase_files(lsn, ctx)
|
||||
.await?
|
||||
.iter()
|
||||
.cloned()
|
||||
.collect();
|
||||
xids.sort_unstable();
|
||||
for xid in xids {
|
||||
result.add_key(twophase_file_key(xid));
|
||||
@@ -1021,9 +1033,10 @@ impl Timeline {
|
||||
}
|
||||
|
||||
/// DatadirModification represents an operation to ingest an atomic set of
|
||||
/// updates to the repository. It is created by the 'begin_record'
|
||||
/// function. It is called for each WAL record, so that all the modifications
|
||||
/// by a one WAL record appear atomic.
|
||||
/// updates to the repository.
|
||||
///
|
||||
/// It is created by the 'begin_record' function. It is called for each WAL
|
||||
/// record, so that all the modifications by a one WAL record appear atomic.
|
||||
pub struct DatadirModification<'a> {
|
||||
/// The timeline this modification applies to. You can access this to
|
||||
/// read the state, but note that any pending updates are *not* reflected
|
||||
@@ -1126,9 +1139,15 @@ impl<'a> DatadirModification<'a> {
|
||||
// Create AuxFilesDirectory
|
||||
self.init_aux_dir()?;
|
||||
|
||||
let buf = TwoPhaseDirectory::ser(&TwoPhaseDirectory {
|
||||
xids: HashSet::new(),
|
||||
})?;
|
||||
let buf = if self.tline.pg_version >= 17 {
|
||||
TwoPhaseDirectoryV17::ser(&TwoPhaseDirectoryV17 {
|
||||
xids: HashSet::new(),
|
||||
})
|
||||
} else {
|
||||
TwoPhaseDirectory::ser(&TwoPhaseDirectory {
|
||||
xids: HashSet::new(),
|
||||
})
|
||||
}?;
|
||||
self.pending_directory_entries
|
||||
.push((DirectoryKind::TwoPhase, 0));
|
||||
self.put(TWOPHASEDIR_KEY, Value::Image(buf.into()));
|
||||
@@ -1204,6 +1223,13 @@ impl<'a> DatadirModification<'a> {
|
||||
img: Bytes,
|
||||
) -> anyhow::Result<()> {
|
||||
anyhow::ensure!(rel.relnode != 0, RelationError::InvalidRelnode);
|
||||
let key = rel_block_to_key(rel, blknum);
|
||||
if !key.is_valid_key_on_write_path() {
|
||||
anyhow::bail!(
|
||||
"the request contains data not supported by pageserver at {}",
|
||||
key
|
||||
);
|
||||
}
|
||||
self.put(rel_block_to_key(rel, blknum), Value::Image(img));
|
||||
Ok(())
|
||||
}
|
||||
@@ -1215,14 +1241,34 @@ impl<'a> DatadirModification<'a> {
|
||||
blknum: BlockNumber,
|
||||
img: Bytes,
|
||||
) -> anyhow::Result<()> {
|
||||
self.put(slru_block_to_key(kind, segno, blknum), Value::Image(img));
|
||||
let key = slru_block_to_key(kind, segno, blknum);
|
||||
if !key.is_valid_key_on_write_path() {
|
||||
anyhow::bail!(
|
||||
"the request contains data not supported by pageserver at {}",
|
||||
key
|
||||
);
|
||||
}
|
||||
self.put(key, Value::Image(img));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn put_rel_page_image_zero(&mut self, rel: RelTag, blknum: BlockNumber) {
|
||||
self.pending_zero_data_pages
|
||||
.insert(rel_block_to_key(rel, blknum).to_compact());
|
||||
pub(crate) fn put_rel_page_image_zero(
|
||||
&mut self,
|
||||
rel: RelTag,
|
||||
blknum: BlockNumber,
|
||||
) -> anyhow::Result<()> {
|
||||
anyhow::ensure!(rel.relnode != 0, RelationError::InvalidRelnode);
|
||||
let key = rel_block_to_key(rel, blknum);
|
||||
if !key.is_valid_key_on_write_path() {
|
||||
anyhow::bail!(
|
||||
"the request contains data not supported by pageserver: {} @ {}",
|
||||
key,
|
||||
self.lsn
|
||||
);
|
||||
}
|
||||
self.pending_zero_data_pages.insert(key.to_compact());
|
||||
self.pending_bytes += ZERO_PAGE.len();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn put_slru_page_image_zero(
|
||||
@@ -1230,10 +1276,18 @@ impl<'a> DatadirModification<'a> {
|
||||
kind: SlruKind,
|
||||
segno: u32,
|
||||
blknum: BlockNumber,
|
||||
) {
|
||||
self.pending_zero_data_pages
|
||||
.insert(slru_block_to_key(kind, segno, blknum).to_compact());
|
||||
) -> anyhow::Result<()> {
|
||||
let key = slru_block_to_key(kind, segno, blknum);
|
||||
if !key.is_valid_key_on_write_path() {
|
||||
anyhow::bail!(
|
||||
"the request contains data not supported by pageserver: {} @ {}",
|
||||
key,
|
||||
self.lsn
|
||||
);
|
||||
}
|
||||
self.pending_zero_data_pages.insert(key.to_compact());
|
||||
self.pending_bytes += ZERO_PAGE.len();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Call this at the end of each WAL record.
|
||||
@@ -1285,22 +1339,31 @@ impl<'a> DatadirModification<'a> {
|
||||
|
||||
pub async fn put_twophase_file(
|
||||
&mut self,
|
||||
xid: TransactionId,
|
||||
xid: u64,
|
||||
img: Bytes,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<()> {
|
||||
// Add it to the directory entry
|
||||
let buf = self.get(TWOPHASEDIR_KEY, ctx).await?;
|
||||
let mut dir = TwoPhaseDirectory::des(&buf)?;
|
||||
if !dir.xids.insert(xid) {
|
||||
anyhow::bail!("twophase file for xid {} already exists", xid);
|
||||
}
|
||||
self.pending_directory_entries
|
||||
.push((DirectoryKind::TwoPhase, dir.xids.len()));
|
||||
self.put(
|
||||
TWOPHASEDIR_KEY,
|
||||
Value::Image(Bytes::from(TwoPhaseDirectory::ser(&dir)?)),
|
||||
);
|
||||
let dirbuf = self.get(TWOPHASEDIR_KEY, ctx).await?;
|
||||
let newdirbuf = if self.tline.pg_version >= 17 {
|
||||
let mut dir = TwoPhaseDirectoryV17::des(&dirbuf)?;
|
||||
if !dir.xids.insert(xid) {
|
||||
anyhow::bail!("twophase file for xid {} already exists", xid);
|
||||
}
|
||||
self.pending_directory_entries
|
||||
.push((DirectoryKind::TwoPhase, dir.xids.len()));
|
||||
Bytes::from(TwoPhaseDirectoryV17::ser(&dir)?)
|
||||
} else {
|
||||
let xid = xid as u32;
|
||||
let mut dir = TwoPhaseDirectory::des(&dirbuf)?;
|
||||
if !dir.xids.insert(xid) {
|
||||
anyhow::bail!("twophase file for xid {} already exists", xid);
|
||||
}
|
||||
self.pending_directory_entries
|
||||
.push((DirectoryKind::TwoPhase, dir.xids.len()));
|
||||
Bytes::from(TwoPhaseDirectory::ser(&dir)?)
|
||||
};
|
||||
self.put(TWOPHASEDIR_KEY, Value::Image(newdirbuf));
|
||||
|
||||
self.put(twophase_file_key(xid), Value::Image(img));
|
||||
Ok(())
|
||||
@@ -1603,22 +1666,32 @@ impl<'a> DatadirModification<'a> {
|
||||
/// This method is used for marking truncated SLRU files
|
||||
pub async fn drop_twophase_file(
|
||||
&mut self,
|
||||
xid: TransactionId,
|
||||
xid: u64,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<()> {
|
||||
// Remove it from the directory entry
|
||||
let buf = self.get(TWOPHASEDIR_KEY, ctx).await?;
|
||||
let mut dir = TwoPhaseDirectory::des(&buf)?;
|
||||
let newdirbuf = if self.tline.pg_version >= 17 {
|
||||
let mut dir = TwoPhaseDirectoryV17::des(&buf)?;
|
||||
|
||||
if !dir.xids.remove(&xid) {
|
||||
warn!("twophase file for xid {} does not exist", xid);
|
||||
}
|
||||
self.pending_directory_entries
|
||||
.push((DirectoryKind::TwoPhase, dir.xids.len()));
|
||||
self.put(
|
||||
TWOPHASEDIR_KEY,
|
||||
Value::Image(Bytes::from(TwoPhaseDirectory::ser(&dir)?)),
|
||||
);
|
||||
if !dir.xids.remove(&xid) {
|
||||
warn!("twophase file for xid {} does not exist", xid);
|
||||
}
|
||||
self.pending_directory_entries
|
||||
.push((DirectoryKind::TwoPhase, dir.xids.len()));
|
||||
Bytes::from(TwoPhaseDirectoryV17::ser(&dir)?)
|
||||
} else {
|
||||
let xid: u32 = u32::try_from(xid)?;
|
||||
let mut dir = TwoPhaseDirectory::des(&buf)?;
|
||||
|
||||
if !dir.xids.remove(&xid) {
|
||||
warn!("twophase file for xid {} does not exist", xid);
|
||||
}
|
||||
self.pending_directory_entries
|
||||
.push((DirectoryKind::TwoPhase, dir.xids.len()));
|
||||
Bytes::from(TwoPhaseDirectory::ser(&dir)?)
|
||||
};
|
||||
self.put(TWOPHASEDIR_KEY, Value::Image(newdirbuf));
|
||||
|
||||
// Delete it
|
||||
self.delete(twophase_key_range(xid));
|
||||
@@ -2048,6 +2121,7 @@ impl<'a> DatadirModification<'a> {
|
||||
|
||||
/// This struct facilitates accessing either a committed key from the timeline at a
|
||||
/// specific LSN, or the latest uncommitted key from a pending modification.
|
||||
///
|
||||
/// During WAL ingestion, the records from multiple LSNs may be batched in the same
|
||||
/// modification before being flushed to the timeline. Hence, the routines in WalIngest
|
||||
/// need to look up the keys in the modification first before looking them up in the
|
||||
@@ -2087,11 +2161,21 @@ struct DbDirectory {
|
||||
dbdirs: HashMap<(Oid, Oid), bool>,
|
||||
}
|
||||
|
||||
// The format of TwoPhaseDirectory changed in PostgreSQL v17, because the filenames of
|
||||
// pg_twophase files was expanded from 32-bit XIDs to 64-bit XIDs. Previously, the files
|
||||
// were named like "pg_twophase/000002E5", now they're like
|
||||
// "pg_twophsae/0000000A000002E4".
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
struct TwoPhaseDirectory {
|
||||
xids: HashSet<TransactionId>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
struct TwoPhaseDirectoryV17 {
|
||||
xids: HashSet<u64>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Default)]
|
||||
struct RelDirectory {
|
||||
// Set of relations that exist. (relfilenode, forknum)
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
//! Timeline repository implementation that keeps old data in layer files, and
|
||||
//! the recent changes in ephemeral files.
|
||||
//!
|
||||
//! Timeline repository implementation that keeps old data in files on disk, and
|
||||
//! the recent changes in memory. See tenant/*_layer.rs files.
|
||||
//! The functions here are responsible for locating the correct layer for the
|
||||
//! get/put call, walking back the timeline branching history as needed.
|
||||
//! See tenant/*_layer.rs files. The functions here are responsible for locating
|
||||
//! the correct layer for the get/put call, walking back the timeline branching
|
||||
//! history as needed.
|
||||
//!
|
||||
//! The files are stored in the .neon/tenants/<tenant_id>/timelines/<timeline_id>
|
||||
//! directory. See docs/pageserver-storage.md for how the files are managed.
|
||||
@@ -7090,13 +7091,13 @@ mod tests {
|
||||
vec![
|
||||
// Image layer at GC horizon
|
||||
PersistentLayerKey {
|
||||
key_range: Key::MIN..Key::NON_L0_MAX,
|
||||
key_range: Key::MIN..Key::MAX,
|
||||
lsn_range: Lsn(0x30)..Lsn(0x31),
|
||||
is_delta: false
|
||||
},
|
||||
// The delta layer covers the full range (with the layer key hack to avoid being recognized as L0)
|
||||
// The delta layer below the horizon
|
||||
PersistentLayerKey {
|
||||
key_range: Key::MIN..Key::NON_L0_MAX,
|
||||
key_range: get_key(3)..get_key(4),
|
||||
lsn_range: Lsn(0x30)..Lsn(0x48),
|
||||
is_delta: true
|
||||
},
|
||||
|
||||
@@ -452,7 +452,8 @@ impl TryFrom<toml_edit::Item> for TenantConfOpt {
|
||||
.map_err(|e| anyhow::anyhow!("{}: {}", e.path(), e.inner().message()));
|
||||
}
|
||||
toml_edit::Item::Table(table) => {
|
||||
let deserializer = toml_edit::de::Deserializer::new(table.into());
|
||||
let deserializer =
|
||||
toml_edit::de::Deserializer::from(toml_edit::DocumentMut::from(table));
|
||||
return serde_path_to_error::deserialize(deserializer)
|
||||
.map_err(|e| anyhow::anyhow!("{}: {}", e.path(), e.inner().message()));
|
||||
}
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
//! Describes the legacy now hopefully no longer modified per-timeline metadata stored in
|
||||
//! `index_part.json` managed by [`remote_timeline_client`]. For many tenants and their timelines,
|
||||
//! this struct and it's original serialization format is still needed because they were written a
|
||||
//! long time ago.
|
||||
//! Describes the legacy now hopefully no longer modified per-timeline metadata.
|
||||
//!
|
||||
//! It is stored in `index_part.json` managed by [`remote_timeline_client`]. For many tenants and
|
||||
//! their timelines, this struct and its original serialization format is still needed because
|
||||
//! they were written a long time ago.
|
||||
//!
|
||||
//! Instead of changing and adding versioning to this, just change [`IndexPart`] with soft json
|
||||
//! versioning.
|
||||
|
||||
@@ -282,9 +282,10 @@ impl BackgroundPurges {
|
||||
static TENANTS: Lazy<std::sync::RwLock<TenantsMap>> =
|
||||
Lazy::new(|| std::sync::RwLock::new(TenantsMap::Initializing));
|
||||
|
||||
/// The TenantManager is responsible for storing and mutating the collection of all tenants
|
||||
/// that this pageserver process has state for. Every Tenant and SecondaryTenant instance
|
||||
/// lives inside the TenantManager.
|
||||
/// Responsible for storing and mutating the collection of all tenants
|
||||
/// that this pageserver has state for.
|
||||
///
|
||||
/// Every Tenant and SecondaryTenant instance lives inside the TenantManager.
|
||||
///
|
||||
/// The most important role of the TenantManager is to prevent conflicts: e.g. trying to attach
|
||||
/// the same tenant twice concurrently, or trying to configure the same tenant into secondary
|
||||
@@ -2346,8 +2347,9 @@ pub enum TenantMapError {
|
||||
ShuttingDown,
|
||||
}
|
||||
|
||||
/// Guards a particular tenant_id's content in the TenantsMap. While this
|
||||
/// structure exists, the TenantsMap will contain a [`TenantSlot::InProgress`]
|
||||
/// Guards a particular tenant_id's content in the TenantsMap.
|
||||
///
|
||||
/// While this structure exists, the TenantsMap will contain a [`TenantSlot::InProgress`]
|
||||
/// for this tenant, which acts as a marker for any operations targeting
|
||||
/// this tenant to retry later, or wait for the InProgress state to end.
|
||||
///
|
||||
|
||||
@@ -2184,6 +2184,8 @@ pub fn remote_timeline_path(
|
||||
remote_timelines_path(tenant_shard_id).join(Utf8Path::new(&timeline_id.to_string()))
|
||||
}
|
||||
|
||||
/// Obtains the path of the given Layer in the remote
|
||||
///
|
||||
/// Note that the shard component of a remote layer path is _not_ always the same
|
||||
/// as in the TenantShardId of the caller: tenants may reference layers from a different
|
||||
/// ShardIndex. Use the ShardIndex from the layer's metadata.
|
||||
|
||||
@@ -548,7 +548,7 @@ pub(crate) async fn download_initdb_tar_zst(
|
||||
cancel,
|
||||
)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
.inspect_err(|_e| {
|
||||
// Do a best-effort attempt at deleting the temporary file upon encountering an error.
|
||||
// We don't have async here nor do we want to pile on any extra errors.
|
||||
if let Err(e) = std::fs::remove_file(&temp_path) {
|
||||
@@ -556,7 +556,6 @@ pub(crate) async fn download_initdb_tar_zst(
|
||||
warn!("error deleting temporary file {temp_path}: {e}");
|
||||
}
|
||||
}
|
||||
e
|
||||
})?;
|
||||
|
||||
Ok((temp_path, file))
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
//! In-memory index to track the tenant files on the remote storage.
|
||||
//!
|
||||
//! Able to restore itself from the storage index parts, that are located in every timeline's remote directory and contain all data about
|
||||
//! remote timeline layers and its metadata.
|
||||
|
||||
|
||||
@@ -434,10 +434,11 @@ impl ReadableLayer {
|
||||
}
|
||||
}
|
||||
|
||||
/// Layers contain a hint indicating whether they are likely to be used for reads. This is a hint rather
|
||||
/// than an authoritative value, so that we do not have to update it synchronously when changing the visibility
|
||||
/// of layers (for example when creating a branch that makes some previously covered layers visible). It should
|
||||
/// be used for cache management but not for correctness-critical checks.
|
||||
/// Layers contain a hint indicating whether they are likely to be used for reads.
|
||||
///
|
||||
/// This is a hint rather than an authoritative value, so that we do not have to update it synchronously
|
||||
/// when changing the visibility of layers (for example when creating a branch that makes some previously
|
||||
/// covered layers visible). It should be used for cache management but not for correctness-critical checks.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum LayerVisibilityHint {
|
||||
/// A Visible layer might be read while serving a read, because there is not an image layer between it
|
||||
|
||||
@@ -136,10 +136,11 @@ impl Summary {
|
||||
// Flag indicating that this version initialize the page
|
||||
const WILL_INIT: u64 = 1;
|
||||
|
||||
/// Struct representing reference to BLOB in layers. Reference contains BLOB
|
||||
/// offset, and for WAL records it also contains `will_init` flag. The flag
|
||||
/// helps to determine the range of records that needs to be applied, without
|
||||
/// reading/deserializing records themselves.
|
||||
/// Struct representing reference to BLOB in layers.
|
||||
///
|
||||
/// Reference contains BLOB offset, and for WAL records it also contains
|
||||
/// `will_init` flag. The flag helps to determine the range of records
|
||||
/// that needs to be applied, without reading/deserializing records themselves.
|
||||
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
|
||||
pub struct BlobRef(pub u64);
|
||||
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
//! An ImageLayer represents an image or a snapshot of a key-range at
|
||||
//! one particular LSN. It contains an image of all key-value pairs
|
||||
//! in its key-range. Any key that falls into the image layer's range
|
||||
//! but does not exist in the layer, does not exist.
|
||||
//! one particular LSN.
|
||||
//!
|
||||
//! It contains an image of all key-value pairs in its key-range. Any key
|
||||
//! that falls into the image layer's range but does not exist in the layer,
|
||||
//! does not exist.
|
||||
//!
|
||||
//! An image layer is stored in a file on disk. The file is stored in
|
||||
//! timelines/<timeline_id> directory. Currently, there are no
|
||||
|
||||
@@ -12,8 +12,10 @@ use serde::{Deserialize, Serialize};
|
||||
#[cfg(test)]
|
||||
use utils::id::TenantId;
|
||||
|
||||
/// A unique identifier of a persistent layer. This is different from `LayerDescriptor`, which is only used in the
|
||||
/// benchmarks. This struct contains all necessary information to find the image / delta layer. It also provides
|
||||
/// A unique identifier of a persistent layer.
|
||||
///
|
||||
/// This is different from `LayerDescriptor`, which is only used in the benchmarks.
|
||||
/// This struct contains all necessary information to find the image / delta layer. It also provides
|
||||
/// a unified way to generate layer information like file name.
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize, Hash)]
|
||||
pub struct PersistentLayerDesc {
|
||||
|
||||
@@ -217,8 +217,9 @@ impl fmt::Display for ImageLayerName {
|
||||
}
|
||||
}
|
||||
|
||||
/// LayerName is the logical identity of a layer within a LayerMap at a moment in time. The
|
||||
/// LayerName is not a unique filename, as the same LayerName may have multiple physical incarnations
|
||||
/// LayerName is the logical identity of a layer within a LayerMap at a moment in time.
|
||||
///
|
||||
/// The LayerName is not a unique filename, as the same LayerName may have multiple physical incarnations
|
||||
/// over time (e.g. across shard splits or compression). The physical filenames of layers in local
|
||||
/// storage and object names in remote storage consist of the LayerName plus some extra qualifiers
|
||||
/// that uniquely identify the physical incarnation of a layer (see [crate::tenant::remote_timeline_client::remote_layer_path])
|
||||
|
||||
@@ -226,9 +226,11 @@ impl<'a> IteratorWrapper<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
/// A merge iterator over delta/image layer iterators. When duplicated records are
|
||||
/// found, the iterator will not perform any deduplication, and the caller should handle
|
||||
/// these situation. By saying duplicated records, there are many possibilities:
|
||||
/// A merge iterator over delta/image layer iterators.
|
||||
///
|
||||
/// When duplicated records are found, the iterator will not perform any
|
||||
/// deduplication, and the caller should handle these situation. By saying
|
||||
/// duplicated records, there are many possibilities:
|
||||
///
|
||||
/// * Two same delta at the same LSN.
|
||||
/// * Two same image at the same LSN.
|
||||
|
||||
@@ -34,9 +34,10 @@ impl SplitWriterResult {
|
||||
}
|
||||
}
|
||||
|
||||
/// An image writer that takes images and produces multiple image layers. The interface does not
|
||||
/// guarantee atomicity (i.e., if the image layer generation fails, there might be leftover files
|
||||
/// to be cleaned up)
|
||||
/// An image writer that takes images and produces multiple image layers.
|
||||
///
|
||||
/// The interface does not guarantee atomicity (i.e., if the image layer generation
|
||||
/// fails, there might be leftover files to be cleaned up)
|
||||
#[must_use]
|
||||
pub struct SplitImageLayerWriter {
|
||||
inner: ImageLayerWriter,
|
||||
@@ -187,22 +188,23 @@ impl SplitImageLayerWriter {
|
||||
.await
|
||||
}
|
||||
|
||||
/// When split writer fails, the caller should call this function and handle partially generated layers.
|
||||
/// This function will be deprecated with #8841.
|
||||
pub(crate) fn take(self) -> anyhow::Result<(Vec<SplitWriterResult>, ImageLayerWriter)> {
|
||||
Ok((self.generated_layers, self.inner))
|
||||
}
|
||||
}
|
||||
|
||||
/// A delta writer that takes key-lsn-values and produces multiple delta layers. The interface does not
|
||||
/// guarantee atomicity (i.e., if the delta layer generation fails, there might be leftover files
|
||||
/// to be cleaned up).
|
||||
/// A delta writer that takes key-lsn-values and produces multiple delta layers.
|
||||
///
|
||||
/// The interface does not guarantee atomicity (i.e., if the delta layer generation fails,
|
||||
/// there might be leftover files to be cleaned up).
|
||||
///
|
||||
/// Note that if updates of a single key exceed the target size limit, all of the updates will be batched
|
||||
/// into a single file. This behavior might change in the future. For reference, the legacy compaction algorithm
|
||||
/// will split them into multiple files based on size.
|
||||
#[must_use]
|
||||
pub struct SplitDeltaLayerWriter {
|
||||
inner: DeltaLayerWriter,
|
||||
inner: Option<(Key, DeltaLayerWriter)>,
|
||||
target_layer_size: u64,
|
||||
generated_layers: Vec<SplitWriterResult>,
|
||||
conf: &'static PageServerConf,
|
||||
@@ -210,7 +212,6 @@ pub struct SplitDeltaLayerWriter {
|
||||
tenant_shard_id: TenantShardId,
|
||||
lsn_range: Range<Lsn>,
|
||||
last_key_written: Key,
|
||||
start_key: Key,
|
||||
}
|
||||
|
||||
impl SplitDeltaLayerWriter {
|
||||
@@ -218,29 +219,18 @@ impl SplitDeltaLayerWriter {
|
||||
conf: &'static PageServerConf,
|
||||
timeline_id: TimelineId,
|
||||
tenant_shard_id: TenantShardId,
|
||||
start_key: Key,
|
||||
lsn_range: Range<Lsn>,
|
||||
target_layer_size: u64,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<Self> {
|
||||
Ok(Self {
|
||||
target_layer_size,
|
||||
inner: DeltaLayerWriter::new(
|
||||
conf,
|
||||
timeline_id,
|
||||
tenant_shard_id,
|
||||
start_key,
|
||||
lsn_range.clone(),
|
||||
ctx,
|
||||
)
|
||||
.await?,
|
||||
inner: None,
|
||||
generated_layers: Vec::new(),
|
||||
conf,
|
||||
timeline_id,
|
||||
tenant_shard_id,
|
||||
lsn_range,
|
||||
last_key_written: Key::MIN,
|
||||
start_key,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -263,9 +253,26 @@ impl SplitDeltaLayerWriter {
|
||||
//
|
||||
// Also, keep all updates of a single key in a single file. TODO: split them using the legacy compaction
|
||||
// strategy. https://github.com/neondatabase/neon/issues/8837
|
||||
|
||||
if self.inner.is_none() {
|
||||
self.inner = Some((
|
||||
key,
|
||||
DeltaLayerWriter::new(
|
||||
self.conf,
|
||||
self.timeline_id,
|
||||
self.tenant_shard_id,
|
||||
key,
|
||||
self.lsn_range.clone(),
|
||||
ctx,
|
||||
)
|
||||
.await?,
|
||||
));
|
||||
}
|
||||
let (_, inner) = self.inner.as_mut().unwrap();
|
||||
|
||||
let addition_size_estimation = KEY_SIZE as u64 + 8 /* LSN u64 size */ + 80 /* value size estimation */;
|
||||
if self.inner.num_keys() >= 1
|
||||
&& self.inner.estimated_size() + addition_size_estimation >= self.target_layer_size
|
||||
if inner.num_keys() >= 1
|
||||
&& inner.estimated_size() + addition_size_estimation >= self.target_layer_size
|
||||
{
|
||||
if key != self.last_key_written {
|
||||
let next_delta_writer = DeltaLayerWriter::new(
|
||||
@@ -277,13 +284,13 @@ impl SplitDeltaLayerWriter {
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
let prev_delta_writer = std::mem::replace(&mut self.inner, next_delta_writer);
|
||||
let (start_key, prev_delta_writer) =
|
||||
std::mem::replace(&mut self.inner, Some((key, next_delta_writer))).unwrap();
|
||||
let layer_key = PersistentLayerKey {
|
||||
key_range: self.start_key..key,
|
||||
key_range: start_key..key,
|
||||
lsn_range: self.lsn_range.clone(),
|
||||
is_delta: true,
|
||||
};
|
||||
self.start_key = key;
|
||||
if discard(&layer_key).await {
|
||||
drop(prev_delta_writer);
|
||||
self.generated_layers
|
||||
@@ -294,17 +301,18 @@ impl SplitDeltaLayerWriter {
|
||||
self.generated_layers
|
||||
.push(SplitWriterResult::Produced(delta_layer));
|
||||
}
|
||||
} else if self.inner.estimated_size() >= S3_UPLOAD_LIMIT {
|
||||
} else if inner.estimated_size() >= S3_UPLOAD_LIMIT {
|
||||
// We have to produce a very large file b/c a key is updated too often.
|
||||
anyhow::bail!(
|
||||
"a single key is updated too often: key={}, estimated_size={}, and the layer file cannot be produced",
|
||||
key,
|
||||
self.inner.estimated_size()
|
||||
inner.estimated_size()
|
||||
);
|
||||
}
|
||||
}
|
||||
self.last_key_written = key;
|
||||
self.inner.put_value(key, lsn, val, ctx).await
|
||||
let (_, inner) = self.inner.as_mut().unwrap();
|
||||
inner.put_value(key, lsn, val, ctx).await
|
||||
}
|
||||
|
||||
pub async fn put_value(
|
||||
@@ -323,7 +331,6 @@ impl SplitDeltaLayerWriter {
|
||||
self,
|
||||
tline: &Arc<Timeline>,
|
||||
ctx: &RequestContext,
|
||||
end_key: Key,
|
||||
discard: D,
|
||||
) -> anyhow::Result<Vec<SplitWriterResult>>
|
||||
where
|
||||
@@ -335,11 +342,15 @@ impl SplitDeltaLayerWriter {
|
||||
inner,
|
||||
..
|
||||
} = self;
|
||||
let Some((start_key, inner)) = inner else {
|
||||
return Ok(generated_layers);
|
||||
};
|
||||
if inner.num_keys() == 0 {
|
||||
return Ok(generated_layers);
|
||||
}
|
||||
let end_key = self.last_key_written.next();
|
||||
let layer_key = PersistentLayerKey {
|
||||
key_range: self.start_key..end_key,
|
||||
key_range: start_key..end_key,
|
||||
lsn_range: self.lsn_range.clone(),
|
||||
is_delta: true,
|
||||
};
|
||||
@@ -358,15 +369,14 @@ impl SplitDeltaLayerWriter {
|
||||
self,
|
||||
tline: &Arc<Timeline>,
|
||||
ctx: &RequestContext,
|
||||
end_key: Key,
|
||||
) -> anyhow::Result<Vec<SplitWriterResult>> {
|
||||
self.finish_with_discard_fn(tline, ctx, end_key, |_| async { false })
|
||||
self.finish_with_discard_fn(tline, ctx, |_| async { false })
|
||||
.await
|
||||
}
|
||||
|
||||
/// When split writer fails, the caller should call this function and handle partially generated layers.
|
||||
pub(crate) fn take(self) -> anyhow::Result<(Vec<SplitWriterResult>, DeltaLayerWriter)> {
|
||||
Ok((self.generated_layers, self.inner))
|
||||
/// This function will be deprecated with #8841.
|
||||
pub(crate) fn take(self) -> anyhow::Result<(Vec<SplitWriterResult>, Option<DeltaLayerWriter>)> {
|
||||
Ok((self.generated_layers, self.inner.map(|x| x.1)))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -430,10 +440,8 @@ mod tests {
|
||||
tenant.conf,
|
||||
tline.timeline_id,
|
||||
tenant.tenant_shard_id,
|
||||
get_key(0),
|
||||
Lsn(0x18)..Lsn(0x20),
|
||||
4 * 1024 * 1024,
|
||||
&ctx,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -458,11 +466,22 @@ mod tests {
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
let layers = delta_writer
|
||||
.finish(&tline, &ctx, get_key(10))
|
||||
.await
|
||||
.unwrap();
|
||||
let layers = delta_writer.finish(&tline, &ctx).await.unwrap();
|
||||
assert_eq!(layers.len(), 1);
|
||||
assert_eq!(
|
||||
layers
|
||||
.into_iter()
|
||||
.next()
|
||||
.unwrap()
|
||||
.into_resident_layer()
|
||||
.layer_desc()
|
||||
.key(),
|
||||
PersistentLayerKey {
|
||||
key_range: get_key(0)..get_key(1),
|
||||
lsn_range: Lsn(0x18)..Lsn(0x20),
|
||||
is_delta: true
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -499,10 +518,8 @@ mod tests {
|
||||
tenant.conf,
|
||||
tline.timeline_id,
|
||||
tenant.tenant_shard_id,
|
||||
get_key(0),
|
||||
Lsn(0x18)..Lsn(0x20),
|
||||
4 * 1024 * 1024,
|
||||
&ctx,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -531,10 +548,7 @@ mod tests {
|
||||
.finish(&tline, &ctx, get_key(N as u32))
|
||||
.await
|
||||
.unwrap();
|
||||
let delta_layers = delta_writer
|
||||
.finish(&tline, &ctx, get_key(N as u32))
|
||||
.await
|
||||
.unwrap();
|
||||
let delta_layers = delta_writer.finish(&tline, &ctx).await.unwrap();
|
||||
if discard {
|
||||
for layer in image_layers {
|
||||
layer.into_discarded_layer();
|
||||
@@ -553,6 +567,14 @@ mod tests {
|
||||
.collect_vec();
|
||||
assert_eq!(image_layers.len(), N / 512 + 1);
|
||||
assert_eq!(delta_layers.len(), N / 512 + 1);
|
||||
assert_eq!(
|
||||
delta_layers.first().unwrap().layer_desc().key_range.start,
|
||||
get_key(0)
|
||||
);
|
||||
assert_eq!(
|
||||
delta_layers.last().unwrap().layer_desc().key_range.end,
|
||||
get_key(N as u32)
|
||||
);
|
||||
for idx in 0..image_layers.len() {
|
||||
assert_ne!(image_layers[idx].layer_desc().key_range.start, Key::MIN);
|
||||
assert_ne!(image_layers[idx].layer_desc().key_range.end, Key::MAX);
|
||||
@@ -600,10 +622,8 @@ mod tests {
|
||||
tenant.conf,
|
||||
tline.timeline_id,
|
||||
tenant.tenant_shard_id,
|
||||
get_key(0),
|
||||
Lsn(0x18)..Lsn(0x20),
|
||||
4 * 1024,
|
||||
&ctx,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -642,11 +662,35 @@ mod tests {
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
let layers = delta_writer
|
||||
.finish(&tline, &ctx, get_key(10))
|
||||
.await
|
||||
.unwrap();
|
||||
let layers = delta_writer.finish(&tline, &ctx).await.unwrap();
|
||||
assert_eq!(layers.len(), 2);
|
||||
let mut layers_iter = layers.into_iter();
|
||||
assert_eq!(
|
||||
layers_iter
|
||||
.next()
|
||||
.unwrap()
|
||||
.into_resident_layer()
|
||||
.layer_desc()
|
||||
.key(),
|
||||
PersistentLayerKey {
|
||||
key_range: get_key(0)..get_key(1),
|
||||
lsn_range: Lsn(0x18)..Lsn(0x20),
|
||||
is_delta: true
|
||||
}
|
||||
);
|
||||
assert_eq!(
|
||||
layers_iter
|
||||
.next()
|
||||
.unwrap()
|
||||
.into_resident_layer()
|
||||
.layer_desc()
|
||||
.key(),
|
||||
PersistentLayerKey {
|
||||
key_range: get_key(1)..get_key(2),
|
||||
lsn_range: Lsn(0x18)..Lsn(0x20),
|
||||
is_delta: true
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -666,10 +710,8 @@ mod tests {
|
||||
tenant.conf,
|
||||
tline.timeline_id,
|
||||
tenant.tenant_shard_id,
|
||||
get_key(0),
|
||||
Lsn(0x10)..Lsn(N as u64 * 16 + 0x10),
|
||||
4 * 1024 * 1024,
|
||||
&ctx,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -687,10 +729,20 @@ mod tests {
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
let delta_layers = delta_writer
|
||||
.finish(&tline, &ctx, get_key(N as u32))
|
||||
.await
|
||||
.unwrap();
|
||||
let delta_layers = delta_writer.finish(&tline, &ctx).await.unwrap();
|
||||
assert_eq!(delta_layers.len(), 1);
|
||||
let delta_layer = delta_layers
|
||||
.into_iter()
|
||||
.next()
|
||||
.unwrap()
|
||||
.into_resident_layer();
|
||||
assert_eq!(
|
||||
delta_layer.layer_desc().key(),
|
||||
PersistentLayerKey {
|
||||
key_range: get_key(0)..get_key(1),
|
||||
lsn_range: Lsn(0x10)..Lsn(N as u64 * 16 + 0x10),
|
||||
is_delta: true
|
||||
}
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,7 +19,6 @@ use bytes::Bytes;
|
||||
use enumset::EnumSet;
|
||||
use fail::fail_point;
|
||||
use itertools::Itertools;
|
||||
use pageserver_api::config::{CompactL0BypassPageCacheValidation, CompactL0Phase1ValueAccess};
|
||||
use pageserver_api::key::KEY_SIZE;
|
||||
use pageserver_api::keyspace::ShardedRange;
|
||||
use pageserver_api::shard::{ShardCount, ShardIdentity, TenantShardId};
|
||||
@@ -912,137 +911,13 @@ impl Timeline {
|
||||
// we're compacting, in key, LSN order.
|
||||
// If there's both a Value::Image and Value::WalRecord for the same (key,lsn),
|
||||
// then the Value::Image is ordered before Value::WalRecord.
|
||||
//
|
||||
// TODO(https://github.com/neondatabase/neon/issues/8184): remove the page cached blob_io
|
||||
// option and validation code once we've reached confidence.
|
||||
enum AllValuesIter<'a> {
|
||||
PageCachedBlobIo {
|
||||
all_keys_iter: VecIter<'a>,
|
||||
},
|
||||
StreamingKmergeBypassingPageCache {
|
||||
merge_iter: MergeIterator<'a>,
|
||||
},
|
||||
ValidatingStreamingKmergeBypassingPageCache {
|
||||
mode: CompactL0BypassPageCacheValidation,
|
||||
merge_iter: MergeIterator<'a>,
|
||||
all_keys_iter: VecIter<'a>,
|
||||
},
|
||||
}
|
||||
type VecIter<'a> = std::slice::Iter<'a, DeltaEntry<'a>>; // TODO: distinguished lifetimes
|
||||
impl AllValuesIter<'_> {
|
||||
async fn next_all_keys_iter(
|
||||
iter: &mut VecIter<'_>,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<Option<(Key, Lsn, Value)>> {
|
||||
let Some(DeltaEntry {
|
||||
key,
|
||||
lsn,
|
||||
val: value_ref,
|
||||
..
|
||||
}) = iter.next()
|
||||
else {
|
||||
return Ok(None);
|
||||
};
|
||||
let value = value_ref.load(ctx).await?;
|
||||
Ok(Some((*key, *lsn, value)))
|
||||
}
|
||||
async fn next(
|
||||
&mut self,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<Option<(Key, Lsn, Value)>> {
|
||||
match self {
|
||||
AllValuesIter::PageCachedBlobIo { all_keys_iter: iter } => {
|
||||
Self::next_all_keys_iter(iter, ctx).await
|
||||
}
|
||||
AllValuesIter::StreamingKmergeBypassingPageCache { merge_iter } => merge_iter.next().await,
|
||||
AllValuesIter::ValidatingStreamingKmergeBypassingPageCache { mode, merge_iter, all_keys_iter } => async {
|
||||
// advance both iterators
|
||||
let all_keys_iter_item = Self::next_all_keys_iter(all_keys_iter, ctx).await;
|
||||
let merge_iter_item = merge_iter.next().await;
|
||||
// compare results & log warnings as needed
|
||||
macro_rules! rate_limited_warn {
|
||||
($($arg:tt)*) => {{
|
||||
if cfg!(debug_assertions) || cfg!(feature = "testing") {
|
||||
warn!($($arg)*);
|
||||
panic!("CompactL0BypassPageCacheValidation failure, check logs");
|
||||
}
|
||||
use once_cell::sync::Lazy;
|
||||
use utils::rate_limit::RateLimit;
|
||||
use std::sync::Mutex;
|
||||
use std::time::Duration;
|
||||
static LOGGED: Lazy<Mutex<RateLimit>> =
|
||||
Lazy::new(|| Mutex::new(RateLimit::new(Duration::from_secs(10))));
|
||||
let mut rate_limit = LOGGED.lock().unwrap();
|
||||
rate_limit.call(|| {
|
||||
warn!($($arg)*);
|
||||
});
|
||||
}}
|
||||
}
|
||||
match (&all_keys_iter_item, &merge_iter_item) {
|
||||
(Err(_), Err(_)) => {
|
||||
// don't bother asserting equivality of the errors
|
||||
}
|
||||
(Err(all_keys), Ok(merge)) => {
|
||||
rate_limited_warn!(?merge, "all_keys_iter returned an error where merge did not: {all_keys:?}");
|
||||
},
|
||||
(Ok(all_keys), Err(merge)) => {
|
||||
rate_limited_warn!(?all_keys, "merge returned an error where all_keys_iter did not: {merge:?}");
|
||||
},
|
||||
(Ok(None), Ok(None)) => { }
|
||||
(Ok(Some(all_keys)), Ok(None)) => {
|
||||
rate_limited_warn!(?all_keys, "merge returned None where all_keys_iter returned Some");
|
||||
}
|
||||
(Ok(None), Ok(Some(merge))) => {
|
||||
rate_limited_warn!(?merge, "all_keys_iter returned None where merge returned Some");
|
||||
}
|
||||
(Ok(Some((all_keys_key, all_keys_lsn, all_keys_value))), Ok(Some((merge_key, merge_lsn, merge_value)))) => {
|
||||
match mode {
|
||||
// TODO: in this mode, we still load the value from disk for both iterators, even though we only need the all_keys_iter one
|
||||
CompactL0BypassPageCacheValidation::KeyLsn => {
|
||||
let all_keys = (all_keys_key, all_keys_lsn);
|
||||
let merge = (merge_key, merge_lsn);
|
||||
if all_keys != merge {
|
||||
rate_limited_warn!(?all_keys, ?merge, "merge returned a different (Key,LSN) than all_keys_iter");
|
||||
}
|
||||
}
|
||||
CompactL0BypassPageCacheValidation::KeyLsnValue => {
|
||||
let all_keys = (all_keys_key, all_keys_lsn, all_keys_value);
|
||||
let merge = (merge_key, merge_lsn, merge_value);
|
||||
if all_keys != merge {
|
||||
rate_limited_warn!(?all_keys, ?merge, "merge returned a different (Key,LSN,Value) than all_keys_iter");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// in case of mismatch, trust the legacy all_keys_iter_item
|
||||
all_keys_iter_item
|
||||
}.instrument(info_span!("next")).await
|
||||
}
|
||||
}
|
||||
}
|
||||
let mut all_values_iter = match &self.conf.compact_level0_phase1_value_access {
|
||||
CompactL0Phase1ValueAccess::PageCachedBlobIo => AllValuesIter::PageCachedBlobIo {
|
||||
all_keys_iter: all_keys.iter(),
|
||||
},
|
||||
CompactL0Phase1ValueAccess::StreamingKmerge { validate } => {
|
||||
let merge_iter = {
|
||||
let mut deltas = Vec::with_capacity(deltas_to_compact.len());
|
||||
for l in deltas_to_compact.iter() {
|
||||
let l = l.get_as_delta(ctx).await.map_err(CompactionError::Other)?;
|
||||
deltas.push(l);
|
||||
}
|
||||
MergeIterator::create(&deltas, &[], ctx)
|
||||
};
|
||||
match validate {
|
||||
None => AllValuesIter::StreamingKmergeBypassingPageCache { merge_iter },
|
||||
Some(validate) => AllValuesIter::ValidatingStreamingKmergeBypassingPageCache {
|
||||
mode: validate.clone(),
|
||||
merge_iter,
|
||||
all_keys_iter: all_keys.iter(),
|
||||
},
|
||||
}
|
||||
let mut all_values_iter = {
|
||||
let mut deltas = Vec::with_capacity(deltas_to_compact.len());
|
||||
for l in deltas_to_compact.iter() {
|
||||
let l = l.get_as_delta(ctx).await.map_err(CompactionError::Other)?;
|
||||
deltas.push(l);
|
||||
}
|
||||
MergeIterator::create(&deltas, &[], ctx)
|
||||
};
|
||||
|
||||
// This iterator walks through all keys and is needed to calculate size used by each key
|
||||
@@ -1119,7 +994,7 @@ impl Timeline {
|
||||
let mut keys = 0;
|
||||
|
||||
while let Some((key, lsn, value)) = all_values_iter
|
||||
.next(ctx)
|
||||
.next()
|
||||
.await
|
||||
.map_err(CompactionError::Other)?
|
||||
{
|
||||
@@ -1934,7 +1809,6 @@ impl Timeline {
|
||||
.unwrap();
|
||||
// We don't want any of the produced layers to cover the full key range (i.e., MIN..MAX) b/c it will then be recognized
|
||||
// as an L0 layer.
|
||||
let hack_end_key = Key::NON_L0_MAX;
|
||||
let mut delta_layers = Vec::new();
|
||||
let mut image_layers = Vec::new();
|
||||
let mut downloaded_layers = Vec::new();
|
||||
@@ -1980,10 +1854,8 @@ impl Timeline {
|
||||
self.conf,
|
||||
self.timeline_id,
|
||||
self.tenant_shard_id,
|
||||
Key::MIN,
|
||||
lowest_retain_lsn..end_lsn,
|
||||
self.get_compaction_target_size(),
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
|
||||
@@ -2090,7 +1962,7 @@ impl Timeline {
|
||||
let produced_image_layers = if let Some(writer) = image_layer_writer {
|
||||
if !dry_run {
|
||||
writer
|
||||
.finish_with_discard_fn(self, ctx, hack_end_key, discard)
|
||||
.finish_with_discard_fn(self, ctx, Key::MAX, discard)
|
||||
.await?
|
||||
} else {
|
||||
let (layers, _) = writer.take()?;
|
||||
@@ -2103,7 +1975,7 @@ impl Timeline {
|
||||
|
||||
let produced_delta_layers = if !dry_run {
|
||||
delta_layer_writer
|
||||
.finish_with_discard_fn(self, ctx, hack_end_key, discard)
|
||||
.finish_with_discard_fn(self, ctx, discard)
|
||||
.await?
|
||||
} else {
|
||||
let (layers, _) = delta_layer_writer.take()?;
|
||||
|
||||
@@ -593,8 +593,10 @@ impl<'a> VectoredBlobReader<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Read planner used in [`crate::tenant::storage_layer::image_layer::ImageLayerIterator`]. It provides a streaming API for
|
||||
/// getting read blobs. It returns a batch when `handle` gets called and when the current key would just exceed the read_size and
|
||||
/// Read planner used in [`crate::tenant::storage_layer::image_layer::ImageLayerIterator`].
|
||||
///
|
||||
/// It provides a streaming API for getting read blobs. It returns a batch when
|
||||
/// `handle` gets called and when the current key would just exceed the read_size and
|
||||
/// max_cnt constraints.
|
||||
pub struct StreamingVectoredReadPlanner {
|
||||
read_builder: Option<VectoredReadBuilder>,
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
//!
|
||||
//! VirtualFile is like a normal File, but it's not bound directly to
|
||||
//! a file descriptor. Instead, the file is opened when it's read from,
|
||||
//! a file descriptor.
|
||||
//!
|
||||
//! Instead, the file is opened when it's read from,
|
||||
//! and if too many files are open globally in the system, least-recently
|
||||
//! used ones are closed.
|
||||
//!
|
||||
|
||||
@@ -25,9 +25,7 @@ use std::time::Duration;
|
||||
use std::time::SystemTime;
|
||||
|
||||
use pageserver_api::shard::ShardIdentity;
|
||||
use postgres_ffi::v14::nonrelfile_utils::clogpage_precedes;
|
||||
use postgres_ffi::v14::nonrelfile_utils::slru_may_delete_clogsegment;
|
||||
use postgres_ffi::TimestampTz;
|
||||
use postgres_ffi::{dispatch_pgversion, enum_pgversion, enum_pgversion_dispatch, TimestampTz};
|
||||
use postgres_ffi::{fsm_logical_to_physical, page_is_new, page_set_lsn};
|
||||
|
||||
use anyhow::{bail, Context, Result};
|
||||
@@ -48,16 +46,31 @@ use pageserver_api::key::rel_block_to_key;
|
||||
use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind};
|
||||
use postgres_ffi::pg_constants;
|
||||
use postgres_ffi::relfile_utils::{FSM_FORKNUM, INIT_FORKNUM, MAIN_FORKNUM, VISIBILITYMAP_FORKNUM};
|
||||
use postgres_ffi::v14::nonrelfile_utils::mx_offset_to_member_segment;
|
||||
use postgres_ffi::v14::xlog_utils::*;
|
||||
use postgres_ffi::v14::CheckPoint;
|
||||
use postgres_ffi::TransactionId;
|
||||
use postgres_ffi::BLCKSZ;
|
||||
use utils::bin_ser::SerializeError;
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
enum_pgversion! {CheckPoint, pgv::CheckPoint}
|
||||
|
||||
impl CheckPoint {
|
||||
fn encode(&self) -> Result<Bytes, SerializeError> {
|
||||
enum_pgversion_dispatch!(self, CheckPoint, cp, { cp.encode() })
|
||||
}
|
||||
|
||||
fn update_next_xid(&mut self, xid: u32) -> bool {
|
||||
enum_pgversion_dispatch!(self, CheckPoint, cp, { cp.update_next_xid(xid) })
|
||||
}
|
||||
|
||||
pub fn update_next_multixid(&mut self, multi_xid: u32, multi_offset: u32) -> bool {
|
||||
enum_pgversion_dispatch!(self, CheckPoint, cp, {
|
||||
cp.update_next_multixid(multi_xid, multi_offset)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub struct WalIngest {
|
||||
shard: ShardIdentity,
|
||||
pg_version: u32,
|
||||
checkpoint: CheckPoint,
|
||||
checkpoint_modified: bool,
|
||||
warn_ingest_lag: WarnIngestLag,
|
||||
@@ -78,12 +91,16 @@ impl WalIngest {
|
||||
// Fetch the latest checkpoint into memory, so that we can compare with it
|
||||
// quickly in `ingest_record` and update it when it changes.
|
||||
let checkpoint_bytes = timeline.get_checkpoint(startpoint, ctx).await?;
|
||||
let checkpoint = CheckPoint::decode(&checkpoint_bytes)?;
|
||||
trace!("CheckPoint.nextXid = {}", checkpoint.nextXid.value);
|
||||
let pgversion = timeline.pg_version;
|
||||
|
||||
let checkpoint = dispatch_pgversion!(pgversion, {
|
||||
let checkpoint = pgv::CheckPoint::decode(&checkpoint_bytes)?;
|
||||
trace!("CheckPoint.nextXid = {}", checkpoint.nextXid.value);
|
||||
<pgv::CheckPoint as Into<CheckPoint>>::into(checkpoint)
|
||||
});
|
||||
|
||||
Ok(WalIngest {
|
||||
shard: *timeline.get_shard_identity(),
|
||||
pg_version: timeline.pg_version,
|
||||
checkpoint,
|
||||
checkpoint_modified: false,
|
||||
warn_ingest_lag: WarnIngestLag {
|
||||
@@ -117,7 +134,7 @@ impl WalIngest {
|
||||
|
||||
modification.set_lsn(lsn)?;
|
||||
|
||||
if decoded.is_dbase_create_copy(self.pg_version) {
|
||||
if decoded.is_dbase_create_copy(pg_version) {
|
||||
// Records of this type should always be preceded by a commit(), as they
|
||||
// rely on reading data pages back from the Timeline.
|
||||
assert!(!modification.has_dirty_data_pages());
|
||||
@@ -220,6 +237,26 @@ impl WalIngest {
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
} else if pg_version == 17 {
|
||||
if info == postgres_ffi::v17::bindings::XLOG_DBASE_CREATE_WAL_LOG {
|
||||
debug!("XLOG_DBASE_CREATE_WAL_LOG: noop");
|
||||
} else if info == postgres_ffi::v17::bindings::XLOG_DBASE_CREATE_FILE_COPY {
|
||||
// The XLOG record was renamed between v14 and v15,
|
||||
// but the record format is the same.
|
||||
// So we can reuse XlCreateDatabase here.
|
||||
debug!("XLOG_DBASE_CREATE_FILE_COPY");
|
||||
let createdb = XlCreateDatabase::decode(&mut buf);
|
||||
self.ingest_xlog_dbase_create(modification, &createdb, ctx)
|
||||
.await?;
|
||||
} else if info == postgres_ffi::v17::bindings::XLOG_DBASE_DROP {
|
||||
let dropdb = XlDropDatabase::decode(&mut buf);
|
||||
for tablespace_id in dropdb.tablespace_ids {
|
||||
trace!("Drop db {}, {}", tablespace_id, dropdb.db_id);
|
||||
modification
|
||||
.drop_dbdir(tablespace_id, dropdb.db_id, ctx)
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
pg_constants::RM_TBLSPC_ID => {
|
||||
@@ -229,7 +266,11 @@ impl WalIngest {
|
||||
let info = decoded.xl_info & !pg_constants::XLR_INFO_MASK;
|
||||
|
||||
if info == pg_constants::CLOG_ZEROPAGE {
|
||||
let pageno = buf.get_u32_le();
|
||||
let pageno = if pg_version < 17 {
|
||||
buf.get_u32_le()
|
||||
} else {
|
||||
buf.get_u64_le() as u32
|
||||
};
|
||||
let segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT;
|
||||
let rpageno = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT;
|
||||
self.put_slru_page_image(
|
||||
@@ -243,7 +284,7 @@ impl WalIngest {
|
||||
.await?;
|
||||
} else {
|
||||
assert!(info == pg_constants::CLOG_TRUNCATE);
|
||||
let xlrec = XlClogTruncate::decode(&mut buf);
|
||||
let xlrec = XlClogTruncate::decode(&mut buf, pg_version);
|
||||
self.ingest_clog_truncate_record(modification, &xlrec, ctx)
|
||||
.await?;
|
||||
}
|
||||
@@ -282,12 +323,21 @@ impl WalIngest {
|
||||
parsed_xact.xid,
|
||||
lsn,
|
||||
);
|
||||
modification
|
||||
.drop_twophase_file(parsed_xact.xid, ctx)
|
||||
.await?;
|
||||
|
||||
let xid: u64 = if pg_version >= 17 {
|
||||
self.adjust_to_full_transaction_id(parsed_xact.xid)?
|
||||
} else {
|
||||
parsed_xact.xid as u64
|
||||
};
|
||||
modification.drop_twophase_file(xid, ctx).await?;
|
||||
} else if info == pg_constants::XLOG_XACT_PREPARE {
|
||||
let xid: u64 = if pg_version >= 17 {
|
||||
self.adjust_to_full_transaction_id(decoded.xl_xid)?
|
||||
} else {
|
||||
decoded.xl_xid as u64
|
||||
};
|
||||
modification
|
||||
.put_twophase_file(decoded.xl_xid, Bytes::copy_from_slice(&buf[..]), ctx)
|
||||
.put_twophase_file(xid, Bytes::copy_from_slice(&buf[..]), ctx)
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
@@ -295,7 +345,11 @@ impl WalIngest {
|
||||
let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK;
|
||||
|
||||
if info == pg_constants::XLOG_MULTIXACT_ZERO_OFF_PAGE {
|
||||
let pageno = buf.get_u32_le();
|
||||
let pageno = if pg_version < 17 {
|
||||
buf.get_u32_le()
|
||||
} else {
|
||||
buf.get_u64_le() as u32
|
||||
};
|
||||
let segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT;
|
||||
let rpageno = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT;
|
||||
self.put_slru_page_image(
|
||||
@@ -308,7 +362,11 @@ impl WalIngest {
|
||||
)
|
||||
.await?;
|
||||
} else if info == pg_constants::XLOG_MULTIXACT_ZERO_MEM_PAGE {
|
||||
let pageno = buf.get_u32_le();
|
||||
let pageno = if pg_version < 17 {
|
||||
buf.get_u32_le()
|
||||
} else {
|
||||
buf.get_u64_le() as u32
|
||||
};
|
||||
let segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT;
|
||||
let rpageno = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT;
|
||||
self.put_slru_page_image(
|
||||
@@ -337,70 +395,93 @@ impl WalIngest {
|
||||
pg_constants::RM_XLOG_ID => {
|
||||
let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK;
|
||||
|
||||
if info == pg_constants::XLOG_NEXTOID {
|
||||
let next_oid = buf.get_u32_le();
|
||||
if self.checkpoint.nextOid != next_oid {
|
||||
self.checkpoint.nextOid = next_oid;
|
||||
if info == pg_constants::XLOG_PARAMETER_CHANGE {
|
||||
if let CheckPoint::V17(cp) = &mut self.checkpoint {
|
||||
let rec = v17::XlParameterChange::decode(&mut buf);
|
||||
cp.wal_level = rec.wal_level;
|
||||
self.checkpoint_modified = true;
|
||||
}
|
||||
} else if info == pg_constants::XLOG_CHECKPOINT_ONLINE
|
||||
|| info == pg_constants::XLOG_CHECKPOINT_SHUTDOWN
|
||||
{
|
||||
let mut checkpoint_bytes = [0u8; SIZEOF_CHECKPOINT];
|
||||
buf.copy_to_slice(&mut checkpoint_bytes);
|
||||
let xlog_checkpoint = CheckPoint::decode(&checkpoint_bytes)?;
|
||||
trace!(
|
||||
"xlog_checkpoint.oldestXid={}, checkpoint.oldestXid={}",
|
||||
xlog_checkpoint.oldestXid,
|
||||
self.checkpoint.oldestXid
|
||||
);
|
||||
if (self
|
||||
.checkpoint
|
||||
.oldestXid
|
||||
.wrapping_sub(xlog_checkpoint.oldestXid) as i32)
|
||||
< 0
|
||||
{
|
||||
self.checkpoint.oldestXid = xlog_checkpoint.oldestXid;
|
||||
} else if info == pg_constants::XLOG_END_OF_RECOVERY {
|
||||
if let CheckPoint::V17(cp) = &mut self.checkpoint {
|
||||
let rec = v17::XlEndOfRecovery::decode(&mut buf);
|
||||
cp.wal_level = rec.wal_level;
|
||||
self.checkpoint_modified = true;
|
||||
}
|
||||
trace!(
|
||||
"xlog_checkpoint.oldestActiveXid={}, checkpoint.oldestActiveXid={}",
|
||||
xlog_checkpoint.oldestActiveXid,
|
||||
self.checkpoint.oldestActiveXid
|
||||
);
|
||||
|
||||
// A shutdown checkpoint has `oldestActiveXid == InvalidTransactionid`,
|
||||
// because at shutdown, all in-progress transactions will implicitly
|
||||
// end. Postgres startup code knows that, and allows hot standby to start
|
||||
// immediately from a shutdown checkpoint.
|
||||
//
|
||||
// In Neon, Postgres hot standby startup always behaves as if starting from
|
||||
// an online checkpoint. It needs a valid `oldestActiveXid` value, so
|
||||
// instead of overwriting self.checkpoint.oldestActiveXid with
|
||||
// InvalidTransactionid from the checkpoint WAL record, update it to a
|
||||
// proper value, knowing that there are no in-progress transactions at this
|
||||
// point, except for prepared transactions.
|
||||
//
|
||||
// See also the neon code changes in the InitWalRecovery() function.
|
||||
if xlog_checkpoint.oldestActiveXid == pg_constants::INVALID_TRANSACTION_ID
|
||||
&& info == pg_constants::XLOG_CHECKPOINT_SHUTDOWN
|
||||
{
|
||||
let mut oldest_active_xid = self.checkpoint.nextXid.value as u32;
|
||||
for xid in modification.tline.list_twophase_files(lsn, ctx).await? {
|
||||
if (xid.wrapping_sub(oldest_active_xid) as i32) < 0 {
|
||||
oldest_active_xid = xid;
|
||||
}
|
||||
}
|
||||
self.checkpoint.oldestActiveXid = oldest_active_xid;
|
||||
} else {
|
||||
self.checkpoint.oldestActiveXid = xlog_checkpoint.oldestActiveXid;
|
||||
}
|
||||
|
||||
// Write a new checkpoint key-value pair on every checkpoint record, even
|
||||
// if nothing really changed. Not strictly required, but it seems nice to
|
||||
// have some trace of the checkpoint records in the layer files at the same
|
||||
// LSNs.
|
||||
self.checkpoint_modified = true;
|
||||
}
|
||||
|
||||
enum_pgversion_dispatch!(&mut self.checkpoint, CheckPoint, cp, {
|
||||
if info == pg_constants::XLOG_NEXTOID {
|
||||
let next_oid = buf.get_u32_le();
|
||||
if cp.nextOid != next_oid {
|
||||
cp.nextOid = next_oid;
|
||||
self.checkpoint_modified = true;
|
||||
}
|
||||
} else if info == pg_constants::XLOG_CHECKPOINT_ONLINE
|
||||
|| info == pg_constants::XLOG_CHECKPOINT_SHUTDOWN
|
||||
{
|
||||
let mut checkpoint_bytes = [0u8; pgv::xlog_utils::SIZEOF_CHECKPOINT];
|
||||
buf.copy_to_slice(&mut checkpoint_bytes);
|
||||
let xlog_checkpoint = pgv::CheckPoint::decode(&checkpoint_bytes)?;
|
||||
trace!(
|
||||
"xlog_checkpoint.oldestXid={}, checkpoint.oldestXid={}",
|
||||
xlog_checkpoint.oldestXid,
|
||||
cp.oldestXid
|
||||
);
|
||||
if (cp.oldestXid.wrapping_sub(xlog_checkpoint.oldestXid) as i32) < 0 {
|
||||
cp.oldestXid = xlog_checkpoint.oldestXid;
|
||||
}
|
||||
trace!(
|
||||
"xlog_checkpoint.oldestActiveXid={}, checkpoint.oldestActiveXid={}",
|
||||
xlog_checkpoint.oldestActiveXid,
|
||||
cp.oldestActiveXid
|
||||
);
|
||||
|
||||
// A shutdown checkpoint has `oldestActiveXid == InvalidTransactionid`,
|
||||
// because at shutdown, all in-progress transactions will implicitly
|
||||
// end. Postgres startup code knows that, and allows hot standby to start
|
||||
// immediately from a shutdown checkpoint.
|
||||
//
|
||||
// In Neon, Postgres hot standby startup always behaves as if starting from
|
||||
// an online checkpoint. It needs a valid `oldestActiveXid` value, so
|
||||
// instead of overwriting self.checkpoint.oldestActiveXid with
|
||||
// InvalidTransactionid from the checkpoint WAL record, update it to a
|
||||
// proper value, knowing that there are no in-progress transactions at this
|
||||
// point, except for prepared transactions.
|
||||
//
|
||||
// See also the neon code changes in the InitWalRecovery() function.
|
||||
if xlog_checkpoint.oldestActiveXid == pg_constants::INVALID_TRANSACTION_ID
|
||||
&& info == pg_constants::XLOG_CHECKPOINT_SHUTDOWN
|
||||
{
|
||||
let oldest_active_xid = if pg_version >= 17 {
|
||||
let mut oldest_active_full_xid = cp.nextXid.value;
|
||||
for xid in modification.tline.list_twophase_files(lsn, ctx).await? {
|
||||
if xid < oldest_active_full_xid {
|
||||
oldest_active_full_xid = xid;
|
||||
}
|
||||
}
|
||||
oldest_active_full_xid as u32
|
||||
} else {
|
||||
let mut oldest_active_xid = cp.nextXid.value as u32;
|
||||
for xid in modification.tline.list_twophase_files(lsn, ctx).await? {
|
||||
let narrow_xid = xid as u32;
|
||||
if (narrow_xid.wrapping_sub(oldest_active_xid) as i32) < 0 {
|
||||
oldest_active_xid = narrow_xid;
|
||||
}
|
||||
}
|
||||
oldest_active_xid
|
||||
};
|
||||
cp.oldestActiveXid = oldest_active_xid;
|
||||
} else {
|
||||
cp.oldestActiveXid = xlog_checkpoint.oldestActiveXid;
|
||||
}
|
||||
|
||||
// Write a new checkpoint key-value pair on every checkpoint record, even
|
||||
// if nothing really changed. Not strictly required, but it seems nice to
|
||||
// have some trace of the checkpoint records in the layer files at the same
|
||||
// LSNs.
|
||||
self.checkpoint_modified = true;
|
||||
}
|
||||
});
|
||||
}
|
||||
pg_constants::RM_LOGICALMSG_ID => {
|
||||
let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK;
|
||||
@@ -424,7 +505,11 @@ impl WalIngest {
|
||||
let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK;
|
||||
if info == pg_constants::XLOG_RUNNING_XACTS {
|
||||
let xlrec = crate::walrecord::XlRunningXacts::decode(&mut buf);
|
||||
self.checkpoint.oldestActiveXid = xlrec.oldest_running_xid;
|
||||
|
||||
enum_pgversion_dispatch!(&mut self.checkpoint, CheckPoint, cp, {
|
||||
cp.oldestActiveXid = xlrec.oldest_running_xid;
|
||||
});
|
||||
|
||||
self.checkpoint_modified = true;
|
||||
}
|
||||
}
|
||||
@@ -497,6 +582,25 @@ impl WalIngest {
|
||||
Ok(modification.len() > prev_len)
|
||||
}
|
||||
|
||||
/// This is the same as AdjustToFullTransactionId(xid) in PostgreSQL
|
||||
fn adjust_to_full_transaction_id(&self, xid: TransactionId) -> Result<u64> {
|
||||
let next_full_xid =
|
||||
enum_pgversion_dispatch!(&self.checkpoint, CheckPoint, cp, { cp.nextXid.value });
|
||||
|
||||
let next_xid = (next_full_xid) as u32;
|
||||
let mut epoch = (next_full_xid >> 32) as u32;
|
||||
|
||||
if xid > next_xid {
|
||||
// Wraparound occurred, must be from a prev epoch.
|
||||
if epoch == 0 {
|
||||
bail!("apparent XID wraparound with prepared transaction XID {xid}, nextXid is {next_full_xid}");
|
||||
}
|
||||
epoch -= 1;
|
||||
}
|
||||
|
||||
Ok((epoch as u64) << 32 | xid as u64)
|
||||
}
|
||||
|
||||
/// Do not store this block, but observe it for the purposes of updating our relation size state.
|
||||
async fn observe_decoded_block(
|
||||
&mut self,
|
||||
@@ -539,7 +643,7 @@ impl WalIngest {
|
||||
&& blk.has_image
|
||||
&& decoded.xl_rmid == pg_constants::RM_XLOG_ID
|
||||
&& (decoded.xl_info == pg_constants::XLOG_FPI
|
||||
|| decoded.xl_info == pg_constants::XLOG_FPI_FOR_HINT)
|
||||
|| decoded.xl_info == pg_constants::XLOG_FPI_FOR_HINT)
|
||||
// compression of WAL is not yet supported: fall back to storing the original WAL record
|
||||
&& !postgres_ffi::bkpimage_is_compressed(blk.bimg_info, modification.tline.pg_version)
|
||||
// do not materialize null pages because them most likely be soon replaced with real data
|
||||
@@ -797,6 +901,73 @@ impl WalIngest {
|
||||
bail!("Unknown RMGR {} for Heap decoding", decoded.xl_rmid);
|
||||
}
|
||||
}
|
||||
17 => {
|
||||
if decoded.xl_rmid == pg_constants::RM_HEAP_ID {
|
||||
let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;
|
||||
|
||||
if info == pg_constants::XLOG_HEAP_INSERT {
|
||||
let xlrec = v17::XlHeapInsert::decode(buf);
|
||||
assert_eq!(0, buf.remaining());
|
||||
if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
|
||||
new_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
} else if info == pg_constants::XLOG_HEAP_DELETE {
|
||||
let xlrec = v17::XlHeapDelete::decode(buf);
|
||||
if (xlrec.flags & pg_constants::XLH_DELETE_ALL_VISIBLE_CLEARED) != 0 {
|
||||
new_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
} else if info == pg_constants::XLOG_HEAP_UPDATE
|
||||
|| info == pg_constants::XLOG_HEAP_HOT_UPDATE
|
||||
{
|
||||
let xlrec = v17::XlHeapUpdate::decode(buf);
|
||||
// the size of tuple data is inferred from the size of the record.
|
||||
// we can't validate the remaining number of bytes without parsing
|
||||
// the tuple data.
|
||||
if (xlrec.flags & pg_constants::XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED) != 0 {
|
||||
old_heap_blkno = Some(decoded.blocks.last().unwrap().blkno);
|
||||
}
|
||||
if (xlrec.flags & pg_constants::XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED) != 0 {
|
||||
// PostgreSQL only uses XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED on a
|
||||
// non-HOT update where the new tuple goes to different page than
|
||||
// the old one. Otherwise, only XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED is
|
||||
// set.
|
||||
new_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
} else if info == pg_constants::XLOG_HEAP_LOCK {
|
||||
let xlrec = v17::XlHeapLock::decode(buf);
|
||||
if (xlrec.flags & pg_constants::XLH_LOCK_ALL_FROZEN_CLEARED) != 0 {
|
||||
old_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
flags = pg_constants::VISIBILITYMAP_ALL_FROZEN;
|
||||
}
|
||||
}
|
||||
} else if decoded.xl_rmid == pg_constants::RM_HEAP2_ID {
|
||||
let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;
|
||||
if info == pg_constants::XLOG_HEAP2_MULTI_INSERT {
|
||||
let xlrec = v17::XlHeapMultiInsert::decode(buf);
|
||||
|
||||
let offset_array_len =
|
||||
if decoded.xl_info & pg_constants::XLOG_HEAP_INIT_PAGE > 0 {
|
||||
// the offsets array is omitted if XLOG_HEAP_INIT_PAGE is set
|
||||
0
|
||||
} else {
|
||||
size_of::<u16>() * xlrec.ntuples as usize
|
||||
};
|
||||
assert_eq!(offset_array_len, buf.remaining());
|
||||
|
||||
if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
|
||||
new_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
} else if info == pg_constants::XLOG_HEAP2_LOCK_UPDATED {
|
||||
let xlrec = v17::XlHeapLockUpdated::decode(buf);
|
||||
if (xlrec.flags & pg_constants::XLH_LOCK_ALL_FROZEN_CLEARED) != 0 {
|
||||
old_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
flags = pg_constants::VISIBILITYMAP_ALL_FROZEN;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
bail!("Unknown RMGR {} for Heap decoding", decoded.xl_rmid);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
@@ -905,26 +1076,26 @@ impl WalIngest {
|
||||
assert_eq!(decoded.xl_rmid, pg_constants::RM_NEON_ID);
|
||||
|
||||
match pg_version {
|
||||
16 => {
|
||||
16 | 17 => {
|
||||
let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;
|
||||
|
||||
match info {
|
||||
pg_constants::XLOG_NEON_HEAP_INSERT => {
|
||||
let xlrec = v16::rm_neon::XlNeonHeapInsert::decode(buf);
|
||||
let xlrec = v17::rm_neon::XlNeonHeapInsert::decode(buf);
|
||||
assert_eq!(0, buf.remaining());
|
||||
if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
|
||||
new_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
}
|
||||
pg_constants::XLOG_NEON_HEAP_DELETE => {
|
||||
let xlrec = v16::rm_neon::XlNeonHeapDelete::decode(buf);
|
||||
let xlrec = v17::rm_neon::XlNeonHeapDelete::decode(buf);
|
||||
if (xlrec.flags & pg_constants::XLH_DELETE_ALL_VISIBLE_CLEARED) != 0 {
|
||||
new_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
}
|
||||
pg_constants::XLOG_NEON_HEAP_UPDATE
|
||||
| pg_constants::XLOG_NEON_HEAP_HOT_UPDATE => {
|
||||
let xlrec = v16::rm_neon::XlNeonHeapUpdate::decode(buf);
|
||||
let xlrec = v17::rm_neon::XlNeonHeapUpdate::decode(buf);
|
||||
// the size of tuple data is inferred from the size of the record.
|
||||
// we can't validate the remaining number of bytes without parsing
|
||||
// the tuple data.
|
||||
@@ -940,7 +1111,7 @@ impl WalIngest {
|
||||
}
|
||||
}
|
||||
pg_constants::XLOG_NEON_HEAP_MULTI_INSERT => {
|
||||
let xlrec = v16::rm_neon::XlNeonHeapMultiInsert::decode(buf);
|
||||
let xlrec = v17::rm_neon::XlNeonHeapMultiInsert::decode(buf);
|
||||
|
||||
let offset_array_len =
|
||||
if decoded.xl_info & pg_constants::XLOG_HEAP_INIT_PAGE > 0 {
|
||||
@@ -956,7 +1127,7 @@ impl WalIngest {
|
||||
}
|
||||
}
|
||||
pg_constants::XLOG_NEON_HEAP_LOCK => {
|
||||
let xlrec = v16::rm_neon::XlNeonHeapLock::decode(buf);
|
||||
let xlrec = v17::rm_neon::XlNeonHeapLock::decode(buf);
|
||||
if (xlrec.flags & pg_constants::XLH_LOCK_ALL_FROZEN_CLEARED) != 0 {
|
||||
old_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
flags = pg_constants::VISIBILITYMAP_ALL_FROZEN;
|
||||
@@ -1204,7 +1375,7 @@ impl WalIngest {
|
||||
if rec.blkno % pg_constants::SLOTS_PER_FSM_PAGE != 0 {
|
||||
// Tail of last remaining FSM page has to be zeroed.
|
||||
// We are not precise here and instead of digging in FSM bitmap format just clear the whole page.
|
||||
modification.put_rel_page_image_zero(rel, fsm_physical_page_no);
|
||||
modification.put_rel_page_image_zero(rel, fsm_physical_page_no)?;
|
||||
fsm_physical_page_no += 1;
|
||||
}
|
||||
let nblocks = get_relsize(modification, rel, ctx).await?;
|
||||
@@ -1226,7 +1397,7 @@ impl WalIngest {
|
||||
if rec.blkno % pg_constants::VM_HEAPBLOCKS_PER_PAGE != 0 {
|
||||
// Tail of last remaining vm page has to be zeroed.
|
||||
// We are not precise here and instead of digging in VM bitmap format just clear the whole page.
|
||||
modification.put_rel_page_image_zero(rel, vm_page_no);
|
||||
modification.put_rel_page_image_zero(rel, vm_page_no)?;
|
||||
vm_page_no += 1;
|
||||
}
|
||||
let nblocks = get_relsize(modification, rel, ctx).await?;
|
||||
@@ -1242,12 +1413,17 @@ impl WalIngest {
|
||||
fn warn_on_ingest_lag(
|
||||
&mut self,
|
||||
conf: &crate::config::PageServerConf,
|
||||
wal_timestmap: TimestampTz,
|
||||
wal_timestamp: TimestampTz,
|
||||
) {
|
||||
debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
let now = SystemTime::now();
|
||||
let rate_limits = &mut self.warn_ingest_lag;
|
||||
match try_from_pg_timestamp(wal_timestmap) {
|
||||
|
||||
let ts = enum_pgversion_dispatch!(&self.checkpoint, CheckPoint, _cp, {
|
||||
pgv::xlog_utils::try_from_pg_timestamp(wal_timestamp)
|
||||
});
|
||||
|
||||
match ts {
|
||||
Ok(ts) => {
|
||||
match now.duration_since(ts) {
|
||||
Ok(lag) => {
|
||||
@@ -1257,7 +1433,7 @@ impl WalIngest {
|
||||
warn!(%rate_limit_stats, %lag, "ingesting record with timestamp lagging more than wait_lsn_timeout");
|
||||
})
|
||||
}
|
||||
},
|
||||
}
|
||||
Err(e) => {
|
||||
let delta_t = e.duration();
|
||||
// determined by prod victoriametrics query: 1000 * (timestamp(node_time_seconds{neon_service="pageserver"}) - node_time_seconds)
|
||||
@@ -1271,7 +1447,6 @@ impl WalIngest {
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
Err(error) => {
|
||||
rate_limits.timestamp_invalid_msg_ratelimit.call2(|rate_limit_stats| {
|
||||
@@ -1379,14 +1554,17 @@ impl WalIngest {
|
||||
// truncated, but a checkpoint record with the updated values isn't written until
|
||||
// later. In Neon, a server can start at any LSN, not just on a checkpoint record,
|
||||
// so we keep the oldestXid and oldestXidDB up-to-date.
|
||||
self.checkpoint.oldestXid = xlrec.oldest_xid;
|
||||
self.checkpoint.oldestXidDB = xlrec.oldest_xid_db;
|
||||
enum_pgversion_dispatch!(&mut self.checkpoint, CheckPoint, cp, {
|
||||
cp.oldestXid = xlrec.oldest_xid;
|
||||
cp.oldestXidDB = xlrec.oldest_xid_db;
|
||||
});
|
||||
self.checkpoint_modified = true;
|
||||
|
||||
// TODO Treat AdvanceOldestClogXid() or write a comment why we don't need it
|
||||
|
||||
let latest_page_number =
|
||||
self.checkpoint.nextXid.value as u32 / pg_constants::CLOG_XACTS_PER_PAGE;
|
||||
enum_pgversion_dispatch!(self.checkpoint, CheckPoint, cp, { cp.nextXid.value }) as u32
|
||||
/ pg_constants::CLOG_XACTS_PER_PAGE;
|
||||
|
||||
// Now delete all segments containing pages between xlrec.pageno
|
||||
// and latest_page_number.
|
||||
@@ -1394,7 +1572,9 @@ impl WalIngest {
|
||||
// First, make an important safety check:
|
||||
// the current endpoint page must not be eligible for removal.
|
||||
// See SimpleLruTruncate() in slru.c
|
||||
if clogpage_precedes(latest_page_number, xlrec.pageno) {
|
||||
if dispatch_pgversion!(modification.tline.pg_version, {
|
||||
pgv::nonrelfile_utils::clogpage_precedes(latest_page_number, xlrec.pageno)
|
||||
}) {
|
||||
info!("could not truncate directory pg_xact apparent wraparound");
|
||||
return Ok(());
|
||||
}
|
||||
@@ -1411,7 +1591,12 @@ impl WalIngest {
|
||||
.await?
|
||||
{
|
||||
let segpage = segno * pg_constants::SLRU_PAGES_PER_SEGMENT;
|
||||
if slru_may_delete_clogsegment(segpage, xlrec.pageno) {
|
||||
|
||||
let may_delete = dispatch_pgversion!(modification.tline.pg_version, {
|
||||
pgv::nonrelfile_utils::slru_may_delete_clogsegment(segpage, xlrec.pageno)
|
||||
});
|
||||
|
||||
if may_delete {
|
||||
modification
|
||||
.drop_slru_segment(SlruKind::Clog, segno, ctx)
|
||||
.await?;
|
||||
@@ -1530,14 +1715,23 @@ impl WalIngest {
|
||||
xlrec: &XlMultiXactTruncate,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<()> {
|
||||
self.checkpoint.oldestMulti = xlrec.end_trunc_off;
|
||||
self.checkpoint.oldestMultiDB = xlrec.oldest_multi_db;
|
||||
let (maxsegment, startsegment, endsegment) =
|
||||
enum_pgversion_dispatch!(&mut self.checkpoint, CheckPoint, cp, {
|
||||
cp.oldestMulti = xlrec.end_trunc_off;
|
||||
cp.oldestMultiDB = xlrec.oldest_multi_db;
|
||||
let maxsegment: i32 = pgv::nonrelfile_utils::mx_offset_to_member_segment(
|
||||
pg_constants::MAX_MULTIXACT_OFFSET,
|
||||
);
|
||||
let startsegment: i32 =
|
||||
pgv::nonrelfile_utils::mx_offset_to_member_segment(xlrec.start_trunc_memb);
|
||||
let endsegment: i32 =
|
||||
pgv::nonrelfile_utils::mx_offset_to_member_segment(xlrec.end_trunc_memb);
|
||||
(maxsegment, startsegment, endsegment)
|
||||
});
|
||||
|
||||
self.checkpoint_modified = true;
|
||||
|
||||
// PerformMembersTruncation
|
||||
let maxsegment: i32 = mx_offset_to_member_segment(pg_constants::MAX_MULTIXACT_OFFSET);
|
||||
let startsegment: i32 = mx_offset_to_member_segment(xlrec.start_trunc_memb);
|
||||
let endsegment: i32 = mx_offset_to_member_segment(xlrec.end_trunc_memb);
|
||||
let mut segment: i32 = startsegment;
|
||||
|
||||
// Delete all the segments except the last one. The last segment can still
|
||||
@@ -1696,7 +1890,7 @@ impl WalIngest {
|
||||
continue;
|
||||
}
|
||||
|
||||
modification.put_rel_page_image_zero(rel, gap_blknum);
|
||||
modification.put_rel_page_image_zero(rel, gap_blknum)?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
@@ -1762,7 +1956,7 @@ impl WalIngest {
|
||||
|
||||
// fill the gap with zeros
|
||||
for gap_blknum in old_nblocks..blknum {
|
||||
modification.put_slru_page_image_zero(kind, segno, gap_blknum);
|
||||
modification.put_slru_page_image_zero(kind, segno, gap_blknum)?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
@@ -1811,11 +2005,23 @@ mod tests {
|
||||
// TODO
|
||||
}
|
||||
|
||||
static ZERO_CHECKPOINT: Bytes = Bytes::from_static(&[0u8; SIZEOF_CHECKPOINT]);
|
||||
#[tokio::test]
|
||||
async fn test_zeroed_checkpoint_decodes_correctly() -> Result<()> {
|
||||
for i in 14..=16 {
|
||||
dispatch_pgversion!(i, {
|
||||
pgv::CheckPoint::decode(&pgv::ZERO_CHECKPOINT)?;
|
||||
});
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn init_walingest_test(tline: &Timeline, ctx: &RequestContext) -> Result<WalIngest> {
|
||||
let mut m = tline.begin_modification(Lsn(0x10));
|
||||
m.put_checkpoint(ZERO_CHECKPOINT.clone())?;
|
||||
m.put_checkpoint(dispatch_pgversion!(
|
||||
tline.pg_version,
|
||||
pgv::ZERO_CHECKPOINT.clone()
|
||||
))?;
|
||||
m.put_relmap_file(0, 111, Bytes::from(""), ctx).await?; // dummy relmapper file
|
||||
m.commit(ctx).await?;
|
||||
let walingest = WalIngest::new(tline, Lsn(0x10), ctx).await?;
|
||||
|
||||
@@ -174,6 +174,7 @@ impl DecodedWALRecord {
|
||||
}
|
||||
15 => info == postgres_ffi::v15::bindings::XLOG_DBASE_CREATE_FILE_COPY,
|
||||
16 => info == postgres_ffi::v16::bindings::XLOG_DBASE_CREATE_FILE_COPY,
|
||||
17 => info == postgres_ffi::v17::bindings::XLOG_DBASE_CREATE_FILE_COPY,
|
||||
_ => {
|
||||
panic!("Unsupported postgres version {pg_version}")
|
||||
}
|
||||
@@ -341,16 +342,47 @@ pub mod v14 {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Debug)]
|
||||
pub struct XlParameterChange {
|
||||
pub max_connections: i32,
|
||||
pub max_worker_processes: i32,
|
||||
pub max_wal_senders: i32,
|
||||
pub max_prepared_xacts: i32,
|
||||
pub max_locks_per_xact: i32,
|
||||
pub wal_level: i32,
|
||||
pub wal_log_hints: bool,
|
||||
pub track_commit_timestamp: bool,
|
||||
pub _padding: [u8; 2],
|
||||
}
|
||||
|
||||
impl XlParameterChange {
|
||||
pub fn decode(buf: &mut Bytes) -> XlParameterChange {
|
||||
XlParameterChange {
|
||||
max_connections: buf.get_i32_le(),
|
||||
max_worker_processes: buf.get_i32_le(),
|
||||
max_wal_senders: buf.get_i32_le(),
|
||||
max_prepared_xacts: buf.get_i32_le(),
|
||||
max_locks_per_xact: buf.get_i32_le(),
|
||||
wal_level: buf.get_i32_le(),
|
||||
wal_log_hints: buf.get_u8() != 0,
|
||||
track_commit_timestamp: buf.get_u8() != 0,
|
||||
_padding: [buf.get_u8(), buf.get_u8()],
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub mod v15 {
|
||||
pub use super::v14::{
|
||||
XlHeapDelete, XlHeapInsert, XlHeapLock, XlHeapLockUpdated, XlHeapMultiInsert, XlHeapUpdate,
|
||||
XlParameterChange,
|
||||
};
|
||||
}
|
||||
|
||||
pub mod v16 {
|
||||
pub use super::v14::{XlHeapInsert, XlHeapLockUpdated, XlHeapMultiInsert};
|
||||
pub use super::v14::{XlHeapInsert, XlHeapLockUpdated, XlHeapMultiInsert, XlParameterChange};
|
||||
use bytes::{Buf, Bytes};
|
||||
use postgres_ffi::{OffsetNumber, TransactionId};
|
||||
|
||||
@@ -529,6 +561,37 @@ pub mod v16 {
|
||||
}
|
||||
}
|
||||
|
||||
pub mod v17 {
|
||||
pub use super::v14::XlHeapLockUpdated;
|
||||
use bytes::{Buf, Bytes};
|
||||
pub use postgres_ffi::{TimeLineID, TimestampTz};
|
||||
|
||||
pub use super::v16::rm_neon;
|
||||
pub use super::v16::{
|
||||
XlHeapDelete, XlHeapInsert, XlHeapLock, XlHeapMultiInsert, XlHeapUpdate, XlParameterChange,
|
||||
};
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Debug)]
|
||||
pub struct XlEndOfRecovery {
|
||||
pub end_time: TimestampTz,
|
||||
pub this_time_line_id: TimeLineID,
|
||||
pub prev_time_line_id: TimeLineID,
|
||||
pub wal_level: i32,
|
||||
}
|
||||
|
||||
impl XlEndOfRecovery {
|
||||
pub fn decode(buf: &mut Bytes) -> XlEndOfRecovery {
|
||||
XlEndOfRecovery {
|
||||
end_time: buf.get_i64_le(),
|
||||
this_time_line_id: buf.get_u32_le(),
|
||||
prev_time_line_id: buf.get_u32_le(),
|
||||
wal_level: buf.get_i32_le(),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Debug)]
|
||||
pub struct XlSmgrCreate {
|
||||
@@ -746,9 +809,13 @@ pub struct XlClogTruncate {
|
||||
}
|
||||
|
||||
impl XlClogTruncate {
|
||||
pub fn decode(buf: &mut Bytes) -> XlClogTruncate {
|
||||
pub fn decode(buf: &mut Bytes, pg_version: u32) -> XlClogTruncate {
|
||||
XlClogTruncate {
|
||||
pageno: buf.get_u32_le(),
|
||||
pageno: if pg_version < 17 {
|
||||
buf.get_u32_le()
|
||||
} else {
|
||||
buf.get_u64_le() as u32
|
||||
},
|
||||
oldest_xid: buf.get_u32_le(),
|
||||
oldest_xid_db: buf.get_u32_le(),
|
||||
}
|
||||
|
||||
@@ -43,13 +43,12 @@ use utils::lsn::Lsn;
|
||||
use utils::sync::gate::GateError;
|
||||
use utils::sync::heavier_once_cell;
|
||||
|
||||
/// The real implementation that uses a Postgres process to
|
||||
/// perform WAL replay.
|
||||
///
|
||||
/// This is the real implementation that uses a Postgres process to
|
||||
/// perform WAL replay. Only one thread can use the process at a time,
|
||||
/// that is controlled by the Mutex. In the future, we might want to
|
||||
/// launch a pool of processes to allow concurrent replay of multiple
|
||||
/// records.
|
||||
///
|
||||
/// Only one thread can use the process at a time, that is controlled by the
|
||||
/// Mutex. In the future, we might want to launch a pool of processes to allow
|
||||
/// concurrent replay of multiple records.
|
||||
pub struct PostgresRedoManager {
|
||||
tenant_shard_id: TenantShardId,
|
||||
conf: &'static PageServerConf,
|
||||
|
||||
@@ -23,7 +23,7 @@ SHLIB_LINK_INTERNAL = $(libpq)
|
||||
SHLIB_LINK = -lcurl
|
||||
|
||||
EXTENSION = neon
|
||||
DATA = neon--1.0.sql neon--1.0--1.1.sql neon--1.1--1.2.sql neon--1.2--1.3.sql neon--1.3--1.2.sql neon--1.2--1.1.sql neon--1.1--1.0.sql neon--1.3--1.4.sql neon--1.4--1.3.sql
|
||||
DATA = neon--1.0.sql neon--1.0--1.1.sql neon--1.1--1.2.sql neon--1.2--1.3.sql neon--1.3--1.2.sql neon--1.2--1.1.sql neon--1.1--1.0.sql neon--1.3--1.4.sql neon--1.4--1.3.sql neon--1.4--1.5.sql neon--1.5--1.4.sql
|
||||
PGFILEDESC = "neon - cloud storage for PostgreSQL"
|
||||
|
||||
EXTRA_CLEAN = \
|
||||
|
||||
12
pgxn/neon/bitmap.h
Normal file
12
pgxn/neon/bitmap.h
Normal file
@@ -0,0 +1,12 @@
|
||||
#ifndef NEON_BITMAP_H
|
||||
#define NEON_BITMAP_H
|
||||
|
||||
/*
|
||||
* Utilities for manipulating bits8* as bitmaps.
|
||||
*/
|
||||
|
||||
#define BITMAP_ISSET(bm, bit) ((bm)[(bit) >> 3] & (1 << ((bit) & 7)))
|
||||
#define BITMAP_SET(bm, bit) (bm)[(bit) >> 3] |= (1 << ((bit) & 7))
|
||||
#define BITMAP_CLR(bm, bit) (bm)[(bit) >> 3] &= ~(1 << ((bit) & 7))
|
||||
|
||||
#endif //NEON_BITMAP_H
|
||||
@@ -27,6 +27,7 @@
|
||||
#include "pagestore_client.h"
|
||||
#include "common/hashfn.h"
|
||||
#include "pgstat.h"
|
||||
#include "port/pg_iovec.h"
|
||||
#include "postmaster/bgworker.h"
|
||||
#include RELFILEINFO_HDR
|
||||
#include "storage/buf_internals.h"
|
||||
@@ -40,6 +41,7 @@
|
||||
#include "utils/guc.h"
|
||||
|
||||
#include "hll.h"
|
||||
#include "bitmap.h"
|
||||
|
||||
#define CriticalAssert(cond) do if (!(cond)) elog(PANIC, "Assertion %s failed at %s:%d: ", #cond, __FILE__, __LINE__); while (0)
|
||||
|
||||
@@ -469,6 +471,99 @@ lfc_cache_contains(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno)
|
||||
return found;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if page is present in the cache.
|
||||
* Returns true if page is found in local cache.
|
||||
*/
|
||||
int
|
||||
lfc_cache_containsv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
int nblocks, bits8 *bitmap)
|
||||
{
|
||||
BufferTag tag;
|
||||
FileCacheEntry *entry;
|
||||
uint32 chunk_offs;
|
||||
int found = 0;
|
||||
uint32 hash;
|
||||
int i = 0;
|
||||
|
||||
if (lfc_maybe_disabled()) /* fast exit if file cache is disabled */
|
||||
return 0;
|
||||
|
||||
CopyNRelFileInfoToBufTag(tag, rinfo);
|
||||
tag.forkNum = forkNum;
|
||||
|
||||
CriticalAssert(BufTagGetRelNumber(&tag) != InvalidRelFileNumber);
|
||||
|
||||
tag.blockNum = (blkno + i) & ~(BLOCKS_PER_CHUNK - 1);
|
||||
hash = get_hash_value(lfc_hash, &tag);
|
||||
chunk_offs = (blkno + i) & (BLOCKS_PER_CHUNK - 1);
|
||||
|
||||
LWLockAcquire(lfc_lock, LW_SHARED);
|
||||
|
||||
while (true)
|
||||
{
|
||||
int this_chunk = Min(nblocks, BLOCKS_PER_CHUNK - chunk_offs);
|
||||
if (LFC_ENABLED())
|
||||
{
|
||||
entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, NULL);
|
||||
|
||||
if (entry != NULL)
|
||||
{
|
||||
for (; chunk_offs < BLOCKS_PER_CHUNK && i < nblocks; chunk_offs++, i++)
|
||||
{
|
||||
if ((entry->bitmap[chunk_offs >> 5] &
|
||||
(1 << (chunk_offs & 31))) != 0)
|
||||
{
|
||||
BITMAP_SET(bitmap, i);
|
||||
found++;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
i += this_chunk;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
return found;
|
||||
}
|
||||
|
||||
/*
|
||||
* Break out of the iteration before doing expensive stuff for
|
||||
* a next iteration
|
||||
*/
|
||||
if (i + 1 >= nblocks)
|
||||
break;
|
||||
|
||||
/*
|
||||
* Prepare for the next iteration. We don't unlock here, as that'd
|
||||
* probably be more expensive than the gains it'd get us.
|
||||
*/
|
||||
tag.blockNum = (blkno + i) & ~(BLOCKS_PER_CHUNK - 1);
|
||||
hash = get_hash_value(lfc_hash, &tag);
|
||||
chunk_offs = (blkno + i) & (BLOCKS_PER_CHUNK - 1);
|
||||
}
|
||||
|
||||
LWLockRelease(lfc_lock);
|
||||
|
||||
#if USE_ASSERT_CHECKING
|
||||
do {
|
||||
int count = 0;
|
||||
|
||||
for (int j = 0; j < nblocks; j++)
|
||||
{
|
||||
if (BITMAP_ISSET(bitmap, j))
|
||||
count++;
|
||||
}
|
||||
|
||||
Assert(count == found);
|
||||
} while (false);
|
||||
#endif
|
||||
|
||||
return found;
|
||||
}
|
||||
|
||||
/*
|
||||
* Evict a page (if present) from the local file cache
|
||||
*/
|
||||
@@ -548,91 +643,171 @@ lfc_evict(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno)
|
||||
}
|
||||
|
||||
/*
|
||||
* Try to read page from local cache.
|
||||
* Returns true if page is found in local cache.
|
||||
* In case of error local file cache is disabled (lfc->limit is set to zero).
|
||||
* Try to read pages from local cache.
|
||||
* Returns the number of pages read from the local cache, and sets bits in
|
||||
* 'read' for the pages which were read. This may scribble over buffers not
|
||||
* marked in 'read', so be careful with operation ordering.
|
||||
*
|
||||
* In case of error local file cache is disabled (lfc->limit is set to zero),
|
||||
* and -1 is returned. Note that 'read' and the buffers may be touched and in
|
||||
* an otherwise invalid state.
|
||||
*
|
||||
* If the mask argument is supplied, bits will be set at the offsets of pages
|
||||
* that were present and read from the LFC.
|
||||
*/
|
||||
bool
|
||||
lfc_read(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
char *buffer)
|
||||
int
|
||||
lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
void **buffers, BlockNumber nblocks, bits8 *mask)
|
||||
{
|
||||
BufferTag tag;
|
||||
FileCacheEntry *entry;
|
||||
ssize_t rc;
|
||||
int chunk_offs = blkno & (BLOCKS_PER_CHUNK - 1);
|
||||
bool result = true;
|
||||
uint32 hash;
|
||||
uint64 generation;
|
||||
uint32 entry_offset;
|
||||
int blocks_read = 0;
|
||||
int buf_offset = 0;
|
||||
|
||||
if (lfc_maybe_disabled()) /* fast exit if file cache is disabled */
|
||||
return false;
|
||||
return 0;
|
||||
|
||||
if (!lfc_ensure_opened())
|
||||
return false;
|
||||
return 0;
|
||||
|
||||
CopyNRelFileInfoToBufTag(tag, rinfo);
|
||||
tag.forkNum = forkNum;
|
||||
tag.blockNum = blkno & ~(BLOCKS_PER_CHUNK - 1);
|
||||
|
||||
CriticalAssert(BufTagGetRelNumber(&tag) != InvalidRelFileNumber);
|
||||
hash = get_hash_value(lfc_hash, &tag);
|
||||
|
||||
LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
|
||||
|
||||
if (!LFC_ENABLED())
|
||||
/*
|
||||
* For every chunk that has blocks we're interested in, we
|
||||
* 1. get the chunk header
|
||||
* 2. Check if the chunk actually has the blocks we're interested in
|
||||
* 3. Read the blocks we're looking for (in one preadv), assuming they exist
|
||||
* 4. Update the statistics for the read call.
|
||||
*
|
||||
* If there is an error, we do an early return.
|
||||
*/
|
||||
while (nblocks > 0)
|
||||
{
|
||||
struct iovec iov[PG_IOV_MAX];
|
||||
int chunk_offs = blkno & (BLOCKS_PER_CHUNK - 1);
|
||||
int blocks_in_chunk = Min(nblocks, BLOCKS_PER_CHUNK - (blkno % BLOCKS_PER_CHUNK));
|
||||
int iteration_hits = 0;
|
||||
int iteration_misses = 0;
|
||||
Assert(blocks_in_chunk > 0);
|
||||
|
||||
for (int i = 0; i < blocks_in_chunk; i++)
|
||||
{
|
||||
iov[i].iov_base = buffers[buf_offset + i];
|
||||
iov[i].iov_len = BLCKSZ;
|
||||
}
|
||||
|
||||
tag.blockNum = blkno - chunk_offs;
|
||||
hash = get_hash_value(lfc_hash, &tag);
|
||||
|
||||
LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
|
||||
|
||||
/* We can return the blocks we've read before LFC got disabled;
|
||||
* assuming we read any. */
|
||||
if (!LFC_ENABLED())
|
||||
{
|
||||
LWLockRelease(lfc_lock);
|
||||
return blocks_read;
|
||||
}
|
||||
|
||||
entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, NULL);
|
||||
|
||||
/* Approximate working set for the blocks assumed in this entry */
|
||||
for (int i = 0; i < blocks_in_chunk; i++)
|
||||
{
|
||||
tag.blockNum = blkno + i;
|
||||
addSHLL(&lfc_ctl->wss_estimation, hash_bytes((uint8_t const*)&tag, sizeof(tag)));
|
||||
}
|
||||
|
||||
if (entry == NULL)
|
||||
{
|
||||
/* Pages are not cached */
|
||||
lfc_ctl->misses += blocks_in_chunk;
|
||||
pgBufferUsage.file_cache.misses += blocks_in_chunk;
|
||||
LWLockRelease(lfc_lock);
|
||||
|
||||
buf_offset += blocks_in_chunk;
|
||||
nblocks -= blocks_in_chunk;
|
||||
blkno += blocks_in_chunk;
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Unlink entry from LRU list to pin it for the duration of IO operation */
|
||||
if (entry->access_count++ == 0)
|
||||
dlist_delete(&entry->list_node);
|
||||
|
||||
generation = lfc_ctl->generation;
|
||||
entry_offset = entry->offset;
|
||||
|
||||
LWLockRelease(lfc_lock);
|
||||
return false;
|
||||
}
|
||||
|
||||
entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, NULL);
|
||||
for (int i = 0; i < blocks_in_chunk; i++)
|
||||
{
|
||||
/*
|
||||
* If the page is valid, we consider it "read".
|
||||
* All other pages will be fetched separately by the next cache
|
||||
*/
|
||||
if (entry->bitmap[(chunk_offs + i) / 32] & (1 << ((chunk_offs + i) % 32)))
|
||||
{
|
||||
BITMAP_SET(mask, buf_offset + i);
|
||||
iteration_hits++;
|
||||
}
|
||||
else
|
||||
iteration_misses++;
|
||||
}
|
||||
|
||||
/* Approximate working set */
|
||||
tag.blockNum = blkno;
|
||||
addSHLL(&lfc_ctl->wss_estimation, hash_bytes((uint8_t const*)&tag, sizeof(tag)));
|
||||
Assert(iteration_hits + iteration_misses > 0);
|
||||
|
||||
if (iteration_hits != 0)
|
||||
{
|
||||
rc = preadv(lfc_desc, iov, blocks_in_chunk,
|
||||
((off_t) entry_offset * BLOCKS_PER_CHUNK + chunk_offs) * BLCKSZ);
|
||||
|
||||
if (rc != (BLCKSZ * blocks_in_chunk))
|
||||
{
|
||||
lfc_disable("read");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/* Place entry to the head of LRU list */
|
||||
LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
|
||||
|
||||
if (lfc_ctl->generation == generation)
|
||||
{
|
||||
CriticalAssert(LFC_ENABLED());
|
||||
lfc_ctl->hits += iteration_hits;
|
||||
lfc_ctl->misses += iteration_misses;
|
||||
pgBufferUsage.file_cache.hits += iteration_hits;
|
||||
pgBufferUsage.file_cache.misses += iteration_misses;
|
||||
CriticalAssert(entry->access_count > 0);
|
||||
if (--entry->access_count == 0)
|
||||
dlist_push_tail(&lfc_ctl->lru, &entry->list_node);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* generation mismatch, assume error condition */
|
||||
LWLockRelease(lfc_lock);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (entry == NULL || (entry->bitmap[chunk_offs >> 5] & (1 << (chunk_offs & 31))) == 0)
|
||||
{
|
||||
/* Page is not cached */
|
||||
lfc_ctl->misses += 1;
|
||||
pgBufferUsage.file_cache.misses += 1;
|
||||
LWLockRelease(lfc_lock);
|
||||
return false;
|
||||
}
|
||||
/* Unlink entry from LRU list to pin it for the duration of IO operation */
|
||||
if (entry->access_count++ == 0)
|
||||
dlist_delete(&entry->list_node);
|
||||
generation = lfc_ctl->generation;
|
||||
entry_offset = entry->offset;
|
||||
|
||||
LWLockRelease(lfc_lock);
|
||||
|
||||
rc = pread(lfc_desc, buffer, BLCKSZ, ((off_t) entry_offset * BLOCKS_PER_CHUNK + chunk_offs) * BLCKSZ);
|
||||
if (rc != BLCKSZ)
|
||||
{
|
||||
lfc_disable("read");
|
||||
return false;
|
||||
buf_offset += blocks_in_chunk;
|
||||
nblocks -= blocks_in_chunk;
|
||||
blkno += blocks_in_chunk;
|
||||
blocks_read += iteration_hits;
|
||||
}
|
||||
|
||||
/* Place entry to the head of LRU list */
|
||||
LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
|
||||
|
||||
if (lfc_ctl->generation == generation)
|
||||
{
|
||||
CriticalAssert(LFC_ENABLED());
|
||||
lfc_ctl->hits += 1;
|
||||
pgBufferUsage.file_cache.hits += 1;
|
||||
CriticalAssert(entry->access_count > 0);
|
||||
if (--entry->access_count == 0)
|
||||
dlist_push_tail(&lfc_ctl->lru, &entry->list_node);
|
||||
}
|
||||
else
|
||||
result = false;
|
||||
|
||||
LWLockRelease(lfc_lock);
|
||||
|
||||
return result;
|
||||
return blocks_read;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -640,20 +815,17 @@ lfc_read(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
* If cache is full then evict some other page.
|
||||
*/
|
||||
void
|
||||
#if PG_MAJORVERSION_NUM < 16
|
||||
lfc_write(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, char *buffer)
|
||||
#else
|
||||
lfc_write(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, const void *buffer)
|
||||
#endif
|
||||
lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
const void *const *buffers, BlockNumber nblocks)
|
||||
{
|
||||
BufferTag tag;
|
||||
FileCacheEntry *entry;
|
||||
ssize_t rc;
|
||||
bool found;
|
||||
int chunk_offs = blkno & (BLOCKS_PER_CHUNK - 1);
|
||||
uint32 hash;
|
||||
uint64 generation;
|
||||
uint32 entry_offset;
|
||||
int buf_offset = 0;
|
||||
|
||||
if (lfc_maybe_disabled()) /* fast exit if file cache is disabled */
|
||||
return;
|
||||
@@ -661,110 +833,142 @@ lfc_write(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, const void
|
||||
if (!lfc_ensure_opened())
|
||||
return;
|
||||
|
||||
tag.forkNum = forkNum;
|
||||
tag.blockNum = blkno & ~(BLOCKS_PER_CHUNK - 1);
|
||||
CopyNRelFileInfoToBufTag(tag, rinfo);
|
||||
tag.forkNum = forkNum;
|
||||
|
||||
CriticalAssert(BufTagGetRelNumber(&tag) != InvalidRelFileNumber);
|
||||
hash = get_hash_value(lfc_hash, &tag);
|
||||
|
||||
LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
|
||||
|
||||
if (!LFC_ENABLED())
|
||||
/*
|
||||
* For every chunk that has blocks we're interested in, we
|
||||
* 1. get the chunk header
|
||||
* 2. Check if the chunk actually has the blocks we're interested in
|
||||
* 3. Read the blocks we're looking for (in one preadv), assuming they exist
|
||||
* 4. Update the statistics for the read call.
|
||||
*
|
||||
* If there is an error, we do an early return.
|
||||
*/
|
||||
while (nblocks > 0)
|
||||
{
|
||||
LWLockRelease(lfc_lock);
|
||||
return;
|
||||
}
|
||||
struct iovec iov[PG_IOV_MAX];
|
||||
int chunk_offs = blkno & (BLOCKS_PER_CHUNK - 1);
|
||||
int blocks_in_chunk = Min(nblocks, BLOCKS_PER_CHUNK - (blkno % BLOCKS_PER_CHUNK));
|
||||
Assert(blocks_in_chunk > 0);
|
||||
|
||||
entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_ENTER, &found);
|
||||
|
||||
if (found)
|
||||
{
|
||||
/*
|
||||
* Unlink entry from LRU list to pin it for the duration of IO
|
||||
* operation
|
||||
*/
|
||||
if (entry->access_count++ == 0)
|
||||
dlist_delete(&entry->list_node);
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* We have two choices if all cache pages are pinned (i.e. used in IO
|
||||
* operations):
|
||||
*
|
||||
* 1) Wait until some of this operation is completed and pages is
|
||||
* unpinned.
|
||||
*
|
||||
* 2) Allocate one more chunk, so that specified cache size is more
|
||||
* recommendation than hard limit.
|
||||
*
|
||||
* As far as probability of such event (that all pages are pinned) is
|
||||
* considered to be very very small: there are should be very large
|
||||
* number of concurrent IO operations and them are limited by
|
||||
* max_connections, we prefer not to complicate code and use second
|
||||
* approach.
|
||||
*/
|
||||
if (lfc_ctl->used >= lfc_ctl->limit && !dlist_is_empty(&lfc_ctl->lru))
|
||||
for (int i = 0; i < blocks_in_chunk; i++)
|
||||
{
|
||||
/* Cache overflow: evict least recently used chunk */
|
||||
FileCacheEntry *victim = dlist_container(FileCacheEntry, list_node, dlist_pop_head_node(&lfc_ctl->lru));
|
||||
|
||||
CriticalAssert(victim->access_count == 0);
|
||||
entry->offset = victim->offset; /* grab victim's chunk */
|
||||
hash_search_with_hash_value(lfc_hash, &victim->key, victim->hash, HASH_REMOVE, NULL);
|
||||
neon_log(DEBUG2, "Swap file cache page");
|
||||
iov[i].iov_base = unconstify(void *, buffers[buf_offset + i]);
|
||||
iov[i].iov_len = BLCKSZ;
|
||||
}
|
||||
else if (!dlist_is_empty(&lfc_ctl->holes))
|
||||
|
||||
tag.blockNum = blkno & ~(BLOCKS_PER_CHUNK - 1);
|
||||
hash = get_hash_value(lfc_hash, &tag);
|
||||
|
||||
LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
|
||||
|
||||
if (!LFC_ENABLED())
|
||||
{
|
||||
/* We can reuse a hole that was left behind when the LFC was shrunk previously */
|
||||
FileCacheEntry *hole = dlist_container(FileCacheEntry, list_node, dlist_pop_head_node(&lfc_ctl->holes));
|
||||
uint32 offset = hole->offset;
|
||||
bool found;
|
||||
LWLockRelease(lfc_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
hash_search_with_hash_value(lfc_hash, &hole->key, hole->hash, HASH_REMOVE, &found);
|
||||
CriticalAssert(found);
|
||||
entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_ENTER, &found);
|
||||
|
||||
lfc_ctl->used += 1;
|
||||
entry->offset = offset; /* reuse the hole */
|
||||
if (found)
|
||||
{
|
||||
/*
|
||||
* Unlink entry from LRU list to pin it for the duration of IO
|
||||
* operation
|
||||
*/
|
||||
if (entry->access_count++ == 0)
|
||||
dlist_delete(&entry->list_node);
|
||||
}
|
||||
else
|
||||
{
|
||||
lfc_ctl->used += 1;
|
||||
entry->offset = lfc_ctl->size++; /* allocate new chunk at end
|
||||
* of file */
|
||||
}
|
||||
entry->access_count = 1;
|
||||
entry->hash = hash;
|
||||
memset(entry->bitmap, 0, sizeof entry->bitmap);
|
||||
}
|
||||
|
||||
generation = lfc_ctl->generation;
|
||||
entry_offset = entry->offset;
|
||||
lfc_ctl->writes += 1;
|
||||
LWLockRelease(lfc_lock);
|
||||
|
||||
rc = pwrite(lfc_desc, buffer, BLCKSZ, ((off_t) entry_offset * BLOCKS_PER_CHUNK + chunk_offs) * BLCKSZ);
|
||||
if (rc != BLCKSZ)
|
||||
{
|
||||
lfc_disable("write");
|
||||
}
|
||||
else
|
||||
{
|
||||
LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
|
||||
|
||||
if (lfc_ctl->generation == generation)
|
||||
{
|
||||
CriticalAssert(LFC_ENABLED());
|
||||
/* Place entry to the head of LRU list */
|
||||
CriticalAssert(entry->access_count > 0);
|
||||
if (--entry->access_count == 0)
|
||||
dlist_push_tail(&lfc_ctl->lru, &entry->list_node);
|
||||
|
||||
entry->bitmap[chunk_offs >> 5] |= (1 << (chunk_offs & 31));
|
||||
/*
|
||||
* We have two choices if all cache pages are pinned (i.e. used in IO
|
||||
* operations):
|
||||
*
|
||||
* 1) Wait until some of this operation is completed and pages is
|
||||
* unpinned.
|
||||
*
|
||||
* 2) Allocate one more chunk, so that specified cache size is more
|
||||
* recommendation than hard limit.
|
||||
*
|
||||
* As far as probability of such event (that all pages are pinned) is
|
||||
* considered to be very very small: there are should be very large
|
||||
* number of concurrent IO operations and them are limited by
|
||||
* max_connections, we prefer not to complicate code and use second
|
||||
* approach.
|
||||
*/
|
||||
if (lfc_ctl->used >= lfc_ctl->limit && !dlist_is_empty(&lfc_ctl->lru))
|
||||
{
|
||||
/* Cache overflow: evict least recently used chunk */
|
||||
FileCacheEntry *victim = dlist_container(FileCacheEntry, list_node, dlist_pop_head_node(&lfc_ctl->lru));
|
||||
|
||||
CriticalAssert(victim->access_count == 0);
|
||||
entry->offset = victim->offset; /* grab victim's chunk */
|
||||
hash_search_with_hash_value(lfc_hash, &victim->key, victim->hash, HASH_REMOVE, NULL);
|
||||
neon_log(DEBUG2, "Swap file cache page");
|
||||
}
|
||||
else if (!dlist_is_empty(&lfc_ctl->holes))
|
||||
{
|
||||
/* We can reuse a hole that was left behind when the LFC was shrunk previously */
|
||||
FileCacheEntry *hole = dlist_container(FileCacheEntry, list_node, dlist_pop_head_node(&lfc_ctl->holes));
|
||||
uint32 offset = hole->offset;
|
||||
bool found;
|
||||
|
||||
hash_search_with_hash_value(lfc_hash, &hole->key, hole->hash, HASH_REMOVE, &found);
|
||||
CriticalAssert(found);
|
||||
|
||||
lfc_ctl->used += 1;
|
||||
entry->offset = offset; /* reuse the hole */
|
||||
}
|
||||
else
|
||||
{
|
||||
lfc_ctl->used += 1;
|
||||
entry->offset = lfc_ctl->size++; /* allocate new chunk at end
|
||||
* of file */
|
||||
}
|
||||
entry->access_count = 1;
|
||||
entry->hash = hash;
|
||||
memset(entry->bitmap, 0, sizeof entry->bitmap);
|
||||
}
|
||||
|
||||
generation = lfc_ctl->generation;
|
||||
entry_offset = entry->offset;
|
||||
lfc_ctl->writes += blocks_in_chunk;
|
||||
LWLockRelease(lfc_lock);
|
||||
|
||||
rc = pwritev(lfc_desc, iov, blocks_in_chunk,
|
||||
((off_t) entry_offset * BLOCKS_PER_CHUNK + chunk_offs) * BLCKSZ);
|
||||
if (rc != BLCKSZ * blocks_in_chunk)
|
||||
{
|
||||
lfc_disable("write");
|
||||
}
|
||||
else
|
||||
{
|
||||
LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
|
||||
|
||||
if (lfc_ctl->generation == generation)
|
||||
{
|
||||
CriticalAssert(LFC_ENABLED());
|
||||
/* Place entry to the head of LRU list */
|
||||
CriticalAssert(entry->access_count > 0);
|
||||
if (--entry->access_count == 0)
|
||||
dlist_push_tail(&lfc_ctl->lru, &entry->list_node);
|
||||
|
||||
for (int i = 0; i < blocks_in_chunk; i++)
|
||||
{
|
||||
entry->bitmap[(chunk_offs + i) >> 5] |=
|
||||
(1 << ((chunk_offs + i) & 31));
|
||||
}
|
||||
}
|
||||
|
||||
LWLockRelease(lfc_lock);
|
||||
}
|
||||
blkno += blocks_in_chunk;
|
||||
buf_offset += blocks_in_chunk;
|
||||
nblocks -= blocks_in_chunk;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1059,7 +1263,7 @@ approximate_working_set_size_seconds(PG_FUNCTION_ARGS)
|
||||
int32 dc;
|
||||
time_t duration = PG_ARGISNULL(0) ? (time_t)-1 : PG_GETARG_INT32(0);
|
||||
LWLockAcquire(lfc_lock, LW_SHARED);
|
||||
dc = (int32) estimateSHLL(&lfc_ctl->wss_estimation, duration);
|
||||
dc = (int32) estimateSHLL(&lfc_ctl->wss_estimation, duration, 1.0);
|
||||
LWLockRelease(lfc_lock);
|
||||
PG_RETURN_INT32(dc);
|
||||
}
|
||||
@@ -1076,7 +1280,7 @@ approximate_working_set_size(PG_FUNCTION_ARGS)
|
||||
int32 dc;
|
||||
bool reset = PG_GETARG_BOOL(0);
|
||||
LWLockAcquire(lfc_lock, reset ? LW_EXCLUSIVE : LW_SHARED);
|
||||
dc = (int32) estimateSHLL(&lfc_ctl->wss_estimation, (time_t)-1);
|
||||
dc = (int32) estimateSHLL(&lfc_ctl->wss_estimation, (time_t)-1, 1.0);
|
||||
if (reset)
|
||||
memset(lfc_ctl->wss_estimation.regs, 0, sizeof lfc_ctl->wss_estimation.regs);
|
||||
LWLockRelease(lfc_lock);
|
||||
@@ -1084,3 +1288,21 @@ approximate_working_set_size(PG_FUNCTION_ARGS)
|
||||
}
|
||||
PG_RETURN_NULL();
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(approximate_optimal_cache_size);
|
||||
|
||||
Datum
|
||||
approximate_optimal_cache_size(PG_FUNCTION_ARGS)
|
||||
{
|
||||
if (lfc_size_limit != 0)
|
||||
{
|
||||
int32 dc;
|
||||
time_t duration = PG_ARGISNULL(0) ? (time_t)-1 : PG_GETARG_INT32(0);
|
||||
double min_hit_ratio = PG_ARGISNULL(1) ? 1.0 : PG_GETARG_FLOAT8(1);
|
||||
LWLockAcquire(lfc_lock, LW_SHARED);
|
||||
dc = (int32) estimateSHLL(&lfc_ctl->wss_estimation, duration, min_hit_ratio);
|
||||
LWLockRelease(lfc_lock);
|
||||
PG_RETURN_INT32(dc);
|
||||
}
|
||||
PG_RETURN_NULL();
|
||||
}
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
* Portions Copyright (c) 2014-2023, PostgreSQL Global Development Group
|
||||
*
|
||||
* Implements https://hal.science/hal-00465313/document
|
||||
*
|
||||
*
|
||||
* Based on Hideaki Ohno's C++ implementation. This is probably not ideally
|
||||
* suited to estimating the cardinality of very large sets; in particular, we
|
||||
* have not attempted to further optimize the implementation as described in
|
||||
@@ -126,22 +126,78 @@ addSHLL(HyperLogLogState *cState, uint32 hash)
|
||||
/* Compute the rank of the remaining 32 - "k" (registerWidth) bits */
|
||||
count = rho(hash << HLL_BIT_WIDTH, HLL_C_BITS);
|
||||
|
||||
cState->regs[index][count] = now;
|
||||
if (cState->regs[index][count].ts)
|
||||
{
|
||||
/* update histgoram */
|
||||
int64_t delta = (now - cState->regs[index][count].ts)/USECS_PER_SEC;
|
||||
uint32_t new_histogram[HIST_SIZE] = {0};
|
||||
for (int i = 0; i < HIST_SIZE; i++) {
|
||||
/* Use middle point of interval */
|
||||
uint32 interval_log2 = pg_ceil_log2_32((delta + (HIST_MIN_INTERVAL*((1<<i) + ((1<<i)/2))/2)) / HIST_MIN_INTERVAL);
|
||||
uint32 cell = Min(interval_log2, HIST_SIZE-1);
|
||||
new_histogram[cell] += cState->regs[index][count].histogram[i];
|
||||
}
|
||||
memcpy(cState->regs[index][count].histogram, new_histogram, sizeof new_histogram);
|
||||
}
|
||||
cState->regs[index][count].ts = now;
|
||||
cState->regs[index][count].histogram[0] += 1; // most recent access always goes to first histogram backet
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
getAccessCount(const HyperLogLogRegister* reg, time_t duration)
|
||||
{
|
||||
uint32_t count = 0;
|
||||
// Simplest solution is to take in account all points fro overlapped interval
|
||||
// for (size_t i = 0; i < HIST_SIZE && HIST_MIN_INTERVAL*((1 << i)/2) <= duration; i++) {
|
||||
for (size_t i = 0; i < HIST_SIZE; i++) {
|
||||
uint32_t high_boundary = HIST_MIN_INTERVAL*(1 << i);
|
||||
uint32_t low_boundary = HIST_MIN_INTERVAL*((1 << i)/2);
|
||||
if (high_boundary >= duration) {
|
||||
// Assume uniform distribution of points within interval and use proportional number of points
|
||||
Assert(duration >= low_boundary);
|
||||
count += reg->histogram[i] * (duration - low_boundary) / (high_boundary - low_boundary);
|
||||
break; // it's last interval within specified time range
|
||||
} else {
|
||||
count += reg->histogram[i];
|
||||
}
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
static uint8
|
||||
getMaximum(const TimestampTz* reg, TimestampTz since)
|
||||
getMaximum(const HyperLogLogRegister* reg, TimestampTz since, time_t duration, double min_hit_ratio)
|
||||
{
|
||||
uint8 max = 0;
|
||||
|
||||
for (size_t i = 0; i < HLL_C_BITS + 1; i++)
|
||||
size_t i, j;
|
||||
if (min_hit_ratio == 1.0)
|
||||
{
|
||||
if (reg[i] >= since)
|
||||
for (i = 0; i < HLL_C_BITS + 1; i++)
|
||||
{
|
||||
max = i;
|
||||
if (reg[i].ts >= since)
|
||||
{
|
||||
max = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
uint32_t total_count = 0;
|
||||
for (i = 0; i < HLL_C_BITS + 1; i++)
|
||||
{
|
||||
total_count += getAccessCount(®[i], duration);
|
||||
}
|
||||
if (total_count != 0)
|
||||
{
|
||||
for (i = 0; i < HLL_C_BITS + 1; i++)
|
||||
{
|
||||
// Take in account only bits with access frequncy exceeding maximal miss rate (1 - hit rate)
|
||||
if (reg[i].ts >= since && 1.0 - (double)getAccessCount(®[i], duration) / total_count <= min_hit_ratio)
|
||||
{
|
||||
max = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return max;
|
||||
}
|
||||
|
||||
@@ -150,7 +206,7 @@ getMaximum(const TimestampTz* reg, TimestampTz since)
|
||||
* Estimates cardinality, based on elements added so far
|
||||
*/
|
||||
double
|
||||
estimateSHLL(HyperLogLogState *cState, time_t duration)
|
||||
estimateSHLL(HyperLogLogState *cState, time_t duration, double min_hit_ratio)
|
||||
{
|
||||
double result;
|
||||
double sum = 0.0;
|
||||
@@ -161,7 +217,7 @@ estimateSHLL(HyperLogLogState *cState, time_t duration)
|
||||
|
||||
for (i = 0; i < HLL_N_REGISTERS; i++)
|
||||
{
|
||||
R[i] = getMaximum(cState->regs[i], since);
|
||||
R[i] = getMaximum(cState->regs[i], since, duration, min_hit_ratio);
|
||||
sum += 1.0 / pow(2.0, R[i]);
|
||||
}
|
||||
|
||||
|
||||
@@ -53,6 +53,14 @@
|
||||
#define HLL_C_BITS (32 - HLL_BIT_WIDTH)
|
||||
#define HLL_N_REGISTERS (1 << HLL_BIT_WIDTH)
|
||||
|
||||
/*
|
||||
* Number of histogram cells. We use exponential histogram with first interval
|
||||
* equals to one minutes. Autoscaler request LFC statistic with intervals 1,2,...,60 minutes
|
||||
* so 2^8=64 seems to be enough for our needs.
|
||||
*/
|
||||
#define HIST_SIZE 8
|
||||
#define HIST_MIN_INTERVAL 60 /* seconds */
|
||||
|
||||
/*
|
||||
* HyperLogLog is an approximate technique for computing the number of distinct
|
||||
* entries in a set. Importantly, it does this by using a fixed amount of
|
||||
@@ -69,18 +77,21 @@
|
||||
* modified timestamp >= the query timestamp. This value is the number of bits
|
||||
* for this register in the normal HLL calculation.
|
||||
*
|
||||
* The memory usage is 2^B * (C + 1) * sizeof(TimetampTz), or 184kiB.
|
||||
* Usage could be halved if we decide to reduce the required time dimension
|
||||
* precision; as 32 bits in second precision should be enough for statistics.
|
||||
* However, that is not yet implemented.
|
||||
* The memory usage is 2^B * (C + 1) * sizeof(HyperLogLogRegister), or 920kiB.
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
TimestampTz ts; /* last access timestamp */
|
||||
uint32_t histogram[HIST_SIZE]; /* access counter exponential histogram */
|
||||
} HyperLogLogRegister;
|
||||
|
||||
typedef struct HyperLogLogState
|
||||
{
|
||||
TimestampTz regs[HLL_N_REGISTERS][HLL_C_BITS + 1];
|
||||
HyperLogLogRegister regs[HLL_N_REGISTERS][HLL_C_BITS + 1];
|
||||
} HyperLogLogState;
|
||||
|
||||
extern void initSHLL(HyperLogLogState *cState);
|
||||
extern void addSHLL(HyperLogLogState *cState, uint32 hash);
|
||||
extern double estimateSHLL(HyperLogLogState *cState, time_t dutration);
|
||||
extern double estimateSHLL(HyperLogLogState *cState, time_t dutration, double min_hit_ratio);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -537,7 +537,11 @@ pageserver_connect(shardno_t shard_no, int elevel)
|
||||
/* No more polling needed; connection succeeded */
|
||||
shard->last_connect_time = GetCurrentTimestamp();
|
||||
|
||||
#if PG_MAJORVERSION_NUM >= 17
|
||||
shard->wes_read = CreateWaitEventSet(NULL, 3);
|
||||
#else
|
||||
shard->wes_read = CreateWaitEventSet(TopMemoryContext, 3);
|
||||
#endif
|
||||
AddWaitEventToSet(shard->wes_read, WL_LATCH_SET, PGINVALID_SOCKET,
|
||||
MyLatch, NULL);
|
||||
AddWaitEventToSet(shard->wes_read, WL_EXIT_ON_PM_DEATH, PGINVALID_SOCKET,
|
||||
|
||||
@@ -639,9 +639,7 @@ _PG_init(void)
|
||||
|
||||
pg_init_libpagestore();
|
||||
pg_init_walproposer();
|
||||
WalSender_Custom_XLogReaderRoutines = NeonOnDemandXLogReaderRoutines;
|
||||
LogicalFuncs_Custom_XLogReaderRoutines = NeonOnDemandXLogReaderRoutines;
|
||||
SlotFuncs_Custom_XLogReaderRoutines = NeonOnDemandXLogReaderRoutines;
|
||||
Custom_XLogReaderRoutines = NeonOnDemandXLogReaderRoutines;
|
||||
|
||||
InitLogicalReplicationMonitor();
|
||||
|
||||
|
||||
@@ -6,7 +6,11 @@
|
||||
#ifndef NEON_PGVERSIONCOMPAT_H
|
||||
#define NEON_PGVERSIONCOMPAT_H
|
||||
|
||||
#if PG_MAJORVERSION_NUM < 17
|
||||
#define NRelFileInfoBackendIsTemp(rinfo) (rinfo.backend != InvalidBackendId)
|
||||
#else
|
||||
#define NRelFileInfoBackendIsTemp(rinfo) (rinfo.backend != INVALID_PROC_NUMBER)
|
||||
#endif
|
||||
|
||||
#define RelFileInfoEquals(a, b) ( \
|
||||
NInfoGetSpcOid(a) == NInfoGetSpcOid(b) && \
|
||||
@@ -50,7 +54,7 @@
|
||||
#define CopyNRelFileInfoToBufTag(tag, rinfo) \
|
||||
do { \
|
||||
(tag).rnode = (rinfo); \
|
||||
} while (false);
|
||||
} while (false)
|
||||
|
||||
#define BufTagGetNRelFileInfo(tag) tag.rnode
|
||||
|
||||
@@ -98,7 +102,7 @@
|
||||
(tag).spcOid = (rinfo).spcOid; \
|
||||
(tag).dbOid = (rinfo).dbOid; \
|
||||
(tag).relNumber = (rinfo).relNumber; \
|
||||
} while (false);
|
||||
} while (false)
|
||||
|
||||
#define BufTagGetNRelFileInfo(tag) \
|
||||
((RelFileLocator) { \
|
||||
@@ -113,4 +117,10 @@
|
||||
#define DropRelationAllLocalBuffers DropRelationAllLocalBuffers
|
||||
#endif
|
||||
|
||||
#if PG_MAJORVERSION_NUM < 17
|
||||
#define ProcNumber BackendId
|
||||
#define INVALID_PROC_NUMBER InvalidBackendId
|
||||
#define AmAutoVacuumWorkerProcess() (IsAutoVacuumWorkerProcess())
|
||||
#endif
|
||||
|
||||
#endif /* NEON_PGVERSIONCOMPAT_H */
|
||||
|
||||
@@ -6,8 +6,6 @@
|
||||
* Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* contrib/neon/pagestore_client.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef pageserver_h
|
||||
@@ -187,7 +185,7 @@ extern char *nm_to_string(NeonMessage *msg);
|
||||
* API
|
||||
*/
|
||||
|
||||
typedef unsigned shardno_t;
|
||||
typedef uint16 shardno_t;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
@@ -211,7 +209,7 @@ extern int neon_protocol_version;
|
||||
|
||||
extern shardno_t get_shard_number(BufferTag* tag);
|
||||
|
||||
extern const f_smgr *smgr_neon(BackendId backend, NRelFileInfo rinfo);
|
||||
extern const f_smgr *smgr_neon(ProcNumber backend, NRelFileInfo rinfo);
|
||||
extern void smgr_init_neon(void);
|
||||
extern void readahead_buffer_resize(int newsize, void *extra);
|
||||
|
||||
@@ -233,8 +231,13 @@ extern void neon_zeroextend(SMgrRelation reln, ForkNumber forknum,
|
||||
BlockNumber blocknum, int nbuffers, bool skipFsync);
|
||||
#endif
|
||||
|
||||
#if PG_MAJORVERSION_NUM >=17
|
||||
extern bool neon_prefetch(SMgrRelation reln, ForkNumber forknum,
|
||||
BlockNumber blocknum, int nblocks);
|
||||
#else
|
||||
extern bool neon_prefetch(SMgrRelation reln, ForkNumber forknum,
|
||||
BlockNumber blocknum);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* LSN values associated with each request to the pageserver
|
||||
@@ -269,19 +272,11 @@ typedef struct
|
||||
} neon_request_lsns;
|
||||
|
||||
#if PG_MAJORVERSION_NUM < 16
|
||||
extern void neon_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
char *buffer);
|
||||
extern PGDLLEXPORT void neon_read_at_lsn(NRelFileInfo rnode, ForkNumber forkNum, BlockNumber blkno,
|
||||
neon_request_lsns request_lsns, char *buffer);
|
||||
extern void neon_write(SMgrRelation reln, ForkNumber forknum,
|
||||
BlockNumber blocknum, char *buffer, bool skipFsync);
|
||||
#else
|
||||
extern void neon_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
void *buffer);
|
||||
extern PGDLLEXPORT void neon_read_at_lsn(NRelFileInfo rnode, ForkNumber forkNum, BlockNumber blkno,
|
||||
neon_request_lsns request_lsns, void *buffer);
|
||||
extern void neon_write(SMgrRelation reln, ForkNumber forknum,
|
||||
BlockNumber blocknum, const void *buffer, bool skipFsync);
|
||||
#endif
|
||||
extern void neon_writeback(SMgrRelation reln, ForkNumber forknum,
|
||||
BlockNumber blocknum, BlockNumber nblocks);
|
||||
@@ -299,17 +294,34 @@ extern void update_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockN
|
||||
extern void forget_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum);
|
||||
|
||||
/* functions for local file cache */
|
||||
#if PG_MAJORVERSION_NUM < 16
|
||||
extern void lfc_write(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
char *buffer);
|
||||
#else
|
||||
extern void lfc_write(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
const void *buffer);
|
||||
#endif
|
||||
extern bool lfc_read(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, char *buffer);
|
||||
extern bool lfc_cache_contains(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno);
|
||||
extern void lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum,
|
||||
BlockNumber blkno, const void *const *buffers,
|
||||
BlockNumber nblocks);
|
||||
/* returns number of blocks read, with one bit set in *read for each */
|
||||
extern int lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum,
|
||||
BlockNumber blkno, void **buffers,
|
||||
BlockNumber nblocks, bits8 *mask);
|
||||
|
||||
extern bool lfc_cache_contains(NRelFileInfo rinfo, ForkNumber forkNum,
|
||||
BlockNumber blkno);
|
||||
extern int lfc_cache_containsv(NRelFileInfo rinfo, ForkNumber forkNum,
|
||||
BlockNumber blkno, int nblocks, bits8 *bitmap);
|
||||
extern void lfc_evict(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno);
|
||||
extern void lfc_init(void);
|
||||
|
||||
static inline bool
|
||||
lfc_read(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
void *buffer)
|
||||
{
|
||||
bits8 rv = 0;
|
||||
return lfc_readv_select(rinfo, forkNum, blkno, &buffer, 1, &rv) == 1;
|
||||
}
|
||||
|
||||
static inline void
|
||||
lfc_write(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
const void *buffer)
|
||||
{
|
||||
return lfc_writev(rinfo, forkNum, blkno, &buffer, 1);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user