mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-26 14:50:36 +00:00
Compare commits
107 Commits
skyzh/anon
...
skyzh/comp
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8ee5588a6f | ||
|
|
9761b6a64e | ||
|
|
897cffb9d8 | ||
|
|
552088ac16 | ||
|
|
51fda118f6 | ||
|
|
e96398a552 | ||
|
|
f9d8256d55 | ||
|
|
411c3aa0d6 | ||
|
|
65b69392ea | ||
|
|
8d70f88b37 | ||
|
|
bcfe013094 | ||
|
|
d0a02f3649 | ||
|
|
8af9412eb2 | ||
|
|
96e35e11a6 | ||
|
|
745061ddf8 | ||
|
|
0c828c57e2 | ||
|
|
8e2e9f0fed | ||
|
|
b77b9bdc9f | ||
|
|
81f9aba005 | ||
|
|
88ff8a7803 | ||
|
|
0c075fab3a | ||
|
|
80e1630042 | ||
|
|
57499640c5 | ||
|
|
793ad50b7d | ||
|
|
7a1331eee5 | ||
|
|
4ef74215e1 | ||
|
|
d4cbc8cfeb | ||
|
|
47c35f67c3 | ||
|
|
45b558f480 | ||
|
|
a73402e646 | ||
|
|
07b974480c | ||
|
|
62f5d484d9 | ||
|
|
4df3987054 | ||
|
|
0624565617 | ||
|
|
7d5f6b6a52 | ||
|
|
f7c61e856f | ||
|
|
57c21aff9f | ||
|
|
248558dee8 | ||
|
|
3bad52543f | ||
|
|
3d64a7ddcd | ||
|
|
25f1e5cfeb | ||
|
|
8dd555d396 | ||
|
|
01b6843e12 | ||
|
|
93987b5a4a | ||
|
|
33baca07b6 | ||
|
|
923974d4da | ||
|
|
e7277885b3 | ||
|
|
80262e724f | ||
|
|
85b954f449 | ||
|
|
76328ada05 | ||
|
|
b54b632c6a | ||
|
|
9909551f47 | ||
|
|
700b102b0f | ||
|
|
dbadb0f9bb | ||
|
|
8297f7a181 | ||
|
|
2090e928d1 | ||
|
|
05eff3a67e | ||
|
|
c6cf5e7c0f | ||
|
|
e0c7f1ce15 | ||
|
|
6f5c262684 | ||
|
|
9768f09f6b | ||
|
|
db900ae9d0 | ||
|
|
4d9036bf1f | ||
|
|
b3bedda6fd | ||
|
|
b782b11b33 | ||
|
|
5069123b6d | ||
|
|
fb0406e9d2 | ||
|
|
b8a311131e | ||
|
|
d589498c6f | ||
|
|
6f34f97573 | ||
|
|
b86432c29e | ||
|
|
ac1205c14c | ||
|
|
e3ff87ce3b | ||
|
|
0595320c87 | ||
|
|
92d5e0e87a | ||
|
|
3a3bd34a28 | ||
|
|
64949a37a9 | ||
|
|
6f8fcdf9ea | ||
|
|
fcb55a2aa2 | ||
|
|
f36cf3f885 | ||
|
|
8dca188974 | ||
|
|
b7fa93f6b7 | ||
|
|
1e8e04bb2c | ||
|
|
94369af782 | ||
|
|
34b6bd416a | ||
|
|
49d5e56c08 | ||
|
|
aca81f5fa4 | ||
|
|
2dcac94194 | ||
|
|
ababa50cce | ||
|
|
163beaf9ad | ||
|
|
5b37485c99 | ||
|
|
ed958da38a | ||
|
|
cc25ef7342 | ||
|
|
71d09c78d4 | ||
|
|
62a334871f | ||
|
|
e162ab8b53 | ||
|
|
5cbdec9c79 | ||
|
|
ec6d3422a5 | ||
|
|
fecff15f18 | ||
|
|
3532ae76ef | ||
|
|
15fecffe6b | ||
|
|
98fee7a97d | ||
|
|
b7173b1ef0 | ||
|
|
24654b8eee | ||
|
|
b8304f90d6 | ||
|
|
d762ad0883 | ||
|
|
928d98b6dc |
@@ -27,7 +27,7 @@ jobs:
|
||||
|
||||
runs-on: [ self-hosted, us-east-2, x64 ]
|
||||
container:
|
||||
image: neondatabase/build-tools:pinned
|
||||
image: neondatabase/build-tools:pinned-bookworm
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
|
||||
22
.github/workflows/_build-and-test-locally.yml
vendored
22
.github/workflows/_build-and-test-locally.yml
vendored
@@ -53,20 +53,6 @@ jobs:
|
||||
BUILD_TAG: ${{ inputs.build-tag }}
|
||||
|
||||
steps:
|
||||
- name: Fix git ownership
|
||||
run: |
|
||||
# Workaround for `fatal: detected dubious ownership in repository at ...`
|
||||
#
|
||||
# Use both ${{ github.workspace }} and ${GITHUB_WORKSPACE} because they're different on host and in containers
|
||||
# Ref https://github.com/actions/checkout/issues/785
|
||||
#
|
||||
git config --global --add safe.directory ${{ github.workspace }}
|
||||
git config --global --add safe.directory ${GITHUB_WORKSPACE}
|
||||
for r in 14 15 16 17; do
|
||||
git config --global --add safe.directory "${{ github.workspace }}/vendor/postgres-v$r"
|
||||
git config --global --add safe.directory "${GITHUB_WORKSPACE}/vendor/postgres-v$r"
|
||||
done
|
||||
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
@@ -124,28 +110,28 @@ jobs:
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: pg_install/v14
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools.Dockerfile') }}
|
||||
|
||||
- name: Cache postgres v15 build
|
||||
id: cache_pg_15
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: pg_install/v15
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools.Dockerfile') }}
|
||||
|
||||
- name: Cache postgres v16 build
|
||||
id: cache_pg_16
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: pg_install/v16
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools.Dockerfile') }}
|
||||
|
||||
- name: Cache postgres v17 build
|
||||
id: cache_pg_17
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: pg_install/v17
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v17_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v17_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools.Dockerfile') }}
|
||||
|
||||
- name: Build postgres v14
|
||||
if: steps.cache_pg_14.outputs.cache-hit != 'true'
|
||||
|
||||
26
.github/workflows/benchmarking.yml
vendored
26
.github/workflows/benchmarking.yml
vendored
@@ -83,7 +83,7 @@ jobs:
|
||||
|
||||
runs-on: ${{ matrix.RUNNER }}
|
||||
container:
|
||||
image: neondatabase/build-tools:pinned
|
||||
image: neondatabase/build-tools:pinned-bookworm
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
@@ -178,7 +178,7 @@ jobs:
|
||||
|
||||
runs-on: [ self-hosted, us-east-2, x64 ]
|
||||
container:
|
||||
image: neondatabase/build-tools:pinned
|
||||
image: neondatabase/build-tools:pinned-bookworm
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
@@ -280,7 +280,7 @@ jobs:
|
||||
region_id_default=${{ env.DEFAULT_REGION_ID }}
|
||||
runner_default='["self-hosted", "us-east-2", "x64"]'
|
||||
runner_azure='["self-hosted", "eastus2", "x64"]'
|
||||
image_default="neondatabase/build-tools:pinned"
|
||||
image_default="neondatabase/build-tools:pinned-bookworm"
|
||||
matrix='{
|
||||
"pg_version" : [
|
||||
16
|
||||
@@ -299,9 +299,9 @@ jobs:
|
||||
"include": [{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-freetier", "db_size": "3gb" ,"runner": '"$runner_default"', "image": "'"$image_default"'" },
|
||||
{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new", "db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
|
||||
{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new", "db_size": "50gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
|
||||
{ "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-freetier", "db_size": "3gb" ,"runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned" },
|
||||
{ "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-new", "db_size": "10gb","runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned" },
|
||||
{ "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-new", "db_size": "50gb","runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned" },
|
||||
{ "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-freetier", "db_size": "3gb" ,"runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned-bookworm" },
|
||||
{ "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-new", "db_size": "10gb","runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned-bookworm" },
|
||||
{ "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-new", "db_size": "50gb","runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned-bookworm" },
|
||||
{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-sharding-reuse", "db_size": "50gb","runner": '"$runner_default"', "image": "'"$image_default"'" }]
|
||||
}'
|
||||
|
||||
@@ -665,12 +665,16 @@ jobs:
|
||||
|
||||
runs-on: [ self-hosted, us-east-2, x64 ]
|
||||
container:
|
||||
image: neondatabase/build-tools:pinned
|
||||
image: neondatabase/build-tools:pinned-bookworm
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
options: --init
|
||||
|
||||
# Increase timeout to 12h, default timeout is 6h
|
||||
# we have regression in clickbench causing it to run 2-3x longer
|
||||
timeout-minutes: 720
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
@@ -679,7 +683,7 @@ jobs:
|
||||
with:
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
role-duration-seconds: 18000 # 5 hours
|
||||
role-duration-seconds: 43200 # 12 hours
|
||||
|
||||
- name: Download Neon artifact
|
||||
uses: ./.github/actions/download
|
||||
@@ -716,7 +720,7 @@ jobs:
|
||||
test_selection: performance/test_perf_olap.py
|
||||
run_in_parallel: false
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
extra_params: -m remote_cluster --timeout 21600 -k test_clickbench
|
||||
extra_params: -m remote_cluster --timeout 43200 -k test_clickbench
|
||||
pg_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
env:
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
@@ -772,7 +776,7 @@ jobs:
|
||||
|
||||
runs-on: [ self-hosted, us-east-2, x64 ]
|
||||
container:
|
||||
image: neondatabase/build-tools:pinned
|
||||
image: neondatabase/build-tools:pinned-bookworm
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
@@ -877,7 +881,7 @@ jobs:
|
||||
|
||||
runs-on: [ self-hosted, us-east-2, x64 ]
|
||||
container:
|
||||
image: neondatabase/build-tools:pinned
|
||||
image: neondatabase/build-tools:pinned-bookworm
|
||||
credentials:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
|
||||
@@ -82,7 +82,7 @@ jobs:
|
||||
|
||||
- uses: docker/build-push-action@v6
|
||||
with:
|
||||
file: Dockerfile.build-tools
|
||||
file: build-tools.Dockerfile
|
||||
context: .
|
||||
provenance: false
|
||||
push: true
|
||||
|
||||
26
.github/workflows/build_and_test.yml
vendored
26
.github/workflows/build_and_test.yml
vendored
@@ -683,7 +683,7 @@ jobs:
|
||||
provenance: false
|
||||
push: true
|
||||
pull: true
|
||||
file: compute/Dockerfile.compute-node
|
||||
file: compute/compute-node.Dockerfile
|
||||
cache-from: type=registry,ref=cache.neon.build/compute-node-${{ matrix.version.pg }}:cache-${{ matrix.version.debian }}-${{ matrix.arch }}
|
||||
cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/compute-node-{0}:cache-{1}-{2},mode=max', matrix.version.pg, matrix.version.debian, matrix.arch) || '' }}
|
||||
tags: |
|
||||
@@ -703,7 +703,7 @@ jobs:
|
||||
provenance: false
|
||||
push: true
|
||||
pull: true
|
||||
file: compute/Dockerfile.compute-node
|
||||
file: compute/compute-node.Dockerfile
|
||||
target: neon-pg-ext-test
|
||||
cache-from: type=registry,ref=cache.neon.build/neon-test-extensions-${{ matrix.version.pg }}:cache-${{ matrix.version.debian }}-${{ matrix.arch }}
|
||||
cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/neon-test-extensions-{0}:cache-{1}-{2},mode=max', matrix.version.pg, matrix.version.debian, matrix.arch) || '' }}
|
||||
@@ -728,7 +728,7 @@ jobs:
|
||||
provenance: false
|
||||
push: true
|
||||
pull: true
|
||||
file: compute/Dockerfile.compute-node
|
||||
file: compute/compute-node.Dockerfile
|
||||
cache-from: type=registry,ref=cache.neon.build/neon-test-extensions-${{ matrix.version.pg }}:cache-${{ matrix.version.debian }}-${{ matrix.arch }}
|
||||
cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/compute-tools-{0}:cache-{1}-{2},mode=max', matrix.version.pg, matrix.version.debian, matrix.arch) || '' }}
|
||||
tags: |
|
||||
@@ -839,6 +839,7 @@ jobs:
|
||||
- name: Build vm image
|
||||
run: |
|
||||
./vm-builder \
|
||||
-size=2G \
|
||||
-spec=compute/vm-image-spec-${{ matrix.version.debian }}.yaml \
|
||||
-src=neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }} \
|
||||
-dst=neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}
|
||||
@@ -1078,20 +1079,6 @@ jobs:
|
||||
runs-on: [ self-hosted, small ]
|
||||
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
|
||||
steps:
|
||||
- name: Fix git ownership
|
||||
run: |
|
||||
# Workaround for `fatal: detected dubious ownership in repository at ...`
|
||||
#
|
||||
# Use both ${{ github.workspace }} and ${GITHUB_WORKSPACE} because they're different on host and in containers
|
||||
# Ref https://github.com/actions/checkout/issues/785
|
||||
#
|
||||
git config --global --add safe.directory ${{ github.workspace }}
|
||||
git config --global --add safe.directory ${GITHUB_WORKSPACE}
|
||||
for r in 14 15 16 17; do
|
||||
git config --global --add safe.directory "${{ github.workspace }}/vendor/postgres-v$r"
|
||||
git config --global --add safe.directory "${GITHUB_WORKSPACE}/vendor/postgres-v$r"
|
||||
done
|
||||
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Trigger deploy workflow
|
||||
@@ -1130,7 +1117,10 @@ jobs:
|
||||
|
||||
gh workflow --repo neondatabase/infra run deploy-proxy-prod.yml --ref main \
|
||||
-f deployPgSniRouter=true \
|
||||
-f deployProxy=true \
|
||||
-f deployProxyLink=true \
|
||||
-f deployPrivatelinkProxy=true \
|
||||
-f deployProxyScram=true \
|
||||
-f deployProxyAuthBroker=true \
|
||||
-f branch=main \
|
||||
-f dockerTag=${{needs.tag.outputs.build-tag}}
|
||||
else
|
||||
|
||||
@@ -31,7 +31,7 @@ jobs:
|
||||
id: get-build-tools-tag
|
||||
env:
|
||||
IMAGE_TAG: |
|
||||
${{ hashFiles('Dockerfile.build-tools',
|
||||
${{ hashFiles('build-tools.Dockerfile',
|
||||
'.github/workflows/check-build-tools-image.yml',
|
||||
'.github/workflows/build-build-tools-image.yml') }}
|
||||
run: |
|
||||
|
||||
2
.github/workflows/cloud-regress.yml
vendored
2
.github/workflows/cloud-regress.yml
vendored
@@ -31,7 +31,7 @@ jobs:
|
||||
|
||||
runs-on: us-east-2
|
||||
container:
|
||||
image: neondatabase/build-tools:pinned
|
||||
image: neondatabase/build-tools:pinned-bookworm
|
||||
options: --init
|
||||
|
||||
steps:
|
||||
|
||||
2
.github/workflows/trigger-e2e-tests.yml
vendored
2
.github/workflows/trigger-e2e-tests.yml
vendored
@@ -112,7 +112,7 @@ jobs:
|
||||
# This isn't exhaustive, just the paths that are most directly compute-related.
|
||||
# For example, compute_ctl also depends on libs/utils, but we don't trigger
|
||||
# an e2e run on that.
|
||||
vendor/*|pgxn/*|compute_tools/*|libs/vm_monitor/*|compute/Dockerfile.compute-node)
|
||||
vendor/*|pgxn/*|compute_tools/*|libs/vm_monitor/*|compute/compute-node.Dockerfile)
|
||||
platforms=$(echo "${platforms}" | jq --compact-output '. += ["k8s-neonvm"] | unique')
|
||||
;;
|
||||
*)
|
||||
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -6,6 +6,8 @@ __pycache__/
|
||||
test_output/
|
||||
.vscode
|
||||
.idea
|
||||
*.swp
|
||||
tags
|
||||
neon.iml
|
||||
/.neon
|
||||
/integration_tests/.neon
|
||||
|
||||
240
Cargo.lock
generated
240
Cargo.lock
generated
@@ -148,9 +148,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "asn1-rs"
|
||||
version = "0.5.2"
|
||||
version = "0.6.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7f6fd5ddaf0351dff5b8da21b2fb4ff8e08ddd02857f0bf69c47639106c0fff0"
|
||||
checksum = "5493c3bedbacf7fd7382c6346bbd66687d12bbaad3a89a2d2c303ee6cf20b048"
|
||||
dependencies = [
|
||||
"asn1-rs-derive",
|
||||
"asn1-rs-impl",
|
||||
@@ -164,25 +164,25 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "asn1-rs-derive"
|
||||
version = "0.4.0"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "726535892e8eae7e70657b4c8ea93d26b8553afb1ce617caee529ef96d7dee6c"
|
||||
checksum = "965c2d33e53cb6b267e148a4cb0760bc01f4904c1cd4bb4002a085bb016d1490"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 1.0.109",
|
||||
"syn 2.0.52",
|
||||
"synstructure",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "asn1-rs-impl"
|
||||
version = "0.1.0"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2777730b2039ac0f95f093556e61b6d26cebed5393ca6f152717777cec3a42ed"
|
||||
checksum = "7b18050c2cd6fe86c3a76584ef5e0baf286d038cda203eb6223df2cc413565f7"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 1.0.109",
|
||||
"syn 2.0.52",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -310,6 +310,33 @@ dependencies = [
|
||||
"zeroize",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aws-lc-rs"
|
||||
version = "1.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2f95446d919226d587817a7d21379e6eb099b97b45110a7f272a444ca5c54070"
|
||||
dependencies = [
|
||||
"aws-lc-sys",
|
||||
"mirai-annotations",
|
||||
"paste",
|
||||
"zeroize",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aws-lc-sys"
|
||||
version = "0.21.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b3ddc4a5b231dd6958b140ff3151b6412b3f4321fab354f399eec8f14b06df62"
|
||||
dependencies = [
|
||||
"bindgen 0.69.5",
|
||||
"cc",
|
||||
"cmake",
|
||||
"dunce",
|
||||
"fs_extra",
|
||||
"libc",
|
||||
"paste",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aws-runtime"
|
||||
version = "1.4.3"
|
||||
@@ -595,7 +622,7 @@ dependencies = [
|
||||
"once_cell",
|
||||
"pin-project-lite",
|
||||
"pin-utils",
|
||||
"rustls 0.21.11",
|
||||
"rustls 0.21.12",
|
||||
"tokio",
|
||||
"tracing",
|
||||
]
|
||||
@@ -915,6 +942,29 @@ dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bindgen"
|
||||
version = "0.69.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088"
|
||||
dependencies = [
|
||||
"bitflags 2.4.1",
|
||||
"cexpr",
|
||||
"clang-sys",
|
||||
"itertools 0.10.5",
|
||||
"lazy_static",
|
||||
"lazycell",
|
||||
"log",
|
||||
"prettyplease",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"regex",
|
||||
"rustc-hash",
|
||||
"shlex",
|
||||
"syn 2.0.52",
|
||||
"which",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bindgen"
|
||||
version = "0.70.1"
|
||||
@@ -924,7 +974,7 @@ dependencies = [
|
||||
"bitflags 2.4.1",
|
||||
"cexpr",
|
||||
"clang-sys",
|
||||
"itertools 0.12.1",
|
||||
"itertools 0.10.5",
|
||||
"log",
|
||||
"prettyplease",
|
||||
"proc-macro2",
|
||||
@@ -1038,12 +1088,13 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.0.83"
|
||||
version = "1.1.30"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0"
|
||||
checksum = "b16803a61b81d9eabb7eae2588776c4c1e584b738ede45fdbb4c972cec1e9945"
|
||||
dependencies = [
|
||||
"jobserver",
|
||||
"libc",
|
||||
"shlex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1169,6 +1220,15 @@ version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2da6da31387c7e4ef160ffab6d5e7f00c42626fe39aea70a7b0f1773f7dd6c1b"
|
||||
|
||||
[[package]]
|
||||
name = "cmake"
|
||||
version = "0.1.51"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fb1e43aa7fd152b1f968787f7dbcdeb306d1867ff373c69955211876c053f91a"
|
||||
dependencies = [
|
||||
"cc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "colorchoice"
|
||||
version = "1.0.0"
|
||||
@@ -1624,9 +1684,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "der-parser"
|
||||
version = "8.2.0"
|
||||
version = "9.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dbd676fbbab537128ef0278adb5576cf363cff6aa22a7b24effe97347cfab61e"
|
||||
checksum = "5cd0a5c643689626bec213c4d8bd4d96acc8ffdb4ad4bb6bc16abf27d5f4b553"
|
||||
dependencies = [
|
||||
"asn1-rs",
|
||||
"displaydoc",
|
||||
@@ -1755,6 +1815,12 @@ dependencies = [
|
||||
"syn 2.0.52",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dunce"
|
||||
version = "1.0.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813"
|
||||
|
||||
[[package]]
|
||||
name = "dyn-clone"
|
||||
version = "1.0.14"
|
||||
@@ -2059,6 +2125,12 @@ dependencies = [
|
||||
"tokio-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fs_extra"
|
||||
version = "1.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
|
||||
|
||||
[[package]]
|
||||
name = "fsevent-sys"
|
||||
version = "4.1.0"
|
||||
@@ -2412,6 +2484,15 @@ dependencies = [
|
||||
"digest",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "home"
|
||||
version = "0.5.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5"
|
||||
dependencies = [
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hostname"
|
||||
version = "0.4.0"
|
||||
@@ -2581,7 +2662,7 @@ dependencies = [
|
||||
"http 0.2.9",
|
||||
"hyper 0.14.30",
|
||||
"log",
|
||||
"rustls 0.21.11",
|
||||
"rustls 0.21.12",
|
||||
"rustls-native-certs 0.6.2",
|
||||
"tokio",
|
||||
"tokio-rustls 0.24.0",
|
||||
@@ -2801,9 +2882,9 @@ checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b"
|
||||
|
||||
[[package]]
|
||||
name = "jobserver"
|
||||
version = "0.1.26"
|
||||
version = "0.1.32"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "936cfd212a0155903bcbc060e316fb6cc7cbf2e1907329391ebadc1fe0ce77c2"
|
||||
checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
@@ -2907,6 +2988,12 @@ dependencies = [
|
||||
"spin",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lazycell"
|
||||
version = "1.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.150"
|
||||
@@ -3137,6 +3224,12 @@ dependencies = [
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mirai-annotations"
|
||||
version = "1.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c9be0862c1b3f26a88803c4a49de6889c10e608b3ee9344e6ef5b45fb37ad3d1"
|
||||
|
||||
[[package]]
|
||||
name = "multimap"
|
||||
version = "0.8.3"
|
||||
@@ -3356,9 +3449,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "oid-registry"
|
||||
version = "0.6.1"
|
||||
version = "0.7.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9bedf36ffb6ba96c2eb7144ef6270557b52e54b20c0a8e1eb2ff99a6c6959bff"
|
||||
checksum = "a8d8034d9489cdaf79228eb9f6a3b8d7bb32ba00d6645ebd48eef4077ceb5bd9"
|
||||
dependencies = [
|
||||
"asn1-rs",
|
||||
]
|
||||
@@ -3656,6 +3749,7 @@ dependencies = [
|
||||
"tracing",
|
||||
"url",
|
||||
"utils",
|
||||
"wal_decoder",
|
||||
"walkdir",
|
||||
"workspace_hack",
|
||||
]
|
||||
@@ -4053,14 +4147,14 @@ dependencies = [
|
||||
"bytes",
|
||||
"once_cell",
|
||||
"pq_proto",
|
||||
"rustls 0.22.4",
|
||||
"rustls 0.23.7",
|
||||
"rustls-pemfile 2.1.1",
|
||||
"serde",
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"tokio-postgres",
|
||||
"tokio-postgres-rustls",
|
||||
"tokio-rustls 0.25.0",
|
||||
"tokio-rustls 0.26.0",
|
||||
"tokio-util",
|
||||
"tracing",
|
||||
]
|
||||
@@ -4082,7 +4176,7 @@ name = "postgres_ffi"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bindgen",
|
||||
"bindgen 0.70.1",
|
||||
"bytes",
|
||||
"crc32c",
|
||||
"env_logger",
|
||||
@@ -4093,6 +4187,7 @@ dependencies = [
|
||||
"regex",
|
||||
"serde",
|
||||
"thiserror",
|
||||
"tracing",
|
||||
"utils",
|
||||
]
|
||||
|
||||
@@ -4219,7 +4314,7 @@ checksum = "0c1318b19085f08681016926435853bbf7858f9c082d0999b80550ff5d9abe15"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"heck 0.5.0",
|
||||
"itertools 0.12.1",
|
||||
"itertools 0.10.5",
|
||||
"log",
|
||||
"multimap",
|
||||
"once_cell",
|
||||
@@ -4239,7 +4334,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e9552f850d5f0964a4e4d0bf306459ac29323ddfbae05e35a7c0d35cb0803cc5"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"itertools 0.12.1",
|
||||
"itertools 0.10.5",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.52",
|
||||
@@ -4327,8 +4422,8 @@ dependencies = [
|
||||
"rsa",
|
||||
"rstest",
|
||||
"rustc-hash",
|
||||
"rustls 0.22.4",
|
||||
"rustls-native-certs 0.7.0",
|
||||
"rustls 0.23.7",
|
||||
"rustls-native-certs 0.8.0",
|
||||
"rustls-pemfile 2.1.1",
|
||||
"scopeguard",
|
||||
"serde",
|
||||
@@ -4345,7 +4440,7 @@ dependencies = [
|
||||
"tokio",
|
||||
"tokio-postgres",
|
||||
"tokio-postgres-rustls",
|
||||
"tokio-rustls 0.25.0",
|
||||
"tokio-rustls 0.26.0",
|
||||
"tokio-tungstenite",
|
||||
"tokio-util",
|
||||
"tracing",
|
||||
@@ -4509,12 +4604,13 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "rcgen"
|
||||
version = "0.12.1"
|
||||
version = "0.13.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "48406db8ac1f3cbc7dcdb56ec355343817958a356ff430259bb07baf7607e1e1"
|
||||
checksum = "54077e1872c46788540de1ea3d7f4ccb1983d12f9aa909b234468676c1a36779"
|
||||
dependencies = [
|
||||
"pem",
|
||||
"ring",
|
||||
"rustls-pki-types",
|
||||
"time",
|
||||
"yasna",
|
||||
]
|
||||
@@ -4693,7 +4789,7 @@ dependencies = [
|
||||
"once_cell",
|
||||
"percent-encoding",
|
||||
"pin-project-lite",
|
||||
"rustls 0.21.11",
|
||||
"rustls 0.21.12",
|
||||
"rustls-pemfile 1.0.2",
|
||||
"serde",
|
||||
"serde_json",
|
||||
@@ -4991,9 +5087,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "rustls"
|
||||
version = "0.21.11"
|
||||
version = "0.21.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7fecbfb7b1444f477b345853b1fce097a2c6fb637b2bfb87e6bc5db0f043fae4"
|
||||
checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e"
|
||||
dependencies = [
|
||||
"log",
|
||||
"ring",
|
||||
@@ -5021,6 +5117,7 @@ version = "0.23.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ebbbdb961df0ad3f2652da8f3fdc4b36122f568f968f45ad3316f26c025c677b"
|
||||
dependencies = [
|
||||
"aws-lc-rs",
|
||||
"log",
|
||||
"once_cell",
|
||||
"ring",
|
||||
@@ -5089,9 +5186,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "rustls-pki-types"
|
||||
version = "1.3.1"
|
||||
version = "1.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5ede67b28608b4c60685c7d54122d4400d90f62b40caee7700e700380a390fa8"
|
||||
checksum = "16f1201b3c9a7ee8039bcadc17b7e605e2945b27eee7631788c1bd2b0643674b"
|
||||
|
||||
[[package]]
|
||||
name = "rustls-webpki"
|
||||
@@ -5109,6 +5206,7 @@ version = "0.102.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "faaa0a62740bedb9b2ef5afa303da42764c012f743917351dc9a237ea1663610"
|
||||
dependencies = [
|
||||
"aws-lc-rs",
|
||||
"ring",
|
||||
"rustls-pki-types",
|
||||
"untrusted",
|
||||
@@ -5312,7 +5410,7 @@ checksum = "00421ed8fa0c995f07cde48ba6c89e80f2b312f74ff637326f392fbfd23abe02"
|
||||
dependencies = [
|
||||
"httpdate",
|
||||
"reqwest 0.12.4",
|
||||
"rustls 0.21.11",
|
||||
"rustls 0.21.12",
|
||||
"sentry-backtrace",
|
||||
"sentry-contexts",
|
||||
"sentry-core",
|
||||
@@ -5807,8 +5905,8 @@ dependencies = [
|
||||
"postgres_ffi",
|
||||
"remote_storage",
|
||||
"reqwest 0.12.4",
|
||||
"rustls 0.22.4",
|
||||
"rustls-native-certs 0.7.0",
|
||||
"rustls 0.23.7",
|
||||
"rustls-native-certs 0.8.0",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"storage_controller_client",
|
||||
@@ -5930,14 +6028,13 @@ checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394"
|
||||
|
||||
[[package]]
|
||||
name = "synstructure"
|
||||
version = "0.12.6"
|
||||
version = "0.13.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f"
|
||||
checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 1.0.109",
|
||||
"unicode-xid",
|
||||
"syn 2.0.52",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -6177,7 +6274,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "tokio-epoll-uring"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/neondatabase/tokio-epoll-uring.git?branch=main#08ccfa94ff5507727bf4d8d006666b5b192e04c6"
|
||||
source = "git+https://github.com/neondatabase/tokio-epoll-uring.git?branch=main#33e00106a268644d02ba0461bbd64476073b0ee1"
|
||||
dependencies = [
|
||||
"futures",
|
||||
"nix 0.26.4",
|
||||
@@ -6236,16 +6333,15 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "tokio-postgres-rustls"
|
||||
version = "0.11.1"
|
||||
version = "0.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0ea13f22eda7127c827983bdaf0d7fff9df21c8817bab02815ac277a21143677"
|
||||
checksum = "04fb792ccd6bbcd4bba408eb8a292f70fc4a3589e5d793626f45190e6454b6ab"
|
||||
dependencies = [
|
||||
"futures",
|
||||
"ring",
|
||||
"rustls 0.22.4",
|
||||
"rustls 0.23.7",
|
||||
"tokio",
|
||||
"tokio-postgres",
|
||||
"tokio-rustls 0.25.0",
|
||||
"tokio-rustls 0.26.0",
|
||||
"x509-certificate",
|
||||
]
|
||||
|
||||
@@ -6255,7 +6351,7 @@ version = "0.24.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e0d409377ff5b1e3ca6437aa86c1eb7d40c134bfec254e44c830defa92669db5"
|
||||
dependencies = [
|
||||
"rustls 0.21.11",
|
||||
"rustls 0.21.12",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
@@ -6678,16 +6774,15 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"
|
||||
|
||||
[[package]]
|
||||
name = "ureq"
|
||||
version = "2.9.7"
|
||||
version = "2.10.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d11a831e3c0b56e438a28308e7c810799e3c118417f342d30ecec080105395cd"
|
||||
checksum = "b74fc6b57825be3373f7054754755f03ac3a8f5d70015ccad699ba2029956f4a"
|
||||
dependencies = [
|
||||
"base64 0.22.1",
|
||||
"log",
|
||||
"once_cell",
|
||||
"rustls 0.22.4",
|
||||
"rustls 0.23.7",
|
||||
"rustls-pki-types",
|
||||
"rustls-webpki 0.102.2",
|
||||
"url",
|
||||
"webpki-roots 0.26.1",
|
||||
]
|
||||
@@ -6695,7 +6790,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "uring-common"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/neondatabase/tokio-epoll-uring.git?branch=main#08ccfa94ff5507727bf4d8d006666b5b192e04c6"
|
||||
source = "git+https://github.com/neondatabase/tokio-epoll-uring.git?branch=main#33e00106a268644d02ba0461bbd64476073b0ee1"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"io-uring",
|
||||
@@ -6861,6 +6956,20 @@ dependencies = [
|
||||
"utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wal_decoder"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bytes",
|
||||
"pageserver_api",
|
||||
"postgres_ffi",
|
||||
"serde",
|
||||
"tracing",
|
||||
"utils",
|
||||
"workspace_hack",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "walkdir"
|
||||
version = "2.3.3"
|
||||
@@ -6876,7 +6985,7 @@ name = "walproposer"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bindgen",
|
||||
"bindgen 0.70.1",
|
||||
"postgres_ffi",
|
||||
"utils",
|
||||
]
|
||||
@@ -7051,6 +7160,18 @@ dependencies = [
|
||||
"rustls-pki-types",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "which"
|
||||
version = "4.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7"
|
||||
dependencies = [
|
||||
"either",
|
||||
"home",
|
||||
"once_cell",
|
||||
"rustix",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "whoami"
|
||||
version = "1.5.1"
|
||||
@@ -7295,7 +7416,6 @@ dependencies = [
|
||||
"digest",
|
||||
"either",
|
||||
"fail",
|
||||
"futures",
|
||||
"futures-channel",
|
||||
"futures-executor",
|
||||
"futures-io",
|
||||
@@ -7311,7 +7431,7 @@ dependencies = [
|
||||
"hyper-util",
|
||||
"indexmap 1.9.3",
|
||||
"indexmap 2.0.1",
|
||||
"itertools 0.12.1",
|
||||
"itertools 0.10.5",
|
||||
"lazy_static",
|
||||
"libc",
|
||||
"log",
|
||||
@@ -7332,6 +7452,8 @@ dependencies = [
|
||||
"regex-automata 0.4.3",
|
||||
"regex-syntax 0.8.2",
|
||||
"reqwest 0.12.4",
|
||||
"rustls 0.23.7",
|
||||
"rustls-webpki 0.102.2",
|
||||
"scopeguard",
|
||||
"serde",
|
||||
"serde_json",
|
||||
@@ -7340,7 +7462,6 @@ dependencies = [
|
||||
"smallvec",
|
||||
"spki 0.7.3",
|
||||
"subtle",
|
||||
"syn 1.0.109",
|
||||
"syn 2.0.52",
|
||||
"sync_wrapper 0.1.2",
|
||||
"tikv-jemalloc-sys",
|
||||
@@ -7348,6 +7469,7 @@ dependencies = [
|
||||
"time-macros",
|
||||
"tokio",
|
||||
"tokio-postgres",
|
||||
"tokio-rustls 0.26.0",
|
||||
"tokio-stream",
|
||||
"tokio-util",
|
||||
"toml_edit",
|
||||
@@ -7383,9 +7505,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "x509-parser"
|
||||
version = "0.15.0"
|
||||
version = "0.16.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bab0c2f54ae1d92f4fcb99c0b7ccf0b1e3451cbd395e5f115ccbdbcb18d4f634"
|
||||
checksum = "fcbc162f30700d6f3f82a24bf7cc62ffe7caea42c0b2cba8bf7f3ae50cf51f69"
|
||||
dependencies = [
|
||||
"asn1-rs",
|
||||
"data-encoding",
|
||||
|
||||
14
Cargo.toml
14
Cargo.toml
@@ -33,6 +33,7 @@ members = [
|
||||
"libs/postgres_ffi/wal_craft",
|
||||
"libs/vm_monitor",
|
||||
"libs/walproposer",
|
||||
"libs/wal_decoder",
|
||||
]
|
||||
|
||||
[workspace.package]
|
||||
@@ -142,7 +143,7 @@ reqwest-retry = "0.5"
|
||||
routerify = "3"
|
||||
rpds = "0.13"
|
||||
rustc-hash = "1.1.0"
|
||||
rustls = "0.22"
|
||||
rustls = "0.23"
|
||||
rustls-pemfile = "2"
|
||||
scopeguard = "1.1"
|
||||
sysinfo = "0.29.2"
|
||||
@@ -172,8 +173,8 @@ tikv-jemalloc-ctl = "0.5"
|
||||
tokio = { version = "1.17", features = ["macros"] }
|
||||
tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" }
|
||||
tokio-io-timeout = "1.2.0"
|
||||
tokio-postgres-rustls = "0.11.0"
|
||||
tokio-rustls = "0.25"
|
||||
tokio-postgres-rustls = "0.12.0"
|
||||
tokio-rustls = "0.26"
|
||||
tokio-stream = "0.1"
|
||||
tokio-tar = "0.3"
|
||||
tokio-util = { version = "0.7.10", features = ["io", "rt"] }
|
||||
@@ -192,8 +193,8 @@ url = "2.2"
|
||||
urlencoding = "2.1"
|
||||
uuid = { version = "1.6.1", features = ["v4", "v7", "serde"] }
|
||||
walkdir = "2.3.2"
|
||||
rustls-native-certs = "0.7"
|
||||
x509-parser = "0.15"
|
||||
rustls-native-certs = "0.8"
|
||||
x509-parser = "0.16"
|
||||
whoami = "1.5.1"
|
||||
|
||||
## TODO replace this with tracing
|
||||
@@ -238,13 +239,14 @@ tracing-utils = { version = "0.1", path = "./libs/tracing-utils/" }
|
||||
utils = { version = "0.1", path = "./libs/utils/" }
|
||||
vm_monitor = { version = "0.1", path = "./libs/vm_monitor/" }
|
||||
walproposer = { version = "0.1", path = "./libs/walproposer/" }
|
||||
wal_decoder = { version = "0.1", path = "./libs/wal_decoder" }
|
||||
|
||||
## Common library dependency
|
||||
workspace_hack = { version = "0.1", path = "./workspace_hack/" }
|
||||
|
||||
## Build dependencies
|
||||
criterion = "0.5.1"
|
||||
rcgen = "0.12"
|
||||
rcgen = "0.13"
|
||||
rstest = "0.18"
|
||||
camino-tempfile = "1.0.2"
|
||||
tonic-build = "0.12"
|
||||
|
||||
4
Makefile
4
Makefile
@@ -297,7 +297,7 @@ clean: postgres-clean neon-pg-clean-ext
|
||||
# This removes everything
|
||||
.PHONY: distclean
|
||||
distclean:
|
||||
rm -rf $(POSTGRES_INSTALL_DIR)
|
||||
$(RM) -r $(POSTGRES_INSTALL_DIR)
|
||||
$(CARGO_CMD_PREFIX) cargo clean
|
||||
|
||||
.PHONY: fmt
|
||||
@@ -329,7 +329,7 @@ postgres-%-pgindent: postgres-%-pg-bsd-indent postgres-%-typedefs.list
|
||||
$(ROOT_PROJECT_DIR)/vendor/postgres-$*/src/tools/pgindent/pgindent --typedefs postgres-$*-typedefs-full.list \
|
||||
$(ROOT_PROJECT_DIR)/vendor/postgres-$*/src/ \
|
||||
--excludes $(ROOT_PROJECT_DIR)/vendor/postgres-$*/src/tools/pgindent/exclude_file_patterns
|
||||
rm -f pg*.BAK
|
||||
$(RM) pg*.BAK
|
||||
|
||||
# Indent pxgn/neon.
|
||||
.PHONY: neon-pgindent
|
||||
|
||||
@@ -57,6 +57,18 @@ RUN set -e \
|
||||
zstd \
|
||||
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
|
||||
|
||||
# sql_exporter
|
||||
|
||||
# Keep the version the same as in compute/compute-node.Dockerfile and
|
||||
# test_runner/regress/test_compute_metrics.py.
|
||||
ENV SQL_EXPORTER_VERSION=0.13.1
|
||||
RUN curl -fsSL \
|
||||
"https://github.com/burningalchemist/sql_exporter/releases/download/${SQL_EXPORTER_VERSION}/sql_exporter-${SQL_EXPORTER_VERSION}.linux-$(case "$(uname -m)" in x86_64) echo amd64;; aarch64) echo arm64;; esac).tar.gz" \
|
||||
--output sql_exporter.tar.gz \
|
||||
&& mkdir /tmp/sql_exporter \
|
||||
&& tar xzvf sql_exporter.tar.gz -C /tmp/sql_exporter --strip-components=1 \
|
||||
&& mv /tmp/sql_exporter/sql_exporter /usr/local/bin/sql_exporter
|
||||
|
||||
# protobuf-compiler (protoc)
|
||||
ENV PROTOC_VERSION=25.1
|
||||
RUN curl -fsSL "https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/protoc-${PROTOC_VERSION}-linux-$(uname -m | sed 's/aarch64/aarch_64/g').zip" -o "protoc.zip" \
|
||||
@@ -72,7 +84,7 @@ RUN curl -sL "https://github.com/peak/s5cmd/releases/download/v${S5CMD_VERSION}/
|
||||
&& mv s5cmd /usr/local/bin/s5cmd
|
||||
|
||||
# LLVM
|
||||
ENV LLVM_VERSION=18
|
||||
ENV LLVM_VERSION=19
|
||||
RUN curl -fsSL 'https://apt.llvm.org/llvm-snapshot.gpg.key' | apt-key add - \
|
||||
&& echo "deb http://apt.llvm.org/${DEBIAN_VERSION}/ llvm-toolchain-${DEBIAN_VERSION}-${LLVM_VERSION} main" > /etc/apt/sources.list.d/llvm.stable.list \
|
||||
&& apt update \
|
||||
@@ -99,7 +111,7 @@ RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-$(uname -m).zip" -o "aws
|
||||
&& rm awscliv2.zip
|
||||
|
||||
# Mold: A Modern Linker
|
||||
ENV MOLD_VERSION=v2.33.0
|
||||
ENV MOLD_VERSION=v2.34.1
|
||||
RUN set -e \
|
||||
&& git clone https://github.com/rui314/mold.git \
|
||||
&& mkdir mold/build \
|
||||
@@ -142,7 +154,7 @@ RUN wget -O /tmp/openssl-${OPENSSL_VERSION}.tar.gz https://www.openssl.org/sourc
|
||||
# Use the same version of libicu as the compute nodes so that
|
||||
# clusters created using inidb on pageserver can be used by computes.
|
||||
#
|
||||
# TODO: at this time, Dockerfile.compute-node uses the debian bullseye libicu
|
||||
# TODO: at this time, compute-node.Dockerfile uses the debian bullseye libicu
|
||||
# package, which is 67.1. We're duplicating that knowledge here, and also, technically,
|
||||
# Debian has a few patches on top of 67.1 that we're not adding here.
|
||||
ENV ICU_VERSION=67.1
|
||||
@@ -192,7 +204,7 @@ WORKDIR /home/nonroot
|
||||
|
||||
# Rust
|
||||
# Please keep the version of llvm (installed above) in sync with rust llvm (`rustc --version --verbose | grep LLVM`)
|
||||
ENV RUSTC_VERSION=1.81.0
|
||||
ENV RUSTC_VERSION=1.82.0
|
||||
ENV RUSTUP_HOME="/home/nonroot/.rustup"
|
||||
ENV PATH="/home/nonroot/.cargo/bin:${PATH}"
|
||||
ARG RUSTFILT_VERSION=0.2.1
|
||||
@@ -20,19 +20,22 @@ neon_collector_autoscaling.yml: $(jsonnet_files)
|
||||
sql_exporter.yml: $(jsonnet_files)
|
||||
JSONNET_PATH=etc jsonnet \
|
||||
--output-file etc/$@ \
|
||||
--tla-str collector_name=neon_collector \
|
||||
--tla-str collector_file=neon_collector.yml \
|
||||
--tla-str 'connection_string=postgresql://cloud_admin@127.0.0.1:5432/postgres?sslmode=disable&application_name=sql_exporter' \
|
||||
etc/sql_exporter.jsonnet
|
||||
|
||||
sql_exporter_autoscaling.yml: $(jsonnet_files)
|
||||
JSONNET_PATH=etc jsonnet \
|
||||
--output-file etc/$@ \
|
||||
--tla-str collector_name=neon_collector_autoscaling \
|
||||
--tla-str collector_file=neon_collector_autoscaling.yml \
|
||||
--tla-str application_name=sql_exporter_autoscaling \
|
||||
--tla-str 'connection_string=postgresql://cloud_admin@127.0.0.1:5432/postgres?sslmode=disable&application_name=sql_exporter_autoscaling' \
|
||||
etc/sql_exporter.jsonnet
|
||||
|
||||
.PHONY: clean
|
||||
clean:
|
||||
rm --force \
|
||||
$(RM) \
|
||||
etc/neon_collector.yml \
|
||||
etc/neon_collector_autoscaling.yml \
|
||||
etc/sql_exporter.yml \
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
This directory contains files that are needed to build the compute
|
||||
images, or included in the compute images.
|
||||
|
||||
Dockerfile.compute-node
|
||||
compute-node.Dockerfile
|
||||
To build the compute image
|
||||
|
||||
vm-image-spec.yaml
|
||||
@@ -14,8 +14,8 @@ etc/
|
||||
patches/
|
||||
Some extensions need to be patched to work with Neon. This
|
||||
directory contains such patches. They are applied to the extension
|
||||
sources in Dockerfile.compute-node
|
||||
sources in compute-node.Dockerfile
|
||||
|
||||
In addition to these, postgres itself, the neon postgres extension,
|
||||
and compute_ctl are built and copied into the compute image by
|
||||
Dockerfile.compute-node.
|
||||
compute-node.Dockerfile.
|
||||
|
||||
@@ -431,14 +431,11 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
COPY compute/patches/rum.patch /rum.patch
|
||||
|
||||
# maybe version-specific
|
||||
# support for v17 is unknown
|
||||
# last release 1.3.13 - Sep 19, 2022
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
|
||||
esac && \
|
||||
wget https://github.com/postgrespro/rum/archive/refs/tags/1.3.13.tar.gz -O rum.tar.gz && \
|
||||
echo "6ab370532c965568df6210bd844ac6ba649f53055e48243525b0b7e5c4d69a7d rum.tar.gz" | sha256sum --check && \
|
||||
# supports v17 since https://github.com/postgrespro/rum/commit/cb1edffc57736cd2a4455f8d0feab0d69928da25
|
||||
# doesn't use releases since 1.3.13 - Sep 19, 2022
|
||||
# use latest commit from the master branch
|
||||
RUN wget https://github.com/postgrespro/rum/archive/cb1edffc57736cd2a4455f8d0feab0d69928da25.tar.gz -O rum.tar.gz && \
|
||||
echo "65e0a752e99f4c3226400c9b899f997049e93503db8bf5c8072efa136d32fd83 rum.tar.gz" | sha256sum --check && \
|
||||
mkdir rum-src && cd rum-src && tar xzf ../rum.tar.gz --strip-components=1 -C . && \
|
||||
patch -p1 < /rum.patch && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
|
||||
@@ -666,7 +663,7 @@ RUN apt-get update && \
|
||||
#
|
||||
# Use new version only for v17
|
||||
# because Release_2024_09_1 has some backward incompatible changes
|
||||
# https://github.com/rdkit/rdkit/releases/tag/Release_2024_09_1
|
||||
# https://github.com/rdkit/rdkit/releases/tag/Release_2024_09_1
|
||||
ENV PATH="/usr/local/pgsql/bin/:/usr/local/pgsql/:$PATH"
|
||||
RUN case "${PG_VERSION}" in \
|
||||
"v17") \
|
||||
@@ -787,22 +784,6 @@ RUN case "${PG_VERSION}" in \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/semver.control
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "pg-sudo"
|
||||
# compile pg-neon-sudo extension
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS pg-sudo-pg-build
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
ENV PATH="/usr/local/pgsql/bin/:$PATH"
|
||||
RUN wget https://github.com/skyzh/pg_sudo/archive/refs/heads/main.tar.gz -O pg_sudo.tar.gz && \
|
||||
mkdir pg_sudo-src && cd pg_sudo-src && tar xzf ../pg_sudo.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_neon_sudo.control
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "pg-embedding-pg-build"
|
||||
@@ -843,7 +824,11 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
# This is an experimental extension, never got to real production.
|
||||
# !Do not remove! It can be present in shared_preload_libraries and compute will fail to start if library is not found.
|
||||
ENV PATH="/usr/local/pgsql/bin/:$PATH"
|
||||
RUN wget https://github.com/yliang412/postgresql_anonymizer/archive/refs/heads/master.tar.gz -O pg_anon.tar.gz && \
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "postgresql_anonymizer does not yet support PG17" && exit 0;; \
|
||||
esac && \
|
||||
wget https://github.com/neondatabase/postgresql_anonymizer/archive/refs/tags/neon_1.1.1.tar.gz -O pg_anon.tar.gz && \
|
||||
echo "321ea8d5c1648880aafde850a2c576e4a9e7b9933a34ce272efc839328999fa9 pg_anon.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_anon-src && cd pg_anon-src && tar xzf ../pg_anon.tar.gz --strip-components=1 -C . && \
|
||||
find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt &&\
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
@@ -872,18 +857,98 @@ ENV PATH="/home/nonroot/.cargo/bin:/usr/local/pgsql/bin/:$PATH"
|
||||
USER nonroot
|
||||
WORKDIR /home/nonroot
|
||||
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 is not supported yet by pgrx. Quit" && exit 0;; \
|
||||
esac && \
|
||||
curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && \
|
||||
RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && \
|
||||
chmod +x rustup-init && \
|
||||
./rustup-init -y --no-modify-path --profile minimal --default-toolchain stable && \
|
||||
rm rustup-init && \
|
||||
case "${PG_VERSION}" in \
|
||||
'v17') \
|
||||
echo 'v17 is not supported yet by pgrx. Quit' && exit 0;; \
|
||||
esac && \
|
||||
cargo install --locked --version 0.11.3 cargo-pgrx && \
|
||||
/bin/bash -c 'cargo pgrx init --pg${PG_VERSION:1}=/usr/local/pgsql/bin/pg_config'
|
||||
|
||||
USER root
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "rust extensions pgrx12"
|
||||
#
|
||||
# pgrx started to support Postgres 17 since version 12,
|
||||
# but some older extension aren't compatible with it.
|
||||
# This layer should be used as a base for new pgrx extensions,
|
||||
# and eventually get merged with `rust-extensions-build`
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS rust-extensions-build-pgrx12
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install --no-install-recommends -y curl libclang-dev && \
|
||||
useradd -ms /bin/bash nonroot -b /home
|
||||
|
||||
ENV HOME=/home/nonroot
|
||||
ENV PATH="/home/nonroot/.cargo/bin:/usr/local/pgsql/bin/:$PATH"
|
||||
USER nonroot
|
||||
WORKDIR /home/nonroot
|
||||
|
||||
RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && \
|
||||
chmod +x rustup-init && \
|
||||
./rustup-init -y --no-modify-path --profile minimal --default-toolchain stable && \
|
||||
rm rustup-init && \
|
||||
cargo install --locked --version 0.12.6 cargo-pgrx && \
|
||||
/bin/bash -c 'cargo pgrx init --pg${PG_VERSION:1}=/usr/local/pgsql/bin/pg_config'
|
||||
|
||||
USER root
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layers "pg-onnx-build" and "pgrag-pg-build"
|
||||
# Compile "pgrag" extensions
|
||||
#
|
||||
#########################################################################################
|
||||
|
||||
FROM rust-extensions-build-pgrx12 AS pg-onnx-build
|
||||
|
||||
# cmake 3.26 or higher is required, so installing it using pip (bullseye-backports has cmake 3.25).
|
||||
# Install it using virtual environment, because Python 3.11 (the default version on Debian 12 (Bookworm)) complains otherwise
|
||||
RUN apt-get update && apt-get install -y python3 python3-pip python3-venv && \
|
||||
python3 -m venv venv && \
|
||||
. venv/bin/activate && \
|
||||
python3 -m pip install cmake==3.30.5 && \
|
||||
wget https://github.com/microsoft/onnxruntime/archive/refs/tags/v1.18.1.tar.gz -O onnxruntime.tar.gz && \
|
||||
mkdir onnxruntime-src && cd onnxruntime-src && tar xzf ../onnxruntime.tar.gz --strip-components=1 -C . && \
|
||||
./build.sh --config Release --parallel --skip_submodule_sync --skip_tests --allow_running_as_root
|
||||
|
||||
|
||||
FROM pg-onnx-build AS pgrag-pg-build
|
||||
|
||||
RUN apt-get install -y protobuf-compiler && \
|
||||
wget https://github.com/neondatabase-labs/pgrag/archive/refs/tags/v0.0.0.tar.gz -O pgrag.tar.gz && \
|
||||
echo "2cbe394c1e74fc8bcad9b52d5fbbfb783aef834ca3ce44626cfd770573700bb4 pgrag.tar.gz" | sha256sum --check && \
|
||||
mkdir pgrag-src && cd pgrag-src && tar xzf ../pgrag.tar.gz --strip-components=1 -C . && \
|
||||
\
|
||||
cd exts/rag && \
|
||||
sed -i 's/pgrx = "0.12.6"/pgrx = { version = "0.12.6", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
cargo pgrx install --release && \
|
||||
echo "trusted = true" >> /usr/local/pgsql/share/extension/rag.control && \
|
||||
\
|
||||
cd ../rag_bge_small_en_v15 && \
|
||||
sed -i 's/pgrx = "0.12.6"/pgrx = { version = "0.12.6", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
ORT_LIB_LOCATION=/home/nonroot/onnxruntime-src/build/Linux \
|
||||
REMOTE_ONNX_URL=http://pg-ext-s3-gateway/pgrag-data/bge_small_en_v15.onnx \
|
||||
cargo pgrx install --release --features remote_onnx && \
|
||||
echo "trusted = true" >> /usr/local/pgsql/share/extension/rag_bge_small_en_v15.control && \
|
||||
\
|
||||
cd ../rag_jina_reranker_v1_tiny_en && \
|
||||
sed -i 's/pgrx = "0.12.6"/pgrx = { version = "0.12.6", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
ORT_LIB_LOCATION=/home/nonroot/onnxruntime-src/build/Linux \
|
||||
REMOTE_ONNX_URL=http://pg-ext-s3-gateway/pgrag-data/jina_reranker_v1_tiny_en.onnx \
|
||||
cargo pgrx install --release --features remote_onnx && \
|
||||
echo "trusted = true" >> /usr/local/pgsql/share/extension/rag_jina_reranker_v1_tiny_en.control
|
||||
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "pg-jsonschema-pg-build"
|
||||
@@ -891,21 +956,31 @@ USER root
|
||||
#
|
||||
#########################################################################################
|
||||
|
||||
FROM rust-extensions-build AS pg-jsonschema-pg-build
|
||||
FROM rust-extensions-build-pgrx12 AS pg-jsonschema-pg-build
|
||||
ARG PG_VERSION
|
||||
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "pg_jsonschema does not yet have a release that supports pg17" && exit 0;; \
|
||||
# version 0.3.3 supports v17
|
||||
# last release v0.3.3 - Oct 16, 2024
|
||||
#
|
||||
# there were no breaking changes
|
||||
# so we can use the same version for all postgres versions
|
||||
RUN case "${PG_VERSION}" in \
|
||||
"v14" | "v15" | "v16" | "v17") \
|
||||
export PG_JSONSCHEMA_VERSION=0.3.3 \
|
||||
export PG_JSONSCHEMA_CHECKSUM=40c2cffab4187e0233cb8c3bde013be92218c282f95f4469c5282f6b30d64eac \
|
||||
;; \
|
||||
*) \
|
||||
echo "unexpected PostgreSQL version" && exit 1 \
|
||||
;; \
|
||||
esac && \
|
||||
wget https://github.com/supabase/pg_jsonschema/archive/refs/tags/v0.3.1.tar.gz -O pg_jsonschema.tar.gz && \
|
||||
echo "61df3db1ed83cf24f6aa39c826f8818bfa4f0bd33b587fd6b2b1747985642297 pg_jsonschema.tar.gz" | sha256sum --check && \
|
||||
wget https://github.com/supabase/pg_jsonschema/archive/refs/tags/v${PG_JSONSCHEMA_VERSION}.tar.gz -O pg_jsonschema.tar.gz && \
|
||||
echo "${PG_JSONSCHEMA_CHECKSUM} pg_jsonschema.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_jsonschema-src && cd pg_jsonschema-src && tar xzf ../pg_jsonschema.tar.gz --strip-components=1 -C . && \
|
||||
# see commit 252b3685a27a0f4c31a0f91e983c6314838e89e8
|
||||
# `unsafe-postgres` feature allows to build pgx extensions
|
||||
# against postgres forks that decided to change their ABI name (like us).
|
||||
# With that we can build extensions without forking them and using stock
|
||||
# pgx. As this feature is new few manual version bumps were required.
|
||||
sed -i 's/pgrx = "0.11.3"/pgrx = { version = "0.11.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
sed -i 's/pgrx = "0.12.6"/pgrx = { version = "0.12.6", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
cargo pgrx install --release && \
|
||||
echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_jsonschema.control
|
||||
|
||||
@@ -916,16 +991,27 @@ RUN case "${PG_VERSION}" in "v17") \
|
||||
#
|
||||
#########################################################################################
|
||||
|
||||
FROM rust-extensions-build AS pg-graphql-pg-build
|
||||
FROM rust-extensions-build-pgrx12 AS pg-graphql-pg-build
|
||||
ARG PG_VERSION
|
||||
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "pg_graphql does not yet have a release that supports pg17 as of now" && exit 0;; \
|
||||
# version 1.5.9 supports v17
|
||||
# last release v1.5.9 - Oct 16, 2024
|
||||
#
|
||||
# there were no breaking changes
|
||||
# so we can use the same version for all postgres versions
|
||||
RUN case "${PG_VERSION}" in \
|
||||
"v14" | "v15" | "v16" | "v17") \
|
||||
export PG_GRAPHQL_VERSION=1.5.9 \
|
||||
export PG_GRAPHQL_CHECKSUM=cf768385a41278be1333472204fc0328118644ae443182cf52f7b9b23277e497 \
|
||||
;; \
|
||||
*) \
|
||||
echo "unexpected PostgreSQL version" && exit 1 \
|
||||
;; \
|
||||
esac && \
|
||||
wget https://github.com/supabase/pg_graphql/archive/refs/tags/v1.5.7.tar.gz -O pg_graphql.tar.gz && \
|
||||
echo "2b3e567a5b31019cb97ae0e33263c1bcc28580be5a444ac4c8ece5c4be2aea41 pg_graphql.tar.gz" | sha256sum --check && \
|
||||
wget https://github.com/supabase/pg_graphql/archive/refs/tags/v${PG_GRAPHQL_VERSION}.tar.gz -O pg_graphql.tar.gz && \
|
||||
echo "${PG_GRAPHQL_CHECKSUM} pg_graphql.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_graphql-src && cd pg_graphql-src && tar xzf ../pg_graphql.tar.gz --strip-components=1 -C . && \
|
||||
sed -i 's/pgrx = "=0.11.3"/pgrx = { version = "0.11.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
sed -i 's/pgrx = "=0.12.6"/pgrx = { version = "0.12.6", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
cargo pgrx install --release && \
|
||||
# it's needed to enable extension because it uses untrusted C language
|
||||
sed -i 's/superuser = false/superuser = true/g' /usr/local/pgsql/share/extension/pg_graphql.control && \
|
||||
@@ -938,15 +1024,13 @@ RUN case "${PG_VERSION}" in "v17") \
|
||||
#
|
||||
#########################################################################################
|
||||
|
||||
FROM rust-extensions-build AS pg-tiktoken-pg-build
|
||||
FROM rust-extensions-build-pgrx12 AS pg-tiktoken-pg-build
|
||||
ARG PG_VERSION
|
||||
|
||||
# 26806147b17b60763039c6a6878884c41a262318 made on 26/09/2023
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "pg_tiktoken does not have versions, nor support for pg17" && exit 0;; \
|
||||
esac && \
|
||||
wget https://github.com/kelvich/pg_tiktoken/archive/26806147b17b60763039c6a6878884c41a262318.tar.gz -O pg_tiktoken.tar.gz && \
|
||||
echo "e64e55aaa38c259512d3e27c572da22c4637418cf124caba904cd50944e5004e pg_tiktoken.tar.gz" | sha256sum --check && \
|
||||
# doesn't use releases
|
||||
# 9118dd4549b7d8c0bbc98e04322499f7bf2fa6f7 - on Oct 29, 2024
|
||||
RUN wget https://github.com/kelvich/pg_tiktoken/archive/9118dd4549b7d8c0bbc98e04322499f7bf2fa6f7.tar.gz -O pg_tiktoken.tar.gz && \
|
||||
echo "a5bc447e7920ee149d3c064b8b9f0086c0e83939499753178f7d35788416f628 pg_tiktoken.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_tiktoken-src && cd pg_tiktoken-src && tar xzf ../pg_tiktoken.tar.gz --strip-components=1 -C . && \
|
||||
# TODO update pgrx version in the pg_tiktoken repo and remove this line
|
||||
sed -i 's/pgrx = { version = "=0.10.2",/pgrx = { version = "0.11.3",/g' Cargo.toml && \
|
||||
@@ -964,6 +1048,8 @@ RUN case "${PG_VERSION}" in "v17") \
|
||||
FROM rust-extensions-build AS pg-pgx-ulid-build
|
||||
ARG PG_VERSION
|
||||
|
||||
# doesn't support v17 yet
|
||||
# https://github.com/pksunkara/pgx_ulid/pull/52
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "pgx_ulid does not support pg17 as of the latest version (0.1.5)" && exit 0;; \
|
||||
esac && \
|
||||
@@ -981,16 +1067,16 @@ RUN case "${PG_VERSION}" in "v17") \
|
||||
#
|
||||
#########################################################################################
|
||||
|
||||
FROM rust-extensions-build AS pg-session-jwt-build
|
||||
FROM rust-extensions-build-pgrx12 AS pg-session-jwt-build
|
||||
ARG PG_VERSION
|
||||
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "pg_session_jwt does not yet have a release that supports pg17" && exit 0;; \
|
||||
esac && \
|
||||
wget https://github.com/neondatabase/pg_session_jwt/archive/5aee2625af38213650e1a07ae038fdc427250ee4.tar.gz -O pg_session_jwt.tar.gz && \
|
||||
echo "5d91b10bc1347d36cffc456cb87bec25047935d6503dc652ca046f04760828e7 pg_session_jwt.tar.gz" | sha256sum --check && \
|
||||
# NOTE: local_proxy depends on the version of pg_session_jwt
|
||||
# Do not update without approve from proxy team
|
||||
# Make sure the version is reflected in proxy/src/serverless/local_conn_pool.rs
|
||||
RUN wget https://github.com/neondatabase/pg_session_jwt/archive/refs/tags/v0.1.2-v17.tar.gz -O pg_session_jwt.tar.gz && \
|
||||
echo "c8ecbed9cb8c6441bce5134a176002b043018adf9d05a08e457dda233090a86e pg_session_jwt.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_session_jwt-src && cd pg_session_jwt-src && tar xzf ../pg_session_jwt.tar.gz --strip-components=1 -C . && \
|
||||
sed -i 's/pgrx = "=0.11.3"/pgrx = { version = "=0.11.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
sed -i 's/pgrx = "0.12.6"/pgrx = { version = "=0.12.6", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
cargo pgrx install --release
|
||||
|
||||
#########################################################################################
|
||||
@@ -1053,6 +1139,34 @@ RUN wget https://github.com/pgpartman/pg_partman/archive/refs/tags/v5.1.0.tar.gz
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_partman.control
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "pg_mooncake"
|
||||
# compile pg_mooncake extension
|
||||
#
|
||||
#########################################################################################
|
||||
FROM rust-extensions-build AS pg-mooncake-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
# The topmost commit in the `neon` branch at the time of writing this
|
||||
# https://github.com/Mooncake-Labs/pg_mooncake/commits/neon/
|
||||
# https://github.com/Mooncake-Labs/pg_mooncake/commit/568b5a82b5fc16136bdf4ca5aac3e0cc261ab48d
|
||||
ENV PG_MOONCAKE_VERSION=568b5a82b5fc16136bdf4ca5aac3e0cc261ab48d
|
||||
ENV PATH="/usr/local/pgsql/bin/:$PATH"
|
||||
|
||||
RUN case "${PG_VERSION}" in \
|
||||
'v14') \
|
||||
echo "pg_mooncake is not supported on Postgres ${PG_VERSION}" && exit 0;; \
|
||||
esac && \
|
||||
git clone --depth 1 --branch neon https://github.com/Mooncake-Labs/pg_mooncake.git pg_mooncake-src && \
|
||||
cd pg_mooncake-src && \
|
||||
git checkout "${PG_MOONCAKE_VERSION}" && \
|
||||
git submodule update --init --depth 1 --recursive && \
|
||||
make BUILD_TYPE=release -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make BUILD_TYPE=release -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_mooncake.control
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "neon-pg-ext-build"
|
||||
@@ -1071,6 +1185,7 @@ COPY --from=h3-pg-build /h3/usr /
|
||||
COPY --from=unit-pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=vector-pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pgjwt-pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pgrag-pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg-jsonschema-pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg-graphql-pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg-tiktoken-pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
@@ -1096,7 +1211,7 @@ COPY --from=wal2json-pg-build /usr/local/pgsql /usr/local/pgsql
|
||||
COPY --from=pg-anon-pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg-ivm-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg-partman-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg-sudo-pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg-mooncake-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY pgxn/ pgxn/
|
||||
|
||||
RUN make -j $(getconf _NPROCESSORS_ONLN) \
|
||||
@@ -1204,7 +1319,10 @@ RUN mold -run cargo build --locked --profile release-line-debug-size-lto --bin l
|
||||
#########################################################################################
|
||||
|
||||
FROM quay.io/prometheuscommunity/postgres-exporter:v0.12.1 AS postgres-exporter
|
||||
FROM burningalchemist/sql_exporter:0.13 AS sql-exporter
|
||||
|
||||
# Keep the version the same as in build-tools.Dockerfile and
|
||||
# test_runner/regress/test_compute_metrics.py.
|
||||
FROM burningalchemist/sql_exporter:0.13.1 AS sql-exporter
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
@@ -1260,6 +1378,7 @@ COPY --from=unit-pg-build /postgresql-unit.tar.gz /ext-src/
|
||||
COPY --from=vector-pg-build /pgvector.tar.gz /ext-src/
|
||||
COPY --from=vector-pg-build /pgvector.patch /ext-src/
|
||||
COPY --from=pgjwt-pg-build /pgjwt.tar.gz /ext-src
|
||||
#COPY --from=pgrag-pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
#COPY --from=pg-jsonschema-pg-build /home/nonroot/pg_jsonschema.tar.gz /ext-src
|
||||
#COPY --from=pg-graphql-pg-build /home/nonroot/pg_graphql.tar.gz /ext-src
|
||||
#COPY --from=pg-tiktoken-pg-build /home/nonroot/pg_tiktoken.tar.gz /ext-src
|
||||
@@ -1285,7 +1404,7 @@ COPY --from=pg-semver-pg-build /pg_semver.tar.gz /ext-src
|
||||
#COPY --from=pg-embedding-pg-build /home/nonroot/pg_embedding-src/ /ext-src
|
||||
#COPY --from=wal2json-pg-build /wal2json_2_5.tar.gz /ext-src
|
||||
COPY --from=pg-anon-pg-build /pg_anon.tar.gz /ext-src
|
||||
COPY --from=pg-sudo-pg-build /pg_sudo.tar.gz /ext-src
|
||||
COPY compute/patches/pg_anon.patch /ext-src
|
||||
COPY --from=pg-ivm-build /pg_ivm.tar.gz /ext-src
|
||||
COPY --from=pg-partman-build /pg_partman.tar.gz /ext-src
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
@@ -1308,7 +1427,14 @@ RUN case "${PG_VERSION}" in "v17") \
|
||||
esac && \
|
||||
cd /ext-src/pg_hint_plan-src && patch -p1 < /ext-src/pg_hint_plan.patch
|
||||
COPY --chmod=755 docker-compose/run-tests.sh /run-tests.sh
|
||||
RUN patch -p1 </ext-src/pg_cron.patch
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
|
||||
esac && \
|
||||
patch -p1 </ext-src/pg_anon.patch
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
|
||||
esac && \
|
||||
patch -p1 </ext-src/pg_cron.patch
|
||||
ENV PATH=/usr/local/pgsql/bin:$PATH
|
||||
ENV PGHOST=compute
|
||||
ENV PGPORT=55433
|
||||
@@ -3,6 +3,7 @@
|
||||
metrics: [
|
||||
import 'sql_exporter/checkpoints_req.libsonnet',
|
||||
import 'sql_exporter/checkpoints_timed.libsonnet',
|
||||
import 'sql_exporter/compute_backpressure_throttling_ms.libsonnet',
|
||||
import 'sql_exporter/compute_current_lsn.libsonnet',
|
||||
import 'sql_exporter/compute_logical_snapshot_files.libsonnet',
|
||||
import 'sql_exporter/compute_receive_lsn.libsonnet',
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
function(collector_file, application_name='sql_exporter') {
|
||||
function(collector_name, collector_file, connection_string) {
|
||||
// Configuration for sql_exporter for autoscaling-agent
|
||||
// Global defaults.
|
||||
global: {
|
||||
@@ -23,12 +23,12 @@ function(collector_file, application_name='sql_exporter') {
|
||||
target: {
|
||||
// Data source name always has a URI schema that matches the driver name. In some cases (e.g. MySQL)
|
||||
// the schema gets dropped or replaced to match the driver expected DSN format.
|
||||
data_source_name: std.format('postgresql://cloud_admin@127.0.0.1:5432/postgres?sslmode=disable&application_name=%s', [application_name]),
|
||||
data_source_name: connection_string,
|
||||
|
||||
// Collectors (referenced by name) to execute on the target.
|
||||
// Glob patterns are supported (see <https://pkg.go.dev/path/filepath#Match> for syntax).
|
||||
collectors: [
|
||||
'neon_collector',
|
||||
collector_name,
|
||||
],
|
||||
},
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
local neon = import 'neon.libsonnet';
|
||||
|
||||
local pg_stat_bgwriter = importstr 'sql_exporter/checkpoints_req.sql';
|
||||
local pg_stat_checkpointer = importstr 'sql_exporter/checkpoints_req.17.sql';
|
||||
local pg_stat_bgwriter = importstr 'sql_exporter/checkpoints_timed.sql';
|
||||
local pg_stat_checkpointer = importstr 'sql_exporter/checkpoints_timed.17.sql';
|
||||
|
||||
{
|
||||
metric_name: 'checkpoints_timed',
|
||||
|
||||
@@ -0,0 +1,10 @@
|
||||
{
|
||||
metric_name: 'compute_backpressure_throttling_ms',
|
||||
type: 'gauge',
|
||||
help: 'Time compute has spent throttled',
|
||||
key_labels: null,
|
||||
values: [
|
||||
'throttled',
|
||||
],
|
||||
query: importstr 'sql_exporter/compute_backpressure_throttling_ms.sql',
|
||||
}
|
||||
@@ -0,0 +1 @@
|
||||
SELECT neon.backpressure_throttling_time() AS throttled;
|
||||
@@ -1,5 +1,10 @@
|
||||
SELECT
|
||||
slot_name,
|
||||
pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn)::FLOAT8 AS retained_wal
|
||||
pg_wal_lsn_diff(
|
||||
CASE
|
||||
WHEN pg_is_in_recovery() THEN pg_last_wal_replay_lsn()
|
||||
ELSE pg_current_wal_lsn()
|
||||
END,
|
||||
restart_lsn)::FLOAT8 AS retained_wal
|
||||
FROM pg_replication_slots
|
||||
WHERE active = false;
|
||||
|
||||
@@ -18,7 +18,7 @@ commands:
|
||||
- name: pgbouncer
|
||||
user: postgres
|
||||
sysvInitAction: respawn
|
||||
shell: '/usr/local/bin/pgbouncer /etc/pgbouncer.ini'
|
||||
shell: '/usr/local/bin/pgbouncer /etc/pgbouncer.ini 2>&1 > /dev/virtio-ports/tech.neon.log.0'
|
||||
- name: local_proxy
|
||||
user: postgres
|
||||
sysvInitAction: respawn
|
||||
|
||||
@@ -18,7 +18,7 @@ commands:
|
||||
- name: pgbouncer
|
||||
user: postgres
|
||||
sysvInitAction: respawn
|
||||
shell: '/usr/local/bin/pgbouncer /etc/pgbouncer.ini'
|
||||
shell: '/usr/local/bin/pgbouncer /etc/pgbouncer.ini 2>&1 > /dev/virtio-ports/tech.neon.log.0'
|
||||
- name: local_proxy
|
||||
user: postgres
|
||||
sysvInitAction: respawn
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
use std::collections::HashMap;
|
||||
use std::env;
|
||||
use std::fs;
|
||||
use std::io::BufRead;
|
||||
use std::os::unix::fs::{symlink, PermissionsExt};
|
||||
use std::path::Path;
|
||||
use std::process::{Command, Stdio};
|
||||
@@ -15,6 +14,7 @@ use std::time::Instant;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use chrono::{DateTime, Utc};
|
||||
use compute_api::spec::PgIdent;
|
||||
use futures::future::join_all;
|
||||
use futures::stream::FuturesUnordered;
|
||||
use futures::StreamExt;
|
||||
@@ -25,8 +25,9 @@ use tracing::{debug, error, info, instrument, warn};
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
use compute_api::privilege::Privilege;
|
||||
use compute_api::responses::{ComputeMetrics, ComputeStatus};
|
||||
use compute_api::spec::{ComputeFeature, ComputeMode, ComputeSpec};
|
||||
use compute_api::spec::{ComputeFeature, ComputeMode, ComputeSpec, ExtVersion};
|
||||
use utils::measured_stream::MeasuredReader;
|
||||
|
||||
use nix::sys::signal::{kill, Signal};
|
||||
@@ -363,8 +364,7 @@ impl ComputeNode {
|
||||
let pageserver_connect_micros = start_time.elapsed().as_micros() as u64;
|
||||
|
||||
let basebackup_cmd = match lsn {
|
||||
// HACK We don't use compression on first start (Lsn(0)) because there's no API for it
|
||||
Lsn(0) => format!("basebackup {} {}", spec.tenant_id, spec.timeline_id),
|
||||
Lsn(0) => format!("basebackup {} {} --gzip", spec.tenant_id, spec.timeline_id),
|
||||
_ => format!(
|
||||
"basebackup {} {} {} --gzip",
|
||||
spec.tenant_id, spec.timeline_id, lsn
|
||||
@@ -373,38 +373,16 @@ impl ComputeNode {
|
||||
|
||||
let copyreader = client.copy_out(basebackup_cmd.as_str())?;
|
||||
let mut measured_reader = MeasuredReader::new(copyreader);
|
||||
|
||||
// Check the magic number to see if it's a gzip or not. Even though
|
||||
// we might explicitly ask for gzip, an old pageserver with no implementation
|
||||
// of gzip compression might send us uncompressed data. After some time
|
||||
// passes we can assume all pageservers know how to compress and we can
|
||||
// delete this check.
|
||||
//
|
||||
// If the data is not gzip, it will be tar. It will not be mistakenly
|
||||
// recognized as gzip because tar starts with an ascii encoding of a filename,
|
||||
// and 0x1f and 0x8b are unlikely first characters for any filename. Moreover,
|
||||
// we send the "global" directory first from the pageserver, so it definitely
|
||||
// won't be recognized as gzip.
|
||||
let mut bufreader = std::io::BufReader::new(&mut measured_reader);
|
||||
let gzip = {
|
||||
let peek = bufreader.fill_buf().unwrap();
|
||||
peek[0] == 0x1f && peek[1] == 0x8b
|
||||
};
|
||||
|
||||
// Read the archive directly from the `CopyOutReader`
|
||||
//
|
||||
// Set `ignore_zeros` so that unpack() reads all the Copy data and
|
||||
// doesn't stop at the end-of-archive marker. Otherwise, if the server
|
||||
// sends an Error after finishing the tarball, we will not notice it.
|
||||
if gzip {
|
||||
let mut ar = tar::Archive::new(flate2::read::GzDecoder::new(&mut bufreader));
|
||||
ar.set_ignore_zeros(true);
|
||||
ar.unpack(&self.pgdata)?;
|
||||
} else {
|
||||
let mut ar = tar::Archive::new(&mut bufreader);
|
||||
ar.set_ignore_zeros(true);
|
||||
ar.unpack(&self.pgdata)?;
|
||||
};
|
||||
let mut ar = tar::Archive::new(flate2::read::GzDecoder::new(&mut bufreader));
|
||||
ar.set_ignore_zeros(true);
|
||||
ar.unpack(&self.pgdata)?;
|
||||
|
||||
// Report metrics
|
||||
let mut state = self.state.lock().unwrap();
|
||||
@@ -1373,6 +1351,97 @@ LIMIT 100",
|
||||
download_size
|
||||
}
|
||||
|
||||
pub async fn set_role_grants(
|
||||
&self,
|
||||
db_name: &PgIdent,
|
||||
schema_name: &PgIdent,
|
||||
privileges: &[Privilege],
|
||||
role_name: &PgIdent,
|
||||
) -> Result<()> {
|
||||
use tokio_postgres::config::Config;
|
||||
use tokio_postgres::NoTls;
|
||||
|
||||
let mut conf = Config::from_str(self.connstr.as_str()).unwrap();
|
||||
conf.dbname(db_name);
|
||||
|
||||
let (db_client, conn) = conf
|
||||
.connect(NoTls)
|
||||
.await
|
||||
.context("Failed to connect to the database")?;
|
||||
tokio::spawn(conn);
|
||||
|
||||
// TODO: support other types of grants apart from schemas?
|
||||
let query = format!(
|
||||
"GRANT {} ON SCHEMA {} TO {}",
|
||||
privileges
|
||||
.iter()
|
||||
// should not be quoted as it's part of the command.
|
||||
// is already sanitized so it's ok
|
||||
.map(|p| p.as_str())
|
||||
.collect::<Vec<&'static str>>()
|
||||
.join(", "),
|
||||
// quote the schema and role name as identifiers to sanitize them.
|
||||
schema_name.pg_quote(),
|
||||
role_name.pg_quote(),
|
||||
);
|
||||
db_client
|
||||
.simple_query(&query)
|
||||
.await
|
||||
.with_context(|| format!("Failed to execute query: {}", query))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn install_extension(
|
||||
&self,
|
||||
ext_name: &PgIdent,
|
||||
db_name: &PgIdent,
|
||||
ext_version: ExtVersion,
|
||||
) -> Result<ExtVersion> {
|
||||
use tokio_postgres::config::Config;
|
||||
use tokio_postgres::NoTls;
|
||||
|
||||
let mut conf = Config::from_str(self.connstr.as_str()).unwrap();
|
||||
conf.dbname(db_name);
|
||||
|
||||
let (db_client, conn) = conf
|
||||
.connect(NoTls)
|
||||
.await
|
||||
.context("Failed to connect to the database")?;
|
||||
tokio::spawn(conn);
|
||||
|
||||
let version_query = "SELECT extversion FROM pg_extension WHERE extname = $1";
|
||||
let version: Option<ExtVersion> = db_client
|
||||
.query_opt(version_query, &[&ext_name])
|
||||
.await
|
||||
.with_context(|| format!("Failed to execute query: {}", version_query))?
|
||||
.map(|row| row.get(0));
|
||||
|
||||
// sanitize the inputs as postgres idents.
|
||||
let ext_name: String = ext_name.pg_quote();
|
||||
let quoted_version: String = ext_version.pg_quote();
|
||||
|
||||
if let Some(installed_version) = version {
|
||||
if installed_version == ext_version {
|
||||
return Ok(installed_version);
|
||||
}
|
||||
let query = format!("ALTER EXTENSION {ext_name} UPDATE TO {quoted_version}");
|
||||
db_client
|
||||
.simple_query(&query)
|
||||
.await
|
||||
.with_context(|| format!("Failed to execute query: {}", query))?;
|
||||
} else {
|
||||
let query =
|
||||
format!("CREATE EXTENSION IF NOT EXISTS {ext_name} WITH VERSION {quoted_version}");
|
||||
db_client
|
||||
.simple_query(&query)
|
||||
.await
|
||||
.with_context(|| format!("Failed to execute query: {}", query))?;
|
||||
}
|
||||
|
||||
Ok(ext_version)
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
pub async fn prepare_preload_libraries(
|
||||
&self,
|
||||
|
||||
@@ -107,7 +107,7 @@ pub fn get_pg_version(pgbin: &str) -> String {
|
||||
// pg_config --version returns a (platform specific) human readable string
|
||||
// such as "PostgreSQL 15.4". We parse this to v14/v15/v16 etc.
|
||||
let human_version = get_pg_config("--version", pgbin);
|
||||
return parse_pg_version(&human_version).to_string();
|
||||
parse_pg_version(&human_version).to_string()
|
||||
}
|
||||
|
||||
fn parse_pg_version(human_version: &str) -> &str {
|
||||
|
||||
@@ -9,8 +9,11 @@ use crate::catalog::SchemaDumpError;
|
||||
use crate::catalog::{get_database_schema, get_dbs_and_roles};
|
||||
use crate::compute::forward_termination_signal;
|
||||
use crate::compute::{ComputeNode, ComputeState, ParsedSpec};
|
||||
use compute_api::requests::ConfigurationRequest;
|
||||
use compute_api::responses::{ComputeStatus, ComputeStatusResponse, GenericAPIError};
|
||||
use compute_api::requests::{ConfigurationRequest, ExtensionInstallRequest, SetRoleGrantsRequest};
|
||||
use compute_api::responses::{
|
||||
ComputeStatus, ComputeStatusResponse, ExtensionInstallResult, GenericAPIError,
|
||||
SetRoleGrantsResponse,
|
||||
};
|
||||
|
||||
use anyhow::Result;
|
||||
use hyper::header::CONTENT_TYPE;
|
||||
@@ -98,6 +101,38 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
|
||||
}
|
||||
}
|
||||
|
||||
(&Method::POST, "/extensions") => {
|
||||
info!("serving /extensions POST request");
|
||||
let status = compute.get_status();
|
||||
if status != ComputeStatus::Running {
|
||||
let msg = format!(
|
||||
"invalid compute status for extensions request: {:?}",
|
||||
status
|
||||
);
|
||||
error!(msg);
|
||||
return render_json_error(&msg, StatusCode::PRECONDITION_FAILED);
|
||||
}
|
||||
|
||||
let request = hyper::body::to_bytes(req.into_body()).await.unwrap();
|
||||
let request = serde_json::from_slice::<ExtensionInstallRequest>(&request).unwrap();
|
||||
let res = compute
|
||||
.install_extension(&request.extension, &request.database, request.version)
|
||||
.await;
|
||||
match res {
|
||||
Ok(version) => render_json(Body::from(
|
||||
serde_json::to_string(&ExtensionInstallResult {
|
||||
extension: request.extension,
|
||||
version,
|
||||
})
|
||||
.unwrap(),
|
||||
)),
|
||||
Err(e) => {
|
||||
error!("install_extension failed: {}", e);
|
||||
render_json_error(&e.to_string(), StatusCode::INTERNAL_SERVER_ERROR)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(&Method::GET, "/info") => {
|
||||
let num_cpus = num_cpus::get_physical();
|
||||
info!("serving /info GET request. num_cpus: {}", num_cpus);
|
||||
@@ -165,6 +200,48 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
|
||||
}
|
||||
}
|
||||
|
||||
(&Method::POST, "/grants") => {
|
||||
info!("serving /grants POST request");
|
||||
let status = compute.get_status();
|
||||
if status != ComputeStatus::Running {
|
||||
let msg = format!(
|
||||
"invalid compute status for set_role_grants request: {:?}",
|
||||
status
|
||||
);
|
||||
error!(msg);
|
||||
return render_json_error(&msg, StatusCode::PRECONDITION_FAILED);
|
||||
}
|
||||
|
||||
let request = hyper::body::to_bytes(req.into_body()).await.unwrap();
|
||||
let request = serde_json::from_slice::<SetRoleGrantsRequest>(&request).unwrap();
|
||||
|
||||
let res = compute
|
||||
.set_role_grants(
|
||||
&request.database,
|
||||
&request.schema,
|
||||
&request.privileges,
|
||||
&request.role,
|
||||
)
|
||||
.await;
|
||||
match res {
|
||||
Ok(()) => render_json(Body::from(
|
||||
serde_json::to_string(&SetRoleGrantsResponse {
|
||||
database: request.database,
|
||||
schema: request.schema,
|
||||
role: request.role,
|
||||
privileges: request.privileges,
|
||||
})
|
||||
.unwrap(),
|
||||
)),
|
||||
Err(e) => render_json_error(
|
||||
&format!("could not grant role privileges to the schema: {e}"),
|
||||
// TODO: can we filter on role/schema not found errors
|
||||
// and return appropriate error code?
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
// get the list of installed extensions
|
||||
// currently only used in python tests
|
||||
// TODO: call it from cplane
|
||||
|
||||
@@ -127,6 +127,41 @@ paths:
|
||||
schema:
|
||||
$ref: "#/components/schemas/GenericError"
|
||||
|
||||
/grants:
|
||||
post:
|
||||
tags:
|
||||
- Grants
|
||||
summary: Apply grants to the database.
|
||||
description: ""
|
||||
operationId: setRoleGrants
|
||||
requestBody:
|
||||
description: Grants request.
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/SetRoleGrantsRequest"
|
||||
responses:
|
||||
200:
|
||||
description: Grants applied.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/SetRoleGrantsResponse"
|
||||
412:
|
||||
description: |
|
||||
Compute is not in the right state for processing the request.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/GenericError"
|
||||
500:
|
||||
description: Error occurred during grants application.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/GenericError"
|
||||
|
||||
/check_writability:
|
||||
post:
|
||||
tags:
|
||||
@@ -144,6 +179,41 @@ paths:
|
||||
description: Error text or 'true' if check passed.
|
||||
example: "true"
|
||||
|
||||
/extensions:
|
||||
post:
|
||||
tags:
|
||||
- Extensions
|
||||
summary: Install extension if possible.
|
||||
description: ""
|
||||
operationId: installExtension
|
||||
requestBody:
|
||||
description: Extension name and database to install it to.
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/ExtensionInstallRequest"
|
||||
responses:
|
||||
200:
|
||||
description: Result from extension installation
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/ExtensionInstallResult"
|
||||
412:
|
||||
description: |
|
||||
Compute is in the wrong state for processing the request.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/GenericError"
|
||||
500:
|
||||
description: Error during extension installation.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/GenericError"
|
||||
|
||||
/configure:
|
||||
post:
|
||||
tags:
|
||||
@@ -369,7 +439,7 @@ components:
|
||||
moment, when spec was received.
|
||||
example: "2022-10-12T07:20:50.52Z"
|
||||
status:
|
||||
$ref: '#/components/schemas/ComputeStatus'
|
||||
$ref: "#/components/schemas/ComputeStatus"
|
||||
last_active:
|
||||
type: string
|
||||
description: |
|
||||
@@ -409,6 +479,38 @@ components:
|
||||
- configuration
|
||||
example: running
|
||||
|
||||
ExtensionInstallRequest:
|
||||
type: object
|
||||
required:
|
||||
- extension
|
||||
- database
|
||||
- version
|
||||
properties:
|
||||
extension:
|
||||
type: string
|
||||
description: Extension name.
|
||||
example: "pg_session_jwt"
|
||||
version:
|
||||
type: string
|
||||
description: Version of the extension.
|
||||
example: "1.0.0"
|
||||
database:
|
||||
type: string
|
||||
description: Database name.
|
||||
example: "neondb"
|
||||
|
||||
ExtensionInstallResult:
|
||||
type: object
|
||||
properties:
|
||||
extension:
|
||||
description: Name of the extension.
|
||||
type: string
|
||||
example: "pg_session_jwt"
|
||||
version:
|
||||
description: Version of the extension.
|
||||
type: string
|
||||
example: "1.0.0"
|
||||
|
||||
InstalledExtensions:
|
||||
type: object
|
||||
properties:
|
||||
@@ -427,6 +529,60 @@ components:
|
||||
n_databases:
|
||||
type: integer
|
||||
|
||||
SetRoleGrantsRequest:
|
||||
type: object
|
||||
required:
|
||||
- database
|
||||
- schema
|
||||
- privileges
|
||||
- role
|
||||
properties:
|
||||
database:
|
||||
type: string
|
||||
description: Database name.
|
||||
example: "neondb"
|
||||
schema:
|
||||
type: string
|
||||
description: Schema name.
|
||||
example: "public"
|
||||
privileges:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
description: List of privileges to set.
|
||||
example: ["SELECT", "INSERT"]
|
||||
role:
|
||||
type: string
|
||||
description: Role name.
|
||||
example: "neon"
|
||||
|
||||
SetRoleGrantsResponse:
|
||||
type: object
|
||||
required:
|
||||
- database
|
||||
- schema
|
||||
- privileges
|
||||
- role
|
||||
properties:
|
||||
database:
|
||||
type: string
|
||||
description: Database name.
|
||||
example: "neondb"
|
||||
schema:
|
||||
type: string
|
||||
description: Schema name.
|
||||
example: "public"
|
||||
privileges:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
description: List of privileges set.
|
||||
example: ["SELECT", "INSERT"]
|
||||
role:
|
||||
type: string
|
||||
description: Role name.
|
||||
example: "neon"
|
||||
|
||||
#
|
||||
# Errors
|
||||
#
|
||||
|
||||
@@ -1073,10 +1073,10 @@ async fn handle_tenant(subcmd: &TenantCmd, env: &mut local_env::LocalEnv) -> any
|
||||
tenant_id,
|
||||
TimelineCreateRequest {
|
||||
new_timeline_id,
|
||||
ancestor_timeline_id: None,
|
||||
ancestor_start_lsn: None,
|
||||
existing_initdb_timeline_id: None,
|
||||
pg_version: Some(args.pg_version),
|
||||
mode: pageserver_api::models::TimelineCreateRequestMode::Bootstrap {
|
||||
existing_initdb_timeline_id: None,
|
||||
pg_version: Some(args.pg_version),
|
||||
},
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
@@ -1133,10 +1133,10 @@ async fn handle_timeline(cmd: &TimelineCmd, env: &mut local_env::LocalEnv) -> Re
|
||||
let storage_controller = StorageController::from_env(env);
|
||||
let create_req = TimelineCreateRequest {
|
||||
new_timeline_id,
|
||||
ancestor_timeline_id: None,
|
||||
existing_initdb_timeline_id: None,
|
||||
ancestor_start_lsn: None,
|
||||
pg_version: Some(args.pg_version),
|
||||
mode: pageserver_api::models::TimelineCreateRequestMode::Bootstrap {
|
||||
existing_initdb_timeline_id: None,
|
||||
pg_version: Some(args.pg_version),
|
||||
},
|
||||
};
|
||||
let timeline_info = storage_controller
|
||||
.tenant_timeline_create(tenant_id, create_req)
|
||||
@@ -1189,10 +1189,11 @@ async fn handle_timeline(cmd: &TimelineCmd, env: &mut local_env::LocalEnv) -> Re
|
||||
let storage_controller = StorageController::from_env(env);
|
||||
let create_req = TimelineCreateRequest {
|
||||
new_timeline_id,
|
||||
ancestor_timeline_id: Some(ancestor_timeline_id),
|
||||
existing_initdb_timeline_id: None,
|
||||
ancestor_start_lsn: start_lsn,
|
||||
pg_version: None,
|
||||
mode: pageserver_api::models::TimelineCreateRequestMode::Branch {
|
||||
ancestor_timeline_id,
|
||||
ancestor_start_lsn: start_lsn,
|
||||
pg_version: None,
|
||||
},
|
||||
};
|
||||
let timeline_info = storage_controller
|
||||
.tenant_timeline_create(tenant_id, create_req)
|
||||
|
||||
@@ -17,7 +17,7 @@ use std::time::Duration;
|
||||
|
||||
use anyhow::{bail, Context};
|
||||
use camino::Utf8PathBuf;
|
||||
use pageserver_api::models::{self, AuxFilePolicy, TenantInfo, TimelineInfo};
|
||||
use pageserver_api::models::{self, TenantInfo, TimelineInfo};
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use pageserver_client::mgmt_api;
|
||||
use postgres_backend::AuthType;
|
||||
@@ -399,11 +399,6 @@ impl PageServerNode {
|
||||
.map(serde_json::from_str)
|
||||
.transpose()
|
||||
.context("parse `timeline_get_throttle` from json")?,
|
||||
switch_aux_file_policy: settings
|
||||
.remove("switch_aux_file_policy")
|
||||
.map(|x| x.parse::<AuxFilePolicy>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'switch_aux_file_policy'")?,
|
||||
lsn_lease_length: settings.remove("lsn_lease_length").map(|x| x.to_string()),
|
||||
lsn_lease_length_for_ts: settings
|
||||
.remove("lsn_lease_length_for_ts")
|
||||
@@ -499,11 +494,6 @@ impl PageServerNode {
|
||||
.map(serde_json::from_str)
|
||||
.transpose()
|
||||
.context("parse `timeline_get_throttle` from json")?,
|
||||
switch_aux_file_policy: settings
|
||||
.remove("switch_aux_file_policy")
|
||||
.map(|x| x.parse::<AuxFilePolicy>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'switch_aux_file_policy'")?,
|
||||
lsn_lease_length: settings.remove("lsn_lease_length").map(|x| x.to_string()),
|
||||
lsn_lease_length_for_ts: settings
|
||||
.remove("lsn_lease_length_for_ts")
|
||||
@@ -529,28 +519,6 @@ impl PageServerNode {
|
||||
Ok(self.http_client.list_timelines(*tenant_shard_id).await?)
|
||||
}
|
||||
|
||||
pub async fn timeline_create(
|
||||
&self,
|
||||
tenant_shard_id: TenantShardId,
|
||||
new_timeline_id: TimelineId,
|
||||
ancestor_start_lsn: Option<Lsn>,
|
||||
ancestor_timeline_id: Option<TimelineId>,
|
||||
pg_version: Option<u32>,
|
||||
existing_initdb_timeline_id: Option<TimelineId>,
|
||||
) -> anyhow::Result<TimelineInfo> {
|
||||
let req = models::TimelineCreateRequest {
|
||||
new_timeline_id,
|
||||
ancestor_start_lsn,
|
||||
ancestor_timeline_id,
|
||||
pg_version,
|
||||
existing_initdb_timeline_id,
|
||||
};
|
||||
Ok(self
|
||||
.http_client
|
||||
.timeline_create(tenant_shard_id, &req)
|
||||
.await?)
|
||||
}
|
||||
|
||||
/// Import a basebackup prepared using either:
|
||||
/// a) `pg_basebackup -F tar`, or
|
||||
/// b) The `fullbackup` pageserver endpoint
|
||||
|
||||
@@ -20,7 +20,16 @@ use pageserver_client::mgmt_api::ResponseErrorMessageExt;
|
||||
use postgres_backend::AuthType;
|
||||
use reqwest::Method;
|
||||
use serde::{de::DeserializeOwned, Deserialize, Serialize};
|
||||
use std::{fs, net::SocketAddr, path::PathBuf, str::FromStr, sync::OnceLock};
|
||||
use std::{
|
||||
ffi::OsStr,
|
||||
fs,
|
||||
net::SocketAddr,
|
||||
path::PathBuf,
|
||||
process::ExitStatus,
|
||||
str::FromStr,
|
||||
sync::OnceLock,
|
||||
time::{Duration, Instant},
|
||||
};
|
||||
use tokio::process::Command;
|
||||
use tracing::instrument;
|
||||
use url::Url;
|
||||
@@ -168,16 +177,6 @@ impl StorageController {
|
||||
.expect("non-Unicode path")
|
||||
}
|
||||
|
||||
/// PIDFile for the postgres instance used to store storage controller state
|
||||
fn postgres_pid_file(&self) -> Utf8PathBuf {
|
||||
Utf8PathBuf::from_path_buf(
|
||||
self.env
|
||||
.base_data_dir
|
||||
.join("storage_controller_postgres.pid"),
|
||||
)
|
||||
.expect("non-Unicode path")
|
||||
}
|
||||
|
||||
/// Find the directory containing postgres subdirectories, such `bin` and `lib`
|
||||
///
|
||||
/// This usually uses STORAGE_CONTROLLER_POSTGRES_VERSION of postgres, but will fall back
|
||||
@@ -296,6 +295,31 @@ impl StorageController {
|
||||
.map_err(anyhow::Error::new)
|
||||
}
|
||||
|
||||
/// Wrapper for the pg_ctl binary, which we spawn as a short-lived subprocess when starting and stopping postgres
|
||||
async fn pg_ctl<I, S>(&self, args: I) -> ExitStatus
|
||||
where
|
||||
I: IntoIterator<Item = S>,
|
||||
S: AsRef<OsStr>,
|
||||
{
|
||||
let pg_bin_dir = self.get_pg_bin_dir().await.unwrap();
|
||||
let bin_path = pg_bin_dir.join("pg_ctl");
|
||||
|
||||
let pg_lib_dir = self.get_pg_lib_dir().await.unwrap();
|
||||
let envs = [
|
||||
("LD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
|
||||
("DYLD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
|
||||
];
|
||||
|
||||
Command::new(bin_path)
|
||||
.args(args)
|
||||
.envs(envs)
|
||||
.spawn()
|
||||
.expect("Failed to spawn pg_ctl, binary_missing?")
|
||||
.wait()
|
||||
.await
|
||||
.expect("Failed to wait for pg_ctl termination")
|
||||
}
|
||||
|
||||
pub async fn start(&self, start_args: NeonStorageControllerStartArgs) -> anyhow::Result<()> {
|
||||
let instance_dir = self.storage_controller_instance_dir(start_args.instance_id);
|
||||
if let Err(err) = tokio::fs::create_dir(&instance_dir).await {
|
||||
@@ -404,20 +428,34 @@ impl StorageController {
|
||||
db_start_args
|
||||
);
|
||||
|
||||
background_process::start_process(
|
||||
"storage_controller_db",
|
||||
&self.env.base_data_dir,
|
||||
pg_bin_dir.join("pg_ctl").as_std_path(),
|
||||
db_start_args,
|
||||
vec![
|
||||
("LD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
|
||||
("DYLD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
|
||||
],
|
||||
background_process::InitialPidFile::Create(self.postgres_pid_file()),
|
||||
&start_args.start_timeout,
|
||||
|| self.pg_isready(&pg_bin_dir, postgres_port),
|
||||
)
|
||||
.await?;
|
||||
let db_start_status = self.pg_ctl(db_start_args).await;
|
||||
let start_timeout: Duration = start_args.start_timeout.into();
|
||||
let db_start_deadline = Instant::now() + start_timeout;
|
||||
if !db_start_status.success() {
|
||||
return Err(anyhow::anyhow!(
|
||||
"Failed to start postgres {}",
|
||||
db_start_status.code().unwrap()
|
||||
));
|
||||
}
|
||||
|
||||
loop {
|
||||
if Instant::now() > db_start_deadline {
|
||||
return Err(anyhow::anyhow!("Timed out waiting for postgres to start"));
|
||||
}
|
||||
|
||||
match self.pg_isready(&pg_bin_dir, postgres_port).await {
|
||||
Ok(true) => {
|
||||
tracing::info!("storage controller postgres is now ready");
|
||||
break;
|
||||
}
|
||||
Ok(false) => {
|
||||
tokio::time::sleep(Duration::from_millis(100)).await;
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!("Failed to check postgres status: {e}")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
self.setup_database(postgres_port).await?;
|
||||
}
|
||||
@@ -583,15 +621,10 @@ impl StorageController {
|
||||
}
|
||||
|
||||
let pg_data_path = self.env.base_data_dir.join("storage_controller_db");
|
||||
let pg_bin_dir = self.get_pg_bin_dir().await?;
|
||||
|
||||
println!("Stopping storage controller database...");
|
||||
let pg_stop_args = ["-D", &pg_data_path.to_string_lossy(), "stop"];
|
||||
let stop_status = Command::new(pg_bin_dir.join("pg_ctl"))
|
||||
.args(pg_stop_args)
|
||||
.spawn()?
|
||||
.wait()
|
||||
.await?;
|
||||
let stop_status = self.pg_ctl(pg_stop_args).await;
|
||||
if !stop_status.success() {
|
||||
match self.is_postgres_running().await {
|
||||
Ok(false) => {
|
||||
@@ -612,14 +645,9 @@ impl StorageController {
|
||||
|
||||
async fn is_postgres_running(&self) -> anyhow::Result<bool> {
|
||||
let pg_data_path = self.env.base_data_dir.join("storage_controller_db");
|
||||
let pg_bin_dir = self.get_pg_bin_dir().await?;
|
||||
|
||||
let pg_status_args = ["-D", &pg_data_path.to_string_lossy(), "status"];
|
||||
let status_exitcode = Command::new(pg_bin_dir.join("pg_ctl"))
|
||||
.args(pg_status_args)
|
||||
.spawn()?
|
||||
.wait()
|
||||
.await?;
|
||||
let status_exitcode = self.pg_ctl(pg_status_args).await;
|
||||
|
||||
// pg_ctl status returns this exit code if postgres is not running: in this case it is
|
||||
// fine that stop failed. Otherwise it is an error that stop failed.
|
||||
|
||||
@@ -111,6 +111,11 @@ enum Command {
|
||||
#[arg(long)]
|
||||
node: NodeId,
|
||||
},
|
||||
/// Cancel any ongoing reconciliation for this shard
|
||||
TenantShardCancelReconcile {
|
||||
#[arg(long)]
|
||||
tenant_shard_id: TenantShardId,
|
||||
},
|
||||
/// Modify the pageserver tenant configuration of a tenant: this is the configuration structure
|
||||
/// that is passed through to pageservers, and does not affect storage controller behavior.
|
||||
TenantConfig {
|
||||
@@ -535,6 +540,15 @@ async fn main() -> anyhow::Result<()> {
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
Command::TenantShardCancelReconcile { tenant_shard_id } => {
|
||||
storcon_client
|
||||
.dispatch::<(), ()>(
|
||||
Method::PUT,
|
||||
format!("control/v1/tenant/{tenant_shard_id}/cancel_reconcile"),
|
||||
None,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
Command::TenantConfig { tenant_id, config } => {
|
||||
let tenant_conf = serde_json::from_str(&config)?;
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
Currently we build two main images:
|
||||
|
||||
- [neondatabase/neon](https://hub.docker.com/repository/docker/neondatabase/neon) — image with pre-built `pageserver`, `safekeeper` and `proxy` binaries and all the required runtime dependencies. Built from [/Dockerfile](/Dockerfile).
|
||||
- [neondatabase/compute-node-v16](https://hub.docker.com/repository/docker/neondatabase/compute-node-v16) — compute node image with pre-built Postgres binaries from [neondatabase/postgres](https://github.com/neondatabase/postgres). Similar images exist for v15 and v14. Built from [/compute-node/Dockerfile](/compute/Dockerfile.compute-node).
|
||||
- [neondatabase/compute-node-v16](https://hub.docker.com/repository/docker/neondatabase/compute-node-v16) — compute node image with pre-built Postgres binaries from [neondatabase/postgres](https://github.com/neondatabase/postgres). Similar images exist for v15 and v14. Built from [/compute-node/Dockerfile](/compute/compute-node.Dockerfile).
|
||||
|
||||
And additional intermediate image:
|
||||
|
||||
@@ -56,7 +56,7 @@ CREATE TABLE
|
||||
postgres=# insert into t values(1, 1);
|
||||
INSERT 0 1
|
||||
postgres=# select * from t;
|
||||
key | value
|
||||
key | value
|
||||
-----+-------
|
||||
1 | 1
|
||||
(1 row)
|
||||
@@ -84,4 +84,4 @@ Access http://localhost:9001 and sign in.
|
||||
- Username: `minio`
|
||||
- Password: `password`
|
||||
|
||||
You can see durable pages and WAL data in `neon` bucket.
|
||||
You can see durable pages and WAL data in `neon` bucket.
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
#![deny(unsafe_code)]
|
||||
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||
pub mod privilege;
|
||||
pub mod requests;
|
||||
pub mod responses;
|
||||
pub mod spec;
|
||||
|
||||
35
libs/compute_api/src/privilege.rs
Normal file
35
libs/compute_api/src/privilege.rs
Normal file
@@ -0,0 +1,35 @@
|
||||
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
|
||||
#[serde(rename_all = "UPPERCASE")]
|
||||
pub enum Privilege {
|
||||
Select,
|
||||
Insert,
|
||||
Update,
|
||||
Delete,
|
||||
Truncate,
|
||||
References,
|
||||
Trigger,
|
||||
Usage,
|
||||
Create,
|
||||
Connect,
|
||||
Temporary,
|
||||
Execute,
|
||||
}
|
||||
|
||||
impl Privilege {
|
||||
pub fn as_str(&self) -> &'static str {
|
||||
match self {
|
||||
Privilege::Select => "SELECT",
|
||||
Privilege::Insert => "INSERT",
|
||||
Privilege::Update => "UPDATE",
|
||||
Privilege::Delete => "DELETE",
|
||||
Privilege::Truncate => "TRUNCATE",
|
||||
Privilege::References => "REFERENCES",
|
||||
Privilege::Trigger => "TRIGGER",
|
||||
Privilege::Usage => "USAGE",
|
||||
Privilege::Create => "CREATE",
|
||||
Privilege::Connect => "CONNECT",
|
||||
Privilege::Temporary => "TEMPORARY",
|
||||
Privilege::Execute => "EXECUTE",
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,6 +1,8 @@
|
||||
//! Structs representing the JSON formats used in the compute_ctl's HTTP API.
|
||||
|
||||
use crate::spec::ComputeSpec;
|
||||
use crate::{
|
||||
privilege::Privilege,
|
||||
spec::{ComputeSpec, ExtVersion, PgIdent},
|
||||
};
|
||||
use serde::Deserialize;
|
||||
|
||||
/// Request of the /configure API
|
||||
@@ -12,3 +14,18 @@ use serde::Deserialize;
|
||||
pub struct ConfigurationRequest {
|
||||
pub spec: ComputeSpec,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug)]
|
||||
pub struct ExtensionInstallRequest {
|
||||
pub extension: PgIdent,
|
||||
pub database: PgIdent,
|
||||
pub version: ExtVersion,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug)]
|
||||
pub struct SetRoleGrantsRequest {
|
||||
pub database: PgIdent,
|
||||
pub schema: PgIdent,
|
||||
pub privileges: Vec<Privilege>,
|
||||
pub role: PgIdent,
|
||||
}
|
||||
|
||||
@@ -6,7 +6,10 @@ use std::fmt::Display;
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::{Deserialize, Serialize, Serializer};
|
||||
|
||||
use crate::spec::{ComputeSpec, Database, Role};
|
||||
use crate::{
|
||||
privilege::Privilege,
|
||||
spec::{ComputeSpec, Database, ExtVersion, PgIdent, Role},
|
||||
};
|
||||
|
||||
#[derive(Serialize, Debug, Deserialize)]
|
||||
pub struct GenericAPIError {
|
||||
@@ -168,3 +171,16 @@ pub struct InstalledExtension {
|
||||
pub struct InstalledExtensions {
|
||||
pub extensions: Vec<InstalledExtension>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Default, Serialize)]
|
||||
pub struct ExtensionInstallResult {
|
||||
pub extension: PgIdent,
|
||||
pub version: ExtVersion,
|
||||
}
|
||||
#[derive(Clone, Debug, Default, Serialize)]
|
||||
pub struct SetRoleGrantsResponse {
|
||||
pub database: PgIdent,
|
||||
pub schema: PgIdent,
|
||||
pub privileges: Vec<Privilege>,
|
||||
pub role: PgIdent,
|
||||
}
|
||||
|
||||
@@ -16,6 +16,9 @@ use remote_storage::RemotePath;
|
||||
/// intended to be used for DB / role names.
|
||||
pub type PgIdent = String;
|
||||
|
||||
/// String type alias representing Postgres extension version
|
||||
pub type ExtVersion = String;
|
||||
|
||||
/// Cluster spec or configuration represented as an optional number of
|
||||
/// delta operations + final cluster state description.
|
||||
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
|
||||
|
||||
@@ -19,6 +19,7 @@ use once_cell::sync::Lazy;
|
||||
use prometheus::core::{
|
||||
Atomic, AtomicU64, Collector, GenericCounter, GenericCounterVec, GenericGauge, GenericGaugeVec,
|
||||
};
|
||||
pub use prometheus::local::LocalHistogram;
|
||||
pub use prometheus::opts;
|
||||
pub use prometheus::register;
|
||||
pub use prometheus::Error;
|
||||
|
||||
@@ -250,12 +250,6 @@ pub struct TenantConfigToml {
|
||||
// Expresed in multiples of checkpoint distance.
|
||||
pub image_layer_creation_check_threshold: u8,
|
||||
|
||||
/// Switch to a new aux file policy. Switching this flag requires the user has not written any aux file into
|
||||
/// the storage before, and this flag cannot be switched back. Otherwise there will be data corruptions.
|
||||
/// There is a `last_aux_file_policy` flag which gets persisted in `index_part.json` once the first aux
|
||||
/// file is written.
|
||||
pub switch_aux_file_policy: crate::models::AuxFilePolicy,
|
||||
|
||||
/// The length for an explicit LSN lease request.
|
||||
/// Layers needed to reconstruct pages at LSN will not be GC-ed during this interval.
|
||||
#[serde(with = "humantime_serde")]
|
||||
@@ -475,7 +469,6 @@ impl Default for TenantConfigToml {
|
||||
lazy_slru_download: false,
|
||||
timeline_get_throttle: crate::models::ThrottleConfig::disabled(),
|
||||
image_layer_creation_check_threshold: DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD,
|
||||
switch_aux_file_policy: crate::models::AuxFilePolicy::default_tenant_config(),
|
||||
lsn_lease_length: LsnLease::DEFAULT_LENGTH,
|
||||
lsn_lease_length_for_ts: LsnLease::DEFAULT_LENGTH_FOR_TS,
|
||||
}
|
||||
|
||||
@@ -5,9 +5,11 @@ pub mod controller_api;
|
||||
pub mod key;
|
||||
pub mod keyspace;
|
||||
pub mod models;
|
||||
pub mod record;
|
||||
pub mod reltag;
|
||||
pub mod shard;
|
||||
/// Public API types
|
||||
pub mod upcall_api;
|
||||
pub mod value;
|
||||
|
||||
pub mod config;
|
||||
|
||||
@@ -10,7 +10,6 @@ use std::{
|
||||
io::{BufRead, Read},
|
||||
num::{NonZeroU32, NonZeroU64, NonZeroUsize},
|
||||
str::FromStr,
|
||||
sync::atomic::AtomicUsize,
|
||||
time::{Duration, SystemTime},
|
||||
};
|
||||
|
||||
@@ -211,13 +210,30 @@ pub enum TimelineState {
|
||||
#[derive(Serialize, Deserialize, Clone)]
|
||||
pub struct TimelineCreateRequest {
|
||||
pub new_timeline_id: TimelineId,
|
||||
#[serde(default)]
|
||||
pub ancestor_timeline_id: Option<TimelineId>,
|
||||
#[serde(default)]
|
||||
pub existing_initdb_timeline_id: Option<TimelineId>,
|
||||
#[serde(default)]
|
||||
pub ancestor_start_lsn: Option<Lsn>,
|
||||
pub pg_version: Option<u32>,
|
||||
#[serde(flatten)]
|
||||
pub mode: TimelineCreateRequestMode,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone)]
|
||||
#[serde(untagged)]
|
||||
pub enum TimelineCreateRequestMode {
|
||||
Branch {
|
||||
ancestor_timeline_id: TimelineId,
|
||||
#[serde(default)]
|
||||
ancestor_start_lsn: Option<Lsn>,
|
||||
// TODO: cplane sets this, but, the branching code always
|
||||
// inherits the ancestor's pg_version. Earlier code wasn't
|
||||
// using a flattened enum, so, it was an accepted field, and
|
||||
// we continue to accept it by having it here.
|
||||
pg_version: Option<u32>,
|
||||
},
|
||||
// NB: Bootstrap is all-optional, and thus the serde(untagged) will cause serde to stop at Bootstrap.
|
||||
// (serde picks the first matching enum variant, in declaration order).
|
||||
Bootstrap {
|
||||
#[serde(default)]
|
||||
existing_initdb_timeline_id: Option<TimelineId>,
|
||||
pg_version: Option<u32>,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone)]
|
||||
@@ -292,7 +308,6 @@ pub struct TenantConfig {
|
||||
pub lazy_slru_download: Option<bool>,
|
||||
pub timeline_get_throttle: Option<ThrottleConfig>,
|
||||
pub image_layer_creation_check_threshold: Option<u8>,
|
||||
pub switch_aux_file_policy: Option<AuxFilePolicy>,
|
||||
pub lsn_lease_length: Option<String>,
|
||||
pub lsn_lease_length_for_ts: Option<String>,
|
||||
}
|
||||
@@ -333,68 +348,6 @@ pub enum AuxFilePolicy {
|
||||
CrossValidation,
|
||||
}
|
||||
|
||||
impl AuxFilePolicy {
|
||||
pub fn is_valid_migration_path(from: Option<Self>, to: Self) -> bool {
|
||||
matches!(
|
||||
(from, to),
|
||||
(None, _) | (Some(AuxFilePolicy::CrossValidation), AuxFilePolicy::V2)
|
||||
)
|
||||
}
|
||||
|
||||
/// If a tenant writes aux files without setting `switch_aux_policy`, this value will be used.
|
||||
pub fn default_tenant_config() -> Self {
|
||||
Self::V2
|
||||
}
|
||||
}
|
||||
|
||||
/// The aux file policy memory flag. Users can store `Option<AuxFilePolicy>` into this atomic flag. 0 == unspecified.
|
||||
pub struct AtomicAuxFilePolicy(AtomicUsize);
|
||||
|
||||
impl AtomicAuxFilePolicy {
|
||||
pub fn new(policy: Option<AuxFilePolicy>) -> Self {
|
||||
Self(AtomicUsize::new(
|
||||
policy.map(AuxFilePolicy::to_usize).unwrap_or_default(),
|
||||
))
|
||||
}
|
||||
|
||||
pub fn load(&self) -> Option<AuxFilePolicy> {
|
||||
match self.0.load(std::sync::atomic::Ordering::Acquire) {
|
||||
0 => None,
|
||||
other => Some(AuxFilePolicy::from_usize(other)),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn store(&self, policy: Option<AuxFilePolicy>) {
|
||||
self.0.store(
|
||||
policy.map(AuxFilePolicy::to_usize).unwrap_or_default(),
|
||||
std::sync::atomic::Ordering::Release,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
impl AuxFilePolicy {
|
||||
pub fn to_usize(self) -> usize {
|
||||
match self {
|
||||
Self::V1 => 1,
|
||||
Self::CrossValidation => 2,
|
||||
Self::V2 => 3,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn try_from_usize(this: usize) -> Option<Self> {
|
||||
match this {
|
||||
1 => Some(Self::V1),
|
||||
2 => Some(Self::CrossValidation),
|
||||
3 => Some(Self::V2),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_usize(this: usize) -> Self {
|
||||
Self::try_from_usize(this).unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(tag = "kind")]
|
||||
pub enum EvictionPolicy {
|
||||
@@ -684,6 +637,25 @@ pub struct TimelineArchivalConfigRequest {
|
||||
pub state: TimelineArchivalState,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct TimelinesInfoAndOffloaded {
|
||||
pub timelines: Vec<TimelineInfo>,
|
||||
pub offloaded: Vec<OffloadedTimelineInfo>,
|
||||
}
|
||||
|
||||
/// Analog of [`TimelineInfo`] for offloaded timelines.
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct OffloadedTimelineInfo {
|
||||
pub tenant_id: TenantShardId,
|
||||
pub timeline_id: TimelineId,
|
||||
/// Whether the timeline has a parent it has been branched off from or not
|
||||
pub ancestor_timeline_id: Option<TimelineId>,
|
||||
/// Whether to retain the branch lsn at the ancestor or not
|
||||
pub ancestor_retain_lsn: Option<Lsn>,
|
||||
/// The time point when the timeline was archived
|
||||
pub archived_at: chrono::DateTime<chrono::Utc>,
|
||||
}
|
||||
|
||||
/// This represents the output of the "timeline_detail" and "timeline_list" API calls.
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct TimelineInfo {
|
||||
@@ -1032,6 +1004,12 @@ pub mod virtual_file {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ScanDisposableKeysResponse {
|
||||
pub disposable_count: usize,
|
||||
pub not_disposable_count: usize,
|
||||
}
|
||||
|
||||
// Wrapped in libpq CopyData
|
||||
#[derive(PartialEq, Eq, Debug)]
|
||||
pub enum PagestreamFeMessage {
|
||||
@@ -1591,71 +1569,6 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_aux_file_migration_path() {
|
||||
assert!(AuxFilePolicy::is_valid_migration_path(
|
||||
None,
|
||||
AuxFilePolicy::V1
|
||||
));
|
||||
assert!(AuxFilePolicy::is_valid_migration_path(
|
||||
None,
|
||||
AuxFilePolicy::V2
|
||||
));
|
||||
assert!(AuxFilePolicy::is_valid_migration_path(
|
||||
None,
|
||||
AuxFilePolicy::CrossValidation
|
||||
));
|
||||
// Self-migration is not a valid migration path, and the caller should handle it by itself.
|
||||
assert!(!AuxFilePolicy::is_valid_migration_path(
|
||||
Some(AuxFilePolicy::V1),
|
||||
AuxFilePolicy::V1
|
||||
));
|
||||
assert!(!AuxFilePolicy::is_valid_migration_path(
|
||||
Some(AuxFilePolicy::V2),
|
||||
AuxFilePolicy::V2
|
||||
));
|
||||
assert!(!AuxFilePolicy::is_valid_migration_path(
|
||||
Some(AuxFilePolicy::CrossValidation),
|
||||
AuxFilePolicy::CrossValidation
|
||||
));
|
||||
// Migrations not allowed
|
||||
assert!(!AuxFilePolicy::is_valid_migration_path(
|
||||
Some(AuxFilePolicy::CrossValidation),
|
||||
AuxFilePolicy::V1
|
||||
));
|
||||
assert!(!AuxFilePolicy::is_valid_migration_path(
|
||||
Some(AuxFilePolicy::V1),
|
||||
AuxFilePolicy::V2
|
||||
));
|
||||
assert!(!AuxFilePolicy::is_valid_migration_path(
|
||||
Some(AuxFilePolicy::V2),
|
||||
AuxFilePolicy::V1
|
||||
));
|
||||
assert!(!AuxFilePolicy::is_valid_migration_path(
|
||||
Some(AuxFilePolicy::V2),
|
||||
AuxFilePolicy::CrossValidation
|
||||
));
|
||||
assert!(!AuxFilePolicy::is_valid_migration_path(
|
||||
Some(AuxFilePolicy::V1),
|
||||
AuxFilePolicy::CrossValidation
|
||||
));
|
||||
// Migrations allowed
|
||||
assert!(AuxFilePolicy::is_valid_migration_path(
|
||||
Some(AuxFilePolicy::CrossValidation),
|
||||
AuxFilePolicy::V2
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_aux_parse() {
|
||||
assert_eq!(AuxFilePolicy::from_str("V2").unwrap(), AuxFilePolicy::V2);
|
||||
assert_eq!(AuxFilePolicy::from_str("v2").unwrap(), AuxFilePolicy::V2);
|
||||
assert_eq!(
|
||||
AuxFilePolicy::from_str("cross-validation").unwrap(),
|
||||
AuxFilePolicy::CrossValidation
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_image_compression_algorithm_parsing() {
|
||||
use ImageCompressionAlgorithm::*;
|
||||
|
||||
@@ -16,7 +16,7 @@ impl serde::Serialize for Partitioning {
|
||||
{
|
||||
pub struct KeySpace<'a>(&'a crate::keyspace::KeySpace);
|
||||
|
||||
impl<'a> serde::Serialize for KeySpace<'a> {
|
||||
impl serde::Serialize for KeySpace<'_> {
|
||||
fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
@@ -44,7 +44,7 @@ impl serde::Serialize for Partitioning {
|
||||
|
||||
pub struct WithDisplay<'a, T>(&'a T);
|
||||
|
||||
impl<'a, T: std::fmt::Display> serde::Serialize for WithDisplay<'a, T> {
|
||||
impl<T: std::fmt::Display> serde::Serialize for WithDisplay<'_, T> {
|
||||
fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
@@ -55,7 +55,7 @@ impl<'a, T: std::fmt::Display> serde::Serialize for WithDisplay<'a, T> {
|
||||
|
||||
pub struct KeyRange<'a>(&'a std::ops::Range<crate::key::Key>);
|
||||
|
||||
impl<'a> serde::Serialize for KeyRange<'a> {
|
||||
impl serde::Serialize for KeyRange<'_> {
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
|
||||
113
libs/pageserver_api/src/record.rs
Normal file
113
libs/pageserver_api/src/record.rs
Normal file
@@ -0,0 +1,113 @@
|
||||
//! This module defines the WAL record format used within the pageserver.
|
||||
|
||||
use bytes::Bytes;
|
||||
use postgres_ffi::walrecord::{describe_postgres_wal_record, MultiXactMember};
|
||||
use postgres_ffi::{MultiXactId, MultiXactOffset, TimestampTz, TransactionId};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use utils::bin_ser::DeserializeError;
|
||||
|
||||
/// Each update to a page is represented by a NeonWalRecord. It can be a wrapper
|
||||
/// around a PostgreSQL WAL record, or a custom neon-specific "record".
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub enum NeonWalRecord {
|
||||
/// Native PostgreSQL WAL record
|
||||
Postgres { will_init: bool, rec: Bytes },
|
||||
|
||||
/// Clear bits in heap visibility map. ('flags' is bitmap of bits to clear)
|
||||
ClearVisibilityMapFlags {
|
||||
new_heap_blkno: Option<u32>,
|
||||
old_heap_blkno: Option<u32>,
|
||||
flags: u8,
|
||||
},
|
||||
/// Mark transaction IDs as committed on a CLOG page
|
||||
ClogSetCommitted {
|
||||
xids: Vec<TransactionId>,
|
||||
timestamp: TimestampTz,
|
||||
},
|
||||
/// Mark transaction IDs as aborted on a CLOG page
|
||||
ClogSetAborted { xids: Vec<TransactionId> },
|
||||
/// Extend multixact offsets SLRU
|
||||
MultixactOffsetCreate {
|
||||
mid: MultiXactId,
|
||||
moff: MultiXactOffset,
|
||||
},
|
||||
/// Extend multixact members SLRU.
|
||||
MultixactMembersCreate {
|
||||
moff: MultiXactOffset,
|
||||
members: Vec<MultiXactMember>,
|
||||
},
|
||||
/// Update the map of AUX files, either writing or dropping an entry
|
||||
AuxFile {
|
||||
file_path: String,
|
||||
content: Option<Bytes>,
|
||||
},
|
||||
|
||||
/// A testing record for unit testing purposes. It supports append data to an existing image, or clear it.
|
||||
#[cfg(feature = "testing")]
|
||||
Test {
|
||||
/// Append a string to the image.
|
||||
append: String,
|
||||
/// Clear the image before appending.
|
||||
clear: bool,
|
||||
/// Treat this record as an init record. `clear` should be set to true if this field is set
|
||||
/// to true. This record does not need the history WALs to reconstruct. See [`NeonWalRecord::will_init`] and
|
||||
/// its references in `timeline.rs`.
|
||||
will_init: bool,
|
||||
},
|
||||
}
|
||||
|
||||
impl NeonWalRecord {
|
||||
/// Does replaying this WAL record initialize the page from scratch, or does
|
||||
/// it need to be applied over the previous image of the page?
|
||||
pub fn will_init(&self) -> bool {
|
||||
// If you change this function, you'll also need to change ValueBytes::will_init
|
||||
match self {
|
||||
NeonWalRecord::Postgres { will_init, rec: _ } => *will_init,
|
||||
#[cfg(feature = "testing")]
|
||||
NeonWalRecord::Test { will_init, .. } => *will_init,
|
||||
// None of the special neon record types currently initialize the page
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
pub fn wal_append(s: impl AsRef<str>) -> Self {
|
||||
Self::Test {
|
||||
append: s.as_ref().to_string(),
|
||||
clear: false,
|
||||
will_init: false,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
pub fn wal_clear() -> Self {
|
||||
Self::Test {
|
||||
append: "".to_string(),
|
||||
clear: true,
|
||||
will_init: false,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
pub fn wal_init() -> Self {
|
||||
Self::Test {
|
||||
append: "".to_string(),
|
||||
clear: true,
|
||||
will_init: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Build a human-readable string to describe a WAL record
|
||||
///
|
||||
/// For debugging purposes
|
||||
pub fn describe_wal_record(rec: &NeonWalRecord) -> Result<String, DeserializeError> {
|
||||
match rec {
|
||||
NeonWalRecord::Postgres { will_init, rec } => Ok(format!(
|
||||
"will_init: {}, {}",
|
||||
will_init,
|
||||
describe_postgres_wal_record(rec)?
|
||||
)),
|
||||
_ => Ok(format!("{:?}", rec)),
|
||||
}
|
||||
}
|
||||
@@ -1,13 +1,16 @@
|
||||
use crate::walrecord::NeonWalRecord;
|
||||
use anyhow::Result;
|
||||
//! This module defines the value type used by the storage engine.
|
||||
//!
|
||||
//! A [`Value`] represents either a completely new value for one Key ([`Value::Image`]),
|
||||
//! or a "delta" of how to get from previous version of the value to the new one
|
||||
//! ([`Value::WalRecord`]])
|
||||
//!
|
||||
//! Note that the [`Value`] type is used for the permananent storage format, so any
|
||||
//! changes to it must be backwards compatible.
|
||||
|
||||
use crate::record::NeonWalRecord;
|
||||
use bytes::Bytes;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::ops::AddAssign;
|
||||
use std::time::Duration;
|
||||
|
||||
pub use pageserver_api::key::{Key, KEY_SIZE};
|
||||
|
||||
/// A 'value' stored for a one Key.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub enum Value {
|
||||
/// An Image value contains a full copy of the value
|
||||
@@ -20,10 +23,12 @@ pub enum Value {
|
||||
}
|
||||
|
||||
impl Value {
|
||||
#[inline(always)]
|
||||
pub fn is_image(&self) -> bool {
|
||||
matches!(self, Value::Image(_))
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn will_init(&self) -> bool {
|
||||
match self {
|
||||
Value::Image(_) => true,
|
||||
@@ -33,17 +38,18 @@ impl Value {
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub(crate) enum InvalidInput {
|
||||
pub enum InvalidInput {
|
||||
TooShortValue,
|
||||
TooShortPostgresRecord,
|
||||
}
|
||||
|
||||
/// We could have a ValueRef where everything is `serde(borrow)`. Before implementing that, lets
|
||||
/// use this type for querying if a slice looks some particular way.
|
||||
pub(crate) struct ValueBytes;
|
||||
pub struct ValueBytes;
|
||||
|
||||
impl ValueBytes {
|
||||
pub(crate) fn will_init(raw: &[u8]) -> Result<bool, InvalidInput> {
|
||||
#[inline(always)]
|
||||
pub fn will_init(raw: &[u8]) -> Result<bool, InvalidInput> {
|
||||
if raw.len() < 12 {
|
||||
return Err(InvalidInput::TooShortValue);
|
||||
}
|
||||
@@ -79,6 +85,7 @@ impl ValueBytes {
|
||||
mod test {
|
||||
use super::*;
|
||||
|
||||
use bytes::Bytes;
|
||||
use utils::bin_ser::BeSer;
|
||||
|
||||
macro_rules! roundtrip {
|
||||
@@ -229,56 +236,3 @@ mod test {
|
||||
assert!(!ValueBytes::will_init(&expected).unwrap());
|
||||
}
|
||||
}
|
||||
|
||||
///
|
||||
/// Result of performing GC
|
||||
///
|
||||
#[derive(Default, Serialize, Debug)]
|
||||
pub struct GcResult {
|
||||
pub layers_total: u64,
|
||||
pub layers_needed_by_cutoff: u64,
|
||||
pub layers_needed_by_pitr: u64,
|
||||
pub layers_needed_by_branches: u64,
|
||||
pub layers_needed_by_leases: u64,
|
||||
pub layers_not_updated: u64,
|
||||
pub layers_removed: u64, // # of layer files removed because they have been made obsolete by newer ondisk files.
|
||||
|
||||
#[serde(serialize_with = "serialize_duration_as_millis")]
|
||||
pub elapsed: Duration,
|
||||
|
||||
/// The layers which were garbage collected.
|
||||
///
|
||||
/// Used in `/v1/tenant/:tenant_id/timeline/:timeline_id/do_gc` to wait for the layers to be
|
||||
/// dropped in tests.
|
||||
#[cfg(feature = "testing")]
|
||||
#[serde(skip)]
|
||||
pub(crate) doomed_layers: Vec<crate::tenant::storage_layer::Layer>,
|
||||
}
|
||||
|
||||
// helper function for `GcResult`, serializing a `Duration` as an integer number of milliseconds
|
||||
fn serialize_duration_as_millis<S>(d: &Duration, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
d.as_millis().serialize(serializer)
|
||||
}
|
||||
|
||||
impl AddAssign for GcResult {
|
||||
fn add_assign(&mut self, other: Self) {
|
||||
self.layers_total += other.layers_total;
|
||||
self.layers_needed_by_pitr += other.layers_needed_by_pitr;
|
||||
self.layers_needed_by_cutoff += other.layers_needed_by_cutoff;
|
||||
self.layers_needed_by_branches += other.layers_needed_by_branches;
|
||||
self.layers_needed_by_leases += other.layers_needed_by_leases;
|
||||
self.layers_not_updated += other.layers_not_updated;
|
||||
self.layers_removed += other.layers_removed;
|
||||
|
||||
self.elapsed += other.elapsed;
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
{
|
||||
let mut other = other;
|
||||
self.doomed_layers.append(&mut other.doomed_layers);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -738,6 +738,20 @@ impl<IO: AsyncRead + AsyncWrite + Unpin> PostgresBackend<IO> {
|
||||
QueryError::SimulatedConnectionError => {
|
||||
return Err(QueryError::SimulatedConnectionError)
|
||||
}
|
||||
err @ QueryError::Reconnect => {
|
||||
// Instruct the client to reconnect, stop processing messages
|
||||
// from this libpq connection and, finally, disconnect from the
|
||||
// server side (returning an Err achieves the later).
|
||||
//
|
||||
// Note the flushing is done by the caller.
|
||||
let reconnect_error = short_error(&err);
|
||||
self.write_message_noflush(&BeMessage::ErrorResponse(
|
||||
&reconnect_error,
|
||||
Some(err.pg_error_code()),
|
||||
))?;
|
||||
|
||||
return Err(err);
|
||||
}
|
||||
e => {
|
||||
log_query_error(query_string, &e);
|
||||
let short_error = short_error(&e);
|
||||
@@ -921,12 +935,11 @@ impl<IO: AsyncRead + AsyncWrite + Unpin> PostgresBackendReader<IO> {
|
||||
/// A futures::AsyncWrite implementation that wraps all data written to it in CopyData
|
||||
/// messages.
|
||||
///
|
||||
|
||||
pub struct CopyDataWriter<'a, IO> {
|
||||
pgb: &'a mut PostgresBackend<IO>,
|
||||
}
|
||||
|
||||
impl<'a, IO: AsyncRead + AsyncWrite + Unpin> AsyncWrite for CopyDataWriter<'a, IO> {
|
||||
impl<IO: AsyncRead + AsyncWrite + Unpin> AsyncWrite for CopyDataWriter<'_, IO> {
|
||||
fn poll_write(
|
||||
self: Pin<&mut Self>,
|
||||
cx: &mut std::task::Context<'_>,
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
use once_cell::sync::Lazy;
|
||||
use postgres_backend::{AuthType, Handler, PostgresBackend, QueryError};
|
||||
use pq_proto::{BeMessage, RowDescriptor};
|
||||
use rustls::crypto::aws_lc_rs;
|
||||
use std::io::Cursor;
|
||||
use std::sync::Arc;
|
||||
use tokio::io::{AsyncRead, AsyncWrite};
|
||||
@@ -92,10 +93,13 @@ static CERT: Lazy<rustls::pki_types::CertificateDer<'static>> = Lazy::new(|| {
|
||||
async fn simple_select_ssl() {
|
||||
let (client_sock, server_sock) = make_tcp_pair().await;
|
||||
|
||||
let server_cfg = rustls::ServerConfig::builder()
|
||||
.with_no_client_auth()
|
||||
.with_single_cert(vec![CERT.clone()], KEY.clone_key())
|
||||
.unwrap();
|
||||
let server_cfg =
|
||||
rustls::ServerConfig::builder_with_provider(Arc::new(aws_lc_rs::default_provider()))
|
||||
.with_safe_default_protocol_versions()
|
||||
.expect("aws_lc_rs should support the default protocol versions")
|
||||
.with_no_client_auth()
|
||||
.with_single_cert(vec![CERT.clone()], KEY.clone_key())
|
||||
.unwrap();
|
||||
let tls_config = Some(Arc::new(server_cfg));
|
||||
let pgbackend =
|
||||
PostgresBackend::new(server_sock, AuthType::Trust, tls_config).expect("pgbackend creation");
|
||||
@@ -105,13 +109,16 @@ async fn simple_select_ssl() {
|
||||
pgbackend.run(&mut handler, &CancellationToken::new()).await
|
||||
});
|
||||
|
||||
let client_cfg = rustls::ClientConfig::builder()
|
||||
.with_root_certificates({
|
||||
let mut store = rustls::RootCertStore::empty();
|
||||
store.add(CERT.clone()).unwrap();
|
||||
store
|
||||
})
|
||||
.with_no_client_auth();
|
||||
let client_cfg =
|
||||
rustls::ClientConfig::builder_with_provider(Arc::new(aws_lc_rs::default_provider()))
|
||||
.with_safe_default_protocol_versions()
|
||||
.expect("aws_lc_rs should support the default protocol versions")
|
||||
.with_root_certificates({
|
||||
let mut store = rustls::RootCertStore::empty();
|
||||
store.add(CERT.clone()).unwrap();
|
||||
store
|
||||
})
|
||||
.with_no_client_auth();
|
||||
let mut make_tls_connect = tokio_postgres_rustls::MakeRustlsConnect::new(client_cfg);
|
||||
let tls_connect = <MakeRustlsConnect as MakeTlsConnect<TcpStream>>::make_tls_connect(
|
||||
&mut make_tls_connect,
|
||||
|
||||
@@ -15,6 +15,7 @@ memoffset.workspace = true
|
||||
thiserror.workspace = true
|
||||
serde.workspace = true
|
||||
utils.workspace = true
|
||||
tracing.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
env_logger.workspace = true
|
||||
|
||||
@@ -36,6 +36,7 @@ macro_rules! postgres_ffi {
|
||||
pub mod controlfile_utils;
|
||||
pub mod nonrelfile_utils;
|
||||
pub mod wal_craft_test_export;
|
||||
pub mod wal_generator;
|
||||
pub mod waldecoder_handler;
|
||||
pub mod xlog_utils;
|
||||
|
||||
@@ -217,6 +218,7 @@ macro_rules! enum_pgversion {
|
||||
|
||||
pub mod pg_constants;
|
||||
pub mod relfile_utils;
|
||||
pub mod walrecord;
|
||||
|
||||
// Export some widely used datatypes that are unlikely to change across Postgres versions
|
||||
pub use v14::bindings::RepOriginId;
|
||||
|
||||
203
libs/postgres_ffi/src/wal_generator.rs
Normal file
203
libs/postgres_ffi/src/wal_generator.rs
Normal file
@@ -0,0 +1,203 @@
|
||||
use std::ffi::CStr;
|
||||
|
||||
use bytes::{Bytes, BytesMut};
|
||||
use crc32c::crc32c_append;
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
use super::bindings::{XLogLongPageHeaderData, XLogPageHeaderData, XLOG_PAGE_MAGIC};
|
||||
use super::xlog_utils::{
|
||||
XlLogicalMessage, XLOG_RECORD_CRC_OFFS, XLOG_SIZE_OF_XLOG_RECORD, XLP_BKP_REMOVABLE,
|
||||
XLP_FIRST_IS_CONTRECORD,
|
||||
};
|
||||
use super::XLogRecord;
|
||||
use crate::pg_constants::{
|
||||
RM_LOGICALMSG_ID, XLOG_LOGICAL_MESSAGE, XLP_LONG_HEADER, XLR_BLOCK_ID_DATA_LONG,
|
||||
XLR_BLOCK_ID_DATA_SHORT,
|
||||
};
|
||||
use crate::{WAL_SEGMENT_SIZE, XLOG_BLCKSZ};
|
||||
|
||||
/// Generates binary WAL records for use in tests and benchmarks. Currently only generates logical
|
||||
/// messages (effectively noops) with a fixed payload. It is used as an iterator which yields
|
||||
/// encoded bytes for a single WAL record, including internal page headers if it spans pages.
|
||||
/// Concatenating the bytes will yield a complete, well-formed WAL, which can be chunked at segment
|
||||
/// boundaries if desired. Not optimized for performance.
|
||||
///
|
||||
/// The WAL format is version-dependant (see e.g. `XLOG_PAGE_MAGIC`), so make sure to import this
|
||||
/// for the appropriate Postgres version (e.g. `postgres_ffi::v17::wal_generator::WalGenerator`).
|
||||
///
|
||||
/// A WAL is split into 16 MB segments. Each segment is split into 8 KB pages, with headers.
|
||||
/// Records are arbitrary length, 8-byte aligned, and may span pages. The layout is e.g.:
|
||||
///
|
||||
/// | Segment 1 | Segment 2 | Segment 3 |
|
||||
/// | Page 1 | Page 2 | Page 3 | Page 4 | Page 5 | Page 6 | Page 7 | Page 8 | Page 9 |
|
||||
/// | R1 | R2 |R3| R4 | R5 | R6 | R7 | R8 |
|
||||
///
|
||||
/// TODO: support generating actual tables and rows.
|
||||
#[derive(Default)]
|
||||
pub struct WalGenerator {
|
||||
/// Current LSN to append the next record at.
|
||||
///
|
||||
/// Callers can modify this (and prev_lsn) to restart generation at a different LSN, but should
|
||||
/// ensure that the LSN is on a valid record boundary (i.e. we can't start appending in the
|
||||
/// middle on an existing record or header, or beyond the end of the existing WAL).
|
||||
pub lsn: Lsn,
|
||||
/// The starting LSN of the previous record. Used in WAL record headers. The Safekeeper doesn't
|
||||
/// care about this, unlike Postgres, but we include it for completeness.
|
||||
pub prev_lsn: Lsn,
|
||||
}
|
||||
|
||||
impl WalGenerator {
|
||||
// For now, hardcode the message payload.
|
||||
// TODO: support specifying the payload size.
|
||||
const PREFIX: &CStr = c"prefix";
|
||||
const MESSAGE: &[u8] = b"message";
|
||||
|
||||
// Hardcode the sys, timeline, and DB IDs. We can make them configurable if we care about them.
|
||||
const SYS_ID: u64 = 0;
|
||||
const TIMELINE_ID: u32 = 1;
|
||||
const DB_ID: u32 = 0;
|
||||
|
||||
/// Creates a new WAL generator, which emits logical message records (noops).
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
/// Encodes a logical message (basically a noop), with the given prefix and message.
|
||||
pub(crate) fn encode_logical_message(prefix: &CStr, message: &[u8]) -> Bytes {
|
||||
let prefix = prefix.to_bytes_with_nul();
|
||||
let header = XlLogicalMessage {
|
||||
db_id: Self::DB_ID,
|
||||
transactional: 0,
|
||||
prefix_size: prefix.len() as u64,
|
||||
message_size: message.len() as u64,
|
||||
};
|
||||
[&header.encode(), prefix, message].concat().into()
|
||||
}
|
||||
|
||||
/// Encode a WAL record with the given payload data (e.g. a logical message).
|
||||
pub(crate) fn encode_record(data: Bytes, rmid: u8, info: u8, prev_lsn: Lsn) -> Bytes {
|
||||
// Prefix data with block ID and length.
|
||||
let data_header = Bytes::from(match data.len() {
|
||||
0 => vec![],
|
||||
1..=255 => vec![XLR_BLOCK_ID_DATA_SHORT, data.len() as u8],
|
||||
256.. => {
|
||||
let len_bytes = (data.len() as u32).to_le_bytes();
|
||||
[&[XLR_BLOCK_ID_DATA_LONG], len_bytes.as_slice()].concat()
|
||||
}
|
||||
});
|
||||
|
||||
// Construct the WAL record header.
|
||||
let mut header = XLogRecord {
|
||||
xl_tot_len: (XLOG_SIZE_OF_XLOG_RECORD + data_header.len() + data.len()) as u32,
|
||||
xl_xid: 0,
|
||||
xl_prev: prev_lsn.into(),
|
||||
xl_info: info,
|
||||
xl_rmid: rmid,
|
||||
__bindgen_padding_0: [0; 2],
|
||||
xl_crc: 0, // see below
|
||||
};
|
||||
|
||||
// Compute the CRC checksum for the data, and the header up to the CRC field.
|
||||
let mut crc = 0;
|
||||
crc = crc32c_append(crc, &data_header);
|
||||
crc = crc32c_append(crc, &data);
|
||||
crc = crc32c_append(crc, &header.encode().unwrap()[0..XLOG_RECORD_CRC_OFFS]);
|
||||
header.xl_crc = crc;
|
||||
|
||||
// Encode the final header and record.
|
||||
let header = header.encode().unwrap();
|
||||
|
||||
[header, data_header, data].concat().into()
|
||||
}
|
||||
|
||||
/// Injects page headers on 8KB page boundaries. Takes the current LSN position where the record
|
||||
/// is to be appended.
|
||||
fn encode_pages(record: Bytes, mut lsn: Lsn) -> Bytes {
|
||||
// Fast path: record fits in current page, and the page already has a header.
|
||||
if lsn.remaining_in_block() as usize >= record.len() && lsn.block_offset() > 0 {
|
||||
return record;
|
||||
}
|
||||
|
||||
let mut pages = BytesMut::new();
|
||||
let mut remaining = record.clone(); // Bytes::clone() is cheap
|
||||
while !remaining.is_empty() {
|
||||
// At new page boundary, inject page header.
|
||||
if lsn.block_offset() == 0 {
|
||||
let mut page_header = XLogPageHeaderData {
|
||||
xlp_magic: XLOG_PAGE_MAGIC as u16,
|
||||
xlp_info: XLP_BKP_REMOVABLE,
|
||||
xlp_tli: Self::TIMELINE_ID,
|
||||
xlp_pageaddr: lsn.0,
|
||||
xlp_rem_len: 0,
|
||||
__bindgen_padding_0: [0; 4],
|
||||
};
|
||||
// If the record was split across page boundaries, mark as continuation.
|
||||
if remaining.len() < record.len() {
|
||||
page_header.xlp_rem_len = remaining.len() as u32;
|
||||
page_header.xlp_info |= XLP_FIRST_IS_CONTRECORD;
|
||||
}
|
||||
// At start of segment, use a long page header.
|
||||
let page_header = if lsn.segment_offset(WAL_SEGMENT_SIZE) == 0 {
|
||||
page_header.xlp_info |= XLP_LONG_HEADER;
|
||||
XLogLongPageHeaderData {
|
||||
std: page_header,
|
||||
xlp_sysid: Self::SYS_ID,
|
||||
xlp_seg_size: WAL_SEGMENT_SIZE as u32,
|
||||
xlp_xlog_blcksz: XLOG_BLCKSZ as u32,
|
||||
}
|
||||
.encode()
|
||||
.unwrap()
|
||||
} else {
|
||||
page_header.encode().unwrap()
|
||||
};
|
||||
pages.extend_from_slice(&page_header);
|
||||
lsn += page_header.len() as u64;
|
||||
}
|
||||
|
||||
// Append the record up to the next page boundary, if any.
|
||||
let page_free = lsn.remaining_in_block() as usize;
|
||||
let chunk = remaining.split_to(std::cmp::min(page_free, remaining.len()));
|
||||
pages.extend_from_slice(&chunk);
|
||||
lsn += chunk.len() as u64;
|
||||
}
|
||||
pages.freeze()
|
||||
}
|
||||
|
||||
/// Records must be 8-byte aligned. Take an encoded record (including any injected page
|
||||
/// boundaries), starting at the given LSN, and add any necessary padding at the end.
|
||||
fn pad_record(record: Bytes, mut lsn: Lsn) -> Bytes {
|
||||
lsn += record.len() as u64;
|
||||
let padding = lsn.calc_padding(8u64) as usize;
|
||||
if padding == 0 {
|
||||
return record;
|
||||
}
|
||||
[record, Bytes::from(vec![0; padding])].concat().into()
|
||||
}
|
||||
|
||||
/// Generates a record with an arbitrary payload at the current LSN, then increments the LSN.
|
||||
pub fn generate_record(&mut self, data: Bytes, rmid: u8, info: u8) -> Bytes {
|
||||
let record = Self::encode_record(data, rmid, info, self.prev_lsn);
|
||||
let record = Self::encode_pages(record, self.lsn);
|
||||
let record = Self::pad_record(record, self.lsn);
|
||||
self.prev_lsn = self.lsn;
|
||||
self.lsn += record.len() as u64;
|
||||
record
|
||||
}
|
||||
|
||||
/// Generates a logical message at the current LSN. Can be used to construct arbitrary messages.
|
||||
pub fn generate_logical_message(&mut self, prefix: &CStr, message: &[u8]) -> Bytes {
|
||||
let data = Self::encode_logical_message(prefix, message);
|
||||
self.generate_record(data, RM_LOGICALMSG_ID, XLOG_LOGICAL_MESSAGE)
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate WAL records as an iterator.
|
||||
impl Iterator for WalGenerator {
|
||||
type Item = (Lsn, Bytes);
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let lsn = self.lsn;
|
||||
let record = self.generate_logical_message(Self::PREFIX, Self::MESSAGE);
|
||||
Some((lsn, record))
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -7,15 +7,14 @@
|
||||
// have been named the same as the corresponding PostgreSQL functions instead.
|
||||
//
|
||||
|
||||
use crc32c::crc32c_append;
|
||||
|
||||
use super::super::waldecoder::WalStreamDecoder;
|
||||
use super::bindings::{
|
||||
CheckPoint, ControlFileData, DBState_DB_SHUTDOWNED, FullTransactionId, TimeLineID, TimestampTz,
|
||||
XLogLongPageHeaderData, XLogPageHeaderData, XLogRecPtr, XLogRecord, XLogSegNo, XLOG_PAGE_MAGIC,
|
||||
};
|
||||
use super::wal_generator::WalGenerator;
|
||||
use super::PG_MAJORVERSION;
|
||||
use crate::pg_constants;
|
||||
use crate::pg_constants::{self, RM_LOGICALMSG_ID, XLOG_LOGICAL_MESSAGE};
|
||||
use crate::PG_TLI;
|
||||
use crate::{uint32, uint64, Oid};
|
||||
use crate::{WAL_SEGMENT_SIZE, XLOG_BLCKSZ};
|
||||
@@ -26,7 +25,7 @@ use bytes::{Buf, Bytes};
|
||||
use log::*;
|
||||
|
||||
use serde::Serialize;
|
||||
use std::ffi::OsStr;
|
||||
use std::ffi::{CString, OsStr};
|
||||
use std::fs::File;
|
||||
use std::io::prelude::*;
|
||||
use std::io::ErrorKind;
|
||||
@@ -39,6 +38,7 @@ use utils::bin_ser::SerializeError;
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
pub const XLOG_FNAME_LEN: usize = 24;
|
||||
pub const XLP_BKP_REMOVABLE: u16 = 0x0004;
|
||||
pub const XLP_FIRST_IS_CONTRECORD: u16 = 0x0001;
|
||||
pub const XLP_REM_LEN_OFFS: usize = 2 + 2 + 4 + 8;
|
||||
pub const XLOG_RECORD_CRC_OFFS: usize = 4 + 4 + 8 + 1 + 1 + 2;
|
||||
@@ -489,64 +489,16 @@ impl XlLogicalMessage {
|
||||
/// Create new WAL record for non-transactional logical message.
|
||||
/// Used for creating artificial WAL for tests, as LogicalMessage
|
||||
/// record is basically no-op.
|
||||
///
|
||||
/// NOTE: This leaves the xl_prev field zero. The safekeeper and
|
||||
/// pageserver tolerate that, but PostgreSQL does not.
|
||||
pub fn encode_logical_message(prefix: &str, message: &str) -> Vec<u8> {
|
||||
let mut prefix_bytes: Vec<u8> = Vec::with_capacity(prefix.len() + 1);
|
||||
prefix_bytes.write_all(prefix.as_bytes()).unwrap();
|
||||
prefix_bytes.push(0);
|
||||
|
||||
let message_bytes = message.as_bytes();
|
||||
|
||||
let logical_message = XlLogicalMessage {
|
||||
db_id: 0,
|
||||
transactional: 0,
|
||||
prefix_size: prefix_bytes.len() as u64,
|
||||
message_size: message_bytes.len() as u64,
|
||||
};
|
||||
|
||||
let mainrdata = logical_message.encode();
|
||||
let mainrdata_len: usize = mainrdata.len() + prefix_bytes.len() + message_bytes.len();
|
||||
// only short mainrdata is supported for now
|
||||
assert!(mainrdata_len <= 255);
|
||||
let mainrdata_len = mainrdata_len as u8;
|
||||
|
||||
let mut data: Vec<u8> = vec![pg_constants::XLR_BLOCK_ID_DATA_SHORT, mainrdata_len];
|
||||
data.extend_from_slice(&mainrdata);
|
||||
data.extend_from_slice(&prefix_bytes);
|
||||
data.extend_from_slice(message_bytes);
|
||||
|
||||
let total_len = XLOG_SIZE_OF_XLOG_RECORD + data.len();
|
||||
|
||||
let mut header = XLogRecord {
|
||||
xl_tot_len: total_len as u32,
|
||||
xl_xid: 0,
|
||||
xl_prev: 0,
|
||||
xl_info: 0,
|
||||
xl_rmid: 21,
|
||||
__bindgen_padding_0: [0u8; 2usize],
|
||||
xl_crc: 0, // crc will be calculated later
|
||||
};
|
||||
|
||||
let header_bytes = header.encode().expect("failed to encode header");
|
||||
let crc = crc32c_append(0, &data);
|
||||
let crc = crc32c_append(crc, &header_bytes[0..XLOG_RECORD_CRC_OFFS]);
|
||||
header.xl_crc = crc;
|
||||
|
||||
let mut wal: Vec<u8> = Vec::new();
|
||||
wal.extend_from_slice(&header.encode().expect("failed to encode header"));
|
||||
wal.extend_from_slice(&data);
|
||||
|
||||
// WAL start position must be aligned at 8 bytes,
|
||||
// this will add padding for the next WAL record.
|
||||
const PADDING: usize = 8;
|
||||
let padding_rem = wal.len() % PADDING;
|
||||
if padding_rem != 0 {
|
||||
wal.resize(wal.len() + PADDING - padding_rem, 0);
|
||||
}
|
||||
|
||||
wal
|
||||
pub fn encode_logical_message(prefix: &str, message: &str) -> Bytes {
|
||||
// This function can take untrusted input, so discard any NUL bytes in the prefix string.
|
||||
let prefix = CString::new(prefix.replace('\0', "")).expect("no NULs");
|
||||
let message = message.as_bytes();
|
||||
WalGenerator::encode_record(
|
||||
WalGenerator::encode_logical_message(&prefix, message),
|
||||
RM_LOGICALMSG_ID,
|
||||
XLOG_LOGICAL_MESSAGE,
|
||||
Lsn(0),
|
||||
)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -727,7 +727,7 @@ pub const SQLSTATE_INTERNAL_ERROR: &[u8; 5] = b"XX000";
|
||||
pub const SQLSTATE_ADMIN_SHUTDOWN: &[u8; 5] = b"57P01";
|
||||
pub const SQLSTATE_SUCCESSFUL_COMPLETION: &[u8; 5] = b"00000";
|
||||
|
||||
impl<'a> BeMessage<'a> {
|
||||
impl BeMessage<'_> {
|
||||
/// Serialize `message` to the given `buf`.
|
||||
/// Apart from smart memory managemet, BytesMut is good here as msg len
|
||||
/// precedes its body and it is handy to write it down first and then fill
|
||||
|
||||
@@ -357,22 +357,20 @@ impl RemoteStorage for LocalFs {
|
||||
.list_recursive(prefix)
|
||||
.await
|
||||
.map_err(DownloadError::Other)?;
|
||||
let objects = keys
|
||||
.into_iter()
|
||||
.filter_map(|k| {
|
||||
let path = k.with_base(&self.storage_root);
|
||||
if path.is_dir() {
|
||||
None
|
||||
} else {
|
||||
Some(ListingObject {
|
||||
key: k.clone(),
|
||||
// LocalFs is just for testing, so just specify a dummy time
|
||||
last_modified: SystemTime::now(),
|
||||
size: 0,
|
||||
})
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
let mut objects = Vec::with_capacity(keys.len());
|
||||
for key in keys {
|
||||
let path = key.with_base(&self.storage_root);
|
||||
let metadata = file_metadata(&path).await?;
|
||||
if metadata.is_dir() {
|
||||
continue;
|
||||
}
|
||||
objects.push(ListingObject {
|
||||
key: key.clone(),
|
||||
last_modified: metadata.modified()?,
|
||||
size: metadata.len(),
|
||||
});
|
||||
}
|
||||
let objects = objects;
|
||||
|
||||
if let ListingMode::NoDelimiter = mode {
|
||||
result.keys = objects;
|
||||
@@ -410,9 +408,8 @@ impl RemoteStorage for LocalFs {
|
||||
} else {
|
||||
result.keys.push(ListingObject {
|
||||
key: RemotePath::from_string(&relative_key).unwrap(),
|
||||
// LocalFs is just for testing
|
||||
last_modified: SystemTime::now(),
|
||||
size: 0,
|
||||
last_modified: object.last_modified,
|
||||
size: object.size,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -97,7 +97,7 @@ pub fn draw_svg(
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
impl<'a> SvgDraw<'a> {
|
||||
impl SvgDraw<'_> {
|
||||
fn calculate_svg_layout(&mut self) {
|
||||
// Find x scale
|
||||
let segments = &self.storage.segments;
|
||||
|
||||
@@ -82,7 +82,7 @@ where
|
||||
fn extract_remote_context(headers: &HeaderMap) -> opentelemetry::Context {
|
||||
struct HeaderExtractor<'a>(&'a HeaderMap);
|
||||
|
||||
impl<'a> opentelemetry::propagation::Extractor for HeaderExtractor<'a> {
|
||||
impl opentelemetry::propagation::Extractor for HeaderExtractor<'_> {
|
||||
fn get(&self, key: &str) -> Option<&str> {
|
||||
self.0.get(key).and_then(|value| value.to_str().ok())
|
||||
}
|
||||
|
||||
@@ -12,7 +12,7 @@ use crate::seqwait::MonotonicCounter;
|
||||
pub const XLOG_BLCKSZ: u32 = 8192;
|
||||
|
||||
/// A Postgres LSN (Log Sequence Number), also known as an XLogRecPtr
|
||||
#[derive(Clone, Copy, Eq, Ord, PartialEq, PartialOrd, Hash)]
|
||||
#[derive(Clone, Copy, Default, Eq, Ord, PartialEq, PartialOrd, Hash)]
|
||||
pub struct Lsn(pub u64);
|
||||
|
||||
impl Serialize for Lsn {
|
||||
@@ -37,7 +37,7 @@ impl<'de> Deserialize<'de> for Lsn {
|
||||
is_human_readable_deserializer: bool,
|
||||
}
|
||||
|
||||
impl<'de> Visitor<'de> for LsnVisitor {
|
||||
impl Visitor<'_> for LsnVisitor {
|
||||
type Value = Lsn;
|
||||
|
||||
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
|
||||
|
||||
@@ -73,7 +73,7 @@ impl<T> Poison<T> {
|
||||
/// and subsequent calls to [`Poison::check_and_arm`] will fail with an error.
|
||||
pub struct Guard<'a, T>(&'a mut Poison<T>);
|
||||
|
||||
impl<'a, T> Guard<'a, T> {
|
||||
impl<T> Guard<'_, T> {
|
||||
pub fn data(&self) -> &T {
|
||||
&self.0.data
|
||||
}
|
||||
@@ -94,7 +94,7 @@ impl<'a, T> Guard<'a, T> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T> Drop for Guard<'a, T> {
|
||||
impl<T> Drop for Guard<'_, T> {
|
||||
fn drop(&mut self) {
|
||||
match self.0.state {
|
||||
State::Clean => {
|
||||
|
||||
@@ -164,7 +164,7 @@ impl TenantShardId {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> std::fmt::Display for ShardSlug<'a> {
|
||||
impl std::fmt::Display for ShardSlug<'_> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
|
||||
@@ -152,7 +152,7 @@ pub struct RcuWriteGuard<'a, V> {
|
||||
inner: RwLockWriteGuard<'a, RcuInner<V>>,
|
||||
}
|
||||
|
||||
impl<'a, V> Deref for RcuWriteGuard<'a, V> {
|
||||
impl<V> Deref for RcuWriteGuard<'_, V> {
|
||||
type Target = V;
|
||||
|
||||
fn deref(&self) -> &V {
|
||||
@@ -160,7 +160,7 @@ impl<'a, V> Deref for RcuWriteGuard<'a, V> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, V> RcuWriteGuard<'a, V> {
|
||||
impl<V> RcuWriteGuard<'_, V> {
|
||||
///
|
||||
/// Store a new value. The new value will be written to the Rcu immediately,
|
||||
/// and will be immediately seen by any `read` calls that start afterwards.
|
||||
|
||||
@@ -219,7 +219,7 @@ impl<'a, T> CountWaitingInitializers<'a, T> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T> Drop for CountWaitingInitializers<'a, T> {
|
||||
impl<T> Drop for CountWaitingInitializers<'_, T> {
|
||||
fn drop(&mut self) {
|
||||
self.0.initializers.fetch_sub(1, Ordering::Relaxed);
|
||||
}
|
||||
@@ -250,7 +250,7 @@ impl<T> std::ops::DerefMut for Guard<'_, T> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T> Guard<'a, T> {
|
||||
impl<T> Guard<'_, T> {
|
||||
/// Take the current value, and a new permit for it's deinitialization.
|
||||
///
|
||||
/// The permit will be on a semaphore part of the new internal value, and any following
|
||||
|
||||
@@ -184,23 +184,23 @@ mod tests {
|
||||
|
||||
struct MemoryIdentity<'a>(&'a dyn Extractor);
|
||||
|
||||
impl<'a> MemoryIdentity<'a> {
|
||||
impl MemoryIdentity<'_> {
|
||||
fn as_ptr(&self) -> *const () {
|
||||
self.0 as *const _ as *const ()
|
||||
}
|
||||
}
|
||||
impl<'a> PartialEq for MemoryIdentity<'a> {
|
||||
impl PartialEq for MemoryIdentity<'_> {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.as_ptr() == other.as_ptr()
|
||||
}
|
||||
}
|
||||
impl<'a> Eq for MemoryIdentity<'a> {}
|
||||
impl<'a> Hash for MemoryIdentity<'a> {
|
||||
impl Eq for MemoryIdentity<'_> {}
|
||||
impl Hash for MemoryIdentity<'_> {
|
||||
fn hash<H: Hasher>(&self, state: &mut H) {
|
||||
self.as_ptr().hash(state);
|
||||
}
|
||||
}
|
||||
impl<'a> fmt::Debug for MemoryIdentity<'a> {
|
||||
impl fmt::Debug for MemoryIdentity<'_> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{:p}: {}", self.as_ptr(), self.0.id())
|
||||
}
|
||||
|
||||
18
libs/wal_decoder/Cargo.toml
Normal file
18
libs/wal_decoder/Cargo.toml
Normal file
@@ -0,0 +1,18 @@
|
||||
[package]
|
||||
name = "wal_decoder"
|
||||
version = "0.1.0"
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[features]
|
||||
testing = []
|
||||
|
||||
[dependencies]
|
||||
anyhow.workspace = true
|
||||
bytes.workspace = true
|
||||
pageserver_api.workspace = true
|
||||
postgres_ffi.workspace = true
|
||||
serde.workspace = true
|
||||
tracing.workspace = true
|
||||
utils.workspace = true
|
||||
workspace_hack = { version = "0.1", path = "../../workspace_hack" }
|
||||
970
libs/wal_decoder/src/decoder.rs
Normal file
970
libs/wal_decoder/src/decoder.rs
Normal file
@@ -0,0 +1,970 @@
|
||||
//! This module contains logic for decoding and interpreting
|
||||
//! raw bytes which represent a raw Postgres WAL record.
|
||||
|
||||
use crate::models::*;
|
||||
use bytes::{Buf, Bytes, BytesMut};
|
||||
use pageserver_api::key::rel_block_to_key;
|
||||
use pageserver_api::record::NeonWalRecord;
|
||||
use pageserver_api::reltag::{RelTag, SlruKind};
|
||||
use pageserver_api::shard::ShardIdentity;
|
||||
use pageserver_api::value::Value;
|
||||
use postgres_ffi::relfile_utils::VISIBILITYMAP_FORKNUM;
|
||||
use postgres_ffi::walrecord::*;
|
||||
use postgres_ffi::{page_is_new, page_set_lsn, pg_constants, BLCKSZ};
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
impl InterpretedWalRecord {
|
||||
/// Decode and interpreted raw bytes which represent one Postgres WAL record.
|
||||
/// Data blocks which do not match the provided shard identity are filtered out.
|
||||
/// Shard 0 is a special case since it tracks all relation sizes. We only give it
|
||||
/// the keys that are being written as that is enough for updating relation sizes.
|
||||
pub fn from_bytes_filtered(
|
||||
buf: Bytes,
|
||||
shard: &ShardIdentity,
|
||||
lsn: Lsn,
|
||||
pg_version: u32,
|
||||
) -> anyhow::Result<InterpretedWalRecord> {
|
||||
let mut decoded = DecodedWALRecord::default();
|
||||
decode_wal_record(buf, &mut decoded, pg_version)?;
|
||||
|
||||
let flush_uncommitted = if decoded.is_dbase_create_copy(pg_version) {
|
||||
FlushUncommittedRecords::Yes
|
||||
} else {
|
||||
FlushUncommittedRecords::No
|
||||
};
|
||||
|
||||
let metadata_record = MetadataRecord::from_decoded(&decoded, lsn, pg_version)?;
|
||||
|
||||
let mut blocks = Vec::default();
|
||||
for blk in decoded.blocks.iter() {
|
||||
let rel = RelTag {
|
||||
spcnode: blk.rnode_spcnode,
|
||||
dbnode: blk.rnode_dbnode,
|
||||
relnode: blk.rnode_relnode,
|
||||
forknum: blk.forknum,
|
||||
};
|
||||
|
||||
let key = rel_block_to_key(rel, blk.blkno);
|
||||
|
||||
if !key.is_valid_key_on_write_path() {
|
||||
anyhow::bail!("Unsupported key decoded at LSN {}: {}", lsn, key);
|
||||
}
|
||||
|
||||
let key_is_local = shard.is_key_local(&key);
|
||||
|
||||
tracing::debug!(
|
||||
lsn=%lsn,
|
||||
key=%key,
|
||||
"ingest: shard decision {}",
|
||||
if !key_is_local { "drop" } else { "keep" },
|
||||
);
|
||||
|
||||
if !key_is_local {
|
||||
if shard.is_shard_zero() {
|
||||
// Shard 0 tracks relation sizes. Although we will not store this block, we will observe
|
||||
// its blkno in case it implicitly extends a relation.
|
||||
blocks.push((key.to_compact(), None));
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
// Instead of storing full-page-image WAL record,
|
||||
// it is better to store extracted image: we can skip wal-redo
|
||||
// in this case. Also some FPI records may contain multiple (up to 32) pages,
|
||||
// so them have to be copied multiple times.
|
||||
//
|
||||
let value = if blk.apply_image
|
||||
&& blk.has_image
|
||||
&& decoded.xl_rmid == pg_constants::RM_XLOG_ID
|
||||
&& (decoded.xl_info == pg_constants::XLOG_FPI
|
||||
|| decoded.xl_info == pg_constants::XLOG_FPI_FOR_HINT)
|
||||
// compression of WAL is not yet supported: fall back to storing the original WAL record
|
||||
&& !postgres_ffi::bkpimage_is_compressed(blk.bimg_info, pg_version)
|
||||
// do not materialize null pages because them most likely be soon replaced with real data
|
||||
&& blk.bimg_len != 0
|
||||
{
|
||||
// Extract page image from FPI record
|
||||
let img_len = blk.bimg_len as usize;
|
||||
let img_offs = blk.bimg_offset as usize;
|
||||
let mut image = BytesMut::with_capacity(BLCKSZ as usize);
|
||||
// TODO(vlad): skip the copy
|
||||
image.extend_from_slice(&decoded.record[img_offs..img_offs + img_len]);
|
||||
|
||||
if blk.hole_length != 0 {
|
||||
let tail = image.split_off(blk.hole_offset as usize);
|
||||
image.resize(image.len() + blk.hole_length as usize, 0u8);
|
||||
image.unsplit(tail);
|
||||
}
|
||||
//
|
||||
// Match the logic of XLogReadBufferForRedoExtended:
|
||||
// The page may be uninitialized. If so, we can't set the LSN because
|
||||
// that would corrupt the page.
|
||||
//
|
||||
if !page_is_new(&image) {
|
||||
page_set_lsn(&mut image, lsn)
|
||||
}
|
||||
assert_eq!(image.len(), BLCKSZ as usize);
|
||||
|
||||
Value::Image(image.freeze())
|
||||
} else {
|
||||
Value::WalRecord(NeonWalRecord::Postgres {
|
||||
will_init: blk.will_init || blk.apply_image,
|
||||
rec: decoded.record.clone(),
|
||||
})
|
||||
};
|
||||
|
||||
blocks.push((key.to_compact(), Some(value)));
|
||||
}
|
||||
|
||||
Ok(InterpretedWalRecord {
|
||||
metadata_record,
|
||||
blocks,
|
||||
lsn,
|
||||
flush_uncommitted,
|
||||
xid: decoded.xl_xid,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl MetadataRecord {
|
||||
fn from_decoded(
|
||||
decoded: &DecodedWALRecord,
|
||||
lsn: Lsn,
|
||||
pg_version: u32,
|
||||
) -> anyhow::Result<Option<MetadataRecord>> {
|
||||
// Note: this doesn't actually copy the bytes since
|
||||
// the [`Bytes`] type implements it via a level of indirection.
|
||||
let mut buf = decoded.record.clone();
|
||||
buf.advance(decoded.main_data_offset);
|
||||
|
||||
match decoded.xl_rmid {
|
||||
pg_constants::RM_HEAP_ID | pg_constants::RM_HEAP2_ID => {
|
||||
Self::decode_heapam_record(&mut buf, decoded, pg_version)
|
||||
}
|
||||
pg_constants::RM_NEON_ID => Self::decode_neonmgr_record(&mut buf, decoded, pg_version),
|
||||
// Handle other special record types
|
||||
pg_constants::RM_SMGR_ID => Self::decode_smgr_record(&mut buf, decoded),
|
||||
pg_constants::RM_DBASE_ID => Self::decode_dbase_record(&mut buf, decoded, pg_version),
|
||||
pg_constants::RM_TBLSPC_ID => {
|
||||
tracing::trace!("XLOG_TBLSPC_CREATE/DROP is not handled yet");
|
||||
Ok(None)
|
||||
}
|
||||
pg_constants::RM_CLOG_ID => Self::decode_clog_record(&mut buf, decoded, pg_version),
|
||||
pg_constants::RM_XACT_ID => Self::decode_xact_record(&mut buf, decoded, lsn),
|
||||
pg_constants::RM_MULTIXACT_ID => {
|
||||
Self::decode_multixact_record(&mut buf, decoded, pg_version)
|
||||
}
|
||||
pg_constants::RM_RELMAP_ID => Self::decode_relmap_record(&mut buf, decoded),
|
||||
// This is an odd duck. It needs to go to all shards.
|
||||
// Since it uses the checkpoint image (that's initialized from CHECKPOINT_KEY
|
||||
// in WalIngest::new), we have to send the whole DecodedWalRecord::record to
|
||||
// the pageserver and decode it there.
|
||||
//
|
||||
// Alternatively, one can make the checkpoint part of the subscription protocol
|
||||
// to the pageserver. This should work fine, but can be done at a later point.
|
||||
pg_constants::RM_XLOG_ID => Self::decode_xlog_record(&mut buf, decoded, lsn),
|
||||
pg_constants::RM_LOGICALMSG_ID => {
|
||||
Self::decode_logical_message_record(&mut buf, decoded)
|
||||
}
|
||||
pg_constants::RM_STANDBY_ID => Self::decode_standby_record(&mut buf, decoded),
|
||||
pg_constants::RM_REPLORIGIN_ID => Self::decode_replorigin_record(&mut buf, decoded),
|
||||
_unexpected => {
|
||||
// TODO: consider failing here instead of blindly doing something without
|
||||
// understanding the protocol
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn decode_heapam_record(
|
||||
buf: &mut Bytes,
|
||||
decoded: &DecodedWALRecord,
|
||||
pg_version: u32,
|
||||
) -> anyhow::Result<Option<MetadataRecord>> {
|
||||
// Handle VM bit updates that are implicitly part of heap records.
|
||||
|
||||
// First, look at the record to determine which VM bits need
|
||||
// to be cleared. If either of these variables is set, we
|
||||
// need to clear the corresponding bits in the visibility map.
|
||||
let mut new_heap_blkno: Option<u32> = None;
|
||||
let mut old_heap_blkno: Option<u32> = None;
|
||||
let mut flags = pg_constants::VISIBILITYMAP_VALID_BITS;
|
||||
|
||||
match pg_version {
|
||||
14 => {
|
||||
if decoded.xl_rmid == pg_constants::RM_HEAP_ID {
|
||||
let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;
|
||||
|
||||
if info == pg_constants::XLOG_HEAP_INSERT {
|
||||
let xlrec = v14::XlHeapInsert::decode(buf);
|
||||
assert_eq!(0, buf.remaining());
|
||||
if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
|
||||
new_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
} else if info == pg_constants::XLOG_HEAP_DELETE {
|
||||
let xlrec = v14::XlHeapDelete::decode(buf);
|
||||
if (xlrec.flags & pg_constants::XLH_DELETE_ALL_VISIBLE_CLEARED) != 0 {
|
||||
new_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
} else if info == pg_constants::XLOG_HEAP_UPDATE
|
||||
|| info == pg_constants::XLOG_HEAP_HOT_UPDATE
|
||||
{
|
||||
let xlrec = v14::XlHeapUpdate::decode(buf);
|
||||
// the size of tuple data is inferred from the size of the record.
|
||||
// we can't validate the remaining number of bytes without parsing
|
||||
// the tuple data.
|
||||
if (xlrec.flags & pg_constants::XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED) != 0 {
|
||||
old_heap_blkno = Some(decoded.blocks.last().unwrap().blkno);
|
||||
}
|
||||
if (xlrec.flags & pg_constants::XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED) != 0 {
|
||||
// PostgreSQL only uses XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED on a
|
||||
// non-HOT update where the new tuple goes to different page than
|
||||
// the old one. Otherwise, only XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED is
|
||||
// set.
|
||||
new_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
} else if info == pg_constants::XLOG_HEAP_LOCK {
|
||||
let xlrec = v14::XlHeapLock::decode(buf);
|
||||
if (xlrec.flags & pg_constants::XLH_LOCK_ALL_FROZEN_CLEARED) != 0 {
|
||||
old_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
flags = pg_constants::VISIBILITYMAP_ALL_FROZEN;
|
||||
}
|
||||
}
|
||||
} else if decoded.xl_rmid == pg_constants::RM_HEAP2_ID {
|
||||
let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;
|
||||
if info == pg_constants::XLOG_HEAP2_MULTI_INSERT {
|
||||
let xlrec = v14::XlHeapMultiInsert::decode(buf);
|
||||
|
||||
let offset_array_len =
|
||||
if decoded.xl_info & pg_constants::XLOG_HEAP_INIT_PAGE > 0 {
|
||||
// the offsets array is omitted if XLOG_HEAP_INIT_PAGE is set
|
||||
0
|
||||
} else {
|
||||
size_of::<u16>() * xlrec.ntuples as usize
|
||||
};
|
||||
assert_eq!(offset_array_len, buf.remaining());
|
||||
|
||||
if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
|
||||
new_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
} else if info == pg_constants::XLOG_HEAP2_LOCK_UPDATED {
|
||||
let xlrec = v14::XlHeapLockUpdated::decode(buf);
|
||||
if (xlrec.flags & pg_constants::XLH_LOCK_ALL_FROZEN_CLEARED) != 0 {
|
||||
old_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
flags = pg_constants::VISIBILITYMAP_ALL_FROZEN;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
anyhow::bail!("Unknown RMGR {} for Heap decoding", decoded.xl_rmid);
|
||||
}
|
||||
}
|
||||
15 => {
|
||||
if decoded.xl_rmid == pg_constants::RM_HEAP_ID {
|
||||
let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;
|
||||
|
||||
if info == pg_constants::XLOG_HEAP_INSERT {
|
||||
let xlrec = v15::XlHeapInsert::decode(buf);
|
||||
assert_eq!(0, buf.remaining());
|
||||
if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
|
||||
new_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
} else if info == pg_constants::XLOG_HEAP_DELETE {
|
||||
let xlrec = v15::XlHeapDelete::decode(buf);
|
||||
if (xlrec.flags & pg_constants::XLH_DELETE_ALL_VISIBLE_CLEARED) != 0 {
|
||||
new_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
} else if info == pg_constants::XLOG_HEAP_UPDATE
|
||||
|| info == pg_constants::XLOG_HEAP_HOT_UPDATE
|
||||
{
|
||||
let xlrec = v15::XlHeapUpdate::decode(buf);
|
||||
// the size of tuple data is inferred from the size of the record.
|
||||
// we can't validate the remaining number of bytes without parsing
|
||||
// the tuple data.
|
||||
if (xlrec.flags & pg_constants::XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED) != 0 {
|
||||
old_heap_blkno = Some(decoded.blocks.last().unwrap().blkno);
|
||||
}
|
||||
if (xlrec.flags & pg_constants::XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED) != 0 {
|
||||
// PostgreSQL only uses XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED on a
|
||||
// non-HOT update where the new tuple goes to different page than
|
||||
// the old one. Otherwise, only XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED is
|
||||
// set.
|
||||
new_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
} else if info == pg_constants::XLOG_HEAP_LOCK {
|
||||
let xlrec = v15::XlHeapLock::decode(buf);
|
||||
if (xlrec.flags & pg_constants::XLH_LOCK_ALL_FROZEN_CLEARED) != 0 {
|
||||
old_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
flags = pg_constants::VISIBILITYMAP_ALL_FROZEN;
|
||||
}
|
||||
}
|
||||
} else if decoded.xl_rmid == pg_constants::RM_HEAP2_ID {
|
||||
let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;
|
||||
if info == pg_constants::XLOG_HEAP2_MULTI_INSERT {
|
||||
let xlrec = v15::XlHeapMultiInsert::decode(buf);
|
||||
|
||||
let offset_array_len =
|
||||
if decoded.xl_info & pg_constants::XLOG_HEAP_INIT_PAGE > 0 {
|
||||
// the offsets array is omitted if XLOG_HEAP_INIT_PAGE is set
|
||||
0
|
||||
} else {
|
||||
size_of::<u16>() * xlrec.ntuples as usize
|
||||
};
|
||||
assert_eq!(offset_array_len, buf.remaining());
|
||||
|
||||
if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
|
||||
new_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
} else if info == pg_constants::XLOG_HEAP2_LOCK_UPDATED {
|
||||
let xlrec = v15::XlHeapLockUpdated::decode(buf);
|
||||
if (xlrec.flags & pg_constants::XLH_LOCK_ALL_FROZEN_CLEARED) != 0 {
|
||||
old_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
flags = pg_constants::VISIBILITYMAP_ALL_FROZEN;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
anyhow::bail!("Unknown RMGR {} for Heap decoding", decoded.xl_rmid);
|
||||
}
|
||||
}
|
||||
16 => {
|
||||
if decoded.xl_rmid == pg_constants::RM_HEAP_ID {
|
||||
let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;
|
||||
|
||||
if info == pg_constants::XLOG_HEAP_INSERT {
|
||||
let xlrec = v16::XlHeapInsert::decode(buf);
|
||||
assert_eq!(0, buf.remaining());
|
||||
if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
|
||||
new_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
} else if info == pg_constants::XLOG_HEAP_DELETE {
|
||||
let xlrec = v16::XlHeapDelete::decode(buf);
|
||||
if (xlrec.flags & pg_constants::XLH_DELETE_ALL_VISIBLE_CLEARED) != 0 {
|
||||
new_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
} else if info == pg_constants::XLOG_HEAP_UPDATE
|
||||
|| info == pg_constants::XLOG_HEAP_HOT_UPDATE
|
||||
{
|
||||
let xlrec = v16::XlHeapUpdate::decode(buf);
|
||||
// the size of tuple data is inferred from the size of the record.
|
||||
// we can't validate the remaining number of bytes without parsing
|
||||
// the tuple data.
|
||||
if (xlrec.flags & pg_constants::XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED) != 0 {
|
||||
old_heap_blkno = Some(decoded.blocks.last().unwrap().blkno);
|
||||
}
|
||||
if (xlrec.flags & pg_constants::XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED) != 0 {
|
||||
// PostgreSQL only uses XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED on a
|
||||
// non-HOT update where the new tuple goes to different page than
|
||||
// the old one. Otherwise, only XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED is
|
||||
// set.
|
||||
new_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
} else if info == pg_constants::XLOG_HEAP_LOCK {
|
||||
let xlrec = v16::XlHeapLock::decode(buf);
|
||||
if (xlrec.flags & pg_constants::XLH_LOCK_ALL_FROZEN_CLEARED) != 0 {
|
||||
old_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
flags = pg_constants::VISIBILITYMAP_ALL_FROZEN;
|
||||
}
|
||||
}
|
||||
} else if decoded.xl_rmid == pg_constants::RM_HEAP2_ID {
|
||||
let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;
|
||||
if info == pg_constants::XLOG_HEAP2_MULTI_INSERT {
|
||||
let xlrec = v16::XlHeapMultiInsert::decode(buf);
|
||||
|
||||
let offset_array_len =
|
||||
if decoded.xl_info & pg_constants::XLOG_HEAP_INIT_PAGE > 0 {
|
||||
// the offsets array is omitted if XLOG_HEAP_INIT_PAGE is set
|
||||
0
|
||||
} else {
|
||||
size_of::<u16>() * xlrec.ntuples as usize
|
||||
};
|
||||
assert_eq!(offset_array_len, buf.remaining());
|
||||
|
||||
if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
|
||||
new_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
} else if info == pg_constants::XLOG_HEAP2_LOCK_UPDATED {
|
||||
let xlrec = v16::XlHeapLockUpdated::decode(buf);
|
||||
if (xlrec.flags & pg_constants::XLH_LOCK_ALL_FROZEN_CLEARED) != 0 {
|
||||
old_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
flags = pg_constants::VISIBILITYMAP_ALL_FROZEN;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
anyhow::bail!("Unknown RMGR {} for Heap decoding", decoded.xl_rmid);
|
||||
}
|
||||
}
|
||||
17 => {
|
||||
if decoded.xl_rmid == pg_constants::RM_HEAP_ID {
|
||||
let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;
|
||||
|
||||
if info == pg_constants::XLOG_HEAP_INSERT {
|
||||
let xlrec = v17::XlHeapInsert::decode(buf);
|
||||
assert_eq!(0, buf.remaining());
|
||||
if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
|
||||
new_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
} else if info == pg_constants::XLOG_HEAP_DELETE {
|
||||
let xlrec = v17::XlHeapDelete::decode(buf);
|
||||
if (xlrec.flags & pg_constants::XLH_DELETE_ALL_VISIBLE_CLEARED) != 0 {
|
||||
new_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
} else if info == pg_constants::XLOG_HEAP_UPDATE
|
||||
|| info == pg_constants::XLOG_HEAP_HOT_UPDATE
|
||||
{
|
||||
let xlrec = v17::XlHeapUpdate::decode(buf);
|
||||
// the size of tuple data is inferred from the size of the record.
|
||||
// we can't validate the remaining number of bytes without parsing
|
||||
// the tuple data.
|
||||
if (xlrec.flags & pg_constants::XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED) != 0 {
|
||||
old_heap_blkno = Some(decoded.blocks.last().unwrap().blkno);
|
||||
}
|
||||
if (xlrec.flags & pg_constants::XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED) != 0 {
|
||||
// PostgreSQL only uses XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED on a
|
||||
// non-HOT update where the new tuple goes to different page than
|
||||
// the old one. Otherwise, only XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED is
|
||||
// set.
|
||||
new_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
} else if info == pg_constants::XLOG_HEAP_LOCK {
|
||||
let xlrec = v17::XlHeapLock::decode(buf);
|
||||
if (xlrec.flags & pg_constants::XLH_LOCK_ALL_FROZEN_CLEARED) != 0 {
|
||||
old_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
flags = pg_constants::VISIBILITYMAP_ALL_FROZEN;
|
||||
}
|
||||
}
|
||||
} else if decoded.xl_rmid == pg_constants::RM_HEAP2_ID {
|
||||
let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;
|
||||
if info == pg_constants::XLOG_HEAP2_MULTI_INSERT {
|
||||
let xlrec = v17::XlHeapMultiInsert::decode(buf);
|
||||
|
||||
let offset_array_len =
|
||||
if decoded.xl_info & pg_constants::XLOG_HEAP_INIT_PAGE > 0 {
|
||||
// the offsets array is omitted if XLOG_HEAP_INIT_PAGE is set
|
||||
0
|
||||
} else {
|
||||
size_of::<u16>() * xlrec.ntuples as usize
|
||||
};
|
||||
assert_eq!(offset_array_len, buf.remaining());
|
||||
|
||||
if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
|
||||
new_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
} else if info == pg_constants::XLOG_HEAP2_LOCK_UPDATED {
|
||||
let xlrec = v17::XlHeapLockUpdated::decode(buf);
|
||||
if (xlrec.flags & pg_constants::XLH_LOCK_ALL_FROZEN_CLEARED) != 0 {
|
||||
old_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
flags = pg_constants::VISIBILITYMAP_ALL_FROZEN;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
anyhow::bail!("Unknown RMGR {} for Heap decoding", decoded.xl_rmid);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
if new_heap_blkno.is_some() || old_heap_blkno.is_some() {
|
||||
let vm_rel = RelTag {
|
||||
forknum: VISIBILITYMAP_FORKNUM,
|
||||
spcnode: decoded.blocks[0].rnode_spcnode,
|
||||
dbnode: decoded.blocks[0].rnode_dbnode,
|
||||
relnode: decoded.blocks[0].rnode_relnode,
|
||||
};
|
||||
|
||||
Ok(Some(MetadataRecord::Heapam(HeapamRecord::ClearVmBits(
|
||||
ClearVmBits {
|
||||
new_heap_blkno,
|
||||
old_heap_blkno,
|
||||
vm_rel,
|
||||
flags,
|
||||
},
|
||||
))))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
fn decode_neonmgr_record(
|
||||
buf: &mut Bytes,
|
||||
decoded: &DecodedWALRecord,
|
||||
pg_version: u32,
|
||||
) -> anyhow::Result<Option<MetadataRecord>> {
|
||||
// Handle VM bit updates that are implicitly part of heap records.
|
||||
|
||||
// First, look at the record to determine which VM bits need
|
||||
// to be cleared. If either of these variables is set, we
|
||||
// need to clear the corresponding bits in the visibility map.
|
||||
let mut new_heap_blkno: Option<u32> = None;
|
||||
let mut old_heap_blkno: Option<u32> = None;
|
||||
let mut flags = pg_constants::VISIBILITYMAP_VALID_BITS;
|
||||
|
||||
assert_eq!(decoded.xl_rmid, pg_constants::RM_NEON_ID);
|
||||
|
||||
match pg_version {
|
||||
16 | 17 => {
|
||||
let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;
|
||||
|
||||
match info {
|
||||
pg_constants::XLOG_NEON_HEAP_INSERT => {
|
||||
let xlrec = v17::rm_neon::XlNeonHeapInsert::decode(buf);
|
||||
assert_eq!(0, buf.remaining());
|
||||
if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
|
||||
new_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
}
|
||||
pg_constants::XLOG_NEON_HEAP_DELETE => {
|
||||
let xlrec = v17::rm_neon::XlNeonHeapDelete::decode(buf);
|
||||
if (xlrec.flags & pg_constants::XLH_DELETE_ALL_VISIBLE_CLEARED) != 0 {
|
||||
new_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
}
|
||||
pg_constants::XLOG_NEON_HEAP_UPDATE
|
||||
| pg_constants::XLOG_NEON_HEAP_HOT_UPDATE => {
|
||||
let xlrec = v17::rm_neon::XlNeonHeapUpdate::decode(buf);
|
||||
// the size of tuple data is inferred from the size of the record.
|
||||
// we can't validate the remaining number of bytes without parsing
|
||||
// the tuple data.
|
||||
if (xlrec.flags & pg_constants::XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED) != 0 {
|
||||
old_heap_blkno = Some(decoded.blocks.last().unwrap().blkno);
|
||||
}
|
||||
if (xlrec.flags & pg_constants::XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED) != 0 {
|
||||
// PostgreSQL only uses XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED on a
|
||||
// non-HOT update where the new tuple goes to different page than
|
||||
// the old one. Otherwise, only XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED is
|
||||
// set.
|
||||
new_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
}
|
||||
pg_constants::XLOG_NEON_HEAP_MULTI_INSERT => {
|
||||
let xlrec = v17::rm_neon::XlNeonHeapMultiInsert::decode(buf);
|
||||
|
||||
let offset_array_len =
|
||||
if decoded.xl_info & pg_constants::XLOG_HEAP_INIT_PAGE > 0 {
|
||||
// the offsets array is omitted if XLOG_HEAP_INIT_PAGE is set
|
||||
0
|
||||
} else {
|
||||
size_of::<u16>() * xlrec.ntuples as usize
|
||||
};
|
||||
assert_eq!(offset_array_len, buf.remaining());
|
||||
|
||||
if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
|
||||
new_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
}
|
||||
}
|
||||
pg_constants::XLOG_NEON_HEAP_LOCK => {
|
||||
let xlrec = v17::rm_neon::XlNeonHeapLock::decode(buf);
|
||||
if (xlrec.flags & pg_constants::XLH_LOCK_ALL_FROZEN_CLEARED) != 0 {
|
||||
old_heap_blkno = Some(decoded.blocks[0].blkno);
|
||||
flags = pg_constants::VISIBILITYMAP_ALL_FROZEN;
|
||||
}
|
||||
}
|
||||
info => anyhow::bail!("Unknown WAL record type for Neon RMGR: {}", info),
|
||||
}
|
||||
}
|
||||
_ => anyhow::bail!(
|
||||
"Neon RMGR has no known compatibility with PostgreSQL version {}",
|
||||
pg_version
|
||||
),
|
||||
}
|
||||
|
||||
if new_heap_blkno.is_some() || old_heap_blkno.is_some() {
|
||||
let vm_rel = RelTag {
|
||||
forknum: VISIBILITYMAP_FORKNUM,
|
||||
spcnode: decoded.blocks[0].rnode_spcnode,
|
||||
dbnode: decoded.blocks[0].rnode_dbnode,
|
||||
relnode: decoded.blocks[0].rnode_relnode,
|
||||
};
|
||||
|
||||
Ok(Some(MetadataRecord::Neonrmgr(NeonrmgrRecord::ClearVmBits(
|
||||
ClearVmBits {
|
||||
new_heap_blkno,
|
||||
old_heap_blkno,
|
||||
vm_rel,
|
||||
flags,
|
||||
},
|
||||
))))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
fn decode_smgr_record(
|
||||
buf: &mut Bytes,
|
||||
decoded: &DecodedWALRecord,
|
||||
) -> anyhow::Result<Option<MetadataRecord>> {
|
||||
let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK;
|
||||
if info == pg_constants::XLOG_SMGR_CREATE {
|
||||
let create = XlSmgrCreate::decode(buf);
|
||||
let rel = RelTag {
|
||||
spcnode: create.rnode.spcnode,
|
||||
dbnode: create.rnode.dbnode,
|
||||
relnode: create.rnode.relnode,
|
||||
forknum: create.forknum,
|
||||
};
|
||||
|
||||
return Ok(Some(MetadataRecord::Smgr(SmgrRecord::Create(SmgrCreate {
|
||||
rel,
|
||||
}))));
|
||||
} else if info == pg_constants::XLOG_SMGR_TRUNCATE {
|
||||
let truncate = XlSmgrTruncate::decode(buf);
|
||||
return Ok(Some(MetadataRecord::Smgr(SmgrRecord::Truncate(truncate))));
|
||||
}
|
||||
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
fn decode_dbase_record(
|
||||
buf: &mut Bytes,
|
||||
decoded: &DecodedWALRecord,
|
||||
pg_version: u32,
|
||||
) -> anyhow::Result<Option<MetadataRecord>> {
|
||||
// TODO: Refactor this to avoid the duplication between postgres versions.
|
||||
|
||||
let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK;
|
||||
tracing::debug!(%info, %pg_version, "handle RM_DBASE_ID");
|
||||
|
||||
if pg_version == 14 {
|
||||
if info == postgres_ffi::v14::bindings::XLOG_DBASE_CREATE {
|
||||
let createdb = XlCreateDatabase::decode(buf);
|
||||
tracing::debug!("XLOG_DBASE_CREATE v14");
|
||||
|
||||
let record = MetadataRecord::Dbase(DbaseRecord::Create(DbaseCreate {
|
||||
db_id: createdb.db_id,
|
||||
tablespace_id: createdb.tablespace_id,
|
||||
src_db_id: createdb.src_db_id,
|
||||
src_tablespace_id: createdb.src_tablespace_id,
|
||||
}));
|
||||
|
||||
return Ok(Some(record));
|
||||
} else if info == postgres_ffi::v14::bindings::XLOG_DBASE_DROP {
|
||||
let dropdb = XlDropDatabase::decode(buf);
|
||||
|
||||
let record = MetadataRecord::Dbase(DbaseRecord::Drop(DbaseDrop {
|
||||
db_id: dropdb.db_id,
|
||||
tablespace_ids: dropdb.tablespace_ids,
|
||||
}));
|
||||
|
||||
return Ok(Some(record));
|
||||
}
|
||||
} else if pg_version == 15 {
|
||||
if info == postgres_ffi::v15::bindings::XLOG_DBASE_CREATE_WAL_LOG {
|
||||
tracing::debug!("XLOG_DBASE_CREATE_WAL_LOG: noop");
|
||||
} else if info == postgres_ffi::v15::bindings::XLOG_DBASE_CREATE_FILE_COPY {
|
||||
// The XLOG record was renamed between v14 and v15,
|
||||
// but the record format is the same.
|
||||
// So we can reuse XlCreateDatabase here.
|
||||
tracing::debug!("XLOG_DBASE_CREATE_FILE_COPY");
|
||||
|
||||
let createdb = XlCreateDatabase::decode(buf);
|
||||
let record = MetadataRecord::Dbase(DbaseRecord::Create(DbaseCreate {
|
||||
db_id: createdb.db_id,
|
||||
tablespace_id: createdb.tablespace_id,
|
||||
src_db_id: createdb.src_db_id,
|
||||
src_tablespace_id: createdb.src_tablespace_id,
|
||||
}));
|
||||
|
||||
return Ok(Some(record));
|
||||
} else if info == postgres_ffi::v15::bindings::XLOG_DBASE_DROP {
|
||||
let dropdb = XlDropDatabase::decode(buf);
|
||||
let record = MetadataRecord::Dbase(DbaseRecord::Drop(DbaseDrop {
|
||||
db_id: dropdb.db_id,
|
||||
tablespace_ids: dropdb.tablespace_ids,
|
||||
}));
|
||||
|
||||
return Ok(Some(record));
|
||||
}
|
||||
} else if pg_version == 16 {
|
||||
if info == postgres_ffi::v16::bindings::XLOG_DBASE_CREATE_WAL_LOG {
|
||||
tracing::debug!("XLOG_DBASE_CREATE_WAL_LOG: noop");
|
||||
} else if info == postgres_ffi::v16::bindings::XLOG_DBASE_CREATE_FILE_COPY {
|
||||
// The XLOG record was renamed between v14 and v15,
|
||||
// but the record format is the same.
|
||||
// So we can reuse XlCreateDatabase here.
|
||||
tracing::debug!("XLOG_DBASE_CREATE_FILE_COPY");
|
||||
|
||||
let createdb = XlCreateDatabase::decode(buf);
|
||||
let record = MetadataRecord::Dbase(DbaseRecord::Create(DbaseCreate {
|
||||
db_id: createdb.db_id,
|
||||
tablespace_id: createdb.tablespace_id,
|
||||
src_db_id: createdb.src_db_id,
|
||||
src_tablespace_id: createdb.src_tablespace_id,
|
||||
}));
|
||||
|
||||
return Ok(Some(record));
|
||||
} else if info == postgres_ffi::v16::bindings::XLOG_DBASE_DROP {
|
||||
let dropdb = XlDropDatabase::decode(buf);
|
||||
let record = MetadataRecord::Dbase(DbaseRecord::Drop(DbaseDrop {
|
||||
db_id: dropdb.db_id,
|
||||
tablespace_ids: dropdb.tablespace_ids,
|
||||
}));
|
||||
|
||||
return Ok(Some(record));
|
||||
}
|
||||
} else if pg_version == 17 {
|
||||
if info == postgres_ffi::v17::bindings::XLOG_DBASE_CREATE_WAL_LOG {
|
||||
tracing::debug!("XLOG_DBASE_CREATE_WAL_LOG: noop");
|
||||
} else if info == postgres_ffi::v17::bindings::XLOG_DBASE_CREATE_FILE_COPY {
|
||||
// The XLOG record was renamed between v14 and v15,
|
||||
// but the record format is the same.
|
||||
// So we can reuse XlCreateDatabase here.
|
||||
tracing::debug!("XLOG_DBASE_CREATE_FILE_COPY");
|
||||
|
||||
let createdb = XlCreateDatabase::decode(buf);
|
||||
let record = MetadataRecord::Dbase(DbaseRecord::Create(DbaseCreate {
|
||||
db_id: createdb.db_id,
|
||||
tablespace_id: createdb.tablespace_id,
|
||||
src_db_id: createdb.src_db_id,
|
||||
src_tablespace_id: createdb.src_tablespace_id,
|
||||
}));
|
||||
|
||||
return Ok(Some(record));
|
||||
} else if info == postgres_ffi::v17::bindings::XLOG_DBASE_DROP {
|
||||
let dropdb = XlDropDatabase::decode(buf);
|
||||
let record = MetadataRecord::Dbase(DbaseRecord::Drop(DbaseDrop {
|
||||
db_id: dropdb.db_id,
|
||||
tablespace_ids: dropdb.tablespace_ids,
|
||||
}));
|
||||
|
||||
return Ok(Some(record));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
fn decode_clog_record(
|
||||
buf: &mut Bytes,
|
||||
decoded: &DecodedWALRecord,
|
||||
pg_version: u32,
|
||||
) -> anyhow::Result<Option<MetadataRecord>> {
|
||||
let info = decoded.xl_info & !pg_constants::XLR_INFO_MASK;
|
||||
|
||||
if info == pg_constants::CLOG_ZEROPAGE {
|
||||
let pageno = if pg_version < 17 {
|
||||
buf.get_u32_le()
|
||||
} else {
|
||||
buf.get_u64_le() as u32
|
||||
};
|
||||
let segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT;
|
||||
let rpageno = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT;
|
||||
|
||||
Ok(Some(MetadataRecord::Clog(ClogRecord::ZeroPage(
|
||||
ClogZeroPage { segno, rpageno },
|
||||
))))
|
||||
} else {
|
||||
assert!(info == pg_constants::CLOG_TRUNCATE);
|
||||
let xlrec = XlClogTruncate::decode(buf, pg_version);
|
||||
|
||||
Ok(Some(MetadataRecord::Clog(ClogRecord::Truncate(
|
||||
ClogTruncate {
|
||||
pageno: xlrec.pageno,
|
||||
oldest_xid: xlrec.oldest_xid,
|
||||
oldest_xid_db: xlrec.oldest_xid_db,
|
||||
},
|
||||
))))
|
||||
}
|
||||
}
|
||||
|
||||
fn decode_xact_record(
|
||||
buf: &mut Bytes,
|
||||
decoded: &DecodedWALRecord,
|
||||
lsn: Lsn,
|
||||
) -> anyhow::Result<Option<MetadataRecord>> {
|
||||
let info = decoded.xl_info & pg_constants::XLOG_XACT_OPMASK;
|
||||
let origin_id = decoded.origin_id;
|
||||
let xl_xid = decoded.xl_xid;
|
||||
|
||||
if info == pg_constants::XLOG_XACT_COMMIT {
|
||||
let parsed = XlXactParsedRecord::decode(buf, decoded.xl_xid, decoded.xl_info);
|
||||
return Ok(Some(MetadataRecord::Xact(XactRecord::Commit(XactCommon {
|
||||
parsed,
|
||||
origin_id,
|
||||
xl_xid,
|
||||
lsn,
|
||||
}))));
|
||||
} else if info == pg_constants::XLOG_XACT_ABORT {
|
||||
let parsed = XlXactParsedRecord::decode(buf, decoded.xl_xid, decoded.xl_info);
|
||||
return Ok(Some(MetadataRecord::Xact(XactRecord::Abort(XactCommon {
|
||||
parsed,
|
||||
origin_id,
|
||||
xl_xid,
|
||||
lsn,
|
||||
}))));
|
||||
} else if info == pg_constants::XLOG_XACT_COMMIT_PREPARED {
|
||||
let parsed = XlXactParsedRecord::decode(buf, decoded.xl_xid, decoded.xl_info);
|
||||
return Ok(Some(MetadataRecord::Xact(XactRecord::CommitPrepared(
|
||||
XactCommon {
|
||||
parsed,
|
||||
origin_id,
|
||||
xl_xid,
|
||||
lsn,
|
||||
},
|
||||
))));
|
||||
} else if info == pg_constants::XLOG_XACT_ABORT_PREPARED {
|
||||
let parsed = XlXactParsedRecord::decode(buf, decoded.xl_xid, decoded.xl_info);
|
||||
return Ok(Some(MetadataRecord::Xact(XactRecord::AbortPrepared(
|
||||
XactCommon {
|
||||
parsed,
|
||||
origin_id,
|
||||
xl_xid,
|
||||
lsn,
|
||||
},
|
||||
))));
|
||||
} else if info == pg_constants::XLOG_XACT_PREPARE {
|
||||
return Ok(Some(MetadataRecord::Xact(XactRecord::Prepare(
|
||||
XactPrepare {
|
||||
xl_xid: decoded.xl_xid,
|
||||
data: Bytes::copy_from_slice(&buf[..]),
|
||||
},
|
||||
))));
|
||||
}
|
||||
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
fn decode_multixact_record(
|
||||
buf: &mut Bytes,
|
||||
decoded: &DecodedWALRecord,
|
||||
pg_version: u32,
|
||||
) -> anyhow::Result<Option<MetadataRecord>> {
|
||||
let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK;
|
||||
|
||||
if info == pg_constants::XLOG_MULTIXACT_ZERO_OFF_PAGE
|
||||
|| info == pg_constants::XLOG_MULTIXACT_ZERO_MEM_PAGE
|
||||
{
|
||||
let pageno = if pg_version < 17 {
|
||||
buf.get_u32_le()
|
||||
} else {
|
||||
buf.get_u64_le() as u32
|
||||
};
|
||||
let segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT;
|
||||
let rpageno = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT;
|
||||
|
||||
let slru_kind = match info {
|
||||
pg_constants::XLOG_MULTIXACT_ZERO_OFF_PAGE => SlruKind::MultiXactOffsets,
|
||||
pg_constants::XLOG_MULTIXACT_ZERO_MEM_PAGE => SlruKind::MultiXactMembers,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
return Ok(Some(MetadataRecord::MultiXact(MultiXactRecord::ZeroPage(
|
||||
MultiXactZeroPage {
|
||||
slru_kind,
|
||||
segno,
|
||||
rpageno,
|
||||
},
|
||||
))));
|
||||
} else if info == pg_constants::XLOG_MULTIXACT_CREATE_ID {
|
||||
let xlrec = XlMultiXactCreate::decode(buf);
|
||||
return Ok(Some(MetadataRecord::MultiXact(MultiXactRecord::Create(
|
||||
xlrec,
|
||||
))));
|
||||
} else if info == pg_constants::XLOG_MULTIXACT_TRUNCATE_ID {
|
||||
let xlrec = XlMultiXactTruncate::decode(buf);
|
||||
return Ok(Some(MetadataRecord::MultiXact(MultiXactRecord::Truncate(
|
||||
xlrec,
|
||||
))));
|
||||
}
|
||||
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
fn decode_relmap_record(
|
||||
buf: &mut Bytes,
|
||||
decoded: &DecodedWALRecord,
|
||||
) -> anyhow::Result<Option<MetadataRecord>> {
|
||||
let update = XlRelmapUpdate::decode(buf);
|
||||
|
||||
let mut buf = decoded.record.clone();
|
||||
buf.advance(decoded.main_data_offset);
|
||||
// skip xl_relmap_update
|
||||
buf.advance(12);
|
||||
|
||||
Ok(Some(MetadataRecord::Relmap(RelmapRecord::Update(
|
||||
RelmapUpdate {
|
||||
update,
|
||||
buf: Bytes::copy_from_slice(&buf[..]),
|
||||
},
|
||||
))))
|
||||
}
|
||||
|
||||
fn decode_xlog_record(
|
||||
buf: &mut Bytes,
|
||||
decoded: &DecodedWALRecord,
|
||||
lsn: Lsn,
|
||||
) -> anyhow::Result<Option<MetadataRecord>> {
|
||||
let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK;
|
||||
Ok(Some(MetadataRecord::Xlog(XlogRecord::Raw(RawXlogRecord {
|
||||
info,
|
||||
lsn,
|
||||
buf: buf.clone(),
|
||||
}))))
|
||||
}
|
||||
|
||||
fn decode_logical_message_record(
|
||||
buf: &mut Bytes,
|
||||
decoded: &DecodedWALRecord,
|
||||
) -> anyhow::Result<Option<MetadataRecord>> {
|
||||
let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK;
|
||||
if info == pg_constants::XLOG_LOGICAL_MESSAGE {
|
||||
let xlrec = XlLogicalMessage::decode(buf);
|
||||
let prefix = std::str::from_utf8(&buf[0..xlrec.prefix_size - 1])?;
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
if prefix == "neon-test" {
|
||||
return Ok(Some(MetadataRecord::LogicalMessage(
|
||||
LogicalMessageRecord::Failpoint,
|
||||
)));
|
||||
}
|
||||
|
||||
if let Some(path) = prefix.strip_prefix("neon-file:") {
|
||||
let buf_size = xlrec.prefix_size + xlrec.message_size;
|
||||
let buf = Bytes::copy_from_slice(&buf[xlrec.prefix_size..buf_size]);
|
||||
return Ok(Some(MetadataRecord::LogicalMessage(
|
||||
LogicalMessageRecord::Put(PutLogicalMessage {
|
||||
path: path.to_string(),
|
||||
buf,
|
||||
}),
|
||||
)));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
fn decode_standby_record(
|
||||
buf: &mut Bytes,
|
||||
decoded: &DecodedWALRecord,
|
||||
) -> anyhow::Result<Option<MetadataRecord>> {
|
||||
let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK;
|
||||
if info == pg_constants::XLOG_RUNNING_XACTS {
|
||||
let xlrec = XlRunningXacts::decode(buf);
|
||||
return Ok(Some(MetadataRecord::Standby(StandbyRecord::RunningXacts(
|
||||
StandbyRunningXacts {
|
||||
oldest_running_xid: xlrec.oldest_running_xid,
|
||||
},
|
||||
))));
|
||||
}
|
||||
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
fn decode_replorigin_record(
|
||||
buf: &mut Bytes,
|
||||
decoded: &DecodedWALRecord,
|
||||
) -> anyhow::Result<Option<MetadataRecord>> {
|
||||
let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK;
|
||||
if info == pg_constants::XLOG_REPLORIGIN_SET {
|
||||
let xlrec = XlReploriginSet::decode(buf);
|
||||
return Ok(Some(MetadataRecord::Replorigin(ReploriginRecord::Set(
|
||||
xlrec,
|
||||
))));
|
||||
} else if info == pg_constants::XLOG_REPLORIGIN_DROP {
|
||||
let xlrec = XlReploriginDrop::decode(buf);
|
||||
return Ok(Some(MetadataRecord::Replorigin(ReploriginRecord::Drop(
|
||||
xlrec,
|
||||
))));
|
||||
}
|
||||
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
2
libs/wal_decoder/src/lib.rs
Normal file
2
libs/wal_decoder/src/lib.rs
Normal file
@@ -0,0 +1,2 @@
|
||||
pub mod decoder;
|
||||
pub mod models;
|
||||
211
libs/wal_decoder/src/models.rs
Normal file
211
libs/wal_decoder/src/models.rs
Normal file
@@ -0,0 +1,211 @@
|
||||
//! This module houses types which represent decoded PG WAL records
|
||||
//! ready for the pageserver to interpret. They are derived from the original
|
||||
//! WAL records, so that each struct corresponds closely to one WAL record of
|
||||
//! a specific kind. They contain the same information as the original WAL records,
|
||||
//! just decoded into structs and fields for easier access.
|
||||
//!
|
||||
//! The ingestion code uses these structs to help with parsing the WAL records,
|
||||
//! and it splits them into a stream of modifications to the key-value pairs that
|
||||
//! are ultimately stored in delta layers. See also the split-out counterparts in
|
||||
//! [`postgres_ffi::walrecord`].
|
||||
//!
|
||||
//! The pipeline which processes WAL records is not super obvious, so let's follow
|
||||
//! the flow of an example XACT_COMMIT Postgres record:
|
||||
//!
|
||||
//! (Postgres XACT_COMMIT record)
|
||||
//! |
|
||||
//! |--> pageserver::walingest::WalIngest::decode_xact_record
|
||||
//! |
|
||||
//! |--> ([`XactRecord::Commit`])
|
||||
//! |
|
||||
//! |--> pageserver::walingest::WalIngest::ingest_xact_record
|
||||
//! |
|
||||
//! |--> (NeonWalRecord::ClogSetCommitted)
|
||||
//! |
|
||||
//! |--> write to KV store within the pageserver
|
||||
|
||||
use bytes::Bytes;
|
||||
use pageserver_api::key::CompactKey;
|
||||
use pageserver_api::reltag::{RelTag, SlruKind};
|
||||
use pageserver_api::value::Value;
|
||||
use postgres_ffi::walrecord::{
|
||||
XlMultiXactCreate, XlMultiXactTruncate, XlRelmapUpdate, XlReploriginDrop, XlReploriginSet,
|
||||
XlSmgrTruncate, XlXactParsedRecord,
|
||||
};
|
||||
use postgres_ffi::{Oid, TransactionId};
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
pub enum FlushUncommittedRecords {
|
||||
Yes,
|
||||
No,
|
||||
}
|
||||
|
||||
/// An interpreted Postgres WAL record, ready to be handled by the pageserver
|
||||
pub struct InterpretedWalRecord {
|
||||
/// Optional metadata record - may cause writes to metadata keys
|
||||
/// in the storage engine
|
||||
pub metadata_record: Option<MetadataRecord>,
|
||||
/// Images or deltas for blocks modified in the original WAL record.
|
||||
/// The [`Value`] is optional to avoid sending superfluous data to
|
||||
/// shard 0 for relation size tracking.
|
||||
pub blocks: Vec<(CompactKey, Option<Value>)>,
|
||||
/// Byte offset within WAL for the end of the original PG WAL record
|
||||
pub lsn: Lsn,
|
||||
/// Whether to flush all uncommitted modifications to the storage engine
|
||||
/// before ingesting this record. This is currently only used for legacy PG
|
||||
/// database creations which read pages from a template database. Such WAL
|
||||
/// records require reading data blocks while ingesting, hence the need to flush.
|
||||
pub flush_uncommitted: FlushUncommittedRecords,
|
||||
/// Transaction id of the original PG WAL record
|
||||
pub xid: TransactionId,
|
||||
}
|
||||
|
||||
/// The interpreted part of the Postgres WAL record which requires metadata
|
||||
/// writes to the underlying storage engine.
|
||||
pub enum MetadataRecord {
|
||||
Heapam(HeapamRecord),
|
||||
Neonrmgr(NeonrmgrRecord),
|
||||
Smgr(SmgrRecord),
|
||||
Dbase(DbaseRecord),
|
||||
Clog(ClogRecord),
|
||||
Xact(XactRecord),
|
||||
MultiXact(MultiXactRecord),
|
||||
Relmap(RelmapRecord),
|
||||
Xlog(XlogRecord),
|
||||
LogicalMessage(LogicalMessageRecord),
|
||||
Standby(StandbyRecord),
|
||||
Replorigin(ReploriginRecord),
|
||||
}
|
||||
|
||||
pub enum HeapamRecord {
|
||||
ClearVmBits(ClearVmBits),
|
||||
}
|
||||
|
||||
pub struct ClearVmBits {
|
||||
pub new_heap_blkno: Option<u32>,
|
||||
pub old_heap_blkno: Option<u32>,
|
||||
pub vm_rel: RelTag,
|
||||
pub flags: u8,
|
||||
}
|
||||
|
||||
pub enum NeonrmgrRecord {
|
||||
ClearVmBits(ClearVmBits),
|
||||
}
|
||||
|
||||
pub enum SmgrRecord {
|
||||
Create(SmgrCreate),
|
||||
Truncate(XlSmgrTruncate),
|
||||
}
|
||||
|
||||
pub struct SmgrCreate {
|
||||
pub rel: RelTag,
|
||||
}
|
||||
|
||||
pub enum DbaseRecord {
|
||||
Create(DbaseCreate),
|
||||
Drop(DbaseDrop),
|
||||
}
|
||||
|
||||
pub struct DbaseCreate {
|
||||
pub db_id: Oid,
|
||||
pub tablespace_id: Oid,
|
||||
pub src_db_id: Oid,
|
||||
pub src_tablespace_id: Oid,
|
||||
}
|
||||
|
||||
pub struct DbaseDrop {
|
||||
pub db_id: Oid,
|
||||
pub tablespace_ids: Vec<Oid>,
|
||||
}
|
||||
|
||||
pub enum ClogRecord {
|
||||
ZeroPage(ClogZeroPage),
|
||||
Truncate(ClogTruncate),
|
||||
}
|
||||
|
||||
pub struct ClogZeroPage {
|
||||
pub segno: u32,
|
||||
pub rpageno: u32,
|
||||
}
|
||||
|
||||
pub struct ClogTruncate {
|
||||
pub pageno: u32,
|
||||
pub oldest_xid: TransactionId,
|
||||
pub oldest_xid_db: Oid,
|
||||
}
|
||||
|
||||
pub enum XactRecord {
|
||||
Commit(XactCommon),
|
||||
Abort(XactCommon),
|
||||
CommitPrepared(XactCommon),
|
||||
AbortPrepared(XactCommon),
|
||||
Prepare(XactPrepare),
|
||||
}
|
||||
|
||||
pub struct XactCommon {
|
||||
pub parsed: XlXactParsedRecord,
|
||||
pub origin_id: u16,
|
||||
// Fields below are only used for logging
|
||||
pub xl_xid: TransactionId,
|
||||
pub lsn: Lsn,
|
||||
}
|
||||
|
||||
pub struct XactPrepare {
|
||||
pub xl_xid: TransactionId,
|
||||
pub data: Bytes,
|
||||
}
|
||||
|
||||
pub enum MultiXactRecord {
|
||||
ZeroPage(MultiXactZeroPage),
|
||||
Create(XlMultiXactCreate),
|
||||
Truncate(XlMultiXactTruncate),
|
||||
}
|
||||
|
||||
pub struct MultiXactZeroPage {
|
||||
pub slru_kind: SlruKind,
|
||||
pub segno: u32,
|
||||
pub rpageno: u32,
|
||||
}
|
||||
|
||||
pub enum RelmapRecord {
|
||||
Update(RelmapUpdate),
|
||||
}
|
||||
|
||||
pub struct RelmapUpdate {
|
||||
pub update: XlRelmapUpdate,
|
||||
pub buf: Bytes,
|
||||
}
|
||||
|
||||
pub enum XlogRecord {
|
||||
Raw(RawXlogRecord),
|
||||
}
|
||||
|
||||
pub struct RawXlogRecord {
|
||||
pub info: u8,
|
||||
pub lsn: Lsn,
|
||||
pub buf: Bytes,
|
||||
}
|
||||
|
||||
pub enum LogicalMessageRecord {
|
||||
Put(PutLogicalMessage),
|
||||
#[cfg(feature = "testing")]
|
||||
Failpoint,
|
||||
}
|
||||
|
||||
pub struct PutLogicalMessage {
|
||||
pub path: String,
|
||||
pub buf: Bytes,
|
||||
}
|
||||
|
||||
pub enum StandbyRecord {
|
||||
RunningXacts(StandbyRunningXacts),
|
||||
}
|
||||
|
||||
pub struct StandbyRunningXacts {
|
||||
pub oldest_running_xid: TransactionId,
|
||||
}
|
||||
|
||||
pub enum ReploriginRecord {
|
||||
Set(XlReploriginSet),
|
||||
Drop(XlReploriginDrop),
|
||||
}
|
||||
@@ -8,7 +8,7 @@ license.workspace = true
|
||||
default = []
|
||||
# Enables test-only APIs, incuding failpoints. In particular, enables the `fail_point!` macro,
|
||||
# which adds some runtime cost to run tests on outage conditions
|
||||
testing = ["fail/failpoints", "pageserver_api/testing" ]
|
||||
testing = ["fail/failpoints", "pageserver_api/testing", "wal_decoder/testing"]
|
||||
|
||||
[dependencies]
|
||||
anyhow.workspace = true
|
||||
@@ -83,6 +83,7 @@ enum-map.workspace = true
|
||||
enumset = { workspace = true, features = ["serde"]}
|
||||
strum.workspace = true
|
||||
strum_macros.workspace = true
|
||||
wal_decoder.workspace = true
|
||||
|
||||
[target.'cfg(target_os = "linux")'.dependencies]
|
||||
procfs.workspace = true
|
||||
|
||||
@@ -8,13 +8,12 @@ use pageserver::{
|
||||
context::{DownloadBehavior, RequestContext},
|
||||
l0_flush::{L0FlushConfig, L0FlushGlobalState},
|
||||
page_cache,
|
||||
repository::Value,
|
||||
task_mgr::TaskKind,
|
||||
tenant::storage_layer::inmemory_layer::SerializedBatch,
|
||||
tenant::storage_layer::InMemoryLayer,
|
||||
virtual_file,
|
||||
};
|
||||
use pageserver_api::{key::Key, shard::TenantShardId};
|
||||
use pageserver_api::{key::Key, shard::TenantShardId, value::Value};
|
||||
use utils::{
|
||||
bin_ser::BeSer,
|
||||
id::{TenantId, TimelineId},
|
||||
@@ -164,7 +163,11 @@ fn criterion_benchmark(c: &mut Criterion) {
|
||||
let conf: &'static PageServerConf = Box::leak(Box::new(
|
||||
pageserver::config::PageServerConf::dummy_conf(temp_dir.path().to_path_buf()),
|
||||
));
|
||||
virtual_file::init(16384, virtual_file::io_engine_for_bench());
|
||||
virtual_file::init(
|
||||
16384,
|
||||
virtual_file::io_engine_for_bench(),
|
||||
conf.virtual_file_io_mode,
|
||||
);
|
||||
page_cache::init(conf.page_cache_size);
|
||||
|
||||
{
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
use criterion::measurement::WallTime;
|
||||
use pageserver::keyspace::{KeyPartitioning, KeySpace};
|
||||
use pageserver::repository::Key;
|
||||
use pageserver::tenant::layer_map::LayerMap;
|
||||
use pageserver::tenant::storage_layer::LayerName;
|
||||
use pageserver::tenant::storage_layer::PersistentLayerDesc;
|
||||
use pageserver_api::key::Key;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use rand::prelude::{SeedableRng, SliceRandom, StdRng};
|
||||
use std::cmp::{max, min};
|
||||
|
||||
@@ -60,7 +60,8 @@ use anyhow::Context;
|
||||
use bytes::{Buf, Bytes};
|
||||
use criterion::{BenchmarkId, Criterion};
|
||||
use once_cell::sync::Lazy;
|
||||
use pageserver::{config::PageServerConf, walrecord::NeonWalRecord, walredo::PostgresRedoManager};
|
||||
use pageserver::{config::PageServerConf, walredo::PostgresRedoManager};
|
||||
use pageserver_api::record::NeonWalRecord;
|
||||
use pageserver_api::{key::Key, shard::TenantShardId};
|
||||
use std::{
|
||||
future::Future,
|
||||
|
||||
@@ -133,7 +133,7 @@ enum LazyLoadLayer<'a, E: CompactionJobExecutor> {
|
||||
Loaded(VecDeque<<E::DeltaLayer as CompactionDeltaLayer<E>>::DeltaEntry<'a>>),
|
||||
Unloaded(&'a E::DeltaLayer),
|
||||
}
|
||||
impl<'a, E: CompactionJobExecutor> LazyLoadLayer<'a, E> {
|
||||
impl<E: CompactionJobExecutor> LazyLoadLayer<'_, E> {
|
||||
fn min_key(&self) -> E::Key {
|
||||
match self {
|
||||
Self::Loaded(entries) => entries.front().unwrap().key(),
|
||||
@@ -147,23 +147,23 @@ impl<'a, E: CompactionJobExecutor> LazyLoadLayer<'a, E> {
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<'a, E: CompactionJobExecutor> PartialOrd for LazyLoadLayer<'a, E> {
|
||||
impl<E: CompactionJobExecutor> PartialOrd for LazyLoadLayer<'_, E> {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
impl<'a, E: CompactionJobExecutor> Ord for LazyLoadLayer<'a, E> {
|
||||
impl<E: CompactionJobExecutor> Ord for LazyLoadLayer<'_, E> {
|
||||
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
|
||||
// reverse order so that we get a min-heap
|
||||
(other.min_key(), other.min_lsn()).cmp(&(self.min_key(), self.min_lsn()))
|
||||
}
|
||||
}
|
||||
impl<'a, E: CompactionJobExecutor> PartialEq for LazyLoadLayer<'a, E> {
|
||||
impl<E: CompactionJobExecutor> PartialEq for LazyLoadLayer<'_, E> {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.cmp(other) == std::cmp::Ordering::Equal
|
||||
}
|
||||
}
|
||||
impl<'a, E: CompactionJobExecutor> Eq for LazyLoadLayer<'a, E> {}
|
||||
impl<E: CompactionJobExecutor> Eq for LazyLoadLayer<'_, E> {}
|
||||
|
||||
type LoadFuture<'a, E> = BoxFuture<'a, anyhow::Result<Vec<E>>>;
|
||||
|
||||
|
||||
@@ -51,7 +51,7 @@
|
||||
//!
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use pageserver::repository::Key;
|
||||
use pageserver_api::key::Key;
|
||||
use std::cmp::Ordering;
|
||||
use std::io::{self, BufRead};
|
||||
use std::path::PathBuf;
|
||||
|
||||
@@ -11,7 +11,7 @@ pub(crate) async fn main(cmd: &IndexPartCmd) -> anyhow::Result<()> {
|
||||
match cmd {
|
||||
IndexPartCmd::Dump { path } => {
|
||||
let bytes = tokio::fs::read(path).await.context("read file")?;
|
||||
let des: IndexPart = IndexPart::from_s3_bytes(&bytes).context("deserialize")?;
|
||||
let des: IndexPart = IndexPart::from_json_bytes(&bytes).context("deserialize")?;
|
||||
let output = serde_json::to_string_pretty(&des).context("serialize output")?;
|
||||
println!("{output}");
|
||||
Ok(())
|
||||
|
||||
@@ -2,23 +2,25 @@
|
||||
//!
|
||||
//! Currently it only analyzes holes, which are regions within the layer range that the layer contains no updates for. In the future it might do more analysis (maybe key quantiles?) but it should never return sensitive data.
|
||||
|
||||
use anyhow::Result;
|
||||
use anyhow::{anyhow, Result};
|
||||
use camino::{Utf8Path, Utf8PathBuf};
|
||||
use pageserver::context::{DownloadBehavior, RequestContext};
|
||||
use pageserver::task_mgr::TaskKind;
|
||||
use pageserver::tenant::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME};
|
||||
use pageserver::virtual_file::api::IoMode;
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::BinaryHeap;
|
||||
use std::ops::Range;
|
||||
use std::str::FromStr;
|
||||
use std::{fs, str};
|
||||
|
||||
use pageserver::page_cache::{self, PAGE_SZ};
|
||||
use pageserver::repository::{Key, KEY_SIZE};
|
||||
use pageserver::tenant::block_io::FileBlockReader;
|
||||
use pageserver::tenant::disk_btree::{DiskBtreeReader, VisitDirection};
|
||||
use pageserver::tenant::storage_layer::delta_layer::{Summary, DELTA_KEY_SIZE};
|
||||
use pageserver::tenant::storage_layer::range_overlaps;
|
||||
use pageserver::tenant::storage_layer::{range_overlaps, LayerName};
|
||||
use pageserver::virtual_file::{self, VirtualFile};
|
||||
use pageserver_api::key::{Key, KEY_SIZE};
|
||||
|
||||
use utils::{bin_ser::BeSer, lsn::Lsn};
|
||||
|
||||
@@ -73,35 +75,15 @@ impl LayerFile {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn parse_filename(name: &str) -> Option<LayerFile> {
|
||||
let split: Vec<&str> = name.split("__").collect();
|
||||
if split.len() != 2 {
|
||||
return None;
|
||||
}
|
||||
let keys: Vec<&str> = split[0].split('-').collect();
|
||||
let lsn_and_opt_generation: Vec<&str> = split[1].split('v').collect();
|
||||
let lsns: Vec<&str> = lsn_and_opt_generation[0].split('-').collect();
|
||||
let the_lsns: [&str; 2];
|
||||
pub(crate) fn parse_filename(name: &str) -> anyhow::Result<LayerFile> {
|
||||
let layer_name =
|
||||
LayerName::from_str(name).map_err(|e| anyhow!("failed to parse layer name: {e}"))?;
|
||||
|
||||
/*
|
||||
* Generations add a -vX-XXXXXX postfix, which causes issues when we try to
|
||||
* parse 'vX' as an LSN.
|
||||
*/
|
||||
let is_delta = if lsns.len() == 1 || lsns[1].is_empty() {
|
||||
the_lsns = [lsns[0], lsns[0]];
|
||||
false
|
||||
} else {
|
||||
the_lsns = [lsns[0], lsns[1]];
|
||||
true
|
||||
};
|
||||
|
||||
let key_range = Key::from_hex(keys[0]).unwrap()..Key::from_hex(keys[1]).unwrap();
|
||||
let lsn_range = Lsn::from_hex(the_lsns[0]).unwrap()..Lsn::from_hex(the_lsns[1]).unwrap();
|
||||
let holes = Vec::new();
|
||||
Some(LayerFile {
|
||||
key_range,
|
||||
lsn_range,
|
||||
is_delta,
|
||||
Ok(LayerFile {
|
||||
key_range: layer_name.key_range().clone(),
|
||||
lsn_range: layer_name.lsn_as_range(),
|
||||
is_delta: layer_name.is_delta(),
|
||||
holes,
|
||||
})
|
||||
}
|
||||
@@ -152,7 +134,11 @@ pub(crate) async fn main(cmd: &AnalyzeLayerMapCmd) -> Result<()> {
|
||||
let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error);
|
||||
|
||||
// Initialize virtual_file (file desriptor cache) and page cache which are needed to access layer persistent B-Tree.
|
||||
pageserver::virtual_file::init(10, virtual_file::api::IoEngineKind::StdFs);
|
||||
pageserver::virtual_file::init(
|
||||
10,
|
||||
virtual_file::api::IoEngineKind::StdFs,
|
||||
IoMode::preferred(),
|
||||
);
|
||||
pageserver::page_cache::init(100);
|
||||
|
||||
let mut total_delta_layers = 0usize;
|
||||
@@ -174,7 +160,7 @@ pub(crate) async fn main(cmd: &AnalyzeLayerMapCmd) -> Result<()> {
|
||||
|
||||
for layer in fs::read_dir(timeline.path())? {
|
||||
let layer = layer?;
|
||||
if let Some(mut layer_file) =
|
||||
if let Ok(mut layer_file) =
|
||||
parse_filename(&layer.file_name().into_string().unwrap())
|
||||
{
|
||||
if layer_file.is_delta {
|
||||
|
||||
@@ -5,23 +5,12 @@ use camino::{Utf8Path, Utf8PathBuf};
|
||||
use clap::Subcommand;
|
||||
use pageserver::context::{DownloadBehavior, RequestContext};
|
||||
use pageserver::task_mgr::TaskKind;
|
||||
use pageserver::tenant::block_io::BlockCursor;
|
||||
use pageserver::tenant::disk_btree::DiskBtreeReader;
|
||||
use pageserver::tenant::storage_layer::delta_layer::{BlobRef, Summary};
|
||||
use pageserver::tenant::storage_layer::{delta_layer, image_layer};
|
||||
use pageserver::tenant::storage_layer::{DeltaLayer, ImageLayer};
|
||||
use pageserver::tenant::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME};
|
||||
use pageserver::virtual_file::api::IoMode;
|
||||
use pageserver::{page_cache, virtual_file};
|
||||
use pageserver::{
|
||||
repository::{Key, KEY_SIZE},
|
||||
tenant::{
|
||||
block_io::FileBlockReader, disk_btree::VisitDirection,
|
||||
storage_layer::delta_layer::DELTA_KEY_SIZE,
|
||||
},
|
||||
virtual_file::VirtualFile,
|
||||
};
|
||||
use std::fs;
|
||||
use utils::bin_ser::BeSer;
|
||||
use std::fs::{self, File};
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
|
||||
use crate::layer_map_analyzer::parse_filename;
|
||||
@@ -58,40 +47,30 @@ pub(crate) enum LayerCmd {
|
||||
}
|
||||
|
||||
async fn read_delta_file(path: impl AsRef<Path>, ctx: &RequestContext) -> Result<()> {
|
||||
let path = Utf8Path::from_path(path.as_ref()).expect("non-Unicode path");
|
||||
virtual_file::init(10, virtual_file::api::IoEngineKind::StdFs);
|
||||
page_cache::init(100);
|
||||
let file = VirtualFile::open(path, ctx).await?;
|
||||
let file_id = page_cache::next_file_id();
|
||||
let block_reader = FileBlockReader::new(&file, file_id);
|
||||
let summary_blk = block_reader.read_blk(0, ctx).await?;
|
||||
let actual_summary = Summary::des_prefix(summary_blk.as_ref())?;
|
||||
let tree_reader = DiskBtreeReader::<_, DELTA_KEY_SIZE>::new(
|
||||
actual_summary.index_start_blk,
|
||||
actual_summary.index_root_blk,
|
||||
&block_reader,
|
||||
virtual_file::init(
|
||||
10,
|
||||
virtual_file::api::IoEngineKind::StdFs,
|
||||
IoMode::preferred(),
|
||||
);
|
||||
// TODO(chi): dedup w/ `delta_layer.rs` by exposing the API.
|
||||
let mut all = vec![];
|
||||
tree_reader
|
||||
.visit(
|
||||
&[0u8; DELTA_KEY_SIZE],
|
||||
VisitDirection::Forwards,
|
||||
|key, value_offset| {
|
||||
let curr = Key::from_slice(&key[..KEY_SIZE]);
|
||||
all.push((curr, BlobRef(value_offset)));
|
||||
true
|
||||
},
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
let cursor = BlockCursor::new_fileblockreader(&block_reader);
|
||||
for (k, v) in all {
|
||||
let value = cursor.read_blob(v.pos(), ctx).await?;
|
||||
println!("key:{} value_len:{}", k, value.len());
|
||||
assert!(k.is_i128_representable(), "invalid key: ");
|
||||
}
|
||||
// TODO(chi): special handling for last key?
|
||||
page_cache::init(100);
|
||||
let path = Utf8Path::from_path(path.as_ref()).expect("non-Unicode path");
|
||||
let file = File::open(path)?;
|
||||
let delta_layer = DeltaLayer::new_for_path(path, file)?;
|
||||
delta_layer.dump(true, ctx).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn read_image_file(path: impl AsRef<Path>, ctx: &RequestContext) -> Result<()> {
|
||||
virtual_file::init(
|
||||
10,
|
||||
virtual_file::api::IoEngineKind::StdFs,
|
||||
IoMode::preferred(),
|
||||
);
|
||||
page_cache::init(100);
|
||||
let path = Utf8Path::from_path(path.as_ref()).expect("non-Unicode path");
|
||||
let file = File::open(path)?;
|
||||
let image_layer = ImageLayer::new_for_path(path, file)?;
|
||||
image_layer.dump(true, ctx).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -128,8 +107,7 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> {
|
||||
let mut idx = 0;
|
||||
for layer in fs::read_dir(timeline_path)? {
|
||||
let layer = layer?;
|
||||
if let Some(layer_file) = parse_filename(&layer.file_name().into_string().unwrap())
|
||||
{
|
||||
if let Ok(layer_file) = parse_filename(&layer.file_name().into_string().unwrap()) {
|
||||
println!(
|
||||
"[{:3}] key:{}-{}\n lsn:{}-{}\n delta:{}",
|
||||
idx,
|
||||
@@ -158,8 +136,7 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> {
|
||||
let mut idx = 0;
|
||||
for layer in fs::read_dir(timeline_path)? {
|
||||
let layer = layer?;
|
||||
if let Some(layer_file) = parse_filename(&layer.file_name().into_string().unwrap())
|
||||
{
|
||||
if let Ok(layer_file) = parse_filename(&layer.file_name().into_string().unwrap()) {
|
||||
if *id == idx {
|
||||
// TODO(chi): dedup code
|
||||
println!(
|
||||
@@ -175,7 +152,7 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> {
|
||||
if layer_file.is_delta {
|
||||
read_delta_file(layer.path(), &ctx).await?;
|
||||
} else {
|
||||
anyhow::bail!("not supported yet :(");
|
||||
read_image_file(layer.path(), &ctx).await?;
|
||||
}
|
||||
|
||||
break;
|
||||
@@ -190,7 +167,11 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> {
|
||||
new_tenant_id,
|
||||
new_timeline_id,
|
||||
} => {
|
||||
pageserver::virtual_file::init(10, virtual_file::api::IoEngineKind::StdFs);
|
||||
pageserver::virtual_file::init(
|
||||
10,
|
||||
virtual_file::api::IoEngineKind::StdFs,
|
||||
IoMode::preferred(),
|
||||
);
|
||||
pageserver::page_cache::init(100);
|
||||
|
||||
let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error);
|
||||
|
||||
@@ -24,7 +24,7 @@ use pageserver::{
|
||||
page_cache,
|
||||
task_mgr::TaskKind,
|
||||
tenant::{dump_layerfile_from_path, metadata::TimelineMetadata},
|
||||
virtual_file,
|
||||
virtual_file::{self, api::IoMode},
|
||||
};
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use postgres_ffi::ControlFileData;
|
||||
@@ -205,7 +205,11 @@ fn read_pg_control_file(control_file_path: &Utf8Path) -> anyhow::Result<()> {
|
||||
|
||||
async fn print_layerfile(path: &Utf8Path) -> anyhow::Result<()> {
|
||||
// Basic initialization of things that don't change after startup
|
||||
virtual_file::init(10, virtual_file::api::IoEngineKind::StdFs);
|
||||
virtual_file::init(
|
||||
10,
|
||||
virtual_file::api::IoEngineKind::StdFs,
|
||||
IoMode::preferred(),
|
||||
);
|
||||
page_cache::init(100);
|
||||
let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error);
|
||||
dump_layerfile_from_path(path, true, &ctx).await
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use pageserver_api::models::{AuxFilePolicy, TenantConfig, TenantConfigRequest};
|
||||
use pageserver_api::models::{TenantConfig, TenantConfigRequest};
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use utils::id::TenantTimelineId;
|
||||
use utils::lsn::Lsn;
|
||||
@@ -66,10 +66,7 @@ async fn main_impl(args: Args) -> anyhow::Result<()> {
|
||||
mgmt_api_client
|
||||
.tenant_config(&TenantConfigRequest {
|
||||
tenant_id: timeline.tenant_id,
|
||||
config: TenantConfig {
|
||||
switch_aux_file_policy: Some(AuxFilePolicy::V2),
|
||||
..Default::default()
|
||||
},
|
||||
config: TenantConfig::default(),
|
||||
})
|
||||
.await?;
|
||||
|
||||
|
||||
@@ -59,6 +59,7 @@ pub async fn send_basebackup_tarball<'a, W>(
|
||||
req_lsn: Option<Lsn>,
|
||||
prev_lsn: Option<Lsn>,
|
||||
full_backup: bool,
|
||||
replica: bool,
|
||||
ctx: &'a RequestContext,
|
||||
) -> Result<(), BasebackupError>
|
||||
where
|
||||
@@ -110,8 +111,8 @@ where
|
||||
};
|
||||
|
||||
info!(
|
||||
"taking basebackup lsn={}, prev_lsn={} (full_backup={})",
|
||||
backup_lsn, prev_lsn, full_backup
|
||||
"taking basebackup lsn={}, prev_lsn={} (full_backup={}, replica={})",
|
||||
backup_lsn, prev_lsn, full_backup, replica
|
||||
);
|
||||
|
||||
let basebackup = Basebackup {
|
||||
@@ -120,6 +121,7 @@ where
|
||||
lsn: backup_lsn,
|
||||
prev_record_lsn: prev_lsn,
|
||||
full_backup,
|
||||
replica,
|
||||
ctx,
|
||||
};
|
||||
basebackup
|
||||
@@ -140,6 +142,7 @@ where
|
||||
lsn: Lsn,
|
||||
prev_record_lsn: Lsn,
|
||||
full_backup: bool,
|
||||
replica: bool,
|
||||
ctx: &'a RequestContext,
|
||||
}
|
||||
|
||||
@@ -372,6 +375,10 @@ where
|
||||
|
||||
for (path, content) in aux_files {
|
||||
if path.starts_with("pg_replslot") {
|
||||
// Do not create LR slots at standby because they are not used but prevent WAL truncation
|
||||
if self.replica {
|
||||
continue;
|
||||
}
|
||||
let offs = pg_constants::REPL_SLOT_ON_DISK_OFFSETOF_RESTART_LSN;
|
||||
let restart_lsn = Lsn(u64::from_le_bytes(
|
||||
content[offs..offs + 8].try_into().unwrap(),
|
||||
|
||||
@@ -167,7 +167,11 @@ fn main() -> anyhow::Result<()> {
|
||||
let scenario = failpoint_support::init();
|
||||
|
||||
// Basic initialization of things that don't change after startup
|
||||
virtual_file::init(conf.max_file_descriptors, conf.virtual_file_io_engine);
|
||||
virtual_file::init(
|
||||
conf.max_file_descriptors,
|
||||
conf.virtual_file_io_engine,
|
||||
conf.virtual_file_io_mode,
|
||||
);
|
||||
page_cache::init(conf.page_cache_size);
|
||||
|
||||
start_pageserver(launch_ts, conf).context("Failed to start pageserver")?;
|
||||
@@ -394,9 +398,7 @@ fn start_pageserver(
|
||||
ControllerUpcallClient::new(conf, &shutdown_pageserver),
|
||||
conf,
|
||||
);
|
||||
if let Some(deletion_workers) = deletion_workers {
|
||||
deletion_workers.spawn_with(BACKGROUND_RUNTIME.handle());
|
||||
}
|
||||
deletion_workers.spawn_with(BACKGROUND_RUNTIME.handle());
|
||||
|
||||
// Up to this point no significant I/O has been done: this should have been fast. Record
|
||||
// duration prior to starting I/O intensive phase of startup.
|
||||
|
||||
@@ -14,6 +14,7 @@ use itertools::Itertools as _;
|
||||
use pageserver_api::models::TenantState;
|
||||
use remote_storage::{GenericRemoteStorage, RemoteStorageConfig};
|
||||
use reqwest::Url;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use std::time::{Duration, SystemTime};
|
||||
@@ -35,12 +36,62 @@ const DEFAULT_HTTP_REPORTING_TIMEOUT: Duration = Duration::from_secs(60);
|
||||
/// upload attempts.
|
||||
type RawMetric = (MetricsKey, (EventType, u64));
|
||||
|
||||
/// The new serializable metrics format
|
||||
#[derive(Serialize, Deserialize)]
|
||||
struct NewMetricsRoot {
|
||||
version: usize,
|
||||
metrics: Vec<NewRawMetric>,
|
||||
}
|
||||
|
||||
impl NewMetricsRoot {
|
||||
pub fn is_v2_metrics(json_value: &serde_json::Value) -> bool {
|
||||
if let Some(ver) = json_value.get("version") {
|
||||
if let Some(2) = ver.as_u64() {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// The new serializable metrics format
|
||||
#[derive(Serialize)]
|
||||
struct NewMetricsRefRoot<'a> {
|
||||
version: usize,
|
||||
metrics: &'a [NewRawMetric],
|
||||
}
|
||||
|
||||
impl<'a> NewMetricsRefRoot<'a> {
|
||||
fn new(metrics: &'a [NewRawMetric]) -> Self {
|
||||
Self {
|
||||
version: 2,
|
||||
metrics,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The new serializable metrics format
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
|
||||
struct NewRawMetric {
|
||||
key: MetricsKey,
|
||||
kind: EventType,
|
||||
value: u64,
|
||||
// TODO: add generation field and check against generations
|
||||
}
|
||||
|
||||
impl NewRawMetric {
|
||||
#[cfg(test)]
|
||||
fn to_kv_pair(&self) -> (MetricsKey, NewRawMetric) {
|
||||
(self.key, self.clone())
|
||||
}
|
||||
}
|
||||
|
||||
/// Caches the [`RawMetric`]s
|
||||
///
|
||||
/// In practice, during startup, last sent values are stored here to be used in calculating new
|
||||
/// ones. After successful uploading, the cached values are updated to cache. This used to be used
|
||||
/// for deduplication, but that is no longer needed.
|
||||
type Cache = HashMap<MetricsKey, (EventType, u64)>;
|
||||
type Cache = HashMap<MetricsKey, NewRawMetric>;
|
||||
|
||||
pub async fn run(
|
||||
conf: &'static PageServerConf,
|
||||
@@ -231,11 +282,14 @@ async fn restore_and_reschedule(
|
||||
// collect_all_metrics
|
||||
let earlier_metric_at = found_some
|
||||
.iter()
|
||||
.map(|(_, (et, _))| et.recorded_at())
|
||||
.map(|item| item.kind.recorded_at())
|
||||
.copied()
|
||||
.next();
|
||||
|
||||
let cached = found_some.into_iter().collect::<Cache>();
|
||||
let cached = found_some
|
||||
.into_iter()
|
||||
.map(|item| (item.key, item))
|
||||
.collect::<Cache>();
|
||||
|
||||
(cached, earlier_metric_at)
|
||||
}
|
||||
|
||||
@@ -2,11 +2,33 @@ use anyhow::Context;
|
||||
use camino::{Utf8Path, Utf8PathBuf};
|
||||
use std::sync::Arc;
|
||||
|
||||
use super::RawMetric;
|
||||
use crate::consumption_metrics::NewMetricsRefRoot;
|
||||
|
||||
use super::{NewMetricsRoot, NewRawMetric, RawMetric};
|
||||
|
||||
pub(super) fn read_metrics_from_serde_value(
|
||||
json_value: serde_json::Value,
|
||||
) -> anyhow::Result<Vec<NewRawMetric>> {
|
||||
if NewMetricsRoot::is_v2_metrics(&json_value) {
|
||||
let root = serde_json::from_value::<NewMetricsRoot>(json_value)?;
|
||||
Ok(root.metrics)
|
||||
} else {
|
||||
let all_metrics = serde_json::from_value::<Vec<RawMetric>>(json_value)?;
|
||||
let all_metrics = all_metrics
|
||||
.into_iter()
|
||||
.map(|(key, (event_type, value))| NewRawMetric {
|
||||
key,
|
||||
kind: event_type,
|
||||
value,
|
||||
})
|
||||
.collect();
|
||||
Ok(all_metrics)
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) async fn read_metrics_from_disk(
|
||||
path: Arc<Utf8PathBuf>,
|
||||
) -> anyhow::Result<Vec<RawMetric>> {
|
||||
) -> anyhow::Result<Vec<NewRawMetric>> {
|
||||
// do not add context to each error, callsite will log with full path
|
||||
let span = tracing::Span::current();
|
||||
tokio::task::spawn_blocking(move || {
|
||||
@@ -20,7 +42,8 @@ pub(super) async fn read_metrics_from_disk(
|
||||
|
||||
let mut file = std::fs::File::open(&*path)?;
|
||||
let reader = std::io::BufReader::new(&mut file);
|
||||
anyhow::Ok(serde_json::from_reader::<_, Vec<RawMetric>>(reader)?)
|
||||
let json_value = serde_json::from_reader::<_, serde_json::Value>(reader)?;
|
||||
read_metrics_from_serde_value(json_value)
|
||||
})
|
||||
.await
|
||||
.context("read metrics join error")
|
||||
@@ -63,7 +86,7 @@ fn scan_and_delete_with_same_prefix(path: &Utf8Path) -> std::io::Result<()> {
|
||||
}
|
||||
|
||||
pub(super) async fn flush_metrics_to_disk(
|
||||
current_metrics: &Arc<Vec<RawMetric>>,
|
||||
current_metrics: &Arc<Vec<NewRawMetric>>,
|
||||
path: &Arc<Utf8PathBuf>,
|
||||
) -> anyhow::Result<()> {
|
||||
use std::io::Write;
|
||||
@@ -93,8 +116,11 @@ pub(super) async fn flush_metrics_to_disk(
|
||||
// write out all of the raw metrics, to be read out later on restart as cached values
|
||||
{
|
||||
let mut writer = std::io::BufWriter::new(&mut tempfile);
|
||||
serde_json::to_writer(&mut writer, &*current_metrics)
|
||||
.context("serialize metrics")?;
|
||||
serde_json::to_writer(
|
||||
&mut writer,
|
||||
&NewMetricsRefRoot::new(current_metrics.as_ref()),
|
||||
)
|
||||
.context("serialize metrics")?;
|
||||
writer
|
||||
.into_inner()
|
||||
.map_err(|_| anyhow::anyhow!("flushing metrics failed"))?;
|
||||
|
||||
@@ -9,7 +9,7 @@ use utils::{
|
||||
lsn::Lsn,
|
||||
};
|
||||
|
||||
use super::{Cache, RawMetric};
|
||||
use super::{Cache, NewRawMetric};
|
||||
|
||||
/// Name of the metric, used by `MetricsKey` factory methods and `deserialize_cached_events`
|
||||
/// instead of static str.
|
||||
@@ -64,11 +64,21 @@ impl MetricsKey {
|
||||
struct AbsoluteValueFactory(MetricsKey);
|
||||
|
||||
impl AbsoluteValueFactory {
|
||||
const fn at(self, time: DateTime<Utc>, val: u64) -> RawMetric {
|
||||
#[cfg(test)]
|
||||
const fn at_old_format(self, time: DateTime<Utc>, val: u64) -> super::RawMetric {
|
||||
let key = self.0;
|
||||
(key, (EventType::Absolute { time }, val))
|
||||
}
|
||||
|
||||
const fn at(self, time: DateTime<Utc>, val: u64) -> NewRawMetric {
|
||||
let key = self.0;
|
||||
NewRawMetric {
|
||||
key,
|
||||
kind: EventType::Absolute { time },
|
||||
value: val,
|
||||
}
|
||||
}
|
||||
|
||||
fn key(&self) -> &MetricsKey {
|
||||
&self.0
|
||||
}
|
||||
@@ -84,7 +94,28 @@ impl IncrementalValueFactory {
|
||||
prev_end: DateTime<Utc>,
|
||||
up_to: DateTime<Utc>,
|
||||
val: u64,
|
||||
) -> RawMetric {
|
||||
) -> NewRawMetric {
|
||||
let key = self.0;
|
||||
// cannot assert prev_end < up_to because these are realtime clock based
|
||||
let when = EventType::Incremental {
|
||||
start_time: prev_end,
|
||||
stop_time: up_to,
|
||||
};
|
||||
NewRawMetric {
|
||||
key,
|
||||
kind: when,
|
||||
value: val,
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::wrong_self_convention)]
|
||||
#[cfg(test)]
|
||||
const fn from_until_old_format(
|
||||
self,
|
||||
prev_end: DateTime<Utc>,
|
||||
up_to: DateTime<Utc>,
|
||||
val: u64,
|
||||
) -> super::RawMetric {
|
||||
let key = self.0;
|
||||
// cannot assert prev_end < up_to because these are realtime clock based
|
||||
let when = EventType::Incremental {
|
||||
@@ -185,7 +216,7 @@ pub(super) async fn collect_all_metrics(
|
||||
tenant_manager: &Arc<TenantManager>,
|
||||
cached_metrics: &Cache,
|
||||
ctx: &RequestContext,
|
||||
) -> Vec<RawMetric> {
|
||||
) -> Vec<NewRawMetric> {
|
||||
use pageserver_api::models::TenantState;
|
||||
|
||||
let started_at = std::time::Instant::now();
|
||||
@@ -220,11 +251,11 @@ pub(super) async fn collect_all_metrics(
|
||||
res
|
||||
}
|
||||
|
||||
async fn collect<S>(tenants: S, cache: &Cache, ctx: &RequestContext) -> Vec<RawMetric>
|
||||
async fn collect<S>(tenants: S, cache: &Cache, ctx: &RequestContext) -> Vec<NewRawMetric>
|
||||
where
|
||||
S: futures::stream::Stream<Item = (TenantId, Arc<crate::tenant::Tenant>)>,
|
||||
{
|
||||
let mut current_metrics: Vec<RawMetric> = Vec::new();
|
||||
let mut current_metrics: Vec<NewRawMetric> = Vec::new();
|
||||
|
||||
let mut tenants = std::pin::pin!(tenants);
|
||||
|
||||
@@ -291,7 +322,7 @@ impl TenantSnapshot {
|
||||
tenant_id: TenantId,
|
||||
now: DateTime<Utc>,
|
||||
cached: &Cache,
|
||||
metrics: &mut Vec<RawMetric>,
|
||||
metrics: &mut Vec<NewRawMetric>,
|
||||
) {
|
||||
let remote_size = MetricsKey::remote_storage_size(tenant_id).at(now, self.remote_size);
|
||||
|
||||
@@ -302,9 +333,9 @@ impl TenantSnapshot {
|
||||
let mut synthetic_size = self.synthetic_size;
|
||||
|
||||
if synthetic_size == 0 {
|
||||
if let Some((_, value)) = cached.get(factory.key()) {
|
||||
// use the latest value from previous session
|
||||
synthetic_size = *value;
|
||||
if let Some(item) = cached.get(factory.key()) {
|
||||
// use the latest value from previous session, TODO: check generation number
|
||||
synthetic_size = item.value;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -381,37 +412,36 @@ impl TimelineSnapshot {
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
now: DateTime<Utc>,
|
||||
metrics: &mut Vec<RawMetric>,
|
||||
metrics: &mut Vec<NewRawMetric>,
|
||||
cache: &Cache,
|
||||
) {
|
||||
let timeline_written_size = u64::from(self.last_record_lsn);
|
||||
|
||||
let written_size_delta_key = MetricsKey::written_size_delta(tenant_id, timeline_id);
|
||||
|
||||
let last_stop_time = cache
|
||||
.get(written_size_delta_key.key())
|
||||
.map(|(until, _val)| {
|
||||
until
|
||||
.incremental_timerange()
|
||||
.expect("never create EventType::Absolute for written_size_delta")
|
||||
.end
|
||||
});
|
||||
let last_stop_time = cache.get(written_size_delta_key.key()).map(|item| {
|
||||
item.kind
|
||||
.incremental_timerange()
|
||||
.expect("never create EventType::Absolute for written_size_delta")
|
||||
.end
|
||||
});
|
||||
|
||||
let (key, written_size_now) =
|
||||
let written_size_now =
|
||||
MetricsKey::written_size(tenant_id, timeline_id).at(now, timeline_written_size);
|
||||
|
||||
// by default, use the last sent written_size as the basis for
|
||||
// calculating the delta. if we don't yet have one, use the load time value.
|
||||
let prev = cache
|
||||
.get(&key)
|
||||
.map(|(prev_at, prev)| {
|
||||
let prev: (DateTime<Utc>, u64) = cache
|
||||
.get(&written_size_now.key)
|
||||
.map(|item| {
|
||||
// use the prev time from our last incremental update, or default to latest
|
||||
// absolute update on the first round.
|
||||
let prev_at = prev_at
|
||||
let prev_at = item
|
||||
.kind
|
||||
.absolute_time()
|
||||
.expect("never create EventType::Incremental for written_size");
|
||||
let prev_at = last_stop_time.unwrap_or(prev_at);
|
||||
(*prev_at, *prev)
|
||||
(*prev_at, item.value)
|
||||
})
|
||||
.unwrap_or_else(|| {
|
||||
// if we don't have a previous point of comparison, compare to the load time
|
||||
@@ -422,24 +452,28 @@ impl TimelineSnapshot {
|
||||
|
||||
let up_to = now;
|
||||
|
||||
if let Some(delta) = written_size_now.1.checked_sub(prev.1) {
|
||||
if let Some(delta) = written_size_now.value.checked_sub(prev.1) {
|
||||
let key_value = written_size_delta_key.from_until(prev.0, up_to, delta);
|
||||
// written_size_delta
|
||||
metrics.push(key_value);
|
||||
// written_size
|
||||
metrics.push((key, written_size_now));
|
||||
metrics.push(written_size_now);
|
||||
} else {
|
||||
// the cached value was ahead of us, report zero until we've caught up
|
||||
metrics.push(written_size_delta_key.from_until(prev.0, up_to, 0));
|
||||
// the cached value was ahead of us, report the same until we've caught up
|
||||
metrics.push((key, (written_size_now.0, prev.1)));
|
||||
metrics.push(NewRawMetric {
|
||||
key: written_size_now.key,
|
||||
kind: written_size_now.kind,
|
||||
value: prev.1,
|
||||
});
|
||||
}
|
||||
|
||||
{
|
||||
let factory = MetricsKey::timeline_logical_size(tenant_id, timeline_id);
|
||||
let current_or_previous = self
|
||||
.current_exact_logical_size
|
||||
.or_else(|| cache.get(factory.key()).map(|(_, val)| *val));
|
||||
.or_else(|| cache.get(factory.key()).map(|item| item.value));
|
||||
|
||||
if let Some(size) = current_or_previous {
|
||||
metrics.push(factory.at(now, size));
|
||||
@@ -452,4 +486,4 @@ impl TimelineSnapshot {
|
||||
mod tests;
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) use tests::metric_examples;
|
||||
pub(crate) use tests::{metric_examples, metric_examples_old};
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
use crate::consumption_metrics::RawMetric;
|
||||
|
||||
use super::*;
|
||||
use std::collections::HashMap;
|
||||
|
||||
@@ -50,9 +52,9 @@ fn startup_collected_timeline_metrics_second_round() {
|
||||
let disk_consistent_lsn = Lsn(initdb_lsn.0 * 2);
|
||||
|
||||
let mut metrics = Vec::new();
|
||||
let cache = HashMap::from([
|
||||
MetricsKey::written_size(tenant_id, timeline_id).at(before, disk_consistent_lsn.0)
|
||||
]);
|
||||
let cache = HashMap::from([MetricsKey::written_size(tenant_id, timeline_id)
|
||||
.at(before, disk_consistent_lsn.0)
|
||||
.to_kv_pair()]);
|
||||
|
||||
let snap = TimelineSnapshot {
|
||||
loaded_at: (disk_consistent_lsn, init),
|
||||
@@ -89,9 +91,13 @@ fn startup_collected_timeline_metrics_nth_round_at_same_lsn() {
|
||||
let mut metrics = Vec::new();
|
||||
let cache = HashMap::from([
|
||||
// at t=before was the last time the last_record_lsn changed
|
||||
MetricsKey::written_size(tenant_id, timeline_id).at(before, disk_consistent_lsn.0),
|
||||
MetricsKey::written_size(tenant_id, timeline_id)
|
||||
.at(before, disk_consistent_lsn.0)
|
||||
.to_kv_pair(),
|
||||
// end time of this event is used for the next ones
|
||||
MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(before, just_before, 0),
|
||||
MetricsKey::written_size_delta(tenant_id, timeline_id)
|
||||
.from_until(before, just_before, 0)
|
||||
.to_kv_pair(),
|
||||
]);
|
||||
|
||||
let snap = TimelineSnapshot {
|
||||
@@ -138,13 +144,17 @@ fn post_restart_written_sizes_with_rolled_back_last_record_lsn() {
|
||||
};
|
||||
|
||||
let mut cache = HashMap::from([
|
||||
MetricsKey::written_size(tenant_id, timeline_id).at(before_restart, 100),
|
||||
MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(
|
||||
way_before,
|
||||
before_restart,
|
||||
// not taken into account, but the timestamps are important
|
||||
999_999_999,
|
||||
),
|
||||
MetricsKey::written_size(tenant_id, timeline_id)
|
||||
.at(before_restart, 100)
|
||||
.to_kv_pair(),
|
||||
MetricsKey::written_size_delta(tenant_id, timeline_id)
|
||||
.from_until(
|
||||
way_before,
|
||||
before_restart,
|
||||
// not taken into account, but the timestamps are important
|
||||
999_999_999,
|
||||
)
|
||||
.to_kv_pair(),
|
||||
]);
|
||||
|
||||
let mut metrics = Vec::new();
|
||||
@@ -163,7 +173,7 @@ fn post_restart_written_sizes_with_rolled_back_last_record_lsn() {
|
||||
);
|
||||
|
||||
// now if we cache these metrics, and re-run while "still in recovery"
|
||||
cache.extend(metrics.drain(..));
|
||||
cache.extend(metrics.drain(..).map(|x| x.to_kv_pair()));
|
||||
|
||||
// "still in recovery", because our snapshot did not change
|
||||
snap.to_metrics(tenant_id, timeline_id, later, &mut metrics, &cache);
|
||||
@@ -194,14 +204,14 @@ fn post_restart_current_exact_logical_size_uses_cached() {
|
||||
current_exact_logical_size: None,
|
||||
};
|
||||
|
||||
let cache = HashMap::from([
|
||||
MetricsKey::timeline_logical_size(tenant_id, timeline_id).at(before_restart, 100)
|
||||
]);
|
||||
let cache = HashMap::from([MetricsKey::timeline_logical_size(tenant_id, timeline_id)
|
||||
.at(before_restart, 100)
|
||||
.to_kv_pair()]);
|
||||
|
||||
let mut metrics = Vec::new();
|
||||
snap.to_metrics(tenant_id, timeline_id, now, &mut metrics, &cache);
|
||||
|
||||
metrics.retain(|(key, _)| key.metric == Name::LogicalSize);
|
||||
metrics.retain(|item| item.key.metric == Name::LogicalSize);
|
||||
|
||||
assert_eq!(
|
||||
metrics,
|
||||
@@ -224,7 +234,9 @@ fn post_restart_synthetic_size_uses_cached_if_available() {
|
||||
let before_restart = DateTime::<Utc>::from(now - std::time::Duration::from_secs(5 * 60));
|
||||
let now = DateTime::<Utc>::from(now);
|
||||
|
||||
let cached = HashMap::from([MetricsKey::synthetic_size(tenant_id).at(before_restart, 1000)]);
|
||||
let cached = HashMap::from([MetricsKey::synthetic_size(tenant_id)
|
||||
.at(before_restart, 1000)
|
||||
.to_kv_pair()]);
|
||||
|
||||
let mut metrics = Vec::new();
|
||||
ts.to_metrics(tenant_id, now, &cached, &mut metrics);
|
||||
@@ -278,12 +290,29 @@ fn time_backwards<const N: usize>() -> [std::time::SystemTime; N] {
|
||||
times
|
||||
}
|
||||
|
||||
pub(crate) const fn metric_examples(
|
||||
pub(crate) const fn metric_examples_old(
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
now: DateTime<Utc>,
|
||||
before: DateTime<Utc>,
|
||||
) -> [RawMetric; 6] {
|
||||
[
|
||||
MetricsKey::written_size(tenant_id, timeline_id).at_old_format(now, 0),
|
||||
MetricsKey::written_size_delta(tenant_id, timeline_id)
|
||||
.from_until_old_format(before, now, 0),
|
||||
MetricsKey::timeline_logical_size(tenant_id, timeline_id).at_old_format(now, 0),
|
||||
MetricsKey::remote_storage_size(tenant_id).at_old_format(now, 0),
|
||||
MetricsKey::resident_size(tenant_id).at_old_format(now, 0),
|
||||
MetricsKey::synthetic_size(tenant_id).at_old_format(now, 1),
|
||||
]
|
||||
}
|
||||
|
||||
pub(crate) const fn metric_examples(
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
now: DateTime<Utc>,
|
||||
before: DateTime<Utc>,
|
||||
) -> [NewRawMetric; 6] {
|
||||
[
|
||||
MetricsKey::written_size(tenant_id, timeline_id).at(now, 0),
|
||||
MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(before, now, 0),
|
||||
|
||||
@@ -7,7 +7,7 @@ use tokio::io::AsyncWriteExt;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::Instrument;
|
||||
|
||||
use super::{metrics::Name, Cache, MetricsKey, RawMetric};
|
||||
use super::{metrics::Name, Cache, MetricsKey, NewRawMetric, RawMetric};
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
|
||||
/// How the metrics from pageserver are identified.
|
||||
@@ -24,7 +24,7 @@ pub(super) async fn upload_metrics_http(
|
||||
client: &reqwest::Client,
|
||||
metric_collection_endpoint: &reqwest::Url,
|
||||
cancel: &CancellationToken,
|
||||
metrics: &[RawMetric],
|
||||
metrics: &[NewRawMetric],
|
||||
cached_metrics: &mut Cache,
|
||||
idempotency_keys: &[IdempotencyKey<'_>],
|
||||
) -> anyhow::Result<()> {
|
||||
@@ -53,8 +53,8 @@ pub(super) async fn upload_metrics_http(
|
||||
|
||||
match res {
|
||||
Ok(()) => {
|
||||
for (curr_key, curr_val) in chunk {
|
||||
cached_metrics.insert(*curr_key, *curr_val);
|
||||
for item in chunk {
|
||||
cached_metrics.insert(item.key, item.clone());
|
||||
}
|
||||
uploaded += chunk.len();
|
||||
}
|
||||
@@ -86,7 +86,7 @@ pub(super) async fn upload_metrics_bucket(
|
||||
client: &GenericRemoteStorage,
|
||||
cancel: &CancellationToken,
|
||||
node_id: &str,
|
||||
metrics: &[RawMetric],
|
||||
metrics: &[NewRawMetric],
|
||||
idempotency_keys: &[IdempotencyKey<'_>],
|
||||
) -> anyhow::Result<()> {
|
||||
if metrics.is_empty() {
|
||||
@@ -140,16 +140,16 @@ pub(super) async fn upload_metrics_bucket(
|
||||
/// across different metrics sinks), and must have the same length as input.
|
||||
fn serialize_in_chunks<'a>(
|
||||
chunk_size: usize,
|
||||
input: &'a [RawMetric],
|
||||
input: &'a [NewRawMetric],
|
||||
idempotency_keys: &'a [IdempotencyKey<'a>],
|
||||
) -> impl ExactSizeIterator<Item = Result<(&'a [RawMetric], bytes::Bytes), serde_json::Error>> + 'a
|
||||
) -> impl ExactSizeIterator<Item = Result<(&'a [NewRawMetric], bytes::Bytes), serde_json::Error>> + 'a
|
||||
{
|
||||
use bytes::BufMut;
|
||||
|
||||
assert_eq!(input.len(), idempotency_keys.len());
|
||||
|
||||
struct Iter<'a> {
|
||||
inner: std::slice::Chunks<'a, RawMetric>,
|
||||
inner: std::slice::Chunks<'a, NewRawMetric>,
|
||||
idempotency_keys: std::slice::Iter<'a, IdempotencyKey<'a>>,
|
||||
chunk_size: usize,
|
||||
|
||||
@@ -160,7 +160,7 @@ fn serialize_in_chunks<'a>(
|
||||
}
|
||||
|
||||
impl<'a> Iterator for Iter<'a> {
|
||||
type Item = Result<(&'a [RawMetric], bytes::Bytes), serde_json::Error>;
|
||||
type Item = Result<(&'a [NewRawMetric], bytes::Bytes), serde_json::Error>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let chunk = self.inner.next()?;
|
||||
@@ -198,7 +198,7 @@ fn serialize_in_chunks<'a>(
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> ExactSizeIterator for Iter<'a> {}
|
||||
impl ExactSizeIterator for Iter<'_> {}
|
||||
|
||||
let buffer = bytes::BytesMut::new();
|
||||
let inner = input.chunks(chunk_size);
|
||||
@@ -269,6 +269,58 @@ impl RawMetricExt for RawMetric {
|
||||
}
|
||||
}
|
||||
|
||||
impl RawMetricExt for NewRawMetric {
|
||||
fn as_event(&self, key: &IdempotencyKey<'_>) -> Event<Ids, Name> {
|
||||
let MetricsKey {
|
||||
metric,
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
} = self.key;
|
||||
|
||||
let kind = self.kind;
|
||||
let value = self.value;
|
||||
|
||||
Event {
|
||||
kind,
|
||||
metric,
|
||||
idempotency_key: key.to_string(),
|
||||
value,
|
||||
extra: Ids {
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn update_in_place(&self, event: &mut Event<Ids, Name>, key: &IdempotencyKey<'_>) {
|
||||
use std::fmt::Write;
|
||||
|
||||
let MetricsKey {
|
||||
metric,
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
} = self.key;
|
||||
|
||||
let kind = self.kind;
|
||||
let value = self.value;
|
||||
|
||||
*event = Event {
|
||||
kind,
|
||||
metric,
|
||||
idempotency_key: {
|
||||
event.idempotency_key.clear();
|
||||
write!(event.idempotency_key, "{key}").unwrap();
|
||||
std::mem::take(&mut event.idempotency_key)
|
||||
},
|
||||
value,
|
||||
extra: Ids {
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
},
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) trait KeyGen<'a> {
|
||||
fn generate(&self) -> IdempotencyKey<'a>;
|
||||
}
|
||||
@@ -381,6 +433,10 @@ async fn upload(
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::consumption_metrics::{
|
||||
disk_cache::read_metrics_from_serde_value, NewMetricsRefRoot,
|
||||
};
|
||||
|
||||
use super::*;
|
||||
use chrono::{DateTime, Utc};
|
||||
use once_cell::sync::Lazy;
|
||||
@@ -473,23 +529,49 @@ mod tests {
|
||||
let idempotency_key = consumption_metrics::IdempotencyKey::for_tests(*SAMPLES_NOW, "1", 0);
|
||||
let examples = examples.into_iter().zip(metric_samples());
|
||||
|
||||
for ((line, expected), (key, (kind, value))) in examples {
|
||||
for ((line, expected), item) in examples {
|
||||
let e = consumption_metrics::Event {
|
||||
kind,
|
||||
metric: key.metric,
|
||||
kind: item.kind,
|
||||
metric: item.key.metric,
|
||||
idempotency_key: idempotency_key.to_string(),
|
||||
value,
|
||||
value: item.value,
|
||||
extra: Ids {
|
||||
tenant_id: key.tenant_id,
|
||||
timeline_id: key.timeline_id,
|
||||
tenant_id: item.key.tenant_id,
|
||||
timeline_id: item.key.timeline_id,
|
||||
},
|
||||
};
|
||||
let actual = serde_json::to_string(&e).unwrap();
|
||||
assert_eq!(expected, actual, "example for {kind:?} from line {line}");
|
||||
assert_eq!(
|
||||
expected, actual,
|
||||
"example for {:?} from line {line}",
|
||||
item.kind
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
fn metric_samples() -> [RawMetric; 6] {
|
||||
#[test]
|
||||
fn disk_format_upgrade() {
|
||||
let old_samples_json = serde_json::to_value(metric_samples_old()).unwrap();
|
||||
let new_samples =
|
||||
serde_json::to_value(NewMetricsRefRoot::new(metric_samples().as_ref())).unwrap();
|
||||
let upgraded_samples = read_metrics_from_serde_value(old_samples_json).unwrap();
|
||||
let new_samples = read_metrics_from_serde_value(new_samples).unwrap();
|
||||
assert_eq!(upgraded_samples, new_samples);
|
||||
}
|
||||
|
||||
fn metric_samples_old() -> [RawMetric; 6] {
|
||||
let tenant_id = TenantId::from_array([0; 16]);
|
||||
let timeline_id = TimelineId::from_array([0xff; 16]);
|
||||
|
||||
let before = DateTime::parse_from_rfc3339("2023-09-14T00:00:00.123456789Z")
|
||||
.unwrap()
|
||||
.into();
|
||||
let [now, before] = [*SAMPLES_NOW, before];
|
||||
|
||||
super::super::metrics::metric_examples_old(tenant_id, timeline_id, now, before)
|
||||
}
|
||||
|
||||
fn metric_samples() -> [NewRawMetric; 6] {
|
||||
let tenant_id = TenantId::from_array([0; 16]);
|
||||
let timeline_id = TimelineId::from_array([0xff; 16]);
|
||||
|
||||
|
||||
@@ -618,13 +618,11 @@ impl DeletionQueue {
|
||||
/// Caller may use the returned object to construct clients with new_client.
|
||||
/// Caller should tokio::spawn the background() members of the two worker objects returned:
|
||||
/// we don't spawn those inside new() so that the caller can use their runtime/spans of choice.
|
||||
///
|
||||
/// If remote_storage is None, then the returned workers will also be None.
|
||||
pub fn new<C>(
|
||||
remote_storage: GenericRemoteStorage,
|
||||
controller_upcall_client: Option<C>,
|
||||
conf: &'static PageServerConf,
|
||||
) -> (Self, Option<DeletionQueueWorkers<C>>)
|
||||
) -> (Self, DeletionQueueWorkers<C>)
|
||||
where
|
||||
C: ControlPlaneGenerationsApi + Send + Sync,
|
||||
{
|
||||
@@ -656,7 +654,7 @@ impl DeletionQueue {
|
||||
},
|
||||
cancel: cancel.clone(),
|
||||
},
|
||||
Some(DeletionQueueWorkers {
|
||||
DeletionQueueWorkers {
|
||||
frontend: ListWriter::new(conf, rx, backend_tx, cancel.clone()),
|
||||
backend: Validator::new(
|
||||
conf,
|
||||
@@ -667,7 +665,7 @@ impl DeletionQueue {
|
||||
cancel.clone(),
|
||||
),
|
||||
executor: Deleter::new(remote_storage, executor_rx, cancel.clone()),
|
||||
}),
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
@@ -696,7 +694,7 @@ impl DeletionQueue {
|
||||
mod test {
|
||||
use camino::Utf8Path;
|
||||
use hex_literal::hex;
|
||||
use pageserver_api::{shard::ShardIndex, upcall_api::ReAttachResponseTenant};
|
||||
use pageserver_api::{key::Key, shard::ShardIndex, upcall_api::ReAttachResponseTenant};
|
||||
use std::{io::ErrorKind, time::Duration};
|
||||
use tracing::info;
|
||||
|
||||
@@ -705,7 +703,6 @@ mod test {
|
||||
|
||||
use crate::{
|
||||
controller_upcall_client::RetryForeverError,
|
||||
repository::Key,
|
||||
tenant::{harness::TenantHarness, storage_layer::DeltaLayerName},
|
||||
};
|
||||
|
||||
@@ -743,9 +740,7 @@ mod test {
|
||||
);
|
||||
|
||||
tracing::debug!("Spawning worker for new queue queue");
|
||||
let worker_join = workers
|
||||
.unwrap()
|
||||
.spawn_with(&tokio::runtime::Handle::current());
|
||||
let worker_join = workers.spawn_with(&tokio::runtime::Handle::current());
|
||||
|
||||
let old_worker_join = std::mem::replace(&mut self.worker_join, worker_join);
|
||||
let old_deletion_queue = std::mem::replace(&mut self.deletion_queue, deletion_queue);
|
||||
@@ -856,7 +851,6 @@ mod test {
|
||||
harness.conf,
|
||||
);
|
||||
|
||||
let worker = worker.unwrap();
|
||||
let worker_join = worker.spawn_with(&tokio::runtime::Handle::current());
|
||||
|
||||
Ok(TestSetup {
|
||||
|
||||
@@ -654,7 +654,7 @@ impl std::fmt::Debug for EvictionCandidate {
|
||||
let ts = chrono::DateTime::<chrono::Utc>::from(self.last_activity_ts);
|
||||
let ts = ts.to_rfc3339_opts(chrono::SecondsFormat::Nanos, true);
|
||||
struct DisplayIsDebug<'a, T>(&'a T);
|
||||
impl<'a, T: std::fmt::Display> std::fmt::Debug for DisplayIsDebug<'a, T> {
|
||||
impl<T: std::fmt::Display> std::fmt::Debug for DisplayIsDebug<'_, T> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}", self.0)
|
||||
}
|
||||
|
||||
@@ -597,6 +597,10 @@ paths:
|
||||
Create a timeline. Returns new timeline id on success.
|
||||
Recreating the same timeline will succeed if the parameters match the existing timeline.
|
||||
If no pg_version is specified, assume DEFAULT_PG_VERSION hardcoded in the pageserver.
|
||||
|
||||
To ensure durability, the caller must retry the creation until success.
|
||||
Just because the timeline is visible via other endpoints does not mean it is durable.
|
||||
Future versions may stop showing timelines that are not yet durable.
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
|
||||
@@ -26,6 +26,7 @@ use pageserver_api::models::LocationConfigListResponse;
|
||||
use pageserver_api::models::LocationConfigMode;
|
||||
use pageserver_api::models::LsnLease;
|
||||
use pageserver_api::models::LsnLeaseRequest;
|
||||
use pageserver_api::models::OffloadedTimelineInfo;
|
||||
use pageserver_api::models::ShardParameters;
|
||||
use pageserver_api::models::TenantDetails;
|
||||
use pageserver_api::models::TenantLocationConfigRequest;
|
||||
@@ -37,6 +38,8 @@ use pageserver_api::models::TenantShardSplitRequest;
|
||||
use pageserver_api::models::TenantShardSplitResponse;
|
||||
use pageserver_api::models::TenantSorting;
|
||||
use pageserver_api::models::TimelineArchivalConfigRequest;
|
||||
use pageserver_api::models::TimelineCreateRequestMode;
|
||||
use pageserver_api::models::TimelinesInfoAndOffloaded;
|
||||
use pageserver_api::models::TopTenantShardItem;
|
||||
use pageserver_api::models::TopTenantShardsRequest;
|
||||
use pageserver_api::models::TopTenantShardsResponse;
|
||||
@@ -77,11 +80,14 @@ use crate::tenant::size::ModelInputs;
|
||||
use crate::tenant::storage_layer::LayerAccessStatsReset;
|
||||
use crate::tenant::storage_layer::LayerName;
|
||||
use crate::tenant::timeline::offload::offload_timeline;
|
||||
use crate::tenant::timeline::offload::OffloadError;
|
||||
use crate::tenant::timeline::CompactFlags;
|
||||
use crate::tenant::timeline::CompactionError;
|
||||
use crate::tenant::timeline::Timeline;
|
||||
use crate::tenant::GetTimelineError;
|
||||
use crate::tenant::OffloadedTimeline;
|
||||
use crate::tenant::{LogicalSizeCalculationCause, PageReconstructError};
|
||||
use crate::DEFAULT_PG_VERSION;
|
||||
use crate::{disk_usage_eviction_task, tenant};
|
||||
use pageserver_api::models::{
|
||||
StatusResponse, TenantConfigRequest, TenantInfo, TimelineCreateRequest, TimelineGcRequest,
|
||||
@@ -477,6 +483,24 @@ async fn build_timeline_info_common(
|
||||
Ok(info)
|
||||
}
|
||||
|
||||
fn build_timeline_offloaded_info(offloaded: &Arc<OffloadedTimeline>) -> OffloadedTimelineInfo {
|
||||
let &OffloadedTimeline {
|
||||
tenant_shard_id,
|
||||
timeline_id,
|
||||
ancestor_retain_lsn,
|
||||
ancestor_timeline_id,
|
||||
archived_at,
|
||||
..
|
||||
} = offloaded.as_ref();
|
||||
OffloadedTimelineInfo {
|
||||
tenant_id: tenant_shard_id,
|
||||
timeline_id,
|
||||
ancestor_retain_lsn,
|
||||
ancestor_timeline_id,
|
||||
archived_at: archived_at.and_utc(),
|
||||
}
|
||||
}
|
||||
|
||||
// healthcheck handler
|
||||
async fn status_handler(
|
||||
request: Request<Body>,
|
||||
@@ -526,6 +550,26 @@ async fn timeline_create_handler(
|
||||
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
|
||||
|
||||
let new_timeline_id = request_data.new_timeline_id;
|
||||
// fill in the default pg_version if not provided & convert request into domain model
|
||||
let params: tenant::CreateTimelineParams = match request_data.mode {
|
||||
TimelineCreateRequestMode::Bootstrap {
|
||||
existing_initdb_timeline_id,
|
||||
pg_version,
|
||||
} => tenant::CreateTimelineParams::Bootstrap(tenant::CreateTimelineParamsBootstrap {
|
||||
new_timeline_id,
|
||||
existing_initdb_timeline_id,
|
||||
pg_version: pg_version.unwrap_or(DEFAULT_PG_VERSION),
|
||||
}),
|
||||
TimelineCreateRequestMode::Branch {
|
||||
ancestor_timeline_id,
|
||||
ancestor_start_lsn,
|
||||
pg_version: _,
|
||||
} => tenant::CreateTimelineParams::Branch(tenant::CreateTimelineParamsBranch {
|
||||
new_timeline_id,
|
||||
ancestor_timeline_id,
|
||||
ancestor_start_lsn,
|
||||
}),
|
||||
};
|
||||
|
||||
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Error);
|
||||
|
||||
@@ -538,22 +582,12 @@ async fn timeline_create_handler(
|
||||
|
||||
tenant.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await?;
|
||||
|
||||
if let Some(ancestor_id) = request_data.ancestor_timeline_id.as_ref() {
|
||||
tracing::info!(%ancestor_id, "starting to branch");
|
||||
} else {
|
||||
tracing::info!("bootstrapping");
|
||||
}
|
||||
// earlier versions of the code had pg_version and ancestor_lsn in the span
|
||||
// => continue to provide that information, but, through a log message that doesn't require us to destructure
|
||||
tracing::info!(?params, "creating timeline");
|
||||
|
||||
match tenant
|
||||
.create_timeline(
|
||||
new_timeline_id,
|
||||
request_data.ancestor_timeline_id,
|
||||
request_data.ancestor_start_lsn,
|
||||
request_data.pg_version.unwrap_or(crate::DEFAULT_PG_VERSION),
|
||||
request_data.existing_initdb_timeline_id,
|
||||
state.broker_client.clone(),
|
||||
&ctx,
|
||||
)
|
||||
.create_timeline(params, state.broker_client.clone(), &ctx)
|
||||
.await
|
||||
{
|
||||
Ok(new_timeline) => {
|
||||
@@ -604,8 +638,6 @@ async fn timeline_create_handler(
|
||||
tenant_id = %tenant_shard_id.tenant_id,
|
||||
shard_id = %tenant_shard_id.shard_slug(),
|
||||
timeline_id = %new_timeline_id,
|
||||
lsn=?request_data.ancestor_start_lsn,
|
||||
pg_version=?request_data.pg_version
|
||||
))
|
||||
.await
|
||||
}
|
||||
@@ -643,7 +675,7 @@ async fn timeline_list_handler(
|
||||
)
|
||||
.instrument(info_span!("build_timeline_info", timeline_id = %timeline.timeline_id))
|
||||
.await
|
||||
.context("Failed to convert tenant timeline {timeline_id} into the local one: {e:?}")
|
||||
.context("Failed to build timeline info")
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
|
||||
response_data.push(timeline_info);
|
||||
@@ -658,6 +690,62 @@ async fn timeline_list_handler(
|
||||
json_response(StatusCode::OK, response_data)
|
||||
}
|
||||
|
||||
async fn timeline_and_offloaded_list_handler(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
|
||||
let include_non_incremental_logical_size: Option<bool> =
|
||||
parse_query_param(&request, "include-non-incremental-logical-size")?;
|
||||
let force_await_initial_logical_size: Option<bool> =
|
||||
parse_query_param(&request, "force-await-initial-logical-size")?;
|
||||
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
|
||||
|
||||
let state = get_state(&request);
|
||||
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
|
||||
|
||||
let response_data = async {
|
||||
let tenant = state
|
||||
.tenant_manager
|
||||
.get_attached_tenant_shard(tenant_shard_id)?;
|
||||
|
||||
tenant.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await?;
|
||||
|
||||
let (timelines, offloadeds) = tenant.list_timelines_and_offloaded();
|
||||
|
||||
let mut timeline_infos = Vec::with_capacity(timelines.len());
|
||||
for timeline in timelines {
|
||||
let timeline_info = build_timeline_info(
|
||||
&timeline,
|
||||
include_non_incremental_logical_size.unwrap_or(false),
|
||||
force_await_initial_logical_size.unwrap_or(false),
|
||||
&ctx,
|
||||
)
|
||||
.instrument(info_span!("build_timeline_info", timeline_id = %timeline.timeline_id))
|
||||
.await
|
||||
.context("Failed to build timeline info")
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
|
||||
timeline_infos.push(timeline_info);
|
||||
}
|
||||
let offloaded_infos = offloadeds
|
||||
.into_iter()
|
||||
.map(|offloaded| build_timeline_offloaded_info(&offloaded))
|
||||
.collect::<Vec<_>>();
|
||||
let res = TimelinesInfoAndOffloaded {
|
||||
timelines: timeline_infos,
|
||||
offloaded: offloaded_infos,
|
||||
};
|
||||
Ok::<TimelinesInfoAndOffloaded, ApiError>(res)
|
||||
}
|
||||
.instrument(info_span!("timeline_and_offloaded_list",
|
||||
tenant_id = %tenant_shard_id.tenant_id,
|
||||
shard_id = %tenant_shard_id.shard_slug()))
|
||||
.await?;
|
||||
|
||||
json_response(StatusCode::OK, response_data)
|
||||
}
|
||||
|
||||
async fn timeline_preserve_initdb_handler(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
@@ -1206,6 +1294,99 @@ async fn layer_map_info_handler(
|
||||
json_response(StatusCode::OK, layer_map_info)
|
||||
}
|
||||
|
||||
#[instrument(skip_all, fields(tenant_id, shard_id, timeline_id, layer_name))]
|
||||
async fn timeline_layer_scan_disposable_keys(
|
||||
request: Request<Body>,
|
||||
cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
let layer_name: LayerName = parse_request_param(&request, "layer_name")?;
|
||||
|
||||
tracing::Span::current().record(
|
||||
"tenant_id",
|
||||
tracing::field::display(&tenant_shard_id.tenant_id),
|
||||
);
|
||||
tracing::Span::current().record(
|
||||
"shard_id",
|
||||
tracing::field::display(tenant_shard_id.shard_slug()),
|
||||
);
|
||||
tracing::Span::current().record("timeline_id", tracing::field::display(&timeline_id));
|
||||
tracing::Span::current().record("layer_name", tracing::field::display(&layer_name));
|
||||
|
||||
let state = get_state(&request);
|
||||
|
||||
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
|
||||
|
||||
// technically the timeline need not be active for this scan to complete
|
||||
let timeline =
|
||||
active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id)
|
||||
.await?;
|
||||
|
||||
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
|
||||
|
||||
let guard = timeline.layers.read().await;
|
||||
let Some(layer) = guard.try_get_from_key(&layer_name.clone().into()) else {
|
||||
return Err(ApiError::NotFound(
|
||||
anyhow::anyhow!("Layer {tenant_shard_id}/{timeline_id}/{layer_name} not found").into(),
|
||||
));
|
||||
};
|
||||
|
||||
let resident_layer = layer
|
||||
.download_and_keep_resident()
|
||||
.await
|
||||
.map_err(|err| match err {
|
||||
tenant::storage_layer::layer::DownloadError::TimelineShutdown
|
||||
| tenant::storage_layer::layer::DownloadError::DownloadCancelled => {
|
||||
ApiError::ShuttingDown
|
||||
}
|
||||
tenant::storage_layer::layer::DownloadError::ContextAndConfigReallyDeniesDownloads
|
||||
| tenant::storage_layer::layer::DownloadError::DownloadRequired
|
||||
| tenant::storage_layer::layer::DownloadError::NotFile(_)
|
||||
| tenant::storage_layer::layer::DownloadError::DownloadFailed
|
||||
| tenant::storage_layer::layer::DownloadError::PreStatFailed(_) => {
|
||||
ApiError::InternalServerError(err.into())
|
||||
}
|
||||
#[cfg(test)]
|
||||
tenant::storage_layer::layer::DownloadError::Failpoint(_) => {
|
||||
ApiError::InternalServerError(err.into())
|
||||
}
|
||||
})?;
|
||||
|
||||
let keys = resident_layer
|
||||
.load_keys(&ctx)
|
||||
.await
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
|
||||
let shard_identity = timeline.get_shard_identity();
|
||||
|
||||
let mut disposable_count = 0;
|
||||
let mut not_disposable_count = 0;
|
||||
let cancel = cancel.clone();
|
||||
for (i, key) in keys.into_iter().enumerate() {
|
||||
if shard_identity.is_key_disposable(&key) {
|
||||
disposable_count += 1;
|
||||
tracing::debug!(key = %key, key.dbg=?key, "disposable key");
|
||||
} else {
|
||||
not_disposable_count += 1;
|
||||
}
|
||||
#[allow(clippy::collapsible_if)]
|
||||
if i % 10000 == 0 {
|
||||
if cancel.is_cancelled() || timeline.cancel.is_cancelled() || timeline.is_stopping() {
|
||||
return Err(ApiError::ShuttingDown);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
json_response(
|
||||
StatusCode::OK,
|
||||
pageserver_api::models::ScanDisposableKeysResponse {
|
||||
disposable_count,
|
||||
not_disposable_count,
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
async fn layer_download_handler(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
@@ -1824,7 +2005,12 @@ async fn timeline_offload_handler(
|
||||
}
|
||||
offload_timeline(&tenant, &timeline)
|
||||
.await
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
.map_err(|e| {
|
||||
match e {
|
||||
OffloadError::Cancelled => ApiError::ResourceUnavailable("Timeline shutting down".into()),
|
||||
_ => ApiError::InternalServerError(anyhow!(e))
|
||||
}
|
||||
})?;
|
||||
|
||||
json_response(StatusCode::OK, ())
|
||||
}
|
||||
@@ -1880,6 +2066,7 @@ async fn timeline_checkpoint_handler(
|
||||
.map_err(|e|
|
||||
match e {
|
||||
CompactionError::ShuttingDown => ApiError::ShuttingDown,
|
||||
CompactionError::Offload(e) => ApiError::InternalServerError(anyhow::anyhow!(e)),
|
||||
CompactionError::Other(e) => ApiError::InternalServerError(e)
|
||||
}
|
||||
)?;
|
||||
@@ -2052,13 +2239,13 @@ async fn getpage_at_lsn_handler(
|
||||
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
|
||||
let state = get_state(&request);
|
||||
|
||||
struct Key(crate::repository::Key);
|
||||
struct Key(pageserver_api::key::Key);
|
||||
|
||||
impl std::str::FromStr for Key {
|
||||
type Err = anyhow::Error;
|
||||
|
||||
fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
|
||||
crate::repository::Key::from_hex(s).map(Key)
|
||||
pageserver_api::key::Key::from_hex(s).map(Key)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2993,6 +3180,9 @@ pub fn make_router(
|
||||
.get("/v1/tenant/:tenant_shard_id/timeline", |r| {
|
||||
api_handler(r, timeline_list_handler)
|
||||
})
|
||||
.get("/v1/tenant/:tenant_shard_id/timeline_and_offloaded", |r| {
|
||||
api_handler(r, timeline_and_offloaded_list_handler)
|
||||
})
|
||||
.post("/v1/tenant/:tenant_shard_id/timeline", |r| {
|
||||
api_handler(r, timeline_create_handler)
|
||||
})
|
||||
@@ -3065,6 +3255,10 @@ pub fn make_router(
|
||||
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/layer/:layer_file_name",
|
||||
|r| api_handler(r, evict_timeline_layer_handler),
|
||||
)
|
||||
.post(
|
||||
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/layer/:layer_name/scan_disposable_keys",
|
||||
|r| testing_api_handler("timeline_layer_scan_disposable_keys", r, timeline_layer_scan_disposable_keys),
|
||||
)
|
||||
.post(
|
||||
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/block_gc",
|
||||
|r| api_handler(r, timeline_gc_blocking_handler),
|
||||
|
||||
@@ -12,6 +12,7 @@ use pageserver_api::key::rel_block_to_key;
|
||||
use tokio::io::{AsyncRead, AsyncReadExt};
|
||||
use tokio_tar::Archive;
|
||||
use tracing::*;
|
||||
use wal_decoder::models::InterpretedWalRecord;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
use crate::context::RequestContext;
|
||||
@@ -19,8 +20,6 @@ use crate::metrics::WAL_INGEST;
|
||||
use crate::pgdatadir_mapping::*;
|
||||
use crate::tenant::Timeline;
|
||||
use crate::walingest::WalIngest;
|
||||
use crate::walrecord::decode_wal_record;
|
||||
use crate::walrecord::DecodedWALRecord;
|
||||
use pageserver_api::reltag::{RelTag, SlruKind};
|
||||
use postgres_ffi::pg_constants;
|
||||
use postgres_ffi::relfile_utils::*;
|
||||
@@ -313,11 +312,15 @@ async fn import_wal(
|
||||
let mut modification = tline.begin_modification(last_lsn);
|
||||
while last_lsn <= endpoint {
|
||||
if let Some((lsn, recdata)) = waldecoder.poll_decode()? {
|
||||
let mut decoded = DecodedWALRecord::default();
|
||||
decode_wal_record(recdata, &mut decoded, tline.pg_version)?;
|
||||
let interpreted = InterpretedWalRecord::from_bytes_filtered(
|
||||
recdata,
|
||||
tline.get_shard_identity(),
|
||||
lsn,
|
||||
tline.pg_version,
|
||||
)?;
|
||||
|
||||
walingest
|
||||
.ingest_record(decoded, lsn, &mut modification, ctx)
|
||||
.ingest_record(interpreted, &mut modification, ctx)
|
||||
.await?;
|
||||
WAL_INGEST.records_committed.inc();
|
||||
|
||||
@@ -454,10 +457,15 @@ pub async fn import_wal_from_tar(
|
||||
let mut modification = tline.begin_modification(last_lsn);
|
||||
while last_lsn <= end_lsn {
|
||||
if let Some((lsn, recdata)) = waldecoder.poll_decode()? {
|
||||
let mut decoded = DecodedWALRecord::default();
|
||||
decode_wal_record(recdata, &mut decoded, tline.pg_version)?;
|
||||
let interpreted = InterpretedWalRecord::from_bytes_filtered(
|
||||
recdata,
|
||||
tline.get_shard_identity(),
|
||||
lsn,
|
||||
tline.pg_version,
|
||||
)?;
|
||||
|
||||
walingest
|
||||
.ingest_record(decoded, lsn, &mut modification, ctx)
|
||||
.ingest_record(interpreted, &mut modification, ctx)
|
||||
.await?;
|
||||
modification.commit(ctx).await?;
|
||||
last_lsn = lsn;
|
||||
|
||||
@@ -24,7 +24,6 @@ pub mod metrics;
|
||||
pub mod page_cache;
|
||||
pub mod page_service;
|
||||
pub mod pgdatadir_mapping;
|
||||
pub mod repository;
|
||||
pub mod span;
|
||||
pub(crate) mod statvfs;
|
||||
pub mod task_mgr;
|
||||
@@ -32,7 +31,6 @@ pub mod tenant;
|
||||
pub mod utilization;
|
||||
pub mod virtual_file;
|
||||
pub mod walingest;
|
||||
pub mod walrecord;
|
||||
pub mod walredo;
|
||||
|
||||
use camino::Utf8Path;
|
||||
|
||||
@@ -1189,7 +1189,7 @@ struct GlobalAndPerTimelineHistogramTimer<'a, 'c> {
|
||||
op: SmgrQueryType,
|
||||
}
|
||||
|
||||
impl<'a, 'c> Drop for GlobalAndPerTimelineHistogramTimer<'a, 'c> {
|
||||
impl Drop for GlobalAndPerTimelineHistogramTimer<'_, '_> {
|
||||
fn drop(&mut self) {
|
||||
let elapsed = self.start.elapsed();
|
||||
let ex_throttled = self
|
||||
@@ -1560,7 +1560,7 @@ impl BasebackupQueryTime {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, 'c> BasebackupQueryTimeOngoingRecording<'a, 'c> {
|
||||
impl BasebackupQueryTimeOngoingRecording<'_, '_> {
|
||||
pub(crate) fn observe<T>(self, res: &Result<T, QueryError>) {
|
||||
let elapsed = self.start.elapsed();
|
||||
let ex_throttled = self
|
||||
@@ -2092,6 +2092,7 @@ pub(crate) struct WalIngestMetrics {
|
||||
pub(crate) records_received: IntCounter,
|
||||
pub(crate) records_committed: IntCounter,
|
||||
pub(crate) records_filtered: IntCounter,
|
||||
pub(crate) gap_blocks_zeroed_on_rel_extend: IntCounter,
|
||||
}
|
||||
|
||||
pub(crate) static WAL_INGEST: Lazy<WalIngestMetrics> = Lazy::new(|| WalIngestMetrics {
|
||||
@@ -2115,6 +2116,11 @@ pub(crate) static WAL_INGEST: Lazy<WalIngestMetrics> = Lazy::new(|| WalIngestMet
|
||||
"Number of WAL records filtered out due to sharding"
|
||||
)
|
||||
.expect("failed to define a metric"),
|
||||
gap_blocks_zeroed_on_rel_extend: register_int_counter!(
|
||||
"pageserver_gap_blocks_zeroed_on_rel_extend",
|
||||
"Total number of zero gap blocks written on relation extends"
|
||||
)
|
||||
.expect("failed to define a metric"),
|
||||
});
|
||||
|
||||
pub(crate) static WAL_REDO_TIME: Lazy<Histogram> = Lazy::new(|| {
|
||||
@@ -3034,13 +3040,111 @@ impl<F: Future<Output = Result<O, E>>, O, E> Future for MeasuredRemoteOp<F> {
|
||||
}
|
||||
|
||||
pub mod tokio_epoll_uring {
|
||||
use metrics::{register_int_counter, UIntGauge};
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
sync::{Arc, Mutex},
|
||||
};
|
||||
|
||||
use metrics::{register_histogram, register_int_counter, Histogram, LocalHistogram, UIntGauge};
|
||||
use once_cell::sync::Lazy;
|
||||
|
||||
/// Shared storage for tokio-epoll-uring thread local metrics.
|
||||
pub(crate) static THREAD_LOCAL_METRICS_STORAGE: Lazy<ThreadLocalMetricsStorage> =
|
||||
Lazy::new(|| {
|
||||
let slots_submission_queue_depth = register_histogram!(
|
||||
"pageserver_tokio_epoll_uring_slots_submission_queue_depth",
|
||||
"The slots waiters queue depth of each tokio_epoll_uring system",
|
||||
vec![1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0],
|
||||
)
|
||||
.expect("failed to define a metric");
|
||||
ThreadLocalMetricsStorage {
|
||||
observers: Mutex::new(HashMap::new()),
|
||||
slots_submission_queue_depth,
|
||||
}
|
||||
});
|
||||
|
||||
pub struct ThreadLocalMetricsStorage {
|
||||
/// List of thread local metrics observers.
|
||||
observers: Mutex<HashMap<u64, Arc<ThreadLocalMetrics>>>,
|
||||
/// A histogram shared between all thread local systems
|
||||
/// for collecting slots submission queue depth.
|
||||
slots_submission_queue_depth: Histogram,
|
||||
}
|
||||
|
||||
/// Each thread-local [`tokio_epoll_uring::System`] gets one of these as its
|
||||
/// [`tokio_epoll_uring::metrics::PerSystemMetrics`] generic.
|
||||
///
|
||||
/// The System makes observations into [`Self`] and periodically, the collector
|
||||
/// comes along and flushes [`Self`] into the shared storage [`THREAD_LOCAL_METRICS_STORAGE`].
|
||||
///
|
||||
/// [`LocalHistogram`] is `!Send`, so, we need to put it behind a [`Mutex`].
|
||||
/// But except for the periodic flush, the lock is uncontended so there's no waiting
|
||||
/// for cache coherence protocol to get an exclusive cache line.
|
||||
pub struct ThreadLocalMetrics {
|
||||
/// Local observer of thread local tokio-epoll-uring system's slots waiters queue depth.
|
||||
slots_submission_queue_depth: Mutex<LocalHistogram>,
|
||||
}
|
||||
|
||||
impl ThreadLocalMetricsStorage {
|
||||
/// Registers a new thread local system. Returns a thread local metrics observer.
|
||||
pub fn register_system(&self, id: u64) -> Arc<ThreadLocalMetrics> {
|
||||
let per_system_metrics = Arc::new(ThreadLocalMetrics::new(
|
||||
self.slots_submission_queue_depth.local(),
|
||||
));
|
||||
let mut g = self.observers.lock().unwrap();
|
||||
g.insert(id, Arc::clone(&per_system_metrics));
|
||||
per_system_metrics
|
||||
}
|
||||
|
||||
/// Removes metrics observer for a thread local system.
|
||||
/// This should be called before dropping a thread local system.
|
||||
pub fn remove_system(&self, id: u64) {
|
||||
let mut g = self.observers.lock().unwrap();
|
||||
g.remove(&id);
|
||||
}
|
||||
|
||||
/// Flush all thread local metrics to the shared storage.
|
||||
pub fn flush_thread_local_metrics(&self) {
|
||||
let g = self.observers.lock().unwrap();
|
||||
g.values().for_each(|local| {
|
||||
local.flush();
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
impl ThreadLocalMetrics {
|
||||
pub fn new(slots_submission_queue_depth: LocalHistogram) -> Self {
|
||||
ThreadLocalMetrics {
|
||||
slots_submission_queue_depth: Mutex::new(slots_submission_queue_depth),
|
||||
}
|
||||
}
|
||||
|
||||
/// Flushes the thread local metrics to shared aggregator.
|
||||
pub fn flush(&self) {
|
||||
let Self {
|
||||
slots_submission_queue_depth,
|
||||
} = self;
|
||||
slots_submission_queue_depth.lock().unwrap().flush();
|
||||
}
|
||||
}
|
||||
|
||||
impl tokio_epoll_uring::metrics::PerSystemMetrics for ThreadLocalMetrics {
|
||||
fn observe_slots_submission_queue_depth(&self, queue_depth: u64) {
|
||||
let Self {
|
||||
slots_submission_queue_depth,
|
||||
} = self;
|
||||
slots_submission_queue_depth
|
||||
.lock()
|
||||
.unwrap()
|
||||
.observe(queue_depth as f64);
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Collector {
|
||||
descs: Vec<metrics::core::Desc>,
|
||||
systems_created: UIntGauge,
|
||||
systems_destroyed: UIntGauge,
|
||||
thread_local_metrics_storage: &'static ThreadLocalMetricsStorage,
|
||||
}
|
||||
|
||||
impl metrics::core::Collector for Collector {
|
||||
@@ -3050,7 +3154,7 @@ pub mod tokio_epoll_uring {
|
||||
|
||||
fn collect(&self) -> Vec<metrics::proto::MetricFamily> {
|
||||
let mut mfs = Vec::with_capacity(Self::NMETRICS);
|
||||
let tokio_epoll_uring::metrics::Metrics {
|
||||
let tokio_epoll_uring::metrics::GlobalMetrics {
|
||||
systems_created,
|
||||
systems_destroyed,
|
||||
} = tokio_epoll_uring::metrics::global();
|
||||
@@ -3058,12 +3162,21 @@ pub mod tokio_epoll_uring {
|
||||
mfs.extend(self.systems_created.collect());
|
||||
self.systems_destroyed.set(systems_destroyed);
|
||||
mfs.extend(self.systems_destroyed.collect());
|
||||
|
||||
self.thread_local_metrics_storage
|
||||
.flush_thread_local_metrics();
|
||||
|
||||
mfs.extend(
|
||||
self.thread_local_metrics_storage
|
||||
.slots_submission_queue_depth
|
||||
.collect(),
|
||||
);
|
||||
mfs
|
||||
}
|
||||
}
|
||||
|
||||
impl Collector {
|
||||
const NMETRICS: usize = 2;
|
||||
const NMETRICS: usize = 3;
|
||||
|
||||
#[allow(clippy::new_without_default)]
|
||||
pub fn new() -> Self {
|
||||
@@ -3095,6 +3208,7 @@ pub mod tokio_epoll_uring {
|
||||
descs,
|
||||
systems_created,
|
||||
systems_destroyed,
|
||||
thread_local_metrics_storage: &THREAD_LOCAL_METRICS_STORAGE,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -3454,6 +3568,7 @@ pub fn preinitialize_metrics() {
|
||||
Lazy::force(&RECONSTRUCT_TIME);
|
||||
Lazy::force(&BASEBACKUP_QUERY_TIME);
|
||||
Lazy::force(&COMPUTE_COMMANDS_COUNTERS);
|
||||
Lazy::force(&tokio_epoll_uring::THREAD_LOCAL_METRICS_STORAGE);
|
||||
|
||||
tenant_throttling::preinitialize_global_metrics();
|
||||
}
|
||||
|
||||
@@ -82,6 +82,7 @@ use once_cell::sync::OnceCell;
|
||||
use crate::{
|
||||
context::RequestContext,
|
||||
metrics::{page_cache_eviction_metrics, PageCacheSizeMetrics},
|
||||
virtual_file::{IoBufferMut, IoPageSlice},
|
||||
};
|
||||
|
||||
static PAGE_CACHE: OnceCell<PageCache> = OnceCell::new();
|
||||
@@ -144,7 +145,7 @@ struct SlotInner {
|
||||
key: Option<CacheKey>,
|
||||
// for `coalesce_readers_permit`
|
||||
permit: std::sync::Mutex<Weak<PinnedSlotsPermit>>,
|
||||
buf: &'static mut [u8; PAGE_SZ],
|
||||
buf: IoPageSlice<'static>,
|
||||
}
|
||||
|
||||
impl Slot {
|
||||
@@ -234,13 +235,13 @@ impl std::ops::Deref for PageReadGuard<'_> {
|
||||
type Target = [u8; PAGE_SZ];
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
self.slot_guard.buf
|
||||
self.slot_guard.buf.deref()
|
||||
}
|
||||
}
|
||||
|
||||
impl AsRef<[u8; PAGE_SZ]> for PageReadGuard<'_> {
|
||||
fn as_ref(&self) -> &[u8; PAGE_SZ] {
|
||||
self.slot_guard.buf
|
||||
self.slot_guard.buf.as_ref()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -266,7 +267,7 @@ enum PageWriteGuardState<'i> {
|
||||
impl std::ops::DerefMut for PageWriteGuard<'_> {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
match &mut self.state {
|
||||
PageWriteGuardState::Invalid { inner, _permit } => inner.buf,
|
||||
PageWriteGuardState::Invalid { inner, _permit } => inner.buf.deref_mut(),
|
||||
PageWriteGuardState::Downgraded => unreachable!(),
|
||||
}
|
||||
}
|
||||
@@ -277,7 +278,7 @@ impl std::ops::Deref for PageWriteGuard<'_> {
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
match &self.state {
|
||||
PageWriteGuardState::Invalid { inner, _permit } => inner.buf,
|
||||
PageWriteGuardState::Invalid { inner, _permit } => inner.buf.deref(),
|
||||
PageWriteGuardState::Downgraded => unreachable!(),
|
||||
}
|
||||
}
|
||||
@@ -643,7 +644,7 @@ impl PageCache {
|
||||
// We could use Vec::leak here, but that potentially also leaks
|
||||
// uninitialized reserved capacity. With into_boxed_slice and Box::leak
|
||||
// this is avoided.
|
||||
let page_buffer = Box::leak(vec![0u8; num_pages * PAGE_SZ].into_boxed_slice());
|
||||
let page_buffer = IoBufferMut::with_capacity_zeroed(num_pages * PAGE_SZ).leak();
|
||||
|
||||
let size_metrics = &crate::metrics::PAGE_CACHE_SIZE;
|
||||
size_metrics.max_bytes.set_page_sz(num_pages);
|
||||
@@ -652,7 +653,8 @@ impl PageCache {
|
||||
let slots = page_buffer
|
||||
.chunks_exact_mut(PAGE_SZ)
|
||||
.map(|chunk| {
|
||||
let buf: &mut [u8; PAGE_SZ] = chunk.try_into().unwrap();
|
||||
// SAFETY: Each chunk has `PAGE_SZ` (8192) bytes, greater than 512, still aligned.
|
||||
let buf = unsafe { IoPageSlice::new_unchecked(chunk.try_into().unwrap()) };
|
||||
|
||||
Slot {
|
||||
inner: tokio::sync::RwLock::new(SlotInner {
|
||||
|
||||
@@ -1080,6 +1080,7 @@ impl PageServerHandler {
|
||||
prev_lsn: Option<Lsn>,
|
||||
full_backup: bool,
|
||||
gzip: bool,
|
||||
replica: bool,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<(), QueryError>
|
||||
where
|
||||
@@ -1132,6 +1133,7 @@ impl PageServerHandler {
|
||||
lsn,
|
||||
prev_lsn,
|
||||
full_backup,
|
||||
replica,
|
||||
ctx,
|
||||
)
|
||||
.await
|
||||
@@ -1154,6 +1156,7 @@ impl PageServerHandler {
|
||||
lsn,
|
||||
prev_lsn,
|
||||
full_backup,
|
||||
replica,
|
||||
ctx,
|
||||
)
|
||||
.await
|
||||
@@ -1170,6 +1173,7 @@ impl PageServerHandler {
|
||||
lsn,
|
||||
prev_lsn,
|
||||
full_backup,
|
||||
replica,
|
||||
ctx,
|
||||
)
|
||||
.await
|
||||
@@ -1326,24 +1330,27 @@ where
|
||||
.for_command(ComputeCommandKind::Basebackup)
|
||||
.inc();
|
||||
|
||||
let lsn = if let Some(lsn_str) = params.get(2) {
|
||||
Some(
|
||||
Lsn::from_str(lsn_str)
|
||||
.with_context(|| format!("Failed to parse Lsn from {lsn_str}"))?,
|
||||
)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let gzip = match params.get(3) {
|
||||
Some(&"--gzip") => true,
|
||||
None => false,
|
||||
Some(third_param) => {
|
||||
return Err(QueryError::Other(anyhow::anyhow!(
|
||||
"Parameter in position 3 unknown {third_param}",
|
||||
)))
|
||||
let mut lsn = None;
|
||||
let mut replica = false;
|
||||
let mut gzip = false;
|
||||
for param in ¶ms[2..] {
|
||||
if param.starts_with("--") {
|
||||
match *param {
|
||||
"--gzip" => gzip = true,
|
||||
"--replica" => replica = true,
|
||||
_ => {
|
||||
return Err(QueryError::Other(anyhow::anyhow!(
|
||||
"Unknown parameter {param}",
|
||||
)))
|
||||
}
|
||||
}
|
||||
} else {
|
||||
lsn = Some(
|
||||
Lsn::from_str(param)
|
||||
.with_context(|| format!("Failed to parse Lsn from {param}"))?,
|
||||
);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
let metric_recording = metrics::BASEBACKUP_QUERY_TIME.start_recording(&ctx);
|
||||
let res = async {
|
||||
@@ -1355,6 +1362,7 @@ where
|
||||
None,
|
||||
false,
|
||||
gzip,
|
||||
replica,
|
||||
&ctx,
|
||||
)
|
||||
.await?;
|
||||
@@ -1415,6 +1423,7 @@ where
|
||||
prev_lsn,
|
||||
true,
|
||||
false,
|
||||
false,
|
||||
&ctx,
|
||||
)
|
||||
.await?;
|
||||
|
||||
@@ -7,14 +7,14 @@
|
||||
//! Clarify that)
|
||||
//!
|
||||
use super::tenant::{PageReconstructError, Timeline};
|
||||
use crate::aux_file;
|
||||
use crate::context::RequestContext;
|
||||
use crate::keyspace::{KeySpace, KeySpaceAccum};
|
||||
use crate::span::debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id;
|
||||
use crate::walrecord::NeonWalRecord;
|
||||
use crate::{aux_file, repository::*};
|
||||
use anyhow::{ensure, Context};
|
||||
use bytes::{Buf, Bytes, BytesMut};
|
||||
use enum_map::Enum;
|
||||
use pageserver_api::key::Key;
|
||||
use pageserver_api::key::{
|
||||
dbdir_key_range, rel_block_to_key, rel_dir_to_key, rel_key_range, rel_size_to_key,
|
||||
relmap_file_key, repl_origin_key, repl_origin_key_range, slru_block_to_key, slru_dir_to_key,
|
||||
@@ -22,7 +22,9 @@ use pageserver_api::key::{
|
||||
CompactKey, AUX_FILES_KEY, CHECKPOINT_KEY, CONTROLFILE_KEY, DBDIR_KEY, TWOPHASEDIR_KEY,
|
||||
};
|
||||
use pageserver_api::keyspace::SparseKeySpace;
|
||||
use pageserver_api::record::NeonWalRecord;
|
||||
use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind};
|
||||
use pageserver_api::value::Value;
|
||||
use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
|
||||
use postgres_ffi::BLCKSZ;
|
||||
use postgres_ffi::{Oid, RepOriginId, TimestampTz, TransactionId};
|
||||
@@ -1506,35 +1508,42 @@ impl<'a> DatadirModification<'a> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Drop a relation.
|
||||
pub async fn put_rel_drop(&mut self, rel: RelTag, ctx: &RequestContext) -> anyhow::Result<()> {
|
||||
anyhow::ensure!(rel.relnode != 0, RelationError::InvalidRelnode);
|
||||
/// Drop some relations
|
||||
pub(crate) async fn put_rel_drops(
|
||||
&mut self,
|
||||
drop_relations: HashMap<(u32, u32), Vec<RelTag>>,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<()> {
|
||||
for ((spc_node, db_node), rel_tags) in drop_relations {
|
||||
let dir_key = rel_dir_to_key(spc_node, db_node);
|
||||
let buf = self.get(dir_key, ctx).await?;
|
||||
let mut dir = RelDirectory::des(&buf)?;
|
||||
|
||||
// Remove it from the directory entry
|
||||
let dir_key = rel_dir_to_key(rel.spcnode, rel.dbnode);
|
||||
let buf = self.get(dir_key, ctx).await?;
|
||||
let mut dir = RelDirectory::des(&buf)?;
|
||||
let mut dirty = false;
|
||||
for rel_tag in rel_tags {
|
||||
if dir.rels.remove(&(rel_tag.relnode, rel_tag.forknum)) {
|
||||
dirty = true;
|
||||
|
||||
self.pending_directory_entries
|
||||
.push((DirectoryKind::Rel, dir.rels.len()));
|
||||
// update logical size
|
||||
let size_key = rel_size_to_key(rel_tag);
|
||||
let old_size = self.get(size_key, ctx).await?.get_u32_le();
|
||||
self.pending_nblocks -= old_size as i64;
|
||||
|
||||
if dir.rels.remove(&(rel.relnode, rel.forknum)) {
|
||||
self.put(dir_key, Value::Image(Bytes::from(RelDirectory::ser(&dir)?)));
|
||||
} else {
|
||||
warn!("dropped rel {} did not exist in rel directory", rel);
|
||||
// Remove entry from relation size cache
|
||||
self.tline.remove_cached_rel_size(&rel_tag);
|
||||
|
||||
// Delete size entry, as well as all blocks
|
||||
self.delete(rel_key_range(rel_tag));
|
||||
}
|
||||
}
|
||||
|
||||
if dirty {
|
||||
self.put(dir_key, Value::Image(Bytes::from(RelDirectory::ser(&dir)?)));
|
||||
self.pending_directory_entries
|
||||
.push((DirectoryKind::Rel, dir.rels.len()));
|
||||
}
|
||||
}
|
||||
|
||||
// update logical size
|
||||
let size_key = rel_size_to_key(rel);
|
||||
let old_size = self.get(size_key, ctx).await?.get_u32_le();
|
||||
self.pending_nblocks -= old_size as i64;
|
||||
|
||||
// Remove enty from relation size cache
|
||||
self.tline.remove_cached_rel_size(&rel);
|
||||
|
||||
// Delete size entry, as well as all blocks
|
||||
self.delete(rel_key_range(rel));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
@@ -90,7 +90,7 @@ pub mod mock {
|
||||
let used_bytes = walk_dir_disk_usage(tenants_dir, name_filter.as_deref()).unwrap();
|
||||
|
||||
// round it up to the nearest block multiple
|
||||
let used_blocks = (used_bytes + (blocksize - 1)) / blocksize;
|
||||
let used_blocks = used_bytes.div_ceil(*blocksize);
|
||||
|
||||
if used_blocks > *total_blocks {
|
||||
panic!(
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -5,6 +5,8 @@
|
||||
use super::storage_layer::delta_layer::{Adapter, DeltaLayerInner};
|
||||
use crate::context::RequestContext;
|
||||
use crate::page_cache::{self, FileId, PageReadGuard, PageWriteGuard, ReadBufResult, PAGE_SZ};
|
||||
#[cfg(test)]
|
||||
use crate::virtual_file::IoBufferMut;
|
||||
use crate::virtual_file::VirtualFile;
|
||||
use bytes::Bytes;
|
||||
use std::ops::Deref;
|
||||
@@ -40,7 +42,7 @@ pub enum BlockLease<'a> {
|
||||
#[cfg(test)]
|
||||
Arc(std::sync::Arc<[u8; PAGE_SZ]>),
|
||||
#[cfg(test)]
|
||||
Vec(Vec<u8>),
|
||||
IoBufferMut(IoBufferMut),
|
||||
}
|
||||
|
||||
impl From<PageReadGuard<'static>> for BlockLease<'static> {
|
||||
@@ -50,13 +52,13 @@ impl From<PageReadGuard<'static>> for BlockLease<'static> {
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
impl<'a> From<std::sync::Arc<[u8; PAGE_SZ]>> for BlockLease<'a> {
|
||||
impl From<std::sync::Arc<[u8; PAGE_SZ]>> for BlockLease<'_> {
|
||||
fn from(value: std::sync::Arc<[u8; PAGE_SZ]>) -> Self {
|
||||
BlockLease::Arc(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Deref for BlockLease<'a> {
|
||||
impl Deref for BlockLease<'_> {
|
||||
type Target = [u8; PAGE_SZ];
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
@@ -67,7 +69,7 @@ impl<'a> Deref for BlockLease<'a> {
|
||||
#[cfg(test)]
|
||||
BlockLease::Arc(v) => v.deref(),
|
||||
#[cfg(test)]
|
||||
BlockLease::Vec(v) => {
|
||||
BlockLease::IoBufferMut(v) => {
|
||||
TryFrom::try_from(&v[..]).expect("caller must ensure that v has PAGE_SZ")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,7 +9,6 @@
|
||||
//! may lead to a data loss.
|
||||
//!
|
||||
pub(crate) use pageserver_api::config::TenantConfigToml as TenantConf;
|
||||
use pageserver_api::models::AuxFilePolicy;
|
||||
use pageserver_api::models::CompactionAlgorithmSettings;
|
||||
use pageserver_api::models::EvictionPolicy;
|
||||
use pageserver_api::models::{self, ThrottleConfig};
|
||||
@@ -341,10 +340,6 @@ pub struct TenantConfOpt {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub image_layer_creation_check_threshold: Option<u8>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
#[serde(default)]
|
||||
pub switch_aux_file_policy: Option<AuxFilePolicy>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
#[serde(with = "humantime_serde")]
|
||||
#[serde(default)]
|
||||
@@ -410,9 +405,6 @@ impl TenantConfOpt {
|
||||
image_layer_creation_check_threshold: self
|
||||
.image_layer_creation_check_threshold
|
||||
.unwrap_or(global_conf.image_layer_creation_check_threshold),
|
||||
switch_aux_file_policy: self
|
||||
.switch_aux_file_policy
|
||||
.unwrap_or(global_conf.switch_aux_file_policy),
|
||||
lsn_lease_length: self
|
||||
.lsn_lease_length
|
||||
.unwrap_or(global_conf.lsn_lease_length),
|
||||
@@ -470,7 +462,6 @@ impl From<TenantConfOpt> for models::TenantConfig {
|
||||
lazy_slru_download: value.lazy_slru_download,
|
||||
timeline_get_throttle: value.timeline_get_throttle.map(ThrottleConfig::from),
|
||||
image_layer_creation_check_threshold: value.image_layer_creation_check_threshold,
|
||||
switch_aux_file_policy: value.switch_aux_file_policy,
|
||||
lsn_lease_length: value.lsn_lease_length.map(humantime),
|
||||
lsn_lease_length_for_ts: value.lsn_lease_length_for_ts.map(humantime),
|
||||
}
|
||||
|
||||
@@ -131,7 +131,7 @@ struct OnDiskNode<'a, const L: usize> {
|
||||
values: &'a [u8],
|
||||
}
|
||||
|
||||
impl<'a, const L: usize> OnDiskNode<'a, L> {
|
||||
impl<const L: usize> OnDiskNode<'_, L> {
|
||||
///
|
||||
/// Interpret a PAGE_SZ page as a node.
|
||||
///
|
||||
|
||||
@@ -6,10 +6,11 @@ use crate::config::PageServerConf;
|
||||
use crate::context::RequestContext;
|
||||
use crate::page_cache;
|
||||
use crate::tenant::storage_layer::inmemory_layer::vectored_dio_read::File;
|
||||
use crate::virtual_file::owned_buffers_io::io_buf_aligned::IoBufAlignedMut;
|
||||
use crate::virtual_file::owned_buffers_io::slice::SliceMutExt;
|
||||
use crate::virtual_file::owned_buffers_io::util::size_tracking_writer;
|
||||
use crate::virtual_file::owned_buffers_io::write::Buffer;
|
||||
use crate::virtual_file::{self, owned_buffers_io, VirtualFile};
|
||||
use crate::virtual_file::{self, owned_buffers_io, IoBufferMut, VirtualFile};
|
||||
use bytes::BytesMut;
|
||||
use camino::Utf8PathBuf;
|
||||
use num_traits::Num;
|
||||
@@ -107,15 +108,18 @@ impl EphemeralFile {
|
||||
self.page_cache_file_id
|
||||
}
|
||||
|
||||
pub(crate) async fn load_to_vec(&self, ctx: &RequestContext) -> Result<Vec<u8>, io::Error> {
|
||||
pub(crate) async fn load_to_io_buf(
|
||||
&self,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<IoBufferMut, io::Error> {
|
||||
let size = self.len().into_usize();
|
||||
let vec = Vec::with_capacity(size);
|
||||
let (slice, nread) = self.read_exact_at_eof_ok(0, vec.slice_full(), ctx).await?;
|
||||
let buf = IoBufferMut::with_capacity(size);
|
||||
let (slice, nread) = self.read_exact_at_eof_ok(0, buf.slice_full(), ctx).await?;
|
||||
assert_eq!(nread, size);
|
||||
let vec = slice.into_inner();
|
||||
assert_eq!(vec.len(), nread);
|
||||
assert_eq!(vec.capacity(), size, "we shouldn't be reallocating");
|
||||
Ok(vec)
|
||||
let buf = slice.into_inner();
|
||||
assert_eq!(buf.len(), nread);
|
||||
assert_eq!(buf.capacity(), size, "we shouldn't be reallocating");
|
||||
Ok(buf)
|
||||
}
|
||||
|
||||
/// Returns the offset at which the first byte of the input was written, for use
|
||||
@@ -158,7 +162,7 @@ impl EphemeralFile {
|
||||
}
|
||||
|
||||
impl super::storage_layer::inmemory_layer::vectored_dio_read::File for EphemeralFile {
|
||||
async fn read_exact_at_eof_ok<'a, 'b, B: tokio_epoll_uring::IoBufMut + Send>(
|
||||
async fn read_exact_at_eof_ok<'a, 'b, B: IoBufAlignedMut + Send>(
|
||||
&'b self,
|
||||
start: u64,
|
||||
dst: tokio_epoll_uring::Slice<B>,
|
||||
@@ -345,7 +349,7 @@ mod tests {
|
||||
assert!(file.len() as usize == write_nbytes);
|
||||
for i in 0..write_nbytes {
|
||||
assert_eq!(value_offsets[i], i.into_u64());
|
||||
let buf = Vec::with_capacity(1);
|
||||
let buf = IoBufferMut::with_capacity(1);
|
||||
let (buf_slice, nread) = file
|
||||
.read_exact_at_eof_ok(i.into_u64(), buf.slice_full(), &ctx)
|
||||
.await
|
||||
@@ -385,7 +389,7 @@ mod tests {
|
||||
|
||||
// assert the state is as this test expects it to be
|
||||
assert_eq!(
|
||||
&file.load_to_vec(&ctx).await.unwrap(),
|
||||
&file.load_to_io_buf(&ctx).await.unwrap(),
|
||||
&content[0..cap + cap / 2]
|
||||
);
|
||||
let md = file
|
||||
@@ -440,7 +444,7 @@ mod tests {
|
||||
let (buf, nread) = file
|
||||
.read_exact_at_eof_ok(
|
||||
start.into_u64(),
|
||||
Vec::with_capacity(len).slice_full(),
|
||||
IoBufferMut::with_capacity(len).slice_full(),
|
||||
ctx,
|
||||
)
|
||||
.await
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user