mirror of
https://github.com/neondatabase/neon.git
synced 2026-03-13 21:30:37 +00:00
Compare commits
114 Commits
pg-extensi
...
copy_data
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
68488f5c74 | ||
|
|
1389927d36 | ||
|
|
06357afe6d | ||
|
|
6943dac164 | ||
|
|
505aa242ac | ||
|
|
1c516906e7 | ||
|
|
7d7cd8375c | ||
|
|
c92b7543b5 | ||
|
|
dbf88cf2d7 | ||
|
|
f1db87ac36 | ||
|
|
3f9defbfb4 | ||
|
|
c7143dbde6 | ||
|
|
cbf9a40889 | ||
|
|
10aba174c9 | ||
|
|
ab2ea8cfa5 | ||
|
|
9c8c55e819 | ||
|
|
10110bee69 | ||
|
|
cff7ae0b0d | ||
|
|
78a7f68902 | ||
|
|
24eaa3b7ca | ||
|
|
26828560a8 | ||
|
|
86604b3b7d | ||
|
|
4957bb2d48 | ||
|
|
ff1a1aea86 | ||
|
|
c9f05d418d | ||
|
|
9de1a6fb14 | ||
|
|
fbd37740c5 | ||
|
|
3e55d9dec6 | ||
|
|
f558f88a08 | ||
|
|
b990200496 | ||
|
|
7e20b49da4 | ||
|
|
032b603011 | ||
|
|
ca0e0781c8 | ||
|
|
b2a5e91a88 | ||
|
|
44e7d5132f | ||
|
|
c19681bc12 | ||
|
|
ec9b585837 | ||
|
|
02ef246db6 | ||
|
|
195d4932c6 | ||
|
|
7fe0a4bf1a | ||
|
|
ef2b9ffbcb | ||
|
|
250a27fb85 | ||
|
|
d748615c1f | ||
|
|
681c6910c2 | ||
|
|
148f0f9b21 | ||
|
|
a7f3f5f356 | ||
|
|
00d1cfa503 | ||
|
|
1faf69a698 | ||
|
|
44a441080d | ||
|
|
c215389f1c | ||
|
|
b1477b4448 | ||
|
|
a500bb06fb | ||
|
|
15456625c2 | ||
|
|
a3f0dd2d30 | ||
|
|
76718472be | ||
|
|
c07b6ffbdc | ||
|
|
6c3605fc24 | ||
|
|
d96d51a3b7 | ||
|
|
a010b2108a | ||
|
|
2f618f46be | ||
|
|
d3aa8a48ea | ||
|
|
e4da76f021 | ||
|
|
870740c949 | ||
|
|
75d583c04a | ||
|
|
b4c5beff9f | ||
|
|
90e1f629e8 | ||
|
|
2023e22ed3 | ||
|
|
036fda392f | ||
|
|
557abc18f3 | ||
|
|
3b06a5bc54 | ||
|
|
1b947fc8af | ||
|
|
78082d0b9f | ||
|
|
190c3ba610 | ||
|
|
14d495ae14 | ||
|
|
472cc17b7a | ||
|
|
76413a0fb8 | ||
|
|
e60b70b475 | ||
|
|
2252c5c282 | ||
|
|
94f315d490 | ||
|
|
cd3faa8c0c | ||
|
|
a7a0c3cd27 | ||
|
|
ee9a5bae43 | ||
|
|
9484b96d7c | ||
|
|
ebee8247b5 | ||
|
|
3164ad7052 | ||
|
|
a0b3990411 | ||
|
|
4385e0c291 | ||
|
|
3693d1f431 | ||
|
|
fdf7a67ed2 | ||
|
|
1299df87d2 | ||
|
|
754ceaefac | ||
|
|
143fa0da42 | ||
|
|
4936ab6842 | ||
|
|
939593d0d3 | ||
|
|
2011cc05cd | ||
|
|
b0286e3c46 | ||
|
|
e4f05ce0a2 | ||
|
|
8d106708d7 | ||
|
|
f450369b20 | ||
|
|
aad918fb56 | ||
|
|
86dd8c96d3 | ||
|
|
6a65c4a4fe | ||
|
|
e9072ee178 | ||
|
|
7e17979d7a | ||
|
|
227271ccad | ||
|
|
fbf0367e27 | ||
|
|
a21b55fe0b | ||
|
|
add51e1372 | ||
|
|
cdce04d721 | ||
|
|
6bac770811 | ||
|
|
c82d19d8d6 | ||
|
|
d73639646e | ||
|
|
d53f9ab3eb | ||
|
|
8560a98d68 |
9
.github/workflows/benchmarking.yml
vendored
9
.github/workflows/benchmarking.yml
vendored
@@ -180,7 +180,8 @@ jobs:
|
|||||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
|
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
|
||||||
options: --init
|
options: --init
|
||||||
|
|
||||||
timeout-minutes: 360 # 6h
|
# Increase timeout to 8h, default timeout is 6h
|
||||||
|
timeout-minutes: 480
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v3
|
- uses: actions/checkout@v3
|
||||||
@@ -321,8 +322,6 @@ jobs:
|
|||||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
|
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
|
||||||
options: --init
|
options: --init
|
||||||
|
|
||||||
timeout-minutes: 360 # 6h
|
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v3
|
- uses: actions/checkout@v3
|
||||||
|
|
||||||
@@ -414,8 +413,6 @@ jobs:
|
|||||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
|
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
|
||||||
options: --init
|
options: --init
|
||||||
|
|
||||||
timeout-minutes: 360 # 6h
|
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v3
|
- uses: actions/checkout@v3
|
||||||
|
|
||||||
@@ -501,8 +498,6 @@ jobs:
|
|||||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
|
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
|
||||||
options: --init
|
options: --init
|
||||||
|
|
||||||
timeout-minutes: 360 # 6h
|
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v3
|
- uses: actions/checkout@v3
|
||||||
|
|
||||||
|
|||||||
177
.github/workflows/build_and_test.yml
vendored
177
.github/workflows/build_and_test.yml
vendored
@@ -264,7 +264,7 @@ jobs:
|
|||||||
export REMOTE_STORAGE_S3_BUCKET=neon-github-public-dev
|
export REMOTE_STORAGE_S3_BUCKET=neon-github-public-dev
|
||||||
export REMOTE_STORAGE_S3_REGION=eu-central-1
|
export REMOTE_STORAGE_S3_REGION=eu-central-1
|
||||||
# Avoid `$CARGO_FEATURES` since there's no `testing` feature in the e2e tests now
|
# Avoid `$CARGO_FEATURES` since there's no `testing` feature in the e2e tests now
|
||||||
${cov_prefix} cargo test $CARGO_FLAGS --package remote_storage --test pagination_tests -- s3_pagination_should_work --exact
|
${cov_prefix} cargo test $CARGO_FLAGS --package remote_storage --test test_real_s3
|
||||||
|
|
||||||
- name: Install rust binaries
|
- name: Install rust binaries
|
||||||
run: |
|
run: |
|
||||||
@@ -623,51 +623,6 @@ jobs:
|
|||||||
- name: Cleanup ECR folder
|
- name: Cleanup ECR folder
|
||||||
run: rm -rf ~/.ecr
|
run: rm -rf ~/.ecr
|
||||||
|
|
||||||
|
|
||||||
neon-image-depot:
|
|
||||||
# For testing this will run side-by-side for a few merges.
|
|
||||||
# This action is not really optimized yet, but gets the job done
|
|
||||||
runs-on: [ self-hosted, gen3, large ]
|
|
||||||
needs: [ tag ]
|
|
||||||
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned
|
|
||||||
permissions:
|
|
||||||
contents: read
|
|
||||||
id-token: write
|
|
||||||
|
|
||||||
steps:
|
|
||||||
- name: Checkout
|
|
||||||
uses: actions/checkout@v3
|
|
||||||
with:
|
|
||||||
submodules: true
|
|
||||||
fetch-depth: 0
|
|
||||||
|
|
||||||
- name: Setup go
|
|
||||||
uses: actions/setup-go@v3
|
|
||||||
with:
|
|
||||||
go-version: '1.19'
|
|
||||||
|
|
||||||
- name: Set up Depot CLI
|
|
||||||
uses: depot/setup-action@v1
|
|
||||||
|
|
||||||
- name: Install Crane & ECR helper
|
|
||||||
run: go install github.com/awslabs/amazon-ecr-credential-helper/ecr-login/cli/docker-credential-ecr-login@69c85dc22db6511932bbf119e1a0cc5c90c69a7f # v0.6.0
|
|
||||||
|
|
||||||
- name: Configure ECR login
|
|
||||||
run: |
|
|
||||||
mkdir /github/home/.docker/
|
|
||||||
echo "{\"credsStore\":\"ecr-login\"}" > /github/home/.docker/config.json
|
|
||||||
|
|
||||||
- name: Build and push
|
|
||||||
uses: depot/build-push-action@v1
|
|
||||||
with:
|
|
||||||
# if no depot.json file is at the root of your repo, you must specify the project id
|
|
||||||
project: nrdv0s4kcs
|
|
||||||
push: true
|
|
||||||
tags: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:depot-${{needs.tag.outputs.build-tag}}
|
|
||||||
build-args: |
|
|
||||||
GIT_VERSION=${{ github.sha }}
|
|
||||||
REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
|
|
||||||
|
|
||||||
compute-tools-image:
|
compute-tools-image:
|
||||||
runs-on: [ self-hosted, gen3, large ]
|
runs-on: [ self-hosted, gen3, large ]
|
||||||
needs: [ tag ]
|
needs: [ tag ]
|
||||||
@@ -704,6 +659,7 @@ jobs:
|
|||||||
--cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache
|
--cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache
|
||||||
--context .
|
--context .
|
||||||
--build-arg GIT_VERSION=${{ github.sha }}
|
--build-arg GIT_VERSION=${{ github.sha }}
|
||||||
|
--build-arg BUILD_TAG=${{needs.tag.outputs.build-tag}}
|
||||||
--build-arg REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
|
--build-arg REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
|
||||||
--dockerfile Dockerfile.compute-tools
|
--dockerfile Dockerfile.compute-tools
|
||||||
--destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}}
|
--destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}}
|
||||||
@@ -761,10 +717,40 @@ jobs:
|
|||||||
--context .
|
--context .
|
||||||
--build-arg GIT_VERSION=${{ github.sha }}
|
--build-arg GIT_VERSION=${{ github.sha }}
|
||||||
--build-arg PG_VERSION=${{ matrix.version }}
|
--build-arg PG_VERSION=${{ matrix.version }}
|
||||||
|
--build-arg BUILD_TAG=${{needs.tag.outputs.build-tag}}
|
||||||
--build-arg REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
|
--build-arg REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
|
||||||
--dockerfile Dockerfile.compute-node
|
--dockerfile Dockerfile.compute-node
|
||||||
--destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}
|
--destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}
|
||||||
--destination neondatabase/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}
|
--destination neondatabase/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}
|
||||||
|
--cleanup
|
||||||
|
|
||||||
|
# Due to a kaniko bug, we can't use cache for extensions image, thus it takes about the same amount of time as compute-node image to build (~10 min)
|
||||||
|
# During the transition period we need to have extensions in both places (in S3 and in compute-node image),
|
||||||
|
# so we won't build extension twice, but extract them from compute-node.
|
||||||
|
#
|
||||||
|
# For now we use the extensions image only for new custom extensions
|
||||||
|
- name: Kaniko build extensions only
|
||||||
|
run: |
|
||||||
|
# Kaniko is supposed to clean up after itself if the --cleanup flag is set, but it doesn't.
|
||||||
|
# Although some fixes were made in https://github.com/GoogleContainerTools/kaniko/pull/2504 (in kaniko v1.11.0),
|
||||||
|
# it still fails with error:
|
||||||
|
# error building image: could not save file: copying file: symlink postgres /kaniko/1/usr/local/pgsql/bin/postmaster: file exists
|
||||||
|
#
|
||||||
|
# Ref https://github.com/GoogleContainerTools/kaniko/issues/1406
|
||||||
|
find /kaniko -maxdepth 1 -mindepth 1 -type d -regex "/kaniko/[0-9]*" -exec rm -rv {} \;
|
||||||
|
|
||||||
|
/kaniko/executor --reproducible --snapshot-mode=redo --skip-unused-stages --cache=true \
|
||||||
|
--cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache \
|
||||||
|
--context . \
|
||||||
|
--build-arg GIT_VERSION=${{ github.sha }} \
|
||||||
|
--build-arg PG_VERSION=${{ matrix.version }} \
|
||||||
|
--build-arg BUILD_TAG=${{needs.tag.outputs.build-tag}} \
|
||||||
|
--build-arg REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com \
|
||||||
|
--dockerfile Dockerfile.compute-node \
|
||||||
|
--destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/extensions-${{ matrix.version }}:${{needs.tag.outputs.build-tag}} \
|
||||||
|
--destination neondatabase/extensions-${{ matrix.version }}:${{needs.tag.outputs.build-tag}} \
|
||||||
|
--cleanup \
|
||||||
|
--target postgres-extensions
|
||||||
|
|
||||||
# Cleanup script fails otherwise - rm: cannot remove '/nvme/actions-runner/_work/_temp/_github_home/.ecr': Permission denied
|
# Cleanup script fails otherwise - rm: cannot remove '/nvme/actions-runner/_work/_temp/_github_home/.ecr': Permission denied
|
||||||
- name: Cleanup ECR folder
|
- name: Cleanup ECR folder
|
||||||
@@ -781,7 +767,7 @@ jobs:
|
|||||||
run:
|
run:
|
||||||
shell: sh -eu {0}
|
shell: sh -eu {0}
|
||||||
env:
|
env:
|
||||||
VM_BUILDER_VERSION: v0.8.0
|
VM_BUILDER_VERSION: v0.11.1
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
@@ -883,8 +869,10 @@ jobs:
|
|||||||
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}} latest
|
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}} latest
|
||||||
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:${{needs.tag.outputs.build-tag}} latest
|
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:${{needs.tag.outputs.build-tag}} latest
|
||||||
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v14:${{needs.tag.outputs.build-tag}} latest
|
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v14:${{needs.tag.outputs.build-tag}} latest
|
||||||
|
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/extensions-v14:${{needs.tag.outputs.build-tag}} latest
|
||||||
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}} latest
|
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}} latest
|
||||||
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} latest
|
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} latest
|
||||||
|
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/extensions-v15:${{needs.tag.outputs.build-tag}} latest
|
||||||
|
|
||||||
- name: Push images to production ECR
|
- name: Push images to production ECR
|
||||||
if: |
|
if: |
|
||||||
@@ -895,8 +883,10 @@ jobs:
|
|||||||
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:latest
|
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:latest
|
||||||
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:latest
|
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:latest
|
||||||
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v14:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v14:latest
|
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v14:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v14:latest
|
||||||
|
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/extensions-v14:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/extensions-v14:latest
|
||||||
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:latest
|
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:latest
|
||||||
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v15:latest
|
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v15:latest
|
||||||
|
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/extensions-v15:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/extensions-v15:latest
|
||||||
|
|
||||||
- name: Configure Docker Hub login
|
- name: Configure Docker Hub login
|
||||||
run: |
|
run: |
|
||||||
@@ -918,16 +908,93 @@ jobs:
|
|||||||
crane tag neondatabase/compute-tools:${{needs.tag.outputs.build-tag}} latest
|
crane tag neondatabase/compute-tools:${{needs.tag.outputs.build-tag}} latest
|
||||||
crane tag neondatabase/compute-node-v14:${{needs.tag.outputs.build-tag}} latest
|
crane tag neondatabase/compute-node-v14:${{needs.tag.outputs.build-tag}} latest
|
||||||
crane tag neondatabase/vm-compute-node-v14:${{needs.tag.outputs.build-tag}} latest
|
crane tag neondatabase/vm-compute-node-v14:${{needs.tag.outputs.build-tag}} latest
|
||||||
|
crane tag neondatabase/extensions-v14:${{needs.tag.outputs.build-tag}} latest
|
||||||
crane tag neondatabase/compute-node-v15:${{needs.tag.outputs.build-tag}} latest
|
crane tag neondatabase/compute-node-v15:${{needs.tag.outputs.build-tag}} latest
|
||||||
crane tag neondatabase/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} latest
|
crane tag neondatabase/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} latest
|
||||||
|
crane tag neondatabase/extensions-v15:${{needs.tag.outputs.build-tag}} latest
|
||||||
|
|
||||||
- name: Cleanup ECR folder
|
- name: Cleanup ECR folder
|
||||||
run: rm -rf ~/.ecr
|
run: rm -rf ~/.ecr
|
||||||
|
|
||||||
|
upload-postgres-extensions-to-s3:
|
||||||
|
if: |
|
||||||
|
(github.ref_name == 'main' || github.ref_name == 'release') &&
|
||||||
|
github.event_name != 'workflow_dispatch'
|
||||||
|
runs-on: ${{ github.ref_name == 'release' && fromJSON('["self-hosted", "prod", "x64"]') || fromJSON('["self-hosted", "gen3", "small"]') }}
|
||||||
|
needs: [ tag, promote-images ]
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
version: [ v14, v15 ]
|
||||||
|
|
||||||
|
env:
|
||||||
|
# During the transition period we extract public extensions from the compute-node image and custom extensions from the extensions image.
|
||||||
|
# Later all the extensions will be moved to extensions image.
|
||||||
|
EXTENSIONS_IMAGE: ${{ github.ref_name == 'release' && '093970136003' || '369495373322'}}.dkr.ecr.eu-central-1.amazonaws.com/extensions-${{ matrix.version }}:latest
|
||||||
|
COMPUTE_NODE_IMAGE: ${{ github.ref_name == 'release' && '093970136003' || '369495373322'}}.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:latest
|
||||||
|
AWS_ACCESS_KEY_ID: ${{ github.ref_name == 'release' && secrets.AWS_ACCESS_KEY_PROD || secrets.AWS_ACCESS_KEY_DEV }}
|
||||||
|
AWS_SECRET_ACCESS_KEY: ${{ github.ref_name == 'release' && secrets.AWS_SECRET_KEY_PROD || secrets.AWS_SECRET_KEY_DEV }}
|
||||||
|
S3_BUCKETS: |
|
||||||
|
${{ github.ref_name == 'release' &&
|
||||||
|
'neon-prod-extensions-ap-southeast-1 neon-prod-extensions-eu-central-1 neon-prod-extensions-us-east-1 neon-prod-extensions-us-east-2 neon-prod-extensions-us-west-2' ||
|
||||||
|
'neon-dev-extensions-eu-central-1 neon-dev-extensions-eu-west-1 neon-dev-extensions-us-east-2' }}
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Pull postgres-extensions image
|
||||||
|
run: |
|
||||||
|
docker pull ${EXTENSIONS_IMAGE}
|
||||||
|
docker pull ${COMPUTE_NODE_IMAGE}
|
||||||
|
|
||||||
|
- name: Create postgres-extensions container
|
||||||
|
id: create-container
|
||||||
|
run: |
|
||||||
|
EID=$(docker create ${EXTENSIONS_IMAGE} true)
|
||||||
|
echo "EID=${EID}" >> $GITHUB_OUTPUT
|
||||||
|
|
||||||
|
CID=$(docker create ${COMPUTE_NODE_IMAGE} true)
|
||||||
|
echo "CID=${CID}" >> $GITHUB_OUTPUT
|
||||||
|
|
||||||
|
- name: Extract postgres-extensions from container
|
||||||
|
run: |
|
||||||
|
rm -rf ./extensions-to-upload ./custom-extensions # Just in case
|
||||||
|
|
||||||
|
# In compute image we have a bit different directory layout
|
||||||
|
mkdir -p extensions-to-upload/share
|
||||||
|
docker cp ${{ steps.create-container.outputs.CID }}:/usr/local/share/extension ./extensions-to-upload/share/extension
|
||||||
|
docker cp ${{ steps.create-container.outputs.CID }}:/usr/local/lib ./extensions-to-upload/lib
|
||||||
|
|
||||||
|
# Delete Neon extensions (they are always present on the compute-node image)
|
||||||
|
rm -rf ./extensions-to-upload/share/extension/neon*
|
||||||
|
rm -rf ./extensions-to-upload/lib/neon*
|
||||||
|
|
||||||
|
# Delete leftovers from the extension build step
|
||||||
|
rm -rf ./extensions-to-upload/lib/pgxs
|
||||||
|
rm -rf ./extensions-to-upload/lib/pkgconfig
|
||||||
|
|
||||||
|
docker cp ${{ steps.create-container.outputs.EID }}:/extensions ./custom-extensions
|
||||||
|
for EXT_NAME in $(ls ./custom-extensions); do
|
||||||
|
mkdir -p ./extensions-to-upload/${EXT_NAME}/share
|
||||||
|
|
||||||
|
mv ./custom-extensions/${EXT_NAME}/share/extension ./extensions-to-upload/${EXT_NAME}/share/extension
|
||||||
|
mv ./custom-extensions/${EXT_NAME}/lib ./extensions-to-upload/${EXT_NAME}/lib
|
||||||
|
done
|
||||||
|
|
||||||
|
- name: Upload postgres-extensions to S3
|
||||||
|
run: |
|
||||||
|
for BUCKET in $(echo ${S3_BUCKETS}); do
|
||||||
|
aws s3 cp --recursive --only-show-errors ./extensions-to-upload s3://${BUCKET}/${{ needs.tag.outputs.build-tag }}/${{ matrix.version }}
|
||||||
|
done
|
||||||
|
|
||||||
|
- name: Cleanup
|
||||||
|
if: ${{ always() && (steps.create-container.outputs.CID || steps.create-container.outputs.EID) }}
|
||||||
|
run: |
|
||||||
|
docker rm ${{ steps.create-container.outputs.CID }} || true
|
||||||
|
docker rm ${{ steps.create-container.outputs.EID }} || true
|
||||||
|
|
||||||
deploy:
|
deploy:
|
||||||
runs-on: [ self-hosted, gen3, small ]
|
runs-on: [ self-hosted, gen3, small ]
|
||||||
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
|
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
|
||||||
needs: [ promote-images, tag, regress-tests ]
|
needs: [ upload-postgres-extensions-to-s3, promote-images, tag, regress-tests ]
|
||||||
if: ( github.ref_name == 'main' || github.ref_name == 'release' ) && github.event_name != 'workflow_dispatch'
|
if: ( github.ref_name == 'main' || github.ref_name == 'release' ) && github.event_name != 'workflow_dispatch'
|
||||||
steps:
|
steps:
|
||||||
- name: Fix git ownership
|
- name: Fix git ownership
|
||||||
@@ -959,6 +1026,20 @@ jobs:
|
|||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
- name: Create git tag
|
||||||
|
if: github.ref_name == 'release'
|
||||||
|
uses: actions/github-script@v6
|
||||||
|
with:
|
||||||
|
# Retry script for 5XX server errors: https://github.com/actions/github-script#retries
|
||||||
|
retries: 5
|
||||||
|
script: |
|
||||||
|
github.rest.git.createRef({
|
||||||
|
owner: context.repo.owner,
|
||||||
|
repo: context.repo.repo,
|
||||||
|
ref: "refs/tags/${{ needs.tag.outputs.build-tag }}",
|
||||||
|
sha: context.sha,
|
||||||
|
})
|
||||||
|
|
||||||
promote-compatibility-data:
|
promote-compatibility-data:
|
||||||
runs-on: [ self-hosted, gen3, small ]
|
runs-on: [ self-hosted, gen3, small ]
|
||||||
container:
|
container:
|
||||||
|
|||||||
1
.github/workflows/release.yml
vendored
1
.github/workflows/release.yml
vendored
@@ -3,6 +3,7 @@ name: Create Release Branch
|
|||||||
on:
|
on:
|
||||||
schedule:
|
schedule:
|
||||||
- cron: '0 10 * * 2'
|
- cron: '0 10 * * 2'
|
||||||
|
workflow_dispatch:
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
create_release_branch:
|
create_release_branch:
|
||||||
|
|||||||
227
Cargo.lock
generated
227
Cargo.lock
generated
@@ -200,17 +200,6 @@ dependencies = [
|
|||||||
"critical-section",
|
"critical-section",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "atty"
|
|
||||||
version = "0.2.14"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
|
|
||||||
dependencies = [
|
|
||||||
"hermit-abi 0.1.19",
|
|
||||||
"libc",
|
|
||||||
"winapi",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "autocfg"
|
name = "autocfg"
|
||||||
version = "1.1.0"
|
version = "1.1.0"
|
||||||
@@ -805,18 +794,6 @@ dependencies = [
|
|||||||
"libloading",
|
"libloading",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "clap"
|
|
||||||
version = "3.2.25"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123"
|
|
||||||
dependencies = [
|
|
||||||
"bitflags",
|
|
||||||
"clap_lex 0.2.4",
|
|
||||||
"indexmap",
|
|
||||||
"textwrap",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "clap"
|
name = "clap"
|
||||||
version = "4.3.0"
|
version = "4.3.0"
|
||||||
@@ -837,7 +814,7 @@ dependencies = [
|
|||||||
"anstream",
|
"anstream",
|
||||||
"anstyle",
|
"anstyle",
|
||||||
"bitflags",
|
"bitflags",
|
||||||
"clap_lex 0.5.0",
|
"clap_lex",
|
||||||
"strsim",
|
"strsim",
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -853,15 +830,6 @@ dependencies = [
|
|||||||
"syn 2.0.16",
|
"syn 2.0.16",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "clap_lex"
|
|
||||||
version = "0.2.4"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5"
|
|
||||||
dependencies = [
|
|
||||||
"os_str_bytes",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "clap_lex"
|
name = "clap_lex"
|
||||||
version = "0.5.0"
|
version = "0.5.0"
|
||||||
@@ -915,7 +883,7 @@ version = "0.1.0"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"chrono",
|
"chrono",
|
||||||
"clap 4.3.0",
|
"clap",
|
||||||
"compute_api",
|
"compute_api",
|
||||||
"futures",
|
"futures",
|
||||||
"hyper",
|
"hyper",
|
||||||
@@ -977,7 +945,7 @@ name = "control_plane"
|
|||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"clap 4.3.0",
|
"clap",
|
||||||
"comfy-table",
|
"comfy-table",
|
||||||
"compute_api",
|
"compute_api",
|
||||||
"git-version",
|
"git-version",
|
||||||
@@ -1047,19 +1015,19 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "criterion"
|
name = "criterion"
|
||||||
version = "0.4.0"
|
version = "0.5.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "e7c76e09c1aae2bc52b3d2f29e13c6572553b30c4aa1b8a49fd70de6412654cb"
|
checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anes",
|
"anes",
|
||||||
"atty",
|
|
||||||
"cast",
|
"cast",
|
||||||
"ciborium",
|
"ciborium",
|
||||||
"clap 3.2.25",
|
"clap",
|
||||||
"criterion-plot",
|
"criterion-plot",
|
||||||
|
"is-terminal",
|
||||||
"itertools",
|
"itertools",
|
||||||
"lazy_static",
|
|
||||||
"num-traits",
|
"num-traits",
|
||||||
|
"once_cell",
|
||||||
"oorandom",
|
"oorandom",
|
||||||
"plotters",
|
"plotters",
|
||||||
"rayon",
|
"rayon",
|
||||||
@@ -1140,7 +1108,7 @@ dependencies = [
|
|||||||
"crossterm_winapi",
|
"crossterm_winapi",
|
||||||
"libc",
|
"libc",
|
||||||
"mio",
|
"mio",
|
||||||
"parking_lot",
|
"parking_lot 0.12.1",
|
||||||
"signal-hook",
|
"signal-hook",
|
||||||
"signal-hook-mio",
|
"signal-hook-mio",
|
||||||
"winapi",
|
"winapi",
|
||||||
@@ -1210,7 +1178,7 @@ dependencies = [
|
|||||||
"hashbrown 0.12.3",
|
"hashbrown 0.12.3",
|
||||||
"lock_api",
|
"lock_api",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"parking_lot_core",
|
"parking_lot_core 0.9.7",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -1676,15 +1644,6 @@ version = "0.4.1"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
|
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "hermit-abi"
|
|
||||||
version = "0.1.19"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
|
|
||||||
dependencies = [
|
|
||||||
"libc",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "hermit-abi"
|
name = "hermit-abi"
|
||||||
version = "0.2.6"
|
version = "0.2.6"
|
||||||
@@ -1939,6 +1898,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c"
|
checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cfg-if",
|
"cfg-if",
|
||||||
|
"js-sys",
|
||||||
|
"wasm-bindgen",
|
||||||
|
"web-sys",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -2267,16 +2229,6 @@ dependencies = [
|
|||||||
"windows-sys 0.45.0",
|
"windows-sys 0.45.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "nu-ansi-term"
|
|
||||||
version = "0.46.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84"
|
|
||||||
dependencies = [
|
|
||||||
"overload",
|
|
||||||
"winapi",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "num-bigint"
|
name = "num-bigint"
|
||||||
version = "0.4.3"
|
version = "0.4.3"
|
||||||
@@ -2349,9 +2301,9 @@ checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "openssl"
|
name = "openssl"
|
||||||
version = "0.10.52"
|
version = "0.10.55"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "01b8574602df80f7b85fdfc5392fa884a4e3b3f4f35402c070ab34c3d3f78d56"
|
checksum = "345df152bc43501c5eb9e4654ff05f794effb78d4efe3d53abc158baddc0703d"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bitflags",
|
"bitflags",
|
||||||
"cfg-if",
|
"cfg-if",
|
||||||
@@ -2381,9 +2333,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "openssl-sys"
|
name = "openssl-sys"
|
||||||
version = "0.9.87"
|
version = "0.9.90"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "8e17f59264b2809d77ae94f0e1ebabc434773f370d6ca667bd223ea10e06cc7e"
|
checksum = "374533b0e45f3a7ced10fcaeccca020e66656bc03dac384f852e4e5a7a8104a6"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cc",
|
"cc",
|
||||||
"libc",
|
"libc",
|
||||||
@@ -2504,31 +2456,19 @@ dependencies = [
|
|||||||
"winapi",
|
"winapi",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "os_str_bytes"
|
|
||||||
version = "6.5.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "ceedf44fb00f2d1984b0bc98102627ce622e083e49a5bacdb3e514fa4238e267"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "outref"
|
name = "outref"
|
||||||
version = "0.5.1"
|
version = "0.5.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "4030760ffd992bef45b0ae3f10ce1aba99e33464c90d14dd7c039884963ddc7a"
|
checksum = "4030760ffd992bef45b0ae3f10ce1aba99e33464c90d14dd7c039884963ddc7a"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "overload"
|
|
||||||
version = "0.1.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pagectl"
|
name = "pagectl"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"bytes",
|
"bytes",
|
||||||
"clap 4.3.0",
|
"clap",
|
||||||
"git-version",
|
"git-version",
|
||||||
"pageserver",
|
"pageserver",
|
||||||
"postgres_ffi",
|
"postgres_ffi",
|
||||||
@@ -2547,7 +2487,7 @@ dependencies = [
|
|||||||
"byteorder",
|
"byteorder",
|
||||||
"bytes",
|
"bytes",
|
||||||
"chrono",
|
"chrono",
|
||||||
"clap 4.3.0",
|
"clap",
|
||||||
"close_fds",
|
"close_fds",
|
||||||
"const_format",
|
"const_format",
|
||||||
"consumption_metrics",
|
"consumption_metrics",
|
||||||
@@ -2629,6 +2569,17 @@ dependencies = [
|
|||||||
"workspace_hack",
|
"workspace_hack",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "parking_lot"
|
||||||
|
version = "0.11.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99"
|
||||||
|
dependencies = [
|
||||||
|
"instant",
|
||||||
|
"lock_api",
|
||||||
|
"parking_lot_core 0.8.6",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "parking_lot"
|
name = "parking_lot"
|
||||||
version = "0.12.1"
|
version = "0.12.1"
|
||||||
@@ -2636,7 +2587,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
|
checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"lock_api",
|
"lock_api",
|
||||||
"parking_lot_core",
|
"parking_lot_core 0.9.7",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "parking_lot_core"
|
||||||
|
version = "0.8.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "60a2cfe6f0ad2bfc16aefa463b497d5c7a5ecd44a23efa72aa342d90177356dc"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"instant",
|
||||||
|
"libc",
|
||||||
|
"redox_syscall 0.2.16",
|
||||||
|
"smallvec",
|
||||||
|
"winapi",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -2652,6 +2617,16 @@ dependencies = [
|
|||||||
"windows-sys 0.45.0",
|
"windows-sys 0.45.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pbkdf2"
|
||||||
|
version = "0.12.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f0ca0b5a68607598bf3bad68f32227a8164f6254833f84eafaac409cd6746c31"
|
||||||
|
dependencies = [
|
||||||
|
"digest",
|
||||||
|
"hmac",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "peeking_take_while"
|
name = "peeking_take_while"
|
||||||
version = "0.1.2"
|
version = "0.1.2"
|
||||||
@@ -2770,7 +2745,7 @@ dependencies = [
|
|||||||
[[package]]
|
[[package]]
|
||||||
name = "postgres"
|
name = "postgres"
|
||||||
version = "0.19.4"
|
version = "0.19.4"
|
||||||
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=2e9b5f1ddc481d1a98fa79f6b9378ac4f170b7c9#2e9b5f1ddc481d1a98fa79f6b9378ac4f170b7c9"
|
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=1aaedab101b23f7612042850d8f2036810fa7c7f#1aaedab101b23f7612042850d8f2036810fa7c7f"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bytes",
|
"bytes",
|
||||||
"fallible-iterator",
|
"fallible-iterator",
|
||||||
@@ -2783,7 +2758,7 @@ dependencies = [
|
|||||||
[[package]]
|
[[package]]
|
||||||
name = "postgres-native-tls"
|
name = "postgres-native-tls"
|
||||||
version = "0.5.0"
|
version = "0.5.0"
|
||||||
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=2e9b5f1ddc481d1a98fa79f6b9378ac4f170b7c9#2e9b5f1ddc481d1a98fa79f6b9378ac4f170b7c9"
|
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=1aaedab101b23f7612042850d8f2036810fa7c7f#1aaedab101b23f7612042850d8f2036810fa7c7f"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"native-tls",
|
"native-tls",
|
||||||
"tokio",
|
"tokio",
|
||||||
@@ -2794,7 +2769,7 @@ dependencies = [
|
|||||||
[[package]]
|
[[package]]
|
||||||
name = "postgres-protocol"
|
name = "postgres-protocol"
|
||||||
version = "0.6.4"
|
version = "0.6.4"
|
||||||
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=2e9b5f1ddc481d1a98fa79f6b9378ac4f170b7c9#2e9b5f1ddc481d1a98fa79f6b9378ac4f170b7c9"
|
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=1aaedab101b23f7612042850d8f2036810fa7c7f#1aaedab101b23f7612042850d8f2036810fa7c7f"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"base64 0.20.0",
|
"base64 0.20.0",
|
||||||
"byteorder",
|
"byteorder",
|
||||||
@@ -2812,7 +2787,7 @@ dependencies = [
|
|||||||
[[package]]
|
[[package]]
|
||||||
name = "postgres-types"
|
name = "postgres-types"
|
||||||
version = "0.2.4"
|
version = "0.2.4"
|
||||||
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=2e9b5f1ddc481d1a98fa79f6b9378ac4f170b7c9#2e9b5f1ddc481d1a98fa79f6b9378ac4f170b7c9"
|
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=1aaedab101b23f7612042850d8f2036810fa7c7f#1aaedab101b23f7612042850d8f2036810fa7c7f"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bytes",
|
"bytes",
|
||||||
"fallible-iterator",
|
"fallible-iterator",
|
||||||
@@ -2957,7 +2932,7 @@ dependencies = [
|
|||||||
"lazy_static",
|
"lazy_static",
|
||||||
"libc",
|
"libc",
|
||||||
"memchr",
|
"memchr",
|
||||||
"parking_lot",
|
"parking_lot 0.12.1",
|
||||||
"procfs",
|
"procfs",
|
||||||
"thiserror",
|
"thiserror",
|
||||||
]
|
]
|
||||||
@@ -3022,12 +2997,11 @@ version = "0.1.0"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"async-trait",
|
"async-trait",
|
||||||
"atty",
|
|
||||||
"base64 0.13.1",
|
"base64 0.13.1",
|
||||||
"bstr",
|
"bstr",
|
||||||
"bytes",
|
"bytes",
|
||||||
"chrono",
|
"chrono",
|
||||||
"clap 4.3.0",
|
"clap",
|
||||||
"consumption_metrics",
|
"consumption_metrics",
|
||||||
"futures",
|
"futures",
|
||||||
"git-version",
|
"git-version",
|
||||||
@@ -3045,7 +3019,8 @@ dependencies = [
|
|||||||
"native-tls",
|
"native-tls",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"opentelemetry",
|
"opentelemetry",
|
||||||
"parking_lot",
|
"parking_lot 0.12.1",
|
||||||
|
"pbkdf2",
|
||||||
"pin-project-lite",
|
"pin-project-lite",
|
||||||
"postgres-native-tls",
|
"postgres-native-tls",
|
||||||
"postgres_backend",
|
"postgres_backend",
|
||||||
@@ -3056,6 +3031,7 @@ dependencies = [
|
|||||||
"regex",
|
"regex",
|
||||||
"reqwest",
|
"reqwest",
|
||||||
"reqwest-middleware",
|
"reqwest-middleware",
|
||||||
|
"reqwest-retry",
|
||||||
"reqwest-tracing",
|
"reqwest-tracing",
|
||||||
"routerify",
|
"routerify",
|
||||||
"rstest",
|
"rstest",
|
||||||
@@ -3291,6 +3267,29 @@ dependencies = [
|
|||||||
"thiserror",
|
"thiserror",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "reqwest-retry"
|
||||||
|
version = "0.2.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "48d0fd6ef4c6d23790399fe15efc8d12cd9f3d4133958f9bd7801ee5cbaec6c4"
|
||||||
|
dependencies = [
|
||||||
|
"anyhow",
|
||||||
|
"async-trait",
|
||||||
|
"chrono",
|
||||||
|
"futures",
|
||||||
|
"getrandom",
|
||||||
|
"http",
|
||||||
|
"hyper",
|
||||||
|
"parking_lot 0.11.2",
|
||||||
|
"reqwest",
|
||||||
|
"reqwest-middleware",
|
||||||
|
"retry-policies",
|
||||||
|
"task-local-extensions",
|
||||||
|
"tokio",
|
||||||
|
"tracing",
|
||||||
|
"wasm-timer",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "reqwest-tracing"
|
name = "reqwest-tracing"
|
||||||
version = "0.4.4"
|
version = "0.4.4"
|
||||||
@@ -3309,6 +3308,17 @@ dependencies = [
|
|||||||
"tracing-opentelemetry",
|
"tracing-opentelemetry",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "retry-policies"
|
||||||
|
version = "0.1.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e09bbcb5003282bcb688f0bae741b278e9c7e8f378f561522c9806c58e075d9b"
|
||||||
|
dependencies = [
|
||||||
|
"anyhow",
|
||||||
|
"chrono",
|
||||||
|
"rand",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ring"
|
name = "ring"
|
||||||
version = "0.16.20"
|
version = "0.16.20"
|
||||||
@@ -3507,7 +3517,7 @@ dependencies = [
|
|||||||
"byteorder",
|
"byteorder",
|
||||||
"bytes",
|
"bytes",
|
||||||
"chrono",
|
"chrono",
|
||||||
"clap 4.3.0",
|
"clap",
|
||||||
"const_format",
|
"const_format",
|
||||||
"crc32c",
|
"crc32c",
|
||||||
"fs2",
|
"fs2",
|
||||||
@@ -3518,7 +3528,7 @@ dependencies = [
|
|||||||
"hyper",
|
"hyper",
|
||||||
"metrics",
|
"metrics",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"parking_lot",
|
"parking_lot 0.12.1",
|
||||||
"postgres",
|
"postgres",
|
||||||
"postgres-protocol",
|
"postgres-protocol",
|
||||||
"postgres_backend",
|
"postgres_backend",
|
||||||
@@ -3937,7 +3947,7 @@ dependencies = [
|
|||||||
"anyhow",
|
"anyhow",
|
||||||
"async-stream",
|
"async-stream",
|
||||||
"bytes",
|
"bytes",
|
||||||
"clap 4.3.0",
|
"clap",
|
||||||
"const_format",
|
"const_format",
|
||||||
"futures",
|
"futures",
|
||||||
"futures-core",
|
"futures-core",
|
||||||
@@ -3947,7 +3957,7 @@ dependencies = [
|
|||||||
"hyper",
|
"hyper",
|
||||||
"metrics",
|
"metrics",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"parking_lot",
|
"parking_lot 0.12.1",
|
||||||
"prost",
|
"prost",
|
||||||
"tokio",
|
"tokio",
|
||||||
"tokio-stream",
|
"tokio-stream",
|
||||||
@@ -4118,12 +4128,6 @@ dependencies = [
|
|||||||
"syn 1.0.109",
|
"syn 1.0.109",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "textwrap"
|
|
||||||
version = "0.16.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "thiserror"
|
name = "thiserror"
|
||||||
version = "1.0.40"
|
version = "1.0.40"
|
||||||
@@ -4272,7 +4276,7 @@ dependencies = [
|
|||||||
[[package]]
|
[[package]]
|
||||||
name = "tokio-postgres"
|
name = "tokio-postgres"
|
||||||
version = "0.7.7"
|
version = "0.7.7"
|
||||||
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=2e9b5f1ddc481d1a98fa79f6b9378ac4f170b7c9#2e9b5f1ddc481d1a98fa79f6b9378ac4f170b7c9"
|
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=1aaedab101b23f7612042850d8f2036810fa7c7f#1aaedab101b23f7612042850d8f2036810fa7c7f"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"async-trait",
|
"async-trait",
|
||||||
"byteorder",
|
"byteorder",
|
||||||
@@ -4281,7 +4285,7 @@ dependencies = [
|
|||||||
"futures-channel",
|
"futures-channel",
|
||||||
"futures-util",
|
"futures-util",
|
||||||
"log",
|
"log",
|
||||||
"parking_lot",
|
"parking_lot 0.12.1",
|
||||||
"percent-encoding",
|
"percent-encoding",
|
||||||
"phf",
|
"phf",
|
||||||
"pin-project-lite",
|
"pin-project-lite",
|
||||||
@@ -4539,7 +4543,7 @@ name = "trace"
|
|||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"clap 4.3.0",
|
"clap",
|
||||||
"pageserver_api",
|
"pageserver_api",
|
||||||
"utils",
|
"utils",
|
||||||
"workspace_hack",
|
"workspace_hack",
|
||||||
@@ -4641,7 +4645,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "30a651bc37f915e81f087d86e62a18eec5f79550c7faff886f7090b4ea757c77"
|
checksum = "30a651bc37f915e81f087d86e62a18eec5f79550c7faff886f7090b4ea757c77"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"matchers",
|
"matchers",
|
||||||
"nu-ansi-term",
|
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"regex",
|
"regex",
|
||||||
"serde",
|
"serde",
|
||||||
@@ -4810,7 +4813,6 @@ version = "0.1.0"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"async-trait",
|
"async-trait",
|
||||||
"atty",
|
|
||||||
"bincode",
|
"bincode",
|
||||||
"byteorder",
|
"byteorder",
|
||||||
"bytes",
|
"bytes",
|
||||||
@@ -4887,7 +4889,7 @@ name = "wal_craft"
|
|||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"clap 4.3.0",
|
"clap",
|
||||||
"env_logger",
|
"env_logger",
|
||||||
"log",
|
"log",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
@@ -4991,6 +4993,21 @@ version = "0.2.86"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "ed9d5b4305409d1fc9482fee2d7f9bcbf24b3972bf59817ef757e23982242a93"
|
checksum = "ed9d5b4305409d1fc9482fee2d7f9bcbf24b3972bf59817ef757e23982242a93"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "wasm-timer"
|
||||||
|
version = "0.2.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "be0ecb0db480561e9a7642b5d3e4187c128914e58aa84330b9493e3eb68c5e7f"
|
||||||
|
dependencies = [
|
||||||
|
"futures",
|
||||||
|
"js-sys",
|
||||||
|
"parking_lot 0.11.2",
|
||||||
|
"pin-utils",
|
||||||
|
"wasm-bindgen",
|
||||||
|
"wasm-bindgen-futures",
|
||||||
|
"web-sys",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "web-sys"
|
name = "web-sys"
|
||||||
version = "0.3.63"
|
version = "0.3.63"
|
||||||
@@ -5252,7 +5269,7 @@ dependencies = [
|
|||||||
"anyhow",
|
"anyhow",
|
||||||
"bytes",
|
"bytes",
|
||||||
"chrono",
|
"chrono",
|
||||||
"clap 4.3.0",
|
"clap",
|
||||||
"clap_builder",
|
"clap_builder",
|
||||||
"crossbeam-utils",
|
"crossbeam-utils",
|
||||||
"either",
|
"either",
|
||||||
|
|||||||
19
Cargo.toml
19
Cargo.toml
@@ -34,7 +34,6 @@ license = "Apache-2.0"
|
|||||||
anyhow = { version = "1.0", features = ["backtrace"] }
|
anyhow = { version = "1.0", features = ["backtrace"] }
|
||||||
async-stream = "0.3"
|
async-stream = "0.3"
|
||||||
async-trait = "0.1"
|
async-trait = "0.1"
|
||||||
atty = "0.2.14"
|
|
||||||
aws-config = { version = "0.55", default-features = false, features=["rustls"] }
|
aws-config = { version = "0.55", default-features = false, features=["rustls"] }
|
||||||
aws-sdk-s3 = "0.27"
|
aws-sdk-s3 = "0.27"
|
||||||
aws-smithy-http = "0.55"
|
aws-smithy-http = "0.55"
|
||||||
@@ -87,6 +86,7 @@ opentelemetry = "0.18.0"
|
|||||||
opentelemetry-otlp = { version = "0.11.0", default_features=false, features = ["http-proto", "trace", "http", "reqwest-client"] }
|
opentelemetry-otlp = { version = "0.11.0", default_features=false, features = ["http-proto", "trace", "http", "reqwest-client"] }
|
||||||
opentelemetry-semantic-conventions = "0.10.0"
|
opentelemetry-semantic-conventions = "0.10.0"
|
||||||
parking_lot = "0.12"
|
parking_lot = "0.12"
|
||||||
|
pbkdf2 = "0.12.1"
|
||||||
pin-project-lite = "0.2"
|
pin-project-lite = "0.2"
|
||||||
prometheus = {version = "0.13", default_features=false, features = ["process"]} # removes protobuf dependency
|
prometheus = {version = "0.13", default_features=false, features = ["process"]} # removes protobuf dependency
|
||||||
prost = "0.11"
|
prost = "0.11"
|
||||||
@@ -95,6 +95,7 @@ regex = "1.4"
|
|||||||
reqwest = { version = "0.11", default-features = false, features = ["rustls-tls"] }
|
reqwest = { version = "0.11", default-features = false, features = ["rustls-tls"] }
|
||||||
reqwest-tracing = { version = "0.4.0", features = ["opentelemetry_0_18"] }
|
reqwest-tracing = { version = "0.4.0", features = ["opentelemetry_0_18"] }
|
||||||
reqwest-middleware = "0.2.0"
|
reqwest-middleware = "0.2.0"
|
||||||
|
reqwest-retry = "0.2.2"
|
||||||
routerify = "3"
|
routerify = "3"
|
||||||
rpds = "0.13"
|
rpds = "0.13"
|
||||||
rustls = "0.20"
|
rustls = "0.20"
|
||||||
@@ -128,7 +129,7 @@ tonic = {version = "0.9", features = ["tls", "tls-roots"]}
|
|||||||
tracing = "0.1"
|
tracing = "0.1"
|
||||||
tracing-error = "0.2.0"
|
tracing-error = "0.2.0"
|
||||||
tracing-opentelemetry = "0.18.0"
|
tracing-opentelemetry = "0.18.0"
|
||||||
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
tracing-subscriber = { version = "0.3", default_features = false, features = ["smallvec", "fmt", "tracing-log", "std", "env-filter"] }
|
||||||
url = "2.2"
|
url = "2.2"
|
||||||
uuid = { version = "1.2", features = ["v4", "serde"] }
|
uuid = { version = "1.2", features = ["v4", "serde"] }
|
||||||
walkdir = "2.3.2"
|
walkdir = "2.3.2"
|
||||||
@@ -140,11 +141,11 @@ env_logger = "0.10"
|
|||||||
log = "0.4"
|
log = "0.4"
|
||||||
|
|
||||||
## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed
|
## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed
|
||||||
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="2e9b5f1ddc481d1a98fa79f6b9378ac4f170b7c9" }
|
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="1aaedab101b23f7612042850d8f2036810fa7c7f" }
|
||||||
postgres-native-tls = { git = "https://github.com/neondatabase/rust-postgres.git", rev="2e9b5f1ddc481d1a98fa79f6b9378ac4f170b7c9" }
|
postgres-native-tls = { git = "https://github.com/neondatabase/rust-postgres.git", rev="1aaedab101b23f7612042850d8f2036810fa7c7f" }
|
||||||
postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", rev="2e9b5f1ddc481d1a98fa79f6b9378ac4f170b7c9" }
|
postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", rev="1aaedab101b23f7612042850d8f2036810fa7c7f" }
|
||||||
postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", rev="2e9b5f1ddc481d1a98fa79f6b9378ac4f170b7c9" }
|
postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", rev="1aaedab101b23f7612042850d8f2036810fa7c7f" }
|
||||||
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="2e9b5f1ddc481d1a98fa79f6b9378ac4f170b7c9" }
|
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="1aaedab101b23f7612042850d8f2036810fa7c7f" }
|
||||||
tokio-tar = { git = "https://github.com/neondatabase/tokio-tar.git", rev="404df61437de0feef49ba2ccdbdd94eb8ad6e142" }
|
tokio-tar = { git = "https://github.com/neondatabase/tokio-tar.git", rev="404df61437de0feef49ba2ccdbdd94eb8ad6e142" }
|
||||||
|
|
||||||
## Other git libraries
|
## Other git libraries
|
||||||
@@ -170,7 +171,7 @@ utils = { version = "0.1", path = "./libs/utils/" }
|
|||||||
workspace_hack = { version = "0.1", path = "./workspace_hack/" }
|
workspace_hack = { version = "0.1", path = "./workspace_hack/" }
|
||||||
|
|
||||||
## Build dependencies
|
## Build dependencies
|
||||||
criterion = "0.4"
|
criterion = "0.5.1"
|
||||||
rcgen = "0.10"
|
rcgen = "0.10"
|
||||||
rstest = "0.17"
|
rstest = "0.17"
|
||||||
tempfile = "3.4"
|
tempfile = "3.4"
|
||||||
@@ -180,7 +181,7 @@ tonic-build = "0.9"
|
|||||||
|
|
||||||
# This is only needed for proxy's tests.
|
# This is only needed for proxy's tests.
|
||||||
# TODO: we should probably fork `tokio-postgres-rustls` instead.
|
# TODO: we should probably fork `tokio-postgres-rustls` instead.
|
||||||
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="2e9b5f1ddc481d1a98fa79f6b9378ac4f170b7c9" }
|
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="1aaedab101b23f7612042850d8f2036810fa7c7f" }
|
||||||
|
|
||||||
# Changes the MAX_THREADS limit from 4096 to 32768.
|
# Changes the MAX_THREADS limit from 4096 to 32768.
|
||||||
# This is a temporary workaround for using tracing from many threads in safekeepers code,
|
# This is a temporary workaround for using tracing from many threads in safekeepers code,
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ ARG PG_VERSION
|
|||||||
ARG REPOSITORY=neondatabase
|
ARG REPOSITORY=neondatabase
|
||||||
ARG IMAGE=rust
|
ARG IMAGE=rust
|
||||||
ARG TAG=pinned
|
ARG TAG=pinned
|
||||||
|
ARG BUILD_TAG
|
||||||
|
|
||||||
#########################################################################################
|
#########################################################################################
|
||||||
#
|
#
|
||||||
@@ -67,7 +68,7 @@ RUN apt update && \
|
|||||||
RUN wget https://gitlab.com/Oslandia/SFCGAL/-/archive/v1.3.10/SFCGAL-v1.3.10.tar.gz -O SFCGAL.tar.gz && \
|
RUN wget https://gitlab.com/Oslandia/SFCGAL/-/archive/v1.3.10/SFCGAL-v1.3.10.tar.gz -O SFCGAL.tar.gz && \
|
||||||
echo "4e39b3b2adada6254a7bdba6d297bb28e1a9835a9f879b74f37e2dab70203232 SFCGAL.tar.gz" | sha256sum --check && \
|
echo "4e39b3b2adada6254a7bdba6d297bb28e1a9835a9f879b74f37e2dab70203232 SFCGAL.tar.gz" | sha256sum --check && \
|
||||||
mkdir sfcgal-src && cd sfcgal-src && tar xvzf ../SFCGAL.tar.gz --strip-components=1 -C . && \
|
mkdir sfcgal-src && cd sfcgal-src && tar xvzf ../SFCGAL.tar.gz --strip-components=1 -C . && \
|
||||||
cmake . && make -j $(getconf _NPROCESSORS_ONLN) && \
|
cmake -DCMAKE_BUILD_TYPE=Release . && make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||||
DESTDIR=/sfcgal make install -j $(getconf _NPROCESSORS_ONLN) && \
|
DESTDIR=/sfcgal make install -j $(getconf _NPROCESSORS_ONLN) && \
|
||||||
make clean && cp -R /sfcgal/* /
|
make clean && cp -R /sfcgal/* /
|
||||||
|
|
||||||
@@ -95,7 +96,7 @@ RUN wget https://github.com/pgRouting/pgrouting/archive/v3.4.2.tar.gz -O pgrouti
|
|||||||
mkdir pgrouting-src && cd pgrouting-src && tar xvzf ../pgrouting.tar.gz --strip-components=1 -C . && \
|
mkdir pgrouting-src && cd pgrouting-src && tar xvzf ../pgrouting.tar.gz --strip-components=1 -C . && \
|
||||||
mkdir build && \
|
mkdir build && \
|
||||||
cd build && \
|
cd build && \
|
||||||
cmake .. && \
|
cmake -DCMAKE_BUILD_TYPE=Release .. && \
|
||||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgrouting.control
|
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgrouting.control
|
||||||
@@ -188,8 +189,8 @@ RUN wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.7.tar.gz -
|
|||||||
FROM build-deps AS vector-pg-build
|
FROM build-deps AS vector-pg-build
|
||||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||||
|
|
||||||
RUN wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.4.0.tar.gz -O pgvector.tar.gz && \
|
RUN wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.4.4.tar.gz -O pgvector.tar.gz && \
|
||||||
echo "b76cf84ddad452cc880a6c8c661d137ddd8679c000a16332f4f03ecf6e10bcc8 pgvector.tar.gz" | sha256sum --check && \
|
echo "1cb70a63f8928e396474796c22a20be9f7285a8a013009deb8152445b61b72e6 pgvector.tar.gz" | sha256sum --check && \
|
||||||
mkdir pgvector-src && cd pgvector-src && tar xvzf ../pgvector.tar.gz --strip-components=1 -C . && \
|
mkdir pgvector-src && cd pgvector-src && tar xvzf ../pgvector.tar.gz --strip-components=1 -C . && \
|
||||||
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||||
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||||
@@ -355,7 +356,7 @@ RUN apt-get update && \
|
|||||||
wget https://github.com/timescale/timescaledb/archive/refs/tags/2.10.1.tar.gz -O timescaledb.tar.gz && \
|
wget https://github.com/timescale/timescaledb/archive/refs/tags/2.10.1.tar.gz -O timescaledb.tar.gz && \
|
||||||
echo "6fca72a6ed0f6d32d2b3523951ede73dc5f9b0077b38450a029a5f411fdb8c73 timescaledb.tar.gz" | sha256sum --check && \
|
echo "6fca72a6ed0f6d32d2b3523951ede73dc5f9b0077b38450a029a5f411fdb8c73 timescaledb.tar.gz" | sha256sum --check && \
|
||||||
mkdir timescaledb-src && cd timescaledb-src && tar xvzf ../timescaledb.tar.gz --strip-components=1 -C . && \
|
mkdir timescaledb-src && cd timescaledb-src && tar xvzf ../timescaledb.tar.gz --strip-components=1 -C . && \
|
||||||
./bootstrap -DSEND_TELEMETRY_DEFAULT:BOOL=OFF -DUSE_TELEMETRY:BOOL=OFF -DAPACHE_ONLY:BOOL=ON && \
|
./bootstrap -DSEND_TELEMETRY_DEFAULT:BOOL=OFF -DUSE_TELEMETRY:BOOL=OFF -DAPACHE_ONLY:BOOL=ON -DCMAKE_BUILD_TYPE=Release && \
|
||||||
cd build && \
|
cd build && \
|
||||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||||
make install -j $(getconf _NPROCESSORS_ONLN) && \
|
make install -j $(getconf _NPROCESSORS_ONLN) && \
|
||||||
@@ -410,7 +411,7 @@ RUN apt-get update && \
|
|||||||
mkdir kq_imcx-src && cd kq_imcx-src && tar xvzf ../kq_imcx.tar.gz --strip-components=1 -C . && \
|
mkdir kq_imcx-src && cd kq_imcx-src && tar xvzf ../kq_imcx.tar.gz --strip-components=1 -C . && \
|
||||||
mkdir build && \
|
mkdir build && \
|
||||||
cd build && \
|
cd build && \
|
||||||
cmake .. && \
|
cmake -DCMAKE_BUILD_TYPE=Release .. && \
|
||||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/kq_imcx.control
|
echo 'trusted = true' >> /usr/local/pgsql/share/extension/kq_imcx.control
|
||||||
@@ -432,6 +433,108 @@ RUN wget https://github.com/citusdata/pg_cron/archive/refs/tags/v1.5.2.tar.gz -O
|
|||||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_cron.control
|
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_cron.control
|
||||||
|
|
||||||
|
#########################################################################################
|
||||||
|
#
|
||||||
|
# Layer "rdkit-pg-build"
|
||||||
|
# compile rdkit extension
|
||||||
|
#
|
||||||
|
#########################################################################################
|
||||||
|
FROM build-deps AS rdkit-pg-build
|
||||||
|
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||||
|
|
||||||
|
RUN apt-get update && \
|
||||||
|
apt-get install -y \
|
||||||
|
cmake \
|
||||||
|
libboost-iostreams1.74-dev \
|
||||||
|
libboost-regex1.74-dev \
|
||||||
|
libboost-serialization1.74-dev \
|
||||||
|
libboost-system1.74-dev \
|
||||||
|
libeigen3-dev \
|
||||||
|
libfreetype6-dev
|
||||||
|
|
||||||
|
ENV PATH "/usr/local/pgsql/bin/:/usr/local/pgsql/:$PATH"
|
||||||
|
RUN wget https://github.com/rdkit/rdkit/archive/refs/tags/Release_2023_03_1.tar.gz -O rdkit.tar.gz && \
|
||||||
|
echo "db346afbd0ba52c843926a2a62f8a38c7b774ffab37eaf382d789a824f21996c rdkit.tar.gz" | sha256sum --check && \
|
||||||
|
mkdir rdkit-src && cd rdkit-src && tar xvzf ../rdkit.tar.gz --strip-components=1 -C . && \
|
||||||
|
cmake \
|
||||||
|
-D RDK_BUILD_CAIRO_SUPPORT=OFF \
|
||||||
|
-D RDK_BUILD_INCHI_SUPPORT=ON \
|
||||||
|
-D RDK_BUILD_AVALON_SUPPORT=ON \
|
||||||
|
-D RDK_BUILD_PYTHON_WRAPPERS=OFF \
|
||||||
|
-D RDK_BUILD_DESCRIPTORS3D=OFF \
|
||||||
|
-D RDK_BUILD_FREESASA_SUPPORT=OFF \
|
||||||
|
-D RDK_BUILD_COORDGEN_SUPPORT=ON \
|
||||||
|
-D RDK_BUILD_MOLINTERCHANGE_SUPPORT=OFF \
|
||||||
|
-D RDK_BUILD_YAEHMOP_SUPPORT=OFF \
|
||||||
|
-D RDK_BUILD_STRUCTCHECKER_SUPPORT=OFF \
|
||||||
|
-D RDK_USE_URF=OFF \
|
||||||
|
-D RDK_BUILD_PGSQL=ON \
|
||||||
|
-D RDK_PGSQL_STATIC=ON \
|
||||||
|
-D PostgreSQL_CONFIG=pg_config \
|
||||||
|
-D PostgreSQL_INCLUDE_DIR=`pg_config --includedir` \
|
||||||
|
-D PostgreSQL_TYPE_INCLUDE_DIR=`pg_config --includedir-server` \
|
||||||
|
-D PostgreSQL_LIBRARY_DIR=`pg_config --libdir` \
|
||||||
|
-D RDK_INSTALL_INTREE=OFF \
|
||||||
|
-D CMAKE_BUILD_TYPE=Release \
|
||||||
|
. && \
|
||||||
|
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||||
|
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||||
|
echo 'trusted = true' >> /usr/local/pgsql/share/extension/rdkit.control
|
||||||
|
|
||||||
|
#########################################################################################
|
||||||
|
#
|
||||||
|
# Layer "pg-uuidv7-pg-build"
|
||||||
|
# compile pg_uuidv7 extension
|
||||||
|
#
|
||||||
|
#########################################################################################
|
||||||
|
FROM build-deps AS pg-uuidv7-pg-build
|
||||||
|
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||||
|
|
||||||
|
ENV PATH "/usr/local/pgsql/bin/:$PATH"
|
||||||
|
RUN wget https://github.com/fboulnois/pg_uuidv7/archive/refs/tags/v1.0.1.tar.gz -O pg_uuidv7.tar.gz && \
|
||||||
|
echo "0d0759ab01b7fb23851ecffb0bce27822e1868a4a5819bfd276101c716637a7a pg_uuidv7.tar.gz" | sha256sum --check && \
|
||||||
|
mkdir pg_uuidv7-src && cd pg_uuidv7-src && tar xvzf ../pg_uuidv7.tar.gz --strip-components=1 -C . && \
|
||||||
|
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||||
|
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||||
|
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_uuidv7.control
|
||||||
|
|
||||||
|
#########################################################################################
|
||||||
|
#
|
||||||
|
# Layer "pg-roaringbitmap-pg-build"
|
||||||
|
# compile pg_roaringbitmap extension
|
||||||
|
#
|
||||||
|
#########################################################################################
|
||||||
|
FROM build-deps AS pg-roaringbitmap-pg-build
|
||||||
|
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||||
|
|
||||||
|
ENV PATH "/usr/local/pgsql/bin/:$PATH"
|
||||||
|
RUN wget https://github.com/ChenHuajun/pg_roaringbitmap/archive/refs/tags/v0.5.4.tar.gz -O pg_roaringbitmap.tar.gz && \
|
||||||
|
echo "b75201efcb1c2d1b014ec4ae6a22769cc7a224e6e406a587f5784a37b6b5a2aa pg_roaringbitmap.tar.gz" | sha256sum --check && \
|
||||||
|
mkdir pg_roaringbitmap-src && cd pg_roaringbitmap-src && tar xvzf ../pg_roaringbitmap.tar.gz --strip-components=1 -C . && \
|
||||||
|
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||||
|
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||||
|
echo 'trusted = true' >> /usr/local/pgsql/share/extension/roaringbitmap.control
|
||||||
|
|
||||||
|
#########################################################################################
|
||||||
|
#
|
||||||
|
# Layer "pg-anon-pg-build"
|
||||||
|
# compile anon extension
|
||||||
|
#
|
||||||
|
#########################################################################################
|
||||||
|
FROM build-deps AS pg-anon-pg-build
|
||||||
|
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||||
|
|
||||||
|
# Kaniko doesn't allow to do `${from#/usr/local/pgsql/}`, so we use `${from:17}` instead
|
||||||
|
ENV PATH "/usr/local/pgsql/bin/:$PATH"
|
||||||
|
RUN wget https://gitlab.com/dalibo/postgresql_anonymizer/-/archive/1.1.0/postgresql_anonymizer-1.1.0.tar.gz -O pg_anon.tar.gz && \
|
||||||
|
echo "08b09d2ff9b962f96c60db7e6f8e79cf7253eb8772516998fc35ece08633d3ad pg_anon.tar.gz" | sha256sum --check && \
|
||||||
|
mkdir pg_anon-src && cd pg_anon-src && tar xvzf ../pg_anon.tar.gz --strip-components=1 -C . && \
|
||||||
|
find /usr/local/pgsql -type f | sort > /before.txt && \
|
||||||
|
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||||
|
echo 'trusted = true' >> /usr/local/pgsql/share/extension/anon.control && \
|
||||||
|
find /usr/local/pgsql -type f | sort > /after.txt && \
|
||||||
|
/bin/bash -c 'for from in $(comm -13 /before.txt /after.txt); do to=/extensions/anon/${from:17} && mkdir -p $(dirname ${to}) && cp -a ${from} ${to}; done'
|
||||||
|
|
||||||
#########################################################################################
|
#########################################################################################
|
||||||
#
|
#
|
||||||
# Layer "rust extensions"
|
# Layer "rust extensions"
|
||||||
@@ -540,6 +643,7 @@ RUN wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.1.0.tar.gz -
|
|||||||
#
|
#
|
||||||
#########################################################################################
|
#########################################################################################
|
||||||
FROM build-deps AS neon-pg-ext-build
|
FROM build-deps AS neon-pg-ext-build
|
||||||
|
# Public extensions
|
||||||
COPY --from=postgis-build /usr/local/pgsql/ /usr/local/pgsql/
|
COPY --from=postgis-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||||
COPY --from=postgis-build /sfcgal/* /
|
COPY --from=postgis-build /sfcgal/* /
|
||||||
COPY --from=plv8-build /usr/local/pgsql/ /usr/local/pgsql/
|
COPY --from=plv8-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||||
@@ -564,6 +668,9 @@ COPY --from=pg-hint-plan-pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
|||||||
COPY --from=kq-imcx-pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
COPY --from=kq-imcx-pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||||
COPY --from=pg-cron-pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
COPY --from=pg-cron-pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||||
COPY --from=pg-pgx-ulid-build /usr/local/pgsql/ /usr/local/pgsql/
|
COPY --from=pg-pgx-ulid-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||||
|
COPY --from=rdkit-pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||||
|
COPY --from=pg-uuidv7-pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||||
|
COPY --from=pg-roaringbitmap-pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||||
COPY pgxn/ pgxn/
|
COPY pgxn/ pgxn/
|
||||||
|
|
||||||
RUN make -j $(getconf _NPROCESSORS_ONLN) \
|
RUN make -j $(getconf _NPROCESSORS_ONLN) \
|
||||||
@@ -585,6 +692,9 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) \
|
|||||||
#
|
#
|
||||||
#########################################################################################
|
#########################################################################################
|
||||||
FROM $REPOSITORY/$IMAGE:$TAG AS compute-tools
|
FROM $REPOSITORY/$IMAGE:$TAG AS compute-tools
|
||||||
|
ARG BUILD_TAG
|
||||||
|
ENV BUILD_TAG=$BUILD_TAG
|
||||||
|
|
||||||
USER nonroot
|
USER nonroot
|
||||||
# Copy entire project to get Cargo.* files with proper dependencies for the whole project
|
# Copy entire project to get Cargo.* files with proper dependencies for the whole project
|
||||||
COPY --chown=nonroot . .
|
COPY --chown=nonroot . .
|
||||||
@@ -609,6 +719,22 @@ RUN rm -r /usr/local/pgsql/include
|
|||||||
# if they were to be used by other libraries.
|
# if they were to be used by other libraries.
|
||||||
RUN rm /usr/local/pgsql/lib/lib*.a
|
RUN rm /usr/local/pgsql/lib/lib*.a
|
||||||
|
|
||||||
|
#########################################################################################
|
||||||
|
#
|
||||||
|
# Extensions only
|
||||||
|
#
|
||||||
|
#########################################################################################
|
||||||
|
FROM scratch AS postgres-extensions
|
||||||
|
# After the transition this layer will include all extensions.
|
||||||
|
# As for now, it's only for new custom ones
|
||||||
|
#
|
||||||
|
# # Default extensions
|
||||||
|
# COPY --from=postgres-cleanup-layer /usr/local/pgsql/share/extension /usr/local/pgsql/share/extension
|
||||||
|
# COPY --from=postgres-cleanup-layer /usr/local/pgsql/lib /usr/local/pgsql/lib
|
||||||
|
# Custom extensions
|
||||||
|
COPY --from=pg-anon-pg-build /extensions/anon/lib/ /extensions/anon/lib
|
||||||
|
COPY --from=pg-anon-pg-build /extensions/anon/share/extension /extensions/anon/share/extension
|
||||||
|
|
||||||
#########################################################################################
|
#########################################################################################
|
||||||
#
|
#
|
||||||
# Final layer
|
# Final layer
|
||||||
@@ -637,14 +763,19 @@ COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-deb
|
|||||||
# libgeos, libgdal, libsfcgal1, libproj and libprotobuf-c1 for PostGIS
|
# libgeos, libgdal, libsfcgal1, libproj and libprotobuf-c1 for PostGIS
|
||||||
# libxml2, libxslt1.1 for xml2
|
# libxml2, libxslt1.1 for xml2
|
||||||
# libzstd1 for zstd
|
# libzstd1 for zstd
|
||||||
|
# libboost*, libfreetype6, and zlib1g for rdkit
|
||||||
RUN apt update && \
|
RUN apt update && \
|
||||||
apt install --no-install-recommends -y \
|
apt install --no-install-recommends -y \
|
||||||
gdb \
|
gdb \
|
||||||
locales \
|
|
||||||
libicu67 \
|
libicu67 \
|
||||||
liblz4-1 \
|
liblz4-1 \
|
||||||
libreadline8 \
|
libreadline8 \
|
||||||
|
libboost-iostreams1.74.0 \
|
||||||
|
libboost-regex1.74.0 \
|
||||||
|
libboost-serialization1.74.0 \
|
||||||
|
libboost-system1.74.0 \
|
||||||
libossp-uuid16 \
|
libossp-uuid16 \
|
||||||
|
libfreetype6 \
|
||||||
libgeos-c1v5 \
|
libgeos-c1v5 \
|
||||||
libgdal28 \
|
libgdal28 \
|
||||||
libproj19 \
|
libproj19 \
|
||||||
@@ -654,7 +785,9 @@ RUN apt update && \
|
|||||||
libxslt1.1 \
|
libxslt1.1 \
|
||||||
libzstd1 \
|
libzstd1 \
|
||||||
libcurl4-openssl-dev \
|
libcurl4-openssl-dev \
|
||||||
procps && \
|
locales \
|
||||||
|
procps \
|
||||||
|
zlib1g && \
|
||||||
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
|
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
|
||||||
localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8
|
localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8
|
||||||
|
|
||||||
|
|||||||
@@ -3,6 +3,7 @@
|
|||||||
ARG REPOSITORY=neondatabase
|
ARG REPOSITORY=neondatabase
|
||||||
ARG IMAGE=rust
|
ARG IMAGE=rust
|
||||||
ARG TAG=pinned
|
ARG TAG=pinned
|
||||||
|
ARG BUILD_TAG
|
||||||
|
|
||||||
FROM $REPOSITORY/$IMAGE:$TAG AS rust-build
|
FROM $REPOSITORY/$IMAGE:$TAG AS rust-build
|
||||||
WORKDIR /home/nonroot
|
WORKDIR /home/nonroot
|
||||||
@@ -16,6 +17,8 @@ ENV CACHEPOT_S3_KEY_PREFIX=cachepot
|
|||||||
ARG CACHEPOT_BUCKET=neon-github-dev
|
ARG CACHEPOT_BUCKET=neon-github-dev
|
||||||
#ARG AWS_ACCESS_KEY_ID
|
#ARG AWS_ACCESS_KEY_ID
|
||||||
#ARG AWS_SECRET_ACCESS_KEY
|
#ARG AWS_SECRET_ACCESS_KEY
|
||||||
|
ARG BUILD_TAG
|
||||||
|
ENV BUILD_TAG=$BUILD_TAG
|
||||||
|
|
||||||
COPY . .
|
COPY . .
|
||||||
|
|
||||||
|
|||||||
16
README.md
16
README.md
@@ -132,13 +132,13 @@ Python (3.9 or higher), and install python3 packages using `./scripts/pysync` (r
|
|||||||
# Create repository in .neon with proper paths to binaries and data
|
# Create repository in .neon with proper paths to binaries and data
|
||||||
# Later that would be responsibility of a package install script
|
# Later that would be responsibility of a package install script
|
||||||
> cargo neon init
|
> cargo neon init
|
||||||
Starting pageserver at '127.0.0.1:64000' in '.neon'.
|
Initializing pageserver node 1 at '127.0.0.1:64000' in ".neon"
|
||||||
|
|
||||||
# start pageserver, safekeeper, and broker for their intercommunication
|
# start pageserver, safekeeper, and broker for their intercommunication
|
||||||
> cargo neon start
|
> cargo neon start
|
||||||
Starting neon broker at 127.0.0.1:50051
|
Starting neon broker at 127.0.0.1:50051.
|
||||||
storage_broker started, pid: 2918372
|
storage_broker started, pid: 2918372
|
||||||
Starting pageserver at '127.0.0.1:64000' in '.neon'.
|
Starting pageserver node 1 at '127.0.0.1:64000' in ".neon".
|
||||||
pageserver started, pid: 2918386
|
pageserver started, pid: 2918386
|
||||||
Starting safekeeper at '127.0.0.1:5454' in '.neon/safekeepers/sk1'.
|
Starting safekeeper at '127.0.0.1:5454' in '.neon/safekeepers/sk1'.
|
||||||
safekeeper 1 started, pid: 2918437
|
safekeeper 1 started, pid: 2918437
|
||||||
@@ -152,8 +152,7 @@ Setting tenant 9ef87a5bf0d92544f6fafeeb3239695c as a default one
|
|||||||
# start postgres compute node
|
# start postgres compute node
|
||||||
> cargo neon endpoint start main
|
> cargo neon endpoint start main
|
||||||
Starting new endpoint main (PostgreSQL v14) on timeline de200bd42b49cc1814412c7e592dd6e9 ...
|
Starting new endpoint main (PostgreSQL v14) on timeline de200bd42b49cc1814412c7e592dd6e9 ...
|
||||||
Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/main port=55432
|
Starting postgres at 'postgresql://cloud_admin@127.0.0.1:55432/postgres'
|
||||||
Starting postgres at 'host=127.0.0.1 port=55432 user=cloud_admin dbname=postgres'
|
|
||||||
|
|
||||||
# check list of running postgres instances
|
# check list of running postgres instances
|
||||||
> cargo neon endpoint list
|
> cargo neon endpoint list
|
||||||
@@ -189,18 +188,17 @@ Created timeline 'b3b863fa45fa9e57e615f9f2d944e601' at Lsn 0/16F9A00 for tenant:
|
|||||||
# start postgres on that branch
|
# start postgres on that branch
|
||||||
> cargo neon endpoint start migration_check --branch-name migration_check
|
> cargo neon endpoint start migration_check --branch-name migration_check
|
||||||
Starting new endpoint migration_check (PostgreSQL v14) on timeline b3b863fa45fa9e57e615f9f2d944e601 ...
|
Starting new endpoint migration_check (PostgreSQL v14) on timeline b3b863fa45fa9e57e615f9f2d944e601 ...
|
||||||
Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/migration_check port=55433
|
Starting postgres at 'postgresql://cloud_admin@127.0.0.1:55434/postgres'
|
||||||
Starting postgres at 'host=127.0.0.1 port=55433 user=cloud_admin dbname=postgres'
|
|
||||||
|
|
||||||
# check the new list of running postgres instances
|
# check the new list of running postgres instances
|
||||||
> cargo neon endpoint list
|
> cargo neon endpoint list
|
||||||
ENDPOINT ADDRESS TIMELINE BRANCH NAME LSN STATUS
|
ENDPOINT ADDRESS TIMELINE BRANCH NAME LSN STATUS
|
||||||
main 127.0.0.1:55432 de200bd42b49cc1814412c7e592dd6e9 main 0/16F9A38 running
|
main 127.0.0.1:55432 de200bd42b49cc1814412c7e592dd6e9 main 0/16F9A38 running
|
||||||
migration_check 127.0.0.1:55433 b3b863fa45fa9e57e615f9f2d944e601 migration_check 0/16F9A70 running
|
migration_check 127.0.0.1:55434 b3b863fa45fa9e57e615f9f2d944e601 migration_check 0/16F9A70 running
|
||||||
|
|
||||||
# this new postgres instance will have all the data from 'main' postgres,
|
# this new postgres instance will have all the data from 'main' postgres,
|
||||||
# but all modifications would not affect data in original postgres
|
# but all modifications would not affect data in original postgres
|
||||||
> psql -p55433 -h 127.0.0.1 -U cloud_admin postgres
|
> psql -p55434 -h 127.0.0.1 -U cloud_admin postgres
|
||||||
postgres=# select * from t;
|
postgres=# select * from t;
|
||||||
key | value
|
key | value
|
||||||
-----+-------
|
-----+-------
|
||||||
|
|||||||
@@ -54,9 +54,15 @@ use compute_tools::monitor::launch_monitor;
|
|||||||
use compute_tools::params::*;
|
use compute_tools::params::*;
|
||||||
use compute_tools::spec::*;
|
use compute_tools::spec::*;
|
||||||
|
|
||||||
|
const BUILD_TAG_DEFAULT: &str = "local";
|
||||||
|
|
||||||
fn main() -> Result<()> {
|
fn main() -> Result<()> {
|
||||||
init_tracing_and_logging(DEFAULT_LOG_LEVEL)?;
|
init_tracing_and_logging(DEFAULT_LOG_LEVEL)?;
|
||||||
|
|
||||||
|
let build_tag = option_env!("BUILD_TAG").unwrap_or(BUILD_TAG_DEFAULT);
|
||||||
|
|
||||||
|
info!("build_tag: {build_tag}");
|
||||||
|
|
||||||
let matches = cli().get_matches();
|
let matches = cli().get_matches();
|
||||||
|
|
||||||
let http_port = *matches
|
let http_port = *matches
|
||||||
@@ -250,6 +256,16 @@ fn main() -> Result<()> {
|
|||||||
exit_code = ecode.code()
|
exit_code = ecode.code()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Maybe sync safekeepers again, to speed up next startup
|
||||||
|
let compute_state = compute.state.lock().unwrap().clone();
|
||||||
|
let pspec = compute_state.pspec.as_ref().expect("spec must be set");
|
||||||
|
if matches!(pspec.spec.mode, compute_api::spec::ComputeMode::Primary) {
|
||||||
|
info!("syncing safekeepers on shutdown");
|
||||||
|
let storage_auth_token = pspec.storage_auth_token.clone();
|
||||||
|
let lsn = compute.sync_safekeepers(storage_auth_token)?;
|
||||||
|
info!("synced safekeepers at lsn {lsn}");
|
||||||
|
}
|
||||||
|
|
||||||
if let Err(err) = compute.check_for_core_dumps() {
|
if let Err(err) = compute.check_for_core_dumps() {
|
||||||
error!("error while checking for core dumps: {err:?}");
|
error!("error while checking for core dumps: {err:?}");
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -133,6 +133,84 @@ impl TryFrom<ComputeSpec> for ParsedSpec {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Create special neon_superuser role, that's a slightly nerfed version of a real superuser
|
||||||
|
/// that we give to customers
|
||||||
|
fn create_neon_superuser(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
|
||||||
|
let roles = spec
|
||||||
|
.cluster
|
||||||
|
.roles
|
||||||
|
.iter()
|
||||||
|
.map(|r| format!("'{}'", escape_literal(&r.name)))
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
let dbs = spec
|
||||||
|
.cluster
|
||||||
|
.databases
|
||||||
|
.iter()
|
||||||
|
.map(|db| format!("'{}'", escape_literal(&db.name)))
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
let roles_decl = if roles.is_empty() {
|
||||||
|
String::from("roles text[] := NULL;")
|
||||||
|
} else {
|
||||||
|
format!(
|
||||||
|
r#"
|
||||||
|
roles text[] := ARRAY(SELECT rolname
|
||||||
|
FROM pg_catalog.pg_roles
|
||||||
|
WHERE rolname IN ({}));"#,
|
||||||
|
roles.join(", ")
|
||||||
|
)
|
||||||
|
};
|
||||||
|
|
||||||
|
let database_decl = if dbs.is_empty() {
|
||||||
|
String::from("dbs text[] := NULL;")
|
||||||
|
} else {
|
||||||
|
format!(
|
||||||
|
r#"
|
||||||
|
dbs text[] := ARRAY(SELECT datname
|
||||||
|
FROM pg_catalog.pg_database
|
||||||
|
WHERE datname IN ({}));"#,
|
||||||
|
dbs.join(", ")
|
||||||
|
)
|
||||||
|
};
|
||||||
|
|
||||||
|
// ALL PRIVILEGES grants CREATE, CONNECT, and TEMPORARY on all databases
|
||||||
|
// (see https://www.postgresql.org/docs/current/ddl-priv.html)
|
||||||
|
let query = format!(
|
||||||
|
r#"
|
||||||
|
DO $$
|
||||||
|
DECLARE
|
||||||
|
r text;
|
||||||
|
{}
|
||||||
|
{}
|
||||||
|
BEGIN
|
||||||
|
IF NOT EXISTS (
|
||||||
|
SELECT FROM pg_catalog.pg_roles WHERE rolname = 'neon_superuser')
|
||||||
|
THEN
|
||||||
|
CREATE ROLE neon_superuser CREATEDB CREATEROLE NOLOGIN IN ROLE pg_read_all_data, pg_write_all_data;
|
||||||
|
IF array_length(roles, 1) IS NOT NULL THEN
|
||||||
|
EXECUTE format('GRANT neon_superuser TO %s',
|
||||||
|
array_to_string(ARRAY(SELECT quote_ident(x) FROM unnest(roles) as x), ', '));
|
||||||
|
FOREACH r IN ARRAY roles LOOP
|
||||||
|
EXECUTE format('ALTER ROLE %s CREATEROLE CREATEDB', quote_ident(r));
|
||||||
|
END LOOP;
|
||||||
|
END IF;
|
||||||
|
IF array_length(dbs, 1) IS NOT NULL THEN
|
||||||
|
EXECUTE format('GRANT ALL PRIVILEGES ON DATABASE %s TO neon_superuser',
|
||||||
|
array_to_string(ARRAY(SELECT quote_ident(x) FROM unnest(dbs) as x), ', '));
|
||||||
|
END IF;
|
||||||
|
END IF;
|
||||||
|
END
|
||||||
|
$$;"#,
|
||||||
|
roles_decl, database_decl,
|
||||||
|
);
|
||||||
|
info!("Neon superuser created:\n{}", &query);
|
||||||
|
client
|
||||||
|
.simple_query(&query)
|
||||||
|
.map_err(|e| anyhow::anyhow!(e).context(query))?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
impl ComputeNode {
|
impl ComputeNode {
|
||||||
pub fn set_status(&self, status: ComputeStatus) {
|
pub fn set_status(&self, status: ComputeStatus) {
|
||||||
let mut state = self.state.lock().unwrap();
|
let mut state = self.state.lock().unwrap();
|
||||||
@@ -157,7 +235,7 @@ impl ComputeNode {
|
|||||||
|
|
||||||
// Get basebackup from the libpq connection to pageserver using `connstr` and
|
// Get basebackup from the libpq connection to pageserver using `connstr` and
|
||||||
// unarchive it to `pgdata` directory overriding all its previous content.
|
// unarchive it to `pgdata` directory overriding all its previous content.
|
||||||
#[instrument(skip(self, compute_state))]
|
#[instrument(skip_all, fields(%lsn))]
|
||||||
fn get_basebackup(&self, compute_state: &ComputeState, lsn: Lsn) -> Result<()> {
|
fn get_basebackup(&self, compute_state: &ComputeState, lsn: Lsn) -> Result<()> {
|
||||||
let spec = compute_state.pspec.as_ref().expect("spec must be set");
|
let spec = compute_state.pspec.as_ref().expect("spec must be set");
|
||||||
let start_time = Utc::now();
|
let start_time = Utc::now();
|
||||||
@@ -199,8 +277,8 @@ impl ComputeNode {
|
|||||||
|
|
||||||
// Run `postgres` in a special mode with `--sync-safekeepers` argument
|
// Run `postgres` in a special mode with `--sync-safekeepers` argument
|
||||||
// and return the reported LSN back to the caller.
|
// and return the reported LSN back to the caller.
|
||||||
#[instrument(skip(self, storage_auth_token))]
|
#[instrument(skip_all)]
|
||||||
fn sync_safekeepers(&self, storage_auth_token: Option<String>) -> Result<Lsn> {
|
pub fn sync_safekeepers(&self, storage_auth_token: Option<String>) -> Result<Lsn> {
|
||||||
let start_time = Utc::now();
|
let start_time = Utc::now();
|
||||||
|
|
||||||
let sync_handle = Command::new(&self.pgbin)
|
let sync_handle = Command::new(&self.pgbin)
|
||||||
@@ -244,7 +322,7 @@ impl ComputeNode {
|
|||||||
|
|
||||||
/// Do all the preparations like PGDATA directory creation, configuration,
|
/// Do all the preparations like PGDATA directory creation, configuration,
|
||||||
/// safekeepers sync, basebackup, etc.
|
/// safekeepers sync, basebackup, etc.
|
||||||
#[instrument(skip(self, compute_state))]
|
#[instrument(skip_all)]
|
||||||
pub fn prepare_pgdata(&self, compute_state: &ComputeState) -> Result<()> {
|
pub fn prepare_pgdata(&self, compute_state: &ComputeState) -> Result<()> {
|
||||||
let pspec = compute_state.pspec.as_ref().expect("spec must be set");
|
let pspec = compute_state.pspec.as_ref().expect("spec must be set");
|
||||||
let spec = &pspec.spec;
|
let spec = &pspec.spec;
|
||||||
@@ -302,7 +380,7 @@ impl ComputeNode {
|
|||||||
|
|
||||||
/// Start Postgres as a child process and manage DBs/roles.
|
/// Start Postgres as a child process and manage DBs/roles.
|
||||||
/// After that this will hang waiting on the postmaster process to exit.
|
/// After that this will hang waiting on the postmaster process to exit.
|
||||||
#[instrument(skip(self))]
|
#[instrument(skip_all)]
|
||||||
pub fn start_postgres(
|
pub fn start_postgres(
|
||||||
&self,
|
&self,
|
||||||
storage_auth_token: Option<String>,
|
storage_auth_token: Option<String>,
|
||||||
@@ -326,7 +404,7 @@ impl ComputeNode {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Do initial configuration of the already started Postgres.
|
/// Do initial configuration of the already started Postgres.
|
||||||
#[instrument(skip(self, compute_state))]
|
#[instrument(skip_all)]
|
||||||
pub fn apply_config(&self, compute_state: &ComputeState) -> Result<()> {
|
pub fn apply_config(&self, compute_state: &ComputeState) -> Result<()> {
|
||||||
// If connection fails,
|
// If connection fails,
|
||||||
// it may be the old node with `zenith_admin` superuser.
|
// it may be the old node with `zenith_admin` superuser.
|
||||||
@@ -347,6 +425,8 @@ impl ComputeNode {
|
|||||||
.map_err(|_| anyhow::anyhow!("invalid connstr"))?;
|
.map_err(|_| anyhow::anyhow!("invalid connstr"))?;
|
||||||
|
|
||||||
let mut client = Client::connect(zenith_admin_connstr.as_str(), NoTls)?;
|
let mut client = Client::connect(zenith_admin_connstr.as_str(), NoTls)?;
|
||||||
|
// Disable forwarding so that users don't get a cloud_admin role
|
||||||
|
client.simple_query("SET neon.forward_ddl = false")?;
|
||||||
client.simple_query("CREATE USER cloud_admin WITH SUPERUSER")?;
|
client.simple_query("CREATE USER cloud_admin WITH SUPERUSER")?;
|
||||||
client.simple_query("GRANT zenith_admin TO cloud_admin")?;
|
client.simple_query("GRANT zenith_admin TO cloud_admin")?;
|
||||||
drop(client);
|
drop(client);
|
||||||
@@ -357,31 +437,28 @@ impl ComputeNode {
|
|||||||
Ok(client) => client,
|
Ok(client) => client,
|
||||||
};
|
};
|
||||||
|
|
||||||
// Proceed with post-startup configuration. Note, that order of operations is important.
|
|
||||||
// Disable DDL forwarding because control plane already knows about these roles/databases.
|
// Disable DDL forwarding because control plane already knows about these roles/databases.
|
||||||
client.simple_query("SET neon.forward_ddl = false")?;
|
client.simple_query("SET neon.forward_ddl = false")?;
|
||||||
|
|
||||||
|
// Proceed with post-startup configuration. Note, that order of operations is important.
|
||||||
let spec = &compute_state.pspec.as_ref().expect("spec must be set").spec;
|
let spec = &compute_state.pspec.as_ref().expect("spec must be set").spec;
|
||||||
|
create_neon_superuser(spec, &mut client)?;
|
||||||
handle_roles(spec, &mut client)?;
|
handle_roles(spec, &mut client)?;
|
||||||
handle_databases(spec, &mut client)?;
|
handle_databases(spec, &mut client)?;
|
||||||
handle_role_deletions(spec, self.connstr.as_str(), &mut client)?;
|
handle_role_deletions(spec, self.connstr.as_str(), &mut client)?;
|
||||||
handle_grants(spec, self.connstr.as_str(), &mut client)?;
|
handle_grants(spec, self.connstr.as_str())?;
|
||||||
handle_extensions(spec, &mut client)?;
|
handle_extensions(spec, &mut client)?;
|
||||||
|
|
||||||
// 'Close' connection
|
// 'Close' connection
|
||||||
drop(client);
|
drop(client);
|
||||||
|
|
||||||
info!(
|
|
||||||
"finished configuration of compute for project {}",
|
|
||||||
spec.cluster.cluster_id.as_deref().unwrap_or("None")
|
|
||||||
);
|
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
// We could've wrapped this around `pg_ctl reload`, but right now we don't use
|
// We could've wrapped this around `pg_ctl reload`, but right now we don't use
|
||||||
// `pg_ctl` for start / stop, so this just seems much easier to do as we already
|
// `pg_ctl` for start / stop, so this just seems much easier to do as we already
|
||||||
// have opened connection to Postgres and superuser access.
|
// have opened connection to Postgres and superuser access.
|
||||||
#[instrument(skip(self, client))]
|
#[instrument(skip_all)]
|
||||||
fn pg_reload_conf(&self, client: &mut Client) -> Result<()> {
|
fn pg_reload_conf(&self, client: &mut Client) -> Result<()> {
|
||||||
client.simple_query("SELECT pg_reload_conf()")?;
|
client.simple_query("SELECT pg_reload_conf()")?;
|
||||||
Ok(())
|
Ok(())
|
||||||
@@ -389,7 +466,7 @@ impl ComputeNode {
|
|||||||
|
|
||||||
/// Similar to `apply_config()`, but does a bit different sequence of operations,
|
/// Similar to `apply_config()`, but does a bit different sequence of operations,
|
||||||
/// as it's used to reconfigure a previously started and configured Postgres node.
|
/// as it's used to reconfigure a previously started and configured Postgres node.
|
||||||
#[instrument(skip(self))]
|
#[instrument(skip_all)]
|
||||||
pub fn reconfigure(&self) -> Result<()> {
|
pub fn reconfigure(&self) -> Result<()> {
|
||||||
let spec = self.state.lock().unwrap().pspec.clone().unwrap().spec;
|
let spec = self.state.lock().unwrap().pspec.clone().unwrap().spec;
|
||||||
|
|
||||||
@@ -407,7 +484,7 @@ impl ComputeNode {
|
|||||||
handle_roles(&spec, &mut client)?;
|
handle_roles(&spec, &mut client)?;
|
||||||
handle_databases(&spec, &mut client)?;
|
handle_databases(&spec, &mut client)?;
|
||||||
handle_role_deletions(&spec, self.connstr.as_str(), &mut client)?;
|
handle_role_deletions(&spec, self.connstr.as_str(), &mut client)?;
|
||||||
handle_grants(&spec, self.connstr.as_str(), &mut client)?;
|
handle_grants(&spec, self.connstr.as_str())?;
|
||||||
handle_extensions(&spec, &mut client)?;
|
handle_extensions(&spec, &mut client)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -424,36 +501,41 @@ impl ComputeNode {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[instrument(skip(self))]
|
#[instrument(skip_all)]
|
||||||
pub fn start_compute(&self) -> Result<std::process::Child> {
|
pub fn start_compute(&self) -> Result<std::process::Child> {
|
||||||
let compute_state = self.state.lock().unwrap().clone();
|
let compute_state = self.state.lock().unwrap().clone();
|
||||||
let spec = compute_state.pspec.as_ref().expect("spec must be set");
|
let pspec = compute_state.pspec.as_ref().expect("spec must be set");
|
||||||
info!(
|
info!(
|
||||||
"starting compute for project {}, operation {}, tenant {}, timeline {}",
|
"starting compute for project {}, operation {}, tenant {}, timeline {}",
|
||||||
spec.spec.cluster.cluster_id.as_deref().unwrap_or("None"),
|
pspec.spec.cluster.cluster_id.as_deref().unwrap_or("None"),
|
||||||
spec.spec.operation_uuid.as_deref().unwrap_or("None"),
|
pspec.spec.operation_uuid.as_deref().unwrap_or("None"),
|
||||||
spec.tenant_id,
|
pspec.tenant_id,
|
||||||
spec.timeline_id,
|
pspec.timeline_id,
|
||||||
);
|
);
|
||||||
|
|
||||||
self.prepare_pgdata(&compute_state)?;
|
self.prepare_pgdata(&compute_state)?;
|
||||||
|
|
||||||
let start_time = Utc::now();
|
let start_time = Utc::now();
|
||||||
|
let pg = self.start_postgres(pspec.storage_auth_token.clone())?;
|
||||||
|
|
||||||
let pg = self.start_postgres(spec.storage_auth_token.clone())?;
|
let config_time = Utc::now();
|
||||||
|
if pspec.spec.mode == ComputeMode::Primary && !pspec.spec.skip_pg_catalog_updates {
|
||||||
if spec.spec.mode == ComputeMode::Primary {
|
|
||||||
self.apply_config(&compute_state)?;
|
self.apply_config(&compute_state)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
let startup_end_time = Utc::now();
|
let startup_end_time = Utc::now();
|
||||||
{
|
{
|
||||||
let mut state = self.state.lock().unwrap();
|
let mut state = self.state.lock().unwrap();
|
||||||
state.metrics.config_ms = startup_end_time
|
state.metrics.start_postgres_ms = config_time
|
||||||
.signed_duration_since(start_time)
|
.signed_duration_since(start_time)
|
||||||
.to_std()
|
.to_std()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.as_millis() as u64;
|
.as_millis() as u64;
|
||||||
|
state.metrics.config_ms = startup_end_time
|
||||||
|
.signed_duration_since(config_time)
|
||||||
|
.to_std()
|
||||||
|
.unwrap()
|
||||||
|
.as_millis() as u64;
|
||||||
state.metrics.total_startup_ms = startup_end_time
|
state.metrics.total_startup_ms = startup_end_time
|
||||||
.signed_duration_since(compute_state.start_time)
|
.signed_duration_since(compute_state.start_time)
|
||||||
.to_std()
|
.to_std()
|
||||||
@@ -462,6 +544,11 @@ impl ComputeNode {
|
|||||||
}
|
}
|
||||||
self.set_status(ComputeStatus::Running);
|
self.set_status(ComputeStatus::Running);
|
||||||
|
|
||||||
|
info!(
|
||||||
|
"finished configuration of compute for project {}",
|
||||||
|
pspec.spec.cluster.cluster_id.as_deref().unwrap_or("None")
|
||||||
|
);
|
||||||
|
|
||||||
Ok(pg)
|
Ok(pg)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ use compute_api::responses::ComputeStatus;
|
|||||||
|
|
||||||
use crate::compute::ComputeNode;
|
use crate::compute::ComputeNode;
|
||||||
|
|
||||||
#[instrument(skip(compute))]
|
#[instrument(skip_all)]
|
||||||
fn configurator_main_loop(compute: &Arc<ComputeNode>) {
|
fn configurator_main_loop(compute: &Arc<ComputeNode>) {
|
||||||
info!("waiting for reconfiguration requests");
|
info!("waiting for reconfiguration requests");
|
||||||
loop {
|
loop {
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ pub fn init_tracing_and_logging(default_log_level: &str) -> anyhow::Result<()> {
|
|||||||
.unwrap_or_else(|_| tracing_subscriber::EnvFilter::new(default_log_level));
|
.unwrap_or_else(|_| tracing_subscriber::EnvFilter::new(default_log_level));
|
||||||
|
|
||||||
let fmt_layer = tracing_subscriber::fmt::layer()
|
let fmt_layer = tracing_subscriber::fmt::layer()
|
||||||
|
.with_ansi(false)
|
||||||
.with_target(false)
|
.with_target(false)
|
||||||
.with_writer(std::io::stderr);
|
.with_writer(std::io::stderr);
|
||||||
|
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ use compute_api::spec::{Database, GenericOption, GenericOptions, PgIdent, Role};
|
|||||||
const POSTGRES_WAIT_TIMEOUT: Duration = Duration::from_millis(60 * 1000); // milliseconds
|
const POSTGRES_WAIT_TIMEOUT: Duration = Duration::from_millis(60 * 1000); // milliseconds
|
||||||
|
|
||||||
/// Escape a string for including it in a SQL literal
|
/// Escape a string for including it in a SQL literal
|
||||||
fn escape_literal(s: &str) -> String {
|
pub fn escape_literal(s: &str) -> String {
|
||||||
s.replace('\'', "''").replace('\\', "\\\\")
|
s.replace('\'', "''").replace('\\', "\\\\")
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -215,7 +215,7 @@ pub fn get_existing_dbs(client: &mut Client) -> Result<Vec<Database>> {
|
|||||||
/// Wait for Postgres to become ready to accept connections. It's ready to
|
/// Wait for Postgres to become ready to accept connections. It's ready to
|
||||||
/// accept connections when the state-field in `pgdata/postmaster.pid` says
|
/// accept connections when the state-field in `pgdata/postmaster.pid` says
|
||||||
/// 'ready'.
|
/// 'ready'.
|
||||||
#[instrument(skip(pg))]
|
#[instrument(skip_all, fields(pgdata = %pgdata.display()))]
|
||||||
pub fn wait_for_postgres(pg: &mut Child, pgdata: &Path) -> Result<()> {
|
pub fn wait_for_postgres(pg: &mut Child, pgdata: &Path) -> Result<()> {
|
||||||
let pid_path = pgdata.join("postmaster.pid");
|
let pid_path = pgdata.join("postmaster.pid");
|
||||||
|
|
||||||
|
|||||||
@@ -269,17 +269,13 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
|
|||||||
xact.execute(query.as_str(), &[])?;
|
xact.execute(query.as_str(), &[])?;
|
||||||
}
|
}
|
||||||
RoleAction::Create => {
|
RoleAction::Create => {
|
||||||
let mut query: String = format!("CREATE ROLE {} ", name.pg_quote());
|
let mut query: String = format!(
|
||||||
|
"CREATE ROLE {} CREATEROLE CREATEDB IN ROLE neon_superuser",
|
||||||
|
name.pg_quote()
|
||||||
|
);
|
||||||
info!("role create query: '{}'", &query);
|
info!("role create query: '{}'", &query);
|
||||||
query.push_str(&role.to_pg_options());
|
query.push_str(&role.to_pg_options());
|
||||||
xact.execute(query.as_str(), &[])?;
|
xact.execute(query.as_str(), &[])?;
|
||||||
|
|
||||||
let grant_query = format!(
|
|
||||||
"GRANT pg_read_all_data, pg_write_all_data TO {}",
|
|
||||||
name.pg_quote()
|
|
||||||
);
|
|
||||||
xact.execute(grant_query.as_str(), &[])?;
|
|
||||||
info!("role grant query: '{}'", &grant_query);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -476,6 +472,11 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
|
|||||||
query.push_str(&db.to_pg_options());
|
query.push_str(&db.to_pg_options());
|
||||||
let _guard = info_span!("executing", query).entered();
|
let _guard = info_span!("executing", query).entered();
|
||||||
client.execute(query.as_str(), &[])?;
|
client.execute(query.as_str(), &[])?;
|
||||||
|
let grant_query: String = format!(
|
||||||
|
"GRANT ALL PRIVILEGES ON DATABASE {} TO neon_superuser",
|
||||||
|
name.pg_quote()
|
||||||
|
);
|
||||||
|
client.execute(grant_query.as_str(), &[])?;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -495,35 +496,9 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
|
|||||||
/// Grant CREATE ON DATABASE to the database owner and do some other alters and grants
|
/// Grant CREATE ON DATABASE to the database owner and do some other alters and grants
|
||||||
/// to allow users creating trusted extensions and re-creating `public` schema, for example.
|
/// to allow users creating trusted extensions and re-creating `public` schema, for example.
|
||||||
#[instrument(skip_all)]
|
#[instrument(skip_all)]
|
||||||
pub fn handle_grants(spec: &ComputeSpec, connstr: &str, client: &mut Client) -> Result<()> {
|
pub fn handle_grants(spec: &ComputeSpec, connstr: &str) -> Result<()> {
|
||||||
info!("cluster spec grants:");
|
info!("cluster spec grants:");
|
||||||
|
|
||||||
// We now have a separate `web_access` role to connect to the database
|
|
||||||
// via the web interface and proxy link auth. And also we grant a
|
|
||||||
// read / write all data privilege to every role. So also grant
|
|
||||||
// create to everyone.
|
|
||||||
// XXX: later we should stop messing with Postgres ACL in such horrible
|
|
||||||
// ways.
|
|
||||||
let roles = spec
|
|
||||||
.cluster
|
|
||||||
.roles
|
|
||||||
.iter()
|
|
||||||
.map(|r| r.name.pg_quote())
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
|
|
||||||
for db in &spec.cluster.databases {
|
|
||||||
let dbname = &db.name;
|
|
||||||
|
|
||||||
let query: String = format!(
|
|
||||||
"GRANT CREATE ON DATABASE {} TO {}",
|
|
||||||
dbname.pg_quote(),
|
|
||||||
roles.join(", ")
|
|
||||||
);
|
|
||||||
info!("grant query {}", &query);
|
|
||||||
|
|
||||||
client.execute(query.as_str(), &[])?;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Do some per-database access adjustments. We'd better do this at db creation time,
|
// Do some per-database access adjustments. We'd better do this at db creation time,
|
||||||
// but CREATE DATABASE isn't transactional. So we cannot create db + do some grants
|
// but CREATE DATABASE isn't transactional. So we cannot create db + do some grants
|
||||||
// atomically.
|
// atomically.
|
||||||
|
|||||||
@@ -180,6 +180,11 @@ pub fn stop_process(immediate: bool, process_name: &str, pid_file: &Path) -> any
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Wait until process is gone
|
// Wait until process is gone
|
||||||
|
wait_until_stopped(process_name, pid)?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn wait_until_stopped(process_name: &str, pid: Pid) -> anyhow::Result<()> {
|
||||||
for retries in 0..RETRIES {
|
for retries in 0..RETRIES {
|
||||||
match process_has_stopped(pid) {
|
match process_has_stopped(pid) {
|
||||||
Ok(true) => {
|
Ok(true) => {
|
||||||
|
|||||||
@@ -308,7 +308,8 @@ fn handle_init(init_match: &ArgMatches) -> anyhow::Result<LocalEnv> {
|
|||||||
|
|
||||||
let mut env =
|
let mut env =
|
||||||
LocalEnv::parse_config(&toml_file).context("Failed to create neon configuration")?;
|
LocalEnv::parse_config(&toml_file).context("Failed to create neon configuration")?;
|
||||||
env.init(pg_version)
|
let force = init_match.get_flag("force");
|
||||||
|
env.init(pg_version, force)
|
||||||
.context("Failed to initialize neon repository")?;
|
.context("Failed to initialize neon repository")?;
|
||||||
|
|
||||||
// Initialize pageserver, create initial tenant and timeline.
|
// Initialize pageserver, create initial tenant and timeline.
|
||||||
@@ -1013,6 +1014,13 @@ fn cli() -> Command {
|
|||||||
.help("If set, the node will be a hot replica on the specified timeline")
|
.help("If set, the node will be a hot replica on the specified timeline")
|
||||||
.required(false);
|
.required(false);
|
||||||
|
|
||||||
|
let force_arg = Arg::new("force")
|
||||||
|
.value_parser(value_parser!(bool))
|
||||||
|
.long("force")
|
||||||
|
.action(ArgAction::SetTrue)
|
||||||
|
.help("Force initialization even if the repository is not empty")
|
||||||
|
.required(false);
|
||||||
|
|
||||||
Command::new("Neon CLI")
|
Command::new("Neon CLI")
|
||||||
.arg_required_else_help(true)
|
.arg_required_else_help(true)
|
||||||
.version(GIT_VERSION)
|
.version(GIT_VERSION)
|
||||||
@@ -1028,6 +1036,7 @@ fn cli() -> Command {
|
|||||||
.value_name("config"),
|
.value_name("config"),
|
||||||
)
|
)
|
||||||
.arg(pg_version_arg.clone())
|
.arg(pg_version_arg.clone())
|
||||||
|
.arg(force_arg)
|
||||||
)
|
)
|
||||||
.subcommand(
|
.subcommand(
|
||||||
Command::new("timeline")
|
Command::new("timeline")
|
||||||
|
|||||||
@@ -67,6 +67,7 @@ pub struct EndpointConf {
|
|||||||
pg_port: u16,
|
pg_port: u16,
|
||||||
http_port: u16,
|
http_port: u16,
|
||||||
pg_version: u32,
|
pg_version: u32,
|
||||||
|
skip_pg_catalog_updates: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
@@ -135,6 +136,7 @@ impl ComputeControlPlane {
|
|||||||
mode,
|
mode,
|
||||||
tenant_id,
|
tenant_id,
|
||||||
pg_version,
|
pg_version,
|
||||||
|
skip_pg_catalog_updates: false,
|
||||||
});
|
});
|
||||||
|
|
||||||
ep.create_endpoint_dir()?;
|
ep.create_endpoint_dir()?;
|
||||||
@@ -148,6 +150,7 @@ impl ComputeControlPlane {
|
|||||||
http_port,
|
http_port,
|
||||||
pg_port,
|
pg_port,
|
||||||
pg_version,
|
pg_version,
|
||||||
|
skip_pg_catalog_updates: false,
|
||||||
})?,
|
})?,
|
||||||
)?;
|
)?;
|
||||||
std::fs::write(
|
std::fs::write(
|
||||||
@@ -183,6 +186,9 @@ pub struct Endpoint {
|
|||||||
// the endpoint runs in.
|
// the endpoint runs in.
|
||||||
pub env: LocalEnv,
|
pub env: LocalEnv,
|
||||||
pageserver: Arc<PageServerNode>,
|
pageserver: Arc<PageServerNode>,
|
||||||
|
|
||||||
|
// Optimizations
|
||||||
|
skip_pg_catalog_updates: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Endpoint {
|
impl Endpoint {
|
||||||
@@ -216,6 +222,7 @@ impl Endpoint {
|
|||||||
mode: conf.mode,
|
mode: conf.mode,
|
||||||
tenant_id: conf.tenant_id,
|
tenant_id: conf.tenant_id,
|
||||||
pg_version: conf.pg_version,
|
pg_version: conf.pg_version,
|
||||||
|
skip_pg_catalog_updates: conf.skip_pg_catalog_updates,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -398,6 +405,16 @@ impl Endpoint {
|
|||||||
String::from_utf8_lossy(&pg_ctl.stderr),
|
String::from_utf8_lossy(&pg_ctl.stderr),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Also wait for the compute_ctl process to die. It might have some cleanup
|
||||||
|
// work to do after postgres stops, like syncing safekeepers, etc.
|
||||||
|
//
|
||||||
|
// TODO use background_process::stop_process instead
|
||||||
|
let pidfile_path = self.endpoint_path().join("compute_ctl.pid");
|
||||||
|
let pid: u32 = std::fs::read_to_string(pidfile_path)?.parse()?;
|
||||||
|
let pid = nix::unistd::Pid::from_raw(pid as i32);
|
||||||
|
crate::background_process::wait_until_stopped("compute_ctl", pid)?;
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -450,6 +467,7 @@ impl Endpoint {
|
|||||||
|
|
||||||
// Create spec file
|
// Create spec file
|
||||||
let spec = ComputeSpec {
|
let spec = ComputeSpec {
|
||||||
|
skip_pg_catalog_updates: self.skip_pg_catalog_updates,
|
||||||
format_version: 1.0,
|
format_version: 1.0,
|
||||||
operation_uuid: None,
|
operation_uuid: None,
|
||||||
cluster: Cluster {
|
cluster: Cluster {
|
||||||
@@ -499,7 +517,13 @@ impl Endpoint {
|
|||||||
.stdin(std::process::Stdio::null())
|
.stdin(std::process::Stdio::null())
|
||||||
.stderr(logfile.try_clone()?)
|
.stderr(logfile.try_clone()?)
|
||||||
.stdout(logfile);
|
.stdout(logfile);
|
||||||
let _child = cmd.spawn()?;
|
let child = cmd.spawn()?;
|
||||||
|
|
||||||
|
// Write down the pid so we can wait for it when we want to stop
|
||||||
|
// TODO use background_process::start_process instead
|
||||||
|
let pid = child.id();
|
||||||
|
let pidfile_path = self.endpoint_path().join("compute_ctl.pid");
|
||||||
|
std::fs::write(pidfile_path, pid.to_string())?;
|
||||||
|
|
||||||
// Wait for it to start
|
// Wait for it to start
|
||||||
let mut attempt = 0;
|
let mut attempt = 0;
|
||||||
|
|||||||
@@ -364,7 +364,7 @@ impl LocalEnv {
|
|||||||
//
|
//
|
||||||
// Initialize a new Neon repository
|
// Initialize a new Neon repository
|
||||||
//
|
//
|
||||||
pub fn init(&mut self, pg_version: u32) -> anyhow::Result<()> {
|
pub fn init(&mut self, pg_version: u32, force: bool) -> anyhow::Result<()> {
|
||||||
// check if config already exists
|
// check if config already exists
|
||||||
let base_path = &self.base_data_dir;
|
let base_path = &self.base_data_dir;
|
||||||
ensure!(
|
ensure!(
|
||||||
@@ -372,11 +372,29 @@ impl LocalEnv {
|
|||||||
"repository base path is missing"
|
"repository base path is missing"
|
||||||
);
|
);
|
||||||
|
|
||||||
ensure!(
|
if base_path.exists() {
|
||||||
!base_path.exists(),
|
if force {
|
||||||
"directory '{}' already exists. Perhaps already initialized?",
|
println!("removing all contents of '{}'", base_path.display());
|
||||||
base_path.display()
|
// instead of directly calling `remove_dir_all`, we keep the original dir but remove
|
||||||
);
|
// all contents inside. This helps if the developer symlinks another directory (i.e.,
|
||||||
|
// S3 local SSD) to the `.neon` base directory.
|
||||||
|
for entry in std::fs::read_dir(base_path)? {
|
||||||
|
let entry = entry?;
|
||||||
|
let path = entry.path();
|
||||||
|
if path.is_dir() {
|
||||||
|
fs::remove_dir_all(&path)?;
|
||||||
|
} else {
|
||||||
|
fs::remove_file(&path)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
bail!(
|
||||||
|
"directory '{}' already exists. Perhaps already initialized? (Hint: use --force to remove all contents)",
|
||||||
|
base_path.display()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if !self.pg_bin_dir(pg_version)?.join("postgres").exists() {
|
if !self.pg_bin_dir(pg_version)?.join("postgres").exists() {
|
||||||
bail!(
|
bail!(
|
||||||
"Can't find postgres binary at {}",
|
"Can't find postgres binary at {}",
|
||||||
@@ -392,7 +410,9 @@ impl LocalEnv {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fs::create_dir(base_path)?;
|
if !base_path.exists() {
|
||||||
|
fs::create_dir(base_path)?;
|
||||||
|
}
|
||||||
|
|
||||||
// Generate keypair for JWT.
|
// Generate keypair for JWT.
|
||||||
//
|
//
|
||||||
|
|||||||
@@ -71,6 +71,7 @@ pub struct ComputeMetrics {
|
|||||||
pub wait_for_spec_ms: u64,
|
pub wait_for_spec_ms: u64,
|
||||||
pub sync_safekeepers_ms: u64,
|
pub sync_safekeepers_ms: u64,
|
||||||
pub basebackup_ms: u64,
|
pub basebackup_ms: u64,
|
||||||
|
pub start_postgres_ms: u64,
|
||||||
pub config_ms: u64,
|
pub config_ms: u64,
|
||||||
pub total_startup_ms: u64,
|
pub total_startup_ms: u64,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -27,6 +27,12 @@ pub struct ComputeSpec {
|
|||||||
pub cluster: Cluster,
|
pub cluster: Cluster,
|
||||||
pub delta_operations: Option<Vec<DeltaOp>>,
|
pub delta_operations: Option<Vec<DeltaOp>>,
|
||||||
|
|
||||||
|
/// An optional hint that can be passed to speed up startup time if we know
|
||||||
|
/// that no pg catalog mutations (like role creation, database creation,
|
||||||
|
/// extension creation) need to be done on the actual database to start.
|
||||||
|
#[serde(default)] // Default false
|
||||||
|
pub skip_pg_catalog_updates: bool,
|
||||||
|
|
||||||
// Information needed to connect to the storage layer.
|
// Information needed to connect to the storage layer.
|
||||||
//
|
//
|
||||||
// `tenant_id`, `timeline_id` and `pageserver_connstring` are always needed.
|
// `tenant_id`, `timeline_id` and `pageserver_connstring` are always needed.
|
||||||
@@ -142,4 +148,14 @@ mod tests {
|
|||||||
let file = File::open("tests/cluster_spec.json").unwrap();
|
let file = File::open("tests/cluster_spec.json").unwrap();
|
||||||
let _spec: ComputeSpec = serde_json::from_reader(file).unwrap();
|
let _spec: ComputeSpec = serde_json::from_reader(file).unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parse_unknown_fields() {
|
||||||
|
// Forward compatibility test
|
||||||
|
let file = File::open("tests/cluster_spec.json").unwrap();
|
||||||
|
let mut json: serde_json::Value = serde_json::from_reader(file).unwrap();
|
||||||
|
let ob = json.as_object_mut().unwrap();
|
||||||
|
ob.insert("unknown_field_123123123".into(), "hello".into());
|
||||||
|
let _spec: ComputeSpec = serde_json::from_value(json).unwrap();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -23,6 +23,7 @@ use prometheus::{Registry, Result};
|
|||||||
pub mod launch_timestamp;
|
pub mod launch_timestamp;
|
||||||
mod wrappers;
|
mod wrappers;
|
||||||
pub use wrappers::{CountedReader, CountedWriter};
|
pub use wrappers::{CountedReader, CountedWriter};
|
||||||
|
pub mod metric_vec_duration;
|
||||||
|
|
||||||
pub type UIntGauge = GenericGauge<AtomicU64>;
|
pub type UIntGauge = GenericGauge<AtomicU64>;
|
||||||
pub type UIntGaugeVec = GenericGaugeVec<AtomicU64>;
|
pub type UIntGaugeVec = GenericGaugeVec<AtomicU64>;
|
||||||
|
|||||||
23
libs/metrics/src/metric_vec_duration.rs
Normal file
23
libs/metrics/src/metric_vec_duration.rs
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
//! Helpers for observing duration on HistogramVec / CounterVec / GaugeVec / MetricVec<T>.
|
||||||
|
|
||||||
|
use std::{future::Future, time::Instant};
|
||||||
|
|
||||||
|
pub trait DurationResultObserver {
|
||||||
|
fn observe_result<T, E>(&self, res: &Result<T, E>, duration: std::time::Duration);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn observe_async_block_duration_by_result<
|
||||||
|
T,
|
||||||
|
E,
|
||||||
|
F: Future<Output = Result<T, E>>,
|
||||||
|
O: DurationResultObserver,
|
||||||
|
>(
|
||||||
|
observer: &O,
|
||||||
|
block: F,
|
||||||
|
) -> Result<T, E> {
|
||||||
|
let start = Instant::now();
|
||||||
|
let result = block.await;
|
||||||
|
let duration = start.elapsed();
|
||||||
|
observer.observe_result(&result, duration);
|
||||||
|
result
|
||||||
|
}
|
||||||
@@ -152,7 +152,7 @@ pub enum ActivatingFrom {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// A state of a timeline in pageserver's memory.
|
/// A state of a timeline in pageserver's memory.
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||||
pub enum TimelineState {
|
pub enum TimelineState {
|
||||||
/// The timeline is recognized by the pageserver but is not yet operational.
|
/// The timeline is recognized by the pageserver but is not yet operational.
|
||||||
/// In particular, the walreceiver connection loop is not running for this timeline.
|
/// In particular, the walreceiver connection loop is not running for this timeline.
|
||||||
@@ -165,7 +165,7 @@ pub enum TimelineState {
|
|||||||
/// It cannot transition back into any other state.
|
/// It cannot transition back into any other state.
|
||||||
Stopping,
|
Stopping,
|
||||||
/// The timeline is broken and not operational (previous states: Loading or Active).
|
/// The timeline is broken and not operational (previous states: Loading or Active).
|
||||||
Broken,
|
Broken { reason: String, backtrace: String },
|
||||||
}
|
}
|
||||||
|
|
||||||
#[serde_as]
|
#[serde_as]
|
||||||
|
|||||||
@@ -70,6 +70,14 @@ impl RemotePath {
|
|||||||
pub fn join(&self, segment: &Path) -> Self {
|
pub fn join(&self, segment: &Path) -> Self {
|
||||||
Self(self.0.join(segment))
|
Self(self.0.join(segment))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn get_path(&self) -> &PathBuf {
|
||||||
|
&self.0
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn extension(&self) -> Option<&str> {
|
||||||
|
self.0.extension()?.to_str()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Storage (potentially remote) API to manage its state.
|
/// Storage (potentially remote) API to manage its state.
|
||||||
@@ -86,6 +94,19 @@ pub trait RemoteStorage: Send + Sync + 'static {
|
|||||||
prefix: Option<&RemotePath>,
|
prefix: Option<&RemotePath>,
|
||||||
) -> Result<Vec<RemotePath>, DownloadError>;
|
) -> Result<Vec<RemotePath>, DownloadError>;
|
||||||
|
|
||||||
|
/// Lists all files in directory "recursively"
|
||||||
|
/// (not really recursively, because AWS has a flat namespace)
|
||||||
|
/// Note: This is subtly different than list_prefixes,
|
||||||
|
/// because it is for listing files instead of listing
|
||||||
|
/// names sharing common prefixes.
|
||||||
|
/// For example,
|
||||||
|
/// list_files("foo/bar") = ["foo/bar/cat123.txt",
|
||||||
|
/// "foo/bar/cat567.txt", "foo/bar/dog123.txt", "foo/bar/dog456.txt"]
|
||||||
|
/// whereas,
|
||||||
|
/// list_prefixes("foo/bar/") = ["cat", "dog"]
|
||||||
|
/// See `test_real_s3.rs` for more details.
|
||||||
|
async fn list_files(&self, folder: Option<&RemotePath>) -> anyhow::Result<Vec<RemotePath>>;
|
||||||
|
|
||||||
/// Streams the local file contents into the remote storage entry.
|
/// Streams the local file contents into the remote storage entry.
|
||||||
async fn upload(
|
async fn upload(
|
||||||
&self,
|
&self,
|
||||||
@@ -111,6 +132,8 @@ pub trait RemoteStorage: Send + Sync + 'static {
|
|||||||
) -> Result<Download, DownloadError>;
|
) -> Result<Download, DownloadError>;
|
||||||
|
|
||||||
async fn delete(&self, path: &RemotePath) -> anyhow::Result<()>;
|
async fn delete(&self, path: &RemotePath) -> anyhow::Result<()>;
|
||||||
|
|
||||||
|
async fn delete_objects<'a>(&self, paths: &'a [RemotePath]) -> anyhow::Result<()>;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct Download {
|
pub struct Download {
|
||||||
@@ -172,6 +195,14 @@ impl GenericRemoteStorage {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub async fn list_files(&self, folder: Option<&RemotePath>) -> anyhow::Result<Vec<RemotePath>> {
|
||||||
|
match self {
|
||||||
|
Self::LocalFs(s) => s.list_files(folder).await,
|
||||||
|
Self::AwsS3(s) => s.list_files(folder).await,
|
||||||
|
Self::Unreliable(s) => s.list_files(folder).await,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub async fn upload(
|
pub async fn upload(
|
||||||
&self,
|
&self,
|
||||||
from: impl io::AsyncRead + Unpin + Send + Sync + 'static,
|
from: impl io::AsyncRead + Unpin + Send + Sync + 'static,
|
||||||
@@ -223,6 +254,14 @@ impl GenericRemoteStorage {
|
|||||||
Self::Unreliable(s) => s.delete(path).await,
|
Self::Unreliable(s) => s.delete(path).await,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub async fn delete_objects<'a>(&self, paths: &'a [RemotePath]) -> anyhow::Result<()> {
|
||||||
|
match self {
|
||||||
|
Self::LocalFs(s) => s.delete_objects(paths).await,
|
||||||
|
Self::AwsS3(s) => s.delete_objects(paths).await,
|
||||||
|
Self::Unreliable(s) => s.delete_objects(paths).await,
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl GenericRemoteStorage {
|
impl GenericRemoteStorage {
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ use tokio::{
|
|||||||
io::{self, AsyncReadExt, AsyncSeekExt, AsyncWriteExt},
|
io::{self, AsyncReadExt, AsyncSeekExt, AsyncWriteExt},
|
||||||
};
|
};
|
||||||
use tracing::*;
|
use tracing::*;
|
||||||
use utils::crashsafe::path_with_suffix_extension;
|
use utils::{crashsafe::path_with_suffix_extension, fs_ext::is_directory_empty};
|
||||||
|
|
||||||
use crate::{Download, DownloadError, RemotePath};
|
use crate::{Download, DownloadError, RemotePath};
|
||||||
|
|
||||||
@@ -48,6 +48,14 @@ impl LocalFs {
|
|||||||
Ok(Self { storage_root })
|
Ok(Self { storage_root })
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// mirrors S3Bucket::s3_object_to_relative_path
|
||||||
|
fn local_file_to_relative_path(&self, key: PathBuf) -> RemotePath {
|
||||||
|
let relative_path = key
|
||||||
|
.strip_prefix(&self.storage_root)
|
||||||
|
.expect("relative path must contain storage_root as prefix");
|
||||||
|
RemotePath(relative_path.into())
|
||||||
|
}
|
||||||
|
|
||||||
async fn read_storage_metadata(
|
async fn read_storage_metadata(
|
||||||
&self,
|
&self,
|
||||||
file_path: &Path,
|
file_path: &Path,
|
||||||
@@ -101,19 +109,63 @@ impl RemoteStorage for LocalFs {
|
|||||||
Some(prefix) => Cow::Owned(prefix.with_base(&self.storage_root)),
|
Some(prefix) => Cow::Owned(prefix.with_base(&self.storage_root)),
|
||||||
None => Cow::Borrowed(&self.storage_root),
|
None => Cow::Borrowed(&self.storage_root),
|
||||||
};
|
};
|
||||||
Ok(get_all_files(path.as_ref(), false)
|
|
||||||
|
let prefixes_to_filter = get_all_files(path.as_ref(), false)
|
||||||
.await
|
.await
|
||||||
.map_err(DownloadError::Other)?
|
.map_err(DownloadError::Other)?;
|
||||||
.into_iter()
|
|
||||||
.map(|path| {
|
let mut prefixes = Vec::with_capacity(prefixes_to_filter.len());
|
||||||
path.strip_prefix(&self.storage_root)
|
|
||||||
.context("Failed to strip preifix")
|
// filter out empty directories to mirror s3 behavior.
|
||||||
|
for prefix in prefixes_to_filter {
|
||||||
|
if prefix.is_dir()
|
||||||
|
&& is_directory_empty(&prefix)
|
||||||
|
.await
|
||||||
|
.map_err(DownloadError::Other)?
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
prefixes.push(
|
||||||
|
prefix
|
||||||
|
.strip_prefix(&self.storage_root)
|
||||||
|
.context("Failed to strip prefix")
|
||||||
.and_then(RemotePath::new)
|
.and_then(RemotePath::new)
|
||||||
.expect(
|
.expect(
|
||||||
"We list files for storage root, hence should be able to remote the prefix",
|
"We list files for storage root, hence should be able to remote the prefix",
|
||||||
)
|
),
|
||||||
})
|
)
|
||||||
.collect())
|
}
|
||||||
|
|
||||||
|
Ok(prefixes)
|
||||||
|
}
|
||||||
|
|
||||||
|
// recursively lists all files in a directory,
|
||||||
|
// mirroring the `list_files` for `s3_bucket`
|
||||||
|
async fn list_files(&self, folder: Option<&RemotePath>) -> anyhow::Result<Vec<RemotePath>> {
|
||||||
|
let full_path = match folder {
|
||||||
|
Some(folder) => folder.with_base(&self.storage_root),
|
||||||
|
None => self.storage_root.clone(),
|
||||||
|
};
|
||||||
|
let mut files = vec![];
|
||||||
|
let mut directory_queue = vec![full_path.clone()];
|
||||||
|
|
||||||
|
while !directory_queue.is_empty() {
|
||||||
|
let cur_folder = directory_queue
|
||||||
|
.pop()
|
||||||
|
.expect("queue cannot be empty: we just checked");
|
||||||
|
let mut entries = fs::read_dir(cur_folder.clone()).await?;
|
||||||
|
while let Some(entry) = entries.next_entry().await? {
|
||||||
|
let file_name: PathBuf = entry.file_name().into();
|
||||||
|
let full_file_name = cur_folder.clone().join(&file_name);
|
||||||
|
let file_remote_path = self.local_file_to_relative_path(full_file_name.clone());
|
||||||
|
files.push(file_remote_path.clone());
|
||||||
|
if full_file_name.is_dir() {
|
||||||
|
directory_queue.push(full_file_name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(files)
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn upload(
|
async fn upload(
|
||||||
@@ -291,11 +343,25 @@ impl RemoteStorage for LocalFs {
|
|||||||
|
|
||||||
async fn delete(&self, path: &RemotePath) -> anyhow::Result<()> {
|
async fn delete(&self, path: &RemotePath) -> anyhow::Result<()> {
|
||||||
let file_path = path.with_base(&self.storage_root);
|
let file_path = path.with_base(&self.storage_root);
|
||||||
if file_path.exists() && file_path.is_file() {
|
if !file_path.exists() {
|
||||||
Ok(fs::remove_file(file_path).await?)
|
// See https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteObject.html
|
||||||
} else {
|
// > If there isn't a null version, Amazon S3 does not remove any objects but will still respond that the command was successful.
|
||||||
bail!("File {file_path:?} either does not exist or is not a file")
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if !file_path.is_file() {
|
||||||
|
anyhow::bail!("{file_path:?} is not a file");
|
||||||
|
}
|
||||||
|
Ok(fs::remove_file(file_path)
|
||||||
|
.await
|
||||||
|
.map_err(|e| anyhow::anyhow!(e))?)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn delete_objects<'a>(&self, paths: &'a [RemotePath]) -> anyhow::Result<()> {
|
||||||
|
for path in paths {
|
||||||
|
self.delete(path).await?
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -320,7 +386,7 @@ where
|
|||||||
let file_type = dir_entry.file_type().await?;
|
let file_type = dir_entry.file_type().await?;
|
||||||
let entry_path = dir_entry.path();
|
let entry_path = dir_entry.path();
|
||||||
if file_type.is_symlink() {
|
if file_type.is_symlink() {
|
||||||
debug!("{entry_path:?} us a symlink, skipping")
|
debug!("{entry_path:?} is a symlink, skipping")
|
||||||
} else if file_type.is_dir() {
|
} else if file_type.is_dir() {
|
||||||
if recursive {
|
if recursive {
|
||||||
paths.extend(get_all_files(&entry_path, true).await?.into_iter())
|
paths.extend(get_all_files(&entry_path, true).await?.into_iter())
|
||||||
@@ -595,15 +661,11 @@ mod fs_tests {
|
|||||||
storage.delete(&upload_target).await?;
|
storage.delete(&upload_target).await?;
|
||||||
assert!(storage.list().await?.is_empty());
|
assert!(storage.list().await?.is_empty());
|
||||||
|
|
||||||
match storage.delete(&upload_target).await {
|
storage
|
||||||
Ok(()) => panic!("Should not allow deleting non-existing storage files"),
|
.delete(&upload_target)
|
||||||
Err(e) => {
|
.await
|
||||||
let error_string = e.to_string();
|
.expect("Should allow deleting non-existing storage files");
|
||||||
assert!(error_string.contains("does not exist"));
|
|
||||||
let expected_path = upload_target.with_base(&storage.storage_root);
|
|
||||||
assert!(error_string.contains(expected_path.to_str().unwrap()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ use aws_sdk_s3::{
|
|||||||
error::SdkError,
|
error::SdkError,
|
||||||
operation::get_object::GetObjectError,
|
operation::get_object::GetObjectError,
|
||||||
primitives::ByteStream,
|
primitives::ByteStream,
|
||||||
|
types::{Delete, ObjectIdentifier},
|
||||||
Client,
|
Client,
|
||||||
};
|
};
|
||||||
use aws_smithy_http::body::SdkBody;
|
use aws_smithy_http::body::SdkBody;
|
||||||
@@ -33,6 +34,8 @@ use crate::{
|
|||||||
Download, DownloadError, RemotePath, RemoteStorage, S3Config, REMOTE_STORAGE_PREFIX_SEPARATOR,
|
Download, DownloadError, RemotePath, RemoteStorage, S3Config, REMOTE_STORAGE_PREFIX_SEPARATOR,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const MAX_DELETE_OBJECTS_REQUEST_SIZE: usize = 1000;
|
||||||
|
|
||||||
pub(super) mod metrics {
|
pub(super) mod metrics {
|
||||||
use metrics::{register_int_counter_vec, IntCounterVec};
|
use metrics::{register_int_counter_vec, IntCounterVec};
|
||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
@@ -81,12 +84,24 @@ pub(super) mod metrics {
|
|||||||
.inc();
|
.inc();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn inc_delete_objects(count: u64) {
|
||||||
|
S3_REQUESTS_COUNT
|
||||||
|
.with_label_values(&["delete_object"])
|
||||||
|
.inc_by(count);
|
||||||
|
}
|
||||||
|
|
||||||
pub fn inc_delete_object_fail() {
|
pub fn inc_delete_object_fail() {
|
||||||
S3_REQUESTS_FAIL_COUNT
|
S3_REQUESTS_FAIL_COUNT
|
||||||
.with_label_values(&["delete_object"])
|
.with_label_values(&["delete_object"])
|
||||||
.inc();
|
.inc();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn inc_delete_objects_fail(count: u64) {
|
||||||
|
S3_REQUESTS_FAIL_COUNT
|
||||||
|
.with_label_values(&["delete_object"])
|
||||||
|
.inc_by(count);
|
||||||
|
}
|
||||||
|
|
||||||
pub fn inc_list_objects() {
|
pub fn inc_list_objects() {
|
||||||
S3_REQUESTS_COUNT.with_label_values(&["list_objects"]).inc();
|
S3_REQUESTS_COUNT.with_label_values(&["list_objects"]).inc();
|
||||||
}
|
}
|
||||||
@@ -332,6 +347,51 @@ impl RemoteStorage for S3Bucket {
|
|||||||
Ok(document_keys)
|
Ok(document_keys)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// See the doc for `RemoteStorage::list_files`
|
||||||
|
async fn list_files(&self, folder: Option<&RemotePath>) -> anyhow::Result<Vec<RemotePath>> {
|
||||||
|
let folder_name = folder
|
||||||
|
.map(|p| self.relative_path_to_s3_object(p))
|
||||||
|
.or_else(|| self.prefix_in_bucket.clone());
|
||||||
|
|
||||||
|
// AWS may need to break the response into several parts
|
||||||
|
let mut continuation_token = None;
|
||||||
|
let mut all_files = vec![];
|
||||||
|
loop {
|
||||||
|
let _guard = self
|
||||||
|
.concurrency_limiter
|
||||||
|
.acquire()
|
||||||
|
.await
|
||||||
|
.context("Concurrency limiter semaphore got closed during S3 list_files")?;
|
||||||
|
metrics::inc_list_objects();
|
||||||
|
|
||||||
|
let response = self
|
||||||
|
.client
|
||||||
|
.list_objects_v2()
|
||||||
|
.bucket(self.bucket_name.clone())
|
||||||
|
.set_prefix(folder_name.clone())
|
||||||
|
.set_continuation_token(continuation_token)
|
||||||
|
.set_max_keys(self.max_keys_per_list_response)
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
.map_err(|e| {
|
||||||
|
metrics::inc_list_objects_fail();
|
||||||
|
e
|
||||||
|
})
|
||||||
|
.context("Failed to list files in S3 bucket")?;
|
||||||
|
|
||||||
|
for object in response.contents().unwrap_or_default() {
|
||||||
|
let object_path = object.key().expect("response does not contain a key");
|
||||||
|
let remote_path = self.s3_object_to_relative_path(object_path);
|
||||||
|
all_files.push(remote_path);
|
||||||
|
}
|
||||||
|
match response.next_continuation_token {
|
||||||
|
Some(new_token) => continuation_token = Some(new_token),
|
||||||
|
None => break,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(all_files)
|
||||||
|
}
|
||||||
|
|
||||||
async fn upload(
|
async fn upload(
|
||||||
&self,
|
&self,
|
||||||
from: impl io::AsyncRead + Unpin + Send + Sync + 'static,
|
from: impl io::AsyncRead + Unpin + Send + Sync + 'static,
|
||||||
@@ -396,6 +456,50 @@ impl RemoteStorage for S3Bucket {
|
|||||||
})
|
})
|
||||||
.await
|
.await
|
||||||
}
|
}
|
||||||
|
async fn delete_objects<'a>(&self, paths: &'a [RemotePath]) -> anyhow::Result<()> {
|
||||||
|
let _guard = self
|
||||||
|
.concurrency_limiter
|
||||||
|
.acquire()
|
||||||
|
.await
|
||||||
|
.context("Concurrency limiter semaphore got closed during S3 delete")?;
|
||||||
|
|
||||||
|
let mut delete_objects = Vec::with_capacity(paths.len());
|
||||||
|
for path in paths {
|
||||||
|
let obj_id = ObjectIdentifier::builder()
|
||||||
|
.set_key(Some(self.relative_path_to_s3_object(path)))
|
||||||
|
.build();
|
||||||
|
delete_objects.push(obj_id);
|
||||||
|
}
|
||||||
|
|
||||||
|
for chunk in delete_objects.chunks(MAX_DELETE_OBJECTS_REQUEST_SIZE) {
|
||||||
|
metrics::inc_delete_objects(chunk.len() as u64);
|
||||||
|
|
||||||
|
let resp = self
|
||||||
|
.client
|
||||||
|
.delete_objects()
|
||||||
|
.bucket(self.bucket_name.clone())
|
||||||
|
.delete(Delete::builder().set_objects(Some(chunk.to_vec())).build())
|
||||||
|
.send()
|
||||||
|
.await;
|
||||||
|
|
||||||
|
match resp {
|
||||||
|
Ok(resp) => {
|
||||||
|
if let Some(errors) = resp.errors {
|
||||||
|
metrics::inc_delete_objects_fail(errors.len() as u64);
|
||||||
|
return Err(anyhow::format_err!(
|
||||||
|
"Failed to delete {} objects",
|
||||||
|
errors.len()
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
metrics::inc_delete_objects_fail(chunk.len() as u64);
|
||||||
|
return Err(e.into());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
async fn delete(&self, path: &RemotePath) -> anyhow::Result<()> {
|
async fn delete(&self, path: &RemotePath) -> anyhow::Result<()> {
|
||||||
let _guard = self
|
let _guard = self
|
||||||
|
|||||||
@@ -24,6 +24,7 @@ enum RemoteOp {
|
|||||||
Upload(RemotePath),
|
Upload(RemotePath),
|
||||||
Download(RemotePath),
|
Download(RemotePath),
|
||||||
Delete(RemotePath),
|
Delete(RemotePath),
|
||||||
|
DeleteObjects(Vec<RemotePath>),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl UnreliableWrapper {
|
impl UnreliableWrapper {
|
||||||
@@ -82,6 +83,11 @@ impl RemoteStorage for UnreliableWrapper {
|
|||||||
self.inner.list_prefixes(prefix).await
|
self.inner.list_prefixes(prefix).await
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn list_files(&self, folder: Option<&RemotePath>) -> anyhow::Result<Vec<RemotePath>> {
|
||||||
|
self.attempt(RemoteOp::ListPrefixes(folder.cloned()))?;
|
||||||
|
self.inner.list_files(folder).await
|
||||||
|
}
|
||||||
|
|
||||||
async fn upload(
|
async fn upload(
|
||||||
&self,
|
&self,
|
||||||
data: impl tokio::io::AsyncRead + Unpin + Send + Sync + 'static,
|
data: impl tokio::io::AsyncRead + Unpin + Send + Sync + 'static,
|
||||||
@@ -119,4 +125,21 @@ impl RemoteStorage for UnreliableWrapper {
|
|||||||
self.attempt(RemoteOp::Delete(path.clone()))?;
|
self.attempt(RemoteOp::Delete(path.clone()))?;
|
||||||
self.inner.delete(path).await
|
self.inner.delete(path).await
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Deletes every path in `paths`, issuing one `delete` call per path.
///
/// The batch as a whole first goes through `attempt` as a `RemoteOp::DeleteObjects`
/// operation. After that, each path is removed with `self.delete`. A failing path does not
/// stop the loop: failures are only counted, and a single error carrying that count is
/// returned once every path has been tried.
async fn delete_objects<'a>(&self, paths: &'a [RemotePath]) -> anyhow::Result<()> {
    self.attempt(RemoteOp::DeleteObjects(paths.to_vec()))?;

    let mut failed_deletions = 0;
    for object_path in paths {
        match self.delete(object_path).await {
            Ok(()) => {}
            // The individual error is intentionally dropped; only the tally is reported.
            Err(_) => failed_deletions += 1,
        }
    }

    if failed_deletions == 0 {
        return Ok(());
    }
    Err(anyhow::anyhow!(
        "failed to delete {} objects",
        failed_deletions
    ))
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,274 +0,0 @@
|
|||||||
use std::collections::HashSet;
|
|
||||||
use std::env;
|
|
||||||
use std::num::{NonZeroU32, NonZeroUsize};
|
|
||||||
use std::ops::ControlFlow;
|
|
||||||
use std::path::{Path, PathBuf};
|
|
||||||
use std::sync::Arc;
|
|
||||||
use std::time::UNIX_EPOCH;
|
|
||||||
|
|
||||||
use anyhow::Context;
|
|
||||||
use remote_storage::{
|
|
||||||
GenericRemoteStorage, RemotePath, RemoteStorageConfig, RemoteStorageKind, S3Config,
|
|
||||||
};
|
|
||||||
use test_context::{test_context, AsyncTestContext};
|
|
||||||
use tokio::task::JoinSet;
|
|
||||||
use tracing::{debug, error, info};
|
|
||||||
|
|
||||||
const ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME: &str = "ENABLE_REAL_S3_REMOTE_STORAGE";
|
|
||||||
|
|
||||||
/// Tests that S3 client can list all prefixes, even if the response come paginated and requires multiple S3 queries.
|
|
||||||
/// Uses real S3 and requires [`ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME`] and related S3 cred env vars specified.
|
|
||||||
/// See the client creation in [`create_s3_client`] for details on the required env vars.
|
|
||||||
/// If real S3 tests are disabled, the test passes, skipping any real test run: currently, there's no way to mark the test ignored in runtime with the
|
|
||||||
/// deafult test framework, see https://github.com/rust-lang/rust/issues/68007 for details.
|
|
||||||
///
|
|
||||||
/// First, the test creates a set of S3 objects with keys `/${random_prefix_part}/${base_prefix_str}/sub_prefix_${i}/blob_${i}` in [`upload_s3_data`]
|
|
||||||
/// where
|
|
||||||
/// * `random_prefix_part` is set for the entire S3 client during the S3 client creation in [`create_s3_client`], to avoid multiple test runs interference
|
|
||||||
/// * `base_prefix_str` is a common prefix to use in the client requests: we would want to ensure that the client is able to list nested prefixes inside the bucket
|
|
||||||
///
|
|
||||||
/// Then, verifies that the client does return correct prefixes when queried:
|
|
||||||
/// * with no prefix, it lists everything after its `${random_prefix_part}/` — that should be `${base_prefix_str}` value only
|
|
||||||
/// * with `${base_prefix_str}/` prefix, it lists every `sub_prefix_${i}`
|
|
||||||
///
|
|
||||||
/// With the real S3 enabled and `#[cfg(test)]` Rust configuration used, the S3 client test adds a `max-keys` param to limit the response keys.
|
|
||||||
/// This way, we are able to test the pagination implicitly, by ensuring all results are returned from the remote storage and avoid uploading too many blobs to S3,
|
|
||||||
/// since current default AWS S3 pagination limit is 1000.
|
|
||||||
/// (see https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html#API_ListObjectsV2_RequestSyntax)
|
|
||||||
///
|
|
||||||
/// Lastly, the test attempts to clean up and remove all uploaded S3 files.
|
|
||||||
/// If any errors appear during the clean up, they get logged, but the test is not failed or stopped until clean up is finished.
|
|
||||||
#[test_context(MaybeEnabledS3)]
|
|
||||||
#[tokio::test]
|
|
||||||
async fn s3_pagination_should_work(ctx: &mut MaybeEnabledS3) -> anyhow::Result<()> {
|
|
||||||
let ctx = match ctx {
|
|
||||||
MaybeEnabledS3::Enabled(ctx) => ctx,
|
|
||||||
MaybeEnabledS3::Disabled => return Ok(()),
|
|
||||||
MaybeEnabledS3::UploadsFailed(e, _) => anyhow::bail!("S3 init failed: {e:?}"),
|
|
||||||
};
|
|
||||||
|
|
||||||
let test_client = Arc::clone(&ctx.client_with_excessive_pagination);
|
|
||||||
let expected_remote_prefixes = ctx.remote_prefixes.clone();
|
|
||||||
|
|
||||||
let base_prefix =
|
|
||||||
RemotePath::new(Path::new(ctx.base_prefix_str)).context("common_prefix construction")?;
|
|
||||||
let root_remote_prefixes = test_client
|
|
||||||
.list_prefixes(None)
|
|
||||||
.await
|
|
||||||
.context("client list root prefixes failure")?
|
|
||||||
.into_iter()
|
|
||||||
.collect::<HashSet<_>>();
|
|
||||||
assert_eq!(
|
|
||||||
root_remote_prefixes, HashSet::from([base_prefix.clone()]),
|
|
||||||
"remote storage root prefixes list mismatches with the uploads. Returned prefixes: {root_remote_prefixes:?}"
|
|
||||||
);
|
|
||||||
|
|
||||||
let nested_remote_prefixes = test_client
|
|
||||||
.list_prefixes(Some(&base_prefix))
|
|
||||||
.await
|
|
||||||
.context("client list nested prefixes failure")?
|
|
||||||
.into_iter()
|
|
||||||
.collect::<HashSet<_>>();
|
|
||||||
let remote_only_prefixes = nested_remote_prefixes
|
|
||||||
.difference(&expected_remote_prefixes)
|
|
||||||
.collect::<HashSet<_>>();
|
|
||||||
let missing_uploaded_prefixes = expected_remote_prefixes
|
|
||||||
.difference(&nested_remote_prefixes)
|
|
||||||
.collect::<HashSet<_>>();
|
|
||||||
assert_eq!(
|
|
||||||
remote_only_prefixes.len() + missing_uploaded_prefixes.len(), 0,
|
|
||||||
"remote storage nested prefixes list mismatches with the uploads. Remote only prefixes: {remote_only_prefixes:?}, missing uploaded prefixes: {missing_uploaded_prefixes:?}",
|
|
||||||
);
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
enum MaybeEnabledS3 {
|
|
||||||
Enabled(S3WithTestBlobs),
|
|
||||||
Disabled,
|
|
||||||
UploadsFailed(anyhow::Error, S3WithTestBlobs),
|
|
||||||
}
|
|
||||||
|
|
||||||
struct S3WithTestBlobs {
|
|
||||||
client_with_excessive_pagination: Arc<GenericRemoteStorage>,
|
|
||||||
base_prefix_str: &'static str,
|
|
||||||
remote_prefixes: HashSet<RemotePath>,
|
|
||||||
remote_blobs: HashSet<RemotePath>,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[async_trait::async_trait]
|
|
||||||
impl AsyncTestContext for MaybeEnabledS3 {
|
|
||||||
async fn setup() -> Self {
|
|
||||||
utils::logging::init(
|
|
||||||
utils::logging::LogFormat::Test,
|
|
||||||
utils::logging::TracingErrorLayerEnablement::Disabled,
|
|
||||||
)
|
|
||||||
.expect("logging init failed");
|
|
||||||
if env::var(ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME).is_err() {
|
|
||||||
info!(
|
|
||||||
"`{}` env variable is not set, skipping the test",
|
|
||||||
ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME
|
|
||||||
);
|
|
||||||
return Self::Disabled;
|
|
||||||
}
|
|
||||||
|
|
||||||
let max_keys_in_list_response = 10;
|
|
||||||
let upload_tasks_count = 1 + (2 * usize::try_from(max_keys_in_list_response).unwrap());
|
|
||||||
|
|
||||||
let client_with_excessive_pagination = create_s3_client(max_keys_in_list_response)
|
|
||||||
.context("S3 client creation")
|
|
||||||
.expect("S3 client creation failed");
|
|
||||||
|
|
||||||
let base_prefix_str = "test/";
|
|
||||||
match upload_s3_data(
|
|
||||||
&client_with_excessive_pagination,
|
|
||||||
base_prefix_str,
|
|
||||||
upload_tasks_count,
|
|
||||||
)
|
|
||||||
.await
|
|
||||||
{
|
|
||||||
ControlFlow::Continue(uploads) => {
|
|
||||||
info!("Remote objects created successfully");
|
|
||||||
Self::Enabled(S3WithTestBlobs {
|
|
||||||
client_with_excessive_pagination,
|
|
||||||
base_prefix_str,
|
|
||||||
remote_prefixes: uploads.prefixes,
|
|
||||||
remote_blobs: uploads.blobs,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
ControlFlow::Break(uploads) => Self::UploadsFailed(
|
|
||||||
anyhow::anyhow!("One or multiple blobs failed to upload to S3"),
|
|
||||||
S3WithTestBlobs {
|
|
||||||
client_with_excessive_pagination,
|
|
||||||
base_prefix_str,
|
|
||||||
remote_prefixes: uploads.prefixes,
|
|
||||||
remote_blobs: uploads.blobs,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn teardown(self) {
|
|
||||||
match self {
|
|
||||||
Self::Disabled => {}
|
|
||||||
Self::Enabled(ctx) | Self::UploadsFailed(_, ctx) => {
|
|
||||||
cleanup(&ctx.client_with_excessive_pagination, ctx.remote_blobs).await;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn create_s3_client(max_keys_per_list_response: i32) -> anyhow::Result<Arc<GenericRemoteStorage>> {
|
|
||||||
let remote_storage_s3_bucket = env::var("REMOTE_STORAGE_S3_BUCKET")
|
|
||||||
.context("`REMOTE_STORAGE_S3_BUCKET` env var is not set, but real S3 tests are enabled")?;
|
|
||||||
let remote_storage_s3_region = env::var("REMOTE_STORAGE_S3_REGION")
|
|
||||||
.context("`REMOTE_STORAGE_S3_REGION` env var is not set, but real S3 tests are enabled")?;
|
|
||||||
let random_prefix_part = std::time::SystemTime::now()
|
|
||||||
.duration_since(UNIX_EPOCH)
|
|
||||||
.context("random s3 test prefix part calculation")?
|
|
||||||
.as_millis();
|
|
||||||
let remote_storage_config = RemoteStorageConfig {
|
|
||||||
max_concurrent_syncs: NonZeroUsize::new(100).unwrap(),
|
|
||||||
max_sync_errors: NonZeroU32::new(5).unwrap(),
|
|
||||||
storage: RemoteStorageKind::AwsS3(S3Config {
|
|
||||||
bucket_name: remote_storage_s3_bucket,
|
|
||||||
bucket_region: remote_storage_s3_region,
|
|
||||||
prefix_in_bucket: Some(format!("pagination_should_work_test_{random_prefix_part}/")),
|
|
||||||
endpoint: None,
|
|
||||||
concurrency_limit: NonZeroUsize::new(100).unwrap(),
|
|
||||||
max_keys_per_list_response: Some(max_keys_per_list_response),
|
|
||||||
}),
|
|
||||||
};
|
|
||||||
Ok(Arc::new(
|
|
||||||
GenericRemoteStorage::from_config(&remote_storage_config).context("remote storage init")?,
|
|
||||||
))
|
|
||||||
}
|
|
||||||
|
|
||||||
struct Uploads {
|
|
||||||
prefixes: HashSet<RemotePath>,
|
|
||||||
blobs: HashSet<RemotePath>,
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn upload_s3_data(
|
|
||||||
client: &Arc<GenericRemoteStorage>,
|
|
||||||
base_prefix_str: &'static str,
|
|
||||||
upload_tasks_count: usize,
|
|
||||||
) -> ControlFlow<Uploads, Uploads> {
|
|
||||||
info!("Creating {upload_tasks_count} S3 files");
|
|
||||||
let mut upload_tasks = JoinSet::new();
|
|
||||||
for i in 1..upload_tasks_count + 1 {
|
|
||||||
let task_client = Arc::clone(client);
|
|
||||||
upload_tasks.spawn(async move {
|
|
||||||
let prefix = PathBuf::from(format!("{base_prefix_str}/sub_prefix_{i}/"));
|
|
||||||
let blob_prefix = RemotePath::new(&prefix)
|
|
||||||
.with_context(|| format!("{prefix:?} to RemotePath conversion"))?;
|
|
||||||
let blob_path = blob_prefix.join(Path::new(&format!("blob_{i}")));
|
|
||||||
debug!("Creating remote item {i} at path {blob_path:?}");
|
|
||||||
|
|
||||||
let data = format!("remote blob data {i}").into_bytes();
|
|
||||||
let data_len = data.len();
|
|
||||||
task_client
|
|
||||||
.upload(std::io::Cursor::new(data), data_len, &blob_path, None)
|
|
||||||
.await?;
|
|
||||||
|
|
||||||
Ok::<_, anyhow::Error>((blob_prefix, blob_path))
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut upload_tasks_failed = false;
|
|
||||||
let mut uploaded_prefixes = HashSet::with_capacity(upload_tasks_count);
|
|
||||||
let mut uploaded_blobs = HashSet::with_capacity(upload_tasks_count);
|
|
||||||
while let Some(task_run_result) = upload_tasks.join_next().await {
|
|
||||||
match task_run_result
|
|
||||||
.context("task join failed")
|
|
||||||
.and_then(|task_result| task_result.context("upload task failed"))
|
|
||||||
{
|
|
||||||
Ok((upload_prefix, upload_path)) => {
|
|
||||||
uploaded_prefixes.insert(upload_prefix);
|
|
||||||
uploaded_blobs.insert(upload_path);
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
error!("Upload task failed: {e:?}");
|
|
||||||
upload_tasks_failed = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let uploads = Uploads {
|
|
||||||
prefixes: uploaded_prefixes,
|
|
||||||
blobs: uploaded_blobs,
|
|
||||||
};
|
|
||||||
if upload_tasks_failed {
|
|
||||||
ControlFlow::Break(uploads)
|
|
||||||
} else {
|
|
||||||
ControlFlow::Continue(uploads)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn cleanup(client: &Arc<GenericRemoteStorage>, objects_to_delete: HashSet<RemotePath>) {
|
|
||||||
info!(
|
|
||||||
"Removing {} objects from the remote storage during cleanup",
|
|
||||||
objects_to_delete.len()
|
|
||||||
);
|
|
||||||
let mut delete_tasks = JoinSet::new();
|
|
||||||
for object_to_delete in objects_to_delete {
|
|
||||||
let task_client = Arc::clone(client);
|
|
||||||
delete_tasks.spawn(async move {
|
|
||||||
debug!("Deleting remote item at path {object_to_delete:?}");
|
|
||||||
task_client
|
|
||||||
.delete(&object_to_delete)
|
|
||||||
.await
|
|
||||||
.with_context(|| format!("{object_to_delete:?} removal"))
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
while let Some(task_run_result) = delete_tasks.join_next().await {
|
|
||||||
match task_run_result {
|
|
||||||
Ok(task_result) => match task_result {
|
|
||||||
Ok(()) => {}
|
|
||||||
Err(e) => error!("Delete task failed: {e:?}"),
|
|
||||||
},
|
|
||||||
Err(join_err) => error!("Delete task did not finish correctly: {join_err}"),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
542
libs/remote_storage/tests/test_real_s3.rs
Normal file
542
libs/remote_storage/tests/test_real_s3.rs
Normal file
@@ -0,0 +1,542 @@
|
|||||||
|
use std::collections::HashSet;
|
||||||
|
use std::env;
|
||||||
|
use std::num::{NonZeroU32, NonZeroUsize};
|
||||||
|
use std::ops::ControlFlow;
|
||||||
|
use std::path::{Path, PathBuf};
|
||||||
|
use std::sync::Arc;
|
||||||
|
use std::time::UNIX_EPOCH;
|
||||||
|
|
||||||
|
use anyhow::Context;
|
||||||
|
use once_cell::sync::OnceCell;
|
||||||
|
use remote_storage::{
|
||||||
|
GenericRemoteStorage, RemotePath, RemoteStorageConfig, RemoteStorageKind, S3Config,
|
||||||
|
};
|
||||||
|
use test_context::{test_context, AsyncTestContext};
|
||||||
|
use tokio::task::JoinSet;
|
||||||
|
use tracing::{debug, error, info};
|
||||||
|
|
||||||
|
static LOGGING_DONE: OnceCell<()> = OnceCell::new();
|
||||||
|
|
||||||
|
const ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME: &str = "ENABLE_REAL_S3_REMOTE_STORAGE";
|
||||||
|
|
||||||
|
const BASE_PREFIX: &str = "test/";
|
||||||
|
|
||||||
|
/// Tests that S3 client can list all prefixes, even if the response come paginated and requires multiple S3 queries.
|
||||||
|
/// Uses real S3 and requires [`ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME`] and related S3 cred env vars specified.
|
||||||
|
/// See the client creation in [`create_s3_client`] for details on the required env vars.
|
||||||
|
/// If real S3 tests are disabled, the test passes, skipping any real test run: currently, there's no way to mark the test ignored in runtime with the
|
||||||
|
/// deafult test framework, see https://github.com/rust-lang/rust/issues/68007 for details.
|
||||||
|
///
|
||||||
|
/// First, the test creates a set of S3 objects with keys `/${random_prefix_part}/${base_prefix_str}/sub_prefix_${i}/blob_${i}` in [`upload_s3_data`]
|
||||||
|
/// where
|
||||||
|
/// * `random_prefix_part` is set for the entire S3 client during the S3 client creation in [`create_s3_client`], to avoid multiple test runs interference
|
||||||
|
/// * `base_prefix_str` is a common prefix to use in the client requests: we would want to ensure that the client is able to list nested prefixes inside the bucket
|
||||||
|
///
|
||||||
|
/// Then, verifies that the client does return correct prefixes when queried:
|
||||||
|
/// * with no prefix, it lists everything after its `${random_prefix_part}/` — that should be `${base_prefix_str}` value only
|
||||||
|
/// * with `${base_prefix_str}/` prefix, it lists every `sub_prefix_${i}`
|
||||||
|
///
|
||||||
|
/// With the real S3 enabled and `#[cfg(test)]` Rust configuration used, the S3 client test adds a `max-keys` param to limit the response keys.
|
||||||
|
/// This way, we are able to test the pagination implicitly, by ensuring all results are returned from the remote storage and avoid uploading too many blobs to S3,
|
||||||
|
/// since current default AWS S3 pagination limit is 1000.
|
||||||
|
/// (see https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html#API_ListObjectsV2_RequestSyntax)
|
||||||
|
///
|
||||||
|
/// Lastly, the test attempts to clean up and remove all uploaded S3 files.
|
||||||
|
/// If any errors appear during the clean up, they get logged, but the test is not failed or stopped until clean up is finished.
|
||||||
|
#[test_context(MaybeEnabledS3WithTestBlobs)]
|
||||||
|
#[tokio::test]
|
||||||
|
async fn s3_pagination_should_work(ctx: &mut MaybeEnabledS3WithTestBlobs) -> anyhow::Result<()> {
|
||||||
|
let ctx = match ctx {
|
||||||
|
MaybeEnabledS3WithTestBlobs::Enabled(ctx) => ctx,
|
||||||
|
MaybeEnabledS3WithTestBlobs::Disabled => return Ok(()),
|
||||||
|
MaybeEnabledS3WithTestBlobs::UploadsFailed(e, _) => anyhow::bail!("S3 init failed: {e:?}"),
|
||||||
|
};
|
||||||
|
|
||||||
|
let test_client = Arc::clone(&ctx.enabled.client);
|
||||||
|
let expected_remote_prefixes = ctx.remote_prefixes.clone();
|
||||||
|
|
||||||
|
let base_prefix = RemotePath::new(Path::new(ctx.enabled.base_prefix))
|
||||||
|
.context("common_prefix construction")?;
|
||||||
|
let root_remote_prefixes = test_client
|
||||||
|
.list_prefixes(None)
|
||||||
|
.await
|
||||||
|
.context("client list root prefixes failure")?
|
||||||
|
.into_iter()
|
||||||
|
.collect::<HashSet<_>>();
|
||||||
|
assert_eq!(
|
||||||
|
root_remote_prefixes, HashSet::from([base_prefix.clone()]),
|
||||||
|
"remote storage root prefixes list mismatches with the uploads. Returned prefixes: {root_remote_prefixes:?}"
|
||||||
|
);
|
||||||
|
|
||||||
|
let nested_remote_prefixes = test_client
|
||||||
|
.list_prefixes(Some(&base_prefix))
|
||||||
|
.await
|
||||||
|
.context("client list nested prefixes failure")?
|
||||||
|
.into_iter()
|
||||||
|
.collect::<HashSet<_>>();
|
||||||
|
let remote_only_prefixes = nested_remote_prefixes
|
||||||
|
.difference(&expected_remote_prefixes)
|
||||||
|
.collect::<HashSet<_>>();
|
||||||
|
let missing_uploaded_prefixes = expected_remote_prefixes
|
||||||
|
.difference(&nested_remote_prefixes)
|
||||||
|
.collect::<HashSet<_>>();
|
||||||
|
assert_eq!(
|
||||||
|
remote_only_prefixes.len() + missing_uploaded_prefixes.len(), 0,
|
||||||
|
"remote storage nested prefixes list mismatches with the uploads. Remote only prefixes: {remote_only_prefixes:?}, missing uploaded prefixes: {missing_uploaded_prefixes:?}",
|
||||||
|
);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Tests that S3 client can list all files in a folder, even if the response comes paginated and requirees multiple S3 queries.
|
||||||
|
/// Uses real S3 and requires [`ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME`] and related S3 cred env vars specified. Test will skip real code and pass if env vars not set.
|
||||||
|
/// See `s3_pagination_should_work` for more information.
|
||||||
|
///
|
||||||
|
/// First, create a set of S3 objects with keys `random_prefix/folder{j}/blob_{i}.txt` in [`upload_s3_data`]
|
||||||
|
/// Then performs the following queries:
|
||||||
|
/// 1. `list_files(None)`. This should return all files `random_prefix/folder{j}/blob_{i}.txt`
|
||||||
|
/// 2. `list_files("folder1")`. This should return all files `random_prefix/folder1/blob_{i}.txt`
|
||||||
|
#[test_context(MaybeEnabledS3WithSimpleTestBlobs)]
|
||||||
|
#[tokio::test]
|
||||||
|
async fn s3_list_files_works(ctx: &mut MaybeEnabledS3WithSimpleTestBlobs) -> anyhow::Result<()> {
|
||||||
|
let ctx = match ctx {
|
||||||
|
MaybeEnabledS3WithSimpleTestBlobs::Enabled(ctx) => ctx,
|
||||||
|
MaybeEnabledS3WithSimpleTestBlobs::Disabled => return Ok(()),
|
||||||
|
MaybeEnabledS3WithSimpleTestBlobs::UploadsFailed(e, _) => {
|
||||||
|
anyhow::bail!("S3 init failed: {e:?}")
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let test_client = Arc::clone(&ctx.enabled.client);
|
||||||
|
let base_prefix =
|
||||||
|
RemotePath::new(Path::new("folder1")).context("common_prefix construction")?;
|
||||||
|
let root_files = test_client
|
||||||
|
.list_files(None)
|
||||||
|
.await
|
||||||
|
.context("client list root files failure")?
|
||||||
|
.into_iter()
|
||||||
|
.collect::<HashSet<_>>();
|
||||||
|
assert_eq!(
|
||||||
|
root_files,
|
||||||
|
ctx.remote_blobs.clone(),
|
||||||
|
"remote storage list_files on root mismatches with the uploads."
|
||||||
|
);
|
||||||
|
let nested_remote_files = test_client
|
||||||
|
.list_files(Some(&base_prefix))
|
||||||
|
.await
|
||||||
|
.context("client list nested files failure")?
|
||||||
|
.into_iter()
|
||||||
|
.collect::<HashSet<_>>();
|
||||||
|
let trim_remote_blobs: HashSet<_> = ctx
|
||||||
|
.remote_blobs
|
||||||
|
.iter()
|
||||||
|
.map(|x| x.get_path().to_str().expect("must be valid name"))
|
||||||
|
.filter(|x| x.starts_with("folder1"))
|
||||||
|
.map(|x| RemotePath::new(Path::new(x)).expect("must be valid name"))
|
||||||
|
.collect();
|
||||||
|
assert_eq!(
|
||||||
|
nested_remote_files, trim_remote_blobs,
|
||||||
|
"remote storage list_files on subdirrectory mismatches with the uploads."
|
||||||
|
);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test_context(MaybeEnabledS3)]
|
||||||
|
#[tokio::test]
|
||||||
|
async fn s3_delete_non_exising_works(ctx: &mut MaybeEnabledS3) -> anyhow::Result<()> {
|
||||||
|
let ctx = match ctx {
|
||||||
|
MaybeEnabledS3::Enabled(ctx) => ctx,
|
||||||
|
MaybeEnabledS3::Disabled => return Ok(()),
|
||||||
|
};
|
||||||
|
|
||||||
|
let path = RemotePath::new(&PathBuf::from(format!(
|
||||||
|
"{}/for_sure_there_is_nothing_there_really",
|
||||||
|
ctx.base_prefix,
|
||||||
|
)))
|
||||||
|
.with_context(|| "RemotePath conversion")?;
|
||||||
|
|
||||||
|
ctx.client.delete(&path).await.expect("should succeed");
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test_context(MaybeEnabledS3)]
|
||||||
|
#[tokio::test]
|
||||||
|
async fn s3_delete_objects_works(ctx: &mut MaybeEnabledS3) -> anyhow::Result<()> {
|
||||||
|
let ctx = match ctx {
|
||||||
|
MaybeEnabledS3::Enabled(ctx) => ctx,
|
||||||
|
MaybeEnabledS3::Disabled => return Ok(()),
|
||||||
|
};
|
||||||
|
|
||||||
|
let path1 = RemotePath::new(&PathBuf::from(format!("{}/path1", ctx.base_prefix,)))
|
||||||
|
.with_context(|| "RemotePath conversion")?;
|
||||||
|
|
||||||
|
let path2 = RemotePath::new(&PathBuf::from(format!("{}/path2", ctx.base_prefix,)))
|
||||||
|
.with_context(|| "RemotePath conversion")?;
|
||||||
|
|
||||||
|
let path3 = RemotePath::new(&PathBuf::from(format!("{}/path3", ctx.base_prefix,)))
|
||||||
|
.with_context(|| "RemotePath conversion")?;
|
||||||
|
|
||||||
|
let data1 = "remote blob data1".as_bytes();
|
||||||
|
let data1_len = data1.len();
|
||||||
|
let data2 = "remote blob data2".as_bytes();
|
||||||
|
let data2_len = data2.len();
|
||||||
|
let data3 = "remote blob data3".as_bytes();
|
||||||
|
let data3_len = data3.len();
|
||||||
|
ctx.client
|
||||||
|
.upload(std::io::Cursor::new(data1), data1_len, &path1, None)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
ctx.client
|
||||||
|
.upload(std::io::Cursor::new(data2), data2_len, &path2, None)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
ctx.client
|
||||||
|
.upload(std::io::Cursor::new(data3), data3_len, &path3, None)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
ctx.client.delete_objects(&[path1, path2]).await?;
|
||||||
|
|
||||||
|
let prefixes = ctx.client.list_prefixes(None).await?;
|
||||||
|
|
||||||
|
assert_eq!(prefixes.len(), 1);
|
||||||
|
|
||||||
|
ctx.client.delete_objects(&[path3]).await?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn ensure_logging_ready() {
|
||||||
|
LOGGING_DONE.get_or_init(|| {
|
||||||
|
utils::logging::init(
|
||||||
|
utils::logging::LogFormat::Test,
|
||||||
|
utils::logging::TracingErrorLayerEnablement::Disabled,
|
||||||
|
)
|
||||||
|
.expect("logging init failed");
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
struct EnabledS3 {
|
||||||
|
client: Arc<GenericRemoteStorage>,
|
||||||
|
base_prefix: &'static str,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl EnabledS3 {
|
||||||
|
async fn setup(max_keys_in_list_response: Option<i32>) -> Self {
|
||||||
|
let client = create_s3_client(max_keys_in_list_response)
|
||||||
|
.context("S3 client creation")
|
||||||
|
.expect("S3 client creation failed");
|
||||||
|
|
||||||
|
EnabledS3 {
|
||||||
|
client,
|
||||||
|
base_prefix: BASE_PREFIX,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
enum MaybeEnabledS3 {
|
||||||
|
Enabled(EnabledS3),
|
||||||
|
Disabled,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait::async_trait]
|
||||||
|
impl AsyncTestContext for MaybeEnabledS3 {
|
||||||
|
async fn setup() -> Self {
|
||||||
|
ensure_logging_ready();
|
||||||
|
|
||||||
|
if env::var(ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME).is_err() {
|
||||||
|
info!(
|
||||||
|
"`{}` env variable is not set, skipping the test",
|
||||||
|
ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME
|
||||||
|
);
|
||||||
|
return Self::Disabled;
|
||||||
|
}
|
||||||
|
|
||||||
|
Self::Enabled(EnabledS3::setup(None).await)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
enum MaybeEnabledS3WithTestBlobs {
|
||||||
|
Enabled(S3WithTestBlobs),
|
||||||
|
Disabled,
|
||||||
|
UploadsFailed(anyhow::Error, S3WithTestBlobs),
|
||||||
|
}
|
||||||
|
|
||||||
|
struct S3WithTestBlobs {
|
||||||
|
enabled: EnabledS3,
|
||||||
|
remote_prefixes: HashSet<RemotePath>,
|
||||||
|
remote_blobs: HashSet<RemotePath>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait::async_trait]
|
||||||
|
impl AsyncTestContext for MaybeEnabledS3WithTestBlobs {
|
||||||
|
async fn setup() -> Self {
|
||||||
|
ensure_logging_ready();
|
||||||
|
if env::var(ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME).is_err() {
|
||||||
|
info!(
|
||||||
|
"`{}` env variable is not set, skipping the test",
|
||||||
|
ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME
|
||||||
|
);
|
||||||
|
return Self::Disabled;
|
||||||
|
}
|
||||||
|
|
||||||
|
let max_keys_in_list_response = 10;
|
||||||
|
let upload_tasks_count = 1 + (2 * usize::try_from(max_keys_in_list_response).unwrap());
|
||||||
|
|
||||||
|
let enabled = EnabledS3::setup(Some(max_keys_in_list_response)).await;
|
||||||
|
|
||||||
|
match upload_s3_data(&enabled.client, enabled.base_prefix, upload_tasks_count).await {
|
||||||
|
ControlFlow::Continue(uploads) => {
|
||||||
|
info!("Remote objects created successfully");
|
||||||
|
|
||||||
|
Self::Enabled(S3WithTestBlobs {
|
||||||
|
enabled,
|
||||||
|
remote_prefixes: uploads.prefixes,
|
||||||
|
remote_blobs: uploads.blobs,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
ControlFlow::Break(uploads) => Self::UploadsFailed(
|
||||||
|
anyhow::anyhow!("One or multiple blobs failed to upload to S3"),
|
||||||
|
S3WithTestBlobs {
|
||||||
|
enabled,
|
||||||
|
remote_prefixes: uploads.prefixes,
|
||||||
|
remote_blobs: uploads.blobs,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn teardown(self) {
|
||||||
|
match self {
|
||||||
|
Self::Disabled => {}
|
||||||
|
Self::Enabled(ctx) | Self::UploadsFailed(_, ctx) => {
|
||||||
|
cleanup(&ctx.enabled.client, ctx.remote_blobs).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// NOTE: the setups for the list_prefixes test and the list_files test are very similar
|
||||||
|
// However, they are not idential. The list_prefixes function is concerned with listing prefixes,
|
||||||
|
// whereas the list_files function is concerned with listing files.
|
||||||
|
// See `RemoteStorage::list_files` documentation for more details
|
||||||
|
enum MaybeEnabledS3WithSimpleTestBlobs {
|
||||||
|
Enabled(S3WithSimpleTestBlobs),
|
||||||
|
Disabled,
|
||||||
|
UploadsFailed(anyhow::Error, S3WithSimpleTestBlobs),
|
||||||
|
}
|
||||||
|
struct S3WithSimpleTestBlobs {
|
||||||
|
enabled: EnabledS3,
|
||||||
|
remote_blobs: HashSet<RemotePath>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait::async_trait]
|
||||||
|
impl AsyncTestContext for MaybeEnabledS3WithSimpleTestBlobs {
|
||||||
|
async fn setup() -> Self {
|
||||||
|
ensure_logging_ready();
|
||||||
|
if env::var(ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME).is_err() {
|
||||||
|
info!(
|
||||||
|
"`{}` env variable is not set, skipping the test",
|
||||||
|
ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME
|
||||||
|
);
|
||||||
|
return Self::Disabled;
|
||||||
|
}
|
||||||
|
|
||||||
|
let max_keys_in_list_response = 10;
|
||||||
|
let upload_tasks_count = 1 + (2 * usize::try_from(max_keys_in_list_response).unwrap());
|
||||||
|
|
||||||
|
let enabled = EnabledS3::setup(Some(max_keys_in_list_response)).await;
|
||||||
|
|
||||||
|
match upload_simple_s3_data(&enabled.client, upload_tasks_count).await {
|
||||||
|
ControlFlow::Continue(uploads) => {
|
||||||
|
info!("Remote objects created successfully");
|
||||||
|
|
||||||
|
Self::Enabled(S3WithSimpleTestBlobs {
|
||||||
|
enabled,
|
||||||
|
remote_blobs: uploads,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
ControlFlow::Break(uploads) => Self::UploadsFailed(
|
||||||
|
anyhow::anyhow!("One or multiple blobs failed to upload to S3"),
|
||||||
|
S3WithSimpleTestBlobs {
|
||||||
|
enabled,
|
||||||
|
remote_blobs: uploads,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn teardown(self) {
|
||||||
|
match self {
|
||||||
|
Self::Disabled => {}
|
||||||
|
Self::Enabled(ctx) | Self::UploadsFailed(_, ctx) => {
|
||||||
|
cleanup(&ctx.enabled.client, ctx.remote_blobs).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn create_s3_client(
|
||||||
|
max_keys_per_list_response: Option<i32>,
|
||||||
|
) -> anyhow::Result<Arc<GenericRemoteStorage>> {
|
||||||
|
let remote_storage_s3_bucket = env::var("REMOTE_STORAGE_S3_BUCKET")
|
||||||
|
.context("`REMOTE_STORAGE_S3_BUCKET` env var is not set, but real S3 tests are enabled")?;
|
||||||
|
let remote_storage_s3_region = env::var("REMOTE_STORAGE_S3_REGION")
|
||||||
|
.context("`REMOTE_STORAGE_S3_REGION` env var is not set, but real S3 tests are enabled")?;
|
||||||
|
let random_prefix_part = std::time::SystemTime::now()
|
||||||
|
.duration_since(UNIX_EPOCH)
|
||||||
|
.context("random s3 test prefix part calculation")?
|
||||||
|
.as_nanos();
|
||||||
|
let remote_storage_config = RemoteStorageConfig {
|
||||||
|
max_concurrent_syncs: NonZeroUsize::new(100).unwrap(),
|
||||||
|
max_sync_errors: NonZeroU32::new(5).unwrap(),
|
||||||
|
storage: RemoteStorageKind::AwsS3(S3Config {
|
||||||
|
bucket_name: remote_storage_s3_bucket,
|
||||||
|
bucket_region: remote_storage_s3_region,
|
||||||
|
prefix_in_bucket: Some(format!("pagination_should_work_test_{random_prefix_part}/")),
|
||||||
|
endpoint: None,
|
||||||
|
concurrency_limit: NonZeroUsize::new(100).unwrap(),
|
||||||
|
max_keys_per_list_response,
|
||||||
|
}),
|
||||||
|
};
|
||||||
|
Ok(Arc::new(
|
||||||
|
GenericRemoteStorage::from_config(&remote_storage_config).context("remote storage init")?,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
struct Uploads {
|
||||||
|
prefixes: HashSet<RemotePath>,
|
||||||
|
blobs: HashSet<RemotePath>,
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn upload_s3_data(
|
||||||
|
client: &Arc<GenericRemoteStorage>,
|
||||||
|
base_prefix_str: &'static str,
|
||||||
|
upload_tasks_count: usize,
|
||||||
|
) -> ControlFlow<Uploads, Uploads> {
|
||||||
|
info!("Creating {upload_tasks_count} S3 files");
|
||||||
|
let mut upload_tasks = JoinSet::new();
|
||||||
|
for i in 1..upload_tasks_count + 1 {
|
||||||
|
let task_client = Arc::clone(client);
|
||||||
|
upload_tasks.spawn(async move {
|
||||||
|
let prefix = PathBuf::from(format!("{base_prefix_str}/sub_prefix_{i}/"));
|
||||||
|
let blob_prefix = RemotePath::new(&prefix)
|
||||||
|
.with_context(|| format!("{prefix:?} to RemotePath conversion"))?;
|
||||||
|
let blob_path = blob_prefix.join(Path::new(&format!("blob_{i}")));
|
||||||
|
debug!("Creating remote item {i} at path {blob_path:?}");
|
||||||
|
|
||||||
|
let data = format!("remote blob data {i}").into_bytes();
|
||||||
|
let data_len = data.len();
|
||||||
|
task_client
|
||||||
|
.upload(std::io::Cursor::new(data), data_len, &blob_path, None)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
Ok::<_, anyhow::Error>((blob_prefix, blob_path))
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut upload_tasks_failed = false;
|
||||||
|
let mut uploaded_prefixes = HashSet::with_capacity(upload_tasks_count);
|
||||||
|
let mut uploaded_blobs = HashSet::with_capacity(upload_tasks_count);
|
||||||
|
while let Some(task_run_result) = upload_tasks.join_next().await {
|
||||||
|
match task_run_result
|
||||||
|
.context("task join failed")
|
||||||
|
.and_then(|task_result| task_result.context("upload task failed"))
|
||||||
|
{
|
||||||
|
Ok((upload_prefix, upload_path)) => {
|
||||||
|
uploaded_prefixes.insert(upload_prefix);
|
||||||
|
uploaded_blobs.insert(upload_path);
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
error!("Upload task failed: {e:?}");
|
||||||
|
upload_tasks_failed = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let uploads = Uploads {
|
||||||
|
prefixes: uploaded_prefixes,
|
||||||
|
blobs: uploaded_blobs,
|
||||||
|
};
|
||||||
|
if upload_tasks_failed {
|
||||||
|
ControlFlow::Break(uploads)
|
||||||
|
} else {
|
||||||
|
ControlFlow::Continue(uploads)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn cleanup(client: &Arc<GenericRemoteStorage>, objects_to_delete: HashSet<RemotePath>) {
|
||||||
|
info!(
|
||||||
|
"Removing {} objects from the remote storage during cleanup",
|
||||||
|
objects_to_delete.len()
|
||||||
|
);
|
||||||
|
let mut delete_tasks = JoinSet::new();
|
||||||
|
for object_to_delete in objects_to_delete {
|
||||||
|
let task_client = Arc::clone(client);
|
||||||
|
delete_tasks.spawn(async move {
|
||||||
|
debug!("Deleting remote item at path {object_to_delete:?}");
|
||||||
|
task_client
|
||||||
|
.delete(&object_to_delete)
|
||||||
|
.await
|
||||||
|
.with_context(|| format!("{object_to_delete:?} removal"))
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
while let Some(task_run_result) = delete_tasks.join_next().await {
|
||||||
|
match task_run_result {
|
||||||
|
Ok(task_result) => match task_result {
|
||||||
|
Ok(()) => {}
|
||||||
|
Err(e) => error!("Delete task failed: {e:?}"),
|
||||||
|
},
|
||||||
|
Err(join_err) => error!("Delete task did not finish correctly: {join_err}"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Uploads files `folder{j}/blob{i}.txt`. See test description for more details.
|
||||||
|
async fn upload_simple_s3_data(
|
||||||
|
client: &Arc<GenericRemoteStorage>,
|
||||||
|
upload_tasks_count: usize,
|
||||||
|
) -> ControlFlow<HashSet<RemotePath>, HashSet<RemotePath>> {
|
||||||
|
info!("Creating {upload_tasks_count} S3 files");
|
||||||
|
let mut upload_tasks = JoinSet::new();
|
||||||
|
for i in 1..upload_tasks_count + 1 {
|
||||||
|
let task_client = Arc::clone(client);
|
||||||
|
upload_tasks.spawn(async move {
|
||||||
|
let blob_path = PathBuf::from(format!("folder{}/blob_{}.txt", i / 7, i));
|
||||||
|
let blob_path = RemotePath::new(&blob_path)
|
||||||
|
.with_context(|| format!("{blob_path:?} to RemotePath conversion"))?;
|
||||||
|
debug!("Creating remote item {i} at path {blob_path:?}");
|
||||||
|
|
||||||
|
let data = format!("remote blob data {i}").into_bytes();
|
||||||
|
let data_len = data.len();
|
||||||
|
task_client
|
||||||
|
.upload(std::io::Cursor::new(data), data_len, &blob_path, None)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
Ok::<_, anyhow::Error>(blob_path)
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut upload_tasks_failed = false;
|
||||||
|
let mut uploaded_blobs = HashSet::with_capacity(upload_tasks_count);
|
||||||
|
while let Some(task_run_result) = upload_tasks.join_next().await {
|
||||||
|
match task_run_result
|
||||||
|
.context("task join failed")
|
||||||
|
.and_then(|task_result| task_result.context("upload task failed"))
|
||||||
|
{
|
||||||
|
Ok(upload_path) => {
|
||||||
|
uploaded_blobs.insert(upload_path);
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
error!("Upload task failed: {e:?}");
|
||||||
|
upload_tasks_failed = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if upload_tasks_failed {
|
||||||
|
ControlFlow::Break(uploaded_blobs)
|
||||||
|
} else {
|
||||||
|
ControlFlow::Continue(uploaded_blobs)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -5,7 +5,6 @@ edition.workspace = true
|
|||||||
license.workspace = true
|
license.workspace = true
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
atty.workspace = true
|
|
||||||
sentry.workspace = true
|
sentry.workspace = true
|
||||||
async-trait.workspace = true
|
async-trait.workspace = true
|
||||||
anyhow.workspace = true
|
anyhow.workspace = true
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
/// Extensions to `std::fs` types.
|
/// Extensions to `std::fs` types.
|
||||||
use std::{fs, io, path::Path};
|
use std::{fs, io, path::Path};
|
||||||
|
|
||||||
|
use anyhow::Context;
|
||||||
|
|
||||||
pub trait PathExt {
|
pub trait PathExt {
|
||||||
/// Returns an error if `self` is not a directory.
|
/// Returns an error if `self` is not a directory.
|
||||||
fn is_empty_dir(&self) -> io::Result<bool>;
|
fn is_empty_dir(&self) -> io::Result<bool>;
|
||||||
@@ -15,10 +17,19 @@ where
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub async fn is_directory_empty(path: impl AsRef<Path>) -> anyhow::Result<bool> {
|
||||||
|
let mut dir = tokio::fs::read_dir(&path)
|
||||||
|
.await
|
||||||
|
.context(format!("read_dir({})", path.as_ref().display()))?;
|
||||||
|
Ok(dir.next_entry().await?.is_none())
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod test {
|
mod test {
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
|
|
||||||
|
use crate::fs_ext::is_directory_empty;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn is_empty_dir() {
|
fn is_empty_dir() {
|
||||||
use super::PathExt;
|
use super::PathExt;
|
||||||
@@ -42,4 +53,26 @@ mod test {
|
|||||||
std::fs::remove_file(&file_path).unwrap();
|
std::fs::remove_file(&file_path).unwrap();
|
||||||
assert!(file_path.is_empty_dir().is_err());
|
assert!(file_path.is_empty_dir().is_err());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn is_empty_dir_async() {
|
||||||
|
let dir = tempfile::tempdir().unwrap();
|
||||||
|
let dir_path = dir.path();
|
||||||
|
|
||||||
|
// test positive case
|
||||||
|
assert!(
|
||||||
|
is_directory_empty(dir_path).await.expect("test failure"),
|
||||||
|
"new tempdir should be empty"
|
||||||
|
);
|
||||||
|
|
||||||
|
// invoke on a file to ensure it returns an error
|
||||||
|
let file_path: PathBuf = dir_path.join("testfile");
|
||||||
|
let f = std::fs::File::create(&file_path).unwrap();
|
||||||
|
drop(f);
|
||||||
|
assert!(is_directory_empty(&file_path).await.is_err());
|
||||||
|
|
||||||
|
// do it again on a path, we know to be nonexistent
|
||||||
|
std::fs::remove_file(&file_path).unwrap();
|
||||||
|
assert!(is_directory_empty(file_path).await.is_err());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,19 +1,18 @@
|
|||||||
use crate::auth::{Claims, JwtAuth};
|
use crate::auth::{Claims, JwtAuth};
|
||||||
use crate::http::error::{api_error_handler, route_error_handler, ApiError};
|
use crate::http::error::{api_error_handler, route_error_handler, ApiError};
|
||||||
use anyhow::{anyhow, Context};
|
use anyhow::Context;
|
||||||
use hyper::header::{HeaderName, AUTHORIZATION};
|
use hyper::header::{HeaderName, AUTHORIZATION};
|
||||||
use hyper::http::HeaderValue;
|
use hyper::http::HeaderValue;
|
||||||
use hyper::Method;
|
use hyper::Method;
|
||||||
use hyper::{header::CONTENT_TYPE, Body, Request, Response, Server};
|
use hyper::{header::CONTENT_TYPE, Body, Request, Response};
|
||||||
use metrics::{register_int_counter, Encoder, IntCounter, TextEncoder};
|
use metrics::{register_int_counter, Encoder, IntCounter, TextEncoder};
|
||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
use routerify::ext::RequestExt;
|
use routerify::ext::RequestExt;
|
||||||
use routerify::{Middleware, RequestInfo, Router, RouterBuilder, RouterService};
|
use routerify::{Middleware, RequestInfo, Router, RouterBuilder};
|
||||||
use tokio::task::JoinError;
|
use tokio::task::JoinError;
|
||||||
use tracing::{self, debug, info, info_span, warn, Instrument};
|
use tracing::{self, debug, info, info_span, warn, Instrument};
|
||||||
|
|
||||||
use std::future::Future;
|
use std::future::Future;
|
||||||
use std::net::TcpListener;
|
|
||||||
use std::str::FromStr;
|
use std::str::FromStr;
|
||||||
|
|
||||||
static SERVE_METRICS_COUNT: Lazy<IntCounter> = Lazy::new(|| {
|
static SERVE_METRICS_COUNT: Lazy<IntCounter> = Lazy::new(|| {
|
||||||
@@ -348,40 +347,6 @@ pub fn check_permission_with(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
///
|
|
||||||
/// Start listening for HTTP requests on given socket.
|
|
||||||
///
|
|
||||||
/// 'shutdown_future' can be used to stop. If the Future becomes
|
|
||||||
/// ready, we stop listening for new requests, and the function returns.
|
|
||||||
///
|
|
||||||
pub fn serve_thread_main<S>(
|
|
||||||
router_builder: RouterBuilder<hyper::Body, ApiError>,
|
|
||||||
listener: TcpListener,
|
|
||||||
shutdown_future: S,
|
|
||||||
) -> anyhow::Result<()>
|
|
||||||
where
|
|
||||||
S: Future<Output = ()> + Send + Sync,
|
|
||||||
{
|
|
||||||
info!("Starting an HTTP endpoint at {}", listener.local_addr()?);
|
|
||||||
|
|
||||||
// Create a Service from the router above to handle incoming requests.
|
|
||||||
let service = RouterService::new(router_builder.build().map_err(|err| anyhow!(err))?).unwrap();
|
|
||||||
|
|
||||||
// Enter a single-threaded tokio runtime bound to the current thread
|
|
||||||
let runtime = tokio::runtime::Builder::new_current_thread()
|
|
||||||
.enable_all()
|
|
||||||
.build()?;
|
|
||||||
|
|
||||||
let _guard = runtime.enter();
|
|
||||||
|
|
||||||
let server = Server::from_tcp(listener)?
|
|
||||||
.serve(service)
|
|
||||||
.with_graceful_shutdown(shutdown_future);
|
|
||||||
|
|
||||||
runtime.block_on(server)?;
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
use hyper::{header, Body, Response, StatusCode};
|
use hyper::{header, Body, Response, StatusCode};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
use std::error::Error as StdError;
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
use tracing::error;
|
use tracing::error;
|
||||||
|
|
||||||
@@ -15,13 +16,13 @@ pub enum ApiError {
|
|||||||
Unauthorized(String),
|
Unauthorized(String),
|
||||||
|
|
||||||
#[error("NotFound: {0}")]
|
#[error("NotFound: {0}")]
|
||||||
NotFound(anyhow::Error),
|
NotFound(Box<dyn StdError + Send + Sync + 'static>),
|
||||||
|
|
||||||
#[error("Conflict: {0}")]
|
#[error("Conflict: {0}")]
|
||||||
Conflict(String),
|
Conflict(String),
|
||||||
|
|
||||||
#[error("Precondition failed: {0}")]
|
#[error("Precondition failed: {0}")]
|
||||||
PreconditionFailed(&'static str),
|
PreconditionFailed(Box<str>),
|
||||||
|
|
||||||
#[error(transparent)]
|
#[error(transparent)]
|
||||||
InternalServerError(anyhow::Error),
|
InternalServerError(anyhow::Error),
|
||||||
|
|||||||
@@ -84,7 +84,7 @@ pub fn init(
|
|||||||
let r = r.with({
|
let r = r.with({
|
||||||
let log_layer = tracing_subscriber::fmt::layer()
|
let log_layer = tracing_subscriber::fmt::layer()
|
||||||
.with_target(false)
|
.with_target(false)
|
||||||
.with_ansi(atty::is(atty::Stream::Stdout))
|
.with_ansi(false)
|
||||||
.with_writer(std::io::stdout);
|
.with_writer(std::io::stdout);
|
||||||
let log_layer = match log_format {
|
let log_layer = match log_format {
|
||||||
LogFormat::Json => log_layer.json().boxed(),
|
LogFormat::Json => log_layer.json().boxed(),
|
||||||
|
|||||||
@@ -1,22 +1,23 @@
|
|||||||
use pageserver::keyspace::{KeyPartitioning, KeySpace};
|
use pageserver::keyspace::{KeyPartitioning, KeySpace};
|
||||||
use pageserver::repository::Key;
|
use pageserver::repository::Key;
|
||||||
use pageserver::tenant::layer_map::LayerMap;
|
use pageserver::tenant::layer_map::LayerMap;
|
||||||
use pageserver::tenant::storage_layer::{Layer, LayerDescriptor, LayerFileName};
|
use pageserver::tenant::storage_layer::{tests::LayerDescriptor, Layer, LayerFileName};
|
||||||
|
use pageserver::tenant::storage_layer::{PersistentLayer, PersistentLayerDesc};
|
||||||
use rand::prelude::{SeedableRng, SliceRandom, StdRng};
|
use rand::prelude::{SeedableRng, SliceRandom, StdRng};
|
||||||
use std::cmp::{max, min};
|
use std::cmp::{max, min};
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::{BufRead, BufReader};
|
use std::io::{BufRead, BufReader};
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use std::str::FromStr;
|
use std::str::FromStr;
|
||||||
use std::sync::Arc;
|
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
|
use utils::id::{TenantId, TimelineId};
|
||||||
|
|
||||||
use utils::lsn::Lsn;
|
use utils::lsn::Lsn;
|
||||||
|
|
||||||
use criterion::{black_box, criterion_group, criterion_main, Criterion};
|
use criterion::{black_box, criterion_group, criterion_main, Criterion};
|
||||||
|
|
||||||
fn build_layer_map(filename_dump: PathBuf) -> LayerMap<LayerDescriptor> {
|
fn build_layer_map(filename_dump: PathBuf) -> LayerMap {
|
||||||
let mut layer_map = LayerMap::<LayerDescriptor>::default();
|
let mut layer_map = LayerMap::default();
|
||||||
|
|
||||||
let mut min_lsn = Lsn(u64::MAX);
|
let mut min_lsn = Lsn(u64::MAX);
|
||||||
let mut max_lsn = Lsn(0);
|
let mut max_lsn = Lsn(0);
|
||||||
@@ -33,7 +34,7 @@ fn build_layer_map(filename_dump: PathBuf) -> LayerMap<LayerDescriptor> {
|
|||||||
min_lsn = min(min_lsn, lsn_range.start);
|
min_lsn = min(min_lsn, lsn_range.start);
|
||||||
max_lsn = max(max_lsn, Lsn(lsn_range.end.0 - 1));
|
max_lsn = max(max_lsn, Lsn(lsn_range.end.0 - 1));
|
||||||
|
|
||||||
updates.insert_historic(layer.get_persistent_layer_desc(), Arc::new(layer));
|
updates.insert_historic(layer.layer_desc().clone());
|
||||||
}
|
}
|
||||||
|
|
||||||
println!("min: {min_lsn}, max: {max_lsn}");
|
println!("min: {min_lsn}, max: {max_lsn}");
|
||||||
@@ -43,7 +44,7 @@ fn build_layer_map(filename_dump: PathBuf) -> LayerMap<LayerDescriptor> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Construct a layer map query pattern for benchmarks
|
/// Construct a layer map query pattern for benchmarks
|
||||||
fn uniform_query_pattern(layer_map: &LayerMap<LayerDescriptor>) -> Vec<(Key, Lsn)> {
|
fn uniform_query_pattern(layer_map: &LayerMap) -> Vec<(Key, Lsn)> {
|
||||||
// For each image layer we query one of the pages contained, at LSN right
|
// For each image layer we query one of the pages contained, at LSN right
|
||||||
// before the image layer was created. This gives us a somewhat uniform
|
// before the image layer was created. This gives us a somewhat uniform
|
||||||
// coverage of both the lsn and key space because image layers have
|
// coverage of both the lsn and key space because image layers have
|
||||||
@@ -69,7 +70,7 @@ fn uniform_query_pattern(layer_map: &LayerMap<LayerDescriptor>) -> Vec<(Key, Lsn
|
|||||||
|
|
||||||
// Construct a partitioning for testing get_difficulty map when we
|
// Construct a partitioning for testing get_difficulty map when we
|
||||||
// don't have an exact result of `collect_keyspace` to work with.
|
// don't have an exact result of `collect_keyspace` to work with.
|
||||||
fn uniform_key_partitioning(layer_map: &LayerMap<LayerDescriptor>, _lsn: Lsn) -> KeyPartitioning {
|
fn uniform_key_partitioning(layer_map: &LayerMap, _lsn: Lsn) -> KeyPartitioning {
|
||||||
let mut parts = Vec::new();
|
let mut parts = Vec::new();
|
||||||
|
|
||||||
// We add a partition boundary at the start of each image layer,
|
// We add a partition boundary at the start of each image layer,
|
||||||
@@ -209,13 +210,15 @@ fn bench_sequential(c: &mut Criterion) {
|
|||||||
for i in 0..100_000 {
|
for i in 0..100_000 {
|
||||||
let i32 = (i as u32) % 100;
|
let i32 = (i as u32) % 100;
|
||||||
let zero = Key::from_hex("000000000000000000000000000000000000").unwrap();
|
let zero = Key::from_hex("000000000000000000000000000000000000").unwrap();
|
||||||
let layer = LayerDescriptor {
|
let layer = LayerDescriptor::from(PersistentLayerDesc::new_img(
|
||||||
key: zero.add(10 * i32)..zero.add(10 * i32 + 1),
|
TenantId::generate(),
|
||||||
lsn: Lsn(i)..Lsn(i + 1),
|
TimelineId::generate(),
|
||||||
is_incremental: false,
|
zero.add(10 * i32)..zero.add(10 * i32 + 1),
|
||||||
short_id: format!("Layer {}", i),
|
Lsn(i),
|
||||||
};
|
false,
|
||||||
updates.insert_historic(layer.get_persistent_layer_desc(), Arc::new(layer));
|
0,
|
||||||
|
));
|
||||||
|
updates.insert_historic(layer.layer_desc().clone());
|
||||||
}
|
}
|
||||||
updates.flush();
|
updates.flush();
|
||||||
println!("Finished layer map init in {:?}", now.elapsed());
|
println!("Finished layer map init in {:?}", now.elapsed());
|
||||||
|
|||||||
@@ -495,50 +495,50 @@ fn start_pageserver(
|
|||||||
Ok(())
|
Ok(())
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
|
}
|
||||||
|
|
||||||
if let Some(metric_collection_endpoint) = &conf.metric_collection_endpoint {
|
if let Some(metric_collection_endpoint) = &conf.metric_collection_endpoint {
|
||||||
let background_jobs_barrier = background_jobs_barrier;
|
let background_jobs_barrier = background_jobs_barrier;
|
||||||
let metrics_ctx = RequestContext::todo_child(
|
let metrics_ctx = RequestContext::todo_child(
|
||||||
TaskKind::MetricsCollection,
|
TaskKind::MetricsCollection,
|
||||||
// This task itself shouldn't download anything.
|
// This task itself shouldn't download anything.
|
||||||
// The actual size calculation does need downloads, and
|
// The actual size calculation does need downloads, and
|
||||||
// creates a child context with the right DownloadBehavior.
|
// creates a child context with the right DownloadBehavior.
|
||||||
DownloadBehavior::Error,
|
DownloadBehavior::Error,
|
||||||
);
|
);
|
||||||
task_mgr::spawn(
|
task_mgr::spawn(
|
||||||
MGMT_REQUEST_RUNTIME.handle(),
|
crate::BACKGROUND_RUNTIME.handle(),
|
||||||
TaskKind::MetricsCollection,
|
TaskKind::MetricsCollection,
|
||||||
None,
|
None,
|
||||||
None,
|
None,
|
||||||
"consumption metrics collection",
|
"consumption metrics collection",
|
||||||
true,
|
true,
|
||||||
async move {
|
async move {
|
||||||
// first wait until background jobs are cleared to launch.
|
// first wait until background jobs are cleared to launch.
|
||||||
//
|
//
|
||||||
// this is because we only process active tenants and timelines, and the
|
// this is because we only process active tenants and timelines, and the
|
||||||
// Timeline::get_current_logical_size will spawn the logical size calculation,
|
// Timeline::get_current_logical_size will spawn the logical size calculation,
|
||||||
// which will not be rate-limited.
|
// which will not be rate-limited.
|
||||||
let cancel = task_mgr::shutdown_token();
|
let cancel = task_mgr::shutdown_token();
|
||||||
|
|
||||||
tokio::select! {
|
tokio::select! {
|
||||||
_ = cancel.cancelled() => { return Ok(()); },
|
_ = cancel.cancelled() => { return Ok(()); },
|
||||||
_ = background_jobs_barrier.wait() => {}
|
_ = background_jobs_barrier.wait() => {}
|
||||||
};
|
};
|
||||||
|
|
||||||
pageserver::consumption_metrics::collect_metrics(
|
pageserver::consumption_metrics::collect_metrics(
|
||||||
metric_collection_endpoint,
|
metric_collection_endpoint,
|
||||||
conf.metric_collection_interval,
|
conf.metric_collection_interval,
|
||||||
conf.cached_metric_collection_interval,
|
conf.cached_metric_collection_interval,
|
||||||
conf.synthetic_size_calculation_interval,
|
conf.synthetic_size_calculation_interval,
|
||||||
conf.id,
|
conf.id,
|
||||||
metrics_ctx,
|
metrics_ctx,
|
||||||
)
|
)
|
||||||
.instrument(info_span!("metrics_collection"))
|
.instrument(info_span!("metrics_collection"))
|
||||||
.await?;
|
.await?;
|
||||||
Ok(())
|
Ok(())
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Spawn a task to listen for libpq connections. It will spawn further tasks
|
// Spawn a task to listen for libpq connections. It will spawn further tasks
|
||||||
|
|||||||
@@ -96,12 +96,12 @@ pub mod defaults {
|
|||||||
|
|
||||||
#background_task_maximum_delay = '{DEFAULT_BACKGROUND_TASK_MAXIMUM_DELAY}'
|
#background_task_maximum_delay = '{DEFAULT_BACKGROUND_TASK_MAXIMUM_DELAY}'
|
||||||
|
|
||||||
# [tenant_config]
|
[tenant_config]
|
||||||
#checkpoint_distance = {DEFAULT_CHECKPOINT_DISTANCE} # in bytes
|
#checkpoint_distance = {DEFAULT_CHECKPOINT_DISTANCE} # in bytes
|
||||||
#checkpoint_timeout = {DEFAULT_CHECKPOINT_TIMEOUT}
|
#checkpoint_timeout = {DEFAULT_CHECKPOINT_TIMEOUT}
|
||||||
#compaction_target_size = {DEFAULT_COMPACTION_TARGET_SIZE} # in bytes
|
#compaction_target_size = {DEFAULT_COMPACTION_TARGET_SIZE} # in bytes
|
||||||
#compaction_period = '{DEFAULT_COMPACTION_PERIOD}'
|
#compaction_period = '{DEFAULT_COMPACTION_PERIOD}'
|
||||||
#compaction_threshold = '{DEFAULT_COMPACTION_THRESHOLD}'
|
#compaction_threshold = {DEFAULT_COMPACTION_THRESHOLD}
|
||||||
|
|
||||||
#gc_period = '{DEFAULT_GC_PERIOD}'
|
#gc_period = '{DEFAULT_GC_PERIOD}'
|
||||||
#gc_horizon = {DEFAULT_GC_HORIZON}
|
#gc_horizon = {DEFAULT_GC_HORIZON}
|
||||||
@@ -111,7 +111,8 @@ pub mod defaults {
|
|||||||
#min_resident_size_override = .. # in bytes
|
#min_resident_size_override = .. # in bytes
|
||||||
#evictions_low_residence_duration_metric_threshold = '{DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD}'
|
#evictions_low_residence_duration_metric_threshold = '{DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD}'
|
||||||
#gc_feedback = false
|
#gc_feedback = false
|
||||||
# [remote_storage]
|
|
||||||
|
[remote_storage]
|
||||||
|
|
||||||
"###
|
"###
|
||||||
);
|
);
|
||||||
|
|||||||
@@ -24,6 +24,8 @@ const RESIDENT_SIZE: &str = "resident_size";
|
|||||||
const REMOTE_STORAGE_SIZE: &str = "remote_storage_size";
|
const REMOTE_STORAGE_SIZE: &str = "remote_storage_size";
|
||||||
const TIMELINE_LOGICAL_SIZE: &str = "timeline_logical_size";
|
const TIMELINE_LOGICAL_SIZE: &str = "timeline_logical_size";
|
||||||
|
|
||||||
|
const DEFAULT_HTTP_REPORTING_TIMEOUT: Duration = Duration::from_secs(60);
|
||||||
|
|
||||||
#[serde_as]
|
#[serde_as]
|
||||||
#[derive(Serialize, Debug)]
|
#[derive(Serialize, Debug)]
|
||||||
struct Ids {
|
struct Ids {
|
||||||
@@ -73,7 +75,10 @@ pub async fn collect_metrics(
|
|||||||
);
|
);
|
||||||
|
|
||||||
// define client here to reuse it for all requests
|
// define client here to reuse it for all requests
|
||||||
let client = reqwest::Client::new();
|
let client = reqwest::ClientBuilder::new()
|
||||||
|
.timeout(DEFAULT_HTTP_REPORTING_TIMEOUT)
|
||||||
|
.build()
|
||||||
|
.expect("Failed to create http client with timeout");
|
||||||
let mut cached_metrics: HashMap<PageserverConsumptionMetricsKey, u64> = HashMap::new();
|
let mut cached_metrics: HashMap<PageserverConsumptionMetricsKey, u64> = HashMap::new();
|
||||||
let mut prev_iteration_time: std::time::Instant = std::time::Instant::now();
|
let mut prev_iteration_time: std::time::Instant = std::time::Instant::now();
|
||||||
|
|
||||||
@@ -83,7 +88,7 @@ pub async fn collect_metrics(
|
|||||||
info!("collect_metrics received cancellation request");
|
info!("collect_metrics received cancellation request");
|
||||||
return Ok(());
|
return Ok(());
|
||||||
},
|
},
|
||||||
_ = ticker.tick() => {
|
tick_at = ticker.tick() => {
|
||||||
|
|
||||||
// send cached metrics every cached_metric_collection_interval
|
// send cached metrics every cached_metric_collection_interval
|
||||||
let send_cached = prev_iteration_time.elapsed() >= cached_metric_collection_interval;
|
let send_cached = prev_iteration_time.elapsed() >= cached_metric_collection_interval;
|
||||||
@@ -93,6 +98,12 @@ pub async fn collect_metrics(
|
|||||||
}
|
}
|
||||||
|
|
||||||
collect_metrics_iteration(&client, &mut cached_metrics, metric_collection_endpoint, node_id, &ctx, send_cached).await;
|
collect_metrics_iteration(&client, &mut cached_metrics, metric_collection_endpoint, node_id, &ctx, send_cached).await;
|
||||||
|
|
||||||
|
crate::tenant::tasks::warn_when_period_overrun(
|
||||||
|
tick_at.elapsed(),
|
||||||
|
metric_collection_interval,
|
||||||
|
"consumption_metrics_collect_metrics",
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -273,31 +284,42 @@ pub async fn collect_metrics_iteration(
|
|||||||
})
|
})
|
||||||
.expect("PageserverConsumptionMetric should not fail serialization");
|
.expect("PageserverConsumptionMetric should not fail serialization");
|
||||||
|
|
||||||
let res = client
|
const MAX_RETRIES: u32 = 3;
|
||||||
.post(metric_collection_endpoint.clone())
|
|
||||||
.json(&chunk_json)
|
|
||||||
.send()
|
|
||||||
.await;
|
|
||||||
|
|
||||||
match res {
|
for attempt in 0..MAX_RETRIES {
|
||||||
Ok(res) => {
|
let res = client
|
||||||
if res.status().is_success() {
|
.post(metric_collection_endpoint.clone())
|
||||||
// update cached metrics after they were sent successfully
|
.json(&chunk_json)
|
||||||
for (curr_key, curr_val) in chunk.iter() {
|
.send()
|
||||||
cached_metrics.insert(curr_key.clone(), *curr_val);
|
.await;
|
||||||
}
|
|
||||||
} else {
|
match res {
|
||||||
error!("metrics endpoint refused the sent metrics: {:?}", res);
|
Ok(res) => {
|
||||||
for metric in chunk_to_send.iter() {
|
if res.status().is_success() {
|
||||||
// Report if the metric value is suspiciously large
|
// update cached metrics after they were sent successfully
|
||||||
if metric.value > (1u64 << 40) {
|
for (curr_key, curr_val) in chunk.iter() {
|
||||||
|
cached_metrics.insert(curr_key.clone(), *curr_val);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
error!("metrics endpoint refused the sent metrics: {:?}", res);
|
||||||
|
for metric in chunk_to_send
|
||||||
|
.iter()
|
||||||
|
.filter(|metric| metric.value > (1u64 << 40))
|
||||||
|
{
|
||||||
|
// Report if the metric value is suspiciously large
|
||||||
error!("potentially abnormal metric value: {:?}", metric);
|
error!("potentially abnormal metric value: {:?}", metric);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
Err(err) if err.is_timeout() => {
|
||||||
|
error!(attempt, "timeout sending metrics, retrying immediately");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
Err(err) => {
|
||||||
|
error!(attempt, ?err, "failed to send metrics");
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
Err(err) => {
|
|
||||||
error!("failed to send metrics: {:?}", err);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -317,7 +339,7 @@ pub async fn calculate_synthetic_size_worker(
|
|||||||
_ = task_mgr::shutdown_watcher() => {
|
_ = task_mgr::shutdown_watcher() => {
|
||||||
return Ok(());
|
return Ok(());
|
||||||
},
|
},
|
||||||
_ = ticker.tick() => {
|
tick_at = ticker.tick() => {
|
||||||
|
|
||||||
let tenants = match mgr::list_tenants().await {
|
let tenants = match mgr::list_tenants().await {
|
||||||
Ok(tenants) => tenants,
|
Ok(tenants) => tenants,
|
||||||
@@ -343,6 +365,12 @@ pub async fn calculate_synthetic_size_worker(
|
|||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
crate::tenant::tasks::warn_when_period_overrun(
|
||||||
|
tick_at.elapsed(),
|
||||||
|
synthetic_size_calculation_interval,
|
||||||
|
"consumption_metrics_synthetic_size_worker",
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -110,7 +110,6 @@ pub fn launch_disk_usage_global_eviction_task(
|
|||||||
|
|
||||||
disk_usage_eviction_task(&state, task_config, storage, &conf.tenants_path(), cancel)
|
disk_usage_eviction_task(&state, task_config, storage, &conf.tenants_path(), cancel)
|
||||||
.await;
|
.await;
|
||||||
info!("disk usage based eviction task finishing");
|
|
||||||
Ok(())
|
Ok(())
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
@@ -126,13 +125,16 @@ async fn disk_usage_eviction_task(
|
|||||||
tenants_dir: &Path,
|
tenants_dir: &Path,
|
||||||
cancel: CancellationToken,
|
cancel: CancellationToken,
|
||||||
) {
|
) {
|
||||||
|
scopeguard::defer! {
|
||||||
|
info!("disk usage based eviction task finishing");
|
||||||
|
};
|
||||||
|
|
||||||
use crate::tenant::tasks::random_init_delay;
|
use crate::tenant::tasks::random_init_delay;
|
||||||
{
|
{
|
||||||
if random_init_delay(task_config.period, &cancel)
|
if random_init_delay(task_config.period, &cancel)
|
||||||
.await
|
.await
|
||||||
.is_err()
|
.is_err()
|
||||||
{
|
{
|
||||||
info!("shutting down");
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -167,7 +169,6 @@ async fn disk_usage_eviction_task(
|
|||||||
tokio::select! {
|
tokio::select! {
|
||||||
_ = tokio::time::sleep_until(sleep_until) => {},
|
_ = tokio::time::sleep_until(sleep_until) => {},
|
||||||
_ = cancel.cancelled() => {
|
_ = cancel.cancelled() => {
|
||||||
info!("shutting down");
|
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -314,7 +315,7 @@ pub async fn disk_usage_eviction_task_iteration_impl<U: Usage>(
|
|||||||
partition,
|
partition,
|
||||||
candidate.layer.get_tenant_id(),
|
candidate.layer.get_tenant_id(),
|
||||||
candidate.layer.get_timeline_id(),
|
candidate.layer.get_timeline_id(),
|
||||||
candidate.layer.filename().file_name(),
|
candidate.layer,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -516,7 +517,7 @@ async fn collect_eviction_candidates(
|
|||||||
if !tl.is_active() {
|
if !tl.is_active() {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
let info = tl.get_local_layers_for_disk_usage_eviction();
|
let info = tl.get_local_layers_for_disk_usage_eviction().await;
|
||||||
debug!(tenant_id=%tl.tenant_id, timeline_id=%tl.timeline_id, "timeline resident layers count: {}", info.resident_layers.len());
|
debug!(tenant_id=%tl.tenant_id, timeline_id=%tl.timeline_id, "timeline resident layers count: {}", info.resident_layers.len());
|
||||||
tenant_candidates.extend(
|
tenant_candidates.extend(
|
||||||
info.resident_layers
|
info.resident_layers
|
||||||
|
|||||||
@@ -186,10 +186,8 @@ paths:
|
|||||||
schema:
|
schema:
|
||||||
$ref: "#/components/schemas/Error"
|
$ref: "#/components/schemas/Error"
|
||||||
delete:
|
delete:
|
||||||
description: "Attempts to delete specified timeline. On 500 errors should be retried"
|
description: "Attempts to delete specified timeline. 500 and 409 errors should be retried"
|
||||||
responses:
|
responses:
|
||||||
"200":
|
|
||||||
description: Ok
|
|
||||||
"400":
|
"400":
|
||||||
description: Error when no tenant id found in path or no timeline id
|
description: Error when no tenant id found in path or no timeline id
|
||||||
content:
|
content:
|
||||||
@@ -214,8 +212,14 @@ paths:
|
|||||||
application/json:
|
application/json:
|
||||||
schema:
|
schema:
|
||||||
$ref: "#/components/schemas/NotFoundError"
|
$ref: "#/components/schemas/NotFoundError"
|
||||||
|
"409":
|
||||||
|
description: Deletion is already in progress, continue polling
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: "#/components/schemas/ConflictError"
|
||||||
"412":
|
"412":
|
||||||
description: Tenant is missing
|
description: Tenant is missing, or timeline has children
|
||||||
content:
|
content:
|
||||||
application/json:
|
application/json:
|
||||||
schema:
|
schema:
|
||||||
@@ -386,6 +390,7 @@ paths:
|
|||||||
"202":
|
"202":
|
||||||
description: Tenant attaching scheduled
|
description: Tenant attaching scheduled
|
||||||
"400":
|
"400":
|
||||||
|
description: Bad Request
|
||||||
content:
|
content:
|
||||||
application/json:
|
application/json:
|
||||||
schema:
|
schema:
|
||||||
@@ -717,6 +722,12 @@ paths:
|
|||||||
application/json:
|
application/json:
|
||||||
schema:
|
schema:
|
||||||
$ref: "#/components/schemas/ForbiddenError"
|
$ref: "#/components/schemas/ForbiddenError"
|
||||||
|
"406":
|
||||||
|
description: Permanently unsatisfiable request, don't retry.
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: "#/components/schemas/Error"
|
||||||
"409":
|
"409":
|
||||||
description: Timeline already exists, creation skipped
|
description: Timeline already exists, creation skipped
|
||||||
content:
|
content:
|
||||||
@@ -945,7 +956,7 @@ components:
|
|||||||
type: string
|
type: string
|
||||||
enum: [ "maybe", "attached", "failed" ]
|
enum: [ "maybe", "attached", "failed" ]
|
||||||
data:
|
data:
|
||||||
- type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
reason:
|
reason:
|
||||||
type: string
|
type: string
|
||||||
|
|||||||
@@ -23,7 +23,6 @@ use super::models::{
|
|||||||
TimelineCreateRequest, TimelineGcRequest, TimelineInfo,
|
TimelineCreateRequest, TimelineGcRequest, TimelineInfo,
|
||||||
};
|
};
|
||||||
use crate::context::{DownloadBehavior, RequestContext};
|
use crate::context::{DownloadBehavior, RequestContext};
|
||||||
use crate::disk_usage_eviction_task;
|
|
||||||
use crate::metrics::{StorageTimeOperation, STORAGE_TIME_GLOBAL};
|
use crate::metrics::{StorageTimeOperation, STORAGE_TIME_GLOBAL};
|
||||||
use crate::pgdatadir_mapping::LsnForTimestamp;
|
use crate::pgdatadir_mapping::LsnForTimestamp;
|
||||||
use crate::task_mgr::TaskKind;
|
use crate::task_mgr::TaskKind;
|
||||||
@@ -35,6 +34,7 @@ use crate::tenant::size::ModelInputs;
|
|||||||
use crate::tenant::storage_layer::LayerAccessStatsReset;
|
use crate::tenant::storage_layer::LayerAccessStatsReset;
|
||||||
use crate::tenant::{LogicalSizeCalculationCause, PageReconstructError, Timeline};
|
use crate::tenant::{LogicalSizeCalculationCause, PageReconstructError, Timeline};
|
||||||
use crate::{config::PageServerConf, tenant::mgr};
|
use crate::{config::PageServerConf, tenant::mgr};
|
||||||
|
use crate::{disk_usage_eviction_task, tenant};
|
||||||
use utils::{
|
use utils::{
|
||||||
auth::JwtAuth,
|
auth::JwtAuth,
|
||||||
http::{
|
http::{
|
||||||
@@ -142,7 +142,7 @@ impl From<TenantMapInsertError> for ApiError {
|
|||||||
impl From<TenantStateError> for ApiError {
|
impl From<TenantStateError> for ApiError {
|
||||||
fn from(tse: TenantStateError) -> ApiError {
|
fn from(tse: TenantStateError) -> ApiError {
|
||||||
match tse {
|
match tse {
|
||||||
TenantStateError::NotFound(tid) => ApiError::NotFound(anyhow!("tenant {}", tid)),
|
TenantStateError::NotFound(tid) => ApiError::NotFound(anyhow!("tenant {}", tid).into()),
|
||||||
_ => ApiError::InternalServerError(anyhow::Error::new(tse)),
|
_ => ApiError::InternalServerError(anyhow::Error::new(tse)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -151,7 +151,7 @@ impl From<TenantStateError> for ApiError {
|
|||||||
impl From<GetTenantError> for ApiError {
|
impl From<GetTenantError> for ApiError {
|
||||||
fn from(tse: GetTenantError) -> ApiError {
|
fn from(tse: GetTenantError) -> ApiError {
|
||||||
match tse {
|
match tse {
|
||||||
GetTenantError::NotFound(tid) => ApiError::NotFound(anyhow!("tenant {}", tid)),
|
GetTenantError::NotFound(tid) => ApiError::NotFound(anyhow!("tenant {}", tid).into()),
|
||||||
e @ GetTenantError::NotActive(_) => {
|
e @ GetTenantError::NotActive(_) => {
|
||||||
// Why is this not `ApiError::NotFound`?
|
// Why is this not `ApiError::NotFound`?
|
||||||
// Because we must be careful to never return 404 for a tenant if it does
|
// Because we must be careful to never return 404 for a tenant if it does
|
||||||
@@ -169,7 +169,7 @@ impl From<SetNewTenantConfigError> for ApiError {
|
|||||||
fn from(e: SetNewTenantConfigError) -> ApiError {
|
fn from(e: SetNewTenantConfigError) -> ApiError {
|
||||||
match e {
|
match e {
|
||||||
SetNewTenantConfigError::GetTenant(tid) => {
|
SetNewTenantConfigError::GetTenant(tid) => {
|
||||||
ApiError::NotFound(anyhow!("tenant {}", tid))
|
ApiError::NotFound(anyhow!("tenant {}", tid).into())
|
||||||
}
|
}
|
||||||
e @ SetNewTenantConfigError::Persist(_) => {
|
e @ SetNewTenantConfigError::Persist(_) => {
|
||||||
ApiError::InternalServerError(anyhow::Error::new(e))
|
ApiError::InternalServerError(anyhow::Error::new(e))
|
||||||
@@ -182,10 +182,12 @@ impl From<crate::tenant::DeleteTimelineError> for ApiError {
|
|||||||
fn from(value: crate::tenant::DeleteTimelineError) -> Self {
|
fn from(value: crate::tenant::DeleteTimelineError) -> Self {
|
||||||
use crate::tenant::DeleteTimelineError::*;
|
use crate::tenant::DeleteTimelineError::*;
|
||||||
match value {
|
match value {
|
||||||
NotFound => ApiError::NotFound(anyhow::anyhow!("timeline not found")),
|
NotFound => ApiError::NotFound(anyhow::anyhow!("timeline not found").into()),
|
||||||
HasChildren => ApiError::BadRequest(anyhow::anyhow!(
|
HasChildren(children) => ApiError::PreconditionFailed(
|
||||||
"Cannot delete timeline which has child timelines"
|
format!("Cannot delete timeline which has child timelines: {children:?}")
|
||||||
)),
|
.into_boxed_str(),
|
||||||
|
),
|
||||||
|
a @ AlreadyInProgress => ApiError::Conflict(a.to_string()),
|
||||||
Other(e) => ApiError::InternalServerError(e),
|
Other(e) => ApiError::InternalServerError(e),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -197,9 +199,9 @@ impl From<crate::tenant::mgr::DeleteTimelineError> for ApiError {
|
|||||||
match value {
|
match value {
|
||||||
// Report Precondition failed so client can distinguish between
|
// Report Precondition failed so client can distinguish between
|
||||||
// "tenant is missing" case from "timeline is missing"
|
// "tenant is missing" case from "timeline is missing"
|
||||||
Tenant(GetTenantError::NotFound(..)) => {
|
Tenant(GetTenantError::NotFound(..)) => ApiError::PreconditionFailed(
|
||||||
ApiError::PreconditionFailed("Requested tenant is missing")
|
"Requested tenant is missing".to_owned().into_boxed_str(),
|
||||||
}
|
),
|
||||||
Tenant(t) => ApiError::from(t),
|
Tenant(t) => ApiError::from(t),
|
||||||
Timeline(t) => ApiError::from(t),
|
Timeline(t) => ApiError::from(t),
|
||||||
}
|
}
|
||||||
@@ -214,7 +216,7 @@ async fn build_timeline_info(
|
|||||||
) -> anyhow::Result<TimelineInfo> {
|
) -> anyhow::Result<TimelineInfo> {
|
||||||
crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id();
|
crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id();
|
||||||
|
|
||||||
let mut info = build_timeline_info_common(timeline, ctx)?;
|
let mut info = build_timeline_info_common(timeline, ctx).await?;
|
||||||
if include_non_incremental_logical_size {
|
if include_non_incremental_logical_size {
|
||||||
// XXX we should be using spawn_ondemand_logical_size_calculation here.
|
// XXX we should be using spawn_ondemand_logical_size_calculation here.
|
||||||
// Otherwise, if someone deletes the timeline / detaches the tenant while
|
// Otherwise, if someone deletes the timeline / detaches the tenant while
|
||||||
@@ -232,7 +234,7 @@ async fn build_timeline_info(
|
|||||||
Ok(info)
|
Ok(info)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn build_timeline_info_common(
|
async fn build_timeline_info_common(
|
||||||
timeline: &Arc<Timeline>,
|
timeline: &Arc<Timeline>,
|
||||||
ctx: &RequestContext,
|
ctx: &RequestContext,
|
||||||
) -> anyhow::Result<TimelineInfo> {
|
) -> anyhow::Result<TimelineInfo> {
|
||||||
@@ -263,7 +265,7 @@ fn build_timeline_info_common(
|
|||||||
None
|
None
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
let current_physical_size = Some(timeline.layer_size_sum());
|
let current_physical_size = Some(timeline.layer_size_sum().await);
|
||||||
let state = timeline.current_state();
|
let state = timeline.current_state();
|
||||||
let remote_consistent_lsn = timeline.get_remote_consistent_lsn().unwrap_or(Lsn(0));
|
let remote_consistent_lsn = timeline.get_remote_consistent_lsn().unwrap_or(Lsn(0));
|
||||||
|
|
||||||
@@ -326,14 +328,22 @@ async fn timeline_create_handler(
|
|||||||
&ctx,
|
&ctx,
|
||||||
)
|
)
|
||||||
.await {
|
.await {
|
||||||
Ok(Some(new_timeline)) => {
|
Ok(new_timeline) => {
|
||||||
// Created. Construct a TimelineInfo for it.
|
// Created. Construct a TimelineInfo for it.
|
||||||
let timeline_info = build_timeline_info_common(&new_timeline, &ctx)
|
let timeline_info = build_timeline_info_common(&new_timeline, &ctx)
|
||||||
|
.await
|
||||||
.map_err(ApiError::InternalServerError)?;
|
.map_err(ApiError::InternalServerError)?;
|
||||||
json_response(StatusCode::CREATED, timeline_info)
|
json_response(StatusCode::CREATED, timeline_info)
|
||||||
}
|
}
|
||||||
Ok(None) => json_response(StatusCode::CONFLICT, ()), // timeline already exists
|
Err(tenant::CreateTimelineError::AlreadyExists) => {
|
||||||
Err(err) => Err(ApiError::InternalServerError(err)),
|
json_response(StatusCode::CONFLICT, ())
|
||||||
|
}
|
||||||
|
Err(tenant::CreateTimelineError::AncestorLsn(err)) => {
|
||||||
|
json_response(StatusCode::NOT_ACCEPTABLE, HttpErrorBody::from_msg(
|
||||||
|
format!("{err:#}")
|
||||||
|
))
|
||||||
|
}
|
||||||
|
Err(tenant::CreateTimelineError::Other(err)) => Err(ApiError::InternalServerError(err)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
.instrument(info_span!("timeline_create", tenant = %tenant_id, timeline_id = %new_timeline_id, lsn=?request_data.ancestor_start_lsn, pg_version=?request_data.pg_version))
|
.instrument(info_span!("timeline_create", tenant = %tenant_id, timeline_id = %new_timeline_id, lsn=?request_data.ancestor_start_lsn, pg_version=?request_data.pg_version))
|
||||||
@@ -395,7 +405,7 @@ async fn timeline_detail_handler(
|
|||||||
|
|
||||||
let timeline = tenant
|
let timeline = tenant
|
||||||
.get_timeline(timeline_id, false)
|
.get_timeline(timeline_id, false)
|
||||||
.map_err(ApiError::NotFound)?;
|
.map_err(|e| ApiError::NotFound(e.into()))?;
|
||||||
|
|
||||||
let timeline_info = build_timeline_info(
|
let timeline_info = build_timeline_info(
|
||||||
&timeline,
|
&timeline,
|
||||||
@@ -494,7 +504,8 @@ async fn timeline_delete_handler(
|
|||||||
.instrument(info_span!("timeline_delete", tenant = %tenant_id, timeline = %timeline_id))
|
.instrument(info_span!("timeline_delete", tenant = %tenant_id, timeline = %timeline_id))
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
json_response(StatusCode::OK, ())
|
// FIXME: needs to be an error for console to retry it. Ideally Accepted should be used and retried until 404.
|
||||||
|
json_response(StatusCode::ACCEPTED, ())
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn tenant_detach_handler(
|
async fn tenant_detach_handler(
|
||||||
@@ -589,7 +600,7 @@ async fn tenant_status(
|
|||||||
// Calculate total physical size of all timelines
|
// Calculate total physical size of all timelines
|
||||||
let mut current_physical_size = 0;
|
let mut current_physical_size = 0;
|
||||||
for timeline in tenant.list_timelines().iter() {
|
for timeline in tenant.list_timelines().iter() {
|
||||||
current_physical_size += timeline.layer_size_sum();
|
current_physical_size += timeline.layer_size_sum().await;
|
||||||
}
|
}
|
||||||
|
|
||||||
let state = tenant.current_state();
|
let state = tenant.current_state();
|
||||||
@@ -699,7 +710,7 @@ async fn layer_map_info_handler(
|
|||||||
check_permission(&request, Some(tenant_id))?;
|
check_permission(&request, Some(tenant_id))?;
|
||||||
|
|
||||||
let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
|
let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
|
||||||
let layer_map_info = timeline.layer_map_info(reset);
|
let layer_map_info = timeline.layer_map_info(reset).await;
|
||||||
|
|
||||||
json_response(StatusCode::OK, layer_map_info)
|
json_response(StatusCode::OK, layer_map_info)
|
||||||
}
|
}
|
||||||
@@ -1058,7 +1069,7 @@ async fn timeline_download_remote_layers_handler_get(
|
|||||||
let info = timeline
|
let info = timeline
|
||||||
.get_download_all_remote_layers_task_info()
|
.get_download_all_remote_layers_task_info()
|
||||||
.context("task never started since last pageserver process start")
|
.context("task never started since last pageserver process start")
|
||||||
.map_err(ApiError::NotFound)?;
|
.map_err(|e| ApiError::NotFound(e.into()))?;
|
||||||
json_response(StatusCode::OK, info)
|
json_response(StatusCode::OK, info)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1069,7 +1080,7 @@ async fn active_timeline_of_active_tenant(
|
|||||||
let tenant = mgr::get_tenant(tenant_id, true).await?;
|
let tenant = mgr::get_tenant(tenant_id, true).await?;
|
||||||
tenant
|
tenant
|
||||||
.get_timeline(timeline_id, true)
|
.get_timeline(timeline_id, true)
|
||||||
.map_err(ApiError::NotFound)
|
.map_err(|e| ApiError::NotFound(e.into()))
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn always_panic_handler(
|
async fn always_panic_handler(
|
||||||
@@ -1125,8 +1136,6 @@ async fn disk_usage_eviction_run(
|
|||||||
freed_bytes: 0,
|
freed_bytes: 0,
|
||||||
};
|
};
|
||||||
|
|
||||||
use crate::task_mgr::MGMT_REQUEST_RUNTIME;
|
|
||||||
|
|
||||||
let (tx, rx) = tokio::sync::oneshot::channel();
|
let (tx, rx) = tokio::sync::oneshot::channel();
|
||||||
|
|
||||||
let state = get_state(&r);
|
let state = get_state(&r);
|
||||||
@@ -1144,7 +1153,7 @@ async fn disk_usage_eviction_run(
|
|||||||
let _g = cancel.drop_guard();
|
let _g = cancel.drop_guard();
|
||||||
|
|
||||||
crate::task_mgr::spawn(
|
crate::task_mgr::spawn(
|
||||||
MGMT_REQUEST_RUNTIME.handle(),
|
crate::task_mgr::BACKGROUND_RUNTIME.handle(),
|
||||||
TaskKind::DiskUsageEviction,
|
TaskKind::DiskUsageEviction,
|
||||||
None,
|
None,
|
||||||
None,
|
None,
|
||||||
|
|||||||
@@ -75,12 +75,12 @@ pub async fn import_timeline_from_postgres_datadir(
|
|||||||
{
|
{
|
||||||
pg_control = Some(control_file);
|
pg_control = Some(control_file);
|
||||||
}
|
}
|
||||||
modification.flush()?;
|
modification.flush().await?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// We're done importing all the data files.
|
// We're done importing all the data files.
|
||||||
modification.commit()?;
|
modification.commit().await?;
|
||||||
|
|
||||||
// We expect the Postgres server to be shut down cleanly.
|
// We expect the Postgres server to be shut down cleanly.
|
||||||
let pg_control = pg_control.context("pg_control file not found")?;
|
let pg_control = pg_control.context("pg_control file not found")?;
|
||||||
@@ -148,17 +148,17 @@ async fn import_rel(
|
|||||||
// because there is no guarantee about the order in which we are processing segments.
|
// because there is no guarantee about the order in which we are processing segments.
|
||||||
// ignore "relation already exists" error
|
// ignore "relation already exists" error
|
||||||
//
|
//
|
||||||
// FIXME: use proper error type for this, instead of parsing the error message.
|
// FIXME: Keep track of which relations we've already created?
|
||||||
// Or better yet, keep track of which relations we've already created
|
|
||||||
// https://github.com/neondatabase/neon/issues/3309
|
// https://github.com/neondatabase/neon/issues/3309
|
||||||
if let Err(e) = modification
|
if let Err(e) = modification
|
||||||
.put_rel_creation(rel, nblocks as u32, ctx)
|
.put_rel_creation(rel, nblocks as u32, ctx)
|
||||||
.await
|
.await
|
||||||
{
|
{
|
||||||
if e.to_string().contains("already exists") {
|
match e {
|
||||||
debug!("relation {} already exists. we must be extending it", rel);
|
RelationError::AlreadyExists => {
|
||||||
} else {
|
debug!("Relation {} already exist. We must be extending it.", rel)
|
||||||
return Err(e);
|
}
|
||||||
|
_ => return Err(e.into()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -359,7 +359,7 @@ pub async fn import_basebackup_from_tar(
|
|||||||
// We found the pg_control file.
|
// We found the pg_control file.
|
||||||
pg_control = Some(res);
|
pg_control = Some(res);
|
||||||
}
|
}
|
||||||
modification.flush()?;
|
modification.flush().await?;
|
||||||
}
|
}
|
||||||
tokio_tar::EntryType::Directory => {
|
tokio_tar::EntryType::Directory => {
|
||||||
debug!("directory {:?}", file_path);
|
debug!("directory {:?}", file_path);
|
||||||
@@ -377,7 +377,7 @@ pub async fn import_basebackup_from_tar(
|
|||||||
// sanity check: ensure that pg_control is loaded
|
// sanity check: ensure that pg_control is loaded
|
||||||
let _pg_control = pg_control.context("pg_control file not found")?;
|
let _pg_control = pg_control.context("pg_control file not found")?;
|
||||||
|
|
||||||
modification.commit()?;
|
modification.commit().await?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -594,7 +594,7 @@ async fn import_file(
|
|||||||
// zenith.signal is not necessarily the last file, that we handle
|
// zenith.signal is not necessarily the last file, that we handle
|
||||||
// but it is ok to call `finish_write()`, because final `modification.commit()`
|
// but it is ok to call `finish_write()`, because final `modification.commit()`
|
||||||
// will update lsn once more to the final one.
|
// will update lsn once more to the final one.
|
||||||
let writer = modification.tline.writer();
|
let writer = modification.tline.writer().await;
|
||||||
writer.finish_write(prev_lsn);
|
writer.finish_write(prev_lsn);
|
||||||
|
|
||||||
debug!("imported zenith signal {}", prev_lsn);
|
debug!("imported zenith signal {}", prev_lsn);
|
||||||
|
|||||||
@@ -1,9 +1,9 @@
|
|||||||
use metrics::core::{AtomicU64, GenericCounter};
|
use metrics::metric_vec_duration::DurationResultObserver;
|
||||||
use metrics::{
|
use metrics::{
|
||||||
register_counter_vec, register_histogram, register_histogram_vec, register_int_counter,
|
register_counter_vec, register_histogram, register_histogram_vec, register_int_counter,
|
||||||
register_int_counter_vec, register_int_gauge, register_int_gauge_vec, register_uint_gauge_vec,
|
register_int_counter_vec, register_int_gauge, register_int_gauge_vec, register_uint_gauge,
|
||||||
Counter, CounterVec, Histogram, HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec,
|
register_uint_gauge_vec, Counter, CounterVec, Histogram, HistogramVec, IntCounter,
|
||||||
UIntGauge, UIntGaugeVec,
|
IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec,
|
||||||
};
|
};
|
||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
use pageserver_api::models::TenantState;
|
use pageserver_api::models::TenantState;
|
||||||
@@ -95,21 +95,19 @@ static READ_NUM_FS_LAYERS: Lazy<HistogramVec> = Lazy::new(|| {
|
|||||||
});
|
});
|
||||||
|
|
||||||
// Metrics collected on operations on the storage repository.
|
// Metrics collected on operations on the storage repository.
|
||||||
static RECONSTRUCT_TIME: Lazy<HistogramVec> = Lazy::new(|| {
|
pub static RECONSTRUCT_TIME: Lazy<Histogram> = Lazy::new(|| {
|
||||||
register_histogram_vec!(
|
register_histogram!(
|
||||||
"pageserver_getpage_reconstruct_seconds",
|
"pageserver_getpage_reconstruct_seconds",
|
||||||
"Time spent in reconstruct_value",
|
"Time spent in reconstruct_value (reconstruct a page from deltas)",
|
||||||
&["tenant_id", "timeline_id"],
|
|
||||||
CRITICAL_OP_BUCKETS.into(),
|
CRITICAL_OP_BUCKETS.into(),
|
||||||
)
|
)
|
||||||
.expect("failed to define a metric")
|
.expect("failed to define a metric")
|
||||||
});
|
});
|
||||||
|
|
||||||
static MATERIALIZED_PAGE_CACHE_HIT_DIRECT: Lazy<IntCounterVec> = Lazy::new(|| {
|
pub static MATERIALIZED_PAGE_CACHE_HIT_DIRECT: Lazy<IntCounter> = Lazy::new(|| {
|
||||||
register_int_counter_vec!(
|
register_int_counter!(
|
||||||
"pageserver_materialized_cache_hits_direct_total",
|
"pageserver_materialized_cache_hits_direct_total",
|
||||||
"Number of cache hits from materialized page cache without redo",
|
"Number of cache hits from materialized page cache without redo",
|
||||||
&["tenant_id", "timeline_id"]
|
|
||||||
)
|
)
|
||||||
.expect("failed to define a metric")
|
.expect("failed to define a metric")
|
||||||
});
|
});
|
||||||
@@ -124,15 +122,130 @@ static GET_RECONSTRUCT_DATA_TIME: Lazy<HistogramVec> = Lazy::new(|| {
|
|||||||
.expect("failed to define a metric")
|
.expect("failed to define a metric")
|
||||||
});
|
});
|
||||||
|
|
||||||
static MATERIALIZED_PAGE_CACHE_HIT: Lazy<IntCounterVec> = Lazy::new(|| {
|
pub static MATERIALIZED_PAGE_CACHE_HIT: Lazy<IntCounter> = Lazy::new(|| {
|
||||||
register_int_counter_vec!(
|
register_int_counter!(
|
||||||
"pageserver_materialized_cache_hits_total",
|
"pageserver_materialized_cache_hits_total",
|
||||||
"Number of cache hits from materialized page cache",
|
"Number of cache hits from materialized page cache",
|
||||||
&["tenant_id", "timeline_id"]
|
|
||||||
)
|
)
|
||||||
.expect("failed to define a metric")
|
.expect("failed to define a metric")
|
||||||
});
|
});
|
||||||
|
|
||||||
|
pub struct PageCacheMetrics {
|
||||||
|
pub read_accesses_materialized_page: IntCounter,
|
||||||
|
pub read_accesses_ephemeral: IntCounter,
|
||||||
|
pub read_accesses_immutable: IntCounter,
|
||||||
|
|
||||||
|
pub read_hits_ephemeral: IntCounter,
|
||||||
|
pub read_hits_immutable: IntCounter,
|
||||||
|
pub read_hits_materialized_page_exact: IntCounter,
|
||||||
|
pub read_hits_materialized_page_older_lsn: IntCounter,
|
||||||
|
}
|
||||||
|
|
||||||
|
static PAGE_CACHE_READ_HITS: Lazy<IntCounterVec> = Lazy::new(|| {
|
||||||
|
register_int_counter_vec!(
|
||||||
|
"pageserver_page_cache_read_hits_total",
|
||||||
|
"Number of read accesses to the page cache that hit",
|
||||||
|
&["key_kind", "hit_kind"]
|
||||||
|
)
|
||||||
|
.expect("failed to define a metric")
|
||||||
|
});
|
||||||
|
|
||||||
|
static PAGE_CACHE_READ_ACCESSES: Lazy<IntCounterVec> = Lazy::new(|| {
|
||||||
|
register_int_counter_vec!(
|
||||||
|
"pageserver_page_cache_read_accesses_total",
|
||||||
|
"Number of read accesses to the page cache",
|
||||||
|
&["key_kind"]
|
||||||
|
)
|
||||||
|
.expect("failed to define a metric")
|
||||||
|
});
|
||||||
|
|
||||||
|
pub static PAGE_CACHE: Lazy<PageCacheMetrics> = Lazy::new(|| PageCacheMetrics {
|
||||||
|
read_accesses_materialized_page: {
|
||||||
|
PAGE_CACHE_READ_ACCESSES
|
||||||
|
.get_metric_with_label_values(&["materialized_page"])
|
||||||
|
.unwrap()
|
||||||
|
},
|
||||||
|
|
||||||
|
read_accesses_ephemeral: {
|
||||||
|
PAGE_CACHE_READ_ACCESSES
|
||||||
|
.get_metric_with_label_values(&["ephemeral"])
|
||||||
|
.unwrap()
|
||||||
|
},
|
||||||
|
|
||||||
|
read_accesses_immutable: {
|
||||||
|
PAGE_CACHE_READ_ACCESSES
|
||||||
|
.get_metric_with_label_values(&["immutable"])
|
||||||
|
.unwrap()
|
||||||
|
},
|
||||||
|
|
||||||
|
read_hits_ephemeral: {
|
||||||
|
PAGE_CACHE_READ_HITS
|
||||||
|
.get_metric_with_label_values(&["ephemeral", "-"])
|
||||||
|
.unwrap()
|
||||||
|
},
|
||||||
|
|
||||||
|
read_hits_immutable: {
|
||||||
|
PAGE_CACHE_READ_HITS
|
||||||
|
.get_metric_with_label_values(&["immutable", "-"])
|
||||||
|
.unwrap()
|
||||||
|
},
|
||||||
|
|
||||||
|
read_hits_materialized_page_exact: {
|
||||||
|
PAGE_CACHE_READ_HITS
|
||||||
|
.get_metric_with_label_values(&["materialized_page", "exact"])
|
||||||
|
.unwrap()
|
||||||
|
},
|
||||||
|
|
||||||
|
read_hits_materialized_page_older_lsn: {
|
||||||
|
PAGE_CACHE_READ_HITS
|
||||||
|
.get_metric_with_label_values(&["materialized_page", "older_lsn"])
|
||||||
|
.unwrap()
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
pub struct PageCacheSizeMetrics {
|
||||||
|
pub max_bytes: UIntGauge,
|
||||||
|
|
||||||
|
pub current_bytes_ephemeral: UIntGauge,
|
||||||
|
pub current_bytes_immutable: UIntGauge,
|
||||||
|
pub current_bytes_materialized_page: UIntGauge,
|
||||||
|
}
|
||||||
|
|
||||||
|
static PAGE_CACHE_SIZE_CURRENT_BYTES: Lazy<UIntGaugeVec> = Lazy::new(|| {
|
||||||
|
register_uint_gauge_vec!(
|
||||||
|
"pageserver_page_cache_size_current_bytes",
|
||||||
|
"Current size of the page cache in bytes, by key kind",
|
||||||
|
&["key_kind"]
|
||||||
|
)
|
||||||
|
.expect("failed to define a metric")
|
||||||
|
});
|
||||||
|
|
||||||
|
pub static PAGE_CACHE_SIZE: Lazy<PageCacheSizeMetrics> = Lazy::new(|| PageCacheSizeMetrics {
|
||||||
|
max_bytes: {
|
||||||
|
register_uint_gauge!(
|
||||||
|
"pageserver_page_cache_size_max_bytes",
|
||||||
|
"Maximum size of the page cache in bytes"
|
||||||
|
)
|
||||||
|
.expect("failed to define a metric")
|
||||||
|
},
|
||||||
|
|
||||||
|
current_bytes_ephemeral: {
|
||||||
|
PAGE_CACHE_SIZE_CURRENT_BYTES
|
||||||
|
.get_metric_with_label_values(&["ephemeral"])
|
||||||
|
.unwrap()
|
||||||
|
},
|
||||||
|
current_bytes_immutable: {
|
||||||
|
PAGE_CACHE_SIZE_CURRENT_BYTES
|
||||||
|
.get_metric_with_label_values(&["immutable"])
|
||||||
|
.unwrap()
|
||||||
|
},
|
||||||
|
current_bytes_materialized_page: {
|
||||||
|
PAGE_CACHE_SIZE_CURRENT_BYTES
|
||||||
|
.get_metric_with_label_values(&["materialized_page"])
|
||||||
|
.unwrap()
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
static WAIT_LSN_TIME: Lazy<HistogramVec> = Lazy::new(|| {
|
static WAIT_LSN_TIME: Lazy<HistogramVec> = Lazy::new(|| {
|
||||||
register_histogram_vec!(
|
register_histogram_vec!(
|
||||||
"pageserver_wait_lsn_seconds",
|
"pageserver_wait_lsn_seconds",
|
||||||
@@ -207,11 +320,11 @@ pub static TENANT_STATE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|| {
|
|||||||
|
|
||||||
pub static TENANT_SYNTHETIC_SIZE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|| {
|
pub static TENANT_SYNTHETIC_SIZE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|| {
|
||||||
register_uint_gauge_vec!(
|
register_uint_gauge_vec!(
|
||||||
"pageserver_tenant_synthetic_size",
|
"pageserver_tenant_synthetic_cached_size_bytes",
|
||||||
"Synthetic size of each tenant",
|
"Synthetic size of each tenant in bytes",
|
||||||
&["tenant_id"]
|
&["tenant_id"]
|
||||||
)
|
)
|
||||||
.expect("Failed to register pageserver_tenant_synthetic_size metric")
|
.expect("Failed to register pageserver_tenant_synthetic_cached_size_bytes metric")
|
||||||
});
|
});
|
||||||
|
|
||||||
// Metrics for cloud upload. These metrics reflect data uploaded to cloud storage,
|
// Metrics for cloud upload. These metrics reflect data uploaded to cloud storage,
|
||||||
@@ -428,6 +541,27 @@ pub static SMGR_QUERY_TIME: Lazy<HistogramVec> = Lazy::new(|| {
|
|||||||
.expect("failed to define a metric")
|
.expect("failed to define a metric")
|
||||||
});
|
});
|
||||||
|
|
||||||
|
pub struct BasebackupQueryTime(HistogramVec);
|
||||||
|
pub static BASEBACKUP_QUERY_TIME: Lazy<BasebackupQueryTime> = Lazy::new(|| {
|
||||||
|
BasebackupQueryTime({
|
||||||
|
register_histogram_vec!(
|
||||||
|
"pageserver_basebackup_query_seconds",
|
||||||
|
"Histogram of basebackup queries durations, by result type",
|
||||||
|
&["result"],
|
||||||
|
CRITICAL_OP_BUCKETS.into(),
|
||||||
|
)
|
||||||
|
.expect("failed to define a metric")
|
||||||
|
})
|
||||||
|
});
|
||||||
|
|
||||||
|
impl DurationResultObserver for BasebackupQueryTime {
|
||||||
|
fn observe_result<T, E>(&self, res: &Result<T, E>, duration: std::time::Duration) {
|
||||||
|
let label_value = if res.is_ok() { "ok" } else { "error" };
|
||||||
|
let metric = self.0.get_metric_with_label_values(&[label_value]).unwrap();
|
||||||
|
metric.observe(duration.as_secs_f64());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub static LIVE_CONNECTIONS_COUNT: Lazy<IntGaugeVec> = Lazy::new(|| {
|
pub static LIVE_CONNECTIONS_COUNT: Lazy<IntGaugeVec> = Lazy::new(|| {
|
||||||
register_int_gauge_vec!(
|
register_int_gauge_vec!(
|
||||||
"pageserver_live_connections",
|
"pageserver_live_connections",
|
||||||
@@ -752,10 +886,7 @@ impl StorageTimeMetrics {
|
|||||||
pub struct TimelineMetrics {
|
pub struct TimelineMetrics {
|
||||||
tenant_id: String,
|
tenant_id: String,
|
||||||
timeline_id: String,
|
timeline_id: String,
|
||||||
pub reconstruct_time_histo: Histogram,
|
|
||||||
pub get_reconstruct_data_time_histo: Histogram,
|
pub get_reconstruct_data_time_histo: Histogram,
|
||||||
pub materialized_page_cache_hit_counter: GenericCounter<AtomicU64>,
|
|
||||||
pub materialized_page_cache_hit_upon_request_counter: GenericCounter<AtomicU64>,
|
|
||||||
pub flush_time_histo: StorageTimeMetrics,
|
pub flush_time_histo: StorageTimeMetrics,
|
||||||
pub compact_time_histo: StorageTimeMetrics,
|
pub compact_time_histo: StorageTimeMetrics,
|
||||||
pub create_images_time_histo: StorageTimeMetrics,
|
pub create_images_time_histo: StorageTimeMetrics,
|
||||||
@@ -783,15 +914,9 @@ impl TimelineMetrics {
|
|||||||
) -> Self {
|
) -> Self {
|
||||||
let tenant_id = tenant_id.to_string();
|
let tenant_id = tenant_id.to_string();
|
||||||
let timeline_id = timeline_id.to_string();
|
let timeline_id = timeline_id.to_string();
|
||||||
let reconstruct_time_histo = RECONSTRUCT_TIME
|
|
||||||
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
|
|
||||||
.unwrap();
|
|
||||||
let get_reconstruct_data_time_histo = GET_RECONSTRUCT_DATA_TIME
|
let get_reconstruct_data_time_histo = GET_RECONSTRUCT_DATA_TIME
|
||||||
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
|
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
|
||||||
.unwrap();
|
.unwrap();
|
||||||
let materialized_page_cache_hit_counter = MATERIALIZED_PAGE_CACHE_HIT
|
|
||||||
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
|
|
||||||
.unwrap();
|
|
||||||
let flush_time_histo =
|
let flush_time_histo =
|
||||||
StorageTimeMetrics::new(StorageTimeOperation::LayerFlush, &tenant_id, &timeline_id);
|
StorageTimeMetrics::new(StorageTimeOperation::LayerFlush, &tenant_id, &timeline_id);
|
||||||
let compact_time_histo =
|
let compact_time_histo =
|
||||||
@@ -833,19 +958,13 @@ impl TimelineMetrics {
|
|||||||
let read_num_fs_layers = READ_NUM_FS_LAYERS
|
let read_num_fs_layers = READ_NUM_FS_LAYERS
|
||||||
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
|
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
|
||||||
.unwrap();
|
.unwrap();
|
||||||
let materialized_page_cache_hit_upon_request_counter = MATERIALIZED_PAGE_CACHE_HIT_DIRECT
|
|
||||||
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
|
|
||||||
.unwrap();
|
|
||||||
let evictions_with_low_residence_duration =
|
let evictions_with_low_residence_duration =
|
||||||
evictions_with_low_residence_duration_builder.build(&tenant_id, &timeline_id);
|
evictions_with_low_residence_duration_builder.build(&tenant_id, &timeline_id);
|
||||||
|
|
||||||
TimelineMetrics {
|
TimelineMetrics {
|
||||||
tenant_id,
|
tenant_id,
|
||||||
timeline_id,
|
timeline_id,
|
||||||
reconstruct_time_histo,
|
|
||||||
get_reconstruct_data_time_histo,
|
get_reconstruct_data_time_histo,
|
||||||
materialized_page_cache_hit_counter,
|
|
||||||
materialized_page_cache_hit_upon_request_counter,
|
|
||||||
flush_time_histo,
|
flush_time_histo,
|
||||||
compact_time_histo,
|
compact_time_histo,
|
||||||
create_images_time_histo,
|
create_images_time_histo,
|
||||||
@@ -872,10 +991,7 @@ impl Drop for TimelineMetrics {
|
|||||||
fn drop(&mut self) {
|
fn drop(&mut self) {
|
||||||
let tenant_id = &self.tenant_id;
|
let tenant_id = &self.tenant_id;
|
||||||
let timeline_id = &self.timeline_id;
|
let timeline_id = &self.timeline_id;
|
||||||
let _ = RECONSTRUCT_TIME.remove_label_values(&[tenant_id, timeline_id]);
|
|
||||||
let _ = GET_RECONSTRUCT_DATA_TIME.remove_label_values(&[tenant_id, timeline_id]);
|
let _ = GET_RECONSTRUCT_DATA_TIME.remove_label_values(&[tenant_id, timeline_id]);
|
||||||
let _ = MATERIALIZED_PAGE_CACHE_HIT.remove_label_values(&[tenant_id, timeline_id]);
|
|
||||||
let _ = MATERIALIZED_PAGE_CACHE_HIT_DIRECT.remove_label_values(&[tenant_id, timeline_id]);
|
|
||||||
let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, timeline_id]);
|
let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, timeline_id]);
|
||||||
let _ = WAIT_LSN_TIME.remove_label_values(&[tenant_id, timeline_id]);
|
let _ = WAIT_LSN_TIME.remove_label_values(&[tenant_id, timeline_id]);
|
||||||
let _ = RESIDENT_PHYSICAL_SIZE.remove_label_values(&[tenant_id, timeline_id]);
|
let _ = RESIDENT_PHYSICAL_SIZE.remove_label_values(&[tenant_id, timeline_id]);
|
||||||
@@ -968,7 +1084,6 @@ impl RemoteTimelineClientMetrics {
|
|||||||
op_kind: &RemoteOpKind,
|
op_kind: &RemoteOpKind,
|
||||||
status: &'static str,
|
status: &'static str,
|
||||||
) -> Histogram {
|
) -> Histogram {
|
||||||
// XXX would be nice to have an upgradable RwLock
|
|
||||||
let mut guard = self.remote_operation_time.lock().unwrap();
|
let mut guard = self.remote_operation_time.lock().unwrap();
|
||||||
let key = (file_kind.as_str(), op_kind.as_str(), status);
|
let key = (file_kind.as_str(), op_kind.as_str(), status);
|
||||||
let metric = guard.entry(key).or_insert_with(move || {
|
let metric = guard.entry(key).or_insert_with(move || {
|
||||||
@@ -990,7 +1105,6 @@ impl RemoteTimelineClientMetrics {
|
|||||||
file_kind: &RemoteOpFileKind,
|
file_kind: &RemoteOpFileKind,
|
||||||
op_kind: &RemoteOpKind,
|
op_kind: &RemoteOpKind,
|
||||||
) -> IntGauge {
|
) -> IntGauge {
|
||||||
// XXX would be nice to have an upgradable RwLock
|
|
||||||
let mut guard = self.calls_unfinished_gauge.lock().unwrap();
|
let mut guard = self.calls_unfinished_gauge.lock().unwrap();
|
||||||
let key = (file_kind.as_str(), op_kind.as_str());
|
let key = (file_kind.as_str(), op_kind.as_str());
|
||||||
let metric = guard.entry(key).or_insert_with(move || {
|
let metric = guard.entry(key).or_insert_with(move || {
|
||||||
@@ -1011,7 +1125,6 @@ impl RemoteTimelineClientMetrics {
|
|||||||
file_kind: &RemoteOpFileKind,
|
file_kind: &RemoteOpFileKind,
|
||||||
op_kind: &RemoteOpKind,
|
op_kind: &RemoteOpKind,
|
||||||
) -> Histogram {
|
) -> Histogram {
|
||||||
// XXX would be nice to have an upgradable RwLock
|
|
||||||
let mut guard = self.calls_started_hist.lock().unwrap();
|
let mut guard = self.calls_started_hist.lock().unwrap();
|
||||||
let key = (file_kind.as_str(), op_kind.as_str());
|
let key = (file_kind.as_str(), op_kind.as_str());
|
||||||
let metric = guard.entry(key).or_insert_with(move || {
|
let metric = guard.entry(key).or_insert_with(move || {
|
||||||
@@ -1032,7 +1145,6 @@ impl RemoteTimelineClientMetrics {
|
|||||||
file_kind: &RemoteOpFileKind,
|
file_kind: &RemoteOpFileKind,
|
||||||
op_kind: &RemoteOpKind,
|
op_kind: &RemoteOpKind,
|
||||||
) -> IntCounter {
|
) -> IntCounter {
|
||||||
// XXX would be nice to have an upgradable RwLock
|
|
||||||
let mut guard = self.bytes_started_counter.lock().unwrap();
|
let mut guard = self.bytes_started_counter.lock().unwrap();
|
||||||
let key = (file_kind.as_str(), op_kind.as_str());
|
let key = (file_kind.as_str(), op_kind.as_str());
|
||||||
let metric = guard.entry(key).or_insert_with(move || {
|
let metric = guard.entry(key).or_insert_with(move || {
|
||||||
@@ -1053,7 +1165,6 @@ impl RemoteTimelineClientMetrics {
|
|||||||
file_kind: &RemoteOpFileKind,
|
file_kind: &RemoteOpFileKind,
|
||||||
op_kind: &RemoteOpKind,
|
op_kind: &RemoteOpKind,
|
||||||
) -> IntCounter {
|
) -> IntCounter {
|
||||||
// XXX would be nice to have an upgradable RwLock
|
|
||||||
let mut guard = self.bytes_finished_counter.lock().unwrap();
|
let mut guard = self.bytes_finished_counter.lock().unwrap();
|
||||||
let key = (file_kind.as_str(), op_kind.as_str());
|
let key = (file_kind.as_str(), op_kind.as_str());
|
||||||
let metric = guard.entry(key).or_insert_with(move || {
|
let metric = guard.entry(key).or_insert_with(move || {
|
||||||
@@ -1319,4 +1430,8 @@ pub fn preinitialize_metrics() {
|
|||||||
|
|
||||||
// Same as above for this metric, but, it's a Vec-type metric for which we don't know all the labels.
|
// Same as above for this metric, but, it's a Vec-type metric for which we don't know all the labels.
|
||||||
BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT.reset();
|
BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT.reset();
|
||||||
|
|
||||||
|
// Python tests need these.
|
||||||
|
MATERIALIZED_PAGE_CACHE_HIT_DIRECT.get();
|
||||||
|
MATERIALIZED_PAGE_CACHE_HIT.get();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -53,8 +53,8 @@ use utils::{
|
|||||||
lsn::Lsn,
|
lsn::Lsn,
|
||||||
};
|
};
|
||||||
|
|
||||||
use crate::repository::Key;
|
|
||||||
use crate::tenant::writeback_ephemeral_file;
|
use crate::tenant::writeback_ephemeral_file;
|
||||||
|
use crate::{metrics::PageCacheSizeMetrics, repository::Key};
|
||||||
|
|
||||||
static PAGE_CACHE: OnceCell<PageCache> = OnceCell::new();
|
static PAGE_CACHE: OnceCell<PageCache> = OnceCell::new();
|
||||||
const TEST_PAGE_CACHE_SIZE: usize = 50;
|
const TEST_PAGE_CACHE_SIZE: usize = 50;
|
||||||
@@ -187,6 +187,8 @@ pub struct PageCache {
|
|||||||
/// Index of the next candidate to evict, for the Clock replacement algorithm.
|
/// Index of the next candidate to evict, for the Clock replacement algorithm.
|
||||||
/// This is interpreted modulo the page cache size.
|
/// This is interpreted modulo the page cache size.
|
||||||
next_evict_slot: AtomicUsize,
|
next_evict_slot: AtomicUsize,
|
||||||
|
|
||||||
|
size_metrics: &'static PageCacheSizeMetrics,
|
||||||
}
|
}
|
||||||
|
|
||||||
///
|
///
|
||||||
@@ -313,6 +315,10 @@ impl PageCache {
|
|||||||
key: &Key,
|
key: &Key,
|
||||||
lsn: Lsn,
|
lsn: Lsn,
|
||||||
) -> Option<(Lsn, PageReadGuard)> {
|
) -> Option<(Lsn, PageReadGuard)> {
|
||||||
|
crate::metrics::PAGE_CACHE
|
||||||
|
.read_accesses_materialized_page
|
||||||
|
.inc();
|
||||||
|
|
||||||
let mut cache_key = CacheKey::MaterializedPage {
|
let mut cache_key = CacheKey::MaterializedPage {
|
||||||
hash_key: MaterializedPageHashKey {
|
hash_key: MaterializedPageHashKey {
|
||||||
tenant_id,
|
tenant_id,
|
||||||
@@ -323,8 +329,21 @@ impl PageCache {
|
|||||||
};
|
};
|
||||||
|
|
||||||
if let Some(guard) = self.try_lock_for_read(&mut cache_key) {
|
if let Some(guard) = self.try_lock_for_read(&mut cache_key) {
|
||||||
if let CacheKey::MaterializedPage { hash_key: _, lsn } = cache_key {
|
if let CacheKey::MaterializedPage {
|
||||||
Some((lsn, guard))
|
hash_key: _,
|
||||||
|
lsn: available_lsn,
|
||||||
|
} = cache_key
|
||||||
|
{
|
||||||
|
if available_lsn == lsn {
|
||||||
|
crate::metrics::PAGE_CACHE
|
||||||
|
.read_hits_materialized_page_exact
|
||||||
|
.inc();
|
||||||
|
} else {
|
||||||
|
crate::metrics::PAGE_CACHE
|
||||||
|
.read_hits_materialized_page_older_lsn
|
||||||
|
.inc();
|
||||||
|
}
|
||||||
|
Some((available_lsn, guard))
|
||||||
} else {
|
} else {
|
||||||
panic!("unexpected key type in slot");
|
panic!("unexpected key type in slot");
|
||||||
}
|
}
|
||||||
@@ -499,11 +518,31 @@ impl PageCache {
|
|||||||
/// ```
|
/// ```
|
||||||
///
|
///
|
||||||
fn lock_for_read(&self, cache_key: &mut CacheKey) -> anyhow::Result<ReadBufResult> {
|
fn lock_for_read(&self, cache_key: &mut CacheKey) -> anyhow::Result<ReadBufResult> {
|
||||||
|
let (read_access, hit) = match cache_key {
|
||||||
|
CacheKey::MaterializedPage { .. } => {
|
||||||
|
unreachable!("Materialized pages use lookup_materialized_page")
|
||||||
|
}
|
||||||
|
CacheKey::EphemeralPage { .. } => (
|
||||||
|
&crate::metrics::PAGE_CACHE.read_accesses_ephemeral,
|
||||||
|
&crate::metrics::PAGE_CACHE.read_hits_ephemeral,
|
||||||
|
),
|
||||||
|
CacheKey::ImmutableFilePage { .. } => (
|
||||||
|
&crate::metrics::PAGE_CACHE.read_accesses_immutable,
|
||||||
|
&crate::metrics::PAGE_CACHE.read_hits_immutable,
|
||||||
|
),
|
||||||
|
};
|
||||||
|
read_access.inc();
|
||||||
|
|
||||||
|
let mut is_first_iteration = true;
|
||||||
loop {
|
loop {
|
||||||
// First check if the key already exists in the cache.
|
// First check if the key already exists in the cache.
|
||||||
if let Some(read_guard) = self.try_lock_for_read(cache_key) {
|
if let Some(read_guard) = self.try_lock_for_read(cache_key) {
|
||||||
|
if is_first_iteration {
|
||||||
|
hit.inc();
|
||||||
|
}
|
||||||
return Ok(ReadBufResult::Found(read_guard));
|
return Ok(ReadBufResult::Found(read_guard));
|
||||||
}
|
}
|
||||||
|
is_first_iteration = false;
|
||||||
|
|
||||||
// Not found. Find a victim buffer
|
// Not found. Find a victim buffer
|
||||||
let (slot_idx, mut inner) =
|
let (slot_idx, mut inner) =
|
||||||
@@ -681,6 +720,9 @@ impl PageCache {
|
|||||||
|
|
||||||
if let Ok(version_idx) = versions.binary_search_by_key(old_lsn, |v| v.lsn) {
|
if let Ok(version_idx) = versions.binary_search_by_key(old_lsn, |v| v.lsn) {
|
||||||
versions.remove(version_idx);
|
versions.remove(version_idx);
|
||||||
|
self.size_metrics
|
||||||
|
.current_bytes_materialized_page
|
||||||
|
.sub_page_sz(1);
|
||||||
if versions.is_empty() {
|
if versions.is_empty() {
|
||||||
old_entry.remove_entry();
|
old_entry.remove_entry();
|
||||||
}
|
}
|
||||||
@@ -693,11 +735,13 @@ impl PageCache {
|
|||||||
let mut map = self.ephemeral_page_map.write().unwrap();
|
let mut map = self.ephemeral_page_map.write().unwrap();
|
||||||
map.remove(&(*file_id, *blkno))
|
map.remove(&(*file_id, *blkno))
|
||||||
.expect("could not find old key in mapping");
|
.expect("could not find old key in mapping");
|
||||||
|
self.size_metrics.current_bytes_ephemeral.sub_page_sz(1);
|
||||||
}
|
}
|
||||||
CacheKey::ImmutableFilePage { file_id, blkno } => {
|
CacheKey::ImmutableFilePage { file_id, blkno } => {
|
||||||
let mut map = self.immutable_page_map.write().unwrap();
|
let mut map = self.immutable_page_map.write().unwrap();
|
||||||
map.remove(&(*file_id, *blkno))
|
map.remove(&(*file_id, *blkno))
|
||||||
.expect("could not find old key in mapping");
|
.expect("could not find old key in mapping");
|
||||||
|
self.size_metrics.current_bytes_immutable.sub_page_sz(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -725,6 +769,9 @@ impl PageCache {
|
|||||||
slot_idx,
|
slot_idx,
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
|
self.size_metrics
|
||||||
|
.current_bytes_materialized_page
|
||||||
|
.add_page_sz(1);
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -735,6 +782,7 @@ impl PageCache {
|
|||||||
Entry::Occupied(entry) => Some(*entry.get()),
|
Entry::Occupied(entry) => Some(*entry.get()),
|
||||||
Entry::Vacant(entry) => {
|
Entry::Vacant(entry) => {
|
||||||
entry.insert(slot_idx);
|
entry.insert(slot_idx);
|
||||||
|
self.size_metrics.current_bytes_ephemeral.add_page_sz(1);
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -745,6 +793,7 @@ impl PageCache {
|
|||||||
Entry::Occupied(entry) => Some(*entry.get()),
|
Entry::Occupied(entry) => Some(*entry.get()),
|
||||||
Entry::Vacant(entry) => {
|
Entry::Vacant(entry) => {
|
||||||
entry.insert(slot_idx);
|
entry.insert(slot_idx);
|
||||||
|
self.size_metrics.current_bytes_immutable.add_page_sz(1);
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -844,6 +893,12 @@ impl PageCache {
|
|||||||
|
|
||||||
let page_buffer = Box::leak(vec![0u8; num_pages * PAGE_SZ].into_boxed_slice());
|
let page_buffer = Box::leak(vec![0u8; num_pages * PAGE_SZ].into_boxed_slice());
|
||||||
|
|
||||||
|
let size_metrics = &crate::metrics::PAGE_CACHE_SIZE;
|
||||||
|
size_metrics.max_bytes.set_page_sz(num_pages);
|
||||||
|
size_metrics.current_bytes_ephemeral.set_page_sz(0);
|
||||||
|
size_metrics.current_bytes_immutable.set_page_sz(0);
|
||||||
|
size_metrics.current_bytes_materialized_page.set_page_sz(0);
|
||||||
|
|
||||||
let slots = page_buffer
|
let slots = page_buffer
|
||||||
.chunks_exact_mut(PAGE_SZ)
|
.chunks_exact_mut(PAGE_SZ)
|
||||||
.map(|chunk| {
|
.map(|chunk| {
|
||||||
@@ -866,6 +921,30 @@ impl PageCache {
|
|||||||
immutable_page_map: Default::default(),
|
immutable_page_map: Default::default(),
|
||||||
slots,
|
slots,
|
||||||
next_evict_slot: AtomicUsize::new(0),
|
next_evict_slot: AtomicUsize::new(0),
|
||||||
|
size_metrics,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
trait PageSzBytesMetric {
|
||||||
|
fn set_page_sz(&self, count: usize);
|
||||||
|
fn add_page_sz(&self, count: usize);
|
||||||
|
fn sub_page_sz(&self, count: usize);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn count_times_page_sz(count: usize) -> u64 {
|
||||||
|
u64::try_from(count).unwrap() * u64::try_from(PAGE_SZ).unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PageSzBytesMetric for metrics::UIntGauge {
|
||||||
|
fn set_page_sz(&self, count: usize) {
|
||||||
|
self.set(count_times_page_sz(count));
|
||||||
|
}
|
||||||
|
fn add_page_sz(&self, count: usize) {
|
||||||
|
self.add(count_times_page_sz(count));
|
||||||
|
}
|
||||||
|
fn sub_page_sz(&self, count: usize) {
|
||||||
|
self.sub(count_times_page_sz(count));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -390,7 +390,9 @@ impl PageServerHandler {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// Check that the timeline exists
|
// Check that the timeline exists
|
||||||
let timeline = tenant.get_timeline(timeline_id, true)?;
|
let timeline = tenant
|
||||||
|
.get_timeline(timeline_id, true)
|
||||||
|
.map_err(|e| anyhow::anyhow!(e))?;
|
||||||
|
|
||||||
// switch client to COPYBOTH
|
// switch client to COPYBOTH
|
||||||
pgb.write_message_noflush(&BeMessage::CopyBothResponse)?;
|
pgb.write_message_noflush(&BeMessage::CopyBothResponse)?;
|
||||||
@@ -902,7 +904,7 @@ where
|
|||||||
|
|
||||||
self.check_permission(Some(tenant_id))?;
|
self.check_permission(Some(tenant_id))?;
|
||||||
|
|
||||||
let lsn = if params.len() == 3 {
|
let lsn = if params.len() >= 3 {
|
||||||
Some(
|
Some(
|
||||||
Lsn::from_str(params[2])
|
Lsn::from_str(params[2])
|
||||||
.with_context(|| format!("Failed to parse Lsn from {}", params[2]))?,
|
.with_context(|| format!("Failed to parse Lsn from {}", params[2]))?,
|
||||||
@@ -911,10 +913,24 @@ where
|
|||||||
None
|
None
|
||||||
};
|
};
|
||||||
|
|
||||||
// Check that the timeline exists
|
metrics::metric_vec_duration::observe_async_block_duration_by_result(
|
||||||
self.handle_basebackup_request(pgb, tenant_id, timeline_id, lsn, None, false, ctx)
|
&*crate::metrics::BASEBACKUP_QUERY_TIME,
|
||||||
.await?;
|
async move {
|
||||||
pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
|
self.handle_basebackup_request(
|
||||||
|
pgb,
|
||||||
|
tenant_id,
|
||||||
|
timeline_id,
|
||||||
|
lsn,
|
||||||
|
None,
|
||||||
|
false,
|
||||||
|
ctx,
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
|
||||||
|
anyhow::Ok(())
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
}
|
}
|
||||||
// return pair of prev_lsn and last_lsn
|
// return pair of prev_lsn and last_lsn
|
||||||
else if query_string.starts_with("get_last_record_rlsn ") {
|
else if query_string.starts_with("get_last_record_rlsn ") {
|
||||||
@@ -1230,6 +1246,6 @@ async fn get_active_tenant_timeline(
|
|||||||
.map_err(GetActiveTimelineError::Tenant)?;
|
.map_err(GetActiveTimelineError::Tenant)?;
|
||||||
let timeline = tenant
|
let timeline = tenant
|
||||||
.get_timeline(timeline_id, true)
|
.get_timeline(timeline_id, true)
|
||||||
.map_err(GetActiveTimelineError::Timeline)?;
|
.map_err(|e| GetActiveTimelineError::Timeline(anyhow::anyhow!(e)))?;
|
||||||
Ok(timeline)
|
Ok(timeline)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -43,6 +43,16 @@ pub enum CalculateLogicalSizeError {
|
|||||||
Other(#[from] anyhow::Error),
|
Other(#[from] anyhow::Error),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, thiserror::Error)]
|
||||||
|
pub enum RelationError {
|
||||||
|
#[error("Relation Already Exists")]
|
||||||
|
AlreadyExists,
|
||||||
|
#[error("invalid relnode")]
|
||||||
|
InvalidRelnode,
|
||||||
|
#[error(transparent)]
|
||||||
|
Other(#[from] anyhow::Error),
|
||||||
|
}
|
||||||
|
|
||||||
///
|
///
|
||||||
/// This impl provides all the functionality to store PostgreSQL relations, SLRUs,
|
/// This impl provides all the functionality to store PostgreSQL relations, SLRUs,
|
||||||
/// and other special kinds of files, in a versioned key-value store. The
|
/// and other special kinds of files, in a versioned key-value store. The
|
||||||
@@ -101,9 +111,9 @@ impl Timeline {
|
|||||||
ctx: &RequestContext,
|
ctx: &RequestContext,
|
||||||
) -> Result<Bytes, PageReconstructError> {
|
) -> Result<Bytes, PageReconstructError> {
|
||||||
if tag.relnode == 0 {
|
if tag.relnode == 0 {
|
||||||
return Err(PageReconstructError::Other(anyhow::anyhow!(
|
return Err(PageReconstructError::Other(
|
||||||
"invalid relnode"
|
RelationError::InvalidRelnode.into(),
|
||||||
)));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
let nblocks = self.get_rel_size(tag, lsn, latest, ctx).await?;
|
let nblocks = self.get_rel_size(tag, lsn, latest, ctx).await?;
|
||||||
@@ -148,9 +158,9 @@ impl Timeline {
|
|||||||
ctx: &RequestContext,
|
ctx: &RequestContext,
|
||||||
) -> Result<BlockNumber, PageReconstructError> {
|
) -> Result<BlockNumber, PageReconstructError> {
|
||||||
if tag.relnode == 0 {
|
if tag.relnode == 0 {
|
||||||
return Err(PageReconstructError::Other(anyhow::anyhow!(
|
return Err(PageReconstructError::Other(
|
||||||
"invalid relnode"
|
RelationError::InvalidRelnode.into(),
|
||||||
)));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(nblocks) = self.get_cached_rel_size(&tag, lsn) {
|
if let Some(nblocks) = self.get_cached_rel_size(&tag, lsn) {
|
||||||
@@ -193,9 +203,9 @@ impl Timeline {
|
|||||||
ctx: &RequestContext,
|
ctx: &RequestContext,
|
||||||
) -> Result<bool, PageReconstructError> {
|
) -> Result<bool, PageReconstructError> {
|
||||||
if tag.relnode == 0 {
|
if tag.relnode == 0 {
|
||||||
return Err(PageReconstructError::Other(anyhow::anyhow!(
|
return Err(PageReconstructError::Other(
|
||||||
"invalid relnode"
|
RelationError::InvalidRelnode.into(),
|
||||||
)));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
// first try to lookup relation in cache
|
// first try to lookup relation in cache
|
||||||
@@ -699,6 +709,20 @@ impl<'a> DatadirModification<'a> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
pub fn init_empty_test_timeline(&mut self) -> anyhow::Result<()> {
|
||||||
|
self.init_empty()?;
|
||||||
|
self.put_control_file(bytes::Bytes::from_static(
|
||||||
|
b"control_file contents do not matter",
|
||||||
|
))
|
||||||
|
.context("put_control_file")?;
|
||||||
|
self.put_checkpoint(bytes::Bytes::from_static(
|
||||||
|
b"checkpoint_file contents do not matter",
|
||||||
|
))
|
||||||
|
.context("put_checkpoint_file")?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
/// Put a new page version that can be constructed from a WAL record
|
/// Put a new page version that can be constructed from a WAL record
|
||||||
///
|
///
|
||||||
/// NOTE: this will *not* implicitly extend the relation, if the page is beyond the
|
/// NOTE: this will *not* implicitly extend the relation, if the page is beyond the
|
||||||
@@ -710,7 +734,7 @@ impl<'a> DatadirModification<'a> {
|
|||||||
blknum: BlockNumber,
|
blknum: BlockNumber,
|
||||||
rec: NeonWalRecord,
|
rec: NeonWalRecord,
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
anyhow::ensure!(rel.relnode != 0, "invalid relnode");
|
anyhow::ensure!(rel.relnode != 0, RelationError::InvalidRelnode);
|
||||||
self.put(rel_block_to_key(rel, blknum), Value::WalRecord(rec));
|
self.put(rel_block_to_key(rel, blknum), Value::WalRecord(rec));
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
@@ -737,7 +761,7 @@ impl<'a> DatadirModification<'a> {
|
|||||||
blknum: BlockNumber,
|
blknum: BlockNumber,
|
||||||
img: Bytes,
|
img: Bytes,
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
anyhow::ensure!(rel.relnode != 0, "invalid relnode");
|
anyhow::ensure!(rel.relnode != 0, RelationError::InvalidRelnode);
|
||||||
self.put(rel_block_to_key(rel, blknum), Value::Image(img));
|
self.put(rel_block_to_key(rel, blknum), Value::Image(img));
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
@@ -861,32 +885,38 @@ impl<'a> DatadirModification<'a> {
|
|||||||
rel: RelTag,
|
rel: RelTag,
|
||||||
nblocks: BlockNumber,
|
nblocks: BlockNumber,
|
||||||
ctx: &RequestContext,
|
ctx: &RequestContext,
|
||||||
) -> anyhow::Result<()> {
|
) -> Result<(), RelationError> {
|
||||||
anyhow::ensure!(rel.relnode != 0, "invalid relnode");
|
if rel.relnode == 0 {
|
||||||
|
return Err(RelationError::InvalidRelnode);
|
||||||
|
}
|
||||||
// It's possible that this is the first rel for this db in this
|
// It's possible that this is the first rel for this db in this
|
||||||
// tablespace. Create the reldir entry for it if so.
|
// tablespace. Create the reldir entry for it if so.
|
||||||
let mut dbdir = DbDirectory::des(&self.get(DBDIR_KEY, ctx).await?)?;
|
let mut dbdir = DbDirectory::des(&self.get(DBDIR_KEY, ctx).await.context("read db")?)
|
||||||
|
.context("deserialize db")?;
|
||||||
let rel_dir_key = rel_dir_to_key(rel.spcnode, rel.dbnode);
|
let rel_dir_key = rel_dir_to_key(rel.spcnode, rel.dbnode);
|
||||||
let mut rel_dir = if dbdir.dbdirs.get(&(rel.spcnode, rel.dbnode)).is_none() {
|
let mut rel_dir = if dbdir.dbdirs.get(&(rel.spcnode, rel.dbnode)).is_none() {
|
||||||
// Didn't exist. Update dbdir
|
// Didn't exist. Update dbdir
|
||||||
dbdir.dbdirs.insert((rel.spcnode, rel.dbnode), false);
|
dbdir.dbdirs.insert((rel.spcnode, rel.dbnode), false);
|
||||||
let buf = DbDirectory::ser(&dbdir)?;
|
let buf = DbDirectory::ser(&dbdir).context("serialize db")?;
|
||||||
self.put(DBDIR_KEY, Value::Image(buf.into()));
|
self.put(DBDIR_KEY, Value::Image(buf.into()));
|
||||||
|
|
||||||
// and create the RelDirectory
|
// and create the RelDirectory
|
||||||
RelDirectory::default()
|
RelDirectory::default()
|
||||||
} else {
|
} else {
|
||||||
// reldir already exists, fetch it
|
// reldir already exists, fetch it
|
||||||
RelDirectory::des(&self.get(rel_dir_key, ctx).await?)?
|
RelDirectory::des(&self.get(rel_dir_key, ctx).await.context("read db")?)
|
||||||
|
.context("deserialize db")?
|
||||||
};
|
};
|
||||||
|
|
||||||
// Add the new relation to the rel directory entry, and write it back
|
// Add the new relation to the rel directory entry, and write it back
|
||||||
if !rel_dir.rels.insert((rel.relnode, rel.forknum)) {
|
if !rel_dir.rels.insert((rel.relnode, rel.forknum)) {
|
||||||
anyhow::bail!("rel {rel} already exists");
|
return Err(RelationError::AlreadyExists);
|
||||||
}
|
}
|
||||||
self.put(
|
self.put(
|
||||||
rel_dir_key,
|
rel_dir_key,
|
||||||
Value::Image(Bytes::from(RelDirectory::ser(&rel_dir)?)),
|
Value::Image(Bytes::from(
|
||||||
|
RelDirectory::ser(&rel_dir).context("serialize")?,
|
||||||
|
)),
|
||||||
);
|
);
|
||||||
|
|
||||||
// Put size
|
// Put size
|
||||||
@@ -911,7 +941,7 @@ impl<'a> DatadirModification<'a> {
|
|||||||
nblocks: BlockNumber,
|
nblocks: BlockNumber,
|
||||||
ctx: &RequestContext,
|
ctx: &RequestContext,
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
anyhow::ensure!(rel.relnode != 0, "invalid relnode");
|
anyhow::ensure!(rel.relnode != 0, RelationError::InvalidRelnode);
|
||||||
let last_lsn = self.tline.get_last_record_lsn();
|
let last_lsn = self.tline.get_last_record_lsn();
|
||||||
if self.tline.get_rel_exists(rel, last_lsn, true, ctx).await? {
|
if self.tline.get_rel_exists(rel, last_lsn, true, ctx).await? {
|
||||||
let size_key = rel_size_to_key(rel);
|
let size_key = rel_size_to_key(rel);
|
||||||
@@ -942,7 +972,7 @@ impl<'a> DatadirModification<'a> {
|
|||||||
nblocks: BlockNumber,
|
nblocks: BlockNumber,
|
||||||
ctx: &RequestContext,
|
ctx: &RequestContext,
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
anyhow::ensure!(rel.relnode != 0, "invalid relnode");
|
anyhow::ensure!(rel.relnode != 0, RelationError::InvalidRelnode);
|
||||||
|
|
||||||
// Put size
|
// Put size
|
||||||
let size_key = rel_size_to_key(rel);
|
let size_key = rel_size_to_key(rel);
|
||||||
@@ -963,7 +993,7 @@ impl<'a> DatadirModification<'a> {
|
|||||||
|
|
||||||
/// Drop a relation.
|
/// Drop a relation.
|
||||||
pub async fn put_rel_drop(&mut self, rel: RelTag, ctx: &RequestContext) -> anyhow::Result<()> {
|
pub async fn put_rel_drop(&mut self, rel: RelTag, ctx: &RequestContext) -> anyhow::Result<()> {
|
||||||
anyhow::ensure!(rel.relnode != 0, "invalid relnode");
|
anyhow::ensure!(rel.relnode != 0, RelationError::InvalidRelnode);
|
||||||
|
|
||||||
// Remove it from the directory entry
|
// Remove it from the directory entry
|
||||||
let dir_key = rel_dir_to_key(rel.spcnode, rel.dbnode);
|
let dir_key = rel_dir_to_key(rel.spcnode, rel.dbnode);
|
||||||
@@ -1108,7 +1138,7 @@ impl<'a> DatadirModification<'a> {
|
|||||||
/// retains all the metadata, but data pages are flushed. That's again OK
|
/// retains all the metadata, but data pages are flushed. That's again OK
|
||||||
/// for bulk import, where you are just loading data pages and won't try to
|
/// for bulk import, where you are just loading data pages and won't try to
|
||||||
/// modify the same pages twice.
|
/// modify the same pages twice.
|
||||||
pub fn flush(&mut self) -> anyhow::Result<()> {
|
pub async fn flush(&mut self) -> anyhow::Result<()> {
|
||||||
// Unless we have accumulated a decent amount of changes, it's not worth it
|
// Unless we have accumulated a decent amount of changes, it's not worth it
|
||||||
// to scan through the pending_updates list.
|
// to scan through the pending_updates list.
|
||||||
let pending_nblocks = self.pending_nblocks;
|
let pending_nblocks = self.pending_nblocks;
|
||||||
@@ -1116,19 +1146,20 @@ impl<'a> DatadirModification<'a> {
|
|||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
|
||||||
let writer = self.tline.writer();
|
let writer = self.tline.writer().await;
|
||||||
|
|
||||||
// Flush relation and SLRU data blocks, keep metadata.
|
// Flush relation and SLRU data blocks, keep metadata.
|
||||||
let mut result: anyhow::Result<()> = Ok(());
|
let mut retained_pending_updates = HashMap::new();
|
||||||
self.pending_updates.retain(|&key, value| {
|
for (key, value) in self.pending_updates.drain() {
|
||||||
if result.is_ok() && (is_rel_block_key(key) || is_slru_block_key(key)) {
|
if is_rel_block_key(key) || is_slru_block_key(key) {
|
||||||
result = writer.put(key, self.lsn, value);
|
// This bails out on first error without modifying pending_updates.
|
||||||
false
|
// That's Ok, cf this function's doc comment.
|
||||||
|
writer.put(key, self.lsn, &value).await?;
|
||||||
} else {
|
} else {
|
||||||
true
|
retained_pending_updates.insert(key, value);
|
||||||
}
|
}
|
||||||
});
|
}
|
||||||
result?;
|
self.pending_updates.extend(retained_pending_updates);
|
||||||
|
|
||||||
if pending_nblocks != 0 {
|
if pending_nblocks != 0 {
|
||||||
writer.update_current_logical_size(pending_nblocks * i64::from(BLCKSZ));
|
writer.update_current_logical_size(pending_nblocks * i64::from(BLCKSZ));
|
||||||
@@ -1143,17 +1174,17 @@ impl<'a> DatadirModification<'a> {
|
|||||||
/// underlying timeline.
|
/// underlying timeline.
|
||||||
/// All the modifications in this atomic update are stamped by the specified LSN.
|
/// All the modifications in this atomic update are stamped by the specified LSN.
|
||||||
///
|
///
|
||||||
pub fn commit(&mut self) -> anyhow::Result<()> {
|
pub async fn commit(&mut self) -> anyhow::Result<()> {
|
||||||
let writer = self.tline.writer();
|
let writer = self.tline.writer().await;
|
||||||
let lsn = self.lsn;
|
let lsn = self.lsn;
|
||||||
let pending_nblocks = self.pending_nblocks;
|
let pending_nblocks = self.pending_nblocks;
|
||||||
self.pending_nblocks = 0;
|
self.pending_nblocks = 0;
|
||||||
|
|
||||||
for (key, value) in self.pending_updates.drain() {
|
for (key, value) in self.pending_updates.drain() {
|
||||||
writer.put(key, lsn, &value)?;
|
writer.put(key, lsn, &value).await?;
|
||||||
}
|
}
|
||||||
for key_range in self.pending_deletions.drain(..) {
|
for key_range in self.pending_deletions.drain(..) {
|
||||||
writer.delete(key_range, lsn)?;
|
writer.delete(key_range, lsn).await?;
|
||||||
}
|
}
|
||||||
|
|
||||||
writer.finish_write(lsn);
|
writer.finish_write(lsn);
|
||||||
@@ -1593,20 +1624,6 @@ fn is_slru_block_key(key: Key) -> bool {
|
|||||||
&& key.field6 != 0xffffffff // and not SlruSegSize
|
&& key.field6 != 0xffffffff // and not SlruSegSize
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
pub fn create_test_timeline(
|
|
||||||
tenant: &crate::tenant::Tenant,
|
|
||||||
timeline_id: utils::id::TimelineId,
|
|
||||||
pg_version: u32,
|
|
||||||
ctx: &RequestContext,
|
|
||||||
) -> anyhow::Result<std::sync::Arc<Timeline>> {
|
|
||||||
let tline = tenant.create_test_timeline(timeline_id, Lsn(8), pg_version, ctx)?;
|
|
||||||
let mut m = tline.begin_modification(Lsn(8));
|
|
||||||
m.init_empty()?;
|
|
||||||
m.commit()?;
|
|
||||||
Ok(tline)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[allow(clippy::bool_assert_comparison)]
|
#[allow(clippy::bool_assert_comparison)]
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
|
|||||||
@@ -257,6 +257,9 @@ pub enum TaskKind {
|
|||||||
// task that handles attaching a tenant
|
// task that handles attaching a tenant
|
||||||
Attach,
|
Attach,
|
||||||
|
|
||||||
|
// Used mostly for background deletion from s3
|
||||||
|
TimelineDeletionWorker,
|
||||||
|
|
||||||
// task that handhes metrics collection
|
// task that handhes metrics collection
|
||||||
MetricsCollection,
|
MetricsCollection,
|
||||||
|
|
||||||
@@ -503,17 +506,17 @@ pub async fn shutdown_tasks(
|
|||||||
warn!(name = task.name, tenant_id = ?tenant_id, timeline_id = ?timeline_id, kind = ?task_kind, "stopping left-over");
|
warn!(name = task.name, tenant_id = ?tenant_id, timeline_id = ?timeline_id, kind = ?task_kind, "stopping left-over");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let completed = tokio::select! {
|
let join_handle = tokio::select! {
|
||||||
biased;
|
biased;
|
||||||
_ = &mut join_handle => { true },
|
_ = &mut join_handle => { None },
|
||||||
_ = tokio::time::sleep(std::time::Duration::from_secs(1)) => {
|
_ = tokio::time::sleep(std::time::Duration::from_secs(1)) => {
|
||||||
// allow some time to elapse before logging to cut down the number of log
|
// allow some time to elapse before logging to cut down the number of log
|
||||||
// lines.
|
// lines.
|
||||||
info!("waiting for {} to shut down", task.name);
|
info!("waiting for {} to shut down", task.name);
|
||||||
false
|
Some(join_handle)
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
if !completed {
|
if let Some(join_handle) = join_handle {
|
||||||
// we never handled this return value, but:
|
// we never handled this return value, but:
|
||||||
// - we don't deschedule which would lead to is_cancelled
|
// - we don't deschedule which would lead to is_cancelled
|
||||||
// - panics are already logged (is_panicked)
|
// - panics are already logged (is_panicked)
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -38,8 +38,8 @@ pub mod defaults {
|
|||||||
pub const DEFAULT_GC_PERIOD: &str = "1 hr";
|
pub const DEFAULT_GC_PERIOD: &str = "1 hr";
|
||||||
pub const DEFAULT_IMAGE_CREATION_THRESHOLD: usize = 3;
|
pub const DEFAULT_IMAGE_CREATION_THRESHOLD: usize = 3;
|
||||||
pub const DEFAULT_PITR_INTERVAL: &str = "7 days";
|
pub const DEFAULT_PITR_INTERVAL: &str = "7 days";
|
||||||
pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "2 seconds";
|
pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "10 seconds";
|
||||||
pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "3 seconds";
|
pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "10 seconds";
|
||||||
pub const DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG: u64 = 10 * 1024 * 1024;
|
pub const DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG: u64 = 10 * 1024 * 1024;
|
||||||
pub const DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD: &str = "24 hour";
|
pub const DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD: &str = "24 hour";
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -51,25 +51,23 @@ use crate::keyspace::KeyPartitioning;
|
|||||||
use crate::repository::Key;
|
use crate::repository::Key;
|
||||||
use crate::tenant::storage_layer::InMemoryLayer;
|
use crate::tenant::storage_layer::InMemoryLayer;
|
||||||
use crate::tenant::storage_layer::Layer;
|
use crate::tenant::storage_layer::Layer;
|
||||||
use anyhow::Context;
|
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use std::collections::HashMap;
|
|
||||||
use std::collections::VecDeque;
|
use std::collections::VecDeque;
|
||||||
use std::ops::Range;
|
use std::ops::Range;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use utils::lsn::Lsn;
|
use utils::lsn::Lsn;
|
||||||
|
|
||||||
use historic_layer_coverage::BufferedHistoricLayerCoverage;
|
use historic_layer_coverage::BufferedHistoricLayerCoverage;
|
||||||
pub use historic_layer_coverage::Replacement;
|
pub use historic_layer_coverage::LayerKey;
|
||||||
|
|
||||||
use super::storage_layer::range_eq;
|
use super::storage_layer::range_eq;
|
||||||
use super::storage_layer::PersistentLayerDesc;
|
use super::storage_layer::PersistentLayerDesc;
|
||||||
use super::storage_layer::PersistentLayerKey;
|
|
||||||
|
|
||||||
///
|
///
|
||||||
/// LayerMap tracks what layers exist on a timeline.
|
/// LayerMap tracks what layers exist on a timeline.
|
||||||
///
|
///
|
||||||
pub struct LayerMap<L: ?Sized> {
|
#[derive(Default)]
|
||||||
|
pub struct LayerMap {
|
||||||
//
|
//
|
||||||
// 'open_layer' holds the current InMemoryLayer that is accepting new
|
// 'open_layer' holds the current InMemoryLayer that is accepting new
|
||||||
// records. If it is None, 'next_open_layer_at' will be set instead, indicating
|
// records. If it is None, 'next_open_layer_at' will be set instead, indicating
|
||||||
@@ -95,24 +93,6 @@ pub struct LayerMap<L: ?Sized> {
|
|||||||
/// L0 layers have key range Key::MIN..Key::MAX, and locating them using R-Tree search is very inefficient.
|
/// L0 layers have key range Key::MIN..Key::MAX, and locating them using R-Tree search is very inefficient.
|
||||||
/// So L0 layers are held in l0_delta_layers vector, in addition to the R-tree.
|
/// So L0 layers are held in l0_delta_layers vector, in addition to the R-tree.
|
||||||
l0_delta_layers: Vec<Arc<PersistentLayerDesc>>,
|
l0_delta_layers: Vec<Arc<PersistentLayerDesc>>,
|
||||||
|
|
||||||
/// Mapping from persistent layer key to the actual layer object. Currently, it stores delta, image, and
|
|
||||||
/// remote layers. In future refactors, this will be eventually moved out of LayerMap into Timeline, and
|
|
||||||
/// RemoteLayer will be removed.
|
|
||||||
mapping: HashMap<PersistentLayerKey, Arc<L>>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<L: ?Sized> Default for LayerMap<L> {
|
|
||||||
fn default() -> Self {
|
|
||||||
Self {
|
|
||||||
open_layer: None,
|
|
||||||
next_open_layer_at: None,
|
|
||||||
frozen_layers: VecDeque::default(),
|
|
||||||
l0_delta_layers: Vec::default(),
|
|
||||||
historic: BufferedHistoricLayerCoverage::default(),
|
|
||||||
mapping: HashMap::default(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The primary update API for the layer map.
|
/// The primary update API for the layer map.
|
||||||
@@ -120,24 +100,21 @@ impl<L: ?Sized> Default for LayerMap<L> {
|
|||||||
/// Batching historic layer insertions and removals is good for
|
/// Batching historic layer insertions and removals is good for
|
||||||
/// performance and this struct helps us do that correctly.
|
/// performance and this struct helps us do that correctly.
|
||||||
#[must_use]
|
#[must_use]
|
||||||
pub struct BatchedUpdates<'a, L: ?Sized + Layer> {
|
pub struct BatchedUpdates<'a> {
|
||||||
// While we hold this exclusive reference to the layer map the type checker
|
// While we hold this exclusive reference to the layer map the type checker
|
||||||
// will prevent us from accidentally reading any unflushed updates.
|
// will prevent us from accidentally reading any unflushed updates.
|
||||||
layer_map: &'a mut LayerMap<L>,
|
layer_map: &'a mut LayerMap,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Provide ability to batch more updates while hiding the read
|
/// Provide ability to batch more updates while hiding the read
|
||||||
/// API so we don't accidentally read without flushing.
|
/// API so we don't accidentally read without flushing.
|
||||||
impl<L> BatchedUpdates<'_, L>
|
impl BatchedUpdates<'_> {
|
||||||
where
|
|
||||||
L: ?Sized + Layer,
|
|
||||||
{
|
|
||||||
///
|
///
|
||||||
/// Insert an on-disk layer.
|
/// Insert an on-disk layer.
|
||||||
///
|
///
|
||||||
// TODO remove the `layer` argument when `mapping` is refactored out of `LayerMap`
|
// TODO remove the `layer` argument when `mapping` is refactored out of `LayerMap`
|
||||||
pub fn insert_historic(&mut self, layer_desc: PersistentLayerDesc, layer: Arc<L>) {
|
pub fn insert_historic(&mut self, layer_desc: PersistentLayerDesc) {
|
||||||
self.layer_map.insert_historic_noflush(layer_desc, layer)
|
self.layer_map.insert_historic_noflush(layer_desc)
|
||||||
}
|
}
|
||||||
|
|
||||||
///
|
///
|
||||||
@@ -145,31 +122,8 @@ where
|
|||||||
///
|
///
|
||||||
/// This should be called when the corresponding file on disk has been deleted.
|
/// This should be called when the corresponding file on disk has been deleted.
|
||||||
///
|
///
|
||||||
pub fn remove_historic(&mut self, layer_desc: PersistentLayerDesc, layer: Arc<L>) {
|
pub fn remove_historic(&mut self, layer_desc: PersistentLayerDesc) {
|
||||||
self.layer_map.remove_historic_noflush(layer_desc, layer)
|
self.layer_map.remove_historic_noflush(layer_desc)
|
||||||
}
|
|
||||||
|
|
||||||
/// Replaces existing layer iff it is the `expected`.
|
|
||||||
///
|
|
||||||
/// If the expected layer has been removed it will not be inserted by this function.
|
|
||||||
///
|
|
||||||
/// Returned `Replacement` describes succeeding in replacement or the reason why it could not
|
|
||||||
/// be done.
|
|
||||||
///
|
|
||||||
/// TODO replacement can be done without buffering and rebuilding layer map updates.
|
|
||||||
/// One way to do that is to add a layer of indirection for returned values, so
|
|
||||||
/// that we can replace values only by updating a hashmap.
|
|
||||||
pub fn replace_historic(
|
|
||||||
&mut self,
|
|
||||||
expected_desc: PersistentLayerDesc,
|
|
||||||
expected: &Arc<L>,
|
|
||||||
new_desc: PersistentLayerDesc,
|
|
||||||
new: Arc<L>,
|
|
||||||
) -> anyhow::Result<Replacement<Arc<L>>> {
|
|
||||||
fail::fail_point!("layermap-replace-notfound", |_| Ok(Replacement::NotFound));
|
|
||||||
|
|
||||||
self.layer_map
|
|
||||||
.replace_historic_noflush(expected_desc, expected, new_desc, new)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// We will flush on drop anyway, but this method makes it
|
// We will flush on drop anyway, but this method makes it
|
||||||
@@ -185,25 +139,19 @@ where
|
|||||||
// than panic later or read without flushing.
|
// than panic later or read without flushing.
|
||||||
//
|
//
|
||||||
// TODO maybe warn if flush hasn't explicitly been called
|
// TODO maybe warn if flush hasn't explicitly been called
|
||||||
impl<L> Drop for BatchedUpdates<'_, L>
|
impl Drop for BatchedUpdates<'_> {
|
||||||
where
|
|
||||||
L: ?Sized + Layer,
|
|
||||||
{
|
|
||||||
fn drop(&mut self) {
|
fn drop(&mut self) {
|
||||||
self.layer_map.flush_updates();
|
self.layer_map.flush_updates();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return value of LayerMap::search
|
/// Return value of LayerMap::search
|
||||||
pub struct SearchResult<L: ?Sized> {
|
pub struct SearchResult {
|
||||||
pub layer: Arc<L>,
|
pub layer: Arc<PersistentLayerDesc>,
|
||||||
pub lsn_floor: Lsn,
|
pub lsn_floor: Lsn,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<L> LayerMap<L>
|
impl LayerMap {
|
||||||
where
|
|
||||||
L: ?Sized + Layer,
|
|
||||||
{
|
|
||||||
///
|
///
|
||||||
/// Find the latest layer (by lsn.end) that covers the given
|
/// Find the latest layer (by lsn.end) that covers the given
|
||||||
/// 'key', with lsn.start < 'end_lsn'.
|
/// 'key', with lsn.start < 'end_lsn'.
|
||||||
@@ -235,7 +183,7 @@ where
|
|||||||
/// NOTE: This only searches the 'historic' layers, *not* the
|
/// NOTE: This only searches the 'historic' layers, *not* the
|
||||||
/// 'open' and 'frozen' layers!
|
/// 'open' and 'frozen' layers!
|
||||||
///
|
///
|
||||||
pub fn search(&self, key: Key, end_lsn: Lsn) -> Option<SearchResult<L>> {
|
pub fn search(&self, key: Key, end_lsn: Lsn) -> Option<SearchResult> {
|
||||||
let version = self.historic.get().unwrap().get_version(end_lsn.0 - 1)?;
|
let version = self.historic.get().unwrap().get_version(end_lsn.0 - 1)?;
|
||||||
let latest_delta = version.delta_coverage.query(key.to_i128());
|
let latest_delta = version.delta_coverage.query(key.to_i128());
|
||||||
let latest_image = version.image_coverage.query(key.to_i128());
|
let latest_image = version.image_coverage.query(key.to_i128());
|
||||||
@@ -244,7 +192,6 @@ where
|
|||||||
(None, None) => None,
|
(None, None) => None,
|
||||||
(None, Some(image)) => {
|
(None, Some(image)) => {
|
||||||
let lsn_floor = image.get_lsn_range().start;
|
let lsn_floor = image.get_lsn_range().start;
|
||||||
let image = self.get_layer_from_mapping(&image.key()).clone();
|
|
||||||
Some(SearchResult {
|
Some(SearchResult {
|
||||||
layer: image,
|
layer: image,
|
||||||
lsn_floor,
|
lsn_floor,
|
||||||
@@ -252,7 +199,6 @@ where
|
|||||||
}
|
}
|
||||||
(Some(delta), None) => {
|
(Some(delta), None) => {
|
||||||
let lsn_floor = delta.get_lsn_range().start;
|
let lsn_floor = delta.get_lsn_range().start;
|
||||||
let delta = self.get_layer_from_mapping(&delta.key()).clone();
|
|
||||||
Some(SearchResult {
|
Some(SearchResult {
|
||||||
layer: delta,
|
layer: delta,
|
||||||
lsn_floor,
|
lsn_floor,
|
||||||
@@ -263,7 +209,6 @@ where
|
|||||||
let image_is_newer = image.get_lsn_range().end >= delta.get_lsn_range().end;
|
let image_is_newer = image.get_lsn_range().end >= delta.get_lsn_range().end;
|
||||||
let image_exact_match = img_lsn + 1 == end_lsn;
|
let image_exact_match = img_lsn + 1 == end_lsn;
|
||||||
if image_is_newer || image_exact_match {
|
if image_is_newer || image_exact_match {
|
||||||
let image = self.get_layer_from_mapping(&image.key()).clone();
|
|
||||||
Some(SearchResult {
|
Some(SearchResult {
|
||||||
layer: image,
|
layer: image,
|
||||||
lsn_floor: img_lsn,
|
lsn_floor: img_lsn,
|
||||||
@@ -271,7 +216,6 @@ where
|
|||||||
} else {
|
} else {
|
||||||
let lsn_floor =
|
let lsn_floor =
|
||||||
std::cmp::max(delta.get_lsn_range().start, image.get_lsn_range().start + 1);
|
std::cmp::max(delta.get_lsn_range().start, image.get_lsn_range().start + 1);
|
||||||
let delta = self.get_layer_from_mapping(&delta.key()).clone();
|
|
||||||
Some(SearchResult {
|
Some(SearchResult {
|
||||||
layer: delta,
|
layer: delta,
|
||||||
lsn_floor,
|
lsn_floor,
|
||||||
@@ -282,7 +226,7 @@ where
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Start a batch of updates, applied on drop
|
/// Start a batch of updates, applied on drop
|
||||||
pub fn batch_update(&mut self) -> BatchedUpdates<'_, L> {
|
pub fn batch_update(&mut self) -> BatchedUpdates<'_> {
|
||||||
BatchedUpdates { layer_map: self }
|
BatchedUpdates { layer_map: self }
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -292,48 +236,32 @@ where
|
|||||||
/// Helper function for BatchedUpdates::insert_historic
|
/// Helper function for BatchedUpdates::insert_historic
|
||||||
///
|
///
|
||||||
/// TODO(chi): remove L generic so that we do not need to pass layer object.
|
/// TODO(chi): remove L generic so that we do not need to pass layer object.
|
||||||
pub(self) fn insert_historic_noflush(
|
pub(self) fn insert_historic_noflush(&mut self, layer_desc: PersistentLayerDesc) {
|
||||||
&mut self,
|
|
||||||
layer_desc: PersistentLayerDesc,
|
|
||||||
layer: Arc<L>,
|
|
||||||
) {
|
|
||||||
self.mapping.insert(layer_desc.key(), layer.clone());
|
|
||||||
|
|
||||||
// TODO: See #3869, resulting #4088, attempted fix and repro #4094
|
// TODO: See #3869, resulting #4088, attempted fix and repro #4094
|
||||||
|
|
||||||
if Self::is_l0(&layer) {
|
if Self::is_l0(&layer_desc) {
|
||||||
self.l0_delta_layers.push(layer_desc.clone().into());
|
self.l0_delta_layers.push(layer_desc.clone().into());
|
||||||
}
|
}
|
||||||
|
|
||||||
self.historic.insert(
|
self.historic.insert(
|
||||||
historic_layer_coverage::LayerKey::from(&*layer),
|
historic_layer_coverage::LayerKey::from(&layer_desc),
|
||||||
layer_desc.into(),
|
layer_desc.into(),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_layer_from_mapping(&self, key: &PersistentLayerKey) -> &Arc<L> {
|
|
||||||
let layer = self
|
|
||||||
.mapping
|
|
||||||
.get(key)
|
|
||||||
.with_context(|| format!("{key:?}"))
|
|
||||||
.expect("inconsistent layer mapping");
|
|
||||||
layer
|
|
||||||
}
|
|
||||||
|
|
||||||
///
|
///
|
||||||
/// Remove an on-disk layer from the map.
|
/// Remove an on-disk layer from the map.
|
||||||
///
|
///
|
||||||
/// Helper function for BatchedUpdates::remove_historic
|
/// Helper function for BatchedUpdates::remove_historic
|
||||||
///
|
///
|
||||||
pub fn remove_historic_noflush(&mut self, layer_desc: PersistentLayerDesc, layer: Arc<L>) {
|
pub fn remove_historic_noflush(&mut self, layer_desc: PersistentLayerDesc) {
|
||||||
self.historic
|
self.historic
|
||||||
.remove(historic_layer_coverage::LayerKey::from(&*layer));
|
.remove(historic_layer_coverage::LayerKey::from(&layer_desc));
|
||||||
if Self::is_l0(&layer) {
|
let layer_key = layer_desc.key();
|
||||||
|
if Self::is_l0(&layer_desc) {
|
||||||
let len_before = self.l0_delta_layers.len();
|
let len_before = self.l0_delta_layers.len();
|
||||||
let mut l0_delta_layers = std::mem::take(&mut self.l0_delta_layers);
|
let mut l0_delta_layers = std::mem::take(&mut self.l0_delta_layers);
|
||||||
l0_delta_layers.retain(|other| {
|
l0_delta_layers.retain(|other| other.key() != layer_key);
|
||||||
!Self::compare_arced_layers(self.get_layer_from_mapping(&other.key()), &layer)
|
|
||||||
});
|
|
||||||
self.l0_delta_layers = l0_delta_layers;
|
self.l0_delta_layers = l0_delta_layers;
|
||||||
// this assertion is related to use of Arc::ptr_eq in Self::compare_arced_layers,
|
// this assertion is related to use of Arc::ptr_eq in Self::compare_arced_layers,
|
||||||
// there's a chance that the comparison fails at runtime due to it comparing (pointer,
|
// there's a chance that the comparison fails at runtime due to it comparing (pointer,
|
||||||
@@ -344,69 +272,6 @@ where
|
|||||||
"failed to locate removed historic layer from l0_delta_layers"
|
"failed to locate removed historic layer from l0_delta_layers"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
self.mapping.remove(&layer_desc.key());
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(self) fn replace_historic_noflush(
|
|
||||||
&mut self,
|
|
||||||
expected_desc: PersistentLayerDesc,
|
|
||||||
expected: &Arc<L>,
|
|
||||||
new_desc: PersistentLayerDesc,
|
|
||||||
new: Arc<L>,
|
|
||||||
) -> anyhow::Result<Replacement<Arc<L>>> {
|
|
||||||
let key = historic_layer_coverage::LayerKey::from(&**expected);
|
|
||||||
let other = historic_layer_coverage::LayerKey::from(&*new);
|
|
||||||
|
|
||||||
let expected_l0 = Self::is_l0(expected);
|
|
||||||
let new_l0 = Self::is_l0(&new);
|
|
||||||
|
|
||||||
anyhow::ensure!(
|
|
||||||
key == other,
|
|
||||||
"expected and new must have equal LayerKeys: {key:?} != {other:?}"
|
|
||||||
);
|
|
||||||
|
|
||||||
anyhow::ensure!(
|
|
||||||
expected_l0 == new_l0,
|
|
||||||
"expected and new must both be l0 deltas or neither should be: {expected_l0} != {new_l0}"
|
|
||||||
);
|
|
||||||
|
|
||||||
let l0_index = if expected_l0 {
|
|
||||||
// find the index in case replace worked, we need to replace that as well
|
|
||||||
let pos = self.l0_delta_layers.iter().position(|slot| {
|
|
||||||
Self::compare_arced_layers(self.get_layer_from_mapping(&slot.key()), expected)
|
|
||||||
});
|
|
||||||
|
|
||||||
if pos.is_none() {
|
|
||||||
return Ok(Replacement::NotFound);
|
|
||||||
}
|
|
||||||
pos
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
};
|
|
||||||
|
|
||||||
let new_desc = Arc::new(new_desc);
|
|
||||||
let replaced = self.historic.replace(&key, new_desc.clone(), |existing| {
|
|
||||||
**existing == expected_desc
|
|
||||||
});
|
|
||||||
|
|
||||||
if let Replacement::Replaced { .. } = &replaced {
|
|
||||||
self.mapping.remove(&expected_desc.key());
|
|
||||||
self.mapping.insert(new_desc.key(), new);
|
|
||||||
if let Some(index) = l0_index {
|
|
||||||
self.l0_delta_layers[index] = new_desc;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let replaced = match replaced {
|
|
||||||
Replacement::Replaced { in_buffered } => Replacement::Replaced { in_buffered },
|
|
||||||
Replacement::NotFound => Replacement::NotFound,
|
|
||||||
Replacement::RemovalBuffered => Replacement::RemovalBuffered,
|
|
||||||
Replacement::Unexpected(x) => {
|
|
||||||
Replacement::Unexpected(self.get_layer_from_mapping(&x.key()).clone())
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
Ok(replaced)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Helper function for BatchedUpdates::drop.
|
/// Helper function for BatchedUpdates::drop.
|
||||||
@@ -454,10 +319,8 @@ where
|
|||||||
Ok(true)
|
Ok(true)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn iter_historic_layers(&self) -> impl '_ + Iterator<Item = Arc<L>> {
|
pub fn iter_historic_layers(&self) -> impl '_ + Iterator<Item = Arc<PersistentLayerDesc>> {
|
||||||
self.historic
|
self.historic.iter()
|
||||||
.iter()
|
|
||||||
.map(|x| self.get_layer_from_mapping(&x.key()).clone())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
///
|
///
|
||||||
@@ -472,7 +335,7 @@ where
|
|||||||
&self,
|
&self,
|
||||||
key_range: &Range<Key>,
|
key_range: &Range<Key>,
|
||||||
lsn: Lsn,
|
lsn: Lsn,
|
||||||
) -> Result<Vec<(Range<Key>, Option<Arc<L>>)>> {
|
) -> Result<Vec<(Range<Key>, Option<Arc<PersistentLayerDesc>>)>> {
|
||||||
let version = match self.historic.get().unwrap().get_version(lsn.0) {
|
let version = match self.historic.get().unwrap().get_version(lsn.0) {
|
||||||
Some(v) => v,
|
Some(v) => v,
|
||||||
None => return Ok(vec![]),
|
None => return Ok(vec![]),
|
||||||
@@ -482,36 +345,26 @@ where
|
|||||||
let end = key_range.end.to_i128();
|
let end = key_range.end.to_i128();
|
||||||
|
|
||||||
// Initialize loop variables
|
// Initialize loop variables
|
||||||
let mut coverage: Vec<(Range<Key>, Option<Arc<L>>)> = vec![];
|
let mut coverage: Vec<(Range<Key>, Option<Arc<PersistentLayerDesc>>)> = vec![];
|
||||||
let mut current_key = start;
|
let mut current_key = start;
|
||||||
let mut current_val = version.image_coverage.query(start);
|
let mut current_val = version.image_coverage.query(start);
|
||||||
|
|
||||||
// Loop through the change events and push intervals
|
// Loop through the change events and push intervals
|
||||||
for (change_key, change_val) in version.image_coverage.range(start..end) {
|
for (change_key, change_val) in version.image_coverage.range(start..end) {
|
||||||
let kr = Key::from_i128(current_key)..Key::from_i128(change_key);
|
let kr = Key::from_i128(current_key)..Key::from_i128(change_key);
|
||||||
coverage.push((
|
coverage.push((kr, current_val.take()));
|
||||||
kr,
|
|
||||||
current_val
|
|
||||||
.take()
|
|
||||||
.map(|l| self.get_layer_from_mapping(&l.key()).clone()),
|
|
||||||
));
|
|
||||||
current_key = change_key;
|
current_key = change_key;
|
||||||
current_val = change_val.clone();
|
current_val = change_val.clone();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add the final interval
|
// Add the final interval
|
||||||
let kr = Key::from_i128(current_key)..Key::from_i128(end);
|
let kr = Key::from_i128(current_key)..Key::from_i128(end);
|
||||||
coverage.push((
|
coverage.push((kr, current_val.take()));
|
||||||
kr,
|
|
||||||
current_val
|
|
||||||
.take()
|
|
||||||
.map(|l| self.get_layer_from_mapping(&l.key()).clone()),
|
|
||||||
));
|
|
||||||
|
|
||||||
Ok(coverage)
|
Ok(coverage)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn is_l0(layer: &L) -> bool {
|
pub fn is_l0(layer: &PersistentLayerDesc) -> bool {
|
||||||
range_eq(&layer.get_key_range(), &(Key::MIN..Key::MAX))
|
range_eq(&layer.get_key_range(), &(Key::MIN..Key::MAX))
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -537,7 +390,7 @@ where
|
|||||||
/// TODO The optimal number should probably be slightly higher than 1, but to
|
/// TODO The optimal number should probably be slightly higher than 1, but to
|
||||||
/// implement that we need to plumb a lot more context into this function
|
/// implement that we need to plumb a lot more context into this function
|
||||||
/// than just the current partition_range.
|
/// than just the current partition_range.
|
||||||
pub fn is_reimage_worthy(layer: &L, partition_range: &Range<Key>) -> bool {
|
pub fn is_reimage_worthy(layer: &PersistentLayerDesc, partition_range: &Range<Key>) -> bool {
|
||||||
// Case 1
|
// Case 1
|
||||||
if !Self::is_l0(layer) {
|
if !Self::is_l0(layer) {
|
||||||
return true;
|
return true;
|
||||||
@@ -595,9 +448,7 @@ where
|
|||||||
let kr = Key::from_i128(current_key)..Key::from_i128(change_key);
|
let kr = Key::from_i128(current_key)..Key::from_i128(change_key);
|
||||||
let lr = lsn.start..val.get_lsn_range().start;
|
let lr = lsn.start..val.get_lsn_range().start;
|
||||||
if !kr.is_empty() {
|
if !kr.is_empty() {
|
||||||
let base_count =
|
let base_count = Self::is_reimage_worthy(&val, key) as usize;
|
||||||
Self::is_reimage_worthy(self.get_layer_from_mapping(&val.key()), key)
|
|
||||||
as usize;
|
|
||||||
let new_limit = limit.map(|l| l - base_count);
|
let new_limit = limit.map(|l| l - base_count);
|
||||||
let max_stacked_deltas_underneath =
|
let max_stacked_deltas_underneath =
|
||||||
self.count_deltas(&kr, &lr, new_limit)?;
|
self.count_deltas(&kr, &lr, new_limit)?;
|
||||||
@@ -620,9 +471,7 @@ where
|
|||||||
let lr = lsn.start..val.get_lsn_range().start;
|
let lr = lsn.start..val.get_lsn_range().start;
|
||||||
|
|
||||||
if !kr.is_empty() {
|
if !kr.is_empty() {
|
||||||
let base_count =
|
let base_count = Self::is_reimage_worthy(&val, key) as usize;
|
||||||
Self::is_reimage_worthy(self.get_layer_from_mapping(&val.key()), key)
|
|
||||||
as usize;
|
|
||||||
let new_limit = limit.map(|l| l - base_count);
|
let new_limit = limit.map(|l| l - base_count);
|
||||||
let max_stacked_deltas_underneath = self.count_deltas(&kr, &lr, new_limit)?;
|
let max_stacked_deltas_underneath = self.count_deltas(&kr, &lr, new_limit)?;
|
||||||
max_stacked_deltas = std::cmp::max(
|
max_stacked_deltas = std::cmp::max(
|
||||||
@@ -772,12 +621,8 @@ where
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Return all L0 delta layers
|
/// Return all L0 delta layers
|
||||||
pub fn get_level0_deltas(&self) -> Result<Vec<Arc<L>>> {
|
pub fn get_level0_deltas(&self) -> Result<Vec<Arc<PersistentLayerDesc>>> {
|
||||||
Ok(self
|
Ok(self.l0_delta_layers.to_vec())
|
||||||
.l0_delta_layers
|
|
||||||
.iter()
|
|
||||||
.map(|x| self.get_layer_from_mapping(&x.key()).clone())
|
|
||||||
.collect())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// debugging function to print out the contents of the layer map
|
/// debugging function to print out the contents of the layer map
|
||||||
@@ -802,72 +647,51 @@ where
|
|||||||
println!("End dump LayerMap");
|
println!("End dump LayerMap");
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Similar to `Arc::ptr_eq`, but only compares the object pointers, not vtables.
|
|
||||||
///
|
|
||||||
/// Returns `true` if the two `Arc` point to the same layer, false otherwise.
|
|
||||||
#[inline(always)]
|
|
||||||
pub fn compare_arced_layers(left: &Arc<L>, right: &Arc<L>) -> bool {
|
|
||||||
// "dyn Trait" objects are "fat pointers" in that they have two components:
|
|
||||||
// - pointer to the object
|
|
||||||
// - pointer to the vtable
|
|
||||||
//
|
|
||||||
// rust does not provide a guarantee that these vtables are unique, but however
|
|
||||||
// `Arc::ptr_eq` as of writing (at least up to 1.67) uses a comparison where both the
|
|
||||||
// pointer and the vtable need to be equal.
|
|
||||||
//
|
|
||||||
// See: https://github.com/rust-lang/rust/issues/103763
|
|
||||||
//
|
|
||||||
// A future version of rust will most likely use this form below, where we cast each
|
|
||||||
// pointer into a pointer to unit, which drops the inaccessible vtable pointer, making it
|
|
||||||
// not affect the comparison.
|
|
||||||
//
|
|
||||||
// See: https://github.com/rust-lang/rust/pull/106450
|
|
||||||
let left = Arc::as_ptr(left) as *const ();
|
|
||||||
let right = Arc::as_ptr(right) as *const ();
|
|
||||||
|
|
||||||
left == right
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::{LayerMap, Replacement};
|
use super::LayerMap;
|
||||||
use crate::tenant::storage_layer::{Layer, LayerDescriptor, LayerFileName};
|
use crate::tenant::storage_layer::{tests::LayerDescriptor, LayerFileName};
|
||||||
use std::str::FromStr;
|
use std::str::FromStr;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
mod l0_delta_layers_updated {
|
mod l0_delta_layers_updated {
|
||||||
|
|
||||||
|
use crate::tenant::{
|
||||||
|
storage_layer::{PersistentLayer, PersistentLayerDesc},
|
||||||
|
timeline::LayerFileManager,
|
||||||
|
};
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn for_full_range_delta() {
|
fn for_full_range_delta() {
|
||||||
// l0_delta_layers are used by compaction, and should observe all buffered updates
|
// l0_delta_layers are used by compaction, and should observe all buffered updates
|
||||||
l0_delta_layers_updated_scenario(
|
l0_delta_layers_updated_scenario(
|
||||||
"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000053423C21-0000000053424D69",
|
"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000053423C21-0000000053424D69",
|
||||||
true
|
true
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn for_non_full_range_delta() {
|
fn for_non_full_range_delta() {
|
||||||
// has minimal uncovered areas compared to l0_delta_layers_updated_on_insert_replace_remove_for_full_range_delta
|
// has minimal uncovered areas compared to l0_delta_layers_updated_on_insert_replace_remove_for_full_range_delta
|
||||||
l0_delta_layers_updated_scenario(
|
l0_delta_layers_updated_scenario(
|
||||||
"000000000000000000000000000000000001-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFE__0000000053423C21-0000000053424D69",
|
"000000000000000000000000000000000001-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFE__0000000053423C21-0000000053424D69",
|
||||||
// because not full range
|
// because not full range
|
||||||
false
|
false
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn for_image() {
|
fn for_image() {
|
||||||
l0_delta_layers_updated_scenario(
|
l0_delta_layers_updated_scenario(
|
||||||
"000000000000000000000000000000000000-000000000000000000000000000000010000__0000000053424D69",
|
"000000000000000000000000000000000000-000000000000000000000000000000010000__0000000053424D69",
|
||||||
// code only checks if it is a full range layer, doesn't care about images, which must
|
// code only checks if it is a full range layer, doesn't care about images, which must
|
||||||
// mean we should in practice never have full range images
|
// mean we should in practice never have full range images
|
||||||
false
|
false
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@@ -883,16 +707,16 @@ mod tests {
|
|||||||
let not_found = Arc::new(layer.clone());
|
let not_found = Arc::new(layer.clone());
|
||||||
let new_version = Arc::new(layer);
|
let new_version = Arc::new(layer);
|
||||||
|
|
||||||
let mut map = LayerMap::default();
|
// after the immutable storage state refactor, the replace operation
|
||||||
|
// will not use layer map any more. We keep it here for consistency in test cases
|
||||||
|
// and can remove it in the future.
|
||||||
|
let _map = LayerMap::default();
|
||||||
|
|
||||||
let res = map.batch_update().replace_historic(
|
let mut mapping = LayerFileManager::new();
|
||||||
not_found.get_persistent_layer_desc(),
|
|
||||||
&not_found,
|
|
||||||
new_version.get_persistent_layer_desc(),
|
|
||||||
new_version,
|
|
||||||
);
|
|
||||||
|
|
||||||
assert!(matches!(res, Ok(Replacement::NotFound)), "{res:?}");
|
mapping
|
||||||
|
.replace_and_verify(not_found, new_version)
|
||||||
|
.unwrap_err();
|
||||||
}
|
}
|
||||||
|
|
||||||
fn l0_delta_layers_updated_scenario(layer_name: &str, expected_l0: bool) {
|
fn l0_delta_layers_updated_scenario(layer_name: &str, expected_l0: bool) {
|
||||||
@@ -903,49 +727,44 @@ mod tests {
|
|||||||
let downloaded = Arc::new(skeleton);
|
let downloaded = Arc::new(skeleton);
|
||||||
|
|
||||||
let mut map = LayerMap::default();
|
let mut map = LayerMap::default();
|
||||||
|
let mut mapping = LayerFileManager::new();
|
||||||
|
|
||||||
// two disjoint Arcs in different lifecycle phases. even if it seems they must be the
|
// two disjoint Arcs in different lifecycle phases. even if it seems they must be the
|
||||||
// same layer, we use LayerMap::compare_arced_layers as the identity of layers.
|
// same layer, we use LayerMap::compare_arced_layers as the identity of layers.
|
||||||
assert!(!LayerMap::compare_arced_layers(&remote, &downloaded));
|
assert_eq!(remote.layer_desc(), downloaded.layer_desc());
|
||||||
|
|
||||||
let expected_in_counts = (1, usize::from(expected_l0));
|
let expected_in_counts = (1, usize::from(expected_l0));
|
||||||
|
|
||||||
map.batch_update()
|
map.batch_update()
|
||||||
.insert_historic(remote.get_persistent_layer_desc(), remote.clone());
|
.insert_historic(remote.layer_desc().clone());
|
||||||
assert_eq!(count_layer_in(&map, &remote), expected_in_counts);
|
mapping.insert(remote.clone());
|
||||||
|
assert_eq!(
|
||||||
let replaced = map
|
count_layer_in(&map, remote.layer_desc()),
|
||||||
.batch_update()
|
expected_in_counts
|
||||||
.replace_historic(
|
);
|
||||||
remote.get_persistent_layer_desc(),
|
|
||||||
&remote,
|
mapping
|
||||||
downloaded.get_persistent_layer_desc(),
|
.replace_and_verify(remote, downloaded.clone())
|
||||||
downloaded.clone(),
|
.expect("name derived attributes are the same");
|
||||||
)
|
assert_eq!(
|
||||||
.expect("name derived attributes are the same");
|
count_layer_in(&map, downloaded.layer_desc()),
|
||||||
assert!(
|
expected_in_counts
|
||||||
matches!(replaced, Replacement::Replaced { .. }),
|
|
||||||
"{replaced:?}"
|
|
||||||
);
|
);
|
||||||
assert_eq!(count_layer_in(&map, &downloaded), expected_in_counts);
|
|
||||||
|
|
||||||
map.batch_update()
|
map.batch_update()
|
||||||
.remove_historic(downloaded.get_persistent_layer_desc(), downloaded.clone());
|
.remove_historic(downloaded.layer_desc().clone());
|
||||||
assert_eq!(count_layer_in(&map, &downloaded), (0, 0));
|
assert_eq!(count_layer_in(&map, downloaded.layer_desc()), (0, 0));
|
||||||
}
|
}
|
||||||
|
|
||||||
fn count_layer_in<L: Layer + ?Sized>(map: &LayerMap<L>, layer: &Arc<L>) -> (usize, usize) {
|
fn count_layer_in(map: &LayerMap, layer: &PersistentLayerDesc) -> (usize, usize) {
|
||||||
let historic = map
|
let historic = map
|
||||||
.iter_historic_layers()
|
.iter_historic_layers()
|
||||||
.filter(|x| LayerMap::compare_arced_layers(x, layer))
|
.filter(|x| x.key() == layer.key())
|
||||||
.count();
|
.count();
|
||||||
let l0s = map
|
let l0s = map
|
||||||
.get_level0_deltas()
|
.get_level0_deltas()
|
||||||
.expect("why does this return a result");
|
.expect("why does this return a result");
|
||||||
let l0 = l0s
|
let l0 = l0s.iter().filter(|x| x.key() == layer.key()).count();
|
||||||
.iter()
|
|
||||||
.filter(|x| LayerMap::compare_arced_layers(x, layer))
|
|
||||||
.count();
|
|
||||||
|
|
||||||
(historic, l0)
|
(historic, l0)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,6 +3,8 @@ use std::ops::Range;
|
|||||||
|
|
||||||
use tracing::info;
|
use tracing::info;
|
||||||
|
|
||||||
|
use crate::tenant::storage_layer::PersistentLayerDesc;
|
||||||
|
|
||||||
use super::layer_coverage::LayerCoverageTuple;
|
use super::layer_coverage::LayerCoverageTuple;
|
||||||
|
|
||||||
/// Layers in this module are identified and indexed by this data.
|
/// Layers in this module are identified and indexed by this data.
|
||||||
@@ -41,8 +43,8 @@ impl Ord for LayerKey {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, L: crate::tenant::storage_layer::Layer + ?Sized> From<&'a L> for LayerKey {
|
impl From<&PersistentLayerDesc> for LayerKey {
|
||||||
fn from(layer: &'a L) -> Self {
|
fn from(layer: &PersistentLayerDesc) -> Self {
|
||||||
let kr = layer.get_key_range();
|
let kr = layer.get_key_range();
|
||||||
let lr = layer.get_lsn_range();
|
let lr = layer.get_lsn_range();
|
||||||
LayerKey {
|
LayerKey {
|
||||||
@@ -454,59 +456,6 @@ impl<Value: Clone> BufferedHistoricLayerCoverage<Value> {
|
|||||||
self.buffer.insert(layer_key, None);
|
self.buffer.insert(layer_key, None);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Replaces a previous layer with a new layer value.
|
|
||||||
///
|
|
||||||
/// The replacement is conditional on:
|
|
||||||
/// - there is an existing `LayerKey` record
|
|
||||||
/// - there is no buffered removal for the given `LayerKey`
|
|
||||||
/// - the given closure returns true for the current `Value`
|
|
||||||
///
|
|
||||||
/// The closure is used to compare the latest value (buffered insert, or existing layer)
|
|
||||||
/// against some expectation. This allows to use `Arc::ptr_eq` or similar which would be
|
|
||||||
/// inaccessible via `PartialEq` trait.
|
|
||||||
///
|
|
||||||
/// Returns a `Replacement` value describing the outcome; only the case of
|
|
||||||
/// `Replacement::Replaced` modifies the map and requires a rebuild.
|
|
||||||
pub fn replace<F>(
|
|
||||||
&mut self,
|
|
||||||
layer_key: &LayerKey,
|
|
||||||
new: Value,
|
|
||||||
check_expected: F,
|
|
||||||
) -> Replacement<Value>
|
|
||||||
where
|
|
||||||
F: FnOnce(&Value) -> bool,
|
|
||||||
{
|
|
||||||
let (slot, in_buffered) = match self.buffer.get(layer_key) {
|
|
||||||
Some(inner @ Some(_)) => {
|
|
||||||
// we compare against the buffered version, because there will be a later
|
|
||||||
// rebuild before querying
|
|
||||||
(inner.as_ref(), true)
|
|
||||||
}
|
|
||||||
Some(None) => {
|
|
||||||
// buffer has removal for this key; it will not be equivalent by any check_expected.
|
|
||||||
return Replacement::RemovalBuffered;
|
|
||||||
}
|
|
||||||
None => {
|
|
||||||
// no pending modification for the key, check layers
|
|
||||||
(self.layers.get(layer_key), false)
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
match slot {
|
|
||||||
Some(existing) if !check_expected(existing) => {
|
|
||||||
// unfortunate clone here, but otherwise the nll borrowck grows the region of
|
|
||||||
// 'a to cover the whole function, and we could not mutate in the other
|
|
||||||
// Some(existing) branch
|
|
||||||
Replacement::Unexpected(existing.clone())
|
|
||||||
}
|
|
||||||
None => Replacement::NotFound,
|
|
||||||
Some(_existing) => {
|
|
||||||
self.insert(layer_key.to_owned(), new);
|
|
||||||
Replacement::Replaced { in_buffered }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn rebuild(&mut self) {
|
pub fn rebuild(&mut self) {
|
||||||
// Find the first LSN that needs to be rebuilt
|
// Find the first LSN that needs to be rebuilt
|
||||||
let rebuild_since: u64 = match self.buffer.iter().next() {
|
let rebuild_since: u64 = match self.buffer.iter().next() {
|
||||||
@@ -575,22 +524,6 @@ impl<Value: Clone> BufferedHistoricLayerCoverage<Value> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Outcome of the replace operation.
|
|
||||||
#[derive(Debug)]
|
|
||||||
pub enum Replacement<Value> {
|
|
||||||
/// Previous value was replaced with the new value.
|
|
||||||
Replaced {
|
|
||||||
/// Replacement happened for a scheduled insert.
|
|
||||||
in_buffered: bool,
|
|
||||||
},
|
|
||||||
/// Key was not found in buffered updates or existing layers.
|
|
||||||
NotFound,
|
|
||||||
/// Key has been scheduled for removal, it was not replaced.
|
|
||||||
RemovalBuffered,
|
|
||||||
/// Previous value was rejected by the closure.
|
|
||||||
Unexpected(Value),
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_retroactive_regression_1() {
|
fn test_retroactive_regression_1() {
|
||||||
let mut map = BufferedHistoricLayerCoverage::new();
|
let mut map = BufferedHistoricLayerCoverage::new();
|
||||||
@@ -699,139 +632,3 @@ fn test_retroactive_simple() {
|
|||||||
assert_eq!(version.image_coverage.query(8), Some("Image 4".to_string()));
|
assert_eq!(version.image_coverage.query(8), Some("Image 4".to_string()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_retroactive_replacement() {
|
|
||||||
let mut map = BufferedHistoricLayerCoverage::new();
|
|
||||||
|
|
||||||
let keys = [
|
|
||||||
LayerKey {
|
|
||||||
key: 0..5,
|
|
||||||
lsn: 100..101,
|
|
||||||
is_image: true,
|
|
||||||
},
|
|
||||||
LayerKey {
|
|
||||||
key: 3..9,
|
|
||||||
lsn: 110..111,
|
|
||||||
is_image: true,
|
|
||||||
},
|
|
||||||
LayerKey {
|
|
||||||
key: 4..6,
|
|
||||||
lsn: 120..121,
|
|
||||||
is_image: true,
|
|
||||||
},
|
|
||||||
];
|
|
||||||
|
|
||||||
let layers = [
|
|
||||||
"Image 1".to_string(),
|
|
||||||
"Image 2".to_string(),
|
|
||||||
"Image 3".to_string(),
|
|
||||||
];
|
|
||||||
|
|
||||||
for (key, layer) in keys.iter().zip(layers.iter()) {
|
|
||||||
map.insert(key.to_owned(), layer.to_owned());
|
|
||||||
}
|
|
||||||
|
|
||||||
// rebuild is not necessary here, because replace works for both buffered updates and existing
|
|
||||||
// layers.
|
|
||||||
|
|
||||||
for (key, orig_layer) in keys.iter().zip(layers.iter()) {
|
|
||||||
let replacement = format!("Remote {orig_layer}");
|
|
||||||
|
|
||||||
// evict
|
|
||||||
let ret = map.replace(key, replacement.clone(), |l| l == orig_layer);
|
|
||||||
assert!(
|
|
||||||
matches!(ret, Replacement::Replaced { .. }),
|
|
||||||
"replace {orig_layer}: {ret:?}"
|
|
||||||
);
|
|
||||||
map.rebuild();
|
|
||||||
|
|
||||||
let at = key.lsn.end + 1;
|
|
||||||
|
|
||||||
let version = map.get().expect("rebuilt").get_version(at).unwrap();
|
|
||||||
assert_eq!(
|
|
||||||
version.image_coverage.query(4).as_deref(),
|
|
||||||
Some(replacement.as_str()),
|
|
||||||
"query for 4 at version {at} after eviction",
|
|
||||||
);
|
|
||||||
|
|
||||||
// download
|
|
||||||
let ret = map.replace(key, orig_layer.clone(), |l| l == &replacement);
|
|
||||||
assert!(
|
|
||||||
matches!(ret, Replacement::Replaced { .. }),
|
|
||||||
"replace {orig_layer} back: {ret:?}"
|
|
||||||
);
|
|
||||||
map.rebuild();
|
|
||||||
let version = map.get().expect("rebuilt").get_version(at).unwrap();
|
|
||||||
assert_eq!(
|
|
||||||
version.image_coverage.query(4).as_deref(),
|
|
||||||
Some(orig_layer.as_str()),
|
|
||||||
"query for 4 at version {at} after download",
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn missing_key_is_not_inserted_with_replace() {
|
|
||||||
let mut map = BufferedHistoricLayerCoverage::new();
|
|
||||||
let key = LayerKey {
|
|
||||||
key: 0..5,
|
|
||||||
lsn: 100..101,
|
|
||||||
is_image: true,
|
|
||||||
};
|
|
||||||
|
|
||||||
let ret = map.replace(&key, "should not replace", |_| true);
|
|
||||||
assert!(matches!(ret, Replacement::NotFound), "{ret:?}");
|
|
||||||
map.rebuild();
|
|
||||||
assert!(map
|
|
||||||
.get()
|
|
||||||
.expect("no changes to rebuild")
|
|
||||||
.get_version(102)
|
|
||||||
.is_none());
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn replacing_buffered_insert_and_remove() {
|
|
||||||
let mut map = BufferedHistoricLayerCoverage::new();
|
|
||||||
let key = LayerKey {
|
|
||||||
key: 0..5,
|
|
||||||
lsn: 100..101,
|
|
||||||
is_image: true,
|
|
||||||
};
|
|
||||||
|
|
||||||
map.insert(key.clone(), "Image 1");
|
|
||||||
let ret = map.replace(&key, "Remote Image 1", |&l| l == "Image 1");
|
|
||||||
assert!(
|
|
||||||
matches!(ret, Replacement::Replaced { in_buffered: true }),
|
|
||||||
"{ret:?}"
|
|
||||||
);
|
|
||||||
map.rebuild();
|
|
||||||
|
|
||||||
assert_eq!(
|
|
||||||
map.get()
|
|
||||||
.expect("rebuilt")
|
|
||||||
.get_version(102)
|
|
||||||
.unwrap()
|
|
||||||
.image_coverage
|
|
||||||
.query(4),
|
|
||||||
Some("Remote Image 1")
|
|
||||||
);
|
|
||||||
|
|
||||||
map.remove(key.clone());
|
|
||||||
let ret = map.replace(&key, "should not replace", |_| true);
|
|
||||||
assert!(
|
|
||||||
matches!(ret, Replacement::RemovalBuffered),
|
|
||||||
"cannot replace after scheduled remove: {ret:?}"
|
|
||||||
);
|
|
||||||
|
|
||||||
map.rebuild();
|
|
||||||
|
|
||||||
let ret = map.replace(&key, "should not replace", |_| true);
|
|
||||||
assert!(
|
|
||||||
matches!(ret, Replacement::NotFound),
|
|
||||||
"cannot replace after remove + rebuild: {ret:?}"
|
|
||||||
);
|
|
||||||
|
|
||||||
let at_version = map.get().expect("rebuilt").get_version(102);
|
|
||||||
assert!(at_version.is_none());
|
|
||||||
}
|
|
||||||
|
|||||||
325
pageserver/src/tenant/manifest.rs
Normal file
325
pageserver/src/tenant/manifest.rs
Normal file
@@ -0,0 +1,325 @@
|
|||||||
|
//! This module contains the encoding and decoding of the local manifest file.
|
||||||
|
//!
|
||||||
|
//! MANIFEST is a write-ahead log which is stored locally to each timeline. It
|
||||||
|
//! records the state of the storage engine. It contains a snapshot of the
|
||||||
|
//! state and all operations following that snapshot. The file begins with a
|
||||||
|
//! header recording MANIFEST version number. After that, it contains a snapshot.
|
||||||
|
//! The snapshot is followed by a list of operations. Each operation is a list
|
||||||
|
//! of records. Each record is either an addition or a removal of a layer.
|
||||||
|
//!
|
||||||
|
//! With MANIFEST, we can:
|
||||||
|
//!
|
||||||
|
//! 1. recover state quickly by reading the file, potentially boosting the
|
||||||
|
//! startup speed.
|
||||||
|
//! 2. ensure all operations are atomic and avoid corruption, solving issues
|
||||||
|
//! like redundant image layers and preparing us for future compaction
|
||||||
|
//! strategies.
|
||||||
|
//!
|
||||||
|
//! There is also a format for storing all layer files on S3, called
|
||||||
|
//! `index_part.json`. Compared with index_part, MANIFEST is a WAL which
|
||||||
|
//! records all operations as logs, and therefore we can easily replay the
|
||||||
|
//! operations when recovering from crash, while ensuring those operations
|
||||||
|
//! are atomic upon restart.
|
||||||
|
//!
|
||||||
|
//! Currently, this is not used in the system. Future refactors will ensure
|
||||||
|
//! the storage state will be recorded in this file, and the system can be
|
||||||
|
//! recovered from this file. This is tracked in
|
||||||
|
//! https://github.com/neondatabase/neon/issues/4418
|
||||||
|
|
||||||
|
use std::io::{self, Read, Write};
|
||||||
|
|
||||||
|
use crate::virtual_file::VirtualFile;
|
||||||
|
use anyhow::Result;
|
||||||
|
use bytes::{Buf, BufMut, Bytes, BytesMut};
|
||||||
|
use crc32c::crc32c;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use tracing::log::warn;
|
||||||
|
use utils::lsn::Lsn;
|
||||||
|
|
||||||
|
use super::storage_layer::PersistentLayerDesc;
|
||||||
|
|
||||||
|
pub struct Manifest {
|
||||||
|
file: VirtualFile,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, Debug)]
|
||||||
|
pub struct Snapshot {
|
||||||
|
pub layers: Vec<PersistentLayerDesc>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// serde by default encodes this as a tagged enum, and therefore it will be something
|
||||||
|
/// like `{ "AddLayer": { ... } }`.
|
||||||
|
#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, Debug)]
|
||||||
|
pub enum Record {
|
||||||
|
AddLayer(PersistentLayerDesc),
|
||||||
|
RemoveLayer(PersistentLayerDesc),
|
||||||
|
}
|
||||||
|
|
||||||
|
/// `echo neon.manifest | sha1sum` and take the leading 8 bytes.
|
||||||
|
const MANIFEST_MAGIC_NUMBER: u64 = 0xf5c44592b806109c;
|
||||||
|
const MANIFEST_VERSION: u64 = 1;
|
||||||
|
|
||||||
|
#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, Debug)]
|
||||||
|
pub struct ManifestHeader {
|
||||||
|
magic_number: u64,
|
||||||
|
version: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
const MANIFEST_HEADER_LEN: usize = 16;
|
||||||
|
|
||||||
|
impl ManifestHeader {
|
||||||
|
fn encode(&self) -> BytesMut {
|
||||||
|
let mut buf = BytesMut::with_capacity(MANIFEST_HEADER_LEN);
|
||||||
|
buf.put_u64(self.magic_number);
|
||||||
|
buf.put_u64(self.version);
|
||||||
|
buf
|
||||||
|
}
|
||||||
|
|
||||||
|
fn decode(mut buf: &[u8]) -> Self {
|
||||||
|
assert!(buf.len() == MANIFEST_HEADER_LEN, "invalid header");
|
||||||
|
Self {
|
||||||
|
magic_number: buf.get_u64(),
|
||||||
|
version: buf.get_u64(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, Debug)]
|
||||||
|
pub enum Operation {
|
||||||
|
/// A snapshot of the current state.
|
||||||
|
///
|
||||||
|
/// Lsn field represents the LSN that is persisted to disk for this snapshot.
|
||||||
|
Snapshot(Snapshot, Lsn),
|
||||||
|
/// An atomic operation that changes the state.
|
||||||
|
///
|
||||||
|
/// Lsn field represents the LSN that is persisted to disk after the operation is done.
|
||||||
|
/// This will only change when new L0 is flushed to the disk.
|
||||||
|
Operation(Vec<Record>, Lsn),
|
||||||
|
}
|
||||||
|
|
||||||
|
struct RecordHeader {
|
||||||
|
size: u32,
|
||||||
|
checksum: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
const RECORD_HEADER_LEN: usize = 8;
|
||||||
|
|
||||||
|
impl RecordHeader {
|
||||||
|
fn encode(&self) -> BytesMut {
|
||||||
|
let mut buf = BytesMut::with_capacity(RECORD_HEADER_LEN);
|
||||||
|
buf.put_u32(self.size);
|
||||||
|
buf.put_u32(self.checksum);
|
||||||
|
buf
|
||||||
|
}
|
||||||
|
|
||||||
|
fn decode(mut buf: &[u8]) -> Self {
|
||||||
|
assert!(buf.len() == RECORD_HEADER_LEN, "invalid header");
|
||||||
|
Self {
|
||||||
|
size: buf.get_u32(),
|
||||||
|
checksum: buf.get_u32(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, thiserror::Error)]
|
||||||
|
pub enum ManifestLoadError {
|
||||||
|
#[error("manifest header is corrupted")]
|
||||||
|
CorruptedManifestHeader,
|
||||||
|
#[error("unsupported manifest version: got {0}, expected {1}")]
|
||||||
|
UnsupportedVersion(u64, u64),
|
||||||
|
#[error("error when decoding record: {0}")]
|
||||||
|
DecodeRecord(serde_json::Error),
|
||||||
|
#[error("I/O error: {0}")]
|
||||||
|
Io(io::Error),
|
||||||
|
}
|
||||||
|
|
||||||
|
#[must_use = "Should check if the manifest is partially corrupted"]
|
||||||
|
pub struct ManifestPartiallyCorrupted(bool);
|
||||||
|
|
||||||
|
impl Manifest {
|
||||||
|
/// Create a new manifest by writing the manifest header and a snapshot record to the given file.
|
||||||
|
pub fn init(file: VirtualFile, snapshot: Snapshot, lsn: Lsn) -> Result<Self> {
|
||||||
|
let mut manifest = Self { file };
|
||||||
|
manifest.append_manifest_header(ManifestHeader {
|
||||||
|
magic_number: MANIFEST_MAGIC_NUMBER,
|
||||||
|
version: MANIFEST_VERSION,
|
||||||
|
})?;
|
||||||
|
manifest.append_operation(Operation::Snapshot(snapshot, lsn))?;
|
||||||
|
Ok(manifest)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Load a manifest. Returns the manifest and a list of operations. If the manifest is corrupted,
|
||||||
|
/// the bool flag will be set to true and the user is responsible for reconstructing a new manifest and
|
||||||
|
/// backing up the current one.
|
||||||
|
pub fn load(
|
||||||
|
mut file: VirtualFile,
|
||||||
|
) -> Result<(Self, Vec<Operation>, ManifestPartiallyCorrupted), ManifestLoadError> {
|
||||||
|
let mut buf = vec![];
|
||||||
|
file.read_to_end(&mut buf).map_err(ManifestLoadError::Io)?;
|
||||||
|
|
||||||
|
// Read manifest header
|
||||||
|
let mut buf = Bytes::from(buf);
|
||||||
|
if buf.remaining() < MANIFEST_HEADER_LEN {
|
||||||
|
return Err(ManifestLoadError::CorruptedManifestHeader);
|
||||||
|
}
|
||||||
|
let header = ManifestHeader::decode(&buf[..MANIFEST_HEADER_LEN]);
|
||||||
|
buf.advance(MANIFEST_HEADER_LEN);
|
||||||
|
if header.version != MANIFEST_VERSION {
|
||||||
|
return Err(ManifestLoadError::UnsupportedVersion(
|
||||||
|
header.version,
|
||||||
|
MANIFEST_VERSION,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read operations
|
||||||
|
let mut operations = Vec::new();
|
||||||
|
let corrupted = loop {
|
||||||
|
if buf.remaining() == 0 {
|
||||||
|
break false;
|
||||||
|
}
|
||||||
|
if buf.remaining() < RECORD_HEADER_LEN {
|
||||||
|
warn!("incomplete header when decoding manifest, could be corrupted");
|
||||||
|
break true;
|
||||||
|
}
|
||||||
|
let RecordHeader { size, checksum } = RecordHeader::decode(&buf[..RECORD_HEADER_LEN]);
|
||||||
|
let size = size as usize;
|
||||||
|
buf.advance(RECORD_HEADER_LEN);
|
||||||
|
if buf.remaining() < size {
|
||||||
|
warn!("incomplete data when decoding manifest, could be corrupted");
|
||||||
|
break true;
|
||||||
|
}
|
||||||
|
let data = &buf[..size];
|
||||||
|
if crc32c(data) != checksum {
|
||||||
|
warn!("checksum mismatch when decoding manifest, could be corrupted");
|
||||||
|
break true;
|
||||||
|
}
|
||||||
|
// if the following decode fails, we cannot use the manifest or safely ignore any record.
|
||||||
|
operations.push(serde_json::from_slice(data).map_err(ManifestLoadError::DecodeRecord)?);
|
||||||
|
buf.advance(size);
|
||||||
|
};
|
||||||
|
Ok((
|
||||||
|
Self { file },
|
||||||
|
operations,
|
||||||
|
ManifestPartiallyCorrupted(corrupted),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn append_data(&mut self, data: &[u8]) -> Result<()> {
|
||||||
|
if data.len() >= u32::MAX as usize {
|
||||||
|
panic!("data too large");
|
||||||
|
}
|
||||||
|
let header = RecordHeader {
|
||||||
|
size: data.len() as u32,
|
||||||
|
checksum: crc32c(data),
|
||||||
|
};
|
||||||
|
let header = header.encode();
|
||||||
|
self.file.write_all(&header)?;
|
||||||
|
self.file.write_all(data)?;
|
||||||
|
self.file.sync_all()?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn append_manifest_header(&mut self, header: ManifestHeader) -> Result<()> {
|
||||||
|
let encoded = header.encode();
|
||||||
|
self.file.write_all(&encoded)?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Add an operation to the manifest. The operation will be appended to the end of the file,
|
||||||
|
/// and the file will be synced to disk.
|
||||||
|
pub fn append_operation(&mut self, operation: Operation) -> Result<()> {
|
||||||
|
let encoded = Vec::from(serde_json::to_string(&operation)?);
|
||||||
|
self.append_data(&encoded)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use std::fs::OpenOptions;
|
||||||
|
|
||||||
|
use crate::repository::Key;
|
||||||
|
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_read_manifest() {
|
||||||
|
let testdir = crate::config::PageServerConf::test_repo_dir("test_read_manifest");
|
||||||
|
std::fs::create_dir_all(&testdir).unwrap();
|
||||||
|
let file = VirtualFile::create(&testdir.join("MANIFEST")).unwrap();
|
||||||
|
let layer1 = PersistentLayerDesc::new_test(Key::from_i128(0)..Key::from_i128(233));
|
||||||
|
let layer2 = PersistentLayerDesc::new_test(Key::from_i128(233)..Key::from_i128(2333));
|
||||||
|
let layer3 = PersistentLayerDesc::new_test(Key::from_i128(2333)..Key::from_i128(23333));
|
||||||
|
let layer4 = PersistentLayerDesc::new_test(Key::from_i128(23333)..Key::from_i128(233333));
|
||||||
|
|
||||||
|
// Write a manifest with a snapshot and some operations
|
||||||
|
let snapshot = Snapshot {
|
||||||
|
layers: vec![layer1, layer2],
|
||||||
|
};
|
||||||
|
let mut manifest = Manifest::init(file, snapshot.clone(), Lsn::from(0)).unwrap();
|
||||||
|
manifest
|
||||||
|
.append_operation(Operation::Operation(
|
||||||
|
vec![Record::AddLayer(layer3.clone())],
|
||||||
|
Lsn::from(1),
|
||||||
|
))
|
||||||
|
.unwrap();
|
||||||
|
drop(manifest);
|
||||||
|
|
||||||
|
// Open the second time and write
|
||||||
|
let file = VirtualFile::open_with_options(
|
||||||
|
&testdir.join("MANIFEST"),
|
||||||
|
OpenOptions::new()
|
||||||
|
.read(true)
|
||||||
|
.write(true)
|
||||||
|
.create_new(false)
|
||||||
|
.truncate(false),
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
let (mut manifest, operations, corrupted) = Manifest::load(file).unwrap();
|
||||||
|
assert!(!corrupted.0);
|
||||||
|
assert_eq!(operations.len(), 2);
|
||||||
|
assert_eq!(
|
||||||
|
&operations[0],
|
||||||
|
&Operation::Snapshot(snapshot.clone(), Lsn::from(0))
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
&operations[1],
|
||||||
|
&Operation::Operation(vec![Record::AddLayer(layer3.clone())], Lsn::from(1))
|
||||||
|
);
|
||||||
|
manifest
|
||||||
|
.append_operation(Operation::Operation(
|
||||||
|
vec![
|
||||||
|
Record::RemoveLayer(layer3.clone()),
|
||||||
|
Record::AddLayer(layer4.clone()),
|
||||||
|
],
|
||||||
|
Lsn::from(2),
|
||||||
|
))
|
||||||
|
.unwrap();
|
||||||
|
drop(manifest);
|
||||||
|
|
||||||
|
// Open the third time and verify
|
||||||
|
let file = VirtualFile::open_with_options(
|
||||||
|
&testdir.join("MANIFEST"),
|
||||||
|
OpenOptions::new()
|
||||||
|
.read(true)
|
||||||
|
.write(true)
|
||||||
|
.create_new(false)
|
||||||
|
.truncate(false),
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
let (_manifest, operations, corrupted) = Manifest::load(file).unwrap();
|
||||||
|
assert!(!corrupted.0);
|
||||||
|
assert_eq!(operations.len(), 3);
|
||||||
|
assert_eq!(&operations[0], &Operation::Snapshot(snapshot, Lsn::from(0)));
|
||||||
|
assert_eq!(
|
||||||
|
&operations[1],
|
||||||
|
&Operation::Operation(vec![Record::AddLayer(layer3.clone())], Lsn::from(1))
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
&operations[2],
|
||||||
|
&Operation::Operation(
|
||||||
|
vec![Record::RemoveLayer(layer3), Record::AddLayer(layer4)],
|
||||||
|
Lsn::from(2)
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -396,7 +396,9 @@ pub async fn delete_timeline(
|
|||||||
ctx: &RequestContext,
|
ctx: &RequestContext,
|
||||||
) -> Result<(), DeleteTimelineError> {
|
) -> Result<(), DeleteTimelineError> {
|
||||||
let tenant = get_tenant(tenant_id, true).await?;
|
let tenant = get_tenant(tenant_id, true).await?;
|
||||||
tenant.delete_timeline(timeline_id, ctx).await?;
|
tenant
|
||||||
|
.prepare_and_schedule_delete_timeline(timeline_id, ctx)
|
||||||
|
.await?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -673,7 +675,7 @@ pub async fn immediate_gc(
|
|||||||
.get(&tenant_id)
|
.get(&tenant_id)
|
||||||
.map(Arc::clone)
|
.map(Arc::clone)
|
||||||
.with_context(|| format!("tenant {tenant_id}"))
|
.with_context(|| format!("tenant {tenant_id}"))
|
||||||
.map_err(ApiError::NotFound)?;
|
.map_err(|e| ApiError::NotFound(e.into()))?;
|
||||||
|
|
||||||
let gc_horizon = gc_req.gc_horizon.unwrap_or_else(|| tenant.get_gc_horizon());
|
let gc_horizon = gc_req.gc_horizon.unwrap_or_else(|| tenant.get_gc_horizon());
|
||||||
// Use tenant's pitr setting
|
// Use tenant's pitr setting
|
||||||
@@ -722,11 +724,11 @@ pub async fn immediate_compact(
|
|||||||
.get(&tenant_id)
|
.get(&tenant_id)
|
||||||
.map(Arc::clone)
|
.map(Arc::clone)
|
||||||
.with_context(|| format!("tenant {tenant_id}"))
|
.with_context(|| format!("tenant {tenant_id}"))
|
||||||
.map_err(ApiError::NotFound)?;
|
.map_err(|e| ApiError::NotFound(e.into()))?;
|
||||||
|
|
||||||
let timeline = tenant
|
let timeline = tenant
|
||||||
.get_timeline(timeline_id, true)
|
.get_timeline(timeline_id, true)
|
||||||
.map_err(ApiError::NotFound)?;
|
.map_err(|e| ApiError::NotFound(e.into()))?;
|
||||||
|
|
||||||
// Run in task_mgr to avoid race with tenant_detach operation
|
// Run in task_mgr to avoid race with tenant_detach operation
|
||||||
let ctx = ctx.detached_child(TaskKind::Compaction, DownloadBehavior::Download);
|
let ctx = ctx.detached_child(TaskKind::Compaction, DownloadBehavior::Download);
|
||||||
|
|||||||
@@ -210,13 +210,15 @@ use chrono::{NaiveDateTime, Utc};
|
|||||||
pub use download::{is_temp_download_file, list_remote_timelines};
|
pub use download::{is_temp_download_file, list_remote_timelines};
|
||||||
use scopeguard::ScopeGuard;
|
use scopeguard::ScopeGuard;
|
||||||
|
|
||||||
|
use std::collections::{HashMap, VecDeque};
|
||||||
|
use std::path::Path;
|
||||||
use std::sync::atomic::{AtomicU32, Ordering};
|
use std::sync::atomic::{AtomicU32, Ordering};
|
||||||
use std::sync::{Arc, Mutex};
|
use std::sync::{Arc, Mutex};
|
||||||
|
|
||||||
use remote_storage::{DownloadError, GenericRemoteStorage};
|
use remote_storage::{DownloadError, GenericRemoteStorage, RemotePath};
|
||||||
use std::ops::DerefMut;
|
use std::ops::DerefMut;
|
||||||
use tokio::runtime::Runtime;
|
use tokio::runtime::Runtime;
|
||||||
use tracing::{debug, error, info, warn};
|
use tracing::{debug, error, info, instrument, warn};
|
||||||
use tracing::{info_span, Instrument};
|
use tracing::{info_span, Instrument};
|
||||||
use utils::lsn::Lsn;
|
use utils::lsn::Lsn;
|
||||||
|
|
||||||
@@ -225,7 +227,9 @@ use crate::metrics::{
|
|||||||
RemoteTimelineClientMetricsCallTrackSize, REMOTE_ONDEMAND_DOWNLOADED_BYTES,
|
RemoteTimelineClientMetricsCallTrackSize, REMOTE_ONDEMAND_DOWNLOADED_BYTES,
|
||||||
REMOTE_ONDEMAND_DOWNLOADED_LAYERS,
|
REMOTE_ONDEMAND_DOWNLOADED_LAYERS,
|
||||||
};
|
};
|
||||||
|
use crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id;
|
||||||
use crate::tenant::remote_timeline_client::index::LayerFileMetadata;
|
use crate::tenant::remote_timeline_client::index::LayerFileMetadata;
|
||||||
|
use crate::tenant::upload_queue::Delete;
|
||||||
use crate::{
|
use crate::{
|
||||||
config::PageServerConf,
|
config::PageServerConf,
|
||||||
task_mgr,
|
task_mgr,
|
||||||
@@ -259,7 +263,7 @@ const FAILED_UPLOAD_WARN_THRESHOLD: u32 = 3;
|
|||||||
|
|
||||||
pub enum MaybeDeletedIndexPart {
|
pub enum MaybeDeletedIndexPart {
|
||||||
IndexPart(IndexPart),
|
IndexPart(IndexPart),
|
||||||
Deleted,
|
Deleted(IndexPart),
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Errors that can arise when calling [`RemoteTimelineClient::stop`].
|
/// Errors that can arise when calling [`RemoteTimelineClient::stop`].
|
||||||
@@ -361,11 +365,42 @@ impl RemoteTimelineClient {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Initialize the queue in stopped state. Used in startup path
|
||||||
|
/// to continue deletion operation interrupted by pageserver crash or restart.
|
||||||
|
pub fn init_upload_queue_stopped_to_continue_deletion(
|
||||||
|
&self,
|
||||||
|
index_part: &IndexPart,
|
||||||
|
) -> anyhow::Result<()> {
|
||||||
|
// FIXME: consider newtype for DeletedIndexPart.
|
||||||
|
let deleted_at = index_part.deleted_at.ok_or(anyhow::anyhow!(
|
||||||
|
"bug: it is responsibility of the caller to provide index part from MaybeDeletedIndexPart::Deleted"
|
||||||
|
))?;
|
||||||
|
|
||||||
|
{
|
||||||
|
let mut upload_queue = self.upload_queue.lock().unwrap();
|
||||||
|
upload_queue.initialize_with_current_remote_index_part(index_part)?;
|
||||||
|
self.update_remote_physical_size_gauge(Some(index_part));
|
||||||
|
}
|
||||||
|
// `stop()` also locks the upload queue; without dropping the guard above, it would deadlock
|
||||||
|
self.stop().expect("initialized line above");
|
||||||
|
|
||||||
|
let mut upload_queue = self.upload_queue.lock().unwrap();
|
||||||
|
|
||||||
|
upload_queue
|
||||||
|
.stopped_mut()
|
||||||
|
.expect("stopped above")
|
||||||
|
.deleted_at = SetDeletedFlagProgress::Successful(deleted_at);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
pub fn last_uploaded_consistent_lsn(&self) -> Option<Lsn> {
|
pub fn last_uploaded_consistent_lsn(&self) -> Option<Lsn> {
|
||||||
match &*self.upload_queue.lock().unwrap() {
|
match &*self.upload_queue.lock().unwrap() {
|
||||||
UploadQueue::Uninitialized => None,
|
UploadQueue::Uninitialized => None,
|
||||||
UploadQueue::Initialized(q) => Some(q.last_uploaded_consistent_lsn),
|
UploadQueue::Initialized(q) => Some(q.last_uploaded_consistent_lsn),
|
||||||
UploadQueue::Stopped(q) => Some(q.last_uploaded_consistent_lsn),
|
UploadQueue::Stopped(q) => {
|
||||||
|
Some(q.upload_queue_for_deletion.last_uploaded_consistent_lsn)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -420,7 +455,7 @@ impl RemoteTimelineClient {
|
|||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
if index_part.deleted_at.is_some() {
|
if index_part.deleted_at.is_some() {
|
||||||
Ok(MaybeDeletedIndexPart::Deleted)
|
Ok(MaybeDeletedIndexPart::Deleted(index_part))
|
||||||
} else {
|
} else {
|
||||||
Ok(MaybeDeletedIndexPart::IndexPart(index_part))
|
Ok(MaybeDeletedIndexPart::IndexPart(index_part))
|
||||||
}
|
}
|
||||||
@@ -573,10 +608,7 @@ impl RemoteTimelineClient {
|
|||||||
self.calls_unfinished_metric_begin(&op);
|
self.calls_unfinished_metric_begin(&op);
|
||||||
upload_queue.queued_operations.push_back(op);
|
upload_queue.queued_operations.push_back(op);
|
||||||
|
|
||||||
info!(
|
info!("scheduled layer file upload {layer_file_name}");
|
||||||
"scheduled layer file upload {}",
|
|
||||||
layer_file_name.file_name()
|
|
||||||
);
|
|
||||||
|
|
||||||
// Launch the task immediately, if possible
|
// Launch the task immediately, if possible
|
||||||
self.launch_queued_tasks(upload_queue);
|
self.launch_queued_tasks(upload_queue);
|
||||||
@@ -622,10 +654,14 @@ impl RemoteTimelineClient {
|
|||||||
|
|
||||||
// schedule the actual deletions
|
// schedule the actual deletions
|
||||||
for name in names {
|
for name in names {
|
||||||
let op = UploadOp::Delete(RemoteOpFileKind::Layer, name.clone());
|
let op = UploadOp::Delete(Delete {
|
||||||
|
file_kind: RemoteOpFileKind::Layer,
|
||||||
|
layer_file_name: name.clone(),
|
||||||
|
scheduled_from_timeline_delete: false,
|
||||||
|
});
|
||||||
self.calls_unfinished_metric_begin(&op);
|
self.calls_unfinished_metric_begin(&op);
|
||||||
upload_queue.queued_operations.push_back(op);
|
upload_queue.queued_operations.push_back(op);
|
||||||
info!("scheduled layer file deletion {}", name.file_name());
|
info!("scheduled layer file deletion {name}");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Launch the tasks immediately, if possible
|
// Launch the tasks immediately, if possible
|
||||||
@@ -639,18 +675,11 @@ impl RemoteTimelineClient {
|
|||||||
/// Wait for all previously scheduled uploads/deletions to complete
|
/// Wait for all previously scheduled uploads/deletions to complete
|
||||||
///
|
///
|
||||||
pub async fn wait_completion(self: &Arc<Self>) -> anyhow::Result<()> {
|
pub async fn wait_completion(self: &Arc<Self>) -> anyhow::Result<()> {
|
||||||
let (sender, mut receiver) = tokio::sync::watch::channel(());
|
let mut receiver = {
|
||||||
let barrier_op = UploadOp::Barrier(sender);
|
|
||||||
|
|
||||||
{
|
|
||||||
let mut guard = self.upload_queue.lock().unwrap();
|
let mut guard = self.upload_queue.lock().unwrap();
|
||||||
let upload_queue = guard.initialized_mut()?;
|
let upload_queue = guard.initialized_mut()?;
|
||||||
upload_queue.queued_operations.push_back(barrier_op);
|
self.schedule_barrier(upload_queue)
|
||||||
// Don't count this kind of operation!
|
};
|
||||||
|
|
||||||
// Launch the task immediately, if possible
|
|
||||||
self.launch_queued_tasks(upload_queue);
|
|
||||||
}
|
|
||||||
|
|
||||||
if receiver.changed().await.is_err() {
|
if receiver.changed().await.is_err() {
|
||||||
anyhow::bail!("wait_completion aborted because upload queue was stopped");
|
anyhow::bail!("wait_completion aborted because upload queue was stopped");
|
||||||
@@ -658,6 +687,22 @@ impl RemoteTimelineClient {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn schedule_barrier(
|
||||||
|
self: &Arc<Self>,
|
||||||
|
upload_queue: &mut UploadQueueInitialized,
|
||||||
|
) -> tokio::sync::watch::Receiver<()> {
|
||||||
|
let (sender, receiver) = tokio::sync::watch::channel(());
|
||||||
|
let barrier_op = UploadOp::Barrier(sender);
|
||||||
|
|
||||||
|
upload_queue.queued_operations.push_back(barrier_op);
|
||||||
|
// Don't count this kind of operation!
|
||||||
|
|
||||||
|
// Launch the task immediately, if possible
|
||||||
|
self.launch_queued_tasks(upload_queue);
|
||||||
|
|
||||||
|
receiver
|
||||||
|
}
|
||||||
|
|
||||||
/// Set the deleted_at field in the remote index file.
|
/// Set the deleted_at field in the remote index file.
|
||||||
///
|
///
|
||||||
/// This fails if the upload queue has not been `stop()`ed.
|
/// This fails if the upload queue has not been `stop()`ed.
|
||||||
@@ -665,6 +710,7 @@ impl RemoteTimelineClient {
|
|||||||
/// The caller is responsible for calling `stop()` AND for waiting
|
/// The caller is responsible for calling `stop()` AND for waiting
|
||||||
/// for any ongoing upload tasks to finish after `stop()` has succeeded.
|
/// for any ongoing upload tasks to finish after `stop()` has succeeded.
|
||||||
/// Check method [`RemoteTimelineClient::stop`] for details.
|
/// Check method [`RemoteTimelineClient::stop`] for details.
|
||||||
|
#[instrument(skip_all)]
|
||||||
pub(crate) async fn persist_index_part_with_deleted_flag(
|
pub(crate) async fn persist_index_part_with_deleted_flag(
|
||||||
self: &Arc<Self>,
|
self: &Arc<Self>,
|
||||||
) -> Result<(), PersistIndexPartWithDeletedFlagError> {
|
) -> Result<(), PersistIndexPartWithDeletedFlagError> {
|
||||||
@@ -674,15 +720,7 @@ impl RemoteTimelineClient {
|
|||||||
// We must be in stopped state because otherwise
|
// We must be in stopped state because otherwise
|
||||||
// we can have inprogress index part upload that can overwrite the file
|
// we can have inprogress index part upload that can overwrite the file
|
||||||
// with missing is_deleted flag that we going to set below
|
// with missing is_deleted flag that we going to set below
|
||||||
let stopped = match &mut *locked {
|
let stopped = locked.stopped_mut()?;
|
||||||
UploadQueue::Uninitialized => {
|
|
||||||
return Err(anyhow::anyhow!("is not Stopped but Uninitialized").into())
|
|
||||||
}
|
|
||||||
UploadQueue::Initialized(_) => {
|
|
||||||
return Err(anyhow::anyhow!("is not Stopped but Initialized").into())
|
|
||||||
}
|
|
||||||
UploadQueue::Stopped(stopped) => stopped,
|
|
||||||
};
|
|
||||||
|
|
||||||
match stopped.deleted_at {
|
match stopped.deleted_at {
|
||||||
SetDeletedFlagProgress::NotRunning => (), // proceed
|
SetDeletedFlagProgress::NotRunning => (), // proceed
|
||||||
@@ -696,48 +734,34 @@ impl RemoteTimelineClient {
|
|||||||
let deleted_at = Utc::now().naive_utc();
|
let deleted_at = Utc::now().naive_utc();
|
||||||
stopped.deleted_at = SetDeletedFlagProgress::InProgress(deleted_at);
|
stopped.deleted_at = SetDeletedFlagProgress::InProgress(deleted_at);
|
||||||
|
|
||||||
let mut index_part = IndexPart::new(
|
let mut index_part = IndexPart::try_from(&stopped.upload_queue_for_deletion)
|
||||||
stopped.latest_files.clone(),
|
.context("IndexPart serialize")?;
|
||||||
stopped.last_uploaded_consistent_lsn,
|
|
||||||
stopped
|
|
||||||
.latest_metadata
|
|
||||||
.to_bytes()
|
|
||||||
.context("serialize metadata")?,
|
|
||||||
);
|
|
||||||
index_part.deleted_at = Some(deleted_at);
|
index_part.deleted_at = Some(deleted_at);
|
||||||
index_part
|
index_part
|
||||||
};
|
};
|
||||||
|
|
||||||
let undo_deleted_at = scopeguard::guard(Arc::clone(self), |self_clone| {
|
let undo_deleted_at = scopeguard::guard(Arc::clone(self), |self_clone| {
|
||||||
let mut locked = self_clone.upload_queue.lock().unwrap();
|
let mut locked = self_clone.upload_queue.lock().unwrap();
|
||||||
let stopped = match &mut *locked {
|
let stopped = locked
|
||||||
UploadQueue::Uninitialized | UploadQueue::Initialized(_) => unreachable!(
|
.stopped_mut()
|
||||||
"there's no way out of Stopping, and we checked it's Stopping above: {:?}",
|
.expect("there's no way out of Stopping, and we checked it's Stopping above");
|
||||||
locked.as_str(),
|
|
||||||
),
|
|
||||||
UploadQueue::Stopped(stopped) => stopped,
|
|
||||||
};
|
|
||||||
stopped.deleted_at = SetDeletedFlagProgress::NotRunning;
|
stopped.deleted_at = SetDeletedFlagProgress::NotRunning;
|
||||||
});
|
});
|
||||||
|
|
||||||
// Have a failpoint that can use the `pause` failpoint action.
|
// Have a failpoint that can use the `pause` failpoint action.
|
||||||
// We don't want to block the executor thread, hence, spawn_blocking + await.
|
// We don't want to block the executor thread, hence, spawn_blocking + await.
|
||||||
#[cfg(feature = "testing")]
|
if cfg!(feature = "testing") {
|
||||||
tokio::task::spawn_blocking({
|
tokio::task::spawn_blocking({
|
||||||
let current = tracing::Span::current();
|
let current = tracing::Span::current();
|
||||||
move || {
|
move || {
|
||||||
let _entered = current.entered();
|
let _entered = current.entered();
|
||||||
tracing::info!(
|
tracing::info!("at failpoint persist_deleted_index_part");
|
||||||
"at failpoint persist_index_part_with_deleted_flag_after_set_before_upload_pause"
|
fail::fail_point!("persist_deleted_index_part");
|
||||||
);
|
}
|
||||||
fail::fail_point!(
|
})
|
||||||
"persist_index_part_with_deleted_flag_after_set_before_upload_pause"
|
.await
|
||||||
);
|
.expect("spawn_blocking");
|
||||||
}
|
}
|
||||||
})
|
|
||||||
.await
|
|
||||||
.expect("spawn_blocking");
|
|
||||||
|
|
||||||
upload::upload_index_part(
|
upload::upload_index_part(
|
||||||
self.conf,
|
self.conf,
|
||||||
&self.storage_impl,
|
&self.storage_impl,
|
||||||
@@ -751,13 +775,10 @@ impl RemoteTimelineClient {
|
|||||||
ScopeGuard::into_inner(undo_deleted_at);
|
ScopeGuard::into_inner(undo_deleted_at);
|
||||||
{
|
{
|
||||||
let mut locked = self.upload_queue.lock().unwrap();
|
let mut locked = self.upload_queue.lock().unwrap();
|
||||||
let stopped = match &mut *locked {
|
|
||||||
UploadQueue::Uninitialized | UploadQueue::Initialized(_) => unreachable!(
|
let stopped = locked
|
||||||
"there's no way out of Stopping, and we checked it's Stopping above: {:?}",
|
.stopped_mut()
|
||||||
locked.as_str(),
|
.expect("there's no way out of Stopping, and we checked it's Stopping above");
|
||||||
),
|
|
||||||
UploadQueue::Stopped(stopped) => stopped,
|
|
||||||
};
|
|
||||||
stopped.deleted_at = SetDeletedFlagProgress::Successful(
|
stopped.deleted_at = SetDeletedFlagProgress::Successful(
|
||||||
index_part_with_deleted_at
|
index_part_with_deleted_at
|
||||||
.deleted_at
|
.deleted_at
|
||||||
@@ -768,6 +789,90 @@ impl RemoteTimelineClient {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Prerequisites: UploadQueue should be in stopped state and deleted_at should be successfully set.
|
||||||
|
/// The function deletes layer files one by one, then lists the prefix to see if we leaked something
|
||||||
|
/// deletes leaked files if any and proceeds with deletion of index file at the end.
|
||||||
|
pub(crate) async fn delete_all(self: &Arc<Self>) -> anyhow::Result<()> {
|
||||||
|
debug_assert_current_span_has_tenant_and_timeline_id();
|
||||||
|
|
||||||
|
let (mut receiver, deletions_queued) = {
|
||||||
|
let mut deletions_queued = 0;
|
||||||
|
|
||||||
|
let mut locked = self.upload_queue.lock().unwrap();
|
||||||
|
let stopped = locked.stopped_mut()?;
|
||||||
|
|
||||||
|
if !matches!(stopped.deleted_at, SetDeletedFlagProgress::Successful(_)) {
|
||||||
|
anyhow::bail!("deleted_at is not set")
|
||||||
|
}
|
||||||
|
|
||||||
|
debug_assert!(stopped.upload_queue_for_deletion.no_pending_work());
|
||||||
|
|
||||||
|
stopped
|
||||||
|
.upload_queue_for_deletion
|
||||||
|
.queued_operations
|
||||||
|
.reserve(stopped.upload_queue_for_deletion.latest_files.len());
|
||||||
|
|
||||||
|
// schedule the actual deletions
|
||||||
|
for name in stopped.upload_queue_for_deletion.latest_files.keys() {
|
||||||
|
let op = UploadOp::Delete(Delete {
|
||||||
|
file_kind: RemoteOpFileKind::Layer,
|
||||||
|
layer_file_name: name.clone(),
|
||||||
|
scheduled_from_timeline_delete: true,
|
||||||
|
});
|
||||||
|
self.calls_unfinished_metric_begin(&op);
|
||||||
|
stopped
|
||||||
|
.upload_queue_for_deletion
|
||||||
|
.queued_operations
|
||||||
|
.push_back(op);
|
||||||
|
|
||||||
|
info!("scheduled layer file deletion {name}");
|
||||||
|
deletions_queued += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
self.launch_queued_tasks(&mut stopped.upload_queue_for_deletion);
|
||||||
|
|
||||||
|
(
|
||||||
|
self.schedule_barrier(&mut stopped.upload_queue_for_deletion),
|
||||||
|
deletions_queued,
|
||||||
|
)
|
||||||
|
};
|
||||||
|
|
||||||
|
receiver.changed().await?;
|
||||||
|
|
||||||
|
// Do not delete index part yet, it is needed for possible retry. If we remove it first
|
||||||
|
// and the retry arrives at a different pageserver, there won't be any traces of it on remote storage
|
||||||
|
let timeline_path = self.conf.timeline_path(&self.timeline_id, &self.tenant_id);
|
||||||
|
let timeline_storage_path = self.conf.remote_path(&timeline_path)?;
|
||||||
|
|
||||||
|
let remaining = self
|
||||||
|
.storage_impl
|
||||||
|
.list_prefixes(Some(&timeline_storage_path))
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
let remaining: Vec<RemotePath> = remaining
|
||||||
|
.into_iter()
|
||||||
|
.filter(|p| p.object_name() != Some(IndexPart::FILE_NAME))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
if !remaining.is_empty() {
|
||||||
|
warn!(
|
||||||
|
"Found {} files not bound to index_file.json, proceeding with their deletion",
|
||||||
|
remaining.len()
|
||||||
|
);
|
||||||
|
warn!("About to remove {} files", remaining.len());
|
||||||
|
self.storage_impl.delete_objects(&remaining).await?;
|
||||||
|
}
|
||||||
|
|
||||||
|
let index_file_path = timeline_storage_path.join(Path::new(IndexPart::FILE_NAME));
|
||||||
|
|
||||||
|
debug!("deleting index part");
|
||||||
|
self.storage_impl.delete(&index_file_path).await?;
|
||||||
|
|
||||||
|
info!(deletions_queued, "done deleting, including index_part.json");
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
///
|
///
|
||||||
/// Pick next tasks from the queue, and start as many of them as possible without violating
|
/// Pick next tasks from the queue, and start as many of them as possible without violating
|
||||||
/// the ordering constraints.
|
/// the ordering constraints.
|
||||||
@@ -786,7 +891,7 @@ impl RemoteTimelineClient {
|
|||||||
// have finished.
|
// have finished.
|
||||||
upload_queue.inprogress_tasks.is_empty()
|
upload_queue.inprogress_tasks.is_empty()
|
||||||
}
|
}
|
||||||
UploadOp::Delete(_, _) => {
|
UploadOp::Delete(_) => {
|
||||||
// Wait for preceding uploads to finish. Concurrent deletions are OK, though.
|
// Wait for preceding uploads to finish. Concurrent deletions are OK, though.
|
||||||
upload_queue.num_inprogress_deletions == upload_queue.inprogress_tasks.len()
|
upload_queue.num_inprogress_deletions == upload_queue.inprogress_tasks.len()
|
||||||
}
|
}
|
||||||
@@ -817,7 +922,7 @@ impl RemoteTimelineClient {
|
|||||||
UploadOp::UploadMetadata(_, _) => {
|
UploadOp::UploadMetadata(_, _) => {
|
||||||
upload_queue.num_inprogress_metadata_uploads += 1;
|
upload_queue.num_inprogress_metadata_uploads += 1;
|
||||||
}
|
}
|
||||||
UploadOp::Delete(_, _) => {
|
UploadOp::Delete(_) => {
|
||||||
upload_queue.num_inprogress_deletions += 1;
|
upload_queue.num_inprogress_deletions += 1;
|
||||||
}
|
}
|
||||||
UploadOp::Barrier(sender) => {
|
UploadOp::Barrier(sender) => {
|
||||||
@@ -891,7 +996,6 @@ impl RemoteTimelineClient {
|
|||||||
unreachable!("we never launch an upload task if the queue is uninitialized, and once it is initialized, we never go back")
|
unreachable!("we never launch an upload task if the queue is uninitialized, and once it is initialized, we never go back")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
self.calls_unfinished_metric_end(&task.op);
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -937,16 +1041,16 @@ impl RemoteTimelineClient {
|
|||||||
}
|
}
|
||||||
res
|
res
|
||||||
}
|
}
|
||||||
UploadOp::Delete(metric_file_kind, ref layer_file_name) => {
|
UploadOp::Delete(delete) => {
|
||||||
let path = &self
|
let path = &self
|
||||||
.conf
|
.conf
|
||||||
.timeline_path(&self.timeline_id, &self.tenant_id)
|
.timeline_path(&self.timeline_id, &self.tenant_id)
|
||||||
.join(layer_file_name.file_name());
|
.join(delete.layer_file_name.file_name());
|
||||||
delete::delete_layer(self.conf, &self.storage_impl, path)
|
delete::delete_layer(self.conf, &self.storage_impl, path)
|
||||||
.measure_remote_op(
|
.measure_remote_op(
|
||||||
self.tenant_id,
|
self.tenant_id,
|
||||||
self.timeline_id,
|
self.timeline_id,
|
||||||
*metric_file_kind,
|
delete.file_kind,
|
||||||
RemoteOpKind::Delete,
|
RemoteOpKind::Delete,
|
||||||
Arc::clone(&self.metrics),
|
Arc::clone(&self.metrics),
|
||||||
)
|
)
|
||||||
@@ -1012,11 +1116,24 @@ impl RemoteTimelineClient {
|
|||||||
let mut upload_queue_guard = self.upload_queue.lock().unwrap();
|
let mut upload_queue_guard = self.upload_queue.lock().unwrap();
|
||||||
let upload_queue = match upload_queue_guard.deref_mut() {
|
let upload_queue = match upload_queue_guard.deref_mut() {
|
||||||
UploadQueue::Uninitialized => panic!("callers are responsible for ensuring this is only called on an initialized queue"),
|
UploadQueue::Uninitialized => panic!("callers are responsible for ensuring this is only called on an initialized queue"),
|
||||||
UploadQueue::Stopped(_) => {
|
UploadQueue::Stopped(stopped) => {
|
||||||
|
// Special care is needed for deletions: if it was an earlier deletion (not scheduled from timeline deletion)
|
||||||
|
// then stop() took care of it so we just return.
|
||||||
|
// For deletions that come from delete_all we still want to maintain metrics, launch following tasks, etc.
|
||||||
|
match &task.op {
|
||||||
|
UploadOp::Delete(delete) if delete.scheduled_from_timeline_delete => Some(&mut stopped.upload_queue_for_deletion),
|
||||||
|
_ => None
|
||||||
|
}
|
||||||
|
},
|
||||||
|
UploadQueue::Initialized(qi) => { Some(qi) }
|
||||||
|
};
|
||||||
|
|
||||||
|
let upload_queue = match upload_queue {
|
||||||
|
Some(upload_queue) => upload_queue,
|
||||||
|
None => {
|
||||||
info!("another concurrent task already stopped the queue");
|
info!("another concurrent task already stopped the queue");
|
||||||
return;
|
return;
|
||||||
}, // nothing to do
|
}
|
||||||
UploadQueue::Initialized(qi) => { qi }
|
|
||||||
};
|
};
|
||||||
|
|
||||||
upload_queue.inprogress_tasks.remove(&task.task_id);
|
upload_queue.inprogress_tasks.remove(&task.task_id);
|
||||||
@@ -1029,7 +1146,7 @@ impl RemoteTimelineClient {
|
|||||||
upload_queue.num_inprogress_metadata_uploads -= 1;
|
upload_queue.num_inprogress_metadata_uploads -= 1;
|
||||||
upload_queue.last_uploaded_consistent_lsn = lsn; // XXX monotonicity check?
|
upload_queue.last_uploaded_consistent_lsn = lsn; // XXX monotonicity check?
|
||||||
}
|
}
|
||||||
UploadOp::Delete(_, _) => {
|
UploadOp::Delete(_) => {
|
||||||
upload_queue.num_inprogress_deletions -= 1;
|
upload_queue.num_inprogress_deletions -= 1;
|
||||||
}
|
}
|
||||||
UploadOp::Barrier(_) => unreachable!(),
|
UploadOp::Barrier(_) => unreachable!(),
|
||||||
@@ -1063,8 +1180,8 @@ impl RemoteTimelineClient {
|
|||||||
reason: "metadata uploads are tiny",
|
reason: "metadata uploads are tiny",
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
UploadOp::Delete(file_kind, _) => (
|
UploadOp::Delete(delete) => (
|
||||||
*file_kind,
|
delete.file_kind,
|
||||||
RemoteOpKind::Delete,
|
RemoteOpKind::Delete,
|
||||||
DontTrackSize {
|
DontTrackSize {
|
||||||
reason: "should we track deletes? positive or negative sign?",
|
reason: "should we track deletes? positive or negative sign?",
|
||||||
@@ -1111,32 +1228,36 @@ impl RemoteTimelineClient {
|
|||||||
info!("another concurrent task already shut down the queue");
|
info!("another concurrent task already shut down the queue");
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
UploadQueue::Initialized(UploadQueueInitialized {
|
UploadQueue::Initialized(initialized) => {
|
||||||
latest_files,
|
|
||||||
latest_metadata,
|
|
||||||
last_uploaded_consistent_lsn,
|
|
||||||
..
|
|
||||||
}) => {
|
|
||||||
info!("shutting down upload queue");
|
info!("shutting down upload queue");
|
||||||
|
|
||||||
// Replace the queue with the Stopped state, taking ownership of the old
|
// Replace the queue with the Stopped state, taking ownership of the old
|
||||||
// Initialized queue. We will do some checks on it, and then drop it.
|
// Initialized queue. We will do some checks on it, and then drop it.
|
||||||
let qi = {
|
let qi = {
|
||||||
// take or clone what we need
|
// Here we preserve working version of the upload queue for possible use during deletions.
|
||||||
let latest_files = std::mem::take(latest_files);
|
// In-place replace of Initialized to Stopped can be done with the help of https://github.com/Sgeo/take_mut
|
||||||
let last_uploaded_consistent_lsn = *last_uploaded_consistent_lsn;
|
// but for this use case it doesn't really make sense to bring in unsafe code only for this usage point.
|
||||||
// this could be Copy
|
// Deletion is not really perf sensitive so there shouldn't be any problems with cloning a fraction of it.
|
||||||
let latest_metadata = latest_metadata.clone();
|
let upload_queue_for_deletion = UploadQueueInitialized {
|
||||||
|
task_counter: 0,
|
||||||
let stopped = UploadQueueStopped {
|
latest_files: initialized.latest_files.clone(),
|
||||||
latest_files,
|
latest_files_changes_since_metadata_upload_scheduled: 0,
|
||||||
last_uploaded_consistent_lsn,
|
latest_metadata: initialized.latest_metadata.clone(),
|
||||||
latest_metadata,
|
last_uploaded_consistent_lsn: initialized.last_uploaded_consistent_lsn,
|
||||||
deleted_at: SetDeletedFlagProgress::NotRunning,
|
num_inprogress_layer_uploads: 0,
|
||||||
|
num_inprogress_metadata_uploads: 0,
|
||||||
|
num_inprogress_deletions: 0,
|
||||||
|
inprogress_tasks: HashMap::default(),
|
||||||
|
queued_operations: VecDeque::default(),
|
||||||
};
|
};
|
||||||
|
|
||||||
let upload_queue =
|
let upload_queue = std::mem::replace(
|
||||||
std::mem::replace(&mut *guard, UploadQueue::Stopped(stopped));
|
&mut *guard,
|
||||||
|
UploadQueue::Stopped(UploadQueueStopped {
|
||||||
|
upload_queue_for_deletion,
|
||||||
|
deleted_at: SetDeletedFlagProgress::NotRunning,
|
||||||
|
}),
|
||||||
|
);
|
||||||
if let UploadQueue::Initialized(qi) = upload_queue {
|
if let UploadQueue::Initialized(qi) = upload_queue {
|
||||||
qi
|
qi
|
||||||
} else {
|
} else {
|
||||||
@@ -1144,8 +1265,6 @@ impl RemoteTimelineClient {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
assert!(qi.latest_files.is_empty(), "do not use this anymore");
|
|
||||||
|
|
||||||
// consistency check
|
// consistency check
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
qi.num_inprogress_layer_uploads
|
qi.num_inprogress_layer_uploads
|
||||||
@@ -1243,7 +1362,7 @@ mod tests {
|
|||||||
struct TestSetup {
|
struct TestSetup {
|
||||||
runtime: &'static tokio::runtime::Runtime,
|
runtime: &'static tokio::runtime::Runtime,
|
||||||
entered_runtime: EnterGuard<'static>,
|
entered_runtime: EnterGuard<'static>,
|
||||||
harness: TenantHarness<'static>,
|
harness: TenantHarness,
|
||||||
tenant: Arc<Tenant>,
|
tenant: Arc<Tenant>,
|
||||||
tenant_ctx: RequestContext,
|
tenant_ctx: RequestContext,
|
||||||
remote_fs_dir: PathBuf,
|
remote_fs_dir: PathBuf,
|
||||||
@@ -1264,7 +1383,12 @@ mod tests {
|
|||||||
let harness = TenantHarness::create(test_name)?;
|
let harness = TenantHarness::create(test_name)?;
|
||||||
let (tenant, ctx) = runtime.block_on(harness.load());
|
let (tenant, ctx) = runtime.block_on(harness.load());
|
||||||
// create an empty timeline directory
|
// create an empty timeline directory
|
||||||
let _ = tenant.create_test_timeline(TIMELINE_ID, Lsn(0), DEFAULT_PG_VERSION, &ctx)?;
|
let _ = runtime.block_on(tenant.create_test_timeline(
|
||||||
|
TIMELINE_ID,
|
||||||
|
Lsn(8),
|
||||||
|
DEFAULT_PG_VERSION,
|
||||||
|
&ctx,
|
||||||
|
))?;
|
||||||
|
|
||||||
let remote_fs_dir = harness.conf.workdir.join("remote_fs");
|
let remote_fs_dir = harness.conf.workdir.join("remote_fs");
|
||||||
std::fs::create_dir_all(remote_fs_dir)?;
|
std::fs::create_dir_all(remote_fs_dir)?;
|
||||||
@@ -1408,7 +1532,7 @@ mod tests {
|
|||||||
// Download back the index.json, and check that the list of files is correct
|
// Download back the index.json, and check that the list of files is correct
|
||||||
let index_part = match runtime.block_on(client.download_index_file())? {
|
let index_part = match runtime.block_on(client.download_index_file())? {
|
||||||
MaybeDeletedIndexPart::IndexPart(index_part) => index_part,
|
MaybeDeletedIndexPart::IndexPart(index_part) => index_part,
|
||||||
MaybeDeletedIndexPart::Deleted => panic!("unexpectedly got deleted index part"),
|
MaybeDeletedIndexPart::Deleted(_) => panic!("unexpectedly got deleted index part"),
|
||||||
};
|
};
|
||||||
|
|
||||||
assert_file_list(
|
assert_file_list(
|
||||||
|
|||||||
@@ -7,9 +7,11 @@ use std::collections::{HashMap, HashSet};
|
|||||||
use chrono::NaiveDateTime;
|
use chrono::NaiveDateTime;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use serde_with::{serde_as, DisplayFromStr};
|
use serde_with::{serde_as, DisplayFromStr};
|
||||||
|
use utils::bin_ser::SerializeError;
|
||||||
|
|
||||||
use crate::tenant::metadata::TimelineMetadata;
|
use crate::tenant::metadata::TimelineMetadata;
|
||||||
use crate::tenant::storage_layer::LayerFileName;
|
use crate::tenant::storage_layer::LayerFileName;
|
||||||
|
use crate::tenant::upload_queue::UploadQueueInitialized;
|
||||||
|
|
||||||
use utils::lsn::Lsn;
|
use utils::lsn::Lsn;
|
||||||
|
|
||||||
@@ -115,6 +117,21 @@ impl IndexPart {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl TryFrom<&UploadQueueInitialized> for IndexPart {
|
||||||
|
type Error = SerializeError;
|
||||||
|
|
||||||
|
fn try_from(upload_queue: &UploadQueueInitialized) -> Result<Self, Self::Error> {
|
||||||
|
let disk_consistent_lsn = upload_queue.latest_metadata.disk_consistent_lsn();
|
||||||
|
let metadata_bytes = upload_queue.latest_metadata.to_bytes()?;
|
||||||
|
|
||||||
|
Ok(Self::new(
|
||||||
|
upload_queue.latest_files.clone(),
|
||||||
|
disk_consistent_lsn,
|
||||||
|
metadata_bytes,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Serialized form of [`LayerFileMetadata`].
|
/// Serialized form of [`LayerFileMetadata`].
|
||||||
#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize, Default)]
|
#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize, Default)]
|
||||||
pub struct IndexLayerMetadata {
|
pub struct IndexLayerMetadata {
|
||||||
|
|||||||
@@ -176,13 +176,10 @@ impl LayerAccessStats {
|
|||||||
/// Create an empty stats object and record a [`LayerLoad`] event with the given residence status.
|
/// Create an empty stats object and record a [`LayerLoad`] event with the given residence status.
|
||||||
///
|
///
|
||||||
/// See [`record_residence_event`] for why you need to do this while holding the layer map lock.
|
/// See [`record_residence_event`] for why you need to do this while holding the layer map lock.
|
||||||
pub(crate) fn for_loading_layer<L>(
|
pub(crate) fn for_loading_layer(
|
||||||
layer_map_lock_held_witness: &BatchedUpdates<'_, L>,
|
layer_map_lock_held_witness: &BatchedUpdates<'_>,
|
||||||
status: LayerResidenceStatus,
|
status: LayerResidenceStatus,
|
||||||
) -> Self
|
) -> Self {
|
||||||
where
|
|
||||||
L: ?Sized + Layer,
|
|
||||||
{
|
|
||||||
let new = LayerAccessStats(Mutex::new(LayerAccessStatsLocked::default()));
|
let new = LayerAccessStats(Mutex::new(LayerAccessStatsLocked::default()));
|
||||||
new.record_residence_event(
|
new.record_residence_event(
|
||||||
layer_map_lock_held_witness,
|
layer_map_lock_held_witness,
|
||||||
@@ -197,14 +194,11 @@ impl LayerAccessStats {
|
|||||||
/// The `new_status` is not recorded in `self`.
|
/// The `new_status` is not recorded in `self`.
|
||||||
///
|
///
|
||||||
/// See [`record_residence_event`] for why you need to do this while holding the layer map lock.
|
/// See [`record_residence_event`] for why you need to do this while holding the layer map lock.
|
||||||
pub(crate) fn clone_for_residence_change<L>(
|
pub(crate) fn clone_for_residence_change(
|
||||||
&self,
|
&self,
|
||||||
layer_map_lock_held_witness: &BatchedUpdates<'_, L>,
|
layer_map_lock_held_witness: &BatchedUpdates<'_>,
|
||||||
new_status: LayerResidenceStatus,
|
new_status: LayerResidenceStatus,
|
||||||
) -> LayerAccessStats
|
) -> LayerAccessStats {
|
||||||
where
|
|
||||||
L: ?Sized + Layer,
|
|
||||||
{
|
|
||||||
let clone = {
|
let clone = {
|
||||||
let inner = self.0.lock().unwrap();
|
let inner = self.0.lock().unwrap();
|
||||||
inner.clone()
|
inner.clone()
|
||||||
@@ -232,14 +226,12 @@ impl LayerAccessStats {
|
|||||||
/// - Compact: Grab layer map lock, add the new L1 to layer map and remove the L0s, release layer map lock.
|
/// - Compact: Grab layer map lock, add the new L1 to layer map and remove the L0s, release layer map lock.
|
||||||
/// - Eviction: observes the new L1 layer whose only activity timestamp is the LayerCreate event.
|
/// - Eviction: observes the new L1 layer whose only activity timestamp is the LayerCreate event.
|
||||||
///
|
///
|
||||||
pub(crate) fn record_residence_event<L>(
|
pub(crate) fn record_residence_event(
|
||||||
&self,
|
&self,
|
||||||
_layer_map_lock_held_witness: &BatchedUpdates<'_, L>,
|
_layer_map_lock_held_witness: &BatchedUpdates<'_>,
|
||||||
status: LayerResidenceStatus,
|
status: LayerResidenceStatus,
|
||||||
reason: LayerResidenceEventReason,
|
reason: LayerResidenceEventReason,
|
||||||
) where
|
) {
|
||||||
L: ?Sized + Layer,
|
|
||||||
{
|
|
||||||
let mut locked = self.0.lock().unwrap();
|
let mut locked = self.0.lock().unwrap();
|
||||||
locked.iter_mut().for_each(|inner| {
|
locked.iter_mut().for_each(|inner| {
|
||||||
inner
|
inner
|
||||||
@@ -343,7 +335,7 @@ impl LayerAccessStats {
|
|||||||
/// All layers should implement a minimal `std::fmt::Debug` without tenant or
|
/// All layers should implement a minimal `std::fmt::Debug` without tenant or
|
||||||
/// timeline names, because those are known in the context of which the layers
|
/// timeline names, because those are known in the context of which the layers
|
||||||
/// are used in (timeline).
|
/// are used in (timeline).
|
||||||
pub trait Layer: std::fmt::Debug + Send + Sync {
|
pub trait Layer: std::fmt::Debug + std::fmt::Display + Send + Sync {
|
||||||
/// Range of keys that this layer covers
|
/// Range of keys that this layer covers
|
||||||
fn get_key_range(&self) -> Range<Key>;
|
fn get_key_range(&self) -> Range<Key>;
|
||||||
|
|
||||||
@@ -381,18 +373,15 @@ pub trait Layer: std::fmt::Debug + Send + Sync {
|
|||||||
ctx: &RequestContext,
|
ctx: &RequestContext,
|
||||||
) -> Result<ValueReconstructResult>;
|
) -> Result<ValueReconstructResult>;
|
||||||
|
|
||||||
/// A short ID string that uniquely identifies the given layer within a [`LayerMap`].
|
|
||||||
fn short_id(&self) -> String;
|
|
||||||
|
|
||||||
/// Dump summary of the contents of the layer to stdout
|
/// Dump summary of the contents of the layer to stdout
|
||||||
fn dump(&self, verbose: bool, ctx: &RequestContext) -> Result<()>;
|
fn dump(&self, verbose: bool, ctx: &RequestContext) -> Result<()>;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returned by [`Layer::iter`]
|
/// Returned by [`Layer::iter`]
|
||||||
pub type LayerIter<'i> = Box<dyn Iterator<Item = Result<(Key, Lsn, Value)>> + 'i>;
|
pub type LayerIter<'i> = Box<dyn Iterator<Item = Result<(Key, Lsn, Value)>> + 'i + Send>;
|
||||||
|
|
||||||
/// Returned by [`Layer::key_iter`]
|
/// Returned by [`Layer::key_iter`]
|
||||||
pub type LayerKeyIter<'i> = Box<dyn Iterator<Item = (Key, Lsn, u64)> + 'i>;
|
pub type LayerKeyIter<'i> = Box<dyn Iterator<Item = (Key, Lsn, u64)> + 'i + Send>;
|
||||||
|
|
||||||
/// A Layer contains all data in a "rectangle" consisting of a range of keys and
|
/// A Layer contains all data in a "rectangle" consisting of a range of keys and
|
||||||
/// range of LSNs.
|
/// range of LSNs.
|
||||||
@@ -473,94 +462,127 @@ pub fn downcast_remote_layer(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Holds metadata about a layer without any content. Used mostly for testing.
|
pub mod tests {
|
||||||
///
|
use super::*;
|
||||||
/// To use filenames as fixtures, parse them as [`LayerFileName`] then convert from that to a
|
|
||||||
/// LayerDescriptor.
|
|
||||||
#[derive(Clone, Debug)]
|
|
||||||
pub struct LayerDescriptor {
|
|
||||||
pub key: Range<Key>,
|
|
||||||
pub lsn: Range<Lsn>,
|
|
||||||
pub is_incremental: bool,
|
|
||||||
pub short_id: String,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl LayerDescriptor {
|
/// Holds metadata about a layer without any content. Used mostly for testing.
|
||||||
/// `LayerDescriptor` is only used for testing purpose so it does not matter whether it is image / delta,
|
///
|
||||||
/// and the tenant / timeline id does not matter.
|
/// To use filenames as fixtures, parse them as [`LayerFileName`] then convert from that to a
|
||||||
pub fn get_persistent_layer_desc(&self) -> PersistentLayerDesc {
|
/// LayerDescriptor.
|
||||||
PersistentLayerDesc::new_delta(
|
#[derive(Clone, Debug)]
|
||||||
TenantId::from_array([0; 16]),
|
pub struct LayerDescriptor {
|
||||||
TimelineId::from_array([0; 16]),
|
base: PersistentLayerDesc,
|
||||||
self.key.clone(),
|
|
||||||
self.lsn.clone(),
|
|
||||||
233,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Layer for LayerDescriptor {
|
|
||||||
fn get_key_range(&self) -> Range<Key> {
|
|
||||||
self.key.clone()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_lsn_range(&self) -> Range<Lsn> {
|
impl From<PersistentLayerDesc> for LayerDescriptor {
|
||||||
self.lsn.clone()
|
fn from(base: PersistentLayerDesc) -> Self {
|
||||||
}
|
Self { base }
|
||||||
|
|
||||||
fn is_incremental(&self) -> bool {
|
|
||||||
self.is_incremental
|
|
||||||
}
|
|
||||||
|
|
||||||
fn get_value_reconstruct_data(
|
|
||||||
&self,
|
|
||||||
_key: Key,
|
|
||||||
_lsn_range: Range<Lsn>,
|
|
||||||
_reconstruct_data: &mut ValueReconstructState,
|
|
||||||
_ctx: &RequestContext,
|
|
||||||
) -> Result<ValueReconstructResult> {
|
|
||||||
todo!("This method shouldn't be part of the Layer trait")
|
|
||||||
}
|
|
||||||
|
|
||||||
fn short_id(&self) -> String {
|
|
||||||
self.short_id.clone()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn dump(&self, _verbose: bool, _ctx: &RequestContext) -> Result<()> {
|
|
||||||
todo!()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<DeltaFileName> for LayerDescriptor {
|
|
||||||
fn from(value: DeltaFileName) -> Self {
|
|
||||||
let short_id = value.to_string();
|
|
||||||
LayerDescriptor {
|
|
||||||
key: value.key_range,
|
|
||||||
lsn: value.lsn_range,
|
|
||||||
is_incremental: true,
|
|
||||||
short_id,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
impl From<ImageFileName> for LayerDescriptor {
|
impl Layer for LayerDescriptor {
|
||||||
fn from(value: ImageFileName) -> Self {
|
fn get_value_reconstruct_data(
|
||||||
let short_id = value.to_string();
|
&self,
|
||||||
let lsn = value.lsn_as_range();
|
_key: Key,
|
||||||
LayerDescriptor {
|
_lsn_range: Range<Lsn>,
|
||||||
key: value.key_range,
|
_reconstruct_data: &mut ValueReconstructState,
|
||||||
lsn,
|
_ctx: &RequestContext,
|
||||||
is_incremental: false,
|
) -> Result<ValueReconstructResult> {
|
||||||
short_id,
|
todo!("This method shouldn't be part of the Layer trait")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn dump(&self, _verbose: bool, _ctx: &RequestContext) -> Result<()> {
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Boilerplate to implement the Layer trait, always use layer_desc for persistent layers.
|
||||||
|
fn get_key_range(&self) -> Range<Key> {
|
||||||
|
self.layer_desc().key_range.clone()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Boilerplate to implement the Layer trait, always use layer_desc for persistent layers.
|
||||||
|
fn get_lsn_range(&self) -> Range<Lsn> {
|
||||||
|
self.layer_desc().lsn_range.clone()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Boilerplate to implement the Layer trait, always use layer_desc for persistent layers.
|
||||||
|
fn is_incremental(&self) -> bool {
|
||||||
|
self.layer_desc().is_incremental
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
impl From<LayerFileName> for LayerDescriptor {
|
/// Boilerplate to implement the Layer trait, always use layer_desc for persistent layers.
|
||||||
fn from(value: LayerFileName) -> Self {
|
impl std::fmt::Display for LayerDescriptor {
|
||||||
match value {
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
LayerFileName::Delta(d) => Self::from(d),
|
write!(f, "{}", self.layer_desc().short_id())
|
||||||
LayerFileName::Image(i) => Self::from(i),
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PersistentLayer for LayerDescriptor {
|
||||||
|
fn layer_desc(&self) -> &PersistentLayerDesc {
|
||||||
|
&self.base
|
||||||
|
}
|
||||||
|
|
||||||
|
fn local_path(&self) -> Option<PathBuf> {
|
||||||
|
unimplemented!()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn iter(&self, _: &RequestContext) -> Result<LayerIter<'_>> {
|
||||||
|
unimplemented!()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn key_iter(&self, _: &RequestContext) -> Result<LayerKeyIter<'_>> {
|
||||||
|
unimplemented!()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn delete_resident_layer_file(&self) -> Result<()> {
|
||||||
|
unimplemented!()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn info(&self, _: LayerAccessStatsReset) -> HistoricLayerInfo {
|
||||||
|
unimplemented!()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn access_stats(&self) -> &LayerAccessStats {
|
||||||
|
unimplemented!()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<DeltaFileName> for LayerDescriptor {
|
||||||
|
fn from(value: DeltaFileName) -> Self {
|
||||||
|
LayerDescriptor {
|
||||||
|
base: PersistentLayerDesc::new_delta(
|
||||||
|
TenantId::from_array([0; 16]),
|
||||||
|
TimelineId::from_array([0; 16]),
|
||||||
|
value.key_range,
|
||||||
|
value.lsn_range,
|
||||||
|
233,
|
||||||
|
),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<ImageFileName> for LayerDescriptor {
|
||||||
|
fn from(value: ImageFileName) -> Self {
|
||||||
|
LayerDescriptor {
|
||||||
|
base: PersistentLayerDesc::new_img(
|
||||||
|
TenantId::from_array([0; 16]),
|
||||||
|
TimelineId::from_array([0; 16]),
|
||||||
|
value.key_range,
|
||||||
|
value.lsn,
|
||||||
|
false,
|
||||||
|
233,
|
||||||
|
),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<LayerFileName> for LayerDescriptor {
|
||||||
|
fn from(value: LayerFileName) -> Self {
|
||||||
|
match value {
|
||||||
|
LayerFileName::Delta(d) => Self::from(d),
|
||||||
|
LayerFileName::Image(i) => Self::from(i),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -37,6 +37,7 @@ use crate::virtual_file::VirtualFile;
|
|||||||
use crate::{walrecord, TEMP_FILE_SUFFIX};
|
use crate::{walrecord, TEMP_FILE_SUFFIX};
|
||||||
use crate::{DELTA_FILE_MAGIC, STORAGE_FORMAT_VERSION};
|
use crate::{DELTA_FILE_MAGIC, STORAGE_FORMAT_VERSION};
|
||||||
use anyhow::{bail, ensure, Context, Result};
|
use anyhow::{bail, ensure, Context, Result};
|
||||||
|
use once_cell::sync::OnceCell;
|
||||||
use pageserver_api::models::{HistoricLayerInfo, LayerAccessKind};
|
use pageserver_api::models::{HistoricLayerInfo, LayerAccessKind};
|
||||||
use rand::{distributions::Alphanumeric, Rng};
|
use rand::{distributions::Alphanumeric, Rng};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
@@ -46,7 +47,6 @@ use std::io::{Seek, SeekFrom};
|
|||||||
use std::ops::Range;
|
use std::ops::Range;
|
||||||
use std::os::unix::fs::FileExt;
|
use std::os::unix::fs::FileExt;
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
use std::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard};
|
|
||||||
use tracing::*;
|
use tracing::*;
|
||||||
|
|
||||||
use utils::{
|
use utils::{
|
||||||
@@ -184,7 +184,7 @@ pub struct DeltaLayer {
|
|||||||
|
|
||||||
access_stats: LayerAccessStats,
|
access_stats: LayerAccessStats,
|
||||||
|
|
||||||
inner: RwLock<DeltaLayerInner>,
|
inner: OnceCell<DeltaLayerInner>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl std::fmt::Debug for DeltaLayer {
|
impl std::fmt::Debug for DeltaLayer {
|
||||||
@@ -201,21 +201,17 @@ impl std::fmt::Debug for DeltaLayer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub struct DeltaLayerInner {
|
pub struct DeltaLayerInner {
|
||||||
/// If false, the fields below have not been loaded into memory yet.
|
|
||||||
loaded: bool,
|
|
||||||
|
|
||||||
// values copied from summary
|
// values copied from summary
|
||||||
index_start_blk: u32,
|
index_start_blk: u32,
|
||||||
index_root_blk: u32,
|
index_root_blk: u32,
|
||||||
|
|
||||||
/// Reader object for reading blocks from the file. (None if not loaded yet)
|
/// Reader object for reading blocks from the file.
|
||||||
file: Option<FileBlockReader<VirtualFile>>,
|
file: FileBlockReader<VirtualFile>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl std::fmt::Debug for DeltaLayerInner {
|
impl std::fmt::Debug for DeltaLayerInner {
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
f.debug_struct("DeltaLayerInner")
|
f.debug_struct("DeltaLayerInner")
|
||||||
.field("loaded", &self.loaded)
|
|
||||||
.field("index_start_blk", &self.index_start_blk)
|
.field("index_start_blk", &self.index_start_blk)
|
||||||
.field("index_root_blk", &self.index_root_blk)
|
.field("index_root_blk", &self.index_root_blk)
|
||||||
.finish()
|
.finish()
|
||||||
@@ -246,7 +242,7 @@ impl Layer for DeltaLayer {
|
|||||||
inner.index_start_blk, inner.index_root_blk
|
inner.index_start_blk, inner.index_root_blk
|
||||||
);
|
);
|
||||||
|
|
||||||
let file = inner.file.as_ref().unwrap();
|
let file = &inner.file;
|
||||||
let tree_reader = DiskBtreeReader::<_, DELTA_KEY_SIZE>::new(
|
let tree_reader = DiskBtreeReader::<_, DELTA_KEY_SIZE>::new(
|
||||||
inner.index_start_blk,
|
inner.index_start_blk,
|
||||||
inner.index_root_blk,
|
inner.index_root_blk,
|
||||||
@@ -315,7 +311,7 @@ impl Layer for DeltaLayer {
|
|||||||
let inner = self.load(LayerAccessKind::GetValueReconstructData, ctx)?;
|
let inner = self.load(LayerAccessKind::GetValueReconstructData, ctx)?;
|
||||||
|
|
||||||
// Scan the page versions backwards, starting from `lsn`.
|
// Scan the page versions backwards, starting from `lsn`.
|
||||||
let file = inner.file.as_ref().unwrap();
|
let file = &inner.file;
|
||||||
let tree_reader = DiskBtreeReader::<_, DELTA_KEY_SIZE>::new(
|
let tree_reader = DiskBtreeReader::<_, DELTA_KEY_SIZE>::new(
|
||||||
inner.index_start_blk,
|
inner.index_start_blk,
|
||||||
inner.index_root_blk,
|
inner.index_root_blk,
|
||||||
@@ -398,10 +394,11 @@ impl Layer for DeltaLayer {
|
|||||||
fn is_incremental(&self) -> bool {
|
fn is_incremental(&self) -> bool {
|
||||||
self.layer_desc().is_incremental
|
self.layer_desc().is_incremental
|
||||||
}
|
}
|
||||||
|
}
|
||||||
/// Boilerplate to implement the Layer trait, always use layer_desc for persistent layers.
|
/// Boilerplate to implement the Layer trait, always use layer_desc for persistent layers.
|
||||||
fn short_id(&self) -> String {
|
impl std::fmt::Display for DeltaLayer {
|
||||||
self.layer_desc().short_id()
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
write!(f, "{}", self.layer_desc().short_id())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -500,51 +497,22 @@ impl DeltaLayer {
|
|||||||
/// Open the underlying file and read the metadata into memory, if it's
|
/// Open the underlying file and read the metadata into memory, if it's
|
||||||
/// not loaded already.
|
/// not loaded already.
|
||||||
///
|
///
|
||||||
fn load(
|
fn load(&self, access_kind: LayerAccessKind, ctx: &RequestContext) -> Result<&DeltaLayerInner> {
|
||||||
&self,
|
|
||||||
access_kind: LayerAccessKind,
|
|
||||||
ctx: &RequestContext,
|
|
||||||
) -> Result<RwLockReadGuard<DeltaLayerInner>> {
|
|
||||||
self.access_stats
|
self.access_stats
|
||||||
.record_access(access_kind, ctx.task_kind());
|
.record_access(access_kind, ctx.task_kind());
|
||||||
loop {
|
// Quick exit if already loaded
|
||||||
// Quick exit if already loaded
|
self.inner
|
||||||
let inner = self.inner.read().unwrap();
|
.get_or_try_init(|| self.load_inner())
|
||||||
if inner.loaded {
|
.with_context(|| format!("Failed to load delta layer {}", self.path().display()))
|
||||||
return Ok(inner);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Need to open the file and load the metadata. Upgrade our lock to
|
|
||||||
// a write lock. (Or rather, release and re-lock in write mode.)
|
|
||||||
drop(inner);
|
|
||||||
let inner = self.inner.write().unwrap();
|
|
||||||
if !inner.loaded {
|
|
||||||
self.load_inner(inner).with_context(|| {
|
|
||||||
format!("Failed to load delta layer {}", self.path().display())
|
|
||||||
})?;
|
|
||||||
} else {
|
|
||||||
// Another thread loaded it while we were not holding the lock.
|
|
||||||
}
|
|
||||||
|
|
||||||
// We now have the file open and loaded. There's no function to do
|
|
||||||
// that in the std library RwLock, so we have to release and re-lock
|
|
||||||
// in read mode. (To be precise, the lock guard was moved in the
|
|
||||||
// above call to `load_inner`, so it's already been released). And
|
|
||||||
// while we do that, another thread could unload again, so we have
|
|
||||||
// to re-check and retry if that happens.
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn load_inner(&self, mut inner: RwLockWriteGuard<DeltaLayerInner>) -> Result<()> {
|
fn load_inner(&self) -> Result<DeltaLayerInner> {
|
||||||
let path = self.path();
|
let path = self.path();
|
||||||
|
|
||||||
// Open the file if it's not open already.
|
let file = VirtualFile::open(&path)
|
||||||
if inner.file.is_none() {
|
.with_context(|| format!("Failed to open file '{}'", path.display()))?;
|
||||||
let file = VirtualFile::open(&path)
|
let file = FileBlockReader::new(file);
|
||||||
.with_context(|| format!("Failed to open file '{}'", path.display()))?;
|
|
||||||
inner.file = Some(FileBlockReader::new(file));
|
|
||||||
}
|
|
||||||
let file = inner.file.as_mut().unwrap();
|
|
||||||
let summary_blk = file.read_blk(0)?;
|
let summary_blk = file.read_blk(0)?;
|
||||||
let actual_summary = Summary::des_prefix(summary_blk.as_ref())?;
|
let actual_summary = Summary::des_prefix(summary_blk.as_ref())?;
|
||||||
|
|
||||||
@@ -571,13 +539,13 @@ impl DeltaLayer {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inner.index_start_blk = actual_summary.index_start_blk;
|
|
||||||
inner.index_root_blk = actual_summary.index_root_blk;
|
|
||||||
|
|
||||||
debug!("loaded from {}", &path.display());
|
debug!("loaded from {}", &path.display());
|
||||||
|
|
||||||
inner.loaded = true;
|
Ok(DeltaLayerInner {
|
||||||
Ok(())
|
file,
|
||||||
|
index_start_blk: actual_summary.index_start_blk,
|
||||||
|
index_root_blk: actual_summary.index_root_blk,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Create a DeltaLayer struct representing an existing file on disk.
|
/// Create a DeltaLayer struct representing an existing file on disk.
|
||||||
@@ -599,12 +567,7 @@ impl DeltaLayer {
|
|||||||
file_size,
|
file_size,
|
||||||
),
|
),
|
||||||
access_stats,
|
access_stats,
|
||||||
inner: RwLock::new(DeltaLayerInner {
|
inner: once_cell::sync::OnceCell::new(),
|
||||||
loaded: false,
|
|
||||||
file: None,
|
|
||||||
index_start_blk: 0,
|
|
||||||
index_root_blk: 0,
|
|
||||||
}),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -631,12 +594,7 @@ impl DeltaLayer {
|
|||||||
metadata.len(),
|
metadata.len(),
|
||||||
),
|
),
|
||||||
access_stats: LayerAccessStats::empty_will_record_residence_event_later(),
|
access_stats: LayerAccessStats::empty_will_record_residence_event_later(),
|
||||||
inner: RwLock::new(DeltaLayerInner {
|
inner: once_cell::sync::OnceCell::new(),
|
||||||
loaded: false,
|
|
||||||
file: None,
|
|
||||||
index_start_blk: 0,
|
|
||||||
index_root_blk: 0,
|
|
||||||
}),
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -800,12 +758,7 @@ impl DeltaLayerWriterInner {
|
|||||||
metadata.len(),
|
metadata.len(),
|
||||||
),
|
),
|
||||||
access_stats: LayerAccessStats::empty_will_record_residence_event_later(),
|
access_stats: LayerAccessStats::empty_will_record_residence_event_later(),
|
||||||
inner: RwLock::new(DeltaLayerInner {
|
inner: once_cell::sync::OnceCell::new(),
|
||||||
loaded: false,
|
|
||||||
file: None,
|
|
||||||
index_start_blk,
|
|
||||||
index_root_blk,
|
|
||||||
}),
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// fsync the file
|
// fsync the file
|
||||||
@@ -940,13 +893,13 @@ struct DeltaValueIter<'a> {
|
|||||||
reader: BlockCursor<Adapter<'a>>,
|
reader: BlockCursor<Adapter<'a>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
struct Adapter<'a>(RwLockReadGuard<'a, DeltaLayerInner>);
|
struct Adapter<'a>(&'a DeltaLayerInner);
|
||||||
|
|
||||||
impl<'a> BlockReader for Adapter<'a> {
|
impl<'a> BlockReader for Adapter<'a> {
|
||||||
type BlockLease = PageReadGuard<'static>;
|
type BlockLease = PageReadGuard<'static>;
|
||||||
|
|
||||||
fn read_blk(&self, blknum: u32) -> Result<Self::BlockLease, std::io::Error> {
|
fn read_blk(&self, blknum: u32) -> Result<Self::BlockLease, std::io::Error> {
|
||||||
self.0.file.as_ref().unwrap().read_blk(blknum)
|
self.0.file.read_blk(blknum)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -959,8 +912,8 @@ impl<'a> Iterator for DeltaValueIter<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> DeltaValueIter<'a> {
|
impl<'a> DeltaValueIter<'a> {
|
||||||
fn new(inner: RwLockReadGuard<'a, DeltaLayerInner>) -> Result<Self> {
|
fn new(inner: &'a DeltaLayerInner) -> Result<Self> {
|
||||||
let file = inner.file.as_ref().unwrap();
|
let file = &inner.file;
|
||||||
let tree_reader = DiskBtreeReader::<_, DELTA_KEY_SIZE>::new(
|
let tree_reader = DiskBtreeReader::<_, DELTA_KEY_SIZE>::new(
|
||||||
inner.index_start_blk,
|
inner.index_start_blk,
|
||||||
inner.index_root_blk,
|
inner.index_root_blk,
|
||||||
@@ -1033,8 +986,8 @@ impl Iterator for DeltaKeyIter {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> DeltaKeyIter {
|
impl<'a> DeltaKeyIter {
|
||||||
fn new(inner: RwLockReadGuard<'a, DeltaLayerInner>) -> Result<Self> {
|
fn new(inner: &'a DeltaLayerInner) -> Result<Self> {
|
||||||
let file = inner.file.as_ref().unwrap();
|
let file = &inner.file;
|
||||||
let tree_reader = DiskBtreeReader::<_, DELTA_KEY_SIZE>::new(
|
let tree_reader = DiskBtreeReader::<_, DELTA_KEY_SIZE>::new(
|
||||||
inner.index_start_blk,
|
inner.index_start_blk,
|
||||||
inner.index_root_blk,
|
inner.index_root_blk,
|
||||||
@@ -1074,3 +1027,21 @@ impl<'a> DeltaKeyIter {
|
|||||||
Ok(iter)
|
Ok(iter)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod test {
|
||||||
|
use super::DeltaKeyIter;
|
||||||
|
use super::DeltaLayer;
|
||||||
|
use super::DeltaValueIter;
|
||||||
|
|
||||||
|
// We will soon need the iters to be send in the compaction code.
|
||||||
|
// Cf https://github.com/neondatabase/neon/pull/4462#issuecomment-1587398883
|
||||||
|
// Cf https://github.com/neondatabase/neon/issues/4471
|
||||||
|
#[test]
|
||||||
|
fn is_send() {
|
||||||
|
fn assert_send<T: Send>() {}
|
||||||
|
assert_send::<DeltaLayer>();
|
||||||
|
assert_send::<DeltaValueIter>();
|
||||||
|
assert_send::<DeltaKeyIter>();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -210,9 +210,15 @@ pub enum LayerFileName {
|
|||||||
|
|
||||||
impl LayerFileName {
|
impl LayerFileName {
|
||||||
pub fn file_name(&self) -> String {
|
pub fn file_name(&self) -> String {
|
||||||
|
self.to_string()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for LayerFileName {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
match self {
|
match self {
|
||||||
Self::Image(fname) => fname.to_string(),
|
Self::Image(fname) => write!(f, "{fname}"),
|
||||||
Self::Delta(fname) => fname.to_string(),
|
Self::Delta(fname) => write!(f, "{fname}"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -230,10 +230,12 @@ impl Layer for ImageLayer {
|
|||||||
fn is_incremental(&self) -> bool {
|
fn is_incremental(&self) -> bool {
|
||||||
self.layer_desc().is_incremental
|
self.layer_desc().is_incremental
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Boilerplate to implement the Layer trait, always use layer_desc for persistent layers.
|
/// Boilerplate to implement the Layer trait, always use layer_desc for persistent layers.
|
||||||
fn short_id(&self) -> String {
|
impl std::fmt::Display for ImageLayer {
|
||||||
self.layer_desc().short_id()
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
write!(f, "{}", self.layer_desc().short_id())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -131,13 +131,6 @@ impl Layer for InMemoryLayer {
|
|||||||
true
|
true
|
||||||
}
|
}
|
||||||
|
|
||||||
fn short_id(&self) -> String {
|
|
||||||
let inner = self.inner.read().unwrap();
|
|
||||||
|
|
||||||
let end_lsn = inner.end_lsn.unwrap_or(Lsn(u64::MAX));
|
|
||||||
format!("inmem-{:016X}-{:016X}", self.start_lsn.0, end_lsn.0)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// debugging function to print out the contents of the layer
|
/// debugging function to print out the contents of the layer
|
||||||
fn dump(&self, verbose: bool, _ctx: &RequestContext) -> Result<()> {
|
fn dump(&self, verbose: bool, _ctx: &RequestContext) -> Result<()> {
|
||||||
let inner = self.inner.read().unwrap();
|
let inner = self.inner.read().unwrap();
|
||||||
@@ -240,6 +233,15 @@ impl Layer for InMemoryLayer {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl std::fmt::Display for InMemoryLayer {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
let inner = self.inner.read().unwrap();
|
||||||
|
|
||||||
|
let end_lsn = inner.end_lsn.unwrap_or(Lsn(u64::MAX));
|
||||||
|
write!(f, "inmem-{:016X}-{:016X}", self.start_lsn.0, end_lsn.0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl InMemoryLayer {
|
impl InMemoryLayer {
|
||||||
///
|
///
|
||||||
/// Get layer size on the disk
|
/// Get layer size on the disk
|
||||||
@@ -304,7 +306,7 @@ impl InMemoryLayer {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn put_tombstone(&self, _key_range: Range<Key>, _lsn: Lsn) -> Result<()> {
|
pub async fn put_tombstone(&self, _key_range: Range<Key>, _lsn: Lsn) -> Result<()> {
|
||||||
// TODO: Currently, we just leak the storage for any deleted keys
|
// TODO: Currently, we just leak the storage for any deleted keys
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
|
use core::fmt::Display;
|
||||||
use std::ops::Range;
|
use std::ops::Range;
|
||||||
use utils::{
|
use utils::{
|
||||||
id::{TenantId, TimelineId},
|
id::{TenantId, TimelineId},
|
||||||
@@ -9,10 +10,12 @@ use crate::{context::RequestContext, repository::Key};
|
|||||||
|
|
||||||
use super::{DeltaFileName, ImageFileName, LayerFileName};
|
use super::{DeltaFileName, ImageFileName, LayerFileName};
|
||||||
|
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
/// A unique identifier of a persistent layer. This is different from `LayerDescriptor`, which is only used in the
|
/// A unique identifier of a persistent layer. This is different from `LayerDescriptor`, which is only used in the
|
||||||
/// benchmarks. This struct contains all necessary information to find the image / delta layer. It also provides
|
/// benchmarks. This struct contains all necessary information to find the image / delta layer. It also provides
|
||||||
/// a unified way to generate layer information like file name.
|
/// a unified way to generate layer information like file name.
|
||||||
#[derive(Debug, PartialEq, Eq, Clone)]
|
#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
|
||||||
pub struct PersistentLayerDesc {
|
pub struct PersistentLayerDesc {
|
||||||
pub tenant_id: TenantId,
|
pub tenant_id: TenantId,
|
||||||
pub timeline_id: TimelineId,
|
pub timeline_id: TimelineId,
|
||||||
@@ -46,8 +49,21 @@ impl PersistentLayerDesc {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn short_id(&self) -> String {
|
pub fn short_id(&self) -> impl Display {
|
||||||
self.filename().file_name()
|
self.filename()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
pub fn new_test(key_range: Range<Key>) -> Self {
|
||||||
|
Self {
|
||||||
|
tenant_id: TenantId::generate(),
|
||||||
|
timeline_id: TimelineId::generate(),
|
||||||
|
key_range,
|
||||||
|
lsn_range: Lsn(0)..Lsn(1),
|
||||||
|
is_delta: false,
|
||||||
|
is_incremental: false,
|
||||||
|
file_size: 0,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn new_img(
|
pub fn new_img(
|
||||||
|
|||||||
@@ -71,10 +71,7 @@ impl Layer for RemoteLayer {
|
|||||||
_reconstruct_state: &mut ValueReconstructState,
|
_reconstruct_state: &mut ValueReconstructState,
|
||||||
_ctx: &RequestContext,
|
_ctx: &RequestContext,
|
||||||
) -> Result<ValueReconstructResult> {
|
) -> Result<ValueReconstructResult> {
|
||||||
bail!(
|
bail!("layer {self} needs to be downloaded");
|
||||||
"layer {} needs to be downloaded",
|
|
||||||
self.filename().file_name()
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// debugging function to print out the contents of the layer
|
/// debugging function to print out the contents of the layer
|
||||||
@@ -106,10 +103,12 @@ impl Layer for RemoteLayer {
|
|||||||
fn is_incremental(&self) -> bool {
|
fn is_incremental(&self) -> bool {
|
||||||
self.layer_desc().is_incremental
|
self.layer_desc().is_incremental
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Boilerplate to implement the Layer trait, always use layer_desc for persistent layers.
|
/// Boilerplate to implement the Layer trait, always use layer_desc for persistent layers.
|
||||||
fn short_id(&self) -> String {
|
impl std::fmt::Display for RemoteLayer {
|
||||||
self.layer_desc().short_id()
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
write!(f, "{}", self.layer_desc().short_id())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -218,15 +217,12 @@ impl RemoteLayer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Create a Layer struct representing this layer, after it has been downloaded.
|
/// Create a Layer struct representing this layer, after it has been downloaded.
|
||||||
pub fn create_downloaded_layer<L>(
|
pub fn create_downloaded_layer(
|
||||||
&self,
|
&self,
|
||||||
layer_map_lock_held_witness: &BatchedUpdates<'_, L>,
|
layer_map_lock_held_witness: &BatchedUpdates<'_>,
|
||||||
conf: &'static PageServerConf,
|
conf: &'static PageServerConf,
|
||||||
file_size: u64,
|
file_size: u64,
|
||||||
) -> Arc<dyn PersistentLayer>
|
) -> Arc<dyn PersistentLayer> {
|
||||||
where
|
|
||||||
L: ?Sized + Layer,
|
|
||||||
{
|
|
||||||
if self.desc.is_delta {
|
if self.desc.is_delta {
|
||||||
let fname = self.desc.delta_file_name();
|
let fname = self.desc.delta_file_name();
|
||||||
Arc::new(DeltaLayer::new(
|
Arc::new(DeltaLayer::new(
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -70,7 +70,6 @@ impl Timeline {
|
|||||||
};
|
};
|
||||||
|
|
||||||
self_clone.eviction_task(cancel).await;
|
self_clone.eviction_task(cancel).await;
|
||||||
info!("eviction task finishing");
|
|
||||||
Ok(())
|
Ok(())
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
@@ -78,6 +77,9 @@ impl Timeline {
|
|||||||
|
|
||||||
#[instrument(skip_all, fields(tenant_id = %self.tenant_id, timeline_id = %self.timeline_id))]
|
#[instrument(skip_all, fields(tenant_id = %self.tenant_id, timeline_id = %self.timeline_id))]
|
||||||
async fn eviction_task(self: Arc<Self>, cancel: CancellationToken) {
|
async fn eviction_task(self: Arc<Self>, cancel: CancellationToken) {
|
||||||
|
scopeguard::defer! {
|
||||||
|
info!("eviction task finishing");
|
||||||
|
}
|
||||||
use crate::tenant::tasks::random_init_delay;
|
use crate::tenant::tasks::random_init_delay;
|
||||||
{
|
{
|
||||||
let policy = self.get_eviction_policy();
|
let policy = self.get_eviction_policy();
|
||||||
@@ -86,7 +88,6 @@ impl Timeline {
|
|||||||
EvictionPolicy::NoEviction => Duration::from_secs(10),
|
EvictionPolicy::NoEviction => Duration::from_secs(10),
|
||||||
};
|
};
|
||||||
if random_init_delay(period, &cancel).await.is_err() {
|
if random_init_delay(period, &cancel).await.is_err() {
|
||||||
info!("shutting down");
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -101,7 +102,6 @@ impl Timeline {
|
|||||||
ControlFlow::Continue(sleep_until) => {
|
ControlFlow::Continue(sleep_until) => {
|
||||||
tokio::select! {
|
tokio::select! {
|
||||||
_ = cancel.cancelled() => {
|
_ = cancel.cancelled() => {
|
||||||
info!("shutting down");
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
_ = tokio::time::sleep_until(sleep_until) => { }
|
_ = tokio::time::sleep_until(sleep_until) => { }
|
||||||
@@ -197,9 +197,11 @@ impl Timeline {
|
|||||||
// We don't want to hold the layer map lock during eviction.
|
// We don't want to hold the layer map lock during eviction.
|
||||||
// So, we just need to deal with this.
|
// So, we just need to deal with this.
|
||||||
let candidates: Vec<Arc<dyn PersistentLayer>> = {
|
let candidates: Vec<Arc<dyn PersistentLayer>> = {
|
||||||
let layers = self.layers.read().unwrap();
|
let guard = self.layers.read().await;
|
||||||
|
let (layers, mapping) = &*guard;
|
||||||
let mut candidates = Vec::new();
|
let mut candidates = Vec::new();
|
||||||
for hist_layer in layers.iter_historic_layers() {
|
for hist_layer in layers.iter_historic_layers() {
|
||||||
|
let hist_layer = mapping.get_from_desc(&hist_layer);
|
||||||
if hist_layer.is_remote_layer() {
|
if hist_layer.is_remote_layer() {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -207,7 +209,7 @@ impl Timeline {
|
|||||||
let last_activity_ts = hist_layer.access_stats().latest_activity().unwrap_or_else(|| {
|
let last_activity_ts = hist_layer.access_stats().latest_activity().unwrap_or_else(|| {
|
||||||
// We only use this fallback if there's an implementation error.
|
// We only use this fallback if there's an implementation error.
|
||||||
// `latest_activity` already does rate-limited warn!() log.
|
// `latest_activity` already does rate-limited warn!() log.
|
||||||
debug!(layer=%hist_layer.filename().file_name(), "last_activity returns None, using SystemTime::now");
|
debug!(layer=%hist_layer, "last_activity returns None, using SystemTime::now");
|
||||||
SystemTime::now()
|
SystemTime::now()
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@@ -153,7 +153,7 @@ pub(super) async fn connection_manager_loop_step(
|
|||||||
match new_state {
|
match new_state {
|
||||||
// we're already active as walreceiver, no need to reactivate
|
// we're already active as walreceiver, no need to reactivate
|
||||||
TimelineState::Active => continue,
|
TimelineState::Active => continue,
|
||||||
TimelineState::Broken | TimelineState::Stopping => {
|
TimelineState::Broken { .. } | TimelineState::Stopping => {
|
||||||
debug!("timeline entered terminal state {new_state:?}, stopping wal connection manager loop");
|
debug!("timeline entered terminal state {new_state:?}, stopping wal connection manager loop");
|
||||||
return ControlFlow::Break(());
|
return ControlFlow::Break(());
|
||||||
}
|
}
|
||||||
@@ -1321,10 +1321,11 @@ mod tests {
|
|||||||
|
|
||||||
const DUMMY_SAFEKEEPER_HOST: &str = "safekeeper_connstr";
|
const DUMMY_SAFEKEEPER_HOST: &str = "safekeeper_connstr";
|
||||||
|
|
||||||
async fn dummy_state(harness: &TenantHarness<'_>) -> ConnectionManagerState {
|
async fn dummy_state(harness: &TenantHarness) -> ConnectionManagerState {
|
||||||
let (tenant, ctx) = harness.load().await;
|
let (tenant, ctx) = harness.load().await;
|
||||||
let timeline = tenant
|
let timeline = tenant
|
||||||
.create_test_timeline(TIMELINE_ID, Lsn(0), crate::DEFAULT_PG_VERSION, &ctx)
|
.create_test_timeline(TIMELINE_ID, Lsn(0x8), crate::DEFAULT_PG_VERSION, &ctx)
|
||||||
|
.await
|
||||||
.expect("Failed to create an empty timeline for dummy wal connection manager");
|
.expect("Failed to create an empty timeline for dummy wal connection manager");
|
||||||
|
|
||||||
ConnectionManagerState {
|
ConnectionManagerState {
|
||||||
|
|||||||
@@ -71,6 +71,8 @@ pub(super) async fn handle_walreceiver_connection(
|
|||||||
ctx: RequestContext,
|
ctx: RequestContext,
|
||||||
node: NodeId,
|
node: NodeId,
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
|
debug_assert_current_span_has_tenant_and_timeline_id();
|
||||||
|
|
||||||
WALRECEIVER_STARTED_CONNECTIONS.inc();
|
WALRECEIVER_STARTED_CONNECTIONS.inc();
|
||||||
|
|
||||||
// Connect to the database in replication mode.
|
// Connect to the database in replication mode.
|
||||||
@@ -140,6 +142,9 @@ pub(super) async fn handle_walreceiver_connection(
|
|||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
// Enrich the log lines emitted by this closure with meaningful context.
|
||||||
|
// TODO: technically, this task outlives the surrounding function, so, the
|
||||||
|
// spans won't be properly nested.
|
||||||
.instrument(tracing::info_span!("poller")),
|
.instrument(tracing::info_span!("poller")),
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -304,12 +309,15 @@ pub(super) async fn handle_walreceiver_connection(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
timeline.check_checkpoint_distance().with_context(|| {
|
timeline
|
||||||
format!(
|
.check_checkpoint_distance()
|
||||||
"Failed to check checkpoint distance for timeline {}",
|
.await
|
||||||
timeline.timeline_id
|
.with_context(|| {
|
||||||
)
|
format!(
|
||||||
})?;
|
"Failed to check checkpoint distance for timeline {}",
|
||||||
|
timeline.timeline_id
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
|
||||||
if let Some(last_lsn) = status_update {
|
if let Some(last_lsn) = status_update {
|
||||||
let timeline_remote_consistent_lsn =
|
let timeline_remote_consistent_lsn =
|
||||||
|
|||||||
@@ -76,6 +76,12 @@ pub(crate) struct UploadQueueInitialized {
|
|||||||
pub(crate) queued_operations: VecDeque<UploadOp>,
|
pub(crate) queued_operations: VecDeque<UploadOp>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl UploadQueueInitialized {
|
||||||
|
pub(super) fn no_pending_work(&self) -> bool {
|
||||||
|
self.inprogress_tasks.is_empty() && self.queued_operations.is_empty()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Clone, Copy)]
|
#[derive(Clone, Copy)]
|
||||||
pub(super) enum SetDeletedFlagProgress {
|
pub(super) enum SetDeletedFlagProgress {
|
||||||
NotRunning,
|
NotRunning,
|
||||||
@@ -84,9 +90,7 @@ pub(super) enum SetDeletedFlagProgress {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub(super) struct UploadQueueStopped {
|
pub(super) struct UploadQueueStopped {
|
||||||
pub(super) latest_files: HashMap<LayerFileName, LayerFileMetadata>,
|
pub(super) upload_queue_for_deletion: UploadQueueInitialized,
|
||||||
pub(super) last_uploaded_consistent_lsn: Lsn,
|
|
||||||
pub(super) latest_metadata: TimelineMetadata,
|
|
||||||
pub(super) deleted_at: SetDeletedFlagProgress,
|
pub(super) deleted_at: SetDeletedFlagProgress,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -187,6 +191,15 @@ impl UploadQueue {
|
|||||||
UploadQueue::Initialized(x) => Ok(x),
|
UploadQueue::Initialized(x) => Ok(x),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) fn stopped_mut(&mut self) -> anyhow::Result<&mut UploadQueueStopped> {
|
||||||
|
match self {
|
||||||
|
UploadQueue::Initialized(_) | UploadQueue::Uninitialized => {
|
||||||
|
anyhow::bail!("queue is in state {}", self.as_str())
|
||||||
|
}
|
||||||
|
UploadQueue::Stopped(stopped) => Ok(stopped),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// An in-progress upload or delete task.
|
/// An in-progress upload or delete task.
|
||||||
@@ -199,6 +212,13 @@ pub(crate) struct UploadTask {
|
|||||||
pub(crate) op: UploadOp,
|
pub(crate) op: UploadOp,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub(crate) struct Delete {
|
||||||
|
pub(crate) file_kind: RemoteOpFileKind,
|
||||||
|
pub(crate) layer_file_name: LayerFileName,
|
||||||
|
pub(crate) scheduled_from_timeline_delete: bool,
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub(crate) enum UploadOp {
|
pub(crate) enum UploadOp {
|
||||||
/// Upload a layer file
|
/// Upload a layer file
|
||||||
@@ -207,8 +227,8 @@ pub(crate) enum UploadOp {
|
|||||||
/// Upload the metadata file
|
/// Upload the metadata file
|
||||||
UploadMetadata(IndexPart, Lsn),
|
UploadMetadata(IndexPart, Lsn),
|
||||||
|
|
||||||
/// Delete a file.
|
/// Delete a layer file
|
||||||
Delete(RemoteOpFileKind, LayerFileName),
|
Delete(Delete),
|
||||||
|
|
||||||
/// Barrier. When the barrier operation is reached,
|
/// Barrier. When the barrier operation is reached,
|
||||||
Barrier(tokio::sync::watch::Sender<()>),
|
Barrier(tokio::sync::watch::Sender<()>),
|
||||||
@@ -226,7 +246,12 @@ impl std::fmt::Display for UploadOp {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
UploadOp::UploadMetadata(_, lsn) => write!(f, "UploadMetadata(lsn: {})", lsn),
|
UploadOp::UploadMetadata(_, lsn) => write!(f, "UploadMetadata(lsn: {})", lsn),
|
||||||
UploadOp::Delete(_, path) => write!(f, "Delete({})", path.file_name()),
|
UploadOp::Delete(delete) => write!(
|
||||||
|
f,
|
||||||
|
"Delete(path: {}, scheduled_from_timeline_delete: {})",
|
||||||
|
delete.layer_file_name.file_name(),
|
||||||
|
delete.scheduled_from_timeline_delete
|
||||||
|
),
|
||||||
UploadOp::Barrier(_) => write!(f, "Barrier"),
|
UploadOp::Barrier(_) => write!(f, "Barrier"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -302,15 +302,6 @@ impl VirtualFile {
|
|||||||
.observe_closure_duration(|| self.open_options.open(&self.path))?;
|
.observe_closure_duration(|| self.open_options.open(&self.path))?;
|
||||||
|
|
||||||
// Perform the requested operation on it
|
// Perform the requested operation on it
|
||||||
//
|
|
||||||
// TODO: We could downgrade the locks to read mode before calling
|
|
||||||
// 'func', to allow a little bit more concurrency, but the standard
|
|
||||||
// library RwLock doesn't allow downgrading without releasing the lock,
|
|
||||||
// and that doesn't seem worth the trouble.
|
|
||||||
//
|
|
||||||
// XXX: `parking_lot::RwLock` can enable such downgrades, yet its implementation is fair and
|
|
||||||
// may deadlock on subsequent read calls.
|
|
||||||
// Simply replacing all `RwLock` in project causes deadlocks, so use it sparingly.
|
|
||||||
let result = STORAGE_IO_TIME
|
let result = STORAGE_IO_TIME
|
||||||
.with_label_values(&[op, &self.tenant_id, &self.timeline_id])
|
.with_label_values(&[op, &self.tenant_id, &self.timeline_id])
|
||||||
.observe_closure_duration(|| func(&file));
|
.observe_closure_duration(|| func(&file));
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ use postgres_ffi::v14::nonrelfile_utils::clogpage_precedes;
|
|||||||
use postgres_ffi::v14::nonrelfile_utils::slru_may_delete_clogsegment;
|
use postgres_ffi::v14::nonrelfile_utils::slru_may_delete_clogsegment;
|
||||||
use postgres_ffi::{fsm_logical_to_physical, page_is_new, page_set_lsn};
|
use postgres_ffi::{fsm_logical_to_physical, page_is_new, page_set_lsn};
|
||||||
|
|
||||||
use anyhow::Result;
|
use anyhow::{Context, Result};
|
||||||
use bytes::{Buf, Bytes, BytesMut};
|
use bytes::{Buf, Bytes, BytesMut};
|
||||||
use tracing::*;
|
use tracing::*;
|
||||||
|
|
||||||
@@ -333,7 +333,7 @@ impl<'a> WalIngest<'a> {
|
|||||||
|
|
||||||
// Now that this record has been fully handled, including updating the
|
// Now that this record has been fully handled, including updating the
|
||||||
// checkpoint data, let the repository know that it is up-to-date to this LSN
|
// checkpoint data, let the repository know that it is up-to-date to this LSN
|
||||||
modification.commit()?;
|
modification.commit().await?;
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
@@ -1082,7 +1082,10 @@ impl<'a> WalIngest<'a> {
|
|||||||
.await?
|
.await?
|
||||||
{
|
{
|
||||||
// create it with 0 size initially, the logic below will extend it
|
// create it with 0 size initially, the logic below will extend it
|
||||||
modification.put_rel_creation(rel, 0, ctx).await?;
|
modification
|
||||||
|
.put_rel_creation(rel, 0, ctx)
|
||||||
|
.await
|
||||||
|
.context("Relation Error")?;
|
||||||
0
|
0
|
||||||
} else {
|
} else {
|
||||||
self.timeline.get_rel_size(rel, last_lsn, true, ctx).await?
|
self.timeline.get_rel_size(rel, last_lsn, true, ctx).await?
|
||||||
@@ -1171,7 +1174,6 @@ impl<'a> WalIngest<'a> {
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::pgdatadir_mapping::create_test_timeline;
|
|
||||||
use crate::tenant::harness::*;
|
use crate::tenant::harness::*;
|
||||||
use crate::tenant::Timeline;
|
use crate::tenant::Timeline;
|
||||||
use postgres_ffi::v14::xlog_utils::SIZEOF_CHECKPOINT;
|
use postgres_ffi::v14::xlog_utils::SIZEOF_CHECKPOINT;
|
||||||
@@ -1200,7 +1202,7 @@ mod tests {
|
|||||||
let mut m = tline.begin_modification(Lsn(0x10));
|
let mut m = tline.begin_modification(Lsn(0x10));
|
||||||
m.put_checkpoint(ZERO_CHECKPOINT.clone())?;
|
m.put_checkpoint(ZERO_CHECKPOINT.clone())?;
|
||||||
m.put_relmap_file(0, 111, Bytes::from(""), ctx).await?; // dummy relmapper file
|
m.put_relmap_file(0, 111, Bytes::from(""), ctx).await?; // dummy relmapper file
|
||||||
m.commit()?;
|
m.commit().await?;
|
||||||
let walingest = WalIngest::new(tline, Lsn(0x10), ctx).await?;
|
let walingest = WalIngest::new(tline, Lsn(0x10), ctx).await?;
|
||||||
|
|
||||||
Ok(walingest)
|
Ok(walingest)
|
||||||
@@ -1209,7 +1211,9 @@ mod tests {
|
|||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn test_relsize() -> Result<()> {
|
async fn test_relsize() -> Result<()> {
|
||||||
let (tenant, ctx) = TenantHarness::create("test_relsize")?.load().await;
|
let (tenant, ctx) = TenantHarness::create("test_relsize")?.load().await;
|
||||||
let tline = create_test_timeline(&tenant, TIMELINE_ID, DEFAULT_PG_VERSION, &ctx)?;
|
let tline = tenant
|
||||||
|
.create_test_timeline(TIMELINE_ID, Lsn(8), DEFAULT_PG_VERSION, &ctx)
|
||||||
|
.await?;
|
||||||
let mut walingest = init_walingest_test(&tline, &ctx).await?;
|
let mut walingest = init_walingest_test(&tline, &ctx).await?;
|
||||||
|
|
||||||
let mut m = tline.begin_modification(Lsn(0x20));
|
let mut m = tline.begin_modification(Lsn(0x20));
|
||||||
@@ -1217,22 +1221,22 @@ mod tests {
|
|||||||
walingest
|
walingest
|
||||||
.put_rel_page_image(&mut m, TESTREL_A, 0, TEST_IMG("foo blk 0 at 2"), &ctx)
|
.put_rel_page_image(&mut m, TESTREL_A, 0, TEST_IMG("foo blk 0 at 2"), &ctx)
|
||||||
.await?;
|
.await?;
|
||||||
m.commit()?;
|
m.commit().await?;
|
||||||
let mut m = tline.begin_modification(Lsn(0x30));
|
let mut m = tline.begin_modification(Lsn(0x30));
|
||||||
walingest
|
walingest
|
||||||
.put_rel_page_image(&mut m, TESTREL_A, 0, TEST_IMG("foo blk 0 at 3"), &ctx)
|
.put_rel_page_image(&mut m, TESTREL_A, 0, TEST_IMG("foo blk 0 at 3"), &ctx)
|
||||||
.await?;
|
.await?;
|
||||||
m.commit()?;
|
m.commit().await?;
|
||||||
let mut m = tline.begin_modification(Lsn(0x40));
|
let mut m = tline.begin_modification(Lsn(0x40));
|
||||||
walingest
|
walingest
|
||||||
.put_rel_page_image(&mut m, TESTREL_A, 1, TEST_IMG("foo blk 1 at 4"), &ctx)
|
.put_rel_page_image(&mut m, TESTREL_A, 1, TEST_IMG("foo blk 1 at 4"), &ctx)
|
||||||
.await?;
|
.await?;
|
||||||
m.commit()?;
|
m.commit().await?;
|
||||||
let mut m = tline.begin_modification(Lsn(0x50));
|
let mut m = tline.begin_modification(Lsn(0x50));
|
||||||
walingest
|
walingest
|
||||||
.put_rel_page_image(&mut m, TESTREL_A, 2, TEST_IMG("foo blk 2 at 5"), &ctx)
|
.put_rel_page_image(&mut m, TESTREL_A, 2, TEST_IMG("foo blk 2 at 5"), &ctx)
|
||||||
.await?;
|
.await?;
|
||||||
m.commit()?;
|
m.commit().await?;
|
||||||
|
|
||||||
assert_current_logical_size(&tline, Lsn(0x50));
|
assert_current_logical_size(&tline, Lsn(0x50));
|
||||||
|
|
||||||
@@ -1318,7 +1322,7 @@ mod tests {
|
|||||||
walingest
|
walingest
|
||||||
.put_rel_truncation(&mut m, TESTREL_A, 2, &ctx)
|
.put_rel_truncation(&mut m, TESTREL_A, 2, &ctx)
|
||||||
.await?;
|
.await?;
|
||||||
m.commit()?;
|
m.commit().await?;
|
||||||
assert_current_logical_size(&tline, Lsn(0x60));
|
assert_current_logical_size(&tline, Lsn(0x60));
|
||||||
|
|
||||||
// Check reported size and contents after truncation
|
// Check reported size and contents after truncation
|
||||||
@@ -1360,7 +1364,7 @@ mod tests {
|
|||||||
walingest
|
walingest
|
||||||
.put_rel_truncation(&mut m, TESTREL_A, 0, &ctx)
|
.put_rel_truncation(&mut m, TESTREL_A, 0, &ctx)
|
||||||
.await?;
|
.await?;
|
||||||
m.commit()?;
|
m.commit().await?;
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
tline
|
tline
|
||||||
.get_rel_size(TESTREL_A, Lsn(0x68), false, &ctx)
|
.get_rel_size(TESTREL_A, Lsn(0x68), false, &ctx)
|
||||||
@@ -1373,7 +1377,7 @@ mod tests {
|
|||||||
walingest
|
walingest
|
||||||
.put_rel_page_image(&mut m, TESTREL_A, 1, TEST_IMG("foo blk 1"), &ctx)
|
.put_rel_page_image(&mut m, TESTREL_A, 1, TEST_IMG("foo blk 1"), &ctx)
|
||||||
.await?;
|
.await?;
|
||||||
m.commit()?;
|
m.commit().await?;
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
tline
|
tline
|
||||||
.get_rel_size(TESTREL_A, Lsn(0x70), false, &ctx)
|
.get_rel_size(TESTREL_A, Lsn(0x70), false, &ctx)
|
||||||
@@ -1398,7 +1402,7 @@ mod tests {
|
|||||||
walingest
|
walingest
|
||||||
.put_rel_page_image(&mut m, TESTREL_A, 1500, TEST_IMG("foo blk 1500"), &ctx)
|
.put_rel_page_image(&mut m, TESTREL_A, 1500, TEST_IMG("foo blk 1500"), &ctx)
|
||||||
.await?;
|
.await?;
|
||||||
m.commit()?;
|
m.commit().await?;
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
tline
|
tline
|
||||||
.get_rel_size(TESTREL_A, Lsn(0x80), false, &ctx)
|
.get_rel_size(TESTREL_A, Lsn(0x80), false, &ctx)
|
||||||
@@ -1428,14 +1432,16 @@ mod tests {
|
|||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn test_drop_extend() -> Result<()> {
|
async fn test_drop_extend() -> Result<()> {
|
||||||
let (tenant, ctx) = TenantHarness::create("test_drop_extend")?.load().await;
|
let (tenant, ctx) = TenantHarness::create("test_drop_extend")?.load().await;
|
||||||
let tline = create_test_timeline(&tenant, TIMELINE_ID, DEFAULT_PG_VERSION, &ctx)?;
|
let tline = tenant
|
||||||
|
.create_test_timeline(TIMELINE_ID, Lsn(8), DEFAULT_PG_VERSION, &ctx)
|
||||||
|
.await?;
|
||||||
let mut walingest = init_walingest_test(&tline, &ctx).await?;
|
let mut walingest = init_walingest_test(&tline, &ctx).await?;
|
||||||
|
|
||||||
let mut m = tline.begin_modification(Lsn(0x20));
|
let mut m = tline.begin_modification(Lsn(0x20));
|
||||||
walingest
|
walingest
|
||||||
.put_rel_page_image(&mut m, TESTREL_A, 0, TEST_IMG("foo blk 0 at 2"), &ctx)
|
.put_rel_page_image(&mut m, TESTREL_A, 0, TEST_IMG("foo blk 0 at 2"), &ctx)
|
||||||
.await?;
|
.await?;
|
||||||
m.commit()?;
|
m.commit().await?;
|
||||||
|
|
||||||
// Check that rel exists and size is correct
|
// Check that rel exists and size is correct
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
@@ -1454,7 +1460,7 @@ mod tests {
|
|||||||
// Drop rel
|
// Drop rel
|
||||||
let mut m = tline.begin_modification(Lsn(0x30));
|
let mut m = tline.begin_modification(Lsn(0x30));
|
||||||
walingest.put_rel_drop(&mut m, TESTREL_A, &ctx).await?;
|
walingest.put_rel_drop(&mut m, TESTREL_A, &ctx).await?;
|
||||||
m.commit()?;
|
m.commit().await?;
|
||||||
|
|
||||||
// Check that rel is not visible anymore
|
// Check that rel is not visible anymore
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
@@ -1472,7 +1478,7 @@ mod tests {
|
|||||||
walingest
|
walingest
|
||||||
.put_rel_page_image(&mut m, TESTREL_A, 0, TEST_IMG("foo blk 0 at 4"), &ctx)
|
.put_rel_page_image(&mut m, TESTREL_A, 0, TEST_IMG("foo blk 0 at 4"), &ctx)
|
||||||
.await?;
|
.await?;
|
||||||
m.commit()?;
|
m.commit().await?;
|
||||||
|
|
||||||
// Check that rel exists and size is correct
|
// Check that rel exists and size is correct
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
@@ -1497,7 +1503,9 @@ mod tests {
|
|||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn test_truncate_extend() -> Result<()> {
|
async fn test_truncate_extend() -> Result<()> {
|
||||||
let (tenant, ctx) = TenantHarness::create("test_truncate_extend")?.load().await;
|
let (tenant, ctx) = TenantHarness::create("test_truncate_extend")?.load().await;
|
||||||
let tline = create_test_timeline(&tenant, TIMELINE_ID, DEFAULT_PG_VERSION, &ctx)?;
|
let tline = tenant
|
||||||
|
.create_test_timeline(TIMELINE_ID, Lsn(8), DEFAULT_PG_VERSION, &ctx)
|
||||||
|
.await?;
|
||||||
let mut walingest = init_walingest_test(&tline, &ctx).await?;
|
let mut walingest = init_walingest_test(&tline, &ctx).await?;
|
||||||
|
|
||||||
// Create a 20 MB relation (the size is arbitrary)
|
// Create a 20 MB relation (the size is arbitrary)
|
||||||
@@ -1509,7 +1517,7 @@ mod tests {
|
|||||||
.put_rel_page_image(&mut m, TESTREL_A, blkno, TEST_IMG(&data), &ctx)
|
.put_rel_page_image(&mut m, TESTREL_A, blkno, TEST_IMG(&data), &ctx)
|
||||||
.await?;
|
.await?;
|
||||||
}
|
}
|
||||||
m.commit()?;
|
m.commit().await?;
|
||||||
|
|
||||||
// The relation was created at LSN 20, not visible at LSN 1 yet.
|
// The relation was created at LSN 20, not visible at LSN 1 yet.
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
@@ -1554,7 +1562,7 @@ mod tests {
|
|||||||
walingest
|
walingest
|
||||||
.put_rel_truncation(&mut m, TESTREL_A, 1, &ctx)
|
.put_rel_truncation(&mut m, TESTREL_A, 1, &ctx)
|
||||||
.await?;
|
.await?;
|
||||||
m.commit()?;
|
m.commit().await?;
|
||||||
|
|
||||||
// Check reported size and contents after truncation
|
// Check reported size and contents after truncation
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
@@ -1603,7 +1611,7 @@ mod tests {
|
|||||||
.put_rel_page_image(&mut m, TESTREL_A, blkno, TEST_IMG(&data), &ctx)
|
.put_rel_page_image(&mut m, TESTREL_A, blkno, TEST_IMG(&data), &ctx)
|
||||||
.await?;
|
.await?;
|
||||||
}
|
}
|
||||||
m.commit()?;
|
m.commit().await?;
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
tline
|
tline
|
||||||
@@ -1637,7 +1645,9 @@ mod tests {
|
|||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn test_large_rel() -> Result<()> {
|
async fn test_large_rel() -> Result<()> {
|
||||||
let (tenant, ctx) = TenantHarness::create("test_large_rel")?.load().await;
|
let (tenant, ctx) = TenantHarness::create("test_large_rel")?.load().await;
|
||||||
let tline = create_test_timeline(&tenant, TIMELINE_ID, DEFAULT_PG_VERSION, &ctx)?;
|
let tline = tenant
|
||||||
|
.create_test_timeline(TIMELINE_ID, Lsn(8), DEFAULT_PG_VERSION, &ctx)
|
||||||
|
.await?;
|
||||||
let mut walingest = init_walingest_test(&tline, &ctx).await?;
|
let mut walingest = init_walingest_test(&tline, &ctx).await?;
|
||||||
|
|
||||||
let mut lsn = 0x10;
|
let mut lsn = 0x10;
|
||||||
@@ -1648,7 +1658,7 @@ mod tests {
|
|||||||
walingest
|
walingest
|
||||||
.put_rel_page_image(&mut m, TESTREL_A, blknum as BlockNumber, img, &ctx)
|
.put_rel_page_image(&mut m, TESTREL_A, blknum as BlockNumber, img, &ctx)
|
||||||
.await?;
|
.await?;
|
||||||
m.commit()?;
|
m.commit().await?;
|
||||||
}
|
}
|
||||||
|
|
||||||
assert_current_logical_size(&tline, Lsn(lsn));
|
assert_current_logical_size(&tline, Lsn(lsn));
|
||||||
@@ -1664,7 +1674,7 @@ mod tests {
|
|||||||
walingest
|
walingest
|
||||||
.put_rel_truncation(&mut m, TESTREL_A, RELSEG_SIZE, &ctx)
|
.put_rel_truncation(&mut m, TESTREL_A, RELSEG_SIZE, &ctx)
|
||||||
.await?;
|
.await?;
|
||||||
m.commit()?;
|
m.commit().await?;
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
tline.get_rel_size(TESTREL_A, Lsn(lsn), false, &ctx).await?,
|
tline.get_rel_size(TESTREL_A, Lsn(lsn), false, &ctx).await?,
|
||||||
RELSEG_SIZE
|
RELSEG_SIZE
|
||||||
@@ -1677,7 +1687,7 @@ mod tests {
|
|||||||
walingest
|
walingest
|
||||||
.put_rel_truncation(&mut m, TESTREL_A, RELSEG_SIZE - 1, &ctx)
|
.put_rel_truncation(&mut m, TESTREL_A, RELSEG_SIZE - 1, &ctx)
|
||||||
.await?;
|
.await?;
|
||||||
m.commit()?;
|
m.commit().await?;
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
tline.get_rel_size(TESTREL_A, Lsn(lsn), false, &ctx).await?,
|
tline.get_rel_size(TESTREL_A, Lsn(lsn), false, &ctx).await?,
|
||||||
RELSEG_SIZE - 1
|
RELSEG_SIZE - 1
|
||||||
@@ -1693,7 +1703,7 @@ mod tests {
|
|||||||
walingest
|
walingest
|
||||||
.put_rel_truncation(&mut m, TESTREL_A, size as BlockNumber, &ctx)
|
.put_rel_truncation(&mut m, TESTREL_A, size as BlockNumber, &ctx)
|
||||||
.await?;
|
.await?;
|
||||||
m.commit()?;
|
m.commit().await?;
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
tline.get_rel_size(TESTREL_A, Lsn(lsn), false, &ctx).await?,
|
tline.get_rel_size(TESTREL_A, Lsn(lsn), false, &ctx).await?,
|
||||||
size as BlockNumber
|
size as BlockNumber
|
||||||
|
|||||||
@@ -122,6 +122,43 @@ hnsw_populate(HierarchicalNSW* hnsw, Relation indexRel, Relation heapRel)
|
|||||||
true, true, hnsw_build_callback, (void *) hnsw, NULL);
|
true, true, hnsw_build_callback, (void *) hnsw, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef __APPLE__
|
||||||
|
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <sys/sysctl.h>
|
||||||
|
|
||||||
|
static void
|
||||||
|
hnsw_check_available_memory(Size requested)
|
||||||
|
{
|
||||||
|
size_t total;
|
||||||
|
if (sysctlbyname("hw.memsize", NULL, &total, NULL, 0) < 0)
|
||||||
|
elog(ERROR, "Failed to get amount of RAM: %m");
|
||||||
|
|
||||||
|
if ((Size)NBuffers*BLCKSZ + requested >= total)
|
||||||
|
elog(ERROR, "HNSW index requeries %ld bytes while only %ld are available",
|
||||||
|
requested, total - (Size)NBuffers*BLCKSZ);
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#include <sys/sysinfo.h>
|
||||||
|
|
||||||
|
static void
|
||||||
|
hnsw_check_available_memory(Size requested)
|
||||||
|
{
|
||||||
|
struct sysinfo si;
|
||||||
|
Size total;
|
||||||
|
if (sysinfo(&si) < 0)
|
||||||
|
elog(ERROR, "Failed to get amount of RAM: %n");
|
||||||
|
|
||||||
|
total = si.totalram*si.mem_unit;
|
||||||
|
if ((Size)NBuffers*BLCKSZ + requested >= total)
|
||||||
|
elog(ERROR, "HNSW index requeries %ld bytes while only %ld are available",
|
||||||
|
requested, total - (Size)NBuffers*BLCKSZ);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
static HierarchicalNSW*
|
static HierarchicalNSW*
|
||||||
hnsw_get_index(Relation indexRel, Relation heapRel)
|
hnsw_get_index(Relation indexRel, Relation heapRel)
|
||||||
{
|
{
|
||||||
@@ -156,6 +193,8 @@ hnsw_get_index(Relation indexRel, Relation heapRel)
|
|||||||
size_data_per_element = size_links_level0 + data_size + sizeof(label_t);
|
size_data_per_element = size_links_level0 + data_size + sizeof(label_t);
|
||||||
shmem_size = hnsw_sizeof() + maxelements * size_data_per_element;
|
shmem_size = hnsw_sizeof() + maxelements * size_data_per_element;
|
||||||
|
|
||||||
|
hnsw_check_available_memory(shmem_size);
|
||||||
|
|
||||||
/* first try to attach to existed index */
|
/* first try to attach to existed index */
|
||||||
if (!dsm_impl_op(DSM_OP_ATTACH, handle, 0, &impl_private,
|
if (!dsm_impl_op(DSM_OP_ATTACH, handle, 0, &impl_private,
|
||||||
&mapped_address, &mapped_size, DEBUG1))
|
&mapped_address, &mapped_size, DEBUG1))
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
comment = 'hNsw index'
|
comment = 'hnsw index'
|
||||||
default_version = '0.1.0'
|
default_version = '0.1.0'
|
||||||
module_pathname = '$libdir/hnsw'
|
module_pathname = '$libdir/hnsw'
|
||||||
relocatable = true
|
relocatable = true
|
||||||
|
|||||||
@@ -32,6 +32,7 @@
|
|||||||
#include "port.h"
|
#include "port.h"
|
||||||
#include <curl/curl.h>
|
#include <curl/curl.h>
|
||||||
#include "utils/jsonb.h"
|
#include "utils/jsonb.h"
|
||||||
|
#include "libpq/crypt.h"
|
||||||
|
|
||||||
static ProcessUtility_hook_type PreviousProcessUtilityHook = NULL;
|
static ProcessUtility_hook_type PreviousProcessUtilityHook = NULL;
|
||||||
|
|
||||||
@@ -161,7 +162,22 @@ ConstructDeltaMessage()
|
|||||||
PushKeyValue(&state, "name", entry->name);
|
PushKeyValue(&state, "name", entry->name);
|
||||||
if (entry->password)
|
if (entry->password)
|
||||||
{
|
{
|
||||||
|
#if PG_MAJORVERSION_NUM == 14
|
||||||
|
char *logdetail;
|
||||||
|
#else
|
||||||
|
const char *logdetail;
|
||||||
|
#endif
|
||||||
PushKeyValue(&state, "password", (char *) entry->password);
|
PushKeyValue(&state, "password", (char *) entry->password);
|
||||||
|
char *encrypted_password = get_role_password(entry->name, &logdetail);
|
||||||
|
|
||||||
|
if (encrypted_password)
|
||||||
|
{
|
||||||
|
PushKeyValue(&state, "encrypted_password", encrypted_password);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
elog(ERROR, "Failed to get encrypted password: %s", logdetail);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (entry->old_name[0] != '\0')
|
if (entry->old_name[0] != '\0')
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -190,7 +190,7 @@ lfc_change_limit_hook(int newval, void *extra)
|
|||||||
hash_search(lfc_hash, &victim->key, HASH_REMOVE, NULL);
|
hash_search(lfc_hash, &victim->key, HASH_REMOVE, NULL);
|
||||||
lfc_ctl->used -= 1;
|
lfc_ctl->used -= 1;
|
||||||
}
|
}
|
||||||
elog(LOG, "set local file cache limit to %d", new_size);
|
elog(DEBUG1, "set local file cache limit to %d", new_size);
|
||||||
LWLockRelease(lfc_lock);
|
LWLockRelease(lfc_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -32,3 +32,7 @@ CREATE VIEW local_cache AS
|
|||||||
SELECT P.* FROM local_cache_pages() AS P
|
SELECT P.* FROM local_cache_pages() AS P
|
||||||
(pageoffs int8, relfilenode oid, reltablespace oid, reldatabase oid,
|
(pageoffs int8, relfilenode oid, reltablespace oid, reldatabase oid,
|
||||||
relforknumber int2, relblocknumber int8, accesscount int4);
|
relforknumber int2, relblocknumber int8, accesscount int4);
|
||||||
|
|
||||||
|
CREATE FUNCTION copy_from(conninfo cstring) RETURNS BIGINT
|
||||||
|
AS 'MODULE_PATHNAME', 'copy_from'
|
||||||
|
LANGUAGE C;
|
||||||
|
|||||||
291
pgxn/neon/neon.c
291
pgxn/neon/neon.c
@@ -13,20 +13,32 @@
|
|||||||
|
|
||||||
#include "access/xact.h"
|
#include "access/xact.h"
|
||||||
#include "access/xlog.h"
|
#include "access/xlog.h"
|
||||||
|
#include "access/relation.h"
|
||||||
|
#include "access/xloginsert.h"
|
||||||
#include "storage/buf_internals.h"
|
#include "storage/buf_internals.h"
|
||||||
#include "storage/bufmgr.h"
|
#include "storage/bufmgr.h"
|
||||||
#include "catalog/pg_type.h"
|
#include "catalog/pg_type.h"
|
||||||
|
#include "catalog/namespace.h"
|
||||||
#include "replication/walsender.h"
|
#include "replication/walsender.h"
|
||||||
#include "funcapi.h"
|
#include "funcapi.h"
|
||||||
|
#include "miscadmin.h"
|
||||||
#include "access/htup_details.h"
|
#include "access/htup_details.h"
|
||||||
#include "utils/pg_lsn.h"
|
#include "utils/pg_lsn.h"
|
||||||
#include "utils/guc.h"
|
#include "utils/guc.h"
|
||||||
|
#include "utils/wait_event.h"
|
||||||
|
#include "utils/rel.h"
|
||||||
|
#include "utils/varlena.h"
|
||||||
|
#include "utils/builtins.h"
|
||||||
|
|
||||||
#include "neon.h"
|
#include "neon.h"
|
||||||
#include "walproposer.h"
|
#include "walproposer.h"
|
||||||
#include "pagestore_client.h"
|
#include "pagestore_client.h"
|
||||||
#include "control_plane_connector.h"
|
#include "control_plane_connector.h"
|
||||||
|
|
||||||
|
#include "libpq-fe.h"
|
||||||
|
#include "libpq/pqformat.h"
|
||||||
|
#include "libpq/libpq.h"
|
||||||
|
|
||||||
PG_MODULE_MAGIC;
|
PG_MODULE_MAGIC;
|
||||||
void _PG_init(void);
|
void _PG_init(void);
|
||||||
|
|
||||||
@@ -46,6 +58,7 @@ _PG_init(void)
|
|||||||
PG_FUNCTION_INFO_V1(pg_cluster_size);
|
PG_FUNCTION_INFO_V1(pg_cluster_size);
|
||||||
PG_FUNCTION_INFO_V1(backpressure_lsns);
|
PG_FUNCTION_INFO_V1(backpressure_lsns);
|
||||||
PG_FUNCTION_INFO_V1(backpressure_throttling_time);
|
PG_FUNCTION_INFO_V1(backpressure_throttling_time);
|
||||||
|
PG_FUNCTION_INFO_V1(copy_from);
|
||||||
|
|
||||||
Datum
|
Datum
|
||||||
pg_cluster_size(PG_FUNCTION_ARGS)
|
pg_cluster_size(PG_FUNCTION_ARGS)
|
||||||
@@ -91,3 +104,281 @@ backpressure_throttling_time(PG_FUNCTION_ARGS)
|
|||||||
{
|
{
|
||||||
PG_RETURN_UINT64(BackpressureThrottlingTime());
|
PG_RETURN_UINT64(BackpressureThrottlingTime());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#define N_RAW_PAGE_COLUMNS 4
|
||||||
|
#define COPY_FETCH_COUNT 16
|
||||||
|
|
||||||
|
|
||||||
|
static void
|
||||||
|
report_error(int elevel, PGresult *res, PGconn *conn,
|
||||||
|
bool clear, const char *sql)
|
||||||
|
{
|
||||||
|
/* If requested, PGresult must be released before leaving this function. */
|
||||||
|
PG_TRY();
|
||||||
|
{
|
||||||
|
char *diag_sqlstate = PQresultErrorField(res, PG_DIAG_SQLSTATE);
|
||||||
|
char *message_primary = PQresultErrorField(res, PG_DIAG_MESSAGE_PRIMARY);
|
||||||
|
char *message_detail = PQresultErrorField(res, PG_DIAG_MESSAGE_DETAIL);
|
||||||
|
char *message_hint = PQresultErrorField(res, PG_DIAG_MESSAGE_HINT);
|
||||||
|
char *message_context = PQresultErrorField(res, PG_DIAG_CONTEXT);
|
||||||
|
int sqlstate;
|
||||||
|
|
||||||
|
if (diag_sqlstate)
|
||||||
|
sqlstate = MAKE_SQLSTATE(diag_sqlstate[0],
|
||||||
|
diag_sqlstate[1],
|
||||||
|
diag_sqlstate[2],
|
||||||
|
diag_sqlstate[3],
|
||||||
|
diag_sqlstate[4]);
|
||||||
|
else
|
||||||
|
sqlstate = ERRCODE_CONNECTION_FAILURE;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If we don't get a message from the PGresult, try the PGconn. This
|
||||||
|
* is needed because for connection-level failures, PQexec may just
|
||||||
|
* return NULL, not a PGresult at all.
|
||||||
|
*/
|
||||||
|
if (message_primary == NULL)
|
||||||
|
message_primary = pchomp(PQerrorMessage(conn));
|
||||||
|
|
||||||
|
ereport(elevel,
|
||||||
|
(errcode(sqlstate),
|
||||||
|
(message_primary != NULL && message_primary[0] != '\0') ?
|
||||||
|
errmsg_internal("%s", message_primary) :
|
||||||
|
errmsg("could not obtain message string for remote error"),
|
||||||
|
message_detail ? errdetail_internal("%s", message_detail) : 0,
|
||||||
|
message_hint ? errhint("%s", message_hint) : 0,
|
||||||
|
message_context ? errcontext("%s", message_context) : 0,
|
||||||
|
sql ? errcontext("remote SQL command: %s", sql) : 0));
|
||||||
|
}
|
||||||
|
PG_FINALLY();
|
||||||
|
{
|
||||||
|
if (clear)
|
||||||
|
PQclear(res);
|
||||||
|
}
|
||||||
|
PG_END_TRY();
|
||||||
|
}
|
||||||
|
|
||||||
|
static PGresult *
|
||||||
|
get_result(PGconn *conn, const char *query)
|
||||||
|
{
|
||||||
|
PGresult *volatile last_res = NULL;
|
||||||
|
|
||||||
|
/* In what follows, do not leak any PGresults on an error. */
|
||||||
|
PG_TRY();
|
||||||
|
{
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
PGresult *res;
|
||||||
|
|
||||||
|
while (PQisBusy(conn))
|
||||||
|
{
|
||||||
|
int wc;
|
||||||
|
|
||||||
|
/* Sleep until there's something to do */
|
||||||
|
wc = WaitLatchOrSocket(MyLatch,
|
||||||
|
WL_LATCH_SET | WL_SOCKET_READABLE |
|
||||||
|
WL_EXIT_ON_PM_DEATH,
|
||||||
|
PQsocket(conn),
|
||||||
|
-1L, PG_WAIT_EXTENSION);
|
||||||
|
ResetLatch(MyLatch);
|
||||||
|
|
||||||
|
CHECK_FOR_INTERRUPTS();
|
||||||
|
|
||||||
|
/* Data available in socket? */
|
||||||
|
if (wc & WL_SOCKET_READABLE)
|
||||||
|
{
|
||||||
|
if (!PQconsumeInput(conn))
|
||||||
|
report_error(ERROR, NULL, conn, false, query);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
res = PQgetResult(conn);
|
||||||
|
if (res == NULL)
|
||||||
|
break; /* query is complete */
|
||||||
|
|
||||||
|
PQclear(last_res);
|
||||||
|
last_res = res;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
PG_CATCH();
|
||||||
|
{
|
||||||
|
PQclear(last_res);
|
||||||
|
PG_RE_THROW();
|
||||||
|
}
|
||||||
|
PG_END_TRY();
|
||||||
|
|
||||||
|
return last_res;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define CREATE_COPYDATA_FUNC "\
|
||||||
|
create or replace function copydata() returns setof record as $$ \
|
||||||
|
declare \
|
||||||
|
relsize integer; \
|
||||||
|
total_relsize integer; \
|
||||||
|
content bytea; \
|
||||||
|
r record; \
|
||||||
|
fork text; \
|
||||||
|
relname text; \
|
||||||
|
pagesize integer; \
|
||||||
|
begin \
|
||||||
|
pagesize = current_setting('block_size'); \
|
||||||
|
for r in select oid,reltoastrelid from pg_class where relnamespace not in (select oid from pg_namespace where nspname in ('pg_catalog','pg_toast','information_schema')) \
|
||||||
|
loop \
|
||||||
|
relname = r.oid::regclass::text; \
|
||||||
|
total_relsize = 0; \
|
||||||
|
foreach fork in array array['main','vm','fsm'] \
|
||||||
|
loop \
|
||||||
|
relsize = pg_relation_size(r.oid, fork)/pagesize; \
|
||||||
|
total_relsize = total_relsize + relsize; \
|
||||||
|
for p in 1..relsize \
|
||||||
|
loop \
|
||||||
|
content = get_raw_page(relname, fork, p-1); \
|
||||||
|
return next row(relname,fork,p-1,content); \
|
||||||
|
end loop; \
|
||||||
|
end loop; \
|
||||||
|
if total_relsize <> 0 and r.reltoastrelid <> 0 then \
|
||||||
|
foreach relname in array array ['pg_toast.pg_toast_'||r.oid, 'pg_toast.pg_toast_'||r.oid||'_index'] \
|
||||||
|
loop \
|
||||||
|
foreach fork in array array['main','vm','fsm'] \
|
||||||
|
loop \
|
||||||
|
relsize = pg_relation_size(relname, fork)/pagesize; \
|
||||||
|
for p in 1..relsize \
|
||||||
|
loop \
|
||||||
|
content = get_raw_page(relname, fork, p-1); \
|
||||||
|
return next row(relname,fork,p-1,content); \
|
||||||
|
end loop; \
|
||||||
|
end loop; \
|
||||||
|
end loop; \
|
||||||
|
end if; \
|
||||||
|
end loop; \
|
||||||
|
end; \
|
||||||
|
$$ language plpgsql"
|
||||||
|
|
||||||
|
Datum
|
||||||
|
copy_from(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
char const* conninfo = PG_GETARG_CSTRING(0);
|
||||||
|
PGconn* conn;
|
||||||
|
char const* declare_cursor = "declare copy_data_cursor no scroll cursor for select * from copydata() as raw_page(relid text, fork text, blkno integer, content bytea)";
|
||||||
|
char* fetch_cursor = psprintf("fetch forward %d copy_data_cursor", COPY_FETCH_COUNT);
|
||||||
|
char const* close_cursor = "close copy_data_cursor";
|
||||||
|
char const* vacuum_freeze = "vacuum freeze";
|
||||||
|
char *content;
|
||||||
|
char const* relname;
|
||||||
|
BlockNumber blkno;
|
||||||
|
ForkNumber forknum;
|
||||||
|
BlockNumber prev_blkno = InvalidBlockNumber;
|
||||||
|
RangeVar *relrv;
|
||||||
|
Relation rel = NULL;
|
||||||
|
BlockNumber rel_size;
|
||||||
|
int64_t total = 0;
|
||||||
|
PGresult *res;
|
||||||
|
char blkno_buf[4];
|
||||||
|
int n_tuples;
|
||||||
|
Buffer buf;
|
||||||
|
char* toast_rel_name;
|
||||||
|
Oid relid = InvalidOid;
|
||||||
|
|
||||||
|
/* Connect to the source database */
|
||||||
|
conn = PQconnectdb(conninfo);
|
||||||
|
if (!conn || PQstatus(conn) != CONNECTION_OK)
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_SQLCLIENT_UNABLE_TO_ESTABLISH_SQLCONNECTION),
|
||||||
|
errmsg("could not connect to server \"%s\"",
|
||||||
|
conninfo),
|
||||||
|
errdetail_internal("%s", pchomp(PQerrorMessage(conn)))));
|
||||||
|
|
||||||
|
/* First create store procedure (assumes that pageinspector extension is already installed) */
|
||||||
|
res = PQexec(conn, CREATE_COPYDATA_FUNC);
|
||||||
|
if (res == NULL || PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||||
|
report_error(ERROR, res, conn, true, CREATE_COPYDATA_FUNC);
|
||||||
|
PQclear(res);
|
||||||
|
|
||||||
|
/* Freeze all tables to prevent problems with XID mapping */
|
||||||
|
res = PQexec(conn, vacuum_freeze);
|
||||||
|
if (res == NULL || PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||||
|
report_error(ERROR, res, conn, true, vacuum_freeze);
|
||||||
|
PQclear(res);
|
||||||
|
|
||||||
|
/* Start transaction to use cursor */
|
||||||
|
res = PQexec(conn, "BEGIN");
|
||||||
|
if (res == NULL || PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||||
|
report_error(ERROR, res, conn, true, "BEGIN");
|
||||||
|
PQclear(res);
|
||||||
|
|
||||||
|
/* Declare cursor (we have to use cursor to avoid materializing all database in memory) */
|
||||||
|
res = PQexec(conn, declare_cursor);
|
||||||
|
if (res == NULL || PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||||
|
report_error(ERROR, res, conn, true, declare_cursor);
|
||||||
|
PQclear(res);
|
||||||
|
|
||||||
|
/* Get database data */
|
||||||
|
while ((res = PQexecParams(conn, fetch_cursor, 0, NULL, NULL, NULL, NULL, 1)) != NULL)
|
||||||
|
{
|
||||||
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
|
report_error(ERROR, res, conn, true, fetch_cursor);
|
||||||
|
|
||||||
|
n_tuples = PQntuples(res);
|
||||||
|
if (PQnfields(res) != 4)
|
||||||
|
elog(ERROR, "unexpected result from copydata()");
|
||||||
|
|
||||||
|
for (int i = 0; i < n_tuples; i++)
|
||||||
|
{
|
||||||
|
relname = PQgetvalue(res, i, 0);
|
||||||
|
forknum = forkname_to_number(PQgetvalue(res, i, 1));
|
||||||
|
memcpy(&blkno, PQgetvalue(res, i, 2), sizeof(BlockNumber));
|
||||||
|
blkno = pg_ntoh32(blkno);
|
||||||
|
content = (char*)PQgetvalue(res, i, 3);
|
||||||
|
|
||||||
|
if (blkno <= prev_blkno)
|
||||||
|
{
|
||||||
|
if (forknum == MAIN_FORKNUM)
|
||||||
|
{
|
||||||
|
char* dst_rel_name = strncmp(relname, "pg_toast.", 9) == 0
|
||||||
|
/* Construct correct TOAST table name */
|
||||||
|
? psprintf("pg_toast.pg_toast_%u%s",
|
||||||
|
relid,
|
||||||
|
strcmp(relname + strlen(relname) - 5, "index") == 0 ? "_index" : "")
|
||||||
|
: (char*)relname;
|
||||||
|
if (rel)
|
||||||
|
relation_close(rel, AccessExclusiveLock);
|
||||||
|
relrv = makeRangeVarFromNameList(textToQualifiedNameList(cstring_to_text(dst_rel_name)));
|
||||||
|
rel = relation_openrv(relrv, AccessExclusiveLock);
|
||||||
|
if (dst_rel_name != relname)
|
||||||
|
pfree(dst_rel_name);
|
||||||
|
else
|
||||||
|
relid = RelationGetRelid(rel);
|
||||||
|
}
|
||||||
|
rel_size = RelationGetNumberOfBlocksInFork(rel, forknum);
|
||||||
|
}
|
||||||
|
buf = ReadBufferExtended(rel, forknum, blkno < rel_size ? blkno : P_NEW, RBM_ZERO_AND_LOCK, NULL);
|
||||||
|
MarkBufferDirty(buf);
|
||||||
|
memcpy(BufferGetPage(buf), content, BLCKSZ);
|
||||||
|
log_newpage_buffer(buf, forknum == MAIN_FORKNUM);
|
||||||
|
UnlockReleaseBuffer(buf);
|
||||||
|
|
||||||
|
total += 1;
|
||||||
|
prev_blkno = blkno;
|
||||||
|
}
|
||||||
|
PQclear(res);
|
||||||
|
if (n_tuples < COPY_FETCH_COUNT)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
res = PQexec(conn, close_cursor);
|
||||||
|
if (res == NULL || PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||||
|
report_error(ERROR, res, conn, true, close_cursor);
|
||||||
|
PQclear(res);
|
||||||
|
|
||||||
|
if (rel)
|
||||||
|
relation_close(rel, AccessExclusiveLock);
|
||||||
|
|
||||||
|
/* Complete transaction */
|
||||||
|
res = PQexec(conn, "END");
|
||||||
|
if (res == NULL || PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||||
|
report_error(ERROR, res, conn, true, "END");
|
||||||
|
PQclear(res);
|
||||||
|
|
||||||
|
PQfinish(conn);
|
||||||
|
PG_RETURN_INT64(total);
|
||||||
|
}
|
||||||
|
|||||||
@@ -2675,7 +2675,6 @@ bool
|
|||||||
neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id)
|
neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id)
|
||||||
{
|
{
|
||||||
XLogRecPtr end_recptr = record->EndRecPtr;
|
XLogRecPtr end_recptr = record->EndRecPtr;
|
||||||
XLogRecPtr prev_end_recptr = record->ReadRecPtr - 1;
|
|
||||||
RelFileNode rnode;
|
RelFileNode rnode;
|
||||||
ForkNumber forknum;
|
ForkNumber forknum;
|
||||||
BlockNumber blkno;
|
BlockNumber blkno;
|
||||||
@@ -2719,16 +2718,15 @@ neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id)
|
|||||||
|
|
||||||
no_redo_needed = buffer < 0;
|
no_redo_needed = buffer < 0;
|
||||||
|
|
||||||
/* we don't have the buffer in memory, update lwLsn past this record */
|
/* In both cases set lwLsn past this WAL record */
|
||||||
|
SetLastWrittenLSNForBlock(end_recptr, rnode, forknum, blkno);
|
||||||
|
|
||||||
|
/* we don't have the buffer in memory, update lwLsn past this record,
|
||||||
|
* also evict page from file cache
|
||||||
|
*/
|
||||||
if (no_redo_needed)
|
if (no_redo_needed)
|
||||||
{
|
|
||||||
SetLastWrittenLSNForBlock(end_recptr, rnode, forknum, blkno);
|
|
||||||
lfc_evict(rnode, forknum, blkno);
|
lfc_evict(rnode, forknum, blkno);
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
SetLastWrittenLSNForBlock(prev_end_recptr, rnode, forknum, blkno);
|
|
||||||
}
|
|
||||||
|
|
||||||
LWLockRelease(partitionLock);
|
LWLockRelease(partitionLock);
|
||||||
|
|
||||||
@@ -2736,7 +2734,10 @@ neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id)
|
|||||||
if (get_cached_relsize(rnode, forknum, &relsize))
|
if (get_cached_relsize(rnode, forknum, &relsize))
|
||||||
{
|
{
|
||||||
if (relsize < blkno + 1)
|
if (relsize < blkno + 1)
|
||||||
|
{
|
||||||
update_cached_relsize(rnode, forknum, blkno + 1);
|
update_cached_relsize(rnode, forknum, blkno + 1);
|
||||||
|
SetLastWrittenLSNForRelation(end_recptr, rnode, forknum);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@@ -2768,6 +2769,7 @@ neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id)
|
|||||||
Assert(nbresponse->n_blocks > blkno);
|
Assert(nbresponse->n_blocks > blkno);
|
||||||
|
|
||||||
set_cached_relsize(rnode, forknum, nbresponse->n_blocks);
|
set_cached_relsize(rnode, forknum, nbresponse->n_blocks);
|
||||||
|
SetLastWrittenLSNForRelation(end_recptr, rnode, forknum);
|
||||||
|
|
||||||
elog(SmgrTrace, "Set length to %d", nbresponse->n_blocks);
|
elog(SmgrTrace, "Set length to %d", nbresponse->n_blocks);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -257,7 +257,7 @@ nwp_register_gucs(void)
|
|||||||
"Walproposer reconnects to offline safekeepers once in this interval.",
|
"Walproposer reconnects to offline safekeepers once in this interval.",
|
||||||
NULL,
|
NULL,
|
||||||
&wal_acceptor_reconnect_timeout,
|
&wal_acceptor_reconnect_timeout,
|
||||||
5000, 0, INT_MAX, /* default, min, max */
|
1000, 0, INT_MAX, /* default, min, max */
|
||||||
PGC_SIGHUP, /* context */
|
PGC_SIGHUP, /* context */
|
||||||
GUC_UNIT_MS, /* flags */
|
GUC_UNIT_MS, /* flags */
|
||||||
NULL, NULL, NULL);
|
NULL, NULL, NULL);
|
||||||
|
|||||||
179
poetry.lock
generated
179
poetry.lock
generated
@@ -1,4 +1,4 @@
|
|||||||
# This file is automatically @generated by Poetry 1.4.2 and should not be changed by hand.
|
# This file is automatically @generated by Poetry and should not be changed by hand.
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "aiohttp"
|
name = "aiohttp"
|
||||||
@@ -855,35 +855,31 @@ files = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cryptography"
|
name = "cryptography"
|
||||||
version = "39.0.1"
|
version = "41.0.0"
|
||||||
description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers."
|
description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers."
|
||||||
category = "main"
|
category = "main"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.6"
|
python-versions = ">=3.7"
|
||||||
files = [
|
files = [
|
||||||
{file = "cryptography-39.0.1-cp36-abi3-macosx_10_12_universal2.whl", hash = "sha256:6687ef6d0a6497e2b58e7c5b852b53f62142cfa7cd1555795758934da363a965"},
|
{file = "cryptography-41.0.0-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:3c5ef25d060c80d6d9f7f9892e1d41bb1c79b78ce74805b8cb4aa373cb7d5ec8"},
|
||||||
{file = "cryptography-39.0.1-cp36-abi3-macosx_10_12_x86_64.whl", hash = "sha256:706843b48f9a3f9b9911979761c91541e3d90db1ca905fd63fee540a217698bc"},
|
{file = "cryptography-41.0.0-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:8362565b3835ceacf4dc8f3b56471a2289cf51ac80946f9087e66dc283a810e0"},
|
||||||
{file = "cryptography-39.0.1-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:5d2d8b87a490bfcd407ed9d49093793d0f75198a35e6eb1a923ce1ee86c62b41"},
|
{file = "cryptography-41.0.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3680248309d340fda9611498a5319b0193a8dbdb73586a1acf8109d06f25b92d"},
|
||||||
{file = "cryptography-39.0.1-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:83e17b26de248c33f3acffb922748151d71827d6021d98c70e6c1a25ddd78505"},
|
{file = "cryptography-41.0.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:84a165379cb9d411d58ed739e4af3396e544eac190805a54ba2e0322feb55c46"},
|
||||||
{file = "cryptography-39.0.1-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e124352fd3db36a9d4a21c1aa27fd5d051e621845cb87fb851c08f4f75ce8be6"},
|
{file = "cryptography-41.0.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:4ab14d567f7bbe7f1cdff1c53d5324ed4d3fc8bd17c481b395db224fb405c237"},
|
||||||
{file = "cryptography-39.0.1-cp36-abi3-manylinux_2_24_x86_64.whl", hash = "sha256:5aa67414fcdfa22cf052e640cb5ddc461924a045cacf325cd164e65312d99502"},
|
{file = "cryptography-41.0.0-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:9f65e842cb02550fac96536edb1d17f24c0a338fd84eaf582be25926e993dde4"},
|
||||||
{file = "cryptography-39.0.1-cp36-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:35f7c7d015d474f4011e859e93e789c87d21f6f4880ebdc29896a60403328f1f"},
|
{file = "cryptography-41.0.0-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:b7f2f5c525a642cecad24ee8670443ba27ac1fab81bba4cc24c7b6b41f2d0c75"},
|
||||||
{file = "cryptography-39.0.1-cp36-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:f24077a3b5298a5a06a8e0536e3ea9ec60e4c7ac486755e5fb6e6ea9b3500106"},
|
{file = "cryptography-41.0.0-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:7d92f0248d38faa411d17f4107fc0bce0c42cae0b0ba5415505df72d751bf62d"},
|
||||||
{file = "cryptography-39.0.1-cp36-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:f0c64d1bd842ca2633e74a1a28033d139368ad959872533b1bab8c80e8240a0c"},
|
{file = "cryptography-41.0.0-cp37-abi3-win32.whl", hash = "sha256:34d405ea69a8b34566ba3dfb0521379b210ea5d560fafedf9f800a9a94a41928"},
|
||||||
{file = "cryptography-39.0.1-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:0f8da300b5c8af9f98111ffd512910bc792b4c77392a9523624680f7956a99d4"},
|
{file = "cryptography-41.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:344c6de9f8bda3c425b3a41b319522ba3208551b70c2ae00099c205f0d9fd3be"},
|
||||||
{file = "cryptography-39.0.1-cp36-abi3-win32.whl", hash = "sha256:fe913f20024eb2cb2f323e42a64bdf2911bb9738a15dba7d3cce48151034e3a8"},
|
{file = "cryptography-41.0.0-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:88ff107f211ea696455ea8d911389f6d2b276aabf3231bf72c8853d22db755c5"},
|
||||||
{file = "cryptography-39.0.1-cp36-abi3-win_amd64.whl", hash = "sha256:ced4e447ae29ca194449a3f1ce132ded8fcab06971ef5f618605aacaa612beac"},
|
{file = "cryptography-41.0.0-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:b846d59a8d5a9ba87e2c3d757ca019fa576793e8758174d3868aecb88d6fc8eb"},
|
||||||
{file = "cryptography-39.0.1-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:807ce09d4434881ca3a7594733669bd834f5b2c6d5c7e36f8c00f691887042ad"},
|
{file = "cryptography-41.0.0-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:f5d0bf9b252f30a31664b6f64432b4730bb7038339bd18b1fafe129cfc2be9be"},
|
||||||
{file = "cryptography-39.0.1-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:c5caeb8188c24888c90b5108a441c106f7faa4c4c075a2bcae438c6e8ca73cef"},
|
{file = "cryptography-41.0.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:5c1f7293c31ebc72163a9a0df246f890d65f66b4a40d9ec80081969ba8c78cc9"},
|
||||||
{file = "cryptography-39.0.1-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:4789d1e3e257965e960232345002262ede4d094d1a19f4d3b52e48d4d8f3b885"},
|
{file = "cryptography-41.0.0-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:bf8fc66012ca857d62f6a347007e166ed59c0bc150cefa49f28376ebe7d992a2"},
|
||||||
{file = "cryptography-39.0.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:96f1157a7c08b5b189b16b47bc9db2332269d6680a196341bf30046330d15388"},
|
{file = "cryptography-41.0.0-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:a4fc68d1c5b951cfb72dfd54702afdbbf0fb7acdc9b7dc4301bbf2225a27714d"},
|
||||||
{file = "cryptography-39.0.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:e422abdec8b5fa8462aa016786680720d78bdce7a30c652b7fadf83a4ba35336"},
|
{file = "cryptography-41.0.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:14754bcdae909d66ff24b7b5f166d69340ccc6cb15731670435efd5719294895"},
|
||||||
{file = "cryptography-39.0.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:b0afd054cd42f3d213bf82c629efb1ee5f22eba35bf0eec88ea9ea7304f511a2"},
|
{file = "cryptography-41.0.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:0ddaee209d1cf1f180f1efa338a68c4621154de0afaef92b89486f5f96047c55"},
|
||||||
{file = "cryptography-39.0.1-pp39-pypy39_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:6f8ba7f0328b79f08bdacc3e4e66fb4d7aab0c3584e0bd41328dce5262e26b2e"},
|
{file = "cryptography-41.0.0.tar.gz", hash = "sha256:6b71f64beeea341c9b4f963b48ee3b62d62d57ba93eb120e1196b31dc1025e78"},
|
||||||
{file = "cryptography-39.0.1-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:ef8b72fa70b348724ff1218267e7f7375b8de4e8194d1636ee60510aae104cd0"},
|
|
||||||
{file = "cryptography-39.0.1-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:aec5a6c9864be7df2240c382740fcf3b96928c46604eaa7f3091f58b878c0bb6"},
|
|
||||||
{file = "cryptography-39.0.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:fdd188c8a6ef8769f148f88f859884507b954cc64db6b52f66ef199bb9ad660a"},
|
|
||||||
{file = "cryptography-39.0.1.tar.gz", hash = "sha256:d1f6198ee6d9148405e49887803907fe8962a23e6c6f83ea7d98f1c0de375695"},
|
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
@@ -892,12 +888,12 @@ cffi = ">=1.12"
|
|||||||
[package.extras]
|
[package.extras]
|
||||||
docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=1.1.1)"]
|
docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=1.1.1)"]
|
||||||
docstest = ["pyenchant (>=1.6.11)", "sphinxcontrib-spelling (>=4.0.1)", "twine (>=1.12.0)"]
|
docstest = ["pyenchant (>=1.6.11)", "sphinxcontrib-spelling (>=4.0.1)", "twine (>=1.12.0)"]
|
||||||
pep8test = ["black", "check-manifest", "mypy", "ruff", "types-pytz", "types-requests"]
|
nox = ["nox"]
|
||||||
sdist = ["setuptools-rust (>=0.11.4)"]
|
pep8test = ["black", "check-sdist", "mypy", "ruff"]
|
||||||
|
sdist = ["build"]
|
||||||
ssh = ["bcrypt (>=3.1.5)"]
|
ssh = ["bcrypt (>=3.1.5)"]
|
||||||
test = ["hypothesis (>=1.11.4,!=3.79.2)", "iso8601", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-shard (>=0.1.2)", "pytest-subtests", "pytest-xdist", "pytz"]
|
test = ["pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"]
|
||||||
test-randomorder = ["pytest-randomly"]
|
test-randomorder = ["pytest-randomly"]
|
||||||
tox = ["tox"]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "docker"
|
name = "docker"
|
||||||
@@ -1658,71 +1654,74 @@ test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"]
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "psycopg2-binary"
|
name = "psycopg2-binary"
|
||||||
version = "2.9.3"
|
version = "2.9.6"
|
||||||
description = "psycopg2 - Python-PostgreSQL Database Adapter"
|
description = "psycopg2 - Python-PostgreSQL Database Adapter"
|
||||||
category = "main"
|
category = "main"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.6"
|
python-versions = ">=3.6"
|
||||||
files = [
|
files = [
|
||||||
{file = "psycopg2-binary-2.9.3.tar.gz", hash = "sha256:761df5313dc15da1502b21453642d7599d26be88bff659382f8f9747c7ebea4e"},
|
{file = "psycopg2-binary-2.9.6.tar.gz", hash = "sha256:1f64dcfb8f6e0c014c7f55e51c9759f024f70ea572fbdef123f85318c297947c"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp310-cp310-macosx_10_14_x86_64.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl", hash = "sha256:539b28661b71da7c0e428692438efbcd048ca21ea81af618d845e06ebfd29478"},
|
{file = "psycopg2_binary-2.9.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d26e0342183c762de3276cca7a530d574d4e25121ca7d6e4a98e4f05cb8e4df7"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2f2534ab7dc7e776a263b463a16e189eb30e85ec9bbe1bff9e78dae802608932"},
|
{file = "psycopg2_binary-2.9.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c48d8f2db17f27d41fb0e2ecd703ea41984ee19362cbce52c097963b3a1b4365"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6e82d38390a03da28c7985b394ec3f56873174e2c88130e6966cb1c946508e65"},
|
{file = "psycopg2_binary-2.9.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffe9dc0a884a8848075e576c1de0290d85a533a9f6e9c4e564f19adf8f6e54a7"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:57804fc02ca3ce0dbfbef35c4b3a4a774da66d66ea20f4bda601294ad2ea6092"},
|
{file = "psycopg2_binary-2.9.6-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8a76e027f87753f9bd1ab5f7c9cb8c7628d1077ef927f5e2446477153a602f2c"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp310-cp310-manylinux_2_24_aarch64.whl", hash = "sha256:083a55275f09a62b8ca4902dd11f4b33075b743cf0d360419e2051a8a5d5ff76"},
|
{file = "psycopg2_binary-2.9.6-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6460c7a99fc939b849431f1e73e013d54aa54293f30f1109019c56a0b2b2ec2f"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp310-cp310-manylinux_2_24_ppc64le.whl", hash = "sha256:0a29729145aaaf1ad8bafe663131890e2111f13416b60e460dae0a96af5905c9"},
|
{file = "psycopg2_binary-2.9.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ae102a98c547ee2288637af07393dd33f440c25e5cd79556b04e3fca13325e5f"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:3a79d622f5206d695d7824cbf609a4f5b88ea6d6dab5f7c147fc6d333a8787e4"},
|
{file = "psycopg2_binary-2.9.6-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:9972aad21f965599ed0106f65334230ce826e5ae69fda7cbd688d24fa922415e"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:090f3348c0ab2cceb6dfbe6bf721ef61262ddf518cd6cc6ecc7d334996d64efa"},
|
{file = "psycopg2_binary-2.9.6-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:7a40c00dbe17c0af5bdd55aafd6ff6679f94a9be9513a4c7e071baf3d7d22a70"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:a9e1f75f96ea388fbcef36c70640c4efbe4650658f3d6a2967b4cc70e907352e"},
|
{file = "psycopg2_binary-2.9.6-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:cacbdc5839bdff804dfebc058fe25684cae322987f7a38b0168bc1b2df703fb1"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c3ae8e75eb7160851e59adc77b3a19a976e50622e44fd4fd47b8b18208189d42"},
|
{file = "psycopg2_binary-2.9.6-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7f0438fa20fb6c7e202863e0d5ab02c246d35efb1d164e052f2f3bfe2b152bd0"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp310-cp310-win32.whl", hash = "sha256:7b1e9b80afca7b7a386ef087db614faebbf8839b7f4db5eb107d0f1a53225029"},
|
{file = "psycopg2_binary-2.9.6-cp310-cp310-win32.whl", hash = "sha256:b6c8288bb8a84b47e07013bb4850f50538aa913d487579e1921724631d02ea1b"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp310-cp310-win_amd64.whl", hash = "sha256:8b344adbb9a862de0c635f4f0425b7958bf5a4b927c8594e6e8d261775796d53"},
|
{file = "psycopg2_binary-2.9.6-cp310-cp310-win_amd64.whl", hash = "sha256:61b047a0537bbc3afae10f134dc6393823882eb263088c271331602b672e52e9"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp36-cp36m-macosx_10_14_x86_64.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl", hash = "sha256:e847774f8ffd5b398a75bc1c18fbb56564cda3d629fe68fd81971fece2d3c67e"},
|
{file = "psycopg2_binary-2.9.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:964b4dfb7c1c1965ac4c1978b0f755cc4bd698e8aa2b7667c575fb5f04ebe06b"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp36-cp36m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:68641a34023d306be959101b345732360fc2ea4938982309b786f7be1b43a4a1"},
|
{file = "psycopg2_binary-2.9.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:afe64e9b8ea66866a771996f6ff14447e8082ea26e675a295ad3bdbffdd72afb"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3303f8807f342641851578ee7ed1f3efc9802d00a6f83c101d21c608cb864460"},
|
{file = "psycopg2_binary-2.9.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:15e2ee79e7cf29582ef770de7dab3d286431b01c3bb598f8e05e09601b890081"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp36-cp36m-manylinux_2_24_aarch64.whl", hash = "sha256:e3699852e22aa68c10de06524a3721ade969abf382da95884e6a10ff798f9281"},
|
{file = "psycopg2_binary-2.9.6-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dfa74c903a3c1f0d9b1c7e7b53ed2d929a4910e272add6700c38f365a6002820"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp36-cp36m-manylinux_2_24_ppc64le.whl", hash = "sha256:526ea0378246d9b080148f2d6681229f4b5964543c170dd10bf4faaab6e0d27f"},
|
{file = "psycopg2_binary-2.9.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b83456c2d4979e08ff56180a76429263ea254c3f6552cd14ada95cff1dec9bb8"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:b1c8068513f5b158cf7e29c43a77eb34b407db29aca749d3eb9293ee0d3103ca"},
|
{file = "psycopg2_binary-2.9.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0645376d399bfd64da57148694d78e1f431b1e1ee1054872a5713125681cf1be"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:15803fa813ea05bef089fa78835118b5434204f3a17cb9f1e5dbfd0b9deea5af"},
|
{file = "psycopg2_binary-2.9.6-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e99e34c82309dd78959ba3c1590975b5d3c862d6f279f843d47d26ff89d7d7e1"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:152f09f57417b831418304c7f30d727dc83a12761627bb826951692cc6491e57"},
|
{file = "psycopg2_binary-2.9.6-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:4ea29fc3ad9d91162c52b578f211ff1c931d8a38e1f58e684c45aa470adf19e2"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:404224e5fef3b193f892abdbf8961ce20e0b6642886cfe1fe1923f41aaa75c9d"},
|
{file = "psycopg2_binary-2.9.6-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:4ac30da8b4f57187dbf449294d23b808f8f53cad6b1fc3623fa8a6c11d176dd0"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp36-cp36m-win32.whl", hash = "sha256:1f6b813106a3abdf7b03640d36e24669234120c72e91d5cbaeb87c5f7c36c65b"},
|
{file = "psycopg2_binary-2.9.6-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e78e6e2a00c223e164c417628572a90093c031ed724492c763721c2e0bc2a8df"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp36-cp36m-win_amd64.whl", hash = "sha256:2d872e3c9d5d075a2e104540965a1cf898b52274a5923936e5bfddb58c59c7c2"},
|
{file = "psycopg2_binary-2.9.6-cp311-cp311-win32.whl", hash = "sha256:1876843d8e31c89c399e31b97d4b9725a3575bb9c2af92038464231ec40f9edb"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp37-cp37m-macosx_10_14_x86_64.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl", hash = "sha256:10bb90fb4d523a2aa67773d4ff2b833ec00857f5912bafcfd5f5414e45280fb1"},
|
{file = "psycopg2_binary-2.9.6-cp311-cp311-win_amd64.whl", hash = "sha256:b4b24f75d16a89cc6b4cdff0eb6a910a966ecd476d1e73f7ce5985ff1328e9a6"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:874a52ecab70af13e899f7847b3e074eeb16ebac5615665db33bce8a1009cf33"},
|
{file = "psycopg2_binary-2.9.6-cp36-cp36m-win32.whl", hash = "sha256:498807b927ca2510baea1b05cc91d7da4718a0f53cb766c154c417a39f1820a0"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a29b3ca4ec9defec6d42bf5feb36bb5817ba3c0230dd83b4edf4bf02684cd0ae"},
|
{file = "psycopg2_binary-2.9.6-cp36-cp36m-win_amd64.whl", hash = "sha256:0d236c2825fa656a2d98bbb0e52370a2e852e5a0ec45fc4f402977313329174d"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp37-cp37m-manylinux_2_24_aarch64.whl", hash = "sha256:12b11322ea00ad8db8c46f18b7dfc47ae215e4df55b46c67a94b4effbaec7094"},
|
{file = "psycopg2_binary-2.9.6-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:34b9ccdf210cbbb1303c7c4db2905fa0319391bd5904d32689e6dd5c963d2ea8"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp37-cp37m-manylinux_2_24_ppc64le.whl", hash = "sha256:53293533fcbb94c202b7c800a12c873cfe24599656b341f56e71dd2b557be063"},
|
{file = "psycopg2_binary-2.9.6-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:84d2222e61f313c4848ff05353653bf5f5cf6ce34df540e4274516880d9c3763"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c381bda330ddf2fccbafab789d83ebc6c53db126e4383e73794c74eedce855ef"},
|
{file = "psycopg2_binary-2.9.6-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:30637a20623e2a2eacc420059be11527f4458ef54352d870b8181a4c3020ae6b"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:9d29409b625a143649d03d0fd7b57e4b92e0ecad9726ba682244b73be91d2fdb"},
|
{file = "psycopg2_binary-2.9.6-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8122cfc7cae0da9a3077216528b8bb3629c43b25053284cc868744bfe71eb141"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:183a517a3a63503f70f808b58bfbf962f23d73b6dccddae5aa56152ef2bcb232"},
|
{file = "psycopg2_binary-2.9.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:38601cbbfe600362c43714482f43b7c110b20cb0f8172422c616b09b85a750c5"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:15c4e4cfa45f5a60599d9cec5f46cd7b1b29d86a6390ec23e8eebaae84e64554"},
|
{file = "psycopg2_binary-2.9.6-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c7e62ab8b332147a7593a385d4f368874d5fe4ad4e341770d4983442d89603e3"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp37-cp37m-win32.whl", hash = "sha256:adf20d9a67e0b6393eac162eb81fb10bc9130a80540f4df7e7355c2dd4af9fba"},
|
{file = "psycopg2_binary-2.9.6-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:2ab652e729ff4ad76d400df2624d223d6e265ef81bb8aa17fbd63607878ecbee"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp37-cp37m-win_amd64.whl", hash = "sha256:2f9ffd643bc7349eeb664eba8864d9e01f057880f510e4681ba40a6532f93c71"},
|
{file = "psycopg2_binary-2.9.6-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:c83a74b68270028dc8ee74d38ecfaf9c90eed23c8959fca95bd703d25b82c88e"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp38-cp38-macosx_10_14_x86_64.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl", hash = "sha256:def68d7c21984b0f8218e8a15d514f714d96904265164f75f8d3a70f9c295667"},
|
{file = "psycopg2_binary-2.9.6-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d4e6036decf4b72d6425d5b29bbd3e8f0ff1059cda7ac7b96d6ac5ed34ffbacd"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e6aa71ae45f952a2205377773e76f4e3f27951df38e69a4c95440c779e013560"},
|
{file = "psycopg2_binary-2.9.6-cp37-cp37m-win32.whl", hash = "sha256:a8c28fd40a4226b4a84bdf2d2b5b37d2c7bd49486b5adcc200e8c7ec991dfa7e"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dffc08ca91c9ac09008870c9eb77b00a46b3378719584059c034b8945e26b272"},
|
{file = "psycopg2_binary-2.9.6-cp37-cp37m-win_amd64.whl", hash = "sha256:51537e3d299be0db9137b321dfb6a5022caaab275775680e0c3d281feefaca6b"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:280b0bb5cbfe8039205c7981cceb006156a675362a00fe29b16fbc264e242834"},
|
{file = "psycopg2_binary-2.9.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:cf4499e0a83b7b7edcb8dabecbd8501d0d3a5ef66457200f77bde3d210d5debb"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp38-cp38-manylinux_2_24_aarch64.whl", hash = "sha256:af9813db73395fb1fc211bac696faea4ca9ef53f32dc0cfa27e4e7cf766dcf24"},
|
{file = "psycopg2_binary-2.9.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7e13a5a2c01151f1208d5207e42f33ba86d561b7a89fca67c700b9486a06d0e2"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp38-cp38-manylinux_2_24_ppc64le.whl", hash = "sha256:63638d875be8c2784cfc952c9ac34e2b50e43f9f0a0660b65e2a87d656b3116c"},
|
{file = "psycopg2_binary-2.9.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0e0f754d27fddcfd74006455b6e04e6705d6c31a612ec69ddc040a5468e44b4e"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:ffb7a888a047696e7f8240d649b43fb3644f14f0ee229077e7f6b9f9081635bd"},
|
{file = "psycopg2_binary-2.9.6-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d57c3fd55d9058645d26ae37d76e61156a27722097229d32a9e73ed54819982a"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:0c9d5450c566c80c396b7402895c4369a410cab5a82707b11aee1e624da7d004"},
|
{file = "psycopg2_binary-2.9.6-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:71f14375d6f73b62800530b581aed3ada394039877818b2d5f7fc77e3bb6894d"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:d1c1b569ecafe3a69380a94e6ae09a4789bbb23666f3d3a08d06bbd2451f5ef1"},
|
{file = "psycopg2_binary-2.9.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:441cc2f8869a4f0f4bb408475e5ae0ee1f3b55b33f350406150277f7f35384fc"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8fc53f9af09426a61db9ba357865c77f26076d48669f2e1bb24d85a22fb52307"},
|
{file = "psycopg2_binary-2.9.6-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:65bee1e49fa6f9cf327ce0e01c4c10f39165ee76d35c846ade7cb0ec6683e303"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp38-cp38-win32.whl", hash = "sha256:6472a178e291b59e7f16ab49ec8b4f3bdada0a879c68d3817ff0963e722a82ce"},
|
{file = "psycopg2_binary-2.9.6-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:af335bac6b666cc6aea16f11d486c3b794029d9df029967f9938a4bed59b6a19"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp38-cp38-win_amd64.whl", hash = "sha256:35168209c9d51b145e459e05c31a9eaeffa9a6b0fd61689b48e07464ffd1a83e"},
|
{file = "psycopg2_binary-2.9.6-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:cfec476887aa231b8548ece2e06d28edc87c1397ebd83922299af2e051cf2827"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp39-cp39-macosx_10_14_x86_64.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl", hash = "sha256:47133f3f872faf28c1e87d4357220e809dfd3fa7c64295a4a148bcd1e6e34ec9"},
|
{file = "psycopg2_binary-2.9.6-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:65c07febd1936d63bfde78948b76cd4c2a411572a44ac50719ead41947d0f26b"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b3a24a1982ae56461cc24f6680604fffa2c1b818e9dc55680da038792e004d18"},
|
{file = "psycopg2_binary-2.9.6-cp38-cp38-win32.whl", hash = "sha256:4dfb4be774c4436a4526d0c554af0cc2e02082c38303852a36f6456ece7b3503"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:91920527dea30175cc02a1099f331aa8c1ba39bf8b7762b7b56cbf54bc5cce42"},
|
{file = "psycopg2_binary-2.9.6-cp38-cp38-win_amd64.whl", hash = "sha256:02c6e3cf3439e213e4ee930308dc122d6fb4d4bea9aef4a12535fbd605d1a2fe"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:887dd9aac71765ac0d0bac1d0d4b4f2c99d5f5c1382d8b770404f0f3d0ce8a39"},
|
{file = "psycopg2_binary-2.9.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e9182eb20f41417ea1dd8e8f7888c4d7c6e805f8a7c98c1081778a3da2bee3e4"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp39-cp39-manylinux_2_24_aarch64.whl", hash = "sha256:1f14c8b0942714eb3c74e1e71700cbbcb415acbc311c730370e70c578a44a25c"},
|
{file = "psycopg2_binary-2.9.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8a6979cf527e2603d349a91060f428bcb135aea2be3201dff794813256c274f1"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp39-cp39-manylinux_2_24_ppc64le.whl", hash = "sha256:7af0dd86ddb2f8af5da57a976d27cd2cd15510518d582b478fbb2292428710b4"},
|
{file = "psycopg2_binary-2.9.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8338a271cb71d8da40b023a35d9c1e919eba6cbd8fa20a54b748a332c355d896"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:93cd1967a18aa0edd4b95b1dfd554cf15af657cb606280996d393dadc88c3c35"},
|
{file = "psycopg2_binary-2.9.6-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e3ed340d2b858d6e6fb5083f87c09996506af483227735de6964a6100b4e6a54"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:bda845b664bb6c91446ca9609fc69f7db6c334ec5e4adc87571c34e4f47b7ddb"},
|
{file = "psycopg2_binary-2.9.6-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f81e65376e52f03422e1fb475c9514185669943798ed019ac50410fb4c4df232"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:01310cf4cf26db9aea5158c217caa92d291f0500051a6469ac52166e1a16f5b7"},
|
{file = "psycopg2_binary-2.9.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bfb13af3c5dd3a9588000910178de17010ebcccd37b4f9794b00595e3a8ddad3"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:99485cab9ba0fa9b84f1f9e1fef106f44a46ef6afdeec8885e0b88d0772b49e8"},
|
{file = "psycopg2_binary-2.9.6-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4c727b597c6444a16e9119386b59388f8a424223302d0c06c676ec8b4bc1f963"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp39-cp39-win32.whl", hash = "sha256:46f0e0a6b5fa5851bbd9ab1bc805eef362d3a230fbdfbc209f4a236d0a7a990d"},
|
{file = "psycopg2_binary-2.9.6-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:4d67fbdaf177da06374473ef6f7ed8cc0a9dc640b01abfe9e8a2ccb1b1402c1f"},
|
||||||
{file = "psycopg2_binary-2.9.3-cp39-cp39-win_amd64.whl", hash = "sha256:accfe7e982411da3178ec690baaceaad3c278652998b2c45828aaac66cd8285f"},
|
{file = "psycopg2_binary-2.9.6-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:0892ef645c2fabb0c75ec32d79f4252542d0caec1d5d949630e7d242ca4681a3"},
|
||||||
|
{file = "psycopg2_binary-2.9.6-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:02c0f3757a4300cf379eb49f543fb7ac527fb00144d39246ee40e1df684ab514"},
|
||||||
|
{file = "psycopg2_binary-2.9.6-cp39-cp39-win32.whl", hash = "sha256:c3dba7dab16709a33a847e5cd756767271697041fbe3fe97c215b1fc1f5c9848"},
|
||||||
|
{file = "psycopg2_binary-2.9.6-cp39-cp39-win_amd64.whl", hash = "sha256:f6a88f384335bb27812293fdb11ac6aee2ca3f51d3c7820fe03de0a304ab6249"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|||||||
@@ -7,7 +7,6 @@ license.workspace = true
|
|||||||
[dependencies]
|
[dependencies]
|
||||||
anyhow.workspace = true
|
anyhow.workspace = true
|
||||||
async-trait.workspace = true
|
async-trait.workspace = true
|
||||||
atty.workspace = true
|
|
||||||
base64.workspace = true
|
base64.workspace = true
|
||||||
bstr.workspace = true
|
bstr.workspace = true
|
||||||
bytes = { workspace = true, features = ["serde"] }
|
bytes = { workspace = true, features = ["serde"] }
|
||||||
@@ -30,6 +29,7 @@ metrics.workspace = true
|
|||||||
once_cell.workspace = true
|
once_cell.workspace = true
|
||||||
opentelemetry.workspace = true
|
opentelemetry.workspace = true
|
||||||
parking_lot.workspace = true
|
parking_lot.workspace = true
|
||||||
|
pbkdf2.workspace = true
|
||||||
pin-project-lite.workspace = true
|
pin-project-lite.workspace = true
|
||||||
postgres_backend.workspace = true
|
postgres_backend.workspace = true
|
||||||
pq_proto.workspace = true
|
pq_proto.workspace = true
|
||||||
@@ -38,6 +38,7 @@ rand.workspace = true
|
|||||||
regex.workspace = true
|
regex.workspace = true
|
||||||
reqwest = { workspace = true, features = ["json"] }
|
reqwest = { workspace = true, features = ["json"] }
|
||||||
reqwest-middleware.workspace = true
|
reqwest-middleware.workspace = true
|
||||||
|
reqwest-retry.workspace = true
|
||||||
reqwest-tracing.workspace = true
|
reqwest-tracing.workspace = true
|
||||||
routerify.workspace = true
|
routerify.workspace = true
|
||||||
rustls-pemfile.workspace = true
|
rustls-pemfile.workspace = true
|
||||||
|
|||||||
@@ -93,6 +93,15 @@ With the current approach we made the following design decisions:
|
|||||||
and column oids. Command tag capturing was added to the rust-postgres
|
and column oids. Command tag capturing was added to the rust-postgres
|
||||||
functionality as part of this change.
|
functionality as part of this change.
|
||||||
|
|
||||||
|
### Output options
|
||||||
|
|
||||||
|
The user can pass several optional headers that affect the resulting JSON.
|
||||||
|
|
||||||
|
1. `Neon-Raw-Text-Output: true`. Return postgres values as text, without parsing them. So numbers, objects, booleans, nulls and arrays will be returned as text. That can be useful in cases when client code wants to implement its own parsing or reuse parsing libraries from e.g. node-postgres.
|
||||||
|
2. `Neon-Array-Mode: true`. Return postgres rows as arrays instead of objects. That is a more compact representation and also helps in some edge
|
||||||
|
cases where it is hard to use rows represented as objects (e.g. when several fields have the same name).
|
||||||
|
|
||||||
|
|
||||||
## Using SNI-based routing on localhost
|
## Using SNI-based routing on localhost
|
||||||
|
|
||||||
Now proxy determines project name from the subdomain, request to the `round-rice-566201.somedomain.tld` will be routed to the project named `round-rice-566201`. Unfortunately, `/etc/hosts` does not support domain wildcards, so I usually use `*.localtest.me` which resolves to `127.0.0.1`. Now we can create self-signed certificate and play with proxy:
|
Now proxy determines project name from the subdomain, request to the `round-rice-566201.somedomain.tld` will be routed to the project named `round-rice-566201`. Unfortunately, `/etc/hosts` does not support domain wildcards, so I usually use `*.localtest.me` which resolves to `127.0.0.1`. Now we can create self-signed certificate and play with proxy:
|
||||||
|
|||||||
@@ -136,18 +136,17 @@ impl Default for ConnCfg {
|
|||||||
|
|
||||||
impl ConnCfg {
|
impl ConnCfg {
|
||||||
/// Establish a raw TCP connection to the compute node.
|
/// Establish a raw TCP connection to the compute node.
|
||||||
async fn connect_raw(&self) -> io::Result<(SocketAddr, TcpStream, &str)> {
|
async fn connect_raw(&self, timeout: Duration) -> io::Result<(SocketAddr, TcpStream, &str)> {
|
||||||
use tokio_postgres::config::Host;
|
use tokio_postgres::config::Host;
|
||||||
|
|
||||||
// wrap TcpStream::connect with timeout
|
// wrap TcpStream::connect with timeout
|
||||||
let connect_with_timeout = |host, port| {
|
let connect_with_timeout = |host, port| {
|
||||||
let connection_timeout = Duration::from_millis(10000);
|
tokio::time::timeout(timeout, TcpStream::connect((host, port))).map(
|
||||||
tokio::time::timeout(connection_timeout, TcpStream::connect((host, port))).map(
|
|
||||||
move |res| match res {
|
move |res| match res {
|
||||||
Ok(tcpstream_connect_res) => tcpstream_connect_res,
|
Ok(tcpstream_connect_res) => tcpstream_connect_res,
|
||||||
Err(_) => Err(io::Error::new(
|
Err(_) => Err(io::Error::new(
|
||||||
io::ErrorKind::TimedOut,
|
io::ErrorKind::TimedOut,
|
||||||
format!("exceeded connection timeout {connection_timeout:?}"),
|
format!("exceeded connection timeout {timeout:?}"),
|
||||||
)),
|
)),
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
@@ -223,8 +222,9 @@ impl ConnCfg {
|
|||||||
async fn do_connect(
|
async fn do_connect(
|
||||||
&self,
|
&self,
|
||||||
allow_self_signed_compute: bool,
|
allow_self_signed_compute: bool,
|
||||||
|
timeout: Duration,
|
||||||
) -> Result<PostgresConnection, ConnectionError> {
|
) -> Result<PostgresConnection, ConnectionError> {
|
||||||
let (socket_addr, stream, host) = self.connect_raw().await?;
|
let (socket_addr, stream, host) = self.connect_raw(timeout).await?;
|
||||||
|
|
||||||
let tls_connector = native_tls::TlsConnector::builder()
|
let tls_connector = native_tls::TlsConnector::builder()
|
||||||
.danger_accept_invalid_certs(allow_self_signed_compute)
|
.danger_accept_invalid_certs(allow_self_signed_compute)
|
||||||
@@ -264,8 +264,9 @@ impl ConnCfg {
|
|||||||
pub async fn connect(
|
pub async fn connect(
|
||||||
&self,
|
&self,
|
||||||
allow_self_signed_compute: bool,
|
allow_self_signed_compute: bool,
|
||||||
|
timeout: Duration,
|
||||||
) -> Result<PostgresConnection, ConnectionError> {
|
) -> Result<PostgresConnection, ConnectionError> {
|
||||||
self.do_connect(allow_self_signed_compute)
|
self.do_connect(allow_self_signed_compute, timeout)
|
||||||
.inspect_err(|err| {
|
.inspect_err(|err| {
|
||||||
// Immediately log the error we have at our disposal.
|
// Immediately log the error we have at our disposal.
|
||||||
error!("couldn't connect to compute node: {err}");
|
error!("couldn't connect to compute node: {err}");
|
||||||
|
|||||||
@@ -212,7 +212,7 @@ pub struct CacheOptions {
|
|||||||
|
|
||||||
impl CacheOptions {
|
impl CacheOptions {
|
||||||
/// Default options for [`crate::auth::caches::NodeInfoCache`].
|
/// Default options for [`crate::auth::caches::NodeInfoCache`].
|
||||||
pub const DEFAULT_OPTIONS_NODE_INFO: &str = "size=4000,ttl=5m";
|
pub const DEFAULT_OPTIONS_NODE_INFO: &str = "size=4000,ttl=4m";
|
||||||
|
|
||||||
/// Parse cache options passed via cmdline.
|
/// Parse cache options passed via cmdline.
|
||||||
/// Example: [`Self::DEFAULT_OPTIONS_NODE_INFO`].
|
/// Example: [`Self::DEFAULT_OPTIONS_NODE_INFO`].
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ use postgres_backend::{self, AuthType, PostgresBackend, PostgresBackendTCP, Quer
|
|||||||
use pq_proto::{BeMessage, SINGLE_COL_ROWDESC};
|
use pq_proto::{BeMessage, SINGLE_COL_ROWDESC};
|
||||||
use std::future;
|
use std::future;
|
||||||
use tokio::net::{TcpListener, TcpStream};
|
use tokio::net::{TcpListener, TcpStream};
|
||||||
use tracing::{error, info, info_span};
|
use tracing::{error, info, info_span, Instrument};
|
||||||
|
|
||||||
static CPLANE_WAITERS: Lazy<Waiters<ComputeReady>> = Lazy::new(Default::default);
|
static CPLANE_WAITERS: Lazy<Waiters<ComputeReady>> = Lazy::new(Default::default);
|
||||||
|
|
||||||
@@ -44,19 +44,30 @@ pub async fn task_main(listener: TcpListener) -> anyhow::Result<()> {
|
|||||||
.set_nodelay(true)
|
.set_nodelay(true)
|
||||||
.context("failed to set client socket option")?;
|
.context("failed to set client socket option")?;
|
||||||
|
|
||||||
tokio::task::spawn(async move {
|
let span = info_span!("mgmt", peer = %peer_addr);
|
||||||
let span = info_span!("mgmt", peer = %peer_addr);
|
|
||||||
let _enter = span.enter();
|
|
||||||
|
|
||||||
info!("started a new console management API thread");
|
tokio::task::spawn(
|
||||||
scopeguard::defer! {
|
async move {
|
||||||
info!("console management API thread is about to finish");
|
info!("serving a new console management API connection");
|
||||||
}
|
|
||||||
|
|
||||||
if let Err(e) = handle_connection(socket).await {
|
// these might be long running connections, have a separate logging for cancelling
|
||||||
error!("thread failed with an error: {e}");
|
// on shutdown and other ways of stopping.
|
||||||
|
let cancelled = scopeguard::guard(tracing::Span::current(), |span| {
|
||||||
|
let _e = span.entered();
|
||||||
|
info!("console management API task cancelled");
|
||||||
|
});
|
||||||
|
|
||||||
|
if let Err(e) = handle_connection(socket).await {
|
||||||
|
error!("serving failed with an error: {e}");
|
||||||
|
} else {
|
||||||
|
info!("serving completed");
|
||||||
|
}
|
||||||
|
|
||||||
|
// we can no longer get dropped
|
||||||
|
scopeguard::ScopeGuard::into_inner(cancelled);
|
||||||
}
|
}
|
||||||
});
|
.instrument(span),
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -77,14 +88,14 @@ impl postgres_backend::Handler<tokio::net::TcpStream> for MgmtHandler {
|
|||||||
pgb: &mut PostgresBackendTCP,
|
pgb: &mut PostgresBackendTCP,
|
||||||
query: &str,
|
query: &str,
|
||||||
) -> Result<(), QueryError> {
|
) -> Result<(), QueryError> {
|
||||||
try_process_query(pgb, query).await.map_err(|e| {
|
try_process_query(pgb, query).map_err(|e| {
|
||||||
error!("failed to process response: {e:?}");
|
error!("failed to process response: {e:?}");
|
||||||
e
|
e
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn try_process_query(pgb: &mut PostgresBackendTCP, query: &str) -> Result<(), QueryError> {
|
fn try_process_query(pgb: &mut PostgresBackendTCP, query: &str) -> Result<(), QueryError> {
|
||||||
let resp: KickSession = serde_json::from_str(query).context("Failed to parse query as json")?;
|
let resp: KickSession = serde_json::from_str(query).context("Failed to parse query as json")?;
|
||||||
|
|
||||||
let span = info_span!("event", session_id = resp.session_id);
|
let span = info_span!("event", session_id = resp.session_id);
|
||||||
|
|||||||
@@ -2,12 +2,16 @@
|
|||||||
//! Other modules should use stuff from this module instead of
|
//! Other modules should use stuff from this module instead of
|
||||||
//! directly relying on deps like `reqwest` (think loose coupling).
|
//! directly relying on deps like `reqwest` (think loose coupling).
|
||||||
|
|
||||||
|
pub mod conn_pool;
|
||||||
pub mod server;
|
pub mod server;
|
||||||
pub mod sql_over_http;
|
pub mod sql_over_http;
|
||||||
pub mod websocket;
|
pub mod websocket;
|
||||||
|
|
||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
pub use reqwest::{Request, Response, StatusCode};
|
pub use reqwest::{Request, Response, StatusCode};
|
||||||
pub use reqwest_middleware::{ClientWithMiddleware, Error};
|
pub use reqwest_middleware::{ClientWithMiddleware, Error};
|
||||||
|
pub use reqwest_retry::{policies::ExponentialBackoff, RetryTransientMiddleware};
|
||||||
|
|
||||||
use crate::url::ApiUrl;
|
use crate::url::ApiUrl;
|
||||||
use reqwest_middleware::RequestBuilder;
|
use reqwest_middleware::RequestBuilder;
|
||||||
@@ -21,6 +25,24 @@ pub fn new_client() -> ClientWithMiddleware {
|
|||||||
.build()
|
.build()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn new_client_with_timeout(default_timout: Duration) -> ClientWithMiddleware {
|
||||||
|
let timeout_client = reqwest::ClientBuilder::new()
|
||||||
|
.timeout(default_timout)
|
||||||
|
.build()
|
||||||
|
.expect("Failed to create http client with timeout");
|
||||||
|
|
||||||
|
let retry_policy =
|
||||||
|
ExponentialBackoff::builder().build_with_total_retry_duration(default_timout);
|
||||||
|
|
||||||
|
reqwest_middleware::ClientBuilder::new(timeout_client)
|
||||||
|
.with(reqwest_tracing::TracingMiddleware::default())
|
||||||
|
// As per docs, "This middleware always errors when given requests with streaming bodies".
|
||||||
|
// That's all right because we only use this client to send `serde_json::RawValue`, which
|
||||||
|
// is not a stream.
|
||||||
|
.with(RetryTransientMiddleware::new_with_policy(retry_policy))
|
||||||
|
.build()
|
||||||
|
}
|
||||||
|
|
||||||
/// Thin convenience wrapper for an API provided by an http endpoint.
|
/// Thin convenience wrapper for an API provided by an http endpoint.
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct Endpoint {
|
pub struct Endpoint {
|
||||||
|
|||||||
278
proxy/src/http/conn_pool.rs
Normal file
278
proxy/src/http/conn_pool.rs
Normal file
@@ -0,0 +1,278 @@
|
|||||||
|
use parking_lot::Mutex;
|
||||||
|
use pq_proto::StartupMessageParams;
|
||||||
|
use std::fmt;
|
||||||
|
use std::{collections::HashMap, sync::Arc};
|
||||||
|
|
||||||
|
use futures::TryFutureExt;
|
||||||
|
|
||||||
|
use crate::config;
|
||||||
|
use crate::{auth, console};
|
||||||
|
|
||||||
|
use super::sql_over_http::MAX_RESPONSE_SIZE;
|
||||||
|
|
||||||
|
use crate::proxy::invalidate_cache;
|
||||||
|
use crate::proxy::NUM_RETRIES_WAKE_COMPUTE;
|
||||||
|
|
||||||
|
use tracing::error;
|
||||||
|
use tracing::info;
|
||||||
|
|
||||||
|
pub const APP_NAME: &str = "sql_over_http";
|
||||||
|
const MAX_CONNS_PER_ENDPOINT: usize = 20;
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct ConnInfo {
|
||||||
|
pub username: String,
|
||||||
|
pub dbname: String,
|
||||||
|
pub hostname: String,
|
||||||
|
pub password: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ConnInfo {
|
||||||
|
// hm, change to hasher to avoid cloning?
|
||||||
|
pub fn db_and_user(&self) -> (String, String) {
|
||||||
|
(self.dbname.clone(), self.username.clone())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for ConnInfo {
|
||||||
|
// use custom display to avoid logging password
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
write!(f, "{}@{}/{}", self.username, self.hostname, self.dbname)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct ConnPoolEntry {
|
||||||
|
conn: tokio_postgres::Client,
|
||||||
|
_last_access: std::time::Instant,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Per-endpoint connection pool, (dbname, username) -> Vec<ConnPoolEntry>
|
||||||
|
// Number of open connections is limited by the `max_conns_per_endpoint`.
|
||||||
|
pub struct EndpointConnPool {
|
||||||
|
pools: HashMap<(String, String), Vec<ConnPoolEntry>>,
|
||||||
|
total_conns: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct GlobalConnPool {
|
||||||
|
// endpoint -> per-endpoint connection pool
|
||||||
|
//
|
||||||
|
// That should be a fairly contended map, so return reference to the per-endpoint
|
||||||
|
// pool as early as possible and release the lock.
|
||||||
|
global_pool: Mutex<HashMap<String, Arc<Mutex<EndpointConnPool>>>>,
|
||||||
|
|
||||||
|
// Maximum number of connections per one endpoint.
|
||||||
|
// Can mix different (dbname, username) connections.
|
||||||
|
// When running out of free slots for a particular endpoint,
|
||||||
|
// falls back to opening a new connection for each request.
|
||||||
|
max_conns_per_endpoint: usize,
|
||||||
|
|
||||||
|
proxy_config: &'static crate::config::ProxyConfig,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl GlobalConnPool {
|
||||||
|
pub fn new(config: &'static crate::config::ProxyConfig) -> Arc<Self> {
|
||||||
|
Arc::new(Self {
|
||||||
|
global_pool: Mutex::new(HashMap::new()),
|
||||||
|
max_conns_per_endpoint: MAX_CONNS_PER_ENDPOINT,
|
||||||
|
proxy_config: config,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn get(
|
||||||
|
&self,
|
||||||
|
conn_info: &ConnInfo,
|
||||||
|
force_new: bool,
|
||||||
|
) -> anyhow::Result<tokio_postgres::Client> {
|
||||||
|
let mut client: Option<tokio_postgres::Client> = None;
|
||||||
|
|
||||||
|
if !force_new {
|
||||||
|
let pool = self.get_endpoint_pool(&conn_info.hostname).await;
|
||||||
|
|
||||||
|
// find a pool entry by (dbname, username) if exists
|
||||||
|
let mut pool = pool.lock();
|
||||||
|
let pool_entries = pool.pools.get_mut(&conn_info.db_and_user());
|
||||||
|
if let Some(pool_entries) = pool_entries {
|
||||||
|
if let Some(entry) = pool_entries.pop() {
|
||||||
|
client = Some(entry.conn);
|
||||||
|
pool.total_conns -= 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ok return cached connection if found and establish a new one otherwise
|
||||||
|
if let Some(client) = client {
|
||||||
|
if client.is_closed() {
|
||||||
|
info!("pool: cached connection '{conn_info}' is closed, opening a new one");
|
||||||
|
connect_to_compute(self.proxy_config, conn_info).await
|
||||||
|
} else {
|
||||||
|
info!("pool: reusing connection '{conn_info}'");
|
||||||
|
Ok(client)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
info!("pool: opening a new connection '{conn_info}'");
|
||||||
|
connect_to_compute(self.proxy_config, conn_info).await
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn put(
|
||||||
|
&self,
|
||||||
|
conn_info: &ConnInfo,
|
||||||
|
client: tokio_postgres::Client,
|
||||||
|
) -> anyhow::Result<()> {
|
||||||
|
let pool = self.get_endpoint_pool(&conn_info.hostname).await;
|
||||||
|
|
||||||
|
// return connection to the pool
|
||||||
|
let mut total_conns;
|
||||||
|
let mut returned = false;
|
||||||
|
let mut per_db_size = 0;
|
||||||
|
{
|
||||||
|
let mut pool = pool.lock();
|
||||||
|
total_conns = pool.total_conns;
|
||||||
|
|
||||||
|
let pool_entries: &mut Vec<ConnPoolEntry> = pool
|
||||||
|
.pools
|
||||||
|
.entry(conn_info.db_and_user())
|
||||||
|
.or_insert_with(|| Vec::with_capacity(1));
|
||||||
|
if total_conns < self.max_conns_per_endpoint {
|
||||||
|
pool_entries.push(ConnPoolEntry {
|
||||||
|
conn: client,
|
||||||
|
_last_access: std::time::Instant::now(),
|
||||||
|
});
|
||||||
|
|
||||||
|
total_conns += 1;
|
||||||
|
returned = true;
|
||||||
|
per_db_size = pool_entries.len();
|
||||||
|
|
||||||
|
pool.total_conns += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// do logging outside of the mutex
|
||||||
|
if returned {
|
||||||
|
info!("pool: returning connection '{conn_info}' back to the pool, total_conns={total_conns}, for this (db, user)={per_db_size}");
|
||||||
|
} else {
|
||||||
|
info!("pool: throwing away connection '{conn_info}' because pool is full, total_conns={total_conns}");
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn get_endpoint_pool(&self, endpoint: &String) -> Arc<Mutex<EndpointConnPool>> {
|
||||||
|
// find or create a pool for this endpoint
|
||||||
|
let mut created = false;
|
||||||
|
let mut global_pool = self.global_pool.lock();
|
||||||
|
let pool = global_pool
|
||||||
|
.entry(endpoint.clone())
|
||||||
|
.or_insert_with(|| {
|
||||||
|
created = true;
|
||||||
|
Arc::new(Mutex::new(EndpointConnPool {
|
||||||
|
pools: HashMap::new(),
|
||||||
|
total_conns: 0,
|
||||||
|
}))
|
||||||
|
})
|
||||||
|
.clone();
|
||||||
|
let global_pool_size = global_pool.len();
|
||||||
|
drop(global_pool);
|
||||||
|
|
||||||
|
// log new global pool size
|
||||||
|
if created {
|
||||||
|
info!(
|
||||||
|
"pool: created new pool for '{endpoint}', global pool size now {global_pool_size}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
pool
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// Wake up the destination if needed. Code here is a bit involved because
|
||||||
|
// we reuse the code from the usual proxy and we need to prepare few structures
|
||||||
|
// that this code expects.
|
||||||
|
//
|
||||||
|
async fn connect_to_compute(
|
||||||
|
config: &config::ProxyConfig,
|
||||||
|
conn_info: &ConnInfo,
|
||||||
|
) -> anyhow::Result<tokio_postgres::Client> {
|
||||||
|
let tls = config.tls_config.as_ref();
|
||||||
|
let common_names = tls.and_then(|tls| tls.common_names.clone());
|
||||||
|
|
||||||
|
let credential_params = StartupMessageParams::new([
|
||||||
|
("user", &conn_info.username),
|
||||||
|
("database", &conn_info.dbname),
|
||||||
|
("application_name", APP_NAME),
|
||||||
|
]);
|
||||||
|
|
||||||
|
let creds = config
|
||||||
|
.auth_backend
|
||||||
|
.as_ref()
|
||||||
|
.map(|_| {
|
||||||
|
auth::ClientCredentials::parse(
|
||||||
|
&credential_params,
|
||||||
|
Some(&conn_info.hostname),
|
||||||
|
common_names,
|
||||||
|
)
|
||||||
|
})
|
||||||
|
.transpose()?;
|
||||||
|
let extra = console::ConsoleReqExtra {
|
||||||
|
session_id: uuid::Uuid::new_v4(),
|
||||||
|
application_name: Some(APP_NAME),
|
||||||
|
};
|
||||||
|
|
||||||
|
let node_info = &mut creds.wake_compute(&extra).await?.expect("msg");
|
||||||
|
|
||||||
|
// This code is a copy of `connect_to_compute` from `src/proxy.rs` with
|
||||||
|
// the difference that it uses `tokio_postgres` for the connection.
|
||||||
|
let mut num_retries: usize = NUM_RETRIES_WAKE_COMPUTE;
|
||||||
|
loop {
|
||||||
|
match connect_to_compute_once(node_info, conn_info).await {
|
||||||
|
Err(e) if num_retries > 0 => {
|
||||||
|
info!("compute node's state has changed; requesting a wake-up");
|
||||||
|
match creds.wake_compute(&extra).await? {
|
||||||
|
// Update `node_info` and try one more time.
|
||||||
|
Some(new) => {
|
||||||
|
*node_info = new;
|
||||||
|
}
|
||||||
|
// Link auth doesn't work that way, so we just exit.
|
||||||
|
None => return Err(e),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
other => return other,
|
||||||
|
}
|
||||||
|
|
||||||
|
num_retries -= 1;
|
||||||
|
info!("retrying after wake-up ({num_retries} attempts left)");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn connect_to_compute_once(
|
||||||
|
node_info: &console::CachedNodeInfo,
|
||||||
|
conn_info: &ConnInfo,
|
||||||
|
) -> anyhow::Result<tokio_postgres::Client> {
|
||||||
|
let mut config = (*node_info.config).clone();
|
||||||
|
|
||||||
|
let (client, connection) = config
|
||||||
|
.user(&conn_info.username)
|
||||||
|
.password(&conn_info.password)
|
||||||
|
.dbname(&conn_info.dbname)
|
||||||
|
.max_backend_message_size(MAX_RESPONSE_SIZE)
|
||||||
|
.connect(tokio_postgres::NoTls)
|
||||||
|
.inspect_err(|e: &tokio_postgres::Error| {
|
||||||
|
error!(
|
||||||
|
"failed to connect to compute node hosts={:?} ports={:?}: {}",
|
||||||
|
node_info.config.get_hosts(),
|
||||||
|
node_info.config.get_ports(),
|
||||||
|
e
|
||||||
|
);
|
||||||
|
invalidate_cache(node_info)
|
||||||
|
})
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
tokio::spawn(async move {
|
||||||
|
if let Err(e) = connection.await {
|
||||||
|
error!("connection error: {}", e);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
Ok(client)
|
||||||
|
}
|
||||||
@@ -1,8 +1,11 @@
|
|||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
use futures::pin_mut;
|
use futures::pin_mut;
|
||||||
use futures::StreamExt;
|
use futures::StreamExt;
|
||||||
use hyper::body::HttpBody;
|
use hyper::body::HttpBody;
|
||||||
|
use hyper::http::HeaderName;
|
||||||
|
use hyper::http::HeaderValue;
|
||||||
use hyper::{Body, HeaderMap, Request};
|
use hyper::{Body, HeaderMap, Request};
|
||||||
use pq_proto::StartupMessageParams;
|
|
||||||
use serde_json::json;
|
use serde_json::json;
|
||||||
use serde_json::Map;
|
use serde_json::Map;
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
@@ -11,7 +14,8 @@ use tokio_postgres::types::Type;
|
|||||||
use tokio_postgres::Row;
|
use tokio_postgres::Row;
|
||||||
use url::Url;
|
use url::Url;
|
||||||
|
|
||||||
use crate::{auth, config::ProxyConfig, console};
|
use super::conn_pool::ConnInfo;
|
||||||
|
use super::conn_pool::GlobalConnPool;
|
||||||
|
|
||||||
#[derive(serde::Deserialize)]
|
#[derive(serde::Deserialize)]
|
||||||
struct QueryData {
|
struct QueryData {
|
||||||
@@ -19,25 +23,33 @@ struct QueryData {
|
|||||||
params: Vec<serde_json::Value>,
|
params: Vec<serde_json::Value>,
|
||||||
}
|
}
|
||||||
|
|
||||||
const APP_NAME: &str = "sql_over_http";
|
pub const MAX_RESPONSE_SIZE: usize = 1024 * 1024; // 1 MB
|
||||||
const MAX_RESPONSE_SIZE: usize = 1024 * 1024; // 1 MB
|
|
||||||
const MAX_REQUEST_SIZE: u64 = 1024 * 1024; // 1 MB
|
const MAX_REQUEST_SIZE: u64 = 1024 * 1024; // 1 MB
|
||||||
|
|
||||||
|
static RAW_TEXT_OUTPUT: HeaderName = HeaderName::from_static("neon-raw-text-output");
|
||||||
|
static ARRAY_MODE: HeaderName = HeaderName::from_static("neon-array-mode");
|
||||||
|
static ALLOW_POOL: HeaderName = HeaderName::from_static("neon-pool-opt-in");
|
||||||
|
|
||||||
|
static HEADER_VALUE_TRUE: HeaderValue = HeaderValue::from_static("true");
|
||||||
|
|
||||||
//
|
//
|
||||||
// Convert json non-string types to strings, so that they can be passed to Postgres
|
// Convert json non-string types to strings, so that they can be passed to Postgres
|
||||||
// as parameters.
|
// as parameters.
|
||||||
//
|
//
|
||||||
fn json_to_pg_text(json: Vec<Value>) -> Result<Vec<String>, serde_json::Error> {
|
fn json_to_pg_text(json: Vec<Value>) -> Result<Vec<Option<String>>, serde_json::Error> {
|
||||||
json.iter()
|
json.iter()
|
||||||
.map(|value| {
|
.map(|value| {
|
||||||
match value {
|
match value {
|
||||||
Value::Null => serde_json::to_string(value),
|
// special care for nulls
|
||||||
Value::Bool(_) => serde_json::to_string(value),
|
Value::Null => Ok(None),
|
||||||
Value::Number(_) => serde_json::to_string(value),
|
|
||||||
Value::Object(_) => serde_json::to_string(value),
|
|
||||||
|
|
||||||
// no need to escape
|
// convert to text with escaping
|
||||||
Value::String(s) => Ok(s.to_string()),
|
Value::Bool(_) => serde_json::to_string(value).map(Some),
|
||||||
|
Value::Number(_) => serde_json::to_string(value).map(Some),
|
||||||
|
Value::Object(_) => serde_json::to_string(value).map(Some),
|
||||||
|
|
||||||
|
// avoid escaping here, as we pass this as a parameter
|
||||||
|
Value::String(s) => Ok(Some(s.to_string())),
|
||||||
|
|
||||||
// special care for arrays
|
// special care for arrays
|
||||||
Value::Array(_) => json_array_to_pg_array(value),
|
Value::Array(_) => json_array_to_pg_array(value),
|
||||||
@@ -54,25 +66,29 @@ fn json_to_pg_text(json: Vec<Value>) -> Result<Vec<String>, serde_json::Error> {
|
|||||||
//
|
//
|
||||||
// Example of the same escaping in node-postgres: packages/pg/lib/utils.js
|
// Example of the same escaping in node-postgres: packages/pg/lib/utils.js
|
||||||
//
|
//
|
||||||
fn json_array_to_pg_array(value: &Value) -> Result<String, serde_json::Error> {
|
fn json_array_to_pg_array(value: &Value) -> Result<Option<String>, serde_json::Error> {
|
||||||
match value {
|
match value {
|
||||||
// same
|
// special care for nulls
|
||||||
Value::Null => serde_json::to_string(value),
|
Value::Null => Ok(None),
|
||||||
Value::Bool(_) => serde_json::to_string(value),
|
|
||||||
Value::Number(_) => serde_json::to_string(value),
|
|
||||||
Value::Object(_) => serde_json::to_string(value),
|
|
||||||
|
|
||||||
// now needs to be escaped, as it is part of the array
|
// convert to text with escaping
|
||||||
Value::String(_) => serde_json::to_string(value),
|
Value::Bool(_) => serde_json::to_string(value).map(Some),
|
||||||
|
Value::Number(_) => serde_json::to_string(value).map(Some),
|
||||||
|
Value::Object(_) => serde_json::to_string(value).map(Some),
|
||||||
|
|
||||||
|
// here string needs to be escaped, as it is part of the array
|
||||||
|
Value::String(_) => serde_json::to_string(value).map(Some),
|
||||||
|
|
||||||
// recurse into array
|
// recurse into array
|
||||||
Value::Array(arr) => {
|
Value::Array(arr) => {
|
||||||
let vals = arr
|
let vals = arr
|
||||||
.iter()
|
.iter()
|
||||||
.map(json_array_to_pg_array)
|
.map(json_array_to_pg_array)
|
||||||
|
.map(|r| r.map(|v| v.unwrap_or_else(|| "NULL".to_string())))
|
||||||
.collect::<Result<Vec<_>, _>>()?
|
.collect::<Result<Vec<_>, _>>()?
|
||||||
.join(",");
|
.join(",");
|
||||||
Ok(format!("{{{}}}", vals))
|
|
||||||
|
Ok(Some(format!("{{{}}}", vals)))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -80,7 +96,7 @@ fn json_array_to_pg_array(value: &Value) -> Result<String, serde_json::Error> {
|
|||||||
fn get_conn_info(
|
fn get_conn_info(
|
||||||
headers: &HeaderMap,
|
headers: &HeaderMap,
|
||||||
sni_hostname: Option<String>,
|
sni_hostname: Option<String>,
|
||||||
) -> Result<(String, String, String, String), anyhow::Error> {
|
) -> Result<ConnInfo, anyhow::Error> {
|
||||||
let connection_string = headers
|
let connection_string = headers
|
||||||
.get("Neon-Connection-String")
|
.get("Neon-Connection-String")
|
||||||
.ok_or(anyhow::anyhow!("missing connection string"))?
|
.ok_or(anyhow::anyhow!("missing connection string"))?
|
||||||
@@ -133,56 +149,33 @@ fn get_conn_info(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok((
|
Ok(ConnInfo {
|
||||||
username.to_owned(),
|
username: username.to_owned(),
|
||||||
dbname.to_owned(),
|
dbname: dbname.to_owned(),
|
||||||
hostname.to_owned(),
|
hostname: hostname.to_owned(),
|
||||||
password.to_owned(),
|
password: password.to_owned(),
|
||||||
))
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: return different http error codes
|
// TODO: return different http error codes
|
||||||
pub async fn handle(
|
pub async fn handle(
|
||||||
config: &'static ProxyConfig,
|
|
||||||
request: Request<Body>,
|
request: Request<Body>,
|
||||||
sni_hostname: Option<String>,
|
sni_hostname: Option<String>,
|
||||||
|
conn_pool: Arc<GlobalConnPool>,
|
||||||
) -> anyhow::Result<Value> {
|
) -> anyhow::Result<Value> {
|
||||||
//
|
//
|
||||||
// Determine the destination and connection params
|
// Determine the destination and connection params
|
||||||
//
|
//
|
||||||
let headers = request.headers();
|
let headers = request.headers();
|
||||||
let (username, dbname, hostname, password) = get_conn_info(headers, sni_hostname)?;
|
let conn_info = get_conn_info(headers, sni_hostname)?;
|
||||||
let credential_params = StartupMessageParams::new([
|
|
||||||
("user", &username),
|
|
||||||
("database", &dbname),
|
|
||||||
("application_name", APP_NAME),
|
|
||||||
]);
|
|
||||||
|
|
||||||
//
|
// Determine the output options. Default behaviour is 'false'. Anything that is not
|
||||||
// Wake up the destination if needed. Code here is a bit involved because
|
// strictly 'true' assumed to be false.
|
||||||
// we reuse the code from the usual proxy and we need to prepare few structures
|
let raw_output = headers.get(&RAW_TEXT_OUTPUT) == Some(&HEADER_VALUE_TRUE);
|
||||||
// that this code expects.
|
let array_mode = headers.get(&ARRAY_MODE) == Some(&HEADER_VALUE_TRUE);
|
||||||
//
|
|
||||||
let tls = config.tls_config.as_ref();
|
// Allow connection pooling only if explicitly requested
|
||||||
let common_names = tls.and_then(|tls| tls.common_names.clone());
|
let allow_pool = headers.get(&ALLOW_POOL) == Some(&HEADER_VALUE_TRUE);
|
||||||
let creds = config
|
|
||||||
.auth_backend
|
|
||||||
.as_ref()
|
|
||||||
.map(|_| auth::ClientCredentials::parse(&credential_params, Some(&hostname), common_names))
|
|
||||||
.transpose()?;
|
|
||||||
let extra = console::ConsoleReqExtra {
|
|
||||||
session_id: uuid::Uuid::new_v4(),
|
|
||||||
application_name: Some(APP_NAME),
|
|
||||||
};
|
|
||||||
let node = creds.wake_compute(&extra).await?.expect("msg");
|
|
||||||
let conf = node.value.config;
|
|
||||||
let port = *conf.get_ports().first().expect("no port");
|
|
||||||
let host = match conf.get_hosts().first().expect("no host") {
|
|
||||||
tokio_postgres::config::Host::Tcp(host) => host,
|
|
||||||
tokio_postgres::config::Host::Unix(_) => {
|
|
||||||
return Err(anyhow::anyhow!("unix socket is not supported"));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
let request_content_length = match request.body().size_hint().upper() {
|
let request_content_length = match request.body().size_hint().upper() {
|
||||||
Some(v) => v,
|
Some(v) => v,
|
||||||
@@ -202,28 +195,11 @@ pub async fn handle(
|
|||||||
let QueryData { query, params } = serde_json::from_slice(&body)?;
|
let QueryData { query, params } = serde_json::from_slice(&body)?;
|
||||||
let query_params = json_to_pg_text(params)?;
|
let query_params = json_to_pg_text(params)?;
|
||||||
|
|
||||||
//
|
|
||||||
// Connenct to the destination
|
|
||||||
//
|
|
||||||
let (client, connection) = tokio_postgres::Config::new()
|
|
||||||
.host(host)
|
|
||||||
.port(port)
|
|
||||||
.user(&username)
|
|
||||||
.password(&password)
|
|
||||||
.dbname(&dbname)
|
|
||||||
.max_backend_message_size(MAX_RESPONSE_SIZE)
|
|
||||||
.connect(tokio_postgres::NoTls)
|
|
||||||
.await?;
|
|
||||||
|
|
||||||
tokio::spawn(async move {
|
|
||||||
if let Err(e) = connection.await {
|
|
||||||
eprintln!("connection error: {}", e);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// Now execute the query and return the result
|
// Now execute the query and return the result
|
||||||
//
|
//
|
||||||
|
let client = conn_pool.get(&conn_info, !allow_pool).await?;
|
||||||
|
|
||||||
let row_stream = client.query_raw_txt(query, query_params).await?;
|
let row_stream = client.query_raw_txt(query, query_params).await?;
|
||||||
|
|
||||||
// Manually drain the stream into a vector to leave row_stream hanging
|
// Manually drain the stream into a vector to leave row_stream hanging
|
||||||
@@ -262,6 +238,11 @@ pub async fn handle(
|
|||||||
json!({
|
json!({
|
||||||
"name": Value::String(c.name().to_owned()),
|
"name": Value::String(c.name().to_owned()),
|
||||||
"dataTypeID": Value::Number(c.type_().oid().into()),
|
"dataTypeID": Value::Number(c.type_().oid().into()),
|
||||||
|
"tableID": c.table_oid(),
|
||||||
|
"columnID": c.column_id(),
|
||||||
|
"dataTypeSize": c.type_size(),
|
||||||
|
"dataTypeModifier": c.type_modifier(),
|
||||||
|
"format": "text",
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
.collect::<Vec<_>>()
|
.collect::<Vec<_>>()
|
||||||
@@ -272,35 +253,58 @@ pub async fn handle(
|
|||||||
// convert rows to JSON
|
// convert rows to JSON
|
||||||
let rows = rows
|
let rows = rows
|
||||||
.iter()
|
.iter()
|
||||||
.map(pg_text_row_to_json)
|
.map(|row| pg_text_row_to_json(row, raw_output, array_mode))
|
||||||
.collect::<Result<Vec<_>, _>>()?;
|
.collect::<Result<Vec<_>, _>>()?;
|
||||||
|
|
||||||
|
if allow_pool {
|
||||||
|
// return connection to the pool
|
||||||
|
tokio::task::spawn(async move {
|
||||||
|
let _ = conn_pool.put(&conn_info, client).await;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
// resulting JSON format is based on the format of node-postgres result
|
// resulting JSON format is based on the format of node-postgres result
|
||||||
Ok(json!({
|
Ok(json!({
|
||||||
"command": command_tag_name,
|
"command": command_tag_name,
|
||||||
"rowCount": command_tag_count,
|
"rowCount": command_tag_count,
|
||||||
"rows": rows,
|
"rows": rows,
|
||||||
"fields": fields,
|
"fields": fields,
|
||||||
|
"rowAsArray": array_mode,
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
// Convert postgres row with text-encoded values to JSON object
|
// Convert postgres row with text-encoded values to JSON object
|
||||||
//
|
//
|
||||||
pub fn pg_text_row_to_json(row: &Row) -> Result<Value, anyhow::Error> {
|
pub fn pg_text_row_to_json(
|
||||||
let res = row
|
row: &Row,
|
||||||
.columns()
|
raw_output: bool,
|
||||||
.iter()
|
array_mode: bool,
|
||||||
.enumerate()
|
) -> Result<Value, anyhow::Error> {
|
||||||
.map(|(i, column)| {
|
let iter = row.columns().iter().enumerate().map(|(i, column)| {
|
||||||
let name = column.name();
|
let name = column.name();
|
||||||
let pg_value = row.as_text(i)?;
|
let pg_value = row.as_text(i)?;
|
||||||
let json_value = pg_text_to_json(pg_value, column.type_())?;
|
let json_value = if raw_output {
|
||||||
Ok((name.to_string(), json_value))
|
match pg_value {
|
||||||
})
|
Some(v) => Value::String(v.to_string()),
|
||||||
.collect::<Result<Map<String, Value>, anyhow::Error>>()?;
|
None => Value::Null,
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
pg_text_to_json(pg_value, column.type_())?
|
||||||
|
};
|
||||||
|
Ok((name.to_string(), json_value))
|
||||||
|
});
|
||||||
|
|
||||||
Ok(Value::Object(res))
|
if array_mode {
|
||||||
|
// drop keys and aggregate into array
|
||||||
|
let arr = iter
|
||||||
|
.map(|r| r.map(|(_key, val)| val))
|
||||||
|
.collect::<Result<Vec<Value>, anyhow::Error>>()?;
|
||||||
|
Ok(Value::Array(arr))
|
||||||
|
} else {
|
||||||
|
let obj = iter.collect::<Result<Map<String, Value>, anyhow::Error>>()?;
|
||||||
|
Ok(Value::Object(obj))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
@@ -308,10 +312,6 @@ pub fn pg_text_row_to_json(row: &Row) -> Result<Value, anyhow::Error> {
|
|||||||
//
|
//
|
||||||
pub fn pg_text_to_json(pg_value: Option<&str>, pg_type: &Type) -> Result<Value, anyhow::Error> {
|
pub fn pg_text_to_json(pg_value: Option<&str>, pg_type: &Type) -> Result<Value, anyhow::Error> {
|
||||||
if let Some(val) = pg_value {
|
if let Some(val) = pg_value {
|
||||||
if val == "NULL" {
|
|
||||||
return Ok(Value::Null);
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Kind::Array(elem_type) = pg_type.kind() {
|
if let Kind::Array(elem_type) = pg_type.kind() {
|
||||||
return pg_array_parse(val, elem_type);
|
return pg_array_parse(val, elem_type);
|
||||||
}
|
}
|
||||||
@@ -373,6 +373,27 @@ fn _pg_array_parse(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn push_checked(
|
||||||
|
entry: &mut String,
|
||||||
|
entries: &mut Vec<Value>,
|
||||||
|
elem_type: &Type,
|
||||||
|
) -> Result<(), anyhow::Error> {
|
||||||
|
if !entry.is_empty() {
|
||||||
|
// While in usual postgres response we get nulls as None and everything else
|
||||||
|
// as Some(&str), in arrays we get NULL as unquoted 'NULL' string (while
|
||||||
|
// string with value 'NULL' will be represented by '"NULL"'). So catch NULLs
|
||||||
|
// here while we have quotation info and convert them to None.
|
||||||
|
if entry == "NULL" {
|
||||||
|
entries.push(pg_text_to_json(None, elem_type)?);
|
||||||
|
} else {
|
||||||
|
entries.push(pg_text_to_json(Some(entry), elem_type)?);
|
||||||
|
}
|
||||||
|
entry.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
while let Some((mut i, mut c)) = pg_array_chr.next() {
|
while let Some((mut i, mut c)) = pg_array_chr.next() {
|
||||||
let mut escaped = false;
|
let mut escaped = false;
|
||||||
|
|
||||||
@@ -395,9 +416,7 @@ fn _pg_array_parse(
|
|||||||
'}' => {
|
'}' => {
|
||||||
level -= 1;
|
level -= 1;
|
||||||
if level == 0 {
|
if level == 0 {
|
||||||
if !entry.is_empty() {
|
push_checked(&mut entry, &mut entries, elem_type)?;
|
||||||
entries.push(pg_text_to_json(Some(&entry), elem_type)?);
|
|
||||||
}
|
|
||||||
if nested {
|
if nested {
|
||||||
return Ok((Value::Array(entries), i));
|
return Ok((Value::Array(entries), i));
|
||||||
}
|
}
|
||||||
@@ -405,17 +424,15 @@ fn _pg_array_parse(
|
|||||||
}
|
}
|
||||||
'"' if !escaped => {
|
'"' if !escaped => {
|
||||||
if quote {
|
if quote {
|
||||||
// push even if empty
|
// end of quoted string, so push it manually without any checks
|
||||||
|
// for emptiness or nulls
|
||||||
entries.push(pg_text_to_json(Some(&entry), elem_type)?);
|
entries.push(pg_text_to_json(Some(&entry), elem_type)?);
|
||||||
entry = String::new();
|
entry.clear();
|
||||||
}
|
}
|
||||||
quote = !quote;
|
quote = !quote;
|
||||||
}
|
}
|
||||||
',' if !quote => {
|
',' if !quote => {
|
||||||
if !entry.is_empty() {
|
push_checked(&mut entry, &mut entries, elem_type)?;
|
||||||
entries.push(pg_text_to_json(Some(&entry), elem_type)?);
|
|
||||||
entry = String::new();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
_ => {
|
_ => {
|
||||||
entry.push(c);
|
entry.push(c);
|
||||||
@@ -439,30 +456,35 @@ mod tests {
|
|||||||
fn test_atomic_types_to_pg_params() {
|
fn test_atomic_types_to_pg_params() {
|
||||||
let json = vec![Value::Bool(true), Value::Bool(false)];
|
let json = vec![Value::Bool(true), Value::Bool(false)];
|
||||||
let pg_params = json_to_pg_text(json).unwrap();
|
let pg_params = json_to_pg_text(json).unwrap();
|
||||||
assert_eq!(pg_params, vec!["true", "false"]);
|
assert_eq!(
|
||||||
|
pg_params,
|
||||||
|
vec![Some("true".to_owned()), Some("false".to_owned())]
|
||||||
|
);
|
||||||
|
|
||||||
let json = vec![Value::Number(serde_json::Number::from(42))];
|
let json = vec![Value::Number(serde_json::Number::from(42))];
|
||||||
let pg_params = json_to_pg_text(json).unwrap();
|
let pg_params = json_to_pg_text(json).unwrap();
|
||||||
assert_eq!(pg_params, vec!["42"]);
|
assert_eq!(pg_params, vec![Some("42".to_owned())]);
|
||||||
|
|
||||||
let json = vec![Value::String("foo\"".to_string())];
|
let json = vec![Value::String("foo\"".to_string())];
|
||||||
let pg_params = json_to_pg_text(json).unwrap();
|
let pg_params = json_to_pg_text(json).unwrap();
|
||||||
assert_eq!(pg_params, vec!["foo\""]);
|
assert_eq!(pg_params, vec![Some("foo\"".to_owned())]);
|
||||||
|
|
||||||
let json = vec![Value::Null];
|
let json = vec![Value::Null];
|
||||||
let pg_params = json_to_pg_text(json).unwrap();
|
let pg_params = json_to_pg_text(json).unwrap();
|
||||||
assert_eq!(pg_params, vec!["null"]);
|
assert_eq!(pg_params, vec![None]);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_json_array_to_pg_array() {
|
fn test_json_array_to_pg_array() {
|
||||||
// atoms and escaping
|
// atoms and escaping
|
||||||
let json = "[true, false, null, 42, \"foo\", \"bar\\\"-\\\\\"]";
|
let json = "[true, false, null, \"NULL\", 42, \"foo\", \"bar\\\"-\\\\\"]";
|
||||||
let json: Value = serde_json::from_str(json).unwrap();
|
let json: Value = serde_json::from_str(json).unwrap();
|
||||||
let pg_params = json_to_pg_text(vec![json]).unwrap();
|
let pg_params = json_to_pg_text(vec![json]).unwrap();
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
pg_params,
|
pg_params,
|
||||||
vec!["{true,false,null,42,\"foo\",\"bar\\\"-\\\\\"}"]
|
vec![Some(
|
||||||
|
"{true,false,NULL,\"NULL\",42,\"foo\",\"bar\\\"-\\\\\"}".to_owned()
|
||||||
|
)]
|
||||||
);
|
);
|
||||||
|
|
||||||
// nested arrays
|
// nested arrays
|
||||||
@@ -471,7 +493,9 @@ mod tests {
|
|||||||
let pg_params = json_to_pg_text(vec![json]).unwrap();
|
let pg_params = json_to_pg_text(vec![json]).unwrap();
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
pg_params,
|
pg_params,
|
||||||
vec!["{{true,false},{null,42},{\"foo\",\"bar\\\"-\\\\\"}}"]
|
vec![Some(
|
||||||
|
"{{true,false},{NULL,42},{\"foo\",\"bar\\\"-\\\\\"}}".to_owned()
|
||||||
|
)]
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -26,7 +26,6 @@ use tls_listener::TlsListener;
|
|||||||
use tokio::{
|
use tokio::{
|
||||||
io::{self, AsyncBufRead, AsyncRead, AsyncWrite, ReadBuf},
|
io::{self, AsyncBufRead, AsyncRead, AsyncWrite, ReadBuf},
|
||||||
net::TcpListener,
|
net::TcpListener,
|
||||||
select,
|
|
||||||
};
|
};
|
||||||
use tokio_util::sync::CancellationToken;
|
use tokio_util::sync::CancellationToken;
|
||||||
use tracing::{error, info, info_span, warn, Instrument};
|
use tracing::{error, info, info_span, warn, Instrument};
|
||||||
@@ -36,7 +35,7 @@ use utils::http::{error::ApiError, json::json_response};
|
|||||||
// Tracking issue: https://github.com/rust-lang/rust/issues/98407.
|
// Tracking issue: https://github.com/rust-lang/rust/issues/98407.
|
||||||
use sync_wrapper::SyncWrapper;
|
use sync_wrapper::SyncWrapper;
|
||||||
|
|
||||||
use super::sql_over_http;
|
use super::{conn_pool::GlobalConnPool, sql_over_http};
|
||||||
|
|
||||||
pin_project! {
|
pin_project! {
|
||||||
/// This is a wrapper around a [`WebSocketStream`] that
|
/// This is a wrapper around a [`WebSocketStream`] that
|
||||||
@@ -165,6 +164,7 @@ async fn serve_websocket(
|
|||||||
async fn ws_handler(
|
async fn ws_handler(
|
||||||
mut request: Request<Body>,
|
mut request: Request<Body>,
|
||||||
config: &'static ProxyConfig,
|
config: &'static ProxyConfig,
|
||||||
|
conn_pool: Arc<GlobalConnPool>,
|
||||||
cancel_map: Arc<CancelMap>,
|
cancel_map: Arc<CancelMap>,
|
||||||
session_id: uuid::Uuid,
|
session_id: uuid::Uuid,
|
||||||
sni_hostname: Option<String>,
|
sni_hostname: Option<String>,
|
||||||
@@ -193,14 +193,9 @@ async fn ws_handler(
|
|||||||
// TODO: that deserves a refactor as now this function also handles http json client besides websockets.
|
// TODO: that deserves a refactor as now this function also handles http json client besides websockets.
|
||||||
// Right now I don't want to blow up sql-over-http patch with file renames and do that as a follow up instead.
|
// Right now I don't want to blow up sql-over-http patch with file renames and do that as a follow up instead.
|
||||||
} else if request.uri().path() == "/sql" && request.method() == Method::POST {
|
} else if request.uri().path() == "/sql" && request.method() == Method::POST {
|
||||||
let result = select! {
|
let result = sql_over_http::handle(request, sni_hostname, conn_pool)
|
||||||
_ = tokio::time::sleep(std::time::Duration::from_secs(10)) => {
|
.instrument(info_span!("sql-over-http"))
|
||||||
Err(anyhow::anyhow!("Query timed out"))
|
.await;
|
||||||
}
|
|
||||||
response = sql_over_http::handle(config, request, sni_hostname) => {
|
|
||||||
response
|
|
||||||
}
|
|
||||||
};
|
|
||||||
let status_code = match result {
|
let status_code = match result {
|
||||||
Ok(_) => StatusCode::OK,
|
Ok(_) => StatusCode::OK,
|
||||||
Err(_) => StatusCode::BAD_REQUEST,
|
Err(_) => StatusCode::BAD_REQUEST,
|
||||||
@@ -240,6 +235,8 @@ pub async fn task_main(
|
|||||||
info!("websocket server has shut down");
|
info!("websocket server has shut down");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let conn_pool: Arc<GlobalConnPool> = GlobalConnPool::new(config);
|
||||||
|
|
||||||
let tls_config = config.tls_config.as_ref().map(|cfg| cfg.to_server_config());
|
let tls_config = config.tls_config.as_ref().map(|cfg| cfg.to_server_config());
|
||||||
let tls_acceptor: tokio_rustls::TlsAcceptor = match tls_config {
|
let tls_acceptor: tokio_rustls::TlsAcceptor = match tls_config {
|
||||||
Some(config) => config.into(),
|
Some(config) => config.into(),
|
||||||
@@ -264,15 +261,18 @@ pub async fn task_main(
|
|||||||
let make_svc =
|
let make_svc =
|
||||||
hyper::service::make_service_fn(|stream: &tokio_rustls::server::TlsStream<AddrStream>| {
|
hyper::service::make_service_fn(|stream: &tokio_rustls::server::TlsStream<AddrStream>| {
|
||||||
let sni_name = stream.get_ref().1.sni_hostname().map(|s| s.to_string());
|
let sni_name = stream.get_ref().1.sni_hostname().map(|s| s.to_string());
|
||||||
|
let conn_pool = conn_pool.clone();
|
||||||
|
|
||||||
async move {
|
async move {
|
||||||
Ok::<_, Infallible>(hyper::service::service_fn(move |req: Request<Body>| {
|
Ok::<_, Infallible>(hyper::service::service_fn(move |req: Request<Body>| {
|
||||||
let sni_name = sni_name.clone();
|
let sni_name = sni_name.clone();
|
||||||
|
let conn_pool = conn_pool.clone();
|
||||||
|
|
||||||
async move {
|
async move {
|
||||||
let cancel_map = Arc::new(CancelMap::default());
|
let cancel_map = Arc::new(CancelMap::default());
|
||||||
let session_id = uuid::Uuid::new_v4();
|
let session_id = uuid::Uuid::new_v4();
|
||||||
|
|
||||||
ws_handler(req, config, cancel_map, session_id, sni_name)
|
ws_handler(req, config, conn_pool, cancel_map, session_id, sni_name)
|
||||||
.instrument(info_span!(
|
.instrument(info_span!(
|
||||||
"ws-client",
|
"ws-client",
|
||||||
session = format_args!("{session_id}")
|
session = format_args!("{session_id}")
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ pub async fn init() -> anyhow::Result<LoggingGuard> {
|
|||||||
.from_env_lossy();
|
.from_env_lossy();
|
||||||
|
|
||||||
let fmt_layer = tracing_subscriber::fmt::layer()
|
let fmt_layer = tracing_subscriber::fmt::layer()
|
||||||
.with_ansi(atty::is(atty::Stream::Stderr))
|
.with_ansi(false)
|
||||||
.with_writer(std::io::stderr)
|
.with_writer(std::io::stderr)
|
||||||
.with_target(false);
|
.with_target(false);
|
||||||
|
|
||||||
|
|||||||
@@ -4,11 +4,13 @@ use crate::{config::MetricCollectionConfig, http};
|
|||||||
use chrono::{DateTime, Utc};
|
use chrono::{DateTime, Utc};
|
||||||
use consumption_metrics::{idempotency_key, Event, EventChunk, EventType, CHUNK_SIZE};
|
use consumption_metrics::{idempotency_key, Event, EventChunk, EventType, CHUNK_SIZE};
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
use std::collections::HashMap;
|
use std::{collections::HashMap, time::Duration};
|
||||||
use tracing::{error, info, instrument, trace, warn};
|
use tracing::{error, info, instrument, trace, warn};
|
||||||
|
|
||||||
const PROXY_IO_BYTES_PER_CLIENT: &str = "proxy_io_bytes_per_client";
|
const PROXY_IO_BYTES_PER_CLIENT: &str = "proxy_io_bytes_per_client";
|
||||||
|
|
||||||
|
const DEFAULT_HTTP_REPORTING_TIMEOUT: Duration = Duration::from_secs(60);
|
||||||
|
|
||||||
///
|
///
|
||||||
/// Key that uniquely identifies the object, this metric describes.
|
/// Key that uniquely identifies the object, this metric describes.
|
||||||
/// Currently, endpoint_id is enough, but this may change later,
|
/// Currently, endpoint_id is enough, but this may change later,
|
||||||
@@ -30,7 +32,7 @@ pub async fn task_main(config: &MetricCollectionConfig) -> anyhow::Result<()> {
|
|||||||
info!("metrics collector has shut down");
|
info!("metrics collector has shut down");
|
||||||
}
|
}
|
||||||
|
|
||||||
let http_client = http::new_client();
|
let http_client = http::new_client_with_timeout(DEFAULT_HTTP_REPORTING_TIMEOUT);
|
||||||
let mut cached_metrics: HashMap<Ids, (u64, DateTime<Utc>)> = HashMap::new();
|
let mut cached_metrics: HashMap<Ids, (u64, DateTime<Utc>)> = HashMap::new();
|
||||||
let hostname = hostname::get()?.as_os_str().to_string_lossy().into_owned();
|
let hostname = hostname::get()?.as_os_str().to_string_lossy().into_owned();
|
||||||
|
|
||||||
@@ -182,36 +184,36 @@ async fn collect_metrics_iteration(
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
if res.status().is_success() {
|
if !res.status().is_success() {
|
||||||
// update cached metrics after they were sent successfully
|
|
||||||
for send_metric in chunk {
|
|
||||||
let stop_time = match send_metric.kind {
|
|
||||||
EventType::Incremental { stop_time, .. } => stop_time,
|
|
||||||
_ => unreachable!(),
|
|
||||||
};
|
|
||||||
|
|
||||||
cached_metrics
|
|
||||||
.entry(Ids {
|
|
||||||
endpoint_id: send_metric.extra.endpoint_id.clone(),
|
|
||||||
branch_id: send_metric.extra.branch_id.clone(),
|
|
||||||
})
|
|
||||||
// update cached value (add delta) and time
|
|
||||||
.and_modify(|e| {
|
|
||||||
e.0 = e.0.saturating_add(send_metric.value);
|
|
||||||
e.1 = stop_time
|
|
||||||
})
|
|
||||||
// cache new metric
|
|
||||||
.or_insert((send_metric.value, stop_time));
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
error!("metrics endpoint refused the sent metrics: {:?}", res);
|
error!("metrics endpoint refused the sent metrics: {:?}", res);
|
||||||
for metric in chunk.iter() {
|
for metric in chunk.iter().filter(|metric| metric.value > (1u64 << 40)) {
|
||||||
// Report if the metric value is suspiciously large
|
// Report if the metric value is suspiciously large
|
||||||
if metric.value > (1u64 << 40) {
|
error!("potentially abnormal metric value: {:?}", metric);
|
||||||
error!("potentially abnormal metric value: {:?}", metric);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// update cached metrics after they were sent
|
||||||
|
// (to avoid sending the same metrics twice)
|
||||||
|
// see the relevant discussion on why to do so even if the status is not success:
|
||||||
|
// https://github.com/neondatabase/neon/pull/4563#discussion_r1246710956
|
||||||
|
for send_metric in chunk {
|
||||||
|
let stop_time = match send_metric.kind {
|
||||||
|
EventType::Incremental { stop_time, .. } => stop_time,
|
||||||
|
_ => unreachable!(),
|
||||||
|
};
|
||||||
|
|
||||||
|
cached_metrics
|
||||||
|
.entry(Ids {
|
||||||
|
endpoint_id: send_metric.extra.endpoint_id.clone(),
|
||||||
|
branch_id: send_metric.extra.branch_id.clone(),
|
||||||
|
})
|
||||||
|
// update cached value (add delta) and time
|
||||||
|
.and_modify(|e| {
|
||||||
|
e.0 = e.0.saturating_add(send_metric.value);
|
||||||
|
e.1 = stop_time
|
||||||
|
})
|
||||||
|
// cache new metric
|
||||||
|
.or_insert((send_metric.value, stop_time));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -16,13 +16,16 @@ use metrics::{register_int_counter, register_int_counter_vec, IntCounter, IntCou
|
|||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
use pq_proto::{BeMessage as Be, FeStartupPacket, StartupMessageParams};
|
use pq_proto::{BeMessage as Be, FeStartupPacket, StartupMessageParams};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt};
|
use tokio::{
|
||||||
|
io::{AsyncRead, AsyncWrite, AsyncWriteExt},
|
||||||
|
time,
|
||||||
|
};
|
||||||
use tokio_util::sync::CancellationToken;
|
use tokio_util::sync::CancellationToken;
|
||||||
use tracing::{error, info, warn};
|
use tracing::{error, info, warn};
|
||||||
use utils::measured_stream::MeasuredStream;
|
use utils::measured_stream::MeasuredStream;
|
||||||
|
|
||||||
/// Number of times we should retry the `/proxy_wake_compute` http request.
|
/// Number of times we should retry the `/proxy_wake_compute` http request.
|
||||||
const NUM_RETRIES_WAKE_COMPUTE: usize = 1;
|
pub const NUM_RETRIES_WAKE_COMPUTE: usize = 1;
|
||||||
|
|
||||||
const ERR_INSECURE_CONNECTION: &str = "connection is insecure (try using `sslmode=require`)";
|
const ERR_INSECURE_CONNECTION: &str = "connection is insecure (try using `sslmode=require`)";
|
||||||
const ERR_PROTO_VIOLATION: &str = "protocol violation";
|
const ERR_PROTO_VIOLATION: &str = "protocol violation";
|
||||||
@@ -283,34 +286,36 @@ async fn handshake<S: AsyncRead + AsyncWrite + Unpin>(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// If we couldn't connect, a cached connection info might be to blame
|
||||||
|
/// (e.g. the compute node's address might've changed at the wrong time).
|
||||||
|
/// Invalidate the cache entry (if any) to prevent subsequent errors.
|
||||||
|
#[tracing::instrument(name = "invalidate_cache", skip_all)]
|
||||||
|
pub fn invalidate_cache(node_info: &console::CachedNodeInfo) {
|
||||||
|
let is_cached = node_info.cached();
|
||||||
|
if is_cached {
|
||||||
|
warn!("invalidating stalled compute node info cache entry");
|
||||||
|
node_info.invalidate();
|
||||||
|
}
|
||||||
|
|
||||||
|
let label = match is_cached {
|
||||||
|
true => "compute_cached",
|
||||||
|
false => "compute_uncached",
|
||||||
|
};
|
||||||
|
NUM_CONNECTION_FAILURES.with_label_values(&[label]).inc();
|
||||||
|
}
|
||||||
|
|
||||||
/// Try to connect to the compute node once.
|
/// Try to connect to the compute node once.
|
||||||
#[tracing::instrument(name = "connect_once", skip_all)]
|
#[tracing::instrument(name = "connect_once", skip_all)]
|
||||||
async fn connect_to_compute_once(
|
async fn connect_to_compute_once(
|
||||||
node_info: &console::CachedNodeInfo,
|
node_info: &console::CachedNodeInfo,
|
||||||
|
timeout: time::Duration,
|
||||||
) -> Result<PostgresConnection, compute::ConnectionError> {
|
) -> Result<PostgresConnection, compute::ConnectionError> {
|
||||||
// If we couldn't connect, a cached connection info might be to blame
|
|
||||||
// (e.g. the compute node's address might've changed at the wrong time).
|
|
||||||
// Invalidate the cache entry (if any) to prevent subsequent errors.
|
|
||||||
let invalidate_cache = |_: &compute::ConnectionError| {
|
|
||||||
let is_cached = node_info.cached();
|
|
||||||
if is_cached {
|
|
||||||
warn!("invalidating stalled compute node info cache entry");
|
|
||||||
node_info.invalidate();
|
|
||||||
}
|
|
||||||
|
|
||||||
let label = match is_cached {
|
|
||||||
true => "compute_cached",
|
|
||||||
false => "compute_uncached",
|
|
||||||
};
|
|
||||||
NUM_CONNECTION_FAILURES.with_label_values(&[label]).inc();
|
|
||||||
};
|
|
||||||
|
|
||||||
let allow_self_signed_compute = node_info.allow_self_signed_compute;
|
let allow_self_signed_compute = node_info.allow_self_signed_compute;
|
||||||
|
|
||||||
node_info
|
node_info
|
||||||
.config
|
.config
|
||||||
.connect(allow_self_signed_compute)
|
.connect(allow_self_signed_compute, timeout)
|
||||||
.inspect_err(invalidate_cache)
|
.inspect_err(|_: &compute::ConnectionError| invalidate_cache(node_info))
|
||||||
.await
|
.await
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -327,7 +332,27 @@ async fn connect_to_compute(
|
|||||||
loop {
|
loop {
|
||||||
// Apply startup params to the (possibly, cached) compute node info.
|
// Apply startup params to the (possibly, cached) compute node info.
|
||||||
node_info.config.set_startup_params(params);
|
node_info.config.set_startup_params(params);
|
||||||
match connect_to_compute_once(node_info).await {
|
|
||||||
|
// Set a shorter timeout for the initial connection attempt.
|
||||||
|
//
|
||||||
|
// In case we try to connect to an outdated address that is no longer valid, the
|
||||||
|
// default behavior of Kubernetes is to drop the packets, causing us to wait for
|
||||||
|
// the entire timeout period. We want to fail fast in such cases.
|
||||||
|
//
|
||||||
|
// A specific case to consider is when we have cached compute node information
|
||||||
|
// with a 4-minute TTL (Time To Live), but the user has executed a `/suspend` API
|
||||||
|
// call, resulting in the nonexistence of the compute node.
|
||||||
|
//
|
||||||
|
// We only use caching in case of scram proxy backed by the console, so reduce
|
||||||
|
// the timeout only in that case.
|
||||||
|
let is_scram_proxy = matches!(creds, auth::BackendType::Console(_, _));
|
||||||
|
let timeout = if is_scram_proxy && num_retries == NUM_RETRIES_WAKE_COMPUTE {
|
||||||
|
time::Duration::from_secs(2)
|
||||||
|
} else {
|
||||||
|
time::Duration::from_secs(10)
|
||||||
|
};
|
||||||
|
|
||||||
|
match connect_to_compute_once(node_info, timeout).await {
|
||||||
Err(e) if num_retries > 0 => {
|
Err(e) if num_retries > 0 => {
|
||||||
info!("compute node's state has changed; requesting a wake-up");
|
info!("compute node's state has changed; requesting a wake-up");
|
||||||
match creds.wake_compute(extra).map_err(io_error).await? {
|
match creds.wake_compute(extra).map_err(io_error).await? {
|
||||||
|
|||||||
@@ -45,17 +45,74 @@ fn hmac_sha256<'a>(key: &[u8], parts: impl IntoIterator<Item = &'a [u8]>) -> [u8
|
|||||||
let mut mac = Hmac::<Sha256>::new_from_slice(key).expect("bad key size");
|
let mut mac = Hmac::<Sha256>::new_from_slice(key).expect("bad key size");
|
||||||
parts.into_iter().for_each(|s| mac.update(s));
|
parts.into_iter().for_each(|s| mac.update(s));
|
||||||
|
|
||||||
// TODO: maybe newer `hmac` et al already migrated to regular arrays?
|
mac.finalize().into_bytes().into()
|
||||||
let mut result = [0u8; 32];
|
|
||||||
result.copy_from_slice(mac.finalize().into_bytes().as_slice());
|
|
||||||
result
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn sha256<'a>(parts: impl IntoIterator<Item = &'a [u8]>) -> [u8; 32] {
|
fn sha256<'a>(parts: impl IntoIterator<Item = &'a [u8]>) -> [u8; 32] {
|
||||||
let mut hasher = Sha256::new();
|
let mut hasher = Sha256::new();
|
||||||
parts.into_iter().for_each(|s| hasher.update(s));
|
parts.into_iter().for_each(|s| hasher.update(s));
|
||||||
|
|
||||||
let mut result = [0u8; 32];
|
hasher.finalize().into()
|
||||||
result.copy_from_slice(hasher.finalize().as_slice());
|
}
|
||||||
result
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use crate::sasl::{Mechanism, Step};
|
||||||
|
|
||||||
|
use super::{password::SaltedPassword, Exchange, ServerSecret};
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn happy_path() {
|
||||||
|
let iterations = 4096;
|
||||||
|
let salt_base64 = "QSXCR+Q6sek8bf92";
|
||||||
|
let pw = SaltedPassword::new(
|
||||||
|
b"pencil",
|
||||||
|
base64::decode(salt_base64).unwrap().as_slice(),
|
||||||
|
iterations,
|
||||||
|
);
|
||||||
|
|
||||||
|
let secret = ServerSecret {
|
||||||
|
iterations,
|
||||||
|
salt_base64: salt_base64.to_owned(),
|
||||||
|
stored_key: pw.client_key().sha256(),
|
||||||
|
server_key: pw.server_key(),
|
||||||
|
doomed: false,
|
||||||
|
};
|
||||||
|
const NONCE: [u8; 18] = [
|
||||||
|
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
|
||||||
|
];
|
||||||
|
let mut exchange = Exchange::new(&secret, || NONCE, None);
|
||||||
|
|
||||||
|
let client_first = "n,,n=user,r=rOprNGfwEbeRWgbNEkqO";
|
||||||
|
let client_final = "c=biws,r=rOprNGfwEbeRWgbNEkqOAQIDBAUGBwgJCgsMDQ4PEBES,p=rw1r5Kph5ThxmaUBC2GAQ6MfXbPnNkFiTIvdb/Rear0=";
|
||||||
|
let server_first =
|
||||||
|
"r=rOprNGfwEbeRWgbNEkqOAQIDBAUGBwgJCgsMDQ4PEBES,s=QSXCR+Q6sek8bf92,i=4096";
|
||||||
|
let server_final = "v=qtUDIofVnIhM7tKn93EQUUt5vgMOldcDVu1HC+OH0o0=";
|
||||||
|
|
||||||
|
exchange = match exchange.exchange(client_first).unwrap() {
|
||||||
|
Step::Continue(exchange, message) => {
|
||||||
|
assert_eq!(message, server_first);
|
||||||
|
exchange
|
||||||
|
}
|
||||||
|
Step::Success(_, _) => panic!("expected continue, got success"),
|
||||||
|
Step::Failure(f) => panic!("{f}"),
|
||||||
|
};
|
||||||
|
|
||||||
|
let key = match exchange.exchange(client_final).unwrap() {
|
||||||
|
Step::Success(key, message) => {
|
||||||
|
assert_eq!(message, server_final);
|
||||||
|
key
|
||||||
|
}
|
||||||
|
Step::Continue(_, _) => panic!("expected success, got continue"),
|
||||||
|
Step::Failure(f) => panic!("{f}"),
|
||||||
|
};
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
key.as_bytes(),
|
||||||
|
[
|
||||||
|
74, 103, 1, 132, 12, 31, 200, 48, 28, 54, 82, 232, 207, 12, 138, 189, 40, 32, 134,
|
||||||
|
27, 125, 170, 232, 35, 171, 167, 166, 41, 70, 228, 182, 112,
|
||||||
|
]
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -14,19 +14,7 @@ impl SaltedPassword {
|
|||||||
/// See `scram-common.c : scram_SaltedPassword` for details.
|
/// See `scram-common.c : scram_SaltedPassword` for details.
|
||||||
/// Further reading: <https://datatracker.ietf.org/doc/html/rfc2898> (see `PBKDF2`).
|
/// Further reading: <https://datatracker.ietf.org/doc/html/rfc2898> (see `PBKDF2`).
|
||||||
pub fn new(password: &[u8], salt: &[u8], iterations: u32) -> SaltedPassword {
|
pub fn new(password: &[u8], salt: &[u8], iterations: u32) -> SaltedPassword {
|
||||||
let one = 1_u32.to_be_bytes(); // magic
|
pbkdf2::pbkdf2_hmac_array::<sha2::Sha256, 32>(password, salt, iterations).into()
|
||||||
|
|
||||||
let mut current = super::hmac_sha256(password, [salt, &one]);
|
|
||||||
let mut result = current;
|
|
||||||
for _ in 1..iterations {
|
|
||||||
current = super::hmac_sha256(password, [current.as_ref()]);
|
|
||||||
// TODO: result = current.zip(result).map(|(x, y)| x ^ y), issue #80094
|
|
||||||
for (i, x) in current.iter().enumerate() {
|
|
||||||
result[i] ^= x;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
result.into()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Derive `ClientKey` from a salted hashed password.
|
/// Derive `ClientKey` from a salted hashed password.
|
||||||
@@ -46,3 +34,41 @@ impl From<[u8; SALTED_PASSWORD_LEN]> for SaltedPassword {
|
|||||||
Self { bytes }
|
Self { bytes }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::SaltedPassword;
|
||||||
|
|
||||||
|
fn legacy_pbkdf2_impl(password: &[u8], salt: &[u8], iterations: u32) -> SaltedPassword {
|
||||||
|
let one = 1_u32.to_be_bytes(); // magic
|
||||||
|
|
||||||
|
let mut current = super::super::hmac_sha256(password, [salt, &one]);
|
||||||
|
let mut result = current;
|
||||||
|
for _ in 1..iterations {
|
||||||
|
current = super::super::hmac_sha256(password, [current.as_ref()]);
|
||||||
|
// TODO: result = current.zip(result).map(|(x, y)| x ^ y), issue #80094
|
||||||
|
for (i, x) in current.iter().enumerate() {
|
||||||
|
result[i] ^= x;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
result.into()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn pbkdf2() {
|
||||||
|
let password = "a-very-secure-password";
|
||||||
|
let salt = "such-a-random-salt";
|
||||||
|
let iterations = 4096;
|
||||||
|
let output = [
|
||||||
|
203, 18, 206, 81, 4, 154, 193, 100, 147, 41, 211, 217, 177, 203, 69, 210, 194, 211,
|
||||||
|
101, 1, 248, 156, 96, 0, 8, 223, 30, 87, 158, 41, 20, 42,
|
||||||
|
];
|
||||||
|
|
||||||
|
let actual = SaltedPassword::new(password.as_bytes(), salt.as_bytes(), iterations);
|
||||||
|
let expected = legacy_pbkdf2_impl(password.as_bytes(), salt.as_bytes(), iterations);
|
||||||
|
|
||||||
|
assert_eq!(actual.bytes, output);
|
||||||
|
assert_eq!(actual.bytes, expected.bytes);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ authors = []
|
|||||||
[tool.poetry.dependencies]
|
[tool.poetry.dependencies]
|
||||||
python = "^3.9"
|
python = "^3.9"
|
||||||
pytest = "^7.3.1"
|
pytest = "^7.3.1"
|
||||||
psycopg2-binary = "^2.9.1"
|
psycopg2-binary = "^2.9.6"
|
||||||
typing-extensions = "^4.6.1"
|
typing-extensions = "^4.6.1"
|
||||||
PyJWT = {version = "^2.1.0", extras = ["crypto"]}
|
PyJWT = {version = "^2.1.0", extras = ["crypto"]}
|
||||||
requests = "^2.31.0"
|
requests = "^2.31.0"
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
[toolchain]
|
[toolchain]
|
||||||
channel = "1.68.2"
|
channel = "1.70.0"
|
||||||
profile = "default"
|
profile = "default"
|
||||||
# The default profile includes rustc, rust-std, cargo, rust-docs, rustfmt and clippy.
|
# The default profile includes rustc, rust-std, cargo, rust-docs, rustfmt and clippy.
|
||||||
# https://rust-lang.github.io/rustup/concepts/profiles.html
|
# https://rust-lang.github.io/rustup/concepts/profiles.html
|
||||||
|
|||||||
@@ -3,15 +3,19 @@
|
|||||||
//
|
//
|
||||||
use anyhow::{bail, Context, Result};
|
use anyhow::{bail, Context, Result};
|
||||||
use clap::Parser;
|
use clap::Parser;
|
||||||
|
use futures::future::BoxFuture;
|
||||||
|
use futures::stream::FuturesUnordered;
|
||||||
|
use futures::{FutureExt, StreamExt};
|
||||||
use remote_storage::RemoteStorageConfig;
|
use remote_storage::RemoteStorageConfig;
|
||||||
|
use tokio::runtime::Handle;
|
||||||
|
use tokio::signal::unix::{signal, SignalKind};
|
||||||
|
use tokio::task::JoinError;
|
||||||
use toml_edit::Document;
|
use toml_edit::Document;
|
||||||
use utils::signals::ShutdownSignals;
|
|
||||||
|
|
||||||
use std::fs::{self, File};
|
use std::fs::{self, File};
|
||||||
use std::io::{ErrorKind, Write};
|
use std::io::{ErrorKind, Write};
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::thread;
|
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
use storage_broker::Uri;
|
use storage_broker::Uri;
|
||||||
use tokio::sync::mpsc;
|
use tokio::sync::mpsc;
|
||||||
@@ -20,22 +24,21 @@ use tracing::*;
|
|||||||
use utils::pid_file;
|
use utils::pid_file;
|
||||||
|
|
||||||
use metrics::set_build_info_metric;
|
use metrics::set_build_info_metric;
|
||||||
use safekeeper::broker;
|
|
||||||
use safekeeper::control_file;
|
|
||||||
use safekeeper::defaults::{
|
use safekeeper::defaults::{
|
||||||
DEFAULT_HEARTBEAT_TIMEOUT, DEFAULT_HTTP_LISTEN_ADDR, DEFAULT_MAX_OFFLOADER_LAG_BYTES,
|
DEFAULT_HEARTBEAT_TIMEOUT, DEFAULT_HTTP_LISTEN_ADDR, DEFAULT_MAX_OFFLOADER_LAG_BYTES,
|
||||||
DEFAULT_PG_LISTEN_ADDR,
|
DEFAULT_PG_LISTEN_ADDR,
|
||||||
};
|
};
|
||||||
use safekeeper::http;
|
|
||||||
use safekeeper::remove_wal;
|
|
||||||
use safekeeper::wal_backup;
|
|
||||||
use safekeeper::wal_service;
|
use safekeeper::wal_service;
|
||||||
use safekeeper::GlobalTimelines;
|
use safekeeper::GlobalTimelines;
|
||||||
use safekeeper::SafeKeeperConf;
|
use safekeeper::SafeKeeperConf;
|
||||||
|
use safekeeper::{broker, WAL_SERVICE_RUNTIME};
|
||||||
|
use safekeeper::{control_file, BROKER_RUNTIME};
|
||||||
|
use safekeeper::{http, WAL_REMOVER_RUNTIME};
|
||||||
|
use safekeeper::{remove_wal, WAL_BACKUP_RUNTIME};
|
||||||
|
use safekeeper::{wal_backup, HTTP_RUNTIME};
|
||||||
use storage_broker::DEFAULT_ENDPOINT;
|
use storage_broker::DEFAULT_ENDPOINT;
|
||||||
use utils::auth::JwtAuth;
|
use utils::auth::JwtAuth;
|
||||||
use utils::{
|
use utils::{
|
||||||
http::endpoint,
|
|
||||||
id::NodeId,
|
id::NodeId,
|
||||||
logging::{self, LogFormat},
|
logging::{self, LogFormat},
|
||||||
project_git_version,
|
project_git_version,
|
||||||
@@ -104,10 +107,6 @@ struct Args {
|
|||||||
/// Safekeeper won't be elected for WAL offloading if it is lagging for more than this value in bytes
|
/// Safekeeper won't be elected for WAL offloading if it is lagging for more than this value in bytes
|
||||||
#[arg(long, default_value_t = DEFAULT_MAX_OFFLOADER_LAG_BYTES)]
|
#[arg(long, default_value_t = DEFAULT_MAX_OFFLOADER_LAG_BYTES)]
|
||||||
max_offloader_lag: u64,
|
max_offloader_lag: u64,
|
||||||
/// Number of threads for wal backup runtime, by default number of cores
|
|
||||||
/// available to the system.
|
|
||||||
#[arg(long)]
|
|
||||||
wal_backup_threads: Option<usize>,
|
|
||||||
/// Number of max parallel WAL segments to be offloaded to remote storage.
|
/// Number of max parallel WAL segments to be offloaded to remote storage.
|
||||||
#[arg(long, default_value = "5")]
|
#[arg(long, default_value = "5")]
|
||||||
wal_backup_parallel_jobs: usize,
|
wal_backup_parallel_jobs: usize,
|
||||||
@@ -121,9 +120,14 @@ struct Args {
|
|||||||
/// Format for logging, either 'plain' or 'json'.
|
/// Format for logging, either 'plain' or 'json'.
|
||||||
#[arg(long, default_value = "plain")]
|
#[arg(long, default_value = "plain")]
|
||||||
log_format: String,
|
log_format: String,
|
||||||
|
/// Run everything in single threaded current thread runtime, might be
|
||||||
|
/// useful for debugging.
|
||||||
|
#[arg(long)]
|
||||||
|
current_thread_runtime: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
fn main() -> anyhow::Result<()> {
|
#[tokio::main(flavor = "current_thread")]
|
||||||
|
async fn main() -> anyhow::Result<()> {
|
||||||
let args = Args::parse();
|
let args = Args::parse();
|
||||||
|
|
||||||
if let Some(addr) = args.dump_control_file {
|
if let Some(addr) = args.dump_control_file {
|
||||||
@@ -183,10 +187,10 @@ fn main() -> anyhow::Result<()> {
|
|||||||
heartbeat_timeout: args.heartbeat_timeout,
|
heartbeat_timeout: args.heartbeat_timeout,
|
||||||
remote_storage: args.remote_storage,
|
remote_storage: args.remote_storage,
|
||||||
max_offloader_lag_bytes: args.max_offloader_lag,
|
max_offloader_lag_bytes: args.max_offloader_lag,
|
||||||
backup_runtime_threads: args.wal_backup_threads,
|
|
||||||
wal_backup_enabled: !args.disable_wal_backup,
|
wal_backup_enabled: !args.disable_wal_backup,
|
||||||
backup_parallel_jobs: args.wal_backup_parallel_jobs,
|
backup_parallel_jobs: args.wal_backup_parallel_jobs,
|
||||||
auth,
|
auth,
|
||||||
|
current_thread_runtime: args.current_thread_runtime,
|
||||||
};
|
};
|
||||||
|
|
||||||
// initialize sentry if SENTRY_DSN is provided
|
// initialize sentry if SENTRY_DSN is provided
|
||||||
@@ -194,10 +198,14 @@ fn main() -> anyhow::Result<()> {
|
|||||||
Some(GIT_VERSION.into()),
|
Some(GIT_VERSION.into()),
|
||||||
&[("node_id", &conf.my_id.to_string())],
|
&[("node_id", &conf.my_id.to_string())],
|
||||||
);
|
);
|
||||||
start_safekeeper(conf)
|
start_safekeeper(conf).await
|
||||||
}
|
}
|
||||||
|
|
||||||
fn start_safekeeper(conf: SafeKeeperConf) -> Result<()> {
|
/// Result of joining any of main tasks: upper error means task failed to
|
||||||
|
/// complete, e.g. panicked, inner is error produced by task itself.
|
||||||
|
type JoinTaskRes = Result<anyhow::Result<()>, JoinError>;
|
||||||
|
|
||||||
|
async fn start_safekeeper(conf: SafeKeeperConf) -> Result<()> {
|
||||||
// Prevent running multiple safekeepers on the same directory
|
// Prevent running multiple safekeepers on the same directory
|
||||||
let lock_file_path = conf.workdir.join(PID_FILE_NAME);
|
let lock_file_path = conf.workdir.join(PID_FILE_NAME);
|
||||||
let lock_file =
|
let lock_file =
|
||||||
@@ -208,14 +216,18 @@ fn start_safekeeper(conf: SafeKeeperConf) -> Result<()> {
|
|||||||
// we need to release the lock file only when the current process is gone
|
// we need to release the lock file only when the current process is gone
|
||||||
std::mem::forget(lock_file);
|
std::mem::forget(lock_file);
|
||||||
|
|
||||||
let http_listener = tcp_listener::bind(conf.listen_http_addr.clone()).map_err(|e| {
|
info!("starting safekeeper WAL service on {}", conf.listen_pg_addr);
|
||||||
error!("failed to bind to address {}: {}", conf.listen_http_addr, e);
|
let pg_listener = tcp_listener::bind(conf.listen_pg_addr.clone()).map_err(|e| {
|
||||||
|
error!("failed to bind to address {}: {}", conf.listen_pg_addr, e);
|
||||||
e
|
e
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
info!("starting safekeeper on {}", conf.listen_pg_addr);
|
info!(
|
||||||
let pg_listener = tcp_listener::bind(conf.listen_pg_addr.clone()).map_err(|e| {
|
"starting safekeeper HTTP service on {}",
|
||||||
error!("failed to bind to address {}: {}", conf.listen_pg_addr, e);
|
conf.listen_http_addr
|
||||||
|
);
|
||||||
|
let http_listener = tcp_listener::bind(conf.listen_http_addr.clone()).map_err(|e| {
|
||||||
|
error!("failed to bind to address {}: {}", conf.listen_http_addr, e);
|
||||||
e
|
e
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
@@ -224,71 +236,88 @@ fn start_safekeeper(conf: SafeKeeperConf) -> Result<()> {
|
|||||||
let timeline_collector = safekeeper::metrics::TimelineCollector::new();
|
let timeline_collector = safekeeper::metrics::TimelineCollector::new();
|
||||||
metrics::register_internal(Box::new(timeline_collector))?;
|
metrics::register_internal(Box::new(timeline_collector))?;
|
||||||
|
|
||||||
let mut threads = vec![];
|
|
||||||
let (wal_backup_launcher_tx, wal_backup_launcher_rx) = mpsc::channel(100);
|
let (wal_backup_launcher_tx, wal_backup_launcher_rx) = mpsc::channel(100);
|
||||||
|
|
||||||
// Load all timelines from disk to memory.
|
// Load all timelines from disk to memory.
|
||||||
GlobalTimelines::init(conf.clone(), wal_backup_launcher_tx)?;
|
GlobalTimelines::init(conf.clone(), wal_backup_launcher_tx)?;
|
||||||
|
|
||||||
let conf_ = conf.clone();
|
// Keep handles to main tasks to die if any of them disappears.
|
||||||
threads.push(
|
let mut tasks_handles: FuturesUnordered<BoxFuture<(String, JoinTaskRes)>> =
|
||||||
thread::Builder::new()
|
FuturesUnordered::new();
|
||||||
.name("http_endpoint_thread".into())
|
|
||||||
.spawn(|| {
|
|
||||||
let router = http::make_router(conf_);
|
|
||||||
endpoint::serve_thread_main(
|
|
||||||
router,
|
|
||||||
http_listener,
|
|
||||||
std::future::pending(), // never shut down
|
|
||||||
)
|
|
||||||
.unwrap();
|
|
||||||
})?,
|
|
||||||
);
|
|
||||||
|
|
||||||
let conf_cloned = conf.clone();
|
|
||||||
let safekeeper_thread = thread::Builder::new()
|
|
||||||
.name("WAL service thread".into())
|
|
||||||
.spawn(|| wal_service::thread_main(conf_cloned, pg_listener))
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
threads.push(safekeeper_thread);
|
|
||||||
|
|
||||||
let conf_ = conf.clone();
|
let conf_ = conf.clone();
|
||||||
threads.push(
|
// Run everything in current thread rt, if asked.
|
||||||
thread::Builder::new()
|
if conf.current_thread_runtime {
|
||||||
.name("broker thread".into())
|
info!("running in current thread runtime");
|
||||||
.spawn(|| {
|
}
|
||||||
broker::thread_main(conf_);
|
let current_thread_rt = conf
|
||||||
})?,
|
.current_thread_runtime
|
||||||
);
|
.then(|| Handle::try_current().expect("no runtime in main"));
|
||||||
|
let wal_service_handle = current_thread_rt
|
||||||
|
.as_ref()
|
||||||
|
.unwrap_or_else(|| WAL_SERVICE_RUNTIME.handle())
|
||||||
|
.spawn(wal_service::task_main(conf_, pg_listener))
|
||||||
|
// wrap with task name for error reporting
|
||||||
|
.map(|res| ("WAL service main".to_owned(), res));
|
||||||
|
tasks_handles.push(Box::pin(wal_service_handle));
|
||||||
|
|
||||||
let conf_ = conf.clone();
|
let conf_ = conf.clone();
|
||||||
threads.push(
|
let http_handle = current_thread_rt
|
||||||
thread::Builder::new()
|
.as_ref()
|
||||||
.name("WAL removal thread".into())
|
.unwrap_or_else(|| HTTP_RUNTIME.handle())
|
||||||
.spawn(|| {
|
.spawn(http::task_main(conf_, http_listener))
|
||||||
remove_wal::thread_main(conf_);
|
.map(|res| ("HTTP service main".to_owned(), res));
|
||||||
})?,
|
tasks_handles.push(Box::pin(http_handle));
|
||||||
);
|
|
||||||
|
|
||||||
threads.push(
|
let conf_ = conf.clone();
|
||||||
thread::Builder::new()
|
let broker_task_handle = current_thread_rt
|
||||||
.name("WAL backup launcher thread".into())
|
.as_ref()
|
||||||
.spawn(move || {
|
.unwrap_or_else(|| BROKER_RUNTIME.handle())
|
||||||
wal_backup::wal_backup_launcher_thread_main(conf, wal_backup_launcher_rx);
|
.spawn(broker::task_main(conf_).instrument(info_span!("broker")))
|
||||||
})?,
|
.map(|res| ("broker main".to_owned(), res));
|
||||||
);
|
tasks_handles.push(Box::pin(broker_task_handle));
|
||||||
|
|
||||||
|
let conf_ = conf.clone();
|
||||||
|
let wal_remover_handle = current_thread_rt
|
||||||
|
.as_ref()
|
||||||
|
.unwrap_or_else(|| WAL_REMOVER_RUNTIME.handle())
|
||||||
|
.spawn(remove_wal::task_main(conf_))
|
||||||
|
.map(|res| ("WAL remover".to_owned(), res));
|
||||||
|
tasks_handles.push(Box::pin(wal_remover_handle));
|
||||||
|
|
||||||
|
let conf_ = conf.clone();
|
||||||
|
let wal_backup_handle = current_thread_rt
|
||||||
|
.as_ref()
|
||||||
|
.unwrap_or_else(|| WAL_BACKUP_RUNTIME.handle())
|
||||||
|
.spawn(wal_backup::wal_backup_launcher_task_main(
|
||||||
|
conf_,
|
||||||
|
wal_backup_launcher_rx,
|
||||||
|
))
|
||||||
|
.map(|res| ("WAL backup launcher".to_owned(), res));
|
||||||
|
tasks_handles.push(Box::pin(wal_backup_handle));
|
||||||
|
|
||||||
set_build_info_metric(GIT_VERSION);
|
set_build_info_metric(GIT_VERSION);
|
||||||
// TODO: put more thoughts into handling of failed threads
|
|
||||||
// We should catch & die if they are in trouble.
|
|
||||||
|
|
||||||
// On any shutdown signal, log receival and exit. Additionally, handling
|
// TODO: update tokio-stream, convert to real async Stream with
|
||||||
// SIGQUIT prevents coredump.
|
// SignalStream, map it to obtain missing signal name, combine streams into
|
||||||
ShutdownSignals::handle(|signal| {
|
// single stream we can easily sit on.
|
||||||
info!("received {}, terminating", signal.name());
|
let mut sigquit_stream = signal(SignalKind::quit())?;
|
||||||
std::process::exit(0);
|
let mut sigint_stream = signal(SignalKind::interrupt())?;
|
||||||
})
|
let mut sigterm_stream = signal(SignalKind::terminate())?;
|
||||||
|
|
||||||
|
tokio::select! {
|
||||||
|
Some((task_name, res)) = tasks_handles.next()=> {
|
||||||
|
error!("{} task failed: {:?}, exiting", task_name, res);
|
||||||
|
std::process::exit(1);
|
||||||
|
}
|
||||||
|
// On any shutdown signal, log receival and exit. Additionally, handling
|
||||||
|
// SIGQUIT prevents coredump.
|
||||||
|
_ = sigquit_stream.recv() => info!("received SIGQUIT, terminating"),
|
||||||
|
_ = sigint_stream.recv() => info!("received SIGINT, terminating"),
|
||||||
|
_ = sigterm_stream.recv() => info!("received SIGTERM, terminating")
|
||||||
|
|
||||||
|
};
|
||||||
|
std::process::exit(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Determine safekeeper id.
|
/// Determine safekeeper id.
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ use anyhow::Error;
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
|
|
||||||
use storage_broker::parse_proto_ttid;
|
use storage_broker::parse_proto_ttid;
|
||||||
use storage_broker::proto::broker_service_client::BrokerServiceClient;
|
|
||||||
use storage_broker::proto::subscribe_safekeeper_info_request::SubscriptionKey as ProtoSubscriptionKey;
|
use storage_broker::proto::subscribe_safekeeper_info_request::SubscriptionKey as ProtoSubscriptionKey;
|
||||||
use storage_broker::proto::SubscribeSafekeeperInfoRequest;
|
use storage_broker::proto::SubscribeSafekeeperInfoRequest;
|
||||||
use storage_broker::Request;
|
use storage_broker::Request;
|
||||||
@@ -16,7 +16,7 @@ use storage_broker::Request;
|
|||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
use tokio::task::JoinHandle;
|
use tokio::task::JoinHandle;
|
||||||
use tokio::{runtime, time::sleep};
|
use tokio::time::sleep;
|
||||||
use tracing::*;
|
use tracing::*;
|
||||||
|
|
||||||
use crate::metrics::BROKER_ITERATION_TIMELINES;
|
use crate::metrics::BROKER_ITERATION_TIMELINES;
|
||||||
@@ -29,23 +29,10 @@ use crate::SafeKeeperConf;
|
|||||||
const RETRY_INTERVAL_MSEC: u64 = 1000;
|
const RETRY_INTERVAL_MSEC: u64 = 1000;
|
||||||
const PUSH_INTERVAL_MSEC: u64 = 1000;
|
const PUSH_INTERVAL_MSEC: u64 = 1000;
|
||||||
|
|
||||||
pub fn thread_main(conf: SafeKeeperConf) {
|
|
||||||
let runtime = runtime::Builder::new_current_thread()
|
|
||||||
.enable_all()
|
|
||||||
.build()
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
let _enter = info_span!("broker").entered();
|
|
||||||
info!("started, broker endpoint {:?}", conf.broker_endpoint);
|
|
||||||
|
|
||||||
runtime.block_on(async {
|
|
||||||
main_loop(conf).await;
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Push once in a while data about all active timelines to the broker.
|
/// Push once in a while data about all active timelines to the broker.
|
||||||
async fn push_loop(conf: SafeKeeperConf) -> anyhow::Result<()> {
|
async fn push_loop(conf: SafeKeeperConf) -> anyhow::Result<()> {
|
||||||
let mut client = BrokerServiceClient::connect(conf.broker_endpoint.clone()).await?;
|
let mut client =
|
||||||
|
storage_broker::connect(conf.broker_endpoint.clone(), conf.broker_keepalive_interval)?;
|
||||||
let push_interval = Duration::from_millis(PUSH_INTERVAL_MSEC);
|
let push_interval = Duration::from_millis(PUSH_INTERVAL_MSEC);
|
||||||
|
|
||||||
let outbound = async_stream::stream! {
|
let outbound = async_stream::stream! {
|
||||||
@@ -55,20 +42,27 @@ async fn push_loop(conf: SafeKeeperConf) -> anyhow::Result<()> {
|
|||||||
// sensitive and there is no risk of deadlock as we don't await while
|
// sensitive and there is no risk of deadlock as we don't await while
|
||||||
// lock is held.
|
// lock is held.
|
||||||
let now = Instant::now();
|
let now = Instant::now();
|
||||||
let mut active_tlis = GlobalTimelines::get_all();
|
let all_tlis = GlobalTimelines::get_all();
|
||||||
active_tlis.retain(|tli| tli.is_active());
|
let mut n_pushed_tlis = 0;
|
||||||
for tli in &active_tlis {
|
for tli in &all_tlis {
|
||||||
let sk_info = tli.get_safekeeper_info(&conf);
|
// filtering alternative futures::stream::iter(all_tlis)
|
||||||
|
// .filter(|tli| {let tli = tli.clone(); async move { tli.is_active().await}}).collect::<Vec<_>>().await;
|
||||||
|
// doesn't look better, and I'm not sure how to do that without collect.
|
||||||
|
if !tli.is_active().await {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let sk_info = tli.get_safekeeper_info(&conf).await;
|
||||||
yield sk_info;
|
yield sk_info;
|
||||||
BROKER_PUSHED_UPDATES.inc();
|
BROKER_PUSHED_UPDATES.inc();
|
||||||
|
n_pushed_tlis += 1;
|
||||||
}
|
}
|
||||||
let elapsed = now.elapsed();
|
let elapsed = now.elapsed();
|
||||||
|
|
||||||
BROKER_PUSH_ALL_UPDATES_SECONDS.observe(elapsed.as_secs_f64());
|
BROKER_PUSH_ALL_UPDATES_SECONDS.observe(elapsed.as_secs_f64());
|
||||||
BROKER_ITERATION_TIMELINES.observe(active_tlis.len() as f64);
|
BROKER_ITERATION_TIMELINES.observe(n_pushed_tlis as f64);
|
||||||
|
|
||||||
if elapsed > push_interval / 2 {
|
if elapsed > push_interval / 2 {
|
||||||
info!("broker push is too long, pushed {} timeline updates to broker in {:?}", active_tlis.len(), elapsed);
|
info!("broker push is too long, pushed {} timeline updates to broker in {:?}", n_pushed_tlis, elapsed);
|
||||||
}
|
}
|
||||||
|
|
||||||
sleep(push_interval).await;
|
sleep(push_interval).await;
|
||||||
@@ -125,10 +119,13 @@ async fn pull_loop(conf: SafeKeeperConf) -> Result<()> {
|
|||||||
bail!("end of stream");
|
bail!("end of stream");
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn main_loop(conf: SafeKeeperConf) {
|
pub async fn task_main(conf: SafeKeeperConf) -> anyhow::Result<()> {
|
||||||
|
info!("started, broker endpoint {:?}", conf.broker_endpoint);
|
||||||
|
|
||||||
let mut ticker = tokio::time::interval(Duration::from_millis(RETRY_INTERVAL_MSEC));
|
let mut ticker = tokio::time::interval(Duration::from_millis(RETRY_INTERVAL_MSEC));
|
||||||
let mut push_handle: Option<JoinHandle<Result<(), Error>>> = None;
|
let mut push_handle: Option<JoinHandle<Result<(), Error>>> = None;
|
||||||
let mut pull_handle: Option<JoinHandle<Result<(), Error>>> = None;
|
let mut pull_handle: Option<JoinHandle<Result<(), Error>>> = None;
|
||||||
|
|
||||||
// Selecting on JoinHandles requires some squats; is there a better way to
|
// Selecting on JoinHandles requires some squats; is there a better way to
|
||||||
// reap tasks individually?
|
// reap tasks individually?
|
||||||
|
|
||||||
|
|||||||
@@ -2,9 +2,10 @@
|
|||||||
|
|
||||||
use anyhow::{bail, ensure, Context, Result};
|
use anyhow::{bail, ensure, Context, Result};
|
||||||
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
|
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
|
||||||
|
use tokio::fs::{self, File};
|
||||||
|
use tokio::io::AsyncWriteExt;
|
||||||
|
|
||||||
use std::fs::{self, File, OpenOptions};
|
use std::io::Read;
|
||||||
use std::io::{Read, Write};
|
|
||||||
use std::ops::Deref;
|
use std::ops::Deref;
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
@@ -26,9 +27,10 @@ pub const CHECKSUM_SIZE: usize = std::mem::size_of::<u32>();
|
|||||||
|
|
||||||
/// Storage should keep actual state inside of it. It should implement Deref
|
/// Storage should keep actual state inside of it. It should implement Deref
|
||||||
/// trait to access state fields and have persist method for updating that state.
|
/// trait to access state fields and have persist method for updating that state.
|
||||||
|
#[async_trait::async_trait]
|
||||||
pub trait Storage: Deref<Target = SafeKeeperState> {
|
pub trait Storage: Deref<Target = SafeKeeperState> {
|
||||||
/// Persist safekeeper state on disk and update internal state.
|
/// Persist safekeeper state on disk and update internal state.
|
||||||
fn persist(&mut self, s: &SafeKeeperState) -> Result<()>;
|
async fn persist(&mut self, s: &SafeKeeperState) -> Result<()>;
|
||||||
|
|
||||||
/// Timestamp of last persist.
|
/// Timestamp of last persist.
|
||||||
fn last_persist_at(&self) -> Instant;
|
fn last_persist_at(&self) -> Instant;
|
||||||
@@ -82,7 +84,7 @@ impl FileStorage {
|
|||||||
/// Check the magic/version in the on-disk data and deserialize it, if possible.
|
/// Check the magic/version in the on-disk data and deserialize it, if possible.
|
||||||
fn deser_sk_state(buf: &mut &[u8]) -> Result<SafeKeeperState> {
|
fn deser_sk_state(buf: &mut &[u8]) -> Result<SafeKeeperState> {
|
||||||
// Read the version independent part
|
// Read the version independent part
|
||||||
let magic = buf.read_u32::<LittleEndian>()?;
|
let magic = ReadBytesExt::read_u32::<LittleEndian>(buf)?;
|
||||||
if magic != SK_MAGIC {
|
if magic != SK_MAGIC {
|
||||||
bail!(
|
bail!(
|
||||||
"bad control file magic: {:X}, expected {:X}",
|
"bad control file magic: {:X}, expected {:X}",
|
||||||
@@ -90,7 +92,7 @@ impl FileStorage {
|
|||||||
SK_MAGIC
|
SK_MAGIC
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
let version = buf.read_u32::<LittleEndian>()?;
|
let version = ReadBytesExt::read_u32::<LittleEndian>(buf)?;
|
||||||
if version == SK_FORMAT_VERSION {
|
if version == SK_FORMAT_VERSION {
|
||||||
let res = SafeKeeperState::des(buf)?;
|
let res = SafeKeeperState::des(buf)?;
|
||||||
return Ok(res);
|
return Ok(res);
|
||||||
@@ -110,7 +112,7 @@ impl FileStorage {
|
|||||||
|
|
||||||
/// Read in the control file.
|
/// Read in the control file.
|
||||||
pub fn load_control_file<P: AsRef<Path>>(control_file_path: P) -> Result<SafeKeeperState> {
|
pub fn load_control_file<P: AsRef<Path>>(control_file_path: P) -> Result<SafeKeeperState> {
|
||||||
let mut control_file = OpenOptions::new()
|
let mut control_file = std::fs::OpenOptions::new()
|
||||||
.read(true)
|
.read(true)
|
||||||
.write(true)
|
.write(true)
|
||||||
.open(&control_file_path)
|
.open(&control_file_path)
|
||||||
@@ -159,30 +161,31 @@ impl Deref for FileStorage {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[async_trait::async_trait]
|
||||||
impl Storage for FileStorage {
|
impl Storage for FileStorage {
|
||||||
/// persists state durably to underlying storage
|
/// persists state durably to underlying storage
|
||||||
/// for description see https://lwn.net/Articles/457667/
|
/// for description see https://lwn.net/Articles/457667/
|
||||||
fn persist(&mut self, s: &SafeKeeperState) -> Result<()> {
|
async fn persist(&mut self, s: &SafeKeeperState) -> Result<()> {
|
||||||
let _timer = PERSIST_CONTROL_FILE_SECONDS.start_timer();
|
let _timer = PERSIST_CONTROL_FILE_SECONDS.start_timer();
|
||||||
|
|
||||||
// write data to safekeeper.control.partial
|
// write data to safekeeper.control.partial
|
||||||
let control_partial_path = self.timeline_dir.join(CONTROL_FILE_NAME_PARTIAL);
|
let control_partial_path = self.timeline_dir.join(CONTROL_FILE_NAME_PARTIAL);
|
||||||
let mut control_partial = File::create(&control_partial_path).with_context(|| {
|
let mut control_partial = File::create(&control_partial_path).await.with_context(|| {
|
||||||
format!(
|
format!(
|
||||||
"failed to create partial control file at: {}",
|
"failed to create partial control file at: {}",
|
||||||
&control_partial_path.display()
|
&control_partial_path.display()
|
||||||
)
|
)
|
||||||
})?;
|
})?;
|
||||||
let mut buf: Vec<u8> = Vec::new();
|
let mut buf: Vec<u8> = Vec::new();
|
||||||
buf.write_u32::<LittleEndian>(SK_MAGIC)?;
|
WriteBytesExt::write_u32::<LittleEndian>(&mut buf, SK_MAGIC)?;
|
||||||
buf.write_u32::<LittleEndian>(SK_FORMAT_VERSION)?;
|
WriteBytesExt::write_u32::<LittleEndian>(&mut buf, SK_FORMAT_VERSION)?;
|
||||||
s.ser_into(&mut buf)?;
|
s.ser_into(&mut buf)?;
|
||||||
|
|
||||||
// calculate checksum before resize
|
// calculate checksum before resize
|
||||||
let checksum = crc32c::crc32c(&buf);
|
let checksum = crc32c::crc32c(&buf);
|
||||||
buf.extend_from_slice(&checksum.to_le_bytes());
|
buf.extend_from_slice(&checksum.to_le_bytes());
|
||||||
|
|
||||||
control_partial.write_all(&buf).with_context(|| {
|
control_partial.write_all(&buf).await.with_context(|| {
|
||||||
format!(
|
format!(
|
||||||
"failed to write safekeeper state into control file at: {}",
|
"failed to write safekeeper state into control file at: {}",
|
||||||
control_partial_path.display()
|
control_partial_path.display()
|
||||||
@@ -191,7 +194,7 @@ impl Storage for FileStorage {
|
|||||||
|
|
||||||
// fsync the file
|
// fsync the file
|
||||||
if !self.conf.no_sync {
|
if !self.conf.no_sync {
|
||||||
control_partial.sync_all().with_context(|| {
|
control_partial.sync_all().await.with_context(|| {
|
||||||
format!(
|
format!(
|
||||||
"failed to sync partial control file at {}",
|
"failed to sync partial control file at {}",
|
||||||
control_partial_path.display()
|
control_partial_path.display()
|
||||||
@@ -202,21 +205,22 @@ impl Storage for FileStorage {
|
|||||||
let control_path = self.timeline_dir.join(CONTROL_FILE_NAME);
|
let control_path = self.timeline_dir.join(CONTROL_FILE_NAME);
|
||||||
|
|
||||||
// rename should be atomic
|
// rename should be atomic
|
||||||
fs::rename(&control_partial_path, &control_path)?;
|
fs::rename(&control_partial_path, &control_path).await?;
|
||||||
// this sync is not required by any standard but postgres does this (see durable_rename)
|
// this sync is not required by any standard but postgres does this (see durable_rename)
|
||||||
if !self.conf.no_sync {
|
if !self.conf.no_sync {
|
||||||
File::open(&control_path)
|
let new_f = File::open(&control_path).await?;
|
||||||
.and_then(|f| f.sync_all())
|
new_f.sync_all().await.with_context(|| {
|
||||||
.with_context(|| {
|
format!(
|
||||||
format!(
|
"failed to sync control file at: {}",
|
||||||
"failed to sync control file at: {}",
|
&control_path.display()
|
||||||
&control_path.display()
|
)
|
||||||
)
|
})?;
|
||||||
})?;
|
|
||||||
|
|
||||||
// fsync the directory (linux specific)
|
// fsync the directory (linux specific)
|
||||||
File::open(&self.timeline_dir)
|
let tli_dir = File::open(&self.timeline_dir).await?;
|
||||||
.and_then(|f| f.sync_all())
|
tli_dir
|
||||||
|
.sync_all()
|
||||||
|
.await
|
||||||
.context("failed to sync control file directory")?;
|
.context("failed to sync control file directory")?;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -236,7 +240,6 @@ mod test {
|
|||||||
use super::*;
|
use super::*;
|
||||||
use crate::{safekeeper::SafeKeeperState, SafeKeeperConf};
|
use crate::{safekeeper::SafeKeeperState, SafeKeeperConf};
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use std::fs;
|
|
||||||
use utils::{id::TenantTimelineId, lsn::Lsn};
|
use utils::{id::TenantTimelineId, lsn::Lsn};
|
||||||
|
|
||||||
fn stub_conf() -> SafeKeeperConf {
|
fn stub_conf() -> SafeKeeperConf {
|
||||||
@@ -247,59 +250,75 @@ mod test {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn load_from_control_file(
|
async fn load_from_control_file(
|
||||||
conf: &SafeKeeperConf,
|
conf: &SafeKeeperConf,
|
||||||
ttid: &TenantTimelineId,
|
ttid: &TenantTimelineId,
|
||||||
) -> Result<(FileStorage, SafeKeeperState)> {
|
) -> Result<(FileStorage, SafeKeeperState)> {
|
||||||
fs::create_dir_all(conf.timeline_dir(ttid)).expect("failed to create timeline dir");
|
fs::create_dir_all(conf.timeline_dir(ttid))
|
||||||
|
.await
|
||||||
|
.expect("failed to create timeline dir");
|
||||||
Ok((
|
Ok((
|
||||||
FileStorage::restore_new(ttid, conf)?,
|
FileStorage::restore_new(ttid, conf)?,
|
||||||
FileStorage::load_control_file_conf(conf, ttid)?,
|
FileStorage::load_control_file_conf(conf, ttid)?,
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn create(
|
async fn create(
|
||||||
conf: &SafeKeeperConf,
|
conf: &SafeKeeperConf,
|
||||||
ttid: &TenantTimelineId,
|
ttid: &TenantTimelineId,
|
||||||
) -> Result<(FileStorage, SafeKeeperState)> {
|
) -> Result<(FileStorage, SafeKeeperState)> {
|
||||||
fs::create_dir_all(conf.timeline_dir(ttid)).expect("failed to create timeline dir");
|
fs::create_dir_all(conf.timeline_dir(ttid))
|
||||||
|
.await
|
||||||
|
.expect("failed to create timeline dir");
|
||||||
let state = SafeKeeperState::empty();
|
let state = SafeKeeperState::empty();
|
||||||
let storage = FileStorage::create_new(ttid, conf, state.clone())?;
|
let storage = FileStorage::create_new(ttid, conf, state.clone())?;
|
||||||
Ok((storage, state))
|
Ok((storage, state))
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[tokio::test]
|
||||||
fn test_read_write_safekeeper_state() {
|
async fn test_read_write_safekeeper_state() {
|
||||||
let conf = stub_conf();
|
let conf = stub_conf();
|
||||||
let ttid = TenantTimelineId::generate();
|
let ttid = TenantTimelineId::generate();
|
||||||
{
|
{
|
||||||
let (mut storage, mut state) = create(&conf, &ttid).expect("failed to create state");
|
let (mut storage, mut state) =
|
||||||
|
create(&conf, &ttid).await.expect("failed to create state");
|
||||||
// change something
|
// change something
|
||||||
state.commit_lsn = Lsn(42);
|
state.commit_lsn = Lsn(42);
|
||||||
storage.persist(&state).expect("failed to persist state");
|
storage
|
||||||
|
.persist(&state)
|
||||||
|
.await
|
||||||
|
.expect("failed to persist state");
|
||||||
}
|
}
|
||||||
|
|
||||||
let (_, state) = load_from_control_file(&conf, &ttid).expect("failed to read state");
|
let (_, state) = load_from_control_file(&conf, &ttid)
|
||||||
|
.await
|
||||||
|
.expect("failed to read state");
|
||||||
assert_eq!(state.commit_lsn, Lsn(42));
|
assert_eq!(state.commit_lsn, Lsn(42));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[tokio::test]
|
||||||
fn test_safekeeper_state_checksum_mismatch() {
|
async fn test_safekeeper_state_checksum_mismatch() {
|
||||||
let conf = stub_conf();
|
let conf = stub_conf();
|
||||||
let ttid = TenantTimelineId::generate();
|
let ttid = TenantTimelineId::generate();
|
||||||
{
|
{
|
||||||
let (mut storage, mut state) = create(&conf, &ttid).expect("failed to read state");
|
let (mut storage, mut state) =
|
||||||
|
create(&conf, &ttid).await.expect("failed to read state");
|
||||||
|
|
||||||
// change something
|
// change something
|
||||||
state.commit_lsn = Lsn(42);
|
state.commit_lsn = Lsn(42);
|
||||||
storage.persist(&state).expect("failed to persist state");
|
storage
|
||||||
|
.persist(&state)
|
||||||
|
.await
|
||||||
|
.expect("failed to persist state");
|
||||||
}
|
}
|
||||||
let control_path = conf.timeline_dir(&ttid).join(CONTROL_FILE_NAME);
|
let control_path = conf.timeline_dir(&ttid).join(CONTROL_FILE_NAME);
|
||||||
let mut data = fs::read(&control_path).unwrap();
|
let mut data = fs::read(&control_path).await.unwrap();
|
||||||
data[0] += 1; // change the first byte of the file to fail checksum validation
|
data[0] += 1; // change the first byte of the file to fail checksum validation
|
||||||
fs::write(&control_path, &data).expect("failed to write control file");
|
fs::write(&control_path, &data)
|
||||||
|
.await
|
||||||
|
.expect("failed to write control file");
|
||||||
|
|
||||||
match load_from_control_file(&conf, &ttid) {
|
match load_from_control_file(&conf, &ttid).await {
|
||||||
Err(err) => assert!(err
|
Err(err) => assert!(err
|
||||||
.to_string()
|
.to_string()
|
||||||
.contains("safekeeper control file checksum mismatch")),
|
.contains("safekeeper control file checksum mismatch")),
|
||||||
|
|||||||
@@ -121,7 +121,7 @@ pub struct FileInfo {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Build debug dump response, using the provided [`Args`] filters.
|
/// Build debug dump response, using the provided [`Args`] filters.
|
||||||
pub fn build(args: Args) -> Result<Response> {
|
pub async fn build(args: Args) -> Result<Response> {
|
||||||
let start_time = Utc::now();
|
let start_time = Utc::now();
|
||||||
let timelines_count = GlobalTimelines::timelines_count();
|
let timelines_count = GlobalTimelines::timelines_count();
|
||||||
|
|
||||||
@@ -155,7 +155,7 @@ pub fn build(args: Args) -> Result<Response> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let control_file = if args.dump_control_file {
|
let control_file = if args.dump_control_file {
|
||||||
let mut state = tli.get_state().1;
|
let mut state = tli.get_state().await.1;
|
||||||
if !args.dump_term_history {
|
if !args.dump_term_history {
|
||||||
state.acceptor_state.term_history = TermHistory(vec![]);
|
state.acceptor_state.term_history = TermHistory(vec![]);
|
||||||
}
|
}
|
||||||
@@ -165,7 +165,7 @@ pub fn build(args: Args) -> Result<Response> {
|
|||||||
};
|
};
|
||||||
|
|
||||||
let memory = if args.dump_memory {
|
let memory = if args.dump_memory {
|
||||||
Some(tli.memory_dump())
|
Some(tli.memory_dump().await)
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -256,14 +256,14 @@ impl SafekeeperPostgresHandler {
|
|||||||
|
|
||||||
let lsn = if self.is_walproposer_recovery() {
|
let lsn = if self.is_walproposer_recovery() {
|
||||||
// walproposer should get all local WAL until flush_lsn
|
// walproposer should get all local WAL until flush_lsn
|
||||||
tli.get_flush_lsn()
|
tli.get_flush_lsn().await
|
||||||
} else {
|
} else {
|
||||||
// other clients shouldn't get any uncommitted WAL
|
// other clients shouldn't get any uncommitted WAL
|
||||||
tli.get_state().0.commit_lsn
|
tli.get_state().await.0.commit_lsn
|
||||||
}
|
}
|
||||||
.to_string();
|
.to_string();
|
||||||
|
|
||||||
let sysid = tli.get_state().1.server.system_id.to_string();
|
let sysid = tli.get_state().await.1.server.system_id.to_string();
|
||||||
let lsn_bytes = lsn.as_bytes();
|
let lsn_bytes = lsn.as_bytes();
|
||||||
let tli = PG_TLI.to_string();
|
let tli = PG_TLI.to_string();
|
||||||
let tli_bytes = tli.as_bytes();
|
let tli_bytes = tli.as_bytes();
|
||||||
|
|||||||
@@ -2,3 +2,18 @@ pub mod routes;
|
|||||||
pub use routes::make_router;
|
pub use routes::make_router;
|
||||||
|
|
||||||
pub use safekeeper_api::models;
|
pub use safekeeper_api::models;
|
||||||
|
|
||||||
|
use crate::SafeKeeperConf;
|
||||||
|
|
||||||
|
pub async fn task_main(
|
||||||
|
conf: SafeKeeperConf,
|
||||||
|
http_listener: std::net::TcpListener,
|
||||||
|
) -> anyhow::Result<()> {
|
||||||
|
let router = make_router(conf)
|
||||||
|
.build()
|
||||||
|
.map_err(|err| anyhow::anyhow!(err))?;
|
||||||
|
let service = utils::http::RouterService::new(router).unwrap();
|
||||||
|
let server = hyper::Server::from_tcp(http_listener)?;
|
||||||
|
server.serve(service).await?;
|
||||||
|
Ok(()) // unreachable
|
||||||
|
}
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user