mirror of
https://github.com/neondatabase/neon.git
synced 2026-06-21 14:20:37 +00:00
Compare commits
3 Commits
mx_offset_
...
skyzh/laye
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a3909e03f8 | ||
|
|
fc190a2a19 | ||
|
|
faee3152f3 |
9
.github/workflows/benchmarking.yml
vendored
9
.github/workflows/benchmarking.yml
vendored
@@ -180,8 +180,7 @@ jobs:
|
|||||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
|
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
|
||||||
options: --init
|
options: --init
|
||||||
|
|
||||||
# Increase timeout to 8h, default timeout is 6h
|
timeout-minutes: 360 # 6h
|
||||||
timeout-minutes: 480
|
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v3
|
- uses: actions/checkout@v3
|
||||||
@@ -322,6 +321,8 @@ jobs:
|
|||||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
|
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
|
||||||
options: --init
|
options: --init
|
||||||
|
|
||||||
|
timeout-minutes: 360 # 6h
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v3
|
- uses: actions/checkout@v3
|
||||||
|
|
||||||
@@ -413,6 +414,8 @@ jobs:
|
|||||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
|
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
|
||||||
options: --init
|
options: --init
|
||||||
|
|
||||||
|
timeout-minutes: 360 # 6h
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v3
|
- uses: actions/checkout@v3
|
||||||
|
|
||||||
@@ -498,6 +501,8 @@ jobs:
|
|||||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
|
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
|
||||||
options: --init
|
options: --init
|
||||||
|
|
||||||
|
timeout-minutes: 360 # 6h
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v3
|
- uses: actions/checkout@v3
|
||||||
|
|
||||||
|
|||||||
175
.github/workflows/build_and_test.yml
vendored
175
.github/workflows/build_and_test.yml
vendored
@@ -623,6 +623,51 @@ jobs:
|
|||||||
- name: Cleanup ECR folder
|
- name: Cleanup ECR folder
|
||||||
run: rm -rf ~/.ecr
|
run: rm -rf ~/.ecr
|
||||||
|
|
||||||
|
|
||||||
|
neon-image-depot:
|
||||||
|
# For testing this will run side-by-side for a few merges.
|
||||||
|
# This action is not really optimized yet, but gets the job done
|
||||||
|
runs-on: [ self-hosted, gen3, large ]
|
||||||
|
needs: [ tag ]
|
||||||
|
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
id-token: write
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v3
|
||||||
|
with:
|
||||||
|
submodules: true
|
||||||
|
fetch-depth: 0
|
||||||
|
|
||||||
|
- name: Setup go
|
||||||
|
uses: actions/setup-go@v3
|
||||||
|
with:
|
||||||
|
go-version: '1.19'
|
||||||
|
|
||||||
|
- name: Set up Depot CLI
|
||||||
|
uses: depot/setup-action@v1
|
||||||
|
|
||||||
|
- name: Install Crane & ECR helper
|
||||||
|
run: go install github.com/awslabs/amazon-ecr-credential-helper/ecr-login/cli/docker-credential-ecr-login@69c85dc22db6511932bbf119e1a0cc5c90c69a7f # v0.6.0
|
||||||
|
|
||||||
|
- name: Configure ECR login
|
||||||
|
run: |
|
||||||
|
mkdir /github/home/.docker/
|
||||||
|
echo "{\"credsStore\":\"ecr-login\"}" > /github/home/.docker/config.json
|
||||||
|
|
||||||
|
- name: Build and push
|
||||||
|
uses: depot/build-push-action@v1
|
||||||
|
with:
|
||||||
|
# if no depot.json file is at the root of your repo, you must specify the project id
|
||||||
|
project: nrdv0s4kcs
|
||||||
|
push: true
|
||||||
|
tags: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:depot-${{needs.tag.outputs.build-tag}}
|
||||||
|
build-args: |
|
||||||
|
GIT_VERSION=${{ github.sha }}
|
||||||
|
REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
|
||||||
|
|
||||||
compute-tools-image:
|
compute-tools-image:
|
||||||
runs-on: [ self-hosted, gen3, large ]
|
runs-on: [ self-hosted, gen3, large ]
|
||||||
needs: [ tag ]
|
needs: [ tag ]
|
||||||
@@ -659,7 +704,6 @@ jobs:
|
|||||||
--cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache
|
--cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache
|
||||||
--context .
|
--context .
|
||||||
--build-arg GIT_VERSION=${{ github.sha }}
|
--build-arg GIT_VERSION=${{ github.sha }}
|
||||||
--build-arg BUILD_TAG=${{needs.tag.outputs.build-tag}}
|
|
||||||
--build-arg REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
|
--build-arg REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
|
||||||
--dockerfile Dockerfile.compute-tools
|
--dockerfile Dockerfile.compute-tools
|
||||||
--destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}}
|
--destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}}
|
||||||
@@ -717,40 +761,10 @@ jobs:
|
|||||||
--context .
|
--context .
|
||||||
--build-arg GIT_VERSION=${{ github.sha }}
|
--build-arg GIT_VERSION=${{ github.sha }}
|
||||||
--build-arg PG_VERSION=${{ matrix.version }}
|
--build-arg PG_VERSION=${{ matrix.version }}
|
||||||
--build-arg BUILD_TAG=${{needs.tag.outputs.build-tag}}
|
|
||||||
--build-arg REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
|
--build-arg REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
|
||||||
--dockerfile Dockerfile.compute-node
|
--dockerfile Dockerfile.compute-node
|
||||||
--destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}
|
--destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}
|
||||||
--destination neondatabase/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}
|
--destination neondatabase/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}
|
||||||
--cleanup
|
|
||||||
|
|
||||||
# Due to a kaniko bug, we can't use cache for extensions image, thus it takes about the same amount of time as compute-node image to build (~10 min)
|
|
||||||
# During the transition period we need to have extensions in both places (in S3 and in compute-node image),
|
|
||||||
# so we won't build extension twice, but extract them from compute-node.
|
|
||||||
#
|
|
||||||
# For now we use extensions image only for new custom extensitons
|
|
||||||
- name: Kaniko build extensions only
|
|
||||||
run: |
|
|
||||||
# Kaniko is suposed to clean up after itself if --cleanup flag is set, but it doesn't.
|
|
||||||
# Despite some fixes were made in https://github.com/GoogleContainerTools/kaniko/pull/2504 (in kaniko v1.11.0),
|
|
||||||
# it still fails with error:
|
|
||||||
# error building image: could not save file: copying file: symlink postgres /kaniko/1/usr/local/pgsql/bin/postmaster: file exists
|
|
||||||
#
|
|
||||||
# Ref https://github.com/GoogleContainerTools/kaniko/issues/1406
|
|
||||||
find /kaniko -maxdepth 1 -mindepth 1 -type d -regex "/kaniko/[0-9]*" -exec rm -rv {} \;
|
|
||||||
|
|
||||||
/kaniko/executor --reproducible --snapshot-mode=redo --skip-unused-stages --cache=true \
|
|
||||||
--cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache \
|
|
||||||
--context . \
|
|
||||||
--build-arg GIT_VERSION=${{ github.sha }} \
|
|
||||||
--build-arg PG_VERSION=${{ matrix.version }} \
|
|
||||||
--build-arg BUILD_TAG=${{needs.tag.outputs.build-tag}} \
|
|
||||||
--build-arg REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com \
|
|
||||||
--dockerfile Dockerfile.compute-node \
|
|
||||||
--destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/extensions-${{ matrix.version }}:${{needs.tag.outputs.build-tag}} \
|
|
||||||
--destination neondatabase/extensions-${{ matrix.version }}:${{needs.tag.outputs.build-tag}} \
|
|
||||||
--cleanup \
|
|
||||||
--target postgres-extensions
|
|
||||||
|
|
||||||
# Cleanup script fails otherwise - rm: cannot remove '/nvme/actions-runner/_work/_temp/_github_home/.ecr': Permission denied
|
# Cleanup script fails otherwise - rm: cannot remove '/nvme/actions-runner/_work/_temp/_github_home/.ecr': Permission denied
|
||||||
- name: Cleanup ECR folder
|
- name: Cleanup ECR folder
|
||||||
@@ -767,7 +781,7 @@ jobs:
|
|||||||
run:
|
run:
|
||||||
shell: sh -eu {0}
|
shell: sh -eu {0}
|
||||||
env:
|
env:
|
||||||
VM_BUILDER_VERSION: v0.11.1
|
VM_BUILDER_VERSION: v0.8.0
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
@@ -869,10 +883,8 @@ jobs:
|
|||||||
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}} latest
|
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}} latest
|
||||||
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:${{needs.tag.outputs.build-tag}} latest
|
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:${{needs.tag.outputs.build-tag}} latest
|
||||||
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v14:${{needs.tag.outputs.build-tag}} latest
|
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v14:${{needs.tag.outputs.build-tag}} latest
|
||||||
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/extensions-v14:${{needs.tag.outputs.build-tag}} latest
|
|
||||||
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}} latest
|
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}} latest
|
||||||
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} latest
|
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} latest
|
||||||
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/extensions-v15:${{needs.tag.outputs.build-tag}} latest
|
|
||||||
|
|
||||||
- name: Push images to production ECR
|
- name: Push images to production ECR
|
||||||
if: |
|
if: |
|
||||||
@@ -883,10 +895,8 @@ jobs:
|
|||||||
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:latest
|
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:latest
|
||||||
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:latest
|
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:latest
|
||||||
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v14:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v14:latest
|
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v14:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v14:latest
|
||||||
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/extensions-v14:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/extensions-v14:latest
|
|
||||||
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:latest
|
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:latest
|
||||||
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v15:latest
|
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v15:latest
|
||||||
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/extensions-v15:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/extensions-v15:latest
|
|
||||||
|
|
||||||
- name: Configure Docker Hub login
|
- name: Configure Docker Hub login
|
||||||
run: |
|
run: |
|
||||||
@@ -908,93 +918,16 @@ jobs:
|
|||||||
crane tag neondatabase/compute-tools:${{needs.tag.outputs.build-tag}} latest
|
crane tag neondatabase/compute-tools:${{needs.tag.outputs.build-tag}} latest
|
||||||
crane tag neondatabase/compute-node-v14:${{needs.tag.outputs.build-tag}} latest
|
crane tag neondatabase/compute-node-v14:${{needs.tag.outputs.build-tag}} latest
|
||||||
crane tag neondatabase/vm-compute-node-v14:${{needs.tag.outputs.build-tag}} latest
|
crane tag neondatabase/vm-compute-node-v14:${{needs.tag.outputs.build-tag}} latest
|
||||||
crane tag neondatabase/extensions-v14:${{needs.tag.outputs.build-tag}} latest
|
|
||||||
crane tag neondatabase/compute-node-v15:${{needs.tag.outputs.build-tag}} latest
|
crane tag neondatabase/compute-node-v15:${{needs.tag.outputs.build-tag}} latest
|
||||||
crane tag neondatabase/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} latest
|
crane tag neondatabase/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} latest
|
||||||
crane tag neondatabase/extensions-v15:${{needs.tag.outputs.build-tag}} latest
|
|
||||||
|
|
||||||
- name: Cleanup ECR folder
|
- name: Cleanup ECR folder
|
||||||
run: rm -rf ~/.ecr
|
run: rm -rf ~/.ecr
|
||||||
|
|
||||||
upload-postgres-extensions-to-s3:
|
|
||||||
if: |
|
|
||||||
(github.ref_name == 'main' || github.ref_name == 'release') &&
|
|
||||||
github.event_name != 'workflow_dispatch'
|
|
||||||
runs-on: ${{ github.ref_name == 'release' && fromJSON('["self-hosted", "prod", "x64"]') || fromJSON('["self-hosted", "gen3", "small"]') }}
|
|
||||||
needs: [ tag, promote-images ]
|
|
||||||
strategy:
|
|
||||||
fail-fast: false
|
|
||||||
matrix:
|
|
||||||
version: [ v14, v15 ]
|
|
||||||
|
|
||||||
env:
|
|
||||||
# While on transition period we extract public extensions from compute-node image and custom extensions from extensions image.
|
|
||||||
# Later all the extensions will be moved to extensions image.
|
|
||||||
EXTENSIONS_IMAGE: ${{ github.ref_name == 'release' && '093970136003' || '369495373322'}}.dkr.ecr.eu-central-1.amazonaws.com/extensions-${{ matrix.version }}:latest
|
|
||||||
COMPUTE_NODE_IMAGE: ${{ github.ref_name == 'release' && '093970136003' || '369495373322'}}.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:latest
|
|
||||||
AWS_ACCESS_KEY_ID: ${{ github.ref_name == 'release' && secrets.AWS_ACCESS_KEY_PROD || secrets.AWS_ACCESS_KEY_DEV }}
|
|
||||||
AWS_SECRET_ACCESS_KEY: ${{ github.ref_name == 'release' && secrets.AWS_SECRET_KEY_PROD || secrets.AWS_SECRET_KEY_DEV }}
|
|
||||||
S3_BUCKETS: |
|
|
||||||
${{ github.ref_name == 'release' &&
|
|
||||||
'neon-prod-extensions-ap-southeast-1 neon-prod-extensions-eu-central-1 neon-prod-extensions-us-east-1 neon-prod-extensions-us-east-2 neon-prod-extensions-us-west-2' ||
|
|
||||||
'neon-dev-extensions-eu-central-1 neon-dev-extensions-eu-west-1 neon-dev-extensions-us-east-2' }}
|
|
||||||
|
|
||||||
steps:
|
|
||||||
- name: Pull postgres-extensions image
|
|
||||||
run: |
|
|
||||||
docker pull ${EXTENSIONS_IMAGE}
|
|
||||||
docker pull ${COMPUTE_NODE_IMAGE}
|
|
||||||
|
|
||||||
- name: Create postgres-extensions container
|
|
||||||
id: create-container
|
|
||||||
run: |
|
|
||||||
EID=$(docker create ${EXTENSIONS_IMAGE} true)
|
|
||||||
echo "EID=${EID}" >> $GITHUB_OUTPUT
|
|
||||||
|
|
||||||
CID=$(docker create ${COMPUTE_NODE_IMAGE} true)
|
|
||||||
echo "CID=${CID}" >> $GITHUB_OUTPUT
|
|
||||||
|
|
||||||
- name: Extract postgres-extensions from container
|
|
||||||
run: |
|
|
||||||
rm -rf ./extensions-to-upload ./custom-extensions # Just in case
|
|
||||||
|
|
||||||
# In compute image we have a bit different directory layout
|
|
||||||
mkdir -p extensions-to-upload/share
|
|
||||||
docker cp ${{ steps.create-container.outputs.CID }}:/usr/local/share/extension ./extensions-to-upload/share/extension
|
|
||||||
docker cp ${{ steps.create-container.outputs.CID }}:/usr/local/lib ./extensions-to-upload/lib
|
|
||||||
|
|
||||||
# Delete Neon extensitons (they always present on compute-node image)
|
|
||||||
rm -rf ./extensions-to-upload/share/extension/neon*
|
|
||||||
rm -rf ./extensions-to-upload/lib/neon*
|
|
||||||
|
|
||||||
# Delete leftovers from the extension build step
|
|
||||||
rm -rf ./extensions-to-upload/lib/pgxs
|
|
||||||
rm -rf ./extensions-to-upload/lib/pkgconfig
|
|
||||||
|
|
||||||
docker cp ${{ steps.create-container.outputs.EID }}:/extensions ./custom-extensions
|
|
||||||
for EXT_NAME in $(ls ./custom-extensions); do
|
|
||||||
mkdir -p ./extensions-to-upload/${EXT_NAME}/share
|
|
||||||
|
|
||||||
mv ./custom-extensions/${EXT_NAME}/share/extension ./extensions-to-upload/${EXT_NAME}/share/extension
|
|
||||||
mv ./custom-extensions/${EXT_NAME}/lib ./extensions-to-upload/${EXT_NAME}/lib
|
|
||||||
done
|
|
||||||
|
|
||||||
- name: Upload postgres-extensions to S3
|
|
||||||
run: |
|
|
||||||
for BUCKET in $(echo ${S3_BUCKETS}); do
|
|
||||||
aws s3 cp --recursive --only-show-errors ./extensions-to-upload s3://${BUCKET}/${{ needs.tag.outputs.build-tag }}/${{ matrix.version }}
|
|
||||||
done
|
|
||||||
|
|
||||||
- name: Cleanup
|
|
||||||
if: ${{ always() && (steps.create-container.outputs.CID || steps.create-container.outputs.EID) }}
|
|
||||||
run: |
|
|
||||||
docker rm ${{ steps.create-container.outputs.CID }} || true
|
|
||||||
docker rm ${{ steps.create-container.outputs.EID }} || true
|
|
||||||
|
|
||||||
deploy:
|
deploy:
|
||||||
runs-on: [ self-hosted, gen3, small ]
|
runs-on: [ self-hosted, gen3, small ]
|
||||||
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
|
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
|
||||||
needs: [ upload-postgres-extensions-to-s3, promote-images, tag, regress-tests ]
|
needs: [ promote-images, tag, regress-tests ]
|
||||||
if: ( github.ref_name == 'main' || github.ref_name == 'release' ) && github.event_name != 'workflow_dispatch'
|
if: ( github.ref_name == 'main' || github.ref_name == 'release' ) && github.event_name != 'workflow_dispatch'
|
||||||
steps:
|
steps:
|
||||||
- name: Fix git ownership
|
- name: Fix git ownership
|
||||||
@@ -1026,20 +959,6 @@ jobs:
|
|||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
- name: Create git tag
|
|
||||||
if: github.ref_name == 'release'
|
|
||||||
uses: actions/github-script@v6
|
|
||||||
with:
|
|
||||||
# Retry script for 5XX server errors: https://github.com/actions/github-script#retries
|
|
||||||
retries: 5
|
|
||||||
script: |
|
|
||||||
github.rest.git.createRef({
|
|
||||||
owner: context.repo.owner,
|
|
||||||
repo: context.repo.repo,
|
|
||||||
ref: "refs/tags/${{ needs.tag.outputs.build-tag }}",
|
|
||||||
sha: context.sha,
|
|
||||||
})
|
|
||||||
|
|
||||||
promote-compatibility-data:
|
promote-compatibility-data:
|
||||||
runs-on: [ self-hosted, gen3, small ]
|
runs-on: [ self-hosted, gen3, small ]
|
||||||
container:
|
container:
|
||||||
|
|||||||
1
.github/workflows/release.yml
vendored
1
.github/workflows/release.yml
vendored
@@ -3,7 +3,6 @@ name: Create Release Branch
|
|||||||
on:
|
on:
|
||||||
schedule:
|
schedule:
|
||||||
- cron: '0 10 * * 2'
|
- cron: '0 10 * * 2'
|
||||||
workflow_dispatch:
|
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
create_release_branch:
|
create_release_branch:
|
||||||
|
|||||||
234
Cargo.lock
generated
234
Cargo.lock
generated
@@ -110,6 +110,12 @@ dependencies = [
|
|||||||
"backtrace",
|
"backtrace",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "arc-swap"
|
||||||
|
version = "1.6.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "bddcadddf5e9015d310179a59bb28c4d4b9920ad0f11e8e14dbadf654890c9a6"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "archery"
|
name = "archery"
|
||||||
version = "0.5.0"
|
version = "0.5.0"
|
||||||
@@ -200,6 +206,17 @@ dependencies = [
|
|||||||
"critical-section",
|
"critical-section",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "atty"
|
||||||
|
version = "0.2.14"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
|
||||||
|
dependencies = [
|
||||||
|
"hermit-abi 0.1.19",
|
||||||
|
"libc",
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "autocfg"
|
name = "autocfg"
|
||||||
version = "1.1.0"
|
version = "1.1.0"
|
||||||
@@ -794,6 +811,18 @@ dependencies = [
|
|||||||
"libloading",
|
"libloading",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "clap"
|
||||||
|
version = "3.2.25"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags",
|
||||||
|
"clap_lex 0.2.4",
|
||||||
|
"indexmap",
|
||||||
|
"textwrap",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "clap"
|
name = "clap"
|
||||||
version = "4.3.0"
|
version = "4.3.0"
|
||||||
@@ -814,7 +843,7 @@ dependencies = [
|
|||||||
"anstream",
|
"anstream",
|
||||||
"anstyle",
|
"anstyle",
|
||||||
"bitflags",
|
"bitflags",
|
||||||
"clap_lex",
|
"clap_lex 0.5.0",
|
||||||
"strsim",
|
"strsim",
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -830,6 +859,15 @@ dependencies = [
|
|||||||
"syn 2.0.16",
|
"syn 2.0.16",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "clap_lex"
|
||||||
|
version = "0.2.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5"
|
||||||
|
dependencies = [
|
||||||
|
"os_str_bytes",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "clap_lex"
|
name = "clap_lex"
|
||||||
version = "0.5.0"
|
version = "0.5.0"
|
||||||
@@ -883,7 +921,7 @@ version = "0.1.0"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"chrono",
|
"chrono",
|
||||||
"clap",
|
"clap 4.3.0",
|
||||||
"compute_api",
|
"compute_api",
|
||||||
"futures",
|
"futures",
|
||||||
"hyper",
|
"hyper",
|
||||||
@@ -945,7 +983,7 @@ name = "control_plane"
|
|||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"clap",
|
"clap 4.3.0",
|
||||||
"comfy-table",
|
"comfy-table",
|
||||||
"compute_api",
|
"compute_api",
|
||||||
"git-version",
|
"git-version",
|
||||||
@@ -1015,19 +1053,19 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "criterion"
|
name = "criterion"
|
||||||
version = "0.5.1"
|
version = "0.4.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f"
|
checksum = "e7c76e09c1aae2bc52b3d2f29e13c6572553b30c4aa1b8a49fd70de6412654cb"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anes",
|
"anes",
|
||||||
|
"atty",
|
||||||
"cast",
|
"cast",
|
||||||
"ciborium",
|
"ciborium",
|
||||||
"clap",
|
"clap 3.2.25",
|
||||||
"criterion-plot",
|
"criterion-plot",
|
||||||
"is-terminal",
|
|
||||||
"itertools",
|
"itertools",
|
||||||
|
"lazy_static",
|
||||||
"num-traits",
|
"num-traits",
|
||||||
"once_cell",
|
|
||||||
"oorandom",
|
"oorandom",
|
||||||
"plotters",
|
"plotters",
|
||||||
"rayon",
|
"rayon",
|
||||||
@@ -1108,7 +1146,7 @@ dependencies = [
|
|||||||
"crossterm_winapi",
|
"crossterm_winapi",
|
||||||
"libc",
|
"libc",
|
||||||
"mio",
|
"mio",
|
||||||
"parking_lot 0.12.1",
|
"parking_lot",
|
||||||
"signal-hook",
|
"signal-hook",
|
||||||
"signal-hook-mio",
|
"signal-hook-mio",
|
||||||
"winapi",
|
"winapi",
|
||||||
@@ -1178,7 +1216,7 @@ dependencies = [
|
|||||||
"hashbrown 0.12.3",
|
"hashbrown 0.12.3",
|
||||||
"lock_api",
|
"lock_api",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"parking_lot_core 0.9.7",
|
"parking_lot_core",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -1644,6 +1682,15 @@ version = "0.4.1"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
|
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "hermit-abi"
|
||||||
|
version = "0.1.19"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "hermit-abi"
|
name = "hermit-abi"
|
||||||
version = "0.2.6"
|
version = "0.2.6"
|
||||||
@@ -1898,9 +1945,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c"
|
checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cfg-if",
|
"cfg-if",
|
||||||
"js-sys",
|
|
||||||
"wasm-bindgen",
|
|
||||||
"web-sys",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -2229,6 +2273,16 @@ dependencies = [
|
|||||||
"windows-sys 0.45.0",
|
"windows-sys 0.45.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "nu-ansi-term"
|
||||||
|
version = "0.46.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84"
|
||||||
|
dependencies = [
|
||||||
|
"overload",
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "num-bigint"
|
name = "num-bigint"
|
||||||
version = "0.4.3"
|
version = "0.4.3"
|
||||||
@@ -2301,9 +2355,9 @@ checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "openssl"
|
name = "openssl"
|
||||||
version = "0.10.55"
|
version = "0.10.52"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "345df152bc43501c5eb9e4654ff05f794effb78d4efe3d53abc158baddc0703d"
|
checksum = "01b8574602df80f7b85fdfc5392fa884a4e3b3f4f35402c070ab34c3d3f78d56"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bitflags",
|
"bitflags",
|
||||||
"cfg-if",
|
"cfg-if",
|
||||||
@@ -2333,9 +2387,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "openssl-sys"
|
name = "openssl-sys"
|
||||||
version = "0.9.90"
|
version = "0.9.87"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "374533b0e45f3a7ced10fcaeccca020e66656bc03dac384f852e4e5a7a8104a6"
|
checksum = "8e17f59264b2809d77ae94f0e1ebabc434773f370d6ca667bd223ea10e06cc7e"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cc",
|
"cc",
|
||||||
"libc",
|
"libc",
|
||||||
@@ -2456,19 +2510,31 @@ dependencies = [
|
|||||||
"winapi",
|
"winapi",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "os_str_bytes"
|
||||||
|
version = "6.5.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ceedf44fb00f2d1984b0bc98102627ce622e083e49a5bacdb3e514fa4238e267"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "outref"
|
name = "outref"
|
||||||
version = "0.5.1"
|
version = "0.5.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "4030760ffd992bef45b0ae3f10ce1aba99e33464c90d14dd7c039884963ddc7a"
|
checksum = "4030760ffd992bef45b0ae3f10ce1aba99e33464c90d14dd7c039884963ddc7a"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "overload"
|
||||||
|
version = "0.1.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pagectl"
|
name = "pagectl"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"bytes",
|
"bytes",
|
||||||
"clap",
|
"clap 4.3.0",
|
||||||
"git-version",
|
"git-version",
|
||||||
"pageserver",
|
"pageserver",
|
||||||
"postgres_ffi",
|
"postgres_ffi",
|
||||||
@@ -2482,12 +2548,13 @@ name = "pageserver"
|
|||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
|
"arc-swap",
|
||||||
"async-stream",
|
"async-stream",
|
||||||
"async-trait",
|
"async-trait",
|
||||||
"byteorder",
|
"byteorder",
|
||||||
"bytes",
|
"bytes",
|
||||||
"chrono",
|
"chrono",
|
||||||
"clap",
|
"clap 4.3.0",
|
||||||
"close_fds",
|
"close_fds",
|
||||||
"const_format",
|
"const_format",
|
||||||
"consumption_metrics",
|
"consumption_metrics",
|
||||||
@@ -2569,17 +2636,6 @@ dependencies = [
|
|||||||
"workspace_hack",
|
"workspace_hack",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "parking_lot"
|
|
||||||
version = "0.11.2"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99"
|
|
||||||
dependencies = [
|
|
||||||
"instant",
|
|
||||||
"lock_api",
|
|
||||||
"parking_lot_core 0.8.6",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "parking_lot"
|
name = "parking_lot"
|
||||||
version = "0.12.1"
|
version = "0.12.1"
|
||||||
@@ -2587,21 +2643,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
|
checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"lock_api",
|
"lock_api",
|
||||||
"parking_lot_core 0.9.7",
|
"parking_lot_core",
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "parking_lot_core"
|
|
||||||
version = "0.8.6"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "60a2cfe6f0ad2bfc16aefa463b497d5c7a5ecd44a23efa72aa342d90177356dc"
|
|
||||||
dependencies = [
|
|
||||||
"cfg-if",
|
|
||||||
"instant",
|
|
||||||
"libc",
|
|
||||||
"redox_syscall 0.2.16",
|
|
||||||
"smallvec",
|
|
||||||
"winapi",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -2617,16 +2659,6 @@ dependencies = [
|
|||||||
"windows-sys 0.45.0",
|
"windows-sys 0.45.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "pbkdf2"
|
|
||||||
version = "0.12.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "f0ca0b5a68607598bf3bad68f32227a8164f6254833f84eafaac409cd6746c31"
|
|
||||||
dependencies = [
|
|
||||||
"digest",
|
|
||||||
"hmac",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "peeking_take_while"
|
name = "peeking_take_while"
|
||||||
version = "0.1.2"
|
version = "0.1.2"
|
||||||
@@ -2745,7 +2777,7 @@ dependencies = [
|
|||||||
[[package]]
|
[[package]]
|
||||||
name = "postgres"
|
name = "postgres"
|
||||||
version = "0.19.4"
|
version = "0.19.4"
|
||||||
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=1aaedab101b23f7612042850d8f2036810fa7c7f#1aaedab101b23f7612042850d8f2036810fa7c7f"
|
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=f6ec31df3bcce89cb34f300f17c8a8c031c5ee8c#f6ec31df3bcce89cb34f300f17c8a8c031c5ee8c"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bytes",
|
"bytes",
|
||||||
"fallible-iterator",
|
"fallible-iterator",
|
||||||
@@ -2758,7 +2790,7 @@ dependencies = [
|
|||||||
[[package]]
|
[[package]]
|
||||||
name = "postgres-native-tls"
|
name = "postgres-native-tls"
|
||||||
version = "0.5.0"
|
version = "0.5.0"
|
||||||
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=1aaedab101b23f7612042850d8f2036810fa7c7f#1aaedab101b23f7612042850d8f2036810fa7c7f"
|
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=f6ec31df3bcce89cb34f300f17c8a8c031c5ee8c#f6ec31df3bcce89cb34f300f17c8a8c031c5ee8c"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"native-tls",
|
"native-tls",
|
||||||
"tokio",
|
"tokio",
|
||||||
@@ -2769,7 +2801,7 @@ dependencies = [
|
|||||||
[[package]]
|
[[package]]
|
||||||
name = "postgres-protocol"
|
name = "postgres-protocol"
|
||||||
version = "0.6.4"
|
version = "0.6.4"
|
||||||
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=1aaedab101b23f7612042850d8f2036810fa7c7f#1aaedab101b23f7612042850d8f2036810fa7c7f"
|
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=f6ec31df3bcce89cb34f300f17c8a8c031c5ee8c#f6ec31df3bcce89cb34f300f17c8a8c031c5ee8c"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"base64 0.20.0",
|
"base64 0.20.0",
|
||||||
"byteorder",
|
"byteorder",
|
||||||
@@ -2787,7 +2819,7 @@ dependencies = [
|
|||||||
[[package]]
|
[[package]]
|
||||||
name = "postgres-types"
|
name = "postgres-types"
|
||||||
version = "0.2.4"
|
version = "0.2.4"
|
||||||
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=1aaedab101b23f7612042850d8f2036810fa7c7f#1aaedab101b23f7612042850d8f2036810fa7c7f"
|
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=f6ec31df3bcce89cb34f300f17c8a8c031c5ee8c#f6ec31df3bcce89cb34f300f17c8a8c031c5ee8c"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bytes",
|
"bytes",
|
||||||
"fallible-iterator",
|
"fallible-iterator",
|
||||||
@@ -2932,7 +2964,7 @@ dependencies = [
|
|||||||
"lazy_static",
|
"lazy_static",
|
||||||
"libc",
|
"libc",
|
||||||
"memchr",
|
"memchr",
|
||||||
"parking_lot 0.12.1",
|
"parking_lot",
|
||||||
"procfs",
|
"procfs",
|
||||||
"thiserror",
|
"thiserror",
|
||||||
]
|
]
|
||||||
@@ -2997,11 +3029,12 @@ version = "0.1.0"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"async-trait",
|
"async-trait",
|
||||||
|
"atty",
|
||||||
"base64 0.13.1",
|
"base64 0.13.1",
|
||||||
"bstr",
|
"bstr",
|
||||||
"bytes",
|
"bytes",
|
||||||
"chrono",
|
"chrono",
|
||||||
"clap",
|
"clap 4.3.0",
|
||||||
"consumption_metrics",
|
"consumption_metrics",
|
||||||
"futures",
|
"futures",
|
||||||
"git-version",
|
"git-version",
|
||||||
@@ -3019,8 +3052,7 @@ dependencies = [
|
|||||||
"native-tls",
|
"native-tls",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"opentelemetry",
|
"opentelemetry",
|
||||||
"parking_lot 0.12.1",
|
"parking_lot",
|
||||||
"pbkdf2",
|
|
||||||
"pin-project-lite",
|
"pin-project-lite",
|
||||||
"postgres-native-tls",
|
"postgres-native-tls",
|
||||||
"postgres_backend",
|
"postgres_backend",
|
||||||
@@ -3031,7 +3063,6 @@ dependencies = [
|
|||||||
"regex",
|
"regex",
|
||||||
"reqwest",
|
"reqwest",
|
||||||
"reqwest-middleware",
|
"reqwest-middleware",
|
||||||
"reqwest-retry",
|
|
||||||
"reqwest-tracing",
|
"reqwest-tracing",
|
||||||
"routerify",
|
"routerify",
|
||||||
"rstest",
|
"rstest",
|
||||||
@@ -3267,29 +3298,6 @@ dependencies = [
|
|||||||
"thiserror",
|
"thiserror",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "reqwest-retry"
|
|
||||||
version = "0.2.2"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "48d0fd6ef4c6d23790399fe15efc8d12cd9f3d4133958f9bd7801ee5cbaec6c4"
|
|
||||||
dependencies = [
|
|
||||||
"anyhow",
|
|
||||||
"async-trait",
|
|
||||||
"chrono",
|
|
||||||
"futures",
|
|
||||||
"getrandom",
|
|
||||||
"http",
|
|
||||||
"hyper",
|
|
||||||
"parking_lot 0.11.2",
|
|
||||||
"reqwest",
|
|
||||||
"reqwest-middleware",
|
|
||||||
"retry-policies",
|
|
||||||
"task-local-extensions",
|
|
||||||
"tokio",
|
|
||||||
"tracing",
|
|
||||||
"wasm-timer",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "reqwest-tracing"
|
name = "reqwest-tracing"
|
||||||
version = "0.4.4"
|
version = "0.4.4"
|
||||||
@@ -3308,17 +3316,6 @@ dependencies = [
|
|||||||
"tracing-opentelemetry",
|
"tracing-opentelemetry",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "retry-policies"
|
|
||||||
version = "0.1.2"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "e09bbcb5003282bcb688f0bae741b278e9c7e8f378f561522c9806c58e075d9b"
|
|
||||||
dependencies = [
|
|
||||||
"anyhow",
|
|
||||||
"chrono",
|
|
||||||
"rand",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ring"
|
name = "ring"
|
||||||
version = "0.16.20"
|
version = "0.16.20"
|
||||||
@@ -3517,7 +3514,7 @@ dependencies = [
|
|||||||
"byteorder",
|
"byteorder",
|
||||||
"bytes",
|
"bytes",
|
||||||
"chrono",
|
"chrono",
|
||||||
"clap",
|
"clap 4.3.0",
|
||||||
"const_format",
|
"const_format",
|
||||||
"crc32c",
|
"crc32c",
|
||||||
"fs2",
|
"fs2",
|
||||||
@@ -3528,7 +3525,7 @@ dependencies = [
|
|||||||
"hyper",
|
"hyper",
|
||||||
"metrics",
|
"metrics",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"parking_lot 0.12.1",
|
"parking_lot",
|
||||||
"postgres",
|
"postgres",
|
||||||
"postgres-protocol",
|
"postgres-protocol",
|
||||||
"postgres_backend",
|
"postgres_backend",
|
||||||
@@ -3947,7 +3944,7 @@ dependencies = [
|
|||||||
"anyhow",
|
"anyhow",
|
||||||
"async-stream",
|
"async-stream",
|
||||||
"bytes",
|
"bytes",
|
||||||
"clap",
|
"clap 4.3.0",
|
||||||
"const_format",
|
"const_format",
|
||||||
"futures",
|
"futures",
|
||||||
"futures-core",
|
"futures-core",
|
||||||
@@ -3957,7 +3954,7 @@ dependencies = [
|
|||||||
"hyper",
|
"hyper",
|
||||||
"metrics",
|
"metrics",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"parking_lot 0.12.1",
|
"parking_lot",
|
||||||
"prost",
|
"prost",
|
||||||
"tokio",
|
"tokio",
|
||||||
"tokio-stream",
|
"tokio-stream",
|
||||||
@@ -4128,6 +4125,12 @@ dependencies = [
|
|||||||
"syn 1.0.109",
|
"syn 1.0.109",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "textwrap"
|
||||||
|
version = "0.16.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "thiserror"
|
name = "thiserror"
|
||||||
version = "1.0.40"
|
version = "1.0.40"
|
||||||
@@ -4276,7 +4279,7 @@ dependencies = [
|
|||||||
[[package]]
|
[[package]]
|
||||||
name = "tokio-postgres"
|
name = "tokio-postgres"
|
||||||
version = "0.7.7"
|
version = "0.7.7"
|
||||||
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=1aaedab101b23f7612042850d8f2036810fa7c7f#1aaedab101b23f7612042850d8f2036810fa7c7f"
|
source = "git+https://github.com/neondatabase/rust-postgres.git?rev=f6ec31df3bcce89cb34f300f17c8a8c031c5ee8c#f6ec31df3bcce89cb34f300f17c8a8c031c5ee8c"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"async-trait",
|
"async-trait",
|
||||||
"byteorder",
|
"byteorder",
|
||||||
@@ -4285,7 +4288,7 @@ dependencies = [
|
|||||||
"futures-channel",
|
"futures-channel",
|
||||||
"futures-util",
|
"futures-util",
|
||||||
"log",
|
"log",
|
||||||
"parking_lot 0.12.1",
|
"parking_lot",
|
||||||
"percent-encoding",
|
"percent-encoding",
|
||||||
"phf",
|
"phf",
|
||||||
"pin-project-lite",
|
"pin-project-lite",
|
||||||
@@ -4543,7 +4546,7 @@ name = "trace"
|
|||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"clap",
|
"clap 4.3.0",
|
||||||
"pageserver_api",
|
"pageserver_api",
|
||||||
"utils",
|
"utils",
|
||||||
"workspace_hack",
|
"workspace_hack",
|
||||||
@@ -4645,6 +4648,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "30a651bc37f915e81f087d86e62a18eec5f79550c7faff886f7090b4ea757c77"
|
checksum = "30a651bc37f915e81f087d86e62a18eec5f79550c7faff886f7090b4ea757c77"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"matchers",
|
"matchers",
|
||||||
|
"nu-ansi-term",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"regex",
|
"regex",
|
||||||
"serde",
|
"serde",
|
||||||
@@ -4813,6 +4817,7 @@ version = "0.1.0"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"async-trait",
|
"async-trait",
|
||||||
|
"atty",
|
||||||
"bincode",
|
"bincode",
|
||||||
"byteorder",
|
"byteorder",
|
||||||
"bytes",
|
"bytes",
|
||||||
@@ -4889,7 +4894,7 @@ name = "wal_craft"
|
|||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"clap",
|
"clap 4.3.0",
|
||||||
"env_logger",
|
"env_logger",
|
||||||
"log",
|
"log",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
@@ -4993,21 +4998,6 @@ version = "0.2.86"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "ed9d5b4305409d1fc9482fee2d7f9bcbf24b3972bf59817ef757e23982242a93"
|
checksum = "ed9d5b4305409d1fc9482fee2d7f9bcbf24b3972bf59817ef757e23982242a93"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "wasm-timer"
|
|
||||||
version = "0.2.5"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "be0ecb0db480561e9a7642b5d3e4187c128914e58aa84330b9493e3eb68c5e7f"
|
|
||||||
dependencies = [
|
|
||||||
"futures",
|
|
||||||
"js-sys",
|
|
||||||
"parking_lot 0.11.2",
|
|
||||||
"pin-utils",
|
|
||||||
"wasm-bindgen",
|
|
||||||
"wasm-bindgen-futures",
|
|
||||||
"web-sys",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "web-sys"
|
name = "web-sys"
|
||||||
version = "0.3.63"
|
version = "0.3.63"
|
||||||
@@ -5269,7 +5259,7 @@ dependencies = [
|
|||||||
"anyhow",
|
"anyhow",
|
||||||
"bytes",
|
"bytes",
|
||||||
"chrono",
|
"chrono",
|
||||||
"clap",
|
"clap 4.3.0",
|
||||||
"clap_builder",
|
"clap_builder",
|
||||||
"crossbeam-utils",
|
"crossbeam-utils",
|
||||||
"either",
|
"either",
|
||||||
|
|||||||
20
Cargo.toml
20
Cargo.toml
@@ -32,8 +32,10 @@ license = "Apache-2.0"
|
|||||||
## All dependency versions, used in the project
|
## All dependency versions, used in the project
|
||||||
[workspace.dependencies]
|
[workspace.dependencies]
|
||||||
anyhow = { version = "1.0", features = ["backtrace"] }
|
anyhow = { version = "1.0", features = ["backtrace"] }
|
||||||
|
arc-swap = "1.6"
|
||||||
async-stream = "0.3"
|
async-stream = "0.3"
|
||||||
async-trait = "0.1"
|
async-trait = "0.1"
|
||||||
|
atty = "0.2.14"
|
||||||
aws-config = { version = "0.55", default-features = false, features=["rustls"] }
|
aws-config = { version = "0.55", default-features = false, features=["rustls"] }
|
||||||
aws-sdk-s3 = "0.27"
|
aws-sdk-s3 = "0.27"
|
||||||
aws-smithy-http = "0.55"
|
aws-smithy-http = "0.55"
|
||||||
@@ -86,7 +88,6 @@ opentelemetry = "0.18.0"
|
|||||||
opentelemetry-otlp = { version = "0.11.0", default_features=false, features = ["http-proto", "trace", "http", "reqwest-client"] }
|
opentelemetry-otlp = { version = "0.11.0", default_features=false, features = ["http-proto", "trace", "http", "reqwest-client"] }
|
||||||
opentelemetry-semantic-conventions = "0.10.0"
|
opentelemetry-semantic-conventions = "0.10.0"
|
||||||
parking_lot = "0.12"
|
parking_lot = "0.12"
|
||||||
pbkdf2 = "0.12.1"
|
|
||||||
pin-project-lite = "0.2"
|
pin-project-lite = "0.2"
|
||||||
prometheus = {version = "0.13", default_features=false, features = ["process"]} # removes protobuf dependency
|
prometheus = {version = "0.13", default_features=false, features = ["process"]} # removes protobuf dependency
|
||||||
prost = "0.11"
|
prost = "0.11"
|
||||||
@@ -95,7 +96,6 @@ regex = "1.4"
|
|||||||
reqwest = { version = "0.11", default-features = false, features = ["rustls-tls"] }
|
reqwest = { version = "0.11", default-features = false, features = ["rustls-tls"] }
|
||||||
reqwest-tracing = { version = "0.4.0", features = ["opentelemetry_0_18"] }
|
reqwest-tracing = { version = "0.4.0", features = ["opentelemetry_0_18"] }
|
||||||
reqwest-middleware = "0.2.0"
|
reqwest-middleware = "0.2.0"
|
||||||
reqwest-retry = "0.2.2"
|
|
||||||
routerify = "3"
|
routerify = "3"
|
||||||
rpds = "0.13"
|
rpds = "0.13"
|
||||||
rustls = "0.20"
|
rustls = "0.20"
|
||||||
@@ -129,7 +129,7 @@ tonic = {version = "0.9", features = ["tls", "tls-roots"]}
|
|||||||
tracing = "0.1"
|
tracing = "0.1"
|
||||||
tracing-error = "0.2.0"
|
tracing-error = "0.2.0"
|
||||||
tracing-opentelemetry = "0.18.0"
|
tracing-opentelemetry = "0.18.0"
|
||||||
tracing-subscriber = { version = "0.3", default_features = false, features = ["smallvec", "fmt", "tracing-log", "std", "env-filter"] }
|
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
||||||
url = "2.2"
|
url = "2.2"
|
||||||
uuid = { version = "1.2", features = ["v4", "serde"] }
|
uuid = { version = "1.2", features = ["v4", "serde"] }
|
||||||
walkdir = "2.3.2"
|
walkdir = "2.3.2"
|
||||||
@@ -141,11 +141,11 @@ env_logger = "0.10"
|
|||||||
log = "0.4"
|
log = "0.4"
|
||||||
|
|
||||||
## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed
|
## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed
|
||||||
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="1aaedab101b23f7612042850d8f2036810fa7c7f" }
|
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="f6ec31df3bcce89cb34f300f17c8a8c031c5ee8c" }
|
||||||
postgres-native-tls = { git = "https://github.com/neondatabase/rust-postgres.git", rev="1aaedab101b23f7612042850d8f2036810fa7c7f" }
|
postgres-native-tls = { git = "https://github.com/neondatabase/rust-postgres.git", rev="f6ec31df3bcce89cb34f300f17c8a8c031c5ee8c" }
|
||||||
postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", rev="1aaedab101b23f7612042850d8f2036810fa7c7f" }
|
postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", rev="f6ec31df3bcce89cb34f300f17c8a8c031c5ee8c" }
|
||||||
postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", rev="1aaedab101b23f7612042850d8f2036810fa7c7f" }
|
postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", rev="f6ec31df3bcce89cb34f300f17c8a8c031c5ee8c" }
|
||||||
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="1aaedab101b23f7612042850d8f2036810fa7c7f" }
|
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="f6ec31df3bcce89cb34f300f17c8a8c031c5ee8c" }
|
||||||
tokio-tar = { git = "https://github.com/neondatabase/tokio-tar.git", rev="404df61437de0feef49ba2ccdbdd94eb8ad6e142" }
|
tokio-tar = { git = "https://github.com/neondatabase/tokio-tar.git", rev="404df61437de0feef49ba2ccdbdd94eb8ad6e142" }
|
||||||
|
|
||||||
## Other git libraries
|
## Other git libraries
|
||||||
@@ -171,7 +171,7 @@ utils = { version = "0.1", path = "./libs/utils/" }
|
|||||||
workspace_hack = { version = "0.1", path = "./workspace_hack/" }
|
workspace_hack = { version = "0.1", path = "./workspace_hack/" }
|
||||||
|
|
||||||
## Build dependencies
|
## Build dependencies
|
||||||
criterion = "0.5.1"
|
criterion = "0.4"
|
||||||
rcgen = "0.10"
|
rcgen = "0.10"
|
||||||
rstest = "0.17"
|
rstest = "0.17"
|
||||||
tempfile = "3.4"
|
tempfile = "3.4"
|
||||||
@@ -181,7 +181,7 @@ tonic-build = "0.9"
|
|||||||
|
|
||||||
# This is only needed for proxy's tests.
|
# This is only needed for proxy's tests.
|
||||||
# TODO: we should probably fork `tokio-postgres-rustls` instead.
|
# TODO: we should probably fork `tokio-postgres-rustls` instead.
|
||||||
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="1aaedab101b23f7612042850d8f2036810fa7c7f" }
|
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="f6ec31df3bcce89cb34f300f17c8a8c031c5ee8c" }
|
||||||
|
|
||||||
# Changes the MAX_THREADS limit from 4096 to 32768.
|
# Changes the MAX_THREADS limit from 4096 to 32768.
|
||||||
# This is a temporary workaround for using tracing from many threads in safekeepers code,
|
# This is a temporary workaround for using tracing from many threads in safekeepers code,
|
||||||
|
|||||||
@@ -2,7 +2,6 @@ ARG PG_VERSION
|
|||||||
ARG REPOSITORY=neondatabase
|
ARG REPOSITORY=neondatabase
|
||||||
ARG IMAGE=rust
|
ARG IMAGE=rust
|
||||||
ARG TAG=pinned
|
ARG TAG=pinned
|
||||||
ARG BUILD_TAG
|
|
||||||
|
|
||||||
#########################################################################################
|
#########################################################################################
|
||||||
#
|
#
|
||||||
@@ -189,8 +188,8 @@ RUN wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.7.tar.gz -
|
|||||||
FROM build-deps AS vector-pg-build
|
FROM build-deps AS vector-pg-build
|
||||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||||
|
|
||||||
RUN wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.4.4.tar.gz -O pgvector.tar.gz && \
|
RUN wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.4.0.tar.gz -O pgvector.tar.gz && \
|
||||||
echo "1cb70a63f8928e396474796c22a20be9f7285a8a013009deb8152445b61b72e6 pgvector.tar.gz" | sha256sum --check && \
|
echo "b76cf84ddad452cc880a6c8c661d137ddd8679c000a16332f4f03ecf6e10bcc8 pgvector.tar.gz" | sha256sum --check && \
|
||||||
mkdir pgvector-src && cd pgvector-src && tar xvzf ../pgvector.tar.gz --strip-components=1 -C . && \
|
mkdir pgvector-src && cd pgvector-src && tar xvzf ../pgvector.tar.gz --strip-components=1 -C . && \
|
||||||
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||||
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||||
@@ -481,79 +480,6 @@ RUN wget https://github.com/rdkit/rdkit/archive/refs/tags/Release_2023_03_1.tar.
|
|||||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/rdkit.control
|
echo 'trusted = true' >> /usr/local/pgsql/share/extension/rdkit.control
|
||||||
|
|
||||||
#########################################################################################
|
|
||||||
#
|
|
||||||
# Layer "pg-uuidv7-pg-build"
|
|
||||||
# compile pg_uuidv7 extension
|
|
||||||
#
|
|
||||||
#########################################################################################
|
|
||||||
FROM build-deps AS pg-uuidv7-pg-build
|
|
||||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
|
||||||
|
|
||||||
ENV PATH "/usr/local/pgsql/bin/:$PATH"
|
|
||||||
RUN wget https://github.com/fboulnois/pg_uuidv7/archive/refs/tags/v1.0.1.tar.gz -O pg_uuidv7.tar.gz && \
|
|
||||||
echo "0d0759ab01b7fb23851ecffb0bce27822e1868a4a5819bfd276101c716637a7a pg_uuidv7.tar.gz" | sha256sum --check && \
|
|
||||||
mkdir pg_uuidv7-src && cd pg_uuidv7-src && tar xvzf ../pg_uuidv7.tar.gz --strip-components=1 -C . && \
|
|
||||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
|
||||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
|
||||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_uuidv7.control
|
|
||||||
|
|
||||||
#########################################################################################
|
|
||||||
#
|
|
||||||
# Layer "pg-roaringbitmap-pg-build"
|
|
||||||
# compile pg_roaringbitmap extension
|
|
||||||
#
|
|
||||||
#########################################################################################
|
|
||||||
FROM build-deps AS pg-roaringbitmap-pg-build
|
|
||||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
|
||||||
|
|
||||||
ENV PATH "/usr/local/pgsql/bin/:$PATH"
|
|
||||||
RUN wget https://github.com/ChenHuajun/pg_roaringbitmap/archive/refs/tags/v0.5.4.tar.gz -O pg_roaringbitmap.tar.gz && \
|
|
||||||
echo "b75201efcb1c2d1b014ec4ae6a22769cc7a224e6e406a587f5784a37b6b5a2aa pg_roaringbitmap.tar.gz" | sha256sum --check && \
|
|
||||||
mkdir pg_roaringbitmap-src && cd pg_roaringbitmap-src && tar xvzf ../pg_roaringbitmap.tar.gz --strip-components=1 -C . && \
|
|
||||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
|
||||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
|
||||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/roaringbitmap.control
|
|
||||||
|
|
||||||
#########################################################################################
|
|
||||||
#
|
|
||||||
# Layer "pg-embedding-pg-build"
|
|
||||||
# compile pg_embedding extension
|
|
||||||
#
|
|
||||||
#########################################################################################
|
|
||||||
FROM build-deps AS pg-embedding-pg-build
|
|
||||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
|
||||||
|
|
||||||
ENV PATH "/usr/local/pgsql/bin/:$PATH"
|
|
||||||
# 2465f831ea1f8d49c1d74f8959adb7fc277d70cd made on 05/07/2023
|
|
||||||
# There is no release tag yet
|
|
||||||
RUN wget https://github.com/neondatabase/pg_embedding/archive/2465f831ea1f8d49c1d74f8959adb7fc277d70cd.tar.gz -O pg_embedding.tar.gz && \
|
|
||||||
echo "047af2b1f664a1e6e37867bd4eeaf5934fa27d6ba3d6c4461efa388ddf7cd1d5 pg_embedding.tar.gz" | sha256sum --check && \
|
|
||||||
mkdir pg_embedding-src && cd pg_embedding-src && tar xvzf ../pg_embedding.tar.gz --strip-components=1 -C . && \
|
|
||||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
|
||||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
|
||||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/embedding.control
|
|
||||||
|
|
||||||
#########################################################################################
|
|
||||||
#
|
|
||||||
# Layer "pg-anon-pg-build"
|
|
||||||
# compile anon extension
|
|
||||||
#
|
|
||||||
#########################################################################################
|
|
||||||
FROM build-deps AS pg-anon-pg-build
|
|
||||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
|
||||||
|
|
||||||
# Kaniko doesn't allow to do `${from#/usr/local/pgsql/}`, so we use `${from:17}` instead
|
|
||||||
ENV PATH "/usr/local/pgsql/bin/:$PATH"
|
|
||||||
RUN wget https://gitlab.com/dalibo/postgresql_anonymizer/-/archive/1.1.0/postgresql_anonymizer-1.1.0.tar.gz -O pg_anon.tar.gz && \
|
|
||||||
echo "08b09d2ff9b962f96c60db7e6f8e79cf7253eb8772516998fc35ece08633d3ad pg_anon.tar.gz" | sha256sum --check && \
|
|
||||||
mkdir pg_anon-src && cd pg_anon-src && tar xvzf ../pg_anon.tar.gz --strip-components=1 -C . && \
|
|
||||||
find /usr/local/pgsql -type f | sort > /before.txt && \
|
|
||||||
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
|
||||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/anon.control && \
|
|
||||||
find /usr/local/pgsql -type f | sort > /after.txt && \
|
|
||||||
/bin/bash -c 'for from in $(comm -13 /before.txt /after.txt); do to=/extensions/anon/${from:17} && mkdir -p $(dirname ${to}) && cp -a ${from} ${to}; done'
|
|
||||||
|
|
||||||
#########################################################################################
|
#########################################################################################
|
||||||
#
|
#
|
||||||
# Layer "rust extensions"
|
# Layer "rust extensions"
|
||||||
@@ -662,7 +588,6 @@ RUN wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.1.0.tar.gz -
|
|||||||
#
|
#
|
||||||
#########################################################################################
|
#########################################################################################
|
||||||
FROM build-deps AS neon-pg-ext-build
|
FROM build-deps AS neon-pg-ext-build
|
||||||
# Public extensions
|
|
||||||
COPY --from=postgis-build /usr/local/pgsql/ /usr/local/pgsql/
|
COPY --from=postgis-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||||
COPY --from=postgis-build /sfcgal/* /
|
COPY --from=postgis-build /sfcgal/* /
|
||||||
COPY --from=plv8-build /usr/local/pgsql/ /usr/local/pgsql/
|
COPY --from=plv8-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||||
@@ -688,9 +613,6 @@ COPY --from=kq-imcx-pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
|||||||
COPY --from=pg-cron-pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
COPY --from=pg-cron-pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||||
COPY --from=pg-pgx-ulid-build /usr/local/pgsql/ /usr/local/pgsql/
|
COPY --from=pg-pgx-ulid-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||||
COPY --from=rdkit-pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
COPY --from=rdkit-pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||||
COPY --from=pg-uuidv7-pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
|
||||||
COPY --from=pg-roaringbitmap-pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
|
||||||
COPY --from=pg-embedding-pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
|
||||||
COPY pgxn/ pgxn/
|
COPY pgxn/ pgxn/
|
||||||
|
|
||||||
RUN make -j $(getconf _NPROCESSORS_ONLN) \
|
RUN make -j $(getconf _NPROCESSORS_ONLN) \
|
||||||
@@ -712,9 +634,6 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) \
|
|||||||
#
|
#
|
||||||
#########################################################################################
|
#########################################################################################
|
||||||
FROM $REPOSITORY/$IMAGE:$TAG AS compute-tools
|
FROM $REPOSITORY/$IMAGE:$TAG AS compute-tools
|
||||||
ARG BUILD_TAG
|
|
||||||
ENV BUILD_TAG=$BUILD_TAG
|
|
||||||
|
|
||||||
USER nonroot
|
USER nonroot
|
||||||
# Copy entire project to get Cargo.* files with proper dependencies for the whole project
|
# Copy entire project to get Cargo.* files with proper dependencies for the whole project
|
||||||
COPY --chown=nonroot . .
|
COPY --chown=nonroot . .
|
||||||
@@ -739,22 +658,6 @@ RUN rm -r /usr/local/pgsql/include
|
|||||||
# if they were to be used by other libraries.
|
# if they were to be used by other libraries.
|
||||||
RUN rm /usr/local/pgsql/lib/lib*.a
|
RUN rm /usr/local/pgsql/lib/lib*.a
|
||||||
|
|
||||||
#########################################################################################
|
|
||||||
#
|
|
||||||
# Extenstion only
|
|
||||||
#
|
|
||||||
#########################################################################################
|
|
||||||
FROM scratch AS postgres-extensions
|
|
||||||
# After the transition this layer will include all extensitons.
|
|
||||||
# As for now, it's only for new custom ones
|
|
||||||
#
|
|
||||||
# # Default extensions
|
|
||||||
# COPY --from=postgres-cleanup-layer /usr/local/pgsql/share/extension /usr/local/pgsql/share/extension
|
|
||||||
# COPY --from=postgres-cleanup-layer /usr/local/pgsql/lib /usr/local/pgsql/lib
|
|
||||||
# Custom extensions
|
|
||||||
COPY --from=pg-anon-pg-build /extensions/anon/lib/ /extensions/anon/lib
|
|
||||||
COPY --from=pg-anon-pg-build /extensions/anon/share/extension /extensions/anon/share/extension
|
|
||||||
|
|
||||||
#########################################################################################
|
#########################################################################################
|
||||||
#
|
#
|
||||||
# Final layer
|
# Final layer
|
||||||
|
|||||||
@@ -3,7 +3,6 @@
|
|||||||
ARG REPOSITORY=neondatabase
|
ARG REPOSITORY=neondatabase
|
||||||
ARG IMAGE=rust
|
ARG IMAGE=rust
|
||||||
ARG TAG=pinned
|
ARG TAG=pinned
|
||||||
ARG BUILD_TAG
|
|
||||||
|
|
||||||
FROM $REPOSITORY/$IMAGE:$TAG AS rust-build
|
FROM $REPOSITORY/$IMAGE:$TAG AS rust-build
|
||||||
WORKDIR /home/nonroot
|
WORKDIR /home/nonroot
|
||||||
@@ -17,8 +16,6 @@ ENV CACHEPOT_S3_KEY_PREFIX=cachepot
|
|||||||
ARG CACHEPOT_BUCKET=neon-github-dev
|
ARG CACHEPOT_BUCKET=neon-github-dev
|
||||||
#ARG AWS_ACCESS_KEY_ID
|
#ARG AWS_ACCESS_KEY_ID
|
||||||
#ARG AWS_SECRET_ACCESS_KEY
|
#ARG AWS_SECRET_ACCESS_KEY
|
||||||
ARG BUILD_TAG
|
|
||||||
ENV BUILD_TAG=$BUILD_TAG
|
|
||||||
|
|
||||||
COPY . .
|
COPY . .
|
||||||
|
|
||||||
|
|||||||
16
README.md
16
README.md
@@ -132,13 +132,13 @@ Python (3.9 or higher), and install python3 packages using `./scripts/pysync` (r
|
|||||||
# Create repository in .neon with proper paths to binaries and data
|
# Create repository in .neon with proper paths to binaries and data
|
||||||
# Later that would be responsibility of a package install script
|
# Later that would be responsibility of a package install script
|
||||||
> cargo neon init
|
> cargo neon init
|
||||||
Initializing pageserver node 1 at '127.0.0.1:64000' in ".neon"
|
Starting pageserver at '127.0.0.1:64000' in '.neon'.
|
||||||
|
|
||||||
# start pageserver, safekeeper, and broker for their intercommunication
|
# start pageserver, safekeeper, and broker for their intercommunication
|
||||||
> cargo neon start
|
> cargo neon start
|
||||||
Starting neon broker at 127.0.0.1:50051.
|
Starting neon broker at 127.0.0.1:50051
|
||||||
storage_broker started, pid: 2918372
|
storage_broker started, pid: 2918372
|
||||||
Starting pageserver node 1 at '127.0.0.1:64000' in ".neon".
|
Starting pageserver at '127.0.0.1:64000' in '.neon'.
|
||||||
pageserver started, pid: 2918386
|
pageserver started, pid: 2918386
|
||||||
Starting safekeeper at '127.0.0.1:5454' in '.neon/safekeepers/sk1'.
|
Starting safekeeper at '127.0.0.1:5454' in '.neon/safekeepers/sk1'.
|
||||||
safekeeper 1 started, pid: 2918437
|
safekeeper 1 started, pid: 2918437
|
||||||
@@ -152,7 +152,8 @@ Setting tenant 9ef87a5bf0d92544f6fafeeb3239695c as a default one
|
|||||||
# start postgres compute node
|
# start postgres compute node
|
||||||
> cargo neon endpoint start main
|
> cargo neon endpoint start main
|
||||||
Starting new endpoint main (PostgreSQL v14) on timeline de200bd42b49cc1814412c7e592dd6e9 ...
|
Starting new endpoint main (PostgreSQL v14) on timeline de200bd42b49cc1814412c7e592dd6e9 ...
|
||||||
Starting postgres at 'postgresql://cloud_admin@127.0.0.1:55432/postgres'
|
Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/main port=55432
|
||||||
|
Starting postgres at 'host=127.0.0.1 port=55432 user=cloud_admin dbname=postgres'
|
||||||
|
|
||||||
# check list of running postgres instances
|
# check list of running postgres instances
|
||||||
> cargo neon endpoint list
|
> cargo neon endpoint list
|
||||||
@@ -188,17 +189,18 @@ Created timeline 'b3b863fa45fa9e57e615f9f2d944e601' at Lsn 0/16F9A00 for tenant:
|
|||||||
# start postgres on that branch
|
# start postgres on that branch
|
||||||
> cargo neon endpoint start migration_check --branch-name migration_check
|
> cargo neon endpoint start migration_check --branch-name migration_check
|
||||||
Starting new endpoint migration_check (PostgreSQL v14) on timeline b3b863fa45fa9e57e615f9f2d944e601 ...
|
Starting new endpoint migration_check (PostgreSQL v14) on timeline b3b863fa45fa9e57e615f9f2d944e601 ...
|
||||||
Starting postgres at 'postgresql://cloud_admin@127.0.0.1:55434/postgres'
|
Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/migration_check port=55433
|
||||||
|
Starting postgres at 'host=127.0.0.1 port=55433 user=cloud_admin dbname=postgres'
|
||||||
|
|
||||||
# check the new list of running postgres instances
|
# check the new list of running postgres instances
|
||||||
> cargo neon endpoint list
|
> cargo neon endpoint list
|
||||||
ENDPOINT ADDRESS TIMELINE BRANCH NAME LSN STATUS
|
ENDPOINT ADDRESS TIMELINE BRANCH NAME LSN STATUS
|
||||||
main 127.0.0.1:55432 de200bd42b49cc1814412c7e592dd6e9 main 0/16F9A38 running
|
main 127.0.0.1:55432 de200bd42b49cc1814412c7e592dd6e9 main 0/16F9A38 running
|
||||||
migration_check 127.0.0.1:55434 b3b863fa45fa9e57e615f9f2d944e601 migration_check 0/16F9A70 running
|
migration_check 127.0.0.1:55433 b3b863fa45fa9e57e615f9f2d944e601 migration_check 0/16F9A70 running
|
||||||
|
|
||||||
# this new postgres instance will have all the data from 'main' postgres,
|
# this new postgres instance will have all the data from 'main' postgres,
|
||||||
# but all modifications would not affect data in original postgres
|
# but all modifications would not affect data in original postgres
|
||||||
> psql -p55434 -h 127.0.0.1 -U cloud_admin postgres
|
> psql -p55433 -h 127.0.0.1 -U cloud_admin postgres
|
||||||
postgres=# select * from t;
|
postgres=# select * from t;
|
||||||
key | value
|
key | value
|
||||||
-----+-------
|
-----+-------
|
||||||
|
|||||||
@@ -54,15 +54,9 @@ use compute_tools::monitor::launch_monitor;
|
|||||||
use compute_tools::params::*;
|
use compute_tools::params::*;
|
||||||
use compute_tools::spec::*;
|
use compute_tools::spec::*;
|
||||||
|
|
||||||
const BUILD_TAG_DEFAULT: &str = "local";
|
|
||||||
|
|
||||||
fn main() -> Result<()> {
|
fn main() -> Result<()> {
|
||||||
init_tracing_and_logging(DEFAULT_LOG_LEVEL)?;
|
init_tracing_and_logging(DEFAULT_LOG_LEVEL)?;
|
||||||
|
|
||||||
let build_tag = option_env!("BUILD_TAG").unwrap_or(BUILD_TAG_DEFAULT);
|
|
||||||
|
|
||||||
info!("build_tag: {build_tag}");
|
|
||||||
|
|
||||||
let matches = cli().get_matches();
|
let matches = cli().get_matches();
|
||||||
|
|
||||||
let http_port = *matches
|
let http_port = *matches
|
||||||
@@ -256,16 +250,6 @@ fn main() -> Result<()> {
|
|||||||
exit_code = ecode.code()
|
exit_code = ecode.code()
|
||||||
}
|
}
|
||||||
|
|
||||||
// Maybe sync safekeepers again, to speed up next startup
|
|
||||||
let compute_state = compute.state.lock().unwrap().clone();
|
|
||||||
let pspec = compute_state.pspec.as_ref().expect("spec must be set");
|
|
||||||
if matches!(pspec.spec.mode, compute_api::spec::ComputeMode::Primary) {
|
|
||||||
info!("syncing safekeepers on shutdown");
|
|
||||||
let storage_auth_token = pspec.storage_auth_token.clone();
|
|
||||||
let lsn = compute.sync_safekeepers(storage_auth_token)?;
|
|
||||||
info!("synced safekeepers at lsn {lsn}");
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Err(err) = compute.check_for_core_dumps() {
|
if let Err(err) = compute.check_for_core_dumps() {
|
||||||
error!("error while checking for core dumps: {err:?}");
|
error!("error while checking for core dumps: {err:?}");
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -133,84 +133,6 @@ impl TryFrom<ComputeSpec> for ParsedSpec {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Create special neon_superuser role, that's a slightly nerfed version of a real superuser
|
|
||||||
/// that we give to customers
|
|
||||||
fn create_neon_superuser(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
|
|
||||||
let roles = spec
|
|
||||||
.cluster
|
|
||||||
.roles
|
|
||||||
.iter()
|
|
||||||
.map(|r| format!("'{}'", escape_literal(&r.name)))
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
|
|
||||||
let dbs = spec
|
|
||||||
.cluster
|
|
||||||
.databases
|
|
||||||
.iter()
|
|
||||||
.map(|db| format!("'{}'", escape_literal(&db.name)))
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
|
|
||||||
let roles_decl = if roles.is_empty() {
|
|
||||||
String::from("roles text[] := NULL;")
|
|
||||||
} else {
|
|
||||||
format!(
|
|
||||||
r#"
|
|
||||||
roles text[] := ARRAY(SELECT rolname
|
|
||||||
FROM pg_catalog.pg_roles
|
|
||||||
WHERE rolname IN ({}));"#,
|
|
||||||
roles.join(", ")
|
|
||||||
)
|
|
||||||
};
|
|
||||||
|
|
||||||
let database_decl = if dbs.is_empty() {
|
|
||||||
String::from("dbs text[] := NULL;")
|
|
||||||
} else {
|
|
||||||
format!(
|
|
||||||
r#"
|
|
||||||
dbs text[] := ARRAY(SELECT datname
|
|
||||||
FROM pg_catalog.pg_database
|
|
||||||
WHERE datname IN ({}));"#,
|
|
||||||
dbs.join(", ")
|
|
||||||
)
|
|
||||||
};
|
|
||||||
|
|
||||||
// ALL PRIVILEGES grants CREATE, CONNECT, and TEMPORARY on all databases
|
|
||||||
// (see https://www.postgresql.org/docs/current/ddl-priv.html)
|
|
||||||
let query = format!(
|
|
||||||
r#"
|
|
||||||
DO $$
|
|
||||||
DECLARE
|
|
||||||
r text;
|
|
||||||
{}
|
|
||||||
{}
|
|
||||||
BEGIN
|
|
||||||
IF NOT EXISTS (
|
|
||||||
SELECT FROM pg_catalog.pg_roles WHERE rolname = 'neon_superuser')
|
|
||||||
THEN
|
|
||||||
CREATE ROLE neon_superuser CREATEDB CREATEROLE NOLOGIN IN ROLE pg_read_all_data, pg_write_all_data;
|
|
||||||
IF array_length(roles, 1) IS NOT NULL THEN
|
|
||||||
EXECUTE format('GRANT neon_superuser TO %s',
|
|
||||||
array_to_string(ARRAY(SELECT quote_ident(x) FROM unnest(roles) as x), ', '));
|
|
||||||
FOREACH r IN ARRAY roles LOOP
|
|
||||||
EXECUTE format('ALTER ROLE %s CREATEROLE CREATEDB', quote_ident(r));
|
|
||||||
END LOOP;
|
|
||||||
END IF;
|
|
||||||
IF array_length(dbs, 1) IS NOT NULL THEN
|
|
||||||
EXECUTE format('GRANT ALL PRIVILEGES ON DATABASE %s TO neon_superuser',
|
|
||||||
array_to_string(ARRAY(SELECT quote_ident(x) FROM unnest(dbs) as x), ', '));
|
|
||||||
END IF;
|
|
||||||
END IF;
|
|
||||||
END
|
|
||||||
$$;"#,
|
|
||||||
roles_decl, database_decl,
|
|
||||||
);
|
|
||||||
info!("Neon superuser created:\n{}", &query);
|
|
||||||
client
|
|
||||||
.simple_query(&query)
|
|
||||||
.map_err(|e| anyhow::anyhow!(e).context(query))?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
impl ComputeNode {
|
impl ComputeNode {
|
||||||
pub fn set_status(&self, status: ComputeStatus) {
|
pub fn set_status(&self, status: ComputeStatus) {
|
||||||
let mut state = self.state.lock().unwrap();
|
let mut state = self.state.lock().unwrap();
|
||||||
@@ -235,7 +157,7 @@ impl ComputeNode {
|
|||||||
|
|
||||||
// Get basebackup from the libpq connection to pageserver using `connstr` and
|
// Get basebackup from the libpq connection to pageserver using `connstr` and
|
||||||
// unarchive it to `pgdata` directory overriding all its previous content.
|
// unarchive it to `pgdata` directory overriding all its previous content.
|
||||||
#[instrument(skip_all, fields(%lsn))]
|
#[instrument(skip(self, compute_state))]
|
||||||
fn get_basebackup(&self, compute_state: &ComputeState, lsn: Lsn) -> Result<()> {
|
fn get_basebackup(&self, compute_state: &ComputeState, lsn: Lsn) -> Result<()> {
|
||||||
let spec = compute_state.pspec.as_ref().expect("spec must be set");
|
let spec = compute_state.pspec.as_ref().expect("spec must be set");
|
||||||
let start_time = Utc::now();
|
let start_time = Utc::now();
|
||||||
@@ -277,8 +199,8 @@ impl ComputeNode {
|
|||||||
|
|
||||||
// Run `postgres` in a special mode with `--sync-safekeepers` argument
|
// Run `postgres` in a special mode with `--sync-safekeepers` argument
|
||||||
// and return the reported LSN back to the caller.
|
// and return the reported LSN back to the caller.
|
||||||
#[instrument(skip_all)]
|
#[instrument(skip(self, storage_auth_token))]
|
||||||
pub fn sync_safekeepers(&self, storage_auth_token: Option<String>) -> Result<Lsn> {
|
fn sync_safekeepers(&self, storage_auth_token: Option<String>) -> Result<Lsn> {
|
||||||
let start_time = Utc::now();
|
let start_time = Utc::now();
|
||||||
|
|
||||||
let sync_handle = Command::new(&self.pgbin)
|
let sync_handle = Command::new(&self.pgbin)
|
||||||
@@ -322,7 +244,7 @@ impl ComputeNode {
|
|||||||
|
|
||||||
/// Do all the preparations like PGDATA directory creation, configuration,
|
/// Do all the preparations like PGDATA directory creation, configuration,
|
||||||
/// safekeepers sync, basebackup, etc.
|
/// safekeepers sync, basebackup, etc.
|
||||||
#[instrument(skip_all)]
|
#[instrument(skip(self, compute_state))]
|
||||||
pub fn prepare_pgdata(&self, compute_state: &ComputeState) -> Result<()> {
|
pub fn prepare_pgdata(&self, compute_state: &ComputeState) -> Result<()> {
|
||||||
let pspec = compute_state.pspec.as_ref().expect("spec must be set");
|
let pspec = compute_state.pspec.as_ref().expect("spec must be set");
|
||||||
let spec = &pspec.spec;
|
let spec = &pspec.spec;
|
||||||
@@ -380,7 +302,7 @@ impl ComputeNode {
|
|||||||
|
|
||||||
/// Start Postgres as a child process and manage DBs/roles.
|
/// Start Postgres as a child process and manage DBs/roles.
|
||||||
/// After that this will hang waiting on the postmaster process to exit.
|
/// After that this will hang waiting on the postmaster process to exit.
|
||||||
#[instrument(skip_all)]
|
#[instrument(skip(self))]
|
||||||
pub fn start_postgres(
|
pub fn start_postgres(
|
||||||
&self,
|
&self,
|
||||||
storage_auth_token: Option<String>,
|
storage_auth_token: Option<String>,
|
||||||
@@ -404,7 +326,7 @@ impl ComputeNode {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Do initial configuration of the already started Postgres.
|
/// Do initial configuration of the already started Postgres.
|
||||||
#[instrument(skip_all)]
|
#[instrument(skip(self, compute_state))]
|
||||||
pub fn apply_config(&self, compute_state: &ComputeState) -> Result<()> {
|
pub fn apply_config(&self, compute_state: &ComputeState) -> Result<()> {
|
||||||
// If connection fails,
|
// If connection fails,
|
||||||
// it may be the old node with `zenith_admin` superuser.
|
// it may be the old node with `zenith_admin` superuser.
|
||||||
@@ -425,8 +347,6 @@ impl ComputeNode {
|
|||||||
.map_err(|_| anyhow::anyhow!("invalid connstr"))?;
|
.map_err(|_| anyhow::anyhow!("invalid connstr"))?;
|
||||||
|
|
||||||
let mut client = Client::connect(zenith_admin_connstr.as_str(), NoTls)?;
|
let mut client = Client::connect(zenith_admin_connstr.as_str(), NoTls)?;
|
||||||
// Disable forwarding so that users don't get a cloud_admin role
|
|
||||||
client.simple_query("SET neon.forward_ddl = false")?;
|
|
||||||
client.simple_query("CREATE USER cloud_admin WITH SUPERUSER")?;
|
client.simple_query("CREATE USER cloud_admin WITH SUPERUSER")?;
|
||||||
client.simple_query("GRANT zenith_admin TO cloud_admin")?;
|
client.simple_query("GRANT zenith_admin TO cloud_admin")?;
|
||||||
drop(client);
|
drop(client);
|
||||||
@@ -437,16 +357,14 @@ impl ComputeNode {
|
|||||||
Ok(client) => client,
|
Ok(client) => client,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Proceed with post-startup configuration. Note, that order of operations is important.
|
||||||
// Disable DDL forwarding because control plane already knows about these roles/databases.
|
// Disable DDL forwarding because control plane already knows about these roles/databases.
|
||||||
client.simple_query("SET neon.forward_ddl = false")?;
|
client.simple_query("SET neon.forward_ddl = false")?;
|
||||||
|
|
||||||
// Proceed with post-startup configuration. Note, that order of operations is important.
|
|
||||||
let spec = &compute_state.pspec.as_ref().expect("spec must be set").spec;
|
let spec = &compute_state.pspec.as_ref().expect("spec must be set").spec;
|
||||||
create_neon_superuser(spec, &mut client)?;
|
|
||||||
handle_roles(spec, &mut client)?;
|
handle_roles(spec, &mut client)?;
|
||||||
handle_databases(spec, &mut client)?;
|
handle_databases(spec, &mut client)?;
|
||||||
handle_role_deletions(spec, self.connstr.as_str(), &mut client)?;
|
handle_role_deletions(spec, self.connstr.as_str(), &mut client)?;
|
||||||
handle_grants(spec, self.connstr.as_str())?;
|
handle_grants(spec, self.connstr.as_str(), &mut client)?;
|
||||||
handle_extensions(spec, &mut client)?;
|
handle_extensions(spec, &mut client)?;
|
||||||
|
|
||||||
// 'Close' connection
|
// 'Close' connection
|
||||||
@@ -458,7 +376,7 @@ impl ComputeNode {
|
|||||||
// We could've wrapped this around `pg_ctl reload`, but right now we don't use
|
// We could've wrapped this around `pg_ctl reload`, but right now we don't use
|
||||||
// `pg_ctl` for start / stop, so this just seems much easier to do as we already
|
// `pg_ctl` for start / stop, so this just seems much easier to do as we already
|
||||||
// have opened connection to Postgres and superuser access.
|
// have opened connection to Postgres and superuser access.
|
||||||
#[instrument(skip_all)]
|
#[instrument(skip(self, client))]
|
||||||
fn pg_reload_conf(&self, client: &mut Client) -> Result<()> {
|
fn pg_reload_conf(&self, client: &mut Client) -> Result<()> {
|
||||||
client.simple_query("SELECT pg_reload_conf()")?;
|
client.simple_query("SELECT pg_reload_conf()")?;
|
||||||
Ok(())
|
Ok(())
|
||||||
@@ -466,7 +384,7 @@ impl ComputeNode {
|
|||||||
|
|
||||||
/// Similar to `apply_config()`, but does a bit different sequence of operations,
|
/// Similar to `apply_config()`, but does a bit different sequence of operations,
|
||||||
/// as it's used to reconfigure a previously started and configured Postgres node.
|
/// as it's used to reconfigure a previously started and configured Postgres node.
|
||||||
#[instrument(skip_all)]
|
#[instrument(skip(self))]
|
||||||
pub fn reconfigure(&self) -> Result<()> {
|
pub fn reconfigure(&self) -> Result<()> {
|
||||||
let spec = self.state.lock().unwrap().pspec.clone().unwrap().spec;
|
let spec = self.state.lock().unwrap().pspec.clone().unwrap().spec;
|
||||||
|
|
||||||
@@ -484,7 +402,7 @@ impl ComputeNode {
|
|||||||
handle_roles(&spec, &mut client)?;
|
handle_roles(&spec, &mut client)?;
|
||||||
handle_databases(&spec, &mut client)?;
|
handle_databases(&spec, &mut client)?;
|
||||||
handle_role_deletions(&spec, self.connstr.as_str(), &mut client)?;
|
handle_role_deletions(&spec, self.connstr.as_str(), &mut client)?;
|
||||||
handle_grants(&spec, self.connstr.as_str())?;
|
handle_grants(&spec, self.connstr.as_str(), &mut client)?;
|
||||||
handle_extensions(&spec, &mut client)?;
|
handle_extensions(&spec, &mut client)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -501,7 +419,7 @@ impl ComputeNode {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[instrument(skip_all)]
|
#[instrument(skip(self))]
|
||||||
pub fn start_compute(&self) -> Result<std::process::Child> {
|
pub fn start_compute(&self) -> Result<std::process::Child> {
|
||||||
let compute_state = self.state.lock().unwrap().clone();
|
let compute_state = self.state.lock().unwrap().clone();
|
||||||
let pspec = compute_state.pspec.as_ref().expect("spec must be set");
|
let pspec = compute_state.pspec.as_ref().expect("spec must be set");
|
||||||
@@ -516,9 +434,9 @@ impl ComputeNode {
|
|||||||
self.prepare_pgdata(&compute_state)?;
|
self.prepare_pgdata(&compute_state)?;
|
||||||
|
|
||||||
let start_time = Utc::now();
|
let start_time = Utc::now();
|
||||||
|
|
||||||
let pg = self.start_postgres(pspec.storage_auth_token.clone())?;
|
let pg = self.start_postgres(pspec.storage_auth_token.clone())?;
|
||||||
|
|
||||||
let config_time = Utc::now();
|
|
||||||
if pspec.spec.mode == ComputeMode::Primary && !pspec.spec.skip_pg_catalog_updates {
|
if pspec.spec.mode == ComputeMode::Primary && !pspec.spec.skip_pg_catalog_updates {
|
||||||
self.apply_config(&compute_state)?;
|
self.apply_config(&compute_state)?;
|
||||||
}
|
}
|
||||||
@@ -526,13 +444,8 @@ impl ComputeNode {
|
|||||||
let startup_end_time = Utc::now();
|
let startup_end_time = Utc::now();
|
||||||
{
|
{
|
||||||
let mut state = self.state.lock().unwrap();
|
let mut state = self.state.lock().unwrap();
|
||||||
state.metrics.start_postgres_ms = config_time
|
|
||||||
.signed_duration_since(start_time)
|
|
||||||
.to_std()
|
|
||||||
.unwrap()
|
|
||||||
.as_millis() as u64;
|
|
||||||
state.metrics.config_ms = startup_end_time
|
state.metrics.config_ms = startup_end_time
|
||||||
.signed_duration_since(config_time)
|
.signed_duration_since(start_time)
|
||||||
.to_std()
|
.to_std()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.as_millis() as u64;
|
.as_millis() as u64;
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ use compute_api::responses::ComputeStatus;
|
|||||||
|
|
||||||
use crate::compute::ComputeNode;
|
use crate::compute::ComputeNode;
|
||||||
|
|
||||||
#[instrument(skip_all)]
|
#[instrument(skip(compute))]
|
||||||
fn configurator_main_loop(compute: &Arc<ComputeNode>) {
|
fn configurator_main_loop(compute: &Arc<ComputeNode>) {
|
||||||
info!("waiting for reconfiguration requests");
|
info!("waiting for reconfiguration requests");
|
||||||
loop {
|
loop {
|
||||||
|
|||||||
@@ -18,7 +18,6 @@ pub fn init_tracing_and_logging(default_log_level: &str) -> anyhow::Result<()> {
|
|||||||
.unwrap_or_else(|_| tracing_subscriber::EnvFilter::new(default_log_level));
|
.unwrap_or_else(|_| tracing_subscriber::EnvFilter::new(default_log_level));
|
||||||
|
|
||||||
let fmt_layer = tracing_subscriber::fmt::layer()
|
let fmt_layer = tracing_subscriber::fmt::layer()
|
||||||
.with_ansi(false)
|
|
||||||
.with_target(false)
|
.with_target(false)
|
||||||
.with_writer(std::io::stderr);
|
.with_writer(std::io::stderr);
|
||||||
|
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ use compute_api::spec::{Database, GenericOption, GenericOptions, PgIdent, Role};
|
|||||||
const POSTGRES_WAIT_TIMEOUT: Duration = Duration::from_millis(60 * 1000); // milliseconds
|
const POSTGRES_WAIT_TIMEOUT: Duration = Duration::from_millis(60 * 1000); // milliseconds
|
||||||
|
|
||||||
/// Escape a string for including it in a SQL literal
|
/// Escape a string for including it in a SQL literal
|
||||||
pub fn escape_literal(s: &str) -> String {
|
fn escape_literal(s: &str) -> String {
|
||||||
s.replace('\'', "''").replace('\\', "\\\\")
|
s.replace('\'', "''").replace('\\', "\\\\")
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -215,7 +215,7 @@ pub fn get_existing_dbs(client: &mut Client) -> Result<Vec<Database>> {
|
|||||||
/// Wait for Postgres to become ready to accept connections. It's ready to
|
/// Wait for Postgres to become ready to accept connections. It's ready to
|
||||||
/// accept connections when the state-field in `pgdata/postmaster.pid` says
|
/// accept connections when the state-field in `pgdata/postmaster.pid` says
|
||||||
/// 'ready'.
|
/// 'ready'.
|
||||||
#[instrument(skip_all, fields(pgdata = %pgdata.display()))]
|
#[instrument(skip(pg))]
|
||||||
pub fn wait_for_postgres(pg: &mut Child, pgdata: &Path) -> Result<()> {
|
pub fn wait_for_postgres(pg: &mut Child, pgdata: &Path) -> Result<()> {
|
||||||
let pid_path = pgdata.join("postmaster.pid");
|
let pid_path = pgdata.join("postmaster.pid");
|
||||||
|
|
||||||
|
|||||||
@@ -269,13 +269,17 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
|
|||||||
xact.execute(query.as_str(), &[])?;
|
xact.execute(query.as_str(), &[])?;
|
||||||
}
|
}
|
||||||
RoleAction::Create => {
|
RoleAction::Create => {
|
||||||
let mut query: String = format!(
|
let mut query: String = format!("CREATE ROLE {} ", name.pg_quote());
|
||||||
"CREATE ROLE {} CREATEROLE CREATEDB IN ROLE neon_superuser",
|
|
||||||
name.pg_quote()
|
|
||||||
);
|
|
||||||
info!("role create query: '{}'", &query);
|
info!("role create query: '{}'", &query);
|
||||||
query.push_str(&role.to_pg_options());
|
query.push_str(&role.to_pg_options());
|
||||||
xact.execute(query.as_str(), &[])?;
|
xact.execute(query.as_str(), &[])?;
|
||||||
|
|
||||||
|
let grant_query = format!(
|
||||||
|
"GRANT pg_read_all_data, pg_write_all_data TO {}",
|
||||||
|
name.pg_quote()
|
||||||
|
);
|
||||||
|
xact.execute(grant_query.as_str(), &[])?;
|
||||||
|
info!("role grant query: '{}'", &grant_query);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -472,11 +476,6 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
|
|||||||
query.push_str(&db.to_pg_options());
|
query.push_str(&db.to_pg_options());
|
||||||
let _guard = info_span!("executing", query).entered();
|
let _guard = info_span!("executing", query).entered();
|
||||||
client.execute(query.as_str(), &[])?;
|
client.execute(query.as_str(), &[])?;
|
||||||
let grant_query: String = format!(
|
|
||||||
"GRANT ALL PRIVILEGES ON DATABASE {} TO neon_superuser",
|
|
||||||
name.pg_quote()
|
|
||||||
);
|
|
||||||
client.execute(grant_query.as_str(), &[])?;
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -496,9 +495,35 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
|
|||||||
/// Grant CREATE ON DATABASE to the database owner and do some other alters and grants
|
/// Grant CREATE ON DATABASE to the database owner and do some other alters and grants
|
||||||
/// to allow users creating trusted extensions and re-creating `public` schema, for example.
|
/// to allow users creating trusted extensions and re-creating `public` schema, for example.
|
||||||
#[instrument(skip_all)]
|
#[instrument(skip_all)]
|
||||||
pub fn handle_grants(spec: &ComputeSpec, connstr: &str) -> Result<()> {
|
pub fn handle_grants(spec: &ComputeSpec, connstr: &str, client: &mut Client) -> Result<()> {
|
||||||
info!("cluster spec grants:");
|
info!("cluster spec grants:");
|
||||||
|
|
||||||
|
// We now have a separate `web_access` role to connect to the database
|
||||||
|
// via the web interface and proxy link auth. And also we grant a
|
||||||
|
// read / write all data privilege to every role. So also grant
|
||||||
|
// create to everyone.
|
||||||
|
// XXX: later we should stop messing with Postgres ACL in such horrible
|
||||||
|
// ways.
|
||||||
|
let roles = spec
|
||||||
|
.cluster
|
||||||
|
.roles
|
||||||
|
.iter()
|
||||||
|
.map(|r| r.name.pg_quote())
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
for db in &spec.cluster.databases {
|
||||||
|
let dbname = &db.name;
|
||||||
|
|
||||||
|
let query: String = format!(
|
||||||
|
"GRANT CREATE ON DATABASE {} TO {}",
|
||||||
|
dbname.pg_quote(),
|
||||||
|
roles.join(", ")
|
||||||
|
);
|
||||||
|
info!("grant query {}", &query);
|
||||||
|
|
||||||
|
client.execute(query.as_str(), &[])?;
|
||||||
|
}
|
||||||
|
|
||||||
// Do some per-database access adjustments. We'd better do this at db creation time,
|
// Do some per-database access adjustments. We'd better do this at db creation time,
|
||||||
// but CREATE DATABASE isn't transactional. So we cannot create db + do some grants
|
// but CREATE DATABASE isn't transactional. So we cannot create db + do some grants
|
||||||
// atomically.
|
// atomically.
|
||||||
|
|||||||
@@ -180,11 +180,6 @@ pub fn stop_process(immediate: bool, process_name: &str, pid_file: &Path) -> any
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Wait until process is gone
|
// Wait until process is gone
|
||||||
wait_until_stopped(process_name, pid)?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn wait_until_stopped(process_name: &str, pid: Pid) -> anyhow::Result<()> {
|
|
||||||
for retries in 0..RETRIES {
|
for retries in 0..RETRIES {
|
||||||
match process_has_stopped(pid) {
|
match process_has_stopped(pid) {
|
||||||
Ok(true) => {
|
Ok(true) => {
|
||||||
|
|||||||
@@ -308,8 +308,7 @@ fn handle_init(init_match: &ArgMatches) -> anyhow::Result<LocalEnv> {
|
|||||||
|
|
||||||
let mut env =
|
let mut env =
|
||||||
LocalEnv::parse_config(&toml_file).context("Failed to create neon configuration")?;
|
LocalEnv::parse_config(&toml_file).context("Failed to create neon configuration")?;
|
||||||
let force = init_match.get_flag("force");
|
env.init(pg_version)
|
||||||
env.init(pg_version, force)
|
|
||||||
.context("Failed to initialize neon repository")?;
|
.context("Failed to initialize neon repository")?;
|
||||||
|
|
||||||
// Initialize pageserver, create initial tenant and timeline.
|
// Initialize pageserver, create initial tenant and timeline.
|
||||||
@@ -1014,13 +1013,6 @@ fn cli() -> Command {
|
|||||||
.help("If set, the node will be a hot replica on the specified timeline")
|
.help("If set, the node will be a hot replica on the specified timeline")
|
||||||
.required(false);
|
.required(false);
|
||||||
|
|
||||||
let force_arg = Arg::new("force")
|
|
||||||
.value_parser(value_parser!(bool))
|
|
||||||
.long("force")
|
|
||||||
.action(ArgAction::SetTrue)
|
|
||||||
.help("Force initialization even if the repository is not empty")
|
|
||||||
.required(false);
|
|
||||||
|
|
||||||
Command::new("Neon CLI")
|
Command::new("Neon CLI")
|
||||||
.arg_required_else_help(true)
|
.arg_required_else_help(true)
|
||||||
.version(GIT_VERSION)
|
.version(GIT_VERSION)
|
||||||
@@ -1036,7 +1028,6 @@ fn cli() -> Command {
|
|||||||
.value_name("config"),
|
.value_name("config"),
|
||||||
)
|
)
|
||||||
.arg(pg_version_arg.clone())
|
.arg(pg_version_arg.clone())
|
||||||
.arg(force_arg)
|
|
||||||
)
|
)
|
||||||
.subcommand(
|
.subcommand(
|
||||||
Command::new("timeline")
|
Command::new("timeline")
|
||||||
|
|||||||
@@ -67,7 +67,6 @@ pub struct EndpointConf {
|
|||||||
pg_port: u16,
|
pg_port: u16,
|
||||||
http_port: u16,
|
http_port: u16,
|
||||||
pg_version: u32,
|
pg_version: u32,
|
||||||
skip_pg_catalog_updates: bool,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
@@ -136,7 +135,6 @@ impl ComputeControlPlane {
|
|||||||
mode,
|
mode,
|
||||||
tenant_id,
|
tenant_id,
|
||||||
pg_version,
|
pg_version,
|
||||||
skip_pg_catalog_updates: false,
|
|
||||||
});
|
});
|
||||||
|
|
||||||
ep.create_endpoint_dir()?;
|
ep.create_endpoint_dir()?;
|
||||||
@@ -150,7 +148,6 @@ impl ComputeControlPlane {
|
|||||||
http_port,
|
http_port,
|
||||||
pg_port,
|
pg_port,
|
||||||
pg_version,
|
pg_version,
|
||||||
skip_pg_catalog_updates: false,
|
|
||||||
})?,
|
})?,
|
||||||
)?;
|
)?;
|
||||||
std::fs::write(
|
std::fs::write(
|
||||||
@@ -186,9 +183,6 @@ pub struct Endpoint {
|
|||||||
// the endpoint runs in.
|
// the endpoint runs in.
|
||||||
pub env: LocalEnv,
|
pub env: LocalEnv,
|
||||||
pageserver: Arc<PageServerNode>,
|
pageserver: Arc<PageServerNode>,
|
||||||
|
|
||||||
// Optimizations
|
|
||||||
skip_pg_catalog_updates: bool,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Endpoint {
|
impl Endpoint {
|
||||||
@@ -222,7 +216,6 @@ impl Endpoint {
|
|||||||
mode: conf.mode,
|
mode: conf.mode,
|
||||||
tenant_id: conf.tenant_id,
|
tenant_id: conf.tenant_id,
|
||||||
pg_version: conf.pg_version,
|
pg_version: conf.pg_version,
|
||||||
skip_pg_catalog_updates: conf.skip_pg_catalog_updates,
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -405,16 +398,6 @@ impl Endpoint {
|
|||||||
String::from_utf8_lossy(&pg_ctl.stderr),
|
String::from_utf8_lossy(&pg_ctl.stderr),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Also wait for the compute_ctl process to die. It might have some cleanup
|
|
||||||
// work to do after postgres stops, like syncing safekeepers, etc.
|
|
||||||
//
|
|
||||||
// TODO use background_process::stop_process instead
|
|
||||||
let pidfile_path = self.endpoint_path().join("compute_ctl.pid");
|
|
||||||
let pid: u32 = std::fs::read_to_string(pidfile_path)?.parse()?;
|
|
||||||
let pid = nix::unistd::Pid::from_raw(pid as i32);
|
|
||||||
crate::background_process::wait_until_stopped("compute_ctl", pid)?;
|
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -467,7 +450,7 @@ impl Endpoint {
|
|||||||
|
|
||||||
// Create spec file
|
// Create spec file
|
||||||
let spec = ComputeSpec {
|
let spec = ComputeSpec {
|
||||||
skip_pg_catalog_updates: self.skip_pg_catalog_updates,
|
skip_pg_catalog_updates: false,
|
||||||
format_version: 1.0,
|
format_version: 1.0,
|
||||||
operation_uuid: None,
|
operation_uuid: None,
|
||||||
cluster: Cluster {
|
cluster: Cluster {
|
||||||
@@ -517,13 +500,7 @@ impl Endpoint {
|
|||||||
.stdin(std::process::Stdio::null())
|
.stdin(std::process::Stdio::null())
|
||||||
.stderr(logfile.try_clone()?)
|
.stderr(logfile.try_clone()?)
|
||||||
.stdout(logfile);
|
.stdout(logfile);
|
||||||
let child = cmd.spawn()?;
|
let _child = cmd.spawn()?;
|
||||||
|
|
||||||
// Write down the pid so we can wait for it when we want to stop
|
|
||||||
// TODO use background_process::start_process instead
|
|
||||||
let pid = child.id();
|
|
||||||
let pidfile_path = self.endpoint_path().join("compute_ctl.pid");
|
|
||||||
std::fs::write(pidfile_path, pid.to_string())?;
|
|
||||||
|
|
||||||
// Wait for it to start
|
// Wait for it to start
|
||||||
let mut attempt = 0;
|
let mut attempt = 0;
|
||||||
|
|||||||
@@ -364,7 +364,7 @@ impl LocalEnv {
|
|||||||
//
|
//
|
||||||
// Initialize a new Neon repository
|
// Initialize a new Neon repository
|
||||||
//
|
//
|
||||||
pub fn init(&mut self, pg_version: u32, force: bool) -> anyhow::Result<()> {
|
pub fn init(&mut self, pg_version: u32) -> anyhow::Result<()> {
|
||||||
// check if config already exists
|
// check if config already exists
|
||||||
let base_path = &self.base_data_dir;
|
let base_path = &self.base_data_dir;
|
||||||
ensure!(
|
ensure!(
|
||||||
@@ -372,29 +372,11 @@ impl LocalEnv {
|
|||||||
"repository base path is missing"
|
"repository base path is missing"
|
||||||
);
|
);
|
||||||
|
|
||||||
if base_path.exists() {
|
ensure!(
|
||||||
if force {
|
!base_path.exists(),
|
||||||
println!("removing all contents of '{}'", base_path.display());
|
"directory '{}' already exists. Perhaps already initialized?",
|
||||||
// instead of directly calling `remove_dir_all`, we keep the original dir but removing
|
base_path.display()
|
||||||
// all contents inside. This helps if the developer symbol links another directory (i.e.,
|
);
|
||||||
// S3 local SSD) to the `.neon` base directory.
|
|
||||||
for entry in std::fs::read_dir(base_path)? {
|
|
||||||
let entry = entry?;
|
|
||||||
let path = entry.path();
|
|
||||||
if path.is_dir() {
|
|
||||||
fs::remove_dir_all(&path)?;
|
|
||||||
} else {
|
|
||||||
fs::remove_file(&path)?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
bail!(
|
|
||||||
"directory '{}' already exists. Perhaps already initialized? (Hint: use --force to remove all contents)",
|
|
||||||
base_path.display()
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if !self.pg_bin_dir(pg_version)?.join("postgres").exists() {
|
if !self.pg_bin_dir(pg_version)?.join("postgres").exists() {
|
||||||
bail!(
|
bail!(
|
||||||
"Can't find postgres binary at {}",
|
"Can't find postgres binary at {}",
|
||||||
@@ -410,9 +392,7 @@ impl LocalEnv {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if !base_path.exists() {
|
fs::create_dir(base_path)?;
|
||||||
fs::create_dir(base_path)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Generate keypair for JWT.
|
// Generate keypair for JWT.
|
||||||
//
|
//
|
||||||
|
|||||||
@@ -189,7 +189,7 @@ services:
|
|||||||
- "/bin/bash"
|
- "/bin/bash"
|
||||||
- "-c"
|
- "-c"
|
||||||
command:
|
command:
|
||||||
- "until pg_isready -h compute -p 55433 -U cloud_admin ; do
|
- "until pg_isready -h compute -p 55433 ; do
|
||||||
echo 'Waiting to start compute...' && sleep 1;
|
echo 'Waiting to start compute...' && sleep 1;
|
||||||
done"
|
done"
|
||||||
depends_on:
|
depends_on:
|
||||||
|
|||||||
@@ -48,7 +48,6 @@ Creating docker-compose_storage_broker_1 ... done
|
|||||||
2. connect compute node
|
2. connect compute node
|
||||||
```
|
```
|
||||||
$ echo "localhost:55433:postgres:cloud_admin:cloud_admin" >> ~/.pgpass
|
$ echo "localhost:55433:postgres:cloud_admin:cloud_admin" >> ~/.pgpass
|
||||||
$ chmod 600 ~/.pgpass
|
|
||||||
$ psql -h localhost -p 55433 -U cloud_admin
|
$ psql -h localhost -p 55433 -U cloud_admin
|
||||||
postgres=# CREATE TABLE t(key int primary key, value text);
|
postgres=# CREATE TABLE t(key int primary key, value text);
|
||||||
CREATE TABLE
|
CREATE TABLE
|
||||||
|
|||||||
@@ -71,7 +71,6 @@ pub struct ComputeMetrics {
|
|||||||
pub wait_for_spec_ms: u64,
|
pub wait_for_spec_ms: u64,
|
||||||
pub sync_safekeepers_ms: u64,
|
pub sync_safekeepers_ms: u64,
|
||||||
pub basebackup_ms: u64,
|
pub basebackup_ms: u64,
|
||||||
pub start_postgres_ms: u64,
|
|
||||||
pub config_ms: u64,
|
pub config_ms: u64,
|
||||||
pub total_startup_ms: u64,
|
pub total_startup_ms: u64,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -148,14 +148,4 @@ mod tests {
|
|||||||
let file = File::open("tests/cluster_spec.json").unwrap();
|
let file = File::open("tests/cluster_spec.json").unwrap();
|
||||||
let _spec: ComputeSpec = serde_json::from_reader(file).unwrap();
|
let _spec: ComputeSpec = serde_json::from_reader(file).unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn parse_unknown_fields() {
|
|
||||||
// Forward compatibility test
|
|
||||||
let file = File::open("tests/cluster_spec.json").unwrap();
|
|
||||||
let mut json: serde_json::Value = serde_json::from_reader(file).unwrap();
|
|
||||||
let ob = json.as_object_mut().unwrap();
|
|
||||||
ob.insert("unknown_field_123123123".into(), "hello".into());
|
|
||||||
let _spec: ComputeSpec = serde_json::from_value(json).unwrap();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -23,7 +23,6 @@ use prometheus::{Registry, Result};
|
|||||||
pub mod launch_timestamp;
|
pub mod launch_timestamp;
|
||||||
mod wrappers;
|
mod wrappers;
|
||||||
pub use wrappers::{CountedReader, CountedWriter};
|
pub use wrappers::{CountedReader, CountedWriter};
|
||||||
pub mod metric_vec_duration;
|
|
||||||
|
|
||||||
pub type UIntGauge = GenericGauge<AtomicU64>;
|
pub type UIntGauge = GenericGauge<AtomicU64>;
|
||||||
pub type UIntGaugeVec = GenericGaugeVec<AtomicU64>;
|
pub type UIntGaugeVec = GenericGaugeVec<AtomicU64>;
|
||||||
|
|||||||
@@ -1,23 +0,0 @@
|
|||||||
//! Helpers for observing duration on HistogramVec / CounterVec / GaugeVec / MetricVec<T>.
|
|
||||||
|
|
||||||
use std::{future::Future, time::Instant};
|
|
||||||
|
|
||||||
pub trait DurationResultObserver {
|
|
||||||
fn observe_result<T, E>(&self, res: &Result<T, E>, duration: std::time::Duration);
|
|
||||||
}
|
|
||||||
|
|
||||||
pub async fn observe_async_block_duration_by_result<
|
|
||||||
T,
|
|
||||||
E,
|
|
||||||
F: Future<Output = Result<T, E>>,
|
|
||||||
O: DurationResultObserver,
|
|
||||||
>(
|
|
||||||
observer: &O,
|
|
||||||
block: F,
|
|
||||||
) -> Result<T, E> {
|
|
||||||
let start = Instant::now();
|
|
||||||
let result = block.await;
|
|
||||||
let duration = start.elapsed();
|
|
||||||
observer.observe_result(&result, duration);
|
|
||||||
result
|
|
||||||
}
|
|
||||||
@@ -57,9 +57,9 @@ pub fn slru_may_delete_clogsegment(segpage: u32, cutoff_page: u32) -> bool {
|
|||||||
// Multixact utils
|
// Multixact utils
|
||||||
|
|
||||||
pub fn mx_offset_to_flags_offset(xid: MultiXactId) -> usize {
|
pub fn mx_offset_to_flags_offset(xid: MultiXactId) -> usize {
|
||||||
((xid / pg_constants::MULTIXACT_MEMBERS_PER_MEMBERGROUP as u32)
|
((xid / pg_constants::MULTIXACT_MEMBERS_PER_MEMBERGROUP as u32) as u16
|
||||||
% pg_constants::MULTIXACT_MEMBERGROUPS_PER_PAGE as u32
|
% pg_constants::MULTIXACT_MEMBERGROUPS_PER_PAGE
|
||||||
* pg_constants::MULTIXACT_MEMBERGROUP_SIZE as u32) as usize
|
* pg_constants::MULTIXACT_MEMBERGROUP_SIZE) as usize
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn mx_offset_to_flags_bitshift(xid: MultiXactId) -> u16 {
|
pub fn mx_offset_to_flags_bitshift(xid: MultiXactId) -> u16 {
|
||||||
|
|||||||
@@ -70,14 +70,6 @@ impl RemotePath {
|
|||||||
pub fn join(&self, segment: &Path) -> Self {
|
pub fn join(&self, segment: &Path) -> Self {
|
||||||
Self(self.0.join(segment))
|
Self(self.0.join(segment))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_path(&self) -> &PathBuf {
|
|
||||||
&self.0
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn extension(&self) -> Option<&str> {
|
|
||||||
self.0.extension()?.to_str()
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Storage (potentially remote) API to manage its state.
|
/// Storage (potentially remote) API to manage its state.
|
||||||
@@ -94,19 +86,6 @@ pub trait RemoteStorage: Send + Sync + 'static {
|
|||||||
prefix: Option<&RemotePath>,
|
prefix: Option<&RemotePath>,
|
||||||
) -> Result<Vec<RemotePath>, DownloadError>;
|
) -> Result<Vec<RemotePath>, DownloadError>;
|
||||||
|
|
||||||
/// Lists all files in directory "recursively"
|
|
||||||
/// (not really recursively, because AWS has a flat namespace)
|
|
||||||
/// Note: This is subtely different than list_prefixes,
|
|
||||||
/// because it is for listing files instead of listing
|
|
||||||
/// names sharing common prefixes.
|
|
||||||
/// For example,
|
|
||||||
/// list_files("foo/bar") = ["foo/bar/cat123.txt",
|
|
||||||
/// "foo/bar/cat567.txt", "foo/bar/dog123.txt", "foo/bar/dog456.txt"]
|
|
||||||
/// whereas,
|
|
||||||
/// list_prefixes("foo/bar/") = ["cat", "dog"]
|
|
||||||
/// See `test_real_s3.rs` for more details.
|
|
||||||
async fn list_files(&self, folder: Option<&RemotePath>) -> anyhow::Result<Vec<RemotePath>>;
|
|
||||||
|
|
||||||
/// Streams the local file contents into remote into the remote storage entry.
|
/// Streams the local file contents into remote into the remote storage entry.
|
||||||
async fn upload(
|
async fn upload(
|
||||||
&self,
|
&self,
|
||||||
@@ -195,14 +174,6 @@ impl GenericRemoteStorage {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn list_files(&self, folder: Option<&RemotePath>) -> anyhow::Result<Vec<RemotePath>> {
|
|
||||||
match self {
|
|
||||||
Self::LocalFs(s) => s.list_files(folder).await,
|
|
||||||
Self::AwsS3(s) => s.list_files(folder).await,
|
|
||||||
Self::Unreliable(s) => s.list_files(folder).await,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub async fn upload(
|
pub async fn upload(
|
||||||
&self,
|
&self,
|
||||||
from: impl io::AsyncRead + Unpin + Send + Sync + 'static,
|
from: impl io::AsyncRead + Unpin + Send + Sync + 'static,
|
||||||
|
|||||||
@@ -48,14 +48,6 @@ impl LocalFs {
|
|||||||
Ok(Self { storage_root })
|
Ok(Self { storage_root })
|
||||||
}
|
}
|
||||||
|
|
||||||
// mirrors S3Bucket::s3_object_to_relative_path
|
|
||||||
fn local_file_to_relative_path(&self, key: PathBuf) -> RemotePath {
|
|
||||||
let relative_path = key
|
|
||||||
.strip_prefix(&self.storage_root)
|
|
||||||
.expect("relative path must contain storage_root as prefix");
|
|
||||||
RemotePath(relative_path.into())
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn read_storage_metadata(
|
async fn read_storage_metadata(
|
||||||
&self,
|
&self,
|
||||||
file_path: &Path,
|
file_path: &Path,
|
||||||
@@ -140,34 +132,6 @@ impl RemoteStorage for LocalFs {
|
|||||||
Ok(prefixes)
|
Ok(prefixes)
|
||||||
}
|
}
|
||||||
|
|
||||||
// recursively lists all files in a directory,
|
|
||||||
// mirroring the `list_files` for `s3_bucket`
|
|
||||||
async fn list_files(&self, folder: Option<&RemotePath>) -> anyhow::Result<Vec<RemotePath>> {
|
|
||||||
let full_path = match folder {
|
|
||||||
Some(folder) => folder.with_base(&self.storage_root),
|
|
||||||
None => self.storage_root.clone(),
|
|
||||||
};
|
|
||||||
let mut files = vec![];
|
|
||||||
let mut directory_queue = vec![full_path.clone()];
|
|
||||||
|
|
||||||
while !directory_queue.is_empty() {
|
|
||||||
let cur_folder = directory_queue
|
|
||||||
.pop()
|
|
||||||
.expect("queue cannot be empty: we just checked");
|
|
||||||
let mut entries = fs::read_dir(cur_folder.clone()).await?;
|
|
||||||
while let Some(entry) = entries.next_entry().await? {
|
|
||||||
let file_name: PathBuf = entry.file_name().into();
|
|
||||||
let full_file_name = cur_folder.clone().join(&file_name);
|
|
||||||
let file_remote_path = self.local_file_to_relative_path(full_file_name.clone());
|
|
||||||
files.push(file_remote_path.clone());
|
|
||||||
if full_file_name.is_dir() {
|
|
||||||
directory_queue.push(full_file_name);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(files)
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn upload(
|
async fn upload(
|
||||||
&self,
|
&self,
|
||||||
data: impl io::AsyncRead + Unpin + Send + Sync + 'static,
|
data: impl io::AsyncRead + Unpin + Send + Sync + 'static,
|
||||||
|
|||||||
@@ -34,8 +34,6 @@ use crate::{
|
|||||||
Download, DownloadError, RemotePath, RemoteStorage, S3Config, REMOTE_STORAGE_PREFIX_SEPARATOR,
|
Download, DownloadError, RemotePath, RemoteStorage, S3Config, REMOTE_STORAGE_PREFIX_SEPARATOR,
|
||||||
};
|
};
|
||||||
|
|
||||||
const MAX_DELETE_OBJECTS_REQUEST_SIZE: usize = 1000;
|
|
||||||
|
|
||||||
pub(super) mod metrics {
|
pub(super) mod metrics {
|
||||||
use metrics::{register_int_counter_vec, IntCounterVec};
|
use metrics::{register_int_counter_vec, IntCounterVec};
|
||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
@@ -347,51 +345,6 @@ impl RemoteStorage for S3Bucket {
|
|||||||
Ok(document_keys)
|
Ok(document_keys)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// See the doc for `RemoteStorage::list_files`
|
|
||||||
async fn list_files(&self, folder: Option<&RemotePath>) -> anyhow::Result<Vec<RemotePath>> {
|
|
||||||
let folder_name = folder
|
|
||||||
.map(|p| self.relative_path_to_s3_object(p))
|
|
||||||
.or_else(|| self.prefix_in_bucket.clone());
|
|
||||||
|
|
||||||
// AWS may need to break the response into several parts
|
|
||||||
let mut continuation_token = None;
|
|
||||||
let mut all_files = vec![];
|
|
||||||
loop {
|
|
||||||
let _guard = self
|
|
||||||
.concurrency_limiter
|
|
||||||
.acquire()
|
|
||||||
.await
|
|
||||||
.context("Concurrency limiter semaphore got closed during S3 list_files")?;
|
|
||||||
metrics::inc_list_objects();
|
|
||||||
|
|
||||||
let response = self
|
|
||||||
.client
|
|
||||||
.list_objects_v2()
|
|
||||||
.bucket(self.bucket_name.clone())
|
|
||||||
.set_prefix(folder_name.clone())
|
|
||||||
.set_continuation_token(continuation_token)
|
|
||||||
.set_max_keys(self.max_keys_per_list_response)
|
|
||||||
.send()
|
|
||||||
.await
|
|
||||||
.map_err(|e| {
|
|
||||||
metrics::inc_list_objects_fail();
|
|
||||||
e
|
|
||||||
})
|
|
||||||
.context("Failed to list files in S3 bucket")?;
|
|
||||||
|
|
||||||
for object in response.contents().unwrap_or_default() {
|
|
||||||
let object_path = object.key().expect("response does not contain a key");
|
|
||||||
let remote_path = self.s3_object_to_relative_path(object_path);
|
|
||||||
all_files.push(remote_path);
|
|
||||||
}
|
|
||||||
match response.next_continuation_token {
|
|
||||||
Some(new_token) => continuation_token = Some(new_token),
|
|
||||||
None => break,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(all_files)
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn upload(
|
async fn upload(
|
||||||
&self,
|
&self,
|
||||||
from: impl io::AsyncRead + Unpin + Send + Sync + 'static,
|
from: impl io::AsyncRead + Unpin + Send + Sync + 'static,
|
||||||
@@ -471,33 +424,17 @@ impl RemoteStorage for S3Bucket {
|
|||||||
delete_objects.push(obj_id);
|
delete_objects.push(obj_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
for chunk in delete_objects.chunks(MAX_DELETE_OBJECTS_REQUEST_SIZE) {
|
metrics::inc_delete_objects(paths.len() as u64);
|
||||||
metrics::inc_delete_objects(chunk.len() as u64);
|
self.client
|
||||||
|
.delete_objects()
|
||||||
let resp = self
|
.bucket(self.bucket_name.clone())
|
||||||
.client
|
.delete(Delete::builder().set_objects(Some(delete_objects)).build())
|
||||||
.delete_objects()
|
.send()
|
||||||
.bucket(self.bucket_name.clone())
|
.await
|
||||||
.delete(Delete::builder().set_objects(Some(chunk.to_vec())).build())
|
.map_err(|e| {
|
||||||
.send()
|
metrics::inc_delete_objects_fail(paths.len() as u64);
|
||||||
.await;
|
e
|
||||||
|
})?;
|
||||||
match resp {
|
|
||||||
Ok(resp) => {
|
|
||||||
if let Some(errors) = resp.errors {
|
|
||||||
metrics::inc_delete_objects_fail(errors.len() as u64);
|
|
||||||
return Err(anyhow::format_err!(
|
|
||||||
"Failed to delete {} objects",
|
|
||||||
errors.len()
|
|
||||||
));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
metrics::inc_delete_objects_fail(chunk.len() as u64);
|
|
||||||
return Err(e.into());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -24,7 +24,6 @@ enum RemoteOp {
|
|||||||
Upload(RemotePath),
|
Upload(RemotePath),
|
||||||
Download(RemotePath),
|
Download(RemotePath),
|
||||||
Delete(RemotePath),
|
Delete(RemotePath),
|
||||||
DeleteObjects(Vec<RemotePath>),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl UnreliableWrapper {
|
impl UnreliableWrapper {
|
||||||
@@ -83,11 +82,6 @@ impl RemoteStorage for UnreliableWrapper {
|
|||||||
self.inner.list_prefixes(prefix).await
|
self.inner.list_prefixes(prefix).await
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn list_files(&self, folder: Option<&RemotePath>) -> anyhow::Result<Vec<RemotePath>> {
|
|
||||||
self.attempt(RemoteOp::ListPrefixes(folder.cloned()))?;
|
|
||||||
self.inner.list_files(folder).await
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn upload(
|
async fn upload(
|
||||||
&self,
|
&self,
|
||||||
data: impl tokio::io::AsyncRead + Unpin + Send + Sync + 'static,
|
data: impl tokio::io::AsyncRead + Unpin + Send + Sync + 'static,
|
||||||
@@ -127,18 +121,8 @@ impl RemoteStorage for UnreliableWrapper {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async fn delete_objects<'a>(&self, paths: &'a [RemotePath]) -> anyhow::Result<()> {
|
async fn delete_objects<'a>(&self, paths: &'a [RemotePath]) -> anyhow::Result<()> {
|
||||||
self.attempt(RemoteOp::DeleteObjects(paths.to_vec()))?;
|
|
||||||
let mut error_counter = 0;
|
|
||||||
for path in paths {
|
for path in paths {
|
||||||
if (self.delete(path).await).is_err() {
|
self.delete(path).await?
|
||||||
error_counter += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if error_counter > 0 {
|
|
||||||
return Err(anyhow::anyhow!(
|
|
||||||
"failed to delete {} objects",
|
|
||||||
error_counter
|
|
||||||
));
|
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -88,58 +88,6 @@ async fn s3_pagination_should_work(ctx: &mut MaybeEnabledS3WithTestBlobs) -> any
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Tests that S3 client can list all files in a folder, even if the response comes paginated and requirees multiple S3 queries.
|
|
||||||
/// Uses real S3 and requires [`ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME`] and related S3 cred env vars specified. Test will skip real code and pass if env vars not set.
|
|
||||||
/// See `s3_pagination_should_work` for more information.
|
|
||||||
///
|
|
||||||
/// First, create a set of S3 objects with keys `random_prefix/folder{j}/blob_{i}.txt` in [`upload_s3_data`]
|
|
||||||
/// Then performs the following queries:
|
|
||||||
/// 1. `list_files(None)`. This should return all files `random_prefix/folder{j}/blob_{i}.txt`
|
|
||||||
/// 2. `list_files("folder1")`. This should return all files `random_prefix/folder1/blob_{i}.txt`
|
|
||||||
#[test_context(MaybeEnabledS3WithSimpleTestBlobs)]
|
|
||||||
#[tokio::test]
|
|
||||||
async fn s3_list_files_works(ctx: &mut MaybeEnabledS3WithSimpleTestBlobs) -> anyhow::Result<()> {
|
|
||||||
let ctx = match ctx {
|
|
||||||
MaybeEnabledS3WithSimpleTestBlobs::Enabled(ctx) => ctx,
|
|
||||||
MaybeEnabledS3WithSimpleTestBlobs::Disabled => return Ok(()),
|
|
||||||
MaybeEnabledS3WithSimpleTestBlobs::UploadsFailed(e, _) => {
|
|
||||||
anyhow::bail!("S3 init failed: {e:?}")
|
|
||||||
}
|
|
||||||
};
|
|
||||||
let test_client = Arc::clone(&ctx.enabled.client);
|
|
||||||
let base_prefix =
|
|
||||||
RemotePath::new(Path::new("folder1")).context("common_prefix construction")?;
|
|
||||||
let root_files = test_client
|
|
||||||
.list_files(None)
|
|
||||||
.await
|
|
||||||
.context("client list root files failure")?
|
|
||||||
.into_iter()
|
|
||||||
.collect::<HashSet<_>>();
|
|
||||||
assert_eq!(
|
|
||||||
root_files,
|
|
||||||
ctx.remote_blobs.clone(),
|
|
||||||
"remote storage list_files on root mismatches with the uploads."
|
|
||||||
);
|
|
||||||
let nested_remote_files = test_client
|
|
||||||
.list_files(Some(&base_prefix))
|
|
||||||
.await
|
|
||||||
.context("client list nested files failure")?
|
|
||||||
.into_iter()
|
|
||||||
.collect::<HashSet<_>>();
|
|
||||||
let trim_remote_blobs: HashSet<_> = ctx
|
|
||||||
.remote_blobs
|
|
||||||
.iter()
|
|
||||||
.map(|x| x.get_path().to_str().expect("must be valid name"))
|
|
||||||
.filter(|x| x.starts_with("folder1"))
|
|
||||||
.map(|x| RemotePath::new(Path::new(x)).expect("must be valid name"))
|
|
||||||
.collect();
|
|
||||||
assert_eq!(
|
|
||||||
nested_remote_files, trim_remote_blobs,
|
|
||||||
"remote storage list_files on subdirrectory mismatches with the uploads."
|
|
||||||
);
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test_context(MaybeEnabledS3)]
|
#[test_context(MaybeEnabledS3)]
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn s3_delete_non_exising_works(ctx: &mut MaybeEnabledS3) -> anyhow::Result<()> {
|
async fn s3_delete_non_exising_works(ctx: &mut MaybeEnabledS3) -> anyhow::Result<()> {
|
||||||
@@ -173,15 +121,10 @@ async fn s3_delete_objects_works(ctx: &mut MaybeEnabledS3) -> anyhow::Result<()>
|
|||||||
let path2 = RemotePath::new(&PathBuf::from(format!("{}/path2", ctx.base_prefix,)))
|
let path2 = RemotePath::new(&PathBuf::from(format!("{}/path2", ctx.base_prefix,)))
|
||||||
.with_context(|| "RemotePath conversion")?;
|
.with_context(|| "RemotePath conversion")?;
|
||||||
|
|
||||||
let path3 = RemotePath::new(&PathBuf::from(format!("{}/path3", ctx.base_prefix,)))
|
|
||||||
.with_context(|| "RemotePath conversion")?;
|
|
||||||
|
|
||||||
let data1 = "remote blob data1".as_bytes();
|
let data1 = "remote blob data1".as_bytes();
|
||||||
let data1_len = data1.len();
|
let data1_len = data1.len();
|
||||||
let data2 = "remote blob data2".as_bytes();
|
let data2 = "remote blob data2".as_bytes();
|
||||||
let data2_len = data2.len();
|
let data2_len = data2.len();
|
||||||
let data3 = "remote blob data3".as_bytes();
|
|
||||||
let data3_len = data3.len();
|
|
||||||
ctx.client
|
ctx.client
|
||||||
.upload(std::io::Cursor::new(data1), data1_len, &path1, None)
|
.upload(std::io::Cursor::new(data1), data1_len, &path1, None)
|
||||||
.await?;
|
.await?;
|
||||||
@@ -190,18 +133,8 @@ async fn s3_delete_objects_works(ctx: &mut MaybeEnabledS3) -> anyhow::Result<()>
|
|||||||
.upload(std::io::Cursor::new(data2), data2_len, &path2, None)
|
.upload(std::io::Cursor::new(data2), data2_len, &path2, None)
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
ctx.client
|
|
||||||
.upload(std::io::Cursor::new(data3), data3_len, &path3, None)
|
|
||||||
.await?;
|
|
||||||
|
|
||||||
ctx.client.delete_objects(&[path1, path2]).await?;
|
ctx.client.delete_objects(&[path1, path2]).await?;
|
||||||
|
|
||||||
let prefixes = ctx.client.list_prefixes(None).await?;
|
|
||||||
|
|
||||||
assert_eq!(prefixes.len(), 1);
|
|
||||||
|
|
||||||
ctx.client.delete_objects(&[path3]).await?;
|
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -315,66 +248,6 @@ impl AsyncTestContext for MaybeEnabledS3WithTestBlobs {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// NOTE: the setups for the list_prefixes test and the list_files test are very similar
|
|
||||||
// However, they are not idential. The list_prefixes function is concerned with listing prefixes,
|
|
||||||
// whereas the list_files function is concerned with listing files.
|
|
||||||
// See `RemoteStorage::list_files` documentation for more details
|
|
||||||
enum MaybeEnabledS3WithSimpleTestBlobs {
|
|
||||||
Enabled(S3WithSimpleTestBlobs),
|
|
||||||
Disabled,
|
|
||||||
UploadsFailed(anyhow::Error, S3WithSimpleTestBlobs),
|
|
||||||
}
|
|
||||||
struct S3WithSimpleTestBlobs {
|
|
||||||
enabled: EnabledS3,
|
|
||||||
remote_blobs: HashSet<RemotePath>,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[async_trait::async_trait]
|
|
||||||
impl AsyncTestContext for MaybeEnabledS3WithSimpleTestBlobs {
|
|
||||||
async fn setup() -> Self {
|
|
||||||
ensure_logging_ready();
|
|
||||||
if env::var(ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME).is_err() {
|
|
||||||
info!(
|
|
||||||
"`{}` env variable is not set, skipping the test",
|
|
||||||
ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME
|
|
||||||
);
|
|
||||||
return Self::Disabled;
|
|
||||||
}
|
|
||||||
|
|
||||||
let max_keys_in_list_response = 10;
|
|
||||||
let upload_tasks_count = 1 + (2 * usize::try_from(max_keys_in_list_response).unwrap());
|
|
||||||
|
|
||||||
let enabled = EnabledS3::setup(Some(max_keys_in_list_response)).await;
|
|
||||||
|
|
||||||
match upload_simple_s3_data(&enabled.client, upload_tasks_count).await {
|
|
||||||
ControlFlow::Continue(uploads) => {
|
|
||||||
info!("Remote objects created successfully");
|
|
||||||
|
|
||||||
Self::Enabled(S3WithSimpleTestBlobs {
|
|
||||||
enabled,
|
|
||||||
remote_blobs: uploads,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
ControlFlow::Break(uploads) => Self::UploadsFailed(
|
|
||||||
anyhow::anyhow!("One or multiple blobs failed to upload to S3"),
|
|
||||||
S3WithSimpleTestBlobs {
|
|
||||||
enabled,
|
|
||||||
remote_blobs: uploads,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn teardown(self) {
|
|
||||||
match self {
|
|
||||||
Self::Disabled => {}
|
|
||||||
Self::Enabled(ctx) | Self::UploadsFailed(_, ctx) => {
|
|
||||||
cleanup(&ctx.enabled.client, ctx.remote_blobs).await;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn create_s3_client(
|
fn create_s3_client(
|
||||||
max_keys_per_list_response: Option<i32>,
|
max_keys_per_list_response: Option<i32>,
|
||||||
) -> anyhow::Result<Arc<GenericRemoteStorage>> {
|
) -> anyhow::Result<Arc<GenericRemoteStorage>> {
|
||||||
@@ -385,7 +258,7 @@ fn create_s3_client(
|
|||||||
let random_prefix_part = std::time::SystemTime::now()
|
let random_prefix_part = std::time::SystemTime::now()
|
||||||
.duration_since(UNIX_EPOCH)
|
.duration_since(UNIX_EPOCH)
|
||||||
.context("random s3 test prefix part calculation")?
|
.context("random s3 test prefix part calculation")?
|
||||||
.as_nanos();
|
.as_millis();
|
||||||
let remote_storage_config = RemoteStorageConfig {
|
let remote_storage_config = RemoteStorageConfig {
|
||||||
max_concurrent_syncs: NonZeroUsize::new(100).unwrap(),
|
max_concurrent_syncs: NonZeroUsize::new(100).unwrap(),
|
||||||
max_sync_errors: NonZeroU32::new(5).unwrap(),
|
max_sync_errors: NonZeroU32::new(5).unwrap(),
|
||||||
@@ -491,52 +364,3 @@ async fn cleanup(client: &Arc<GenericRemoteStorage>, objects_to_delete: HashSet<
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Uploads files `folder{j}/blob{i}.txt`. See test description for more details.
|
|
||||||
async fn upload_simple_s3_data(
|
|
||||||
client: &Arc<GenericRemoteStorage>,
|
|
||||||
upload_tasks_count: usize,
|
|
||||||
) -> ControlFlow<HashSet<RemotePath>, HashSet<RemotePath>> {
|
|
||||||
info!("Creating {upload_tasks_count} S3 files");
|
|
||||||
let mut upload_tasks = JoinSet::new();
|
|
||||||
for i in 1..upload_tasks_count + 1 {
|
|
||||||
let task_client = Arc::clone(client);
|
|
||||||
upload_tasks.spawn(async move {
|
|
||||||
let blob_path = PathBuf::from(format!("folder{}/blob_{}.txt", i / 7, i));
|
|
||||||
let blob_path = RemotePath::new(&blob_path)
|
|
||||||
.with_context(|| format!("{blob_path:?} to RemotePath conversion"))?;
|
|
||||||
debug!("Creating remote item {i} at path {blob_path:?}");
|
|
||||||
|
|
||||||
let data = format!("remote blob data {i}").into_bytes();
|
|
||||||
let data_len = data.len();
|
|
||||||
task_client
|
|
||||||
.upload(std::io::Cursor::new(data), data_len, &blob_path, None)
|
|
||||||
.await?;
|
|
||||||
|
|
||||||
Ok::<_, anyhow::Error>(blob_path)
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut upload_tasks_failed = false;
|
|
||||||
let mut uploaded_blobs = HashSet::with_capacity(upload_tasks_count);
|
|
||||||
while let Some(task_run_result) = upload_tasks.join_next().await {
|
|
||||||
match task_run_result
|
|
||||||
.context("task join failed")
|
|
||||||
.and_then(|task_result| task_result.context("upload task failed"))
|
|
||||||
{
|
|
||||||
Ok(upload_path) => {
|
|
||||||
uploaded_blobs.insert(upload_path);
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
error!("Upload task failed: {e:?}");
|
|
||||||
upload_tasks_failed = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if upload_tasks_failed {
|
|
||||||
ControlFlow::Break(uploaded_blobs)
|
|
||||||
} else {
|
|
||||||
ControlFlow::Continue(uploaded_blobs)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ edition.workspace = true
|
|||||||
license.workspace = true
|
license.workspace = true
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
atty.workspace = true
|
||||||
sentry.workspace = true
|
sentry.workspace = true
|
||||||
async-trait.workspace = true
|
async-trait.workspace = true
|
||||||
anyhow.workspace = true
|
anyhow.workspace = true
|
||||||
|
|||||||
@@ -1,6 +1,5 @@
|
|||||||
use hyper::{header, Body, Response, StatusCode};
|
use hyper::{header, Body, Response, StatusCode};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use std::error::Error as StdError;
|
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
use tracing::error;
|
use tracing::error;
|
||||||
|
|
||||||
@@ -16,7 +15,7 @@ pub enum ApiError {
|
|||||||
Unauthorized(String),
|
Unauthorized(String),
|
||||||
|
|
||||||
#[error("NotFound: {0}")]
|
#[error("NotFound: {0}")]
|
||||||
NotFound(Box<dyn StdError + Send + Sync + 'static>),
|
NotFound(anyhow::Error),
|
||||||
|
|
||||||
#[error("Conflict: {0}")]
|
#[error("Conflict: {0}")]
|
||||||
Conflict(String),
|
Conflict(String),
|
||||||
|
|||||||
@@ -84,7 +84,7 @@ pub fn init(
|
|||||||
let r = r.with({
|
let r = r.with({
|
||||||
let log_layer = tracing_subscriber::fmt::layer()
|
let log_layer = tracing_subscriber::fmt::layer()
|
||||||
.with_target(false)
|
.with_target(false)
|
||||||
.with_ansi(false)
|
.with_ansi(atty::is(atty::Stream::Stdout))
|
||||||
.with_writer(std::io::stdout);
|
.with_writer(std::io::stdout);
|
||||||
let log_layer = match log_format {
|
let log_layer = match log_format {
|
||||||
LogFormat::Json => log_layer.json().boxed(),
|
LogFormat::Json => log_layer.json().boxed(),
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ testing = ["fail/failpoints"]
|
|||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
anyhow.workspace = true
|
anyhow.workspace = true
|
||||||
|
arc-swap.workspace = true
|
||||||
async-stream.workspace = true
|
async-stream.workspace = true
|
||||||
async-trait.workspace = true
|
async-trait.workspace = true
|
||||||
byteorder.workspace = true
|
byteorder.workspace = true
|
||||||
|
|||||||
@@ -495,50 +495,50 @@ fn start_pageserver(
|
|||||||
Ok(())
|
Ok(())
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(metric_collection_endpoint) = &conf.metric_collection_endpoint {
|
if let Some(metric_collection_endpoint) = &conf.metric_collection_endpoint {
|
||||||
let background_jobs_barrier = background_jobs_barrier;
|
let background_jobs_barrier = background_jobs_barrier;
|
||||||
let metrics_ctx = RequestContext::todo_child(
|
let metrics_ctx = RequestContext::todo_child(
|
||||||
TaskKind::MetricsCollection,
|
TaskKind::MetricsCollection,
|
||||||
// This task itself shouldn't download anything.
|
// This task itself shouldn't download anything.
|
||||||
// The actual size calculation does need downloads, and
|
// The actual size calculation does need downloads, and
|
||||||
// creates a child context with the right DownloadBehavior.
|
// creates a child context with the right DownloadBehavior.
|
||||||
DownloadBehavior::Error,
|
DownloadBehavior::Error,
|
||||||
);
|
);
|
||||||
task_mgr::spawn(
|
task_mgr::spawn(
|
||||||
crate::BACKGROUND_RUNTIME.handle(),
|
MGMT_REQUEST_RUNTIME.handle(),
|
||||||
TaskKind::MetricsCollection,
|
TaskKind::MetricsCollection,
|
||||||
None,
|
None,
|
||||||
None,
|
None,
|
||||||
"consumption metrics collection",
|
"consumption metrics collection",
|
||||||
true,
|
true,
|
||||||
async move {
|
async move {
|
||||||
// first wait until background jobs are cleared to launch.
|
// first wait until background jobs are cleared to launch.
|
||||||
//
|
//
|
||||||
// this is because we only process active tenants and timelines, and the
|
// this is because we only process active tenants and timelines, and the
|
||||||
// Timeline::get_current_logical_size will spawn the logical size calculation,
|
// Timeline::get_current_logical_size will spawn the logical size calculation,
|
||||||
// which will not be rate-limited.
|
// which will not be rate-limited.
|
||||||
let cancel = task_mgr::shutdown_token();
|
let cancel = task_mgr::shutdown_token();
|
||||||
|
|
||||||
tokio::select! {
|
tokio::select! {
|
||||||
_ = cancel.cancelled() => { return Ok(()); },
|
_ = cancel.cancelled() => { return Ok(()); },
|
||||||
_ = background_jobs_barrier.wait() => {}
|
_ = background_jobs_barrier.wait() => {}
|
||||||
};
|
};
|
||||||
|
|
||||||
pageserver::consumption_metrics::collect_metrics(
|
pageserver::consumption_metrics::collect_metrics(
|
||||||
metric_collection_endpoint,
|
metric_collection_endpoint,
|
||||||
conf.metric_collection_interval,
|
conf.metric_collection_interval,
|
||||||
conf.cached_metric_collection_interval,
|
conf.cached_metric_collection_interval,
|
||||||
conf.synthetic_size_calculation_interval,
|
conf.synthetic_size_calculation_interval,
|
||||||
conf.id,
|
conf.id,
|
||||||
metrics_ctx,
|
metrics_ctx,
|
||||||
)
|
)
|
||||||
.instrument(info_span!("metrics_collection"))
|
.instrument(info_span!("metrics_collection"))
|
||||||
.await?;
|
.await?;
|
||||||
Ok(())
|
Ok(())
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Spawn a task to listen for libpq connections. It will spawn further tasks
|
// Spawn a task to listen for libpq connections. It will spawn further tasks
|
||||||
|
|||||||
@@ -96,12 +96,12 @@ pub mod defaults {
|
|||||||
|
|
||||||
#background_task_maximum_delay = '{DEFAULT_BACKGROUND_TASK_MAXIMUM_DELAY}'
|
#background_task_maximum_delay = '{DEFAULT_BACKGROUND_TASK_MAXIMUM_DELAY}'
|
||||||
|
|
||||||
[tenant_config]
|
# [tenant_config]
|
||||||
#checkpoint_distance = {DEFAULT_CHECKPOINT_DISTANCE} # in bytes
|
#checkpoint_distance = {DEFAULT_CHECKPOINT_DISTANCE} # in bytes
|
||||||
#checkpoint_timeout = {DEFAULT_CHECKPOINT_TIMEOUT}
|
#checkpoint_timeout = {DEFAULT_CHECKPOINT_TIMEOUT}
|
||||||
#compaction_target_size = {DEFAULT_COMPACTION_TARGET_SIZE} # in bytes
|
#compaction_target_size = {DEFAULT_COMPACTION_TARGET_SIZE} # in bytes
|
||||||
#compaction_period = '{DEFAULT_COMPACTION_PERIOD}'
|
#compaction_period = '{DEFAULT_COMPACTION_PERIOD}'
|
||||||
#compaction_threshold = {DEFAULT_COMPACTION_THRESHOLD}
|
#compaction_threshold = '{DEFAULT_COMPACTION_THRESHOLD}'
|
||||||
|
|
||||||
#gc_period = '{DEFAULT_GC_PERIOD}'
|
#gc_period = '{DEFAULT_GC_PERIOD}'
|
||||||
#gc_horizon = {DEFAULT_GC_HORIZON}
|
#gc_horizon = {DEFAULT_GC_HORIZON}
|
||||||
@@ -111,8 +111,7 @@ pub mod defaults {
|
|||||||
#min_resident_size_override = .. # in bytes
|
#min_resident_size_override = .. # in bytes
|
||||||
#evictions_low_residence_duration_metric_threshold = '{DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD}'
|
#evictions_low_residence_duration_metric_threshold = '{DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD}'
|
||||||
#gc_feedback = false
|
#gc_feedback = false
|
||||||
|
# [remote_storage]
|
||||||
[remote_storage]
|
|
||||||
|
|
||||||
"###
|
"###
|
||||||
);
|
);
|
||||||
|
|||||||
@@ -24,8 +24,6 @@ const RESIDENT_SIZE: &str = "resident_size";
|
|||||||
const REMOTE_STORAGE_SIZE: &str = "remote_storage_size";
|
const REMOTE_STORAGE_SIZE: &str = "remote_storage_size";
|
||||||
const TIMELINE_LOGICAL_SIZE: &str = "timeline_logical_size";
|
const TIMELINE_LOGICAL_SIZE: &str = "timeline_logical_size";
|
||||||
|
|
||||||
const DEFAULT_HTTP_REPORTING_TIMEOUT: Duration = Duration::from_secs(60);
|
|
||||||
|
|
||||||
#[serde_as]
|
#[serde_as]
|
||||||
#[derive(Serialize, Debug)]
|
#[derive(Serialize, Debug)]
|
||||||
struct Ids {
|
struct Ids {
|
||||||
@@ -75,10 +73,7 @@ pub async fn collect_metrics(
|
|||||||
);
|
);
|
||||||
|
|
||||||
// define client here to reuse it for all requests
|
// define client here to reuse it for all requests
|
||||||
let client = reqwest::ClientBuilder::new()
|
let client = reqwest::Client::new();
|
||||||
.timeout(DEFAULT_HTTP_REPORTING_TIMEOUT)
|
|
||||||
.build()
|
|
||||||
.expect("Failed to create http client with timeout");
|
|
||||||
let mut cached_metrics: HashMap<PageserverConsumptionMetricsKey, u64> = HashMap::new();
|
let mut cached_metrics: HashMap<PageserverConsumptionMetricsKey, u64> = HashMap::new();
|
||||||
let mut prev_iteration_time: std::time::Instant = std::time::Instant::now();
|
let mut prev_iteration_time: std::time::Instant = std::time::Instant::now();
|
||||||
|
|
||||||
@@ -88,7 +83,7 @@ pub async fn collect_metrics(
|
|||||||
info!("collect_metrics received cancellation request");
|
info!("collect_metrics received cancellation request");
|
||||||
return Ok(());
|
return Ok(());
|
||||||
},
|
},
|
||||||
tick_at = ticker.tick() => {
|
_ = ticker.tick() => {
|
||||||
|
|
||||||
// send cached metrics every cached_metric_collection_interval
|
// send cached metrics every cached_metric_collection_interval
|
||||||
let send_cached = prev_iteration_time.elapsed() >= cached_metric_collection_interval;
|
let send_cached = prev_iteration_time.elapsed() >= cached_metric_collection_interval;
|
||||||
@@ -98,12 +93,6 @@ pub async fn collect_metrics(
|
|||||||
}
|
}
|
||||||
|
|
||||||
collect_metrics_iteration(&client, &mut cached_metrics, metric_collection_endpoint, node_id, &ctx, send_cached).await;
|
collect_metrics_iteration(&client, &mut cached_metrics, metric_collection_endpoint, node_id, &ctx, send_cached).await;
|
||||||
|
|
||||||
crate::tenant::tasks::warn_when_period_overrun(
|
|
||||||
tick_at.elapsed(),
|
|
||||||
metric_collection_interval,
|
|
||||||
"consumption_metrics_collect_metrics",
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -234,18 +223,14 @@ pub async fn collect_metrics_iteration(
|
|||||||
// Note that this metric is calculated in a separate bgworker
|
// Note that this metric is calculated in a separate bgworker
|
||||||
// Here we only use cached value, which may lag behind the real latest one
|
// Here we only use cached value, which may lag behind the real latest one
|
||||||
let tenant_synthetic_size = tenant.get_cached_synthetic_size();
|
let tenant_synthetic_size = tenant.get_cached_synthetic_size();
|
||||||
|
current_metrics.push((
|
||||||
if tenant_synthetic_size != 0 {
|
PageserverConsumptionMetricsKey {
|
||||||
// only send non-zeroes because otherwise these show up as errors in logs
|
tenant_id,
|
||||||
current_metrics.push((
|
timeline_id: None,
|
||||||
PageserverConsumptionMetricsKey {
|
metric: SYNTHETIC_STORAGE_SIZE,
|
||||||
tenant_id,
|
},
|
||||||
timeline_id: None,
|
tenant_synthetic_size,
|
||||||
metric: SYNTHETIC_STORAGE_SIZE,
|
));
|
||||||
},
|
|
||||||
tenant_synthetic_size,
|
|
||||||
));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Filter metrics, unless we want to send all metrics, including cached ones.
|
// Filter metrics, unless we want to send all metrics, including cached ones.
|
||||||
@@ -288,43 +273,32 @@ pub async fn collect_metrics_iteration(
|
|||||||
})
|
})
|
||||||
.expect("PageserverConsumptionMetric should not fail serialization");
|
.expect("PageserverConsumptionMetric should not fail serialization");
|
||||||
|
|
||||||
const MAX_RETRIES: u32 = 3;
|
let res = client
|
||||||
|
.post(metric_collection_endpoint.clone())
|
||||||
|
.json(&chunk_json)
|
||||||
|
.send()
|
||||||
|
.await;
|
||||||
|
|
||||||
for attempt in 0..MAX_RETRIES {
|
match res {
|
||||||
let res = client
|
Ok(res) => {
|
||||||
.post(metric_collection_endpoint.clone())
|
if res.status().is_success() {
|
||||||
.json(&chunk_json)
|
// update cached metrics after they were sent successfully
|
||||||
.send()
|
for (curr_key, curr_val) in chunk.iter() {
|
||||||
.await;
|
cached_metrics.insert(curr_key.clone(), *curr_val);
|
||||||
|
}
|
||||||
match res {
|
} else {
|
||||||
Ok(res) => {
|
error!("metrics endpoint refused the sent metrics: {:?}", res);
|
||||||
if res.status().is_success() {
|
for metric in chunk_to_send.iter() {
|
||||||
// update cached metrics after they were sent successfully
|
// Report if the metric value is suspiciously large
|
||||||
for (curr_key, curr_val) in chunk.iter() {
|
if metric.value > (1u64 << 40) {
|
||||||
cached_metrics.insert(curr_key.clone(), *curr_val);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
error!("metrics endpoint refused the sent metrics: {:?}", res);
|
|
||||||
for metric in chunk_to_send
|
|
||||||
.iter()
|
|
||||||
.filter(|metric| metric.value > (1u64 << 40))
|
|
||||||
{
|
|
||||||
// Report if the metric value is suspiciously large
|
|
||||||
error!("potentially abnormal metric value: {:?}", metric);
|
error!("potentially abnormal metric value: {:?}", metric);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
|
||||||
}
|
|
||||||
Err(err) if err.is_timeout() => {
|
|
||||||
error!(attempt, "timeout sending metrics, retrying immediately");
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
Err(err) => {
|
|
||||||
error!(attempt, ?err, "failed to send metrics");
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Err(err) => {
|
||||||
|
error!("failed to send metrics: {:?}", err);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -343,7 +317,7 @@ pub async fn calculate_synthetic_size_worker(
|
|||||||
_ = task_mgr::shutdown_watcher() => {
|
_ = task_mgr::shutdown_watcher() => {
|
||||||
return Ok(());
|
return Ok(());
|
||||||
},
|
},
|
||||||
tick_at = ticker.tick() => {
|
_ = ticker.tick() => {
|
||||||
|
|
||||||
let tenants = match mgr::list_tenants().await {
|
let tenants = match mgr::list_tenants().await {
|
||||||
Ok(tenants) => tenants,
|
Ok(tenants) => tenants,
|
||||||
@@ -369,12 +343,6 @@ pub async fn calculate_synthetic_size_worker(
|
|||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
crate::tenant::tasks::warn_when_period_overrun(
|
|
||||||
tick_at.elapsed(),
|
|
||||||
synthetic_size_calculation_interval,
|
|
||||||
"consumption_metrics_synthetic_size_worker",
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -110,6 +110,7 @@ pub fn launch_disk_usage_global_eviction_task(
|
|||||||
|
|
||||||
disk_usage_eviction_task(&state, task_config, storage, &conf.tenants_path(), cancel)
|
disk_usage_eviction_task(&state, task_config, storage, &conf.tenants_path(), cancel)
|
||||||
.await;
|
.await;
|
||||||
|
info!("disk usage based eviction task finishing");
|
||||||
Ok(())
|
Ok(())
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
@@ -125,16 +126,13 @@ async fn disk_usage_eviction_task(
|
|||||||
tenants_dir: &Path,
|
tenants_dir: &Path,
|
||||||
cancel: CancellationToken,
|
cancel: CancellationToken,
|
||||||
) {
|
) {
|
||||||
scopeguard::defer! {
|
|
||||||
info!("disk usage based eviction task finishing");
|
|
||||||
};
|
|
||||||
|
|
||||||
use crate::tenant::tasks::random_init_delay;
|
use crate::tenant::tasks::random_init_delay;
|
||||||
{
|
{
|
||||||
if random_init_delay(task_config.period, &cancel)
|
if random_init_delay(task_config.period, &cancel)
|
||||||
.await
|
.await
|
||||||
.is_err()
|
.is_err()
|
||||||
{
|
{
|
||||||
|
info!("shutting down");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -169,6 +167,7 @@ async fn disk_usage_eviction_task(
|
|||||||
tokio::select! {
|
tokio::select! {
|
||||||
_ = tokio::time::sleep_until(sleep_until) => {},
|
_ = tokio::time::sleep_until(sleep_until) => {},
|
||||||
_ = cancel.cancelled() => {
|
_ = cancel.cancelled() => {
|
||||||
|
info!("shutting down");
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -315,7 +314,7 @@ pub async fn disk_usage_eviction_task_iteration_impl<U: Usage>(
|
|||||||
partition,
|
partition,
|
||||||
candidate.layer.get_tenant_id(),
|
candidate.layer.get_tenant_id(),
|
||||||
candidate.layer.get_timeline_id(),
|
candidate.layer.get_timeline_id(),
|
||||||
candidate.layer,
|
candidate.layer.filename().file_name(),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -186,8 +186,10 @@ paths:
|
|||||||
schema:
|
schema:
|
||||||
$ref: "#/components/schemas/Error"
|
$ref: "#/components/schemas/Error"
|
||||||
delete:
|
delete:
|
||||||
description: "Attempts to delete specified timeline. 500 and 409 errors should be retried"
|
description: "Attempts to delete specified timeline. On 500 errors should be retried"
|
||||||
responses:
|
responses:
|
||||||
|
"200":
|
||||||
|
description: Ok
|
||||||
"400":
|
"400":
|
||||||
description: Error when no tenant id found in path or no timeline id
|
description: Error when no tenant id found in path or no timeline id
|
||||||
content:
|
content:
|
||||||
@@ -212,12 +214,6 @@ paths:
|
|||||||
application/json:
|
application/json:
|
||||||
schema:
|
schema:
|
||||||
$ref: "#/components/schemas/NotFoundError"
|
$ref: "#/components/schemas/NotFoundError"
|
||||||
"409":
|
|
||||||
description: Deletion is already in progress, continue polling
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: "#/components/schemas/ConflictError"
|
|
||||||
"412":
|
"412":
|
||||||
description: Tenant is missing, or timeline has children
|
description: Tenant is missing, or timeline has children
|
||||||
content:
|
content:
|
||||||
@@ -722,12 +718,6 @@ paths:
|
|||||||
application/json:
|
application/json:
|
||||||
schema:
|
schema:
|
||||||
$ref: "#/components/schemas/ForbiddenError"
|
$ref: "#/components/schemas/ForbiddenError"
|
||||||
"406":
|
|
||||||
description: Permanently unsatisfiable request, don't retry.
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: "#/components/schemas/Error"
|
|
||||||
"409":
|
"409":
|
||||||
description: Timeline already exists, creation skipped
|
description: Timeline already exists, creation skipped
|
||||||
content:
|
content:
|
||||||
|
|||||||
@@ -23,6 +23,7 @@ use super::models::{
|
|||||||
TimelineCreateRequest, TimelineGcRequest, TimelineInfo,
|
TimelineCreateRequest, TimelineGcRequest, TimelineInfo,
|
||||||
};
|
};
|
||||||
use crate::context::{DownloadBehavior, RequestContext};
|
use crate::context::{DownloadBehavior, RequestContext};
|
||||||
|
use crate::disk_usage_eviction_task;
|
||||||
use crate::metrics::{StorageTimeOperation, STORAGE_TIME_GLOBAL};
|
use crate::metrics::{StorageTimeOperation, STORAGE_TIME_GLOBAL};
|
||||||
use crate::pgdatadir_mapping::LsnForTimestamp;
|
use crate::pgdatadir_mapping::LsnForTimestamp;
|
||||||
use crate::task_mgr::TaskKind;
|
use crate::task_mgr::TaskKind;
|
||||||
@@ -34,7 +35,6 @@ use crate::tenant::size::ModelInputs;
|
|||||||
use crate::tenant::storage_layer::LayerAccessStatsReset;
|
use crate::tenant::storage_layer::LayerAccessStatsReset;
|
||||||
use crate::tenant::{LogicalSizeCalculationCause, PageReconstructError, Timeline};
|
use crate::tenant::{LogicalSizeCalculationCause, PageReconstructError, Timeline};
|
||||||
use crate::{config::PageServerConf, tenant::mgr};
|
use crate::{config::PageServerConf, tenant::mgr};
|
||||||
use crate::{disk_usage_eviction_task, tenant};
|
|
||||||
use utils::{
|
use utils::{
|
||||||
auth::JwtAuth,
|
auth::JwtAuth,
|
||||||
http::{
|
http::{
|
||||||
@@ -142,7 +142,7 @@ impl From<TenantMapInsertError> for ApiError {
|
|||||||
impl From<TenantStateError> for ApiError {
|
impl From<TenantStateError> for ApiError {
|
||||||
fn from(tse: TenantStateError) -> ApiError {
|
fn from(tse: TenantStateError) -> ApiError {
|
||||||
match tse {
|
match tse {
|
||||||
TenantStateError::NotFound(tid) => ApiError::NotFound(anyhow!("tenant {}", tid).into()),
|
TenantStateError::NotFound(tid) => ApiError::NotFound(anyhow!("tenant {}", tid)),
|
||||||
_ => ApiError::InternalServerError(anyhow::Error::new(tse)),
|
_ => ApiError::InternalServerError(anyhow::Error::new(tse)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -151,7 +151,7 @@ impl From<TenantStateError> for ApiError {
|
|||||||
impl From<GetTenantError> for ApiError {
|
impl From<GetTenantError> for ApiError {
|
||||||
fn from(tse: GetTenantError) -> ApiError {
|
fn from(tse: GetTenantError) -> ApiError {
|
||||||
match tse {
|
match tse {
|
||||||
GetTenantError::NotFound(tid) => ApiError::NotFound(anyhow!("tenant {}", tid).into()),
|
GetTenantError::NotFound(tid) => ApiError::NotFound(anyhow!("tenant {}", tid)),
|
||||||
e @ GetTenantError::NotActive(_) => {
|
e @ GetTenantError::NotActive(_) => {
|
||||||
// Why is this not `ApiError::NotFound`?
|
// Why is this not `ApiError::NotFound`?
|
||||||
// Because we must be careful to never return 404 for a tenant if it does
|
// Because we must be careful to never return 404 for a tenant if it does
|
||||||
@@ -169,7 +169,7 @@ impl From<SetNewTenantConfigError> for ApiError {
|
|||||||
fn from(e: SetNewTenantConfigError) -> ApiError {
|
fn from(e: SetNewTenantConfigError) -> ApiError {
|
||||||
match e {
|
match e {
|
||||||
SetNewTenantConfigError::GetTenant(tid) => {
|
SetNewTenantConfigError::GetTenant(tid) => {
|
||||||
ApiError::NotFound(anyhow!("tenant {}", tid).into())
|
ApiError::NotFound(anyhow!("tenant {}", tid))
|
||||||
}
|
}
|
||||||
e @ SetNewTenantConfigError::Persist(_) => {
|
e @ SetNewTenantConfigError::Persist(_) => {
|
||||||
ApiError::InternalServerError(anyhow::Error::new(e))
|
ApiError::InternalServerError(anyhow::Error::new(e))
|
||||||
@@ -182,12 +182,11 @@ impl From<crate::tenant::DeleteTimelineError> for ApiError {
|
|||||||
fn from(value: crate::tenant::DeleteTimelineError) -> Self {
|
fn from(value: crate::tenant::DeleteTimelineError) -> Self {
|
||||||
use crate::tenant::DeleteTimelineError::*;
|
use crate::tenant::DeleteTimelineError::*;
|
||||||
match value {
|
match value {
|
||||||
NotFound => ApiError::NotFound(anyhow::anyhow!("timeline not found").into()),
|
NotFound => ApiError::NotFound(anyhow::anyhow!("timeline not found")),
|
||||||
HasChildren(children) => ApiError::PreconditionFailed(
|
HasChildren(children) => ApiError::PreconditionFailed(
|
||||||
format!("Cannot delete timeline which has child timelines: {children:?}")
|
format!("Cannot delete timeline which has child timelines: {children:?}")
|
||||||
.into_boxed_str(),
|
.into_boxed_str(),
|
||||||
),
|
),
|
||||||
a @ AlreadyInProgress => ApiError::Conflict(a.to_string()),
|
|
||||||
Other(e) => ApiError::InternalServerError(e),
|
Other(e) => ApiError::InternalServerError(e),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -328,22 +327,15 @@ async fn timeline_create_handler(
|
|||||||
&ctx,
|
&ctx,
|
||||||
)
|
)
|
||||||
.await {
|
.await {
|
||||||
Ok(new_timeline) => {
|
Ok(Some(new_timeline)) => {
|
||||||
// Created. Construct a TimelineInfo for it.
|
// Created. Construct a TimelineInfo for it.
|
||||||
let timeline_info = build_timeline_info_common(&new_timeline, &ctx)
|
let timeline_info = build_timeline_info_common(&new_timeline, &ctx)
|
||||||
.await
|
.await
|
||||||
.map_err(ApiError::InternalServerError)?;
|
.map_err(ApiError::InternalServerError)?;
|
||||||
json_response(StatusCode::CREATED, timeline_info)
|
json_response(StatusCode::CREATED, timeline_info)
|
||||||
}
|
}
|
||||||
Err(tenant::CreateTimelineError::AlreadyExists) => {
|
Ok(None) => json_response(StatusCode::CONFLICT, ()), // timeline already exists
|
||||||
json_response(StatusCode::CONFLICT, ())
|
Err(err) => Err(ApiError::InternalServerError(err)),
|
||||||
}
|
|
||||||
Err(tenant::CreateTimelineError::AncestorLsn(err)) => {
|
|
||||||
json_response(StatusCode::NOT_ACCEPTABLE, HttpErrorBody::from_msg(
|
|
||||||
format!("{err:#}")
|
|
||||||
))
|
|
||||||
}
|
|
||||||
Err(tenant::CreateTimelineError::Other(err)) => Err(ApiError::InternalServerError(err)),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
.instrument(info_span!("timeline_create", tenant = %tenant_id, timeline_id = %new_timeline_id, lsn=?request_data.ancestor_start_lsn, pg_version=?request_data.pg_version))
|
.instrument(info_span!("timeline_create", tenant = %tenant_id, timeline_id = %new_timeline_id, lsn=?request_data.ancestor_start_lsn, pg_version=?request_data.pg_version))
|
||||||
@@ -405,7 +397,7 @@ async fn timeline_detail_handler(
|
|||||||
|
|
||||||
let timeline = tenant
|
let timeline = tenant
|
||||||
.get_timeline(timeline_id, false)
|
.get_timeline(timeline_id, false)
|
||||||
.map_err(|e| ApiError::NotFound(e.into()))?;
|
.map_err(ApiError::NotFound)?;
|
||||||
|
|
||||||
let timeline_info = build_timeline_info(
|
let timeline_info = build_timeline_info(
|
||||||
&timeline,
|
&timeline,
|
||||||
@@ -1069,7 +1061,7 @@ async fn timeline_download_remote_layers_handler_get(
|
|||||||
let info = timeline
|
let info = timeline
|
||||||
.get_download_all_remote_layers_task_info()
|
.get_download_all_remote_layers_task_info()
|
||||||
.context("task never started since last pageserver process start")
|
.context("task never started since last pageserver process start")
|
||||||
.map_err(|e| ApiError::NotFound(e.into()))?;
|
.map_err(ApiError::NotFound)?;
|
||||||
json_response(StatusCode::OK, info)
|
json_response(StatusCode::OK, info)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1080,7 +1072,7 @@ async fn active_timeline_of_active_tenant(
|
|||||||
let tenant = mgr::get_tenant(tenant_id, true).await?;
|
let tenant = mgr::get_tenant(tenant_id, true).await?;
|
||||||
tenant
|
tenant
|
||||||
.get_timeline(timeline_id, true)
|
.get_timeline(timeline_id, true)
|
||||||
.map_err(|e| ApiError::NotFound(e.into()))
|
.map_err(ApiError::NotFound)
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn always_panic_handler(
|
async fn always_panic_handler(
|
||||||
@@ -1136,6 +1128,8 @@ async fn disk_usage_eviction_run(
|
|||||||
freed_bytes: 0,
|
freed_bytes: 0,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
use crate::task_mgr::MGMT_REQUEST_RUNTIME;
|
||||||
|
|
||||||
let (tx, rx) = tokio::sync::oneshot::channel();
|
let (tx, rx) = tokio::sync::oneshot::channel();
|
||||||
|
|
||||||
let state = get_state(&r);
|
let state = get_state(&r);
|
||||||
@@ -1153,7 +1147,7 @@ async fn disk_usage_eviction_run(
|
|||||||
let _g = cancel.drop_guard();
|
let _g = cancel.drop_guard();
|
||||||
|
|
||||||
crate::task_mgr::spawn(
|
crate::task_mgr::spawn(
|
||||||
crate::task_mgr::BACKGROUND_RUNTIME.handle(),
|
MGMT_REQUEST_RUNTIME.handle(),
|
||||||
TaskKind::DiskUsageEviction,
|
TaskKind::DiskUsageEviction,
|
||||||
None,
|
None,
|
||||||
None,
|
None,
|
||||||
|
|||||||
@@ -148,17 +148,17 @@ async fn import_rel(
|
|||||||
// because there is no guarantee about the order in which we are processing segments.
|
// because there is no guarantee about the order in which we are processing segments.
|
||||||
// ignore "relation already exists" error
|
// ignore "relation already exists" error
|
||||||
//
|
//
|
||||||
// FIXME: Keep track of which relations we've already created?
|
// FIXME: use proper error type for this, instead of parsing the error message.
|
||||||
|
// Or better yet, keep track of which relations we've already created
|
||||||
// https://github.com/neondatabase/neon/issues/3309
|
// https://github.com/neondatabase/neon/issues/3309
|
||||||
if let Err(e) = modification
|
if let Err(e) = modification
|
||||||
.put_rel_creation(rel, nblocks as u32, ctx)
|
.put_rel_creation(rel, nblocks as u32, ctx)
|
||||||
.await
|
.await
|
||||||
{
|
{
|
||||||
match e {
|
if e.to_string().contains("already exists") {
|
||||||
RelationError::AlreadyExists => {
|
debug!("relation {} already exists. we must be extending it", rel);
|
||||||
debug!("Relation {} already exist. We must be extending it.", rel)
|
} else {
|
||||||
}
|
return Err(e);
|
||||||
_ => return Err(e.into()),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,9 +1,9 @@
|
|||||||
use metrics::metric_vec_duration::DurationResultObserver;
|
use metrics::core::{AtomicU64, GenericCounter};
|
||||||
use metrics::{
|
use metrics::{
|
||||||
register_counter_vec, register_histogram, register_histogram_vec, register_int_counter,
|
register_counter_vec, register_histogram, register_histogram_vec, register_int_counter,
|
||||||
register_int_counter_vec, register_int_gauge, register_int_gauge_vec, register_uint_gauge,
|
register_int_counter_vec, register_int_gauge, register_int_gauge_vec, register_uint_gauge_vec,
|
||||||
register_uint_gauge_vec, Counter, CounterVec, Histogram, HistogramVec, IntCounter,
|
Counter, CounterVec, Histogram, HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec,
|
||||||
IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec,
|
UIntGauge, UIntGaugeVec,
|
||||||
};
|
};
|
||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
use pageserver_api::models::TenantState;
|
use pageserver_api::models::TenantState;
|
||||||
@@ -95,19 +95,21 @@ static READ_NUM_FS_LAYERS: Lazy<HistogramVec> = Lazy::new(|| {
|
|||||||
});
|
});
|
||||||
|
|
||||||
// Metrics collected on operations on the storage repository.
|
// Metrics collected on operations on the storage repository.
|
||||||
pub static RECONSTRUCT_TIME: Lazy<Histogram> = Lazy::new(|| {
|
static RECONSTRUCT_TIME: Lazy<HistogramVec> = Lazy::new(|| {
|
||||||
register_histogram!(
|
register_histogram_vec!(
|
||||||
"pageserver_getpage_reconstruct_seconds",
|
"pageserver_getpage_reconstruct_seconds",
|
||||||
"Time spent in reconstruct_value (reconstruct a page from deltas)",
|
"Time spent in reconstruct_value",
|
||||||
|
&["tenant_id", "timeline_id"],
|
||||||
CRITICAL_OP_BUCKETS.into(),
|
CRITICAL_OP_BUCKETS.into(),
|
||||||
)
|
)
|
||||||
.expect("failed to define a metric")
|
.expect("failed to define a metric")
|
||||||
});
|
});
|
||||||
|
|
||||||
pub static MATERIALIZED_PAGE_CACHE_HIT_DIRECT: Lazy<IntCounter> = Lazy::new(|| {
|
static MATERIALIZED_PAGE_CACHE_HIT_DIRECT: Lazy<IntCounterVec> = Lazy::new(|| {
|
||||||
register_int_counter!(
|
register_int_counter_vec!(
|
||||||
"pageserver_materialized_cache_hits_direct_total",
|
"pageserver_materialized_cache_hits_direct_total",
|
||||||
"Number of cache hits from materialized page cache without redo",
|
"Number of cache hits from materialized page cache without redo",
|
||||||
|
&["tenant_id", "timeline_id"]
|
||||||
)
|
)
|
||||||
.expect("failed to define a metric")
|
.expect("failed to define a metric")
|
||||||
});
|
});
|
||||||
@@ -122,130 +124,15 @@ static GET_RECONSTRUCT_DATA_TIME: Lazy<HistogramVec> = Lazy::new(|| {
|
|||||||
.expect("failed to define a metric")
|
.expect("failed to define a metric")
|
||||||
});
|
});
|
||||||
|
|
||||||
pub static MATERIALIZED_PAGE_CACHE_HIT: Lazy<IntCounter> = Lazy::new(|| {
|
static MATERIALIZED_PAGE_CACHE_HIT: Lazy<IntCounterVec> = Lazy::new(|| {
|
||||||
register_int_counter!(
|
register_int_counter_vec!(
|
||||||
"pageserver_materialized_cache_hits_total",
|
"pageserver_materialized_cache_hits_total",
|
||||||
"Number of cache hits from materialized page cache",
|
"Number of cache hits from materialized page cache",
|
||||||
|
&["tenant_id", "timeline_id"]
|
||||||
)
|
)
|
||||||
.expect("failed to define a metric")
|
.expect("failed to define a metric")
|
||||||
});
|
});
|
||||||
|
|
||||||
pub struct PageCacheMetrics {
|
|
||||||
pub read_accesses_materialized_page: IntCounter,
|
|
||||||
pub read_accesses_ephemeral: IntCounter,
|
|
||||||
pub read_accesses_immutable: IntCounter,
|
|
||||||
|
|
||||||
pub read_hits_ephemeral: IntCounter,
|
|
||||||
pub read_hits_immutable: IntCounter,
|
|
||||||
pub read_hits_materialized_page_exact: IntCounter,
|
|
||||||
pub read_hits_materialized_page_older_lsn: IntCounter,
|
|
||||||
}
|
|
||||||
|
|
||||||
static PAGE_CACHE_READ_HITS: Lazy<IntCounterVec> = Lazy::new(|| {
|
|
||||||
register_int_counter_vec!(
|
|
||||||
"pageserver_page_cache_read_hits_total",
|
|
||||||
"Number of read accesses to the page cache that hit",
|
|
||||||
&["key_kind", "hit_kind"]
|
|
||||||
)
|
|
||||||
.expect("failed to define a metric")
|
|
||||||
});
|
|
||||||
|
|
||||||
static PAGE_CACHE_READ_ACCESSES: Lazy<IntCounterVec> = Lazy::new(|| {
|
|
||||||
register_int_counter_vec!(
|
|
||||||
"pageserver_page_cache_read_accesses_total",
|
|
||||||
"Number of read accesses to the page cache",
|
|
||||||
&["key_kind"]
|
|
||||||
)
|
|
||||||
.expect("failed to define a metric")
|
|
||||||
});
|
|
||||||
|
|
||||||
pub static PAGE_CACHE: Lazy<PageCacheMetrics> = Lazy::new(|| PageCacheMetrics {
|
|
||||||
read_accesses_materialized_page: {
|
|
||||||
PAGE_CACHE_READ_ACCESSES
|
|
||||||
.get_metric_with_label_values(&["materialized_page"])
|
|
||||||
.unwrap()
|
|
||||||
},
|
|
||||||
|
|
||||||
read_accesses_ephemeral: {
|
|
||||||
PAGE_CACHE_READ_ACCESSES
|
|
||||||
.get_metric_with_label_values(&["ephemeral"])
|
|
||||||
.unwrap()
|
|
||||||
},
|
|
||||||
|
|
||||||
read_accesses_immutable: {
|
|
||||||
PAGE_CACHE_READ_ACCESSES
|
|
||||||
.get_metric_with_label_values(&["immutable"])
|
|
||||||
.unwrap()
|
|
||||||
},
|
|
||||||
|
|
||||||
read_hits_ephemeral: {
|
|
||||||
PAGE_CACHE_READ_HITS
|
|
||||||
.get_metric_with_label_values(&["ephemeral", "-"])
|
|
||||||
.unwrap()
|
|
||||||
},
|
|
||||||
|
|
||||||
read_hits_immutable: {
|
|
||||||
PAGE_CACHE_READ_HITS
|
|
||||||
.get_metric_with_label_values(&["immutable", "-"])
|
|
||||||
.unwrap()
|
|
||||||
},
|
|
||||||
|
|
||||||
read_hits_materialized_page_exact: {
|
|
||||||
PAGE_CACHE_READ_HITS
|
|
||||||
.get_metric_with_label_values(&["materialized_page", "exact"])
|
|
||||||
.unwrap()
|
|
||||||
},
|
|
||||||
|
|
||||||
read_hits_materialized_page_older_lsn: {
|
|
||||||
PAGE_CACHE_READ_HITS
|
|
||||||
.get_metric_with_label_values(&["materialized_page", "older_lsn"])
|
|
||||||
.unwrap()
|
|
||||||
},
|
|
||||||
});
|
|
||||||
|
|
||||||
pub struct PageCacheSizeMetrics {
|
|
||||||
pub max_bytes: UIntGauge,
|
|
||||||
|
|
||||||
pub current_bytes_ephemeral: UIntGauge,
|
|
||||||
pub current_bytes_immutable: UIntGauge,
|
|
||||||
pub current_bytes_materialized_page: UIntGauge,
|
|
||||||
}
|
|
||||||
|
|
||||||
static PAGE_CACHE_SIZE_CURRENT_BYTES: Lazy<UIntGaugeVec> = Lazy::new(|| {
|
|
||||||
register_uint_gauge_vec!(
|
|
||||||
"pageserver_page_cache_size_current_bytes",
|
|
||||||
"Current size of the page cache in bytes, by key kind",
|
|
||||||
&["key_kind"]
|
|
||||||
)
|
|
||||||
.expect("failed to define a metric")
|
|
||||||
});
|
|
||||||
|
|
||||||
pub static PAGE_CACHE_SIZE: Lazy<PageCacheSizeMetrics> = Lazy::new(|| PageCacheSizeMetrics {
|
|
||||||
max_bytes: {
|
|
||||||
register_uint_gauge!(
|
|
||||||
"pageserver_page_cache_size_max_bytes",
|
|
||||||
"Maximum size of the page cache in bytes"
|
|
||||||
)
|
|
||||||
.expect("failed to define a metric")
|
|
||||||
},
|
|
||||||
|
|
||||||
current_bytes_ephemeral: {
|
|
||||||
PAGE_CACHE_SIZE_CURRENT_BYTES
|
|
||||||
.get_metric_with_label_values(&["ephemeral"])
|
|
||||||
.unwrap()
|
|
||||||
},
|
|
||||||
current_bytes_immutable: {
|
|
||||||
PAGE_CACHE_SIZE_CURRENT_BYTES
|
|
||||||
.get_metric_with_label_values(&["immutable"])
|
|
||||||
.unwrap()
|
|
||||||
},
|
|
||||||
current_bytes_materialized_page: {
|
|
||||||
PAGE_CACHE_SIZE_CURRENT_BYTES
|
|
||||||
.get_metric_with_label_values(&["materialized_page"])
|
|
||||||
.unwrap()
|
|
||||||
},
|
|
||||||
});
|
|
||||||
|
|
||||||
static WAIT_LSN_TIME: Lazy<HistogramVec> = Lazy::new(|| {
|
static WAIT_LSN_TIME: Lazy<HistogramVec> = Lazy::new(|| {
|
||||||
register_histogram_vec!(
|
register_histogram_vec!(
|
||||||
"pageserver_wait_lsn_seconds",
|
"pageserver_wait_lsn_seconds",
|
||||||
@@ -320,11 +207,11 @@ pub static TENANT_STATE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|| {
|
|||||||
|
|
||||||
pub static TENANT_SYNTHETIC_SIZE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|| {
|
pub static TENANT_SYNTHETIC_SIZE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|| {
|
||||||
register_uint_gauge_vec!(
|
register_uint_gauge_vec!(
|
||||||
"pageserver_tenant_synthetic_cached_size_bytes",
|
"pageserver_tenant_synthetic_size",
|
||||||
"Synthetic size of each tenant in bytes",
|
"Synthetic size of each tenant",
|
||||||
&["tenant_id"]
|
&["tenant_id"]
|
||||||
)
|
)
|
||||||
.expect("Failed to register pageserver_tenant_synthetic_cached_size_bytes metric")
|
.expect("Failed to register pageserver_tenant_synthetic_size metric")
|
||||||
});
|
});
|
||||||
|
|
||||||
// Metrics for cloud upload. These metrics reflect data uploaded to cloud storage,
|
// Metrics for cloud upload. These metrics reflect data uploaded to cloud storage,
|
||||||
@@ -541,27 +428,6 @@ pub static SMGR_QUERY_TIME: Lazy<HistogramVec> = Lazy::new(|| {
|
|||||||
.expect("failed to define a metric")
|
.expect("failed to define a metric")
|
||||||
});
|
});
|
||||||
|
|
||||||
pub struct BasebackupQueryTime(HistogramVec);
|
|
||||||
pub static BASEBACKUP_QUERY_TIME: Lazy<BasebackupQueryTime> = Lazy::new(|| {
|
|
||||||
BasebackupQueryTime({
|
|
||||||
register_histogram_vec!(
|
|
||||||
"pageserver_basebackup_query_seconds",
|
|
||||||
"Histogram of basebackup queries durations, by result type",
|
|
||||||
&["result"],
|
|
||||||
CRITICAL_OP_BUCKETS.into(),
|
|
||||||
)
|
|
||||||
.expect("failed to define a metric")
|
|
||||||
})
|
|
||||||
});
|
|
||||||
|
|
||||||
impl DurationResultObserver for BasebackupQueryTime {
|
|
||||||
fn observe_result<T, E>(&self, res: &Result<T, E>, duration: std::time::Duration) {
|
|
||||||
let label_value = if res.is_ok() { "ok" } else { "error" };
|
|
||||||
let metric = self.0.get_metric_with_label_values(&[label_value]).unwrap();
|
|
||||||
metric.observe(duration.as_secs_f64());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub static LIVE_CONNECTIONS_COUNT: Lazy<IntGaugeVec> = Lazy::new(|| {
|
pub static LIVE_CONNECTIONS_COUNT: Lazy<IntGaugeVec> = Lazy::new(|| {
|
||||||
register_int_gauge_vec!(
|
register_int_gauge_vec!(
|
||||||
"pageserver_live_connections",
|
"pageserver_live_connections",
|
||||||
@@ -886,7 +752,10 @@ impl StorageTimeMetrics {
|
|||||||
pub struct TimelineMetrics {
|
pub struct TimelineMetrics {
|
||||||
tenant_id: String,
|
tenant_id: String,
|
||||||
timeline_id: String,
|
timeline_id: String,
|
||||||
|
pub reconstruct_time_histo: Histogram,
|
||||||
pub get_reconstruct_data_time_histo: Histogram,
|
pub get_reconstruct_data_time_histo: Histogram,
|
||||||
|
pub materialized_page_cache_hit_counter: GenericCounter<AtomicU64>,
|
||||||
|
pub materialized_page_cache_hit_upon_request_counter: GenericCounter<AtomicU64>,
|
||||||
pub flush_time_histo: StorageTimeMetrics,
|
pub flush_time_histo: StorageTimeMetrics,
|
||||||
pub compact_time_histo: StorageTimeMetrics,
|
pub compact_time_histo: StorageTimeMetrics,
|
||||||
pub create_images_time_histo: StorageTimeMetrics,
|
pub create_images_time_histo: StorageTimeMetrics,
|
||||||
@@ -914,9 +783,15 @@ impl TimelineMetrics {
|
|||||||
) -> Self {
|
) -> Self {
|
||||||
let tenant_id = tenant_id.to_string();
|
let tenant_id = tenant_id.to_string();
|
||||||
let timeline_id = timeline_id.to_string();
|
let timeline_id = timeline_id.to_string();
|
||||||
|
let reconstruct_time_histo = RECONSTRUCT_TIME
|
||||||
|
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
|
||||||
|
.unwrap();
|
||||||
let get_reconstruct_data_time_histo = GET_RECONSTRUCT_DATA_TIME
|
let get_reconstruct_data_time_histo = GET_RECONSTRUCT_DATA_TIME
|
||||||
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
|
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
let materialized_page_cache_hit_counter = MATERIALIZED_PAGE_CACHE_HIT
|
||||||
|
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
|
||||||
|
.unwrap();
|
||||||
let flush_time_histo =
|
let flush_time_histo =
|
||||||
StorageTimeMetrics::new(StorageTimeOperation::LayerFlush, &tenant_id, &timeline_id);
|
StorageTimeMetrics::new(StorageTimeOperation::LayerFlush, &tenant_id, &timeline_id);
|
||||||
let compact_time_histo =
|
let compact_time_histo =
|
||||||
@@ -958,13 +833,19 @@ impl TimelineMetrics {
|
|||||||
let read_num_fs_layers = READ_NUM_FS_LAYERS
|
let read_num_fs_layers = READ_NUM_FS_LAYERS
|
||||||
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
|
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
let materialized_page_cache_hit_upon_request_counter = MATERIALIZED_PAGE_CACHE_HIT_DIRECT
|
||||||
|
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
|
||||||
|
.unwrap();
|
||||||
let evictions_with_low_residence_duration =
|
let evictions_with_low_residence_duration =
|
||||||
evictions_with_low_residence_duration_builder.build(&tenant_id, &timeline_id);
|
evictions_with_low_residence_duration_builder.build(&tenant_id, &timeline_id);
|
||||||
|
|
||||||
TimelineMetrics {
|
TimelineMetrics {
|
||||||
tenant_id,
|
tenant_id,
|
||||||
timeline_id,
|
timeline_id,
|
||||||
|
reconstruct_time_histo,
|
||||||
get_reconstruct_data_time_histo,
|
get_reconstruct_data_time_histo,
|
||||||
|
materialized_page_cache_hit_counter,
|
||||||
|
materialized_page_cache_hit_upon_request_counter,
|
||||||
flush_time_histo,
|
flush_time_histo,
|
||||||
compact_time_histo,
|
compact_time_histo,
|
||||||
create_images_time_histo,
|
create_images_time_histo,
|
||||||
@@ -991,7 +872,10 @@ impl Drop for TimelineMetrics {
|
|||||||
fn drop(&mut self) {
|
fn drop(&mut self) {
|
||||||
let tenant_id = &self.tenant_id;
|
let tenant_id = &self.tenant_id;
|
||||||
let timeline_id = &self.timeline_id;
|
let timeline_id = &self.timeline_id;
|
||||||
|
let _ = RECONSTRUCT_TIME.remove_label_values(&[tenant_id, timeline_id]);
|
||||||
let _ = GET_RECONSTRUCT_DATA_TIME.remove_label_values(&[tenant_id, timeline_id]);
|
let _ = GET_RECONSTRUCT_DATA_TIME.remove_label_values(&[tenant_id, timeline_id]);
|
||||||
|
let _ = MATERIALIZED_PAGE_CACHE_HIT.remove_label_values(&[tenant_id, timeline_id]);
|
||||||
|
let _ = MATERIALIZED_PAGE_CACHE_HIT_DIRECT.remove_label_values(&[tenant_id, timeline_id]);
|
||||||
let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, timeline_id]);
|
let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, timeline_id]);
|
||||||
let _ = WAIT_LSN_TIME.remove_label_values(&[tenant_id, timeline_id]);
|
let _ = WAIT_LSN_TIME.remove_label_values(&[tenant_id, timeline_id]);
|
||||||
let _ = RESIDENT_PHYSICAL_SIZE.remove_label_values(&[tenant_id, timeline_id]);
|
let _ = RESIDENT_PHYSICAL_SIZE.remove_label_values(&[tenant_id, timeline_id]);
|
||||||
@@ -1084,6 +968,7 @@ impl RemoteTimelineClientMetrics {
|
|||||||
op_kind: &RemoteOpKind,
|
op_kind: &RemoteOpKind,
|
||||||
status: &'static str,
|
status: &'static str,
|
||||||
) -> Histogram {
|
) -> Histogram {
|
||||||
|
// XXX would be nice to have an upgradable RwLock
|
||||||
let mut guard = self.remote_operation_time.lock().unwrap();
|
let mut guard = self.remote_operation_time.lock().unwrap();
|
||||||
let key = (file_kind.as_str(), op_kind.as_str(), status);
|
let key = (file_kind.as_str(), op_kind.as_str(), status);
|
||||||
let metric = guard.entry(key).or_insert_with(move || {
|
let metric = guard.entry(key).or_insert_with(move || {
|
||||||
@@ -1105,6 +990,7 @@ impl RemoteTimelineClientMetrics {
|
|||||||
file_kind: &RemoteOpFileKind,
|
file_kind: &RemoteOpFileKind,
|
||||||
op_kind: &RemoteOpKind,
|
op_kind: &RemoteOpKind,
|
||||||
) -> IntGauge {
|
) -> IntGauge {
|
||||||
|
// XXX would be nice to have an upgradable RwLock
|
||||||
let mut guard = self.calls_unfinished_gauge.lock().unwrap();
|
let mut guard = self.calls_unfinished_gauge.lock().unwrap();
|
||||||
let key = (file_kind.as_str(), op_kind.as_str());
|
let key = (file_kind.as_str(), op_kind.as_str());
|
||||||
let metric = guard.entry(key).or_insert_with(move || {
|
let metric = guard.entry(key).or_insert_with(move || {
|
||||||
@@ -1125,6 +1011,7 @@ impl RemoteTimelineClientMetrics {
|
|||||||
file_kind: &RemoteOpFileKind,
|
file_kind: &RemoteOpFileKind,
|
||||||
op_kind: &RemoteOpKind,
|
op_kind: &RemoteOpKind,
|
||||||
) -> Histogram {
|
) -> Histogram {
|
||||||
|
// XXX would be nice to have an upgradable RwLock
|
||||||
let mut guard = self.calls_started_hist.lock().unwrap();
|
let mut guard = self.calls_started_hist.lock().unwrap();
|
||||||
let key = (file_kind.as_str(), op_kind.as_str());
|
let key = (file_kind.as_str(), op_kind.as_str());
|
||||||
let metric = guard.entry(key).or_insert_with(move || {
|
let metric = guard.entry(key).or_insert_with(move || {
|
||||||
@@ -1145,6 +1032,7 @@ impl RemoteTimelineClientMetrics {
|
|||||||
file_kind: &RemoteOpFileKind,
|
file_kind: &RemoteOpFileKind,
|
||||||
op_kind: &RemoteOpKind,
|
op_kind: &RemoteOpKind,
|
||||||
) -> IntCounter {
|
) -> IntCounter {
|
||||||
|
// XXX would be nice to have an upgradable RwLock
|
||||||
let mut guard = self.bytes_started_counter.lock().unwrap();
|
let mut guard = self.bytes_started_counter.lock().unwrap();
|
||||||
let key = (file_kind.as_str(), op_kind.as_str());
|
let key = (file_kind.as_str(), op_kind.as_str());
|
||||||
let metric = guard.entry(key).or_insert_with(move || {
|
let metric = guard.entry(key).or_insert_with(move || {
|
||||||
@@ -1165,6 +1053,7 @@ impl RemoteTimelineClientMetrics {
|
|||||||
file_kind: &RemoteOpFileKind,
|
file_kind: &RemoteOpFileKind,
|
||||||
op_kind: &RemoteOpKind,
|
op_kind: &RemoteOpKind,
|
||||||
) -> IntCounter {
|
) -> IntCounter {
|
||||||
|
// XXX would be nice to have an upgradable RwLock
|
||||||
let mut guard = self.bytes_finished_counter.lock().unwrap();
|
let mut guard = self.bytes_finished_counter.lock().unwrap();
|
||||||
let key = (file_kind.as_str(), op_kind.as_str());
|
let key = (file_kind.as_str(), op_kind.as_str());
|
||||||
let metric = guard.entry(key).or_insert_with(move || {
|
let metric = guard.entry(key).or_insert_with(move || {
|
||||||
@@ -1430,8 +1319,4 @@ pub fn preinitialize_metrics() {
|
|||||||
|
|
||||||
// Same as above for this metric, but, it's a Vec-type metric for which we don't know all the labels.
|
// Same as above for this metric, but, it's a Vec-type metric for which we don't know all the labels.
|
||||||
BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT.reset();
|
BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT.reset();
|
||||||
|
|
||||||
// Python tests need these.
|
|
||||||
MATERIALIZED_PAGE_CACHE_HIT_DIRECT.get();
|
|
||||||
MATERIALIZED_PAGE_CACHE_HIT.get();
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -53,8 +53,8 @@ use utils::{
|
|||||||
lsn::Lsn,
|
lsn::Lsn,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
use crate::repository::Key;
|
||||||
use crate::tenant::writeback_ephemeral_file;
|
use crate::tenant::writeback_ephemeral_file;
|
||||||
use crate::{metrics::PageCacheSizeMetrics, repository::Key};
|
|
||||||
|
|
||||||
static PAGE_CACHE: OnceCell<PageCache> = OnceCell::new();
|
static PAGE_CACHE: OnceCell<PageCache> = OnceCell::new();
|
||||||
const TEST_PAGE_CACHE_SIZE: usize = 50;
|
const TEST_PAGE_CACHE_SIZE: usize = 50;
|
||||||
@@ -187,8 +187,6 @@ pub struct PageCache {
|
|||||||
/// Index of the next candidate to evict, for the Clock replacement algorithm.
|
/// Index of the next candidate to evict, for the Clock replacement algorithm.
|
||||||
/// This is interpreted modulo the page cache size.
|
/// This is interpreted modulo the page cache size.
|
||||||
next_evict_slot: AtomicUsize,
|
next_evict_slot: AtomicUsize,
|
||||||
|
|
||||||
size_metrics: &'static PageCacheSizeMetrics,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
///
|
///
|
||||||
@@ -315,10 +313,6 @@ impl PageCache {
|
|||||||
key: &Key,
|
key: &Key,
|
||||||
lsn: Lsn,
|
lsn: Lsn,
|
||||||
) -> Option<(Lsn, PageReadGuard)> {
|
) -> Option<(Lsn, PageReadGuard)> {
|
||||||
crate::metrics::PAGE_CACHE
|
|
||||||
.read_accesses_materialized_page
|
|
||||||
.inc();
|
|
||||||
|
|
||||||
let mut cache_key = CacheKey::MaterializedPage {
|
let mut cache_key = CacheKey::MaterializedPage {
|
||||||
hash_key: MaterializedPageHashKey {
|
hash_key: MaterializedPageHashKey {
|
||||||
tenant_id,
|
tenant_id,
|
||||||
@@ -329,21 +323,8 @@ impl PageCache {
|
|||||||
};
|
};
|
||||||
|
|
||||||
if let Some(guard) = self.try_lock_for_read(&mut cache_key) {
|
if let Some(guard) = self.try_lock_for_read(&mut cache_key) {
|
||||||
if let CacheKey::MaterializedPage {
|
if let CacheKey::MaterializedPage { hash_key: _, lsn } = cache_key {
|
||||||
hash_key: _,
|
Some((lsn, guard))
|
||||||
lsn: available_lsn,
|
|
||||||
} = cache_key
|
|
||||||
{
|
|
||||||
if available_lsn == lsn {
|
|
||||||
crate::metrics::PAGE_CACHE
|
|
||||||
.read_hits_materialized_page_exact
|
|
||||||
.inc();
|
|
||||||
} else {
|
|
||||||
crate::metrics::PAGE_CACHE
|
|
||||||
.read_hits_materialized_page_older_lsn
|
|
||||||
.inc();
|
|
||||||
}
|
|
||||||
Some((available_lsn, guard))
|
|
||||||
} else {
|
} else {
|
||||||
panic!("unexpected key type in slot");
|
panic!("unexpected key type in slot");
|
||||||
}
|
}
|
||||||
@@ -518,31 +499,11 @@ impl PageCache {
|
|||||||
/// ```
|
/// ```
|
||||||
///
|
///
|
||||||
fn lock_for_read(&self, cache_key: &mut CacheKey) -> anyhow::Result<ReadBufResult> {
|
fn lock_for_read(&self, cache_key: &mut CacheKey) -> anyhow::Result<ReadBufResult> {
|
||||||
let (read_access, hit) = match cache_key {
|
|
||||||
CacheKey::MaterializedPage { .. } => {
|
|
||||||
unreachable!("Materialized pages use lookup_materialized_page")
|
|
||||||
}
|
|
||||||
CacheKey::EphemeralPage { .. } => (
|
|
||||||
&crate::metrics::PAGE_CACHE.read_accesses_ephemeral,
|
|
||||||
&crate::metrics::PAGE_CACHE.read_hits_ephemeral,
|
|
||||||
),
|
|
||||||
CacheKey::ImmutableFilePage { .. } => (
|
|
||||||
&crate::metrics::PAGE_CACHE.read_accesses_immutable,
|
|
||||||
&crate::metrics::PAGE_CACHE.read_hits_immutable,
|
|
||||||
),
|
|
||||||
};
|
|
||||||
read_access.inc();
|
|
||||||
|
|
||||||
let mut is_first_iteration = true;
|
|
||||||
loop {
|
loop {
|
||||||
// First check if the key already exists in the cache.
|
// First check if the key already exists in the cache.
|
||||||
if let Some(read_guard) = self.try_lock_for_read(cache_key) {
|
if let Some(read_guard) = self.try_lock_for_read(cache_key) {
|
||||||
if is_first_iteration {
|
|
||||||
hit.inc();
|
|
||||||
}
|
|
||||||
return Ok(ReadBufResult::Found(read_guard));
|
return Ok(ReadBufResult::Found(read_guard));
|
||||||
}
|
}
|
||||||
is_first_iteration = false;
|
|
||||||
|
|
||||||
// Not found. Find a victim buffer
|
// Not found. Find a victim buffer
|
||||||
let (slot_idx, mut inner) =
|
let (slot_idx, mut inner) =
|
||||||
@@ -720,9 +681,6 @@ impl PageCache {
|
|||||||
|
|
||||||
if let Ok(version_idx) = versions.binary_search_by_key(old_lsn, |v| v.lsn) {
|
if let Ok(version_idx) = versions.binary_search_by_key(old_lsn, |v| v.lsn) {
|
||||||
versions.remove(version_idx);
|
versions.remove(version_idx);
|
||||||
self.size_metrics
|
|
||||||
.current_bytes_materialized_page
|
|
||||||
.sub_page_sz(1);
|
|
||||||
if versions.is_empty() {
|
if versions.is_empty() {
|
||||||
old_entry.remove_entry();
|
old_entry.remove_entry();
|
||||||
}
|
}
|
||||||
@@ -735,13 +693,11 @@ impl PageCache {
|
|||||||
let mut map = self.ephemeral_page_map.write().unwrap();
|
let mut map = self.ephemeral_page_map.write().unwrap();
|
||||||
map.remove(&(*file_id, *blkno))
|
map.remove(&(*file_id, *blkno))
|
||||||
.expect("could not find old key in mapping");
|
.expect("could not find old key in mapping");
|
||||||
self.size_metrics.current_bytes_ephemeral.sub_page_sz(1);
|
|
||||||
}
|
}
|
||||||
CacheKey::ImmutableFilePage { file_id, blkno } => {
|
CacheKey::ImmutableFilePage { file_id, blkno } => {
|
||||||
let mut map = self.immutable_page_map.write().unwrap();
|
let mut map = self.immutable_page_map.write().unwrap();
|
||||||
map.remove(&(*file_id, *blkno))
|
map.remove(&(*file_id, *blkno))
|
||||||
.expect("could not find old key in mapping");
|
.expect("could not find old key in mapping");
|
||||||
self.size_metrics.current_bytes_immutable.sub_page_sz(1);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -769,9 +725,6 @@ impl PageCache {
|
|||||||
slot_idx,
|
slot_idx,
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
self.size_metrics
|
|
||||||
.current_bytes_materialized_page
|
|
||||||
.add_page_sz(1);
|
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -782,7 +735,6 @@ impl PageCache {
|
|||||||
Entry::Occupied(entry) => Some(*entry.get()),
|
Entry::Occupied(entry) => Some(*entry.get()),
|
||||||
Entry::Vacant(entry) => {
|
Entry::Vacant(entry) => {
|
||||||
entry.insert(slot_idx);
|
entry.insert(slot_idx);
|
||||||
self.size_metrics.current_bytes_ephemeral.add_page_sz(1);
|
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -793,7 +745,6 @@ impl PageCache {
|
|||||||
Entry::Occupied(entry) => Some(*entry.get()),
|
Entry::Occupied(entry) => Some(*entry.get()),
|
||||||
Entry::Vacant(entry) => {
|
Entry::Vacant(entry) => {
|
||||||
entry.insert(slot_idx);
|
entry.insert(slot_idx);
|
||||||
self.size_metrics.current_bytes_immutable.add_page_sz(1);
|
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -893,12 +844,6 @@ impl PageCache {
|
|||||||
|
|
||||||
let page_buffer = Box::leak(vec![0u8; num_pages * PAGE_SZ].into_boxed_slice());
|
let page_buffer = Box::leak(vec![0u8; num_pages * PAGE_SZ].into_boxed_slice());
|
||||||
|
|
||||||
let size_metrics = &crate::metrics::PAGE_CACHE_SIZE;
|
|
||||||
size_metrics.max_bytes.set_page_sz(num_pages);
|
|
||||||
size_metrics.current_bytes_ephemeral.set_page_sz(0);
|
|
||||||
size_metrics.current_bytes_immutable.set_page_sz(0);
|
|
||||||
size_metrics.current_bytes_materialized_page.set_page_sz(0);
|
|
||||||
|
|
||||||
let slots = page_buffer
|
let slots = page_buffer
|
||||||
.chunks_exact_mut(PAGE_SZ)
|
.chunks_exact_mut(PAGE_SZ)
|
||||||
.map(|chunk| {
|
.map(|chunk| {
|
||||||
@@ -921,30 +866,6 @@ impl PageCache {
|
|||||||
immutable_page_map: Default::default(),
|
immutable_page_map: Default::default(),
|
||||||
slots,
|
slots,
|
||||||
next_evict_slot: AtomicUsize::new(0),
|
next_evict_slot: AtomicUsize::new(0),
|
||||||
size_metrics,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
trait PageSzBytesMetric {
|
|
||||||
fn set_page_sz(&self, count: usize);
|
|
||||||
fn add_page_sz(&self, count: usize);
|
|
||||||
fn sub_page_sz(&self, count: usize);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline(always)]
|
|
||||||
fn count_times_page_sz(count: usize) -> u64 {
|
|
||||||
u64::try_from(count).unwrap() * u64::try_from(PAGE_SZ).unwrap()
|
|
||||||
}
|
|
||||||
|
|
||||||
impl PageSzBytesMetric for metrics::UIntGauge {
|
|
||||||
fn set_page_sz(&self, count: usize) {
|
|
||||||
self.set(count_times_page_sz(count));
|
|
||||||
}
|
|
||||||
fn add_page_sz(&self, count: usize) {
|
|
||||||
self.add(count_times_page_sz(count));
|
|
||||||
}
|
|
||||||
fn sub_page_sz(&self, count: usize) {
|
|
||||||
self.sub(count_times_page_sz(count));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -390,9 +390,7 @@ impl PageServerHandler {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// Check that the timeline exists
|
// Check that the timeline exists
|
||||||
let timeline = tenant
|
let timeline = tenant.get_timeline(timeline_id, true)?;
|
||||||
.get_timeline(timeline_id, true)
|
|
||||||
.map_err(|e| anyhow::anyhow!(e))?;
|
|
||||||
|
|
||||||
// switch client to COPYBOTH
|
// switch client to COPYBOTH
|
||||||
pgb.write_message_noflush(&BeMessage::CopyBothResponse)?;
|
pgb.write_message_noflush(&BeMessage::CopyBothResponse)?;
|
||||||
@@ -904,7 +902,7 @@ where
|
|||||||
|
|
||||||
self.check_permission(Some(tenant_id))?;
|
self.check_permission(Some(tenant_id))?;
|
||||||
|
|
||||||
let lsn = if params.len() >= 3 {
|
let lsn = if params.len() == 3 {
|
||||||
Some(
|
Some(
|
||||||
Lsn::from_str(params[2])
|
Lsn::from_str(params[2])
|
||||||
.with_context(|| format!("Failed to parse Lsn from {}", params[2]))?,
|
.with_context(|| format!("Failed to parse Lsn from {}", params[2]))?,
|
||||||
@@ -913,24 +911,10 @@ where
|
|||||||
None
|
None
|
||||||
};
|
};
|
||||||
|
|
||||||
metrics::metric_vec_duration::observe_async_block_duration_by_result(
|
// Check that the timeline exists
|
||||||
&*crate::metrics::BASEBACKUP_QUERY_TIME,
|
self.handle_basebackup_request(pgb, tenant_id, timeline_id, lsn, None, false, ctx)
|
||||||
async move {
|
.await?;
|
||||||
self.handle_basebackup_request(
|
pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
|
||||||
pgb,
|
|
||||||
tenant_id,
|
|
||||||
timeline_id,
|
|
||||||
lsn,
|
|
||||||
None,
|
|
||||||
false,
|
|
||||||
ctx,
|
|
||||||
)
|
|
||||||
.await?;
|
|
||||||
pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
|
|
||||||
anyhow::Ok(())
|
|
||||||
},
|
|
||||||
)
|
|
||||||
.await?;
|
|
||||||
}
|
}
|
||||||
// return pair of prev_lsn and last_lsn
|
// return pair of prev_lsn and last_lsn
|
||||||
else if query_string.starts_with("get_last_record_rlsn ") {
|
else if query_string.starts_with("get_last_record_rlsn ") {
|
||||||
@@ -1246,6 +1230,6 @@ async fn get_active_tenant_timeline(
|
|||||||
.map_err(GetActiveTimelineError::Tenant)?;
|
.map_err(GetActiveTimelineError::Tenant)?;
|
||||||
let timeline = tenant
|
let timeline = tenant
|
||||||
.get_timeline(timeline_id, true)
|
.get_timeline(timeline_id, true)
|
||||||
.map_err(|e| GetActiveTimelineError::Timeline(anyhow::anyhow!(e)))?;
|
.map_err(GetActiveTimelineError::Timeline)?;
|
||||||
Ok(timeline)
|
Ok(timeline)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -43,16 +43,6 @@ pub enum CalculateLogicalSizeError {
|
|||||||
Other(#[from] anyhow::Error),
|
Other(#[from] anyhow::Error),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, thiserror::Error)]
|
|
||||||
pub enum RelationError {
|
|
||||||
#[error("Relation Already Exists")]
|
|
||||||
AlreadyExists,
|
|
||||||
#[error("invalid relnode")]
|
|
||||||
InvalidRelnode,
|
|
||||||
#[error(transparent)]
|
|
||||||
Other(#[from] anyhow::Error),
|
|
||||||
}
|
|
||||||
|
|
||||||
///
|
///
|
||||||
/// This impl provides all the functionality to store PostgreSQL relations, SLRUs,
|
/// This impl provides all the functionality to store PostgreSQL relations, SLRUs,
|
||||||
/// and other special kinds of files, in a versioned key-value store. The
|
/// and other special kinds of files, in a versioned key-value store. The
|
||||||
@@ -111,9 +101,9 @@ impl Timeline {
|
|||||||
ctx: &RequestContext,
|
ctx: &RequestContext,
|
||||||
) -> Result<Bytes, PageReconstructError> {
|
) -> Result<Bytes, PageReconstructError> {
|
||||||
if tag.relnode == 0 {
|
if tag.relnode == 0 {
|
||||||
return Err(PageReconstructError::Other(
|
return Err(PageReconstructError::Other(anyhow::anyhow!(
|
||||||
RelationError::InvalidRelnode.into(),
|
"invalid relnode"
|
||||||
));
|
)));
|
||||||
}
|
}
|
||||||
|
|
||||||
let nblocks = self.get_rel_size(tag, lsn, latest, ctx).await?;
|
let nblocks = self.get_rel_size(tag, lsn, latest, ctx).await?;
|
||||||
@@ -158,9 +148,9 @@ impl Timeline {
|
|||||||
ctx: &RequestContext,
|
ctx: &RequestContext,
|
||||||
) -> Result<BlockNumber, PageReconstructError> {
|
) -> Result<BlockNumber, PageReconstructError> {
|
||||||
if tag.relnode == 0 {
|
if tag.relnode == 0 {
|
||||||
return Err(PageReconstructError::Other(
|
return Err(PageReconstructError::Other(anyhow::anyhow!(
|
||||||
RelationError::InvalidRelnode.into(),
|
"invalid relnode"
|
||||||
));
|
)));
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(nblocks) = self.get_cached_rel_size(&tag, lsn) {
|
if let Some(nblocks) = self.get_cached_rel_size(&tag, lsn) {
|
||||||
@@ -203,9 +193,9 @@ impl Timeline {
|
|||||||
ctx: &RequestContext,
|
ctx: &RequestContext,
|
||||||
) -> Result<bool, PageReconstructError> {
|
) -> Result<bool, PageReconstructError> {
|
||||||
if tag.relnode == 0 {
|
if tag.relnode == 0 {
|
||||||
return Err(PageReconstructError::Other(
|
return Err(PageReconstructError::Other(anyhow::anyhow!(
|
||||||
RelationError::InvalidRelnode.into(),
|
"invalid relnode"
|
||||||
));
|
)));
|
||||||
}
|
}
|
||||||
|
|
||||||
// first try to lookup relation in cache
|
// first try to lookup relation in cache
|
||||||
@@ -734,7 +724,7 @@ impl<'a> DatadirModification<'a> {
|
|||||||
blknum: BlockNumber,
|
blknum: BlockNumber,
|
||||||
rec: NeonWalRecord,
|
rec: NeonWalRecord,
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
anyhow::ensure!(rel.relnode != 0, RelationError::InvalidRelnode);
|
anyhow::ensure!(rel.relnode != 0, "invalid relnode");
|
||||||
self.put(rel_block_to_key(rel, blknum), Value::WalRecord(rec));
|
self.put(rel_block_to_key(rel, blknum), Value::WalRecord(rec));
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
@@ -761,7 +751,7 @@ impl<'a> DatadirModification<'a> {
|
|||||||
blknum: BlockNumber,
|
blknum: BlockNumber,
|
||||||
img: Bytes,
|
img: Bytes,
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
anyhow::ensure!(rel.relnode != 0, RelationError::InvalidRelnode);
|
anyhow::ensure!(rel.relnode != 0, "invalid relnode");
|
||||||
self.put(rel_block_to_key(rel, blknum), Value::Image(img));
|
self.put(rel_block_to_key(rel, blknum), Value::Image(img));
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
@@ -885,38 +875,32 @@ impl<'a> DatadirModification<'a> {
|
|||||||
rel: RelTag,
|
rel: RelTag,
|
||||||
nblocks: BlockNumber,
|
nblocks: BlockNumber,
|
||||||
ctx: &RequestContext,
|
ctx: &RequestContext,
|
||||||
) -> Result<(), RelationError> {
|
) -> anyhow::Result<()> {
|
||||||
if rel.relnode == 0 {
|
anyhow::ensure!(rel.relnode != 0, "invalid relnode");
|
||||||
return Err(RelationError::InvalidRelnode);
|
|
||||||
}
|
|
||||||
// It's possible that this is the first rel for this db in this
|
// It's possible that this is the first rel for this db in this
|
||||||
// tablespace. Create the reldir entry for it if so.
|
// tablespace. Create the reldir entry for it if so.
|
||||||
let mut dbdir = DbDirectory::des(&self.get(DBDIR_KEY, ctx).await.context("read db")?)
|
let mut dbdir = DbDirectory::des(&self.get(DBDIR_KEY, ctx).await?)?;
|
||||||
.context("deserialize db")?;
|
|
||||||
let rel_dir_key = rel_dir_to_key(rel.spcnode, rel.dbnode);
|
let rel_dir_key = rel_dir_to_key(rel.spcnode, rel.dbnode);
|
||||||
let mut rel_dir = if dbdir.dbdirs.get(&(rel.spcnode, rel.dbnode)).is_none() {
|
let mut rel_dir = if dbdir.dbdirs.get(&(rel.spcnode, rel.dbnode)).is_none() {
|
||||||
// Didn't exist. Update dbdir
|
// Didn't exist. Update dbdir
|
||||||
dbdir.dbdirs.insert((rel.spcnode, rel.dbnode), false);
|
dbdir.dbdirs.insert((rel.spcnode, rel.dbnode), false);
|
||||||
let buf = DbDirectory::ser(&dbdir).context("serialize db")?;
|
let buf = DbDirectory::ser(&dbdir)?;
|
||||||
self.put(DBDIR_KEY, Value::Image(buf.into()));
|
self.put(DBDIR_KEY, Value::Image(buf.into()));
|
||||||
|
|
||||||
// and create the RelDirectory
|
// and create the RelDirectory
|
||||||
RelDirectory::default()
|
RelDirectory::default()
|
||||||
} else {
|
} else {
|
||||||
// reldir already exists, fetch it
|
// reldir already exists, fetch it
|
||||||
RelDirectory::des(&self.get(rel_dir_key, ctx).await.context("read db")?)
|
RelDirectory::des(&self.get(rel_dir_key, ctx).await?)?
|
||||||
.context("deserialize db")?
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// Add the new relation to the rel directory entry, and write it back
|
// Add the new relation to the rel directory entry, and write it back
|
||||||
if !rel_dir.rels.insert((rel.relnode, rel.forknum)) {
|
if !rel_dir.rels.insert((rel.relnode, rel.forknum)) {
|
||||||
return Err(RelationError::AlreadyExists);
|
anyhow::bail!("rel {rel} already exists");
|
||||||
}
|
}
|
||||||
self.put(
|
self.put(
|
||||||
rel_dir_key,
|
rel_dir_key,
|
||||||
Value::Image(Bytes::from(
|
Value::Image(Bytes::from(RelDirectory::ser(&rel_dir)?)),
|
||||||
RelDirectory::ser(&rel_dir).context("serialize")?,
|
|
||||||
)),
|
|
||||||
);
|
);
|
||||||
|
|
||||||
// Put size
|
// Put size
|
||||||
@@ -941,7 +925,7 @@ impl<'a> DatadirModification<'a> {
|
|||||||
nblocks: BlockNumber,
|
nblocks: BlockNumber,
|
||||||
ctx: &RequestContext,
|
ctx: &RequestContext,
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
anyhow::ensure!(rel.relnode != 0, RelationError::InvalidRelnode);
|
anyhow::ensure!(rel.relnode != 0, "invalid relnode");
|
||||||
let last_lsn = self.tline.get_last_record_lsn();
|
let last_lsn = self.tline.get_last_record_lsn();
|
||||||
if self.tline.get_rel_exists(rel, last_lsn, true, ctx).await? {
|
if self.tline.get_rel_exists(rel, last_lsn, true, ctx).await? {
|
||||||
let size_key = rel_size_to_key(rel);
|
let size_key = rel_size_to_key(rel);
|
||||||
@@ -972,7 +956,7 @@ impl<'a> DatadirModification<'a> {
|
|||||||
nblocks: BlockNumber,
|
nblocks: BlockNumber,
|
||||||
ctx: &RequestContext,
|
ctx: &RequestContext,
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
anyhow::ensure!(rel.relnode != 0, RelationError::InvalidRelnode);
|
anyhow::ensure!(rel.relnode != 0, "invalid relnode");
|
||||||
|
|
||||||
// Put size
|
// Put size
|
||||||
let size_key = rel_size_to_key(rel);
|
let size_key = rel_size_to_key(rel);
|
||||||
@@ -993,7 +977,7 @@ impl<'a> DatadirModification<'a> {
|
|||||||
|
|
||||||
/// Drop a relation.
|
/// Drop a relation.
|
||||||
pub async fn put_rel_drop(&mut self, rel: RelTag, ctx: &RequestContext) -> anyhow::Result<()> {
|
pub async fn put_rel_drop(&mut self, rel: RelTag, ctx: &RequestContext) -> anyhow::Result<()> {
|
||||||
anyhow::ensure!(rel.relnode != 0, RelationError::InvalidRelnode);
|
anyhow::ensure!(rel.relnode != 0, "invalid relnode");
|
||||||
|
|
||||||
// Remove it from the directory entry
|
// Remove it from the directory entry
|
||||||
let dir_key = rel_dir_to_key(rel.spcnode, rel.dbnode);
|
let dir_key = rel_dir_to_key(rel.spcnode, rel.dbnode);
|
||||||
|
|||||||
@@ -506,17 +506,17 @@ pub async fn shutdown_tasks(
|
|||||||
warn!(name = task.name, tenant_id = ?tenant_id, timeline_id = ?timeline_id, kind = ?task_kind, "stopping left-over");
|
warn!(name = task.name, tenant_id = ?tenant_id, timeline_id = ?timeline_id, kind = ?task_kind, "stopping left-over");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let join_handle = tokio::select! {
|
let completed = tokio::select! {
|
||||||
biased;
|
biased;
|
||||||
_ = &mut join_handle => { None },
|
_ = &mut join_handle => { true },
|
||||||
_ = tokio::time::sleep(std::time::Duration::from_secs(1)) => {
|
_ = tokio::time::sleep(std::time::Duration::from_secs(1)) => {
|
||||||
// allow some time to elapse before logging to cut down the number of log
|
// allow some time to elapse before logging to cut down the number of log
|
||||||
// lines.
|
// lines.
|
||||||
info!("waiting for {} to shut down", task.name);
|
info!("waiting for {} to shut down", task.name);
|
||||||
Some(join_handle)
|
false
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
if let Some(join_handle) = join_handle {
|
if !completed {
|
||||||
// we never handled this return value, but:
|
// we never handled this return value, but:
|
||||||
// - we don't deschedule which would lead to is_cancelled
|
// - we don't deschedule which would lead to is_cancelled
|
||||||
// - panics are already logged (is_panicked)
|
// - panics are already logged (is_panicked)
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -38,8 +38,8 @@ pub mod defaults {
|
|||||||
pub const DEFAULT_GC_PERIOD: &str = "1 hr";
|
pub const DEFAULT_GC_PERIOD: &str = "1 hr";
|
||||||
pub const DEFAULT_IMAGE_CREATION_THRESHOLD: usize = 3;
|
pub const DEFAULT_IMAGE_CREATION_THRESHOLD: usize = 3;
|
||||||
pub const DEFAULT_PITR_INTERVAL: &str = "7 days";
|
pub const DEFAULT_PITR_INTERVAL: &str = "7 days";
|
||||||
pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "10 seconds";
|
pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "2 seconds";
|
||||||
pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "10 seconds";
|
pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "3 seconds";
|
||||||
pub const DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG: u64 = 10 * 1024 * 1024;
|
pub const DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG: u64 = 10 * 1024 * 1024;
|
||||||
pub const DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD: &str = "24 hour";
|
pub const DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD: &str = "24 hour";
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -58,7 +58,7 @@ use std::sync::Arc;
|
|||||||
use utils::lsn::Lsn;
|
use utils::lsn::Lsn;
|
||||||
|
|
||||||
use historic_layer_coverage::BufferedHistoricLayerCoverage;
|
use historic_layer_coverage::BufferedHistoricLayerCoverage;
|
||||||
pub use historic_layer_coverage::LayerKey;
|
pub use historic_layer_coverage::{LayerKey, Replacement};
|
||||||
|
|
||||||
use super::storage_layer::range_eq;
|
use super::storage_layer::range_eq;
|
||||||
use super::storage_layer::PersistentLayerDesc;
|
use super::storage_layer::PersistentLayerDesc;
|
||||||
@@ -66,7 +66,7 @@ use super::storage_layer::PersistentLayerDesc;
|
|||||||
///
|
///
|
||||||
/// LayerMap tracks what layers exist on a timeline.
|
/// LayerMap tracks what layers exist on a timeline.
|
||||||
///
|
///
|
||||||
#[derive(Default)]
|
#[derive(Default, Clone)]
|
||||||
pub struct LayerMap {
|
pub struct LayerMap {
|
||||||
//
|
//
|
||||||
// 'open_layer' holds the current InMemoryLayer that is accepting new
|
// 'open_layer' holds the current InMemoryLayer that is accepting new
|
||||||
@@ -649,6 +649,34 @@ impl LayerMap {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Similar to `Arc::ptr_eq`, but only compares the object pointers, not vtables.
|
||||||
|
///
|
||||||
|
/// Returns `true` if the two `Arc` point to the same layer, false otherwise.
|
||||||
|
///
|
||||||
|
/// If comparing persistent layers, ALWAYS compare the layer descriptor key.
|
||||||
|
#[inline(always)]
|
||||||
|
pub fn compare_arced_layers<L: ?Sized>(left: &Arc<L>, right: &Arc<L>) -> bool {
|
||||||
|
// "dyn Trait" objects are "fat pointers" in that they have two components:
|
||||||
|
// - pointer to the object
|
||||||
|
// - pointer to the vtable
|
||||||
|
//
|
||||||
|
// rust does not provide a guarantee that these vtables are unique, but however
|
||||||
|
// `Arc::ptr_eq` as of writing (at least up to 1.67) uses a comparison where both the
|
||||||
|
// pointer and the vtable need to be equal.
|
||||||
|
//
|
||||||
|
// See: https://github.com/rust-lang/rust/issues/103763
|
||||||
|
//
|
||||||
|
// A future version of rust will most likely use this form below, where we cast each
|
||||||
|
// pointer into a pointer to unit, which drops the inaccessible vtable pointer, making it
|
||||||
|
// not affect the comparison.
|
||||||
|
//
|
||||||
|
// See: https://github.com/rust-lang/rust/pull/106450
|
||||||
|
let left = Arc::as_ptr(left) as *const ();
|
||||||
|
let right = Arc::as_ptr(right) as *const ();
|
||||||
|
|
||||||
|
left == right
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::LayerMap;
|
use super::LayerMap;
|
||||||
@@ -660,7 +688,7 @@ mod tests {
|
|||||||
|
|
||||||
use crate::tenant::{
|
use crate::tenant::{
|
||||||
storage_layer::{PersistentLayer, PersistentLayerDesc},
|
storage_layer::{PersistentLayer, PersistentLayerDesc},
|
||||||
timeline::LayerFileManager,
|
timeline::LayerMapping,
|
||||||
};
|
};
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
@@ -712,7 +740,7 @@ mod tests {
|
|||||||
// and can remove it in the future.
|
// and can remove it in the future.
|
||||||
let _map = LayerMap::default();
|
let _map = LayerMap::default();
|
||||||
|
|
||||||
let mut mapping = LayerFileManager::new();
|
let mut mapping = LayerMapping::new();
|
||||||
|
|
||||||
mapping
|
mapping
|
||||||
.replace_and_verify(not_found, new_version)
|
.replace_and_verify(not_found, new_version)
|
||||||
@@ -727,7 +755,7 @@ mod tests {
|
|||||||
let downloaded = Arc::new(skeleton);
|
let downloaded = Arc::new(skeleton);
|
||||||
|
|
||||||
let mut map = LayerMap::default();
|
let mut map = LayerMap::default();
|
||||||
let mut mapping = LayerFileManager::new();
|
let mut mapping = LayerMapping::new();
|
||||||
|
|
||||||
// two disjoint Arcs in different lifecycle phases. even if it seems they must be the
|
// two disjoint Arcs in different lifecycle phases. even if it seems they must be the
|
||||||
// same layer, we use LayerMap::compare_arced_layers as the identity of layers.
|
// same layer, we use LayerMap::compare_arced_layers as the identity of layers.
|
||||||
|
|||||||
@@ -43,6 +43,18 @@ impl Ord for LayerKey {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl<'a, L: crate::tenant::storage_layer::Layer + ?Sized> From<&'a L> for LayerKey {
|
||||||
|
fn from(layer: &'a L) -> Self {
|
||||||
|
let kr = layer.get_key_range();
|
||||||
|
let lr = layer.get_lsn_range();
|
||||||
|
LayerKey {
|
||||||
|
key: kr.start.to_i128()..kr.end.to_i128(),
|
||||||
|
lsn: lr.start.0..lr.end.0,
|
||||||
|
is_image: !layer.is_incremental(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl From<&PersistentLayerDesc> for LayerKey {
|
impl From<&PersistentLayerDesc> for LayerKey {
|
||||||
fn from(layer: &PersistentLayerDesc) -> Self {
|
fn from(layer: &PersistentLayerDesc) -> Self {
|
||||||
let kr = layer.get_key_range();
|
let kr = layer.get_key_range();
|
||||||
@@ -60,6 +72,7 @@ impl From<&PersistentLayerDesc> for LayerKey {
|
|||||||
/// Allows answering layer map queries very efficiently,
|
/// Allows answering layer map queries very efficiently,
|
||||||
/// but doesn't allow retroactive insertion, which is
|
/// but doesn't allow retroactive insertion, which is
|
||||||
/// sometimes necessary. See BufferedHistoricLayerCoverage.
|
/// sometimes necessary. See BufferedHistoricLayerCoverage.
|
||||||
|
#[derive(Clone)]
|
||||||
pub struct HistoricLayerCoverage<Value> {
|
pub struct HistoricLayerCoverage<Value> {
|
||||||
/// The latest state
|
/// The latest state
|
||||||
head: LayerCoverageTuple<Value>,
|
head: LayerCoverageTuple<Value>,
|
||||||
@@ -413,6 +426,7 @@ fn test_persistent_overlapping() {
|
|||||||
///
|
///
|
||||||
/// See this for more on persistent and retroactive techniques:
|
/// See this for more on persistent and retroactive techniques:
|
||||||
/// https://www.youtube.com/watch?v=WqCWghETNDc&t=581s
|
/// https://www.youtube.com/watch?v=WqCWghETNDc&t=581s
|
||||||
|
#[derive(Clone)]
|
||||||
pub struct BufferedHistoricLayerCoverage<Value> {
|
pub struct BufferedHistoricLayerCoverage<Value> {
|
||||||
/// A persistent layer map that we rebuild when we need to retroactively update
|
/// A persistent layer map that we rebuild when we need to retroactively update
|
||||||
historic_coverage: HistoricLayerCoverage<Value>,
|
historic_coverage: HistoricLayerCoverage<Value>,
|
||||||
@@ -456,6 +470,64 @@ impl<Value: Clone> BufferedHistoricLayerCoverage<Value> {
|
|||||||
self.buffer.insert(layer_key, None);
|
self.buffer.insert(layer_key, None);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Replaces a previous layer with a new layer value.
|
||||||
|
///
|
||||||
|
/// The replacement is conditional on:
|
||||||
|
/// - there is an existing `LayerKey` record
|
||||||
|
/// - there is no buffered removal for the given `LayerKey`
|
||||||
|
/// - the given closure returns true for the current `Value`
|
||||||
|
///
|
||||||
|
/// The closure is used to compare the latest value (buffered insert, or existing layer)
|
||||||
|
/// against some expectation. This allows using `Arc::ptr_eq` or similar, which would be
|
||||||
|
/// inaccessible via `PartialEq` trait.
|
||||||
|
///
|
||||||
|
/// Returns a `Replacement` value describing the outcome; only the case of
|
||||||
|
/// `Replacement::Replaced` modifies the map and requires a rebuild.
|
||||||
|
///
|
||||||
|
/// This function is unlikely to be used in the future because LayerMap now only records the
|
||||||
|
/// layer descriptors. Therefore, anything added to the layer map will only be removed or
|
||||||
|
/// added, and never replaced.
|
||||||
|
#[allow(dead_code)]
|
||||||
|
pub fn replace<F>(
|
||||||
|
&mut self,
|
||||||
|
layer_key: &LayerKey,
|
||||||
|
new: Value,
|
||||||
|
check_expected: F,
|
||||||
|
) -> Replacement<Value>
|
||||||
|
where
|
||||||
|
F: FnOnce(&Value) -> bool,
|
||||||
|
{
|
||||||
|
let (slot, in_buffered) = match self.buffer.get(layer_key) {
|
||||||
|
Some(inner @ Some(_)) => {
|
||||||
|
// we compare against the buffered version, because there will be a later
|
||||||
|
// rebuild before querying
|
||||||
|
(inner.as_ref(), true)
|
||||||
|
}
|
||||||
|
Some(None) => {
|
||||||
|
// buffer has removal for this key; it will not be equivalent by any check_expected.
|
||||||
|
return Replacement::RemovalBuffered;
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
// no pending modification for the key, check layers
|
||||||
|
(self.layers.get(layer_key), false)
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
match slot {
|
||||||
|
Some(existing) if !check_expected(existing) => {
|
||||||
|
// unfortunate clone here, but otherwise the nll borrowck grows the region of
|
||||||
|
// 'a to cover the whole function, and we could not mutate in the other
|
||||||
|
// Some(existing) branch
|
||||||
|
Replacement::Unexpected(existing.clone())
|
||||||
|
}
|
||||||
|
None => Replacement::NotFound,
|
||||||
|
Some(_existing) => {
|
||||||
|
self.insert(layer_key.to_owned(), new);
|
||||||
|
Replacement::Replaced { in_buffered }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn rebuild(&mut self) {
|
pub fn rebuild(&mut self) {
|
||||||
// Find the first LSN that needs to be rebuilt
|
// Find the first LSN that needs to be rebuilt
|
||||||
let rebuild_since: u64 = match self.buffer.iter().next() {
|
let rebuild_since: u64 = match self.buffer.iter().next() {
|
||||||
@@ -524,6 +596,22 @@ impl<Value: Clone> BufferedHistoricLayerCoverage<Value> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Outcome of the replace operation.
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub enum Replacement<Value> {
|
||||||
|
/// Previous value was replaced with the new value.
|
||||||
|
Replaced {
|
||||||
|
/// Replacement happened for a scheduled insert.
|
||||||
|
in_buffered: bool,
|
||||||
|
},
|
||||||
|
/// Key was not found in buffered updates or existing layers.
|
||||||
|
NotFound,
|
||||||
|
/// Key has been scheduled for removal, it was not replaced.
|
||||||
|
RemovalBuffered,
|
||||||
|
/// Previous value was rejected by the closure.
|
||||||
|
Unexpected(Value),
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_retroactive_regression_1() {
|
fn test_retroactive_regression_1() {
|
||||||
let mut map = BufferedHistoricLayerCoverage::new();
|
let mut map = BufferedHistoricLayerCoverage::new();
|
||||||
@@ -632,3 +720,139 @@ fn test_retroactive_simple() {
|
|||||||
assert_eq!(version.image_coverage.query(8), Some("Image 4".to_string()));
|
assert_eq!(version.image_coverage.query(8), Some("Image 4".to_string()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_retroactive_replacement() {
|
||||||
|
let mut map = BufferedHistoricLayerCoverage::new();
|
||||||
|
|
||||||
|
let keys = [
|
||||||
|
LayerKey {
|
||||||
|
key: 0..5,
|
||||||
|
lsn: 100..101,
|
||||||
|
is_image: true,
|
||||||
|
},
|
||||||
|
LayerKey {
|
||||||
|
key: 3..9,
|
||||||
|
lsn: 110..111,
|
||||||
|
is_image: true,
|
||||||
|
},
|
||||||
|
LayerKey {
|
||||||
|
key: 4..6,
|
||||||
|
lsn: 120..121,
|
||||||
|
is_image: true,
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
let layers = [
|
||||||
|
"Image 1".to_string(),
|
||||||
|
"Image 2".to_string(),
|
||||||
|
"Image 3".to_string(),
|
||||||
|
];
|
||||||
|
|
||||||
|
for (key, layer) in keys.iter().zip(layers.iter()) {
|
||||||
|
map.insert(key.to_owned(), layer.to_owned());
|
||||||
|
}
|
||||||
|
|
||||||
|
// rebuild is not necessary here, because replace works for both buffered updates and existing
|
||||||
|
// layers.
|
||||||
|
|
||||||
|
for (key, orig_layer) in keys.iter().zip(layers.iter()) {
|
||||||
|
let replacement = format!("Remote {orig_layer}");
|
||||||
|
|
||||||
|
// evict
|
||||||
|
let ret = map.replace(key, replacement.clone(), |l| l == orig_layer);
|
||||||
|
assert!(
|
||||||
|
matches!(ret, Replacement::Replaced { .. }),
|
||||||
|
"replace {orig_layer}: {ret:?}"
|
||||||
|
);
|
||||||
|
map.rebuild();
|
||||||
|
|
||||||
|
let at = key.lsn.end + 1;
|
||||||
|
|
||||||
|
let version = map.get().expect("rebuilt").get_version(at).unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
version.image_coverage.query(4).as_deref(),
|
||||||
|
Some(replacement.as_str()),
|
||||||
|
"query for 4 at version {at} after eviction",
|
||||||
|
);
|
||||||
|
|
||||||
|
// download
|
||||||
|
let ret = map.replace(key, orig_layer.clone(), |l| l == &replacement);
|
||||||
|
assert!(
|
||||||
|
matches!(ret, Replacement::Replaced { .. }),
|
||||||
|
"replace {orig_layer} back: {ret:?}"
|
||||||
|
);
|
||||||
|
map.rebuild();
|
||||||
|
let version = map.get().expect("rebuilt").get_version(at).unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
version.image_coverage.query(4).as_deref(),
|
||||||
|
Some(orig_layer.as_str()),
|
||||||
|
"query for 4 at version {at} after download",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn missing_key_is_not_inserted_with_replace() {
|
||||||
|
let mut map = BufferedHistoricLayerCoverage::new();
|
||||||
|
let key = LayerKey {
|
||||||
|
key: 0..5,
|
||||||
|
lsn: 100..101,
|
||||||
|
is_image: true,
|
||||||
|
};
|
||||||
|
|
||||||
|
let ret = map.replace(&key, "should not replace", |_| true);
|
||||||
|
assert!(matches!(ret, Replacement::NotFound), "{ret:?}");
|
||||||
|
map.rebuild();
|
||||||
|
assert!(map
|
||||||
|
.get()
|
||||||
|
.expect("no changes to rebuild")
|
||||||
|
.get_version(102)
|
||||||
|
.is_none());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn replacing_buffered_insert_and_remove() {
|
||||||
|
let mut map = BufferedHistoricLayerCoverage::new();
|
||||||
|
let key = LayerKey {
|
||||||
|
key: 0..5,
|
||||||
|
lsn: 100..101,
|
||||||
|
is_image: true,
|
||||||
|
};
|
||||||
|
|
||||||
|
map.insert(key.clone(), "Image 1");
|
||||||
|
let ret = map.replace(&key, "Remote Image 1", |&l| l == "Image 1");
|
||||||
|
assert!(
|
||||||
|
matches!(ret, Replacement::Replaced { in_buffered: true }),
|
||||||
|
"{ret:?}"
|
||||||
|
);
|
||||||
|
map.rebuild();
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
map.get()
|
||||||
|
.expect("rebuilt")
|
||||||
|
.get_version(102)
|
||||||
|
.unwrap()
|
||||||
|
.image_coverage
|
||||||
|
.query(4),
|
||||||
|
Some("Remote Image 1")
|
||||||
|
);
|
||||||
|
|
||||||
|
map.remove(key.clone());
|
||||||
|
let ret = map.replace(&key, "should not replace", |_| true);
|
||||||
|
assert!(
|
||||||
|
matches!(ret, Replacement::RemovalBuffered),
|
||||||
|
"cannot replace after scheduled remove: {ret:?}"
|
||||||
|
);
|
||||||
|
|
||||||
|
map.rebuild();
|
||||||
|
|
||||||
|
let ret = map.replace(&key, "should not replace", |_| true);
|
||||||
|
assert!(
|
||||||
|
matches!(ret, Replacement::NotFound),
|
||||||
|
"cannot replace after remove + rebuild: {ret:?}"
|
||||||
|
);
|
||||||
|
|
||||||
|
let at_version = map.get().expect("rebuilt").get_version(102);
|
||||||
|
assert!(at_version.is_none());
|
||||||
|
}
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ use rpds::RedBlackTreeMapSync;
|
|||||||
///
|
///
|
||||||
/// NOTE The struct is parameterized over Value for easier
|
/// NOTE The struct is parameterized over Value for easier
|
||||||
/// testing, but in practice it's some sort of layer.
|
/// testing, but in practice it's some sort of layer.
|
||||||
|
#[derive(Clone)]
|
||||||
pub struct LayerCoverage<Value> {
|
pub struct LayerCoverage<Value> {
|
||||||
/// For every change in coverage (as we sweep the key space)
|
/// For every change in coverage (as we sweep the key space)
|
||||||
/// we store (lsn.end, value).
|
/// we store (lsn.end, value).
|
||||||
@@ -139,6 +140,7 @@ impl<Value: Clone> LayerCoverage<Value> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Image and delta coverage at a specific LSN.
|
/// Image and delta coverage at a specific LSN.
|
||||||
|
#[derive(Clone)]
|
||||||
pub struct LayerCoverageTuple<Value> {
|
pub struct LayerCoverageTuple<Value> {
|
||||||
pub image_coverage: LayerCoverage<Value>,
|
pub image_coverage: LayerCoverage<Value>,
|
||||||
pub delta_coverage: LayerCoverage<Value>,
|
pub delta_coverage: LayerCoverage<Value>,
|
||||||
|
|||||||
146
pageserver/src/tenant/layer_map_mgr.rs
Normal file
146
pageserver/src/tenant/layer_map_mgr.rs
Normal file
@@ -0,0 +1,146 @@
|
|||||||
|
//! This module implements `LayerMapMgr`, which manages a layer map object and provides lock-free access to the state.
|
||||||
|
//!
|
||||||
|
//! A common usage pattern is as follows:
|
||||||
|
//!
|
||||||
|
//! ```ignore
|
||||||
|
//! async fn compaction(&self) {
|
||||||
|
//! // Get the current state.
|
||||||
|
//! let state = self.layer_map_mgr.read();
|
||||||
|
//! // No lock held at this point. Do compaction based on the state. This part usually incurs I/O operations and may
|
||||||
|
//! // take a long time.
|
||||||
|
//! let compaction_result = self.do_compaction(&state).await?;
|
||||||
|
//! // Update the state.
|
||||||
|
//! self.layer_map_mgr.update(|mut state| async move {
|
||||||
|
//! // do updates to the state, return it.
|
||||||
|
//! Ok(state)
|
||||||
|
//! }).await?;
|
||||||
|
//! }
|
||||||
|
//! ```
|
||||||
|
use anyhow::Result;
|
||||||
|
use arc_swap::ArcSwap;
|
||||||
|
use futures::Future;
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use super::layer_map::LayerMap;
|
||||||
|
|
||||||
|
/// Manages the storage state. Provides utility functions to modify the layer map and get an immutable reference to the
|
||||||
|
/// layer map.
|
||||||
|
pub struct LayerMapMgr {
|
||||||
|
layer_map: ArcSwap<LayerMap>,
|
||||||
|
state_lock: tokio::sync::Mutex<()>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl LayerMapMgr {
|
||||||
|
/// Get the current state of the layer map.
|
||||||
|
pub fn read(&self) -> Arc<LayerMap> {
|
||||||
|
// TODO: it is possible to use `load` to reduce the overhead of cloning the Arc, but read path usually involves
|
||||||
|
// disk reads and layer mapping fetching, and therefore it's not a big deal to use a more optimized version
|
||||||
|
// here.
|
||||||
|
self.layer_map.load_full()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Clone the layer map for modification.
|
||||||
|
fn clone_for_write(&self, _state_lock_witness: &tokio::sync::MutexGuard<'_, ()>) -> LayerMap {
|
||||||
|
(**self.layer_map.load()).clone()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn new(layer_map: LayerMap) -> Self {
|
||||||
|
Self {
|
||||||
|
layer_map: ArcSwap::new(Arc::new(layer_map)),
|
||||||
|
state_lock: tokio::sync::Mutex::new(()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Update the layer map.
|
||||||
|
pub async fn update<O, F>(&self, operation: O) -> Result<()>
|
||||||
|
where
|
||||||
|
O: FnOnce(LayerMap) -> F,
|
||||||
|
F: Future<Output = Result<LayerMap>>,
|
||||||
|
{
|
||||||
|
let state_lock = self.state_lock.lock().await;
|
||||||
|
let state = self.clone_for_write(&state_lock);
|
||||||
|
let new_state = operation(state).await?;
|
||||||
|
self.layer_map.store(Arc::new(new_state));
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use utils::{
|
||||||
|
id::{TenantId, TimelineId},
|
||||||
|
lsn::Lsn,
|
||||||
|
};
|
||||||
|
|
||||||
|
use crate::{repository::Key, tenant::storage_layer::PersistentLayerDesc};
|
||||||
|
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_layer_map_manage() -> Result<()> {
|
||||||
|
let mgr = LayerMapMgr::new(Default::default());
|
||||||
|
mgr.update(|mut map| async move {
|
||||||
|
let mut updates = map.batch_update();
|
||||||
|
updates.insert_historic(PersistentLayerDesc::new_img(
|
||||||
|
TenantId::generate(),
|
||||||
|
TimelineId::generate(),
|
||||||
|
Key::from_i128(0)..Key::from_i128(1),
|
||||||
|
Lsn(0),
|
||||||
|
false,
|
||||||
|
0,
|
||||||
|
));
|
||||||
|
updates.flush();
|
||||||
|
Ok(map)
|
||||||
|
})
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
let ref_1 = mgr.read();
|
||||||
|
|
||||||
|
mgr.update(|mut map| async move {
|
||||||
|
let mut updates = map.batch_update();
|
||||||
|
updates.insert_historic(PersistentLayerDesc::new_img(
|
||||||
|
TenantId::generate(),
|
||||||
|
TimelineId::generate(),
|
||||||
|
Key::from_i128(1)..Key::from_i128(2),
|
||||||
|
Lsn(0),
|
||||||
|
false,
|
||||||
|
0,
|
||||||
|
));
|
||||||
|
updates.flush();
|
||||||
|
Ok(map)
|
||||||
|
})
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
let ref_2 = mgr.read();
|
||||||
|
|
||||||
|
// Modification should not be visible to the old reference.
|
||||||
|
assert_eq!(
|
||||||
|
ref_1
|
||||||
|
.search(Key::from_i128(0), Lsn(1))
|
||||||
|
.unwrap()
|
||||||
|
.layer
|
||||||
|
.key_range,
|
||||||
|
Key::from_i128(0)..Key::from_i128(1)
|
||||||
|
);
|
||||||
|
assert!(ref_1.search(Key::from_i128(1), Lsn(1)).is_none());
|
||||||
|
|
||||||
|
// Modification should be visible to the new reference.
|
||||||
|
assert_eq!(
|
||||||
|
ref_2
|
||||||
|
.search(Key::from_i128(0), Lsn(1))
|
||||||
|
.unwrap()
|
||||||
|
.layer
|
||||||
|
.key_range,
|
||||||
|
Key::from_i128(0)..Key::from_i128(1)
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
ref_2
|
||||||
|
.search(Key::from_i128(1), Lsn(1))
|
||||||
|
.unwrap()
|
||||||
|
.layer
|
||||||
|
.key_range,
|
||||||
|
Key::from_i128(1)..Key::from_i128(2)
|
||||||
|
);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -675,7 +675,7 @@ pub async fn immediate_gc(
|
|||||||
.get(&tenant_id)
|
.get(&tenant_id)
|
||||||
.map(Arc::clone)
|
.map(Arc::clone)
|
||||||
.with_context(|| format!("tenant {tenant_id}"))
|
.with_context(|| format!("tenant {tenant_id}"))
|
||||||
.map_err(|e| ApiError::NotFound(e.into()))?;
|
.map_err(ApiError::NotFound)?;
|
||||||
|
|
||||||
let gc_horizon = gc_req.gc_horizon.unwrap_or_else(|| tenant.get_gc_horizon());
|
let gc_horizon = gc_req.gc_horizon.unwrap_or_else(|| tenant.get_gc_horizon());
|
||||||
// Use tenant's pitr setting
|
// Use tenant's pitr setting
|
||||||
@@ -724,11 +724,11 @@ pub async fn immediate_compact(
|
|||||||
.get(&tenant_id)
|
.get(&tenant_id)
|
||||||
.map(Arc::clone)
|
.map(Arc::clone)
|
||||||
.with_context(|| format!("tenant {tenant_id}"))
|
.with_context(|| format!("tenant {tenant_id}"))
|
||||||
.map_err(|e| ApiError::NotFound(e.into()))?;
|
.map_err(ApiError::NotFound)?;
|
||||||
|
|
||||||
let timeline = tenant
|
let timeline = tenant
|
||||||
.get_timeline(timeline_id, true)
|
.get_timeline(timeline_id, true)
|
||||||
.map_err(|e| ApiError::NotFound(e.into()))?;
|
.map_err(ApiError::NotFound)?;
|
||||||
|
|
||||||
// Run in task_mgr to avoid race with tenant_detach operation
|
// Run in task_mgr to avoid race with tenant_detach operation
|
||||||
let ctx = ctx.detached_child(TaskKind::Compaction, DownloadBehavior::Download);
|
let ctx = ctx.detached_child(TaskKind::Compaction, DownloadBehavior::Download);
|
||||||
|
|||||||
@@ -608,7 +608,10 @@ impl RemoteTimelineClient {
|
|||||||
self.calls_unfinished_metric_begin(&op);
|
self.calls_unfinished_metric_begin(&op);
|
||||||
upload_queue.queued_operations.push_back(op);
|
upload_queue.queued_operations.push_back(op);
|
||||||
|
|
||||||
info!("scheduled layer file upload {layer_file_name}");
|
info!(
|
||||||
|
"scheduled layer file upload {}",
|
||||||
|
layer_file_name.file_name()
|
||||||
|
);
|
||||||
|
|
||||||
// Launch the task immediately, if possible
|
// Launch the task immediately, if possible
|
||||||
self.launch_queued_tasks(upload_queue);
|
self.launch_queued_tasks(upload_queue);
|
||||||
@@ -661,7 +664,7 @@ impl RemoteTimelineClient {
|
|||||||
});
|
});
|
||||||
self.calls_unfinished_metric_begin(&op);
|
self.calls_unfinished_metric_begin(&op);
|
||||||
upload_queue.queued_operations.push_back(op);
|
upload_queue.queued_operations.push_back(op);
|
||||||
info!("scheduled layer file deletion {name}");
|
info!("scheduled layer file deletion {}", name.file_name());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Launch the tasks immediately, if possible
|
// Launch the tasks immediately, if possible
|
||||||
@@ -750,18 +753,22 @@ impl RemoteTimelineClient {
|
|||||||
|
|
||||||
// Have a failpoint that can use the `pause` failpoint action.
|
// Have a failpoint that can use the `pause` failpoint action.
|
||||||
// We don't want to block the executor thread, hence, spawn_blocking + await.
|
// We don't want to block the executor thread, hence, spawn_blocking + await.
|
||||||
if cfg!(feature = "testing") {
|
#[cfg(feature = "testing")]
|
||||||
tokio::task::spawn_blocking({
|
tokio::task::spawn_blocking({
|
||||||
let current = tracing::Span::current();
|
let current = tracing::Span::current();
|
||||||
move || {
|
move || {
|
||||||
let _entered = current.entered();
|
let _entered = current.entered();
|
||||||
tracing::info!("at failpoint persist_deleted_index_part");
|
tracing::info!(
|
||||||
fail::fail_point!("persist_deleted_index_part");
|
"at failpoint persist_index_part_with_deleted_flag_after_set_before_upload_pause"
|
||||||
}
|
);
|
||||||
})
|
fail::fail_point!(
|
||||||
.await
|
"persist_index_part_with_deleted_flag_after_set_before_upload_pause"
|
||||||
.expect("spawn_blocking");
|
);
|
||||||
}
|
}
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.expect("spawn_blocking");
|
||||||
|
|
||||||
upload::upload_index_part(
|
upload::upload_index_part(
|
||||||
self.conf,
|
self.conf,
|
||||||
&self.storage_impl,
|
&self.storage_impl,
|
||||||
@@ -825,7 +832,7 @@ impl RemoteTimelineClient {
|
|||||||
.queued_operations
|
.queued_operations
|
||||||
.push_back(op);
|
.push_back(op);
|
||||||
|
|
||||||
info!("scheduled layer file deletion {name}");
|
info!("scheduled layer file deletion {}", name.file_name());
|
||||||
deletions_queued += 1;
|
deletions_queued += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -859,8 +866,10 @@ impl RemoteTimelineClient {
|
|||||||
"Found {} files not bound to index_file.json, proceeding with their deletion",
|
"Found {} files not bound to index_file.json, proceeding with their deletion",
|
||||||
remaining.len()
|
remaining.len()
|
||||||
);
|
);
|
||||||
warn!("About to remove {} files", remaining.len());
|
for file in remaining {
|
||||||
self.storage_impl.delete_objects(&remaining).await?;
|
warn!("Removing {}", file.object_name().unwrap_or_default());
|
||||||
|
self.storage_impl.delete(&file).await?;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let index_file_path = timeline_storage_path.join(Path::new(IndexPart::FILE_NAME));
|
let index_file_path = timeline_storage_path.join(Path::new(IndexPart::FILE_NAME));
|
||||||
@@ -1362,7 +1371,7 @@ mod tests {
|
|||||||
struct TestSetup {
|
struct TestSetup {
|
||||||
runtime: &'static tokio::runtime::Runtime,
|
runtime: &'static tokio::runtime::Runtime,
|
||||||
entered_runtime: EnterGuard<'static>,
|
entered_runtime: EnterGuard<'static>,
|
||||||
harness: TenantHarness,
|
harness: TenantHarness<'static>,
|
||||||
tenant: Arc<Tenant>,
|
tenant: Arc<Tenant>,
|
||||||
tenant_ctx: RequestContext,
|
tenant_ctx: RequestContext,
|
||||||
remote_fs_dir: PathBuf,
|
remote_fs_dir: PathBuf,
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ use tracing::{info, warn};
|
|||||||
|
|
||||||
use crate::config::PageServerConf;
|
use crate::config::PageServerConf;
|
||||||
use crate::tenant::storage_layer::LayerFileName;
|
use crate::tenant::storage_layer::LayerFileName;
|
||||||
use crate::tenant::timeline::span::debug_assert_current_span_has_tenant_and_timeline_id;
|
use crate::tenant::timeline::debug_assert_current_span_has_tenant_and_timeline_id;
|
||||||
use crate::{exponential_backoff, DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS};
|
use crate::{exponential_backoff, DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS};
|
||||||
use remote_storage::{DownloadError, GenericRemoteStorage};
|
use remote_storage::{DownloadError, GenericRemoteStorage};
|
||||||
use utils::crashsafe::path_with_suffix_extension;
|
use utils::crashsafe::path_with_suffix_extension;
|
||||||
|
|||||||
@@ -1,20 +0,0 @@
|
|||||||
#[cfg(debug_assertions)]
|
|
||||||
use utils::tracing_span_assert::{check_fields_present, MultiNameExtractor};
|
|
||||||
|
|
||||||
#[cfg(not(debug_assertions))]
|
|
||||||
pub(crate) fn debug_assert_current_span_has_tenant_id() {}
|
|
||||||
|
|
||||||
#[cfg(debug_assertions)]
|
|
||||||
pub(crate) static TENANT_ID_EXTRACTOR: once_cell::sync::Lazy<MultiNameExtractor<2>> =
|
|
||||||
once_cell::sync::Lazy::new(|| MultiNameExtractor::new("TenantId", ["tenant_id", "tenant"]));
|
|
||||||
|
|
||||||
#[cfg(debug_assertions)]
|
|
||||||
#[track_caller]
|
|
||||||
pub(crate) fn debug_assert_current_span_has_tenant_id() {
|
|
||||||
if let Err(missing) = check_fields_present([&*TENANT_ID_EXTRACTOR]) {
|
|
||||||
panic!(
|
|
||||||
"missing extractors: {:?}",
|
|
||||||
missing.into_iter().map(|e| e.name()).collect::<Vec<_>>()
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -335,7 +335,7 @@ impl LayerAccessStats {
|
|||||||
/// All layers should implement a minimal `std::fmt::Debug` without tenant or
|
/// All layers should implement a minimal `std::fmt::Debug` without tenant or
|
||||||
/// timeline names, because those are known in the context of which the layers
|
/// timeline names, because those are known in the context of which the layers
|
||||||
/// are used in (timeline).
|
/// are used in (timeline).
|
||||||
pub trait Layer: std::fmt::Debug + std::fmt::Display + Send + Sync {
|
pub trait Layer: std::fmt::Debug + Send + Sync {
|
||||||
/// Range of keys that this layer covers
|
/// Range of keys that this layer covers
|
||||||
fn get_key_range(&self) -> Range<Key>;
|
fn get_key_range(&self) -> Range<Key>;
|
||||||
|
|
||||||
@@ -373,6 +373,9 @@ pub trait Layer: std::fmt::Debug + std::fmt::Display + Send + Sync {
|
|||||||
ctx: &RequestContext,
|
ctx: &RequestContext,
|
||||||
) -> Result<ValueReconstructResult>;
|
) -> Result<ValueReconstructResult>;
|
||||||
|
|
||||||
|
/// A short ID string that uniquely identifies the given layer within a [`LayerMap`].
|
||||||
|
fn short_id(&self) -> String;
|
||||||
|
|
||||||
/// Dump summary of the contents of the layer to stdout
|
/// Dump summary of the contents of the layer to stdout
|
||||||
fn dump(&self, verbose: bool, ctx: &RequestContext) -> Result<()>;
|
fn dump(&self, verbose: bool, ctx: &RequestContext) -> Result<()>;
|
||||||
}
|
}
|
||||||
@@ -509,12 +512,10 @@ pub mod tests {
|
|||||||
fn is_incremental(&self) -> bool {
|
fn is_incremental(&self) -> bool {
|
||||||
self.layer_desc().is_incremental
|
self.layer_desc().is_incremental
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
/// Boilerplate to implement the Layer trait, always use layer_desc for persistent layers.
|
/// Boilerplate to implement the Layer trait, always use layer_desc for persistent layers.
|
||||||
impl std::fmt::Display for LayerDescriptor {
|
fn short_id(&self) -> String {
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
self.layer_desc().short_id()
|
||||||
write!(f, "{}", self.layer_desc().short_id())
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -222,14 +222,13 @@ impl Layer for DeltaLayer {
|
|||||||
/// debugging function to print out the contents of the layer
|
/// debugging function to print out the contents of the layer
|
||||||
fn dump(&self, verbose: bool, ctx: &RequestContext) -> Result<()> {
|
fn dump(&self, verbose: bool, ctx: &RequestContext) -> Result<()> {
|
||||||
println!(
|
println!(
|
||||||
"----- delta layer for ten {} tli {} keys {}-{} lsn {}-{} size {} ----",
|
"----- delta layer for ten {} tli {} keys {}-{} lsn {}-{} ----",
|
||||||
self.desc.tenant_id,
|
self.desc.tenant_id,
|
||||||
self.desc.timeline_id,
|
self.desc.timeline_id,
|
||||||
self.desc.key_range.start,
|
self.desc.key_range.start,
|
||||||
self.desc.key_range.end,
|
self.desc.key_range.end,
|
||||||
self.desc.lsn_range.start,
|
self.desc.lsn_range.start,
|
||||||
self.desc.lsn_range.end,
|
self.desc.lsn_range.end
|
||||||
self.desc.file_size,
|
|
||||||
);
|
);
|
||||||
|
|
||||||
if !verbose {
|
if !verbose {
|
||||||
@@ -395,11 +394,10 @@ impl Layer for DeltaLayer {
|
|||||||
fn is_incremental(&self) -> bool {
|
fn is_incremental(&self) -> bool {
|
||||||
self.layer_desc().is_incremental
|
self.layer_desc().is_incremental
|
||||||
}
|
}
|
||||||
}
|
|
||||||
/// Boilerplate to implement the Layer trait, always use layer_desc for persistent layers.
|
/// Boilerplate to implement the Layer trait, always use layer_desc for persistent layers.
|
||||||
impl std::fmt::Display for DeltaLayer {
|
fn short_id(&self) -> String {
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
self.layer_desc().short_id()
|
||||||
write!(f, "{}", self.layer_desc().short_id())
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -210,15 +210,9 @@ pub enum LayerFileName {
|
|||||||
|
|
||||||
impl LayerFileName {
|
impl LayerFileName {
|
||||||
pub fn file_name(&self) -> String {
|
pub fn file_name(&self) -> String {
|
||||||
self.to_string()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl fmt::Display for LayerFileName {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
||||||
match self {
|
match self {
|
||||||
Self::Image(fname) => write!(f, "{fname}"),
|
Self::Image(fname) => fname.to_string(),
|
||||||
Self::Delta(fname) => write!(f, "{fname}"),
|
Self::Delta(fname) => fname.to_string(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -153,14 +153,12 @@ impl Layer for ImageLayer {
|
|||||||
/// debugging function to print out the contents of the layer
|
/// debugging function to print out the contents of the layer
|
||||||
fn dump(&self, verbose: bool, ctx: &RequestContext) -> Result<()> {
|
fn dump(&self, verbose: bool, ctx: &RequestContext) -> Result<()> {
|
||||||
println!(
|
println!(
|
||||||
"----- image layer for ten {} tli {} key {}-{} at {} is_incremental {} size {} ----",
|
"----- image layer for ten {} tli {} key {}-{} at {} ----",
|
||||||
self.desc.tenant_id,
|
self.desc.tenant_id,
|
||||||
self.desc.timeline_id,
|
self.desc.timeline_id,
|
||||||
self.desc.key_range.start,
|
self.desc.key_range.start,
|
||||||
self.desc.key_range.end,
|
self.desc.key_range.end,
|
||||||
self.lsn,
|
self.lsn
|
||||||
self.desc.is_incremental,
|
|
||||||
self.desc.file_size
|
|
||||||
);
|
);
|
||||||
|
|
||||||
if !verbose {
|
if !verbose {
|
||||||
@@ -232,12 +230,10 @@ impl Layer for ImageLayer {
|
|||||||
fn is_incremental(&self) -> bool {
|
fn is_incremental(&self) -> bool {
|
||||||
self.layer_desc().is_incremental
|
self.layer_desc().is_incremental
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
/// Boilerplate to implement the Layer trait, always use layer_desc for persistent layers.
|
/// Boilerplate to implement the Layer trait, always use layer_desc for persistent layers.
|
||||||
impl std::fmt::Display for ImageLayer {
|
fn short_id(&self) -> String {
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
self.layer_desc().short_id()
|
||||||
write!(f, "{}", self.layer_desc().short_id())
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -131,6 +131,13 @@ impl Layer for InMemoryLayer {
|
|||||||
true
|
true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn short_id(&self) -> String {
|
||||||
|
let inner = self.inner.read().unwrap();
|
||||||
|
|
||||||
|
let end_lsn = inner.end_lsn.unwrap_or(Lsn(u64::MAX));
|
||||||
|
format!("inmem-{:016X}-{:016X}", self.start_lsn.0, end_lsn.0)
|
||||||
|
}
|
||||||
|
|
||||||
/// debugging function to print out the contents of the layer
|
/// debugging function to print out the contents of the layer
|
||||||
fn dump(&self, verbose: bool, _ctx: &RequestContext) -> Result<()> {
|
fn dump(&self, verbose: bool, _ctx: &RequestContext) -> Result<()> {
|
||||||
let inner = self.inner.read().unwrap();
|
let inner = self.inner.read().unwrap();
|
||||||
@@ -233,15 +240,6 @@ impl Layer for InMemoryLayer {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl std::fmt::Display for InMemoryLayer {
|
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
||||||
let inner = self.inner.read().unwrap();
|
|
||||||
|
|
||||||
let end_lsn = inner.end_lsn.unwrap_or(Lsn(u64::MAX));
|
|
||||||
write!(f, "inmem-{:016X}-{:016X}", self.start_lsn.0, end_lsn.0)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl InMemoryLayer {
|
impl InMemoryLayer {
|
||||||
///
|
///
|
||||||
/// Get layer size on the disk
|
/// Get layer size on the disk
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use core::fmt::Display;
|
|
||||||
use std::ops::Range;
|
use std::ops::Range;
|
||||||
use utils::{
|
use utils::{
|
||||||
id::{TenantId, TimelineId},
|
id::{TenantId, TimelineId},
|
||||||
@@ -49,8 +48,8 @@ impl PersistentLayerDesc {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn short_id(&self) -> impl Display {
|
pub fn short_id(&self) -> String {
|
||||||
self.filename()
|
self.filename().file_name()
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
@@ -174,16 +173,13 @@ impl PersistentLayerDesc {
|
|||||||
|
|
||||||
pub fn dump(&self, _verbose: bool, _ctx: &RequestContext) -> Result<()> {
|
pub fn dump(&self, _verbose: bool, _ctx: &RequestContext) -> Result<()> {
|
||||||
println!(
|
println!(
|
||||||
"----- layer for ten {} tli {} keys {}-{} lsn {}-{} is_delta {} is_incremental {} size {} ----",
|
"----- layer for ten {} tli {} keys {}-{} lsn {}-{} ----",
|
||||||
self.tenant_id,
|
self.tenant_id,
|
||||||
self.timeline_id,
|
self.timeline_id,
|
||||||
self.key_range.start,
|
self.key_range.start,
|
||||||
self.key_range.end,
|
self.key_range.end,
|
||||||
self.lsn_range.start,
|
self.lsn_range.start,
|
||||||
self.lsn_range.end,
|
self.lsn_range.end
|
||||||
self.is_delta,
|
|
||||||
self.is_incremental,
|
|
||||||
self.file_size,
|
|
||||||
);
|
);
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|||||||
@@ -71,22 +71,22 @@ impl Layer for RemoteLayer {
|
|||||||
_reconstruct_state: &mut ValueReconstructState,
|
_reconstruct_state: &mut ValueReconstructState,
|
||||||
_ctx: &RequestContext,
|
_ctx: &RequestContext,
|
||||||
) -> Result<ValueReconstructResult> {
|
) -> Result<ValueReconstructResult> {
|
||||||
bail!("layer {self} needs to be downloaded");
|
bail!(
|
||||||
|
"layer {} needs to be downloaded",
|
||||||
|
self.filename().file_name()
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// debugging function to print out the contents of the layer
|
/// debugging function to print out the contents of the layer
|
||||||
fn dump(&self, _verbose: bool, _ctx: &RequestContext) -> Result<()> {
|
fn dump(&self, _verbose: bool, _ctx: &RequestContext) -> Result<()> {
|
||||||
println!(
|
println!(
|
||||||
"----- remote layer for ten {} tli {} keys {}-{} lsn {}-{} is_delta {} is_incremental {} size {} ----",
|
"----- remote layer for ten {} tli {} keys {}-{} lsn {}-{} ----",
|
||||||
self.desc.tenant_id,
|
self.desc.tenant_id,
|
||||||
self.desc.timeline_id,
|
self.desc.timeline_id,
|
||||||
self.desc.key_range.start,
|
self.desc.key_range.start,
|
||||||
self.desc.key_range.end,
|
self.desc.key_range.end,
|
||||||
self.desc.lsn_range.start,
|
self.desc.lsn_range.start,
|
||||||
self.desc.lsn_range.end,
|
self.desc.lsn_range.end
|
||||||
self.desc.is_delta,
|
|
||||||
self.desc.is_incremental,
|
|
||||||
self.desc.file_size,
|
|
||||||
);
|
);
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
@@ -106,12 +106,10 @@ impl Layer for RemoteLayer {
|
|||||||
fn is_incremental(&self) -> bool {
|
fn is_incremental(&self) -> bool {
|
||||||
self.layer_desc().is_incremental
|
self.layer_desc().is_incremental
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
/// Boilerplate to implement the Layer trait, always use layer_desc for persistent layers.
|
/// Boilerplate to implement the Layer trait, always use layer_desc for persistent layers.
|
||||||
impl std::fmt::Display for RemoteLayer {
|
fn short_id(&self) -> String {
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
self.layer_desc().short_id()
|
||||||
write!(f, "{}", self.layer_desc().short_id())
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -70,6 +70,7 @@ impl Timeline {
|
|||||||
};
|
};
|
||||||
|
|
||||||
self_clone.eviction_task(cancel).await;
|
self_clone.eviction_task(cancel).await;
|
||||||
|
info!("eviction task finishing");
|
||||||
Ok(())
|
Ok(())
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
@@ -77,9 +78,6 @@ impl Timeline {
|
|||||||
|
|
||||||
#[instrument(skip_all, fields(tenant_id = %self.tenant_id, timeline_id = %self.timeline_id))]
|
#[instrument(skip_all, fields(tenant_id = %self.tenant_id, timeline_id = %self.timeline_id))]
|
||||||
async fn eviction_task(self: Arc<Self>, cancel: CancellationToken) {
|
async fn eviction_task(self: Arc<Self>, cancel: CancellationToken) {
|
||||||
scopeguard::defer! {
|
|
||||||
info!("eviction task finishing");
|
|
||||||
}
|
|
||||||
use crate::tenant::tasks::random_init_delay;
|
use crate::tenant::tasks::random_init_delay;
|
||||||
{
|
{
|
||||||
let policy = self.get_eviction_policy();
|
let policy = self.get_eviction_policy();
|
||||||
@@ -88,6 +86,7 @@ impl Timeline {
|
|||||||
EvictionPolicy::NoEviction => Duration::from_secs(10),
|
EvictionPolicy::NoEviction => Duration::from_secs(10),
|
||||||
};
|
};
|
||||||
if random_init_delay(period, &cancel).await.is_err() {
|
if random_init_delay(period, &cancel).await.is_err() {
|
||||||
|
info!("shutting down");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -102,6 +101,7 @@ impl Timeline {
|
|||||||
ControlFlow::Continue(sleep_until) => {
|
ControlFlow::Continue(sleep_until) => {
|
||||||
tokio::select! {
|
tokio::select! {
|
||||||
_ = cancel.cancelled() => {
|
_ = cancel.cancelled() => {
|
||||||
|
info!("shutting down");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
_ = tokio::time::sleep_until(sleep_until) => { }
|
_ = tokio::time::sleep_until(sleep_until) => { }
|
||||||
@@ -209,7 +209,7 @@ impl Timeline {
|
|||||||
let last_activity_ts = hist_layer.access_stats().latest_activity().unwrap_or_else(|| {
|
let last_activity_ts = hist_layer.access_stats().latest_activity().unwrap_or_else(|| {
|
||||||
// We only use this fallback if there's an implementation error.
|
// We only use this fallback if there's an implementation error.
|
||||||
// `latest_activity` already does rate-limited warn!() log.
|
// `latest_activity` already does rate-limited warn!() log.
|
||||||
debug!(layer=%hist_layer, "last_activity returns None, using SystemTime::now");
|
debug!(layer=%hist_layer.filename().file_name(), "last_activity returns None, using SystemTime::now");
|
||||||
SystemTime::now()
|
SystemTime::now()
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@@ -1,128 +0,0 @@
|
|||||||
use anyhow::Context;
|
|
||||||
use once_cell::sync::OnceCell;
|
|
||||||
|
|
||||||
use tokio::sync::Semaphore;
|
|
||||||
use utils::lsn::Lsn;
|
|
||||||
|
|
||||||
use std::sync::atomic::{AtomicI64, Ordering as AtomicOrdering};
|
|
||||||
use std::sync::Arc;
|
|
||||||
|
|
||||||
/// Internal structure to hold all data needed for logical size calculation.
|
|
||||||
///
|
|
||||||
/// Calculation consists of two stages:
|
|
||||||
///
|
|
||||||
/// 1. Initial size calculation. That might take a long time, because it requires
|
|
||||||
/// reading all layers containing relation sizes at `initial_part_end`.
|
|
||||||
///
|
|
||||||
/// 2. Collecting an incremental part and adding that to the initial size.
|
|
||||||
/// Increments are appended on walreceiver writing new timeline data,
|
|
||||||
/// which result in increase or decrease of the logical size.
|
|
||||||
pub(super) struct LogicalSize {
|
|
||||||
/// Size, potentially slow to compute. Calculating this might require reading multiple
|
|
||||||
/// layers, and even ancestor's layers.
|
|
||||||
///
|
|
||||||
/// NOTE: size at a given LSN is constant, but after a restart we will calculate
|
|
||||||
/// the initial size at a different LSN.
|
|
||||||
pub initial_logical_size: OnceCell<u64>,
|
|
||||||
|
|
||||||
/// Semaphore to track ongoing calculation of `initial_logical_size`.
|
|
||||||
pub initial_size_computation: Arc<tokio::sync::Semaphore>,
|
|
||||||
|
|
||||||
/// Latest Lsn that has its size uncalculated, could be absent for freshly created timelines.
|
|
||||||
pub initial_part_end: Option<Lsn>,
|
|
||||||
|
|
||||||
/// All other size changes after startup, combined together.
|
|
||||||
///
|
|
||||||
/// Size shouldn't ever be negative, but this is signed for two reasons:
|
|
||||||
///
|
|
||||||
/// 1. If we initialized the "baseline" size lazily, while we already
|
|
||||||
/// process incoming WAL, the incoming WAL records could decrement the
|
|
||||||
/// variable and temporarily make it negative. (This is just future-proofing;
|
|
||||||
/// the initialization is currently not done lazily.)
|
|
||||||
///
|
|
||||||
/// 2. If there is a bug and we e.g. forget to increment it in some cases
|
|
||||||
/// when size grows, but remember to decrement it when it shrinks again, the
|
|
||||||
/// variable could go negative. In that case, it seems better to at least
|
|
||||||
/// try to keep tracking it, rather than clamp or overflow it. Note that
|
|
||||||
/// get_current_logical_size() will clamp the returned value to zero if it's
|
|
||||||
/// negative, and log an error. Could set it permanently to zero or some
|
|
||||||
/// special value to indicate "broken" instead, but this will do for now.
|
|
||||||
///
|
|
||||||
/// Note that we also expose a copy of this value as a prometheus metric,
|
|
||||||
/// see `current_logical_size_gauge`. Use the `update_current_logical_size`
|
|
||||||
/// to modify this, it will also keep the prometheus metric in sync.
|
|
||||||
pub size_added_after_initial: AtomicI64,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Normalized current size, that the data in pageserver occupies.
|
|
||||||
#[derive(Debug, Clone, Copy)]
|
|
||||||
pub(super) enum CurrentLogicalSize {
|
|
||||||
/// The size is not yet calculated to the end, this is an intermediate result,
|
|
||||||
/// constructed from walreceiver increments and normalized: logical data could delete some objects, hence be negative,
|
|
||||||
/// yet total logical size cannot be below 0.
|
|
||||||
Approximate(u64),
|
|
||||||
// Fully calculated logical size, only other future walreceiver increments are changing it, and those changes are
|
|
||||||
// available for observation without any calculations.
|
|
||||||
Exact(u64),
|
|
||||||
}
|
|
||||||
|
|
||||||
impl CurrentLogicalSize {
|
|
||||||
pub(super) fn size(&self) -> u64 {
|
|
||||||
*match self {
|
|
||||||
Self::Approximate(size) => size,
|
|
||||||
Self::Exact(size) => size,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl LogicalSize {
|
|
||||||
pub(super) fn empty_initial() -> Self {
|
|
||||||
Self {
|
|
||||||
initial_logical_size: OnceCell::with_value(0),
|
|
||||||
// initial_logical_size already computed, so, don't admit any calculations
|
|
||||||
initial_size_computation: Arc::new(Semaphore::new(0)),
|
|
||||||
initial_part_end: None,
|
|
||||||
size_added_after_initial: AtomicI64::new(0),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(super) fn deferred_initial(compute_to: Lsn) -> Self {
|
|
||||||
Self {
|
|
||||||
initial_logical_size: OnceCell::new(),
|
|
||||||
initial_size_computation: Arc::new(Semaphore::new(1)),
|
|
||||||
initial_part_end: Some(compute_to),
|
|
||||||
size_added_after_initial: AtomicI64::new(0),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(super) fn current_size(&self) -> anyhow::Result<CurrentLogicalSize> {
|
|
||||||
let size_increment: i64 = self.size_added_after_initial.load(AtomicOrdering::Acquire);
|
|
||||||
// ^^^ keep this type explicit so that the casts in this function break if
|
|
||||||
// we change the type.
|
|
||||||
match self.initial_logical_size.get() {
|
|
||||||
Some(initial_size) => {
|
|
||||||
initial_size.checked_add_signed(size_increment)
|
|
||||||
.with_context(|| format!("Overflow during logical size calculation, initial_size: {initial_size}, size_increment: {size_increment}"))
|
|
||||||
.map(CurrentLogicalSize::Exact)
|
|
||||||
}
|
|
||||||
None => {
|
|
||||||
let non_negative_size_increment = u64::try_from(size_increment).unwrap_or(0);
|
|
||||||
Ok(CurrentLogicalSize::Approximate(non_negative_size_increment))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(super) fn increment_size(&self, delta: i64) {
|
|
||||||
self.size_added_after_initial
|
|
||||||
.fetch_add(delta, AtomicOrdering::SeqCst);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Make the value computed by initial logical size computation
|
|
||||||
/// available for re-use. This doesn't contain the incremental part.
|
|
||||||
pub(super) fn initialized_size(&self, lsn: Lsn) -> Option<u64> {
|
|
||||||
match self.initial_part_end {
|
|
||||||
Some(v) if v == lsn => self.initial_logical_size.get().copied(),
|
|
||||||
_ => None,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,25 +0,0 @@
|
|||||||
#[cfg(debug_assertions)]
|
|
||||||
use utils::tracing_span_assert::{check_fields_present, Extractor, MultiNameExtractor};
|
|
||||||
|
|
||||||
#[cfg(not(debug_assertions))]
|
|
||||||
pub(crate) fn debug_assert_current_span_has_tenant_and_timeline_id() {}
|
|
||||||
|
|
||||||
#[cfg(debug_assertions)]
|
|
||||||
#[track_caller]
|
|
||||||
pub(crate) fn debug_assert_current_span_has_tenant_and_timeline_id() {
|
|
||||||
static TIMELINE_ID_EXTRACTOR: once_cell::sync::Lazy<MultiNameExtractor<2>> =
|
|
||||||
once_cell::sync::Lazy::new(|| {
|
|
||||||
MultiNameExtractor::new("TimelineId", ["timeline_id", "timeline"])
|
|
||||||
});
|
|
||||||
|
|
||||||
let fields: [&dyn Extractor; 2] = [
|
|
||||||
&*crate::tenant::span::TENANT_ID_EXTRACTOR,
|
|
||||||
&*TIMELINE_ID_EXTRACTOR,
|
|
||||||
];
|
|
||||||
if let Err(missing) = check_fields_present(fields) {
|
|
||||||
panic!(
|
|
||||||
"missing extractors: {:?}",
|
|
||||||
missing.into_iter().map(|e| e.name()).collect::<Vec<_>>()
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,219 +0,0 @@
|
|||||||
use std::{collections::hash_map::Entry, fs, path::PathBuf, sync::Arc};
|
|
||||||
|
|
||||||
use anyhow::Context;
|
|
||||||
use tracing::{error, info, info_span, warn};
|
|
||||||
use utils::{crashsafe, id::TimelineId, lsn::Lsn};
|
|
||||||
|
|
||||||
use crate::{
|
|
||||||
context::RequestContext,
|
|
||||||
import_datadir,
|
|
||||||
tenant::{ignore_absent_files, Tenant},
|
|
||||||
};
|
|
||||||
|
|
||||||
use super::Timeline;
|
|
||||||
|
|
||||||
/// A timeline with some of its files on disk, being initialized.
|
|
||||||
/// This struct ensures the atomicity of the timeline init: it's either properly created and inserted into pageserver's memory, or
|
|
||||||
/// its local files are removed. In the worst case of a crash, an uninit mark file is left behind, which causes the directory
|
|
||||||
/// to be removed on next restart.
|
|
||||||
///
|
|
||||||
/// The caller is responsible for proper timeline data filling before the final init.
|
|
||||||
#[must_use]
|
|
||||||
pub struct UninitializedTimeline<'t> {
|
|
||||||
pub(crate) owning_tenant: &'t Tenant,
|
|
||||||
timeline_id: TimelineId,
|
|
||||||
raw_timeline: Option<(Arc<Timeline>, TimelineUninitMark)>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'t> UninitializedTimeline<'t> {
|
|
||||||
pub(crate) fn new(
|
|
||||||
owning_tenant: &'t Tenant,
|
|
||||||
timeline_id: TimelineId,
|
|
||||||
raw_timeline: Option<(Arc<Timeline>, TimelineUninitMark)>,
|
|
||||||
) -> Self {
|
|
||||||
Self {
|
|
||||||
owning_tenant,
|
|
||||||
timeline_id,
|
|
||||||
raw_timeline,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Finish timeline creation: insert it into the Tenant's timelines map and remove the
|
|
||||||
/// uninit mark file.
|
|
||||||
///
|
|
||||||
/// This function launches the flush loop if not already done.
|
|
||||||
///
|
|
||||||
/// The caller is responsible for activating the timeline (function `.activate()`).
|
|
||||||
pub(crate) fn finish_creation(mut self) -> anyhow::Result<Arc<Timeline>> {
|
|
||||||
let timeline_id = self.timeline_id;
|
|
||||||
let tenant_id = self.owning_tenant.tenant_id;
|
|
||||||
|
|
||||||
let (new_timeline, uninit_mark) = self.raw_timeline.take().with_context(|| {
|
|
||||||
format!("No timeline for initalization found for {tenant_id}/{timeline_id}")
|
|
||||||
})?;
|
|
||||||
|
|
||||||
// Check that the caller initialized disk_consistent_lsn
|
|
||||||
let new_disk_consistent_lsn = new_timeline.get_disk_consistent_lsn();
|
|
||||||
anyhow::ensure!(
|
|
||||||
new_disk_consistent_lsn.is_valid(),
|
|
||||||
"new timeline {tenant_id}/{timeline_id} has invalid disk_consistent_lsn"
|
|
||||||
);
|
|
||||||
|
|
||||||
let mut timelines = self.owning_tenant.timelines.lock().unwrap();
|
|
||||||
match timelines.entry(timeline_id) {
|
|
||||||
Entry::Occupied(_) => anyhow::bail!(
|
|
||||||
"Found freshly initialized timeline {tenant_id}/{timeline_id} in the tenant map"
|
|
||||||
),
|
|
||||||
Entry::Vacant(v) => {
|
|
||||||
uninit_mark.remove_uninit_mark().with_context(|| {
|
|
||||||
format!(
|
|
||||||
"Failed to remove uninit mark file for timeline {tenant_id}/{timeline_id}"
|
|
||||||
)
|
|
||||||
})?;
|
|
||||||
v.insert(Arc::clone(&new_timeline));
|
|
||||||
|
|
||||||
new_timeline.maybe_spawn_flush_loop();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(new_timeline)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Prepares timeline data by loading it from the basebackup archive.
|
|
||||||
pub(crate) async fn import_basebackup_from_tar(
|
|
||||||
self,
|
|
||||||
copyin_read: &mut (impl tokio::io::AsyncRead + Send + Sync + Unpin),
|
|
||||||
base_lsn: Lsn,
|
|
||||||
broker_client: storage_broker::BrokerClientChannel,
|
|
||||||
ctx: &RequestContext,
|
|
||||||
) -> anyhow::Result<Arc<Timeline>> {
|
|
||||||
let raw_timeline = self.raw_timeline()?;
|
|
||||||
|
|
||||||
import_datadir::import_basebackup_from_tar(raw_timeline, copyin_read, base_lsn, ctx)
|
|
||||||
.await
|
|
||||||
.context("Failed to import basebackup")?;
|
|
||||||
|
|
||||||
// Flush the new layer files to disk, before we make the timeline as available to
|
|
||||||
// the outside world.
|
|
||||||
//
|
|
||||||
// Flush loop needs to be spawned in order to be able to flush.
|
|
||||||
raw_timeline.maybe_spawn_flush_loop();
|
|
||||||
|
|
||||||
fail::fail_point!("before-checkpoint-new-timeline", |_| {
|
|
||||||
anyhow::bail!("failpoint before-checkpoint-new-timeline");
|
|
||||||
});
|
|
||||||
|
|
||||||
raw_timeline
|
|
||||||
.freeze_and_flush()
|
|
||||||
.await
|
|
||||||
.context("Failed to flush after basebackup import")?;
|
|
||||||
|
|
||||||
// All the data has been imported. Insert the Timeline into the tenant's timelines
|
|
||||||
// map and remove the uninit mark file.
|
|
||||||
let tl = self.finish_creation()?;
|
|
||||||
tl.activate(broker_client, None, ctx);
|
|
||||||
Ok(tl)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) fn raw_timeline(&self) -> anyhow::Result<&Arc<Timeline>> {
|
|
||||||
Ok(&self
|
|
||||||
.raw_timeline
|
|
||||||
.as_ref()
|
|
||||||
.with_context(|| {
|
|
||||||
format!(
|
|
||||||
"No raw timeline {}/{} found",
|
|
||||||
self.owning_tenant.tenant_id, self.timeline_id
|
|
||||||
)
|
|
||||||
})?
|
|
||||||
.0)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Drop for UninitializedTimeline<'_> {
|
|
||||||
fn drop(&mut self) {
|
|
||||||
if let Some((_, uninit_mark)) = self.raw_timeline.take() {
|
|
||||||
let _entered = info_span!("drop_uninitialized_timeline", tenant = %self.owning_tenant.tenant_id, timeline = %self.timeline_id).entered();
|
|
||||||
error!("Timeline got dropped without initializing, cleaning its files");
|
|
||||||
cleanup_timeline_directory(uninit_mark);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) fn cleanup_timeline_directory(uninit_mark: TimelineUninitMark) {
|
|
||||||
let timeline_path = &uninit_mark.timeline_path;
|
|
||||||
match ignore_absent_files(|| fs::remove_dir_all(timeline_path)) {
|
|
||||||
Ok(()) => {
|
|
||||||
info!("Timeline dir {timeline_path:?} removed successfully, removing the uninit mark")
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
error!("Failed to clean up uninitialized timeline directory {timeline_path:?}: {e:?}")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
drop(uninit_mark); // mark handles its deletion on drop, gets retained if timeline dir exists
|
|
||||||
}
|
|
||||||
|
|
||||||
/// An uninit mark file, created along the timeline dir to ensure the timeline either gets fully initialized and loaded into pageserver's memory,
|
|
||||||
/// or gets removed eventually.
|
|
||||||
///
|
|
||||||
/// XXX: it's important to create it near the timeline dir, not inside it to ensure timeline dir gets removed first.
|
|
||||||
#[must_use]
|
|
||||||
pub(crate) struct TimelineUninitMark {
|
|
||||||
uninit_mark_deleted: bool,
|
|
||||||
uninit_mark_path: PathBuf,
|
|
||||||
pub(crate) timeline_path: PathBuf,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl TimelineUninitMark {
|
|
||||||
pub(crate) fn new(uninit_mark_path: PathBuf, timeline_path: PathBuf) -> Self {
|
|
||||||
Self {
|
|
||||||
uninit_mark_deleted: false,
|
|
||||||
uninit_mark_path,
|
|
||||||
timeline_path,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn remove_uninit_mark(mut self) -> anyhow::Result<()> {
|
|
||||||
if !self.uninit_mark_deleted {
|
|
||||||
self.delete_mark_file_if_present()?;
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn delete_mark_file_if_present(&mut self) -> anyhow::Result<()> {
|
|
||||||
let uninit_mark_file = &self.uninit_mark_path;
|
|
||||||
let uninit_mark_parent = uninit_mark_file
|
|
||||||
.parent()
|
|
||||||
.with_context(|| format!("Uninit mark file {uninit_mark_file:?} has no parent"))?;
|
|
||||||
ignore_absent_files(|| fs::remove_file(uninit_mark_file)).with_context(|| {
|
|
||||||
format!("Failed to remove uninit mark file at path {uninit_mark_file:?}")
|
|
||||||
})?;
|
|
||||||
crashsafe::fsync(uninit_mark_parent).context("Failed to fsync uninit mark parent")?;
|
|
||||||
self.uninit_mark_deleted = true;
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Drop for TimelineUninitMark {
|
|
||||||
fn drop(&mut self) {
|
|
||||||
if !self.uninit_mark_deleted {
|
|
||||||
if self.timeline_path.exists() {
|
|
||||||
error!(
|
|
||||||
"Uninit mark {} is not removed, timeline {} stays uninitialized",
|
|
||||||
self.uninit_mark_path.display(),
|
|
||||||
self.timeline_path.display()
|
|
||||||
)
|
|
||||||
} else {
|
|
||||||
// unblock later timeline creation attempts
|
|
||||||
warn!(
|
|
||||||
"Removing intermediate uninit mark file {}",
|
|
||||||
self.uninit_mark_path.display()
|
|
||||||
);
|
|
||||||
if let Err(e) = self.delete_mark_file_if_present() {
|
|
||||||
error!("Failed to remove the uninit mark file: {e}")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1321,7 +1321,7 @@ mod tests {
|
|||||||
|
|
||||||
const DUMMY_SAFEKEEPER_HOST: &str = "safekeeper_connstr";
|
const DUMMY_SAFEKEEPER_HOST: &str = "safekeeper_connstr";
|
||||||
|
|
||||||
async fn dummy_state(harness: &TenantHarness) -> ConnectionManagerState {
|
async fn dummy_state(harness: &TenantHarness<'_>) -> ConnectionManagerState {
|
||||||
let (tenant, ctx) = harness.load().await;
|
let (tenant, ctx) = harness.load().await;
|
||||||
let timeline = tenant
|
let timeline = tenant
|
||||||
.create_test_timeline(TIMELINE_ID, Lsn(0x8), crate::DEFAULT_PG_VERSION, &ctx)
|
.create_test_timeline(TIMELINE_ID, Lsn(0x8), crate::DEFAULT_PG_VERSION, &ctx)
|
||||||
|
|||||||
@@ -71,8 +71,6 @@ pub(super) async fn handle_walreceiver_connection(
|
|||||||
ctx: RequestContext,
|
ctx: RequestContext,
|
||||||
node: NodeId,
|
node: NodeId,
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
debug_assert_current_span_has_tenant_and_timeline_id();
|
|
||||||
|
|
||||||
WALRECEIVER_STARTED_CONNECTIONS.inc();
|
WALRECEIVER_STARTED_CONNECTIONS.inc();
|
||||||
|
|
||||||
// Connect to the database in replication mode.
|
// Connect to the database in replication mode.
|
||||||
@@ -142,9 +140,6 @@ pub(super) async fn handle_walreceiver_connection(
|
|||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
// Enrich the log lines emitted by this closure with meaningful context.
|
|
||||||
// TODO: technically, this task outlives the surrounding function, so, the
|
|
||||||
// spans won't be properly nested.
|
|
||||||
.instrument(tracing::info_span!("poller")),
|
.instrument(tracing::info_span!("poller")),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|||||||
@@ -302,6 +302,15 @@ impl VirtualFile {
|
|||||||
.observe_closure_duration(|| self.open_options.open(&self.path))?;
|
.observe_closure_duration(|| self.open_options.open(&self.path))?;
|
||||||
|
|
||||||
// Perform the requested operation on it
|
// Perform the requested operation on it
|
||||||
|
//
|
||||||
|
// TODO: We could downgrade the locks to read mode before calling
|
||||||
|
// 'func', to allow a little bit more concurrency, but the standard
|
||||||
|
// library RwLock doesn't allow downgrading without releasing the lock,
|
||||||
|
// and that doesn't seem worth the trouble.
|
||||||
|
//
|
||||||
|
// XXX: `parking_lot::RwLock` can enable such downgrades, yet its implementation is fair and
|
||||||
|
// may deadlock on subsequent read calls.
|
||||||
|
// Simply replacing all `RwLock` in project causes deadlocks, so use it sparingly.
|
||||||
let result = STORAGE_IO_TIME
|
let result = STORAGE_IO_TIME
|
||||||
.with_label_values(&[op, &self.tenant_id, &self.timeline_id])
|
.with_label_values(&[op, &self.tenant_id, &self.timeline_id])
|
||||||
.observe_closure_duration(|| func(&file));
|
.observe_closure_duration(|| func(&file));
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ use postgres_ffi::v14::nonrelfile_utils::clogpage_precedes;
|
|||||||
use postgres_ffi::v14::nonrelfile_utils::slru_may_delete_clogsegment;
|
use postgres_ffi::v14::nonrelfile_utils::slru_may_delete_clogsegment;
|
||||||
use postgres_ffi::{fsm_logical_to_physical, page_is_new, page_set_lsn};
|
use postgres_ffi::{fsm_logical_to_physical, page_is_new, page_set_lsn};
|
||||||
|
|
||||||
use anyhow::{Context, Result};
|
use anyhow::Result;
|
||||||
use bytes::{Buf, Bytes, BytesMut};
|
use bytes::{Buf, Bytes, BytesMut};
|
||||||
use tracing::*;
|
use tracing::*;
|
||||||
|
|
||||||
@@ -1082,10 +1082,7 @@ impl<'a> WalIngest<'a> {
|
|||||||
.await?
|
.await?
|
||||||
{
|
{
|
||||||
// create it with 0 size initially, the logic below will extend it
|
// create it with 0 size initially, the logic below will extend it
|
||||||
modification
|
modification.put_rel_creation(rel, 0, ctx).await?;
|
||||||
.put_rel_creation(rel, 0, ctx)
|
|
||||||
.await
|
|
||||||
.context("Relation Error")?;
|
|
||||||
0
|
0
|
||||||
} else {
|
} else {
|
||||||
self.timeline.get_rel_size(rel, last_lsn, true, ctx).await?
|
self.timeline.get_rel_size(rel, last_lsn, true, ctx).await?
|
||||||
|
|||||||
@@ -122,43 +122,6 @@ hnsw_populate(HierarchicalNSW* hnsw, Relation indexRel, Relation heapRel)
|
|||||||
true, true, hnsw_build_callback, (void *) hnsw, NULL);
|
true, true, hnsw_build_callback, (void *) hnsw, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __APPLE__
|
|
||||||
|
|
||||||
#include <sys/types.h>
|
|
||||||
#include <sys/sysctl.h>
|
|
||||||
|
|
||||||
static void
|
|
||||||
hnsw_check_available_memory(Size requested)
|
|
||||||
{
|
|
||||||
size_t total;
|
|
||||||
if (sysctlbyname("hw.memsize", NULL, &total, NULL, 0) < 0)
|
|
||||||
elog(ERROR, "Failed to get amount of RAM: %m");
|
|
||||||
|
|
||||||
if ((Size)NBuffers*BLCKSZ + requested >= total)
|
|
||||||
elog(ERROR, "HNSW index requeries %ld bytes while only %ld are available",
|
|
||||||
requested, total - (Size)NBuffers*BLCKSZ);
|
|
||||||
}
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
#include <sys/sysinfo.h>
|
|
||||||
|
|
||||||
static void
|
|
||||||
hnsw_check_available_memory(Size requested)
|
|
||||||
{
|
|
||||||
struct sysinfo si;
|
|
||||||
Size total;
|
|
||||||
if (sysinfo(&si) < 0)
|
|
||||||
elog(ERROR, "Failed to get amount of RAM: %m");
|
|
||||||
|
|
||||||
total = si.totalram*si.mem_unit;
|
|
||||||
if ((Size)NBuffers*BLCKSZ + requested >= total)
|
|
||||||
elog(ERROR, "HNSW index requeries %ld bytes while only %ld are available",
|
|
||||||
requested, total - (Size)NBuffers*BLCKSZ);
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static HierarchicalNSW*
|
static HierarchicalNSW*
|
||||||
hnsw_get_index(Relation indexRel, Relation heapRel)
|
hnsw_get_index(Relation indexRel, Relation heapRel)
|
||||||
{
|
{
|
||||||
@@ -193,8 +156,6 @@ hnsw_get_index(Relation indexRel, Relation heapRel)
|
|||||||
size_data_per_element = size_links_level0 + data_size + sizeof(label_t);
|
size_data_per_element = size_links_level0 + data_size + sizeof(label_t);
|
||||||
shmem_size = hnsw_sizeof() + maxelements * size_data_per_element;
|
shmem_size = hnsw_sizeof() + maxelements * size_data_per_element;
|
||||||
|
|
||||||
hnsw_check_available_memory(shmem_size);
|
|
||||||
|
|
||||||
/* first try to attach to existed index */
|
/* first try to attach to existed index */
|
||||||
if (!dsm_impl_op(DSM_OP_ATTACH, handle, 0, &impl_private,
|
if (!dsm_impl_op(DSM_OP_ATTACH, handle, 0, &impl_private,
|
||||||
&mapped_address, &mapped_size, DEBUG1))
|
&mapped_address, &mapped_size, DEBUG1))
|
||||||
@@ -580,7 +541,6 @@ l2_distance(PG_FUNCTION_ARGS)
|
|||||||
errmsg("different array dimensions %d and %d", a_dim, b_dim)));
|
errmsg("different array dimensions %d and %d", a_dim, b_dim)));
|
||||||
}
|
}
|
||||||
|
|
||||||
#pragma clang loop vectorize(enable)
|
|
||||||
for (int i = 0; i < a_dim; i++)
|
for (int i = 0; i < a_dim; i++)
|
||||||
{
|
{
|
||||||
diff = ax[i] - bx[i];
|
diff = ax[i] - bx[i];
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
comment = 'hnsw index'
|
comment = 'hNsw index'
|
||||||
default_version = '0.1.0'
|
default_version = '0.1.0'
|
||||||
module_pathname = '$libdir/hnsw'
|
module_pathname = '$libdir/hnsw'
|
||||||
relocatable = true
|
relocatable = true
|
||||||
|
|||||||
@@ -223,7 +223,6 @@ dist_t fstdistfunc_scalar(const coord_t *x, const coord_t *y, size_t n)
|
|||||||
{
|
{
|
||||||
dist_t distance = 0.0;
|
dist_t distance = 0.0;
|
||||||
|
|
||||||
#pragma clang loop vectorize(enable)
|
|
||||||
for (size_t i = 0; i < n; i++)
|
for (size_t i = 0; i < n; i++)
|
||||||
{
|
{
|
||||||
dist_t diff = x[i] - y[i];
|
dist_t diff = x[i] - y[i];
|
||||||
|
|||||||
@@ -32,7 +32,6 @@
|
|||||||
#include "port.h"
|
#include "port.h"
|
||||||
#include <curl/curl.h>
|
#include <curl/curl.h>
|
||||||
#include "utils/jsonb.h"
|
#include "utils/jsonb.h"
|
||||||
#include "libpq/crypt.h"
|
|
||||||
|
|
||||||
static ProcessUtility_hook_type PreviousProcessUtilityHook = NULL;
|
static ProcessUtility_hook_type PreviousProcessUtilityHook = NULL;
|
||||||
|
|
||||||
@@ -162,22 +161,7 @@ ConstructDeltaMessage()
|
|||||||
PushKeyValue(&state, "name", entry->name);
|
PushKeyValue(&state, "name", entry->name);
|
||||||
if (entry->password)
|
if (entry->password)
|
||||||
{
|
{
|
||||||
#if PG_MAJORVERSION_NUM == 14
|
|
||||||
char *logdetail;
|
|
||||||
#else
|
|
||||||
const char *logdetail;
|
|
||||||
#endif
|
|
||||||
PushKeyValue(&state, "password", (char *) entry->password);
|
PushKeyValue(&state, "password", (char *) entry->password);
|
||||||
char *encrypted_password = get_role_password(entry->name, &logdetail);
|
|
||||||
|
|
||||||
if (encrypted_password)
|
|
||||||
{
|
|
||||||
PushKeyValue(&state, "encrypted_password", encrypted_password);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
elog(ERROR, "Failed to get encrypted password: %s", logdetail);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
if (entry->old_name[0] != '\0')
|
if (entry->old_name[0] != '\0')
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -190,7 +190,7 @@ lfc_change_limit_hook(int newval, void *extra)
|
|||||||
hash_search(lfc_hash, &victim->key, HASH_REMOVE, NULL);
|
hash_search(lfc_hash, &victim->key, HASH_REMOVE, NULL);
|
||||||
lfc_ctl->used -= 1;
|
lfc_ctl->used -= 1;
|
||||||
}
|
}
|
||||||
elog(DEBUG1, "set local file cache limit to %d", new_size);
|
elog(LOG, "set local file cache limit to %d", new_size);
|
||||||
LWLockRelease(lfc_lock);
|
LWLockRelease(lfc_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -34,6 +34,7 @@
|
|||||||
|
|
||||||
#define PageStoreTrace DEBUG5
|
#define PageStoreTrace DEBUG5
|
||||||
|
|
||||||
|
#define MAX_RECONNECT_ATTEMPTS 5
|
||||||
#define RECONNECT_INTERVAL_USEC 1000000
|
#define RECONNECT_INTERVAL_USEC 1000000
|
||||||
|
|
||||||
bool connected = false;
|
bool connected = false;
|
||||||
@@ -54,15 +55,13 @@ int32 max_cluster_size;
|
|||||||
char *page_server_connstring;
|
char *page_server_connstring;
|
||||||
char *neon_auth_token;
|
char *neon_auth_token;
|
||||||
|
|
||||||
int readahead_buffer_size = 128;
|
int n_unflushed_requests = 0;
|
||||||
int flush_every_n_requests = 8;
|
int flush_every_n_requests = 8;
|
||||||
|
int readahead_buffer_size = 128;
|
||||||
int n_reconnect_attempts = 0;
|
|
||||||
int max_reconnect_attempts = 60;
|
|
||||||
|
|
||||||
bool (*old_redo_read_buffer_filter) (XLogReaderState *record, uint8 block_id) = NULL;
|
bool (*old_redo_read_buffer_filter) (XLogReaderState *record, uint8 block_id) = NULL;
|
||||||
|
|
||||||
static bool pageserver_flush(void);
|
static void pageserver_flush(void);
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
pageserver_connect(int elevel)
|
pageserver_connect(int elevel)
|
||||||
@@ -233,17 +232,16 @@ pageserver_disconnect(void)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool
|
static void
|
||||||
pageserver_send(NeonRequest * request)
|
pageserver_send(NeonRequest * request)
|
||||||
{
|
{
|
||||||
StringInfoData req_buff;
|
StringInfoData req_buff;
|
||||||
|
int n_reconnect_attempts = 0;
|
||||||
|
|
||||||
/* If the connection was lost for some reason, reconnect */
|
/* If the connection was lost for some reason, reconnect */
|
||||||
if (connected && PQstatus(pageserver_conn) == CONNECTION_BAD)
|
if (connected && PQstatus(pageserver_conn) == CONNECTION_BAD)
|
||||||
{
|
|
||||||
neon_log(LOG, "pageserver_send disconnect bad connection");
|
|
||||||
pageserver_disconnect();
|
pageserver_disconnect();
|
||||||
}
|
|
||||||
|
|
||||||
req_buff = nm_pack_request(request);
|
req_buff = nm_pack_request(request);
|
||||||
|
|
||||||
@@ -254,36 +252,53 @@ pageserver_send(NeonRequest * request)
|
|||||||
* See https://github.com/neondatabase/neon/issues/1138
|
* See https://github.com/neondatabase/neon/issues/1138
|
||||||
* So try to reestablish connection in case of failure.
|
* So try to reestablish connection in case of failure.
|
||||||
*/
|
*/
|
||||||
if (!connected)
|
while (true)
|
||||||
{
|
{
|
||||||
while (!pageserver_connect(n_reconnect_attempts < max_reconnect_attempts ? LOG : ERROR))
|
if (!connected)
|
||||||
{
|
{
|
||||||
n_reconnect_attempts += 1;
|
if (!pageserver_connect(n_reconnect_attempts < MAX_RECONNECT_ATTEMPTS ? LOG : ERROR))
|
||||||
pg_usleep(RECONNECT_INTERVAL_USEC);
|
{
|
||||||
|
n_reconnect_attempts += 1;
|
||||||
|
pg_usleep(RECONNECT_INTERVAL_USEC);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
n_reconnect_attempts = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Send request.
|
* Send request.
|
||||||
*
|
*
|
||||||
* In principle, this could block if the output buffer is full, and we
|
* In principle, this could block if the output buffer is full, and we
|
||||||
* should use async mode and check for interrupts while waiting. In
|
* should use async mode and check for interrupts while waiting. In
|
||||||
* practice, our requests are small enough to always fit in the output and
|
* practice, our requests are small enough to always fit in the output and
|
||||||
* TCP buffer.
|
* TCP buffer.
|
||||||
*/
|
*/
|
||||||
if (PQputCopyData(pageserver_conn, req_buff.data, req_buff.len) <= 0)
|
if (PQputCopyData(pageserver_conn, req_buff.data, req_buff.len) <= 0)
|
||||||
{
|
{
|
||||||
char *msg = pchomp(PQerrorMessage(pageserver_conn));
|
char *msg = pchomp(PQerrorMessage(pageserver_conn));
|
||||||
pageserver_disconnect();
|
if (n_reconnect_attempts < MAX_RECONNECT_ATTEMPTS)
|
||||||
neon_log(LOG, "pageserver_send disconnect because failed to send page request (try to reconnect): %s", msg);
|
{
|
||||||
pfree(msg);
|
neon_log(LOG, "failed to send page request (try to reconnect): %s", msg);
|
||||||
pfree(req_buff.data);
|
if (n_reconnect_attempts != 0) /* do not sleep before first reconnect attempt, assuming that pageserver is already restarted */
|
||||||
return false;
|
pg_usleep(RECONNECT_INTERVAL_USEC);
|
||||||
|
n_reconnect_attempts += 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
pageserver_disconnect();
|
||||||
|
neon_log(ERROR, "failed to send page request: %s", msg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
pfree(req_buff.data);
|
pfree(req_buff.data);
|
||||||
|
|
||||||
|
n_unflushed_requests++;
|
||||||
|
|
||||||
|
if (flush_every_n_requests > 0 && n_unflushed_requests >= flush_every_n_requests)
|
||||||
|
pageserver_flush();
|
||||||
|
|
||||||
if (message_level_is_interesting(PageStoreTrace))
|
if (message_level_is_interesting(PageStoreTrace))
|
||||||
{
|
{
|
||||||
char *msg = nm_to_string((NeonMessage *) request);
|
char *msg = nm_to_string((NeonMessage *) request);
|
||||||
@@ -291,7 +306,6 @@ pageserver_send(NeonRequest * request)
|
|||||||
neon_log(PageStoreTrace, "sent request: %s", msg);
|
neon_log(PageStoreTrace, "sent request: %s", msg);
|
||||||
pfree(msg);
|
pfree(msg);
|
||||||
}
|
}
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static NeonResponse *
|
static NeonResponse *
|
||||||
@@ -326,25 +340,16 @@ pageserver_receive(void)
|
|||||||
}
|
}
|
||||||
else if (rc == -1)
|
else if (rc == -1)
|
||||||
{
|
{
|
||||||
neon_log(LOG, "pageserver_receive disconnect because call_PQgetCopyData returns -1: %s", pchomp(PQerrorMessage(pageserver_conn)));
|
|
||||||
pageserver_disconnect();
|
pageserver_disconnect();
|
||||||
resp = NULL;
|
resp = NULL;
|
||||||
}
|
}
|
||||||
else if (rc == -2)
|
else if (rc == -2)
|
||||||
{
|
neon_log(ERROR, "could not read COPY data: %s", pchomp(PQerrorMessage(pageserver_conn)));
|
||||||
char* msg = pchomp(PQerrorMessage(pageserver_conn));
|
|
||||||
pageserver_disconnect();
|
|
||||||
neon_log(ERROR, "pageserver_receive disconnect because could not read COPY data: %s", msg);
|
|
||||||
}
|
|
||||||
else
|
else
|
||||||
{
|
neon_log(ERROR, "unexpected PQgetCopyData return value: %d", rc);
|
||||||
pageserver_disconnect();
|
|
||||||
neon_log(ERROR, "pageserver_receive disconnect because unexpected PQgetCopyData return value: %d", rc);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
PG_CATCH();
|
PG_CATCH();
|
||||||
{
|
{
|
||||||
neon_log(LOG, "pageserver_receive disconnect due to caught exception");
|
|
||||||
pageserver_disconnect();
|
pageserver_disconnect();
|
||||||
PG_RE_THROW();
|
PG_RE_THROW();
|
||||||
}
|
}
|
||||||
@@ -354,25 +359,21 @@ pageserver_receive(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static bool
|
static void
|
||||||
pageserver_flush(void)
|
pageserver_flush(void)
|
||||||
{
|
{
|
||||||
if (!connected)
|
if (!connected)
|
||||||
{
|
{
|
||||||
neon_log(WARNING, "Tried to flush while disconnected");
|
neon_log(WARNING, "Tried to flush while disconnected");
|
||||||
}
|
}
|
||||||
else
|
else if (PQflush(pageserver_conn))
|
||||||
{
|
{
|
||||||
if (PQflush(pageserver_conn))
|
char *msg = pchomp(PQerrorMessage(pageserver_conn));
|
||||||
{
|
|
||||||
char *msg = pchomp(PQerrorMessage(pageserver_conn));
|
pageserver_disconnect();
|
||||||
pageserver_disconnect();
|
neon_log(ERROR, "failed to flush page requests: %s", msg);
|
||||||
neon_log(LOG, "pageserver_flush disconnect because failed to flush page requests: %s", msg);
|
|
||||||
pfree(msg);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return true;
|
n_unflushed_requests = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
page_server_api api = {
|
page_server_api api = {
|
||||||
@@ -438,14 +439,6 @@ pg_init_libpagestore(void)
|
|||||||
PGC_USERSET,
|
PGC_USERSET,
|
||||||
0, /* no flags required */
|
0, /* no flags required */
|
||||||
NULL, NULL, NULL);
|
NULL, NULL, NULL);
|
||||||
DefineCustomIntVariable("neon.max_reconnect_attempts",
|
|
||||||
"Maximal attempts to reconnect to pages server (with 1 second timeout)",
|
|
||||||
NULL,
|
|
||||||
&max_reconnect_attempts,
|
|
||||||
10, 0, INT_MAX,
|
|
||||||
PGC_USERSET,
|
|
||||||
0,
|
|
||||||
NULL, NULL, NULL);
|
|
||||||
DefineCustomIntVariable("neon.readahead_buffer_size",
|
DefineCustomIntVariable("neon.readahead_buffer_size",
|
||||||
"number of prefetches to buffer",
|
"number of prefetches to buffer",
|
||||||
"This buffer is used to hold and manage prefetched "
|
"This buffer is used to hold and manage prefetched "
|
||||||
|
|||||||
@@ -145,9 +145,9 @@ extern char *nm_to_string(NeonMessage * msg);
|
|||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
bool (*send) (NeonRequest * request);
|
void (*send) (NeonRequest * request);
|
||||||
NeonResponse *(*receive) (void);
|
NeonResponse *(*receive) (void);
|
||||||
bool (*flush) (void);
|
void (*flush) (void);
|
||||||
} page_server_api;
|
} page_server_api;
|
||||||
|
|
||||||
extern void prefetch_on_ps_disconnect(void);
|
extern void prefetch_on_ps_disconnect(void);
|
||||||
|
|||||||
@@ -489,8 +489,7 @@ prefetch_wait_for(uint64 ring_index)
|
|||||||
if (MyPState->ring_flush <= ring_index &&
|
if (MyPState->ring_flush <= ring_index &&
|
||||||
MyPState->ring_unused > MyPState->ring_flush)
|
MyPState->ring_unused > MyPState->ring_flush)
|
||||||
{
|
{
|
||||||
if (!page_server->flush())
|
page_server->flush();
|
||||||
return false;
|
|
||||||
MyPState->ring_flush = MyPState->ring_unused;
|
MyPState->ring_flush = MyPState->ring_unused;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -678,8 +677,7 @@ prefetch_do_request(PrefetchRequest *slot, bool *force_latest, XLogRecPtr *force
|
|||||||
|
|
||||||
Assert(slot->response == NULL);
|
Assert(slot->response == NULL);
|
||||||
Assert(slot->my_ring_index == MyPState->ring_unused);
|
Assert(slot->my_ring_index == MyPState->ring_unused);
|
||||||
|
page_server->send((NeonRequest *) &request);
|
||||||
while (!page_server->send((NeonRequest *) &request));
|
|
||||||
|
|
||||||
/* update prefetch state */
|
/* update prefetch state */
|
||||||
MyPState->n_requests_inflight += 1;
|
MyPState->n_requests_inflight += 1;
|
||||||
@@ -689,7 +687,6 @@ prefetch_do_request(PrefetchRequest *slot, bool *force_latest, XLogRecPtr *force
|
|||||||
/* update slot state */
|
/* update slot state */
|
||||||
slot->status = PRFS_REQUESTED;
|
slot->status = PRFS_REQUESTED;
|
||||||
|
|
||||||
|
|
||||||
prfh_insert(MyPState->prf_hash, slot, &found);
|
prfh_insert(MyPState->prf_hash, slot, &found);
|
||||||
Assert(!found);
|
Assert(!found);
|
||||||
}
|
}
|
||||||
@@ -746,7 +743,6 @@ prefetch_register_buffer(BufferTag tag, bool *force_latest, XLogRecPtr *force_ls
|
|||||||
prefetch_set_unused(ring_index);
|
prefetch_set_unused(ring_index);
|
||||||
entry = NULL;
|
entry = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
/* if we don't want the latest version, only accept requests with the exact same LSN */
|
/* if we don't want the latest version, only accept requests with the exact same LSN */
|
||||||
else
|
else
|
||||||
@@ -760,23 +756,20 @@ prefetch_register_buffer(BufferTag tag, bool *force_latest, XLogRecPtr *force_ls
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (entry != NULL)
|
/*
|
||||||
|
* We received a prefetch for a page that was recently read and
|
||||||
|
* removed from the buffers. Remove that request from the buffers.
|
||||||
|
*/
|
||||||
|
else if (slot->status == PRFS_TAG_REMAINS)
|
||||||
{
|
{
|
||||||
/*
|
prefetch_set_unused(ring_index);
|
||||||
* We received a prefetch for a page that was recently read and
|
entry = NULL;
|
||||||
* removed from the buffers. Remove that request from the buffers.
|
}
|
||||||
*/
|
else
|
||||||
if (slot->status == PRFS_TAG_REMAINS)
|
{
|
||||||
{
|
/* The buffered request is good enough, return that index */
|
||||||
prefetch_set_unused(ring_index);
|
pgBufferUsage.prefetch.duplicates++;
|
||||||
entry = NULL;
|
return ring_index;
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/* The buffered request is good enough, return that index */
|
|
||||||
pgBufferUsage.prefetch.duplicates++;
|
|
||||||
return ring_index;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -866,7 +859,8 @@ page_server_request(void const *req)
|
|||||||
{
|
{
|
||||||
NeonResponse* resp;
|
NeonResponse* resp;
|
||||||
do {
|
do {
|
||||||
while (!page_server->send((NeonRequest *) req) || !page_server->flush());
|
page_server->send((NeonRequest *) req);
|
||||||
|
page_server->flush();
|
||||||
MyPState->ring_flush = MyPState->ring_unused;
|
MyPState->ring_flush = MyPState->ring_unused;
|
||||||
consume_prefetch_responses();
|
consume_prefetch_responses();
|
||||||
resp = page_server->receive();
|
resp = page_server->receive();
|
||||||
@@ -2681,6 +2675,7 @@ bool
|
|||||||
neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id)
|
neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id)
|
||||||
{
|
{
|
||||||
XLogRecPtr end_recptr = record->EndRecPtr;
|
XLogRecPtr end_recptr = record->EndRecPtr;
|
||||||
|
XLogRecPtr prev_end_recptr = record->ReadRecPtr - 1;
|
||||||
RelFileNode rnode;
|
RelFileNode rnode;
|
||||||
ForkNumber forknum;
|
ForkNumber forknum;
|
||||||
BlockNumber blkno;
|
BlockNumber blkno;
|
||||||
@@ -2724,15 +2719,16 @@ neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id)
|
|||||||
|
|
||||||
no_redo_needed = buffer < 0;
|
no_redo_needed = buffer < 0;
|
||||||
|
|
||||||
/* In both cases st lwlsn past this WAL record */
|
/* we don't have the buffer in memory, update lwLsn past this record */
|
||||||
SetLastWrittenLSNForBlock(end_recptr, rnode, forknum, blkno);
|
|
||||||
|
|
||||||
/* we don't have the buffer in memory, update lwLsn past this record,
|
|
||||||
* also evict page fro file cache
|
|
||||||
*/
|
|
||||||
if (no_redo_needed)
|
if (no_redo_needed)
|
||||||
|
{
|
||||||
|
SetLastWrittenLSNForBlock(end_recptr, rnode, forknum, blkno);
|
||||||
lfc_evict(rnode, forknum, blkno);
|
lfc_evict(rnode, forknum, blkno);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
SetLastWrittenLSNForBlock(prev_end_recptr, rnode, forknum, blkno);
|
||||||
|
}
|
||||||
|
|
||||||
LWLockRelease(partitionLock);
|
LWLockRelease(partitionLock);
|
||||||
|
|
||||||
@@ -2740,10 +2736,7 @@ neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id)
|
|||||||
if (get_cached_relsize(rnode, forknum, &relsize))
|
if (get_cached_relsize(rnode, forknum, &relsize))
|
||||||
{
|
{
|
||||||
if (relsize < blkno + 1)
|
if (relsize < blkno + 1)
|
||||||
{
|
|
||||||
update_cached_relsize(rnode, forknum, blkno + 1);
|
update_cached_relsize(rnode, forknum, blkno + 1);
|
||||||
SetLastWrittenLSNForRelation(end_recptr, rnode, forknum);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@@ -2775,7 +2768,6 @@ neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id)
|
|||||||
Assert(nbresponse->n_blocks > blkno);
|
Assert(nbresponse->n_blocks > blkno);
|
||||||
|
|
||||||
set_cached_relsize(rnode, forknum, nbresponse->n_blocks);
|
set_cached_relsize(rnode, forknum, nbresponse->n_blocks);
|
||||||
SetLastWrittenLSNForRelation(end_recptr, rnode, forknum);
|
|
||||||
|
|
||||||
elog(SmgrTrace, "Set length to %d", nbresponse->n_blocks);
|
elog(SmgrTrace, "Set length to %d", nbresponse->n_blocks);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -257,7 +257,7 @@ nwp_register_gucs(void)
|
|||||||
"Walproposer reconnects to offline safekeepers once in this interval.",
|
"Walproposer reconnects to offline safekeepers once in this interval.",
|
||||||
NULL,
|
NULL,
|
||||||
&wal_acceptor_reconnect_timeout,
|
&wal_acceptor_reconnect_timeout,
|
||||||
1000, 0, INT_MAX, /* default, min, max */
|
5000, 0, INT_MAX, /* default, min, max */
|
||||||
PGC_SIGHUP, /* context */
|
PGC_SIGHUP, /* context */
|
||||||
GUC_UNIT_MS, /* flags */
|
GUC_UNIT_MS, /* flags */
|
||||||
NULL, NULL, NULL);
|
NULL, NULL, NULL);
|
||||||
|
|||||||
123
poetry.lock
generated
123
poetry.lock
generated
@@ -1654,74 +1654,71 @@ test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"]
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "psycopg2-binary"
|
name = "psycopg2-binary"
|
||||||
version = "2.9.6"
|
version = "2.9.3"
|
||||||
description = "psycopg2 - Python-PostgreSQL Database Adapter"
|
description = "psycopg2 - Python-PostgreSQL Database Adapter"
|
||||||
category = "main"
|
category = "main"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.6"
|
python-versions = ">=3.6"
|
||||||
files = [
|
files = [
|
||||||
{file = "psycopg2-binary-2.9.6.tar.gz", hash = "sha256:1f64dcfb8f6e0c014c7f55e51c9759f024f70ea572fbdef123f85318c297947c"},
|
{file = "psycopg2-binary-2.9.3.tar.gz", hash = "sha256:761df5313dc15da1502b21453642d7599d26be88bff659382f8f9747c7ebea4e"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d26e0342183c762de3276cca7a530d574d4e25121ca7d6e4a98e4f05cb8e4df7"},
|
{file = "psycopg2_binary-2.9.3-cp310-cp310-macosx_10_14_x86_64.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl", hash = "sha256:539b28661b71da7c0e428692438efbcd048ca21ea81af618d845e06ebfd29478"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c48d8f2db17f27d41fb0e2ecd703ea41984ee19362cbce52c097963b3a1b4365"},
|
{file = "psycopg2_binary-2.9.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2f2534ab7dc7e776a263b463a16e189eb30e85ec9bbe1bff9e78dae802608932"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffe9dc0a884a8848075e576c1de0290d85a533a9f6e9c4e564f19adf8f6e54a7"},
|
{file = "psycopg2_binary-2.9.3-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6e82d38390a03da28c7985b394ec3f56873174e2c88130e6966cb1c946508e65"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8a76e027f87753f9bd1ab5f7c9cb8c7628d1077ef927f5e2446477153a602f2c"},
|
{file = "psycopg2_binary-2.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:57804fc02ca3ce0dbfbef35c4b3a4a774da66d66ea20f4bda601294ad2ea6092"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6460c7a99fc939b849431f1e73e013d54aa54293f30f1109019c56a0b2b2ec2f"},
|
{file = "psycopg2_binary-2.9.3-cp310-cp310-manylinux_2_24_aarch64.whl", hash = "sha256:083a55275f09a62b8ca4902dd11f4b33075b743cf0d360419e2051a8a5d5ff76"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ae102a98c547ee2288637af07393dd33f440c25e5cd79556b04e3fca13325e5f"},
|
{file = "psycopg2_binary-2.9.3-cp310-cp310-manylinux_2_24_ppc64le.whl", hash = "sha256:0a29729145aaaf1ad8bafe663131890e2111f13416b60e460dae0a96af5905c9"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:9972aad21f965599ed0106f65334230ce826e5ae69fda7cbd688d24fa922415e"},
|
{file = "psycopg2_binary-2.9.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:3a79d622f5206d695d7824cbf609a4f5b88ea6d6dab5f7c147fc6d333a8787e4"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:7a40c00dbe17c0af5bdd55aafd6ff6679f94a9be9513a4c7e071baf3d7d22a70"},
|
{file = "psycopg2_binary-2.9.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:090f3348c0ab2cceb6dfbe6bf721ef61262ddf518cd6cc6ecc7d334996d64efa"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:cacbdc5839bdff804dfebc058fe25684cae322987f7a38b0168bc1b2df703fb1"},
|
{file = "psycopg2_binary-2.9.3-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:a9e1f75f96ea388fbcef36c70640c4efbe4650658f3d6a2967b4cc70e907352e"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7f0438fa20fb6c7e202863e0d5ab02c246d35efb1d164e052f2f3bfe2b152bd0"},
|
{file = "psycopg2_binary-2.9.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c3ae8e75eb7160851e59adc77b3a19a976e50622e44fd4fd47b8b18208189d42"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp310-cp310-win32.whl", hash = "sha256:b6c8288bb8a84b47e07013bb4850f50538aa913d487579e1921724631d02ea1b"},
|
{file = "psycopg2_binary-2.9.3-cp310-cp310-win32.whl", hash = "sha256:7b1e9b80afca7b7a386ef087db614faebbf8839b7f4db5eb107d0f1a53225029"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp310-cp310-win_amd64.whl", hash = "sha256:61b047a0537bbc3afae10f134dc6393823882eb263088c271331602b672e52e9"},
|
{file = "psycopg2_binary-2.9.3-cp310-cp310-win_amd64.whl", hash = "sha256:8b344adbb9a862de0c635f4f0425b7958bf5a4b927c8594e6e8d261775796d53"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:964b4dfb7c1c1965ac4c1978b0f755cc4bd698e8aa2b7667c575fb5f04ebe06b"},
|
{file = "psycopg2_binary-2.9.3-cp36-cp36m-macosx_10_14_x86_64.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl", hash = "sha256:e847774f8ffd5b398a75bc1c18fbb56564cda3d629fe68fd81971fece2d3c67e"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:afe64e9b8ea66866a771996f6ff14447e8082ea26e675a295ad3bdbffdd72afb"},
|
{file = "psycopg2_binary-2.9.3-cp36-cp36m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:68641a34023d306be959101b345732360fc2ea4938982309b786f7be1b43a4a1"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:15e2ee79e7cf29582ef770de7dab3d286431b01c3bb598f8e05e09601b890081"},
|
{file = "psycopg2_binary-2.9.3-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3303f8807f342641851578ee7ed1f3efc9802d00a6f83c101d21c608cb864460"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dfa74c903a3c1f0d9b1c7e7b53ed2d929a4910e272add6700c38f365a6002820"},
|
{file = "psycopg2_binary-2.9.3-cp36-cp36m-manylinux_2_24_aarch64.whl", hash = "sha256:e3699852e22aa68c10de06524a3721ade969abf382da95884e6a10ff798f9281"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b83456c2d4979e08ff56180a76429263ea254c3f6552cd14ada95cff1dec9bb8"},
|
{file = "psycopg2_binary-2.9.3-cp36-cp36m-manylinux_2_24_ppc64le.whl", hash = "sha256:526ea0378246d9b080148f2d6681229f4b5964543c170dd10bf4faaab6e0d27f"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0645376d399bfd64da57148694d78e1f431b1e1ee1054872a5713125681cf1be"},
|
{file = "psycopg2_binary-2.9.3-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:b1c8068513f5b158cf7e29c43a77eb34b407db29aca749d3eb9293ee0d3103ca"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e99e34c82309dd78959ba3c1590975b5d3c862d6f279f843d47d26ff89d7d7e1"},
|
{file = "psycopg2_binary-2.9.3-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:15803fa813ea05bef089fa78835118b5434204f3a17cb9f1e5dbfd0b9deea5af"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:4ea29fc3ad9d91162c52b578f211ff1c931d8a38e1f58e684c45aa470adf19e2"},
|
{file = "psycopg2_binary-2.9.3-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:152f09f57417b831418304c7f30d727dc83a12761627bb826951692cc6491e57"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:4ac30da8b4f57187dbf449294d23b808f8f53cad6b1fc3623fa8a6c11d176dd0"},
|
{file = "psycopg2_binary-2.9.3-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:404224e5fef3b193f892abdbf8961ce20e0b6642886cfe1fe1923f41aaa75c9d"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e78e6e2a00c223e164c417628572a90093c031ed724492c763721c2e0bc2a8df"},
|
{file = "psycopg2_binary-2.9.3-cp36-cp36m-win32.whl", hash = "sha256:1f6b813106a3abdf7b03640d36e24669234120c72e91d5cbaeb87c5f7c36c65b"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp311-cp311-win32.whl", hash = "sha256:1876843d8e31c89c399e31b97d4b9725a3575bb9c2af92038464231ec40f9edb"},
|
{file = "psycopg2_binary-2.9.3-cp36-cp36m-win_amd64.whl", hash = "sha256:2d872e3c9d5d075a2e104540965a1cf898b52274a5923936e5bfddb58c59c7c2"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp311-cp311-win_amd64.whl", hash = "sha256:b4b24f75d16a89cc6b4cdff0eb6a910a966ecd476d1e73f7ce5985ff1328e9a6"},
|
{file = "psycopg2_binary-2.9.3-cp37-cp37m-macosx_10_14_x86_64.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl", hash = "sha256:10bb90fb4d523a2aa67773d4ff2b833ec00857f5912bafcfd5f5414e45280fb1"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp36-cp36m-win32.whl", hash = "sha256:498807b927ca2510baea1b05cc91d7da4718a0f53cb766c154c417a39f1820a0"},
|
{file = "psycopg2_binary-2.9.3-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:874a52ecab70af13e899f7847b3e074eeb16ebac5615665db33bce8a1009cf33"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp36-cp36m-win_amd64.whl", hash = "sha256:0d236c2825fa656a2d98bbb0e52370a2e852e5a0ec45fc4f402977313329174d"},
|
{file = "psycopg2_binary-2.9.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a29b3ca4ec9defec6d42bf5feb36bb5817ba3c0230dd83b4edf4bf02684cd0ae"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:34b9ccdf210cbbb1303c7c4db2905fa0319391bd5904d32689e6dd5c963d2ea8"},
|
{file = "psycopg2_binary-2.9.3-cp37-cp37m-manylinux_2_24_aarch64.whl", hash = "sha256:12b11322ea00ad8db8c46f18b7dfc47ae215e4df55b46c67a94b4effbaec7094"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:84d2222e61f313c4848ff05353653bf5f5cf6ce34df540e4274516880d9c3763"},
|
{file = "psycopg2_binary-2.9.3-cp37-cp37m-manylinux_2_24_ppc64le.whl", hash = "sha256:53293533fcbb94c202b7c800a12c873cfe24599656b341f56e71dd2b557be063"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:30637a20623e2a2eacc420059be11527f4458ef54352d870b8181a4c3020ae6b"},
|
{file = "psycopg2_binary-2.9.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c381bda330ddf2fccbafab789d83ebc6c53db126e4383e73794c74eedce855ef"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8122cfc7cae0da9a3077216528b8bb3629c43b25053284cc868744bfe71eb141"},
|
{file = "psycopg2_binary-2.9.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:9d29409b625a143649d03d0fd7b57e4b92e0ecad9726ba682244b73be91d2fdb"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:38601cbbfe600362c43714482f43b7c110b20cb0f8172422c616b09b85a750c5"},
|
{file = "psycopg2_binary-2.9.3-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:183a517a3a63503f70f808b58bfbf962f23d73b6dccddae5aa56152ef2bcb232"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c7e62ab8b332147a7593a385d4f368874d5fe4ad4e341770d4983442d89603e3"},
|
{file = "psycopg2_binary-2.9.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:15c4e4cfa45f5a60599d9cec5f46cd7b1b29d86a6390ec23e8eebaae84e64554"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:2ab652e729ff4ad76d400df2624d223d6e265ef81bb8aa17fbd63607878ecbee"},
|
{file = "psycopg2_binary-2.9.3-cp37-cp37m-win32.whl", hash = "sha256:adf20d9a67e0b6393eac162eb81fb10bc9130a80540f4df7e7355c2dd4af9fba"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:c83a74b68270028dc8ee74d38ecfaf9c90eed23c8959fca95bd703d25b82c88e"},
|
{file = "psycopg2_binary-2.9.3-cp37-cp37m-win_amd64.whl", hash = "sha256:2f9ffd643bc7349eeb664eba8864d9e01f057880f510e4681ba40a6532f93c71"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d4e6036decf4b72d6425d5b29bbd3e8f0ff1059cda7ac7b96d6ac5ed34ffbacd"},
|
{file = "psycopg2_binary-2.9.3-cp38-cp38-macosx_10_14_x86_64.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl", hash = "sha256:def68d7c21984b0f8218e8a15d514f714d96904265164f75f8d3a70f9c295667"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp37-cp37m-win32.whl", hash = "sha256:a8c28fd40a4226b4a84bdf2d2b5b37d2c7bd49486b5adcc200e8c7ec991dfa7e"},
|
{file = "psycopg2_binary-2.9.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e6aa71ae45f952a2205377773e76f4e3f27951df38e69a4c95440c779e013560"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp37-cp37m-win_amd64.whl", hash = "sha256:51537e3d299be0db9137b321dfb6a5022caaab275775680e0c3d281feefaca6b"},
|
{file = "psycopg2_binary-2.9.3-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dffc08ca91c9ac09008870c9eb77b00a46b3378719584059c034b8945e26b272"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:cf4499e0a83b7b7edcb8dabecbd8501d0d3a5ef66457200f77bde3d210d5debb"},
|
{file = "psycopg2_binary-2.9.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:280b0bb5cbfe8039205c7981cceb006156a675362a00fe29b16fbc264e242834"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7e13a5a2c01151f1208d5207e42f33ba86d561b7a89fca67c700b9486a06d0e2"},
|
{file = "psycopg2_binary-2.9.3-cp38-cp38-manylinux_2_24_aarch64.whl", hash = "sha256:af9813db73395fb1fc211bac696faea4ca9ef53f32dc0cfa27e4e7cf766dcf24"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0e0f754d27fddcfd74006455b6e04e6705d6c31a612ec69ddc040a5468e44b4e"},
|
{file = "psycopg2_binary-2.9.3-cp38-cp38-manylinux_2_24_ppc64le.whl", hash = "sha256:63638d875be8c2784cfc952c9ac34e2b50e43f9f0a0660b65e2a87d656b3116c"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d57c3fd55d9058645d26ae37d76e61156a27722097229d32a9e73ed54819982a"},
|
{file = "psycopg2_binary-2.9.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:ffb7a888a047696e7f8240d649b43fb3644f14f0ee229077e7f6b9f9081635bd"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:71f14375d6f73b62800530b581aed3ada394039877818b2d5f7fc77e3bb6894d"},
|
{file = "psycopg2_binary-2.9.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:0c9d5450c566c80c396b7402895c4369a410cab5a82707b11aee1e624da7d004"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:441cc2f8869a4f0f4bb408475e5ae0ee1f3b55b33f350406150277f7f35384fc"},
|
{file = "psycopg2_binary-2.9.3-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:d1c1b569ecafe3a69380a94e6ae09a4789bbb23666f3d3a08d06bbd2451f5ef1"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:65bee1e49fa6f9cf327ce0e01c4c10f39165ee76d35c846ade7cb0ec6683e303"},
|
{file = "psycopg2_binary-2.9.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8fc53f9af09426a61db9ba357865c77f26076d48669f2e1bb24d85a22fb52307"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:af335bac6b666cc6aea16f11d486c3b794029d9df029967f9938a4bed59b6a19"},
|
{file = "psycopg2_binary-2.9.3-cp38-cp38-win32.whl", hash = "sha256:6472a178e291b59e7f16ab49ec8b4f3bdada0a879c68d3817ff0963e722a82ce"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:cfec476887aa231b8548ece2e06d28edc87c1397ebd83922299af2e051cf2827"},
|
{file = "psycopg2_binary-2.9.3-cp38-cp38-win_amd64.whl", hash = "sha256:35168209c9d51b145e459e05c31a9eaeffa9a6b0fd61689b48e07464ffd1a83e"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:65c07febd1936d63bfde78948b76cd4c2a411572a44ac50719ead41947d0f26b"},
|
{file = "psycopg2_binary-2.9.3-cp39-cp39-macosx_10_14_x86_64.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl", hash = "sha256:47133f3f872faf28c1e87d4357220e809dfd3fa7c64295a4a148bcd1e6e34ec9"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp38-cp38-win32.whl", hash = "sha256:4dfb4be774c4436a4526d0c554af0cc2e02082c38303852a36f6456ece7b3503"},
|
{file = "psycopg2_binary-2.9.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b3a24a1982ae56461cc24f6680604fffa2c1b818e9dc55680da038792e004d18"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp38-cp38-win_amd64.whl", hash = "sha256:02c6e3cf3439e213e4ee930308dc122d6fb4d4bea9aef4a12535fbd605d1a2fe"},
|
{file = "psycopg2_binary-2.9.3-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:91920527dea30175cc02a1099f331aa8c1ba39bf8b7762b7b56cbf54bc5cce42"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e9182eb20f41417ea1dd8e8f7888c4d7c6e805f8a7c98c1081778a3da2bee3e4"},
|
{file = "psycopg2_binary-2.9.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:887dd9aac71765ac0d0bac1d0d4b4f2c99d5f5c1382d8b770404f0f3d0ce8a39"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8a6979cf527e2603d349a91060f428bcb135aea2be3201dff794813256c274f1"},
|
{file = "psycopg2_binary-2.9.3-cp39-cp39-manylinux_2_24_aarch64.whl", hash = "sha256:1f14c8b0942714eb3c74e1e71700cbbcb415acbc311c730370e70c578a44a25c"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8338a271cb71d8da40b023a35d9c1e919eba6cbd8fa20a54b748a332c355d896"},
|
{file = "psycopg2_binary-2.9.3-cp39-cp39-manylinux_2_24_ppc64le.whl", hash = "sha256:7af0dd86ddb2f8af5da57a976d27cd2cd15510518d582b478fbb2292428710b4"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e3ed340d2b858d6e6fb5083f87c09996506af483227735de6964a6100b4e6a54"},
|
{file = "psycopg2_binary-2.9.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:93cd1967a18aa0edd4b95b1dfd554cf15af657cb606280996d393dadc88c3c35"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f81e65376e52f03422e1fb475c9514185669943798ed019ac50410fb4c4df232"},
|
{file = "psycopg2_binary-2.9.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:bda845b664bb6c91446ca9609fc69f7db6c334ec5e4adc87571c34e4f47b7ddb"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bfb13af3c5dd3a9588000910178de17010ebcccd37b4f9794b00595e3a8ddad3"},
|
{file = "psycopg2_binary-2.9.3-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:01310cf4cf26db9aea5158c217caa92d291f0500051a6469ac52166e1a16f5b7"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4c727b597c6444a16e9119386b59388f8a424223302d0c06c676ec8b4bc1f963"},
|
{file = "psycopg2_binary-2.9.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:99485cab9ba0fa9b84f1f9e1fef106f44a46ef6afdeec8885e0b88d0772b49e8"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:4d67fbdaf177da06374473ef6f7ed8cc0a9dc640b01abfe9e8a2ccb1b1402c1f"},
|
{file = "psycopg2_binary-2.9.3-cp39-cp39-win32.whl", hash = "sha256:46f0e0a6b5fa5851bbd9ab1bc805eef362d3a230fbdfbc209f4a236d0a7a990d"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:0892ef645c2fabb0c75ec32d79f4252542d0caec1d5d949630e7d242ca4681a3"},
|
{file = "psycopg2_binary-2.9.3-cp39-cp39-win_amd64.whl", hash = "sha256:accfe7e982411da3178ec690baaceaad3c278652998b2c45828aaac66cd8285f"},
|
||||||
{file = "psycopg2_binary-2.9.6-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:02c0f3757a4300cf379eb49f543fb7ac527fb00144d39246ee40e1df684ab514"},
|
|
||||||
{file = "psycopg2_binary-2.9.6-cp39-cp39-win32.whl", hash = "sha256:c3dba7dab16709a33a847e5cd756767271697041fbe3fe97c215b1fc1f5c9848"},
|
|
||||||
{file = "psycopg2_binary-2.9.6-cp39-cp39-win_amd64.whl", hash = "sha256:f6a88f384335bb27812293fdb11ac6aee2ca3f51d3c7820fe03de0a304ab6249"},
|
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ license.workspace = true
|
|||||||
[dependencies]
|
[dependencies]
|
||||||
anyhow.workspace = true
|
anyhow.workspace = true
|
||||||
async-trait.workspace = true
|
async-trait.workspace = true
|
||||||
|
atty.workspace = true
|
||||||
base64.workspace = true
|
base64.workspace = true
|
||||||
bstr.workspace = true
|
bstr.workspace = true
|
||||||
bytes = { workspace = true, features = ["serde"] }
|
bytes = { workspace = true, features = ["serde"] }
|
||||||
@@ -29,7 +30,6 @@ metrics.workspace = true
|
|||||||
once_cell.workspace = true
|
once_cell.workspace = true
|
||||||
opentelemetry.workspace = true
|
opentelemetry.workspace = true
|
||||||
parking_lot.workspace = true
|
parking_lot.workspace = true
|
||||||
pbkdf2.workspace = true
|
|
||||||
pin-project-lite.workspace = true
|
pin-project-lite.workspace = true
|
||||||
postgres_backend.workspace = true
|
postgres_backend.workspace = true
|
||||||
pq_proto.workspace = true
|
pq_proto.workspace = true
|
||||||
@@ -38,7 +38,6 @@ rand.workspace = true
|
|||||||
regex.workspace = true
|
regex.workspace = true
|
||||||
reqwest = { workspace = true, features = ["json"] }
|
reqwest = { workspace = true, features = ["json"] }
|
||||||
reqwest-middleware.workspace = true
|
reqwest-middleware.workspace = true
|
||||||
reqwest-retry.workspace = true
|
|
||||||
reqwest-tracing.workspace = true
|
reqwest-tracing.workspace = true
|
||||||
routerify.workspace = true
|
routerify.workspace = true
|
||||||
rustls-pemfile.workspace = true
|
rustls-pemfile.workspace = true
|
||||||
|
|||||||
@@ -136,17 +136,18 @@ impl Default for ConnCfg {
|
|||||||
|
|
||||||
impl ConnCfg {
|
impl ConnCfg {
|
||||||
/// Establish a raw TCP connection to the compute node.
|
/// Establish a raw TCP connection to the compute node.
|
||||||
async fn connect_raw(&self, timeout: Duration) -> io::Result<(SocketAddr, TcpStream, &str)> {
|
async fn connect_raw(&self) -> io::Result<(SocketAddr, TcpStream, &str)> {
|
||||||
use tokio_postgres::config::Host;
|
use tokio_postgres::config::Host;
|
||||||
|
|
||||||
// wrap TcpStream::connect with timeout
|
// wrap TcpStream::connect with timeout
|
||||||
let connect_with_timeout = |host, port| {
|
let connect_with_timeout = |host, port| {
|
||||||
tokio::time::timeout(timeout, TcpStream::connect((host, port))).map(
|
let connection_timeout = Duration::from_millis(10000);
|
||||||
|
tokio::time::timeout(connection_timeout, TcpStream::connect((host, port))).map(
|
||||||
move |res| match res {
|
move |res| match res {
|
||||||
Ok(tcpstream_connect_res) => tcpstream_connect_res,
|
Ok(tcpstream_connect_res) => tcpstream_connect_res,
|
||||||
Err(_) => Err(io::Error::new(
|
Err(_) => Err(io::Error::new(
|
||||||
io::ErrorKind::TimedOut,
|
io::ErrorKind::TimedOut,
|
||||||
format!("exceeded connection timeout {timeout:?}"),
|
format!("exceeded connection timeout {connection_timeout:?}"),
|
||||||
)),
|
)),
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
@@ -222,9 +223,8 @@ impl ConnCfg {
|
|||||||
async fn do_connect(
|
async fn do_connect(
|
||||||
&self,
|
&self,
|
||||||
allow_self_signed_compute: bool,
|
allow_self_signed_compute: bool,
|
||||||
timeout: Duration,
|
|
||||||
) -> Result<PostgresConnection, ConnectionError> {
|
) -> Result<PostgresConnection, ConnectionError> {
|
||||||
let (socket_addr, stream, host) = self.connect_raw(timeout).await?;
|
let (socket_addr, stream, host) = self.connect_raw().await?;
|
||||||
|
|
||||||
let tls_connector = native_tls::TlsConnector::builder()
|
let tls_connector = native_tls::TlsConnector::builder()
|
||||||
.danger_accept_invalid_certs(allow_self_signed_compute)
|
.danger_accept_invalid_certs(allow_self_signed_compute)
|
||||||
@@ -264,9 +264,8 @@ impl ConnCfg {
|
|||||||
pub async fn connect(
|
pub async fn connect(
|
||||||
&self,
|
&self,
|
||||||
allow_self_signed_compute: bool,
|
allow_self_signed_compute: bool,
|
||||||
timeout: Duration,
|
|
||||||
) -> Result<PostgresConnection, ConnectionError> {
|
) -> Result<PostgresConnection, ConnectionError> {
|
||||||
self.do_connect(allow_self_signed_compute, timeout)
|
self.do_connect(allow_self_signed_compute)
|
||||||
.inspect_err(|err| {
|
.inspect_err(|err| {
|
||||||
// Immediately log the error we have at our disposal.
|
// Immediately log the error we have at our disposal.
|
||||||
error!("couldn't connect to compute node: {err}");
|
error!("couldn't connect to compute node: {err}");
|
||||||
|
|||||||
@@ -212,7 +212,7 @@ pub struct CacheOptions {
|
|||||||
|
|
||||||
impl CacheOptions {
|
impl CacheOptions {
|
||||||
/// Default options for [`crate::auth::caches::NodeInfoCache`].
|
/// Default options for [`crate::auth::caches::NodeInfoCache`].
|
||||||
pub const DEFAULT_OPTIONS_NODE_INFO: &str = "size=4000,ttl=4m";
|
pub const DEFAULT_OPTIONS_NODE_INFO: &str = "size=4000,ttl=5m";
|
||||||
|
|
||||||
/// Parse cache options passed via cmdline.
|
/// Parse cache options passed via cmdline.
|
||||||
/// Example: [`Self::DEFAULT_OPTIONS_NODE_INFO`].
|
/// Example: [`Self::DEFAULT_OPTIONS_NODE_INFO`].
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ use postgres_backend::{self, AuthType, PostgresBackend, PostgresBackendTCP, Quer
|
|||||||
use pq_proto::{BeMessage, SINGLE_COL_ROWDESC};
|
use pq_proto::{BeMessage, SINGLE_COL_ROWDESC};
|
||||||
use std::future;
|
use std::future;
|
||||||
use tokio::net::{TcpListener, TcpStream};
|
use tokio::net::{TcpListener, TcpStream};
|
||||||
use tracing::{error, info, info_span, Instrument};
|
use tracing::{error, info, info_span};
|
||||||
|
|
||||||
static CPLANE_WAITERS: Lazy<Waiters<ComputeReady>> = Lazy::new(Default::default);
|
static CPLANE_WAITERS: Lazy<Waiters<ComputeReady>> = Lazy::new(Default::default);
|
||||||
|
|
||||||
@@ -44,30 +44,19 @@ pub async fn task_main(listener: TcpListener) -> anyhow::Result<()> {
|
|||||||
.set_nodelay(true)
|
.set_nodelay(true)
|
||||||
.context("failed to set client socket option")?;
|
.context("failed to set client socket option")?;
|
||||||
|
|
||||||
let span = info_span!("mgmt", peer = %peer_addr);
|
tokio::task::spawn(async move {
|
||||||
|
let span = info_span!("mgmt", peer = %peer_addr);
|
||||||
|
let _enter = span.enter();
|
||||||
|
|
||||||
tokio::task::spawn(
|
info!("started a new console management API thread");
|
||||||
async move {
|
scopeguard::defer! {
|
||||||
info!("serving a new console management API connection");
|
info!("console management API thread is about to finish");
|
||||||
|
|
||||||
// these might be long running connections, have a separate logging for cancelling
|
|
||||||
// on shutdown and other ways of stopping.
|
|
||||||
let cancelled = scopeguard::guard(tracing::Span::current(), |span| {
|
|
||||||
let _e = span.entered();
|
|
||||||
info!("console management API task cancelled");
|
|
||||||
});
|
|
||||||
|
|
||||||
if let Err(e) = handle_connection(socket).await {
|
|
||||||
error!("serving failed with an error: {e}");
|
|
||||||
} else {
|
|
||||||
info!("serving completed");
|
|
||||||
}
|
|
||||||
|
|
||||||
// we can no longer get dropped
|
|
||||||
scopeguard::ScopeGuard::into_inner(cancelled);
|
|
||||||
}
|
}
|
||||||
.instrument(span),
|
|
||||||
);
|
if let Err(e) = handle_connection(socket).await {
|
||||||
|
error!("thread failed with an error: {e}");
|
||||||
|
}
|
||||||
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -88,14 +77,14 @@ impl postgres_backend::Handler<tokio::net::TcpStream> for MgmtHandler {
|
|||||||
pgb: &mut PostgresBackendTCP,
|
pgb: &mut PostgresBackendTCP,
|
||||||
query: &str,
|
query: &str,
|
||||||
) -> Result<(), QueryError> {
|
) -> Result<(), QueryError> {
|
||||||
try_process_query(pgb, query).map_err(|e| {
|
try_process_query(pgb, query).await.map_err(|e| {
|
||||||
error!("failed to process response: {e:?}");
|
error!("failed to process response: {e:?}");
|
||||||
e
|
e
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn try_process_query(pgb: &mut PostgresBackendTCP, query: &str) -> Result<(), QueryError> {
|
async fn try_process_query(pgb: &mut PostgresBackendTCP, query: &str) -> Result<(), QueryError> {
|
||||||
let resp: KickSession = serde_json::from_str(query).context("Failed to parse query as json")?;
|
let resp: KickSession = serde_json::from_str(query).context("Failed to parse query as json")?;
|
||||||
|
|
||||||
let span = info_span!("event", session_id = resp.session_id);
|
let span = info_span!("event", session_id = resp.session_id);
|
||||||
|
|||||||
@@ -2,16 +2,12 @@
|
|||||||
//! Other modules should use stuff from this module instead of
|
//! Other modules should use stuff from this module instead of
|
||||||
//! directly relying on deps like `reqwest` (think loose coupling).
|
//! directly relying on deps like `reqwest` (think loose coupling).
|
||||||
|
|
||||||
pub mod conn_pool;
|
|
||||||
pub mod server;
|
pub mod server;
|
||||||
pub mod sql_over_http;
|
pub mod sql_over_http;
|
||||||
pub mod websocket;
|
pub mod websocket;
|
||||||
|
|
||||||
use std::time::Duration;
|
|
||||||
|
|
||||||
pub use reqwest::{Request, Response, StatusCode};
|
pub use reqwest::{Request, Response, StatusCode};
|
||||||
pub use reqwest_middleware::{ClientWithMiddleware, Error};
|
pub use reqwest_middleware::{ClientWithMiddleware, Error};
|
||||||
pub use reqwest_retry::{policies::ExponentialBackoff, RetryTransientMiddleware};
|
|
||||||
|
|
||||||
use crate::url::ApiUrl;
|
use crate::url::ApiUrl;
|
||||||
use reqwest_middleware::RequestBuilder;
|
use reqwest_middleware::RequestBuilder;
|
||||||
@@ -25,24 +21,6 @@ pub fn new_client() -> ClientWithMiddleware {
|
|||||||
.build()
|
.build()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn new_client_with_timeout(default_timout: Duration) -> ClientWithMiddleware {
|
|
||||||
let timeout_client = reqwest::ClientBuilder::new()
|
|
||||||
.timeout(default_timout)
|
|
||||||
.build()
|
|
||||||
.expect("Failed to create http client with timeout");
|
|
||||||
|
|
||||||
let retry_policy =
|
|
||||||
ExponentialBackoff::builder().build_with_total_retry_duration(default_timout);
|
|
||||||
|
|
||||||
reqwest_middleware::ClientBuilder::new(timeout_client)
|
|
||||||
.with(reqwest_tracing::TracingMiddleware::default())
|
|
||||||
// As per docs, "This middleware always errors when given requests with streaming bodies".
|
|
||||||
// That's all right because we only use this client to send `serde_json::RawValue`, which
|
|
||||||
// is not a stream.
|
|
||||||
.with(RetryTransientMiddleware::new_with_policy(retry_policy))
|
|
||||||
.build()
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Thin convenience wrapper for an API provided by an http endpoint.
|
/// Thin convenience wrapper for an API provided by an http endpoint.
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct Endpoint {
|
pub struct Endpoint {
|
||||||
|
|||||||
@@ -1,278 +0,0 @@
|
|||||||
use parking_lot::Mutex;
|
|
||||||
use pq_proto::StartupMessageParams;
|
|
||||||
use std::fmt;
|
|
||||||
use std::{collections::HashMap, sync::Arc};
|
|
||||||
|
|
||||||
use futures::TryFutureExt;
|
|
||||||
|
|
||||||
use crate::config;
|
|
||||||
use crate::{auth, console};
|
|
||||||
|
|
||||||
use super::sql_over_http::MAX_RESPONSE_SIZE;
|
|
||||||
|
|
||||||
use crate::proxy::invalidate_cache;
|
|
||||||
use crate::proxy::NUM_RETRIES_WAKE_COMPUTE;
|
|
||||||
|
|
||||||
use tracing::error;
|
|
||||||
use tracing::info;
|
|
||||||
|
|
||||||
pub const APP_NAME: &str = "sql_over_http";
|
|
||||||
const MAX_CONNS_PER_ENDPOINT: usize = 20;
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
|
||||||
pub struct ConnInfo {
|
|
||||||
pub username: String,
|
|
||||||
pub dbname: String,
|
|
||||||
pub hostname: String,
|
|
||||||
pub password: String,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl ConnInfo {
|
|
||||||
// hm, change to hasher to avoid cloning?
|
|
||||||
pub fn db_and_user(&self) -> (String, String) {
|
|
||||||
(self.dbname.clone(), self.username.clone())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl fmt::Display for ConnInfo {
|
|
||||||
// use custom display to avoid logging password
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
||||||
write!(f, "{}@{}/{}", self.username, self.hostname, self.dbname)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
struct ConnPoolEntry {
|
|
||||||
conn: tokio_postgres::Client,
|
|
||||||
_last_access: std::time::Instant,
|
|
||||||
}
|
|
||||||
|
|
||||||
// Per-endpoint connection pool, (dbname, username) -> Vec<ConnPoolEntry>
|
|
||||||
// Number of open connections is limited by the `max_conns_per_endpoint`.
|
|
||||||
pub struct EndpointConnPool {
|
|
||||||
pools: HashMap<(String, String), Vec<ConnPoolEntry>>,
|
|
||||||
total_conns: usize,
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct GlobalConnPool {
|
|
||||||
// endpoint -> per-endpoint connection pool
|
|
||||||
//
|
|
||||||
// That should be a fairly conteded map, so return reference to the per-endpoint
|
|
||||||
// pool as early as possible and release the lock.
|
|
||||||
global_pool: Mutex<HashMap<String, Arc<Mutex<EndpointConnPool>>>>,
|
|
||||||
|
|
||||||
// Maximum number of connections per one endpoint.
|
|
||||||
// Can mix different (dbname, username) connections.
|
|
||||||
// When running out of free slots for a particular endpoint,
|
|
||||||
// falls back to opening a new connection for each request.
|
|
||||||
max_conns_per_endpoint: usize,
|
|
||||||
|
|
||||||
proxy_config: &'static crate::config::ProxyConfig,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl GlobalConnPool {
|
|
||||||
pub fn new(config: &'static crate::config::ProxyConfig) -> Arc<Self> {
|
|
||||||
Arc::new(Self {
|
|
||||||
global_pool: Mutex::new(HashMap::new()),
|
|
||||||
max_conns_per_endpoint: MAX_CONNS_PER_ENDPOINT,
|
|
||||||
proxy_config: config,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
pub async fn get(
|
|
||||||
&self,
|
|
||||||
conn_info: &ConnInfo,
|
|
||||||
force_new: bool,
|
|
||||||
) -> anyhow::Result<tokio_postgres::Client> {
|
|
||||||
let mut client: Option<tokio_postgres::Client> = None;
|
|
||||||
|
|
||||||
if !force_new {
|
|
||||||
let pool = self.get_endpoint_pool(&conn_info.hostname).await;
|
|
||||||
|
|
||||||
// find a pool entry by (dbname, username) if exists
|
|
||||||
let mut pool = pool.lock();
|
|
||||||
let pool_entries = pool.pools.get_mut(&conn_info.db_and_user());
|
|
||||||
if let Some(pool_entries) = pool_entries {
|
|
||||||
if let Some(entry) = pool_entries.pop() {
|
|
||||||
client = Some(entry.conn);
|
|
||||||
pool.total_conns -= 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ok return cached connection if found and establish a new one otherwise
|
|
||||||
if let Some(client) = client {
|
|
||||||
if client.is_closed() {
|
|
||||||
info!("pool: cached connection '{conn_info}' is closed, opening a new one");
|
|
||||||
connect_to_compute(self.proxy_config, conn_info).await
|
|
||||||
} else {
|
|
||||||
info!("pool: reusing connection '{conn_info}'");
|
|
||||||
Ok(client)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
info!("pool: opening a new connection '{conn_info}'");
|
|
||||||
connect_to_compute(self.proxy_config, conn_info).await
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub async fn put(
|
|
||||||
&self,
|
|
||||||
conn_info: &ConnInfo,
|
|
||||||
client: tokio_postgres::Client,
|
|
||||||
) -> anyhow::Result<()> {
|
|
||||||
let pool = self.get_endpoint_pool(&conn_info.hostname).await;
|
|
||||||
|
|
||||||
// return connection to the pool
|
|
||||||
let mut total_conns;
|
|
||||||
let mut returned = false;
|
|
||||||
let mut per_db_size = 0;
|
|
||||||
{
|
|
||||||
let mut pool = pool.lock();
|
|
||||||
total_conns = pool.total_conns;
|
|
||||||
|
|
||||||
let pool_entries: &mut Vec<ConnPoolEntry> = pool
|
|
||||||
.pools
|
|
||||||
.entry(conn_info.db_and_user())
|
|
||||||
.or_insert_with(|| Vec::with_capacity(1));
|
|
||||||
if total_conns < self.max_conns_per_endpoint {
|
|
||||||
pool_entries.push(ConnPoolEntry {
|
|
||||||
conn: client,
|
|
||||||
_last_access: std::time::Instant::now(),
|
|
||||||
});
|
|
||||||
|
|
||||||
total_conns += 1;
|
|
||||||
returned = true;
|
|
||||||
per_db_size = pool_entries.len();
|
|
||||||
|
|
||||||
pool.total_conns += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// do logging outside of the mutex
|
|
||||||
if returned {
|
|
||||||
info!("pool: returning connection '{conn_info}' back to the pool, total_conns={total_conns}, for this (db, user)={per_db_size}");
|
|
||||||
} else {
|
|
||||||
info!("pool: throwing away connection '{conn_info}' because pool is full, total_conns={total_conns}");
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn get_endpoint_pool(&self, endpoint: &String) -> Arc<Mutex<EndpointConnPool>> {
|
|
||||||
// find or create a pool for this endpoint
|
|
||||||
let mut created = false;
|
|
||||||
let mut global_pool = self.global_pool.lock();
|
|
||||||
let pool = global_pool
|
|
||||||
.entry(endpoint.clone())
|
|
||||||
.or_insert_with(|| {
|
|
||||||
created = true;
|
|
||||||
Arc::new(Mutex::new(EndpointConnPool {
|
|
||||||
pools: HashMap::new(),
|
|
||||||
total_conns: 0,
|
|
||||||
}))
|
|
||||||
})
|
|
||||||
.clone();
|
|
||||||
let global_pool_size = global_pool.len();
|
|
||||||
drop(global_pool);
|
|
||||||
|
|
||||||
// log new global pool size
|
|
||||||
if created {
|
|
||||||
info!(
|
|
||||||
"pool: created new pool for '{endpoint}', global pool size now {global_pool_size}"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
pool
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//
|
|
||||||
// Wake up the destination if needed. Code here is a bit involved because
|
|
||||||
// we reuse the code from the usual proxy and we need to prepare few structures
|
|
||||||
// that this code expects.
|
|
||||||
//
|
|
||||||
async fn connect_to_compute(
|
|
||||||
config: &config::ProxyConfig,
|
|
||||||
conn_info: &ConnInfo,
|
|
||||||
) -> anyhow::Result<tokio_postgres::Client> {
|
|
||||||
let tls = config.tls_config.as_ref();
|
|
||||||
let common_names = tls.and_then(|tls| tls.common_names.clone());
|
|
||||||
|
|
||||||
let credential_params = StartupMessageParams::new([
|
|
||||||
("user", &conn_info.username),
|
|
||||||
("database", &conn_info.dbname),
|
|
||||||
("application_name", APP_NAME),
|
|
||||||
]);
|
|
||||||
|
|
||||||
let creds = config
|
|
||||||
.auth_backend
|
|
||||||
.as_ref()
|
|
||||||
.map(|_| {
|
|
||||||
auth::ClientCredentials::parse(
|
|
||||||
&credential_params,
|
|
||||||
Some(&conn_info.hostname),
|
|
||||||
common_names,
|
|
||||||
)
|
|
||||||
})
|
|
||||||
.transpose()?;
|
|
||||||
let extra = console::ConsoleReqExtra {
|
|
||||||
session_id: uuid::Uuid::new_v4(),
|
|
||||||
application_name: Some(APP_NAME),
|
|
||||||
};
|
|
||||||
|
|
||||||
let node_info = &mut creds.wake_compute(&extra).await?.expect("msg");
|
|
||||||
|
|
||||||
// This code is a copy of `connect_to_compute` from `src/proxy.rs` with
|
|
||||||
// the difference that it uses `tokio_postgres` for the connection.
|
|
||||||
let mut num_retries: usize = NUM_RETRIES_WAKE_COMPUTE;
|
|
||||||
loop {
|
|
||||||
match connect_to_compute_once(node_info, conn_info).await {
|
|
||||||
Err(e) if num_retries > 0 => {
|
|
||||||
info!("compute node's state has changed; requesting a wake-up");
|
|
||||||
match creds.wake_compute(&extra).await? {
|
|
||||||
// Update `node_info` and try one more time.
|
|
||||||
Some(new) => {
|
|
||||||
*node_info = new;
|
|
||||||
}
|
|
||||||
// Link auth doesn't work that way, so we just exit.
|
|
||||||
None => return Err(e),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
other => return other,
|
|
||||||
}
|
|
||||||
|
|
||||||
num_retries -= 1;
|
|
||||||
info!("retrying after wake-up ({num_retries} attempts left)");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn connect_to_compute_once(
|
|
||||||
node_info: &console::CachedNodeInfo,
|
|
||||||
conn_info: &ConnInfo,
|
|
||||||
) -> anyhow::Result<tokio_postgres::Client> {
|
|
||||||
let mut config = (*node_info.config).clone();
|
|
||||||
|
|
||||||
let (client, connection) = config
|
|
||||||
.user(&conn_info.username)
|
|
||||||
.password(&conn_info.password)
|
|
||||||
.dbname(&conn_info.dbname)
|
|
||||||
.max_backend_message_size(MAX_RESPONSE_SIZE)
|
|
||||||
.connect(tokio_postgres::NoTls)
|
|
||||||
.inspect_err(|e: &tokio_postgres::Error| {
|
|
||||||
error!(
|
|
||||||
"failed to connect to compute node hosts={:?} ports={:?}: {}",
|
|
||||||
node_info.config.get_hosts(),
|
|
||||||
node_info.config.get_ports(),
|
|
||||||
e
|
|
||||||
);
|
|
||||||
invalidate_cache(node_info)
|
|
||||||
})
|
|
||||||
.await?;
|
|
||||||
|
|
||||||
tokio::spawn(async move {
|
|
||||||
if let Err(e) = connection.await {
|
|
||||||
error!("connection error: {}", e);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
Ok(client)
|
|
||||||
}
|
|
||||||
@@ -1,11 +1,10 @@
|
|||||||
use std::sync::Arc;
|
|
||||||
|
|
||||||
use futures::pin_mut;
|
use futures::pin_mut;
|
||||||
use futures::StreamExt;
|
use futures::StreamExt;
|
||||||
use hyper::body::HttpBody;
|
use hyper::body::HttpBody;
|
||||||
use hyper::http::HeaderName;
|
use hyper::http::HeaderName;
|
||||||
use hyper::http::HeaderValue;
|
use hyper::http::HeaderValue;
|
||||||
use hyper::{Body, HeaderMap, Request};
|
use hyper::{Body, HeaderMap, Request};
|
||||||
|
use pq_proto::StartupMessageParams;
|
||||||
use serde_json::json;
|
use serde_json::json;
|
||||||
use serde_json::Map;
|
use serde_json::Map;
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
@@ -14,8 +13,7 @@ use tokio_postgres::types::Type;
|
|||||||
use tokio_postgres::Row;
|
use tokio_postgres::Row;
|
||||||
use url::Url;
|
use url::Url;
|
||||||
|
|
||||||
use super::conn_pool::ConnInfo;
|
use crate::{auth, config::ProxyConfig, console};
|
||||||
use super::conn_pool::GlobalConnPool;
|
|
||||||
|
|
||||||
#[derive(serde::Deserialize)]
|
#[derive(serde::Deserialize)]
|
||||||
struct QueryData {
|
struct QueryData {
|
||||||
@@ -23,13 +21,12 @@ struct QueryData {
|
|||||||
params: Vec<serde_json::Value>,
|
params: Vec<serde_json::Value>,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub const MAX_RESPONSE_SIZE: usize = 1024 * 1024; // 1 MB
|
const APP_NAME: &str = "sql_over_http";
|
||||||
|
const MAX_RESPONSE_SIZE: usize = 1024 * 1024; // 1 MB
|
||||||
const MAX_REQUEST_SIZE: u64 = 1024 * 1024; // 1 MB
|
const MAX_REQUEST_SIZE: u64 = 1024 * 1024; // 1 MB
|
||||||
|
|
||||||
static RAW_TEXT_OUTPUT: HeaderName = HeaderName::from_static("neon-raw-text-output");
|
static RAW_TEXT_OUTPUT: HeaderName = HeaderName::from_static("neon-raw-text-output");
|
||||||
static ARRAY_MODE: HeaderName = HeaderName::from_static("neon-array-mode");
|
static ARRAY_MODE: HeaderName = HeaderName::from_static("neon-array-mode");
|
||||||
static ALLOW_POOL: HeaderName = HeaderName::from_static("neon-pool-opt-in");
|
|
||||||
|
|
||||||
static HEADER_VALUE_TRUE: HeaderValue = HeaderValue::from_static("true");
|
static HEADER_VALUE_TRUE: HeaderValue = HeaderValue::from_static("true");
|
||||||
|
|
||||||
//
|
//
|
||||||
@@ -96,7 +93,7 @@ fn json_array_to_pg_array(value: &Value) -> Result<Option<String>, serde_json::E
|
|||||||
fn get_conn_info(
|
fn get_conn_info(
|
||||||
headers: &HeaderMap,
|
headers: &HeaderMap,
|
||||||
sni_hostname: Option<String>,
|
sni_hostname: Option<String>,
|
||||||
) -> Result<ConnInfo, anyhow::Error> {
|
) -> Result<(String, String, String, String), anyhow::Error> {
|
||||||
let connection_string = headers
|
let connection_string = headers
|
||||||
.get("Neon-Connection-String")
|
.get("Neon-Connection-String")
|
||||||
.ok_or(anyhow::anyhow!("missing connection string"))?
|
.ok_or(anyhow::anyhow!("missing connection string"))?
|
||||||
@@ -149,33 +146,61 @@ fn get_conn_info(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(ConnInfo {
|
Ok((
|
||||||
username: username.to_owned(),
|
username.to_owned(),
|
||||||
dbname: dbname.to_owned(),
|
dbname.to_owned(),
|
||||||
hostname: hostname.to_owned(),
|
hostname.to_owned(),
|
||||||
password: password.to_owned(),
|
password.to_owned(),
|
||||||
})
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: return different http error codes
|
// TODO: return different http error codes
|
||||||
pub async fn handle(
|
pub async fn handle(
|
||||||
|
config: &'static ProxyConfig,
|
||||||
request: Request<Body>,
|
request: Request<Body>,
|
||||||
sni_hostname: Option<String>,
|
sni_hostname: Option<String>,
|
||||||
conn_pool: Arc<GlobalConnPool>,
|
|
||||||
) -> anyhow::Result<Value> {
|
) -> anyhow::Result<Value> {
|
||||||
//
|
//
|
||||||
// Determine the destination and connection params
|
// Determine the destination and connection params
|
||||||
//
|
//
|
||||||
let headers = request.headers();
|
let headers = request.headers();
|
||||||
let conn_info = get_conn_info(headers, sni_hostname)?;
|
let (username, dbname, hostname, password) = get_conn_info(headers, sni_hostname)?;
|
||||||
|
let credential_params = StartupMessageParams::new([
|
||||||
|
("user", &username),
|
||||||
|
("database", &dbname),
|
||||||
|
("application_name", APP_NAME),
|
||||||
|
]);
|
||||||
|
|
||||||
// Determine the output options. Default behaviour is 'false'. Anything that is not
|
// Determine the output options. Default behaviour is 'false'. Anything that is not
|
||||||
// strictly 'true' assumed to be false.
|
// strictly 'true' assumed to be false.
|
||||||
let raw_output = headers.get(&RAW_TEXT_OUTPUT) == Some(&HEADER_VALUE_TRUE);
|
let raw_output = headers.get(&RAW_TEXT_OUTPUT) == Some(&HEADER_VALUE_TRUE);
|
||||||
let array_mode = headers.get(&ARRAY_MODE) == Some(&HEADER_VALUE_TRUE);
|
let array_mode = headers.get(&ARRAY_MODE) == Some(&HEADER_VALUE_TRUE);
|
||||||
|
|
||||||
// Allow connection pooling only if explicitly requested
|
//
|
||||||
let allow_pool = headers.get(&ALLOW_POOL) == Some(&HEADER_VALUE_TRUE);
|
// Wake up the destination if needed. Code here is a bit involved because
|
||||||
|
// we reuse the code from the usual proxy and we need to prepare few structures
|
||||||
|
// that this code expects.
|
||||||
|
//
|
||||||
|
let tls = config.tls_config.as_ref();
|
||||||
|
let common_names = tls.and_then(|tls| tls.common_names.clone());
|
||||||
|
let creds = config
|
||||||
|
.auth_backend
|
||||||
|
.as_ref()
|
||||||
|
.map(|_| auth::ClientCredentials::parse(&credential_params, Some(&hostname), common_names))
|
||||||
|
.transpose()?;
|
||||||
|
let extra = console::ConsoleReqExtra {
|
||||||
|
session_id: uuid::Uuid::new_v4(),
|
||||||
|
application_name: Some(APP_NAME),
|
||||||
|
};
|
||||||
|
let node = creds.wake_compute(&extra).await?.expect("msg");
|
||||||
|
let conf = node.value.config;
|
||||||
|
let port = *conf.get_ports().first().expect("no port");
|
||||||
|
let host = match conf.get_hosts().first().expect("no host") {
|
||||||
|
tokio_postgres::config::Host::Tcp(host) => host,
|
||||||
|
tokio_postgres::config::Host::Unix(_) => {
|
||||||
|
return Err(anyhow::anyhow!("unix socket is not supported"));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
let request_content_length = match request.body().size_hint().upper() {
|
let request_content_length = match request.body().size_hint().upper() {
|
||||||
Some(v) => v,
|
Some(v) => v,
|
||||||
@@ -195,11 +220,28 @@ pub async fn handle(
|
|||||||
let QueryData { query, params } = serde_json::from_slice(&body)?;
|
let QueryData { query, params } = serde_json::from_slice(&body)?;
|
||||||
let query_params = json_to_pg_text(params)?;
|
let query_params = json_to_pg_text(params)?;
|
||||||
|
|
||||||
|
//
|
||||||
|
// Connenct to the destination
|
||||||
|
//
|
||||||
|
let (client, connection) = tokio_postgres::Config::new()
|
||||||
|
.host(host)
|
||||||
|
.port(port)
|
||||||
|
.user(&username)
|
||||||
|
.password(&password)
|
||||||
|
.dbname(&dbname)
|
||||||
|
.max_backend_message_size(MAX_RESPONSE_SIZE)
|
||||||
|
.connect(tokio_postgres::NoTls)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
tokio::spawn(async move {
|
||||||
|
if let Err(e) = connection.await {
|
||||||
|
eprintln!("connection error: {}", e);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
//
|
//
|
||||||
// Now execute the query and return the result
|
// Now execute the query and return the result
|
||||||
//
|
//
|
||||||
let client = conn_pool.get(&conn_info, !allow_pool).await?;
|
|
||||||
|
|
||||||
let row_stream = client.query_raw_txt(query, query_params).await?;
|
let row_stream = client.query_raw_txt(query, query_params).await?;
|
||||||
|
|
||||||
// Manually drain the stream into a vector to leave row_stream hanging
|
// Manually drain the stream into a vector to leave row_stream hanging
|
||||||
@@ -238,11 +280,6 @@ pub async fn handle(
|
|||||||
json!({
|
json!({
|
||||||
"name": Value::String(c.name().to_owned()),
|
"name": Value::String(c.name().to_owned()),
|
||||||
"dataTypeID": Value::Number(c.type_().oid().into()),
|
"dataTypeID": Value::Number(c.type_().oid().into()),
|
||||||
"tableID": c.table_oid(),
|
|
||||||
"columnID": c.column_id(),
|
|
||||||
"dataTypeSize": c.type_size(),
|
|
||||||
"dataTypeModifier": c.type_modifier(),
|
|
||||||
"format": "text",
|
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
.collect::<Vec<_>>()
|
.collect::<Vec<_>>()
|
||||||
@@ -256,13 +293,6 @@ pub async fn handle(
|
|||||||
.map(|row| pg_text_row_to_json(row, raw_output, array_mode))
|
.map(|row| pg_text_row_to_json(row, raw_output, array_mode))
|
||||||
.collect::<Result<Vec<_>, _>>()?;
|
.collect::<Result<Vec<_>, _>>()?;
|
||||||
|
|
||||||
if allow_pool {
|
|
||||||
// return connection to the pool
|
|
||||||
tokio::task::spawn(async move {
|
|
||||||
let _ = conn_pool.put(&conn_info, client).await;
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
// resulting JSON format is based on the format of node-postgres result
|
// resulting JSON format is based on the format of node-postgres result
|
||||||
Ok(json!({
|
Ok(json!({
|
||||||
"command": command_tag_name,
|
"command": command_tag_name,
|
||||||
|
|||||||
@@ -26,6 +26,7 @@ use tls_listener::TlsListener;
|
|||||||
use tokio::{
|
use tokio::{
|
||||||
io::{self, AsyncBufRead, AsyncRead, AsyncWrite, ReadBuf},
|
io::{self, AsyncBufRead, AsyncRead, AsyncWrite, ReadBuf},
|
||||||
net::TcpListener,
|
net::TcpListener,
|
||||||
|
select,
|
||||||
};
|
};
|
||||||
use tokio_util::sync::CancellationToken;
|
use tokio_util::sync::CancellationToken;
|
||||||
use tracing::{error, info, info_span, warn, Instrument};
|
use tracing::{error, info, info_span, warn, Instrument};
|
||||||
@@ -35,7 +36,7 @@ use utils::http::{error::ApiError, json::json_response};
|
|||||||
// Tracking issue: https://github.com/rust-lang/rust/issues/98407.
|
// Tracking issue: https://github.com/rust-lang/rust/issues/98407.
|
||||||
use sync_wrapper::SyncWrapper;
|
use sync_wrapper::SyncWrapper;
|
||||||
|
|
||||||
use super::{conn_pool::GlobalConnPool, sql_over_http};
|
use super::sql_over_http;
|
||||||
|
|
||||||
pin_project! {
|
pin_project! {
|
||||||
/// This is a wrapper around a [`WebSocketStream`] that
|
/// This is a wrapper around a [`WebSocketStream`] that
|
||||||
@@ -164,7 +165,6 @@ async fn serve_websocket(
|
|||||||
async fn ws_handler(
|
async fn ws_handler(
|
||||||
mut request: Request<Body>,
|
mut request: Request<Body>,
|
||||||
config: &'static ProxyConfig,
|
config: &'static ProxyConfig,
|
||||||
conn_pool: Arc<GlobalConnPool>,
|
|
||||||
cancel_map: Arc<CancelMap>,
|
cancel_map: Arc<CancelMap>,
|
||||||
session_id: uuid::Uuid,
|
session_id: uuid::Uuid,
|
||||||
sni_hostname: Option<String>,
|
sni_hostname: Option<String>,
|
||||||
@@ -193,9 +193,14 @@ async fn ws_handler(
|
|||||||
// TODO: that deserves a refactor as now this function also handles http json client besides websockets.
|
// TODO: that deserves a refactor as now this function also handles http json client besides websockets.
|
||||||
// Right now I don't want to blow up sql-over-http patch with file renames and do that as a follow up instead.
|
// Right now I don't want to blow up sql-over-http patch with file renames and do that as a follow up instead.
|
||||||
} else if request.uri().path() == "/sql" && request.method() == Method::POST {
|
} else if request.uri().path() == "/sql" && request.method() == Method::POST {
|
||||||
let result = sql_over_http::handle(request, sni_hostname, conn_pool)
|
let result = select! {
|
||||||
.instrument(info_span!("sql-over-http"))
|
_ = tokio::time::sleep(std::time::Duration::from_secs(10)) => {
|
||||||
.await;
|
Err(anyhow::anyhow!("Query timed out"))
|
||||||
|
}
|
||||||
|
response = sql_over_http::handle(config, request, sni_hostname) => {
|
||||||
|
response
|
||||||
|
}
|
||||||
|
};
|
||||||
let status_code = match result {
|
let status_code = match result {
|
||||||
Ok(_) => StatusCode::OK,
|
Ok(_) => StatusCode::OK,
|
||||||
Err(_) => StatusCode::BAD_REQUEST,
|
Err(_) => StatusCode::BAD_REQUEST,
|
||||||
@@ -235,8 +240,6 @@ pub async fn task_main(
|
|||||||
info!("websocket server has shut down");
|
info!("websocket server has shut down");
|
||||||
}
|
}
|
||||||
|
|
||||||
let conn_pool: Arc<GlobalConnPool> = GlobalConnPool::new(config);
|
|
||||||
|
|
||||||
let tls_config = config.tls_config.as_ref().map(|cfg| cfg.to_server_config());
|
let tls_config = config.tls_config.as_ref().map(|cfg| cfg.to_server_config());
|
||||||
let tls_acceptor: tokio_rustls::TlsAcceptor = match tls_config {
|
let tls_acceptor: tokio_rustls::TlsAcceptor = match tls_config {
|
||||||
Some(config) => config.into(),
|
Some(config) => config.into(),
|
||||||
@@ -261,18 +264,15 @@ pub async fn task_main(
|
|||||||
let make_svc =
|
let make_svc =
|
||||||
hyper::service::make_service_fn(|stream: &tokio_rustls::server::TlsStream<AddrStream>| {
|
hyper::service::make_service_fn(|stream: &tokio_rustls::server::TlsStream<AddrStream>| {
|
||||||
let sni_name = stream.get_ref().1.sni_hostname().map(|s| s.to_string());
|
let sni_name = stream.get_ref().1.sni_hostname().map(|s| s.to_string());
|
||||||
let conn_pool = conn_pool.clone();
|
|
||||||
|
|
||||||
async move {
|
async move {
|
||||||
Ok::<_, Infallible>(hyper::service::service_fn(move |req: Request<Body>| {
|
Ok::<_, Infallible>(hyper::service::service_fn(move |req: Request<Body>| {
|
||||||
let sni_name = sni_name.clone();
|
let sni_name = sni_name.clone();
|
||||||
let conn_pool = conn_pool.clone();
|
|
||||||
|
|
||||||
async move {
|
async move {
|
||||||
let cancel_map = Arc::new(CancelMap::default());
|
let cancel_map = Arc::new(CancelMap::default());
|
||||||
let session_id = uuid::Uuid::new_v4();
|
let session_id = uuid::Uuid::new_v4();
|
||||||
|
|
||||||
ws_handler(req, config, conn_pool, cancel_map, session_id, sni_name)
|
ws_handler(req, config, cancel_map, session_id, sni_name)
|
||||||
.instrument(info_span!(
|
.instrument(info_span!(
|
||||||
"ws-client",
|
"ws-client",
|
||||||
session = format_args!("{session_id}")
|
session = format_args!("{session_id}")
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ pub async fn init() -> anyhow::Result<LoggingGuard> {
|
|||||||
.from_env_lossy();
|
.from_env_lossy();
|
||||||
|
|
||||||
let fmt_layer = tracing_subscriber::fmt::layer()
|
let fmt_layer = tracing_subscriber::fmt::layer()
|
||||||
.with_ansi(false)
|
.with_ansi(atty::is(atty::Stream::Stderr))
|
||||||
.with_writer(std::io::stderr)
|
.with_writer(std::io::stderr)
|
||||||
.with_target(false);
|
.with_target(false);
|
||||||
|
|
||||||
|
|||||||
@@ -4,13 +4,11 @@ use crate::{config::MetricCollectionConfig, http};
|
|||||||
use chrono::{DateTime, Utc};
|
use chrono::{DateTime, Utc};
|
||||||
use consumption_metrics::{idempotency_key, Event, EventChunk, EventType, CHUNK_SIZE};
|
use consumption_metrics::{idempotency_key, Event, EventChunk, EventType, CHUNK_SIZE};
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
use std::{collections::HashMap, time::Duration};
|
use std::collections::HashMap;
|
||||||
use tracing::{error, info, instrument, trace, warn};
|
use tracing::{error, info, instrument, trace, warn};
|
||||||
|
|
||||||
const PROXY_IO_BYTES_PER_CLIENT: &str = "proxy_io_bytes_per_client";
|
const PROXY_IO_BYTES_PER_CLIENT: &str = "proxy_io_bytes_per_client";
|
||||||
|
|
||||||
const DEFAULT_HTTP_REPORTING_TIMEOUT: Duration = Duration::from_secs(60);
|
|
||||||
|
|
||||||
///
|
///
|
||||||
/// Key that uniquely identifies the object, this metric describes.
|
/// Key that uniquely identifies the object, this metric describes.
|
||||||
/// Currently, endpoint_id is enough, but this may change later,
|
/// Currently, endpoint_id is enough, but this may change later,
|
||||||
@@ -32,7 +30,7 @@ pub async fn task_main(config: &MetricCollectionConfig) -> anyhow::Result<()> {
|
|||||||
info!("metrics collector has shut down");
|
info!("metrics collector has shut down");
|
||||||
}
|
}
|
||||||
|
|
||||||
let http_client = http::new_client_with_timeout(DEFAULT_HTTP_REPORTING_TIMEOUT);
|
let http_client = http::new_client();
|
||||||
let mut cached_metrics: HashMap<Ids, (u64, DateTime<Utc>)> = HashMap::new();
|
let mut cached_metrics: HashMap<Ids, (u64, DateTime<Utc>)> = HashMap::new();
|
||||||
let hostname = hostname::get()?.as_os_str().to_string_lossy().into_owned();
|
let hostname = hostname::get()?.as_os_str().to_string_lossy().into_owned();
|
||||||
|
|
||||||
@@ -184,35 +182,35 @@ async fn collect_metrics_iteration(
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
if !res.status().is_success() {
|
if res.status().is_success() {
|
||||||
error!("metrics endpoint refused the sent metrics: {:?}", res);
|
// update cached metrics after they were sent successfully
|
||||||
for metric in chunk.iter().filter(|metric| metric.value > (1u64 << 40)) {
|
for send_metric in chunk {
|
||||||
// Report if the metric value is suspiciously large
|
let stop_time = match send_metric.kind {
|
||||||
error!("potentially abnormal metric value: {:?}", metric);
|
EventType::Incremental { stop_time, .. } => stop_time,
|
||||||
}
|
_ => unreachable!(),
|
||||||
}
|
};
|
||||||
// update cached metrics after they were sent
|
|
||||||
// (to avoid sending the same metrics twice)
|
|
||||||
// see the relevant discussion on why to do so even if the status is not success:
|
|
||||||
// https://github.com/neondatabase/neon/pull/4563#discussion_r1246710956
|
|
||||||
for send_metric in chunk {
|
|
||||||
let stop_time = match send_metric.kind {
|
|
||||||
EventType::Incremental { stop_time, .. } => stop_time,
|
|
||||||
_ => unreachable!(),
|
|
||||||
};
|
|
||||||
|
|
||||||
cached_metrics
|
cached_metrics
|
||||||
.entry(Ids {
|
.entry(Ids {
|
||||||
endpoint_id: send_metric.extra.endpoint_id.clone(),
|
endpoint_id: send_metric.extra.endpoint_id.clone(),
|
||||||
branch_id: send_metric.extra.branch_id.clone(),
|
branch_id: send_metric.extra.branch_id.clone(),
|
||||||
})
|
})
|
||||||
// update cached value (add delta) and time
|
// update cached value (add delta) and time
|
||||||
.and_modify(|e| {
|
.and_modify(|e| {
|
||||||
e.0 = e.0.saturating_add(send_metric.value);
|
e.0 = e.0.saturating_add(send_metric.value);
|
||||||
e.1 = stop_time
|
e.1 = stop_time
|
||||||
})
|
})
|
||||||
// cache new metric
|
// cache new metric
|
||||||
.or_insert((send_metric.value, stop_time));
|
.or_insert((send_metric.value, stop_time));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
error!("metrics endpoint refused the sent metrics: {:?}", res);
|
||||||
|
for metric in chunk.iter() {
|
||||||
|
// Report if the metric value is suspiciously large
|
||||||
|
if metric.value > (1u64 << 40) {
|
||||||
|
error!("potentially abnormal metric value: {:?}", metric);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|||||||
@@ -16,16 +16,13 @@ use metrics::{register_int_counter, register_int_counter_vec, IntCounter, IntCou
|
|||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
use pq_proto::{BeMessage as Be, FeStartupPacket, StartupMessageParams};
|
use pq_proto::{BeMessage as Be, FeStartupPacket, StartupMessageParams};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use tokio::{
|
use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt};
|
||||||
io::{AsyncRead, AsyncWrite, AsyncWriteExt},
|
|
||||||
time,
|
|
||||||
};
|
|
||||||
use tokio_util::sync::CancellationToken;
|
use tokio_util::sync::CancellationToken;
|
||||||
use tracing::{error, info, warn};
|
use tracing::{error, info, warn};
|
||||||
use utils::measured_stream::MeasuredStream;
|
use utils::measured_stream::MeasuredStream;
|
||||||
|
|
||||||
/// Number of times we should retry the `/proxy_wake_compute` http request.
|
/// Number of times we should retry the `/proxy_wake_compute` http request.
|
||||||
pub const NUM_RETRIES_WAKE_COMPUTE: usize = 1;
|
const NUM_RETRIES_WAKE_COMPUTE: usize = 1;
|
||||||
|
|
||||||
const ERR_INSECURE_CONNECTION: &str = "connection is insecure (try using `sslmode=require`)";
|
const ERR_INSECURE_CONNECTION: &str = "connection is insecure (try using `sslmode=require`)";
|
||||||
const ERR_PROTO_VIOLATION: &str = "protocol violation";
|
const ERR_PROTO_VIOLATION: &str = "protocol violation";
|
||||||
@@ -286,36 +283,34 @@ async fn handshake<S: AsyncRead + AsyncWrite + Unpin>(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// If we couldn't connect, a cached connection info might be to blame
|
|
||||||
/// (e.g. the compute node's address might've changed at the wrong time).
|
|
||||||
/// Invalidate the cache entry (if any) to prevent subsequent errors.
|
|
||||||
#[tracing::instrument(name = "invalidate_cache", skip_all)]
|
|
||||||
pub fn invalidate_cache(node_info: &console::CachedNodeInfo) {
|
|
||||||
let is_cached = node_info.cached();
|
|
||||||
if is_cached {
|
|
||||||
warn!("invalidating stalled compute node info cache entry");
|
|
||||||
node_info.invalidate();
|
|
||||||
}
|
|
||||||
|
|
||||||
let label = match is_cached {
|
|
||||||
true => "compute_cached",
|
|
||||||
false => "compute_uncached",
|
|
||||||
};
|
|
||||||
NUM_CONNECTION_FAILURES.with_label_values(&[label]).inc();
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Try to connect to the compute node once.
|
/// Try to connect to the compute node once.
|
||||||
#[tracing::instrument(name = "connect_once", skip_all)]
|
#[tracing::instrument(name = "connect_once", skip_all)]
|
||||||
async fn connect_to_compute_once(
|
async fn connect_to_compute_once(
|
||||||
node_info: &console::CachedNodeInfo,
|
node_info: &console::CachedNodeInfo,
|
||||||
timeout: time::Duration,
|
|
||||||
) -> Result<PostgresConnection, compute::ConnectionError> {
|
) -> Result<PostgresConnection, compute::ConnectionError> {
|
||||||
|
// If we couldn't connect, a cached connection info might be to blame
|
||||||
|
// (e.g. the compute node's address might've changed at the wrong time).
|
||||||
|
// Invalidate the cache entry (if any) to prevent subsequent errors.
|
||||||
|
let invalidate_cache = |_: &compute::ConnectionError| {
|
||||||
|
let is_cached = node_info.cached();
|
||||||
|
if is_cached {
|
||||||
|
warn!("invalidating stalled compute node info cache entry");
|
||||||
|
node_info.invalidate();
|
||||||
|
}
|
||||||
|
|
||||||
|
let label = match is_cached {
|
||||||
|
true => "compute_cached",
|
||||||
|
false => "compute_uncached",
|
||||||
|
};
|
||||||
|
NUM_CONNECTION_FAILURES.with_label_values(&[label]).inc();
|
||||||
|
};
|
||||||
|
|
||||||
let allow_self_signed_compute = node_info.allow_self_signed_compute;
|
let allow_self_signed_compute = node_info.allow_self_signed_compute;
|
||||||
|
|
||||||
node_info
|
node_info
|
||||||
.config
|
.config
|
||||||
.connect(allow_self_signed_compute, timeout)
|
.connect(allow_self_signed_compute)
|
||||||
.inspect_err(|_: &compute::ConnectionError| invalidate_cache(node_info))
|
.inspect_err(invalidate_cache)
|
||||||
.await
|
.await
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -332,27 +327,7 @@ async fn connect_to_compute(
|
|||||||
loop {
|
loop {
|
||||||
// Apply startup params to the (possibly, cached) compute node info.
|
// Apply startup params to the (possibly, cached) compute node info.
|
||||||
node_info.config.set_startup_params(params);
|
node_info.config.set_startup_params(params);
|
||||||
|
match connect_to_compute_once(node_info).await {
|
||||||
// Set a shorter timeout for the initial connection attempt.
|
|
||||||
//
|
|
||||||
// In case we try to connect to an outdated address that is no longer valid, the
|
|
||||||
// default behavior of Kubernetes is to drop the packets, causing us to wait for
|
|
||||||
// the entire timeout period. We want to fail fast in such cases.
|
|
||||||
//
|
|
||||||
// A specific case to consider is when we have cached compute node information
|
|
||||||
// with a 4-minute TTL (Time To Live), but the user has executed a `/suspend` API
|
|
||||||
// call, resulting in the nonexistence of the compute node.
|
|
||||||
//
|
|
||||||
// We only use caching in case of scram proxy backed by the console, so reduce
|
|
||||||
// the timeout only in that case.
|
|
||||||
let is_scram_proxy = matches!(creds, auth::BackendType::Console(_, _));
|
|
||||||
let timeout = if is_scram_proxy && num_retries == NUM_RETRIES_WAKE_COMPUTE {
|
|
||||||
time::Duration::from_secs(2)
|
|
||||||
} else {
|
|
||||||
time::Duration::from_secs(10)
|
|
||||||
};
|
|
||||||
|
|
||||||
match connect_to_compute_once(node_info, timeout).await {
|
|
||||||
Err(e) if num_retries > 0 => {
|
Err(e) if num_retries > 0 => {
|
||||||
info!("compute node's state has changed; requesting a wake-up");
|
info!("compute node's state has changed; requesting a wake-up");
|
||||||
match creds.wake_compute(extra).map_err(io_error).await? {
|
match creds.wake_compute(extra).map_err(io_error).await? {
|
||||||
|
|||||||
@@ -45,74 +45,17 @@ fn hmac_sha256<'a>(key: &[u8], parts: impl IntoIterator<Item = &'a [u8]>) -> [u8
|
|||||||
let mut mac = Hmac::<Sha256>::new_from_slice(key).expect("bad key size");
|
let mut mac = Hmac::<Sha256>::new_from_slice(key).expect("bad key size");
|
||||||
parts.into_iter().for_each(|s| mac.update(s));
|
parts.into_iter().for_each(|s| mac.update(s));
|
||||||
|
|
||||||
mac.finalize().into_bytes().into()
|
// TODO: maybe newer `hmac` et al already migrated to regular arrays?
|
||||||
|
let mut result = [0u8; 32];
|
||||||
|
result.copy_from_slice(mac.finalize().into_bytes().as_slice());
|
||||||
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
fn sha256<'a>(parts: impl IntoIterator<Item = &'a [u8]>) -> [u8; 32] {
|
fn sha256<'a>(parts: impl IntoIterator<Item = &'a [u8]>) -> [u8; 32] {
|
||||||
let mut hasher = Sha256::new();
|
let mut hasher = Sha256::new();
|
||||||
parts.into_iter().for_each(|s| hasher.update(s));
|
parts.into_iter().for_each(|s| hasher.update(s));
|
||||||
|
|
||||||
hasher.finalize().into()
|
let mut result = [0u8; 32];
|
||||||
}
|
result.copy_from_slice(hasher.finalize().as_slice());
|
||||||
|
result
|
||||||
#[cfg(test)]
|
|
||||||
mod tests {
|
|
||||||
use crate::sasl::{Mechanism, Step};
|
|
||||||
|
|
||||||
use super::{password::SaltedPassword, Exchange, ServerSecret};
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn happy_path() {
|
|
||||||
let iterations = 4096;
|
|
||||||
let salt_base64 = "QSXCR+Q6sek8bf92";
|
|
||||||
let pw = SaltedPassword::new(
|
|
||||||
b"pencil",
|
|
||||||
base64::decode(salt_base64).unwrap().as_slice(),
|
|
||||||
iterations,
|
|
||||||
);
|
|
||||||
|
|
||||||
let secret = ServerSecret {
|
|
||||||
iterations,
|
|
||||||
salt_base64: salt_base64.to_owned(),
|
|
||||||
stored_key: pw.client_key().sha256(),
|
|
||||||
server_key: pw.server_key(),
|
|
||||||
doomed: false,
|
|
||||||
};
|
|
||||||
const NONCE: [u8; 18] = [
|
|
||||||
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
|
|
||||||
];
|
|
||||||
let mut exchange = Exchange::new(&secret, || NONCE, None);
|
|
||||||
|
|
||||||
let client_first = "n,,n=user,r=rOprNGfwEbeRWgbNEkqO";
|
|
||||||
let client_final = "c=biws,r=rOprNGfwEbeRWgbNEkqOAQIDBAUGBwgJCgsMDQ4PEBES,p=rw1r5Kph5ThxmaUBC2GAQ6MfXbPnNkFiTIvdb/Rear0=";
|
|
||||||
let server_first =
|
|
||||||
"r=rOprNGfwEbeRWgbNEkqOAQIDBAUGBwgJCgsMDQ4PEBES,s=QSXCR+Q6sek8bf92,i=4096";
|
|
||||||
let server_final = "v=qtUDIofVnIhM7tKn93EQUUt5vgMOldcDVu1HC+OH0o0=";
|
|
||||||
|
|
||||||
exchange = match exchange.exchange(client_first).unwrap() {
|
|
||||||
Step::Continue(exchange, message) => {
|
|
||||||
assert_eq!(message, server_first);
|
|
||||||
exchange
|
|
||||||
}
|
|
||||||
Step::Success(_, _) => panic!("expected continue, got success"),
|
|
||||||
Step::Failure(f) => panic!("{f}"),
|
|
||||||
};
|
|
||||||
|
|
||||||
let key = match exchange.exchange(client_final).unwrap() {
|
|
||||||
Step::Success(key, message) => {
|
|
||||||
assert_eq!(message, server_final);
|
|
||||||
key
|
|
||||||
}
|
|
||||||
Step::Continue(_, _) => panic!("expected success, got continue"),
|
|
||||||
Step::Failure(f) => panic!("{f}"),
|
|
||||||
};
|
|
||||||
|
|
||||||
assert_eq!(
|
|
||||||
key.as_bytes(),
|
|
||||||
[
|
|
||||||
74, 103, 1, 132, 12, 31, 200, 48, 28, 54, 82, 232, 207, 12, 138, 189, 40, 32, 134,
|
|
||||||
27, 125, 170, 232, 35, 171, 167, 166, 41, 70, 228, 182, 112,
|
|
||||||
]
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -14,7 +14,19 @@ impl SaltedPassword {
|
|||||||
/// See `scram-common.c : scram_SaltedPassword` for details.
|
/// See `scram-common.c : scram_SaltedPassword` for details.
|
||||||
/// Further reading: <https://datatracker.ietf.org/doc/html/rfc2898> (see `PBKDF2`).
|
/// Further reading: <https://datatracker.ietf.org/doc/html/rfc2898> (see `PBKDF2`).
|
||||||
pub fn new(password: &[u8], salt: &[u8], iterations: u32) -> SaltedPassword {
|
pub fn new(password: &[u8], salt: &[u8], iterations: u32) -> SaltedPassword {
|
||||||
pbkdf2::pbkdf2_hmac_array::<sha2::Sha256, 32>(password, salt, iterations).into()
|
let one = 1_u32.to_be_bytes(); // magic
|
||||||
|
|
||||||
|
let mut current = super::hmac_sha256(password, [salt, &one]);
|
||||||
|
let mut result = current;
|
||||||
|
for _ in 1..iterations {
|
||||||
|
current = super::hmac_sha256(password, [current.as_ref()]);
|
||||||
|
// TODO: result = current.zip(result).map(|(x, y)| x ^ y), issue #80094
|
||||||
|
for (i, x) in current.iter().enumerate() {
|
||||||
|
result[i] ^= x;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
result.into()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Derive `ClientKey` from a salted hashed password.
|
/// Derive `ClientKey` from a salted hashed password.
|
||||||
@@ -34,41 +46,3 @@ impl From<[u8; SALTED_PASSWORD_LEN]> for SaltedPassword {
|
|||||||
Self { bytes }
|
Self { bytes }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod tests {
|
|
||||||
use super::SaltedPassword;
|
|
||||||
|
|
||||||
fn legacy_pbkdf2_impl(password: &[u8], salt: &[u8], iterations: u32) -> SaltedPassword {
|
|
||||||
let one = 1_u32.to_be_bytes(); // magic
|
|
||||||
|
|
||||||
let mut current = super::super::hmac_sha256(password, [salt, &one]);
|
|
||||||
let mut result = current;
|
|
||||||
for _ in 1..iterations {
|
|
||||||
current = super::super::hmac_sha256(password, [current.as_ref()]);
|
|
||||||
// TODO: result = current.zip(result).map(|(x, y)| x ^ y), issue #80094
|
|
||||||
for (i, x) in current.iter().enumerate() {
|
|
||||||
result[i] ^= x;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
result.into()
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn pbkdf2() {
|
|
||||||
let password = "a-very-secure-password";
|
|
||||||
let salt = "such-a-random-salt";
|
|
||||||
let iterations = 4096;
|
|
||||||
let output = [
|
|
||||||
203, 18, 206, 81, 4, 154, 193, 100, 147, 41, 211, 217, 177, 203, 69, 210, 194, 211,
|
|
||||||
101, 1, 248, 156, 96, 0, 8, 223, 30, 87, 158, 41, 20, 42,
|
|
||||||
];
|
|
||||||
|
|
||||||
let actual = SaltedPassword::new(password.as_bytes(), salt.as_bytes(), iterations);
|
|
||||||
let expected = legacy_pbkdf2_impl(password.as_bytes(), salt.as_bytes(), iterations);
|
|
||||||
|
|
||||||
assert_eq!(actual.bytes, output);
|
|
||||||
assert_eq!(actual.bytes, expected.bytes);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ authors = []
|
|||||||
[tool.poetry.dependencies]
|
[tool.poetry.dependencies]
|
||||||
python = "^3.9"
|
python = "^3.9"
|
||||||
pytest = "^7.3.1"
|
pytest = "^7.3.1"
|
||||||
psycopg2-binary = "^2.9.6"
|
psycopg2-binary = "^2.9.1"
|
||||||
typing-extensions = "^4.6.1"
|
typing-extensions = "^4.6.1"
|
||||||
PyJWT = {version = "^2.1.0", extras = ["crypto"]}
|
PyJWT = {version = "^2.1.0", extras = ["crypto"]}
|
||||||
requests = "^2.31.0"
|
requests = "^2.31.0"
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
[toolchain]
|
[toolchain]
|
||||||
channel = "1.70.0"
|
channel = "1.68.2"
|
||||||
profile = "default"
|
profile = "default"
|
||||||
# The default profile includes rustc, rust-std, cargo, rust-docs, rustfmt and clippy.
|
# The default profile includes rustc, rust-std, cargo, rust-docs, rustfmt and clippy.
|
||||||
# https://rust-lang.github.io/rustup/concepts/profiles.html
|
# https://rust-lang.github.io/rustup/concepts/profiles.html
|
||||||
|
|||||||
@@ -191,12 +191,6 @@ impl Storage for FileStorage {
|
|||||||
control_partial_path.display()
|
control_partial_path.display()
|
||||||
)
|
)
|
||||||
})?;
|
})?;
|
||||||
control_partial.flush().await.with_context(|| {
|
|
||||||
format!(
|
|
||||||
"failed to flush safekeeper state into control file at: {}",
|
|
||||||
control_partial_path.display()
|
|
||||||
)
|
|
||||||
})?;
|
|
||||||
|
|
||||||
// fsync the file
|
// fsync the file
|
||||||
if !self.conf.no_sync {
|
if !self.conf.no_sync {
|
||||||
|
|||||||
@@ -188,7 +188,6 @@ async fn pull_timeline(status: TimelineStatus, host: String) -> Result<Response>
|
|||||||
let mut response = client.get(&http_url).send().await?;
|
let mut response = client.get(&http_url).send().await?;
|
||||||
while let Some(chunk) = response.chunk().await? {
|
while let Some(chunk) = response.chunk().await? {
|
||||||
file.write_all(&chunk).await?;
|
file.write_all(&chunk).await?;
|
||||||
file.flush().await?;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -403,18 +403,16 @@ impl SafekeeperPostgresHandler {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// take the latest commit_lsn if don't have stop_pos
|
// take the latest commit_lsn if don't have stop_pos
|
||||||
let end_pos = stop_pos.unwrap_or(*commit_lsn_watch_rx.borrow());
|
let mut end_pos = stop_pos.unwrap_or(*commit_lsn_watch_rx.borrow());
|
||||||
|
|
||||||
if end_pos < start_pos {
|
if end_pos < start_pos {
|
||||||
warn!(
|
warn!("start_pos {} is ahead of end_pos {}", start_pos, end_pos);
|
||||||
"requested start_pos {} is ahead of available WAL end_pos {}",
|
end_pos = start_pos;
|
||||||
start_pos, end_pos
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
info!(
|
info!(
|
||||||
"starting streaming from {:?} till {:?}, available WAL ends at {}",
|
"starting streaming from {:?} till {:?}",
|
||||||
start_pos, stop_pos, end_pos
|
start_pos, stop_pos
|
||||||
);
|
);
|
||||||
|
|
||||||
// switch to copy
|
// switch to copy
|
||||||
@@ -549,14 +547,12 @@ impl<IO: AsyncRead + AsyncWrite + Unpin> WalSender<'_, IO> {
|
|||||||
self.end_pos = *self.commit_lsn_watch_rx.borrow();
|
self.end_pos = *self.commit_lsn_watch_rx.borrow();
|
||||||
if self.end_pos > self.start_pos {
|
if self.end_pos > self.start_pos {
|
||||||
// We have something to send.
|
// We have something to send.
|
||||||
trace!("got end_pos {:?}, streaming", self.end_pos);
|
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Wait for WAL to appear, now self.end_pos == self.start_pos.
|
// Wait for WAL to appear, now self.end_pos == self.start_pos.
|
||||||
if let Some(lsn) = wait_for_lsn(&mut self.commit_lsn_watch_rx, self.start_pos).await? {
|
if let Some(lsn) = wait_for_lsn(&mut self.commit_lsn_watch_rx, self.start_pos).await? {
|
||||||
self.end_pos = lsn;
|
self.end_pos = lsn;
|
||||||
trace!("got end_pos {:?}, streaming", self.end_pos);
|
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user