mirror of
https://github.com/neondatabase/neon.git
synced 2026-03-14 22:00:38 +00:00
Compare commits
79 Commits
initdb-cac
...
auth-broke
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f3f7d0d3f1 | ||
|
|
0724df1d3f | ||
|
|
4d47049b00 | ||
|
|
5687384a8e | ||
|
|
249f5ea17d | ||
|
|
6abcc1f298 | ||
|
|
3e97cf0d6e | ||
|
|
054ef4988b | ||
|
|
5202cd75b5 | ||
|
|
f475dac0e6 | ||
|
|
a4100373e5 | ||
|
|
040d8cf4f6 | ||
|
|
75bfd57e01 | ||
|
|
4bc2686dee | ||
|
|
8e7d2aab76 | ||
|
|
2703abccc7 | ||
|
|
76515cdae3 | ||
|
|
08c7f933a3 | ||
|
|
4ad3aa7c96 | ||
|
|
9c59e3b4b9 | ||
|
|
40f7930a7d | ||
|
|
ec07a1ecc9 | ||
|
|
c4cdfe66ac | ||
|
|
42e19e952f | ||
|
|
3d255d601b | ||
|
|
80e974d05b | ||
|
|
7fdf1ab5b6 | ||
|
|
7bae78186b | ||
|
|
7e560dd00e | ||
|
|
684e924211 | ||
|
|
8ace9ea25f | ||
|
|
6a4f49b08b | ||
|
|
c6e89445e2 | ||
|
|
04f32b9526 | ||
|
|
6f2333f52b | ||
|
|
d447f49bc3 | ||
|
|
c5972389aa | ||
|
|
c4f5736d5a | ||
|
|
518f598e2d | ||
|
|
4b711caf5e | ||
|
|
2cf47b1477 | ||
|
|
7dcfcccf7c | ||
|
|
a26cc29d92 | ||
|
|
5f2f31e879 | ||
|
|
938b163b42 | ||
|
|
5cbf5b45ae | ||
|
|
af5c54ed14 | ||
|
|
523cf71721 | ||
|
|
c47f355ec1 | ||
|
|
4f67b0225b | ||
|
|
2f7cecaf6a | ||
|
|
589594c2e1 | ||
|
|
70fe007519 | ||
|
|
b224a5a377 | ||
|
|
a65d437930 | ||
|
|
fc67f8dc60 | ||
|
|
2b65a2b53e | ||
|
|
9490360df4 | ||
|
|
91d947654e | ||
|
|
37aa6fd953 | ||
|
|
3ad567290c | ||
|
|
3a110e45ed | ||
|
|
e7e6319e20 | ||
|
|
d865881d59 | ||
|
|
1c5d6e59a0 | ||
|
|
263dfba6ee | ||
|
|
df3996265f | ||
|
|
29699529df | ||
|
|
f446e08fb8 | ||
|
|
4d5add9ca0 | ||
|
|
59b4c2eaf9 | ||
|
|
5432155b0d | ||
|
|
e16e82749f | ||
|
|
9f653893b9 | ||
|
|
913af44219 | ||
|
|
ecd615ab6d | ||
|
|
c9b2ec9ff1 | ||
|
|
a3800dcb0c | ||
|
|
9a32aa828d |
@@ -13,6 +13,7 @@
|
|||||||
# Directories
|
# Directories
|
||||||
!.cargo/
|
!.cargo/
|
||||||
!.config/
|
!.config/
|
||||||
|
!compute/
|
||||||
!compute_tools/
|
!compute_tools/
|
||||||
!control_plane/
|
!control_plane/
|
||||||
!libs/
|
!libs/
|
||||||
|
|||||||
10
.github/workflows/_build-and-test-locally.yml
vendored
10
.github/workflows/_build-and-test-locally.yml
vendored
@@ -257,7 +257,15 @@ jobs:
|
|||||||
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(remote_storage)' -E 'test(test_real_azure)'
|
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(remote_storage)' -E 'test(test_real_azure)'
|
||||||
|
|
||||||
- name: Install postgres binaries
|
- name: Install postgres binaries
|
||||||
run: cp -a pg_install /tmp/neon/pg_install
|
run: |
|
||||||
|
# Use tar to copy files matching the pattern, preserving the paths in the destination
|
||||||
|
tar c \
|
||||||
|
pg_install/v* \
|
||||||
|
pg_install/build/*/src/test/regress/*.so \
|
||||||
|
pg_install/build/*/src/test/regress/pg_regress \
|
||||||
|
pg_install/build/*/src/test/isolation/isolationtester \
|
||||||
|
pg_install/build/*/src/test/isolation/pg_isolation_regress \
|
||||||
|
| tar x -C /tmp/neon
|
||||||
|
|
||||||
- name: Upload Neon artifact
|
- name: Upload Neon artifact
|
||||||
uses: ./.github/actions/upload
|
uses: ./.github/actions/upload
|
||||||
|
|||||||
120
.github/workflows/build_and_test.yml
vendored
120
.github/workflows/build_and_test.yml
vendored
@@ -120,6 +120,59 @@ jobs:
|
|||||||
- name: Run mypy to check types
|
- name: Run mypy to check types
|
||||||
run: poetry run mypy .
|
run: poetry run mypy .
|
||||||
|
|
||||||
|
# Check that the vendor/postgres-* submodules point to the
|
||||||
|
# corresponding REL_*_STABLE_neon branches.
|
||||||
|
check-submodules:
|
||||||
|
runs-on: ubuntu-22.04
|
||||||
|
steps:
|
||||||
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
submodules: true
|
||||||
|
|
||||||
|
- uses: dorny/paths-filter@v3
|
||||||
|
id: check-if-submodules-changed
|
||||||
|
with:
|
||||||
|
filters: |
|
||||||
|
vendor:
|
||||||
|
- 'vendor/**'
|
||||||
|
|
||||||
|
- name: Check vendor/postgres-v14 submodule reference
|
||||||
|
if: steps.check-if-submodules-changed.outputs.vendor == 'true'
|
||||||
|
uses: jtmullen/submodule-branch-check-action@v1
|
||||||
|
with:
|
||||||
|
path: "vendor/postgres-v14"
|
||||||
|
fetch_depth: "50"
|
||||||
|
sub_fetch_depth: "50"
|
||||||
|
pass_if_unchanged: true
|
||||||
|
|
||||||
|
- name: Check vendor/postgres-v15 submodule reference
|
||||||
|
if: steps.check-if-submodules-changed.outputs.vendor == 'true'
|
||||||
|
uses: jtmullen/submodule-branch-check-action@v1
|
||||||
|
with:
|
||||||
|
path: "vendor/postgres-v15"
|
||||||
|
fetch_depth: "50"
|
||||||
|
sub_fetch_depth: "50"
|
||||||
|
pass_if_unchanged: true
|
||||||
|
|
||||||
|
- name: Check vendor/postgres-v16 submodule reference
|
||||||
|
if: steps.check-if-submodules-changed.outputs.vendor == 'true'
|
||||||
|
uses: jtmullen/submodule-branch-check-action@v1
|
||||||
|
with:
|
||||||
|
path: "vendor/postgres-v16"
|
||||||
|
fetch_depth: "50"
|
||||||
|
sub_fetch_depth: "50"
|
||||||
|
pass_if_unchanged: true
|
||||||
|
|
||||||
|
- name: Check vendor/postgres-v17 submodule reference
|
||||||
|
if: steps.check-if-submodules-changed.outputs.vendor == 'true'
|
||||||
|
uses: jtmullen/submodule-branch-check-action@v1
|
||||||
|
with:
|
||||||
|
path: "vendor/postgres-v17"
|
||||||
|
fetch_depth: "50"
|
||||||
|
sub_fetch_depth: "50"
|
||||||
|
pass_if_unchanged: true
|
||||||
|
|
||||||
check-codestyle-rust:
|
check-codestyle-rust:
|
||||||
needs: [ check-permissions, build-build-tools-image ]
|
needs: [ check-permissions, build-build-tools-image ]
|
||||||
strategy:
|
strategy:
|
||||||
@@ -549,7 +602,20 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
version: [ v14, v15, v16, v17 ]
|
version:
|
||||||
|
# Much data was already generated on old PG versions with bullseye's
|
||||||
|
# libraries, the locales of which can cause data incompatibilities.
|
||||||
|
# However, new PG versions should check if they can be built on newer
|
||||||
|
# images, as that reduces the support burden of old and ancient
|
||||||
|
# distros.
|
||||||
|
- pg: v14
|
||||||
|
debian: bullseye-slim
|
||||||
|
- pg: v15
|
||||||
|
debian: bullseye-slim
|
||||||
|
- pg: v16
|
||||||
|
debian: bullseye-slim
|
||||||
|
- pg: v17
|
||||||
|
debian: bookworm-slim
|
||||||
arch: [ x64, arm64 ]
|
arch: [ x64, arm64 ]
|
||||||
|
|
||||||
runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}
|
runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}
|
||||||
@@ -592,41 +658,46 @@ jobs:
|
|||||||
context: .
|
context: .
|
||||||
build-args: |
|
build-args: |
|
||||||
GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}
|
GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}
|
||||||
PG_VERSION=${{ matrix.version }}
|
PG_VERSION=${{ matrix.version.pg }}
|
||||||
BUILD_TAG=${{ needs.tag.outputs.build-tag }}
|
BUILD_TAG=${{ needs.tag.outputs.build-tag }}
|
||||||
TAG=${{ needs.build-build-tools-image.outputs.image-tag }}
|
TAG=${{ needs.build-build-tools-image.outputs.image-tag }}
|
||||||
|
DEBIAN_FLAVOR=${{ matrix.version.debian }}
|
||||||
provenance: false
|
provenance: false
|
||||||
push: true
|
push: true
|
||||||
pull: true
|
pull: true
|
||||||
file: Dockerfile.compute-node
|
file: compute/Dockerfile.compute-node
|
||||||
cache-from: type=registry,ref=cache.neon.build/compute-node-${{ matrix.version }}:cache-${{ matrix.arch }}
|
cache-from: type=registry,ref=cache.neon.build/compute-node-${{ matrix.version.pg }}:cache-${{ matrix.arch }}
|
||||||
cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/compute-node-{0}:cache-{1},mode=max', matrix.version, matrix.arch) || '' }}
|
cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/compute-node-{0}:cache-{1},mode=max', matrix.version.pg, matrix.arch) || '' }}
|
||||||
tags: |
|
tags: |
|
||||||
neondatabase/compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.arch }}
|
neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.arch }}
|
||||||
|
|
||||||
- name: Build neon extensions test image
|
- name: Build neon extensions test image
|
||||||
if: matrix.version == 'v16'
|
if: matrix.version.pg == 'v16'
|
||||||
uses: docker/build-push-action@v6
|
uses: docker/build-push-action@v6
|
||||||
with:
|
with:
|
||||||
context: .
|
context: .
|
||||||
build-args: |
|
build-args: |
|
||||||
GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}
|
GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}
|
||||||
PG_VERSION=${{ matrix.version }}
|
PG_VERSION=${{ matrix.version.pg }}
|
||||||
BUILD_TAG=${{ needs.tag.outputs.build-tag }}
|
BUILD_TAG=${{ needs.tag.outputs.build-tag }}
|
||||||
TAG=${{ needs.build-build-tools-image.outputs.image-tag }}
|
TAG=${{ needs.build-build-tools-image.outputs.image-tag }}
|
||||||
|
DEBIAN_FLAVOR=${{ matrix.version.debian }}
|
||||||
provenance: false
|
provenance: false
|
||||||
push: true
|
push: true
|
||||||
pull: true
|
pull: true
|
||||||
file: Dockerfile.compute-node
|
file: compute/Dockerfile.compute-node
|
||||||
target: neon-pg-ext-test
|
target: neon-pg-ext-test
|
||||||
cache-from: type=registry,ref=cache.neon.build/neon-test-extensions-${{ matrix.version }}:cache-${{ matrix.arch }}
|
cache-from: type=registry,ref=cache.neon.build/neon-test-extensions-${{ matrix.version.pg }}:cache-${{ matrix.arch }}
|
||||||
cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/neon-test-extensions-{0}:cache-{1},mode=max', matrix.version, matrix.arch) || '' }}
|
cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/neon-test-extensions-{0}:cache-{1},mode=max', matrix.version.pg, matrix.arch) || '' }}
|
||||||
tags: |
|
tags: |
|
||||||
neondatabase/neon-test-extensions-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}-${{ matrix.arch }}
|
neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{needs.tag.outputs.build-tag}}-${{ matrix.arch }}
|
||||||
|
|
||||||
- name: Build compute-tools image
|
- name: Build compute-tools image
|
||||||
# compute-tools are Postgres independent, so build it only once
|
# compute-tools are Postgres independent, so build it only once
|
||||||
if: matrix.version == 'v17'
|
# We pick 16, because that builds on debian 11 with older glibc (and is
|
||||||
|
# thus compatible with newer glibc), rather than 17 on Debian 12, as
|
||||||
|
# that isn't guaranteed to be compatible with Debian 11
|
||||||
|
if: matrix.version.pg == 'v16'
|
||||||
uses: docker/build-push-action@v6
|
uses: docker/build-push-action@v6
|
||||||
with:
|
with:
|
||||||
target: compute-tools-image
|
target: compute-tools-image
|
||||||
@@ -635,10 +706,11 @@ jobs:
|
|||||||
GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}
|
GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}
|
||||||
BUILD_TAG=${{ needs.tag.outputs.build-tag }}
|
BUILD_TAG=${{ needs.tag.outputs.build-tag }}
|
||||||
TAG=${{ needs.build-build-tools-image.outputs.image-tag }}
|
TAG=${{ needs.build-build-tools-image.outputs.image-tag }}
|
||||||
|
DEBIAN_FLAVOR=${{ matrix.version.debian }}
|
||||||
provenance: false
|
provenance: false
|
||||||
push: true
|
push: true
|
||||||
pull: true
|
pull: true
|
||||||
file: Dockerfile.compute-node
|
file: compute/Dockerfile.compute-node
|
||||||
tags: |
|
tags: |
|
||||||
neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}-${{ matrix.arch }}
|
neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}-${{ matrix.arch }}
|
||||||
|
|
||||||
@@ -726,7 +798,7 @@ jobs:
|
|||||||
- name: Build vm image
|
- name: Build vm image
|
||||||
run: |
|
run: |
|
||||||
./vm-builder \
|
./vm-builder \
|
||||||
-spec=vm-image-spec.yaml \
|
-spec=compute/vm-image-spec.yaml \
|
||||||
-src=neondatabase/compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }} \
|
-src=neondatabase/compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }} \
|
||||||
-dst=neondatabase/vm-compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }}
|
-dst=neondatabase/vm-compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }}
|
||||||
|
|
||||||
@@ -790,6 +862,9 @@ jobs:
|
|||||||
needs: [ check-permissions, tag, test-images, vm-compute-node-image ]
|
needs: [ check-permissions, tag, test-images, vm-compute-node-image ]
|
||||||
runs-on: ubuntu-22.04
|
runs-on: ubuntu-22.04
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
id-token: write # for `aws-actions/configure-aws-credentials`
|
||||||
|
|
||||||
env:
|
env:
|
||||||
VERSIONS: v14 v15 v16 v17
|
VERSIONS: v14 v15 v16 v17
|
||||||
|
|
||||||
@@ -834,13 +909,19 @@ jobs:
|
|||||||
docker buildx imagetools create -t neondatabase/neon-test-extensions-v16:latest \
|
docker buildx imagetools create -t neondatabase/neon-test-extensions-v16:latest \
|
||||||
neondatabase/neon-test-extensions-v16:${{ needs.tag.outputs.build-tag }}
|
neondatabase/neon-test-extensions-v16:${{ needs.tag.outputs.build-tag }}
|
||||||
|
|
||||||
|
- name: Configure AWS-prod credentials
|
||||||
|
if: github.ref_name == 'release'|| github.ref_name == 'release-proxy'
|
||||||
|
uses: aws-actions/configure-aws-credentials@v4
|
||||||
|
with:
|
||||||
|
aws-region: eu-central-1
|
||||||
|
mask-aws-account-id: true
|
||||||
|
role-to-assume: ${{ secrets.PROD_GHA_OIDC_ROLE }}
|
||||||
|
|
||||||
- name: Login to prod ECR
|
- name: Login to prod ECR
|
||||||
uses: docker/login-action@v3
|
uses: docker/login-action@v3
|
||||||
if: github.ref_name == 'release'|| github.ref_name == 'release-proxy'
|
if: github.ref_name == 'release'|| github.ref_name == 'release-proxy'
|
||||||
with:
|
with:
|
||||||
registry: 093970136003.dkr.ecr.eu-central-1.amazonaws.com
|
registry: 093970136003.dkr.ecr.eu-central-1.amazonaws.com
|
||||||
username: ${{ secrets.PROD_GHA_RUNNER_LIMITED_AWS_ACCESS_KEY_ID }}
|
|
||||||
password: ${{ secrets.PROD_GHA_RUNNER_LIMITED_AWS_SECRET_ACCESS_KEY }}
|
|
||||||
|
|
||||||
- name: Copy all images to prod ECR
|
- name: Copy all images to prod ECR
|
||||||
if: github.ref_name == 'release'|| github.ref_name == 'release-proxy'
|
if: github.ref_name == 'release'|| github.ref_name == 'release-proxy'
|
||||||
@@ -1109,10 +1190,9 @@ jobs:
|
|||||||
|
|
||||||
files_to_promote+=("s3://${BUCKET}/${s3_key}")
|
files_to_promote+=("s3://${BUCKET}/${s3_key}")
|
||||||
|
|
||||||
# TODO Add v17
|
for pg_version in v14 v15 v16 v17; do
|
||||||
for pg_version in v14 v15 v16; do
|
|
||||||
# We run less tests for debug builds, so we don't need to promote them
|
# We run less tests for debug builds, so we don't need to promote them
|
||||||
if [ "${build_type}" == "debug" ] && { [ "${arch}" == "ARM64" ] || [ "${pg_version}" != "v16" ] ; }; then
|
if [ "${build_type}" == "debug" ] && { [ "${arch}" == "ARM64" ] || [ "${pg_version}" != "v17" ] ; }; then
|
||||||
continue
|
continue
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|||||||
102
.github/workflows/cloud-regress.yml
vendored
Normal file
102
.github/workflows/cloud-regress.yml
vendored
Normal file
@@ -0,0 +1,102 @@
|
|||||||
|
name: Cloud Regression Test
|
||||||
|
on:
|
||||||
|
schedule:
|
||||||
|
# * is a special character in YAML so you have to quote this string
|
||||||
|
# ┌───────────── minute (0 - 59)
|
||||||
|
# │ ┌───────────── hour (0 - 23)
|
||||||
|
# │ │ ┌───────────── day of the month (1 - 31)
|
||||||
|
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
|
||||||
|
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
|
||||||
|
- cron: '45 1 * * *' # run once a day, timezone is utc
|
||||||
|
workflow_dispatch: # adds ability to run this manually
|
||||||
|
|
||||||
|
defaults:
|
||||||
|
run:
|
||||||
|
shell: bash -euxo pipefail {0}
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
# Allow only one workflow
|
||||||
|
group: ${{ github.workflow }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
regress:
|
||||||
|
env:
|
||||||
|
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
|
||||||
|
DEFAULT_PG_VERSION: 16
|
||||||
|
TEST_OUTPUT: /tmp/test_output
|
||||||
|
BUILD_TYPE: remote
|
||||||
|
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
|
||||||
|
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
|
||||||
|
|
||||||
|
runs-on: us-east-2
|
||||||
|
container:
|
||||||
|
image: neondatabase/build-tools:pinned
|
||||||
|
options: --init
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
submodules: true
|
||||||
|
|
||||||
|
- name: Patch the test
|
||||||
|
run: |
|
||||||
|
cd "vendor/postgres-v${DEFAULT_PG_VERSION}"
|
||||||
|
patch -p1 < "../../compute/patches/cloud_regress_pg${DEFAULT_PG_VERSION}.patch"
|
||||||
|
|
||||||
|
- name: Generate a random password
|
||||||
|
id: pwgen
|
||||||
|
run: |
|
||||||
|
set +x
|
||||||
|
DBPASS=$(dd if=/dev/random bs=48 count=1 2>/dev/null | base64)
|
||||||
|
echo "::add-mask::${DBPASS//\//}"
|
||||||
|
echo DBPASS="${DBPASS//\//}" >> "${GITHUB_OUTPUT}"
|
||||||
|
|
||||||
|
- name: Change tests according to the generated password
|
||||||
|
env:
|
||||||
|
DBPASS: ${{ steps.pwgen.outputs.DBPASS }}
|
||||||
|
run: |
|
||||||
|
cd vendor/postgres-v"${DEFAULT_PG_VERSION}"/src/test/regress
|
||||||
|
for fname in sql/*.sql expected/*.out; do
|
||||||
|
sed -i.bak s/NEON_PASSWORD_PLACEHOLDER/"'${DBPASS}'"/ "${fname}"
|
||||||
|
done
|
||||||
|
for ph in $(grep NEON_MD5_PLACEHOLDER expected/password.out | awk '{print $3;}' | sort | uniq); do
|
||||||
|
USER=$(echo "${ph}" | cut -c 22-)
|
||||||
|
MD5=md5$(echo -n "${DBPASS}${USER}" | md5sum | awk '{print $1;}')
|
||||||
|
sed -i.bak "s/${ph}/${MD5}/" expected/password.out
|
||||||
|
done
|
||||||
|
|
||||||
|
- name: Download Neon artifact
|
||||||
|
uses: ./.github/actions/download
|
||||||
|
with:
|
||||||
|
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
||||||
|
path: /tmp/neon/
|
||||||
|
prefix: latest
|
||||||
|
|
||||||
|
- name: Run the regression tests
|
||||||
|
uses: ./.github/actions/run-python-test-set
|
||||||
|
with:
|
||||||
|
build_type: ${{ env.BUILD_TYPE }}
|
||||||
|
test_selection: cloud_regress
|
||||||
|
pg_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||||
|
extra_params: -m remote_cluster
|
||||||
|
env:
|
||||||
|
BENCHMARK_CONNSTR: ${{ secrets.PG_REGRESS_CONNSTR }}
|
||||||
|
|
||||||
|
- name: Create Allure report
|
||||||
|
id: create-allure-report
|
||||||
|
if: ${{ !cancelled() }}
|
||||||
|
uses: ./.github/actions/allure-report-generate
|
||||||
|
|
||||||
|
- name: Post to a Slack channel
|
||||||
|
if: ${{ github.event.schedule && failure() }}
|
||||||
|
uses: slackapi/slack-github-action@v1
|
||||||
|
with:
|
||||||
|
channel-id: "C033QLM5P7D" # on-call-staging-stream
|
||||||
|
slack-message: |
|
||||||
|
Periodic pg_regress on staging: ${{ job.status }}
|
||||||
|
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
|
||||||
|
<${{ steps.create-allure-report.outputs.report-url }}|Allure report>
|
||||||
|
env:
|
||||||
|
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||||
|
|
||||||
4
.github/workflows/trigger-e2e-tests.yml
vendored
4
.github/workflows/trigger-e2e-tests.yml
vendored
@@ -102,12 +102,12 @@ jobs:
|
|||||||
# Default set of platforms to run e2e tests on
|
# Default set of platforms to run e2e tests on
|
||||||
platforms='["docker", "k8s"]'
|
platforms='["docker", "k8s"]'
|
||||||
|
|
||||||
# If the PR changes vendor/, pgxn/ or libs/vm_monitor/ directories, or Dockerfile.compute-node, add k8s-neonvm to the list of platforms.
|
# If the PR changes vendor/, pgxn/ or libs/vm_monitor/ directories, or compute/Dockerfile.compute-node, add k8s-neonvm to the list of platforms.
|
||||||
# If the workflow run is not a pull request, add k8s-neonvm to the list.
|
# If the workflow run is not a pull request, add k8s-neonvm to the list.
|
||||||
if [ "$GITHUB_EVENT_NAME" == "pull_request" ]; then
|
if [ "$GITHUB_EVENT_NAME" == "pull_request" ]; then
|
||||||
for f in $(gh api "/repos/${GITHUB_REPOSITORY}/pulls/${PR_NUMBER}/files" --paginate --jq '.[].filename'); do
|
for f in $(gh api "/repos/${GITHUB_REPOSITORY}/pulls/${PR_NUMBER}/files" --paginate --jq '.[].filename'); do
|
||||||
case "$f" in
|
case "$f" in
|
||||||
vendor/*|pgxn/*|libs/vm_monitor/*|Dockerfile.compute-node)
|
vendor/*|pgxn/*|libs/vm_monitor/*|compute/Dockerfile.compute-node)
|
||||||
platforms=$(echo "${platforms}" | jq --compact-output '. += ["k8s-neonvm"] | unique')
|
platforms=$(echo "${platforms}" | jq --compact-output '. += ["k8s-neonvm"] | unique')
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
|
|||||||
318
Cargo.lock
generated
318
Cargo.lock
generated
@@ -255,12 +255,6 @@ dependencies = [
|
|||||||
"syn 2.0.52",
|
"syn 2.0.52",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "atomic"
|
|
||||||
version = "0.5.3"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "c59bdb34bc650a32731b31bd8f0829cc15d24a708ee31559e0bb34f2bc320cba"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "atomic-take"
|
name = "atomic-take"
|
||||||
version = "1.1.0"
|
version = "1.1.0"
|
||||||
@@ -295,8 +289,8 @@ dependencies = [
|
|||||||
"fastrand 2.0.0",
|
"fastrand 2.0.0",
|
||||||
"hex",
|
"hex",
|
||||||
"http 0.2.9",
|
"http 0.2.9",
|
||||||
"hyper 0.14.26",
|
"hyper 0.14.30",
|
||||||
"ring 0.17.6",
|
"ring",
|
||||||
"time",
|
"time",
|
||||||
"tokio",
|
"tokio",
|
||||||
"tracing",
|
"tracing",
|
||||||
@@ -486,7 +480,7 @@ dependencies = [
|
|||||||
"once_cell",
|
"once_cell",
|
||||||
"p256 0.11.1",
|
"p256 0.11.1",
|
||||||
"percent-encoding",
|
"percent-encoding",
|
||||||
"ring 0.17.6",
|
"ring",
|
||||||
"sha2",
|
"sha2",
|
||||||
"subtle",
|
"subtle",
|
||||||
"time",
|
"time",
|
||||||
@@ -593,7 +587,7 @@ dependencies = [
|
|||||||
"http 0.2.9",
|
"http 0.2.9",
|
||||||
"http-body 0.4.5",
|
"http-body 0.4.5",
|
||||||
"http-body 1.0.0",
|
"http-body 1.0.0",
|
||||||
"hyper 0.14.26",
|
"hyper 0.14.30",
|
||||||
"hyper-rustls 0.24.0",
|
"hyper-rustls 0.24.0",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"pin-project-lite",
|
"pin-project-lite",
|
||||||
@@ -684,7 +678,7 @@ dependencies = [
|
|||||||
"futures-util",
|
"futures-util",
|
||||||
"http 0.2.9",
|
"http 0.2.9",
|
||||||
"http-body 0.4.5",
|
"http-body 0.4.5",
|
||||||
"hyper 0.14.26",
|
"hyper 0.14.30",
|
||||||
"itoa",
|
"itoa",
|
||||||
"matchit 0.7.0",
|
"matchit 0.7.0",
|
||||||
"memchr",
|
"memchr",
|
||||||
@@ -1089,9 +1083,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ciborium"
|
name = "ciborium"
|
||||||
version = "0.2.1"
|
version = "0.2.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "effd91f6c78e5a4ace8a5d3c0b6bfaec9e2baaef55f3efc00e45fb2e477ee926"
|
checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"ciborium-io",
|
"ciborium-io",
|
||||||
"ciborium-ll",
|
"ciborium-ll",
|
||||||
@@ -1100,18 +1094,18 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ciborium-io"
|
name = "ciborium-io"
|
||||||
version = "0.2.1"
|
version = "0.2.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "cdf919175532b369853f5d5e20b26b43112613fd6fe7aee757e35f7a44642656"
|
checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ciborium-ll"
|
name = "ciborium-ll"
|
||||||
version = "0.2.1"
|
version = "0.2.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "defaa24ecc093c77630e6c15e17c51f5e187bf35ee514f4e2d67baaa96dae22b"
|
checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"ciborium-io",
|
"ciborium-io",
|
||||||
"half 1.8.2",
|
"half",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -1224,7 +1218,7 @@ dependencies = [
|
|||||||
"compute_api",
|
"compute_api",
|
||||||
"flate2",
|
"flate2",
|
||||||
"futures",
|
"futures",
|
||||||
"hyper 0.14.26",
|
"hyper 0.14.30",
|
||||||
"nix 0.27.1",
|
"nix 0.27.1",
|
||||||
"notify",
|
"notify",
|
||||||
"num_cpus",
|
"num_cpus",
|
||||||
@@ -1327,10 +1321,9 @@ dependencies = [
|
|||||||
"clap",
|
"clap",
|
||||||
"comfy-table",
|
"comfy-table",
|
||||||
"compute_api",
|
"compute_api",
|
||||||
"git-version",
|
|
||||||
"humantime",
|
"humantime",
|
||||||
"humantime-serde",
|
"humantime-serde",
|
||||||
"hyper 0.14.26",
|
"hyper 0.14.30",
|
||||||
"nix 0.27.1",
|
"nix 0.27.1",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"pageserver_api",
|
"pageserver_api",
|
||||||
@@ -2304,12 +2297,6 @@ dependencies = [
|
|||||||
"tracing",
|
"tracing",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "half"
|
|
||||||
version = "1.8.2"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "half"
|
name = "half"
|
||||||
version = "2.4.1"
|
version = "2.4.1"
|
||||||
@@ -2411,17 +2398,6 @@ dependencies = [
|
|||||||
"digest",
|
"digest",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "hostname"
|
|
||||||
version = "0.3.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "3c731c3e10504cc8ed35cfe2f1db4c9274c3d35fa486e3b31df46f068ef3e867"
|
|
||||||
dependencies = [
|
|
||||||
"libc",
|
|
||||||
"match_cfg",
|
|
||||||
"winapi",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "hostname"
|
name = "hostname"
|
||||||
version = "0.4.0"
|
version = "0.4.0"
|
||||||
@@ -2430,7 +2406,7 @@ checksum = "f9c7c7c8ac16c798734b8a24560c1362120597c40d5e1459f09498f8f6c8f2ba"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"cfg-if",
|
"cfg-if",
|
||||||
"libc",
|
"libc",
|
||||||
"windows 0.52.0",
|
"windows",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -2539,9 +2515,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "hyper"
|
name = "hyper"
|
||||||
version = "0.14.26"
|
version = "0.14.30"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "ab302d72a6f11a3b910431ff93aae7e773078c769f0a3ef15fb9ec692ed147d4"
|
checksum = "a152ddd61dfaec7273fe8419ab357f33aee0d914c5f4efbf0d96fa749eea5ec9"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bytes",
|
"bytes",
|
||||||
"futures-channel",
|
"futures-channel",
|
||||||
@@ -2554,7 +2530,7 @@ dependencies = [
|
|||||||
"httpdate",
|
"httpdate",
|
||||||
"itoa",
|
"itoa",
|
||||||
"pin-project-lite",
|
"pin-project-lite",
|
||||||
"socket2 0.4.9",
|
"socket2",
|
||||||
"tokio",
|
"tokio",
|
||||||
"tower-service",
|
"tower-service",
|
||||||
"tracing",
|
"tracing",
|
||||||
@@ -2589,7 +2565,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "0646026eb1b3eea4cd9ba47912ea5ce9cc07713d105b1a14698f4e6433d348b7"
|
checksum = "0646026eb1b3eea4cd9ba47912ea5ce9cc07713d105b1a14698f4e6433d348b7"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"http 0.2.9",
|
"http 0.2.9",
|
||||||
"hyper 0.14.26",
|
"hyper 0.14.30",
|
||||||
"log",
|
"log",
|
||||||
"rustls 0.21.11",
|
"rustls 0.21.11",
|
||||||
"rustls-native-certs 0.6.2",
|
"rustls-native-certs 0.6.2",
|
||||||
@@ -2620,7 +2596,7 @@ version = "0.4.1"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "bbb958482e8c7be4bc3cf272a766a2b0bf1a6755e7a6ae777f017a31d11b13b1"
|
checksum = "bbb958482e8c7be4bc3cf272a766a2b0bf1a6755e7a6ae777f017a31d11b13b1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"hyper 0.14.26",
|
"hyper 0.14.30",
|
||||||
"pin-project-lite",
|
"pin-project-lite",
|
||||||
"tokio",
|
"tokio",
|
||||||
"tokio-io-timeout",
|
"tokio-io-timeout",
|
||||||
@@ -2639,7 +2615,7 @@ dependencies = [
|
|||||||
"http-body 1.0.0",
|
"http-body 1.0.0",
|
||||||
"hyper 1.2.0",
|
"hyper 1.2.0",
|
||||||
"pin-project-lite",
|
"pin-project-lite",
|
||||||
"socket2 0.5.5",
|
"socket2",
|
||||||
"tokio",
|
"tokio",
|
||||||
"tower",
|
"tower",
|
||||||
"tower-service",
|
"tower-service",
|
||||||
@@ -2648,16 +2624,16 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "iana-time-zone"
|
name = "iana-time-zone"
|
||||||
version = "0.1.56"
|
version = "0.1.61"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "0722cd7114b7de04316e7ea5456a0bbb20e4adb46fd27a3697adb812cff0f37c"
|
checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"android_system_properties",
|
"android_system_properties",
|
||||||
"core-foundation-sys",
|
"core-foundation-sys",
|
||||||
"iana-time-zone-haiku",
|
"iana-time-zone-haiku",
|
||||||
"js-sys",
|
"js-sys",
|
||||||
"wasm-bindgen",
|
"wasm-bindgen",
|
||||||
"windows 0.48.0",
|
"windows-core",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -2870,7 +2846,7 @@ dependencies = [
|
|||||||
"base64 0.21.1",
|
"base64 0.21.1",
|
||||||
"js-sys",
|
"js-sys",
|
||||||
"pem",
|
"pem",
|
||||||
"ring 0.17.6",
|
"ring",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
"simple_asn1",
|
"simple_asn1",
|
||||||
@@ -2908,11 +2884,11 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lazy_static"
|
name = "lazy_static"
|
||||||
version = "1.4.0"
|
version = "1.5.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"spin 0.5.2",
|
"spin",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -2974,12 +2950,6 @@ dependencies = [
|
|||||||
"hashbrown 0.14.5",
|
"hashbrown 0.14.5",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "match_cfg"
|
|
||||||
version = "0.1.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "ffbee8634e0d45d258acb448e7eaab3fce7a0a467395d4d9f228e3c1f01fb2e4"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "matchers"
|
name = "matchers"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
@@ -3072,15 +3042,6 @@ dependencies = [
|
|||||||
"autocfg",
|
"autocfg",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "memoffset"
|
|
||||||
version = "0.8.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "d61c719bcfbcf5d62b3a09efa6088de8c54bc0bfcd3ea7ae39fcc186108b8de1"
|
|
||||||
dependencies = [
|
|
||||||
"autocfg",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "memoffset"
|
name = "memoffset"
|
||||||
version = "0.9.0"
|
version = "0.9.0"
|
||||||
@@ -3616,7 +3577,6 @@ dependencies = [
|
|||||||
"anyhow",
|
"anyhow",
|
||||||
"camino",
|
"camino",
|
||||||
"clap",
|
"clap",
|
||||||
"git-version",
|
|
||||||
"humantime",
|
"humantime",
|
||||||
"pageserver",
|
"pageserver",
|
||||||
"pageserver_api",
|
"pageserver_api",
|
||||||
@@ -3655,12 +3615,11 @@ dependencies = [
|
|||||||
"enumset",
|
"enumset",
|
||||||
"fail",
|
"fail",
|
||||||
"futures",
|
"futures",
|
||||||
"git-version",
|
|
||||||
"hex",
|
"hex",
|
||||||
"hex-literal",
|
"hex-literal",
|
||||||
"humantime",
|
"humantime",
|
||||||
"humantime-serde",
|
"humantime-serde",
|
||||||
"hyper 0.14.26",
|
"hyper 0.14.30",
|
||||||
"indoc",
|
"indoc",
|
||||||
"itertools 0.10.5",
|
"itertools 0.10.5",
|
||||||
"md5",
|
"md5",
|
||||||
@@ -3775,7 +3734,6 @@ dependencies = [
|
|||||||
"clap",
|
"clap",
|
||||||
"criterion",
|
"criterion",
|
||||||
"futures",
|
"futures",
|
||||||
"git-version",
|
|
||||||
"hex-literal",
|
"hex-literal",
|
||||||
"itertools 0.10.5",
|
"itertools 0.10.5",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
@@ -3853,7 +3811,7 @@ dependencies = [
|
|||||||
"ahash",
|
"ahash",
|
||||||
"bytes",
|
"bytes",
|
||||||
"chrono",
|
"chrono",
|
||||||
"half 2.4.1",
|
"half",
|
||||||
"hashbrown 0.14.5",
|
"hashbrown 0.14.5",
|
||||||
"num",
|
"num",
|
||||||
"num-bigint",
|
"num-bigint",
|
||||||
@@ -4140,7 +4098,7 @@ dependencies = [
|
|||||||
"crc32c",
|
"crc32c",
|
||||||
"env_logger",
|
"env_logger",
|
||||||
"log",
|
"log",
|
||||||
"memoffset 0.8.0",
|
"memoffset 0.9.0",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"postgres",
|
"postgres",
|
||||||
"regex",
|
"regex",
|
||||||
@@ -4338,6 +4296,7 @@ dependencies = [
|
|||||||
"camino-tempfile",
|
"camino-tempfile",
|
||||||
"chrono",
|
"chrono",
|
||||||
"clap",
|
"clap",
|
||||||
|
"compute_api",
|
||||||
"consumption_metrics",
|
"consumption_metrics",
|
||||||
"dashmap",
|
"dashmap",
|
||||||
"ecdsa 0.16.9",
|
"ecdsa 0.16.9",
|
||||||
@@ -4345,17 +4304,16 @@ dependencies = [
|
|||||||
"fallible-iterator",
|
"fallible-iterator",
|
||||||
"framed-websockets",
|
"framed-websockets",
|
||||||
"futures",
|
"futures",
|
||||||
"git-version",
|
|
||||||
"hashbrown 0.14.5",
|
"hashbrown 0.14.5",
|
||||||
"hashlink",
|
"hashlink",
|
||||||
"hex",
|
"hex",
|
||||||
"hmac",
|
"hmac",
|
||||||
"hostname 0.3.1",
|
"hostname",
|
||||||
"http 1.1.0",
|
"http 1.1.0",
|
||||||
"http-body-util",
|
"http-body-util",
|
||||||
"humantime",
|
"humantime",
|
||||||
"humantime-serde",
|
"humantime-serde",
|
||||||
"hyper 0.14.26",
|
"hyper 0.14.30",
|
||||||
"hyper 1.2.0",
|
"hyper 1.2.0",
|
||||||
"hyper-util",
|
"hyper-util",
|
||||||
"indexmap 2.0.1",
|
"indexmap 2.0.1",
|
||||||
@@ -4400,7 +4358,7 @@ dependencies = [
|
|||||||
"signature 2.2.0",
|
"signature 2.2.0",
|
||||||
"smallvec",
|
"smallvec",
|
||||||
"smol_str",
|
"smol_str",
|
||||||
"socket2 0.5.5",
|
"socket2",
|
||||||
"subtle",
|
"subtle",
|
||||||
"thiserror",
|
"thiserror",
|
||||||
"tikv-jemalloc-ctl",
|
"tikv-jemalloc-ctl",
|
||||||
@@ -4578,7 +4536,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "48406db8ac1f3cbc7dcdb56ec355343817958a356ff430259bb07baf7607e1e1"
|
checksum = "48406db8ac1f3cbc7dcdb56ec355343817958a356ff430259bb07baf7607e1e1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"pem",
|
"pem",
|
||||||
"ring 0.17.6",
|
"ring",
|
||||||
"time",
|
"time",
|
||||||
"yasna",
|
"yasna",
|
||||||
]
|
]
|
||||||
@@ -4602,7 +4560,7 @@ dependencies = [
|
|||||||
"rustls-pki-types",
|
"rustls-pki-types",
|
||||||
"ryu",
|
"ryu",
|
||||||
"sha1_smol",
|
"sha1_smol",
|
||||||
"socket2 0.5.5",
|
"socket2",
|
||||||
"tokio",
|
"tokio",
|
||||||
"tokio-rustls 0.25.0",
|
"tokio-rustls 0.25.0",
|
||||||
"tokio-util",
|
"tokio-util",
|
||||||
@@ -4714,7 +4672,7 @@ dependencies = [
|
|||||||
"futures-util",
|
"futures-util",
|
||||||
"http-types",
|
"http-types",
|
||||||
"humantime-serde",
|
"humantime-serde",
|
||||||
"hyper 0.14.26",
|
"hyper 0.14.30",
|
||||||
"itertools 0.10.5",
|
"itertools 0.10.5",
|
||||||
"metrics",
|
"metrics",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
@@ -4747,7 +4705,7 @@ dependencies = [
|
|||||||
"h2 0.3.26",
|
"h2 0.3.26",
|
||||||
"http 0.2.9",
|
"http 0.2.9",
|
||||||
"http-body 0.4.5",
|
"http-body 0.4.5",
|
||||||
"hyper 0.14.26",
|
"hyper 0.14.30",
|
||||||
"hyper-rustls 0.24.0",
|
"hyper-rustls 0.24.0",
|
||||||
"ipnet",
|
"ipnet",
|
||||||
"js-sys",
|
"js-sys",
|
||||||
@@ -4905,21 +4863,6 @@ dependencies = [
|
|||||||
"subtle",
|
"subtle",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "ring"
|
|
||||||
version = "0.16.20"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc"
|
|
||||||
dependencies = [
|
|
||||||
"cc",
|
|
||||||
"libc",
|
|
||||||
"once_cell",
|
|
||||||
"spin 0.5.2",
|
|
||||||
"untrusted 0.7.1",
|
|
||||||
"web-sys",
|
|
||||||
"winapi",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ring"
|
name = "ring"
|
||||||
version = "0.17.6"
|
version = "0.17.6"
|
||||||
@@ -4929,8 +4872,8 @@ dependencies = [
|
|||||||
"cc",
|
"cc",
|
||||||
"getrandom 0.2.11",
|
"getrandom 0.2.11",
|
||||||
"libc",
|
"libc",
|
||||||
"spin 0.9.8",
|
"spin",
|
||||||
"untrusted 0.9.0",
|
"untrusted",
|
||||||
"windows-sys 0.48.0",
|
"windows-sys 0.48.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -4950,7 +4893,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "496c1d3718081c45ba9c31fbfc07417900aa96f4070ff90dc29961836b7a9945"
|
checksum = "496c1d3718081c45ba9c31fbfc07417900aa96f4070ff90dc29961836b7a9945"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"http 0.2.9",
|
"http 0.2.9",
|
||||||
"hyper 0.14.26",
|
"hyper 0.14.30",
|
||||||
"lazy_static",
|
"lazy_static",
|
||||||
"percent-encoding",
|
"percent-encoding",
|
||||||
"regex",
|
"regex",
|
||||||
@@ -5074,7 +5017,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "7fecbfb7b1444f477b345853b1fce097a2c6fb637b2bfb87e6bc5db0f043fae4"
|
checksum = "7fecbfb7b1444f477b345853b1fce097a2c6fb637b2bfb87e6bc5db0f043fae4"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"log",
|
"log",
|
||||||
"ring 0.17.6",
|
"ring",
|
||||||
"rustls-webpki 0.101.7",
|
"rustls-webpki 0.101.7",
|
||||||
"sct",
|
"sct",
|
||||||
]
|
]
|
||||||
@@ -5086,7 +5029,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "bf4ef73721ac7bcd79b2b315da7779d8fc09718c6b3d2d1b2d94850eb8c18432"
|
checksum = "bf4ef73721ac7bcd79b2b315da7779d8fc09718c6b3d2d1b2d94850eb8c18432"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"log",
|
"log",
|
||||||
"ring 0.17.6",
|
"ring",
|
||||||
"rustls-pki-types",
|
"rustls-pki-types",
|
||||||
"rustls-webpki 0.102.2",
|
"rustls-webpki 0.102.2",
|
||||||
"subtle",
|
"subtle",
|
||||||
@@ -5143,24 +5086,14 @@ version = "1.3.1"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "5ede67b28608b4c60685c7d54122d4400d90f62b40caee7700e700380a390fa8"
|
checksum = "5ede67b28608b4c60685c7d54122d4400d90f62b40caee7700e700380a390fa8"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "rustls-webpki"
|
|
||||||
version = "0.100.2"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "e98ff011474fa39949b7e5c0428f9b4937eda7da7848bbb947786b7be0b27dab"
|
|
||||||
dependencies = [
|
|
||||||
"ring 0.16.20",
|
|
||||||
"untrusted 0.7.1",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "rustls-webpki"
|
name = "rustls-webpki"
|
||||||
version = "0.101.7"
|
version = "0.101.7"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765"
|
checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"ring 0.17.6",
|
"ring",
|
||||||
"untrusted 0.9.0",
|
"untrusted",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -5169,9 +5102,9 @@ version = "0.102.2"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "faaa0a62740bedb9b2ef5afa303da42764c012f743917351dc9a237ea1663610"
|
checksum = "faaa0a62740bedb9b2ef5afa303da42764c012f743917351dc9a237ea1663610"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"ring 0.17.6",
|
"ring",
|
||||||
"rustls-pki-types",
|
"rustls-pki-types",
|
||||||
"untrusted 0.9.0",
|
"untrusted",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -5202,10 +5135,9 @@ dependencies = [
|
|||||||
"desim",
|
"desim",
|
||||||
"fail",
|
"fail",
|
||||||
"futures",
|
"futures",
|
||||||
"git-version",
|
|
||||||
"hex",
|
"hex",
|
||||||
"humantime",
|
"humantime",
|
||||||
"hyper 0.14.26",
|
"hyper 0.14.30",
|
||||||
"metrics",
|
"metrics",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"parking_lot 0.12.1",
|
"parking_lot 0.12.1",
|
||||||
@@ -5262,11 +5194,11 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "schannel"
|
name = "schannel"
|
||||||
version = "0.1.21"
|
version = "0.1.23"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "713cfb06c7059f3588fb8044c0fad1d09e3c01d225e25b9220dbfdcf16dbb1b3"
|
checksum = "fbc91545643bcf3a0bbb6569265615222618bdf33ce4ffbbd13c4bbd4c093534"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"windows-sys 0.42.0",
|
"windows-sys 0.52.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -5290,8 +5222,8 @@ version = "0.7.1"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414"
|
checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"ring 0.17.6",
|
"ring",
|
||||||
"untrusted 0.9.0",
|
"untrusted",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -5400,7 +5332,7 @@ version = "0.32.3"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "eba8870c5dba2bfd9db25c75574a11429f6b95957b0a78ac02e2970dd7a5249a"
|
checksum = "eba8870c5dba2bfd9db25c75574a11429f6b95957b0a78ac02e2970dd7a5249a"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"hostname 0.4.0",
|
"hostname",
|
||||||
"libc",
|
"libc",
|
||||||
"os_info",
|
"os_info",
|
||||||
"rustc_version",
|
"rustc_version",
|
||||||
@@ -5712,16 +5644,6 @@ dependencies = [
|
|||||||
"serde",
|
"serde",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "socket2"
|
|
||||||
version = "0.4.9"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "64a4a911eed85daf18834cfaa86a79b7d266ff93ff5ba14005426219480ed662"
|
|
||||||
dependencies = [
|
|
||||||
"libc",
|
|
||||||
"winapi",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "socket2"
|
name = "socket2"
|
||||||
version = "0.5.5"
|
version = "0.5.5"
|
||||||
@@ -5732,12 +5654,6 @@ dependencies = [
|
|||||||
"windows-sys 0.48.0",
|
"windows-sys 0.48.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "spin"
|
|
||||||
version = "0.5.2"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "spin"
|
name = "spin"
|
||||||
version = "0.9.8"
|
version = "0.9.8"
|
||||||
@@ -5781,9 +5697,8 @@ dependencies = [
|
|||||||
"futures",
|
"futures",
|
||||||
"futures-core",
|
"futures-core",
|
||||||
"futures-util",
|
"futures-util",
|
||||||
"git-version",
|
|
||||||
"humantime",
|
"humantime",
|
||||||
"hyper 0.14.26",
|
"hyper 0.14.30",
|
||||||
"metrics",
|
"metrics",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"parking_lot 0.12.1",
|
"parking_lot 0.12.1",
|
||||||
@@ -5809,10 +5724,9 @@ dependencies = [
|
|||||||
"diesel_migrations",
|
"diesel_migrations",
|
||||||
"fail",
|
"fail",
|
||||||
"futures",
|
"futures",
|
||||||
"git-version",
|
|
||||||
"hex",
|
"hex",
|
||||||
"humantime",
|
"humantime",
|
||||||
"hyper 0.14.26",
|
"hyper 0.14.30",
|
||||||
"itertools 0.10.5",
|
"itertools 0.10.5",
|
||||||
"lasso",
|
"lasso",
|
||||||
"measured",
|
"measured",
|
||||||
@@ -5862,7 +5776,6 @@ dependencies = [
|
|||||||
"either",
|
"either",
|
||||||
"futures",
|
"futures",
|
||||||
"futures-util",
|
"futures-util",
|
||||||
"git-version",
|
|
||||||
"hex",
|
"hex",
|
||||||
"humantime",
|
"humantime",
|
||||||
"itertools 0.10.5",
|
"itertools 0.10.5",
|
||||||
@@ -6228,7 +6141,7 @@ dependencies = [
|
|||||||
"num_cpus",
|
"num_cpus",
|
||||||
"pin-project-lite",
|
"pin-project-lite",
|
||||||
"signal-hook-registry",
|
"signal-hook-registry",
|
||||||
"socket2 0.5.5",
|
"socket2",
|
||||||
"tokio-macros",
|
"tokio-macros",
|
||||||
"windows-sys 0.48.0",
|
"windows-sys 0.48.0",
|
||||||
]
|
]
|
||||||
@@ -6288,7 +6201,7 @@ dependencies = [
|
|||||||
"pin-project-lite",
|
"pin-project-lite",
|
||||||
"postgres-protocol",
|
"postgres-protocol",
|
||||||
"postgres-types",
|
"postgres-types",
|
||||||
"socket2 0.5.5",
|
"socket2",
|
||||||
"tokio",
|
"tokio",
|
||||||
"tokio-util",
|
"tokio-util",
|
||||||
]
|
]
|
||||||
@@ -6300,7 +6213,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "0ea13f22eda7127c827983bdaf0d7fff9df21c8817bab02815ac277a21143677"
|
checksum = "0ea13f22eda7127c827983bdaf0d7fff9df21c8817bab02815ac277a21143677"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"futures",
|
"futures",
|
||||||
"ring 0.17.6",
|
"ring",
|
||||||
"rustls 0.22.4",
|
"rustls 0.22.4",
|
||||||
"tokio",
|
"tokio",
|
||||||
"tokio-postgres",
|
"tokio-postgres",
|
||||||
@@ -6434,7 +6347,7 @@ dependencies = [
|
|||||||
"h2 0.3.26",
|
"h2 0.3.26",
|
||||||
"http 0.2.9",
|
"http 0.2.9",
|
||||||
"http-body 0.4.5",
|
"http-body 0.4.5",
|
||||||
"hyper 0.14.26",
|
"hyper 0.14.30",
|
||||||
"hyper-timeout",
|
"hyper-timeout",
|
||||||
"percent-encoding",
|
"percent-encoding",
|
||||||
"pin-project",
|
"pin-project",
|
||||||
@@ -6611,7 +6524,7 @@ dependencies = [
|
|||||||
name = "tracing-utils"
|
name = "tracing-utils"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"hyper 0.14.26",
|
"hyper 0.14.30",
|
||||||
"opentelemetry",
|
"opentelemetry",
|
||||||
"opentelemetry-otlp",
|
"opentelemetry-otlp",
|
||||||
"opentelemetry-semantic-conventions",
|
"opentelemetry-semantic-conventions",
|
||||||
@@ -6714,12 +6627,6 @@ version = "0.2.4"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c"
|
checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "untrusted"
|
|
||||||
version = "0.7.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "untrusted"
|
name = "untrusted"
|
||||||
version = "0.9.0"
|
version = "0.9.0"
|
||||||
@@ -6728,17 +6635,18 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ureq"
|
name = "ureq"
|
||||||
version = "2.7.1"
|
version = "2.9.7"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "0b11c96ac7ee530603dcdf68ed1557050f374ce55a5a07193ebf8cbc9f8927e9"
|
checksum = "d11a831e3c0b56e438a28308e7c810799e3c118417f342d30ecec080105395cd"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"base64 0.21.1",
|
"base64 0.22.1",
|
||||||
"log",
|
"log",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"rustls 0.21.11",
|
"rustls 0.22.4",
|
||||||
"rustls-webpki 0.100.2",
|
"rustls-pki-types",
|
||||||
|
"rustls-webpki 0.102.2",
|
||||||
"url",
|
"url",
|
||||||
"webpki-roots 0.23.1",
|
"webpki-roots 0.26.1",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -6799,10 +6707,11 @@ dependencies = [
|
|||||||
"criterion",
|
"criterion",
|
||||||
"fail",
|
"fail",
|
||||||
"futures",
|
"futures",
|
||||||
|
"git-version",
|
||||||
"hex",
|
"hex",
|
||||||
"hex-literal",
|
"hex-literal",
|
||||||
"humantime",
|
"humantime",
|
||||||
"hyper 0.14.26",
|
"hyper 0.14.30",
|
||||||
"jsonwebtoken",
|
"jsonwebtoken",
|
||||||
"metrics",
|
"metrics",
|
||||||
"nix 0.27.1",
|
"nix 0.27.1",
|
||||||
@@ -6837,11 +6746,10 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "uuid"
|
name = "uuid"
|
||||||
version = "1.6.1"
|
version = "1.10.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "5e395fcf16a7a3d8127ec99782007af141946b4795001f876d54fb0d55978560"
|
checksum = "81dfa00651efa65069b0b6b651f4aaa31ba9e3c3ce0137aaad053604ee7e0314"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"atomic",
|
|
||||||
"getrandom 0.2.11",
|
"getrandom 0.2.11",
|
||||||
"serde",
|
"serde",
|
||||||
]
|
]
|
||||||
@@ -7075,15 +6983,6 @@ dependencies = [
|
|||||||
"wasm-bindgen",
|
"wasm-bindgen",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "webpki-roots"
|
|
||||||
version = "0.23.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "b03058f88386e5ff5310d9111d53f48b17d732b401aeb83a8d5190f2ac459338"
|
|
||||||
dependencies = [
|
|
||||||
"rustls-webpki 0.100.2",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "webpki-roots"
|
name = "webpki-roots"
|
||||||
version = "0.25.2"
|
version = "0.25.2"
|
||||||
@@ -7152,15 +7051,6 @@ version = "0.4.0"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "windows"
|
|
||||||
version = "0.48.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f"
|
|
||||||
dependencies = [
|
|
||||||
"windows-targets 0.48.0",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows"
|
name = "windows"
|
||||||
version = "0.52.0"
|
version = "0.52.0"
|
||||||
@@ -7180,21 +7070,6 @@ dependencies = [
|
|||||||
"windows-targets 0.52.4",
|
"windows-targets 0.52.4",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "windows-sys"
|
|
||||||
version = "0.42.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7"
|
|
||||||
dependencies = [
|
|
||||||
"windows_aarch64_gnullvm 0.42.2",
|
|
||||||
"windows_aarch64_msvc 0.42.2",
|
|
||||||
"windows_i686_gnu 0.42.2",
|
|
||||||
"windows_i686_msvc 0.42.2",
|
|
||||||
"windows_x86_64_gnu 0.42.2",
|
|
||||||
"windows_x86_64_gnullvm 0.42.2",
|
|
||||||
"windows_x86_64_msvc 0.42.2",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows-sys"
|
name = "windows-sys"
|
||||||
version = "0.48.0"
|
version = "0.48.0"
|
||||||
@@ -7243,12 +7118,6 @@ dependencies = [
|
|||||||
"windows_x86_64_msvc 0.52.4",
|
"windows_x86_64_msvc 0.52.4",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "windows_aarch64_gnullvm"
|
|
||||||
version = "0.42.2"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows_aarch64_gnullvm"
|
name = "windows_aarch64_gnullvm"
|
||||||
version = "0.48.0"
|
version = "0.48.0"
|
||||||
@@ -7261,12 +7130,6 @@ version = "0.52.4"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9"
|
checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "windows_aarch64_msvc"
|
|
||||||
version = "0.42.2"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows_aarch64_msvc"
|
name = "windows_aarch64_msvc"
|
||||||
version = "0.48.0"
|
version = "0.48.0"
|
||||||
@@ -7279,12 +7142,6 @@ version = "0.52.4"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675"
|
checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "windows_i686_gnu"
|
|
||||||
version = "0.42.2"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows_i686_gnu"
|
name = "windows_i686_gnu"
|
||||||
version = "0.48.0"
|
version = "0.48.0"
|
||||||
@@ -7297,12 +7154,6 @@ version = "0.52.4"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3"
|
checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "windows_i686_msvc"
|
|
||||||
version = "0.42.2"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows_i686_msvc"
|
name = "windows_i686_msvc"
|
||||||
version = "0.48.0"
|
version = "0.48.0"
|
||||||
@@ -7315,12 +7166,6 @@ version = "0.52.4"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02"
|
checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "windows_x86_64_gnu"
|
|
||||||
version = "0.42.2"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows_x86_64_gnu"
|
name = "windows_x86_64_gnu"
|
||||||
version = "0.48.0"
|
version = "0.48.0"
|
||||||
@@ -7333,12 +7178,6 @@ version = "0.52.4"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03"
|
checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "windows_x86_64_gnullvm"
|
|
||||||
version = "0.42.2"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows_x86_64_gnullvm"
|
name = "windows_x86_64_gnullvm"
|
||||||
version = "0.48.0"
|
version = "0.48.0"
|
||||||
@@ -7351,12 +7190,6 @@ version = "0.52.4"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177"
|
checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "windows_x86_64_msvc"
|
|
||||||
version = "0.42.2"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows_x86_64_msvc"
|
name = "windows_x86_64_msvc"
|
||||||
version = "0.48.0"
|
version = "0.48.0"
|
||||||
@@ -7433,10 +7266,11 @@ dependencies = [
|
|||||||
"futures-util",
|
"futures-util",
|
||||||
"generic-array",
|
"generic-array",
|
||||||
"getrandom 0.2.11",
|
"getrandom 0.2.11",
|
||||||
|
"half",
|
||||||
"hashbrown 0.14.5",
|
"hashbrown 0.14.5",
|
||||||
"hex",
|
"hex",
|
||||||
"hmac",
|
"hmac",
|
||||||
"hyper 0.14.26",
|
"hyper 0.14.30",
|
||||||
"indexmap 1.9.3",
|
"indexmap 1.9.3",
|
||||||
"itertools 0.10.5",
|
"itertools 0.10.5",
|
||||||
"itertools 0.12.1",
|
"itertools 0.12.1",
|
||||||
@@ -7504,7 +7338,7 @@ dependencies = [
|
|||||||
"der 0.7.8",
|
"der 0.7.8",
|
||||||
"hex",
|
"hex",
|
||||||
"pem",
|
"pem",
|
||||||
"ring 0.17.6",
|
"ring",
|
||||||
"signature 2.2.0",
|
"signature 2.2.0",
|
||||||
"spki 0.7.3",
|
"spki 0.7.3",
|
||||||
"thiserror",
|
"thiserror",
|
||||||
|
|||||||
@@ -76,8 +76,6 @@ clap = { version = "4.0", features = ["derive"] }
|
|||||||
comfy-table = "7.1"
|
comfy-table = "7.1"
|
||||||
const_format = "0.2"
|
const_format = "0.2"
|
||||||
crc32c = "0.6"
|
crc32c = "0.6"
|
||||||
crossbeam-deque = "0.8.5"
|
|
||||||
crossbeam-utils = "0.8.5"
|
|
||||||
dashmap = { version = "5.5.0", features = ["raw-api"] }
|
dashmap = { version = "5.5.0", features = ["raw-api"] }
|
||||||
either = "1.8"
|
either = "1.8"
|
||||||
enum-map = "2.4.2"
|
enum-map = "2.4.2"
|
||||||
@@ -95,7 +93,7 @@ hdrhistogram = "7.5.2"
|
|||||||
hex = "0.4"
|
hex = "0.4"
|
||||||
hex-literal = "0.4"
|
hex-literal = "0.4"
|
||||||
hmac = "0.12.1"
|
hmac = "0.12.1"
|
||||||
hostname = "0.3.1"
|
hostname = "0.4"
|
||||||
http = {version = "1.1.0", features = ["std"]}
|
http = {version = "1.1.0", features = ["std"]}
|
||||||
http-types = { version = "2", default-features = false }
|
http-types = { version = "2", default-features = false }
|
||||||
humantime = "2.1"
|
humantime = "2.1"
|
||||||
@@ -104,7 +102,6 @@ hyper = "0.14"
|
|||||||
tokio-tungstenite = "0.20.0"
|
tokio-tungstenite = "0.20.0"
|
||||||
indexmap = "2"
|
indexmap = "2"
|
||||||
indoc = "2"
|
indoc = "2"
|
||||||
inotify = "0.10.2"
|
|
||||||
ipnet = "2.9.0"
|
ipnet = "2.9.0"
|
||||||
itertools = "0.10"
|
itertools = "0.10"
|
||||||
jsonwebtoken = "9"
|
jsonwebtoken = "9"
|
||||||
@@ -113,7 +110,7 @@ libc = "0.2"
|
|||||||
md5 = "0.7.0"
|
md5 = "0.7.0"
|
||||||
measured = { version = "0.0.22", features=["lasso"] }
|
measured = { version = "0.0.22", features=["lasso"] }
|
||||||
measured-process = { version = "0.0.22" }
|
measured-process = { version = "0.0.22" }
|
||||||
memoffset = "0.8"
|
memoffset = "0.9"
|
||||||
nix = { version = "0.27", features = ["dir", "fs", "process", "socket", "signal", "poll"] }
|
nix = { version = "0.27", features = ["dir", "fs", "process", "socket", "signal", "poll"] }
|
||||||
notify = "6.0.0"
|
notify = "6.0.0"
|
||||||
num_cpus = "1.15"
|
num_cpus = "1.15"
|
||||||
@@ -142,7 +139,6 @@ rpds = "0.13"
|
|||||||
rustc-hash = "1.1.0"
|
rustc-hash = "1.1.0"
|
||||||
rustls = "0.22"
|
rustls = "0.22"
|
||||||
rustls-pemfile = "2"
|
rustls-pemfile = "2"
|
||||||
rustls-split = "0.3"
|
|
||||||
scopeguard = "1.1"
|
scopeguard = "1.1"
|
||||||
sysinfo = "0.29.2"
|
sysinfo = "0.29.2"
|
||||||
sd-notify = "0.4.1"
|
sd-notify = "0.4.1"
|
||||||
@@ -164,7 +160,6 @@ strum_macros = "0.26"
|
|||||||
svg_fmt = "0.4.3"
|
svg_fmt = "0.4.3"
|
||||||
sync_wrapper = "0.1.2"
|
sync_wrapper = "0.1.2"
|
||||||
tar = "0.4"
|
tar = "0.4"
|
||||||
task-local-extensions = "0.1.4"
|
|
||||||
test-context = "0.3"
|
test-context = "0.3"
|
||||||
thiserror = "1.0"
|
thiserror = "1.0"
|
||||||
tikv-jemallocator = "0.5"
|
tikv-jemallocator = "0.5"
|
||||||
|
|||||||
@@ -3,13 +3,15 @@ ARG REPOSITORY=neondatabase
|
|||||||
ARG IMAGE=build-tools
|
ARG IMAGE=build-tools
|
||||||
ARG TAG=pinned
|
ARG TAG=pinned
|
||||||
ARG BUILD_TAG
|
ARG BUILD_TAG
|
||||||
|
ARG DEBIAN_FLAVOR=bullseye-slim
|
||||||
|
|
||||||
#########################################################################################
|
#########################################################################################
|
||||||
#
|
#
|
||||||
# Layer "build-deps"
|
# Layer "build-deps"
|
||||||
#
|
#
|
||||||
#########################################################################################
|
#########################################################################################
|
||||||
FROM debian:bullseye-slim AS build-deps
|
FROM debian:$DEBIAN_FLAVOR AS build-deps
|
||||||
|
ARG DEBIAN_FLAVOR
|
||||||
RUN apt update && \
|
RUN apt update && \
|
||||||
apt install -y git autoconf automake libtool build-essential bison flex libreadline-dev \
|
apt install -y git autoconf automake libtool build-essential bison flex libreadline-dev \
|
||||||
zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget pkg-config libssl-dev \
|
zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget pkg-config libssl-dev \
|
||||||
@@ -280,7 +282,7 @@ FROM build-deps AS vector-pg-build
|
|||||||
ARG PG_VERSION
|
ARG PG_VERSION
|
||||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||||
|
|
||||||
COPY patches/pgvector.patch /pgvector.patch
|
COPY compute/patches/pgvector.patch /pgvector.patch
|
||||||
|
|
||||||
# By default, pgvector Makefile uses `-march=native`. We don't want that,
|
# By default, pgvector Makefile uses `-march=native`. We don't want that,
|
||||||
# because we build the images on different machines than where we run them.
|
# because we build the images on different machines than where we run them.
|
||||||
@@ -366,7 +368,7 @@ FROM build-deps AS rum-pg-build
|
|||||||
ARG PG_VERSION
|
ARG PG_VERSION
|
||||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||||
|
|
||||||
COPY patches/rum.patch /rum.patch
|
COPY compute/patches/rum.patch /rum.patch
|
||||||
|
|
||||||
RUN case "${PG_VERSION}" in "v17") \
|
RUN case "${PG_VERSION}" in "v17") \
|
||||||
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
|
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
|
||||||
@@ -1027,10 +1029,47 @@ RUN cd compute_tools && mold -run cargo build --locked --profile release-line-de
|
|||||||
#
|
#
|
||||||
#########################################################################################
|
#########################################################################################
|
||||||
|
|
||||||
FROM debian:bullseye-slim AS compute-tools-image
|
FROM debian:$DEBIAN_FLAVOR AS compute-tools-image
|
||||||
|
ARG DEBIAN_FLAVOR
|
||||||
|
|
||||||
COPY --from=compute-tools /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl
|
COPY --from=compute-tools /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl
|
||||||
|
|
||||||
|
#########################################################################################
|
||||||
|
#
|
||||||
|
# Layer "pgbouncer"
|
||||||
|
#
|
||||||
|
#########################################################################################
|
||||||
|
|
||||||
|
FROM debian:$DEBIAN_FLAVOR AS pgbouncer
|
||||||
|
ARG DEBIAN_FLAVOR
|
||||||
|
RUN set -e \
|
||||||
|
&& apt-get update \
|
||||||
|
&& apt-get install -y \
|
||||||
|
build-essential \
|
||||||
|
git \
|
||||||
|
libevent-dev \
|
||||||
|
libtool \
|
||||||
|
pkg-config
|
||||||
|
|
||||||
|
# Use `dist_man_MANS=` to skip manpage generation (which requires python3/pandoc)
|
||||||
|
ENV PGBOUNCER_TAG=pgbouncer_1_22_1
|
||||||
|
RUN set -e \
|
||||||
|
&& git clone --recurse-submodules --depth 1 --branch ${PGBOUNCER_TAG} https://github.com/pgbouncer/pgbouncer.git pgbouncer \
|
||||||
|
&& cd pgbouncer \
|
||||||
|
&& ./autogen.sh \
|
||||||
|
&& LDFLAGS=-static ./configure --prefix=/usr/local/pgbouncer --without-openssl \
|
||||||
|
&& make -j $(nproc) dist_man_MANS= \
|
||||||
|
&& make install dist_man_MANS=
|
||||||
|
|
||||||
|
#########################################################################################
|
||||||
|
#
|
||||||
|
# Layers "postgres-exporter" and "sql-exporter"
|
||||||
|
#
|
||||||
|
#########################################################################################
|
||||||
|
|
||||||
|
FROM quay.io/prometheuscommunity/postgres-exporter:v0.12.1 AS postgres-exporter
|
||||||
|
FROM burningalchemist/sql_exporter:0.13 AS sql-exporter
|
||||||
|
|
||||||
#########################################################################################
|
#########################################################################################
|
||||||
#
|
#
|
||||||
# Clean up postgres folder before inclusion
|
# Clean up postgres folder before inclusion
|
||||||
@@ -1078,7 +1117,7 @@ COPY --from=pgjwt-pg-build /pgjwt.tar.gz /ext-src
|
|||||||
COPY --from=hypopg-pg-build /hypopg.tar.gz /ext-src
|
COPY --from=hypopg-pg-build /hypopg.tar.gz /ext-src
|
||||||
COPY --from=pg-hashids-pg-build /pg_hashids.tar.gz /ext-src
|
COPY --from=pg-hashids-pg-build /pg_hashids.tar.gz /ext-src
|
||||||
COPY --from=rum-pg-build /rum.tar.gz /ext-src
|
COPY --from=rum-pg-build /rum.tar.gz /ext-src
|
||||||
COPY patches/rum.patch /ext-src
|
COPY compute/patches/rum.patch /ext-src
|
||||||
#COPY --from=pgtap-pg-build /pgtap.tar.gz /ext-src
|
#COPY --from=pgtap-pg-build /pgtap.tar.gz /ext-src
|
||||||
COPY --from=ip4r-pg-build /ip4r.tar.gz /ext-src
|
COPY --from=ip4r-pg-build /ip4r.tar.gz /ext-src
|
||||||
COPY --from=prefix-pg-build /prefix.tar.gz /ext-src
|
COPY --from=prefix-pg-build /prefix.tar.gz /ext-src
|
||||||
@@ -1086,9 +1125,9 @@ COPY --from=hll-pg-build /hll.tar.gz /ext-src
|
|||||||
COPY --from=plpgsql-check-pg-build /plpgsql_check.tar.gz /ext-src
|
COPY --from=plpgsql-check-pg-build /plpgsql_check.tar.gz /ext-src
|
||||||
#COPY --from=timescaledb-pg-build /timescaledb.tar.gz /ext-src
|
#COPY --from=timescaledb-pg-build /timescaledb.tar.gz /ext-src
|
||||||
COPY --from=pg-hint-plan-pg-build /pg_hint_plan.tar.gz /ext-src
|
COPY --from=pg-hint-plan-pg-build /pg_hint_plan.tar.gz /ext-src
|
||||||
COPY patches/pg_hint_plan.patch /ext-src
|
COPY compute/patches/pg_hint_plan.patch /ext-src
|
||||||
COPY --from=pg-cron-pg-build /pg_cron.tar.gz /ext-src
|
COPY --from=pg-cron-pg-build /pg_cron.tar.gz /ext-src
|
||||||
COPY patches/pg_cron.patch /ext-src
|
COPY compute/patches/pg_cron.patch /ext-src
|
||||||
#COPY --from=pg-pgx-ulid-build /home/nonroot/pgx_ulid.tar.gz /ext-src
|
#COPY --from=pg-pgx-ulid-build /home/nonroot/pgx_ulid.tar.gz /ext-src
|
||||||
#COPY --from=rdkit-pg-build /rdkit.tar.gz /ext-src
|
#COPY --from=rdkit-pg-build /rdkit.tar.gz /ext-src
|
||||||
COPY --from=pg-uuidv7-pg-build /pg_uuidv7.tar.gz /ext-src
|
COPY --from=pg-uuidv7-pg-build /pg_uuidv7.tar.gz /ext-src
|
||||||
@@ -1097,7 +1136,7 @@ COPY --from=pg-semver-pg-build /pg_semver.tar.gz /ext-src
|
|||||||
#COPY --from=pg-embedding-pg-build /home/nonroot/pg_embedding-src/ /ext-src
|
#COPY --from=pg-embedding-pg-build /home/nonroot/pg_embedding-src/ /ext-src
|
||||||
#COPY --from=wal2json-pg-build /wal2json_2_5.tar.gz /ext-src
|
#COPY --from=wal2json-pg-build /wal2json_2_5.tar.gz /ext-src
|
||||||
COPY --from=pg-anon-pg-build /pg_anon.tar.gz /ext-src
|
COPY --from=pg-anon-pg-build /pg_anon.tar.gz /ext-src
|
||||||
COPY patches/pg_anon.patch /ext-src
|
COPY compute/patches/pg_anon.patch /ext-src
|
||||||
COPY --from=pg-ivm-build /pg_ivm.tar.gz /ext-src
|
COPY --from=pg-ivm-build /pg_ivm.tar.gz /ext-src
|
||||||
COPY --from=pg-partman-build /pg_partman.tar.gz /ext-src
|
COPY --from=pg-partman-build /pg_partman.tar.gz /ext-src
|
||||||
RUN case "${PG_VERSION}" in "v17") \
|
RUN case "${PG_VERSION}" in "v17") \
|
||||||
@@ -1144,7 +1183,9 @@ ENV PGDATABASE=postgres
|
|||||||
# Put it all together into the final image
|
# Put it all together into the final image
|
||||||
#
|
#
|
||||||
#########################################################################################
|
#########################################################################################
|
||||||
FROM debian:bullseye-slim
|
FROM debian:$DEBIAN_FLAVOR
|
||||||
|
ARG DEBIAN_FLAVOR
|
||||||
|
ENV DEBIAN_FLAVOR=$DEBIAN_FLAVOR
|
||||||
# Add user postgres
|
# Add user postgres
|
||||||
RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
|
RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
|
||||||
echo "postgres:test_console_pass" | chpasswd && \
|
echo "postgres:test_console_pass" | chpasswd && \
|
||||||
@@ -1160,23 +1201,50 @@ RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
|
|||||||
COPY --from=postgres-cleanup-layer --chown=postgres /usr/local/pgsql /usr/local
|
COPY --from=postgres-cleanup-layer --chown=postgres /usr/local/pgsql /usr/local
|
||||||
COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl
|
COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl
|
||||||
|
|
||||||
|
# pgbouncer and its config
|
||||||
|
COPY --from=pgbouncer /usr/local/pgbouncer/bin/pgbouncer /usr/local/bin/pgbouncer
|
||||||
|
COPY --chmod=0666 --chown=postgres compute/etc/pgbouncer.ini /etc/pgbouncer.ini
|
||||||
|
|
||||||
|
# Metrics exporter binaries and configuration files
|
||||||
|
COPY --from=postgres-exporter /bin/postgres_exporter /bin/postgres_exporter
|
||||||
|
COPY --from=sql-exporter /bin/sql_exporter /bin/sql_exporter
|
||||||
|
|
||||||
|
COPY --chmod=0644 compute/etc/sql_exporter.yml /etc/sql_exporter.yml
|
||||||
|
COPY --chmod=0644 compute/etc/neon_collector.yml /etc/neon_collector.yml
|
||||||
|
COPY --chmod=0644 compute/etc/sql_exporter_autoscaling.yml /etc/sql_exporter_autoscaling.yml
|
||||||
|
COPY --chmod=0644 compute/etc/neon_collector_autoscaling.yml /etc/neon_collector_autoscaling.yml
|
||||||
|
|
||||||
# Create remote extension download directory
|
# Create remote extension download directory
|
||||||
RUN mkdir /usr/local/download_extensions && chown -R postgres:postgres /usr/local/download_extensions
|
RUN mkdir /usr/local/download_extensions && chown -R postgres:postgres /usr/local/download_extensions
|
||||||
|
|
||||||
# Install:
|
# Install:
|
||||||
# libreadline8 for psql
|
# libreadline8 for psql
|
||||||
# libicu67, locales for collations (including ICU and plpgsql_check)
|
|
||||||
# liblz4-1 for lz4
|
# liblz4-1 for lz4
|
||||||
# libossp-uuid16 for extension ossp-uuid
|
# libossp-uuid16 for extension ossp-uuid
|
||||||
# libgeos, libgdal, libsfcgal1, libproj and libprotobuf-c1 for PostGIS
|
# libgeos, libsfcgal1, and libprotobuf-c1 for PostGIS
|
||||||
# libxml2, libxslt1.1 for xml2
|
# libxml2, libxslt1.1 for xml2
|
||||||
# libzstd1 for zstd
|
# libzstd1 for zstd
|
||||||
# libboost* for rdkit
|
# libboost* for rdkit
|
||||||
# ca-certificates for communicating with s3 by compute_ctl
|
# ca-certificates for communicating with s3 by compute_ctl
|
||||||
RUN apt update && \
|
|
||||||
|
|
||||||
|
RUN apt update && \
|
||||||
|
case $DEBIAN_FLAVOR in \
|
||||||
|
# Version-specific installs for Bullseye (PG14-PG16):
|
||||||
|
# libicu67, locales for collations (including ICU and plpgsql_check)
|
||||||
|
# libgdal28, libproj19 for PostGIS
|
||||||
|
bullseye*) \
|
||||||
|
VERSION_INSTALLS="libicu67 libgdal28 libproj19"; \
|
||||||
|
;; \
|
||||||
|
# Version-specific installs for Bookworm (PG17):
|
||||||
|
# libicu72, locales for collations (including ICU and plpgsql_check)
|
||||||
|
# libgdal32, libproj25 for PostGIS
|
||||||
|
bookworm*) \
|
||||||
|
VERSION_INSTALLS="libicu72 libgdal32 libproj25"; \
|
||||||
|
;; \
|
||||||
|
esac && \
|
||||||
apt install --no-install-recommends -y \
|
apt install --no-install-recommends -y \
|
||||||
gdb \
|
gdb \
|
||||||
libicu67 \
|
|
||||||
liblz4-1 \
|
liblz4-1 \
|
||||||
libreadline8 \
|
libreadline8 \
|
||||||
libboost-iostreams1.74.0 \
|
libboost-iostreams1.74.0 \
|
||||||
@@ -1185,8 +1253,6 @@ RUN apt update && \
|
|||||||
libboost-system1.74.0 \
|
libboost-system1.74.0 \
|
||||||
libossp-uuid16 \
|
libossp-uuid16 \
|
||||||
libgeos-c1v5 \
|
libgeos-c1v5 \
|
||||||
libgdal28 \
|
|
||||||
libproj19 \
|
|
||||||
libprotobuf-c1 \
|
libprotobuf-c1 \
|
||||||
libsfcgal1 \
|
libsfcgal1 \
|
||||||
libxml2 \
|
libxml2 \
|
||||||
@@ -1195,7 +1261,8 @@ RUN apt update && \
|
|||||||
libcurl4-openssl-dev \
|
libcurl4-openssl-dev \
|
||||||
locales \
|
locales \
|
||||||
procps \
|
procps \
|
||||||
ca-certificates && \
|
ca-certificates \
|
||||||
|
$VERSION_INSTALLS && \
|
||||||
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
|
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
|
||||||
localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8
|
localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8
|
||||||
|
|
||||||
21
compute/README.md
Normal file
21
compute/README.md
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
This directory contains files that are needed to build the compute
|
||||||
|
images, or included in the compute images.
|
||||||
|
|
||||||
|
Dockerfile.compute-node
|
||||||
|
To build the compute image
|
||||||
|
|
||||||
|
vm-image-spec.yaml
|
||||||
|
Instructions for vm-builder, to turn the compute-node image into
|
||||||
|
corresponding vm-compute-node image.
|
||||||
|
|
||||||
|
etc/
|
||||||
|
Configuration files included in /etc in the compute image
|
||||||
|
|
||||||
|
patches/
|
||||||
|
Some extensions need to be patched to work with Neon. This
|
||||||
|
directory contains such patches. They are applied to the extension
|
||||||
|
sources in Dockerfile.compute-node
|
||||||
|
|
||||||
|
In addition to these, postgres itself, the neon postgres extension,
|
||||||
|
and compute_ctl are built and copied into the compute image by
|
||||||
|
Dockerfile.compute-node.
|
||||||
246
compute/etc/neon_collector.yml
Normal file
246
compute/etc/neon_collector.yml
Normal file
@@ -0,0 +1,246 @@
|
|||||||
|
collector_name: neon_collector
|
||||||
|
metrics:
|
||||||
|
- metric_name: lfc_misses
|
||||||
|
type: gauge
|
||||||
|
help: 'lfc_misses'
|
||||||
|
key_labels:
|
||||||
|
values: [lfc_misses]
|
||||||
|
query: |
|
||||||
|
select lfc_value as lfc_misses from neon.neon_lfc_stats where lfc_key='file_cache_misses';
|
||||||
|
|
||||||
|
- metric_name: lfc_used
|
||||||
|
type: gauge
|
||||||
|
help: 'LFC chunks used (chunk = 1MB)'
|
||||||
|
key_labels:
|
||||||
|
values: [lfc_used]
|
||||||
|
query: |
|
||||||
|
select lfc_value as lfc_used from neon.neon_lfc_stats where lfc_key='file_cache_used';
|
||||||
|
|
||||||
|
- metric_name: lfc_hits
|
||||||
|
type: gauge
|
||||||
|
help: 'lfc_hits'
|
||||||
|
key_labels:
|
||||||
|
values: [lfc_hits]
|
||||||
|
query: |
|
||||||
|
select lfc_value as lfc_hits from neon.neon_lfc_stats where lfc_key='file_cache_hits';
|
||||||
|
|
||||||
|
- metric_name: lfc_writes
|
||||||
|
type: gauge
|
||||||
|
help: 'lfc_writes'
|
||||||
|
key_labels:
|
||||||
|
values: [lfc_writes]
|
||||||
|
query: |
|
||||||
|
select lfc_value as lfc_writes from neon.neon_lfc_stats where lfc_key='file_cache_writes';
|
||||||
|
|
||||||
|
- metric_name: lfc_cache_size_limit
|
||||||
|
type: gauge
|
||||||
|
help: 'LFC cache size limit in bytes'
|
||||||
|
key_labels:
|
||||||
|
values: [lfc_cache_size_limit]
|
||||||
|
query: |
|
||||||
|
select pg_size_bytes(current_setting('neon.file_cache_size_limit')) as lfc_cache_size_limit;
|
||||||
|
|
||||||
|
- metric_name: connection_counts
|
||||||
|
type: gauge
|
||||||
|
help: 'Connection counts'
|
||||||
|
key_labels:
|
||||||
|
- datname
|
||||||
|
- state
|
||||||
|
values: [count]
|
||||||
|
query: |
|
||||||
|
select datname, state, count(*) as count from pg_stat_activity where state <> '' group by datname, state;
|
||||||
|
|
||||||
|
- metric_name: pg_stats_userdb
|
||||||
|
type: gauge
|
||||||
|
help: 'Stats for several oldest non-system dbs'
|
||||||
|
key_labels:
|
||||||
|
- datname
|
||||||
|
value_label: kind
|
||||||
|
values:
|
||||||
|
- db_size
|
||||||
|
- deadlocks
|
||||||
|
# Rows
|
||||||
|
- inserted
|
||||||
|
- updated
|
||||||
|
- deleted
|
||||||
|
# We export stats for 10 non-system databases. Without this limit
|
||||||
|
# it is too easy to abuse the system by creating lots of databases.
|
||||||
|
query: |
|
||||||
|
select pg_database_size(datname) as db_size, deadlocks,
|
||||||
|
tup_inserted as inserted, tup_updated as updated, tup_deleted as deleted,
|
||||||
|
datname
|
||||||
|
from pg_stat_database
|
||||||
|
where datname IN (
|
||||||
|
select datname
|
||||||
|
from pg_database
|
||||||
|
where datname <> 'postgres' and not datistemplate
|
||||||
|
order by oid
|
||||||
|
limit 10
|
||||||
|
);
|
||||||
|
|
||||||
|
- metric_name: max_cluster_size
|
||||||
|
type: gauge
|
||||||
|
help: 'neon.max_cluster_size setting'
|
||||||
|
key_labels:
|
||||||
|
values: [max_cluster_size]
|
||||||
|
query: |
|
||||||
|
select setting::int as max_cluster_size from pg_settings where name = 'neon.max_cluster_size';
|
||||||
|
|
||||||
|
- metric_name: db_total_size
|
||||||
|
type: gauge
|
||||||
|
help: 'Size of all databases'
|
||||||
|
key_labels:
|
||||||
|
values: [total]
|
||||||
|
query: |
|
||||||
|
select sum(pg_database_size(datname)) as total from pg_database;
|
||||||
|
|
||||||
|
# DEPRECATED
|
||||||
|
- metric_name: lfc_approximate_working_set_size
|
||||||
|
type: gauge
|
||||||
|
help: 'Approximate working set size in pages of 8192 bytes'
|
||||||
|
key_labels:
|
||||||
|
values: [approximate_working_set_size]
|
||||||
|
query: |
|
||||||
|
select neon.approximate_working_set_size(false) as approximate_working_set_size;
|
||||||
|
|
||||||
|
- metric_name: lfc_approximate_working_set_size_windows
|
||||||
|
type: gauge
|
||||||
|
help: 'Approximate working set size in pages of 8192 bytes'
|
||||||
|
key_labels: [duration]
|
||||||
|
values: [size]
|
||||||
|
# NOTE: This is the "public" / "human-readable" version. Here, we supply a small selection
|
||||||
|
# of durations in a pretty-printed form.
|
||||||
|
query: |
|
||||||
|
select
|
||||||
|
x as duration,
|
||||||
|
neon.approximate_working_set_size_seconds(extract('epoch' from x::interval)::int) as size
|
||||||
|
from
|
||||||
|
(values ('5m'),('15m'),('1h')) as t (x);
|
||||||
|
|
||||||
|
- metric_name: compute_current_lsn
|
||||||
|
type: gauge
|
||||||
|
help: 'Current LSN of the database'
|
||||||
|
key_labels:
|
||||||
|
values: [lsn]
|
||||||
|
query: |
|
||||||
|
select
|
||||||
|
case
|
||||||
|
when pg_catalog.pg_is_in_recovery()
|
||||||
|
then (pg_last_wal_replay_lsn() - '0/0')::FLOAT8
|
||||||
|
else (pg_current_wal_lsn() - '0/0')::FLOAT8
|
||||||
|
end as lsn;
|
||||||
|
|
||||||
|
- metric_name: compute_receive_lsn
|
||||||
|
type: gauge
|
||||||
|
help: 'Returns the last write-ahead log location that has been received and synced to disk by streaming replication'
|
||||||
|
key_labels:
|
||||||
|
values: [lsn]
|
||||||
|
query: |
|
||||||
|
SELECT
|
||||||
|
CASE
|
||||||
|
WHEN pg_catalog.pg_is_in_recovery()
|
||||||
|
THEN (pg_last_wal_receive_lsn() - '0/0')::FLOAT8
|
||||||
|
ELSE 0
|
||||||
|
END AS lsn;
|
||||||
|
|
||||||
|
- metric_name: replication_delay_bytes
|
||||||
|
type: gauge
|
||||||
|
help: 'Bytes between received and replayed LSN'
|
||||||
|
key_labels:
|
||||||
|
values: [replication_delay_bytes]
|
||||||
|
# We use a GREATEST call here because this calculation can be negative.
|
||||||
|
# The calculation is not atomic, meaning after we've gotten the receive
|
||||||
|
# LSN, the replay LSN may have advanced past the receive LSN we
|
||||||
|
# are using for the calculation.
|
||||||
|
query: |
|
||||||
|
SELECT GREATEST(0, pg_wal_lsn_diff(pg_last_wal_receive_lsn(), pg_last_wal_replay_lsn())) AS replication_delay_bytes;
|
||||||
|
|
||||||
|
- metric_name: replication_delay_seconds
|
||||||
|
type: gauge
|
||||||
|
help: 'Time since last LSN was replayed'
|
||||||
|
key_labels:
|
||||||
|
values: [replication_delay_seconds]
|
||||||
|
query: |
|
||||||
|
SELECT
|
||||||
|
CASE
|
||||||
|
WHEN pg_last_wal_receive_lsn() = pg_last_wal_replay_lsn() THEN 0
|
||||||
|
ELSE GREATEST (0, EXTRACT (EPOCH FROM now() - pg_last_xact_replay_timestamp()))
|
||||||
|
END AS replication_delay_seconds;
|
||||||
|
|
||||||
|
- metric_name: checkpoints_req
|
||||||
|
type: gauge
|
||||||
|
help: 'Number of requested checkpoints'
|
||||||
|
key_labels:
|
||||||
|
values: [checkpoints_req]
|
||||||
|
query: |
|
||||||
|
SELECT checkpoints_req FROM pg_stat_bgwriter;
|
||||||
|
|
||||||
|
- metric_name: checkpoints_timed
|
||||||
|
type: gauge
|
||||||
|
help: 'Number of scheduled checkpoints'
|
||||||
|
key_labels:
|
||||||
|
values: [checkpoints_timed]
|
||||||
|
query: |
|
||||||
|
SELECT checkpoints_timed FROM pg_stat_bgwriter;
|
||||||
|
|
||||||
|
- metric_name: compute_logical_snapshot_files
|
||||||
|
type: gauge
|
||||||
|
help: 'Number of snapshot files in pg_logical/snapshot'
|
||||||
|
key_labels:
|
||||||
|
- timeline_id
|
||||||
|
values: [num_logical_snapshot_files]
|
||||||
|
query: |
|
||||||
|
SELECT
|
||||||
|
(SELECT setting FROM pg_settings WHERE name = 'neon.timeline_id') AS timeline_id,
|
||||||
|
-- Postgres creates temporary snapshot files of the form %X-%X.snap.%d.tmp. These
|
||||||
|
-- temporary snapshot files are renamed to the actual snapshot files after they are
|
||||||
|
-- completely built. We only WAL-log the completely built snapshot files.
|
||||||
|
(SELECT COUNT(*) FROM pg_ls_dir('pg_logical/snapshots') AS name WHERE name LIKE '%.snap') AS num_logical_snapshot_files;
|
||||||
|
|
||||||
|
# In all the below metrics, we cast LSNs to floats because Prometheus only supports floats.
|
||||||
|
# It's probably fine because float64 can store integers from -2^53 to +2^53 exactly.
|
||||||
|
|
||||||
|
# Number of slots is limited by max_replication_slots, so collecting position for all of them shouldn't be bad.
|
||||||
|
- metric_name: logical_slot_restart_lsn
|
||||||
|
type: gauge
|
||||||
|
help: 'restart_lsn of logical slots'
|
||||||
|
key_labels:
|
||||||
|
- slot_name
|
||||||
|
values: [restart_lsn]
|
||||||
|
query: |
|
||||||
|
select slot_name, (restart_lsn - '0/0')::FLOAT8 as restart_lsn
|
||||||
|
from pg_replication_slots
|
||||||
|
where slot_type = 'logical';
|
||||||
|
|
||||||
|
- metric_name: compute_subscriptions_count
|
||||||
|
type: gauge
|
||||||
|
help: 'Number of logical replication subscriptions grouped by enabled/disabled'
|
||||||
|
key_labels:
|
||||||
|
- enabled
|
||||||
|
values: [subscriptions_count]
|
||||||
|
query: |
|
||||||
|
select subenabled::text as enabled, count(*) as subscriptions_count
|
||||||
|
from pg_subscription
|
||||||
|
group by subenabled;
|
||||||
|
|
||||||
|
- metric_name: retained_wal
|
||||||
|
type: gauge
|
||||||
|
help: 'Retained WAL in inactive replication slots'
|
||||||
|
key_labels:
|
||||||
|
- slot_name
|
||||||
|
values: [retained_wal]
|
||||||
|
query: |
|
||||||
|
SELECT slot_name, pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn)::FLOAT8 AS retained_wal
|
||||||
|
FROM pg_replication_slots
|
||||||
|
WHERE active = false;
|
||||||
|
|
||||||
|
- metric_name: wal_is_lost
|
||||||
|
type: gauge
|
||||||
|
help: 'Whether or not the replication slot wal_status is lost'
|
||||||
|
key_labels:
|
||||||
|
- slot_name
|
||||||
|
values: [wal_is_lost]
|
||||||
|
query: |
|
||||||
|
SELECT slot_name,
|
||||||
|
CASE WHEN wal_status = 'lost' THEN 1 ELSE 0 END AS wal_is_lost
|
||||||
|
FROM pg_replication_slots;
|
||||||
55
compute/etc/neon_collector_autoscaling.yml
Normal file
55
compute/etc/neon_collector_autoscaling.yml
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
collector_name: neon_collector_autoscaling
|
||||||
|
metrics:
|
||||||
|
- metric_name: lfc_misses
|
||||||
|
type: gauge
|
||||||
|
help: 'lfc_misses'
|
||||||
|
key_labels:
|
||||||
|
values: [lfc_misses]
|
||||||
|
query: |
|
||||||
|
select lfc_value as lfc_misses from neon.neon_lfc_stats where lfc_key='file_cache_misses';
|
||||||
|
|
||||||
|
- metric_name: lfc_used
|
||||||
|
type: gauge
|
||||||
|
help: 'LFC chunks used (chunk = 1MB)'
|
||||||
|
key_labels:
|
||||||
|
values: [lfc_used]
|
||||||
|
query: |
|
||||||
|
select lfc_value as lfc_used from neon.neon_lfc_stats where lfc_key='file_cache_used';
|
||||||
|
|
||||||
|
- metric_name: lfc_hits
|
||||||
|
type: gauge
|
||||||
|
help: 'lfc_hits'
|
||||||
|
key_labels:
|
||||||
|
values: [lfc_hits]
|
||||||
|
query: |
|
||||||
|
select lfc_value as lfc_hits from neon.neon_lfc_stats where lfc_key='file_cache_hits';
|
||||||
|
|
||||||
|
- metric_name: lfc_writes
|
||||||
|
type: gauge
|
||||||
|
help: 'lfc_writes'
|
||||||
|
key_labels:
|
||||||
|
values: [lfc_writes]
|
||||||
|
query: |
|
||||||
|
select lfc_value as lfc_writes from neon.neon_lfc_stats where lfc_key='file_cache_writes';
|
||||||
|
|
||||||
|
- metric_name: lfc_cache_size_limit
|
||||||
|
type: gauge
|
||||||
|
help: 'LFC cache size limit in bytes'
|
||||||
|
key_labels:
|
||||||
|
values: [lfc_cache_size_limit]
|
||||||
|
query: |
|
||||||
|
select pg_size_bytes(current_setting('neon.file_cache_size_limit')) as lfc_cache_size_limit;
|
||||||
|
|
||||||
|
- metric_name: lfc_approximate_working_set_size_windows
|
||||||
|
type: gauge
|
||||||
|
help: 'Approximate working set size in pages of 8192 bytes'
|
||||||
|
key_labels: [duration_seconds]
|
||||||
|
values: [size]
|
||||||
|
# NOTE: This is the "internal" / "machine-readable" version. This outputs the working set
|
||||||
|
# size looking back 1..60 minutes, labeled with the window length in seconds.
|
||||||
|
query: |
|
||||||
|
select
|
||||||
|
x::text as duration_seconds,
|
||||||
|
neon.approximate_working_set_size_seconds(x) as size
|
||||||
|
from
|
||||||
|
(select generate_series * 60 as x from generate_series(1, 60)) as t (x);
|
||||||
17
compute/etc/pgbouncer.ini
Normal file
17
compute/etc/pgbouncer.ini
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
[databases]
|
||||||
|
*=host=localhost port=5432 auth_user=cloud_admin
|
||||||
|
[pgbouncer]
|
||||||
|
listen_port=6432
|
||||||
|
listen_addr=0.0.0.0
|
||||||
|
auth_type=scram-sha-256
|
||||||
|
auth_user=cloud_admin
|
||||||
|
auth_dbname=postgres
|
||||||
|
client_tls_sslmode=disable
|
||||||
|
server_tls_sslmode=disable
|
||||||
|
pool_mode=transaction
|
||||||
|
max_client_conn=10000
|
||||||
|
default_pool_size=64
|
||||||
|
max_prepared_statements=0
|
||||||
|
admin_users=postgres
|
||||||
|
unix_socket_dir=/tmp/
|
||||||
|
unix_socket_mode=0777
|
||||||
33
compute/etc/sql_exporter.yml
Normal file
33
compute/etc/sql_exporter.yml
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
# Configuration for sql_exporter
|
||||||
|
# Global defaults.
|
||||||
|
global:
|
||||||
|
# If scrape_timeout <= 0, no timeout is set unless Prometheus provides one. The default is 10s.
|
||||||
|
scrape_timeout: 10s
|
||||||
|
# Subtracted from Prometheus' scrape_timeout to give us some headroom and prevent Prometheus from timing out first.
|
||||||
|
scrape_timeout_offset: 500ms
|
||||||
|
# Minimum interval between collector runs: by default (0s) collectors are executed on every scrape.
|
||||||
|
min_interval: 0s
|
||||||
|
# Maximum number of open connections to any one target. Metric queries will run concurrently on multiple connections,
|
||||||
|
# as will concurrent scrapes.
|
||||||
|
max_connections: 1
|
||||||
|
# Maximum number of idle connections to any one target. Unless you use very long collection intervals, this should
|
||||||
|
# always be the same as max_connections.
|
||||||
|
max_idle_connections: 1
|
||||||
|
# Maximum amount of time a connection may be reused. Expired connections may be closed lazily before reuse.
|
||||||
|
# If 0, connections are not closed due to a connection's age.
|
||||||
|
max_connection_lifetime: 5m
|
||||||
|
|
||||||
|
# The target to monitor and the collectors to execute on it.
|
||||||
|
target:
|
||||||
|
# Data source name always has a URI schema that matches the driver name. In some cases (e.g. MySQL)
|
||||||
|
# the schema gets dropped or replaced to match the driver expected DSN format.
|
||||||
|
data_source_name: 'postgresql://cloud_admin@127.0.0.1:5432/postgres?sslmode=disable&application_name=sql_exporter'
|
||||||
|
|
||||||
|
# Collectors (referenced by name) to execute on the target.
|
||||||
|
# Glob patterns are supported (see <https://pkg.go.dev/path/filepath#Match> for syntax).
|
||||||
|
collectors: [neon_collector]
|
||||||
|
|
||||||
|
# `collector_files` specifies a list of globs. One collector definition is read from each matching file.
|
||||||
|
# Glob patterns are supported (see <https://pkg.go.dev/path/filepath#Match> for syntax).
|
||||||
|
collector_files:
|
||||||
|
- "neon_collector.yml"
|
||||||
33
compute/etc/sql_exporter_autoscaling.yml
Normal file
33
compute/etc/sql_exporter_autoscaling.yml
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
# Configuration for sql_exporter for autoscaling-agent
|
||||||
|
# Global defaults.
|
||||||
|
global:
|
||||||
|
# If scrape_timeout <= 0, no timeout is set unless Prometheus provides one. The default is 10s.
|
||||||
|
scrape_timeout: 10s
|
||||||
|
# Subtracted from Prometheus' scrape_timeout to give us some headroom and prevent Prometheus from timing out first.
|
||||||
|
scrape_timeout_offset: 500ms
|
||||||
|
# Minimum interval between collector runs: by default (0s) collectors are executed on every scrape.
|
||||||
|
min_interval: 0s
|
||||||
|
# Maximum number of open connections to any one target. Metric queries will run concurrently on multiple connections,
|
||||||
|
# as will concurrent scrapes.
|
||||||
|
max_connections: 1
|
||||||
|
# Maximum number of idle connections to any one target. Unless you use very long collection intervals, this should
|
||||||
|
# always be the same as max_connections.
|
||||||
|
max_idle_connections: 1
|
||||||
|
# Maximum amount of time a connection may be reused. Expired connections may be closed lazily before reuse.
|
||||||
|
# If 0, connections are not closed due to a connection's age.
|
||||||
|
max_connection_lifetime: 5m
|
||||||
|
|
||||||
|
# The target to monitor and the collectors to execute on it.
|
||||||
|
target:
|
||||||
|
# Data source name always has a URI schema that matches the driver name. In some cases (e.g. MySQL)
|
||||||
|
# the schema gets dropped or replaced to match the driver expected DSN format.
|
||||||
|
data_source_name: 'postgresql://cloud_admin@127.0.0.1:5432/postgres?sslmode=disable&application_name=sql_exporter_autoscaling'
|
||||||
|
|
||||||
|
# Collectors (referenced by name) to execute on the target.
|
||||||
|
# Glob patterns are supported (see <https://pkg.go.dev/path/filepath#Match> for syntax).
|
||||||
|
collectors: [neon_collector_autoscaling]
|
||||||
|
|
||||||
|
# `collector_files` specifies a list of globs. One collector definition is read from each matching file.
|
||||||
|
# Glob patterns are supported (see <https://pkg.go.dev/path/filepath#Match> for syntax).
|
||||||
|
collector_files:
|
||||||
|
- "neon_collector_autoscaling.yml"
|
||||||
3949
compute/patches/cloud_regress_pg16.patch
Normal file
3949
compute/patches/cloud_regress_pg16.patch
Normal file
File diff suppressed because it is too large
Load Diff
112
compute/vm-image-spec.yaml
Normal file
112
compute/vm-image-spec.yaml
Normal file
@@ -0,0 +1,112 @@
|
|||||||
|
# Supplemental file for neondatabase/autoscaling's vm-builder, for producing the VM compute image.
|
||||||
|
---
|
||||||
|
commands:
|
||||||
|
- name: cgconfigparser
|
||||||
|
user: root
|
||||||
|
sysvInitAction: sysinit
|
||||||
|
shell: 'cgconfigparser -l /etc/cgconfig.conf -s 1664'
|
||||||
|
# restrict permissions on /neonvm/bin/resize-swap, because we grant access to compute_ctl for
|
||||||
|
# running it as root.
|
||||||
|
- name: chmod-resize-swap
|
||||||
|
user: root
|
||||||
|
sysvInitAction: sysinit
|
||||||
|
shell: 'chmod 711 /neonvm/bin/resize-swap'
|
||||||
|
- name: pgbouncer
|
||||||
|
user: postgres
|
||||||
|
sysvInitAction: respawn
|
||||||
|
shell: '/usr/local/bin/pgbouncer /etc/pgbouncer.ini'
|
||||||
|
- name: postgres-exporter
|
||||||
|
user: nobody
|
||||||
|
sysvInitAction: respawn
|
||||||
|
shell: 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres application_name=postgres-exporter" /bin/postgres_exporter'
|
||||||
|
- name: sql-exporter
|
||||||
|
user: nobody
|
||||||
|
sysvInitAction: respawn
|
||||||
|
shell: '/bin/sql_exporter -config.file=/etc/sql_exporter.yml -web.listen-address=:9399'
|
||||||
|
- name: sql-exporter-autoscaling
|
||||||
|
user: nobody
|
||||||
|
sysvInitAction: respawn
|
||||||
|
shell: '/bin/sql_exporter -config.file=/etc/sql_exporter_autoscaling.yml -web.listen-address=:9499'
|
||||||
|
shutdownHook: |
|
||||||
|
su -p postgres --session-command '/usr/local/bin/pg_ctl stop -D /var/db/postgres/compute/pgdata -m fast --wait -t 10'
|
||||||
|
files:
|
||||||
|
- filename: compute_ctl-resize-swap
|
||||||
|
content: |
|
||||||
|
# Allow postgres user (which is what compute_ctl runs as) to run /neonvm/bin/resize-swap
|
||||||
|
# as root without requiring entering a password (NOPASSWD), regardless of hostname (ALL)
|
||||||
|
postgres ALL=(root) NOPASSWD: /neonvm/bin/resize-swap
|
||||||
|
- filename: cgconfig.conf
|
||||||
|
content: |
|
||||||
|
# Configuration for cgroups in VM compute nodes
|
||||||
|
group neon-postgres {
|
||||||
|
perm {
|
||||||
|
admin {
|
||||||
|
uid = postgres;
|
||||||
|
}
|
||||||
|
task {
|
||||||
|
gid = users;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
memory {}
|
||||||
|
}
|
||||||
|
build: |
|
||||||
|
# Build cgroup-tools
|
||||||
|
#
|
||||||
|
# At time of writing (2023-03-14), debian bullseye has a version of cgroup-tools (technically
|
||||||
|
# libcgroup) that doesn't support cgroup v2 (version 0.41-11). Unfortunately, the vm-monitor
|
||||||
|
# requires cgroup v2, so we'll build cgroup-tools ourselves.
|
||||||
|
FROM debian:bullseye-slim as libcgroup-builder
|
||||||
|
ENV LIBCGROUP_VERSION=v2.0.3
|
||||||
|
|
||||||
|
RUN set -exu \
|
||||||
|
&& apt update \
|
||||||
|
&& apt install --no-install-recommends -y \
|
||||||
|
git \
|
||||||
|
ca-certificates \
|
||||||
|
automake \
|
||||||
|
cmake \
|
||||||
|
make \
|
||||||
|
gcc \
|
||||||
|
byacc \
|
||||||
|
flex \
|
||||||
|
libtool \
|
||||||
|
libpam0g-dev \
|
||||||
|
&& git clone --depth 1 -b $LIBCGROUP_VERSION https://github.com/libcgroup/libcgroup \
|
||||||
|
&& INSTALL_DIR="/libcgroup-install" \
|
||||||
|
&& mkdir -p "$INSTALL_DIR/bin" "$INSTALL_DIR/include" \
|
||||||
|
&& cd libcgroup \
|
||||||
|
# extracted from bootstrap.sh, with modified flags:
|
||||||
|
&& (test -d m4 || mkdir m4) \
|
||||||
|
&& autoreconf -fi \
|
||||||
|
&& rm -rf autom4te.cache \
|
||||||
|
&& CFLAGS="-O3" ./configure --prefix="$INSTALL_DIR" --sysconfdir=/etc --localstatedir=/var --enable-opaque-hierarchy="name=systemd" \
|
||||||
|
# actually build the thing...
|
||||||
|
&& make install
|
||||||
|
merge: |
|
||||||
|
# tweak nofile limits
|
||||||
|
RUN set -e \
|
||||||
|
&& echo 'fs.file-max = 1048576' >>/etc/sysctl.conf \
|
||||||
|
&& test ! -e /etc/security || ( \
|
||||||
|
echo '* - nofile 1048576' >>/etc/security/limits.conf \
|
||||||
|
&& echo 'root - nofile 1048576' >>/etc/security/limits.conf \
|
||||||
|
)
|
||||||
|
|
||||||
|
# Allow postgres user (compute_ctl) to run swap resizer.
|
||||||
|
# Need to install sudo in order to allow this.
|
||||||
|
#
|
||||||
|
# Also, remove the 'read' permission from group/other on /neonvm/bin/resize-swap, just to be safe.
|
||||||
|
RUN set -e \
|
||||||
|
&& apt update \
|
||||||
|
&& apt install --no-install-recommends -y \
|
||||||
|
sudo \
|
||||||
|
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
|
||||||
|
COPY compute_ctl-resize-swap /etc/sudoers.d/compute_ctl-resize-swap
|
||||||
|
|
||||||
|
COPY cgconfig.conf /etc/cgconfig.conf
|
||||||
|
|
||||||
|
RUN set -e \
|
||||||
|
&& chmod 0644 /etc/cgconfig.conf
|
||||||
|
|
||||||
|
COPY --from=libcgroup-builder /libcgroup-install/bin/* /usr/bin/
|
||||||
|
COPY --from=libcgroup-builder /libcgroup-install/lib/* /usr/lib/
|
||||||
|
COPY --from=libcgroup-builder /libcgroup-install/sbin/* /usr/sbin/
|
||||||
@@ -11,9 +11,17 @@ use crate::compute::ComputeNode;
|
|||||||
fn configurator_main_loop(compute: &Arc<ComputeNode>) {
|
fn configurator_main_loop(compute: &Arc<ComputeNode>) {
|
||||||
info!("waiting for reconfiguration requests");
|
info!("waiting for reconfiguration requests");
|
||||||
loop {
|
loop {
|
||||||
let state = compute.state.lock().unwrap();
|
let mut state = compute.state.lock().unwrap();
|
||||||
let mut state = compute.state_changed.wait(state).unwrap();
|
|
||||||
|
|
||||||
|
// We have to re-check the status after re-acquiring the lock because it could be that
|
||||||
|
// the status has changed while we were waiting for the lock, and we might not need to
|
||||||
|
// wait on the condition variable. Otherwise, we might end up in some soft-/deadlock, i.e.
|
||||||
|
// we are waiting for a condition variable that will never be signaled.
|
||||||
|
if state.status != ComputeStatus::ConfigurationPending {
|
||||||
|
state = compute.state_changed.wait(state).unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Re-check the status after waking up
|
||||||
if state.status == ComputeStatus::ConfigurationPending {
|
if state.status == ComputeStatus::ConfigurationPending {
|
||||||
info!("got configuration request");
|
info!("got configuration request");
|
||||||
state.status = ComputeStatus::Configuration;
|
state.status = ComputeStatus::Configuration;
|
||||||
|
|||||||
@@ -9,7 +9,6 @@ anyhow.workspace = true
|
|||||||
camino.workspace = true
|
camino.workspace = true
|
||||||
clap.workspace = true
|
clap.workspace = true
|
||||||
comfy-table.workspace = true
|
comfy-table.workspace = true
|
||||||
git-version.workspace = true
|
|
||||||
humantime.workspace = true
|
humantime.workspace = true
|
||||||
nix.workspace = true
|
nix.workspace = true
|
||||||
once_cell.workspace = true
|
once_cell.workspace = true
|
||||||
|
|||||||
@@ -346,7 +346,14 @@ impl StorageController {
|
|||||||
let pg_log_path = pg_data_path.join("postgres.log");
|
let pg_log_path = pg_data_path.join("postgres.log");
|
||||||
|
|
||||||
if !tokio::fs::try_exists(&pg_data_path).await? {
|
if !tokio::fs::try_exists(&pg_data_path).await? {
|
||||||
let initdb_args = ["-D", pg_data_path.as_ref(), "--username", &username()];
|
let initdb_args = [
|
||||||
|
"-D",
|
||||||
|
pg_data_path.as_ref(),
|
||||||
|
"--username",
|
||||||
|
&username(),
|
||||||
|
"--no-sync",
|
||||||
|
"--no-instructions",
|
||||||
|
];
|
||||||
tracing::info!(
|
tracing::info!(
|
||||||
"Initializing storage controller database with args: {:?}",
|
"Initializing storage controller database with args: {:?}",
|
||||||
initdb_args
|
initdb_args
|
||||||
|
|||||||
@@ -4,8 +4,8 @@ use std::{str::FromStr, time::Duration};
|
|||||||
use clap::{Parser, Subcommand};
|
use clap::{Parser, Subcommand};
|
||||||
use pageserver_api::{
|
use pageserver_api::{
|
||||||
controller_api::{
|
controller_api::{
|
||||||
NodeAvailabilityWrapper, NodeDescribeResponse, NodeShardResponse, ShardSchedulingPolicy,
|
AvailabilityZone, NodeAvailabilityWrapper, NodeDescribeResponse, NodeShardResponse,
|
||||||
TenantCreateRequest, TenantDescribeResponse, TenantPolicyRequest,
|
ShardSchedulingPolicy, TenantCreateRequest, TenantDescribeResponse, TenantPolicyRequest,
|
||||||
},
|
},
|
||||||
models::{
|
models::{
|
||||||
EvictionPolicy, EvictionPolicyLayerAccessThreshold, LocationConfigSecondary,
|
EvictionPolicy, EvictionPolicyLayerAccessThreshold, LocationConfigSecondary,
|
||||||
@@ -339,7 +339,7 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
listen_pg_port,
|
listen_pg_port,
|
||||||
listen_http_addr,
|
listen_http_addr,
|
||||||
listen_http_port,
|
listen_http_port,
|
||||||
availability_zone_id,
|
availability_zone_id: AvailabilityZone(availability_zone_id),
|
||||||
}),
|
}),
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
|
|||||||
@@ -2,8 +2,8 @@
|
|||||||
# Example docker compose configuration
|
# Example docker compose configuration
|
||||||
|
|
||||||
The configuration in this directory is used for testing Neon docker images: it is
|
The configuration in this directory is used for testing Neon docker images: it is
|
||||||
not intended for deploying a usable system. To run a development environment where
|
not intended for deploying a usable system. To run a development environment where
|
||||||
you can experiment with a minature Neon system, use `cargo neon` rather than container images.
|
you can experiment with a miniature Neon system, use `cargo neon` rather than container images.
|
||||||
|
|
||||||
This configuration does not start the storage controller, because the controller
|
This configuration does not start the storage controller, because the controller
|
||||||
needs a way to reconfigure running computes, and no such thing exists in this setup.
|
needs a way to reconfigure running computes, and no such thing exists in this setup.
|
||||||
|
|||||||
343
docs/rfcs/038-independent-compute-release.md
Normal file
343
docs/rfcs/038-independent-compute-release.md
Normal file
@@ -0,0 +1,343 @@
|
|||||||
|
# Independent compute release
|
||||||
|
|
||||||
|
Created at: 2024-08-30. Author: Alexey Kondratov (@ololobus)
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
This document proposes an approach to fully independent compute release flow. It attempts to
|
||||||
|
cover the following features:
|
||||||
|
|
||||||
|
- Process is automated as much as possible to minimize human errors.
|
||||||
|
- Compute<->storage protocol compatibility is ensured.
|
||||||
|
- A transparent release history is available with an easy rollback strategy.
|
||||||
|
- Although not in the scope of this document, there is a viable way to extend the proposed release
|
||||||
|
flow to achieve the canary and/or blue-green deployment strategies.
|
||||||
|
|
||||||
|
## Motivation
|
||||||
|
|
||||||
|
Previously, the compute release was tightly coupled to the storage release. This meant that once
|
||||||
|
some storage nodes got restarted with a newer version, all new compute starts using these nodes
|
||||||
|
automatically got a new version. Thus, two releases happen in parallel, which increases the blast
|
||||||
|
radius and makes ownership fuzzy.
|
||||||
|
|
||||||
|
Now, we practice a manual v0 independent compute release flow -- after getting a new compute release
|
||||||
|
image and tag, we pin it region by region using Admin UI. It's better, but it still has its own flaws:
|
||||||
|
|
||||||
|
1. It's a simple but fairly manual process, as you need to click through a few pages.
|
||||||
|
2. It's prone to human errors, e.g., you could mistype or copy the wrong compute tag.
|
||||||
|
3. We now require an additional approval in the Admin UI, which partially solves problem 2,
|
||||||
|
but also makes the whole process pretty annoying, as you constantly need to go back
|
||||||
|
and forth between two people.
|
||||||
|
|
||||||
|
## Non-goals
|
||||||
|
|
||||||
|
It's not the goal of this document to propose a design for some general-purpose release tool like Helm.
|
||||||
|
The document considers how the current compute fleet is orchestrated at Neon. Even if we later
|
||||||
|
decide to split the control plane further (e.g., introduce a separate compute controller), the proposed
|
||||||
|
release process shouldn't change much, i.e., the releases table and API will reside in
|
||||||
|
one of the parts.
|
||||||
|
|
||||||
|
Achieving the canary and/or blue-green deploy strategies is out of the scope of this document. They
|
||||||
|
were kept in mind, though, so it's expected that the proposed approach will lay down the foundation
|
||||||
|
for implementing them in future iterations.
|
||||||
|
|
||||||
|
## Impacted components
|
||||||
|
|
||||||
|
Compute, control plane, CI, observability (some Grafana dashboards may require changes).
|
||||||
|
|
||||||
|
## Prior art
|
||||||
|
|
||||||
|
One of the very close examples is how Helm tracks [releases history](https://helm.sh/docs/helm/helm_history/).
|
||||||
|
|
||||||
|
In the code:
|
||||||
|
|
||||||
|
- [Release](https://github.com/helm/helm/blob/2b30cf4b61d587d3f7594102bb202b787b9918db/pkg/release/release.go#L20-L43)
|
||||||
|
- [Release info](https://github.com/helm/helm/blob/2b30cf4b61d587d3f7594102bb202b787b9918db/pkg/release/info.go#L24-L40)
|
||||||
|
- [Release status](https://github.com/helm/helm/blob/2b30cf4b61d587d3f7594102bb202b787b9918db/pkg/release/status.go#L18-L42)
|
||||||
|
|
||||||
|
TL;DR it has several important attributes:
|
||||||
|
|
||||||
|
- Revision -- unique release ID/primary key. It is not the same as the application version,
|
||||||
|
because the same version can be deployed several times, e.g., after a newer version rollback.
|
||||||
|
- App version -- version of the application chart/code.
|
||||||
|
- Config -- set of overrides to the default config of the application.
|
||||||
|
- Status -- current status of the release in the history.
|
||||||
|
- Timestamps -- tracks when a release was created and deployed.
|
||||||
|
|
||||||
|
## Proposed implementation
|
||||||
|
|
||||||
|
### Separate release branch
|
||||||
|
|
||||||
|
We will use a separate release branch, `release-compute`, to have a clean history for releases and commits.
|
||||||
|
In order to avoid confusion with storage releases, we will use a different prefix for compute [git release
|
||||||
|
tags](https://github.com/neondatabase/neon/releases) -- `release-compute-XXXX`. We will use the same tag for
|
||||||
|
Docker images as well. The `neondatabase/compute-node-v16:release-compute-XXXX` looks longer and a bit redundant,
|
||||||
|
but it's better to have image and git tags in sync.
|
||||||
|
|
||||||
|
Currently, control plane relies on the numeric compute and storage release versions to decide on compute->storage
|
||||||
|
compatibility. Once we implement this proposal, we should drop this code as release numbers will be completely
|
||||||
|
independent. The only constraint we want is that they must monotonically increase within the same release branch.
|
||||||
|
|
||||||
|
### Compute config/settings manifest
|
||||||
|
|
||||||
|
We will create a new sub-directory `compute` and file `compute/manifest.yaml` with a structure:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
pg_settings:
|
||||||
|
# Common settings for primaries and secondaries of all versions.
|
||||||
|
common:
|
||||||
|
wal_log_hints: "off"
|
||||||
|
max_wal_size: "1024"
|
||||||
|
|
||||||
|
per_version:
|
||||||
|
14:
|
||||||
|
# Common settings for both replica and primary of version PG 14
|
||||||
|
common:
|
||||||
|
shared_preload_libraries: "neon,pg_stat_statements,extension_x"
|
||||||
|
15:
|
||||||
|
common:
|
||||||
|
shared_preload_libraries: "neon,pg_stat_statements,extension_x"
|
||||||
|
# Settings that should be applied only to replicas
|
||||||
|
replica:
|
||||||
|
# Available only starting from Postgres 15
|
||||||
|
recovery_prefetch: "off"
|
||||||
|
# ...
|
||||||
|
17:
|
||||||
|
common:
|
||||||
|
# For example, if third-party `extension_x` is not yet available for PG 17
|
||||||
|
shared_preload_libraries: "neon,pg_stat_statements"
|
||||||
|
replica:
|
||||||
|
recovery_prefetch: "off"
|
||||||
|
```
|
||||||
|
|
||||||
|
**N.B.** Setting value should be a string with `on|off` for booleans and a number (as a string)
|
||||||
|
without units for all numeric settings. That's how the control plane currently operates.
|
||||||
|
|
||||||
|
The priority of settings will be (a higher number is a higher priority):
|
||||||
|
|
||||||
|
1. Any static and hard-coded settings in the control plane
|
||||||
|
2. `pg_settings->common`
|
||||||
|
3. Per-version `common`
|
||||||
|
4. Per-version `replica`
|
||||||
|
5. Any per-user/project/endpoint overrides in the control plane
|
||||||
|
6. Any dynamic setting calculated based on the compute size
|
||||||
|
|
||||||
|
**N.B.** For simplicity, we do not do any custom logic for `shared_preload_libraries`, so it's completely
|
||||||
|
overridden if specified on some level. Make sure that you include all necessary extensions in it when you
|
||||||
|
do any overrides.
|
||||||
|
|
||||||
|
**N.B.** There is a tricky question about what to do with custom compute image pinning we sometimes
|
||||||
|
do for particular projects and customers. That's usually some ad-hoc work and images are based on
|
||||||
|
the latest compute image, so it's relatively safe to assume that we could use settings from the latest compute
|
||||||
|
release. If for some reason that's not true, and further overrides are needed, it's also possible to do
|
||||||
|
on the project level together with pinning the image, so it's on-call/engineer/support responsibility to
|
||||||
|
ensure that compute starts with the specified custom image. The only real risk is that compute image will get
|
||||||
|
stale and settings from new releases will drift away, so eventually it will get something incompatible,
|
||||||
|
but i) this is some operational issue, as we do not want stale images anyway, and ii) base settings
|
||||||
|
receive something really new so rarely that the chance of this happening is very low. If we want to solve it completely,
|
||||||
|
then together with pinning the image we could also pin the matching release revision in the control plane.
|
||||||
|
|
||||||
|
The compute team will own the content of `compute/manifest.yaml`.
|
||||||
|
|
||||||
|
### Control plane: releases table
|
||||||
|
|
||||||
|
In order to store information about releases, the control plane will use a table `compute_releases` with the following
|
||||||
|
schema:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
CREATE TABLE compute_releases (
|
||||||
|
-- Unique release ID
|
||||||
|
-- N.B. Revision won't be synchronized across all regions, because all control planes are technically independent
|
||||||
|
-- services. We have the same situation with Helm releases as well because they could be deployed and rolled back
|
||||||
|
-- independently in different clusters.
|
||||||
|
revision BIGSERIAL PRIMARY KEY,
|
||||||
|
-- Numeric version of the compute image, e.g. 9057
|
||||||
|
version BIGINT NOT NULL,
|
||||||
|
-- Compute image tag, e.g. `release-9057`
|
||||||
|
tag TEXT NOT NULL,
|
||||||
|
-- Current release status. Currently, it will be a simple enum
|
||||||
|
-- * `deployed` -- release is deployed and used for new compute starts.
|
||||||
|
-- Exactly one release can have this status at a time.
|
||||||
|
-- * `superseded` -- release has been replaced by a newer one.
|
||||||
|
-- But we can always extend it in the future when we need more statuses
|
||||||
|
-- for more complex deployment strategies.
|
||||||
|
status TEXT NOT NULL,
|
||||||
|
-- Any additional metadata for compute in the corresponding release
|
||||||
|
manifest JSONB NOT NULL,
|
||||||
|
-- Timestamp when release record was created in the control plane database
|
||||||
|
created_at TIMESTAMP NOT NULL DEFAULT now(),
|
||||||
|
-- Timestamp when release deployment was finished
|
||||||
|
deployed_at TIMESTAMP
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
|
We keep track of the old releases not only for the sake of audit, but also because we usually have ~30% of
|
||||||
|
old computes started using the image from one of the previous releases. Yet, when users want to reconfigure
|
||||||
|
them without restarting, the control plane needs to know what settings are applicable to them, so we also need
|
||||||
|
information about the previous releases to be readily available. There could be some other auxiliary info
|
||||||
|
needed as well: supported extensions, compute flags, etc.
|
||||||
|
|
||||||
|
**N.B.** Here, we can end up in an ambiguous situation when the same compute image is deployed twice, e.g.,
|
||||||
|
it was deployed once, then rolled back, and then deployed again, potentially with a different manifest. Yet,
|
||||||
|
we could've started some computes with the first deployment and some with the second. Thus, when we need to
|
||||||
|
look up the manifest for the compute by its image tag, we will see two records in the table with the same tag,
|
||||||
|
but different revision numbers. We can assume that this could happen only in case of rollbacks, so we
|
||||||
|
can just take the latest revision for the given tag.
|
||||||
|
|
||||||
|
### Control plane: management API
|
||||||
|
|
||||||
|
The control plane will implement new API methods to manage releases:
|
||||||
|
|
||||||
|
1. `POST /management/api/v2/compute_releases` to create a new release. With payload
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"version": 9057,
|
||||||
|
"tag": "release-9057",
|
||||||
|
"manifest": {}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
and response
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"revision": 53,
|
||||||
|
"version": 9057,
|
||||||
|
"tag": "release-9057",
|
||||||
|
"status": "deployed",
|
||||||
|
"manifest": {},
|
||||||
|
"created_at": "2024-08-15T15:52:01.0000Z",
|
||||||
|
"deployed_at": "2024-08-15T15:52:01.0000Z"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Here, we can actually mix-in custom (remote) extensions metadata into the `manifest`, so that the control plane
|
||||||
|
will get information about all available extensions not bundled into compute image. The corresponding
|
||||||
|
workflow in `neondatabase/build-custom-extensions` should produce it as an artifact and make
|
||||||
|
it accessible to the workflow in the `neondatabase/infra`. See the complete release flow below. Doing that,
|
||||||
|
we put a constraint that new custom extension requires new compute release, which is good for the safety,
|
||||||
|
but is not exactly what we want operational-wise (we want to be able to deploy new extensions without new
|
||||||
|
images). Yet, it can be solved incrementally: v0 -- do not do anything with extensions at all;
|
||||||
|
v1 -- put them into the same manifest; v2 -- make them separate entities with their own lifecycle.
|
||||||
|
|
||||||
|
**N.B.** This method is intended to be used in CI workflows, and CI/network can be flaky. It's reasonable
|
||||||
|
to assume that we could retry the request several times, even though it's already succeeded. Although it's
|
||||||
|
not a big deal to create several identical releases one-by-one, it's better to avoid it, so the control plane
|
||||||
|
should check if the latest release is identical and just return `304 Not Modified` in this case.
|
||||||
|
|
||||||
|
2. `POST /management/api/v2/compute_releases/rollback` to roll back to any previously deployed release. With payload
|
||||||
|
including the revision of the release to roll back to:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"revision": 52
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Rollback marks the current release as `superseded` and creates a new release with all the same data as the
|
||||||
|
requested revision, but with a new revision number.
|
||||||
|
|
||||||
|
This rollback API is not strictly needed, as we can just use `infra` repo workflow to deploy any
|
||||||
|
available tag. It's still nice to have for on-call and any urgent matters, for example, if we need
|
||||||
|
to roll back and GitHub is down. It's much easier to specify only the revision number vs. crafting
|
||||||
|
all the necessary data for the new release payload.
|
||||||
|
|
||||||
|
### Compute->storage compatibility tests
|
||||||
|
|
||||||
|
In order to safely release new compute versions independently from storage, we need to ensure that the currently
|
||||||
|
deployed storage is compatible with the new compute version. Currently, we maintain backward compatibility
|
||||||
|
in storage, but newer computes may require a newer storage version.
|
||||||
|
|
||||||
|
Remote end-to-end (e2e) tests [already accept](https://github.com/neondatabase/cloud/blob/e3468d433e0d73d02b7d7e738d027f509b522408/.github/workflows/testing.yml#L43-L48)
|
||||||
|
`storage_image_tag` and `compute_image_tag` as separate inputs. That means that we could reuse e2e tests to ensure
|
||||||
|
compatibility between storage and compute:
|
||||||
|
|
||||||
|
1. Pick the latest storage release tag and use it as `storage_image_tag`.
|
||||||
|
2. Pick a new compute tag built in the current compute release PR and use it as `compute_image_tag`.
|
||||||
|
Here, we should use a temporary ECR image tag, because the final tag will be known only after the release PR is merged.
|
||||||
|
3. Trigger e2e tests as usual.
|
||||||
|
|
||||||
|
### Release flow
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
sequenceDiagram
|
||||||
|
|
||||||
|
actor oncall as Compute on-call person
|
||||||
|
participant neon as neondatabase/neon
|
||||||
|
|
||||||
|
box private
|
||||||
|
participant cloud as neondatabase/cloud
|
||||||
|
participant exts as neondatabase/build-custom-extensions
|
||||||
|
participant infra as neondatabase/infra
|
||||||
|
end
|
||||||
|
|
||||||
|
box cloud
|
||||||
|
participant preprod as Pre-prod control plane
|
||||||
|
participant prod as Production control plane
|
||||||
|
participant k8s as Compute k8s
|
||||||
|
end
|
||||||
|
|
||||||
|
oncall ->> neon: Open release PR into release-compute
|
||||||
|
|
||||||
|
activate neon
|
||||||
|
neon ->> cloud: CI: trigger e2e compatibility tests
|
||||||
|
activate cloud
|
||||||
|
cloud -->> neon: CI: e2e tests pass
|
||||||
|
deactivate cloud
|
||||||
|
neon ->> neon: CI: pass PR checks, get approvals
|
||||||
|
deactivate neon
|
||||||
|
|
||||||
|
oncall ->> neon: Merge release PR into release-compute
|
||||||
|
|
||||||
|
activate neon
|
||||||
|
neon ->> neon: CI: pass checks, build and push images
|
||||||
|
neon ->> exts: CI: trigger extensions build
|
||||||
|
activate exts
|
||||||
|
exts -->> neon: CI: extensions are ready
|
||||||
|
deactivate exts
|
||||||
|
neon ->> neon: CI: create release tag
|
||||||
|
neon ->> infra: Trigger release workflow using the produced tag
|
||||||
|
deactivate neon
|
||||||
|
|
||||||
|
activate infra
|
||||||
|
infra ->> infra: CI: pass checks
|
||||||
|
infra ->> preprod: Release new compute image to pre-prod automatically <br/> POST /management/api/v2/compute_releases
|
||||||
|
activate preprod
|
||||||
|
preprod -->> infra: 200 OK
|
||||||
|
deactivate preprod
|
||||||
|
|
||||||
|
infra ->> infra: CI: wait for per-region production deploy approvals
|
||||||
|
oncall ->> infra: CI: approve deploys region by region
|
||||||
|
infra ->> k8s: Prewarm new compute image
|
||||||
|
infra ->> prod: POST /management/api/v2/compute_releases
|
||||||
|
activate prod
|
||||||
|
prod -->> infra: 200 OK
|
||||||
|
deactivate prod
|
||||||
|
deactivate infra
|
||||||
|
```
|
||||||
|
|
||||||
|
## Further work
|
||||||
|
|
||||||
|
As briefly mentioned in other sections, eventually, we would like to use more complex deployment strategies.
|
||||||
|
For example, we can pass a fraction of the total compute starts that should use the new release. Then we can
|
||||||
|
mark the release as `partial` or `canary` and monitor its performance. If everything is fine, we can promote it
|
||||||
|
to `deployed` status. If not, we can roll back to the previous one.
|
||||||
|
|
||||||
|
## Alternatives
|
||||||
|
|
||||||
|
In theory, we can try using Helm as-is:
|
||||||
|
|
||||||
|
1. Write a compute Helm chart. That will actually have only some config map, which the control plane can access and read.
|
||||||
|
N.B. We could reuse the control plane chart as well, but then it's not a fully independent release again and even more fuzzy.
|
||||||
|
2. The control plane will read it and start using the new compute version for new starts.
|
||||||
|
|
||||||
|
Drawbacks:
|
||||||
|
|
||||||
|
1. Helm releases work best if the workload is controlled by the Helm chart itself. Then you can have different
|
||||||
|
deployment strategies like rolling update or canary or blue/green deployments. At Neon, the compute starts are controlled
|
||||||
|
by control plane, so it makes it much more tricky.
|
||||||
|
2. Releases visibility will suffer, i.e. instead of a nice table in the control plane and Admin UI, we would need to use
|
||||||
|
`helm` cli and/or K8s UIs like K8sLens.
|
||||||
|
3. We do not restart all computes shortly after the new version release. This means that for some features and compatibility
|
||||||
|
purposes (see above), the control plane may need some auxiliary info from the previous releases.
|
||||||
@@ -268,6 +268,22 @@ pub struct GenericOption {
|
|||||||
/// declare a `trait` on it.
|
/// declare a `trait` on it.
|
||||||
pub type GenericOptions = Option<Vec<GenericOption>>;
|
pub type GenericOptions = Option<Vec<GenericOption>>;
|
||||||
|
|
||||||
|
/// Configures the local-proxy application with the relevant JWKS and roles it should
|
||||||
|
/// use for authorizing connect requests using JWT.
|
||||||
|
#[derive(Clone, Debug, Deserialize, Serialize)]
|
||||||
|
pub struct LocalProxySpec {
|
||||||
|
pub jwks: Vec<JwksSettings>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Deserialize, Serialize)]
|
||||||
|
pub struct JwksSettings {
|
||||||
|
pub id: String,
|
||||||
|
pub role_names: Vec<String>,
|
||||||
|
pub jwks_url: String,
|
||||||
|
pub provider_name: String,
|
||||||
|
pub jwt_audience: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|||||||
@@ -104,9 +104,6 @@ pub struct ConfigToml {
|
|||||||
pub image_compression: ImageCompressionAlgorithm,
|
pub image_compression: ImageCompressionAlgorithm,
|
||||||
pub ephemeral_bytes_per_memory_kb: usize,
|
pub ephemeral_bytes_per_memory_kb: usize,
|
||||||
pub l0_flush: Option<crate::models::L0FlushConfig>,
|
pub l0_flush: Option<crate::models::L0FlushConfig>,
|
||||||
#[serde(skip_serializing)]
|
|
||||||
// TODO(https://github.com/neondatabase/neon/issues/8184): remove after this field is removed from all pageserver.toml's
|
|
||||||
pub compact_level0_phase1_value_access: serde::de::IgnoredAny,
|
|
||||||
pub virtual_file_direct_io: crate::models::virtual_file::DirectIoMode,
|
pub virtual_file_direct_io: crate::models::virtual_file::DirectIoMode,
|
||||||
pub io_buffer_alignment: usize,
|
pub io_buffer_alignment: usize,
|
||||||
}
|
}
|
||||||
@@ -384,7 +381,6 @@ impl Default for ConfigToml {
|
|||||||
image_compression: (DEFAULT_IMAGE_COMPRESSION),
|
image_compression: (DEFAULT_IMAGE_COMPRESSION),
|
||||||
ephemeral_bytes_per_memory_kb: (DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB),
|
ephemeral_bytes_per_memory_kb: (DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB),
|
||||||
l0_flush: None,
|
l0_flush: None,
|
||||||
compact_level0_phase1_value_access: Default::default(),
|
|
||||||
virtual_file_direct_io: crate::models::virtual_file::DirectIoMode::default(),
|
virtual_file_direct_io: crate::models::virtual_file::DirectIoMode::default(),
|
||||||
|
|
||||||
io_buffer_alignment: DEFAULT_IO_BUFFER_ALIGNMENT,
|
io_buffer_alignment: DEFAULT_IO_BUFFER_ALIGNMENT,
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
use std::collections::{HashMap, HashSet};
|
use std::collections::{HashMap, HashSet};
|
||||||
|
use std::fmt::Display;
|
||||||
use std::str::FromStr;
|
use std::str::FromStr;
|
||||||
use std::time::{Duration, Instant};
|
use std::time::{Duration, Instant};
|
||||||
|
|
||||||
@@ -57,7 +58,7 @@ pub struct NodeRegisterRequest {
|
|||||||
pub listen_http_addr: String,
|
pub listen_http_addr: String,
|
||||||
pub listen_http_port: u16,
|
pub listen_http_port: u16,
|
||||||
|
|
||||||
pub availability_zone_id: String,
|
pub availability_zone_id: AvailabilityZone,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize)]
|
#[derive(Serialize, Deserialize)]
|
||||||
@@ -74,10 +75,19 @@ pub struct TenantPolicyRequest {
|
|||||||
pub scheduling: Option<ShardSchedulingPolicy>,
|
pub scheduling: Option<ShardSchedulingPolicy>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
|
||||||
|
pub struct AvailabilityZone(pub String);
|
||||||
|
|
||||||
|
impl Display for AvailabilityZone {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
write!(f, "{}", self.0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize)]
|
#[derive(Serialize, Deserialize)]
|
||||||
pub struct ShardsPreferredAzsRequest {
|
pub struct ShardsPreferredAzsRequest {
|
||||||
#[serde(flatten)]
|
#[serde(flatten)]
|
||||||
pub preferred_az_ids: HashMap<TenantShardId, String>,
|
pub preferred_az_ids: HashMap<TenantShardId, AvailabilityZone>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize)]
|
#[derive(Serialize, Deserialize)]
|
||||||
|
|||||||
@@ -37,14 +37,11 @@ use bytes::{Buf, BufMut, Bytes, BytesMut};
|
|||||||
/// ```mermaid
|
/// ```mermaid
|
||||||
/// stateDiagram-v2
|
/// stateDiagram-v2
|
||||||
///
|
///
|
||||||
/// [*] --> Loading: spawn_load()
|
|
||||||
/// [*] --> Attaching: spawn_attach()
|
/// [*] --> Attaching: spawn_attach()
|
||||||
///
|
///
|
||||||
/// Loading --> Activating: activate()
|
|
||||||
/// Attaching --> Activating: activate()
|
/// Attaching --> Activating: activate()
|
||||||
/// Activating --> Active: infallible
|
/// Activating --> Active: infallible
|
||||||
///
|
///
|
||||||
/// Loading --> Broken: load() failure
|
|
||||||
/// Attaching --> Broken: attach() failure
|
/// Attaching --> Broken: attach() failure
|
||||||
///
|
///
|
||||||
/// Active --> Stopping: set_stopping(), part of shutdown & detach
|
/// Active --> Stopping: set_stopping(), part of shutdown & detach
|
||||||
@@ -68,10 +65,6 @@ use bytes::{Buf, BufMut, Bytes, BytesMut};
|
|||||||
)]
|
)]
|
||||||
#[serde(tag = "slug", content = "data")]
|
#[serde(tag = "slug", content = "data")]
|
||||||
pub enum TenantState {
|
pub enum TenantState {
|
||||||
/// This tenant is being loaded from local disk.
|
|
||||||
///
|
|
||||||
/// `set_stopping()` and `set_broken()` do not work in this state and wait for it to pass.
|
|
||||||
Loading,
|
|
||||||
/// This tenant is being attached to the pageserver.
|
/// This tenant is being attached to the pageserver.
|
||||||
///
|
///
|
||||||
/// `set_stopping()` and `set_broken()` do not work in this state and wait for it to pass.
|
/// `set_stopping()` and `set_broken()` do not work in this state and wait for it to pass.
|
||||||
@@ -121,8 +114,6 @@ impl TenantState {
|
|||||||
// But, our attach task might still be fetching the remote timelines, etc.
|
// But, our attach task might still be fetching the remote timelines, etc.
|
||||||
// So, return `Maybe` while Attaching, making Console wait for the attach task to finish.
|
// So, return `Maybe` while Attaching, making Console wait for the attach task to finish.
|
||||||
Self::Attaching | Self::Activating(ActivatingFrom::Attaching) => Maybe,
|
Self::Attaching | Self::Activating(ActivatingFrom::Attaching) => Maybe,
|
||||||
// tenant mgr startup distinguishes attaching from loading via marker file.
|
|
||||||
Self::Loading | Self::Activating(ActivatingFrom::Loading) => Attached,
|
|
||||||
// We only reach Active after successful load / attach.
|
// We only reach Active after successful load / attach.
|
||||||
// So, call attachment status Attached.
|
// So, call attachment status Attached.
|
||||||
Self::Active => Attached,
|
Self::Active => Attached,
|
||||||
@@ -191,10 +182,11 @@ impl LsnLease {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// The only [`TenantState`] variants we could be `TenantState::Activating` from.
|
/// The only [`TenantState`] variants we could be `TenantState::Activating` from.
|
||||||
|
///
|
||||||
|
/// XXX: We used to have more variants here, but now it's just one, which makes this rather
|
||||||
|
/// useless. Remove, once we've checked that there's no client code left that looks at this.
|
||||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
#[derive(Clone, Copy, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||||
pub enum ActivatingFrom {
|
pub enum ActivatingFrom {
|
||||||
/// Arrived to [`TenantState::Activating`] from [`TenantState::Loading`]
|
|
||||||
Loading,
|
|
||||||
/// Arrived to [`TenantState::Activating`] from [`TenantState::Attaching`]
|
/// Arrived to [`TenantState::Activating`] from [`TenantState::Attaching`]
|
||||||
Attaching,
|
Attaching,
|
||||||
}
|
}
|
||||||
@@ -1562,11 +1554,8 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn tenantstatus_activating_serde() {
|
fn tenantstatus_activating_serde() {
|
||||||
let states = [
|
let states = [TenantState::Activating(ActivatingFrom::Attaching)];
|
||||||
TenantState::Activating(ActivatingFrom::Loading),
|
let expected = "[{\"slug\":\"Activating\",\"data\":\"Attaching\"}]";
|
||||||
TenantState::Activating(ActivatingFrom::Attaching),
|
|
||||||
];
|
|
||||||
let expected = "[{\"slug\":\"Activating\",\"data\":\"Loading\"},{\"slug\":\"Activating\",\"data\":\"Attaching\"}]";
|
|
||||||
|
|
||||||
let actual = serde_json::to_string(&states).unwrap();
|
let actual = serde_json::to_string(&states).unwrap();
|
||||||
|
|
||||||
@@ -1581,13 +1570,7 @@ mod tests {
|
|||||||
fn tenantstatus_activating_strum() {
|
fn tenantstatus_activating_strum() {
|
||||||
// tests added, because we use these for metrics
|
// tests added, because we use these for metrics
|
||||||
let examples = [
|
let examples = [
|
||||||
(line!(), TenantState::Loading, "Loading"),
|
|
||||||
(line!(), TenantState::Attaching, "Attaching"),
|
(line!(), TenantState::Attaching, "Attaching"),
|
||||||
(
|
|
||||||
line!(),
|
|
||||||
TenantState::Activating(ActivatingFrom::Loading),
|
|
||||||
"Activating",
|
|
||||||
),
|
|
||||||
(
|
(
|
||||||
line!(),
|
line!(),
|
||||||
TenantState::Activating(ActivatingFrom::Attaching),
|
TenantState::Activating(ActivatingFrom::Attaching),
|
||||||
|
|||||||
@@ -984,6 +984,7 @@ pub fn short_error(e: &QueryError) -> String {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn log_query_error(query: &str, e: &QueryError) {
|
fn log_query_error(query: &str, e: &QueryError) {
|
||||||
|
// If you want to change the log level of a specific error, also re-categorize it in `BasebackupQueryTimeOngoingRecording`.
|
||||||
match e {
|
match e {
|
||||||
QueryError::Disconnected(ConnectionError::Io(io_error)) => {
|
QueryError::Disconnected(ConnectionError::Io(io_error)) => {
|
||||||
if is_expected_io_error(io_error) {
|
if is_expected_io_error(io_error) {
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ bincode.workspace = true
|
|||||||
bytes.workspace = true
|
bytes.workspace = true
|
||||||
camino.workspace = true
|
camino.workspace = true
|
||||||
chrono.workspace = true
|
chrono.workspace = true
|
||||||
|
git-version.workspace = true
|
||||||
hex = { workspace = true, features = ["serde"] }
|
hex = { workspace = true, features = ["serde"] }
|
||||||
humantime.workspace = true
|
humantime.workspace = true
|
||||||
hyper = { workspace = true, features = ["full"] }
|
hyper = { workspace = true, features = ["full"] }
|
||||||
|
|||||||
@@ -92,6 +92,10 @@ pub mod toml_edit_ext;
|
|||||||
|
|
||||||
pub mod circuit_breaker;
|
pub mod circuit_breaker;
|
||||||
|
|
||||||
|
// Re-export used in macro. Avoids adding git-version as dep in target crates.
|
||||||
|
#[doc(hidden)]
|
||||||
|
pub use git_version;
|
||||||
|
|
||||||
/// This is a shortcut to embed git sha into binaries and avoid copying the same build script to all packages
|
/// This is a shortcut to embed git sha into binaries and avoid copying the same build script to all packages
|
||||||
///
|
///
|
||||||
/// we have several cases:
|
/// we have several cases:
|
||||||
@@ -131,7 +135,7 @@ macro_rules! project_git_version {
|
|||||||
($const_identifier:ident) => {
|
($const_identifier:ident) => {
|
||||||
// this should try GIT_VERSION first only then git_version::git_version!
|
// this should try GIT_VERSION first only then git_version::git_version!
|
||||||
const $const_identifier: &::core::primitive::str = {
|
const $const_identifier: &::core::primitive::str = {
|
||||||
const __COMMIT_FROM_GIT: &::core::primitive::str = git_version::git_version! {
|
const __COMMIT_FROM_GIT: &::core::primitive::str = $crate::git_version::git_version! {
|
||||||
prefix = "",
|
prefix = "",
|
||||||
fallback = "unknown",
|
fallback = "unknown",
|
||||||
args = ["--abbrev=40", "--always", "--dirty=-modified"] // always use full sha
|
args = ["--abbrev=40", "--always", "--dirty=-modified"] // always use full sha
|
||||||
|
|||||||
@@ -27,7 +27,6 @@ crc32c.workspace = true
|
|||||||
either.workspace = true
|
either.workspace = true
|
||||||
fail.workspace = true
|
fail.workspace = true
|
||||||
futures.workspace = true
|
futures.workspace = true
|
||||||
git-version.workspace = true
|
|
||||||
hex.workspace = true
|
hex.workspace = true
|
||||||
humantime.workspace = true
|
humantime.workspace = true
|
||||||
humantime-serde.workspace = true
|
humantime-serde.workspace = true
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
//! Quantify a single walredo manager's throughput under N concurrent callers.
|
//! Quantify a single walredo manager's throughput under N concurrent callers.
|
||||||
//!
|
//!
|
||||||
//! The benchmark implementation ([`bench_impl`]) is parametrized by
|
//! The benchmark implementation ([`bench_impl`]) is parametrized by
|
||||||
//! - `redo_work` => [`Request::short_request`] or [`Request::medium_request`]
|
//! - `redo_work` => an async closure that takes a `PostgresRedoManager` and performs one redo
|
||||||
//! - `n_redos` => number of times the benchmark shall execute the `redo_work`
|
//! - `n_redos` => number of times the benchmark shall execute the `redo_work`
|
||||||
//! - `nclients` => number of clients (more on this shortly).
|
//! - `nclients` => number of clients (more on this shortly).
|
||||||
//!
|
//!
|
||||||
@@ -10,7 +10,7 @@
|
|||||||
//! Each task executes the `redo_work` `n_redos/nclients` times.
|
//! Each task executes the `redo_work` `n_redos/nclients` times.
|
||||||
//!
|
//!
|
||||||
//! We exercise the following combinations:
|
//! We exercise the following combinations:
|
||||||
//! - `redo_work = short / medium`
|
//! - `redo_work = ping / short / medium`
|
||||||
//! - `nclients = [1, 2, 4, 8, 16, 32, 64, 128]`
|
//! - `nclients = [1, 2, 4, 8, 16, 32, 64, 128]`
|
||||||
//!
|
//!
|
||||||
//! We let `criterion` determine the `n_redos` using `iter_custom`.
|
//! We let `criterion` determine the `n_redos` using `iter_custom`.
|
||||||
@@ -27,33 +27,43 @@
|
|||||||
//!
|
//!
|
||||||
//! # Reference Numbers
|
//! # Reference Numbers
|
||||||
//!
|
//!
|
||||||
//! 2024-04-15 on i3en.3xlarge
|
//! 2024-09-18 on im4gn.2xlarge
|
||||||
//!
|
//!
|
||||||
//! ```text
|
//! ```text
|
||||||
//! short/1 time: [24.584 µs 24.737 µs 24.922 µs]
|
//! ping/1 time: [21.789 µs 21.918 µs 22.078 µs]
|
||||||
//! short/2 time: [33.479 µs 33.660 µs 33.888 µs]
|
//! ping/2 time: [27.686 µs 27.812 µs 27.970 µs]
|
||||||
//! short/4 time: [42.713 µs 43.046 µs 43.440 µs]
|
//! ping/4 time: [35.468 µs 35.671 µs 35.926 µs]
|
||||||
//! short/8 time: [71.814 µs 72.478 µs 73.240 µs]
|
//! ping/8 time: [59.682 µs 59.987 µs 60.363 µs]
|
||||||
//! short/16 time: [132.73 µs 134.45 µs 136.22 µs]
|
//! ping/16 time: [101.79 µs 102.37 µs 103.08 µs]
|
||||||
//! short/32 time: [258.31 µs 260.73 µs 263.27 µs]
|
//! ping/32 time: [184.18 µs 185.15 µs 186.36 µs]
|
||||||
//! short/64 time: [511.61 µs 514.44 µs 517.51 µs]
|
//! ping/64 time: [349.86 µs 351.45 µs 353.47 µs]
|
||||||
//! short/128 time: [992.64 µs 998.23 µs 1.0042 ms]
|
//! ping/128 time: [684.53 µs 687.98 µs 692.17 µs]
|
||||||
//! medium/1 time: [110.11 µs 110.50 µs 110.96 µs]
|
//! short/1 time: [31.833 µs 32.126 µs 32.428 µs]
|
||||||
//! medium/2 time: [153.06 µs 153.85 µs 154.99 µs]
|
//! short/2 time: [35.558 µs 35.756 µs 35.992 µs]
|
||||||
//! medium/4 time: [317.51 µs 319.92 µs 322.85 µs]
|
//! short/4 time: [44.850 µs 45.138 µs 45.484 µs]
|
||||||
//! medium/8 time: [638.30 µs 644.68 µs 652.12 µs]
|
//! short/8 time: [65.985 µs 66.379 µs 66.853 µs]
|
||||||
//! medium/16 time: [1.2651 ms 1.2773 ms 1.2914 ms]
|
//! short/16 time: [127.06 µs 127.90 µs 128.87 µs]
|
||||||
//! medium/32 time: [2.5117 ms 2.5410 ms 2.5720 ms]
|
//! short/32 time: [252.98 µs 254.70 µs 256.73 µs]
|
||||||
//! medium/64 time: [4.8088 ms 4.8555 ms 4.9047 ms]
|
//! short/64 time: [497.13 µs 499.86 µs 503.26 µs]
|
||||||
//! medium/128 time: [8.8311 ms 8.9849 ms 9.1263 ms]
|
//! short/128 time: [987.46 µs 993.45 µs 1.0004 ms]
|
||||||
|
//! medium/1 time: [137.91 µs 138.55 µs 139.35 µs]
|
||||||
|
//! medium/2 time: [192.00 µs 192.91 µs 194.07 µs]
|
||||||
|
//! medium/4 time: [389.62 µs 391.55 µs 394.01 µs]
|
||||||
|
//! medium/8 time: [776.80 µs 780.33 µs 784.77 µs]
|
||||||
|
//! medium/16 time: [1.5323 ms 1.5383 ms 1.5459 ms]
|
||||||
|
//! medium/32 time: [3.0120 ms 3.0226 ms 3.0350 ms]
|
||||||
|
//! medium/64 time: [5.7405 ms 5.7787 ms 5.8166 ms]
|
||||||
|
//! medium/128 time: [10.412 ms 10.574 ms 10.718 ms]
|
||||||
//! ```
|
//! ```
|
||||||
|
|
||||||
use anyhow::Context;
|
use anyhow::Context;
|
||||||
use bytes::{Buf, Bytes};
|
use bytes::{Buf, Bytes};
|
||||||
use criterion::{BenchmarkId, Criterion};
|
use criterion::{BenchmarkId, Criterion};
|
||||||
|
use once_cell::sync::Lazy;
|
||||||
use pageserver::{config::PageServerConf, walrecord::NeonWalRecord, walredo::PostgresRedoManager};
|
use pageserver::{config::PageServerConf, walrecord::NeonWalRecord, walredo::PostgresRedoManager};
|
||||||
use pageserver_api::{key::Key, shard::TenantShardId};
|
use pageserver_api::{key::Key, shard::TenantShardId};
|
||||||
use std::{
|
use std::{
|
||||||
|
future::Future,
|
||||||
sync::Arc,
|
sync::Arc,
|
||||||
time::{Duration, Instant},
|
time::{Duration, Instant},
|
||||||
};
|
};
|
||||||
@@ -61,40 +71,59 @@ use tokio::{sync::Barrier, task::JoinSet};
|
|||||||
use utils::{id::TenantId, lsn::Lsn};
|
use utils::{id::TenantId, lsn::Lsn};
|
||||||
|
|
||||||
fn bench(c: &mut Criterion) {
|
fn bench(c: &mut Criterion) {
|
||||||
{
|
macro_rules! bench_group {
|
||||||
let nclients = [1, 2, 4, 8, 16, 32, 64, 128];
|
($name:expr, $redo_work:expr) => {{
|
||||||
for nclients in nclients {
|
let name: &str = $name;
|
||||||
let mut group = c.benchmark_group("short");
|
let nclients = [1, 2, 4, 8, 16, 32, 64, 128];
|
||||||
group.bench_with_input(
|
for nclients in nclients {
|
||||||
BenchmarkId::from_parameter(nclients),
|
let mut group = c.benchmark_group(name);
|
||||||
&nclients,
|
group.bench_with_input(
|
||||||
|b, nclients| {
|
BenchmarkId::from_parameter(nclients),
|
||||||
let redo_work = Arc::new(Request::short_input());
|
&nclients,
|
||||||
b.iter_custom(|iters| bench_impl(Arc::clone(&redo_work), iters, *nclients));
|
|b, nclients| {
|
||||||
},
|
b.iter_custom(|iters| bench_impl($redo_work, iters, *nclients));
|
||||||
);
|
},
|
||||||
}
|
);
|
||||||
}
|
}
|
||||||
{
|
}};
|
||||||
let nclients = [1, 2, 4, 8, 16, 32, 64, 128];
|
|
||||||
for nclients in nclients {
|
|
||||||
let mut group = c.benchmark_group("medium");
|
|
||||||
group.bench_with_input(
|
|
||||||
BenchmarkId::from_parameter(nclients),
|
|
||||||
&nclients,
|
|
||||||
|b, nclients| {
|
|
||||||
let redo_work = Arc::new(Request::medium_input());
|
|
||||||
b.iter_custom(|iters| bench_impl(Arc::clone(&redo_work), iters, *nclients));
|
|
||||||
},
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
//
|
||||||
|
// benchmark the protocol implementation
|
||||||
|
//
|
||||||
|
let pg_version = 14;
|
||||||
|
bench_group!(
|
||||||
|
"ping",
|
||||||
|
Arc::new(move |mgr: Arc<PostgresRedoManager>| async move {
|
||||||
|
let _: () = mgr.ping(pg_version).await.unwrap();
|
||||||
|
})
|
||||||
|
);
|
||||||
|
//
|
||||||
|
// benchmarks with actual record redo
|
||||||
|
//
|
||||||
|
let make_redo_work = |req: &'static Request| {
|
||||||
|
Arc::new(move |mgr: Arc<PostgresRedoManager>| async move {
|
||||||
|
let page = req.execute(&mgr).await.unwrap();
|
||||||
|
assert_eq!(page.remaining(), 8192);
|
||||||
|
})
|
||||||
|
};
|
||||||
|
bench_group!("short", {
|
||||||
|
static REQUEST: Lazy<Request> = Lazy::new(Request::short_input);
|
||||||
|
make_redo_work(&REQUEST)
|
||||||
|
});
|
||||||
|
bench_group!("medium", {
|
||||||
|
static REQUEST: Lazy<Request> = Lazy::new(Request::medium_input);
|
||||||
|
make_redo_work(&REQUEST)
|
||||||
|
});
|
||||||
}
|
}
|
||||||
criterion::criterion_group!(benches, bench);
|
criterion::criterion_group!(benches, bench);
|
||||||
criterion::criterion_main!(benches);
|
criterion::criterion_main!(benches);
|
||||||
|
|
||||||
// Returns the sum of each client's wall-clock time spent executing their share of the n_redos.
|
// Returns the sum of each client's wall-clock time spent executing their share of the n_redos.
|
||||||
fn bench_impl(redo_work: Arc<Request>, n_redos: u64, nclients: u64) -> Duration {
|
fn bench_impl<F, Fut>(redo_work: Arc<F>, n_redos: u64, nclients: u64) -> Duration
|
||||||
|
where
|
||||||
|
F: Fn(Arc<PostgresRedoManager>) -> Fut + Send + Sync + 'static,
|
||||||
|
Fut: Future<Output = ()> + Send + 'static,
|
||||||
|
{
|
||||||
let repo_dir = camino_tempfile::tempdir_in(env!("CARGO_TARGET_TMPDIR")).unwrap();
|
let repo_dir = camino_tempfile::tempdir_in(env!("CARGO_TARGET_TMPDIR")).unwrap();
|
||||||
|
|
||||||
let conf = PageServerConf::dummy_conf(repo_dir.path().to_path_buf());
|
let conf = PageServerConf::dummy_conf(repo_dir.path().to_path_buf());
|
||||||
@@ -135,17 +164,20 @@ fn bench_impl(redo_work: Arc<Request>, n_redos: u64, nclients: u64) -> Duration
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn client(
|
async fn client<F, Fut>(
|
||||||
mgr: Arc<PostgresRedoManager>,
|
mgr: Arc<PostgresRedoManager>,
|
||||||
start: Arc<Barrier>,
|
start: Arc<Barrier>,
|
||||||
redo_work: Arc<Request>,
|
redo_work: Arc<F>,
|
||||||
n_redos: u64,
|
n_redos: u64,
|
||||||
) -> Duration {
|
) -> Duration
|
||||||
|
where
|
||||||
|
F: Fn(Arc<PostgresRedoManager>) -> Fut + Send + Sync + 'static,
|
||||||
|
Fut: Future<Output = ()> + Send + 'static,
|
||||||
|
{
|
||||||
start.wait().await;
|
start.wait().await;
|
||||||
let start = Instant::now();
|
let start = Instant::now();
|
||||||
for _ in 0..n_redos {
|
for _ in 0..n_redos {
|
||||||
let page = redo_work.execute(&mgr).await.unwrap();
|
redo_work(Arc::clone(&mgr)).await;
|
||||||
assert_eq!(page.remaining(), 8192);
|
|
||||||
// The real pageserver will rarely if ever do 2 walredos in a row without
|
// The real pageserver will rarely if ever do 2 walredos in a row without
|
||||||
// yielding to the executor.
|
// yielding to the executor.
|
||||||
tokio::task::yield_now().await;
|
tokio::task::yield_now().await;
|
||||||
|
|||||||
@@ -432,7 +432,7 @@ impl Client {
|
|||||||
self.mgmt_api_endpoint
|
self.mgmt_api_endpoint
|
||||||
);
|
);
|
||||||
|
|
||||||
self.request(Method::POST, &uri, req)
|
self.request(Method::PUT, &uri, req)
|
||||||
.await?
|
.await?
|
||||||
.json()
|
.json()
|
||||||
.await
|
.await
|
||||||
|
|||||||
@@ -12,7 +12,6 @@ anyhow.workspace = true
|
|||||||
async-stream.workspace = true
|
async-stream.workspace = true
|
||||||
clap = { workspace = true, features = ["string"] }
|
clap = { workspace = true, features = ["string"] }
|
||||||
futures.workspace = true
|
futures.workspace = true
|
||||||
git-version.workspace = true
|
|
||||||
itertools.workspace = true
|
itertools.workspace = true
|
||||||
once_cell.workspace = true
|
once_cell.workspace = true
|
||||||
pageserver_api.workspace = true
|
pageserver_api.workspace = true
|
||||||
|
|||||||
@@ -10,7 +10,6 @@ license.workspace = true
|
|||||||
anyhow.workspace = true
|
anyhow.workspace = true
|
||||||
camino.workspace = true
|
camino.workspace = true
|
||||||
clap = { workspace = true, features = ["string"] }
|
clap = { workspace = true, features = ["string"] }
|
||||||
git-version.workspace = true
|
|
||||||
humantime.workspace = true
|
humantime.workspace = true
|
||||||
pageserver = { path = ".." }
|
pageserver = { path = ".." }
|
||||||
pageserver_api.workspace = true
|
pageserver_api.workspace = true
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ use clap::{Arg, ArgAction, Command};
|
|||||||
|
|
||||||
use metrics::launch_timestamp::{set_launch_timestamp_metric, LaunchTimestamp};
|
use metrics::launch_timestamp::{set_launch_timestamp_metric, LaunchTimestamp};
|
||||||
use pageserver::config::PageserverIdentity;
|
use pageserver::config::PageserverIdentity;
|
||||||
use pageserver::control_plane_client::ControlPlaneClient;
|
use pageserver::controller_upcall_client::ControllerUpcallClient;
|
||||||
use pageserver::disk_usage_eviction_task::{self, launch_disk_usage_global_eviction_task};
|
use pageserver::disk_usage_eviction_task::{self, launch_disk_usage_global_eviction_task};
|
||||||
use pageserver::metrics::{STARTUP_DURATION, STARTUP_IS_LOADING};
|
use pageserver::metrics::{STARTUP_DURATION, STARTUP_IS_LOADING};
|
||||||
use pageserver::task_mgr::{COMPUTE_REQUEST_RUNTIME, WALRECEIVER_RUNTIME};
|
use pageserver::task_mgr::{COMPUTE_REQUEST_RUNTIME, WALRECEIVER_RUNTIME};
|
||||||
@@ -396,7 +396,7 @@ fn start_pageserver(
|
|||||||
// Set up deletion queue
|
// Set up deletion queue
|
||||||
let (deletion_queue, deletion_workers) = DeletionQueue::new(
|
let (deletion_queue, deletion_workers) = DeletionQueue::new(
|
||||||
remote_storage.clone(),
|
remote_storage.clone(),
|
||||||
ControlPlaneClient::new(conf, &shutdown_pageserver),
|
ControllerUpcallClient::new(conf, &shutdown_pageserver),
|
||||||
conf,
|
conf,
|
||||||
);
|
);
|
||||||
if let Some(deletion_workers) = deletion_workers {
|
if let Some(deletion_workers) = deletion_workers {
|
||||||
|
|||||||
@@ -324,7 +324,6 @@ impl PageServerConf {
|
|||||||
max_vectored_read_bytes,
|
max_vectored_read_bytes,
|
||||||
image_compression,
|
image_compression,
|
||||||
ephemeral_bytes_per_memory_kb,
|
ephemeral_bytes_per_memory_kb,
|
||||||
compact_level0_phase1_value_access: _,
|
|
||||||
l0_flush,
|
l0_flush,
|
||||||
virtual_file_direct_io,
|
virtual_file_direct_io,
|
||||||
concurrent_tenant_warmup,
|
concurrent_tenant_warmup,
|
||||||
@@ -535,16 +534,6 @@ mod tests {
|
|||||||
.expect("parse_and_validate");
|
.expect("parse_and_validate");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_compactl0_phase1_access_mode_is_ignored_silently() {
|
|
||||||
let input = indoc::indoc! {r#"
|
|
||||||
[compact_level0_phase1_value_access]
|
|
||||||
mode = "streaming-kmerge"
|
|
||||||
validate = "key-lsn-value"
|
|
||||||
"#};
|
|
||||||
toml_edit::de::from_str::<pageserver_api::config::ConfigToml>(input).unwrap();
|
|
||||||
}
|
|
||||||
|
|
||||||
/// If there's a typo in the pageserver config, we'd rather catch that typo
|
/// If there's a typo in the pageserver config, we'd rather catch that typo
|
||||||
/// and fail pageserver startup than silently ignoring the typo, leaving whoever
|
/// and fail pageserver startup than silently ignoring the typo, leaving whoever
|
||||||
/// made it in the belief that their config change is effective.
|
/// made it in the belief that their config change is effective.
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ use std::collections::HashMap;
|
|||||||
|
|
||||||
use futures::Future;
|
use futures::Future;
|
||||||
use pageserver_api::{
|
use pageserver_api::{
|
||||||
controller_api::NodeRegisterRequest,
|
controller_api::{AvailabilityZone, NodeRegisterRequest},
|
||||||
shard::TenantShardId,
|
shard::TenantShardId,
|
||||||
upcall_api::{
|
upcall_api::{
|
||||||
ReAttachRequest, ReAttachResponse, ReAttachResponseTenant, ValidateRequest,
|
ReAttachRequest, ReAttachResponse, ReAttachResponseTenant, ValidateRequest,
|
||||||
@@ -17,9 +17,12 @@ use utils::{backoff, failpoint_support, generation::Generation, id::NodeId};
|
|||||||
use crate::{config::PageServerConf, virtual_file::on_fatal_io_error};
|
use crate::{config::PageServerConf, virtual_file::on_fatal_io_error};
|
||||||
use pageserver_api::config::NodeMetadata;
|
use pageserver_api::config::NodeMetadata;
|
||||||
|
|
||||||
/// The Pageserver's client for using the control plane API: this is a small subset
|
/// The Pageserver's client for using the storage controller upcall API: this is a small API
|
||||||
/// of the overall control plane API, for dealing with generations (see docs/rfcs/025-generation-numbers.md)
|
/// for dealing with generations (see docs/rfcs/025-generation-numbers.md).
|
||||||
pub struct ControlPlaneClient {
|
///
|
||||||
|
/// The server presenting this API may either be the storage controller or some other
|
||||||
|
/// service (such as the Neon control plane) providing a store of generation numbers.
|
||||||
|
pub struct ControllerUpcallClient {
|
||||||
http_client: reqwest::Client,
|
http_client: reqwest::Client,
|
||||||
base_url: Url,
|
base_url: Url,
|
||||||
node_id: NodeId,
|
node_id: NodeId,
|
||||||
@@ -45,7 +48,7 @@ pub trait ControlPlaneGenerationsApi {
|
|||||||
) -> impl Future<Output = Result<HashMap<TenantShardId, bool>, RetryForeverError>> + Send;
|
) -> impl Future<Output = Result<HashMap<TenantShardId, bool>, RetryForeverError>> + Send;
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ControlPlaneClient {
|
impl ControllerUpcallClient {
|
||||||
/// A None return value indicates that the input `conf` object does not have control
|
/// A None return value indicates that the input `conf` object does not have control
|
||||||
/// plane API enabled.
|
/// plane API enabled.
|
||||||
pub fn new(conf: &'static PageServerConf, cancel: &CancellationToken) -> Option<Self> {
|
pub fn new(conf: &'static PageServerConf, cancel: &CancellationToken) -> Option<Self> {
|
||||||
@@ -114,7 +117,7 @@ impl ControlPlaneClient {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ControlPlaneGenerationsApi for ControlPlaneClient {
|
impl ControlPlaneGenerationsApi for ControllerUpcallClient {
|
||||||
/// Block until we get a successful response, or error out if we are shut down
|
/// Block until we get a successful response, or error out if we are shut down
|
||||||
async fn re_attach(
|
async fn re_attach(
|
||||||
&self,
|
&self,
|
||||||
@@ -148,10 +151,10 @@ impl ControlPlaneGenerationsApi for ControlPlaneClient {
|
|||||||
.and_then(|jv| jv.as_str().map(|str| str.to_owned()));
|
.and_then(|jv| jv.as_str().map(|str| str.to_owned()));
|
||||||
|
|
||||||
match az_id_from_metadata {
|
match az_id_from_metadata {
|
||||||
Some(az_id) => Some(az_id),
|
Some(az_id) => Some(AvailabilityZone(az_id)),
|
||||||
None => {
|
None => {
|
||||||
tracing::warn!("metadata.json does not contain an 'availability_zone_id' field");
|
tracing::warn!("metadata.json does not contain an 'availability_zone_id' field");
|
||||||
conf.availability_zone.clone()
|
conf.availability_zone.clone().map(AvailabilityZone)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@@ -216,29 +219,38 @@ impl ControlPlaneGenerationsApi for ControlPlaneClient {
|
|||||||
.join("validate")
|
.join("validate")
|
||||||
.expect("Failed to build validate path");
|
.expect("Failed to build validate path");
|
||||||
|
|
||||||
let request = ValidateRequest {
|
// When sending validate requests, break them up into chunks so that we
|
||||||
tenants: tenants
|
// avoid possible edge cases of generating any HTTP requests that
|
||||||
.into_iter()
|
// require database I/O across many thousands of tenants.
|
||||||
.map(|(id, gen)| ValidateRequestTenant {
|
let mut result: HashMap<TenantShardId, bool> = HashMap::with_capacity(tenants.len());
|
||||||
id,
|
for tenant_chunk in (tenants).chunks(128) {
|
||||||
gen: gen
|
let request = ValidateRequest {
|
||||||
.into()
|
tenants: tenant_chunk
|
||||||
.expect("Generation should always be valid for a Tenant doing deletions"),
|
.iter()
|
||||||
})
|
.map(|(id, generation)| ValidateRequestTenant {
|
||||||
.collect(),
|
id: *id,
|
||||||
};
|
gen: (*generation).into().expect(
|
||||||
|
"Generation should always be valid for a Tenant doing deletions",
|
||||||
|
),
|
||||||
|
})
|
||||||
|
.collect(),
|
||||||
|
};
|
||||||
|
|
||||||
failpoint_support::sleep_millis_async!("control-plane-client-validate-sleep", &self.cancel);
|
failpoint_support::sleep_millis_async!(
|
||||||
if self.cancel.is_cancelled() {
|
"control-plane-client-validate-sleep",
|
||||||
return Err(RetryForeverError::ShuttingDown);
|
&self.cancel
|
||||||
|
);
|
||||||
|
if self.cancel.is_cancelled() {
|
||||||
|
return Err(RetryForeverError::ShuttingDown);
|
||||||
|
}
|
||||||
|
|
||||||
|
let response: ValidateResponse =
|
||||||
|
self.retry_http_forever(&re_attach_path, request).await?;
|
||||||
|
for rt in response.tenants {
|
||||||
|
result.insert(rt.id, rt.valid);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let response: ValidateResponse = self.retry_http_forever(&re_attach_path, request).await?;
|
Ok(result.into_iter().collect())
|
||||||
|
|
||||||
Ok(response
|
|
||||||
.tenants
|
|
||||||
.into_iter()
|
|
||||||
.map(|rt| (rt.id, rt.valid))
|
|
||||||
.collect())
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -6,7 +6,7 @@ use std::collections::HashMap;
|
|||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
use crate::control_plane_client::ControlPlaneGenerationsApi;
|
use crate::controller_upcall_client::ControlPlaneGenerationsApi;
|
||||||
use crate::metrics;
|
use crate::metrics;
|
||||||
use crate::tenant::remote_timeline_client::remote_layer_path;
|
use crate::tenant::remote_timeline_client::remote_layer_path;
|
||||||
use crate::tenant::remote_timeline_client::remote_timeline_path;
|
use crate::tenant::remote_timeline_client::remote_timeline_path;
|
||||||
@@ -622,7 +622,7 @@ impl DeletionQueue {
|
|||||||
/// If remote_storage is None, then the returned workers will also be None.
|
/// If remote_storage is None, then the returned workers will also be None.
|
||||||
pub fn new<C>(
|
pub fn new<C>(
|
||||||
remote_storage: GenericRemoteStorage,
|
remote_storage: GenericRemoteStorage,
|
||||||
control_plane_client: Option<C>,
|
controller_upcall_client: Option<C>,
|
||||||
conf: &'static PageServerConf,
|
conf: &'static PageServerConf,
|
||||||
) -> (Self, Option<DeletionQueueWorkers<C>>)
|
) -> (Self, Option<DeletionQueueWorkers<C>>)
|
||||||
where
|
where
|
||||||
@@ -662,7 +662,7 @@ impl DeletionQueue {
|
|||||||
conf,
|
conf,
|
||||||
backend_rx,
|
backend_rx,
|
||||||
executor_tx,
|
executor_tx,
|
||||||
control_plane_client,
|
controller_upcall_client,
|
||||||
lsn_table.clone(),
|
lsn_table.clone(),
|
||||||
cancel.clone(),
|
cancel.clone(),
|
||||||
),
|
),
|
||||||
@@ -704,7 +704,7 @@ mod test {
|
|||||||
use tokio::task::JoinHandle;
|
use tokio::task::JoinHandle;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
control_plane_client::RetryForeverError,
|
controller_upcall_client::RetryForeverError,
|
||||||
repository::Key,
|
repository::Key,
|
||||||
tenant::{harness::TenantHarness, storage_layer::DeltaLayerName},
|
tenant::{harness::TenantHarness, storage_layer::DeltaLayerName},
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -25,8 +25,8 @@ use tracing::info;
|
|||||||
use tracing::warn;
|
use tracing::warn;
|
||||||
|
|
||||||
use crate::config::PageServerConf;
|
use crate::config::PageServerConf;
|
||||||
use crate::control_plane_client::ControlPlaneGenerationsApi;
|
use crate::controller_upcall_client::ControlPlaneGenerationsApi;
|
||||||
use crate::control_plane_client::RetryForeverError;
|
use crate::controller_upcall_client::RetryForeverError;
|
||||||
use crate::metrics;
|
use crate::metrics;
|
||||||
use crate::virtual_file::MaybeFatalIo;
|
use crate::virtual_file::MaybeFatalIo;
|
||||||
|
|
||||||
@@ -61,7 +61,7 @@ where
|
|||||||
tx: tokio::sync::mpsc::Sender<DeleterMessage>,
|
tx: tokio::sync::mpsc::Sender<DeleterMessage>,
|
||||||
|
|
||||||
// Client for calling into control plane API for validation of deletes
|
// Client for calling into control plane API for validation of deletes
|
||||||
control_plane_client: Option<C>,
|
controller_upcall_client: Option<C>,
|
||||||
|
|
||||||
// DeletionLists which are waiting generation validation. Not safe to
|
// DeletionLists which are waiting generation validation. Not safe to
|
||||||
// execute until [`validate`] has processed them.
|
// execute until [`validate`] has processed them.
|
||||||
@@ -94,7 +94,7 @@ where
|
|||||||
conf: &'static PageServerConf,
|
conf: &'static PageServerConf,
|
||||||
rx: tokio::sync::mpsc::Receiver<ValidatorQueueMessage>,
|
rx: tokio::sync::mpsc::Receiver<ValidatorQueueMessage>,
|
||||||
tx: tokio::sync::mpsc::Sender<DeleterMessage>,
|
tx: tokio::sync::mpsc::Sender<DeleterMessage>,
|
||||||
control_plane_client: Option<C>,
|
controller_upcall_client: Option<C>,
|
||||||
lsn_table: Arc<std::sync::RwLock<VisibleLsnUpdates>>,
|
lsn_table: Arc<std::sync::RwLock<VisibleLsnUpdates>>,
|
||||||
cancel: CancellationToken,
|
cancel: CancellationToken,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
@@ -102,7 +102,7 @@ where
|
|||||||
conf,
|
conf,
|
||||||
rx,
|
rx,
|
||||||
tx,
|
tx,
|
||||||
control_plane_client,
|
controller_upcall_client,
|
||||||
lsn_table,
|
lsn_table,
|
||||||
pending_lists: Vec::new(),
|
pending_lists: Vec::new(),
|
||||||
validated_lists: Vec::new(),
|
validated_lists: Vec::new(),
|
||||||
@@ -145,8 +145,8 @@ where
|
|||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
|
||||||
let tenants_valid = if let Some(control_plane_client) = &self.control_plane_client {
|
let tenants_valid = if let Some(controller_upcall_client) = &self.controller_upcall_client {
|
||||||
match control_plane_client
|
match controller_upcall_client
|
||||||
.validate(tenant_generations.iter().map(|(k, v)| (*k, *v)).collect())
|
.validate(tenant_generations.iter().map(|(k, v)| (*k, *v)).collect())
|
||||||
.await
|
.await
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -589,6 +589,10 @@ async fn timeline_create_handler(
|
|||||||
StatusCode::SERVICE_UNAVAILABLE,
|
StatusCode::SERVICE_UNAVAILABLE,
|
||||||
HttpErrorBody::from_msg(e.to_string()),
|
HttpErrorBody::from_msg(e.to_string()),
|
||||||
),
|
),
|
||||||
|
Err(e @ tenant::CreateTimelineError::AncestorArchived) => json_response(
|
||||||
|
StatusCode::NOT_ACCEPTABLE,
|
||||||
|
HttpErrorBody::from_msg(e.to_string()),
|
||||||
|
),
|
||||||
Err(tenant::CreateTimelineError::ShuttingDown) => json_response(
|
Err(tenant::CreateTimelineError::ShuttingDown) => json_response(
|
||||||
StatusCode::SERVICE_UNAVAILABLE,
|
StatusCode::SERVICE_UNAVAILABLE,
|
||||||
HttpErrorBody::from_msg("tenant shutting down".to_string()),
|
HttpErrorBody::from_msg("tenant shutting down".to_string()),
|
||||||
@@ -2955,7 +2959,7 @@ pub fn make_router(
|
|||||||
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/preserve_initdb_archive",
|
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/preserve_initdb_archive",
|
||||||
|r| api_handler(r, timeline_preserve_initdb_handler),
|
|r| api_handler(r, timeline_preserve_initdb_handler),
|
||||||
)
|
)
|
||||||
.post(
|
.put(
|
||||||
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/archival_config",
|
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/archival_config",
|
||||||
|r| api_handler(r, timeline_archival_config_handler),
|
|r| api_handler(r, timeline_archival_config_handler),
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ pub mod basebackup;
|
|||||||
pub mod config;
|
pub mod config;
|
||||||
pub mod consumption_metrics;
|
pub mod consumption_metrics;
|
||||||
pub mod context;
|
pub mod context;
|
||||||
pub mod control_plane_client;
|
pub mod controller_upcall_client;
|
||||||
pub mod deletion_queue;
|
pub mod deletion_queue;
|
||||||
pub mod disk_usage_eviction_task;
|
pub mod disk_usage_eviction_task;
|
||||||
pub mod http;
|
pub mod http;
|
||||||
|
|||||||
@@ -8,6 +8,8 @@ use metrics::{
|
|||||||
};
|
};
|
||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
use pageserver_api::shard::TenantShardId;
|
use pageserver_api::shard::TenantShardId;
|
||||||
|
use postgres_backend::{is_expected_io_error, QueryError};
|
||||||
|
use pq_proto::framed::ConnectionError;
|
||||||
use strum::{EnumCount, VariantNames};
|
use strum::{EnumCount, VariantNames};
|
||||||
use strum_macros::{IntoStaticStr, VariantNames};
|
use strum_macros::{IntoStaticStr, VariantNames};
|
||||||
use tracing::warn;
|
use tracing::warn;
|
||||||
@@ -1383,7 +1385,7 @@ impl SmgrQueryTimePerTimeline {
|
|||||||
&'a self,
|
&'a self,
|
||||||
op: SmgrQueryType,
|
op: SmgrQueryType,
|
||||||
ctx: &'c RequestContext,
|
ctx: &'c RequestContext,
|
||||||
) -> Option<impl Drop + '_> {
|
) -> Option<impl Drop + 'a> {
|
||||||
let start = Instant::now();
|
let start = Instant::now();
|
||||||
|
|
||||||
self.global_started[op as usize].inc();
|
self.global_started[op as usize].inc();
|
||||||
@@ -1508,6 +1510,7 @@ static COMPUTE_STARTUP_BUCKETS: Lazy<[f64; 28]> = Lazy::new(|| {
|
|||||||
pub(crate) struct BasebackupQueryTime {
|
pub(crate) struct BasebackupQueryTime {
|
||||||
ok: Histogram,
|
ok: Histogram,
|
||||||
error: Histogram,
|
error: Histogram,
|
||||||
|
client_error: Histogram,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) static BASEBACKUP_QUERY_TIME: Lazy<BasebackupQueryTime> = Lazy::new(|| {
|
pub(crate) static BASEBACKUP_QUERY_TIME: Lazy<BasebackupQueryTime> = Lazy::new(|| {
|
||||||
@@ -1521,6 +1524,7 @@ pub(crate) static BASEBACKUP_QUERY_TIME: Lazy<BasebackupQueryTime> = Lazy::new(|
|
|||||||
BasebackupQueryTime {
|
BasebackupQueryTime {
|
||||||
ok: vec.get_metric_with_label_values(&["ok"]).unwrap(),
|
ok: vec.get_metric_with_label_values(&["ok"]).unwrap(),
|
||||||
error: vec.get_metric_with_label_values(&["error"]).unwrap(),
|
error: vec.get_metric_with_label_values(&["error"]).unwrap(),
|
||||||
|
client_error: vec.get_metric_with_label_values(&["client_error"]).unwrap(),
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -1534,7 +1538,7 @@ impl BasebackupQueryTime {
|
|||||||
pub(crate) fn start_recording<'c: 'a, 'a>(
|
pub(crate) fn start_recording<'c: 'a, 'a>(
|
||||||
&'a self,
|
&'a self,
|
||||||
ctx: &'c RequestContext,
|
ctx: &'c RequestContext,
|
||||||
) -> BasebackupQueryTimeOngoingRecording<'_, '_> {
|
) -> BasebackupQueryTimeOngoingRecording<'a, 'a> {
|
||||||
let start = Instant::now();
|
let start = Instant::now();
|
||||||
match ctx.micros_spent_throttled.open() {
|
match ctx.micros_spent_throttled.open() {
|
||||||
Ok(()) => (),
|
Ok(()) => (),
|
||||||
@@ -1557,7 +1561,7 @@ impl BasebackupQueryTime {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, 'c> BasebackupQueryTimeOngoingRecording<'a, 'c> {
|
impl<'a, 'c> BasebackupQueryTimeOngoingRecording<'a, 'c> {
|
||||||
pub(crate) fn observe<T, E>(self, res: &Result<T, E>) {
|
pub(crate) fn observe<T>(self, res: &Result<T, QueryError>) {
|
||||||
let elapsed = self.start.elapsed();
|
let elapsed = self.start.elapsed();
|
||||||
let ex_throttled = self
|
let ex_throttled = self
|
||||||
.ctx
|
.ctx
|
||||||
@@ -1576,10 +1580,15 @@ impl<'a, 'c> BasebackupQueryTimeOngoingRecording<'a, 'c> {
|
|||||||
elapsed
|
elapsed
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
let metric = if res.is_ok() {
|
// If you want to change how a specific error is categorized, also change it in `log_query_error`.
|
||||||
&self.parent.ok
|
let metric = match res {
|
||||||
} else {
|
Ok(_) => &self.parent.ok,
|
||||||
&self.parent.error
|
Err(QueryError::Disconnected(ConnectionError::Io(io_error)))
|
||||||
|
if is_expected_io_error(io_error) =>
|
||||||
|
{
|
||||||
|
&self.parent.client_error
|
||||||
|
}
|
||||||
|
Err(_) => &self.parent.error,
|
||||||
};
|
};
|
||||||
metric.observe(ex_throttled.as_secs_f64());
|
metric.observe(ex_throttled.as_secs_f64());
|
||||||
}
|
}
|
||||||
@@ -3208,45 +3217,38 @@ pub(crate) mod tenant_throttling {
|
|||||||
|
|
||||||
impl TimelineGet {
|
impl TimelineGet {
|
||||||
pub(crate) fn new(tenant_shard_id: &TenantShardId) -> Self {
|
pub(crate) fn new(tenant_shard_id: &TenantShardId) -> Self {
|
||||||
|
let per_tenant_label_values = &[
|
||||||
|
KIND,
|
||||||
|
&tenant_shard_id.tenant_id.to_string(),
|
||||||
|
&tenant_shard_id.shard_slug().to_string(),
|
||||||
|
];
|
||||||
TimelineGet {
|
TimelineGet {
|
||||||
count_accounted_start: {
|
count_accounted_start: {
|
||||||
GlobalAndPerTenantIntCounter {
|
GlobalAndPerTenantIntCounter {
|
||||||
global: COUNT_ACCOUNTED_START.with_label_values(&[KIND]),
|
global: COUNT_ACCOUNTED_START.with_label_values(&[KIND]),
|
||||||
per_tenant: COUNT_ACCOUNTED_START_PER_TENANT.with_label_values(&[
|
per_tenant: COUNT_ACCOUNTED_START_PER_TENANT
|
||||||
KIND,
|
.with_label_values(per_tenant_label_values),
|
||||||
&tenant_shard_id.tenant_id.to_string(),
|
|
||||||
&tenant_shard_id.shard_slug().to_string(),
|
|
||||||
]),
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
count_accounted_finish: {
|
count_accounted_finish: {
|
||||||
GlobalAndPerTenantIntCounter {
|
GlobalAndPerTenantIntCounter {
|
||||||
global: COUNT_ACCOUNTED_FINISH.with_label_values(&[KIND]),
|
global: COUNT_ACCOUNTED_FINISH.with_label_values(&[KIND]),
|
||||||
per_tenant: COUNT_ACCOUNTED_FINISH_PER_TENANT.with_label_values(&[
|
per_tenant: COUNT_ACCOUNTED_FINISH_PER_TENANT
|
||||||
KIND,
|
.with_label_values(per_tenant_label_values),
|
||||||
&tenant_shard_id.tenant_id.to_string(),
|
|
||||||
&tenant_shard_id.shard_slug().to_string(),
|
|
||||||
]),
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
wait_time: {
|
wait_time: {
|
||||||
GlobalAndPerTenantIntCounter {
|
GlobalAndPerTenantIntCounter {
|
||||||
global: WAIT_USECS.with_label_values(&[KIND]),
|
global: WAIT_USECS.with_label_values(&[KIND]),
|
||||||
per_tenant: WAIT_USECS_PER_TENANT.with_label_values(&[
|
per_tenant: WAIT_USECS_PER_TENANT
|
||||||
KIND,
|
.with_label_values(per_tenant_label_values),
|
||||||
&tenant_shard_id.tenant_id.to_string(),
|
|
||||||
&tenant_shard_id.shard_slug().to_string(),
|
|
||||||
]),
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
count_throttled: {
|
count_throttled: {
|
||||||
GlobalAndPerTenantIntCounter {
|
GlobalAndPerTenantIntCounter {
|
||||||
global: WAIT_COUNT.with_label_values(&[KIND]),
|
global: WAIT_COUNT.with_label_values(&[KIND]),
|
||||||
per_tenant: WAIT_COUNT_PER_TENANT.with_label_values(&[
|
per_tenant: WAIT_COUNT_PER_TENANT
|
||||||
KIND,
|
.with_label_values(per_tenant_label_values),
|
||||||
&tenant_shard_id.tenant_id.to_string(),
|
|
||||||
&tenant_shard_id.shard_slug().to_string(),
|
|
||||||
]),
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -840,6 +840,36 @@ impl Timeline {
|
|||||||
Ok(total_size * BLCKSZ as u64)
|
Ok(total_size * BLCKSZ as u64)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Get a KeySpace that covers all the Keys that are in use at AND below the given LSN. This is only used
|
||||||
|
/// for gc-compaction.
|
||||||
|
///
|
||||||
|
/// gc-compaction cannot use the same `collect_keyspace` function as the legacy compaction because it
|
||||||
|
/// processes data at multiple LSNs and needs to be aware of the fact that some key ranges might need to
|
||||||
|
/// be kept only for a specific range of LSNs.
|
||||||
|
///
|
||||||
|
/// Consider the case that the user created branches at LSN 10 and 20, where the user created a table A at
|
||||||
|
/// LSN 10 and dropped that table at LSN 20. `collect_keyspace` at LSN 10 will return the key range
|
||||||
|
/// corresponding to that table, while LSN 20 won't. The keyspace info at a single LSN is not enough to
|
||||||
|
/// determine which keys to retain/drop for gc-compaction.
|
||||||
|
///
|
||||||
|
/// For now, it only drops AUX-v1 keys. But in the future, the function will be extended to return the keyspace
|
||||||
|
/// to be retained for each of the branch LSNs.
|
||||||
|
///
|
||||||
|
/// The return value is (dense keyspace, sparse keyspace).
|
||||||
|
pub(crate) async fn collect_gc_compaction_keyspace(
|
||||||
|
&self,
|
||||||
|
) -> Result<(KeySpace, SparseKeySpace), CollectKeySpaceError> {
|
||||||
|
let metadata_key_begin = Key::metadata_key_range().start;
|
||||||
|
let aux_v1_key = AUX_FILES_KEY;
|
||||||
|
let dense_keyspace = KeySpace {
|
||||||
|
ranges: vec![Key::MIN..aux_v1_key, aux_v1_key.next()..metadata_key_begin],
|
||||||
|
};
|
||||||
|
Ok((
|
||||||
|
dense_keyspace,
|
||||||
|
SparseKeySpace(KeySpace::single(Key::metadata_key_range())),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
///
|
///
|
||||||
/// Get a KeySpace that covers all the Keys that are in use at the given LSN.
|
/// Get a KeySpace that covers all the Keys that are in use at the given LSN.
|
||||||
/// Anything that's not listed may be removed from the underlying storage (from
|
/// Anything that's not listed may be removed from the underlying storage (from
|
||||||
|
|||||||
@@ -18,7 +18,6 @@ use camino::Utf8Path;
|
|||||||
use camino::Utf8PathBuf;
|
use camino::Utf8PathBuf;
|
||||||
use enumset::EnumSet;
|
use enumset::EnumSet;
|
||||||
use futures::stream::FuturesUnordered;
|
use futures::stream::FuturesUnordered;
|
||||||
use futures::FutureExt;
|
|
||||||
use futures::StreamExt;
|
use futures::StreamExt;
|
||||||
use pageserver_api::models;
|
use pageserver_api::models;
|
||||||
use pageserver_api::models::AuxFilePolicy;
|
use pageserver_api::models::AuxFilePolicy;
|
||||||
@@ -34,6 +33,7 @@ use remote_storage::GenericRemoteStorage;
|
|||||||
use remote_storage::TimeoutOrCancel;
|
use remote_storage::TimeoutOrCancel;
|
||||||
use std::collections::BTreeMap;
|
use std::collections::BTreeMap;
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
|
use std::future::Future;
|
||||||
use std::sync::Weak;
|
use std::sync::Weak;
|
||||||
use std::time::SystemTime;
|
use std::time::SystemTime;
|
||||||
use storage_broker::BrokerClientChannel;
|
use storage_broker::BrokerClientChannel;
|
||||||
@@ -563,6 +563,8 @@ pub enum CreateTimelineError {
|
|||||||
AncestorLsn(anyhow::Error),
|
AncestorLsn(anyhow::Error),
|
||||||
#[error("ancestor timeline is not active")]
|
#[error("ancestor timeline is not active")]
|
||||||
AncestorNotActive,
|
AncestorNotActive,
|
||||||
|
#[error("ancestor timeline is archived")]
|
||||||
|
AncestorArchived,
|
||||||
#[error("tenant shutting down")]
|
#[error("tenant shutting down")]
|
||||||
ShuttingDown,
|
ShuttingDown,
|
||||||
#[error(transparent)]
|
#[error(transparent)]
|
||||||
@@ -1031,13 +1033,9 @@ impl Tenant {
|
|||||||
}
|
}
|
||||||
|
|
||||||
Ok(TenantPreload {
|
Ok(TenantPreload {
|
||||||
timelines: Self::load_timeline_metadata(
|
timelines: self
|
||||||
self,
|
.load_timelines_metadata(remote_timeline_ids, remote_storage, cancel)
|
||||||
remote_timeline_ids,
|
.await?,
|
||||||
remote_storage,
|
|
||||||
cancel,
|
|
||||||
)
|
|
||||||
.await?,
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1303,7 +1301,7 @@ impl Tenant {
|
|||||||
.await
|
.await
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn load_timeline_metadata(
|
async fn load_timelines_metadata(
|
||||||
self: &Arc<Tenant>,
|
self: &Arc<Tenant>,
|
||||||
timeline_ids: HashSet<TimelineId>,
|
timeline_ids: HashSet<TimelineId>,
|
||||||
remote_storage: &GenericRemoteStorage,
|
remote_storage: &GenericRemoteStorage,
|
||||||
@@ -1311,33 +1309,10 @@ impl Tenant {
|
|||||||
) -> anyhow::Result<HashMap<TimelineId, TimelinePreload>> {
|
) -> anyhow::Result<HashMap<TimelineId, TimelinePreload>> {
|
||||||
let mut part_downloads = JoinSet::new();
|
let mut part_downloads = JoinSet::new();
|
||||||
for timeline_id in timeline_ids {
|
for timeline_id in timeline_ids {
|
||||||
let client = RemoteTimelineClient::new(
|
|
||||||
remote_storage.clone(),
|
|
||||||
self.deletion_queue_client.clone(),
|
|
||||||
self.conf,
|
|
||||||
self.tenant_shard_id,
|
|
||||||
timeline_id,
|
|
||||||
self.generation,
|
|
||||||
);
|
|
||||||
let cancel_clone = cancel.clone();
|
let cancel_clone = cancel.clone();
|
||||||
part_downloads.spawn(
|
part_downloads.spawn(
|
||||||
async move {
|
self.load_timeline_metadata(timeline_id, remote_storage.clone(), cancel_clone)
|
||||||
debug!("starting index part download");
|
.instrument(info_span!("download_index_part", %timeline_id)),
|
||||||
|
|
||||||
let index_part = client.download_index_file(&cancel_clone).await;
|
|
||||||
|
|
||||||
debug!("finished index part download");
|
|
||||||
|
|
||||||
Result::<_, anyhow::Error>::Ok(TimelinePreload {
|
|
||||||
client,
|
|
||||||
timeline_id,
|
|
||||||
index_part,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
.map(move |res| {
|
|
||||||
res.with_context(|| format!("download index part for timeline {timeline_id}"))
|
|
||||||
})
|
|
||||||
.instrument(info_span!("download_index_part", %timeline_id)),
|
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1348,8 +1323,7 @@ impl Tenant {
|
|||||||
next = part_downloads.join_next() => {
|
next = part_downloads.join_next() => {
|
||||||
match next {
|
match next {
|
||||||
Some(result) => {
|
Some(result) => {
|
||||||
let preload_result = result.context("join preload task")?;
|
let preload = result.context("join preload task")?;
|
||||||
let preload = preload_result?;
|
|
||||||
timeline_preloads.insert(preload.timeline_id, preload);
|
timeline_preloads.insert(preload.timeline_id, preload);
|
||||||
},
|
},
|
||||||
None => {
|
None => {
|
||||||
@@ -1366,6 +1340,36 @@ impl Tenant {
|
|||||||
Ok(timeline_preloads)
|
Ok(timeline_preloads)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn load_timeline_metadata(
|
||||||
|
self: &Arc<Tenant>,
|
||||||
|
timeline_id: TimelineId,
|
||||||
|
remote_storage: GenericRemoteStorage,
|
||||||
|
cancel: CancellationToken,
|
||||||
|
) -> impl Future<Output = TimelinePreload> {
|
||||||
|
let client = RemoteTimelineClient::new(
|
||||||
|
remote_storage.clone(),
|
||||||
|
self.deletion_queue_client.clone(),
|
||||||
|
self.conf,
|
||||||
|
self.tenant_shard_id,
|
||||||
|
timeline_id,
|
||||||
|
self.generation,
|
||||||
|
);
|
||||||
|
async move {
|
||||||
|
debug_assert_current_span_has_tenant_and_timeline_id();
|
||||||
|
debug!("starting index part download");
|
||||||
|
|
||||||
|
let index_part = client.download_index_file(&cancel).await;
|
||||||
|
|
||||||
|
debug!("finished index part download");
|
||||||
|
|
||||||
|
TimelinePreload {
|
||||||
|
client,
|
||||||
|
timeline_id,
|
||||||
|
index_part,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub(crate) async fn apply_timeline_archival_config(
|
pub(crate) async fn apply_timeline_archival_config(
|
||||||
&self,
|
&self,
|
||||||
timeline_id: TimelineId,
|
timeline_id: TimelineId,
|
||||||
@@ -1696,6 +1700,11 @@ impl Tenant {
|
|||||||
return Err(CreateTimelineError::AncestorNotActive);
|
return Err(CreateTimelineError::AncestorNotActive);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ancestor_timeline.is_archived() == Some(true) {
|
||||||
|
info!("tried to branch archived timeline");
|
||||||
|
return Err(CreateTimelineError::AncestorArchived);
|
||||||
|
}
|
||||||
|
|
||||||
if let Some(lsn) = ancestor_start_lsn.as_mut() {
|
if let Some(lsn) = ancestor_start_lsn.as_mut() {
|
||||||
*lsn = lsn.align();
|
*lsn = lsn.align();
|
||||||
|
|
||||||
@@ -1966,9 +1975,6 @@ impl Tenant {
|
|||||||
TenantState::Activating(_) | TenantState::Active | TenantState::Broken { .. } | TenantState::Stopping { .. } => {
|
TenantState::Activating(_) | TenantState::Active | TenantState::Broken { .. } | TenantState::Stopping { .. } => {
|
||||||
panic!("caller is responsible for calling activate() only on Loading / Attaching tenants, got {state:?}", state = current_state);
|
panic!("caller is responsible for calling activate() only on Loading / Attaching tenants, got {state:?}", state = current_state);
|
||||||
}
|
}
|
||||||
TenantState::Loading => {
|
|
||||||
*current_state = TenantState::Activating(ActivatingFrom::Loading);
|
|
||||||
}
|
|
||||||
TenantState::Attaching => {
|
TenantState::Attaching => {
|
||||||
*current_state = TenantState::Activating(ActivatingFrom::Attaching);
|
*current_state = TenantState::Activating(ActivatingFrom::Attaching);
|
||||||
}
|
}
|
||||||
@@ -2149,7 +2155,7 @@ impl Tenant {
|
|||||||
async fn set_stopping(
|
async fn set_stopping(
|
||||||
&self,
|
&self,
|
||||||
progress: completion::Barrier,
|
progress: completion::Barrier,
|
||||||
allow_transition_from_loading: bool,
|
_allow_transition_from_loading: bool,
|
||||||
allow_transition_from_attaching: bool,
|
allow_transition_from_attaching: bool,
|
||||||
) -> Result<(), SetStoppingError> {
|
) -> Result<(), SetStoppingError> {
|
||||||
let mut rx = self.state.subscribe();
|
let mut rx = self.state.subscribe();
|
||||||
@@ -2164,7 +2170,6 @@ impl Tenant {
|
|||||||
);
|
);
|
||||||
false
|
false
|
||||||
}
|
}
|
||||||
TenantState::Loading => allow_transition_from_loading,
|
|
||||||
TenantState::Active | TenantState::Broken { .. } | TenantState::Stopping { .. } => true,
|
TenantState::Active | TenantState::Broken { .. } | TenantState::Stopping { .. } => true,
|
||||||
})
|
})
|
||||||
.await
|
.await
|
||||||
@@ -2183,13 +2188,6 @@ impl Tenant {
|
|||||||
*current_state = TenantState::Stopping { progress };
|
*current_state = TenantState::Stopping { progress };
|
||||||
true
|
true
|
||||||
}
|
}
|
||||||
TenantState::Loading => {
|
|
||||||
if !allow_transition_from_loading {
|
|
||||||
unreachable!("3we ensured above that we're done with activation, and, there is no re-activation")
|
|
||||||
};
|
|
||||||
*current_state = TenantState::Stopping { progress };
|
|
||||||
true
|
|
||||||
}
|
|
||||||
TenantState::Active => {
|
TenantState::Active => {
|
||||||
// FIXME: due to time-of-check vs time-of-use issues, it can happen that new timelines
|
// FIXME: due to time-of-check vs time-of-use issues, it can happen that new timelines
|
||||||
// are created after the transition to Stopping. That's harmless, as the Timelines
|
// are created after the transition to Stopping. That's harmless, as the Timelines
|
||||||
@@ -2245,7 +2243,7 @@ impl Tenant {
|
|||||||
// The load & attach routines own the tenant state until it has reached `Active`.
|
// The load & attach routines own the tenant state until it has reached `Active`.
|
||||||
// So, wait until it's done.
|
// So, wait until it's done.
|
||||||
rx.wait_for(|state| match state {
|
rx.wait_for(|state| match state {
|
||||||
TenantState::Activating(_) | TenantState::Loading | TenantState::Attaching => {
|
TenantState::Activating(_) | TenantState::Attaching => {
|
||||||
info!(
|
info!(
|
||||||
"waiting for {} to turn Active|Broken|Stopping",
|
"waiting for {} to turn Active|Broken|Stopping",
|
||||||
<&'static str>::from(state)
|
<&'static str>::from(state)
|
||||||
@@ -2265,7 +2263,7 @@ impl Tenant {
|
|||||||
let reason = reason.to_string();
|
let reason = reason.to_string();
|
||||||
self.state.send_modify(|current_state| {
|
self.state.send_modify(|current_state| {
|
||||||
match *current_state {
|
match *current_state {
|
||||||
TenantState::Activating(_) | TenantState::Loading | TenantState::Attaching => {
|
TenantState::Activating(_) | TenantState::Attaching => {
|
||||||
unreachable!("we ensured above that we're done with activation, and, there is no re-activation")
|
unreachable!("we ensured above that we're done with activation, and, there is no re-activation")
|
||||||
}
|
}
|
||||||
TenantState::Active => {
|
TenantState::Active => {
|
||||||
@@ -2309,7 +2307,7 @@ impl Tenant {
|
|||||||
loop {
|
loop {
|
||||||
let current_state = receiver.borrow_and_update().clone();
|
let current_state = receiver.borrow_and_update().clone();
|
||||||
match current_state {
|
match current_state {
|
||||||
TenantState::Loading | TenantState::Attaching | TenantState::Activating(_) => {
|
TenantState::Attaching | TenantState::Activating(_) => {
|
||||||
// in these states, there's a chance that we can reach ::Active
|
// in these states, there's a chance that we can reach ::Active
|
||||||
self.activate_now();
|
self.activate_now();
|
||||||
match timeout_cancellable(timeout, &self.cancel, receiver.changed()).await {
|
match timeout_cancellable(timeout, &self.cancel, receiver.changed()).await {
|
||||||
@@ -3625,7 +3623,7 @@ impl Tenant {
|
|||||||
start_lsn: Lsn,
|
start_lsn: Lsn,
|
||||||
ancestor: Option<Arc<Timeline>>,
|
ancestor: Option<Arc<Timeline>>,
|
||||||
last_aux_file_policy: Option<AuxFilePolicy>,
|
last_aux_file_policy: Option<AuxFilePolicy>,
|
||||||
) -> anyhow::Result<UninitializedTimeline> {
|
) -> anyhow::Result<UninitializedTimeline<'a>> {
|
||||||
let tenant_shard_id = self.tenant_shard_id;
|
let tenant_shard_id = self.tenant_shard_id;
|
||||||
|
|
||||||
let resources = self.build_timeline_resources(new_timeline_id);
|
let resources = self.build_timeline_resources(new_timeline_id);
|
||||||
@@ -4142,7 +4140,7 @@ pub(crate) mod harness {
|
|||||||
let walredo_mgr = Arc::new(WalRedoManager::from(TestRedoManager));
|
let walredo_mgr = Arc::new(WalRedoManager::from(TestRedoManager));
|
||||||
|
|
||||||
let tenant = Arc::new(Tenant::new(
|
let tenant = Arc::new(Tenant::new(
|
||||||
TenantState::Loading,
|
TenantState::Attaching,
|
||||||
self.conf,
|
self.conf,
|
||||||
AttachedTenantConf::try_from(LocationConf::attached_single(
|
AttachedTenantConf::try_from(LocationConf::attached_single(
|
||||||
TenantConfOpt::from(self.tenant_conf.clone()),
|
TenantConfOpt::from(self.tenant_conf.clone()),
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ use itertools::Itertools;
|
|||||||
use super::storage_layer::LayerName;
|
use super::storage_layer::LayerName;
|
||||||
|
|
||||||
/// Checks whether a layer map is valid (i.e., is a valid result of the current compaction algorithm if nothing goes wrong).
|
/// Checks whether a layer map is valid (i.e., is a valid result of the current compaction algorithm if nothing goes wrong).
|
||||||
|
///
|
||||||
/// The function checks if we can split the LSN range of a delta layer only at the LSNs of the delta layers. For example,
|
/// The function checks if we can split the LSN range of a delta layer only at the LSNs of the delta layers. For example,
|
||||||
///
|
///
|
||||||
/// ```plain
|
/// ```plain
|
||||||
|
|||||||
@@ -30,8 +30,8 @@ use utils::{backoff, completion, crashsafe};
|
|||||||
|
|
||||||
use crate::config::PageServerConf;
|
use crate::config::PageServerConf;
|
||||||
use crate::context::{DownloadBehavior, RequestContext};
|
use crate::context::{DownloadBehavior, RequestContext};
|
||||||
use crate::control_plane_client::{
|
use crate::controller_upcall_client::{
|
||||||
ControlPlaneClient, ControlPlaneGenerationsApi, RetryForeverError,
|
ControlPlaneGenerationsApi, ControllerUpcallClient, RetryForeverError,
|
||||||
};
|
};
|
||||||
use crate::deletion_queue::DeletionQueueClient;
|
use crate::deletion_queue::DeletionQueueClient;
|
||||||
use crate::http::routes::ACTIVE_TENANT_TIMEOUT;
|
use crate::http::routes::ACTIVE_TENANT_TIMEOUT;
|
||||||
@@ -122,7 +122,7 @@ pub(crate) enum ShardSelector {
|
|||||||
Known(ShardIndex),
|
Known(ShardIndex),
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A convenience for use with the re_attach ControlPlaneClient function: rather
|
/// A convenience for use with the re_attach ControllerUpcallClient function: rather
|
||||||
/// than the serializable struct, we build this enum that encapsulates
|
/// than the serializable struct, we build this enum that encapsulates
|
||||||
/// the invariant that attached tenants always have generations.
|
/// the invariant that attached tenants always have generations.
|
||||||
///
|
///
|
||||||
@@ -341,7 +341,7 @@ async fn init_load_generations(
|
|||||||
"Emergency mode! Tenants will be attached unsafely using their last known generation"
|
"Emergency mode! Tenants will be attached unsafely using their last known generation"
|
||||||
);
|
);
|
||||||
emergency_generations(tenant_confs)
|
emergency_generations(tenant_confs)
|
||||||
} else if let Some(client) = ControlPlaneClient::new(conf, cancel) {
|
} else if let Some(client) = ControllerUpcallClient::new(conf, cancel) {
|
||||||
info!("Calling control plane API to re-attach tenants");
|
info!("Calling control plane API to re-attach tenants");
|
||||||
// If we are configured to use the control plane API, then it is the source of truth for what tenants to load.
|
// If we are configured to use the control plane API, then it is the source of truth for what tenants to load.
|
||||||
match client.re_attach(conf).await {
|
match client.re_attach(conf).await {
|
||||||
|
|||||||
@@ -1,13 +1,13 @@
|
|||||||
//! Common traits and structs for layers
|
//! Common traits and structs for layers
|
||||||
|
|
||||||
pub mod delta_layer;
|
pub mod delta_layer;
|
||||||
|
pub mod filter_iterator;
|
||||||
pub mod image_layer;
|
pub mod image_layer;
|
||||||
pub mod inmemory_layer;
|
pub mod inmemory_layer;
|
||||||
pub(crate) mod layer;
|
pub(crate) mod layer;
|
||||||
mod layer_desc;
|
mod layer_desc;
|
||||||
mod layer_name;
|
mod layer_name;
|
||||||
pub mod merge_iterator;
|
pub mod merge_iterator;
|
||||||
|
|
||||||
pub mod split_writer;
|
pub mod split_writer;
|
||||||
|
|
||||||
use crate::context::{AccessStatsBehavior, RequestContext};
|
use crate::context::{AccessStatsBehavior, RequestContext};
|
||||||
|
|||||||
@@ -39,7 +39,7 @@ use crate::tenant::disk_btree::{
|
|||||||
use crate::tenant::storage_layer::layer::S3_UPLOAD_LIMIT;
|
use crate::tenant::storage_layer::layer::S3_UPLOAD_LIMIT;
|
||||||
use crate::tenant::timeline::GetVectoredError;
|
use crate::tenant::timeline::GetVectoredError;
|
||||||
use crate::tenant::vectored_blob_io::{
|
use crate::tenant::vectored_blob_io::{
|
||||||
BlobFlag, StreamingVectoredReadPlanner, VectoredBlobReader, VectoredRead,
|
BlobFlag, BufView, StreamingVectoredReadPlanner, VectoredBlobReader, VectoredRead,
|
||||||
VectoredReadCoalesceMode, VectoredReadPlanner,
|
VectoredReadCoalesceMode, VectoredReadPlanner,
|
||||||
};
|
};
|
||||||
use crate::tenant::PageReconstructError;
|
use crate::tenant::PageReconstructError;
|
||||||
@@ -1021,13 +1021,30 @@ impl DeltaLayerInner {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
let view = BufView::new_slice(&blobs_buf.buf);
|
||||||
for meta in blobs_buf.blobs.iter().rev() {
|
for meta in blobs_buf.blobs.iter().rev() {
|
||||||
if Some(meta.meta.key) == ignore_key_with_err {
|
if Some(meta.meta.key) == ignore_key_with_err {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
let blob_read = meta.read(&view).await;
|
||||||
|
let blob_read = match blob_read {
|
||||||
|
Ok(buf) => buf,
|
||||||
|
Err(e) => {
|
||||||
|
reconstruct_state.on_key_error(
|
||||||
|
meta.meta.key,
|
||||||
|
PageReconstructError::Other(anyhow!(e).context(format!(
|
||||||
|
"Failed to decompress blob from virtual file {}",
|
||||||
|
self.file.path,
|
||||||
|
))),
|
||||||
|
);
|
||||||
|
|
||||||
|
ignore_key_with_err = Some(meta.meta.key);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let value = Value::des(&blob_read);
|
||||||
|
|
||||||
let value = Value::des(&blobs_buf.buf[meta.start..meta.end]);
|
|
||||||
let value = match value {
|
let value = match value {
|
||||||
Ok(v) => v,
|
Ok(v) => v,
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
@@ -1243,21 +1260,21 @@ impl DeltaLayerInner {
|
|||||||
buf.reserve(read.size());
|
buf.reserve(read.size());
|
||||||
let res = reader.read_blobs(&read, buf, ctx).await?;
|
let res = reader.read_blobs(&read, buf, ctx).await?;
|
||||||
|
|
||||||
|
let view = BufView::new_slice(&res.buf);
|
||||||
|
|
||||||
for blob in res.blobs {
|
for blob in res.blobs {
|
||||||
let key = blob.meta.key;
|
let key = blob.meta.key;
|
||||||
let lsn = blob.meta.lsn;
|
let lsn = blob.meta.lsn;
|
||||||
let data = &res.buf[blob.start..blob.end];
|
|
||||||
|
let data = blob.read(&view).await?;
|
||||||
|
|
||||||
#[cfg(debug_assertions)]
|
#[cfg(debug_assertions)]
|
||||||
Value::des(data)
|
Value::des(&data)
|
||||||
.with_context(|| {
|
.with_context(|| {
|
||||||
format!(
|
format!(
|
||||||
"blob failed to deserialize for {}@{}, {}..{}: {:?}",
|
"blob failed to deserialize for {}: {:?}",
|
||||||
blob.meta.key,
|
blob,
|
||||||
blob.meta.lsn,
|
utils::Hex(&data)
|
||||||
blob.start,
|
|
||||||
blob.end,
|
|
||||||
utils::Hex(data)
|
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
.unwrap();
|
.unwrap();
|
||||||
@@ -1265,15 +1282,15 @@ impl DeltaLayerInner {
|
|||||||
// is it an image or will_init walrecord?
|
// is it an image or will_init walrecord?
|
||||||
// FIXME: this could be handled by threading the BlobRef to the
|
// FIXME: this could be handled by threading the BlobRef to the
|
||||||
// VectoredReadBuilder
|
// VectoredReadBuilder
|
||||||
let will_init = crate::repository::ValueBytes::will_init(data)
|
let will_init = crate::repository::ValueBytes::will_init(&data)
|
||||||
.inspect_err(|_e| {
|
.inspect_err(|_e| {
|
||||||
#[cfg(feature = "testing")]
|
#[cfg(feature = "testing")]
|
||||||
tracing::error!(data=?utils::Hex(data), err=?_e, %key, %lsn, "failed to parse will_init out of serialized value");
|
tracing::error!(data=?utils::Hex(&data), err=?_e, %key, %lsn, "failed to parse will_init out of serialized value");
|
||||||
})
|
})
|
||||||
.unwrap_or(false);
|
.unwrap_or(false);
|
||||||
|
|
||||||
per_blob_copy.clear();
|
per_blob_copy.clear();
|
||||||
per_blob_copy.extend_from_slice(data);
|
per_blob_copy.extend_from_slice(&data);
|
||||||
|
|
||||||
let (tmp, res) = writer
|
let (tmp, res) = writer
|
||||||
.put_value_bytes(
|
.put_value_bytes(
|
||||||
@@ -1538,8 +1555,11 @@ impl<'a> DeltaLayerIterator<'a> {
|
|||||||
.read_blobs(&plan, buf, self.ctx)
|
.read_blobs(&plan, buf, self.ctx)
|
||||||
.await?;
|
.await?;
|
||||||
let frozen_buf = blobs_buf.buf.freeze();
|
let frozen_buf = blobs_buf.buf.freeze();
|
||||||
|
let view = BufView::new_bytes(frozen_buf);
|
||||||
for meta in blobs_buf.blobs.iter() {
|
for meta in blobs_buf.blobs.iter() {
|
||||||
let value = Value::des(&frozen_buf[meta.start..meta.end])?;
|
let blob_read = meta.read(&view).await?;
|
||||||
|
let value = Value::des(&blob_read)?;
|
||||||
|
|
||||||
next_batch.push_back((meta.meta.key, meta.meta.lsn, value));
|
next_batch.push_back((meta.meta.key, meta.meta.lsn, value));
|
||||||
}
|
}
|
||||||
self.key_values_batch = next_batch;
|
self.key_values_batch = next_batch;
|
||||||
@@ -1916,9 +1936,13 @@ pub(crate) mod test {
|
|||||||
let blobs_buf = vectored_blob_reader
|
let blobs_buf = vectored_blob_reader
|
||||||
.read_blobs(&read, buf.take().expect("Should have a buffer"), &ctx)
|
.read_blobs(&read, buf.take().expect("Should have a buffer"), &ctx)
|
||||||
.await?;
|
.await?;
|
||||||
|
let view = BufView::new_slice(&blobs_buf.buf);
|
||||||
for meta in blobs_buf.blobs.iter() {
|
for meta in blobs_buf.blobs.iter() {
|
||||||
let value = &blobs_buf.buf[meta.start..meta.end];
|
let value = meta.read(&view).await?;
|
||||||
assert_eq!(value, entries_meta.index[&(meta.meta.key, meta.meta.lsn)]);
|
assert_eq!(
|
||||||
|
&value[..],
|
||||||
|
&entries_meta.index[&(meta.meta.key, meta.meta.lsn)]
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
buf = Some(blobs_buf.buf);
|
buf = Some(blobs_buf.buf);
|
||||||
|
|||||||
205
pageserver/src/tenant/storage_layer/filter_iterator.rs
Normal file
205
pageserver/src/tenant/storage_layer/filter_iterator.rs
Normal file
@@ -0,0 +1,205 @@
|
|||||||
|
use std::ops::Range;
|
||||||
|
|
||||||
|
use anyhow::bail;
|
||||||
|
use pageserver_api::{
|
||||||
|
key::Key,
|
||||||
|
keyspace::{KeySpace, SparseKeySpace},
|
||||||
|
};
|
||||||
|
use utils::lsn::Lsn;
|
||||||
|
|
||||||
|
use crate::repository::Value;
|
||||||
|
|
||||||
|
use super::merge_iterator::MergeIterator;
|
||||||
|
|
||||||
|
/// A filter iterator over merge iterators (and can be easily extended to other types of iterators).
|
||||||
|
///
|
||||||
|
/// The iterator will skip any keys not included in the keyspace filter. In other words, the keyspace filter contains the keys
|
||||||
|
/// to be retained.
|
||||||
|
pub struct FilterIterator<'a> {
|
||||||
|
inner: MergeIterator<'a>,
|
||||||
|
retain_key_filters: Vec<Range<Key>>,
|
||||||
|
current_filter_idx: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> FilterIterator<'a> {
|
||||||
|
pub fn create(
|
||||||
|
inner: MergeIterator<'a>,
|
||||||
|
dense_keyspace: KeySpace,
|
||||||
|
sparse_keyspace: SparseKeySpace,
|
||||||
|
) -> anyhow::Result<Self> {
|
||||||
|
let mut retain_key_filters = Vec::new();
|
||||||
|
retain_key_filters.extend(dense_keyspace.ranges);
|
||||||
|
retain_key_filters.extend(sparse_keyspace.0.ranges);
|
||||||
|
retain_key_filters.sort_by(|a, b| a.start.cmp(&b.start));
|
||||||
|
// Verify key filters are non-overlapping and sorted
|
||||||
|
for window in retain_key_filters.windows(2) {
|
||||||
|
if window[0].end > window[1].start {
|
||||||
|
bail!(
|
||||||
|
"Key filters are overlapping: {:?} and {:?}",
|
||||||
|
window[0],
|
||||||
|
window[1]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(Self {
|
||||||
|
inner,
|
||||||
|
retain_key_filters,
|
||||||
|
current_filter_idx: 0,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn next(&mut self) -> anyhow::Result<Option<(Key, Lsn, Value)>> {
|
||||||
|
while let Some(item) = self.inner.next().await? {
|
||||||
|
while self.current_filter_idx < self.retain_key_filters.len()
|
||||||
|
&& item.0 >= self.retain_key_filters[self.current_filter_idx].end
|
||||||
|
{
|
||||||
|
// [filter region] [filter region] [filter region]
|
||||||
|
// ^ item
|
||||||
|
// ^ current filter
|
||||||
|
self.current_filter_idx += 1;
|
||||||
|
// [filter region] [filter region] [filter region]
|
||||||
|
// ^ item
|
||||||
|
// ^ current filter
|
||||||
|
}
|
||||||
|
if self.current_filter_idx >= self.retain_key_filters.len() {
|
||||||
|
// We already exhausted all filters, so we should return now
|
||||||
|
// [filter region] [filter region] [filter region]
|
||||||
|
// ^ item
|
||||||
|
// ^ current filter (nothing)
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
if self.retain_key_filters[self.current_filter_idx].contains(&item.0) {
|
||||||
|
// [filter region] [filter region] [filter region]
|
||||||
|
// ^ item
|
||||||
|
// ^ current filter
|
||||||
|
return Ok(Some(item));
|
||||||
|
}
|
||||||
|
// If the key is not contained in the key retaining filters, continue to the next item.
|
||||||
|
// [filter region] [filter region] [filter region]
|
||||||
|
// ^ item
|
||||||
|
// ^ current filter
|
||||||
|
}
|
||||||
|
Ok(None)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    use itertools::Itertools;
    use pageserver_api::key::Key;
    use utils::lsn::Lsn;

    use crate::{
        tenant::{
            harness::{TenantHarness, TIMELINE_ID},
            storage_layer::delta_layer::test::produce_delta_layer,
        },
        DEFAULT_PG_VERSION,
    };

    /// Drains `filter_iter` and asserts that it yields exactly the entries in `expect`,
    /// in the same order.
    async fn assert_filter_iter_equal(
        filter_iter: &mut FilterIterator<'_>,
        expect: &[(Key, Lsn, Value)],
    ) {
        let mut remaining = expect.iter();
        loop {
            let actual = filter_iter.next().await.unwrap();
            let wanted = remaining.next();
            // Both sequences must end at the same position.
            assert_eq!(actual.is_some(), wanted.is_some());
            let (Some((k1, l1, v1)), Some((k2, l2, v2))) = (actual, wanted) else {
                break;
            };
            assert_eq!(&k1, k2);
            assert_eq!(l1, *l2);
            assert_eq!(&v1, v2);
        }
    }

    #[tokio::test]
    async fn filter_keyspace_iterator() {
        use crate::repository::Value;
        use bytes::Bytes;

        let harness = TenantHarness::create("filter_iterator_filter_keyspace_iterator")
            .await
            .unwrap();
        let (tenant, ctx) = harness.load().await;

        let tline = tenant
            .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)
            .await
            .unwrap();

        fn get_key(id: u32) -> Key {
            let mut key = Key::from_hex("000000000033333333444444445500000000").unwrap();
            key.field6 = id;
            key
        }

        const N: usize = 100;
        let test_deltas1 = (0..N)
            .map(|idx| {
                (
                    get_key(idx as u32),
                    Lsn(0x20 * ((idx as u64) % 10 + 1)),
                    Value::Image(Bytes::from(format!("img{idx:05}"))),
                )
            })
            .collect_vec();
        let resident_layer_1 = produce_delta_layer(&tenant, &tline, test_deltas1.clone(), &ctx)
            .await
            .unwrap();

        // Each case pairs the retained key ranges with the index ranges of `test_deltas1`
        // that are expected to survive the filter.
        let cases = [
            (
                // Ranges reaching past the last key and a range with no keys at all.
                vec![
                    get_key(5)..get_key(10),
                    get_key(20)..get_key(30),
                    get_key(90)..get_key(110),
                    get_key(1000)..get_key(2000),
                ],
                vec![5usize..10, 20..30, 90..100],
            ),
            (
                // Ranges starting at the first key and ending before the last key.
                vec![
                    get_key(0)..get_key(10),
                    get_key(20)..get_key(30),
                    get_key(90)..get_key(95),
                ],
                vec![0usize..10, 20..30, 90..95],
            ),
        ];

        for (ranges, expected_indices) in cases {
            let merge_iter = MergeIterator::create(
                &[resident_layer_1.get_as_delta(&ctx).await.unwrap()],
                &[],
                &ctx,
            );

            let mut filter_iter = FilterIterator::create(
                merge_iter,
                KeySpace { ranges },
                SparseKeySpace(KeySpace::default()),
            )
            .unwrap();

            let mut result = Vec::new();
            for indices in expected_indices {
                result.extend(test_deltas1[indices].iter().cloned());
            }
            assert_filter_iter_equal(&mut filter_iter, &result).await;
        }
    }
}
|
||||||
@@ -36,7 +36,8 @@ use crate::tenant::disk_btree::{
|
|||||||
};
|
};
|
||||||
use crate::tenant::timeline::GetVectoredError;
|
use crate::tenant::timeline::GetVectoredError;
|
||||||
use crate::tenant::vectored_blob_io::{
|
use crate::tenant::vectored_blob_io::{
|
||||||
BlobFlag, StreamingVectoredReadPlanner, VectoredBlobReader, VectoredRead, VectoredReadPlanner,
|
BlobFlag, BufView, StreamingVectoredReadPlanner, VectoredBlobReader, VectoredRead,
|
||||||
|
VectoredReadPlanner,
|
||||||
};
|
};
|
||||||
use crate::tenant::PageReconstructError;
|
use crate::tenant::PageReconstructError;
|
||||||
use crate::virtual_file::owned_buffers_io::io_buf_ext::IoBufExt;
|
use crate::virtual_file::owned_buffers_io::io_buf_ext::IoBufExt;
|
||||||
@@ -547,15 +548,15 @@ impl ImageLayerInner {
|
|||||||
|
|
||||||
let buf = BytesMut::with_capacity(buf_size);
|
let buf = BytesMut::with_capacity(buf_size);
|
||||||
let blobs_buf = vectored_blob_reader.read_blobs(&read, buf, ctx).await?;
|
let blobs_buf = vectored_blob_reader.read_blobs(&read, buf, ctx).await?;
|
||||||
|
|
||||||
let frozen_buf = blobs_buf.buf.freeze();
|
let frozen_buf = blobs_buf.buf.freeze();
|
||||||
|
let view = BufView::new_bytes(frozen_buf);
|
||||||
|
|
||||||
for meta in blobs_buf.blobs.iter() {
|
for meta in blobs_buf.blobs.iter() {
|
||||||
let img_buf = frozen_buf.slice(meta.start..meta.end);
|
let img_buf = meta.read(&view).await?;
|
||||||
|
|
||||||
key_count += 1;
|
key_count += 1;
|
||||||
writer
|
writer
|
||||||
.put_image(meta.meta.key, img_buf, ctx)
|
.put_image(meta.meta.key, img_buf.into_bytes(), ctx)
|
||||||
.await
|
.await
|
||||||
.context(format!("Storing key {}", meta.meta.key))?;
|
.context(format!("Storing key {}", meta.meta.key))?;
|
||||||
}
|
}
|
||||||
@@ -602,13 +603,28 @@ impl ImageLayerInner {
|
|||||||
match res {
|
match res {
|
||||||
Ok(blobs_buf) => {
|
Ok(blobs_buf) => {
|
||||||
let frozen_buf = blobs_buf.buf.freeze();
|
let frozen_buf = blobs_buf.buf.freeze();
|
||||||
|
let view = BufView::new_bytes(frozen_buf);
|
||||||
for meta in blobs_buf.blobs.iter() {
|
for meta in blobs_buf.blobs.iter() {
|
||||||
let img_buf = frozen_buf.slice(meta.start..meta.end);
|
let img_buf = meta.read(&view).await;
|
||||||
|
|
||||||
|
let img_buf = match img_buf {
|
||||||
|
Ok(img_buf) => img_buf,
|
||||||
|
Err(e) => {
|
||||||
|
reconstruct_state.on_key_error(
|
||||||
|
meta.meta.key,
|
||||||
|
PageReconstructError::Other(anyhow!(e).context(format!(
|
||||||
|
"Failed to decompress blob from virtual file {}",
|
||||||
|
self.file.path,
|
||||||
|
))),
|
||||||
|
);
|
||||||
|
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
};
|
||||||
reconstruct_state.update_key(
|
reconstruct_state.update_key(
|
||||||
&meta.meta.key,
|
&meta.meta.key,
|
||||||
self.lsn,
|
self.lsn,
|
||||||
Value::Image(img_buf),
|
Value::Image(img_buf.into_bytes()),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1025,10 +1041,15 @@ impl<'a> ImageLayerIterator<'a> {
|
|||||||
let blobs_buf = vectored_blob_reader
|
let blobs_buf = vectored_blob_reader
|
||||||
.read_blobs(&plan, buf, self.ctx)
|
.read_blobs(&plan, buf, self.ctx)
|
||||||
.await?;
|
.await?;
|
||||||
let frozen_buf: Bytes = blobs_buf.buf.freeze();
|
let frozen_buf = blobs_buf.buf.freeze();
|
||||||
|
let view = BufView::new_bytes(frozen_buf);
|
||||||
for meta in blobs_buf.blobs.iter() {
|
for meta in blobs_buf.blobs.iter() {
|
||||||
let img_buf = frozen_buf.slice(meta.start..meta.end);
|
let img_buf = meta.read(&view).await?;
|
||||||
next_batch.push_back((meta.meta.key, self.image_layer.lsn, Value::Image(img_buf)));
|
next_batch.push_back((
|
||||||
|
meta.meta.key,
|
||||||
|
self.image_layer.lsn,
|
||||||
|
Value::Image(img_buf.into_bytes()),
|
||||||
|
));
|
||||||
}
|
}
|
||||||
self.key_values_batch = next_batch;
|
self.key_values_batch = next_batch;
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|||||||
@@ -481,8 +481,7 @@ async fn ingest_housekeeping_loop(tenant: Arc<Tenant>, cancel: CancellationToken
|
|||||||
let allowed_rps = tenant.timeline_get_throttle.steady_rps();
|
let allowed_rps = tenant.timeline_get_throttle.steady_rps();
|
||||||
let delta = now - prev;
|
let delta = now - prev;
|
||||||
info!(
|
info!(
|
||||||
n_seconds=%format_args!("{:.3}",
|
n_seconds=%format_args!("{:.3}", delta.as_secs_f64()),
|
||||||
delta.as_secs_f64()),
|
|
||||||
count_accounted = count_accounted_finish, // don't break existing log scraping
|
count_accounted = count_accounted_finish, // don't break existing log scraping
|
||||||
count_throttled,
|
count_throttled,
|
||||||
sum_throttled_usecs,
|
sum_throttled_usecs,
|
||||||
|
|||||||
@@ -112,7 +112,7 @@ use pageserver_api::reltag::RelTag;
|
|||||||
use pageserver_api::shard::ShardIndex;
|
use pageserver_api::shard::ShardIndex;
|
||||||
|
|
||||||
use postgres_connection::PgConnectionConfig;
|
use postgres_connection::PgConnectionConfig;
|
||||||
use postgres_ffi::to_pg_timestamp;
|
use postgres_ffi::{to_pg_timestamp, v14::xlog_utils, WAL_SEGMENT_SIZE};
|
||||||
use utils::{
|
use utils::{
|
||||||
completion,
|
completion,
|
||||||
generation::Generation,
|
generation::Generation,
|
||||||
@@ -1337,6 +1337,10 @@ impl Timeline {
|
|||||||
_ctx: &RequestContext,
|
_ctx: &RequestContext,
|
||||||
) -> anyhow::Result<LsnLease> {
|
) -> anyhow::Result<LsnLease> {
|
||||||
let lease = {
|
let lease = {
|
||||||
|
// Normalize the requested LSN to be aligned, and move to the first record
|
||||||
|
// if it points to the beginning of the page (header).
|
||||||
|
let lsn = xlog_utils::normalize_lsn(lsn, WAL_SEGMENT_SIZE);
|
||||||
|
|
||||||
let mut gc_info = self.gc_info.write().unwrap();
|
let mut gc_info = self.gc_info.write().unwrap();
|
||||||
|
|
||||||
let valid_until = SystemTime::now() + length;
|
let valid_until = SystemTime::now() + length;
|
||||||
@@ -3597,7 +3601,7 @@ impl Timeline {
|
|||||||
ctx,
|
ctx,
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
.map_err(|e| FlushLayerError::from_anyhow(self, e))?;
|
.map_err(|e| FlushLayerError::from_anyhow(self, e.into()))?;
|
||||||
|
|
||||||
if self.cancel.is_cancelled() {
|
if self.cancel.is_cancelled() {
|
||||||
return Err(FlushLayerError::Cancelled);
|
return Err(FlushLayerError::Cancelled);
|
||||||
@@ -3836,16 +3840,20 @@ impl Timeline {
|
|||||||
partition_size: u64,
|
partition_size: u64,
|
||||||
flags: EnumSet<CompactFlags>,
|
flags: EnumSet<CompactFlags>,
|
||||||
ctx: &RequestContext,
|
ctx: &RequestContext,
|
||||||
) -> anyhow::Result<((KeyPartitioning, SparseKeyPartitioning), Lsn)> {
|
) -> Result<((KeyPartitioning, SparseKeyPartitioning), Lsn), CompactionError> {
|
||||||
let Ok(mut partitioning_guard) = self.partitioning.try_lock() else {
|
let Ok(mut partitioning_guard) = self.partitioning.try_lock() else {
|
||||||
// NB: there are two callers, one is the compaction task, of which there is only one per struct Tenant and hence Timeline.
|
// NB: there are two callers, one is the compaction task, of which there is only one per struct Tenant and hence Timeline.
|
||||||
// The other is the initdb optimization in flush_frozen_layer, used by `bootstrap_timeline`, which runs before `.activate()`
|
// The other is the initdb optimization in flush_frozen_layer, used by `bootstrap_timeline`, which runs before `.activate()`
|
||||||
// and hence before the compaction task starts.
|
// and hence before the compaction task starts.
|
||||||
anyhow::bail!("repartition() called concurrently, this should not happen");
|
return Err(CompactionError::Other(anyhow!(
|
||||||
|
"repartition() called concurrently, this should not happen"
|
||||||
|
)));
|
||||||
};
|
};
|
||||||
let ((dense_partition, sparse_partition), partition_lsn) = &*partitioning_guard;
|
let ((dense_partition, sparse_partition), partition_lsn) = &*partitioning_guard;
|
||||||
if lsn < *partition_lsn {
|
if lsn < *partition_lsn {
|
||||||
anyhow::bail!("repartition() called with LSN going backwards, this should not happen");
|
return Err(CompactionError::Other(anyhow!(
|
||||||
|
"repartition() called with LSN going backwards, this should not happen"
|
||||||
|
)));
|
||||||
}
|
}
|
||||||
|
|
||||||
let distance = lsn.0 - partition_lsn.0;
|
let distance = lsn.0 - partition_lsn.0;
|
||||||
@@ -4447,6 +4455,12 @@ pub(crate) enum CompactionError {
|
|||||||
Other(anyhow::Error),
|
Other(anyhow::Error),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl CompactionError {
|
||||||
|
pub fn is_cancelled(&self) -> bool {
|
||||||
|
matches!(self, CompactionError::ShuttingDown)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl From<CollectKeySpaceError> for CompactionError {
|
impl From<CollectKeySpaceError> for CompactionError {
|
||||||
fn from(err: CollectKeySpaceError) -> Self {
|
fn from(err: CollectKeySpaceError) -> Self {
|
||||||
match err {
|
match err {
|
||||||
|
|||||||
@@ -31,6 +31,7 @@ use crate::context::{AccessStatsBehavior, RequestContext, RequestContextBuilder}
|
|||||||
use crate::page_cache;
|
use crate::page_cache;
|
||||||
use crate::tenant::checks::check_valid_layermap;
|
use crate::tenant::checks::check_valid_layermap;
|
||||||
use crate::tenant::remote_timeline_client::WaitCompletionError;
|
use crate::tenant::remote_timeline_client::WaitCompletionError;
|
||||||
|
use crate::tenant::storage_layer::filter_iterator::FilterIterator;
|
||||||
use crate::tenant::storage_layer::merge_iterator::MergeIterator;
|
use crate::tenant::storage_layer::merge_iterator::MergeIterator;
|
||||||
use crate::tenant::storage_layer::split_writer::{
|
use crate::tenant::storage_layer::split_writer::{
|
||||||
SplitDeltaLayerWriter, SplitImageLayerWriter, SplitWriterResult,
|
SplitDeltaLayerWriter, SplitImageLayerWriter, SplitWriterResult,
|
||||||
@@ -389,7 +390,7 @@ impl Timeline {
|
|||||||
// error but continue.
|
// error but continue.
|
||||||
//
|
//
|
||||||
// Suppress error when it's due to cancellation
|
// Suppress error when it's due to cancellation
|
||||||
if !self.cancel.is_cancelled() {
|
if !self.cancel.is_cancelled() && !err.is_cancelled() {
|
||||||
tracing::error!("could not compact, repartitioning keyspace failed: {err:?}");
|
tracing::error!("could not compact, repartitioning keyspace failed: {err:?}");
|
||||||
}
|
}
|
||||||
(1, false)
|
(1, false)
|
||||||
@@ -1772,6 +1773,7 @@ impl Timeline {
|
|||||||
gc_cutoff,
|
gc_cutoff,
|
||||||
lowest_retain_lsn
|
lowest_retain_lsn
|
||||||
);
|
);
|
||||||
|
|
||||||
// Step 1: (In the future) construct a k-merge iterator over all layers. For now, simply collect all keys + LSNs.
|
// Step 1: (In the future) construct a k-merge iterator over all layers. For now, simply collect all keys + LSNs.
|
||||||
// Also, verify if the layer map can be split by drawing a horizontal line at every LSN start/end split point.
|
// Also, verify if the layer map can be split by drawing a horizontal line at every LSN start/end split point.
|
||||||
let mut lsn_split_point = BTreeSet::new(); // TODO: use a better data structure (range tree / range set?)
|
let mut lsn_split_point = BTreeSet::new(); // TODO: use a better data structure (range tree / range set?)
|
||||||
@@ -1820,7 +1822,12 @@ impl Timeline {
|
|||||||
image_layers.push(layer);
|
image_layers.push(layer);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let mut merge_iter = MergeIterator::create(&delta_layers, &image_layers, ctx);
|
let (dense_ks, sparse_ks) = self.collect_gc_compaction_keyspace().await?;
|
||||||
|
let mut merge_iter = FilterIterator::create(
|
||||||
|
MergeIterator::create(&delta_layers, &image_layers, ctx),
|
||||||
|
dense_ks,
|
||||||
|
sparse_ks,
|
||||||
|
)?;
|
||||||
// Step 2: Produce images+deltas. TODO: ensure newly-produced delta does not overlap with other deltas.
|
// Step 2: Produce images+deltas. TODO: ensure newly-produced delta does not overlap with other deltas.
|
||||||
// Data of the same key.
|
// Data of the same key.
|
||||||
let mut accumulated_values = Vec::new();
|
let mut accumulated_values = Vec::new();
|
||||||
|
|||||||
@@ -30,8 +30,8 @@ use crate::{
|
|||||||
pgdatadir_mapping::CollectKeySpaceError,
|
pgdatadir_mapping::CollectKeySpaceError,
|
||||||
task_mgr::{self, TaskKind, BACKGROUND_RUNTIME},
|
task_mgr::{self, TaskKind, BACKGROUND_RUNTIME},
|
||||||
tenant::{
|
tenant::{
|
||||||
storage_layer::LayerVisibilityHint, tasks::BackgroundLoopKind, timeline::EvictionError,
|
size::CalculateSyntheticSizeError, storage_layer::LayerVisibilityHint,
|
||||||
LogicalSizeCalculationCause, Tenant,
|
tasks::BackgroundLoopKind, timeline::EvictionError, LogicalSizeCalculationCause, Tenant,
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -557,6 +557,8 @@ impl Timeline {
|
|||||||
gather_result = gather => {
|
gather_result = gather => {
|
||||||
match gather_result {
|
match gather_result {
|
||||||
Ok(_) => {},
|
Ok(_) => {},
|
||||||
|
// It can happen sometimes that we hit this instead of the cancellation token firing above
|
||||||
|
Err(CalculateSyntheticSizeError::Cancelled) => {}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
// We don't care about the result, but, if it failed, we should log it,
|
// We don't care about the result, but, if it failed, we should log it,
|
||||||
// since consumption metric might be hitting the cached value and
|
// since consumption metric might be hitting the cached value and
|
||||||
|
|||||||
@@ -16,8 +16,9 @@
|
|||||||
//! Note that the vectored blob api does *not* go through the page cache.
|
//! Note that the vectored blob api does *not* go through the page cache.
|
||||||
|
|
||||||
use std::collections::BTreeMap;
|
use std::collections::BTreeMap;
|
||||||
|
use std::ops::Deref;
|
||||||
|
|
||||||
use bytes::BytesMut;
|
use bytes::{Bytes, BytesMut};
|
||||||
use pageserver_api::key::Key;
|
use pageserver_api::key::Key;
|
||||||
use tokio::io::AsyncWriteExt;
|
use tokio::io::AsyncWriteExt;
|
||||||
use tokio_epoll_uring::BoundedBuf;
|
use tokio_epoll_uring::BoundedBuf;
|
||||||
@@ -35,11 +36,123 @@ pub struct BlobMeta {
|
|||||||
pub lsn: Lsn,
|
pub lsn: Lsn,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Blob offsets into [`VectoredBlobsBuf::buf`]
|
/// A view into the vectored blobs read buffer.
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub(crate) enum BufView<'a> {
|
||||||
|
Slice(&'a [u8]),
|
||||||
|
Bytes(bytes::Bytes),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> BufView<'a> {
|
||||||
|
/// Creates a new slice-based view on the blob.
|
||||||
|
pub fn new_slice(slice: &'a [u8]) -> Self {
|
||||||
|
Self::Slice(slice)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Creates a new [`bytes::Bytes`]-based view on the blob.
|
||||||
|
pub fn new_bytes(bytes: bytes::Bytes) -> Self {
|
||||||
|
Self::Bytes(bytes)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Convert the view into `Bytes`.
|
||||||
|
///
|
||||||
|
/// If using slice as the underlying storage, the copy will be an O(n) operation.
|
||||||
|
pub fn into_bytes(self) -> Bytes {
|
||||||
|
match self {
|
||||||
|
BufView::Slice(slice) => Bytes::copy_from_slice(slice),
|
||||||
|
BufView::Bytes(bytes) => bytes,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Creates a sub-view of the blob based on the range.
|
||||||
|
fn view(&self, range: std::ops::Range<usize>) -> Self {
|
||||||
|
match self {
|
||||||
|
BufView::Slice(slice) => BufView::Slice(&slice[range]),
|
||||||
|
BufView::Bytes(bytes) => BufView::Bytes(bytes.slice(range)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Deref for BufView<'a> {
|
||||||
|
type Target = [u8];
|
||||||
|
|
||||||
|
fn deref(&self) -> &Self::Target {
|
||||||
|
match self {
|
||||||
|
BufView::Slice(slice) => slice,
|
||||||
|
BufView::Bytes(bytes) => bytes,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> AsRef<[u8]> for BufView<'a> {
|
||||||
|
fn as_ref(&self) -> &[u8] {
|
||||||
|
match self {
|
||||||
|
BufView::Slice(slice) => slice,
|
||||||
|
BufView::Bytes(bytes) => bytes.as_ref(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> From<&'a [u8]> for BufView<'a> {
|
||||||
|
fn from(value: &'a [u8]) -> Self {
|
||||||
|
Self::new_slice(value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<Bytes> for BufView<'_> {
|
||||||
|
fn from(value: Bytes) -> Self {
|
||||||
|
Self::new_bytes(value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Blob offsets into [`VectoredBlobsBuf::buf`]. The byte range is potentially compressed,
|
||||||
|
/// subject to [`VectoredBlob::compression_bits`].
|
||||||
pub struct VectoredBlob {
|
pub struct VectoredBlob {
|
||||||
pub start: usize,
|
/// Blob metadata.
|
||||||
pub end: usize,
|
|
||||||
pub meta: BlobMeta,
|
pub meta: BlobMeta,
|
||||||
|
/// Start offset.
|
||||||
|
start: usize,
|
||||||
|
/// End offset.
|
||||||
|
end: usize,
|
||||||
|
/// Compression used on the blob.
|
||||||
|
compression_bits: u8,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl VectoredBlob {
|
||||||
|
/// Reads a decompressed view of the blob.
|
||||||
|
pub(crate) async fn read<'a>(&self, buf: &BufView<'a>) -> Result<BufView<'a>, std::io::Error> {
|
||||||
|
let view = buf.view(self.start..self.end);
|
||||||
|
|
||||||
|
match self.compression_bits {
|
||||||
|
BYTE_UNCOMPRESSED => Ok(view),
|
||||||
|
BYTE_ZSTD => {
|
||||||
|
let mut decompressed_vec = Vec::new();
|
||||||
|
let mut decoder =
|
||||||
|
async_compression::tokio::write::ZstdDecoder::new(&mut decompressed_vec);
|
||||||
|
decoder.write_all(&view).await?;
|
||||||
|
decoder.flush().await?;
|
||||||
|
// Zero-copy conversion from `Vec` to `Bytes`
|
||||||
|
Ok(BufView::new_bytes(Bytes::from(decompressed_vec)))
|
||||||
|
}
|
||||||
|
bits => {
|
||||||
|
let error = std::io::Error::new(
|
||||||
|
std::io::ErrorKind::InvalidData,
|
||||||
|
format!("Failed to decompress blob for {}@{}, {}..{}: invalid compression byte {bits:x}", self.meta.key, self.meta.lsn, self.start, self.end),
|
||||||
|
);
|
||||||
|
Err(error)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::fmt::Display for VectoredBlob {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"{}@{}, {}..{}",
|
||||||
|
self.meta.key, self.meta.lsn, self.start, self.end
|
||||||
|
)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return type of [`VectoredBlobReader::read_blobs`]
|
/// Return type of [`VectoredBlobReader::read_blobs`]
|
||||||
@@ -514,7 +627,7 @@ impl<'a> VectoredBlobReader<'a> {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut buf = self
|
let buf = self
|
||||||
.file
|
.file
|
||||||
.read_exact_at(buf.slice(0..read.size()), read.start, ctx)
|
.read_exact_at(buf.slice(0..read.size()), read.start, ctx)
|
||||||
.await?
|
.await?
|
||||||
@@ -529,9 +642,6 @@ impl<'a> VectoredBlobReader<'a> {
|
|||||||
// of a blob is implicit: the start of the next blob if one exists
|
// of a blob is implicit: the start of the next blob if one exists
|
||||||
// or the end of the read.
|
// or the end of the read.
|
||||||
|
|
||||||
// Some scratch space, put here for reusing the allocation
|
|
||||||
let mut decompressed_vec = Vec::new();
|
|
||||||
|
|
||||||
for (blob_start, meta) in blobs_at {
|
for (blob_start, meta) in blobs_at {
|
||||||
let blob_start_in_buf = blob_start - start_offset;
|
let blob_start_in_buf = blob_start - start_offset;
|
||||||
let first_len_byte = buf[blob_start_in_buf as usize];
|
let first_len_byte = buf[blob_start_in_buf as usize];
|
||||||
@@ -557,35 +667,14 @@ impl<'a> VectoredBlobReader<'a> {
|
|||||||
)
|
)
|
||||||
};
|
};
|
||||||
|
|
||||||
let start_raw = blob_start_in_buf + size_length;
|
let start = (blob_start_in_buf + size_length) as usize;
|
||||||
let end_raw = start_raw + blob_size;
|
let end = start + blob_size as usize;
|
||||||
let (start, end);
|
|
||||||
if compression_bits == BYTE_UNCOMPRESSED {
|
|
||||||
start = start_raw as usize;
|
|
||||||
end = end_raw as usize;
|
|
||||||
} else if compression_bits == BYTE_ZSTD {
|
|
||||||
let mut decoder =
|
|
||||||
async_compression::tokio::write::ZstdDecoder::new(&mut decompressed_vec);
|
|
||||||
decoder
|
|
||||||
.write_all(&buf[start_raw as usize..end_raw as usize])
|
|
||||||
.await?;
|
|
||||||
decoder.flush().await?;
|
|
||||||
start = buf.len();
|
|
||||||
buf.extend_from_slice(&decompressed_vec);
|
|
||||||
end = buf.len();
|
|
||||||
decompressed_vec.clear();
|
|
||||||
} else {
|
|
||||||
let error = std::io::Error::new(
|
|
||||||
std::io::ErrorKind::InvalidData,
|
|
||||||
format!("invalid compression byte {compression_bits:x}"),
|
|
||||||
);
|
|
||||||
return Err(error);
|
|
||||||
}
|
|
||||||
|
|
||||||
metas.push(VectoredBlob {
|
metas.push(VectoredBlob {
|
||||||
start,
|
start,
|
||||||
end,
|
end,
|
||||||
meta: *meta,
|
meta: *meta,
|
||||||
|
compression_bits,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1020,8 +1109,13 @@ mod tests {
|
|||||||
let result = vectored_blob_reader.read_blobs(&read, buf, &ctx).await?;
|
let result = vectored_blob_reader.read_blobs(&read, buf, &ctx).await?;
|
||||||
assert_eq!(result.blobs.len(), 1);
|
assert_eq!(result.blobs.len(), 1);
|
||||||
let read_blob = &result.blobs[0];
|
let read_blob = &result.blobs[0];
|
||||||
let read_buf = &result.buf[read_blob.start..read_blob.end];
|
let view = BufView::new_slice(&result.buf);
|
||||||
assert_eq!(blob, read_buf, "mismatch for idx={idx} at offset={offset}");
|
let read_buf = read_blob.read(&view).await?;
|
||||||
|
assert_eq!(
|
||||||
|
&blob[..],
|
||||||
|
&read_buf[..],
|
||||||
|
"mismatch for idx={idx} at offset={offset}"
|
||||||
|
);
|
||||||
buf = result.buf;
|
buf = result.buf;
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|||||||
@@ -205,6 +205,22 @@ impl PostgresRedoManager {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Do a ping request-response roundtrip.
|
||||||
|
///
|
||||||
|
/// Not used in production, but by Rust benchmarks.
|
||||||
|
///
|
||||||
|
/// # Cancel-Safety
|
||||||
|
///
|
||||||
|
/// This method is cancellation-safe.
|
||||||
|
pub async fn ping(&self, pg_version: u32) -> Result<(), Error> {
|
||||||
|
self.do_with_walredo_process(pg_version, |proc| async move {
|
||||||
|
proc.ping(Duration::from_secs(1))
|
||||||
|
.await
|
||||||
|
.map_err(Error::Other)
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
pub fn status(&self) -> WalRedoManagerStatus {
|
pub fn status(&self) -> WalRedoManagerStatus {
|
||||||
WalRedoManagerStatus {
|
WalRedoManagerStatus {
|
||||||
last_redo_at: {
|
last_redo_at: {
|
||||||
@@ -297,6 +313,9 @@ impl PostgresRedoManager {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// # Cancel-Safety
|
||||||
|
///
|
||||||
|
/// This method is cancel-safe iff `closure` is cancel-safe.
|
||||||
async fn do_with_walredo_process<
|
async fn do_with_walredo_process<
|
||||||
F: FnOnce(Arc<Process>) -> Fut,
|
F: FnOnce(Arc<Process>) -> Fut,
|
||||||
Fut: Future<Output = Result<O, Error>>,
|
Fut: Future<Output = Result<O, Error>>,
|
||||||
@@ -537,6 +556,17 @@ mod tests {
|
|||||||
use tracing::Instrument;
|
use tracing::Instrument;
|
||||||
use utils::{id::TenantId, lsn::Lsn};
|
use utils::{id::TenantId, lsn::Lsn};
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_ping() {
|
||||||
|
let h = RedoHarness::new().unwrap();
|
||||||
|
|
||||||
|
h.manager
|
||||||
|
.ping(14)
|
||||||
|
.instrument(h.span())
|
||||||
|
.await
|
||||||
|
.expect("ping should work");
|
||||||
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn short_v14_redo() {
|
async fn short_v14_redo() {
|
||||||
let expected = std::fs::read("test_data/short_v14_redo.page").unwrap();
|
let expected = std::fs::read("test_data/short_v14_redo.page").unwrap();
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ use self::no_leak_child::NoLeakChild;
|
|||||||
use crate::{
|
use crate::{
|
||||||
config::PageServerConf,
|
config::PageServerConf,
|
||||||
metrics::{WalRedoKillCause, WAL_REDO_PROCESS_COUNTERS, WAL_REDO_RECORD_COUNTER},
|
metrics::{WalRedoKillCause, WAL_REDO_PROCESS_COUNTERS, WAL_REDO_RECORD_COUNTER},
|
||||||
|
page_cache::PAGE_SZ,
|
||||||
span::debug_assert_current_span_has_tenant_id,
|
span::debug_assert_current_span_has_tenant_id,
|
||||||
walrecord::NeonWalRecord,
|
walrecord::NeonWalRecord,
|
||||||
};
|
};
|
||||||
@@ -237,6 +238,26 @@ impl WalRedoProcess {
|
|||||||
res
|
res
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Do a ping request-response roundtrip.
|
||||||
|
///
|
||||||
|
/// Not used in production, but by Rust benchmarks.
|
||||||
|
pub(crate) async fn ping(&self, timeout: Duration) -> anyhow::Result<()> {
|
||||||
|
let mut writebuf: Vec<u8> = Vec::with_capacity(4);
|
||||||
|
protocol::build_ping_msg(&mut writebuf);
|
||||||
|
let Ok(res) = tokio::time::timeout(timeout, self.apply_wal_records0(&writebuf)).await
|
||||||
|
else {
|
||||||
|
anyhow::bail!("WAL redo ping timed out");
|
||||||
|
};
|
||||||
|
let response = res?;
|
||||||
|
if response.len() != PAGE_SZ {
|
||||||
|
anyhow::bail!(
|
||||||
|
"WAL redo ping response should respond with page-sized response: {}",
|
||||||
|
response.len()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
/// # Cancel-Safety
|
/// # Cancel-Safety
|
||||||
///
|
///
|
||||||
/// When not polled to completion (e.g. because in `tokio::select!` another
|
/// When not polled to completion (e.g. because in `tokio::select!` another
|
||||||
|
|||||||
@@ -55,3 +55,8 @@ pub(crate) fn build_get_page_msg(tag: BufferTag, buf: &mut Vec<u8>) {
|
|||||||
tag.ser_into(buf)
|
tag.ser_into(buf)
|
||||||
.expect("serialize BufferTag should always succeed");
|
.expect("serialize BufferTag should always succeed");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) fn build_ping_msg(buf: &mut Vec<u8>) {
|
||||||
|
buf.put_u8(b'H');
|
||||||
|
buf.put_u32(4);
|
||||||
|
}
|
||||||
|
|||||||
@@ -9,6 +9,8 @@ OBJS = \
|
|||||||
hll.o \
|
hll.o \
|
||||||
libpagestore.o \
|
libpagestore.o \
|
||||||
neon.o \
|
neon.o \
|
||||||
|
neon_pgversioncompat.o \
|
||||||
|
neon_perf_counters.o \
|
||||||
neon_utils.o \
|
neon_utils.o \
|
||||||
neon_walreader.o \
|
neon_walreader.o \
|
||||||
pagestore_smgr.o \
|
pagestore_smgr.o \
|
||||||
@@ -23,7 +25,18 @@ SHLIB_LINK_INTERNAL = $(libpq)
|
|||||||
SHLIB_LINK = -lcurl
|
SHLIB_LINK = -lcurl
|
||||||
|
|
||||||
EXTENSION = neon
|
EXTENSION = neon
|
||||||
DATA = neon--1.0.sql neon--1.0--1.1.sql neon--1.1--1.2.sql neon--1.2--1.3.sql neon--1.3--1.2.sql neon--1.2--1.1.sql neon--1.1--1.0.sql neon--1.3--1.4.sql neon--1.4--1.3.sql
|
DATA = \
|
||||||
|
neon--1.0.sql \
|
||||||
|
neon--1.0--1.1.sql \
|
||||||
|
neon--1.1--1.2.sql \
|
||||||
|
neon--1.2--1.3.sql \
|
||||||
|
neon--1.3--1.4.sql \
|
||||||
|
neon--1.4--1.5.sql \
|
||||||
|
neon--1.5--1.4.sql \
|
||||||
|
neon--1.4--1.3.sql \
|
||||||
|
neon--1.3--1.2.sql \
|
||||||
|
neon--1.2--1.1.sql \
|
||||||
|
neon--1.1--1.0.sql
|
||||||
PGFILEDESC = "neon - cloud storage for PostgreSQL"
|
PGFILEDESC = "neon - cloud storage for PostgreSQL"
|
||||||
|
|
||||||
EXTRA_CLEAN = \
|
EXTRA_CLEAN = \
|
||||||
|
|||||||
@@ -109,6 +109,7 @@ typedef struct FileCacheControl
|
|||||||
* reenabling */
|
* reenabling */
|
||||||
uint32 size; /* size of cache file in chunks */
|
uint32 size; /* size of cache file in chunks */
|
||||||
uint32 used; /* number of used chunks */
|
uint32 used; /* number of used chunks */
|
||||||
|
uint32 used_pages; /* number of used pages */
|
||||||
uint32 limit; /* shared copy of lfc_size_limit */
|
uint32 limit; /* shared copy of lfc_size_limit */
|
||||||
uint64 hits;
|
uint64 hits;
|
||||||
uint64 misses;
|
uint64 misses;
|
||||||
@@ -905,6 +906,10 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
|||||||
/* Cache overflow: evict least recently used chunk */
|
/* Cache overflow: evict least recently used chunk */
|
||||||
FileCacheEntry *victim = dlist_container(FileCacheEntry, list_node, dlist_pop_head_node(&lfc_ctl->lru));
|
FileCacheEntry *victim = dlist_container(FileCacheEntry, list_node, dlist_pop_head_node(&lfc_ctl->lru));
|
||||||
|
|
||||||
|
for (int i = 0; i < BLOCKS_PER_CHUNK; i++)
|
||||||
|
{
|
||||||
|
lfc_ctl->used_pages -= (victim->bitmap[i >> 5] >> (i & 31)) & 1;
|
||||||
|
}
|
||||||
CriticalAssert(victim->access_count == 0);
|
CriticalAssert(victim->access_count == 0);
|
||||||
entry->offset = victim->offset; /* grab victim's chunk */
|
entry->offset = victim->offset; /* grab victim's chunk */
|
||||||
hash_search_with_hash_value(lfc_hash, &victim->key, victim->hash, HASH_REMOVE, NULL);
|
hash_search_with_hash_value(lfc_hash, &victim->key, victim->hash, HASH_REMOVE, NULL);
|
||||||
@@ -959,6 +964,7 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
|||||||
|
|
||||||
for (int i = 0; i < blocks_in_chunk; i++)
|
for (int i = 0; i < blocks_in_chunk; i++)
|
||||||
{
|
{
|
||||||
|
lfc_ctl->used_pages += 1 - ((entry->bitmap[(chunk_offs + i) >> 5] >> ((chunk_offs + i) & 31)) & 1);
|
||||||
entry->bitmap[(chunk_offs + i) >> 5] |=
|
entry->bitmap[(chunk_offs + i) >> 5] |=
|
||||||
(1 << ((chunk_offs + i) & 31));
|
(1 << ((chunk_offs + i) & 31));
|
||||||
}
|
}
|
||||||
@@ -1051,6 +1057,11 @@ neon_get_lfc_stats(PG_FUNCTION_ARGS)
|
|||||||
if (lfc_ctl)
|
if (lfc_ctl)
|
||||||
value = lfc_ctl->size;
|
value = lfc_ctl->size;
|
||||||
break;
|
break;
|
||||||
|
case 5:
|
||||||
|
key = "file_cache_used_pages";
|
||||||
|
if (lfc_ctl)
|
||||||
|
value = lfc_ctl->used_pages;
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
SRF_RETURN_DONE(funcctx);
|
SRF_RETURN_DONE(funcctx);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -30,6 +30,7 @@
|
|||||||
#include "utils/guc.h"
|
#include "utils/guc.h"
|
||||||
|
|
||||||
#include "neon.h"
|
#include "neon.h"
|
||||||
|
#include "neon_perf_counters.h"
|
||||||
#include "neon_utils.h"
|
#include "neon_utils.h"
|
||||||
#include "pagestore_client.h"
|
#include "pagestore_client.h"
|
||||||
#include "walproposer.h"
|
#include "walproposer.h"
|
||||||
@@ -331,6 +332,7 @@ CLEANUP_AND_DISCONNECT(PageServer *shard)
|
|||||||
}
|
}
|
||||||
if (shard->conn)
|
if (shard->conn)
|
||||||
{
|
{
|
||||||
|
MyNeonCounters->pageserver_disconnects_total++;
|
||||||
PQfinish(shard->conn);
|
PQfinish(shard->conn);
|
||||||
shard->conn = NULL;
|
shard->conn = NULL;
|
||||||
}
|
}
|
||||||
@@ -737,6 +739,8 @@ pageserver_send(shardno_t shard_no, NeonRequest *request)
|
|||||||
PageServer *shard = &page_servers[shard_no];
|
PageServer *shard = &page_servers[shard_no];
|
||||||
PGconn *pageserver_conn;
|
PGconn *pageserver_conn;
|
||||||
|
|
||||||
|
MyNeonCounters->pageserver_requests_sent_total++;
|
||||||
|
|
||||||
/* If the connection was lost for some reason, reconnect */
|
/* If the connection was lost for some reason, reconnect */
|
||||||
if (shard->state == PS_Connected && PQstatus(shard->conn) == CONNECTION_BAD)
|
if (shard->state == PS_Connected && PQstatus(shard->conn) == CONNECTION_BAD)
|
||||||
{
|
{
|
||||||
@@ -889,6 +893,7 @@ pageserver_flush(shardno_t shard_no)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
MyNeonCounters->pageserver_send_flushes_total++;
|
||||||
if (PQflush(pageserver_conn))
|
if (PQflush(pageserver_conn))
|
||||||
{
|
{
|
||||||
char *msg = pchomp(PQerrorMessage(pageserver_conn));
|
char *msg = pchomp(PQerrorMessage(pageserver_conn));
|
||||||
@@ -922,7 +927,7 @@ check_neon_id(char **newval, void **extra, GucSource source)
|
|||||||
static Size
|
static Size
|
||||||
PagestoreShmemSize(void)
|
PagestoreShmemSize(void)
|
||||||
{
|
{
|
||||||
return sizeof(PagestoreShmemState);
|
return add_size(sizeof(PagestoreShmemState), NeonPerfCountersShmemSize());
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
@@ -941,6 +946,9 @@ PagestoreShmemInit(void)
|
|||||||
memset(&pagestore_shared->shard_map, 0, sizeof(ShardMap));
|
memset(&pagestore_shared->shard_map, 0, sizeof(ShardMap));
|
||||||
AssignPageserverConnstring(page_server_connstring, NULL);
|
AssignPageserverConnstring(page_server_connstring, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
NeonPerfCountersShmemInit();
|
||||||
|
|
||||||
LWLockRelease(AddinShmemInitLock);
|
LWLockRelease(AddinShmemInitLock);
|
||||||
return found;
|
return found;
|
||||||
}
|
}
|
||||||
|
|||||||
39
pgxn/neon/neon--1.4--1.5.sql
Normal file
39
pgxn/neon/neon--1.4--1.5.sql
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
\echo Use "ALTER EXTENSION neon UPDATE TO '1.5'" to load this file. \quit
|
||||||
|
|
||||||
|
|
||||||
|
CREATE FUNCTION get_backend_perf_counters()
|
||||||
|
RETURNS SETOF RECORD
|
||||||
|
AS 'MODULE_PATHNAME', 'neon_get_backend_perf_counters'
|
||||||
|
LANGUAGE C PARALLEL SAFE;
|
||||||
|
|
||||||
|
CREATE FUNCTION get_perf_counters()
|
||||||
|
RETURNS SETOF RECORD
|
||||||
|
AS 'MODULE_PATHNAME', 'neon_get_perf_counters'
|
||||||
|
LANGUAGE C PARALLEL SAFE;
|
||||||
|
|
||||||
|
-- Show various metrics, for each backend. Note that the values are not reset
|
||||||
|
-- when a backend exits. When a new backend starts with the same backend ID, it will
|
||||||
|
-- continue accumulating the values from where the old backend left off. If you are
|
||||||
|
-- only interested in the changes from your own session, store the values at the
|
||||||
|
-- beginning of the session somewhere, and subtract them on subsequent calls.
|
||||||
|
--
|
||||||
|
-- For histograms, 'bucket_le' is the upper bound of the histogram bucket.
|
||||||
|
CREATE VIEW neon_backend_perf_counters AS
|
||||||
|
SELECT P.procno, P.pid, P.metric, P.bucket_le, P.value
|
||||||
|
FROM get_backend_perf_counters() AS P (
|
||||||
|
procno integer,
|
||||||
|
pid integer,
|
||||||
|
metric text,
|
||||||
|
bucket_le float8,
|
||||||
|
value float8
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Summary across all backends. (This could also be implemented with
|
||||||
|
-- an aggregate query over neon_backend_perf_counters view.)
|
||||||
|
CREATE VIEW neon_perf_counters AS
|
||||||
|
SELECT P.metric, P.bucket_le, P.value
|
||||||
|
FROM get_perf_counters() AS P (
|
||||||
|
metric text,
|
||||||
|
bucket_le float8,
|
||||||
|
value float8
|
||||||
|
);
|
||||||
4
pgxn/neon/neon--1.5--1.4.sql
Normal file
4
pgxn/neon/neon--1.5--1.4.sql
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
DROP VIEW IF EXISTS neon_perf_counters;
|
||||||
|
DROP VIEW IF EXISTS neon_backend_perf_counters;
|
||||||
|
DROP FUNCTION IF EXISTS get_perf_counters();
|
||||||
|
DROP FUNCTION IF EXISTS get_backend_perf_counters();
|
||||||
@@ -1,5 +1,7 @@
|
|||||||
# neon extension
|
# neon extension
|
||||||
comment = 'cloud storage for PostgreSQL'
|
comment = 'cloud storage for PostgreSQL'
|
||||||
|
# TODO: bump default version to 1.5, after we are certain that we don't
|
||||||
|
# need to roll back the compute image
|
||||||
default_version = '1.4'
|
default_version = '1.4'
|
||||||
module_pathname = '$libdir/neon'
|
module_pathname = '$libdir/neon'
|
||||||
relocatable = true
|
relocatable = true
|
||||||
|
|||||||
261
pgxn/neon/neon_perf_counters.c
Normal file
261
pgxn/neon/neon_perf_counters.c
Normal file
@@ -0,0 +1,261 @@
|
|||||||
|
/*-------------------------------------------------------------------------
|
||||||
|
*
|
||||||
|
* neon_perf_counters.c
|
||||||
|
* Collect statistics about Neon I/O
|
||||||
|
*
|
||||||
|
* Each backend has its own set of counters in shared memory.
|
||||||
|
*
|
||||||
|
*-------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
#include "postgres.h"
|
||||||
|
|
||||||
|
#include <math.h>
|
||||||
|
|
||||||
|
#include "funcapi.h"
|
||||||
|
#include "miscadmin.h"
|
||||||
|
#include "storage/proc.h"
|
||||||
|
#include "storage/shmem.h"
|
||||||
|
#include "utils/builtins.h"
|
||||||
|
|
||||||
|
#include "neon_perf_counters.h"
|
||||||
|
#include "neon_pgversioncompat.h"
|
||||||
|
|
||||||
|
neon_per_backend_counters *neon_per_backend_counters_shared;
|
||||||
|
|
||||||
|
Size
|
||||||
|
NeonPerfCountersShmemSize(void)
|
||||||
|
{
|
||||||
|
Size size = 0;
|
||||||
|
|
||||||
|
size = add_size(size, mul_size(MaxBackends, sizeof(neon_per_backend_counters)));
|
||||||
|
|
||||||
|
return size;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
NeonPerfCountersShmemInit(void)
|
||||||
|
{
|
||||||
|
bool found;
|
||||||
|
|
||||||
|
neon_per_backend_counters_shared =
|
||||||
|
ShmemInitStruct("Neon perf counters",
|
||||||
|
mul_size(MaxBackends,
|
||||||
|
sizeof(neon_per_backend_counters)),
|
||||||
|
&found);
|
||||||
|
Assert(found == IsUnderPostmaster);
|
||||||
|
if (!found)
|
||||||
|
{
|
||||||
|
/* shared memory is initialized to zeros, so nothing to do here */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Count a GetPage wait operation.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
inc_getpage_wait(uint64 latency_us)
|
||||||
|
{
|
||||||
|
int lo = 0;
|
||||||
|
int hi = NUM_GETPAGE_WAIT_BUCKETS - 1;
|
||||||
|
|
||||||
|
/* Find the right bucket with binary search */
|
||||||
|
while (lo < hi)
|
||||||
|
{
|
||||||
|
int mid = (lo + hi) / 2;
|
||||||
|
|
||||||
|
if (latency_us < getpage_wait_bucket_thresholds[mid])
|
||||||
|
hi = mid;
|
||||||
|
else
|
||||||
|
lo = mid + 1;
|
||||||
|
}
|
||||||
|
MyNeonCounters->getpage_wait_us_bucket[lo]++;
|
||||||
|
MyNeonCounters->getpage_wait_us_sum += latency_us;
|
||||||
|
MyNeonCounters->getpage_wait_us_count++;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Support functions for the views, neon_backend_perf_counters and
|
||||||
|
* neon_perf_counters.
|
||||||
|
*/
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
char *name;
|
||||||
|
bool is_bucket;
|
||||||
|
double bucket_le;
|
||||||
|
double value;
|
||||||
|
} metric_t;
|
||||||
|
|
||||||
|
static metric_t *
|
||||||
|
neon_perf_counters_to_metrics(neon_per_backend_counters *counters)
|
||||||
|
{
|
||||||
|
#define NUM_METRICS (2 + NUM_GETPAGE_WAIT_BUCKETS + 8)
|
||||||
|
metric_t *metrics = palloc((NUM_METRICS + 1) * sizeof(metric_t));
|
||||||
|
uint64 bucket_accum;
|
||||||
|
int i = 0;
|
||||||
|
Datum getpage_wait_str;
|
||||||
|
|
||||||
|
metrics[i].name = "getpage_wait_seconds_count";
|
||||||
|
metrics[i].is_bucket = false;
|
||||||
|
metrics[i].value = (double) counters->getpage_wait_us_count;
|
||||||
|
i++;
|
||||||
|
metrics[i].name = "getpage_wait_seconds_sum";
|
||||||
|
metrics[i].is_bucket = false;
|
||||||
|
metrics[i].value = ((double) counters->getpage_wait_us_sum) / 1000000.0;
|
||||||
|
i++;
|
||||||
|
|
||||||
|
bucket_accum = 0;
|
||||||
|
for (int bucketno = 0; bucketno < NUM_GETPAGE_WAIT_BUCKETS; bucketno++)
|
||||||
|
{
|
||||||
|
uint64 threshold = getpage_wait_bucket_thresholds[bucketno];
|
||||||
|
|
||||||
|
bucket_accum += counters->getpage_wait_us_bucket[bucketno];
|
||||||
|
|
||||||
|
metrics[i].name = "getpage_wait_seconds_bucket";
|
||||||
|
metrics[i].is_bucket = true;
|
||||||
|
metrics[i].bucket_le = (threshold == UINT64_MAX) ? INFINITY : ((double) threshold) / 1000000.0;
|
||||||
|
metrics[i].value = (double) bucket_accum;
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
metrics[i].name = "getpage_prefetch_requests_total";
|
||||||
|
metrics[i].is_bucket = false;
|
||||||
|
metrics[i].value = (double) counters->getpage_prefetch_requests_total;
|
||||||
|
i++;
|
||||||
|
metrics[i].name = "getpage_sync_requests_total";
|
||||||
|
metrics[i].is_bucket = false;
|
||||||
|
metrics[i].value = (double) counters->getpage_sync_requests_total;
|
||||||
|
i++;
|
||||||
|
metrics[i].name = "getpage_prefetch_misses_total";
|
||||||
|
metrics[i].is_bucket = false;
|
||||||
|
metrics[i].value = (double) counters->getpage_prefetch_misses_total;
|
||||||
|
i++;
|
||||||
|
metrics[i].name = "getpage_prefetch_discards_total";
|
||||||
|
metrics[i].is_bucket = false;
|
||||||
|
metrics[i].value = (double) counters->getpage_prefetch_discards_total;
|
||||||
|
i++;
|
||||||
|
metrics[i].name = "pageserver_requests_sent_total";
|
||||||
|
metrics[i].is_bucket = false;
|
||||||
|
metrics[i].value = (double) counters->pageserver_requests_sent_total;
|
||||||
|
i++;
|
||||||
|
metrics[i].name = "pageserver_requests_disconnects_total";
|
||||||
|
metrics[i].is_bucket = false;
|
||||||
|
metrics[i].value = (double) counters->pageserver_disconnects_total;
|
||||||
|
i++;
|
||||||
|
metrics[i].name = "pageserver_send_flushes_total";
|
||||||
|
metrics[i].is_bucket = false;
|
||||||
|
metrics[i].value = (double) counters->pageserver_send_flushes_total;
|
||||||
|
i++;
|
||||||
|
metrics[i].name = "file_cache_hits_total";
|
||||||
|
metrics[i].is_bucket = false;
|
||||||
|
metrics[i].value = (double) counters->file_cache_hits_total;
|
||||||
|
i++;
|
||||||
|
|
||||||
|
Assert(i == NUM_METRICS);
|
||||||
|
|
||||||
|
/* NULL entry marks end of array */
|
||||||
|
metrics[i].name = NULL;
|
||||||
|
metrics[i].value = 0;
|
||||||
|
|
||||||
|
return metrics;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Write metric to three output Datums
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
metric_to_datums(metric_t *m, Datum *values, bool *nulls)
|
||||||
|
{
|
||||||
|
values[0] = CStringGetTextDatum(m->name);
|
||||||
|
nulls[0] = false;
|
||||||
|
if (m->is_bucket)
|
||||||
|
{
|
||||||
|
values[1] = Float8GetDatum(m->bucket_le);
|
||||||
|
nulls[1] = false;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
values[1] = (Datum) 0;
|
||||||
|
nulls[1] = true;
|
||||||
|
}
|
||||||
|
values[2] = Float8GetDatum(m->value);
|
||||||
|
nulls[2] = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
PG_FUNCTION_INFO_V1(neon_get_backend_perf_counters);
|
||||||
|
Datum
|
||||||
|
neon_get_backend_perf_counters(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
|
||||||
|
Datum values[5];
|
||||||
|
bool nulls[5];
|
||||||
|
|
||||||
|
/* We put all the tuples into a tuplestore in one go. */
|
||||||
|
InitMaterializedSRF(fcinfo, 0);
|
||||||
|
|
||||||
|
for (int procno = 0; procno < MaxBackends; procno++)
|
||||||
|
{
|
||||||
|
PGPROC *proc = GetPGProcByNumber(procno);
|
||||||
|
int pid = proc->pid;
|
||||||
|
neon_per_backend_counters *counters = &neon_per_backend_counters_shared[procno];
|
||||||
|
metric_t *metrics = neon_perf_counters_to_metrics(counters);
|
||||||
|
|
||||||
|
values[0] = Int32GetDatum(procno);
|
||||||
|
nulls[0] = false;
|
||||||
|
values[1] = Int32GetDatum(pid);
|
||||||
|
nulls[1] = false;
|
||||||
|
|
||||||
|
for (int i = 0; metrics[i].name != NULL; i++)
|
||||||
|
{
|
||||||
|
metric_to_datums(&metrics[i], &values[2], &nulls[2]);
|
||||||
|
tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
|
||||||
|
}
|
||||||
|
|
||||||
|
pfree(metrics);
|
||||||
|
}
|
||||||
|
|
||||||
|
return (Datum) 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
PG_FUNCTION_INFO_V1(neon_get_perf_counters);
|
||||||
|
Datum
|
||||||
|
neon_get_perf_counters(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
|
||||||
|
Datum values[3];
|
||||||
|
bool nulls[3];
|
||||||
|
Datum getpage_wait_str;
|
||||||
|
neon_per_backend_counters totals = {0};
|
||||||
|
metric_t *metrics;
|
||||||
|
|
||||||
|
/* We put all the tuples into a tuplestore in one go. */
|
||||||
|
InitMaterializedSRF(fcinfo, 0);
|
||||||
|
|
||||||
|
/* Aggregate the counters across all backends */
|
||||||
|
for (int procno = 0; procno < MaxBackends; procno++)
|
||||||
|
{
|
||||||
|
neon_per_backend_counters *counters = &neon_per_backend_counters_shared[procno];
|
||||||
|
|
||||||
|
totals.getpage_wait_us_count += counters->getpage_wait_us_count;
|
||||||
|
totals.getpage_wait_us_sum += counters->getpage_wait_us_sum;
|
||||||
|
for (int bucketno = 0; bucketno < NUM_GETPAGE_WAIT_BUCKETS; bucketno++)
|
||||||
|
totals.getpage_wait_us_bucket[bucketno] += counters->getpage_wait_us_bucket[bucketno];
|
||||||
|
totals.getpage_prefetch_requests_total += counters->getpage_prefetch_requests_total;
|
||||||
|
totals.getpage_sync_requests_total += counters->getpage_sync_requests_total;
|
||||||
|
totals.getpage_prefetch_misses_total += counters->getpage_prefetch_misses_total;
|
||||||
|
totals.getpage_prefetch_discards_total += counters->getpage_prefetch_discards_total;
|
||||||
|
totals.pageserver_requests_sent_total += counters->pageserver_requests_sent_total;
|
||||||
|
totals.pageserver_disconnects_total += counters->pageserver_disconnects_total;
|
||||||
|
totals.pageserver_send_flushes_total += counters->pageserver_send_flushes_total;
|
||||||
|
totals.file_cache_hits_total += counters->file_cache_hits_total;
|
||||||
|
}
|
||||||
|
|
||||||
|
metrics = neon_perf_counters_to_metrics(&totals);
|
||||||
|
for (int i = 0; metrics[i].name != NULL; i++)
|
||||||
|
{
|
||||||
|
metric_to_datums(&metrics[i], &values[0], &nulls[0]);
|
||||||
|
tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
|
||||||
|
}
|
||||||
|
pfree(metrics);
|
||||||
|
|
||||||
|
return (Datum) 0;
|
||||||
|
}
|
||||||
111
pgxn/neon/neon_perf_counters.h
Normal file
111
pgxn/neon/neon_perf_counters.h
Normal file
@@ -0,0 +1,111 @@
|
|||||||
|
/*-------------------------------------------------------------------------
|
||||||
|
*
|
||||||
|
* neon_perf_counters.h
|
||||||
|
* Performance counters for neon storage requests
|
||||||
|
*-------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef NEON_PERF_COUNTERS_H
|
||||||
|
#define NEON_PERF_COUNTERS_H
|
||||||
|
|
||||||
|
#if PG_VERSION_NUM >= 170000
|
||||||
|
#include "storage/procnumber.h"
|
||||||
|
#else
|
||||||
|
#include "storage/backendid.h"
|
||||||
|
#include "storage/proc.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static const uint64 getpage_wait_bucket_thresholds[] = {
|
||||||
|
20, 30, 60, 100, /* 0 - 100 us */
|
||||||
|
200, 300, 600, 1000, /* 100 us - 1 ms */
|
||||||
|
2000, 3000, 6000, 10000, /* 1 ms - 10 ms */
|
||||||
|
20000, 30000, 60000, 100000, /* 10 ms - 100 ms */
|
||||||
|
200000, 300000, 600000, 1000000, /* 100 ms - 1 s */
|
||||||
|
2000000, 3000000, 6000000, 10000000, /* 1 s - 10 s */
|
||||||
|
20000000, 30000000, 60000000, 100000000, /* 10 s - 100 s */
|
||||||
|
UINT64_MAX,
|
||||||
|
};
|
||||||
|
#define NUM_GETPAGE_WAIT_BUCKETS (lengthof(getpage_wait_bucket_thresholds))
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Histogram for how long an smgrread() request needs to wait for response
|
||||||
|
* from pageserver. When prefetching is effective, these wait times can be
|
||||||
|
* lower than the network latency to the pageserver, even zero, if the
|
||||||
|
* page is already readily prefetched whenever we need to read a page.
|
||||||
|
*
|
||||||
|
* Note: we accumulate these in microseconds, because that's convenient in
|
||||||
|
* the backend, but the 'neon_backend_perf_counters' view will convert
|
||||||
|
* them to seconds, to make them more idiomatic as prometheus metrics.
|
||||||
|
*/
|
||||||
|
uint64 getpage_wait_us_count;
|
||||||
|
uint64 getpage_wait_us_sum;
|
||||||
|
uint64 getpage_wait_us_bucket[NUM_GETPAGE_WAIT_BUCKETS];
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Total number of speculative prefetch Getpage requests and synchronous
|
||||||
|
* GetPage requests sent.
|
||||||
|
*/
|
||||||
|
uint64 getpage_prefetch_requests_total;
|
||||||
|
uint64 getpage_sync_requests_total;
|
||||||
|
|
||||||
|
/* XXX: It's not clear to me when these misses happen. */
|
||||||
|
uint64 getpage_prefetch_misses_total;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Number of prefetched responses that were discarded because the
|
||||||
|
* prefetched page was not needed or because it was concurrently fetched /
|
||||||
|
* modified by another backend.
|
||||||
|
*/
|
||||||
|
uint64 getpage_prefetch_discards_total;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Total number of requests sent to the pageserver. (prefetch_requests_total
|
||||||
|
* and sync_requests_total count only GetPage requests; this counts all
|
||||||
|
* request types.)
|
||||||
|
*/
|
||||||
|
uint64 pageserver_requests_sent_total;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Number of times the connection to the pageserver was lost and the
|
||||||
|
* backend had to reconnect. Note that this doesn't count the first
|
||||||
|
* connection in each backend, only reconnects.
|
||||||
|
*/
|
||||||
|
uint64 pageserver_disconnects_total;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Number of network flushes to the pageserver. Synchronous requests are
|
||||||
|
* flushed immediately, but when prefetching requests are sent in batches,
|
||||||
|
* this can be smaller than pageserver_requests_sent_total.
|
||||||
|
*/
|
||||||
|
uint64 pageserver_send_flushes_total;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Number of requests satisfied from the LFC.
|
||||||
|
*
|
||||||
|
* This is redundant with the server-wide file_cache_hits, but this gives
|
||||||
|
* per-backend granularity, and it's handy to have this in the same place
|
||||||
|
* as counters for requests that went to the pageserver. Maybe move all
|
||||||
|
* the LFC stats to this struct in the future?
|
||||||
|
*/
|
||||||
|
uint64 file_cache_hits_total;
|
||||||
|
|
||||||
|
} neon_per_backend_counters;
|
||||||
|
|
||||||
|
/* Pointer to the shared memory array of neon_per_backend_counters structs */
|
||||||
|
extern neon_per_backend_counters *neon_per_backend_counters_shared;
|
||||||
|
|
||||||
|
#if PG_VERSION_NUM >= 170000
|
||||||
|
#define MyNeonCounters (&neon_per_backend_counters_shared[MyProcNumber])
|
||||||
|
#else
|
||||||
|
#define MyNeonCounters (&neon_per_backend_counters_shared[MyProc->pgprocno])
|
||||||
|
#endif
|
||||||
|
|
||||||
|
extern void inc_getpage_wait(uint64 latency);
|
||||||
|
|
||||||
|
extern Size NeonPerfCountersShmemSize(void);
|
||||||
|
extern void NeonPerfCountersShmemInit(void);
|
||||||
|
|
||||||
|
|
||||||
|
#endif /* NEON_PERF_COUNTERS_H */
|
||||||
44
pgxn/neon/neon_pgversioncompat.c
Normal file
44
pgxn/neon/neon_pgversioncompat.c
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
/*
|
||||||
|
* Support functions for the compatibility macros in neon_pgversioncompat.h
|
||||||
|
*/
|
||||||
|
#include "postgres.h"
|
||||||
|
|
||||||
|
#include "funcapi.h"
|
||||||
|
#include "miscadmin.h"
|
||||||
|
#include "utils/tuplestore.h"
|
||||||
|
|
||||||
|
#include "neon_pgversioncompat.h"
|
||||||
|
|
||||||
|
#if PG_MAJORVERSION_NUM < 15
|
||||||
|
void
|
||||||
|
InitMaterializedSRF(FunctionCallInfo fcinfo, bits32 flags)
|
||||||
|
{
|
||||||
|
ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
|
||||||
|
Tuplestorestate *tupstore;
|
||||||
|
MemoryContext old_context,
|
||||||
|
per_query_ctx;
|
||||||
|
TupleDesc stored_tupdesc;
|
||||||
|
|
||||||
|
/* check to see if caller supports returning a tuplestore */
|
||||||
|
if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||||
|
errmsg("set-valued function called in context that cannot accept a set")));
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Store the tuplestore and the tuple descriptor in ReturnSetInfo. This
|
||||||
|
* must be done in the per-query memory context.
|
||||||
|
*/
|
||||||
|
per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
|
||||||
|
old_context = MemoryContextSwitchTo(per_query_ctx);
|
||||||
|
|
||||||
|
if (get_call_result_type(fcinfo, NULL, &stored_tupdesc) != TYPEFUNC_COMPOSITE)
|
||||||
|
elog(ERROR, "return type must be a row type");
|
||||||
|
|
||||||
|
tupstore = tuplestore_begin_heap(false, false, work_mem);
|
||||||
|
rsinfo->returnMode = SFRM_Materialize;
|
||||||
|
rsinfo->setResult = tupstore;
|
||||||
|
rsinfo->setDesc = stored_tupdesc;
|
||||||
|
MemoryContextSwitchTo(old_context);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
@@ -6,6 +6,8 @@
|
|||||||
#ifndef NEON_PGVERSIONCOMPAT_H
|
#ifndef NEON_PGVERSIONCOMPAT_H
|
||||||
#define NEON_PGVERSIONCOMPAT_H
|
#define NEON_PGVERSIONCOMPAT_H
|
||||||
|
|
||||||
|
#include "fmgr.h"
|
||||||
|
|
||||||
#if PG_MAJORVERSION_NUM < 17
|
#if PG_MAJORVERSION_NUM < 17
|
||||||
#define NRelFileInfoBackendIsTemp(rinfo) (rinfo.backend != InvalidBackendId)
|
#define NRelFileInfoBackendIsTemp(rinfo) (rinfo.backend != InvalidBackendId)
|
||||||
#else
|
#else
|
||||||
@@ -123,4 +125,8 @@
|
|||||||
#define AmAutoVacuumWorkerProcess() (IsAutoVacuumWorkerProcess())
|
#define AmAutoVacuumWorkerProcess() (IsAutoVacuumWorkerProcess())
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if PG_MAJORVERSION_NUM < 15
|
||||||
|
extern void InitMaterializedSRF(FunctionCallInfo fcinfo, bits32 flags);
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif /* NEON_PGVERSIONCOMPAT_H */
|
#endif /* NEON_PGVERSIONCOMPAT_H */
|
||||||
|
|||||||
@@ -66,6 +66,7 @@
|
|||||||
#include "storage/md.h"
|
#include "storage/md.h"
|
||||||
#include "storage/smgr.h"
|
#include "storage/smgr.h"
|
||||||
|
|
||||||
|
#include "neon_perf_counters.h"
|
||||||
#include "pagestore_client.h"
|
#include "pagestore_client.h"
|
||||||
#include "bitmap.h"
|
#include "bitmap.h"
|
||||||
|
|
||||||
@@ -289,7 +290,6 @@ static PrefetchState *MyPState;
|
|||||||
|
|
||||||
static bool compact_prefetch_buffers(void);
|
static bool compact_prefetch_buffers(void);
|
||||||
static void consume_prefetch_responses(void);
|
static void consume_prefetch_responses(void);
|
||||||
static uint64 prefetch_register_buffer(BufferTag tag, neon_request_lsns *force_request_lsns);
|
|
||||||
static bool prefetch_read(PrefetchRequest *slot);
|
static bool prefetch_read(PrefetchRequest *slot);
|
||||||
static void prefetch_do_request(PrefetchRequest *slot, neon_request_lsns *force_request_lsns);
|
static void prefetch_do_request(PrefetchRequest *slot, neon_request_lsns *force_request_lsns);
|
||||||
static bool prefetch_wait_for(uint64 ring_index);
|
static bool prefetch_wait_for(uint64 ring_index);
|
||||||
@@ -780,21 +780,27 @@ prefetch_do_request(PrefetchRequest *slot, neon_request_lsns *force_request_lsns
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* prefetch_register_buffer() - register and prefetch buffer
|
* prefetch_register_bufferv() - register and prefetch buffers
|
||||||
*
|
*
|
||||||
* Register that we may want the contents of BufferTag in the near future.
|
* Register that we may want the contents of BufferTag in the near future.
|
||||||
|
* This is used when issuing a speculative prefetch request, but also when
|
||||||
|
* performing a synchronous request where the buffer is needed right now.
|
||||||
*
|
*
|
||||||
* If force_request_lsns is not NULL, those values are sent to the
|
* If force_request_lsns is not NULL, those values are sent to the
|
||||||
* pageserver. If NULL, we utilize the lastWrittenLsn -infrastructure
|
* pageserver. If NULL, we utilize the lastWrittenLsn -infrastructure
|
||||||
* to calculate the LSNs to send.
|
* to calculate the LSNs to send.
|
||||||
*
|
*
|
||||||
|
* When performing a prefetch rather than a synchronous request,
|
||||||
|
* is_prefetch==true. Currently, it only affects how the request is accounted
|
||||||
|
* in the perf counters.
|
||||||
|
*
|
||||||
* NOTE: this function may indirectly update MyPState->pfs_hash; which
|
* NOTE: this function may indirectly update MyPState->pfs_hash; which
|
||||||
* invalidates any active pointers into the hash table.
|
* invalidates any active pointers into the hash table.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static uint64
|
static uint64
|
||||||
prefetch_register_bufferv(BufferTag tag, neon_request_lsns *frlsns,
|
prefetch_register_bufferv(BufferTag tag, neon_request_lsns *frlsns,
|
||||||
BlockNumber nblocks, const bits8 *mask)
|
BlockNumber nblocks, const bits8 *mask,
|
||||||
|
bool is_prefetch)
|
||||||
{
|
{
|
||||||
uint64 min_ring_index;
|
uint64 min_ring_index;
|
||||||
PrefetchRequest req;
|
PrefetchRequest req;
|
||||||
@@ -815,6 +821,7 @@ Retry:
|
|||||||
PrfHashEntry *entry = NULL;
|
PrfHashEntry *entry = NULL;
|
||||||
uint64 ring_index;
|
uint64 ring_index;
|
||||||
neon_request_lsns *lsns;
|
neon_request_lsns *lsns;
|
||||||
|
|
||||||
if (PointerIsValid(mask) && !BITMAP_ISSET(mask, i))
|
if (PointerIsValid(mask) && !BITMAP_ISSET(mask, i))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
@@ -858,6 +865,7 @@ Retry:
|
|||||||
prefetch_set_unused(ring_index);
|
prefetch_set_unused(ring_index);
|
||||||
entry = NULL;
|
entry = NULL;
|
||||||
slot = NULL;
|
slot = NULL;
|
||||||
|
MyNeonCounters->getpage_prefetch_discards_total++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -972,6 +980,11 @@ Retry:
|
|||||||
|
|
||||||
min_ring_index = Min(min_ring_index, ring_index);
|
min_ring_index = Min(min_ring_index, ring_index);
|
||||||
|
|
||||||
|
if (is_prefetch)
|
||||||
|
MyNeonCounters->getpage_prefetch_requests_total++;
|
||||||
|
else
|
||||||
|
MyNeonCounters->getpage_sync_requests_total++;
|
||||||
|
|
||||||
prefetch_do_request(slot, lsns);
|
prefetch_do_request(slot, lsns);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1000,13 +1013,6 @@ Retry:
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static uint64
|
|
||||||
prefetch_register_buffer(BufferTag tag, neon_request_lsns *force_request_lsns)
|
|
||||||
{
|
|
||||||
return prefetch_register_bufferv(tag, force_request_lsns, 1, NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Note: this function can get canceled and use a long jump to the next catch
|
* Note: this function can get canceled and use a long jump to the next catch
|
||||||
* context. Take care.
|
* context. Take care.
|
||||||
@@ -2612,7 +2618,7 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
|||||||
lfc_present[i] = ~(lfc_present[i]);
|
lfc_present[i] = ~(lfc_present[i]);
|
||||||
|
|
||||||
ring_index = prefetch_register_bufferv(tag, NULL, iterblocks,
|
ring_index = prefetch_register_bufferv(tag, NULL, iterblocks,
|
||||||
lfc_present);
|
lfc_present, true);
|
||||||
nblocks -= iterblocks;
|
nblocks -= iterblocks;
|
||||||
blocknum += iterblocks;
|
blocknum += iterblocks;
|
||||||
|
|
||||||
@@ -2656,7 +2662,7 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
|
|||||||
|
|
||||||
CopyNRelFileInfoToBufTag(tag, InfoFromSMgrRel(reln));
|
CopyNRelFileInfoToBufTag(tag, InfoFromSMgrRel(reln));
|
||||||
|
|
||||||
ring_index = prefetch_register_buffer(tag, NULL);
|
ring_index = prefetch_register_bufferv(tag, NULL, 1, NULL, true);
|
||||||
|
|
||||||
Assert(ring_index < MyPState->ring_unused &&
|
Assert(ring_index < MyPState->ring_unused &&
|
||||||
MyPState->ring_last <= ring_index);
|
MyPState->ring_last <= ring_index);
|
||||||
@@ -2747,17 +2753,20 @@ neon_read_at_lsnv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber base_block
|
|||||||
* weren't for the behaviour of the LwLsn cache that uses the highest
|
* weren't for the behaviour of the LwLsn cache that uses the highest
|
||||||
* value of the LwLsn cache when the entry is not found.
|
* value of the LwLsn cache when the entry is not found.
|
||||||
*/
|
*/
|
||||||
prefetch_register_bufferv(buftag, request_lsns, nblocks, mask);
|
prefetch_register_bufferv(buftag, request_lsns, nblocks, mask, false);
|
||||||
|
|
||||||
for (int i = 0; i < nblocks; i++)
|
for (int i = 0; i < nblocks; i++)
|
||||||
{
|
{
|
||||||
void *buffer = buffers[i];
|
void *buffer = buffers[i];
|
||||||
BlockNumber blockno = base_blockno + i;
|
BlockNumber blockno = base_blockno + i;
|
||||||
neon_request_lsns *reqlsns = &request_lsns[i];
|
neon_request_lsns *reqlsns = &request_lsns[i];
|
||||||
|
TimestampTz start_ts, end_ts;
|
||||||
|
|
||||||
if (PointerIsValid(mask) && !BITMAP_ISSET(mask, i))
|
if (PointerIsValid(mask) && !BITMAP_ISSET(mask, i))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
start_ts = GetCurrentTimestamp();
|
||||||
|
|
||||||
if (RecoveryInProgress() && MyBackendType != B_STARTUP)
|
if (RecoveryInProgress() && MyBackendType != B_STARTUP)
|
||||||
XLogWaitForReplayOf(reqlsns[0].request_lsn);
|
XLogWaitForReplayOf(reqlsns[0].request_lsn);
|
||||||
|
|
||||||
@@ -2794,6 +2803,7 @@ Retry:
|
|||||||
/* drop caches */
|
/* drop caches */
|
||||||
prefetch_set_unused(slot->my_ring_index);
|
prefetch_set_unused(slot->my_ring_index);
|
||||||
pgBufferUsage.prefetch.expired += 1;
|
pgBufferUsage.prefetch.expired += 1;
|
||||||
|
MyNeonCounters->getpage_prefetch_discards_total++;
|
||||||
/* make it look like a prefetch cache miss */
|
/* make it look like a prefetch cache miss */
|
||||||
entry = NULL;
|
entry = NULL;
|
||||||
}
|
}
|
||||||
@@ -2804,8 +2814,9 @@ Retry:
|
|||||||
if (entry == NULL)
|
if (entry == NULL)
|
||||||
{
|
{
|
||||||
pgBufferUsage.prefetch.misses += 1;
|
pgBufferUsage.prefetch.misses += 1;
|
||||||
|
MyNeonCounters->getpage_prefetch_misses_total++;
|
||||||
|
|
||||||
ring_index = prefetch_register_bufferv(buftag, reqlsns, 1, NULL);
|
ring_index = prefetch_register_bufferv(buftag, reqlsns, 1, NULL, false);
|
||||||
Assert(ring_index != UINT64_MAX);
|
Assert(ring_index != UINT64_MAX);
|
||||||
slot = GetPrfSlot(ring_index);
|
slot = GetPrfSlot(ring_index);
|
||||||
}
|
}
|
||||||
@@ -2860,6 +2871,9 @@ Retry:
|
|||||||
/* buffer was used, clean up for later reuse */
|
/* buffer was used, clean up for later reuse */
|
||||||
prefetch_set_unused(ring_index);
|
prefetch_set_unused(ring_index);
|
||||||
prefetch_cleanup_trailing_unused();
|
prefetch_cleanup_trailing_unused();
|
||||||
|
|
||||||
|
end_ts = GetCurrentTimestamp();
|
||||||
|
inc_getpage_wait(end_ts >= start_ts ? (end_ts - start_ts) : 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2913,6 +2927,7 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer
|
|||||||
/* Try to read from local file cache */
|
/* Try to read from local file cache */
|
||||||
if (lfc_read(InfoFromSMgrRel(reln), forkNum, blkno, buffer))
|
if (lfc_read(InfoFromSMgrRel(reln), forkNum, blkno, buffer))
|
||||||
{
|
{
|
||||||
|
MyNeonCounters->file_cache_hits_total++;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3097,7 +3112,7 @@ neon_readv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
|||||||
/* assume heap */
|
/* assume heap */
|
||||||
RmgrTable[RM_HEAP_ID].rm_mask(mdbuf_masked, blkno);
|
RmgrTable[RM_HEAP_ID].rm_mask(mdbuf_masked, blkno);
|
||||||
RmgrTable[RM_HEAP_ID].rm_mask(pageserver_masked, blkno);
|
RmgrTable[RM_HEAP_ID].rm_mask(pageserver_masked, blkno);
|
||||||
|
|
||||||
if (memcmp(mdbuf_masked, pageserver_masked, BLCKSZ) != 0)
|
if (memcmp(mdbuf_masked, pageserver_masked, BLCKSZ) != 0)
|
||||||
{
|
{
|
||||||
neon_log(PANIC, "heap buffers differ at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n------ MD ------\n%s\n------ Page Server ------\n%s\n",
|
neon_log(PANIC, "heap buffers differ at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n------ MD ------\n%s\n------ Page Server ------\n%s\n",
|
||||||
|
|||||||
@@ -24,6 +24,7 @@
|
|||||||
* PushPage ('P'): Copy a page image (in the payload) to buffer cache
|
* PushPage ('P'): Copy a page image (in the payload) to buffer cache
|
||||||
* ApplyRecord ('A'): Apply a WAL record (in the payload)
|
* ApplyRecord ('A'): Apply a WAL record (in the payload)
|
||||||
* GetPage ('G'): Return a page image from buffer cache.
|
* GetPage ('G'): Return a page image from buffer cache.
|
||||||
|
* Ping ('H'): Reply with a zero-filled page of BLCKSZ bytes (the input message is not echoed).
|
||||||
*
|
*
|
||||||
* Currently, you only get a response to GetPage requests; the response is
|
* Currently, you only get a response to GetPage requests; the response is
|
||||||
* simply a 8k page, without any headers. Errors are logged to stderr.
|
* simply a 8k page, without any headers. Errors are logged to stderr.
|
||||||
@@ -133,6 +134,7 @@ static void ApplyRecord(StringInfo input_message);
|
|||||||
static void apply_error_callback(void *arg);
|
static void apply_error_callback(void *arg);
|
||||||
static bool redo_block_filter(XLogReaderState *record, uint8 block_id);
|
static bool redo_block_filter(XLogReaderState *record, uint8 block_id);
|
||||||
static void GetPage(StringInfo input_message);
|
static void GetPage(StringInfo input_message);
|
||||||
|
static void Ping(StringInfo input_message);
|
||||||
static ssize_t buffered_read(void *buf, size_t count);
|
static ssize_t buffered_read(void *buf, size_t count);
|
||||||
static void CreateFakeSharedMemoryAndSemaphores();
|
static void CreateFakeSharedMemoryAndSemaphores();
|
||||||
|
|
||||||
@@ -394,6 +396,10 @@ WalRedoMain(int argc, char *argv[])
|
|||||||
GetPage(&input_message);
|
GetPage(&input_message);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case 'H': /* Ping */
|
||||||
|
Ping(&input_message);
|
||||||
|
break;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* EOF means we're done. Perform normal shutdown.
|
* EOF means we're done. Perform normal shutdown.
|
||||||
*/
|
*/
|
||||||
@@ -1057,6 +1063,36 @@ GetPage(StringInfo input_message)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void
|
||||||
|
Ping(StringInfo input_message)
|
||||||
|
{
|
||||||
|
int tot_written;
|
||||||
|
/* Response: a zero-filled page of BLCKSZ bytes (the input message itself is not echoed back) */
|
||||||
|
tot_written = 0;
|
||||||
|
do {
|
||||||
|
ssize_t rc;
|
||||||
|
/* We don't need alignment, but it's bad practice to use char[BLCKSZ] */
|
||||||
|
#if PG_VERSION_NUM >= 160000
|
||||||
|
static const PGIOAlignedBlock response;
|
||||||
|
#else
|
||||||
|
static const PGAlignedBlock response;
|
||||||
|
#endif
|
||||||
|
rc = write(STDOUT_FILENO, &response.data[tot_written], BLCKSZ - tot_written);
|
||||||
|
if (rc < 0) {
|
||||||
|
/* If interrupted by signal, just retry */
|
||||||
|
if (errno == EINTR)
|
||||||
|
continue;
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode_for_file_access(),
|
||||||
|
errmsg("could not write to stdout: %m")));
|
||||||
|
}
|
||||||
|
tot_written += rc;
|
||||||
|
} while (tot_written < BLCKSZ);
|
||||||
|
|
||||||
|
elog(TRACE, "Page sent back for ping");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Buffer used by buffered_read() */
|
/* Buffer used by buffered_read() */
|
||||||
static char stdin_buf[16 * 1024];
|
static char stdin_buf[16 * 1024];
|
||||||
static size_t stdin_len = 0; /* # of bytes in buffer */
|
static size_t stdin_len = 0; /* # of bytes in buffer */
|
||||||
|
|||||||
@@ -24,12 +24,12 @@ bytes = { workspace = true, features = ["serde"] }
|
|||||||
camino.workspace = true
|
camino.workspace = true
|
||||||
chrono.workspace = true
|
chrono.workspace = true
|
||||||
clap.workspace = true
|
clap.workspace = true
|
||||||
|
compute_api.workspace = true
|
||||||
consumption_metrics.workspace = true
|
consumption_metrics.workspace = true
|
||||||
dashmap.workspace = true
|
dashmap.workspace = true
|
||||||
env_logger.workspace = true
|
env_logger.workspace = true
|
||||||
framed-websockets.workspace = true
|
framed-websockets.workspace = true
|
||||||
futures.workspace = true
|
futures.workspace = true
|
||||||
git-version.workspace = true
|
|
||||||
hashbrown.workspace = true
|
hashbrown.workspace = true
|
||||||
hashlink.workspace = true
|
hashlink.workspace = true
|
||||||
hex.workspace = true
|
hex.workspace = true
|
||||||
|
|||||||
@@ -80,6 +80,14 @@ pub(crate) trait TestBackend: Send + Sync + 'static {
|
|||||||
fn get_allowed_ips_and_secret(
|
fn get_allowed_ips_and_secret(
|
||||||
&self,
|
&self,
|
||||||
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), console::errors::GetAuthInfoError>;
|
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), console::errors::GetAuthInfoError>;
|
||||||
|
fn dyn_clone(&self) -> Box<dyn TestBackend>;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
impl Clone for Box<dyn TestBackend> {
|
||||||
|
fn clone(&self) -> Self {
|
||||||
|
TestBackend::dyn_clone(&**self)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl std::fmt::Display for Backend<'_, (), ()> {
|
impl std::fmt::Display for Backend<'_, (), ()> {
|
||||||
@@ -444,7 +452,7 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint, &()> {
|
|||||||
Self::Web(url, ()) => {
|
Self::Web(url, ()) => {
|
||||||
info!("performing web authentication");
|
info!("performing web authentication");
|
||||||
|
|
||||||
let info = web::authenticate(ctx, &url, client).await?;
|
let info = web::authenticate(ctx, config, &url, client).await?;
|
||||||
|
|
||||||
Backend::Web(url, info)
|
Backend::Web(url, info)
|
||||||
}
|
}
|
||||||
@@ -557,7 +565,7 @@ mod tests {
|
|||||||
stream::{PqStream, Stream},
|
stream::{PqStream, Stream},
|
||||||
};
|
};
|
||||||
|
|
||||||
use super::{auth_quirks, AuthRateLimiter};
|
use super::{auth_quirks, jwt::JwkCache, AuthRateLimiter};
|
||||||
|
|
||||||
struct Auth {
|
struct Auth {
|
||||||
ips: Vec<IpPattern>,
|
ips: Vec<IpPattern>,
|
||||||
@@ -585,6 +593,14 @@ mod tests {
|
|||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn get_endpoint_jwks(
|
||||||
|
&self,
|
||||||
|
_ctx: &RequestMonitoring,
|
||||||
|
_endpoint: crate::EndpointId,
|
||||||
|
) -> anyhow::Result<Vec<super::jwt::AuthRule>> {
|
||||||
|
unimplemented!()
|
||||||
|
}
|
||||||
|
|
||||||
async fn wake_compute(
|
async fn wake_compute(
|
||||||
&self,
|
&self,
|
||||||
_ctx: &RequestMonitoring,
|
_ctx: &RequestMonitoring,
|
||||||
@@ -595,12 +611,15 @@ mod tests {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static CONFIG: Lazy<AuthenticationConfig> = Lazy::new(|| AuthenticationConfig {
|
static CONFIG: Lazy<AuthenticationConfig> = Lazy::new(|| AuthenticationConfig {
|
||||||
|
jwks_cache: JwkCache::default(),
|
||||||
thread_pool: ThreadPool::new(1),
|
thread_pool: ThreadPool::new(1),
|
||||||
scram_protocol_timeout: std::time::Duration::from_secs(5),
|
scram_protocol_timeout: std::time::Duration::from_secs(5),
|
||||||
rate_limiter_enabled: true,
|
rate_limiter_enabled: true,
|
||||||
rate_limiter: AuthRateLimiter::new(&RateBucketInfo::DEFAULT_AUTH_SET),
|
rate_limiter: AuthRateLimiter::new(&RateBucketInfo::DEFAULT_AUTH_SET),
|
||||||
rate_limit_ip_subnet: 64,
|
rate_limit_ip_subnet: 64,
|
||||||
ip_allowlist_check_enabled: true,
|
ip_allowlist_check_enabled: true,
|
||||||
|
is_auth_broker: false,
|
||||||
|
accept_jwts: false,
|
||||||
});
|
});
|
||||||
|
|
||||||
async fn read_message(r: &mut (impl AsyncRead + Unpin), b: &mut BytesMut) -> PgMessage {
|
async fn read_message(r: &mut (impl AsyncRead + Unpin), b: &mut BytesMut) -> PgMessage {
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
use std::{
|
use std::{
|
||||||
|
borrow::Cow,
|
||||||
future::Future,
|
future::Future,
|
||||||
sync::Arc,
|
sync::Arc,
|
||||||
time::{Duration, SystemTime},
|
time::{Duration, SystemTime},
|
||||||
@@ -8,11 +9,17 @@ use anyhow::{bail, ensure, Context};
|
|||||||
use arc_swap::ArcSwapOption;
|
use arc_swap::ArcSwapOption;
|
||||||
use dashmap::DashMap;
|
use dashmap::DashMap;
|
||||||
use jose_jwk::crypto::KeyInfo;
|
use jose_jwk::crypto::KeyInfo;
|
||||||
use serde::{Deserialize, Deserializer};
|
use serde::{
|
||||||
|
de::{DeserializeSeed, IgnoredAny, Visitor},
|
||||||
|
Deserializer,
|
||||||
|
};
|
||||||
use signature::Verifier;
|
use signature::Verifier;
|
||||||
use tokio::time::Instant;
|
use tokio::time::Instant;
|
||||||
|
|
||||||
use crate::{context::RequestMonitoring, http::parse_json_body_with_limit, EndpointId, RoleName};
|
use crate::{
|
||||||
|
context::RequestMonitoring, http::parse_json_body_with_limit, intern::RoleNameInt, EndpointId,
|
||||||
|
RoleName,
|
||||||
|
};
|
||||||
|
|
||||||
// TODO(conrad): make these configurable.
|
// TODO(conrad): make these configurable.
|
||||||
const CLOCK_SKEW_LEEWAY: Duration = Duration::from_secs(30);
|
const CLOCK_SKEW_LEEWAY: Duration = Duration::from_secs(30);
|
||||||
@@ -27,18 +34,19 @@ pub(crate) trait FetchAuthRules: Clone + Send + Sync + 'static {
|
|||||||
&self,
|
&self,
|
||||||
ctx: &RequestMonitoring,
|
ctx: &RequestMonitoring,
|
||||||
endpoint: EndpointId,
|
endpoint: EndpointId,
|
||||||
role_name: RoleName,
|
|
||||||
) -> impl Future<Output = anyhow::Result<Vec<AuthRule>>> + Send;
|
) -> impl Future<Output = anyhow::Result<Vec<AuthRule>>> + Send;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
pub(crate) struct AuthRule {
|
pub(crate) struct AuthRule {
|
||||||
pub(crate) id: String,
|
pub(crate) id: String,
|
||||||
pub(crate) jwks_url: url::Url,
|
pub(crate) jwks_url: url::Url,
|
||||||
pub(crate) audience: Option<String>,
|
pub(crate) audience: Option<String>,
|
||||||
|
pub(crate) role_names: Vec<RoleNameInt>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
pub(crate) struct JwkCache {
|
pub struct JwkCache {
|
||||||
client: reqwest::Client,
|
client: reqwest::Client,
|
||||||
|
|
||||||
map: DashMap<(EndpointId, RoleName), Arc<JwkCacheEntryLock>>,
|
map: DashMap<(EndpointId, RoleName), Arc<JwkCacheEntryLock>>,
|
||||||
@@ -54,18 +62,28 @@ pub(crate) struct JwkCacheEntry {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl JwkCacheEntry {
|
impl JwkCacheEntry {
|
||||||
fn find_jwk_and_audience(&self, key_id: &str) -> Option<(&jose_jwk::Jwk, Option<&str>)> {
|
fn find_jwk_and_audience(
|
||||||
self.key_sets.values().find_map(|key_set| {
|
&self,
|
||||||
key_set
|
key_id: &str,
|
||||||
.find_key(key_id)
|
role_name: &RoleName,
|
||||||
.map(|jwk| (jwk, key_set.audience.as_deref()))
|
) -> Option<(&jose_jwk::Jwk, Option<&str>)> {
|
||||||
})
|
self.key_sets
|
||||||
|
.values()
|
||||||
|
// make sure our requested role has access to the key set
|
||||||
|
.filter(|key_set| key_set.role_names.iter().any(|role| **role == **role_name))
|
||||||
|
// try and find the requested key-id in the key set
|
||||||
|
.find_map(|key_set| {
|
||||||
|
key_set
|
||||||
|
.find_key(key_id)
|
||||||
|
.map(|jwk| (jwk, key_set.audience.as_deref()))
|
||||||
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct KeySet {
|
struct KeySet {
|
||||||
jwks: jose_jwk::JwkSet,
|
jwks: jose_jwk::JwkSet,
|
||||||
audience: Option<String>,
|
audience: Option<String>,
|
||||||
|
role_names: Vec<RoleNameInt>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl KeySet {
|
impl KeySet {
|
||||||
@@ -106,7 +124,6 @@ impl JwkCacheEntryLock {
|
|||||||
ctx: &RequestMonitoring,
|
ctx: &RequestMonitoring,
|
||||||
client: &reqwest::Client,
|
client: &reqwest::Client,
|
||||||
endpoint: EndpointId,
|
endpoint: EndpointId,
|
||||||
role_name: RoleName,
|
|
||||||
auth_rules: &F,
|
auth_rules: &F,
|
||||||
) -> anyhow::Result<Arc<JwkCacheEntry>> {
|
) -> anyhow::Result<Arc<JwkCacheEntry>> {
|
||||||
// double check that no one beat us to updating the cache.
|
// double check that no one beat us to updating the cache.
|
||||||
@@ -119,11 +136,10 @@ impl JwkCacheEntryLock {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let rules = auth_rules
|
let rules = auth_rules.fetch_auth_rules(ctx, endpoint).await?;
|
||||||
.fetch_auth_rules(ctx, endpoint, role_name)
|
|
||||||
.await?;
|
|
||||||
let mut key_sets =
|
let mut key_sets =
|
||||||
ahash::HashMap::with_capacity_and_hasher(rules.len(), ahash::RandomState::new());
|
ahash::HashMap::with_capacity_and_hasher(rules.len(), ahash::RandomState::new());
|
||||||
|
|
||||||
// TODO(conrad): run concurrently
|
// TODO(conrad): run concurrently
|
||||||
// TODO(conrad): strip the JWKs urls (should be checked by cplane as well - cloud#16284)
|
// TODO(conrad): strip the JWKs urls (should be checked by cplane as well - cloud#16284)
|
||||||
for rule in rules {
|
for rule in rules {
|
||||||
@@ -151,6 +167,7 @@ impl JwkCacheEntryLock {
|
|||||||
KeySet {
|
KeySet {
|
||||||
jwks,
|
jwks,
|
||||||
audience: rule.audience,
|
audience: rule.audience,
|
||||||
|
role_names: rule.role_names,
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@@ -173,7 +190,6 @@ impl JwkCacheEntryLock {
|
|||||||
ctx: &RequestMonitoring,
|
ctx: &RequestMonitoring,
|
||||||
client: &reqwest::Client,
|
client: &reqwest::Client,
|
||||||
endpoint: EndpointId,
|
endpoint: EndpointId,
|
||||||
role_name: RoleName,
|
|
||||||
fetch: &F,
|
fetch: &F,
|
||||||
) -> Result<Arc<JwkCacheEntry>, anyhow::Error> {
|
) -> Result<Arc<JwkCacheEntry>, anyhow::Error> {
|
||||||
let now = Instant::now();
|
let now = Instant::now();
|
||||||
@@ -183,9 +199,7 @@ impl JwkCacheEntryLock {
|
|||||||
let Some(cached) = guard else {
|
let Some(cached) = guard else {
|
||||||
let _paused = ctx.latency_timer_pause(crate::metrics::Waiting::Compute);
|
let _paused = ctx.latency_timer_pause(crate::metrics::Waiting::Compute);
|
||||||
let permit = self.acquire_permit().await;
|
let permit = self.acquire_permit().await;
|
||||||
return self
|
return self.renew_jwks(permit, ctx, client, endpoint, fetch).await;
|
||||||
.renew_jwks(permit, ctx, client, endpoint, role_name, fetch)
|
|
||||||
.await;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
let last_update = now.duration_since(cached.last_retrieved);
|
let last_update = now.duration_since(cached.last_retrieved);
|
||||||
@@ -196,9 +210,7 @@ impl JwkCacheEntryLock {
|
|||||||
let permit = self.acquire_permit().await;
|
let permit = self.acquire_permit().await;
|
||||||
|
|
||||||
// it's been too long since we checked the keys. wait for them to update.
|
// it's been too long since we checked the keys. wait for them to update.
|
||||||
return self
|
return self.renew_jwks(permit, ctx, client, endpoint, fetch).await;
|
||||||
.renew_jwks(permit, ctx, client, endpoint, role_name, fetch)
|
|
||||||
.await;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// every 5 minutes we should spawn a job to eagerly update the token.
|
// every 5 minutes we should spawn a job to eagerly update the token.
|
||||||
@@ -212,7 +224,7 @@ impl JwkCacheEntryLock {
|
|||||||
let ctx = ctx.clone();
|
let ctx = ctx.clone();
|
||||||
tokio::spawn(async move {
|
tokio::spawn(async move {
|
||||||
if let Err(e) = entry
|
if let Err(e) = entry
|
||||||
.renew_jwks(permit, &ctx, &client, endpoint, role_name, &fetch)
|
.renew_jwks(permit, &ctx, &client, endpoint, &fetch)
|
||||||
.await
|
.await
|
||||||
{
|
{
|
||||||
tracing::warn!(error=?e, "could not fetch JWKs in background job");
|
tracing::warn!(error=?e, "could not fetch JWKs in background job");
|
||||||
@@ -232,7 +244,7 @@ impl JwkCacheEntryLock {
|
|||||||
jwt: &str,
|
jwt: &str,
|
||||||
client: &reqwest::Client,
|
client: &reqwest::Client,
|
||||||
endpoint: EndpointId,
|
endpoint: EndpointId,
|
||||||
role_name: RoleName,
|
role_name: &RoleName,
|
||||||
fetch: &F,
|
fetch: &F,
|
||||||
) -> Result<(), anyhow::Error> {
|
) -> Result<(), anyhow::Error> {
|
||||||
// JWT compact form is defined to be
|
// JWT compact form is defined to be
|
||||||
@@ -254,30 +266,26 @@ impl JwkCacheEntryLock {
|
|||||||
let sig = base64::decode_config(signature, base64::URL_SAFE_NO_PAD)
|
let sig = base64::decode_config(signature, base64::URL_SAFE_NO_PAD)
|
||||||
.context("Provided authentication token is not a valid JWT encoding")?;
|
.context("Provided authentication token is not a valid JWT encoding")?;
|
||||||
|
|
||||||
ensure!(header.typ == "JWT");
|
ensure!(
|
||||||
|
header.typ == "JWT",
|
||||||
|
"Provided authentication token is not a valid JWT encoding"
|
||||||
|
);
|
||||||
let kid = header.key_id.context("missing key id")?;
|
let kid = header.key_id.context("missing key id")?;
|
||||||
|
|
||||||
let mut guard = self
|
let mut guard = self
|
||||||
.get_or_update_jwk_cache(ctx, client, endpoint.clone(), role_name.clone(), fetch)
|
.get_or_update_jwk_cache(ctx, client, endpoint.clone(), fetch)
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
// get the key from the JWKs if possible. If not, wait for the keys to update.
|
// get the key from the JWKs if possible. If not, wait for the keys to update.
|
||||||
let (jwk, expected_audience) = loop {
|
let (jwk, expected_audience) = loop {
|
||||||
match guard.find_jwk_and_audience(kid) {
|
match guard.find_jwk_and_audience(kid, role_name) {
|
||||||
Some(jwk) => break jwk,
|
Some(jwk) => break jwk,
|
||||||
None if guard.last_retrieved.elapsed() > MIN_RENEW => {
|
None if guard.last_retrieved.elapsed() > MIN_RENEW => {
|
||||||
let _paused = ctx.latency_timer_pause(crate::metrics::Waiting::Compute);
|
let _paused = ctx.latency_timer_pause(crate::metrics::Waiting::Compute);
|
||||||
|
|
||||||
let permit = self.acquire_permit().await;
|
let permit = self.acquire_permit().await;
|
||||||
guard = self
|
guard = self
|
||||||
.renew_jwks(
|
.renew_jwks(permit, ctx, client, endpoint.clone(), fetch)
|
||||||
permit,
|
|
||||||
ctx,
|
|
||||||
client,
|
|
||||||
endpoint.clone(),
|
|
||||||
role_name.clone(),
|
|
||||||
fetch,
|
|
||||||
)
|
|
||||||
.await?;
|
.await?;
|
||||||
}
|
}
|
||||||
_ => {
|
_ => {
|
||||||
@@ -300,32 +308,21 @@ impl JwkCacheEntryLock {
|
|||||||
}
|
}
|
||||||
key => bail!("unsupported key type {key:?}"),
|
key => bail!("unsupported key type {key:?}"),
|
||||||
};
|
};
|
||||||
|
tracing::debug!("JWT signature valid");
|
||||||
|
|
||||||
let payload = base64::decode_config(payload, base64::URL_SAFE_NO_PAD)
|
let payload = base64::decode_config(payload, base64::URL_SAFE_NO_PAD)
|
||||||
.context("Provided authentication token is not a valid JWT encoding")?;
|
.context("Provided authentication token is not a valid JWT encoding")?;
|
||||||
let payload = serde_json::from_slice::<JwtPayload<'_>>(&payload)
|
|
||||||
.context("Provided authentication token is not a valid JWT encoding")?;
|
|
||||||
|
|
||||||
tracing::debug!(?payload, "JWT signature valid with claims");
|
let validator = JwtValidator {
|
||||||
|
expected_audience,
|
||||||
|
current_time: SystemTime::now(),
|
||||||
|
clock_skew_leeway: CLOCK_SKEW_LEEWAY,
|
||||||
|
};
|
||||||
|
|
||||||
match (expected_audience, payload.audience) {
|
let payload = validator
|
||||||
// check the audience matches
|
.deserialize(&mut serde_json::Deserializer::from_slice(&payload))?;
|
||||||
(Some(aud1), Some(aud2)) => ensure!(aud1 == aud2, "invalid JWT token audience"),
|
|
||||||
// the audience is expected but is missing
|
|
||||||
(Some(_), None) => bail!("invalid JWT token audience"),
|
|
||||||
// we don't care for the audience field
|
|
||||||
(None, _) => {}
|
|
||||||
}
|
|
||||||
|
|
||||||
let now = SystemTime::now();
|
tracing::debug!(?payload, "JWT claims valid");
|
||||||
|
|
||||||
if let Some(exp) = payload.expiration {
|
|
||||||
ensure!(now < exp + CLOCK_SKEW_LEEWAY);
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(nbf) = payload.not_before {
|
|
||||||
ensure!(nbf < now + CLOCK_SKEW_LEEWAY);
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
@@ -336,7 +333,7 @@ impl JwkCache {
|
|||||||
&self,
|
&self,
|
||||||
ctx: &RequestMonitoring,
|
ctx: &RequestMonitoring,
|
||||||
endpoint: EndpointId,
|
endpoint: EndpointId,
|
||||||
role_name: RoleName,
|
role_name: &RoleName,
|
||||||
fetch: &F,
|
fetch: &F,
|
||||||
jwt: &str,
|
jwt: &str,
|
||||||
) -> Result<(), anyhow::Error> {
|
) -> Result<(), anyhow::Error> {
|
||||||
@@ -413,37 +410,184 @@ struct JwtHeader<'a> {
|
|||||||
key_id: Option<&'a str>,
|
key_id: Option<&'a str>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <https://datatracker.ietf.org/doc/html/rfc7519#section-4.1>
|
struct JwtValidator<'a> {
|
||||||
#[derive(serde::Deserialize, serde::Serialize, Debug)]
|
expected_audience: Option<&'a str>,
|
||||||
struct JwtPayload<'a> {
|
current_time: SystemTime,
|
||||||
/// Audience - Recipient for which the JWT is intended
|
clock_skew_leeway: Duration,
|
||||||
#[serde(rename = "aud")]
|
|
||||||
audience: Option<&'a str>,
|
|
||||||
/// Expiration - Time after which the JWT expires
|
|
||||||
#[serde(deserialize_with = "numeric_date_opt", rename = "exp", default)]
|
|
||||||
expiration: Option<SystemTime>,
|
|
||||||
/// Not before - Time after which the JWT expires
|
|
||||||
#[serde(deserialize_with = "numeric_date_opt", rename = "nbf", default)]
|
|
||||||
not_before: Option<SystemTime>,
|
|
||||||
|
|
||||||
// the following entries are only extracted for the sake of debug logging.
|
|
||||||
/// Issuer of the JWT
|
|
||||||
#[serde(rename = "iss")]
|
|
||||||
issuer: Option<&'a str>,
|
|
||||||
/// Subject of the JWT (the user)
|
|
||||||
#[serde(rename = "sub")]
|
|
||||||
subject: Option<&'a str>,
|
|
||||||
/// Unique token identifier
|
|
||||||
#[serde(rename = "jti")]
|
|
||||||
jwt_id: Option<&'a str>,
|
|
||||||
/// Unique session identifier
|
|
||||||
#[serde(rename = "sid")]
|
|
||||||
session_id: Option<&'a str>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn numeric_date_opt<'de, D: Deserializer<'de>>(d: D) -> Result<Option<SystemTime>, D::Error> {
|
impl<'de> DeserializeSeed<'de> for JwtValidator<'_> {
|
||||||
let d = <Option<u64>>::deserialize(d)?;
|
type Value = JwtPayload<'de>;
|
||||||
Ok(d.map(|n| SystemTime::UNIX_EPOCH + Duration::from_secs(n)))
|
|
||||||
|
fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
|
||||||
|
where
|
||||||
|
D: Deserializer<'de>,
|
||||||
|
{
|
||||||
|
impl<'de> Visitor<'de> for JwtValidator<'_> {
|
||||||
|
type Value = JwtPayload<'de>;
|
||||||
|
|
||||||
|
fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||||
|
formatter.write_str("a JWT payload")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn visit_map<A>(self, mut map: A) -> Result<Self::Value, A::Error>
|
||||||
|
where
|
||||||
|
A: serde::de::MapAccess<'de>,
|
||||||
|
{
|
||||||
|
let mut payload = JwtPayload {
|
||||||
|
issuer: None,
|
||||||
|
subject: None,
|
||||||
|
jwt_id: None,
|
||||||
|
session_id: None,
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut aud = false;
|
||||||
|
|
||||||
|
while let Some(key) = map.next_key()? {
|
||||||
|
match key {
|
||||||
|
"iss" if payload.issuer.is_none() => {
|
||||||
|
payload.issuer = Some(map.next_value()?);
|
||||||
|
}
|
||||||
|
"sub" if payload.subject.is_none() => {
|
||||||
|
payload.subject = Some(map.next_value()?);
|
||||||
|
}
|
||||||
|
"jti" if payload.jwt_id.is_none() => {
|
||||||
|
payload.jwt_id = Some(map.next_value()?);
|
||||||
|
}
|
||||||
|
"sid" if payload.session_id.is_none() => {
|
||||||
|
payload.session_id = Some(map.next_value()?);
|
||||||
|
}
|
||||||
|
"exp" => {
|
||||||
|
let exp = map.next_value::<u64>()?;
|
||||||
|
let exp = SystemTime::UNIX_EPOCH + Duration::from_secs(exp);
|
||||||
|
|
||||||
|
if self.current_time > exp + self.clock_skew_leeway {
|
||||||
|
return Err(serde::de::Error::custom("JWT token has expired"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"nbf" => {
|
||||||
|
let nbf = map.next_value::<u64>()?;
|
||||||
|
let nbf = SystemTime::UNIX_EPOCH + Duration::from_secs(nbf);
|
||||||
|
|
||||||
|
if self.current_time + self.clock_skew_leeway < nbf {
|
||||||
|
return Err(serde::de::Error::custom(
|
||||||
|
"JWT token is not yet ready to use",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"aud" => {
|
||||||
|
if let Some(expected_audience) = self.expected_audience {
|
||||||
|
map.next_value_seed(AudienceValidator { expected_audience })?;
|
||||||
|
aud = true;
|
||||||
|
} else {
|
||||||
|
map.next_value::<IgnoredAny>()?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => map.next_value::<IgnoredAny>().map(|IgnoredAny| ())?,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if self.expected_audience.is_some() && !aud {
|
||||||
|
return Err(serde::de::Error::custom("invalid JWT token audience"));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(payload)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
deserializer.deserialize_map(self)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct AudienceValidator<'a> {
|
||||||
|
expected_audience: &'a str,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'de> DeserializeSeed<'de> for AudienceValidator<'_> {
|
||||||
|
type Value = ();
|
||||||
|
|
||||||
|
fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
|
||||||
|
where
|
||||||
|
D: Deserializer<'de>,
|
||||||
|
{
|
||||||
|
impl<'de> Visitor<'de> for AudienceValidator<'_> {
|
||||||
|
type Value = ();
|
||||||
|
|
||||||
|
fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||||
|
formatter.write_str("a single string or an array of strings")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
|
||||||
|
where
|
||||||
|
E: serde::de::Error,
|
||||||
|
{
|
||||||
|
if self.expected_audience == v {
|
||||||
|
Ok(())
|
||||||
|
} else {
|
||||||
|
Err(E::custom("invalid JWT token audience"))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
|
||||||
|
where
|
||||||
|
A: serde::de::SeqAccess<'de>,
|
||||||
|
{
|
||||||
|
while let Some(v) = seq.next_element_seed(SingleAudienceValidator {
|
||||||
|
expected_audience: self.expected_audience,
|
||||||
|
})? {
|
||||||
|
if v {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(serde::de::Error::custom("invalid JWT token audience"))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
deserializer.deserialize_any(self)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct SingleAudienceValidator<'a> {
|
||||||
|
expected_audience: &'a str,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'de> DeserializeSeed<'de> for SingleAudienceValidator<'_> {
|
||||||
|
type Value = bool;
|
||||||
|
|
||||||
|
fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
|
||||||
|
where
|
||||||
|
D: Deserializer<'de>,
|
||||||
|
{
|
||||||
|
impl<'de> Visitor<'de> for SingleAudienceValidator<'_> {
|
||||||
|
type Value = bool;
|
||||||
|
|
||||||
|
fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||||
|
formatter.write_str("a single audience string")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
|
||||||
|
where
|
||||||
|
E: serde::de::Error,
|
||||||
|
{
|
||||||
|
Ok(self.expected_audience == v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
deserializer.deserialize_any(self)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <https://datatracker.ietf.org/doc/html/rfc7519#section-4.1>
|
||||||
|
// the following entries are only extracted for the sake of debug logging.
|
||||||
|
#[derive(Debug)]
|
||||||
|
#[allow(dead_code)]
|
||||||
|
struct JwtPayload<'a> {
|
||||||
|
/// Issuer of the JWT
|
||||||
|
issuer: Option<Cow<'a, str>>,
|
||||||
|
/// Subject of the JWT (the user)
|
||||||
|
subject: Option<Cow<'a, str>>,
|
||||||
|
/// Unique token identifier
|
||||||
|
jwt_id: Option<Cow<'a, str>>,
|
||||||
|
/// Unique session identifier
|
||||||
|
session_id: Option<Cow<'a, str>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
struct JwkRenewalPermit<'a> {
|
struct JwkRenewalPermit<'a> {
|
||||||
@@ -524,6 +668,8 @@ mod tests {
|
|||||||
use hyper_util::rt::TokioIo;
|
use hyper_util::rt::TokioIo;
|
||||||
use rand::rngs::OsRng;
|
use rand::rngs::OsRng;
|
||||||
use rsa::pkcs8::DecodePrivateKey;
|
use rsa::pkcs8::DecodePrivateKey;
|
||||||
|
use serde::Serialize;
|
||||||
|
use serde_json::json;
|
||||||
use signature::Signer;
|
use signature::Signer;
|
||||||
use tokio::net::TcpListener;
|
use tokio::net::TcpListener;
|
||||||
|
|
||||||
@@ -556,23 +702,41 @@ mod tests {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn build_jwt_payload(kid: String, sig: jose_jwa::Signing) -> String {
|
fn build_jwt_payload(kid: String, sig: jose_jwa::Signing) -> String {
|
||||||
|
let now = SystemTime::now()
|
||||||
|
.duration_since(SystemTime::UNIX_EPOCH)
|
||||||
|
.unwrap()
|
||||||
|
.as_secs();
|
||||||
|
let body = typed_json::json! {{
|
||||||
|
"exp": now + 3600,
|
||||||
|
"nbf": now,
|
||||||
|
"aud": ["audience1", "neon", "audience2"],
|
||||||
|
"sub": "user1",
|
||||||
|
"sid": "session1",
|
||||||
|
"jti": "token1",
|
||||||
|
"iss": "neon-testing",
|
||||||
|
}};
|
||||||
|
build_custom_jwt_payload(kid, body, sig)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_custom_jwt_payload(
|
||||||
|
kid: String,
|
||||||
|
body: impl Serialize,
|
||||||
|
sig: jose_jwa::Signing,
|
||||||
|
) -> String {
|
||||||
let header = JwtHeader {
|
let header = JwtHeader {
|
||||||
typ: "JWT",
|
typ: "JWT",
|
||||||
algorithm: jose_jwa::Algorithm::Signing(sig),
|
algorithm: jose_jwa::Algorithm::Signing(sig),
|
||||||
key_id: Some(&kid),
|
key_id: Some(&kid),
|
||||||
};
|
};
|
||||||
let body = typed_json::json! {{
|
|
||||||
"exp": SystemTime::now().duration_since(SystemTime::UNIX_EPOCH).unwrap().as_secs() + 3600,
|
|
||||||
}};
|
|
||||||
|
|
||||||
let header =
|
let header =
|
||||||
base64::encode_config(serde_json::to_string(&header).unwrap(), URL_SAFE_NO_PAD);
|
base64::encode_config(serde_json::to_string(&header).unwrap(), URL_SAFE_NO_PAD);
|
||||||
let body = base64::encode_config(body.to_string(), URL_SAFE_NO_PAD);
|
let body = base64::encode_config(serde_json::to_string(&body).unwrap(), URL_SAFE_NO_PAD);
|
||||||
|
|
||||||
format!("{header}.{body}")
|
format!("{header}.{body}")
|
||||||
}
|
}
|
||||||
|
|
||||||
fn new_ec_jwt(kid: String, key: p256::SecretKey) -> String {
|
fn new_ec_jwt(kid: String, key: &p256::SecretKey) -> String {
|
||||||
use p256::ecdsa::{Signature, SigningKey};
|
use p256::ecdsa::{Signature, SigningKey};
|
||||||
|
|
||||||
let payload = build_jwt_payload(kid, jose_jwa::Signing::Es256);
|
let payload = build_jwt_payload(kid, jose_jwa::Signing::Es256);
|
||||||
@@ -582,6 +746,16 @@ mod tests {
|
|||||||
format!("{payload}.{sig}")
|
format!("{payload}.{sig}")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn new_custom_ec_jwt(kid: String, key: &p256::SecretKey, body: impl Serialize) -> String {
|
||||||
|
use p256::ecdsa::{Signature, SigningKey};
|
||||||
|
|
||||||
|
let payload = build_custom_jwt_payload(kid, body, jose_jwa::Signing::Es256);
|
||||||
|
let sig: Signature = SigningKey::from(key).sign(payload.as_bytes());
|
||||||
|
let sig = base64::encode_config(sig.to_bytes(), URL_SAFE_NO_PAD);
|
||||||
|
|
||||||
|
format!("{payload}.{sig}")
|
||||||
|
}
|
||||||
|
|
||||||
fn new_rsa_jwt(kid: String, key: rsa::RsaPrivateKey) -> String {
|
fn new_rsa_jwt(kid: String, key: rsa::RsaPrivateKey) -> String {
|
||||||
use rsa::pkcs1v15::SigningKey;
|
use rsa::pkcs1v15::SigningKey;
|
||||||
use rsa::signature::SignatureEncoding;
|
use rsa::signature::SignatureEncoding;
|
||||||
@@ -653,42 +827,34 @@ X0n5X2/pBLJzxZc62ccvZYVnctBiFs6HbSnxpuMQCfkt/BcR/ttIepBQQIW86wHL
|
|||||||
-----END PRIVATE KEY-----
|
-----END PRIVATE KEY-----
|
||||||
";
|
";
|
||||||
|
|
||||||
#[tokio::test]
|
#[derive(Clone)]
|
||||||
async fn renew() {
|
struct Fetch(Vec<AuthRule>);
|
||||||
let (rs1, jwk1) = new_rsa_jwk(RS1, "1".into());
|
|
||||||
let (rs2, jwk2) = new_rsa_jwk(RS2, "2".into());
|
|
||||||
let (ec1, jwk3) = new_ec_jwk("3".into());
|
|
||||||
let (ec2, jwk4) = new_ec_jwk("4".into());
|
|
||||||
|
|
||||||
let jwt1 = new_rsa_jwt("1".into(), rs1);
|
impl FetchAuthRules for Fetch {
|
||||||
let jwt2 = new_rsa_jwt("2".into(), rs2);
|
async fn fetch_auth_rules(
|
||||||
let jwt3 = new_ec_jwt("3".into(), ec1);
|
&self,
|
||||||
let jwt4 = new_ec_jwt("4".into(), ec2);
|
_ctx: &RequestMonitoring,
|
||||||
|
_endpoint: EndpointId,
|
||||||
let foo_jwks = jose_jwk::JwkSet {
|
) -> anyhow::Result<Vec<AuthRule>> {
|
||||||
keys: vec![jwk1, jwk3],
|
Ok(self.0.clone())
|
||||||
};
|
}
|
||||||
let bar_jwks = jose_jwk::JwkSet {
|
}
|
||||||
keys: vec![jwk2, jwk4],
|
|
||||||
};
|
|
||||||
|
|
||||||
|
async fn jwks_server(
|
||||||
|
router: impl for<'a> Fn(&'a str) -> Option<Vec<u8>> + Send + Sync + 'static,
|
||||||
|
) -> SocketAddr {
|
||||||
|
let router = Arc::new(router);
|
||||||
let service = service_fn(move |req| {
|
let service = service_fn(move |req| {
|
||||||
let foo_jwks = foo_jwks.clone();
|
let router = Arc::clone(&router);
|
||||||
let bar_jwks = bar_jwks.clone();
|
|
||||||
async move {
|
async move {
|
||||||
let jwks = match req.uri().path() {
|
match router(req.uri().path()) {
|
||||||
"/foo" => &foo_jwks,
|
Some(body) => Response::builder()
|
||||||
"/bar" => &bar_jwks,
|
.status(200)
|
||||||
_ => {
|
.body(Full::new(Bytes::from(body))),
|
||||||
return Response::builder()
|
None => Response::builder()
|
||||||
.status(404)
|
.status(404)
|
||||||
.body(Full::new(Bytes::new()));
|
.body(Full::new(Bytes::new())),
|
||||||
}
|
}
|
||||||
};
|
|
||||||
let body = serde_json::to_vec(jwks).unwrap();
|
|
||||||
Response::builder()
|
|
||||||
.status(200)
|
|
||||||
.body(Full::new(Bytes::from(body)))
|
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -703,50 +869,257 @@ X0n5X2/pBLJzxZc62ccvZYVnctBiFs6HbSnxpuMQCfkt/BcR/ttIepBQQIW86wHL
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
let client = reqwest::Client::new();
|
addr
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[tokio::test]
|
||||||
struct Fetch(SocketAddr);
|
async fn check_jwt_happy_path() {
|
||||||
|
let (rs1, jwk1) = new_rsa_jwk(RS1, "rs1".into());
|
||||||
|
let (rs2, jwk2) = new_rsa_jwk(RS2, "rs2".into());
|
||||||
|
let (ec1, jwk3) = new_ec_jwk("ec1".into());
|
||||||
|
let (ec2, jwk4) = new_ec_jwk("ec2".into());
|
||||||
|
|
||||||
impl FetchAuthRules for Fetch {
|
let foo_jwks = jose_jwk::JwkSet {
|
||||||
async fn fetch_auth_rules(
|
keys: vec![jwk1, jwk3],
|
||||||
&self,
|
};
|
||||||
_ctx: &RequestMonitoring,
|
let bar_jwks = jose_jwk::JwkSet {
|
||||||
_endpoint: EndpointId,
|
keys: vec![jwk2, jwk4],
|
||||||
_role_name: RoleName,
|
};
|
||||||
) -> anyhow::Result<Vec<AuthRule>> {
|
|
||||||
Ok(vec![
|
let jwks_addr = jwks_server(move |path| match path {
|
||||||
AuthRule {
|
"/foo" => Some(serde_json::to_vec(&foo_jwks).unwrap()),
|
||||||
id: "foo".to_owned(),
|
"/bar" => Some(serde_json::to_vec(&bar_jwks).unwrap()),
|
||||||
jwks_url: format!("http://{}/foo", self.0).parse().unwrap(),
|
_ => None,
|
||||||
audience: None,
|
})
|
||||||
},
|
.await;
|
||||||
AuthRule {
|
|
||||||
id: "bar".to_owned(),
|
let role_name1 = RoleName::from("anonymous");
|
||||||
jwks_url: format!("http://{}/bar", self.0).parse().unwrap(),
|
let role_name2 = RoleName::from("authenticated");
|
||||||
audience: None,
|
|
||||||
},
|
let roles = vec![
|
||||||
])
|
RoleNameInt::from(&role_name1),
|
||||||
}
|
RoleNameInt::from(&role_name2),
|
||||||
}
|
];
|
||||||
|
let rules = vec![
|
||||||
|
AuthRule {
|
||||||
|
id: "foo".to_owned(),
|
||||||
|
jwks_url: format!("http://{jwks_addr}/foo").parse().unwrap(),
|
||||||
|
audience: None,
|
||||||
|
role_names: roles.clone(),
|
||||||
|
},
|
||||||
|
AuthRule {
|
||||||
|
id: "bar".to_owned(),
|
||||||
|
jwks_url: format!("http://{jwks_addr}/bar").parse().unwrap(),
|
||||||
|
audience: None,
|
||||||
|
role_names: roles.clone(),
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
let fetch = Fetch(rules);
|
||||||
|
let jwk_cache = JwkCache::default();
|
||||||
|
|
||||||
let role_name = RoleName::from("user");
|
|
||||||
let endpoint = EndpointId::from("ep");
|
let endpoint = EndpointId::from("ep");
|
||||||
|
|
||||||
let jwk_cache = Arc::new(JwkCacheEntryLock::default());
|
let jwt1 = new_rsa_jwt("rs1".into(), rs1);
|
||||||
|
let jwt2 = new_rsa_jwt("rs2".into(), rs2);
|
||||||
|
let jwt3 = new_ec_jwt("ec1".into(), &ec1);
|
||||||
|
let jwt4 = new_ec_jwt("ec2".into(), &ec2);
|
||||||
|
|
||||||
for token in [jwt1, jwt2, jwt3, jwt4] {
|
let tokens = [jwt1, jwt2, jwt3, jwt4];
|
||||||
jwk_cache
|
let role_names = [role_name1, role_name2];
|
||||||
.check_jwt(
|
for role in &role_names {
|
||||||
&RequestMonitoring::test(),
|
for token in &tokens {
|
||||||
&token,
|
jwk_cache
|
||||||
&client,
|
.check_jwt(
|
||||||
endpoint.clone(),
|
&RequestMonitoring::test(),
|
||||||
role_name.clone(),
|
endpoint.clone(),
|
||||||
&Fetch(addr),
|
role,
|
||||||
)
|
&fetch,
|
||||||
|
token,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn check_jwt_invalid_signature() {
|
||||||
|
let (_, jwk) = new_ec_jwk("1".into());
|
||||||
|
let (key, _) = new_ec_jwk("1".into());
|
||||||
|
|
||||||
|
// has a matching kid, but signed by the wrong key
|
||||||
|
let bad_jwt = new_ec_jwt("1".into(), &key);
|
||||||
|
|
||||||
|
let jwks = jose_jwk::JwkSet { keys: vec![jwk] };
|
||||||
|
let jwks_addr = jwks_server(move |path| match path {
|
||||||
|
"/" => Some(serde_json::to_vec(&jwks).unwrap()),
|
||||||
|
_ => None,
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
|
||||||
|
let role = RoleName::from("authenticated");
|
||||||
|
|
||||||
|
let rules = vec![AuthRule {
|
||||||
|
id: String::new(),
|
||||||
|
jwks_url: format!("http://{jwks_addr}/").parse().unwrap(),
|
||||||
|
audience: None,
|
||||||
|
role_names: vec![RoleNameInt::from(&role)],
|
||||||
|
}];
|
||||||
|
|
||||||
|
let fetch = Fetch(rules);
|
||||||
|
let jwk_cache = JwkCache::default();
|
||||||
|
|
||||||
|
let ep = EndpointId::from("ep");
|
||||||
|
|
||||||
|
let ctx = RequestMonitoring::test();
|
||||||
|
let err = jwk_cache
|
||||||
|
.check_jwt(&ctx, ep, &role, &fetch, &bad_jwt)
|
||||||
|
.await
|
||||||
|
.unwrap_err();
|
||||||
|
assert!(
|
||||||
|
err.to_string().contains("signature error"),
|
||||||
|
"expected \"signature error\", got {err:?}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn check_jwt_unknown_role() {
|
||||||
|
let (key, jwk) = new_rsa_jwk(RS1, "1".into());
|
||||||
|
let jwt = new_rsa_jwt("1".into(), key);
|
||||||
|
|
||||||
|
let jwks = jose_jwk::JwkSet { keys: vec![jwk] };
|
||||||
|
let jwks_addr = jwks_server(move |path| match path {
|
||||||
|
"/" => Some(serde_json::to_vec(&jwks).unwrap()),
|
||||||
|
_ => None,
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
|
||||||
|
let role = RoleName::from("authenticated");
|
||||||
|
let rules = vec![AuthRule {
|
||||||
|
id: String::new(),
|
||||||
|
jwks_url: format!("http://{jwks_addr}/").parse().unwrap(),
|
||||||
|
audience: None,
|
||||||
|
role_names: vec![RoleNameInt::from(&role)],
|
||||||
|
}];
|
||||||
|
|
||||||
|
let fetch = Fetch(rules);
|
||||||
|
let jwk_cache = JwkCache::default();
|
||||||
|
|
||||||
|
let ep = EndpointId::from("ep");
|
||||||
|
|
||||||
|
// this role_name is not accepted
|
||||||
|
let bad_role_name = RoleName::from("cloud_admin");
|
||||||
|
|
||||||
|
let ctx = RequestMonitoring::test();
|
||||||
|
let err = jwk_cache
|
||||||
|
.check_jwt(&ctx, ep, &bad_role_name, &fetch, &jwt)
|
||||||
|
.await
|
||||||
|
.unwrap_err();
|
||||||
|
|
||||||
|
assert!(
|
||||||
|
err.to_string().contains("jwk not found"),
|
||||||
|
"expected \"jwk not found\", got {err:?}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn check_jwt_invalid_claims() {
|
||||||
|
let (key, jwk) = new_ec_jwk("1".into());
|
||||||
|
|
||||||
|
let jwks = jose_jwk::JwkSet { keys: vec![jwk] };
|
||||||
|
let jwks_addr = jwks_server(move |path| match path {
|
||||||
|
"/" => Some(serde_json::to_vec(&jwks).unwrap()),
|
||||||
|
_ => None,
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
|
||||||
|
let now = SystemTime::now()
|
||||||
|
.duration_since(SystemTime::UNIX_EPOCH)
|
||||||
|
.unwrap()
|
||||||
|
.as_secs();
|
||||||
|
|
||||||
|
struct Test {
|
||||||
|
body: serde_json::Value,
|
||||||
|
error: &'static str,
|
||||||
|
}
|
||||||
|
|
||||||
|
let table = vec![
|
||||||
|
Test {
|
||||||
|
body: json! {{
|
||||||
|
"nbf": now + 60,
|
||||||
|
"aud": "neon",
|
||||||
|
}},
|
||||||
|
error: "JWT token is not yet ready to use",
|
||||||
|
},
|
||||||
|
Test {
|
||||||
|
body: json! {{
|
||||||
|
"exp": now - 60,
|
||||||
|
"aud": ["neon"],
|
||||||
|
}},
|
||||||
|
error: "JWT token has expired",
|
||||||
|
},
|
||||||
|
Test {
|
||||||
|
body: json! {{
|
||||||
|
}},
|
||||||
|
error: "invalid JWT token audience",
|
||||||
|
},
|
||||||
|
Test {
|
||||||
|
body: json! {{
|
||||||
|
"aud": [],
|
||||||
|
}},
|
||||||
|
error: "invalid JWT token audience",
|
||||||
|
},
|
||||||
|
Test {
|
||||||
|
body: json! {{
|
||||||
|
"aud": "foo",
|
||||||
|
}},
|
||||||
|
error: "invalid JWT token audience",
|
||||||
|
},
|
||||||
|
Test {
|
||||||
|
body: json! {{
|
||||||
|
"aud": ["foo"],
|
||||||
|
}},
|
||||||
|
error: "invalid JWT token audience",
|
||||||
|
},
|
||||||
|
Test {
|
||||||
|
body: json! {{
|
||||||
|
"aud": ["foo", "bar"],
|
||||||
|
}},
|
||||||
|
error: "invalid JWT token audience",
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
let role = RoleName::from("authenticated");
|
||||||
|
|
||||||
|
let rules = vec![AuthRule {
|
||||||
|
id: String::new(),
|
||||||
|
jwks_url: format!("http://{jwks_addr}/").parse().unwrap(),
|
||||||
|
audience: Some("neon".to_string()),
|
||||||
|
role_names: vec![RoleNameInt::from(&role)],
|
||||||
|
}];
|
||||||
|
|
||||||
|
let fetch = Fetch(rules);
|
||||||
|
let jwk_cache = JwkCache::default();
|
||||||
|
|
||||||
|
let ep = EndpointId::from("ep");
|
||||||
|
|
||||||
|
let ctx = RequestMonitoring::test();
|
||||||
|
for test in table {
|
||||||
|
let jwt = new_custom_ec_jwt("1".into(), &key, test.body);
|
||||||
|
|
||||||
|
match jwk_cache
|
||||||
|
.check_jwt(&ctx, ep.clone(), &role, &fetch, &jwt)
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
{
|
||||||
|
Err(err) if err.to_string().contains(test.error) => {}
|
||||||
|
Err(err) => {
|
||||||
|
panic!("expected {:?}, got {err:?}", test.error)
|
||||||
|
}
|
||||||
|
Ok(()) => {
|
||||||
|
panic!("expected {:?}, got ok", test.error)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
use std::{collections::HashMap, net::SocketAddr};
|
use std::net::SocketAddr;
|
||||||
|
|
||||||
use anyhow::Context;
|
use anyhow::Context;
|
||||||
use arc_swap::ArcSwapOption;
|
use arc_swap::ArcSwapOption;
|
||||||
@@ -10,21 +10,19 @@ use crate::{
|
|||||||
NodeInfo,
|
NodeInfo,
|
||||||
},
|
},
|
||||||
context::RequestMonitoring,
|
context::RequestMonitoring,
|
||||||
intern::{BranchIdInt, BranchIdTag, EndpointIdTag, InternId, ProjectIdInt, ProjectIdTag},
|
intern::{BranchIdTag, EndpointIdTag, InternId, ProjectIdTag},
|
||||||
EndpointId, RoleName,
|
EndpointId,
|
||||||
};
|
};
|
||||||
|
|
||||||
use super::jwt::{AuthRule, FetchAuthRules, JwkCache};
|
use super::jwt::{AuthRule, FetchAuthRules};
|
||||||
|
|
||||||
pub struct LocalBackend {
|
pub struct LocalBackend {
|
||||||
pub(crate) jwks_cache: JwkCache,
|
|
||||||
pub(crate) node_info: NodeInfo,
|
pub(crate) node_info: NodeInfo,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl LocalBackend {
|
impl LocalBackend {
|
||||||
pub fn new(postgres_addr: SocketAddr) -> Self {
|
pub fn new(postgres_addr: SocketAddr) -> Self {
|
||||||
LocalBackend {
|
LocalBackend {
|
||||||
jwks_cache: JwkCache::default(),
|
|
||||||
node_info: NodeInfo {
|
node_info: NodeInfo {
|
||||||
config: {
|
config: {
|
||||||
let mut cfg = ConnCfg::new();
|
let mut cfg = ConnCfg::new();
|
||||||
@@ -48,26 +46,17 @@ impl LocalBackend {
|
|||||||
#[derive(Clone, Copy)]
|
#[derive(Clone, Copy)]
|
||||||
pub(crate) struct StaticAuthRules;
|
pub(crate) struct StaticAuthRules;
|
||||||
|
|
||||||
pub static JWKS_ROLE_MAP: ArcSwapOption<JwksRoleSettings> = ArcSwapOption::const_empty();
|
pub static JWKS_ROLE_MAP: ArcSwapOption<EndpointJwksResponse> = ArcSwapOption::const_empty();
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
pub struct JwksRoleSettings {
|
|
||||||
pub roles: HashMap<RoleName, EndpointJwksResponse>,
|
|
||||||
pub project_id: ProjectIdInt,
|
|
||||||
pub branch_id: BranchIdInt,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl FetchAuthRules for StaticAuthRules {
|
impl FetchAuthRules for StaticAuthRules {
|
||||||
async fn fetch_auth_rules(
|
async fn fetch_auth_rules(
|
||||||
&self,
|
&self,
|
||||||
_ctx: &RequestMonitoring,
|
_ctx: &RequestMonitoring,
|
||||||
_endpoint: EndpointId,
|
_endpoint: EndpointId,
|
||||||
role_name: RoleName,
|
|
||||||
) -> anyhow::Result<Vec<AuthRule>> {
|
) -> anyhow::Result<Vec<AuthRule>> {
|
||||||
let mappings = JWKS_ROLE_MAP.load();
|
let mappings = JWKS_ROLE_MAP.load();
|
||||||
let role_mappings = mappings
|
let role_mappings = mappings
|
||||||
.as_deref()
|
.as_deref()
|
||||||
.and_then(|m| m.roles.get(&role_name))
|
|
||||||
.context("JWKs settings for this role were not configured")?;
|
.context("JWKs settings for this role were not configured")?;
|
||||||
let mut rules = vec![];
|
let mut rules = vec![];
|
||||||
for setting in &role_mappings.jwks {
|
for setting in &role_mappings.jwks {
|
||||||
@@ -75,6 +64,7 @@ impl FetchAuthRules for StaticAuthRules {
|
|||||||
id: setting.id.clone(),
|
id: setting.id.clone(),
|
||||||
jwks_url: setting.jwks_url.clone(),
|
jwks_url: setting.jwks_url.clone(),
|
||||||
audience: setting.jwt_audience.clone(),
|
audience: setting.jwt_audience.clone(),
|
||||||
|
role_names: setting.role_names.clone(),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
use crate::{
|
use crate::{
|
||||||
auth, compute,
|
auth, compute,
|
||||||
|
config::AuthenticationConfig,
|
||||||
console::{self, provider::NodeInfo},
|
console::{self, provider::NodeInfo},
|
||||||
context::RequestMonitoring,
|
context::RequestMonitoring,
|
||||||
error::{ReportableError, UserFacingError},
|
error::{ReportableError, UserFacingError},
|
||||||
@@ -58,6 +59,7 @@ pub(crate) fn new_psql_session_id() -> String {
|
|||||||
|
|
||||||
pub(super) async fn authenticate(
|
pub(super) async fn authenticate(
|
||||||
ctx: &RequestMonitoring,
|
ctx: &RequestMonitoring,
|
||||||
|
auth_config: &'static AuthenticationConfig,
|
||||||
link_uri: &reqwest::Url,
|
link_uri: &reqwest::Url,
|
||||||
client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,
|
client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,
|
||||||
) -> auth::Result<NodeInfo> {
|
) -> auth::Result<NodeInfo> {
|
||||||
@@ -89,6 +91,14 @@ pub(super) async fn authenticate(
|
|||||||
info!(parent: &span, "waiting for console's reply...");
|
info!(parent: &span, "waiting for console's reply...");
|
||||||
let db_info = waiter.await.map_err(WebAuthError::from)?;
|
let db_info = waiter.await.map_err(WebAuthError::from)?;
|
||||||
|
|
||||||
|
if auth_config.ip_allowlist_check_enabled {
|
||||||
|
if let Some(allowed_ips) = &db_info.allowed_ips {
|
||||||
|
if !auth::check_peer_addr_is_in_list(&ctx.peer_addr(), allowed_ips) {
|
||||||
|
return Err(auth::AuthError::ip_address_not_allowed(ctx.peer_addr()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
client.write_message_noflush(&Be::NoticeResponse("Connecting to database."))?;
|
client.write_message_noflush(&Be::NoticeResponse("Connecting to database."))?;
|
||||||
|
|
||||||
// This config should be self-contained, because we won't
|
// This config should be self-contained, because we won't
|
||||||
|
|||||||
@@ -1,34 +1,38 @@
|
|||||||
use std::{
|
use std::{net::SocketAddr, pin::pin, str::FromStr, sync::Arc, time::Duration};
|
||||||
net::SocketAddr,
|
|
||||||
path::{Path, PathBuf},
|
|
||||||
pin::pin,
|
|
||||||
sync::Arc,
|
|
||||||
time::Duration,
|
|
||||||
};
|
|
||||||
|
|
||||||
use anyhow::{bail, ensure};
|
use anyhow::{bail, ensure, Context};
|
||||||
|
use camino::{Utf8Path, Utf8PathBuf};
|
||||||
|
use compute_api::spec::LocalProxySpec;
|
||||||
use dashmap::DashMap;
|
use dashmap::DashMap;
|
||||||
use futures::{future::Either, FutureExt};
|
use futures::future::Either;
|
||||||
use proxy::{
|
use proxy::{
|
||||||
auth::backend::local::{JwksRoleSettings, LocalBackend, JWKS_ROLE_MAP},
|
auth::backend::{
|
||||||
|
jwt::JwkCache,
|
||||||
|
local::{LocalBackend, JWKS_ROLE_MAP},
|
||||||
|
},
|
||||||
cancellation::CancellationHandlerMain,
|
cancellation::CancellationHandlerMain,
|
||||||
config::{self, AuthenticationConfig, HttpConfig, ProxyConfig, RetryConfig},
|
config::{self, AuthenticationConfig, HttpConfig, ProxyConfig, RetryConfig},
|
||||||
console::{locks::ApiLocks, messages::JwksRoleMapping},
|
console::{
|
||||||
|
locks::ApiLocks,
|
||||||
|
messages::{EndpointJwksResponse, JwksSettings},
|
||||||
|
},
|
||||||
http::health_server::AppMetrics,
|
http::health_server::AppMetrics,
|
||||||
|
intern::RoleNameInt,
|
||||||
metrics::{Metrics, ThreadPoolMetrics},
|
metrics::{Metrics, ThreadPoolMetrics},
|
||||||
rate_limiter::{BucketRateLimiter, EndpointRateLimiter, LeakyBucketConfig, RateBucketInfo},
|
rate_limiter::{BucketRateLimiter, EndpointRateLimiter, LeakyBucketConfig, RateBucketInfo},
|
||||||
scram::threadpool::ThreadPool,
|
scram::threadpool::ThreadPool,
|
||||||
serverless::{self, cancel_set::CancelSet, GlobalConnPoolOptions},
|
serverless::{self, cancel_set::CancelSet, GlobalConnPoolOptions},
|
||||||
|
RoleName,
|
||||||
};
|
};
|
||||||
|
|
||||||
project_git_version!(GIT_VERSION);
|
project_git_version!(GIT_VERSION);
|
||||||
project_build_tag!(BUILD_TAG);
|
project_build_tag!(BUILD_TAG);
|
||||||
|
|
||||||
use clap::Parser;
|
use clap::Parser;
|
||||||
use tokio::{net::TcpListener, task::JoinSet};
|
use tokio::{net::TcpListener, sync::Notify, task::JoinSet};
|
||||||
use tokio_util::sync::CancellationToken;
|
use tokio_util::sync::CancellationToken;
|
||||||
use tracing::{error, info, warn};
|
use tracing::{error, info, warn};
|
||||||
use utils::{project_build_tag, project_git_version, sentry_init::init_sentry};
|
use utils::{pid_file, project_build_tag, project_git_version, sentry_init::init_sentry};
|
||||||
|
|
||||||
#[global_allocator]
|
#[global_allocator]
|
||||||
static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
|
static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
|
||||||
@@ -72,9 +76,12 @@ struct LocalProxyCliArgs {
|
|||||||
/// Address of the postgres server
|
/// Address of the postgres server
|
||||||
#[clap(long, default_value = "127.0.0.1:5432")]
|
#[clap(long, default_value = "127.0.0.1:5432")]
|
||||||
compute: SocketAddr,
|
compute: SocketAddr,
|
||||||
/// File address of the local proxy config file
|
/// Path of the local proxy config file
|
||||||
#[clap(long, default_value = "./localproxy.json")]
|
#[clap(long, default_value = "./localproxy.json")]
|
||||||
config_path: PathBuf,
|
config_path: Utf8PathBuf,
|
||||||
|
/// Path of the local proxy PID file
|
||||||
|
#[clap(long, default_value = "./localproxy.pid")]
|
||||||
|
pid_path: Utf8PathBuf,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(clap::Args, Clone, Copy, Debug)]
|
#[derive(clap::Args, Clone, Copy, Debug)]
|
||||||
@@ -126,6 +133,24 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
let args = LocalProxyCliArgs::parse();
|
let args = LocalProxyCliArgs::parse();
|
||||||
let config = build_config(&args)?;
|
let config = build_config(&args)?;
|
||||||
|
|
||||||
|
// before we bind to any ports, write the process ID to a file
|
||||||
|
// so that compute-ctl can find our process later
|
||||||
|
// in order to trigger the appropriate SIGHUP on config change.
|
||||||
|
//
|
||||||
|
// This also claims a "lock" that makes sure only one instance
|
||||||
|
// of local-proxy runs at a time.
|
||||||
|
let _process_guard = loop {
|
||||||
|
match pid_file::claim_for_current_process(&args.pid_path) {
|
||||||
|
Ok(guard) => break guard,
|
||||||
|
Err(e) => {
|
||||||
|
// compute-ctl might have tried to read the pid-file to let us
|
||||||
|
// know about some config change. We should try again.
|
||||||
|
error!(path=?args.pid_path, "could not claim PID file guard: {e:?}");
|
||||||
|
tokio::time::sleep(Duration::from_secs(1)).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
let metrics_listener = TcpListener::bind(args.metrics).await?.into_std()?;
|
let metrics_listener = TcpListener::bind(args.metrics).await?.into_std()?;
|
||||||
let http_listener = TcpListener::bind(args.http).await?;
|
let http_listener = TcpListener::bind(args.http).await?;
|
||||||
let shutdown = CancellationToken::new();
|
let shutdown = CancellationToken::new();
|
||||||
@@ -139,12 +164,30 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
16,
|
16,
|
||||||
));
|
));
|
||||||
|
|
||||||
refresh_config(args.config_path.clone()).await;
|
// write the process ID to a file so that compute-ctl can find our process later
|
||||||
|
// in order to trigger the appropriate SIGHUP on config change.
|
||||||
|
let pid = std::process::id();
|
||||||
|
info!("process running in PID {pid}");
|
||||||
|
std::fs::write(args.pid_path, format!("{pid}\n")).context("writing PID to file")?;
|
||||||
|
|
||||||
let mut maintenance_tasks = JoinSet::new();
|
let mut maintenance_tasks = JoinSet::new();
|
||||||
maintenance_tasks.spawn(proxy::handle_signals(shutdown.clone(), move || {
|
|
||||||
refresh_config(args.config_path.clone()).map(Ok)
|
let refresh_config_notify = Arc::new(Notify::new());
|
||||||
|
maintenance_tasks.spawn(proxy::handle_signals(shutdown.clone(), {
|
||||||
|
let refresh_config_notify = Arc::clone(&refresh_config_notify);
|
||||||
|
move || {
|
||||||
|
refresh_config_notify.notify_one();
|
||||||
|
}
|
||||||
}));
|
}));
|
||||||
|
|
||||||
|
// trigger the first config load **after** setting up the signal hook
|
||||||
|
// to avoid the race condition where:
|
||||||
|
// 1. No config file registered when local-proxy starts up
|
||||||
|
// 2. The config file is written but the signal hook is not yet received
|
||||||
|
// 3. local-proxy completes startup but has no config loaded, despite there being a registered config.
|
||||||
|
refresh_config_notify.notify_one();
|
||||||
|
tokio::spawn(refresh_config_loop(args.config_path, refresh_config_notify));
|
||||||
|
|
||||||
maintenance_tasks.spawn(proxy::http::health_server::task_main(
|
maintenance_tasks.spawn(proxy::http::health_server::task_main(
|
||||||
metrics_listener,
|
metrics_listener,
|
||||||
AppMetrics {
|
AppMetrics {
|
||||||
@@ -227,12 +270,15 @@ fn build_config(args: &LocalProxyCliArgs) -> anyhow::Result<&'static ProxyConfig
|
|||||||
allow_self_signed_compute: false,
|
allow_self_signed_compute: false,
|
||||||
http_config,
|
http_config,
|
||||||
authentication_config: AuthenticationConfig {
|
authentication_config: AuthenticationConfig {
|
||||||
|
jwks_cache: JwkCache::default(),
|
||||||
thread_pool: ThreadPool::new(0),
|
thread_pool: ThreadPool::new(0),
|
||||||
scram_protocol_timeout: Duration::from_secs(10),
|
scram_protocol_timeout: Duration::from_secs(10),
|
||||||
rate_limiter_enabled: false,
|
rate_limiter_enabled: false,
|
||||||
rate_limiter: BucketRateLimiter::new(vec![]),
|
rate_limiter: BucketRateLimiter::new(vec![]),
|
||||||
rate_limit_ip_subnet: 64,
|
rate_limit_ip_subnet: 64,
|
||||||
ip_allowlist_check_enabled: true,
|
ip_allowlist_check_enabled: true,
|
||||||
|
is_auth_broker: false,
|
||||||
|
accept_jwts: true,
|
||||||
},
|
},
|
||||||
require_client_ip: false,
|
require_client_ip: false,
|
||||||
handshake_timeout: Duration::from_secs(10),
|
handshake_timeout: Duration::from_secs(10),
|
||||||
@@ -245,81 +291,84 @@ fn build_config(args: &LocalProxyCliArgs) -> anyhow::Result<&'static ProxyConfig
|
|||||||
})))
|
})))
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn refresh_config(path: PathBuf) {
|
async fn refresh_config_loop(path: Utf8PathBuf, rx: Arc<Notify>) {
|
||||||
match refresh_config_inner(&path).await {
|
loop {
|
||||||
Ok(()) => {}
|
rx.notified().await;
|
||||||
Err(e) => {
|
|
||||||
error!(error=?e, ?path, "could not read config file");
|
match refresh_config_inner(&path).await {
|
||||||
|
Ok(()) => {}
|
||||||
|
Err(e) => {
|
||||||
|
error!(error=?e, ?path, "could not read config file");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn refresh_config_inner(path: &Path) -> anyhow::Result<()> {
|
async fn refresh_config_inner(path: &Utf8Path) -> anyhow::Result<()> {
|
||||||
let bytes = tokio::fs::read(&path).await?;
|
let bytes = tokio::fs::read(&path).await?;
|
||||||
let mut data: JwksRoleMapping = serde_json::from_slice(&bytes)?;
|
let data: LocalProxySpec = serde_json::from_slice(&bytes)?;
|
||||||
|
|
||||||
let mut settings = None;
|
let mut jwks_set = vec![];
|
||||||
|
|
||||||
for mapping in data.roles.values_mut() {
|
for jwks in data.jwks {
|
||||||
for jwks in &mut mapping.jwks {
|
let mut jwks_url = url::Url::from_str(&jwks.jwks_url).context("parsing JWKS url")?;
|
||||||
ensure!(
|
|
||||||
jwks.jwks_url.has_authority()
|
|
||||||
&& (jwks.jwks_url.scheme() == "http" || jwks.jwks_url.scheme() == "https"),
|
|
||||||
"Invalid JWKS url. Must be HTTP",
|
|
||||||
);
|
|
||||||
|
|
||||||
ensure!(
|
ensure!(
|
||||||
jwks.jwks_url
|
jwks_url.has_authority()
|
||||||
.host()
|
&& (jwks_url.scheme() == "http" || jwks_url.scheme() == "https"),
|
||||||
.is_some_and(|h| h != url::Host::Domain("")),
|
"Invalid JWKS url. Must be HTTP",
|
||||||
"Invalid JWKS url. No domain listed",
|
);
|
||||||
);
|
|
||||||
|
|
||||||
// clear username, password and ports
|
ensure!(
|
||||||
jwks.jwks_url.set_username("").expect(
|
jwks_url.host().is_some_and(|h| h != url::Host::Domain("")),
|
||||||
|
"Invalid JWKS url. No domain listed",
|
||||||
|
);
|
||||||
|
|
||||||
|
// clear username, password and ports
|
||||||
|
jwks_url
|
||||||
|
.set_username("")
|
||||||
|
.expect("url can be a base and has a valid host and is not a file. should not error");
|
||||||
|
jwks_url
|
||||||
|
.set_password(None)
|
||||||
|
.expect("url can be a base and has a valid host and is not a file. should not error");
|
||||||
|
// local testing is hard if we need to have a specific restricted port
|
||||||
|
if cfg!(not(feature = "testing")) {
|
||||||
|
jwks_url.set_port(None).expect(
|
||||||
"url can be a base and has a valid host and is not a file. should not error",
|
"url can be a base and has a valid host and is not a file. should not error",
|
||||||
);
|
);
|
||||||
jwks.jwks_url.set_password(None).expect(
|
|
||||||
"url can be a base and has a valid host and is not a file. should not error",
|
|
||||||
);
|
|
||||||
// local testing is hard if we need to have a specific restricted port
|
|
||||||
if cfg!(not(feature = "testing")) {
|
|
||||||
jwks.jwks_url.set_port(None).expect(
|
|
||||||
"url can be a base and has a valid host and is not a file. should not error",
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// clear query params
|
|
||||||
jwks.jwks_url.set_fragment(None);
|
|
||||||
jwks.jwks_url.query_pairs_mut().clear().finish();
|
|
||||||
|
|
||||||
if jwks.jwks_url.scheme() != "https" {
|
|
||||||
// local testing is hard if we need to set up https support.
|
|
||||||
if cfg!(not(feature = "testing")) {
|
|
||||||
jwks.jwks_url
|
|
||||||
.set_scheme("https")
|
|
||||||
.expect("should not error to set the scheme to https if it was http");
|
|
||||||
} else {
|
|
||||||
warn!(scheme = jwks.jwks_url.scheme(), "JWKS url is not HTTPS");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let (pr, br) = settings.get_or_insert((jwks.project_id, jwks.branch_id));
|
|
||||||
ensure!(
|
|
||||||
*pr == jwks.project_id,
|
|
||||||
"inconsistent project IDs configured"
|
|
||||||
);
|
|
||||||
ensure!(*br == jwks.branch_id, "inconsistent branch IDs configured");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// clear query params
|
||||||
|
jwks_url.set_fragment(None);
|
||||||
|
jwks_url.query_pairs_mut().clear().finish();
|
||||||
|
|
||||||
|
if jwks_url.scheme() != "https" {
|
||||||
|
// local testing is hard if we need to set up https support.
|
||||||
|
if cfg!(not(feature = "testing")) {
|
||||||
|
jwks_url
|
||||||
|
.set_scheme("https")
|
||||||
|
.expect("should not error to set the scheme to https if it was http");
|
||||||
|
} else {
|
||||||
|
warn!(scheme = jwks_url.scheme(), "JWKS url is not HTTPS");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
jwks_set.push(JwksSettings {
|
||||||
|
id: jwks.id,
|
||||||
|
jwks_url,
|
||||||
|
provider_name: jwks.provider_name,
|
||||||
|
jwt_audience: jwks.jwt_audience,
|
||||||
|
role_names: jwks
|
||||||
|
.role_names
|
||||||
|
.into_iter()
|
||||||
|
.map(RoleName::from)
|
||||||
|
.map(|s| RoleNameInt::from(&s))
|
||||||
|
.collect(),
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some((project_id, branch_id)) = settings {
|
info!("successfully loaded new config");
|
||||||
JWKS_ROLE_MAP.store(Some(Arc::new(JwksRoleSettings {
|
JWKS_ROLE_MAP.store(Some(Arc::new(EndpointJwksResponse { jwks: jwks_set })));
|
||||||
roles: data.roles,
|
|
||||||
project_id,
|
|
||||||
branch_id,
|
|
||||||
})));
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -133,9 +133,7 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
proxy_listener,
|
proxy_listener,
|
||||||
cancellation_token.clone(),
|
cancellation_token.clone(),
|
||||||
));
|
));
|
||||||
let signals_task = tokio::spawn(proxy::handle_signals(cancellation_token, || async {
|
let signals_task = tokio::spawn(proxy::handle_signals(cancellation_token, || {}));
|
||||||
Ok(())
|
|
||||||
}));
|
|
||||||
|
|
||||||
// the signal task cant ever succeed.
|
// the signal task cant ever succeed.
|
||||||
// the main task can error, or can succeed on cancellation.
|
// the main task can error, or can succeed on cancellation.
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ use aws_config::web_identity_token::WebIdentityTokenCredentialsProvider;
|
|||||||
use aws_config::Region;
|
use aws_config::Region;
|
||||||
use futures::future::Either;
|
use futures::future::Either;
|
||||||
use proxy::auth;
|
use proxy::auth;
|
||||||
|
use proxy::auth::backend::jwt::JwkCache;
|
||||||
use proxy::auth::backend::AuthRateLimiter;
|
use proxy::auth::backend::AuthRateLimiter;
|
||||||
use proxy::auth::backend::MaybeOwned;
|
use proxy::auth::backend::MaybeOwned;
|
||||||
use proxy::cancellation::CancelMap;
|
use proxy::cancellation::CancelMap;
|
||||||
@@ -102,6 +103,9 @@ struct ProxyCliArgs {
|
|||||||
default_value = "http://localhost:3000/authenticate_proxy_request/"
|
default_value = "http://localhost:3000/authenticate_proxy_request/"
|
||||||
)]
|
)]
|
||||||
auth_endpoint: String,
|
auth_endpoint: String,
|
||||||
|
/// if this is not local proxy, this toggles whether we accept jwt or passwords for http
|
||||||
|
#[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
|
||||||
|
is_auth_broker: bool,
|
||||||
/// path to TLS key for client postgres connections
|
/// path to TLS key for client postgres connections
|
||||||
///
|
///
|
||||||
/// tls-key and tls-cert are for backwards compatibility, we can put all certs in one dir
|
/// tls-key and tls-cert are for backwards compatibility, we can put all certs in one dir
|
||||||
@@ -382,9 +386,27 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
info!("Starting mgmt on {mgmt_address}");
|
info!("Starting mgmt on {mgmt_address}");
|
||||||
let mgmt_listener = TcpListener::bind(mgmt_address).await?;
|
let mgmt_listener = TcpListener::bind(mgmt_address).await?;
|
||||||
|
|
||||||
let proxy_address: SocketAddr = args.proxy.parse()?;
|
let proxy_listener = if !args.is_auth_broker {
|
||||||
info!("Starting proxy on {proxy_address}");
|
let proxy_address: SocketAddr = args.proxy.parse()?;
|
||||||
let proxy_listener = TcpListener::bind(proxy_address).await?;
|
info!("Starting proxy on {proxy_address}");
|
||||||
|
|
||||||
|
Some(TcpListener::bind(proxy_address).await?)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
|
// TODO: rename the argument to something like serverless.
|
||||||
|
// It now covers more than just websockets, it also covers SQL over HTTP.
|
||||||
|
let serverless_listener = if let Some(serverless_address) = args.wss {
|
||||||
|
let serverless_address: SocketAddr = serverless_address.parse()?;
|
||||||
|
info!("Starting wss on {serverless_address}");
|
||||||
|
Some(TcpListener::bind(serverless_address).await?)
|
||||||
|
} else if args.is_auth_broker {
|
||||||
|
bail!("wss arg must be present for auth-broker")
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
let cancellation_token = CancellationToken::new();
|
let cancellation_token = CancellationToken::new();
|
||||||
|
|
||||||
let cancel_map = CancelMap::default();
|
let cancel_map = CancelMap::default();
|
||||||
@@ -430,21 +452,17 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
// client facing tasks. these will exit on error or on cancellation
|
// client facing tasks. these will exit on error or on cancellation
|
||||||
// cancellation returns Ok(())
|
// cancellation returns Ok(())
|
||||||
let mut client_tasks = JoinSet::new();
|
let mut client_tasks = JoinSet::new();
|
||||||
client_tasks.spawn(proxy::proxy::task_main(
|
if let Some(proxy_listener) = proxy_listener {
|
||||||
config,
|
client_tasks.spawn(proxy::proxy::task_main(
|
||||||
proxy_listener,
|
config,
|
||||||
cancellation_token.clone(),
|
proxy_listener,
|
||||||
cancellation_handler.clone(),
|
cancellation_token.clone(),
|
||||||
endpoint_rate_limiter.clone(),
|
cancellation_handler.clone(),
|
||||||
));
|
endpoint_rate_limiter.clone(),
|
||||||
|
));
|
||||||
// TODO: rename the argument to something like serverless.
|
}
|
||||||
// It now covers more than just websockets, it also covers SQL over HTTP.
|
|
||||||
if let Some(serverless_address) = args.wss {
|
|
||||||
let serverless_address: SocketAddr = serverless_address.parse()?;
|
|
||||||
info!("Starting wss on {serverless_address}");
|
|
||||||
let serverless_listener = TcpListener::bind(serverless_address).await?;
|
|
||||||
|
|
||||||
|
if let Some(serverless_listener) = serverless_listener {
|
||||||
client_tasks.spawn(serverless::task_main(
|
client_tasks.spawn(serverless::task_main(
|
||||||
config,
|
config,
|
||||||
serverless_listener,
|
serverless_listener,
|
||||||
@@ -461,10 +479,7 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
|
|
||||||
// maintenance tasks. these never return unless there's an error
|
// maintenance tasks. these never return unless there's an error
|
||||||
let mut maintenance_tasks = JoinSet::new();
|
let mut maintenance_tasks = JoinSet::new();
|
||||||
maintenance_tasks.spawn(proxy::handle_signals(
|
maintenance_tasks.spawn(proxy::handle_signals(cancellation_token.clone(), || {}));
|
||||||
cancellation_token.clone(),
|
|
||||||
|| async { Ok(()) },
|
|
||||||
));
|
|
||||||
maintenance_tasks.spawn(http::health_server::task_main(
|
maintenance_tasks.spawn(http::health_server::task_main(
|
||||||
http_listener,
|
http_listener,
|
||||||
AppMetrics {
|
AppMetrics {
|
||||||
@@ -677,7 +692,7 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
|
|||||||
)?;
|
)?;
|
||||||
|
|
||||||
let http_config = HttpConfig {
|
let http_config = HttpConfig {
|
||||||
accept_websockets: true,
|
accept_websockets: !args.is_auth_broker,
|
||||||
pool_options: GlobalConnPoolOptions {
|
pool_options: GlobalConnPoolOptions {
|
||||||
max_conns_per_endpoint: args.sql_over_http.sql_over_http_pool_max_conns_per_endpoint,
|
max_conns_per_endpoint: args.sql_over_http.sql_over_http_pool_max_conns_per_endpoint,
|
||||||
gc_epoch: args.sql_over_http.sql_over_http_pool_gc_epoch,
|
gc_epoch: args.sql_over_http.sql_over_http_pool_gc_epoch,
|
||||||
@@ -692,12 +707,15 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
|
|||||||
max_response_size_bytes: args.sql_over_http.sql_over_http_max_response_size_bytes,
|
max_response_size_bytes: args.sql_over_http.sql_over_http_max_response_size_bytes,
|
||||||
};
|
};
|
||||||
let authentication_config = AuthenticationConfig {
|
let authentication_config = AuthenticationConfig {
|
||||||
|
jwks_cache: JwkCache::default(),
|
||||||
thread_pool,
|
thread_pool,
|
||||||
scram_protocol_timeout: args.scram_protocol_timeout,
|
scram_protocol_timeout: args.scram_protocol_timeout,
|
||||||
rate_limiter_enabled: args.auth_rate_limit_enabled,
|
rate_limiter_enabled: args.auth_rate_limit_enabled,
|
||||||
rate_limiter: AuthRateLimiter::new(args.auth_rate_limit.clone()),
|
rate_limiter: AuthRateLimiter::new(args.auth_rate_limit.clone()),
|
||||||
rate_limit_ip_subnet: args.auth_rate_limit_ip_subnet,
|
rate_limit_ip_subnet: args.auth_rate_limit_ip_subnet,
|
||||||
ip_allowlist_check_enabled: !args.is_private_access_proxy,
|
ip_allowlist_check_enabled: !args.is_private_access_proxy,
|
||||||
|
is_auth_broker: args.is_auth_broker,
|
||||||
|
accept_jwts: args.is_auth_broker,
|
||||||
};
|
};
|
||||||
|
|
||||||
let config = Box::leak(Box::new(ProxyConfig {
|
let config = Box::leak(Box::new(ProxyConfig {
|
||||||
|
|||||||
@@ -1,5 +1,8 @@
|
|||||||
use crate::{
|
use crate::{
|
||||||
auth::{self, backend::AuthRateLimiter},
|
auth::{
|
||||||
|
self,
|
||||||
|
backend::{jwt::JwkCache, AuthRateLimiter},
|
||||||
|
},
|
||||||
console::locks::ApiLocks,
|
console::locks::ApiLocks,
|
||||||
rate_limiter::{RateBucketInfo, RateLimitAlgorithm, RateLimiterConfig},
|
rate_limiter::{RateBucketInfo, RateLimitAlgorithm, RateLimiterConfig},
|
||||||
scram::threadpool::ThreadPool,
|
scram::threadpool::ThreadPool,
|
||||||
@@ -67,6 +70,9 @@ pub struct AuthenticationConfig {
|
|||||||
pub rate_limiter: AuthRateLimiter,
|
pub rate_limiter: AuthRateLimiter,
|
||||||
pub rate_limit_ip_subnet: u8,
|
pub rate_limit_ip_subnet: u8,
|
||||||
pub ip_allowlist_check_enabled: bool,
|
pub ip_allowlist_check_enabled: bool,
|
||||||
|
pub jwks_cache: JwkCache,
|
||||||
|
pub is_auth_broker: bool,
|
||||||
|
pub accept_jwts: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl TlsConfig {
|
impl TlsConfig {
|
||||||
@@ -250,18 +256,26 @@ impl CertResolver {
|
|||||||
|
|
||||||
let common_name = pem.subject().to_string();
|
let common_name = pem.subject().to_string();
|
||||||
|
|
||||||
// We only use non-wildcard certificates in web auth proxy so it seems okay to treat them the same as
|
// We need to get the canonical name for this certificate so we can match them against any domain names
|
||||||
// wildcard ones as we don't use SNI there. That treatment only affects certificate selection, so
|
// seen within the proxy codebase.
|
||||||
// verify-full will still check wildcard match. Old coding here just ignored non-wildcard common names
|
//
|
||||||
// and passed None instead, which blows up number of cases downstream code should handle. Proper coding
|
// In scram-proxy we use wildcard certificates only, with the database endpoint as the wildcard subdomain, taken from SNI.
|
||||||
// here should better avoid Option for common_names, and do wildcard-based certificate selection instead
|
// We need to remove the wildcard prefix for the purposes of certificate selection.
|
||||||
// of cutting off '*.' parts.
|
//
|
||||||
let common_name = if common_name.starts_with("CN=*.") {
|
// auth-broker does not use SNI and instead uses the Neon-Connection-String header.
|
||||||
common_name.strip_prefix("CN=*.").map(|s| s.to_string())
|
// Auth broker has the subdomain `apiauth` we need to remove for the purposes of validating the Neon-Connection-String.
|
||||||
|
//
|
||||||
|
// Console Web proxy does not use any wildcard domains and does not need any certificate selection or conn string
|
||||||
|
// validation, so let's we can continue with any common-name
|
||||||
|
let common_name = if let Some(s) = common_name.strip_prefix("CN=*.") {
|
||||||
|
s.to_string()
|
||||||
|
} else if let Some(s) = common_name.strip_prefix("CN=apiauth.") {
|
||||||
|
s.to_string()
|
||||||
|
} else if let Some(s) = common_name.strip_prefix("CN=") {
|
||||||
|
s.to_string()
|
||||||
} else {
|
} else {
|
||||||
common_name.strip_prefix("CN=").map(|s| s.to_string())
|
bail!("Failed to parse common name from certificate")
|
||||||
}
|
};
|
||||||
.context("Failed to parse common name from certificate")?;
|
|
||||||
|
|
||||||
let cert = Arc::new(rustls::sign::CertifiedKey::new(cert_chain, key));
|
let cert = Arc::new(rustls::sign::CertifiedKey::new(cert_chain, key));
|
||||||
|
|
||||||
|
|||||||
@@ -1,13 +1,11 @@
|
|||||||
use measured::FixedCardinalityLabel;
|
use measured::FixedCardinalityLabel;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use std::collections::HashMap;
|
|
||||||
use std::fmt::{self, Display};
|
use std::fmt::{self, Display};
|
||||||
|
|
||||||
use crate::auth::IpPattern;
|
use crate::auth::IpPattern;
|
||||||
|
|
||||||
use crate::intern::{BranchIdInt, EndpointIdInt, ProjectIdInt};
|
use crate::intern::{BranchIdInt, EndpointIdInt, ProjectIdInt, RoleNameInt};
|
||||||
use crate::proxy::retry::CouldRetry;
|
use crate::proxy::retry::CouldRetry;
|
||||||
use crate::RoleName;
|
|
||||||
|
|
||||||
/// Generic error response with human-readable description.
|
/// Generic error response with human-readable description.
|
||||||
/// Note that we can't always present it to user as is.
|
/// Note that we can't always present it to user as is.
|
||||||
@@ -284,6 +282,8 @@ pub(crate) struct DatabaseInfo {
|
|||||||
/// be inconvenient for debug with local PG instance.
|
/// be inconvenient for debug with local PG instance.
|
||||||
pub(crate) password: Option<Box<str>>,
|
pub(crate) password: Option<Box<str>>,
|
||||||
pub(crate) aux: MetricsAuxInfo,
|
pub(crate) aux: MetricsAuxInfo,
|
||||||
|
#[serde(default)]
|
||||||
|
pub(crate) allowed_ips: Option<Vec<IpPattern>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Manually implement debug to omit sensitive info.
|
// Manually implement debug to omit sensitive info.
|
||||||
@@ -294,6 +294,7 @@ impl fmt::Debug for DatabaseInfo {
|
|||||||
.field("port", &self.port)
|
.field("port", &self.port)
|
||||||
.field("dbname", &self.dbname)
|
.field("dbname", &self.dbname)
|
||||||
.field("user", &self.user)
|
.field("user", &self.user)
|
||||||
|
.field("allowed_ips", &self.allowed_ips)
|
||||||
.finish_non_exhaustive()
|
.finish_non_exhaustive()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -345,11 +346,6 @@ impl ColdStartInfo {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Deserialize, Clone)]
|
|
||||||
pub struct JwksRoleMapping {
|
|
||||||
pub roles: HashMap<RoleName, EndpointJwksResponse>,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Deserialize, Clone)]
|
#[derive(Debug, Deserialize, Clone)]
|
||||||
pub struct EndpointJwksResponse {
|
pub struct EndpointJwksResponse {
|
||||||
pub jwks: Vec<JwksSettings>,
|
pub jwks: Vec<JwksSettings>,
|
||||||
@@ -358,11 +354,10 @@ pub struct EndpointJwksResponse {
|
|||||||
#[derive(Debug, Deserialize, Clone)]
|
#[derive(Debug, Deserialize, Clone)]
|
||||||
pub struct JwksSettings {
|
pub struct JwksSettings {
|
||||||
pub id: String,
|
pub id: String,
|
||||||
pub project_id: ProjectIdInt,
|
|
||||||
pub branch_id: BranchIdInt,
|
|
||||||
pub jwks_url: url::Url,
|
pub jwks_url: url::Url,
|
||||||
pub provider_name: String,
|
pub provider_name: String,
|
||||||
pub jwt_audience: Option<String>,
|
pub jwt_audience: Option<String>,
|
||||||
|
pub role_names: Vec<RoleNameInt>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
@@ -432,6 +427,22 @@ mod tests {
|
|||||||
"aux": dummy_aux(),
|
"aux": dummy_aux(),
|
||||||
}))?;
|
}))?;
|
||||||
|
|
||||||
|
// with allowed_ips
|
||||||
|
let dbinfo = serde_json::from_value::<DatabaseInfo>(json!({
|
||||||
|
"host": "localhost",
|
||||||
|
"port": 5432,
|
||||||
|
"dbname": "postgres",
|
||||||
|
"user": "john_doe",
|
||||||
|
"password": "password",
|
||||||
|
"aux": dummy_aux(),
|
||||||
|
"allowed_ips": ["127.0.0.1"],
|
||||||
|
}))?;
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
dbinfo.allowed_ips,
|
||||||
|
Some(vec![IpPattern::Single("127.0.0.1".parse()?)])
|
||||||
|
);
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,10 @@ pub mod neon;
|
|||||||
use super::messages::{ConsoleError, MetricsAuxInfo};
|
use super::messages::{ConsoleError, MetricsAuxInfo};
|
||||||
use crate::{
|
use crate::{
|
||||||
auth::{
|
auth::{
|
||||||
backend::{ComputeCredentialKeys, ComputeUserInfo},
|
backend::{
|
||||||
|
jwt::{AuthRule, FetchAuthRules},
|
||||||
|
ComputeCredentialKeys, ComputeUserInfo,
|
||||||
|
},
|
||||||
IpPattern,
|
IpPattern,
|
||||||
},
|
},
|
||||||
cache::{endpoints::EndpointsCache, project_info::ProjectInfoCacheImpl, Cached, TimedLru},
|
cache::{endpoints::EndpointsCache, project_info::ProjectInfoCacheImpl, Cached, TimedLru},
|
||||||
@@ -16,7 +19,7 @@ use crate::{
|
|||||||
intern::ProjectIdInt,
|
intern::ProjectIdInt,
|
||||||
metrics::ApiLockMetrics,
|
metrics::ApiLockMetrics,
|
||||||
rate_limiter::{DynamicLimiter, Outcome, RateLimiterConfig, Token},
|
rate_limiter::{DynamicLimiter, Outcome, RateLimiterConfig, Token},
|
||||||
scram, EndpointCacheKey,
|
scram, EndpointCacheKey, EndpointId,
|
||||||
};
|
};
|
||||||
use dashmap::DashMap;
|
use dashmap::DashMap;
|
||||||
use std::{hash::Hash, sync::Arc, time::Duration};
|
use std::{hash::Hash, sync::Arc, time::Duration};
|
||||||
@@ -334,6 +337,12 @@ pub(crate) trait Api {
|
|||||||
user_info: &ComputeUserInfo,
|
user_info: &ComputeUserInfo,
|
||||||
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), errors::GetAuthInfoError>;
|
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), errors::GetAuthInfoError>;
|
||||||
|
|
||||||
|
async fn get_endpoint_jwks(
|
||||||
|
&self,
|
||||||
|
ctx: &RequestMonitoring,
|
||||||
|
endpoint: EndpointId,
|
||||||
|
) -> anyhow::Result<Vec<AuthRule>>;
|
||||||
|
|
||||||
/// Wake up the compute node and return the corresponding connection info.
|
/// Wake up the compute node and return the corresponding connection info.
|
||||||
async fn wake_compute(
|
async fn wake_compute(
|
||||||
&self,
|
&self,
|
||||||
@@ -343,6 +352,7 @@ pub(crate) trait Api {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[non_exhaustive]
|
#[non_exhaustive]
|
||||||
|
#[derive(Clone)]
|
||||||
pub enum ConsoleBackend {
|
pub enum ConsoleBackend {
|
||||||
/// Current Cloud API (V2).
|
/// Current Cloud API (V2).
|
||||||
Console(neon::Api),
|
Console(neon::Api),
|
||||||
@@ -386,6 +396,20 @@ impl Api for ConsoleBackend {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn get_endpoint_jwks(
|
||||||
|
&self,
|
||||||
|
ctx: &RequestMonitoring,
|
||||||
|
endpoint: EndpointId,
|
||||||
|
) -> anyhow::Result<Vec<AuthRule>> {
|
||||||
|
match self {
|
||||||
|
Self::Console(api) => api.get_endpoint_jwks(ctx, endpoint).await,
|
||||||
|
#[cfg(any(test, feature = "testing"))]
|
||||||
|
Self::Postgres(api) => api.get_endpoint_jwks(ctx, endpoint).await,
|
||||||
|
#[cfg(test)]
|
||||||
|
Self::Test(_api) => Ok(vec![]),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
async fn wake_compute(
|
async fn wake_compute(
|
||||||
&self,
|
&self,
|
||||||
ctx: &RequestMonitoring,
|
ctx: &RequestMonitoring,
|
||||||
@@ -552,3 +576,13 @@ impl WakeComputePermit {
|
|||||||
res
|
res
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl FetchAuthRules for ConsoleBackend {
|
||||||
|
async fn fetch_auth_rules(
|
||||||
|
&self,
|
||||||
|
ctx: &RequestMonitoring,
|
||||||
|
endpoint: EndpointId,
|
||||||
|
) -> anyhow::Result<Vec<AuthRule>> {
|
||||||
|
self.get_endpoint_jwks(ctx, endpoint).await
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -4,7 +4,9 @@ use super::{
|
|||||||
errors::{ApiError, GetAuthInfoError, WakeComputeError},
|
errors::{ApiError, GetAuthInfoError, WakeComputeError},
|
||||||
AuthInfo, AuthSecret, CachedNodeInfo, NodeInfo,
|
AuthInfo, AuthSecret, CachedNodeInfo, NodeInfo,
|
||||||
};
|
};
|
||||||
use crate::context::RequestMonitoring;
|
use crate::{
|
||||||
|
auth::backend::jwt::AuthRule, context::RequestMonitoring, intern::RoleNameInt, RoleName,
|
||||||
|
};
|
||||||
use crate::{auth::backend::ComputeUserInfo, compute, error::io_error, scram, url::ApiUrl};
|
use crate::{auth::backend::ComputeUserInfo, compute, error::io_error, scram, url::ApiUrl};
|
||||||
use crate::{auth::IpPattern, cache::Cached};
|
use crate::{auth::IpPattern, cache::Cached};
|
||||||
use crate::{
|
use crate::{
|
||||||
@@ -118,6 +120,39 @@ impl Api {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn do_get_endpoint_jwks(&self, endpoint: EndpointId) -> anyhow::Result<Vec<AuthRule>> {
|
||||||
|
let (client, connection) =
|
||||||
|
tokio_postgres::connect(self.endpoint.as_str(), tokio_postgres::NoTls).await?;
|
||||||
|
|
||||||
|
let connection = tokio::spawn(connection);
|
||||||
|
|
||||||
|
let res = client.query(
|
||||||
|
"select id, jwks_url, audience, role_names from neon_control_plane.endpoint_jwks where endpoint_id = $1",
|
||||||
|
&[&endpoint.as_str()],
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
let mut rows = vec![];
|
||||||
|
for row in res {
|
||||||
|
rows.push(AuthRule {
|
||||||
|
id: row.get("id"),
|
||||||
|
jwks_url: url::Url::parse(row.get("jwks_url"))?,
|
||||||
|
audience: row.get("audience"),
|
||||||
|
role_names: row
|
||||||
|
.get::<_, Vec<String>>("role_names")
|
||||||
|
.into_iter()
|
||||||
|
.map(RoleName::from)
|
||||||
|
.map(|s| RoleNameInt::from(&s))
|
||||||
|
.collect(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
drop(client);
|
||||||
|
connection.await??;
|
||||||
|
|
||||||
|
Ok(rows)
|
||||||
|
}
|
||||||
|
|
||||||
async fn do_wake_compute(&self) -> Result<NodeInfo, WakeComputeError> {
|
async fn do_wake_compute(&self) -> Result<NodeInfo, WakeComputeError> {
|
||||||
let mut config = compute::ConnCfg::new();
|
let mut config = compute::ConnCfg::new();
|
||||||
config
|
config
|
||||||
@@ -185,6 +220,14 @@ impl super::Api for Api {
|
|||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn get_endpoint_jwks(
|
||||||
|
&self,
|
||||||
|
_ctx: &RequestMonitoring,
|
||||||
|
endpoint: EndpointId,
|
||||||
|
) -> anyhow::Result<Vec<AuthRule>> {
|
||||||
|
self.do_get_endpoint_jwks(endpoint).await
|
||||||
|
}
|
||||||
|
|
||||||
#[tracing::instrument(skip_all)]
|
#[tracing::instrument(skip_all)]
|
||||||
async fn wake_compute(
|
async fn wake_compute(
|
||||||
&self,
|
&self,
|
||||||
|
|||||||
@@ -7,27 +7,33 @@ use super::{
|
|||||||
NodeInfo,
|
NodeInfo,
|
||||||
};
|
};
|
||||||
use crate::{
|
use crate::{
|
||||||
auth::backend::ComputeUserInfo,
|
auth::backend::{jwt::AuthRule, ComputeUserInfo},
|
||||||
compute,
|
compute,
|
||||||
console::messages::{ColdStartInfo, Reason},
|
console::messages::{ColdStartInfo, EndpointJwksResponse, Reason},
|
||||||
http,
|
http,
|
||||||
metrics::{CacheOutcome, Metrics},
|
metrics::{CacheOutcome, Metrics},
|
||||||
rate_limiter::WakeComputeRateLimiter,
|
rate_limiter::WakeComputeRateLimiter,
|
||||||
scram, EndpointCacheKey,
|
scram, EndpointCacheKey, EndpointId,
|
||||||
};
|
};
|
||||||
use crate::{cache::Cached, context::RequestMonitoring};
|
use crate::{cache::Cached, context::RequestMonitoring};
|
||||||
|
use ::http::{header::AUTHORIZATION, HeaderName};
|
||||||
|
use anyhow::bail;
|
||||||
use futures::TryFutureExt;
|
use futures::TryFutureExt;
|
||||||
use std::{sync::Arc, time::Duration};
|
use std::{sync::Arc, time::Duration};
|
||||||
use tokio::time::Instant;
|
use tokio::time::Instant;
|
||||||
use tokio_postgres::config::SslMode;
|
use tokio_postgres::config::SslMode;
|
||||||
use tracing::{debug, error, info, info_span, warn, Instrument};
|
use tracing::{debug, error, info, info_span, warn, Instrument};
|
||||||
|
|
||||||
|
const X_REQUEST_ID: HeaderName = HeaderName::from_static("x-request-id");
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
pub struct Api {
|
pub struct Api {
|
||||||
endpoint: http::Endpoint,
|
endpoint: http::Endpoint,
|
||||||
pub caches: &'static ApiCaches,
|
pub caches: &'static ApiCaches,
|
||||||
pub(crate) locks: &'static ApiLocks<EndpointCacheKey>,
|
pub(crate) locks: &'static ApiLocks<EndpointCacheKey>,
|
||||||
pub(crate) wake_compute_endpoint_rate_limiter: Arc<WakeComputeRateLimiter>,
|
pub(crate) wake_compute_endpoint_rate_limiter: Arc<WakeComputeRateLimiter>,
|
||||||
jwt: String,
|
// put in a shared ref so we don't copy secrets all over in memory
|
||||||
|
jwt: Arc<str>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Api {
|
impl Api {
|
||||||
@@ -38,7 +44,9 @@ impl Api {
|
|||||||
locks: &'static ApiLocks<EndpointCacheKey>,
|
locks: &'static ApiLocks<EndpointCacheKey>,
|
||||||
wake_compute_endpoint_rate_limiter: Arc<WakeComputeRateLimiter>,
|
wake_compute_endpoint_rate_limiter: Arc<WakeComputeRateLimiter>,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
let jwt = std::env::var("NEON_PROXY_TO_CONTROLPLANE_TOKEN").unwrap_or_default();
|
let jwt = std::env::var("NEON_PROXY_TO_CONTROLPLANE_TOKEN")
|
||||||
|
.unwrap_or_default()
|
||||||
|
.into();
|
||||||
Self {
|
Self {
|
||||||
endpoint,
|
endpoint,
|
||||||
caches,
|
caches,
|
||||||
@@ -71,9 +79,9 @@ impl Api {
|
|||||||
async {
|
async {
|
||||||
let request = self
|
let request = self
|
||||||
.endpoint
|
.endpoint
|
||||||
.get("proxy_get_role_secret")
|
.get_path("proxy_get_role_secret")
|
||||||
.header("X-Request-ID", &request_id)
|
.header(X_REQUEST_ID, &request_id)
|
||||||
.header("Authorization", format!("Bearer {}", &self.jwt))
|
.header(AUTHORIZATION, format!("Bearer {}", &self.jwt))
|
||||||
.query(&[("session_id", ctx.session_id())])
|
.query(&[("session_id", ctx.session_id())])
|
||||||
.query(&[
|
.query(&[
|
||||||
("application_name", application_name.as_str()),
|
("application_name", application_name.as_str()),
|
||||||
@@ -125,6 +133,61 @@ impl Api {
|
|||||||
.await
|
.await
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn do_get_endpoint_jwks(
|
||||||
|
&self,
|
||||||
|
ctx: &RequestMonitoring,
|
||||||
|
endpoint: EndpointId,
|
||||||
|
) -> anyhow::Result<Vec<AuthRule>> {
|
||||||
|
if !self
|
||||||
|
.caches
|
||||||
|
.endpoints_cache
|
||||||
|
.is_valid(ctx, &endpoint.normalize())
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
bail!("endpoint not found");
|
||||||
|
}
|
||||||
|
let request_id = ctx.session_id().to_string();
|
||||||
|
async {
|
||||||
|
let request = self
|
||||||
|
.endpoint
|
||||||
|
.get_with_url(|url| {
|
||||||
|
url.path_segments_mut()
|
||||||
|
.push("endpoints")
|
||||||
|
.push(endpoint.as_str())
|
||||||
|
.push("jwks");
|
||||||
|
})
|
||||||
|
.header(X_REQUEST_ID, &request_id)
|
||||||
|
.header(AUTHORIZATION, format!("Bearer {}", &self.jwt))
|
||||||
|
.query(&[("session_id", ctx.session_id())])
|
||||||
|
.build()?;
|
||||||
|
|
||||||
|
info!(url = request.url().as_str(), "sending http request");
|
||||||
|
let start = Instant::now();
|
||||||
|
let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Cplane);
|
||||||
|
let response = self.endpoint.execute(request).await?;
|
||||||
|
drop(pause);
|
||||||
|
info!(duration = ?start.elapsed(), "received http response");
|
||||||
|
|
||||||
|
let body = parse_body::<EndpointJwksResponse>(response).await?;
|
||||||
|
|
||||||
|
let rules = body
|
||||||
|
.jwks
|
||||||
|
.into_iter()
|
||||||
|
.map(|jwks| AuthRule {
|
||||||
|
id: jwks.id,
|
||||||
|
jwks_url: jwks.jwks_url,
|
||||||
|
audience: jwks.jwt_audience,
|
||||||
|
role_names: jwks.role_names,
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
Ok(rules)
|
||||||
|
}
|
||||||
|
.map_err(crate::error::log_error)
|
||||||
|
.instrument(info_span!("http", id = request_id))
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
async fn do_wake_compute(
|
async fn do_wake_compute(
|
||||||
&self,
|
&self,
|
||||||
ctx: &RequestMonitoring,
|
ctx: &RequestMonitoring,
|
||||||
@@ -135,7 +198,7 @@ impl Api {
|
|||||||
async {
|
async {
|
||||||
let mut request_builder = self
|
let mut request_builder = self
|
||||||
.endpoint
|
.endpoint
|
||||||
.get("proxy_wake_compute")
|
.get_path("proxy_wake_compute")
|
||||||
.header("X-Request-ID", &request_id)
|
.header("X-Request-ID", &request_id)
|
||||||
.header("Authorization", format!("Bearer {}", &self.jwt))
|
.header("Authorization", format!("Bearer {}", &self.jwt))
|
||||||
.query(&[("session_id", ctx.session_id())])
|
.query(&[("session_id", ctx.session_id())])
|
||||||
@@ -262,6 +325,15 @@ impl super::Api for Api {
|
|||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tracing::instrument(skip_all)]
|
||||||
|
async fn get_endpoint_jwks(
|
||||||
|
&self,
|
||||||
|
ctx: &RequestMonitoring,
|
||||||
|
endpoint: EndpointId,
|
||||||
|
) -> anyhow::Result<Vec<AuthRule>> {
|
||||||
|
self.do_get_endpoint_jwks(ctx, endpoint).await
|
||||||
|
}
|
||||||
|
|
||||||
#[tracing::instrument(skip_all)]
|
#[tracing::instrument(skip_all)]
|
||||||
async fn wake_compute(
|
async fn wake_compute(
|
||||||
&self,
|
&self,
|
||||||
|
|||||||
@@ -86,9 +86,17 @@ impl Endpoint {
|
|||||||
|
|
||||||
/// Return a [builder](RequestBuilder) for a `GET` request,
|
/// Return a [builder](RequestBuilder) for a `GET` request,
|
||||||
/// appending a single `path` segment to the base endpoint URL.
|
/// appending a single `path` segment to the base endpoint URL.
|
||||||
pub(crate) fn get(&self, path: &str) -> RequestBuilder {
|
pub(crate) fn get_path(&self, path: &str) -> RequestBuilder {
|
||||||
|
self.get_with_url(|u| {
|
||||||
|
u.path_segments_mut().push(path);
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return a [builder](RequestBuilder) for a `GET` request,
|
||||||
|
/// accepting a closure to modify the url path segments for more complex paths queries.
|
||||||
|
pub(crate) fn get_with_url(&self, f: impl for<'a> FnOnce(&'a mut ApiUrl)) -> RequestBuilder {
|
||||||
let mut url = self.endpoint.clone();
|
let mut url = self.endpoint.clone();
|
||||||
url.path_segments_mut().push(path);
|
f(&mut url);
|
||||||
self.client.get(url.into_inner())
|
self.client.get(url.into_inner())
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -144,7 +152,7 @@ mod tests {
|
|||||||
|
|
||||||
// Validate that this pattern makes sense.
|
// Validate that this pattern makes sense.
|
||||||
let req = endpoint
|
let req = endpoint
|
||||||
.get("frobnicate")
|
.get_path("frobnicate")
|
||||||
.query(&[
|
.query(&[
|
||||||
("foo", Some("10")), // should be just `foo=10`
|
("foo", Some("10")), // should be just `foo=10`
|
||||||
("bar", None), // shouldn't be passed at all
|
("bar", None), // shouldn't be passed at all
|
||||||
@@ -162,7 +170,7 @@ mod tests {
|
|||||||
let endpoint = Endpoint::new(url, Client::new());
|
let endpoint = Endpoint::new(url, Client::new());
|
||||||
|
|
||||||
let req = endpoint
|
let req = endpoint
|
||||||
.get("frobnicate")
|
.get_path("frobnicate")
|
||||||
.query(&[("session_id", uuid::Uuid::nil())])
|
.query(&[("session_id", uuid::Uuid::nil())])
|
||||||
.build()?;
|
.build()?;
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
use std::{
|
use std::{
|
||||||
hash::BuildHasherDefault, marker::PhantomData, num::NonZeroUsize, ops::Index, sync::OnceLock,
|
any::type_name, hash::BuildHasherDefault, marker::PhantomData, num::NonZeroUsize, ops::Index,
|
||||||
|
sync::OnceLock,
|
||||||
};
|
};
|
||||||
|
|
||||||
use lasso::{Capacity, MemoryLimits, Spur, ThreadedRodeo};
|
use lasso::{Capacity, MemoryLimits, Spur, ThreadedRodeo};
|
||||||
@@ -16,12 +17,21 @@ pub struct StringInterner<Id> {
|
|||||||
_id: PhantomData<Id>,
|
_id: PhantomData<Id>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(PartialEq, Debug, Clone, Copy, Eq, Hash)]
|
#[derive(PartialEq, Clone, Copy, Eq, Hash)]
|
||||||
pub struct InternedString<Id> {
|
pub struct InternedString<Id> {
|
||||||
inner: Spur,
|
inner: Spur,
|
||||||
_id: PhantomData<Id>,
|
_id: PhantomData<Id>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl<Id: InternId> std::fmt::Debug for InternedString<Id> {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
f.debug_tuple("InternedString")
|
||||||
|
.field(&type_name::<Id>())
|
||||||
|
.field(&self.as_str())
|
||||||
|
.finish()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl<Id: InternId> std::fmt::Display for InternedString<Id> {
|
impl<Id: InternId> std::fmt::Display for InternedString<Id> {
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
self.as_str().fmt(f)
|
self.as_str().fmt(f)
|
||||||
@@ -130,14 +140,14 @@ impl<Id: InternId> Default for StringInterner<Id> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
|
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
|
||||||
pub(crate) struct RoleNameTag;
|
pub struct RoleNameTag;
|
||||||
impl InternId for RoleNameTag {
|
impl InternId for RoleNameTag {
|
||||||
fn get_interner() -> &'static StringInterner<Self> {
|
fn get_interner() -> &'static StringInterner<Self> {
|
||||||
static ROLE_NAMES: OnceLock<StringInterner<RoleNameTag>> = OnceLock::new();
|
static ROLE_NAMES: OnceLock<StringInterner<RoleNameTag>> = OnceLock::new();
|
||||||
ROLE_NAMES.get_or_init(Default::default)
|
ROLE_NAMES.get_or_init(Default::default)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pub(crate) type RoleNameInt = InternedString<RoleNameTag>;
|
pub type RoleNameInt = InternedString<RoleNameTag>;
|
||||||
impl From<&RoleName> for RoleNameInt {
|
impl From<&RoleName> for RoleNameInt {
|
||||||
fn from(value: &RoleName) -> Self {
|
fn from(value: &RoleName) -> Self {
|
||||||
RoleNameTag::get_interner().get_or_intern(value)
|
RoleNameTag::get_interner().get_or_intern(value)
|
||||||
|
|||||||
@@ -82,7 +82,7 @@
|
|||||||
impl_trait_overcaptures,
|
impl_trait_overcaptures,
|
||||||
)]
|
)]
|
||||||
|
|
||||||
use std::{convert::Infallible, future::Future};
|
use std::convert::Infallible;
|
||||||
|
|
||||||
use anyhow::{bail, Context};
|
use anyhow::{bail, Context};
|
||||||
use intern::{EndpointIdInt, EndpointIdTag, InternId};
|
use intern::{EndpointIdInt, EndpointIdTag, InternId};
|
||||||
@@ -117,13 +117,12 @@ pub mod usage_metrics;
|
|||||||
pub mod waiters;
|
pub mod waiters;
|
||||||
|
|
||||||
/// Handle unix signals appropriately.
|
/// Handle unix signals appropriately.
|
||||||
pub async fn handle_signals<F, Fut>(
|
pub async fn handle_signals<F>(
|
||||||
token: CancellationToken,
|
token: CancellationToken,
|
||||||
mut refresh_config: F,
|
mut refresh_config: F,
|
||||||
) -> anyhow::Result<Infallible>
|
) -> anyhow::Result<Infallible>
|
||||||
where
|
where
|
||||||
F: FnMut() -> Fut,
|
F: FnMut(),
|
||||||
Fut: Future<Output = anyhow::Result<()>>,
|
|
||||||
{
|
{
|
||||||
use tokio::signal::unix::{signal, SignalKind};
|
use tokio::signal::unix::{signal, SignalKind};
|
||||||
|
|
||||||
@@ -136,7 +135,7 @@ where
|
|||||||
// Hangup is commonly used for config reload.
|
// Hangup is commonly used for config reload.
|
||||||
_ = hangup.recv() => {
|
_ = hangup.recv() => {
|
||||||
warn!("received SIGHUP");
|
warn!("received SIGHUP");
|
||||||
refresh_config().await?;
|
refresh_config();
|
||||||
}
|
}
|
||||||
// Shut down the whole application.
|
// Shut down the whole application.
|
||||||
_ = interrupt.recv() => {
|
_ = interrupt.recv() => {
|
||||||
|
|||||||
@@ -525,6 +525,10 @@ impl TestBackend for TestConnectMechanism {
|
|||||||
{
|
{
|
||||||
unimplemented!("not used in tests")
|
unimplemented!("not used in tests")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn dyn_clone(&self) -> Box<dyn TestBackend> {
|
||||||
|
Box::new(self.clone())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn helper_create_cached_node_info(cache: &'static NodeInfoCache) -> CachedNodeInfo {
|
fn helper_create_cached_node_info(cache: &'static NodeInfoCache) -> CachedNodeInfo {
|
||||||
|
|||||||
@@ -43,6 +43,13 @@ impl ThreadPool {
|
|||||||
pub fn new(n_workers: u8) -> Arc<Self> {
|
pub fn new(n_workers: u8) -> Arc<Self> {
|
||||||
// rayon would be nice here, but yielding in rayon does not work well afaict.
|
// rayon would be nice here, but yielding in rayon does not work well afaict.
|
||||||
|
|
||||||
|
if n_workers == 0 {
|
||||||
|
return Arc::new(Self {
|
||||||
|
runtime: None,
|
||||||
|
metrics: Arc::new(ThreadPoolMetrics::new(n_workers as usize)),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
Arc::new_cyclic(|pool| {
|
Arc::new_cyclic(|pool| {
|
||||||
let pool = pool.clone();
|
let pool = pool.clone();
|
||||||
let worker_id = AtomicUsize::new(0);
|
let worker_id = AtomicUsize::new(0);
|
||||||
|
|||||||
@@ -5,6 +5,7 @@
|
|||||||
mod backend;
|
mod backend;
|
||||||
pub mod cancel_set;
|
pub mod cancel_set;
|
||||||
mod conn_pool;
|
mod conn_pool;
|
||||||
|
mod http_conn_pool;
|
||||||
mod http_util;
|
mod http_util;
|
||||||
mod json;
|
mod json;
|
||||||
mod sql_over_http;
|
mod sql_over_http;
|
||||||
@@ -19,7 +20,8 @@ use anyhow::Context;
|
|||||||
use futures::future::{select, Either};
|
use futures::future::{select, Either};
|
||||||
use futures::TryFutureExt;
|
use futures::TryFutureExt;
|
||||||
use http::{Method, Response, StatusCode};
|
use http::{Method, Response, StatusCode};
|
||||||
use http_body_util::Full;
|
use http_body_util::combinators::BoxBody;
|
||||||
|
use http_body_util::{BodyExt, Empty};
|
||||||
use hyper1::body::Incoming;
|
use hyper1::body::Incoming;
|
||||||
use hyper_util::rt::TokioExecutor;
|
use hyper_util::rt::TokioExecutor;
|
||||||
use hyper_util::server::conn::auto::Builder;
|
use hyper_util::server::conn::auto::Builder;
|
||||||
@@ -81,7 +83,28 @@ pub async fn task_main(
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
let http_conn_pool = http_conn_pool::GlobalConnPool::new(&config.http_config);
|
||||||
|
{
|
||||||
|
let http_conn_pool = Arc::clone(&http_conn_pool);
|
||||||
|
tokio::spawn(async move {
|
||||||
|
http_conn_pool.gc_worker(StdRng::from_entropy()).await;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// shutdown the connection pool
|
||||||
|
tokio::spawn({
|
||||||
|
let cancellation_token = cancellation_token.clone();
|
||||||
|
let http_conn_pool = http_conn_pool.clone();
|
||||||
|
async move {
|
||||||
|
cancellation_token.cancelled().await;
|
||||||
|
tokio::task::spawn_blocking(move || http_conn_pool.shutdown())
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
let backend = Arc::new(PoolingBackend {
|
let backend = Arc::new(PoolingBackend {
|
||||||
|
http_conn_pool: Arc::clone(&http_conn_pool),
|
||||||
pool: Arc::clone(&conn_pool),
|
pool: Arc::clone(&conn_pool),
|
||||||
config,
|
config,
|
||||||
endpoint_rate_limiter: Arc::clone(&endpoint_rate_limiter),
|
endpoint_rate_limiter: Arc::clone(&endpoint_rate_limiter),
|
||||||
@@ -342,7 +365,7 @@ async fn request_handler(
|
|||||||
// used to cancel in-flight HTTP requests. not used to cancel websockets
|
// used to cancel in-flight HTTP requests. not used to cancel websockets
|
||||||
http_cancellation_token: CancellationToken,
|
http_cancellation_token: CancellationToken,
|
||||||
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
|
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
|
||||||
) -> Result<Response<Full<Bytes>>, ApiError> {
|
) -> Result<Response<BoxBody<Bytes, hyper1::Error>>, ApiError> {
|
||||||
let host = request
|
let host = request
|
||||||
.headers()
|
.headers()
|
||||||
.get("host")
|
.get("host")
|
||||||
@@ -386,7 +409,7 @@ async fn request_handler(
|
|||||||
);
|
);
|
||||||
|
|
||||||
// Return the response so the spawned future can continue.
|
// Return the response so the spawned future can continue.
|
||||||
Ok(response.map(|_: http_body_util::Empty<Bytes>| Full::new(Bytes::new())))
|
Ok(response.map(|b| b.map_err(|x| match x {}).boxed()))
|
||||||
} else if request.uri().path() == "/sql" && *request.method() == Method::POST {
|
} else if request.uri().path() == "/sql" && *request.method() == Method::POST {
|
||||||
let ctx = RequestMonitoring::new(
|
let ctx = RequestMonitoring::new(
|
||||||
session_id,
|
session_id,
|
||||||
@@ -409,7 +432,7 @@ async fn request_handler(
|
|||||||
)
|
)
|
||||||
.header("Access-Control-Max-Age", "86400" /* 24 hours */)
|
.header("Access-Control-Max-Age", "86400" /* 24 hours */)
|
||||||
.status(StatusCode::OK) // 204 is also valid, but see: https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods/OPTIONS#status_code
|
.status(StatusCode::OK) // 204 is also valid, but see: https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods/OPTIONS#status_code
|
||||||
.body(Full::new(Bytes::new()))
|
.body(Empty::new().map_err(|x| match x {}).boxed())
|
||||||
.map_err(|e| ApiError::InternalServerError(e.into()))
|
.map_err(|e| ApiError::InternalServerError(e.into()))
|
||||||
} else {
|
} else {
|
||||||
json_response(StatusCode::BAD_REQUEST, "query is not supported")
|
json_response(StatusCode::BAD_REQUEST, "query is not supported")
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
use std::{sync::Arc, time::Duration};
|
use std::{io, sync::Arc, time::Duration};
|
||||||
|
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
|
use hyper_util::rt::{TokioExecutor, TokioIo, TokioTimer};
|
||||||
|
use tokio::net::{lookup_host, TcpStream};
|
||||||
use tracing::{field::display, info};
|
use tracing::{field::display, info};
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
@@ -27,9 +29,13 @@ use crate::{
|
|||||||
Host,
|
Host,
|
||||||
};
|
};
|
||||||
|
|
||||||
use super::conn_pool::{poll_client, Client, ConnInfo, GlobalConnPool};
|
use super::{
|
||||||
|
conn_pool::{poll_client, Client, ConnInfo, GlobalConnPool},
|
||||||
|
http_conn_pool::{self, poll_http2_client},
|
||||||
|
};
|
||||||
|
|
||||||
pub(crate) struct PoolingBackend {
|
pub(crate) struct PoolingBackend {
|
||||||
|
pub(crate) http_conn_pool: Arc<super::http_conn_pool::GlobalConnPool>,
|
||||||
pub(crate) pool: Arc<GlobalConnPool<tokio_postgres::Client>>,
|
pub(crate) pool: Arc<GlobalConnPool<tokio_postgres::Client>>,
|
||||||
pub(crate) config: &'static ProxyConfig,
|
pub(crate) config: &'static ProxyConfig,
|
||||||
pub(crate) endpoint_rate_limiter: Arc<EndpointRateLimiter>,
|
pub(crate) endpoint_rate_limiter: Arc<EndpointRateLimiter>,
|
||||||
@@ -103,32 +109,44 @@ impl PoolingBackend {
|
|||||||
pub(crate) async fn authenticate_with_jwt(
|
pub(crate) async fn authenticate_with_jwt(
|
||||||
&self,
|
&self,
|
||||||
ctx: &RequestMonitoring,
|
ctx: &RequestMonitoring,
|
||||||
|
config: &AuthenticationConfig,
|
||||||
user_info: &ComputeUserInfo,
|
user_info: &ComputeUserInfo,
|
||||||
jwt: &str,
|
jwt: String,
|
||||||
) -> Result<ComputeCredentials, AuthError> {
|
) -> Result<(), AuthError> {
|
||||||
match &self.config.auth_backend {
|
match &self.config.auth_backend {
|
||||||
crate::auth::Backend::Console(_, ()) => {
|
crate::auth::Backend::Console(console, ()) => {
|
||||||
Err(AuthError::auth_failed("JWT login is not yet supported"))
|
config
|
||||||
}
|
|
||||||
crate::auth::Backend::Web(_, ()) => Err(AuthError::auth_failed(
|
|
||||||
"JWT login over web auth proxy is not supported",
|
|
||||||
)),
|
|
||||||
crate::auth::Backend::Local(cache) => {
|
|
||||||
cache
|
|
||||||
.jwks_cache
|
.jwks_cache
|
||||||
.check_jwt(
|
.check_jwt(
|
||||||
ctx,
|
ctx,
|
||||||
user_info.endpoint.clone(),
|
user_info.endpoint.clone(),
|
||||||
user_info.user.clone(),
|
&user_info.user,
|
||||||
&StaticAuthRules,
|
&**console,
|
||||||
jwt,
|
&jwt,
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
.map_err(|e| AuthError::auth_failed(e.to_string()))?;
|
.map_err(|e| AuthError::auth_failed(e.to_string()))?;
|
||||||
Ok(ComputeCredentials {
|
|
||||||
info: user_info.clone(),
|
Ok(())
|
||||||
keys: crate::auth::backend::ComputeCredentialKeys::None,
|
}
|
||||||
})
|
crate::auth::Backend::Web(_, ()) => Err(AuthError::auth_failed(
|
||||||
|
"JWT login over web auth proxy is not supported",
|
||||||
|
)),
|
||||||
|
crate::auth::Backend::Local(_) => {
|
||||||
|
config
|
||||||
|
.jwks_cache
|
||||||
|
.check_jwt(
|
||||||
|
ctx,
|
||||||
|
user_info.endpoint.clone(),
|
||||||
|
&user_info.user,
|
||||||
|
&StaticAuthRules,
|
||||||
|
&jwt,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.map_err(|e| AuthError::auth_failed(e.to_string()))?;
|
||||||
|
|
||||||
|
// todo: rewrite JWT signature with key shared somehow between local proxy and postgres
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -174,14 +192,55 @@ impl PoolingBackend {
|
|||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Wake up the destination if needed
|
||||||
|
#[tracing::instrument(fields(pid = tracing::field::Empty), skip_all)]
|
||||||
|
pub(crate) async fn connect_to_local_proxy(
|
||||||
|
&self,
|
||||||
|
ctx: &RequestMonitoring,
|
||||||
|
conn_info: ConnInfo,
|
||||||
|
) -> Result<http_conn_pool::Client, HttpConnError> {
|
||||||
|
info!("pool: looking for an existing connection");
|
||||||
|
if let Some(client) = self.http_conn_pool.get(ctx, &conn_info) {
|
||||||
|
return Ok(client);
|
||||||
|
}
|
||||||
|
|
||||||
|
let conn_id = uuid::Uuid::new_v4();
|
||||||
|
tracing::Span::current().record("conn_id", display(conn_id));
|
||||||
|
info!(%conn_id, "pool: opening a new connection '{conn_info}'");
|
||||||
|
let backend = self
|
||||||
|
.config
|
||||||
|
.auth_backend
|
||||||
|
.as_ref()
|
||||||
|
.map(|()| ComputeCredentials {
|
||||||
|
info: conn_info.user_info.clone(),
|
||||||
|
keys: crate::auth::backend::ComputeCredentialKeys::None,
|
||||||
|
});
|
||||||
|
crate::proxy::connect_compute::connect_to_compute(
|
||||||
|
ctx,
|
||||||
|
&HyperMechanism {
|
||||||
|
conn_id,
|
||||||
|
conn_info,
|
||||||
|
pool: self.http_conn_pool.clone(),
|
||||||
|
locks: &self.config.connect_compute_locks,
|
||||||
|
},
|
||||||
|
&backend,
|
||||||
|
false, // do not allow self signed compute for http flow
|
||||||
|
self.config.wake_compute_retry_config,
|
||||||
|
self.config.connect_to_compute_retry_config,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, thiserror::Error)]
|
#[derive(Debug, thiserror::Error)]
|
||||||
pub(crate) enum HttpConnError {
|
pub(crate) enum HttpConnError {
|
||||||
#[error("pooled connection closed at inconsistent state")]
|
#[error("pooled connection closed at inconsistent state")]
|
||||||
ConnectionClosedAbruptly(#[from] tokio::sync::watch::error::SendError<uuid::Uuid>),
|
ConnectionClosedAbruptly(#[from] tokio::sync::watch::error::SendError<uuid::Uuid>),
|
||||||
#[error("could not connection to compute")]
|
#[error("could not connection to postgres in compute")]
|
||||||
ConnectionError(#[from] tokio_postgres::Error),
|
PostgresConnectionError(#[from] tokio_postgres::Error),
|
||||||
|
#[error("could not connection to local-proxy in compute")]
|
||||||
|
LocalProxyConnectionError(#[from] LocalProxyConnError),
|
||||||
|
|
||||||
#[error("could not get auth info")]
|
#[error("could not get auth info")]
|
||||||
GetAuthInfo(#[from] GetAuthInfoError),
|
GetAuthInfo(#[from] GetAuthInfoError),
|
||||||
@@ -193,11 +252,20 @@ pub(crate) enum HttpConnError {
|
|||||||
TooManyConnectionAttempts(#[from] ApiLockError),
|
TooManyConnectionAttempts(#[from] ApiLockError),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, thiserror::Error)]
|
||||||
|
pub(crate) enum LocalProxyConnError {
|
||||||
|
#[error("error with connection to local-proxy")]
|
||||||
|
Io(#[source] std::io::Error),
|
||||||
|
#[error("could not establish h2 connection")]
|
||||||
|
H2(#[from] hyper1::Error),
|
||||||
|
}
|
||||||
|
|
||||||
impl ReportableError for HttpConnError {
|
impl ReportableError for HttpConnError {
|
||||||
fn get_error_kind(&self) -> ErrorKind {
|
fn get_error_kind(&self) -> ErrorKind {
|
||||||
match self {
|
match self {
|
||||||
HttpConnError::ConnectionClosedAbruptly(_) => ErrorKind::Compute,
|
HttpConnError::ConnectionClosedAbruptly(_) => ErrorKind::Compute,
|
||||||
HttpConnError::ConnectionError(p) => p.get_error_kind(),
|
HttpConnError::PostgresConnectionError(p) => p.get_error_kind(),
|
||||||
|
HttpConnError::LocalProxyConnectionError(_) => ErrorKind::Compute,
|
||||||
HttpConnError::GetAuthInfo(a) => a.get_error_kind(),
|
HttpConnError::GetAuthInfo(a) => a.get_error_kind(),
|
||||||
HttpConnError::AuthError(a) => a.get_error_kind(),
|
HttpConnError::AuthError(a) => a.get_error_kind(),
|
||||||
HttpConnError::WakeCompute(w) => w.get_error_kind(),
|
HttpConnError::WakeCompute(w) => w.get_error_kind(),
|
||||||
@@ -210,7 +278,8 @@ impl UserFacingError for HttpConnError {
|
|||||||
fn to_string_client(&self) -> String {
|
fn to_string_client(&self) -> String {
|
||||||
match self {
|
match self {
|
||||||
HttpConnError::ConnectionClosedAbruptly(_) => self.to_string(),
|
HttpConnError::ConnectionClosedAbruptly(_) => self.to_string(),
|
||||||
HttpConnError::ConnectionError(p) => p.to_string(),
|
HttpConnError::PostgresConnectionError(p) => p.to_string(),
|
||||||
|
HttpConnError::LocalProxyConnectionError(p) => p.to_string(),
|
||||||
HttpConnError::GetAuthInfo(c) => c.to_string_client(),
|
HttpConnError::GetAuthInfo(c) => c.to_string_client(),
|
||||||
HttpConnError::AuthError(c) => c.to_string_client(),
|
HttpConnError::AuthError(c) => c.to_string_client(),
|
||||||
HttpConnError::WakeCompute(c) => c.to_string_client(),
|
HttpConnError::WakeCompute(c) => c.to_string_client(),
|
||||||
@@ -224,7 +293,8 @@ impl UserFacingError for HttpConnError {
|
|||||||
impl CouldRetry for HttpConnError {
|
impl CouldRetry for HttpConnError {
|
||||||
fn could_retry(&self) -> bool {
|
fn could_retry(&self) -> bool {
|
||||||
match self {
|
match self {
|
||||||
HttpConnError::ConnectionError(e) => e.could_retry(),
|
HttpConnError::PostgresConnectionError(e) => e.could_retry(),
|
||||||
|
HttpConnError::LocalProxyConnectionError(e) => e.could_retry(),
|
||||||
HttpConnError::ConnectionClosedAbruptly(_) => false,
|
HttpConnError::ConnectionClosedAbruptly(_) => false,
|
||||||
HttpConnError::GetAuthInfo(_) => false,
|
HttpConnError::GetAuthInfo(_) => false,
|
||||||
HttpConnError::AuthError(_) => false,
|
HttpConnError::AuthError(_) => false,
|
||||||
@@ -236,7 +306,7 @@ impl CouldRetry for HttpConnError {
|
|||||||
impl ShouldRetryWakeCompute for HttpConnError {
|
impl ShouldRetryWakeCompute for HttpConnError {
|
||||||
fn should_retry_wake_compute(&self) -> bool {
|
fn should_retry_wake_compute(&self) -> bool {
|
||||||
match self {
|
match self {
|
||||||
HttpConnError::ConnectionError(e) => e.should_retry_wake_compute(),
|
HttpConnError::PostgresConnectionError(e) => e.should_retry_wake_compute(),
|
||||||
// we never checked cache validity
|
// we never checked cache validity
|
||||||
HttpConnError::TooManyConnectionAttempts(_) => false,
|
HttpConnError::TooManyConnectionAttempts(_) => false,
|
||||||
_ => true,
|
_ => true,
|
||||||
@@ -244,6 +314,38 @@ impl ShouldRetryWakeCompute for HttpConnError {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl ReportableError for LocalProxyConnError {
|
||||||
|
fn get_error_kind(&self) -> ErrorKind {
|
||||||
|
match self {
|
||||||
|
LocalProxyConnError::Io(_) => ErrorKind::Compute,
|
||||||
|
LocalProxyConnError::H2(_) => ErrorKind::Compute,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl UserFacingError for LocalProxyConnError {
|
||||||
|
fn to_string_client(&self) -> String {
|
||||||
|
"Could not establish HTTP connection to the database".to_string()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl CouldRetry for LocalProxyConnError {
|
||||||
|
fn could_retry(&self) -> bool {
|
||||||
|
match self {
|
||||||
|
LocalProxyConnError::Io(_) => false,
|
||||||
|
LocalProxyConnError::H2(_) => false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl ShouldRetryWakeCompute for LocalProxyConnError {
|
||||||
|
fn should_retry_wake_compute(&self) -> bool {
|
||||||
|
match self {
|
||||||
|
LocalProxyConnError::Io(_) => false,
|
||||||
|
LocalProxyConnError::H2(_) => false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
struct TokioMechanism {
|
struct TokioMechanism {
|
||||||
pool: Arc<GlobalConnPool<tokio_postgres::Client>>,
|
pool: Arc<GlobalConnPool<tokio_postgres::Client>>,
|
||||||
conn_info: ConnInfo,
|
conn_info: ConnInfo,
|
||||||
@@ -293,3 +395,99 @@ impl ConnectMechanism for TokioMechanism {
|
|||||||
|
|
||||||
fn update_connect_config(&self, _config: &mut compute::ConnCfg) {}
|
fn update_connect_config(&self, _config: &mut compute::ConnCfg) {}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct HyperMechanism {
|
||||||
|
pool: Arc<http_conn_pool::GlobalConnPool>,
|
||||||
|
conn_info: ConnInfo,
|
||||||
|
conn_id: uuid::Uuid,
|
||||||
|
|
||||||
|
/// connect_to_compute concurrency lock
|
||||||
|
locks: &'static ApiLocks<Host>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl ConnectMechanism for HyperMechanism {
|
||||||
|
type Connection = http_conn_pool::Client;
|
||||||
|
type ConnectError = HttpConnError;
|
||||||
|
type Error = HttpConnError;
|
||||||
|
|
||||||
|
async fn connect_once(
|
||||||
|
&self,
|
||||||
|
ctx: &RequestMonitoring,
|
||||||
|
node_info: &CachedNodeInfo,
|
||||||
|
timeout: Duration,
|
||||||
|
) -> Result<Self::Connection, Self::ConnectError> {
|
||||||
|
let host = node_info.config.get_host()?;
|
||||||
|
let permit = self.locks.get_permit(&host).await?;
|
||||||
|
|
||||||
|
let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Compute);
|
||||||
|
|
||||||
|
// let port = node_info.config.get_ports().first().unwrap_or_else(10432);
|
||||||
|
let res = connect_http2(&host, 10432, timeout).await;
|
||||||
|
drop(pause);
|
||||||
|
let (client, connection) = permit.release_result(res)?;
|
||||||
|
|
||||||
|
Ok(poll_http2_client(
|
||||||
|
self.pool.clone(),
|
||||||
|
ctx,
|
||||||
|
&self.conn_info,
|
||||||
|
client,
|
||||||
|
connection,
|
||||||
|
self.conn_id,
|
||||||
|
node_info.aux.clone(),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn update_connect_config(&self, _config: &mut compute::ConnCfg) {}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn connect_http2(
|
||||||
|
host: &str,
|
||||||
|
port: u16,
|
||||||
|
timeout: Duration,
|
||||||
|
) -> Result<(http_conn_pool::Send, http_conn_pool::Connect), LocalProxyConnError> {
|
||||||
|
// assumption: host is an ip address so this should not actually perform any requests.
|
||||||
|
// todo: add that assumption as a guarantee in the control-plane API.
|
||||||
|
let mut addrs = lookup_host((host, port))
|
||||||
|
.await
|
||||||
|
.map_err(LocalProxyConnError::Io)?;
|
||||||
|
|
||||||
|
let mut last_err = None;
|
||||||
|
|
||||||
|
let stream = loop {
|
||||||
|
let Some(addr) = addrs.next() else {
|
||||||
|
return Err(last_err.unwrap_or_else(|| {
|
||||||
|
LocalProxyConnError::Io(io::Error::new(
|
||||||
|
io::ErrorKind::InvalidInput,
|
||||||
|
"could not resolve any addresses",
|
||||||
|
))
|
||||||
|
}));
|
||||||
|
};
|
||||||
|
|
||||||
|
match tokio::time::timeout(timeout, TcpStream::connect(addr)).await {
|
||||||
|
Ok(Ok(stream)) => {
|
||||||
|
stream.set_nodelay(true).map_err(LocalProxyConnError::Io)?;
|
||||||
|
break stream;
|
||||||
|
}
|
||||||
|
Ok(Err(e)) => {
|
||||||
|
last_err = Some(LocalProxyConnError::Io(e));
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
last_err = Some(LocalProxyConnError::Io(io::Error::new(
|
||||||
|
io::ErrorKind::TimedOut,
|
||||||
|
e,
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
let (client, connection) = hyper1::client::conn::http2::Builder::new(TokioExecutor::new())
|
||||||
|
.timer(TokioTimer::new())
|
||||||
|
.keep_alive_interval(Duration::from_secs(20))
|
||||||
|
.keep_alive_while_idle(true)
|
||||||
|
.keep_alive_timeout(Duration::from_secs(5))
|
||||||
|
.handshake(TokioIo::new(stream))
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
Ok((client, connection))
|
||||||
|
}
|
||||||
|
|||||||
335
proxy/src/serverless/http_conn_pool.rs
Normal file
335
proxy/src/serverless/http_conn_pool.rs
Normal file
@@ -0,0 +1,335 @@
|
|||||||
|
use dashmap::DashMap;
|
||||||
|
use hyper1::client::conn::http2;
|
||||||
|
use hyper_util::rt::{TokioExecutor, TokioIo};
|
||||||
|
use parking_lot::RwLock;
|
||||||
|
use rand::Rng;
|
||||||
|
use std::collections::VecDeque;
|
||||||
|
use std::sync::atomic::{self, AtomicUsize};
|
||||||
|
use std::{sync::Arc, sync::Weak};
|
||||||
|
use tokio::net::TcpStream;
|
||||||
|
|
||||||
|
use crate::console::messages::{ColdStartInfo, MetricsAuxInfo};
|
||||||
|
use crate::metrics::{HttpEndpointPoolsGuard, Metrics};
|
||||||
|
use crate::usage_metrics::{Ids, MetricCounter, USAGE_METRICS};
|
||||||
|
use crate::{context::RequestMonitoring, EndpointCacheKey};
|
||||||
|
|
||||||
|
use tracing::{debug, error};
|
||||||
|
use tracing::{info, info_span, Instrument};
|
||||||
|
|
||||||
|
use super::conn_pool::ConnInfo;
|
||||||
|
|
||||||
|
pub(crate) type Send = http2::SendRequest<hyper1::body::Incoming>;
|
||||||
|
pub(crate) type Connect =
|
||||||
|
http2::Connection<TokioIo<TcpStream>, hyper1::body::Incoming, TokioExecutor>;
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
struct ConnPoolEntry {
|
||||||
|
conn: Send,
|
||||||
|
conn_id: uuid::Uuid,
|
||||||
|
aux: MetricsAuxInfo,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Per-endpoint connection pool
|
||||||
|
// Number of open connections is limited by the `max_conns_per_endpoint`.
|
||||||
|
pub(crate) struct EndpointConnPool {
|
||||||
|
conns: VecDeque<ConnPoolEntry>,
|
||||||
|
_guard: HttpEndpointPoolsGuard<'static>,
|
||||||
|
global_connections_count: Arc<AtomicUsize>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl EndpointConnPool {
|
||||||
|
fn get_conn_entry(&mut self) -> Option<ConnPoolEntry> {
|
||||||
|
let Self { conns, .. } = self;
|
||||||
|
|
||||||
|
let conn = conns.pop_front()?;
|
||||||
|
conns.push_back(conn.clone());
|
||||||
|
Some(conn)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn remove_conn(&mut self, conn_id: uuid::Uuid) -> bool {
|
||||||
|
let Self {
|
||||||
|
conns,
|
||||||
|
global_connections_count,
|
||||||
|
..
|
||||||
|
} = self;
|
||||||
|
|
||||||
|
let old_len = conns.len();
|
||||||
|
conns.retain(|conn| conn.conn_id != conn_id);
|
||||||
|
let new_len = conns.len();
|
||||||
|
let removed = old_len - new_len;
|
||||||
|
if removed > 0 {
|
||||||
|
global_connections_count.fetch_sub(removed, atomic::Ordering::Relaxed);
|
||||||
|
Metrics::get()
|
||||||
|
.proxy
|
||||||
|
.http_pool_opened_connections
|
||||||
|
.get_metric()
|
||||||
|
.dec_by(removed as i64);
|
||||||
|
}
|
||||||
|
removed > 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Drop for EndpointConnPool {
|
||||||
|
fn drop(&mut self) {
|
||||||
|
if !self.conns.is_empty() {
|
||||||
|
self.global_connections_count
|
||||||
|
.fetch_sub(self.conns.len(), atomic::Ordering::Relaxed);
|
||||||
|
Metrics::get()
|
||||||
|
.proxy
|
||||||
|
.http_pool_opened_connections
|
||||||
|
.get_metric()
|
||||||
|
.dec_by(self.conns.len() as i64);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) struct GlobalConnPool {
|
||||||
|
// endpoint -> per-endpoint connection pool
|
||||||
|
//
|
||||||
|
// That should be a fairly conteded map, so return reference to the per-endpoint
|
||||||
|
// pool as early as possible and release the lock.
|
||||||
|
global_pool: DashMap<EndpointCacheKey, Arc<RwLock<EndpointConnPool>>>,
|
||||||
|
|
||||||
|
/// Number of endpoint-connection pools
|
||||||
|
///
|
||||||
|
/// [`DashMap::len`] iterates over all inner pools and acquires a read lock on each.
|
||||||
|
/// That seems like far too much effort, so we're using a relaxed increment counter instead.
|
||||||
|
/// It's only used for diagnostics.
|
||||||
|
global_pool_size: AtomicUsize,
|
||||||
|
|
||||||
|
/// Total number of connections in the pool
|
||||||
|
global_connections_count: Arc<AtomicUsize>,
|
||||||
|
|
||||||
|
config: &'static crate::config::HttpConfig,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl GlobalConnPool {
|
||||||
|
pub(crate) fn new(config: &'static crate::config::HttpConfig) -> Arc<Self> {
|
||||||
|
let shards = config.pool_options.pool_shards;
|
||||||
|
Arc::new(Self {
|
||||||
|
global_pool: DashMap::with_shard_amount(shards),
|
||||||
|
global_pool_size: AtomicUsize::new(0),
|
||||||
|
config,
|
||||||
|
global_connections_count: Arc::new(AtomicUsize::new(0)),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn shutdown(&self) {
|
||||||
|
// drops all strong references to endpoint-pools
|
||||||
|
self.global_pool.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) async fn gc_worker(&self, mut rng: impl Rng) {
|
||||||
|
let epoch = self.config.pool_options.gc_epoch;
|
||||||
|
let mut interval = tokio::time::interval(epoch / (self.global_pool.shards().len()) as u32);
|
||||||
|
loop {
|
||||||
|
interval.tick().await;
|
||||||
|
|
||||||
|
let shard = rng.gen_range(0..self.global_pool.shards().len());
|
||||||
|
self.gc(shard);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn gc(&self, shard: usize) {
|
||||||
|
debug!(shard, "pool: performing epoch reclamation");
|
||||||
|
|
||||||
|
// acquire a random shard lock
|
||||||
|
let mut shard = self.global_pool.shards()[shard].write();
|
||||||
|
|
||||||
|
let timer = Metrics::get()
|
||||||
|
.proxy
|
||||||
|
.http_pool_reclaimation_lag_seconds
|
||||||
|
.start_timer();
|
||||||
|
let current_len = shard.len();
|
||||||
|
let mut clients_removed = 0;
|
||||||
|
shard.retain(|endpoint, x| {
|
||||||
|
// if the current endpoint pool is unique (no other strong or weak references)
|
||||||
|
// then it is currently not in use by any connections.
|
||||||
|
if let Some(pool) = Arc::get_mut(x.get_mut()) {
|
||||||
|
let EndpointConnPool { conns, .. } = pool.get_mut();
|
||||||
|
|
||||||
|
let old_len = conns.len();
|
||||||
|
|
||||||
|
conns.retain(|conn| !conn.conn.is_closed());
|
||||||
|
|
||||||
|
let new_len = conns.len();
|
||||||
|
let removed = old_len - new_len;
|
||||||
|
clients_removed += removed;
|
||||||
|
|
||||||
|
// we only remove this pool if it has no active connections
|
||||||
|
if conns.is_empty() {
|
||||||
|
info!("pool: discarding pool for endpoint {endpoint}");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
true
|
||||||
|
});
|
||||||
|
|
||||||
|
let new_len = shard.len();
|
||||||
|
drop(shard);
|
||||||
|
timer.observe();
|
||||||
|
|
||||||
|
// Do logging outside of the lock.
|
||||||
|
if clients_removed > 0 {
|
||||||
|
let size = self
|
||||||
|
.global_connections_count
|
||||||
|
.fetch_sub(clients_removed, atomic::Ordering::Relaxed)
|
||||||
|
- clients_removed;
|
||||||
|
Metrics::get()
|
||||||
|
.proxy
|
||||||
|
.http_pool_opened_connections
|
||||||
|
.get_metric()
|
||||||
|
.dec_by(clients_removed as i64);
|
||||||
|
info!("pool: performed global pool gc. removed {clients_removed} clients, total number of clients in pool is {size}");
|
||||||
|
}
|
||||||
|
let removed = current_len - new_len;
|
||||||
|
|
||||||
|
if removed > 0 {
|
||||||
|
let global_pool_size = self
|
||||||
|
.global_pool_size
|
||||||
|
.fetch_sub(removed, atomic::Ordering::Relaxed)
|
||||||
|
- removed;
|
||||||
|
info!("pool: performed global pool gc. size now {global_pool_size}");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn get(
|
||||||
|
self: &Arc<Self>,
|
||||||
|
ctx: &RequestMonitoring,
|
||||||
|
conn_info: &ConnInfo,
|
||||||
|
) -> Option<Client> {
|
||||||
|
let endpoint = conn_info.endpoint_cache_key()?;
|
||||||
|
let endpoint_pool = self.get_or_create_endpoint_pool(&endpoint);
|
||||||
|
let client = endpoint_pool.write().get_conn_entry()?;
|
||||||
|
|
||||||
|
if client.conn.is_closed() {
|
||||||
|
info!("pool: cached connection '{conn_info}' is closed, opening a new one");
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
tracing::Span::current().record("conn_id", tracing::field::display(client.conn_id));
|
||||||
|
info!(
|
||||||
|
cold_start_info = ColdStartInfo::HttpPoolHit.as_str(),
|
||||||
|
"pool: reusing connection '{conn_info}'"
|
||||||
|
);
|
||||||
|
ctx.set_cold_start_info(ColdStartInfo::HttpPoolHit);
|
||||||
|
ctx.success();
|
||||||
|
Some(Client::new(client.conn, client.aux))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_or_create_endpoint_pool(
|
||||||
|
self: &Arc<Self>,
|
||||||
|
endpoint: &EndpointCacheKey,
|
||||||
|
) -> Arc<RwLock<EndpointConnPool>> {
|
||||||
|
// fast path
|
||||||
|
if let Some(pool) = self.global_pool.get(endpoint) {
|
||||||
|
return pool.clone();
|
||||||
|
}
|
||||||
|
|
||||||
|
// slow path
|
||||||
|
let new_pool = Arc::new(RwLock::new(EndpointConnPool {
|
||||||
|
conns: VecDeque::new(),
|
||||||
|
_guard: Metrics::get().proxy.http_endpoint_pools.guard(),
|
||||||
|
global_connections_count: self.global_connections_count.clone(),
|
||||||
|
}));
|
||||||
|
|
||||||
|
// find or create a pool for this endpoint
|
||||||
|
let mut created = false;
|
||||||
|
let pool = self
|
||||||
|
.global_pool
|
||||||
|
.entry(endpoint.clone())
|
||||||
|
.or_insert_with(|| {
|
||||||
|
created = true;
|
||||||
|
new_pool
|
||||||
|
})
|
||||||
|
.clone();
|
||||||
|
|
||||||
|
// log new global pool size
|
||||||
|
if created {
|
||||||
|
let global_pool_size = self
|
||||||
|
.global_pool_size
|
||||||
|
.fetch_add(1, atomic::Ordering::Relaxed)
|
||||||
|
+ 1;
|
||||||
|
info!(
|
||||||
|
"pool: created new pool for '{endpoint}', global pool size now {global_pool_size}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
pool
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn poll_http2_client(
|
||||||
|
global_pool: Arc<GlobalConnPool>,
|
||||||
|
ctx: &RequestMonitoring,
|
||||||
|
conn_info: &ConnInfo,
|
||||||
|
client: Send,
|
||||||
|
connection: Connect,
|
||||||
|
conn_id: uuid::Uuid,
|
||||||
|
aux: MetricsAuxInfo,
|
||||||
|
) -> Client {
|
||||||
|
let conn_gauge = Metrics::get().proxy.db_connections.guard(ctx.protocol());
|
||||||
|
let session_id = ctx.session_id();
|
||||||
|
|
||||||
|
let span = info_span!(parent: None, "connection", %conn_id);
|
||||||
|
let cold_start_info = ctx.cold_start_info();
|
||||||
|
span.in_scope(|| {
|
||||||
|
info!(cold_start_info = cold_start_info.as_str(), %conn_info, %session_id, "new connection");
|
||||||
|
});
|
||||||
|
|
||||||
|
let pool = match conn_info.endpoint_cache_key() {
|
||||||
|
Some(endpoint) => {
|
||||||
|
let pool = global_pool.get_or_create_endpoint_pool(&endpoint);
|
||||||
|
|
||||||
|
pool.write().conns.push_back(ConnPoolEntry {
|
||||||
|
conn: client.clone(),
|
||||||
|
conn_id,
|
||||||
|
aux: aux.clone(),
|
||||||
|
});
|
||||||
|
|
||||||
|
Arc::downgrade(&pool)
|
||||||
|
}
|
||||||
|
None => Weak::new(),
|
||||||
|
};
|
||||||
|
|
||||||
|
// let idle = global_pool.get_idle_timeout();
|
||||||
|
|
||||||
|
tokio::spawn(
|
||||||
|
async move {
|
||||||
|
let _conn_gauge = conn_gauge;
|
||||||
|
let res = connection.await;
|
||||||
|
match res {
|
||||||
|
Ok(()) => info!("connection closed"),
|
||||||
|
Err(e) => error!(%session_id, "connection error: {}", e),
|
||||||
|
}
|
||||||
|
|
||||||
|
// remove from connection pool
|
||||||
|
if let Some(pool) = pool.clone().upgrade() {
|
||||||
|
if pool.write().remove_conn(conn_id) {
|
||||||
|
info!("closed connection removed");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
.instrument(span),
|
||||||
|
);
|
||||||
|
|
||||||
|
Client::new(client, aux)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) struct Client {
|
||||||
|
pub(crate) inner: Send,
|
||||||
|
aux: MetricsAuxInfo,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Client {
|
||||||
|
pub(self) fn new(inner: Send, aux: MetricsAuxInfo) -> Self {
|
||||||
|
Self { inner, aux }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn metrics(&self) -> Arc<MetricCounter> {
|
||||||
|
USAGE_METRICS.register(Ids {
|
||||||
|
endpoint_id: self.aux.endpoint_id,
|
||||||
|
branch_id: self.aux.branch_id,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -5,13 +5,13 @@ use bytes::Bytes;
|
|||||||
|
|
||||||
use anyhow::Context;
|
use anyhow::Context;
|
||||||
use http::{Response, StatusCode};
|
use http::{Response, StatusCode};
|
||||||
use http_body_util::Full;
|
use http_body_util::{combinators::BoxBody, BodyExt, Full};
|
||||||
|
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
use utils::http::error::ApiError;
|
use utils::http::error::ApiError;
|
||||||
|
|
||||||
/// Like [`ApiError::into_response`]
|
/// Like [`ApiError::into_response`]
|
||||||
pub(crate) fn api_error_into_response(this: ApiError) -> Response<Full<Bytes>> {
|
pub(crate) fn api_error_into_response(this: ApiError) -> Response<BoxBody<Bytes, hyper1::Error>> {
|
||||||
match this {
|
match this {
|
||||||
ApiError::BadRequest(err) => HttpErrorBody::response_from_msg_and_status(
|
ApiError::BadRequest(err) => HttpErrorBody::response_from_msg_and_status(
|
||||||
format!("{err:#?}"), // use debug printing so that we give the cause
|
format!("{err:#?}"), // use debug printing so that we give the cause
|
||||||
@@ -64,17 +64,24 @@ struct HttpErrorBody {
|
|||||||
|
|
||||||
impl HttpErrorBody {
|
impl HttpErrorBody {
|
||||||
/// Same as [`utils::http::error::HttpErrorBody::response_from_msg_and_status`]
|
/// Same as [`utils::http::error::HttpErrorBody::response_from_msg_and_status`]
|
||||||
fn response_from_msg_and_status(msg: String, status: StatusCode) -> Response<Full<Bytes>> {
|
fn response_from_msg_and_status(
|
||||||
|
msg: String,
|
||||||
|
status: StatusCode,
|
||||||
|
) -> Response<BoxBody<Bytes, hyper1::Error>> {
|
||||||
HttpErrorBody { msg }.to_response(status)
|
HttpErrorBody { msg }.to_response(status)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Same as [`utils::http::error::HttpErrorBody::to_response`]
|
/// Same as [`utils::http::error::HttpErrorBody::to_response`]
|
||||||
fn to_response(&self, status: StatusCode) -> Response<Full<Bytes>> {
|
fn to_response(&self, status: StatusCode) -> Response<BoxBody<Bytes, hyper1::Error>> {
|
||||||
Response::builder()
|
Response::builder()
|
||||||
.status(status)
|
.status(status)
|
||||||
.header(http::header::CONTENT_TYPE, "application/json")
|
.header(http::header::CONTENT_TYPE, "application/json")
|
||||||
// we do not have nested maps with non string keys so serialization shouldn't fail
|
// we do not have nested maps with non string keys so serialization shouldn't fail
|
||||||
.body(Full::new(Bytes::from(serde_json::to_string(self).unwrap())))
|
.body(
|
||||||
|
Full::new(Bytes::from(serde_json::to_string(self).unwrap()))
|
||||||
|
.map_err(|x| match x {})
|
||||||
|
.boxed(),
|
||||||
|
)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -83,14 +90,14 @@ impl HttpErrorBody {
|
|||||||
pub(crate) fn json_response<T: Serialize>(
|
pub(crate) fn json_response<T: Serialize>(
|
||||||
status: StatusCode,
|
status: StatusCode,
|
||||||
data: T,
|
data: T,
|
||||||
) -> Result<Response<Full<Bytes>>, ApiError> {
|
) -> Result<Response<BoxBody<Bytes, hyper1::Error>>, ApiError> {
|
||||||
let json = serde_json::to_string(&data)
|
let json = serde_json::to_string(&data)
|
||||||
.context("Failed to serialize JSON response")
|
.context("Failed to serialize JSON response")
|
||||||
.map_err(ApiError::InternalServerError)?;
|
.map_err(ApiError::InternalServerError)?;
|
||||||
let response = Response::builder()
|
let response = Response::builder()
|
||||||
.status(status)
|
.status(status)
|
||||||
.header(http::header::CONTENT_TYPE, "application/json")
|
.header(http::header::CONTENT_TYPE, "application/json")
|
||||||
.body(Full::new(Bytes::from(json)))
|
.body(Full::new(Bytes::from(json)).map_err(|x| match x {}).boxed())
|
||||||
.map_err(|e| ApiError::InternalServerError(e.into()))?;
|
.map_err(|e| ApiError::InternalServerError(e.into()))?;
|
||||||
Ok(response)
|
Ok(response)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -8,6 +8,8 @@ use futures::future::Either;
|
|||||||
use futures::StreamExt;
|
use futures::StreamExt;
|
||||||
use futures::TryFutureExt;
|
use futures::TryFutureExt;
|
||||||
use http::header::AUTHORIZATION;
|
use http::header::AUTHORIZATION;
|
||||||
|
use http::Method;
|
||||||
|
use http_body_util::combinators::BoxBody;
|
||||||
use http_body_util::BodyExt;
|
use http_body_util::BodyExt;
|
||||||
use http_body_util::Full;
|
use http_body_util::Full;
|
||||||
use hyper1::body::Body;
|
use hyper1::body::Body;
|
||||||
@@ -38,9 +40,11 @@ use url::Url;
|
|||||||
use urlencoding;
|
use urlencoding;
|
||||||
use utils::http::error::ApiError;
|
use utils::http::error::ApiError;
|
||||||
|
|
||||||
|
use crate::auth::backend::ComputeCredentials;
|
||||||
use crate::auth::backend::ComputeUserInfo;
|
use crate::auth::backend::ComputeUserInfo;
|
||||||
use crate::auth::endpoint_sni;
|
use crate::auth::endpoint_sni;
|
||||||
use crate::auth::ComputeUserInfoParseError;
|
use crate::auth::ComputeUserInfoParseError;
|
||||||
|
use crate::config::AuthenticationConfig;
|
||||||
use crate::config::ProxyConfig;
|
use crate::config::ProxyConfig;
|
||||||
use crate::config::TlsConfig;
|
use crate::config::TlsConfig;
|
||||||
use crate::context::RequestMonitoring;
|
use crate::context::RequestMonitoring;
|
||||||
@@ -56,6 +60,7 @@ use crate::usage_metrics::MetricCounterRecorder;
|
|||||||
use crate::DbName;
|
use crate::DbName;
|
||||||
use crate::RoleName;
|
use crate::RoleName;
|
||||||
|
|
||||||
|
use super::backend::LocalProxyConnError;
|
||||||
use super::backend::PoolingBackend;
|
use super::backend::PoolingBackend;
|
||||||
use super::conn_pool::AuthData;
|
use super::conn_pool::AuthData;
|
||||||
use super::conn_pool::Client;
|
use super::conn_pool::Client;
|
||||||
@@ -123,8 +128,8 @@ pub(crate) enum ConnInfoError {
|
|||||||
MissingUsername,
|
MissingUsername,
|
||||||
#[error("invalid username: {0}")]
|
#[error("invalid username: {0}")]
|
||||||
InvalidUsername(#[from] std::string::FromUtf8Error),
|
InvalidUsername(#[from] std::string::FromUtf8Error),
|
||||||
#[error("missing password")]
|
#[error("missing authentication credentials: {0}")]
|
||||||
MissingPassword,
|
MissingCredentials(Credentials),
|
||||||
#[error("missing hostname")]
|
#[error("missing hostname")]
|
||||||
MissingHostname,
|
MissingHostname,
|
||||||
#[error("invalid hostname: {0}")]
|
#[error("invalid hostname: {0}")]
|
||||||
@@ -133,6 +138,14 @@ pub(crate) enum ConnInfoError {
|
|||||||
MalformedEndpoint,
|
MalformedEndpoint,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, thiserror::Error)]
|
||||||
|
pub(crate) enum Credentials {
|
||||||
|
#[error("required password")]
|
||||||
|
Password,
|
||||||
|
#[error("required authorization bearer token in JWT format")]
|
||||||
|
BearerJwt,
|
||||||
|
}
|
||||||
|
|
||||||
impl ReportableError for ConnInfoError {
|
impl ReportableError for ConnInfoError {
|
||||||
fn get_error_kind(&self) -> ErrorKind {
|
fn get_error_kind(&self) -> ErrorKind {
|
||||||
ErrorKind::User
|
ErrorKind::User
|
||||||
@@ -146,6 +159,7 @@ impl UserFacingError for ConnInfoError {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn get_conn_info(
|
fn get_conn_info(
|
||||||
|
config: &'static AuthenticationConfig,
|
||||||
ctx: &RequestMonitoring,
|
ctx: &RequestMonitoring,
|
||||||
headers: &HeaderMap,
|
headers: &HeaderMap,
|
||||||
tls: Option<&TlsConfig>,
|
tls: Option<&TlsConfig>,
|
||||||
@@ -181,21 +195,32 @@ fn get_conn_info(
|
|||||||
ctx.set_user(username.clone());
|
ctx.set_user(username.clone());
|
||||||
|
|
||||||
let auth = if let Some(auth) = headers.get(&AUTHORIZATION) {
|
let auth = if let Some(auth) = headers.get(&AUTHORIZATION) {
|
||||||
|
if !config.accept_jwts {
|
||||||
|
return Err(ConnInfoError::MissingCredentials(Credentials::Password));
|
||||||
|
}
|
||||||
|
|
||||||
let auth = auth
|
let auth = auth
|
||||||
.to_str()
|
.to_str()
|
||||||
.map_err(|_| ConnInfoError::InvalidHeader(&AUTHORIZATION))?;
|
.map_err(|_| ConnInfoError::InvalidHeader(&AUTHORIZATION))?;
|
||||||
AuthData::Jwt(
|
AuthData::Jwt(
|
||||||
auth.strip_prefix("Bearer ")
|
auth.strip_prefix("Bearer ")
|
||||||
.ok_or(ConnInfoError::MissingPassword)?
|
.ok_or(ConnInfoError::MissingCredentials(Credentials::BearerJwt))?
|
||||||
.into(),
|
.into(),
|
||||||
)
|
)
|
||||||
} else if let Some(pass) = connection_url.password() {
|
} else if let Some(pass) = connection_url.password() {
|
||||||
|
// wrong credentials provided
|
||||||
|
if config.accept_jwts {
|
||||||
|
return Err(ConnInfoError::MissingCredentials(Credentials::BearerJwt));
|
||||||
|
}
|
||||||
|
|
||||||
AuthData::Password(match urlencoding::decode_binary(pass.as_bytes()) {
|
AuthData::Password(match urlencoding::decode_binary(pass.as_bytes()) {
|
||||||
std::borrow::Cow::Borrowed(b) => b.into(),
|
std::borrow::Cow::Borrowed(b) => b.into(),
|
||||||
std::borrow::Cow::Owned(b) => b.into(),
|
std::borrow::Cow::Owned(b) => b.into(),
|
||||||
})
|
})
|
||||||
|
} else if config.accept_jwts {
|
||||||
|
return Err(ConnInfoError::MissingCredentials(Credentials::BearerJwt));
|
||||||
} else {
|
} else {
|
||||||
return Err(ConnInfoError::MissingPassword);
|
return Err(ConnInfoError::MissingCredentials(Credentials::Password));
|
||||||
};
|
};
|
||||||
|
|
||||||
let endpoint = match connection_url.host() {
|
let endpoint = match connection_url.host() {
|
||||||
@@ -247,7 +272,7 @@ pub(crate) async fn handle(
|
|||||||
request: Request<Incoming>,
|
request: Request<Incoming>,
|
||||||
backend: Arc<PoolingBackend>,
|
backend: Arc<PoolingBackend>,
|
||||||
cancel: CancellationToken,
|
cancel: CancellationToken,
|
||||||
) -> Result<Response<Full<Bytes>>, ApiError> {
|
) -> Result<Response<BoxBody<Bytes, hyper1::Error>>, ApiError> {
|
||||||
let result = handle_inner(cancel, config, &ctx, request, backend).await;
|
let result = handle_inner(cancel, config, &ctx, request, backend).await;
|
||||||
|
|
||||||
let mut response = match result {
|
let mut response = match result {
|
||||||
@@ -279,7 +304,7 @@ pub(crate) async fn handle(
|
|||||||
|
|
||||||
let mut message = e.to_string_client();
|
let mut message = e.to_string_client();
|
||||||
let db_error = match &e {
|
let db_error = match &e {
|
||||||
SqlOverHttpError::ConnectCompute(HttpConnError::ConnectionError(e))
|
SqlOverHttpError::ConnectCompute(HttpConnError::PostgresConnectionError(e))
|
||||||
| SqlOverHttpError::Postgres(e) => e.as_db_error(),
|
| SqlOverHttpError::Postgres(e) => e.as_db_error(),
|
||||||
_ => None,
|
_ => None,
|
||||||
};
|
};
|
||||||
@@ -504,7 +529,7 @@ async fn handle_inner(
|
|||||||
ctx: &RequestMonitoring,
|
ctx: &RequestMonitoring,
|
||||||
request: Request<Incoming>,
|
request: Request<Incoming>,
|
||||||
backend: Arc<PoolingBackend>,
|
backend: Arc<PoolingBackend>,
|
||||||
) -> Result<Response<Full<Bytes>>, SqlOverHttpError> {
|
) -> Result<Response<BoxBody<Bytes, hyper1::Error>>, SqlOverHttpError> {
|
||||||
let _requeset_gauge = Metrics::get()
|
let _requeset_gauge = Metrics::get()
|
||||||
.proxy
|
.proxy
|
||||||
.connection_requests
|
.connection_requests
|
||||||
@@ -514,18 +539,50 @@ async fn handle_inner(
|
|||||||
"handling interactive connection from client"
|
"handling interactive connection from client"
|
||||||
);
|
);
|
||||||
|
|
||||||
//
|
let conn_info = get_conn_info(
|
||||||
// Determine the destination and connection params
|
&config.authentication_config,
|
||||||
//
|
ctx,
|
||||||
let headers = request.headers();
|
request.headers(),
|
||||||
|
config.tls_config.as_ref(),
|
||||||
// TLS config should be there.
|
)?;
|
||||||
let conn_info = get_conn_info(ctx, headers, config.tls_config.as_ref())?;
|
|
||||||
info!(
|
info!(
|
||||||
user = conn_info.conn_info.user_info.user.as_str(),
|
user = conn_info.conn_info.user_info.user.as_str(),
|
||||||
"credentials"
|
"credentials"
|
||||||
);
|
);
|
||||||
|
|
||||||
|
match conn_info.auth {
|
||||||
|
AuthData::Jwt(jwt) if config.authentication_config.is_auth_broker => {
|
||||||
|
handle_auth_broker_inner(config, ctx, request, conn_info.conn_info, jwt, backend).await
|
||||||
|
}
|
||||||
|
auth => {
|
||||||
|
handle_db_inner(
|
||||||
|
cancel,
|
||||||
|
config,
|
||||||
|
ctx,
|
||||||
|
request,
|
||||||
|
conn_info.conn_info,
|
||||||
|
auth,
|
||||||
|
backend,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn handle_db_inner(
|
||||||
|
cancel: CancellationToken,
|
||||||
|
config: &'static ProxyConfig,
|
||||||
|
ctx: &RequestMonitoring,
|
||||||
|
request: Request<Incoming>,
|
||||||
|
conn_info: ConnInfo,
|
||||||
|
auth: AuthData,
|
||||||
|
backend: Arc<PoolingBackend>,
|
||||||
|
) -> Result<Response<BoxBody<Bytes, hyper1::Error>>, SqlOverHttpError> {
|
||||||
|
//
|
||||||
|
// Determine the destination and connection params
|
||||||
|
//
|
||||||
|
let headers = request.headers();
|
||||||
|
|
||||||
// Allow connection pooling only if explicitly requested
|
// Allow connection pooling only if explicitly requested
|
||||||
// or if we have decided that http pool is no longer opt-in
|
// or if we have decided that http pool is no longer opt-in
|
||||||
let allow_pool = !config.http_config.pool_options.opt_in
|
let allow_pool = !config.http_config.pool_options.opt_in
|
||||||
@@ -563,26 +620,36 @@ async fn handle_inner(
|
|||||||
|
|
||||||
let authenticate_and_connect = Box::pin(
|
let authenticate_and_connect = Box::pin(
|
||||||
async {
|
async {
|
||||||
let keys = match &conn_info.auth {
|
let keys = match auth {
|
||||||
AuthData::Password(pw) => {
|
AuthData::Password(pw) => {
|
||||||
backend
|
backend
|
||||||
.authenticate_with_password(
|
.authenticate_with_password(
|
||||||
ctx,
|
ctx,
|
||||||
&config.authentication_config,
|
&config.authentication_config,
|
||||||
&conn_info.conn_info.user_info,
|
&conn_info.user_info,
|
||||||
pw,
|
&pw,
|
||||||
)
|
)
|
||||||
.await?
|
.await?
|
||||||
}
|
}
|
||||||
AuthData::Jwt(jwt) => {
|
AuthData::Jwt(jwt) => {
|
||||||
backend
|
backend
|
||||||
.authenticate_with_jwt(ctx, &conn_info.conn_info.user_info, jwt)
|
.authenticate_with_jwt(
|
||||||
.await?
|
ctx,
|
||||||
|
&config.authentication_config,
|
||||||
|
&conn_info.user_info,
|
||||||
|
jwt,
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
ComputeCredentials {
|
||||||
|
info: conn_info.user_info.clone(),
|
||||||
|
keys: crate::auth::backend::ComputeCredentialKeys::None,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
let client = backend
|
let client = backend
|
||||||
.connect_to_compute(ctx, conn_info.conn_info, keys, !allow_pool)
|
.connect_to_compute(ctx, conn_info, keys, !allow_pool)
|
||||||
.await?;
|
.await?;
|
||||||
// not strictly necessary to mark success here,
|
// not strictly necessary to mark success here,
|
||||||
// but it's just insurance for if we forget it somewhere else
|
// but it's just insurance for if we forget it somewhere else
|
||||||
@@ -640,7 +707,11 @@ async fn handle_inner(
|
|||||||
|
|
||||||
let len = json_output.len();
|
let len = json_output.len();
|
||||||
let response = response
|
let response = response
|
||||||
.body(Full::new(Bytes::from(json_output)))
|
.body(
|
||||||
|
Full::new(Bytes::from(json_output))
|
||||||
|
.map_err(|x| match x {})
|
||||||
|
.boxed(),
|
||||||
|
)
|
||||||
// only fails if invalid status code or invalid header/values are given.
|
// only fails if invalid status code or invalid header/values are given.
|
||||||
// these are not user configurable so it cannot fail dynamically
|
// these are not user configurable so it cannot fail dynamically
|
||||||
.expect("building response payload should not fail");
|
.expect("building response payload should not fail");
|
||||||
@@ -656,6 +727,65 @@ async fn handle_inner(
|
|||||||
Ok(response)
|
Ok(response)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static HEADERS_TO_FORWARD: &[&HeaderName] = &[
|
||||||
|
&AUTHORIZATION,
|
||||||
|
&CONN_STRING,
|
||||||
|
&RAW_TEXT_OUTPUT,
|
||||||
|
&ARRAY_MODE,
|
||||||
|
&TXN_ISOLATION_LEVEL,
|
||||||
|
&TXN_READ_ONLY,
|
||||||
|
&TXN_DEFERRABLE,
|
||||||
|
];
|
||||||
|
|
||||||
|
async fn handle_auth_broker_inner(
|
||||||
|
config: &'static ProxyConfig,
|
||||||
|
ctx: &RequestMonitoring,
|
||||||
|
request: Request<Incoming>,
|
||||||
|
conn_info: ConnInfo,
|
||||||
|
jwt: String,
|
||||||
|
backend: Arc<PoolingBackend>,
|
||||||
|
) -> Result<Response<BoxBody<Bytes, hyper1::Error>>, SqlOverHttpError> {
|
||||||
|
backend
|
||||||
|
.authenticate_with_jwt(
|
||||||
|
ctx,
|
||||||
|
&config.authentication_config,
|
||||||
|
&conn_info.user_info,
|
||||||
|
jwt,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.map_err(HttpConnError::from)?;
|
||||||
|
|
||||||
|
let mut client = backend.connect_to_local_proxy(ctx, conn_info).await?;
|
||||||
|
|
||||||
|
let local_proxy_uri = ::http::Uri::from_static("http://proxy.local/sql");
|
||||||
|
|
||||||
|
let (mut parts, body) = request.into_parts();
|
||||||
|
let mut req = Request::builder().method(Method::POST).uri(local_proxy_uri);
|
||||||
|
|
||||||
|
// todo(conradludgate): maybe auth-broker should parse these and re-serialize
|
||||||
|
// these instead just to ensure they remain normalised.
|
||||||
|
for &h in HEADERS_TO_FORWARD {
|
||||||
|
if let Some(hv) = parts.headers.remove(h) {
|
||||||
|
req = req.header(h, hv);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let req = req
|
||||||
|
.body(body)
|
||||||
|
.expect("all headers and params received via hyper should be valid for request");
|
||||||
|
|
||||||
|
// todo: map body to count egress
|
||||||
|
let _metrics = client.metrics();
|
||||||
|
|
||||||
|
Ok(client
|
||||||
|
.inner
|
||||||
|
.send_request(req)
|
||||||
|
.await
|
||||||
|
.map_err(LocalProxyConnError::from)
|
||||||
|
.map_err(HttpConnError::from)?
|
||||||
|
.map(|b| b.boxed()))
|
||||||
|
}
|
||||||
|
|
||||||
impl QueryData {
|
impl QueryData {
|
||||||
async fn process(
|
async fn process(
|
||||||
self,
|
self,
|
||||||
@@ -705,7 +835,9 @@ impl QueryData {
|
|||||||
// query failed or was cancelled.
|
// query failed or was cancelled.
|
||||||
Ok(Err(error)) => {
|
Ok(Err(error)) => {
|
||||||
let db_error = match &error {
|
let db_error = match &error {
|
||||||
SqlOverHttpError::ConnectCompute(HttpConnError::ConnectionError(e))
|
SqlOverHttpError::ConnectCompute(
|
||||||
|
HttpConnError::PostgresConnectionError(e),
|
||||||
|
)
|
||||||
| SqlOverHttpError::Postgres(e) => e.as_db_error(),
|
| SqlOverHttpError::Postgres(e) => e.as_db_error(),
|
||||||
_ => None,
|
_ => None,
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -21,7 +21,6 @@ chrono.workspace = true
|
|||||||
clap = { workspace = true, features = ["derive"] }
|
clap = { workspace = true, features = ["derive"] }
|
||||||
crc32c.workspace = true
|
crc32c.workspace = true
|
||||||
fail.workspace = true
|
fail.workspace = true
|
||||||
git-version.workspace = true
|
|
||||||
hex.workspace = true
|
hex.workspace = true
|
||||||
humantime.workspace = true
|
humantime.workspace = true
|
||||||
hyper.workspace = true
|
hyper.workspace = true
|
||||||
|
|||||||
@@ -374,14 +374,16 @@ type JoinTaskRes = Result<anyhow::Result<()>, JoinError>;
|
|||||||
|
|
||||||
async fn start_safekeeper(conf: SafeKeeperConf) -> Result<()> {
|
async fn start_safekeeper(conf: SafeKeeperConf) -> Result<()> {
|
||||||
// fsync the datadir to make sure we have a consistent state on disk.
|
// fsync the datadir to make sure we have a consistent state on disk.
|
||||||
let dfd = File::open(&conf.workdir).context("open datadir for syncfs")?;
|
if !conf.no_sync {
|
||||||
let started = Instant::now();
|
let dfd = File::open(&conf.workdir).context("open datadir for syncfs")?;
|
||||||
utils::crashsafe::syncfs(dfd)?;
|
let started = Instant::now();
|
||||||
let elapsed = started.elapsed();
|
utils::crashsafe::syncfs(dfd)?;
|
||||||
info!(
|
let elapsed = started.elapsed();
|
||||||
elapsed_ms = elapsed.as_millis(),
|
info!(
|
||||||
"syncfs data directory done"
|
elapsed_ms = elapsed.as_millis(),
|
||||||
);
|
"syncfs data directory done"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
info!("starting safekeeper WAL service on {}", conf.listen_pg_addr);
|
info!("starting safekeeper WAL service on {}", conf.listen_pg_addr);
|
||||||
let pg_listener = tcp_listener::bind(conf.listen_pg_addr.clone()).map_err(|e| {
|
let pg_listener = tcp_listener::bind(conf.listen_pg_addr.clone()).map_err(|e| {
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user