zero-copy jwt claim validation

stash
split up jwt tests
2026-03-14 22:00:38 +00:00 · 2024-09-30 12:47:07 +01:00 · 2024-09-29 20:29:26 +01:00 · 2024-09-27 16:31:49 +01:00 · 2024-09-27 11:43:34 +01:00 · 2024-09-27 11:43:34 +01:00
138 changed files with 9769 additions and 1814 deletions
--- a/.dockerignore
+++ b/.dockerignore
@@ -13,6 +13,7 @@
 # Directories
 !.cargo/
 !.config/
 !compute/
 !compute_tools/
 !control_plane/
 !libs/
--- a/.github/workflows/_build-and-test-locally.yml
+++ b/.github/workflows/_build-and-test-locally.yml
@@ -257,7 +257,15 @@ jobs:
          ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(remote_storage)' -E 'test(test_real_azure)'
      - name: Install postgres binaries
-        run: cp -a pg_install /tmp/neon/pg_install
+        run: |
          # Use tar to copy files matching the pattern, preserving the paths in the destination
          tar c \
            pg_install/v* \
            pg_install/build/*/src/test/regress/*.so \
            pg_install/build/*/src/test/regress/pg_regress \
            pg_install/build/*/src/test/isolation/isolationtester \
            pg_install/build/*/src/test/isolation/pg_isolation_regress \
            | tar  x -C /tmp/neon
      - name: Upload Neon artifact
        uses: ./.github/actions/upload
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -120,6 +120,59 @@ jobs:
      - name: Run mypy to check types
        run: poetry run mypy .
  # Check that the vendor/postgres-* submodules point to the
  # corresponding REL_*_STABLE_neon branches.
  check-submodules:
    runs-on: ubuntu-22.04
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          submodules: true
      - uses: dorny/paths-filter@v3
        id: check-if-submodules-changed
        with:
          filters: |
            vendor:
              - 'vendor/**'
      - name: Check vendor/postgres-v14 submodule reference
        if: steps.check-if-submodules-changed.outputs.vendor == 'true'
        uses: jtmullen/submodule-branch-check-action@v1
        with:
          path: "vendor/postgres-v14"
          fetch_depth: "50"
          sub_fetch_depth: "50"
          pass_if_unchanged: true
      - name: Check vendor/postgres-v15 submodule reference
        if: steps.check-if-submodules-changed.outputs.vendor == 'true'
        uses: jtmullen/submodule-branch-check-action@v1
        with:
          path: "vendor/postgres-v15"
          fetch_depth: "50"
          sub_fetch_depth: "50"
          pass_if_unchanged: true
      - name: Check vendor/postgres-v16 submodule reference
        if: steps.check-if-submodules-changed.outputs.vendor == 'true'
        uses: jtmullen/submodule-branch-check-action@v1
        with:
          path: "vendor/postgres-v16"
          fetch_depth: "50"
          sub_fetch_depth: "50"
          pass_if_unchanged: true
      - name: Check vendor/postgres-v17 submodule reference
        if: steps.check-if-submodules-changed.outputs.vendor == 'true'
        uses: jtmullen/submodule-branch-check-action@v1
        with:
          path: "vendor/postgres-v17"
          fetch_depth: "50"
          sub_fetch_depth: "50"
          pass_if_unchanged: true
  check-codestyle-rust:
    needs: [ check-permissions, build-build-tools-image ]
    strategy:
@@ -549,7 +602,20 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
-        version: [ v14, v15, v16, v17 ]
+        version:
          # Much data was already generated on old PG versions with bullseye's
          # libraries, the locales of which can cause data incompatibilities.
          # However, new PG versions should check if they can be built on newer
          # images, as that reduces the support burden of old and ancient
          # distros.
          - pg: v14
            debian: bullseye-slim
          - pg: v15
            debian: bullseye-slim
          - pg: v16
            debian: bullseye-slim
          - pg: v17
            debian: bookworm-slim
        arch: [ x64, arm64 ]
    runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}
@@ -592,41 +658,46 @@ jobs:
          context: .
          build-args: |
            GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}
-            PG_VERSION=${{ matrix.version }}
+            PG_VERSION=${{ matrix.version.pg }}
            BUILD_TAG=${{ needs.tag.outputs.build-tag }}
            TAG=${{ needs.build-build-tools-image.outputs.image-tag }}
            DEBIAN_FLAVOR=${{ matrix.version.debian }}
          provenance: false
          push: true
          pull: true
-          file: Dockerfile.compute-node
+          file: compute/Dockerfile.compute-node
-          cache-from: type=registry,ref=cache.neon.build/compute-node-${{ matrix.version }}:cache-${{ matrix.arch }}
+          cache-from: type=registry,ref=cache.neon.build/compute-node-${{ matrix.version.pg }}:cache-${{ matrix.arch }}
-          cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/compute-node-{0}:cache-{1},mode=max', matrix.version, matrix.arch) || '' }}
+          cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/compute-node-{0}:cache-{1},mode=max', matrix.version.pg, matrix.arch) || '' }}
          tags: |
-            neondatabase/compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.arch }}
+            neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.arch }}
      - name: Build neon extensions test image
-        if: matrix.version == 'v16'
+        if: matrix.version.pg == 'v16'
        uses: docker/build-push-action@v6
        with:
          context: .
          build-args: |
            GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}
-            PG_VERSION=${{ matrix.version }}
+            PG_VERSION=${{ matrix.version.pg }}
            BUILD_TAG=${{ needs.tag.outputs.build-tag }}
            TAG=${{ needs.build-build-tools-image.outputs.image-tag }}
            DEBIAN_FLAVOR=${{ matrix.version.debian }}
          provenance: false
          push: true
          pull: true
-          file: Dockerfile.compute-node
+          file: compute/Dockerfile.compute-node
          target: neon-pg-ext-test
-          cache-from: type=registry,ref=cache.neon.build/neon-test-extensions-${{ matrix.version }}:cache-${{ matrix.arch }}
+          cache-from: type=registry,ref=cache.neon.build/neon-test-extensions-${{ matrix.version.pg }}:cache-${{ matrix.arch }}
-          cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/neon-test-extensions-{0}:cache-{1},mode=max', matrix.version, matrix.arch) || '' }}
+          cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/neon-test-extensions-{0}:cache-{1},mode=max', matrix.version.pg, matrix.arch) || '' }}
          tags: |
-            neondatabase/neon-test-extensions-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}-${{ matrix.arch }}
+            neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{needs.tag.outputs.build-tag}}-${{ matrix.arch }}
      - name: Build compute-tools image
        # compute-tools are Postgres independent, so build it only once
-        if: matrix.version == 'v17'
+        # We pick 16, because that builds on debian 11 with older glibc (and is
        # thus compatible with newer glibc), rather than 17 on Debian 12, as
        # that isn't guaranteed to be compatible with Debian 11
        if: matrix.version.pg == 'v16'
        uses: docker/build-push-action@v6
        with:
          target: compute-tools-image
@@ -635,10 +706,11 @@ jobs:
            GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}
            BUILD_TAG=${{ needs.tag.outputs.build-tag }}
            TAG=${{ needs.build-build-tools-image.outputs.image-tag }}
            DEBIAN_FLAVOR=${{ matrix.version.debian }}
          provenance: false
          push: true
          pull: true
-          file: Dockerfile.compute-node
+          file: compute/Dockerfile.compute-node
          tags: |
            neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}-${{ matrix.arch }}
@@ -726,7 +798,7 @@ jobs:
      - name: Build vm image
        run: |
          ./vm-builder \
-            -spec=vm-image-spec.yaml \
+            -spec=compute/vm-image-spec.yaml \
            -src=neondatabase/compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }} \
            -dst=neondatabase/vm-compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }}
@@ -790,6 +862,9 @@ jobs:
    needs: [ check-permissions, tag, test-images, vm-compute-node-image ]
    runs-on: ubuntu-22.04
    permissions:
      id-token: write # for `aws-actions/configure-aws-credentials`
    env:
      VERSIONS: v14 v15 v16 v17
@@ -834,13 +909,19 @@ jobs:
          docker buildx imagetools create -t neondatabase/neon-test-extensions-v16:latest \
                                              neondatabase/neon-test-extensions-v16:${{ needs.tag.outputs.build-tag }}
      - name: Configure AWS-prod credentials
        if: github.ref_name == 'release'|| github.ref_name == 'release-proxy'
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-region: eu-central-1
          mask-aws-account-id: true
          role-to-assume: ${{ secrets.PROD_GHA_OIDC_ROLE }}
      - name: Login to prod ECR
        uses: docker/login-action@v3
        if: github.ref_name == 'release'|| github.ref_name == 'release-proxy'
        with:
          registry: 093970136003.dkr.ecr.eu-central-1.amazonaws.com
          username: ${{ secrets.PROD_GHA_RUNNER_LIMITED_AWS_ACCESS_KEY_ID }}
          password: ${{ secrets.PROD_GHA_RUNNER_LIMITED_AWS_SECRET_ACCESS_KEY }}
      - name: Copy all images to prod ECR
        if: github.ref_name == 'release'|| github.ref_name == 'release-proxy'
@@ -1109,10 +1190,9 @@ jobs:
              files_to_promote+=("s3://${BUCKET}/${s3_key}")
-              # TODO Add v17
+              for pg_version in v14 v15 v16 v17; do
              for pg_version in v14 v15 v16; do
                # We run fewer tests for debug builds, so we don't need to promote them
-                if [ "${build_type}" == "debug" ] && { [ "${arch}" == "ARM64" ] || [ "${pg_version}" != "v16" ] ; }; then
+                if [ "${build_type}" == "debug" ] && { [ "${arch}" == "ARM64" ] || [ "${pg_version}" != "v17" ] ; }; then
                  continue
                fi
--- a/.github/workflows/cloud-regress.yml
+++ b/.github/workflows/cloud-regress.yml
@@ -0,0 +1,102 @@
 name: Cloud Regression Test
 on:
  schedule:
    # * is a special character in YAML so you have to quote this string
    #          ┌───────────── minute (0 - 59)
    #          │ ┌───────────── hour (0 - 23)
    #          │ │ ┌───────────── day of the month (1 - 31)
    #          │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
    #          │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
    - cron:  '45 1 * * *' # run once a day, timezone is utc
  workflow_dispatch: # adds ability to run this manually
 defaults:
  run:
    shell: bash -euxo pipefail {0}
 concurrency:
  # Allow only one workflow
  group: ${{ github.workflow }}
  cancel-in-progress: true
 jobs:
  regress:
    env:
      POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
      DEFAULT_PG_VERSION: 16
      TEST_OUTPUT: /tmp/test_output
      BUILD_TYPE: remote
      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
    runs-on: us-east-2
    container:
      image: neondatabase/build-tools:pinned
      options: --init
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: true
      - name: Patch the test
        run: |
          cd "vendor/postgres-v${DEFAULT_PG_VERSION}"
          patch -p1 < "../../compute/patches/cloud_regress_pg${DEFAULT_PG_VERSION}.patch"
      - name: Generate a random password
        id: pwgen
        run: |
          set +x
          DBPASS=$(dd if=/dev/random bs=48 count=1 2>/dev/null | base64)
          echo "::add-mask::${DBPASS//\//}"
          echo DBPASS="${DBPASS//\//}" >> "${GITHUB_OUTPUT}"
      - name: Change tests according to the generated password
        env:
          DBPASS: ${{ steps.pwgen.outputs.DBPASS }}
        run: |
          cd vendor/postgres-v"${DEFAULT_PG_VERSION}"/src/test/regress
          for fname in sql/*.sql expected/*.out; do
            sed -i.bak s/NEON_PASSWORD_PLACEHOLDER/"'${DBPASS}'"/ "${fname}"
          done
          for ph in $(grep NEON_MD5_PLACEHOLDER expected/password.out | awk '{print $3;}' | sort | uniq); do
            USER=$(echo "${ph}" | cut -c 22-)
            MD5=md5$(echo -n "${DBPASS}${USER}" | md5sum | awk '{print $1;}')
            sed -i.bak "s/${ph}/${MD5}/" expected/password.out
          done
      - name: Download Neon artifact
        uses: ./.github/actions/download
        with:
          name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
          path: /tmp/neon/
          prefix: latest
      - name: Run the regression tests
        uses: ./.github/actions/run-python-test-set
        with:
          build_type: ${{ env.BUILD_TYPE }}
          test_selection: cloud_regress
          pg_version: ${{ env.DEFAULT_PG_VERSION }}
          extra_params: -m remote_cluster
        env:
          BENCHMARK_CONNSTR: ${{ secrets.PG_REGRESS_CONNSTR }}
      - name: Create Allure report
        id: create-allure-report
        if: ${{ !cancelled() }}
        uses: ./.github/actions/allure-report-generate
      - name: Post to a Slack channel
        if: ${{ github.event.schedule && failure() }}
        uses: slackapi/slack-github-action@v1
        with:
          channel-id: "C033QLM5P7D" # on-call-staging-stream
          slack-message: |
            Periodic pg_regress on staging: ${{ job.status }}
            <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
            <${{ steps.create-allure-report.outputs.report-url }}|Allure report>
        env:
          SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
--- a/.github/workflows/trigger-e2e-tests.yml
+++ b/.github/workflows/trigger-e2e-tests.yml
@@ -102,12 +102,12 @@ jobs:
          # Default set of platforms to run e2e tests on
          platforms='["docker", "k8s"]'
-          # If the PR changes vendor/, pgxn/ or libs/vm_monitor/ directories, or Dockerfile.compute-node, add k8s-neonvm to the list of platforms.
+          # If the PR changes vendor/, pgxn/ or libs/vm_monitor/ directories, or compute/Dockerfile.compute-node, add k8s-neonvm to the list of platforms.
          # If the workflow run is not a pull request, add k8s-neonvm to the list.
          if [ "$GITHUB_EVENT_NAME" == "pull_request" ]; then
            for f in $(gh api "/repos/${GITHUB_REPOSITORY}/pulls/${PR_NUMBER}/files" --paginate --jq '.[].filename'); do
              case "$f" in
-                vendor/*|pgxn/*|libs/vm_monitor/*|Dockerfile.compute-node)
+                vendor/*|pgxn/*|libs/vm_monitor/*|compute/Dockerfile.compute-node)
                  platforms=$(echo "${platforms}" | jq --compact-output '. += ["k8s-neonvm"] | unique')
                  ;;
                *)
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -255,12 +255,6 @@ dependencies = [
 "syn 2.0.52",
 ]
 [[package]]
 name = "atomic"
 version = "0.5.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c59bdb34bc650a32731b31bd8f0829cc15d24a708ee31559e0bb34f2bc320cba"
 [[package]]
 name = "atomic-take"
 version = "1.1.0"
@@ -295,8 +289,8 @@ dependencies = [
 "fastrand 2.0.0",
 "hex",
 "http 0.2.9",
- "hyper 0.14.26",
+ "hyper 0.14.30",
- "ring 0.17.6",
+ "ring",
 "time",
 "tokio",
 "tracing",
@@ -486,7 +480,7 @@ dependencies = [
 "once_cell",
 "p256 0.11.1",
 "percent-encoding",
- "ring 0.17.6",
+ "ring",
 "sha2",
 "subtle",
 "time",
@@ -593,7 +587,7 @@ dependencies = [
 "http 0.2.9",
 "http-body 0.4.5",
 "http-body 1.0.0",
- "hyper 0.14.26",
+ "hyper 0.14.30",
 "hyper-rustls 0.24.0",
 "once_cell",
 "pin-project-lite",
@@ -684,7 +678,7 @@ dependencies = [
 "futures-util",
 "http 0.2.9",
 "http-body 0.4.5",
- "hyper 0.14.26",
+ "hyper 0.14.30",
 "itoa",
 "matchit 0.7.0",
 "memchr",
@@ -1089,9 +1083,9 @@ dependencies = [
 [[package]]
 name = "ciborium"
-version = "0.2.1"
+version = "0.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "effd91f6c78e5a4ace8a5d3c0b6bfaec9e2baaef55f3efc00e45fb2e477ee926"
+checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e"
 dependencies = [
 "ciborium-io",
 "ciborium-ll",
@@ -1100,18 +1094,18 @@ dependencies = [
 [[package]]
 name = "ciborium-io"
-version = "0.2.1"
+version = "0.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cdf919175532b369853f5d5e20b26b43112613fd6fe7aee757e35f7a44642656"
+checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757"
 [[package]]
 name = "ciborium-ll"
-version = "0.2.1"
+version = "0.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "defaa24ecc093c77630e6c15e17c51f5e187bf35ee514f4e2d67baaa96dae22b"
+checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9"
 dependencies = [
 "ciborium-io",
- "half 1.8.2",
+ "half",
 ]
 [[package]]
@@ -1224,7 +1218,7 @@ dependencies = [
 "compute_api",
 "flate2",
 "futures",
- "hyper 0.14.26",
+ "hyper 0.14.30",
 "nix 0.27.1",
 "notify",
 "num_cpus",
@@ -1327,10 +1321,9 @@ dependencies = [
 "clap",
 "comfy-table",
 "compute_api",
 "git-version",
 "humantime",
 "humantime-serde",
- "hyper 0.14.26",
+ "hyper 0.14.30",
 "nix 0.27.1",
 "once_cell",
 "pageserver_api",
@@ -2304,12 +2297,6 @@ dependencies = [
 "tracing",
 ]
 [[package]]
 name = "half"
 version = "1.8.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7"
 [[package]]
 name = "half"
 version = "2.4.1"
@@ -2411,17 +2398,6 @@ dependencies = [
 "digest",
 ]
 [[package]]
 name = "hostname"
 version = "0.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3c731c3e10504cc8ed35cfe2f1db4c9274c3d35fa486e3b31df46f068ef3e867"
 dependencies = [
 "libc",
 "match_cfg",
 "winapi",
 ]
 [[package]]
 name = "hostname"
 version = "0.4.0"
@@ -2430,7 +2406,7 @@ checksum = "f9c7c7c8ac16c798734b8a24560c1362120597c40d5e1459f09498f8f6c8f2ba"
 dependencies = [
 "cfg-if",
 "libc",
- "windows 0.52.0",
+ "windows",
 ]
 [[package]]
@@ -2539,9 +2515,9 @@ dependencies = [
 [[package]]
 name = "hyper"
-version = "0.14.26"
+version = "0.14.30"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ab302d72a6f11a3b910431ff93aae7e773078c769f0a3ef15fb9ec692ed147d4"
+checksum = "a152ddd61dfaec7273fe8419ab357f33aee0d914c5f4efbf0d96fa749eea5ec9"
 dependencies = [
 "bytes",
 "futures-channel",
@@ -2554,7 +2530,7 @@ dependencies = [
 "httpdate",
 "itoa",
 "pin-project-lite",
- "socket2 0.4.9",
+ "socket2",
 "tokio",
 "tower-service",
 "tracing",
@@ -2589,7 +2565,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0646026eb1b3eea4cd9ba47912ea5ce9cc07713d105b1a14698f4e6433d348b7"
 dependencies = [
 "http 0.2.9",
- "hyper 0.14.26",
+ "hyper 0.14.30",
 "log",
 "rustls 0.21.11",
 "rustls-native-certs 0.6.2",
@@ -2620,7 +2596,7 @@ version = "0.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bbb958482e8c7be4bc3cf272a766a2b0bf1a6755e7a6ae777f017a31d11b13b1"
 dependencies = [
- "hyper 0.14.26",
+ "hyper 0.14.30",
 "pin-project-lite",
 "tokio",
 "tokio-io-timeout",
@@ -2639,7 +2615,7 @@ dependencies = [
 "http-body 1.0.0",
 "hyper 1.2.0",
 "pin-project-lite",
- "socket2 0.5.5",
+ "socket2",
 "tokio",
 "tower",
 "tower-service",
@@ -2648,16 +2624,16 @@ dependencies = [
 [[package]]
 name = "iana-time-zone"
-version = "0.1.56"
+version = "0.1.61"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0722cd7114b7de04316e7ea5456a0bbb20e4adb46fd27a3697adb812cff0f37c"
+checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220"
 dependencies = [
 "android_system_properties",
 "core-foundation-sys",
 "iana-time-zone-haiku",
 "js-sys",
 "wasm-bindgen",
- "windows 0.48.0",
+ "windows-core",
 ]
 [[package]]
@@ -2870,7 +2846,7 @@ dependencies = [
 "base64 0.21.1",
 "js-sys",
 "pem",
- "ring 0.17.6",
+ "ring",
 "serde",
 "serde_json",
 "simple_asn1",
@@ -2908,11 +2884,11 @@ dependencies = [
 [[package]]
 name = "lazy_static"
-version = "1.4.0"
+version = "1.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
+checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
 dependencies = [
- "spin 0.5.2",
+ "spin",
 ]
 [[package]]
@@ -2974,12 +2950,6 @@ dependencies = [
 "hashbrown 0.14.5",
 ]
 [[package]]
 name = "match_cfg"
 version = "0.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ffbee8634e0d45d258acb448e7eaab3fce7a0a467395d4d9f228e3c1f01fb2e4"
 [[package]]
 name = "matchers"
 version = "0.1.0"
@@ -3072,15 +3042,6 @@ dependencies = [
 "autocfg",
 ]
 [[package]]
 name = "memoffset"
 version = "0.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d61c719bcfbcf5d62b3a09efa6088de8c54bc0bfcd3ea7ae39fcc186108b8de1"
 dependencies = [
 "autocfg",
 ]
 [[package]]
 name = "memoffset"
 version = "0.9.0"
@@ -3616,7 +3577,6 @@ dependencies = [
 "anyhow",
 "camino",
 "clap",
 "git-version",
 "humantime",
 "pageserver",
 "pageserver_api",
@@ -3655,12 +3615,11 @@ dependencies = [
 "enumset",
 "fail",
 "futures",
 "git-version",
 "hex",
 "hex-literal",
 "humantime",
 "humantime-serde",
- "hyper 0.14.26",
+ "hyper 0.14.30",
 "indoc",
 "itertools 0.10.5",
 "md5",
@@ -3775,7 +3734,6 @@ dependencies = [
 "clap",
 "criterion",
 "futures",
 "git-version",
 "hex-literal",
 "itertools 0.10.5",
 "once_cell",
@@ -3853,7 +3811,7 @@ dependencies = [
 "ahash",
 "bytes",
 "chrono",
- "half 2.4.1",
+ "half",
 "hashbrown 0.14.5",
 "num",
 "num-bigint",
@@ -4140,7 +4098,7 @@ dependencies = [
 "crc32c",
 "env_logger",
 "log",
- "memoffset 0.8.0",
+ "memoffset 0.9.0",
 "once_cell",
 "postgres",
 "regex",
@@ -4338,6 +4296,7 @@ dependencies = [
 "camino-tempfile",
 "chrono",
 "clap",
 "compute_api",
 "consumption_metrics",
 "dashmap",
 "ecdsa 0.16.9",
@@ -4345,17 +4304,16 @@ dependencies = [
 "fallible-iterator",
 "framed-websockets",
 "futures",
 "git-version",
 "hashbrown 0.14.5",
 "hashlink",
 "hex",
 "hmac",
- "hostname 0.3.1",
+ "hostname",
 "http 1.1.0",
 "http-body-util",
 "humantime",
 "humantime-serde",
- "hyper 0.14.26",
+ "hyper 0.14.30",
 "hyper 1.2.0",
 "hyper-util",
 "indexmap 2.0.1",
@@ -4400,7 +4358,7 @@ dependencies = [
 "signature 2.2.0",
 "smallvec",
 "smol_str",
- "socket2 0.5.5",
+ "socket2",
 "subtle",
 "thiserror",
 "tikv-jemalloc-ctl",
@@ -4578,7 +4536,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "48406db8ac1f3cbc7dcdb56ec355343817958a356ff430259bb07baf7607e1e1"
 dependencies = [
 "pem",
- "ring 0.17.6",
+ "ring",
 "time",
 "yasna",
 ]
@@ -4602,7 +4560,7 @@ dependencies = [
 "rustls-pki-types",
 "ryu",
 "sha1_smol",
- "socket2 0.5.5",
+ "socket2",
 "tokio",
 "tokio-rustls 0.25.0",
 "tokio-util",
@@ -4714,7 +4672,7 @@ dependencies = [
 "futures-util",
 "http-types",
 "humantime-serde",
- "hyper 0.14.26",
+ "hyper 0.14.30",
 "itertools 0.10.5",
 "metrics",
 "once_cell",
@@ -4747,7 +4705,7 @@ dependencies = [
 "h2 0.3.26",
 "http 0.2.9",
 "http-body 0.4.5",
- "hyper 0.14.26",
+ "hyper 0.14.30",
 "hyper-rustls 0.24.0",
 "ipnet",
 "js-sys",
@@ -4905,21 +4863,6 @@ dependencies = [
 "subtle",
 ]
 [[package]]
 name = "ring"
 version = "0.16.20"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc"
 dependencies = [
 "cc",
 "libc",
 "once_cell",
 "spin 0.5.2",
 "untrusted 0.7.1",
 "web-sys",
 "winapi",
 ]
 [[package]]
 name = "ring"
 version = "0.17.6"
@@ -4929,8 +4872,8 @@ dependencies = [
 "cc",
 "getrandom 0.2.11",
 "libc",
- "spin 0.9.8",
+ "spin",
- "untrusted 0.9.0",
+ "untrusted",
 "windows-sys 0.48.0",
 ]
@@ -4950,7 +4893,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "496c1d3718081c45ba9c31fbfc07417900aa96f4070ff90dc29961836b7a9945"
 dependencies = [
 "http 0.2.9",
- "hyper 0.14.26",
+ "hyper 0.14.30",
 "lazy_static",
 "percent-encoding",
 "regex",
@@ -5074,7 +5017,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7fecbfb7b1444f477b345853b1fce097a2c6fb637b2bfb87e6bc5db0f043fae4"
 dependencies = [
 "log",
- "ring 0.17.6",
+ "ring",
 "rustls-webpki 0.101.7",
 "sct",
 ]
@@ -5086,7 +5029,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bf4ef73721ac7bcd79b2b315da7779d8fc09718c6b3d2d1b2d94850eb8c18432"
 dependencies = [
 "log",
- "ring 0.17.6",
+ "ring",
 "rustls-pki-types",
 "rustls-webpki 0.102.2",
 "subtle",
@@ -5143,24 +5086,14 @@ version = "1.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5ede67b28608b4c60685c7d54122d4400d90f62b40caee7700e700380a390fa8"
 [[package]]
 name = "rustls-webpki"
 version = "0.100.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e98ff011474fa39949b7e5c0428f9b4937eda7da7848bbb947786b7be0b27dab"
 dependencies = [
 "ring 0.16.20",
 "untrusted 0.7.1",
 ]
 [[package]]
 name = "rustls-webpki"
 version = "0.101.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765"
 dependencies = [
- "ring 0.17.6",
+ "ring",
- "untrusted 0.9.0",
+ "untrusted",
 ]
 [[package]]
@@ -5169,9 +5102,9 @@ version = "0.102.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "faaa0a62740bedb9b2ef5afa303da42764c012f743917351dc9a237ea1663610"
 dependencies = [
- "ring 0.17.6",
+ "ring",
 "rustls-pki-types",
- "untrusted 0.9.0",
+ "untrusted",
 ]
 [[package]]
@@ -5202,10 +5135,9 @@ dependencies = [
 "desim",
 "fail",
 "futures",
 "git-version",
 "hex",
 "humantime",
- "hyper 0.14.26",
+ "hyper 0.14.30",
 "metrics",
 "once_cell",
 "parking_lot 0.12.1",
@@ -5262,11 +5194,11 @@ dependencies = [
 [[package]]
 name = "schannel"
-version = "0.1.21"
+version = "0.1.23"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "713cfb06c7059f3588fb8044c0fad1d09e3c01d225e25b9220dbfdcf16dbb1b3"
+checksum = "fbc91545643bcf3a0bbb6569265615222618bdf33ce4ffbbd13c4bbd4c093534"
 dependencies = [
- "windows-sys 0.42.0",
+ "windows-sys 0.52.0",
 ]
 [[package]]
@@ -5290,8 +5222,8 @@ version = "0.7.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414"
 dependencies = [
- "ring 0.17.6",
+ "ring",
- "untrusted 0.9.0",
+ "untrusted",
 ]
 [[package]]
@@ -5400,7 +5332,7 @@ version = "0.32.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "eba8870c5dba2bfd9db25c75574a11429f6b95957b0a78ac02e2970dd7a5249a"
 dependencies = [
- "hostname 0.4.0",
+ "hostname",
 "libc",
 "os_info",
 "rustc_version",
@@ -5712,16 +5644,6 @@ dependencies = [
 "serde",
 ]
 [[package]]
 name = "socket2"
 version = "0.4.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "64a4a911eed85daf18834cfaa86a79b7d266ff93ff5ba14005426219480ed662"
 dependencies = [
 "libc",
 "winapi",
 ]
 [[package]]
 name = "socket2"
 version = "0.5.5"
@@ -5732,12 +5654,6 @@ dependencies = [
 "windows-sys 0.48.0",
 ]
 [[package]]
 name = "spin"
 version = "0.5.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d"
 [[package]]
 name = "spin"
 version = "0.9.8"
@@ -5781,9 +5697,8 @@ dependencies = [
 "futures",
 "futures-core",
 "futures-util",
 "git-version",
 "humantime",
- "hyper 0.14.26",
+ "hyper 0.14.30",
 "metrics",
 "once_cell",
 "parking_lot 0.12.1",
@@ -5809,10 +5724,9 @@ dependencies = [
 "diesel_migrations",
 "fail",
 "futures",
 "git-version",
 "hex",
 "humantime",
- "hyper 0.14.26",
+ "hyper 0.14.30",
 "itertools 0.10.5",
 "lasso",
 "measured",
@@ -5862,7 +5776,6 @@ dependencies = [
 "either",
 "futures",
 "futures-util",
 "git-version",
 "hex",
 "humantime",
 "itertools 0.10.5",
@@ -6228,7 +6141,7 @@ dependencies = [
 "num_cpus",
 "pin-project-lite",
 "signal-hook-registry",
- "socket2 0.5.5",
+ "socket2",
 "tokio-macros",
 "windows-sys 0.48.0",
 ]
@@ -6288,7 +6201,7 @@ dependencies = [
 "pin-project-lite",
 "postgres-protocol",
 "postgres-types",
- "socket2 0.5.5",
+ "socket2",
 "tokio",
 "tokio-util",
 ]
@@ -6300,7 +6213,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0ea13f22eda7127c827983bdaf0d7fff9df21c8817bab02815ac277a21143677"
 dependencies = [
 "futures",
- "ring 0.17.6",
+ "ring",
 "rustls 0.22.4",
 "tokio",
 "tokio-postgres",
@@ -6434,7 +6347,7 @@ dependencies = [
 "h2 0.3.26",
 "http 0.2.9",
 "http-body 0.4.5",
- "hyper 0.14.26",
+ "hyper 0.14.30",
 "hyper-timeout",
 "percent-encoding",
 "pin-project",
@@ -6611,7 +6524,7 @@ dependencies = [
 name = "tracing-utils"
 version = "0.1.0"
 dependencies = [
- "hyper 0.14.26",
+ "hyper 0.14.30",
 "opentelemetry",
 "opentelemetry-otlp",
 "opentelemetry-semantic-conventions",
@@ -6714,12 +6627,6 @@ version = "0.2.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c"
 [[package]]
 name = "untrusted"
 version = "0.7.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a"
 [[package]]
 name = "untrusted"
 version = "0.9.0"
@@ -6728,17 +6635,18 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"
 [[package]]
 name = "ureq"
-version = "2.7.1"
+version = "2.9.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0b11c96ac7ee530603dcdf68ed1557050f374ce55a5a07193ebf8cbc9f8927e9"
+checksum = "d11a831e3c0b56e438a28308e7c810799e3c118417f342d30ecec080105395cd"
 dependencies = [
- "base64 0.21.1",
+ "base64 0.22.1",
 "log",
 "once_cell",
- "rustls 0.21.11",
+ "rustls 0.22.4",
- "rustls-webpki 0.100.2",
+ "rustls-pki-types",
 "rustls-webpki 0.102.2",
 "url",
- "webpki-roots 0.23.1",
+ "webpki-roots 0.26.1",
 ]
 [[package]]
@@ -6799,10 +6707,11 @@ dependencies = [
 "criterion",
 "fail",
 "futures",
 "git-version",
 "hex",
 "hex-literal",
 "humantime",
- "hyper 0.14.26",
+ "hyper 0.14.30",
 "jsonwebtoken",
 "metrics",
 "nix 0.27.1",
@@ -6837,11 +6746,10 @@ dependencies = [
 [[package]]
 name = "uuid"
-version = "1.6.1"
+version = "1.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5e395fcf16a7a3d8127ec99782007af141946b4795001f876d54fb0d55978560"
+checksum = "81dfa00651efa65069b0b6b651f4aaa31ba9e3c3ce0137aaad053604ee7e0314"
 dependencies = [
 "atomic",
 "getrandom 0.2.11",
 "serde",
 ]
@@ -7075,15 +6983,6 @@ dependencies = [
 "wasm-bindgen",
 ]
 [[package]]
 name = "webpki-roots"
 version = "0.23.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b03058f88386e5ff5310d9111d53f48b17d732b401aeb83a8d5190f2ac459338"
 dependencies = [
 "rustls-webpki 0.100.2",
 ]
 [[package]]
 name = "webpki-roots"
 version = "0.25.2"
@@ -7152,15 +7051,6 @@ version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
 [[package]]
 name = "windows"
 version = "0.48.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f"
 dependencies = [
 "windows-targets 0.48.0",
 ]
 [[package]]
 name = "windows"
 version = "0.52.0"
@@ -7180,21 +7070,6 @@ dependencies = [
 "windows-targets 0.52.4",
 ]
 [[package]]
 name = "windows-sys"
 version = "0.42.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7"
 dependencies = [
 "windows_aarch64_gnullvm 0.42.2",
 "windows_aarch64_msvc 0.42.2",
 "windows_i686_gnu 0.42.2",
 "windows_i686_msvc 0.42.2",
 "windows_x86_64_gnu 0.42.2",
 "windows_x86_64_gnullvm 0.42.2",
 "windows_x86_64_msvc 0.42.2",
 ]
 [[package]]
 name = "windows-sys"
 version = "0.48.0"
@@ -7243,12 +7118,6 @@ dependencies = [
 "windows_x86_64_msvc 0.52.4",
 ]
 [[package]]
 name = "windows_aarch64_gnullvm"
 version = "0.42.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8"
 [[package]]
 name = "windows_aarch64_gnullvm"
 version = "0.48.0"
@@ -7261,12 +7130,6 @@ version = "0.52.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9"
 [[package]]
 name = "windows_aarch64_msvc"
 version = "0.42.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43"
 [[package]]
 name = "windows_aarch64_msvc"
 version = "0.48.0"
@@ -7279,12 +7142,6 @@ version = "0.52.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675"
 [[package]]
 name = "windows_i686_gnu"
 version = "0.42.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f"
 [[package]]
 name = "windows_i686_gnu"
 version = "0.48.0"
@@ -7297,12 +7154,6 @@ version = "0.52.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3"
 [[package]]
 name = "windows_i686_msvc"
 version = "0.42.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060"
 [[package]]
 name = "windows_i686_msvc"
 version = "0.48.0"
@@ -7315,12 +7166,6 @@ version = "0.52.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02"
 [[package]]
 name = "windows_x86_64_gnu"
 version = "0.42.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36"
 [[package]]
 name = "windows_x86_64_gnu"
 version = "0.48.0"
@@ -7333,12 +7178,6 @@ version = "0.52.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03"
 [[package]]
 name = "windows_x86_64_gnullvm"
 version = "0.42.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3"
 [[package]]
 name = "windows_x86_64_gnullvm"
 version = "0.48.0"
@@ -7351,12 +7190,6 @@ version = "0.52.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177"
 [[package]]
 name = "windows_x86_64_msvc"
 version = "0.42.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0"
 [[package]]
 name = "windows_x86_64_msvc"
 version = "0.48.0"
@@ -7433,10 +7266,11 @@ dependencies = [
 "futures-util",
 "generic-array",
 "getrandom 0.2.11",
 "half",
 "hashbrown 0.14.5",
 "hex",
 "hmac",
- "hyper 0.14.26",
+ "hyper 0.14.30",
 "indexmap 1.9.3",
 "itertools 0.10.5",
 "itertools 0.12.1",
@@ -7504,7 +7338,7 @@ dependencies = [
 "der 0.7.8",
 "hex",
 "pem",
- "ring 0.17.6",
+ "ring",
 "signature 2.2.0",
 "spki 0.7.3",
 "thiserror",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -76,8 +76,6 @@ clap = { version = "4.0", features = ["derive"] }
 comfy-table = "7.1"
 const_format = "0.2"
 crc32c = "0.6"
 crossbeam-deque = "0.8.5"
 crossbeam-utils = "0.8.5"
 dashmap = { version = "5.5.0", features = ["raw-api"] }
 either = "1.8"
 enum-map = "2.4.2"
@@ -95,7 +93,7 @@ hdrhistogram = "7.5.2"
 hex = "0.4"
 hex-literal = "0.4"
 hmac = "0.12.1"
-hostname = "0.3.1"
+hostname = "0.4"
 http = {version = "1.1.0", features = ["std"]}
 http-types = { version = "2", default-features = false }
 humantime = "2.1"
@@ -104,7 +102,6 @@ hyper = "0.14"
 tokio-tungstenite = "0.20.0"
 indexmap = "2"
 indoc = "2"
 inotify = "0.10.2"
 ipnet = "2.9.0"
 itertools = "0.10"
 jsonwebtoken = "9"
@@ -113,7 +110,7 @@ libc = "0.2"
 md5 = "0.7.0"
 measured = { version = "0.0.22", features=["lasso"] }
 measured-process = { version = "0.0.22" }
-memoffset = "0.8"
+memoffset = "0.9"
 nix = { version = "0.27", features = ["dir", "fs", "process", "socket", "signal", "poll"] }
 notify = "6.0.0"
 num_cpus = "1.15"
@@ -142,7 +139,6 @@ rpds = "0.13"
 rustc-hash = "1.1.0"
 rustls = "0.22"
 rustls-pemfile = "2"
 rustls-split = "0.3"
 scopeguard = "1.1"
 sysinfo = "0.29.2"
 sd-notify = "0.4.1"
@@ -164,7 +160,6 @@ strum_macros = "0.26"
 svg_fmt = "0.4.3"
 sync_wrapper = "0.1.2"
 tar = "0.4"
 task-local-extensions = "0.1.4"
 test-context = "0.3"
 thiserror = "1.0"
 tikv-jemallocator = "0.5"
--- a/compute/Dockerfile.compute-node
+++ b/compute/Dockerfile.compute-node
@@ -3,13 +3,15 @@ ARG REPOSITORY=neondatabase
 ARG IMAGE=build-tools
 ARG TAG=pinned
 ARG BUILD_TAG
 ARG DEBIAN_FLAVOR=bullseye-slim
 #########################################################################################
 #
 # Layer "build-deps"
 #
 #########################################################################################
-FROM debian:bullseye-slim AS build-deps
+FROM debian:$DEBIAN_FLAVOR AS build-deps
 ARG DEBIAN_FLAVOR
 RUN apt update &&  \
    apt install -y git autoconf automake libtool build-essential bison flex libreadline-dev \
    zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget pkg-config libssl-dev \
@@ -280,7 +282,7 @@ FROM build-deps AS vector-pg-build
 ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
-COPY patches/pgvector.patch /pgvector.patch
+COPY compute/patches/pgvector.patch /pgvector.patch
 # By default, pgvector Makefile uses `-march=native`. We don't want that,
 # because we build the images on different machines than where we run them.
@@ -366,7 +368,7 @@ FROM build-deps AS rum-pg-build
 ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
-COPY patches/rum.patch /rum.patch
+COPY compute/patches/rum.patch /rum.patch
 RUN case "${PG_VERSION}" in "v17") \
    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
@@ -1027,10 +1029,47 @@ RUN cd compute_tools && mold -run cargo build --locked --profile release-line-de
 #
 #########################################################################################
-FROM debian:bullseye-slim AS compute-tools-image
+FROM debian:$DEBIAN_FLAVOR AS compute-tools-image
 ARG DEBIAN_FLAVOR
 COPY --from=compute-tools /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl
 #########################################################################################
 #
 # Layer "pgbouncer"
 #
 #########################################################################################
 FROM debian:$DEBIAN_FLAVOR AS pgbouncer
 ARG DEBIAN_FLAVOR
 RUN set -e \
    && apt-get update \
    && apt-get install -y \
        build-essential \
        git \
        libevent-dev \
        libtool \
        pkg-config
 # Use `dist_man_MANS=` to skip manpage generation (which requires python3/pandoc)
 ENV PGBOUNCER_TAG=pgbouncer_1_22_1
 RUN set -e \
    && git clone --recurse-submodules --depth 1 --branch ${PGBOUNCER_TAG} https://github.com/pgbouncer/pgbouncer.git pgbouncer \
    && cd pgbouncer \
    && ./autogen.sh \
    && LDFLAGS=-static ./configure --prefix=/usr/local/pgbouncer --without-openssl \
    && make -j $(nproc) dist_man_MANS= \
    && make install dist_man_MANS=
 #########################################################################################
 #
 # Layers "postgres-exporter" and "sql-exporter"
 #
 #########################################################################################
 FROM quay.io/prometheuscommunity/postgres-exporter:v0.12.1 AS postgres-exporter
 FROM burningalchemist/sql_exporter:0.13 AS sql-exporter
 #########################################################################################
 #
 # Clean up postgres folder before inclusion
@@ -1078,7 +1117,7 @@ COPY --from=pgjwt-pg-build /pgjwt.tar.gz /ext-src
 COPY --from=hypopg-pg-build /hypopg.tar.gz /ext-src
 COPY --from=pg-hashids-pg-build /pg_hashids.tar.gz /ext-src
 COPY --from=rum-pg-build /rum.tar.gz /ext-src
-COPY patches/rum.patch /ext-src
+COPY compute/patches/rum.patch /ext-src
 #COPY --from=pgtap-pg-build /pgtap.tar.gz /ext-src
 COPY --from=ip4r-pg-build /ip4r.tar.gz /ext-src
 COPY --from=prefix-pg-build /prefix.tar.gz /ext-src
@@ -1086,9 +1125,9 @@ COPY --from=hll-pg-build /hll.tar.gz /ext-src
 COPY --from=plpgsql-check-pg-build /plpgsql_check.tar.gz /ext-src
 #COPY --from=timescaledb-pg-build /timescaledb.tar.gz /ext-src
 COPY --from=pg-hint-plan-pg-build /pg_hint_plan.tar.gz /ext-src
-COPY patches/pg_hint_plan.patch /ext-src
+COPY compute/patches/pg_hint_plan.patch /ext-src
 COPY --from=pg-cron-pg-build /pg_cron.tar.gz /ext-src
-COPY patches/pg_cron.patch /ext-src
+COPY compute/patches/pg_cron.patch /ext-src
 #COPY --from=pg-pgx-ulid-build /home/nonroot/pgx_ulid.tar.gz /ext-src
 #COPY --from=rdkit-pg-build /rdkit.tar.gz /ext-src
 COPY --from=pg-uuidv7-pg-build /pg_uuidv7.tar.gz /ext-src
@@ -1097,7 +1136,7 @@ COPY --from=pg-semver-pg-build /pg_semver.tar.gz /ext-src
 #COPY --from=pg-embedding-pg-build /home/nonroot/pg_embedding-src/ /ext-src
 #COPY --from=wal2json-pg-build /wal2json_2_5.tar.gz /ext-src
 COPY --from=pg-anon-pg-build /pg_anon.tar.gz /ext-src
-COPY patches/pg_anon.patch /ext-src
+COPY compute/patches/pg_anon.patch /ext-src
 COPY --from=pg-ivm-build /pg_ivm.tar.gz /ext-src
 COPY --from=pg-partman-build /pg_partman.tar.gz /ext-src
 RUN case "${PG_VERSION}" in "v17") \
@@ -1144,7 +1183,9 @@ ENV PGDATABASE=postgres
 # Put it all together into the final image
 #
 #########################################################################################
-FROM debian:bullseye-slim
+FROM debian:$DEBIAN_FLAVOR
 ARG DEBIAN_FLAVOR
 ENV DEBIAN_FLAVOR=$DEBIAN_FLAVOR
 # Add user postgres
 RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
    echo "postgres:test_console_pass" | chpasswd && \
@@ -1160,23 +1201,50 @@ RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
 COPY --from=postgres-cleanup-layer --chown=postgres /usr/local/pgsql /usr/local
 COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl
 # pgbouncer and its config
 COPY --from=pgbouncer         /usr/local/pgbouncer/bin/pgbouncer /usr/local/bin/pgbouncer
 COPY --chmod=0666 --chown=postgres compute/etc/pgbouncer.ini /etc/pgbouncer.ini
 # Metrics exporter binaries and  configuration files
 COPY --from=postgres-exporter /bin/postgres_exporter /bin/postgres_exporter
 COPY --from=sql-exporter      /bin/sql_exporter      /bin/sql_exporter
 COPY --chmod=0644 compute/etc/sql_exporter.yml               /etc/sql_exporter.yml
 COPY --chmod=0644 compute/etc/neon_collector.yml             /etc/neon_collector.yml
 COPY --chmod=0644 compute/etc/sql_exporter_autoscaling.yml   /etc/sql_exporter_autoscaling.yml
 COPY --chmod=0644 compute/etc/neon_collector_autoscaling.yml /etc/neon_collector_autoscaling.yml
 # Create remote extension download directory
 RUN mkdir /usr/local/download_extensions && chown -R postgres:postgres /usr/local/download_extensions
 # Install:
 # libreadline8 for psql
 # libicu67, locales for collations (including ICU and plpgsql_check)
 # liblz4-1 for lz4
 # libossp-uuid16 for extension ossp-uuid
-# libgeos, libgdal, libsfcgal1, libproj and libprotobuf-c1 for PostGIS
+# libgeos, libsfcgal1, and libprotobuf-c1 for PostGIS
 # libxml2, libxslt1.1 for xml2
 # libzstd1 for zstd
 # libboost* for rdkit
 # ca-certificates for communicating with s3 by compute_ctl
-RUN apt update &&  \
+
 RUN apt update && \
    case $DEBIAN_FLAVOR in \
      # Version-specific installs for Bullseye (PG14-PG16):
      # libicu67, locales for collations (including ICU and plpgsql_check)
      # libgdal28, libproj19 for PostGIS
      bullseye*) \
        VERSION_INSTALLS="libicu67 libgdal28 libproj19"; \
      ;; \
      # Version-specific installs for Bookworm (PG17):
      # libicu72, locales for collations (including ICU and plpgsql_check)
      # libgdal32, libproj25 for PostGIS
      bookworm*) \
        VERSION_INSTALLS="libicu72 libgdal32 libproj25"; \
      ;; \
    esac && \
    apt install --no-install-recommends -y \
        gdb \
        libicu67 \
        liblz4-1 \
        libreadline8 \
        libboost-iostreams1.74.0 \
@@ -1185,8 +1253,6 @@ RUN apt update &&  \
        libboost-system1.74.0 \
        libossp-uuid16 \
        libgeos-c1v5 \
        libgdal28 \
        libproj19 \
        libprotobuf-c1 \
        libsfcgal1 \
        libxml2 \
@@ -1195,7 +1261,8 @@ RUN apt update &&  \
        libcurl4-openssl-dev \
        locales \
        procps \
-        ca-certificates && \
+        ca-certificates \
        $VERSION_INSTALLS && \
    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
    localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8
--- a/compute/README.md
+++ b/compute/README.md
@@ -0,0 +1,21 @@
 This directory contains files that are needed to build the compute
 images, or included in the compute images.
 Dockerfile.compute-node
 	To build the compute image
 vm-image-spec.yaml
 	Instructions for vm-builder, to turn the compute-node image into
 	corresponding vm-compute-node image.
 etc/
 	Configuration files included in /etc in the compute image
 patches/
 	Some extensions need to be patched to work with Neon. This
 	directory contains such patches. They are applied to the extension
 	sources in Dockerfile.compute-node
 In addition to these, postgres itself, the neon postgres extension,
 and compute_ctl are built and copied into the compute image by
 Dockerfile.compute-node.
--- a/compute/etc/neon_collector.yml
+++ b/compute/etc/neon_collector.yml
@@ -0,0 +1,246 @@
 collector_name: neon_collector
 metrics:
 - metric_name: lfc_misses
  type: gauge
  help: 'lfc_misses'
  key_labels:
  values: [lfc_misses]
  query: |
    select lfc_value as lfc_misses from neon.neon_lfc_stats where lfc_key='file_cache_misses';
 - metric_name: lfc_used
  type: gauge
  help: 'LFC chunks used (chunk = 1MB)'
  key_labels:
  values: [lfc_used]
  query: |
    select lfc_value as lfc_used from neon.neon_lfc_stats where lfc_key='file_cache_used';
 - metric_name: lfc_hits
  type: gauge
  help: 'lfc_hits'
  key_labels:
  values: [lfc_hits]
  query: |
    select lfc_value as lfc_hits from neon.neon_lfc_stats where lfc_key='file_cache_hits';
 - metric_name: lfc_writes
  type: gauge
  help: 'lfc_writes'
  key_labels:
  values: [lfc_writes]
  query: |
    select lfc_value as lfc_writes from neon.neon_lfc_stats where lfc_key='file_cache_writes';
 - metric_name: lfc_cache_size_limit
  type: gauge
  help: 'LFC cache size limit in bytes'
  key_labels:
  values: [lfc_cache_size_limit]
  query: |
    select pg_size_bytes(current_setting('neon.file_cache_size_limit')) as lfc_cache_size_limit;
 - metric_name: connection_counts
  type: gauge
  help: 'Connection counts'
  key_labels:
    - datname
    - state
  values: [count]
  query: |
    select datname, state, count(*) as count from pg_stat_activity where state <> '' group by datname, state;
 - metric_name: pg_stats_userdb
  type: gauge
  help: 'Stats for several oldest non-system dbs'
  key_labels:
    - datname
  value_label: kind
  values:
    - db_size
    - deadlocks
    # Rows
    - inserted
    - updated
    - deleted
  # We export stats for 10 non-system databases. Without this limit
  # it is too easy to abuse the system by creating lots of databases.
  query: |
    select pg_database_size(datname) as db_size, deadlocks,
       tup_inserted as inserted, tup_updated as updated, tup_deleted as deleted,
       datname
     from pg_stat_database
     where datname IN (
       select datname
       from pg_database
       where datname <> 'postgres' and not datistemplate
       order by oid
       limit 10
     );
 - metric_name: max_cluster_size
  type: gauge
  help: 'neon.max_cluster_size setting'
  key_labels:
  values: [max_cluster_size]
  query: |
    select setting::int as max_cluster_size from pg_settings where name = 'neon.max_cluster_size';
 - metric_name: db_total_size
  type: gauge
  help: 'Size of all databases'
  key_labels:
  values: [total]
  query: |
    select sum(pg_database_size(datname)) as total from pg_database;
 # DEPRECATED
 - metric_name: lfc_approximate_working_set_size
  type: gauge
  help: 'Approximate working set size in pages of 8192 bytes'
  key_labels:
  values: [approximate_working_set_size]
  query: |
    select neon.approximate_working_set_size(false) as approximate_working_set_size;
 - metric_name: lfc_approximate_working_set_size_windows
  type: gauge
  help: 'Approximate working set size in pages of 8192 bytes'
  key_labels: [duration]
  values: [size]
  # NOTE: This is the "public" / "human-readable" version. Here, we supply a small selection
  # of durations in a pretty-printed form.
  query: |
    select
      x as duration,
      neon.approximate_working_set_size_seconds(extract('epoch' from x::interval)::int) as size
    from
      (values ('5m'),('15m'),('1h')) as t (x);
 - metric_name: compute_current_lsn
  type: gauge
  help: 'Current LSN of the database'
  key_labels:
  values: [lsn]
  query: |
    select
      case
        when pg_catalog.pg_is_in_recovery()
        then (pg_last_wal_replay_lsn() - '0/0')::FLOAT8
        else (pg_current_wal_lsn() - '0/0')::FLOAT8
      end as lsn;
 - metric_name: compute_receive_lsn
  type: gauge
  help: 'Returns the last write-ahead log location that has been received and synced to disk by streaming replication'
  key_labels:
  values: [lsn]
  query: |
    SELECT
      CASE
        WHEN pg_catalog.pg_is_in_recovery()
        THEN (pg_last_wal_receive_lsn() - '0/0')::FLOAT8
        ELSE 0
      END AS lsn;
 - metric_name: replication_delay_bytes
  type: gauge
  help: 'Bytes between received and replayed LSN'
  key_labels:
  values: [replication_delay_bytes]
  # We use a GREATEST call here because this calculation can be negative.
  # The calculation is not atomic, meaning after we've gotten the receive
  # LSN, the replay LSN may have advanced past the receive LSN we
  # are using for the calculation.
  query: |
    SELECT GREATEST(0, pg_wal_lsn_diff(pg_last_wal_receive_lsn(), pg_last_wal_replay_lsn())) AS replication_delay_bytes;
 - metric_name: replication_delay_seconds
  type: gauge
  help: 'Time since last LSN was replayed'
  key_labels:
  values: [replication_delay_seconds]
  query: |
    SELECT
      CASE
        WHEN pg_last_wal_receive_lsn() = pg_last_wal_replay_lsn() THEN 0
        ELSE GREATEST (0, EXTRACT (EPOCH FROM now() - pg_last_xact_replay_timestamp()))
     END AS replication_delay_seconds;
 - metric_name: checkpoints_req
  type: gauge
  help: 'Number of requested checkpoints'
  key_labels:
  values: [checkpoints_req]
  query: |
    SELECT checkpoints_req FROM pg_stat_bgwriter;
 - metric_name: checkpoints_timed
  type: gauge
  help: 'Number of scheduled checkpoints'
  key_labels:
  values: [checkpoints_timed]
  query: |
    SELECT checkpoints_timed FROM pg_stat_bgwriter;
 - metric_name: compute_logical_snapshot_files
  type: gauge
  help: 'Number of snapshot files in pg_logical/snapshot'
  key_labels:
    - timeline_id
  values: [num_logical_snapshot_files]
  query: |
    SELECT
      (SELECT setting FROM pg_settings WHERE name = 'neon.timeline_id') AS timeline_id,
      -- Postgres creates temporary snapshot files of the form %X-%X.snap.%d.tmp. These
      -- temporary snapshot files are renamed to the actual snapshot files after they are
      -- completely built. We only WAL-log the completely built snapshot files.
      (SELECT COUNT(*) FROM pg_ls_dir('pg_logical/snapshots') AS name WHERE name LIKE '%.snap') AS num_logical_snapshot_files;
 # In all the below metrics, we cast LSNs to floats because Prometheus only supports floats.
 # It's probably fine because float64 can store integers from -2^53 to +2^53 exactly.
 # Number of slots is limited by max_replication_slots, so collecting position for all of them shouldn't be bad.
 - metric_name: logical_slot_restart_lsn
  type: gauge
  help: 'restart_lsn of logical slots'
  key_labels:
    - slot_name
  values: [restart_lsn]
  query: |
    select slot_name, (restart_lsn - '0/0')::FLOAT8 as restart_lsn
    from pg_replication_slots
    where slot_type = 'logical';
 - metric_name: compute_subscriptions_count
  type: gauge
  help: 'Number of logical replication subscriptions grouped by enabled/disabled'
  key_labels:
    - enabled
  values: [subscriptions_count]
  query: |
    select subenabled::text as enabled, count(*) as subscriptions_count
    from pg_subscription
    group by subenabled;
 - metric_name: retained_wal
  type: gauge
  help: 'Retained WAL in inactive replication slots'
  key_labels:
    - slot_name
  values: [retained_wal]
  query: |
    SELECT slot_name, pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn)::FLOAT8 AS retained_wal
    FROM pg_replication_slots
    WHERE active = false;
 - metric_name: wal_is_lost
  type: gauge
  help: 'Whether or not the replication slot wal_status is lost'
  key_labels:
    - slot_name
  values: [wal_is_lost]
  query: |
    SELECT slot_name,
           CASE WHEN wal_status = 'lost' THEN 1 ELSE 0 END AS wal_is_lost
    FROM pg_replication_slots;
--- a/compute/etc/neon_collector_autoscaling.yml
+++ b/compute/etc/neon_collector_autoscaling.yml
@@ -0,0 +1,55 @@
 collector_name: neon_collector_autoscaling
 metrics:
 - metric_name: lfc_misses
  type: gauge
  help: 'lfc_misses'
  key_labels:
  values: [lfc_misses]
  query: |
    select lfc_value as lfc_misses from neon.neon_lfc_stats where lfc_key='file_cache_misses';
 - metric_name: lfc_used
  type: gauge
  help: 'LFC chunks used (chunk = 1MB)'
  key_labels:
  values: [lfc_used]
  query: |
    select lfc_value as lfc_used from neon.neon_lfc_stats where lfc_key='file_cache_used';
 - metric_name: lfc_hits
  type: gauge
  help: 'lfc_hits'
  key_labels:
  values: [lfc_hits]
  query: |
    select lfc_value as lfc_hits from neon.neon_lfc_stats where lfc_key='file_cache_hits';
 - metric_name: lfc_writes
  type: gauge
  help: 'lfc_writes'
  key_labels:
  values: [lfc_writes]
  query: |
    select lfc_value as lfc_writes from neon.neon_lfc_stats where lfc_key='file_cache_writes';
 - metric_name: lfc_cache_size_limit
  type: gauge
  help: 'LFC cache size limit in bytes'
  key_labels:
  values: [lfc_cache_size_limit]
  query: |
    select pg_size_bytes(current_setting('neon.file_cache_size_limit')) as lfc_cache_size_limit;
 - metric_name: lfc_approximate_working_set_size_windows
  type: gauge
  help: 'Approximate working set size in pages of 8192 bytes'
  key_labels: [duration_seconds]
  values: [size]
  # NOTE: This is the "internal" / "machine-readable" version. This outputs the working set
  # size looking back 1..60 minutes, labeled with the look-back duration in seconds.
  query: |
    select
      x::text as duration_seconds,
      neon.approximate_working_set_size_seconds(x) as size
    from
      (select generate_series * 60 as x from generate_series(1, 60)) as t (x);
--- a/compute/etc/pgbouncer.ini
+++ b/compute/etc/pgbouncer.ini
@@ -0,0 +1,17 @@
 [databases]
 *=host=localhost port=5432 auth_user=cloud_admin
 [pgbouncer]
 listen_port=6432
 listen_addr=0.0.0.0
 auth_type=scram-sha-256
 auth_user=cloud_admin
 auth_dbname=postgres
 client_tls_sslmode=disable
 server_tls_sslmode=disable
 pool_mode=transaction
 max_client_conn=10000
 default_pool_size=64
 max_prepared_statements=0
 admin_users=postgres
 unix_socket_dir=/tmp/
 unix_socket_mode=0777
--- a/compute/etc/sql_exporter.yml
+++ b/compute/etc/sql_exporter.yml
@@ -0,0 +1,33 @@
 # Configuration for sql_exporter
 # Global defaults.
 global:
  # If scrape_timeout <= 0, no timeout is set unless Prometheus provides one. The default is 10s.
  scrape_timeout: 10s
  # Subtracted from Prometheus' scrape_timeout to give us some headroom and prevent Prometheus from timing out first.
  scrape_timeout_offset: 500ms
  # Minimum interval between collector runs: by default (0s) collectors are executed on every scrape.
  min_interval: 0s
  # Maximum number of open connections to any one target. Metric queries will run concurrently on multiple connections,
  # as will concurrent scrapes.
  max_connections: 1
  # Maximum number of idle connections to any one target. Unless you use very long collection intervals, this should
  # always be the same as max_connections.
  max_idle_connections: 1
  # Maximum amount of time a connection may be reused. Expired connections may be closed lazily before reuse.
  # If 0, connections are not closed due to a connection's age.
  max_connection_lifetime: 5m
 # The target to monitor and the collectors to execute on it.
 target:
  # Data source name always has a URI schema that matches the driver name. In some cases (e.g. MySQL)
  # the schema gets dropped or replaced to match the driver expected DSN format.
  data_source_name: 'postgresql://cloud_admin@127.0.0.1:5432/postgres?sslmode=disable&application_name=sql_exporter'
  # Collectors (referenced by name) to execute on the target.
  # Glob patterns are supported (see <https://pkg.go.dev/path/filepath#Match> for syntax).
  collectors: [neon_collector]
 # Collector files specifies a list of globs. One collector definition is read from each matching file.
 # Glob patterns are supported (see <https://pkg.go.dev/path/filepath#Match> for syntax).
 collector_files:
  - "neon_collector.yml"
--- a/compute/etc/sql_exporter_autoscaling.yml
+++ b/compute/etc/sql_exporter_autoscaling.yml
@@ -0,0 +1,33 @@
 # Configuration for sql_exporter for autoscaling-agent
 # Global defaults.
 global:
  # If scrape_timeout <= 0, no timeout is set unless Prometheus provides one. The default is 10s.
  scrape_timeout: 10s
  # Subtracted from Prometheus' scrape_timeout to give us some headroom and prevent Prometheus from timing out first.
  scrape_timeout_offset: 500ms
  # Minimum interval between collector runs: by default (0s) collectors are executed on every scrape.
  min_interval: 0s
  # Maximum number of open connections to any one target. Metric queries will run concurrently on multiple connections,
  # as will concurrent scrapes.
  max_connections: 1
  # Maximum number of idle connections to any one target. Unless you use very long collection intervals, this should
  # always be the same as max_connections.
  max_idle_connections: 1
  # Maximum amount of time a connection may be reused. Expired connections may be closed lazily before reuse.
  # If 0, connections are not closed due to a connection's age.
  max_connection_lifetime: 5m
 # The target to monitor and the collectors to execute on it.
 target:
  # Data source name always has a URI schema that matches the driver name. In some cases (e.g. MySQL)
  # the schema gets dropped or replaced to match the driver expected DSN format.
  data_source_name: 'postgresql://cloud_admin@127.0.0.1:5432/postgres?sslmode=disable&application_name=sql_exporter_autoscaling'
  # Collectors (referenced by name) to execute on the target.
  # Glob patterns are supported (see <https://pkg.go.dev/path/filepath#Match> for syntax).
  collectors: [neon_collector_autoscaling]
 # Collector files specifies a list of globs. One collector definition is read from each matching file.
 # Glob patterns are supported (see <https://pkg.go.dev/path/filepath#Match> for syntax).
 collector_files:
  - "neon_collector_autoscaling.yml"
--- a/compute/patches/cloud_regress_pg16.patch
+++ b/compute/patches/cloud_regress_pg16.patch
--- a/compute/patches/pg_anon.patch
+++ b/compute/patches/pg_anon.patch
--- a/compute/patches/pg_cron.patch
+++ b/compute/patches/pg_cron.patch
--- a/compute/patches/pg_hint_plan.patch
+++ b/compute/patches/pg_hint_plan.patch
--- a/compute/patches/pgvector.patch
+++ b/compute/patches/pgvector.patch
--- a/compute/patches/rum.patch
+++ b/compute/patches/rum.patch
--- a/compute/vm-image-spec.yaml
+++ b/compute/vm-image-spec.yaml
@@ -0,0 +1,112 @@
 # Supplemental file for neondatabase/autoscaling's vm-builder, for producing the VM compute image.
 ---
 commands:
  - name: cgconfigparser
    user: root
    sysvInitAction: sysinit
    shell: 'cgconfigparser -l /etc/cgconfig.conf -s 1664'
  # restrict permissions on /neonvm/bin/resize-swap, because we grant access to compute_ctl for
  # running it as root.
  - name: chmod-resize-swap
    user: root
    sysvInitAction: sysinit
    shell: 'chmod 711 /neonvm/bin/resize-swap'
  - name: pgbouncer
    user: postgres
    sysvInitAction: respawn
    shell: '/usr/local/bin/pgbouncer /etc/pgbouncer.ini'
  - name: postgres-exporter
    user: nobody
    sysvInitAction: respawn
    shell: 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres application_name=postgres-exporter" /bin/postgres_exporter'
  - name: sql-exporter
    user: nobody
    sysvInitAction: respawn
    shell: '/bin/sql_exporter -config.file=/etc/sql_exporter.yml -web.listen-address=:9399'
  - name: sql-exporter-autoscaling
    user: nobody
    sysvInitAction: respawn
    shell: '/bin/sql_exporter -config.file=/etc/sql_exporter_autoscaling.yml -web.listen-address=:9499'
 shutdownHook: |
  su -p postgres --session-command '/usr/local/bin/pg_ctl stop -D /var/db/postgres/compute/pgdata -m fast --wait -t 10'
 files:
  - filename: compute_ctl-resize-swap
    content: |
      # Allow postgres user (which is what compute_ctl runs as) to run /neonvm/bin/resize-swap
      # as root without requiring entering a password (NOPASSWD), regardless of hostname (ALL)
      postgres ALL=(root) NOPASSWD: /neonvm/bin/resize-swap
  - filename: cgconfig.conf
    content: |
      # Configuration for cgroups in VM compute nodes
      group neon-postgres {
          perm {
              admin {
                  uid = postgres;
              }
              task {
                  gid = users;
              }
          }
          memory {}
      }
 build: |
  # Build cgroup-tools
  #
  # At time of writing (2023-03-14), debian bullseye has a version of cgroup-tools (technically
  # libcgroup) that doesn't support cgroup v2 (version 0.41-11). Unfortunately, the vm-monitor
  # requires cgroup v2, so we'll build cgroup-tools ourselves.
  FROM debian:bullseye-slim as libcgroup-builder
  ENV LIBCGROUP_VERSION=v2.0.3
  RUN set -exu \
      && apt update \
      && apt install --no-install-recommends -y \
          git \
          ca-certificates \
          automake \
          cmake \
          make \
          gcc \
          byacc \
          flex \
          libtool \
          libpam0g-dev \
      && git clone --depth 1 -b $LIBCGROUP_VERSION https://github.com/libcgroup/libcgroup \
      && INSTALL_DIR="/libcgroup-install" \
      && mkdir -p "$INSTALL_DIR/bin" "$INSTALL_DIR/include" \
      && cd libcgroup \
      # extracted from bootstrap.sh, with modified flags:
      && (test -d m4 || mkdir m4) \
      && autoreconf -fi \
      && rm -rf autom4te.cache \
      && CFLAGS="-O3" ./configure --prefix="$INSTALL_DIR" --sysconfdir=/etc --localstatedir=/var --enable-opaque-hierarchy="name=systemd" \
      # actually build the thing...
      && make install
 merge: |
  # tweak nofile limits
  RUN set -e \
      && echo 'fs.file-max = 1048576' >>/etc/sysctl.conf \
      && test ! -e /etc/security || ( \
         echo '*    - nofile 1048576' >>/etc/security/limits.conf \
      && echo 'root - nofile 1048576' >>/etc/security/limits.conf \
         )
  # Allow postgres user (compute_ctl) to run swap resizer.
  # Need to install sudo in order to allow this.
  #
  # Also, remove the 'read' permission from group/other on /neonvm/bin/resize-swap, just to be safe.
  RUN set -e \
      && apt update \
      && apt install --no-install-recommends -y \
             sudo \
      && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
  COPY compute_ctl-resize-swap /etc/sudoers.d/compute_ctl-resize-swap
  COPY cgconfig.conf /etc/cgconfig.conf
  RUN set -e \
      && chmod 0644 /etc/cgconfig.conf
  COPY --from=libcgroup-builder /libcgroup-install/bin/*  /usr/bin/
  COPY --from=libcgroup-builder /libcgroup-install/lib/*  /usr/lib/
  COPY --from=libcgroup-builder /libcgroup-install/sbin/* /usr/sbin/
--- a/compute_tools/src/configurator.rs
+++ b/compute_tools/src/configurator.rs
@@ -11,9 +11,17 @@ use crate::compute::ComputeNode;
 fn configurator_main_loop(compute: &Arc<ComputeNode>) {
    info!("waiting for reconfiguration requests");
    loop {
-        let state = compute.state.lock().unwrap();
+        let mut state = compute.state.lock().unwrap();
        let mut state = compute.state_changed.wait(state).unwrap();
        // We have to re-check the status after re-acquiring the lock because it could be that
        // the status has changed while we were waiting for the lock, and we might not need to
        // wait on the condition variable. Otherwise, we might end up in some soft-/deadlock, i.e.
        // we are waiting for a condition variable that will never be signaled.
        if state.status != ComputeStatus::ConfigurationPending {
            state = compute.state_changed.wait(state).unwrap();
        }
        // Re-check the status after waking up
        if state.status == ComputeStatus::ConfigurationPending {
            info!("got configuration request");
            state.status = ComputeStatus::Configuration;
--- a/control_plane/Cargo.toml
+++ b/control_plane/Cargo.toml
@@ -9,7 +9,6 @@ anyhow.workspace = true
 camino.workspace = true
 clap.workspace = true
 comfy-table.workspace = true
 git-version.workspace = true
 humantime.workspace = true
 nix.workspace = true
 once_cell.workspace = true
--- a/control_plane/src/storage_controller.rs
+++ b/control_plane/src/storage_controller.rs
@@ -346,7 +346,14 @@ impl StorageController {
            let pg_log_path = pg_data_path.join("postgres.log");
            if !tokio::fs::try_exists(&pg_data_path).await? {
-                let initdb_args = ["-D", pg_data_path.as_ref(), "--username", &username()];
+                let initdb_args = [
                    "-D",
                    pg_data_path.as_ref(),
                    "--username",
                    &username(),
                    "--no-sync",
                    "--no-instructions",
                ];
                tracing::info!(
                    "Initializing storage controller database with args: {:?}",
                    initdb_args
--- a/control_plane/storcon_cli/src/main.rs
+++ b/control_plane/storcon_cli/src/main.rs
@@ -4,8 +4,8 @@ use std::{str::FromStr, time::Duration};
 use clap::{Parser, Subcommand};
 use pageserver_api::{
    controller_api::{
-        NodeAvailabilityWrapper, NodeDescribeResponse, NodeShardResponse, ShardSchedulingPolicy,
+        AvailabilityZone, NodeAvailabilityWrapper, NodeDescribeResponse, NodeShardResponse,
-        TenantCreateRequest, TenantDescribeResponse, TenantPolicyRequest,
+        ShardSchedulingPolicy, TenantCreateRequest, TenantDescribeResponse, TenantPolicyRequest,
    },
    models::{
        EvictionPolicy, EvictionPolicyLayerAccessThreshold, LocationConfigSecondary,
@@ -339,7 +339,7 @@ async fn main() -> anyhow::Result<()> {
                        listen_pg_port,
                        listen_http_addr,
                        listen_http_port,
-                        availability_zone_id,
+                        availability_zone_id: AvailabilityZone(availability_zone_id),
                    }),
                )
                .await?;
--- a/docker-compose/README.md
+++ b/docker-compose/README.md
@@ -2,8 +2,8 @@
 # Example docker compose configuration
 The configuration in this directory is used for testing Neon docker images: it is
-not intended for deploying a usable system.  To run a development environment where
+not intended for deploying a usable system. To run a development environment where
-you can experiment with a minature Neon system, use `cargo neon` rather than container images.
+you can experiment with a miniature Neon system, use `cargo neon` rather than container images.
 This configuration does not start the storage controller, because the controller
 needs a way to reconfigure running computes, and no such thing exists in this setup.
--- a/docs/rfcs/038-independent-compute-release.md
+++ b/docs/rfcs/038-independent-compute-release.md
@@ -0,0 +1,343 @@
 # Independent compute release
 Created at: 2024-08-30. Author: Alexey Kondratov (@ololobus)
 ## Summary
 This document proposes an approach to fully independent compute release flow. It attempts to
 cover the following features:
 - Process is automated as much as possible to minimize human errors.
 - Compute<->storage protocol compatibility is ensured.
 - A transparent release history is available with an easy rollback strategy.
 - Although not in the scope of this document, there is a viable way to extend the proposed release
  flow to achieve the canary and/or blue-green deployment strategies.
 ## Motivation
 Previously, the compute release was tightly coupled to the storage release. This meant that once
 some storage nodes got restarted with a newer version, all new compute starts using these nodes
 automatically got a new version. Thus, two releases happen in parallel, which increases the blast
 radius and makes ownership fuzzy.
 Now, we practice a manual v0 independent compute release flow -- after getting a new compute release
 image and tag, we pin it region by region using Admin UI. It's better, but it still has its own flaws:
 1. It's a simple but fairly manual process, as you need to click through a few pages.
 2. It's prone to human errors, e.g., you could mistype or copy the wrong compute tag.
 3. We now require an additional approval in the Admin UI, which partially solves the 2.,
   but also makes the whole process pretty annoying, as you constantly need to go back
   and forth between two people.
 ## Non-goals
 It's not the goal of this document to propose a design for some general-purpose release tool like Helm.
 The document considers how the current compute fleet is orchestrated at Neon. Even if we later
 decide to split the control plane further (e.g., introduce a separate compute controller), the proposed
 release process shouldn't change much, i.e., the releases table and API will reside in
 one of the parts.
 Achieving the canary and/or blue-green deploy strategies is out of the scope of this document. They
 were kept in mind, though, so it's expected that the proposed approach will lay down the foundation
 for implementing them in future iterations.
 ## Impacted components
 Compute, control plane, CI, observability (some Grafana dashboards may require changes).
 ## Prior art
 One very close example is how Helm tracks [release history](https://helm.sh/docs/helm/helm_history/).
 In the code:
 - [Release](https://github.com/helm/helm/blob/2b30cf4b61d587d3f7594102bb202b787b9918db/pkg/release/release.go#L20-L43)
 - [Release info](https://github.com/helm/helm/blob/2b30cf4b61d587d3f7594102bb202b787b9918db/pkg/release/info.go#L24-L40)
 - [Release status](https://github.com/helm/helm/blob/2b30cf4b61d587d3f7594102bb202b787b9918db/pkg/release/status.go#L18-L42)
 TL;DR it has several important attributes:
 - Revision -- unique release ID/primary key. It is not the same as the application version,
  because the same version can be deployed several times, e.g., after a newer version rollback.
 - App version -- version of the application chart/code.
 - Config -- set of overrides to the default config of the application.
 - Status -- current status of the release in the history.
 - Timestamps -- tracks when a release was created and deployed.
 ## Proposed implementation
 ### Separate release branch
 We will use a separate release branch, `release-compute`, to have a clean history for releases and commits.
 In order to avoid confusion with storage releases, we will use a different prefix for compute [git release
 tags](https://github.com/neondatabase/neon/releases) -- `release-compute-XXXX`. We will use the same tag for
 Docker images as well. The `neondatabase/compute-node-v16:release-compute-XXXX` looks longer and a bit redundant,
 but it's better to have image and git tags in sync.
 Currently, control plane relies on the numeric compute and storage release versions to decide on compute->storage
 compatibility. Once we implement this proposal, we should drop this code as release numbers will be completely
 independent. The only constraint we want is that it must monotonically increase within the same release branch.
 ### Compute config/settings manifest
 We will create a new sub-directory `compute` and file `compute/manifest.yaml` with a structure:
 ```yaml
 pg_settings:
  # Common settings for primaries and secondaries of all versions.
  common:
    wal_log_hints: "off"
    max_wal_size: "1024"
  per_version:
    14:
      # Common settings for both replica and primary of version PG 14
      common:
        shared_preload_libraries: "neon,pg_stat_statements,extension_x"
    15:
      common:
        shared_preload_libraries: "neon,pg_stat_statements,extension_x"
      # Settings that should be applied only to replicas
      replica:
        # Available only starting from Postgres 15
        recovery_prefetch: "off"
    # ...
    17:
      common:
        # For example, if third-party `extension_x` is not yet available for PG 17
        shared_preload_libraries: "neon,pg_stat_statements"
      replica:
        recovery_prefetch: "off"
 ```
 **N.B.** Setting value should be a string with `on|off` for booleans and a number (as a string)
 without units for all numeric settings. That's how the control plane currently operates.
 The priority of settings will be (a higher number is a higher priority):
 1. Any static and hard-coded settings in the control plane
 2. `pg_settings->common`
 3. Per-version `common`
 4. Per-version `replica`
 5. Any per-user/project/endpoint overrides in the control plane
 6. Any dynamic setting calculated based on the compute size
 **N.B.** For simplicity, we do not do any custom logic for `shared_preload_libraries`, so it's completely
 overridden if specified on some level. Make sure that you include all necessary extensions in it when you
 do any overrides.
 **N.B.** There is a tricky question about what to do with custom compute image pinning we sometimes
 do for particular projects and customers. That's usually some ad-hoc work and images are based on
 the latest compute image, so it's relatively safe to assume that we could use settings from the latest compute
 release. If for some reason that's not true, and further overrides are needed, it's also possible to do
 on the project level together with pinning the image, so it's on-call/engineer/support responsibility to
 ensure that compute starts with the specified custom image. The only real risk is that compute image will get
 stale and settings from new releases will drift away, so eventually it will get something incompatible,
 but i) this is some operational issue, as we do not want stale images anyway, and ii) base settings
 receive something really new so rarely that the chance of this happening is very low. If we want to solve it completely,
 then together with pinning the image we could also pin the matching release revision in the control plane.
 The compute team will own the content of `compute/manifest.yaml`.
 ### Control plane: releases table
 In order to store information about releases, the control plane will use a table `compute_releases` with the following
 schema:
 ```sql
 CREATE TABLE compute_releases (
  -- Unique release ID
  -- N.B. Revision won't be synchronized across all regions, because all control planes are technically independent
  -- services. We have the same situation with Helm releases as well because they could be deployed and rolled back
  -- independently in different clusters.
  revision BIGSERIAL PRIMARY KEY,
  -- Numeric version of the compute image, e.g. 9057
  version BIGINT NOT NULL,
  -- Compute image tag, e.g. `release-9057`
  tag TEXT NOT NULL,
  -- Current release status. Currently, it will be a simple enum
  -- * `deployed` -- release is deployed and used for new compute starts.
  --                 Exactly one release can have this status at a time.
  -- * `superseded` -- release has been replaced by a newer one.
  -- But we can always extend it in the future when we need more statuses
  -- for more complex deployment strategies.
  status TEXT NOT NULL,
  -- Any additional metadata for compute in the corresponding release
  manifest JSONB NOT NULL,
  -- Timestamp when release record was created in the control plane database
  created_at TIMESTAMP NOT NULL DEFAULT now(),
  -- Timestamp when release deployment was finished
  deployed_at TIMESTAMP
 );
 ```
 We keep track of the old releases not only for the sake of audit, but also because we usually have ~30% of
 old computes started using the image from one of the previous releases. Yet, when users want to reconfigure
 them without restarting, the control plane needs to know what settings are applicable to them, so we also need
 information about the previous releases that are readily available. There could be some other auxiliary info
 needed as well: supported extensions, compute flags, etc.
 **N.B.** Here, we can end up in an ambiguous situation when the same compute image is deployed twice, e.g.,
 it was deployed once, then rolled back, and then deployed again, potentially with a different manifest. Yet,
 we could've started some computes with the first deployment and some with the second. Thus, when we need to
 look up the manifest for the compute by its image tag, we will see two records in the table with the same tag,
 but different revision numbers. We can assume that this could happen only in case of rollbacks, so we
 can just take the latest revision for the given tag.
 ### Control plane: management API
 The control plane will implement new API methods to manage releases:
 1. `POST /management/api/v2/compute_releases` to create a new release. With payload
   ```json
    {
      "version": 9057,
      "tag": "release-9057",
      "manifest": {}
    }
   ```
   and response
   ```json
    {
      "revision": 53,
      "version": 9057,
      "tag": "release-9057",
      "status": "deployed",
      "manifest": {},
      "created_at": "2024-08-15T15:52:01.0000Z",
      "deployed_at": "2024-08-15T15:52:01.0000Z"
    }
   ```
   Here, we can actually mix-in custom (remote) extensions metadata into the `manifest`, so that the control plane
   will get information about all available extensions not bundled into compute image. The corresponding
   workflow in `neondatabase/build-custom-extensions` should produce it as an artifact and make
   it accessible to the workflow in the `neondatabase/infra`. See the complete release flow below. Doing that,
   we put a constraint that a new custom extension requires a new compute release, which is good for safety,
   but is not exactly what we want operational-wise (we want to be able to deploy new extensions without new
   images). Yet, it can be solved incrementally: v0 -- do not do anything with extensions at all;
   v1 -- put them into the same manifest; v2 -- make them separate entities with their own lifecycle.
   **N.B.** This method is intended to be used in CI workflows, and CI/network can be flaky. It's reasonable
   to assume that we could retry the request several times, even though it's already succeeded. Although it's
   not a big deal to create several identical releases one-by-one, it's better to avoid it, so the control plane
   should check if the latest release is identical and just return `304 Not Modified` in this case.
 2. `POST /management/api/v2/compute_releases/rollback` to rollback to any previously deployed release. With payload
   including the revision of the release to rollback to:
   ```json
   {
      "revision": 52
   }
   ```
   Rollback marks the current release as `superseded` and creates a new release with all the same data as the
   requested revision, but with a new revision number.
   This rollback API is not strictly needed, as we can just use `infra` repo workflow to deploy any
   available tag. It's still nice to have for on-call and any urgent matters, for example, if we need
   to rollback and GitHub is down. It's much easier to specify only the revision number vs. crafting
   all the necessary data for the new release payload.
 ### Compute->storage compatibility tests
 In order to safely release new compute versions independently from storage, we need to ensure that the currently
 deployed storage is compatible with the new compute version. Currently, we maintain backward compatibility
 in storage, but newer computes may require a newer storage version.
 Remote end-to-end (e2e) tests [already accept](https://github.com/neondatabase/cloud/blob/e3468d433e0d73d02b7d7e738d027f509b522408/.github/workflows/testing.yml#L43-L48)
 `storage_image_tag` and `compute_image_tag` as separate inputs. That means that we could reuse e2e tests to ensure
 compatibility between storage and compute:
 1. Pick the latest storage release tag and use it as `storage_image_tag`.
 2. Pick a new compute tag built in the current compute release PR and use it as `compute_image_tag`.
   Here, we should use a temporary ECR image tag, because the final tag will be known only after the release PR is merged.
 3. Trigger e2e tests as usual.
 ### Release flow
 ```mermaid
  sequenceDiagram
  actor oncall as Compute on-call person
  participant neon as neondatabase/neon
  box private
    participant cloud as neondatabase/cloud
    participant exts as neondatabase/build-custom-extensions
    participant infra as neondatabase/infra
  end
  box cloud
    participant preprod as Pre-prod control plane
    participant prod as Production control plane
    participant k8s as Compute k8s
  end
  oncall ->> neon: Open release PR into release-compute
  activate neon
  neon ->> cloud: CI: trigger e2e compatibility tests
  activate cloud
  cloud -->> neon: CI: e2e tests pass
  deactivate cloud
  neon ->> neon: CI: pass PR checks, get approvals
  deactivate neon
  oncall ->> neon: Merge release PR into release-compute
  activate neon
  neon ->> neon: CI: pass checks, build and push images
  neon ->> exts: CI: trigger extensions build
  activate exts
  exts -->> neon: CI: extensions are ready
  deactivate exts
  neon ->> neon: CI: create release tag
  neon ->> infra: Trigger release workflow using the produced tag
  deactivate neon
  activate infra
  infra ->> infra: CI: pass checks
  infra ->> preprod: Release new compute image to pre-prod automatically <br/> POST /management/api/v2/compute_releases
  activate preprod
  preprod -->> infra: 200 OK
  deactivate preprod
  infra ->> infra: CI: wait for per-region production deploy approvals
  oncall ->> infra: CI: approve deploys region by region
  infra ->> k8s: Prewarm new compute image
  infra ->> prod: POST /management/api/v2/compute_releases
  activate prod
  prod -->> infra: 200 OK
  deactivate prod
  deactivate infra
 ```
 ## Further work
 As briefly mentioned in other sections, eventually, we would like to use more complex deployment strategies.
 For example, we can pass a fraction of the total compute starts that should use the new release. Then we can
 mark the release as `partial` or `canary` and monitor its performance. If everything is fine, we can promote it
 to `deployed` status. If not, we can roll back to the previous one.
 ## Alternatives
 In theory, we can try using Helm as-is:
 1. Write a compute Helm chart. That will actually have only some config map, which the control plane can access and read.
   N.B. We could reuse the control plane chart as well, but then it's not a fully independent release again and even more fuzzy.
 2. The control plane will read it and start using the new compute version for new starts.
 Drawbacks:
 1. Helm releases work best if the workload is controlled by the Helm chart itself. Then you can have different
   deployment strategies like rolling update or canary or blue/green deployments. At Neon, the compute starts are controlled
   by control plane, so it makes it much more tricky.
 2. Releases visibility will suffer, i.e. instead of a nice table in the control plane and Admin UI, we would need to use
   `helm` cli and/or K8s UIs like K8sLens.
 3. We do not restart all computes shortly after the new version release. This means that for some features and for compatibility
   purposes (see above), the control plane may need some auxiliary info from the previous releases.
--- a/libs/compute_api/src/spec.rs
+++ b/libs/compute_api/src/spec.rs
@@ -268,6 +268,22 @@ pub struct GenericOption {
 /// declare a `trait` on it.
 pub type GenericOptions = Option<Vec<GenericOption>>;
 /// Configures the local-proxy application with the relevant JWKS and roles it should
 /// use for authorizing connect requests using JWT.
 #[derive(Clone, Debug, Deserialize, Serialize)]
 pub struct LocalProxySpec {
    /// JWKS entries handed to the local proxy, one [`JwksSettings`] per entry.
    pub jwks: Vec<JwksSettings>,
 }
 /// Settings for a single JWKS entry listed in [`LocalProxySpec::jwks`].
 ///
 /// NOTE(review): the per-field descriptions below are inferred from the field names
 /// only; confirm them against the local-proxy code that consumes this spec.
 #[derive(Clone, Debug, Deserialize, Serialize)]
 pub struct JwksSettings {
    /// Identifier of this entry.
    pub id: String,
    /// Role names associated with this entry (presumably the roles allowed to use it).
    pub role_names: Vec<String>,
    /// Presumably the URL the key set is fetched from -- TODO confirm.
    pub jwks_url: String,
    /// Presumably a human-readable name of the JWT provider -- TODO confirm.
    pub provider_name: String,
    /// Optional audience value; `None` when no audience is configured.
    pub jwt_audience: Option<String>,
 }
 #[cfg(test)]
 mod tests {
    use super::*;
--- a/libs/pageserver_api/src/config.rs
+++ b/libs/pageserver_api/src/config.rs
@@ -104,9 +104,6 @@ pub struct ConfigToml {
    pub image_compression: ImageCompressionAlgorithm,
    pub ephemeral_bytes_per_memory_kb: usize,
    pub l0_flush: Option<crate::models::L0FlushConfig>,
    #[serde(skip_serializing)]
    // TODO(https://github.com/neondatabase/neon/issues/8184): remove after this field is removed from all pageserver.toml's
    pub compact_level0_phase1_value_access: serde::de::IgnoredAny,
    pub virtual_file_direct_io: crate::models::virtual_file::DirectIoMode,
    pub io_buffer_alignment: usize,
 }
@@ -384,7 +381,6 @@ impl Default for ConfigToml {
            image_compression: (DEFAULT_IMAGE_COMPRESSION),
            ephemeral_bytes_per_memory_kb: (DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB),
            l0_flush: None,
            compact_level0_phase1_value_access: Default::default(),
            virtual_file_direct_io: crate::models::virtual_file::DirectIoMode::default(),
            io_buffer_alignment: DEFAULT_IO_BUFFER_ALIGNMENT,
--- a/libs/pageserver_api/src/controller_api.rs
+++ b/libs/pageserver_api/src/controller_api.rs
@@ -1,4 +1,5 @@
 use std::collections::{HashMap, HashSet};
 use std::fmt::Display;
 use std::str::FromStr;
 use std::time::{Duration, Instant};
@@ -57,7 +58,7 @@ pub struct NodeRegisterRequest {
    pub listen_http_addr: String,
    pub listen_http_port: u16,
-    pub availability_zone_id: String,
+    pub availability_zone_id: AvailabilityZone,
 }
 #[derive(Serialize, Deserialize)]
@@ -74,10 +75,19 @@ pub struct TenantPolicyRequest {
    pub scheduling: Option<ShardSchedulingPolicy>,
 }
 /// Newtype wrapper around the string identifier of an availability zone.
 ///
 /// Used as the type of `NodeRegisterRequest::availability_zone_id` and as the value
 /// type of `ShardsPreferredAzsRequest::preferred_az_ids`. It derives `Eq` and `Hash`,
 /// so it can also serve as a key in hashed collections.
 #[derive(Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
 pub struct AvailabilityZone(pub String);
 impl Display for AvailabilityZone {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.0)
    }
 }
 #[derive(Serialize, Deserialize)]
 pub struct ShardsPreferredAzsRequest {
    #[serde(flatten)]
-    pub preferred_az_ids: HashMap<TenantShardId, String>,
+    pub preferred_az_ids: HashMap<TenantShardId, AvailabilityZone>,
 }
 #[derive(Serialize, Deserialize)]
--- a/libs/pageserver_api/src/models.rs
+++ b/libs/pageserver_api/src/models.rs
@@ -37,14 +37,11 @@ use bytes::{Buf, BufMut, Bytes, BytesMut};
 /// ```mermaid
 /// stateDiagram-v2
 ///
 ///     [*] --> Loading: spawn_load()
 ///     [*] --> Attaching: spawn_attach()
 ///
 ///     Loading --> Activating: activate()
 ///     Attaching --> Activating: activate()
 ///     Activating --> Active: infallible
 ///
 ///     Loading --> Broken: load() failure
 ///     Attaching --> Broken: attach() failure
 ///
 ///     Active --> Stopping: set_stopping(), part of shutdown & detach
@@ -68,10 +65,6 @@ use bytes::{Buf, BufMut, Bytes, BytesMut};
 )]
 #[serde(tag = "slug", content = "data")]
 pub enum TenantState {
    /// This tenant is being loaded from local disk.
    ///
    /// `set_stopping()` and `set_broken()` do not work in this state and wait for it to pass.
    Loading,
    /// This tenant is being attached to the pageserver.
    ///
    /// `set_stopping()` and `set_broken()` do not work in this state and wait for it to pass.
@@ -121,8 +114,6 @@ impl TenantState {
            // But, our attach task might still be fetching the remote timelines, etc.
            // So, return `Maybe` while Attaching, making Console wait for the attach task to finish.
            Self::Attaching | Self::Activating(ActivatingFrom::Attaching) => Maybe,
            // tenant mgr startup distinguishes attaching from loading via marker file.
            Self::Loading | Self::Activating(ActivatingFrom::Loading) => Attached,
            // We only reach Active after successful load / attach.
            // So, call attachment status Attached.
            Self::Active => Attached,
@@ -191,10 +182,11 @@ impl LsnLease {
 }
 /// The only [`TenantState`] variants we could be `TenantState::Activating` from.
 ///
 /// XXX: We used to have more variants here, but now it's just one, which makes this rather
 /// useless. Remove, once we've checked that there's no client code left that looks at this.
 #[derive(Clone, Copy, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
 pub enum ActivatingFrom {
    /// Arrived to [`TenantState::Activating`] from [`TenantState::Loading`]
    Loading,
    /// Arrived to [`TenantState::Activating`] from [`TenantState::Attaching`]
    Attaching,
 }
@@ -1562,11 +1554,8 @@ mod tests {
    #[test]
    fn tenantstatus_activating_serde() {
-        let states = [
+        let states = [TenantState::Activating(ActivatingFrom::Attaching)];
-            TenantState::Activating(ActivatingFrom::Loading),
+        let expected = "[{\"slug\":\"Activating\",\"data\":\"Attaching\"}]";
            TenantState::Activating(ActivatingFrom::Attaching),
        ];
        let expected = "[{\"slug\":\"Activating\",\"data\":\"Loading\"},{\"slug\":\"Activating\",\"data\":\"Attaching\"}]";
        let actual = serde_json::to_string(&states).unwrap();
@@ -1581,13 +1570,7 @@ mod tests {
    fn tenantstatus_activating_strum() {
        // tests added, because we use these for metrics
        let examples = [
            (line!(), TenantState::Loading, "Loading"),
            (line!(), TenantState::Attaching, "Attaching"),
            (
                line!(),
                TenantState::Activating(ActivatingFrom::Loading),
                "Activating",
            ),
            (
                line!(),
                TenantState::Activating(ActivatingFrom::Attaching),
--- a/libs/postgres_backend/src/lib.rs
+++ b/libs/postgres_backend/src/lib.rs
@@ -984,6 +984,7 @@ pub fn short_error(e: &QueryError) -> String {
 }
 fn log_query_error(query: &str, e: &QueryError) {
    // If you want to change the log level of a specific error, also re-categorize it in `BasebackupQueryTimeOngoingRecording`.
    match e {
        QueryError::Disconnected(ConnectionError::Io(io_error)) => {
            if is_expected_io_error(io_error) {
--- a/libs/utils/Cargo.toml
+++ b/libs/utils/Cargo.toml
@@ -19,6 +19,7 @@ bincode.workspace = true
 bytes.workspace = true
 camino.workspace = true
 chrono.workspace = true
 git-version.workspace = true
 hex = { workspace = true, features = ["serde"] }
 humantime.workspace = true
 hyper = { workspace = true, features = ["full"] }
--- a/libs/utils/src/lib.rs
+++ b/libs/utils/src/lib.rs
@@ -92,6 +92,10 @@ pub mod toml_edit_ext;
 pub mod circuit_breaker;
 // Re-export used in macro. Avoids adding git-version as dep in target crates.
 #[doc(hidden)]
 pub use git_version;
 /// This is a shortcut to embed git sha into binaries and avoid copying the same build script to all packages
 ///
 /// we have several cases:
@@ -131,7 +135,7 @@ macro_rules! project_git_version {
    ($const_identifier:ident) => {
        // this should try GIT_VERSION first only then git_version::git_version!
        const $const_identifier: &::core::primitive::str = {
-            const __COMMIT_FROM_GIT: &::core::primitive::str = git_version::git_version! {
+            const __COMMIT_FROM_GIT: &::core::primitive::str = $crate::git_version::git_version! {
                prefix = "",
                fallback = "unknown",
                args = ["--abbrev=40", "--always", "--dirty=-modified"] // always use full sha
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -27,7 +27,6 @@ crc32c.workspace = true
 either.workspace = true
 fail.workspace = true
 futures.workspace = true
 git-version.workspace = true
 hex.workspace = true
 humantime.workspace = true
 humantime-serde.workspace = true
--- a/pageserver/benches/bench_walredo.rs
+++ b/pageserver/benches/bench_walredo.rs
@@ -1,7 +1,7 @@
 //! Quantify a single walredo manager's throughput under N concurrent callers.
 //!
 //! The benchmark implementation ([`bench_impl`]) is parametrized by
-//! - `redo_work` => [`Request::short_request`] or [`Request::medium_request`]
+//! - `redo_work` => an async closure that takes a `PostgresRedoManager` and performs one redo
 //! - `n_redos` => number of times the benchmark shall execute the `redo_work`
 //! - `nclients` => number of clients (more on this shortly).
 //!
@@ -10,7 +10,7 @@
 //! Each task executes the `redo_work` `n_redos/nclients` times.
 //!
 //! We exercise the following combinations:
-//! - `redo_work = short / medium``
+//! - `redo_work = ping / short / medium`
 //! - `nclients = [1, 2, 4, 8, 16, 32, 64, 128]`
 //!
 //! We let `criterion` determine the `n_redos` using `iter_custom`.
@@ -27,33 +27,43 @@
 //!
 //! # Reference Numbers
 //!
-//! 2024-04-15 on i3en.3xlarge
+//! 2024-09-18 on im4gn.2xlarge
 //!
 //! ```text
-//! short/1           time:   [24.584 µs 24.737 µs 24.922 µs]
+//! ping/1                  time:   [21.789 µs 21.918 µs 22.078 µs]
-//! short/2           time:   [33.479 µs 33.660 µs 33.888 µs]
+//! ping/2                  time:   [27.686 µs 27.812 µs 27.970 µs]
-//! short/4           time:   [42.713 µs 43.046 µs 43.440 µs]
+//! ping/4                  time:   [35.468 µs 35.671 µs 35.926 µs]
-//! short/8           time:   [71.814 µs 72.478 µs 73.240 µs]
+//! ping/8                  time:   [59.682 µs 59.987 µs 60.363 µs]
-//! short/16          time:   [132.73 µs 134.45 µs 136.22 µs]
+//! ping/16                 time:   [101.79 µs 102.37 µs 103.08 µs]
-//! short/32          time:   [258.31 µs 260.73 µs 263.27 µs]
+//! ping/32                 time:   [184.18 µs 185.15 µs 186.36 µs]
-//! short/64          time:   [511.61 µs 514.44 µs 517.51 µs]
+//! ping/64                 time:   [349.86 µs 351.45 µs 353.47 µs]
-//! short/128         time:   [992.64 µs 998.23 µs 1.0042 ms]
+//! ping/128                time:   [684.53 µs 687.98 µs 692.17 µs]
-//! medium/1          time:   [110.11 µs 110.50 µs 110.96 µs]
+//! short/1                 time:   [31.833 µs 32.126 µs 32.428 µs]
-//! medium/2          time:   [153.06 µs 153.85 µs 154.99 µs]
+//! short/2                 time:   [35.558 µs 35.756 µs 35.992 µs]
-//! medium/4          time:   [317.51 µs 319.92 µs 322.85 µs]
+//! short/4                 time:   [44.850 µs 45.138 µs 45.484 µs]
-//! medium/8          time:   [638.30 µs 644.68 µs 652.12 µs]
+//! short/8                 time:   [65.985 µs 66.379 µs 66.853 µs]
-//! medium/16         time:   [1.2651 ms 1.2773 ms 1.2914 ms]
+//! short/16                time:   [127.06 µs 127.90 µs 128.87 µs]
-//! medium/32         time:   [2.5117 ms 2.5410 ms 2.5720 ms]
+//! short/32                time:   [252.98 µs 254.70 µs 256.73 µs]
-//! medium/64         time:   [4.8088 ms 4.8555 ms 4.9047 ms]
+//! short/64                time:   [497.13 µs 499.86 µs 503.26 µs]
-//! medium/128        time:   [8.8311 ms 8.9849 ms 9.1263 ms]
+//! short/128               time:   [987.46 µs 993.45 µs 1.0004 ms]
 //! medium/1                time:   [137.91 µs 138.55 µs 139.35 µs]
 //! medium/2                time:   [192.00 µs 192.91 µs 194.07 µs]
 //! medium/4                time:   [389.62 µs 391.55 µs 394.01 µs]
 //! medium/8                time:   [776.80 µs 780.33 µs 784.77 µs]
 //! medium/16               time:   [1.5323 ms 1.5383 ms 1.5459 ms]
 //! medium/32               time:   [3.0120 ms 3.0226 ms 3.0350 ms]
 //! medium/64               time:   [5.7405 ms 5.7787 ms 5.8166 ms]
 //! medium/128              time:   [10.412 ms 10.574 ms 10.718 ms]
 //! ```
 use anyhow::Context;
 use bytes::{Buf, Bytes};
 use criterion::{BenchmarkId, Criterion};
 use once_cell::sync::Lazy;
 use pageserver::{config::PageServerConf, walrecord::NeonWalRecord, walredo::PostgresRedoManager};
 use pageserver_api::{key::Key, shard::TenantShardId};
 use std::{
    future::Future,
    sync::Arc,
    time::{Duration, Instant},
 };
@@ -61,40 +71,59 @@ use tokio::{sync::Barrier, task::JoinSet};
 use utils::{id::TenantId, lsn::Lsn};
 fn bench(c: &mut Criterion) {
-    {
+    macro_rules! bench_group {
-        let nclients = [1, 2, 4, 8, 16, 32, 64, 128];
+        ($name:expr, $redo_work:expr) => {{
-        for nclients in nclients {
+            let name: &str = $name;
-            let mut group = c.benchmark_group("short");
+            let nclients = [1, 2, 4, 8, 16, 32, 64, 128];
-            group.bench_with_input(
+            for nclients in nclients {
-                BenchmarkId::from_parameter(nclients),
+                let mut group = c.benchmark_group(name);
-                &nclients,
+                group.bench_with_input(
-                |b, nclients| {
+                    BenchmarkId::from_parameter(nclients),
-                    let redo_work = Arc::new(Request::short_input());
+                    &nclients,
-                    b.iter_custom(|iters| bench_impl(Arc::clone(&redo_work), iters, *nclients));
+                    |b, nclients| {
-                },
+                        b.iter_custom(|iters| bench_impl($redo_work, iters, *nclients));
-            );
+                    },
-        }
+                );
-    }
+            }
-    {
+        }};
        let nclients = [1, 2, 4, 8, 16, 32, 64, 128];
        for nclients in nclients {
            let mut group = c.benchmark_group("medium");
            group.bench_with_input(
                BenchmarkId::from_parameter(nclients),
                &nclients,
                |b, nclients| {
                    let redo_work = Arc::new(Request::medium_input());
                    b.iter_custom(|iters| bench_impl(Arc::clone(&redo_work), iters, *nclients));
                },
            );
        }
    }
    //
    // benchmark the protocol implementation
    //
    let pg_version = 14;
    bench_group!(
        "ping",
        Arc::new(move |mgr: Arc<PostgresRedoManager>| async move {
            let _: () = mgr.ping(pg_version).await.unwrap();
        })
    );
    //
    // benchmarks with actual record redo
    //
    let make_redo_work = |req: &'static Request| {
        Arc::new(move |mgr: Arc<PostgresRedoManager>| async move {
            let page = req.execute(&mgr).await.unwrap();
            assert_eq!(page.remaining(), 8192);
        })
    };
    bench_group!("short", {
        static REQUEST: Lazy<Request> = Lazy::new(Request::short_input);
        make_redo_work(&REQUEST)
    });
    bench_group!("medium", {
        static REQUEST: Lazy<Request> = Lazy::new(Request::medium_input);
        make_redo_work(&REQUEST)
    });
 }
 criterion::criterion_group!(benches, bench);
 criterion::criterion_main!(benches);
 // Returns the sum of each client's wall-clock time spent executing their share of the n_redos.
-fn bench_impl(redo_work: Arc<Request>, n_redos: u64, nclients: u64) -> Duration {
+fn bench_impl<F, Fut>(redo_work: Arc<F>, n_redos: u64, nclients: u64) -> Duration
 where
    F: Fn(Arc<PostgresRedoManager>) -> Fut + Send + Sync + 'static,
    Fut: Future<Output = ()> + Send + 'static,
 {
    let repo_dir = camino_tempfile::tempdir_in(env!("CARGO_TARGET_TMPDIR")).unwrap();
    let conf = PageServerConf::dummy_conf(repo_dir.path().to_path_buf());
@@ -135,17 +164,20 @@ fn bench_impl(redo_work: Arc<Request>, n_redos: u64, nclients: u64) -> Duration
    })
 }
-async fn client(
+async fn client<F, Fut>(
    mgr: Arc<PostgresRedoManager>,
    start: Arc<Barrier>,
-    redo_work: Arc<Request>,
+    redo_work: Arc<F>,
    n_redos: u64,
-) -> Duration {
+) -> Duration
 where
    F: Fn(Arc<PostgresRedoManager>) -> Fut + Send + Sync + 'static,
    Fut: Future<Output = ()> + Send + 'static,
 {
    start.wait().await;
    let start = Instant::now();
    for _ in 0..n_redos {
-        let page = redo_work.execute(&mgr).await.unwrap();
+        redo_work(Arc::clone(&mgr)).await;
        assert_eq!(page.remaining(), 8192);
        // The real pageserver will rarely if ever do 2 walredos in a row without
        // yielding to the executor.
        tokio::task::yield_now().await;
--- a/pageserver/client/src/mgmt_api.rs
+++ b/pageserver/client/src/mgmt_api.rs
@@ -432,7 +432,7 @@ impl Client {
            self.mgmt_api_endpoint
        );
-        self.request(Method::POST, &uri, req)
+        self.request(Method::PUT, &uri, req)
            .await?
            .json()
            .await
--- a/pageserver/compaction/Cargo.toml
+++ b/pageserver/compaction/Cargo.toml
@@ -12,7 +12,6 @@ anyhow.workspace = true
 async-stream.workspace = true
 clap = { workspace = true, features = ["string"] }
 futures.workspace = true
 git-version.workspace = true
 itertools.workspace = true
 once_cell.workspace = true
 pageserver_api.workspace = true
--- a/pageserver/ctl/Cargo.toml
+++ b/pageserver/ctl/Cargo.toml
@@ -10,7 +10,6 @@ license.workspace = true
 anyhow.workspace = true
 camino.workspace = true
 clap = { workspace = true, features = ["string"] }
 git-version.workspace = true
 humantime.workspace = true
 pageserver = { path = ".." }
 pageserver_api.workspace = true
--- a/pageserver/src/bin/pageserver.rs
+++ b/pageserver/src/bin/pageserver.rs
@@ -15,7 +15,7 @@ use clap::{Arg, ArgAction, Command};
 use metrics::launch_timestamp::{set_launch_timestamp_metric, LaunchTimestamp};
 use pageserver::config::PageserverIdentity;
-use pageserver::control_plane_client::ControlPlaneClient;
+use pageserver::controller_upcall_client::ControllerUpcallClient;
 use pageserver::disk_usage_eviction_task::{self, launch_disk_usage_global_eviction_task};
 use pageserver::metrics::{STARTUP_DURATION, STARTUP_IS_LOADING};
 use pageserver::task_mgr::{COMPUTE_REQUEST_RUNTIME, WALRECEIVER_RUNTIME};
@@ -396,7 +396,7 @@ fn start_pageserver(
    // Set up deletion queue
    let (deletion_queue, deletion_workers) = DeletionQueue::new(
        remote_storage.clone(),
-        ControlPlaneClient::new(conf, &shutdown_pageserver),
+        ControllerUpcallClient::new(conf, &shutdown_pageserver),
        conf,
    );
    if let Some(deletion_workers) = deletion_workers {
--- a/pageserver/src/config.rs
+++ b/pageserver/src/config.rs
@@ -324,7 +324,6 @@ impl PageServerConf {
            max_vectored_read_bytes,
            image_compression,
            ephemeral_bytes_per_memory_kb,
            compact_level0_phase1_value_access: _,
            l0_flush,
            virtual_file_direct_io,
            concurrent_tenant_warmup,
@@ -535,16 +534,6 @@ mod tests {
            .expect("parse_and_validate");
    }
    /// Deserializing a config that contains a `[compact_level0_phase1_value_access]`
    /// table must succeed: the test panics (via `unwrap`) if parsing returns an error.
    #[test]
    fn test_compactl0_phase1_access_mode_is_ignored_silently() {
        // Config snippet that sets both keys of the `compact_level0_phase1_value_access` table.
        let config_toml = indoc::indoc! {r#"
            [compact_level0_phase1_value_access]
            mode = "streaming-kmerge"
            validate = "key-lsn-value"
        "#};
        // Only successful deserialization matters here; the parsed value is discarded.
        let parsed: Result<pageserver_api::config::ConfigToml, _> =
            toml_edit::de::from_str(config_toml);
        parsed.unwrap();
    }
    /// If there's a typo in the pageserver config, we'd rather catch that typo
    /// and fail pageserver startup than silently ignoring the typo, leaving whoever
    /// made it in the belief that their config change is effective.
--- a/pageserver/src/controller_upcall_client.rs
+++ b/pageserver/src/controller_upcall_client.rs
@@ -2,7 +2,7 @@ use std::collections::HashMap;
 use futures::Future;
 use pageserver_api::{
-    controller_api::NodeRegisterRequest,
+    controller_api::{AvailabilityZone, NodeRegisterRequest},
    shard::TenantShardId,
    upcall_api::{
        ReAttachRequest, ReAttachResponse, ReAttachResponseTenant, ValidateRequest,
@@ -17,9 +17,12 @@ use utils::{backoff, failpoint_support, generation::Generation, id::NodeId};
 use crate::{config::PageServerConf, virtual_file::on_fatal_io_error};
 use pageserver_api::config::NodeMetadata;
-/// The Pageserver's client for using the control plane API: this is a small subset
+/// The Pageserver's client for using the storage controller upcall API: this is a small API
-/// of the overall control plane API, for dealing with generations (see docs/rfcs/025-generation-numbers.md)
+/// for dealing with generations (see docs/rfcs/025-generation-numbers.md).
-pub struct ControlPlaneClient {
+///
 /// The server presenting this API may either be the storage controller or some other
 /// service (such as the Neon control plane) providing a store of generation numbers.
 pub struct ControllerUpcallClient {
    http_client: reqwest::Client,
    base_url: Url,
    node_id: NodeId,
@@ -45,7 +48,7 @@ pub trait ControlPlaneGenerationsApi {
    ) -> impl Future<Output = Result<HashMap<TenantShardId, bool>, RetryForeverError>> + Send;
 }
-impl ControlPlaneClient {
+impl ControllerUpcallClient {
    /// A None return value indicates that the input `conf` object does not have control
    /// plane API enabled.
    pub fn new(conf: &'static PageServerConf, cancel: &CancellationToken) -> Option<Self> {
@@ -114,7 +117,7 @@ impl ControlPlaneClient {
    }
 }
-impl ControlPlaneGenerationsApi for ControlPlaneClient {
+impl ControlPlaneGenerationsApi for ControllerUpcallClient {
    /// Block until we get a successful response, or error out if we are shut down
    async fn re_attach(
        &self,
@@ -148,10 +151,10 @@ impl ControlPlaneGenerationsApi for ControlPlaneClient {
                            .and_then(|jv| jv.as_str().map(|str| str.to_owned()));
                        match az_id_from_metadata {
-                            Some(az_id) => Some(az_id),
+                            Some(az_id) => Some(AvailabilityZone(az_id)),
                            None => {
                                tracing::warn!("metadata.json does not contain an 'availability_zone_id' field");
-                                conf.availability_zone.clone()
+                                conf.availability_zone.clone().map(AvailabilityZone)
                            }
                        }
                    };
@@ -216,29 +219,38 @@ impl ControlPlaneGenerationsApi for ControlPlaneClient {
            .join("validate")
            .expect("Failed to build validate path");
-        let request = ValidateRequest {
+        // When sending validate requests, break them up into chunks so that we
-            tenants: tenants
+        // avoid possible edge cases of generating any HTTP requests that
-                .into_iter()
+        // require database I/O across many thousands of tenants.
-                .map(|(id, gen)| ValidateRequestTenant {
+        let mut result: HashMap<TenantShardId, bool> = HashMap::with_capacity(tenants.len());
-                    id,
+        for tenant_chunk in (tenants).chunks(128) {
-                    gen: gen
+            let request = ValidateRequest {
-                        .into()
+                tenants: tenant_chunk
-                        .expect("Generation should always be valid for a Tenant doing deletions"),
+                    .iter()
-                })
+                    .map(|(id, generation)| ValidateRequestTenant {
-                .collect(),
+                        id: *id,
-        };
+                        gen: (*generation).into().expect(
                            "Generation should always be valid for a Tenant doing deletions",
                        ),
                    })
                    .collect(),
            };
-        failpoint_support::sleep_millis_async!("control-plane-client-validate-sleep", &self.cancel);
+            failpoint_support::sleep_millis_async!(
-        if self.cancel.is_cancelled() {
+                "control-plane-client-validate-sleep",
-            return Err(RetryForeverError::ShuttingDown);
+                &self.cancel
            );
            if self.cancel.is_cancelled() {
                return Err(RetryForeverError::ShuttingDown);
            }
            let response: ValidateResponse =
                self.retry_http_forever(&re_attach_path, request).await?;
            for rt in response.tenants {
                result.insert(rt.id, rt.valid);
            }
        }
-        let response: ValidateResponse = self.retry_http_forever(&re_attach_path, request).await?;
+        Ok(result.into_iter().collect())
        Ok(response
            .tenants
            .into_iter()
            .map(|rt| (rt.id, rt.valid))
            .collect())
    }
 }
--- a/pageserver/src/deletion_queue.rs
+++ b/pageserver/src/deletion_queue.rs
@@ -6,7 +6,7 @@ use std::collections::HashMap;
 use std::sync::Arc;
 use std::time::Duration;
-use crate::control_plane_client::ControlPlaneGenerationsApi;
+use crate::controller_upcall_client::ControlPlaneGenerationsApi;
 use crate::metrics;
 use crate::tenant::remote_timeline_client::remote_layer_path;
 use crate::tenant::remote_timeline_client::remote_timeline_path;
@@ -622,7 +622,7 @@ impl DeletionQueue {
    /// If remote_storage is None, then the returned workers will also be None.
    pub fn new<C>(
        remote_storage: GenericRemoteStorage,
-        control_plane_client: Option<C>,
+        controller_upcall_client: Option<C>,
        conf: &'static PageServerConf,
    ) -> (Self, Option<DeletionQueueWorkers<C>>)
    where
@@ -662,7 +662,7 @@ impl DeletionQueue {
                    conf,
                    backend_rx,
                    executor_tx,
-                    control_plane_client,
+                    controller_upcall_client,
                    lsn_table.clone(),
                    cancel.clone(),
                ),
@@ -704,7 +704,7 @@ mod test {
    use tokio::task::JoinHandle;
    use crate::{
-        control_plane_client::RetryForeverError,
+        controller_upcall_client::RetryForeverError,
        repository::Key,
        tenant::{harness::TenantHarness, storage_layer::DeltaLayerName},
    };
--- a/pageserver/src/deletion_queue/validator.rs
+++ b/pageserver/src/deletion_queue/validator.rs
@@ -25,8 +25,8 @@ use tracing::info;
 use tracing::warn;
 use crate::config::PageServerConf;
-use crate::control_plane_client::ControlPlaneGenerationsApi;
+use crate::controller_upcall_client::ControlPlaneGenerationsApi;
-use crate::control_plane_client::RetryForeverError;
+use crate::controller_upcall_client::RetryForeverError;
 use crate::metrics;
 use crate::virtual_file::MaybeFatalIo;
@@ -61,7 +61,7 @@ where
    tx: tokio::sync::mpsc::Sender<DeleterMessage>,
    // Client for calling into control plane API for validation of deletes
-    control_plane_client: Option<C>,
+    controller_upcall_client: Option<C>,
    // DeletionLists which are waiting generation validation.  Not safe to
    // execute until [`validate`] has processed them.
@@ -94,7 +94,7 @@ where
        conf: &'static PageServerConf,
        rx: tokio::sync::mpsc::Receiver<ValidatorQueueMessage>,
        tx: tokio::sync::mpsc::Sender<DeleterMessage>,
-        control_plane_client: Option<C>,
+        controller_upcall_client: Option<C>,
        lsn_table: Arc<std::sync::RwLock<VisibleLsnUpdates>>,
        cancel: CancellationToken,
    ) -> Self {
@@ -102,7 +102,7 @@ where
            conf,
            rx,
            tx,
-            control_plane_client,
+            controller_upcall_client,
            lsn_table,
            pending_lists: Vec::new(),
            validated_lists: Vec::new(),
@@ -145,8 +145,8 @@ where
            return Ok(());
        }
-        let tenants_valid = if let Some(control_plane_client) = &self.control_plane_client {
+        let tenants_valid = if let Some(controller_upcall_client) = &self.controller_upcall_client {
-            match control_plane_client
+            match controller_upcall_client
                .validate(tenant_generations.iter().map(|(k, v)| (*k, *v)).collect())
                .await
            {
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -589,6 +589,10 @@ async fn timeline_create_handler(
                StatusCode::SERVICE_UNAVAILABLE,
                HttpErrorBody::from_msg(e.to_string()),
            ),
            Err(e @ tenant::CreateTimelineError::AncestorArchived) => json_response(
                StatusCode::NOT_ACCEPTABLE,
                HttpErrorBody::from_msg(e.to_string()),
            ),
            Err(tenant::CreateTimelineError::ShuttingDown) => json_response(
                StatusCode::SERVICE_UNAVAILABLE,
                HttpErrorBody::from_msg("tenant shutting down".to_string()),
@@ -2955,7 +2959,7 @@ pub fn make_router(
            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/preserve_initdb_archive",
            |r| api_handler(r, timeline_preserve_initdb_handler),
        )
-        .post(
+        .put(
            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/archival_config",
            |r| api_handler(r, timeline_archival_config_handler),
        )
--- a/pageserver/src/lib.rs
+++ b/pageserver/src/lib.rs
@@ -6,7 +6,7 @@ pub mod basebackup;
 pub mod config;
 pub mod consumption_metrics;
 pub mod context;
-pub mod control_plane_client;
+pub mod controller_upcall_client;
 pub mod deletion_queue;
 pub mod disk_usage_eviction_task;
 pub mod http;
--- a/pageserver/src/metrics.rs
+++ b/pageserver/src/metrics.rs
@@ -8,6 +8,8 @@ use metrics::{
 };
 use once_cell::sync::Lazy;
 use pageserver_api::shard::TenantShardId;
 use postgres_backend::{is_expected_io_error, QueryError};
 use pq_proto::framed::ConnectionError;
 use strum::{EnumCount, VariantNames};
 use strum_macros::{IntoStaticStr, VariantNames};
 use tracing::warn;
@@ -1383,7 +1385,7 @@ impl SmgrQueryTimePerTimeline {
        &'a self,
        op: SmgrQueryType,
        ctx: &'c RequestContext,
-    ) -> Option<impl Drop + '_> {
+    ) -> Option<impl Drop + 'a> {
        let start = Instant::now();
        self.global_started[op as usize].inc();
@@ -1508,6 +1510,7 @@ static COMPUTE_STARTUP_BUCKETS: Lazy<[f64; 28]> = Lazy::new(|| {
 /// Set of histograms for basebackup query durations, one histogram per outcome.
 ///
 /// The `BASEBACKUP_QUERY_TIME` static fills each field from the same metric vector,
 /// using the label values `"ok"`, `"error"` and `"client_error"` respectively.
 pub(crate) struct BasebackupQueryTime {
    /// Histogram observed when the query result is `Ok`.
    ok: Histogram,
    /// Histogram observed for every error that is not counted as a client error.
    error: Histogram,
    /// Histogram observed when the error is a `QueryError::Disconnected` wrapping an
    /// I/O error for which `is_expected_io_error` returns true.
    client_error: Histogram,
 }
 pub(crate) static BASEBACKUP_QUERY_TIME: Lazy<BasebackupQueryTime> = Lazy::new(|| {
@@ -1521,6 +1524,7 @@ pub(crate) static BASEBACKUP_QUERY_TIME: Lazy<BasebackupQueryTime> = Lazy::new(|
    BasebackupQueryTime {
        ok: vec.get_metric_with_label_values(&["ok"]).unwrap(),
        error: vec.get_metric_with_label_values(&["error"]).unwrap(),
        client_error: vec.get_metric_with_label_values(&["client_error"]).unwrap(),
    }
 });
@@ -1534,7 +1538,7 @@ impl BasebackupQueryTime {
    pub(crate) fn start_recording<'c: 'a, 'a>(
        &'a self,
        ctx: &'c RequestContext,
-    ) -> BasebackupQueryTimeOngoingRecording<'_, '_> {
+    ) -> BasebackupQueryTimeOngoingRecording<'a, 'a> {
        let start = Instant::now();
        match ctx.micros_spent_throttled.open() {
            Ok(()) => (),
@@ -1557,7 +1561,7 @@ impl BasebackupQueryTime {
 }
 impl<'a, 'c> BasebackupQueryTimeOngoingRecording<'a, 'c> {
-    pub(crate) fn observe<T, E>(self, res: &Result<T, E>) {
+    pub(crate) fn observe<T>(self, res: &Result<T, QueryError>) {
        let elapsed = self.start.elapsed();
        let ex_throttled = self
            .ctx
@@ -1576,10 +1580,15 @@ impl<'a, 'c> BasebackupQueryTimeOngoingRecording<'a, 'c> {
                elapsed
            }
        };
-        let metric = if res.is_ok() {
+        // If you want to change the categorization of a specific error, also change it in `log_query_error`.
-            &self.parent.ok
+        let metric = match res {
-        } else {
+            Ok(_) => &self.parent.ok,
-            &self.parent.error
+            Err(QueryError::Disconnected(ConnectionError::Io(io_error)))
                if is_expected_io_error(io_error) =>
            {
                &self.parent.client_error
            }
            Err(_) => &self.parent.error,
        };
        metric.observe(ex_throttled.as_secs_f64());
    }
@@ -3208,45 +3217,38 @@ pub(crate) mod tenant_throttling {
    impl TimelineGet {
        pub(crate) fn new(tenant_shard_id: &TenantShardId) -> Self {
            let per_tenant_label_values = &[
                KIND,
                &tenant_shard_id.tenant_id.to_string(),
                &tenant_shard_id.shard_slug().to_string(),
            ];
            TimelineGet {
                count_accounted_start: {
                    GlobalAndPerTenantIntCounter {
                        global: COUNT_ACCOUNTED_START.with_label_values(&[KIND]),
-                        per_tenant: COUNT_ACCOUNTED_START_PER_TENANT.with_label_values(&[
+                        per_tenant: COUNT_ACCOUNTED_START_PER_TENANT
-                            KIND,
+                            .with_label_values(per_tenant_label_values),
                            &tenant_shard_id.tenant_id.to_string(),
                            &tenant_shard_id.shard_slug().to_string(),
                        ]),
                    }
                },
                count_accounted_finish: {
                    GlobalAndPerTenantIntCounter {
                        global: COUNT_ACCOUNTED_FINISH.with_label_values(&[KIND]),
-                        per_tenant: COUNT_ACCOUNTED_FINISH_PER_TENANT.with_label_values(&[
+                        per_tenant: COUNT_ACCOUNTED_FINISH_PER_TENANT
-                            KIND,
+                            .with_label_values(per_tenant_label_values),
                            &tenant_shard_id.tenant_id.to_string(),
                            &tenant_shard_id.shard_slug().to_string(),
                        ]),
                    }
                },
                wait_time: {
                    GlobalAndPerTenantIntCounter {
                        global: WAIT_USECS.with_label_values(&[KIND]),
-                        per_tenant: WAIT_USECS_PER_TENANT.with_label_values(&[
+                        per_tenant: WAIT_USECS_PER_TENANT
-                            KIND,
+                            .with_label_values(per_tenant_label_values),
                            &tenant_shard_id.tenant_id.to_string(),
                            &tenant_shard_id.shard_slug().to_string(),
                        ]),
                    }
                },
                count_throttled: {
                    GlobalAndPerTenantIntCounter {
                        global: WAIT_COUNT.with_label_values(&[KIND]),
-                        per_tenant: WAIT_COUNT_PER_TENANT.with_label_values(&[
+                        per_tenant: WAIT_COUNT_PER_TENANT
-                            KIND,
+                            .with_label_values(per_tenant_label_values),
                            &tenant_shard_id.tenant_id.to_string(),
                            &tenant_shard_id.shard_slug().to_string(),
                        ]),
                    }
                },
            }
--- a/pageserver/src/pgdatadir_mapping.rs
+++ b/pageserver/src/pgdatadir_mapping.rs
@@ -840,6 +840,36 @@ impl Timeline {
        Ok(total_size * BLCKSZ as u64)
    }
    /// Build the keyspaces that gc-compaction should consider: every key that is in use
    /// at AND below the given LSN. This is only used for gc-compaction.
    ///
    /// The legacy compaction's `collect_keyspace` cannot be reused here, because
    /// gc-compaction works on data at multiple LSNs and has to know that some key ranges
    /// only need to be kept for a specific range of LSNs.
    ///
    /// Example: the user created branches at LSN 10 and LSN 20, created table A at LSN 10
    /// and dropped it at LSN 20. `collect_keyspace` at LSN 10 returns the key range of
    /// that table, while at LSN 20 it does not. The keyspace at a single LSN is therefore
    /// not enough to decide which keys gc-compaction must retain or drop.
    ///
    /// For now, the only thing excluded is the AUX-v1 key. In the future, this function
    /// will be extended to return the keyspace to be retained for each of the branch LSNs.
    ///
    /// Returns `(dense keyspace, sparse keyspace)`.
    pub(crate) async fn collect_gc_compaction_keyspace(
        &self,
    ) -> Result<(KeySpace, SparseKeySpace), CollectKeySpaceError> {
        let metadata_range = Key::metadata_key_range();

        // Dense part: everything from `Key::MIN` up to the start of the metadata key
        // range, with the single AUX-v1 key (`AUX_FILES_KEY`) cut out.
        let dense = KeySpace {
            ranges: vec![
                Key::MIN..AUX_FILES_KEY,
                AUX_FILES_KEY.next()..metadata_range.start,
            ],
        };

        // Sparse part: the metadata key range as a single range.
        let sparse = SparseKeySpace(KeySpace::single(metadata_range));

        Ok((dense, sparse))
    }
    ///
    /// Get a KeySpace that covers all the Keys that are in use at the given LSN.
    /// Anything that's not listed may be removed from the underlying storage (from
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -18,7 +18,6 @@ use camino::Utf8Path;
 use camino::Utf8PathBuf;
 use enumset::EnumSet;
 use futures::stream::FuturesUnordered;
 use futures::FutureExt;
 use futures::StreamExt;
 use pageserver_api::models;
 use pageserver_api::models::AuxFilePolicy;
@@ -34,6 +33,7 @@ use remote_storage::GenericRemoteStorage;
 use remote_storage::TimeoutOrCancel;
 use std::collections::BTreeMap;
 use std::fmt;
 use std::future::Future;
 use std::sync::Weak;
 use std::time::SystemTime;
 use storage_broker::BrokerClientChannel;
@@ -563,6 +563,8 @@ pub enum CreateTimelineError {
    AncestorLsn(anyhow::Error),
    #[error("ancestor timeline is not active")]
    AncestorNotActive,
    #[error("ancestor timeline is archived")]
    AncestorArchived,
    #[error("tenant shutting down")]
    ShuttingDown,
    #[error(transparent)]
@@ -1031,13 +1033,9 @@ impl Tenant {
        }
        Ok(TenantPreload {
-            timelines: Self::load_timeline_metadata(
+            timelines: self
-                self,
+                .load_timelines_metadata(remote_timeline_ids, remote_storage, cancel)
-                remote_timeline_ids,
+                .await?,
                remote_storage,
                cancel,
            )
            .await?,
        })
    }
@@ -1303,7 +1301,7 @@ impl Tenant {
        .await
    }
-    async fn load_timeline_metadata(
+    async fn load_timelines_metadata(
        self: &Arc<Tenant>,
        timeline_ids: HashSet<TimelineId>,
        remote_storage: &GenericRemoteStorage,
@@ -1311,33 +1309,10 @@ impl Tenant {
    ) -> anyhow::Result<HashMap<TimelineId, TimelinePreload>> {
        let mut part_downloads = JoinSet::new();
        for timeline_id in timeline_ids {
            let client = RemoteTimelineClient::new(
                remote_storage.clone(),
                self.deletion_queue_client.clone(),
                self.conf,
                self.tenant_shard_id,
                timeline_id,
                self.generation,
            );
            let cancel_clone = cancel.clone();
            part_downloads.spawn(
-                async move {
+                self.load_timeline_metadata(timeline_id, remote_storage.clone(), cancel_clone)
-                    debug!("starting index part download");
+                    .instrument(info_span!("download_index_part", %timeline_id)),
                    let index_part = client.download_index_file(&cancel_clone).await;
                    debug!("finished index part download");
                    Result::<_, anyhow::Error>::Ok(TimelinePreload {
                        client,
                        timeline_id,
                        index_part,
                    })
                }
                .map(move |res| {
                    res.with_context(|| format!("download index part for timeline {timeline_id}"))
                })
                .instrument(info_span!("download_index_part", %timeline_id)),
            );
        }
@@ -1348,8 +1323,7 @@ impl Tenant {
                next = part_downloads.join_next() => {
                    match next {
                        Some(result) => {
-                            let preload_result = result.context("join preload task")?;
+                            let preload = result.context("join preload task")?;
                            let preload = preload_result?;
                            timeline_preloads.insert(preload.timeline_id, preload);
                        },
                        None => {
@@ -1366,6 +1340,36 @@ impl Tenant {
        Ok(timeline_preloads)
    }
    /// Prepare the index part download for a single timeline.
    ///
    /// The [`RemoteTimelineClient`] is constructed immediately, while this function runs.
    /// The download itself only happens once the returned future is polled.
    ///
    /// The outcome of `download_index_file` is stored as-is in
    /// [`TimelinePreload::index_part`]; this function does not unwrap or propagate it, so
    /// the caller decides how to handle a failed download.
    ///
    /// The returned future must be polled inside a span that carries the tenant and
    /// timeline id (checked by `debug_assert_current_span_has_tenant_and_timeline_id`).
    fn load_timeline_metadata(
        self: &Arc<Tenant>,
        timeline_id: TimelineId,
        remote_storage: GenericRemoteStorage,
        cancel: CancellationToken,
    ) -> impl Future<Output = TimelinePreload> {
        // `remote_storage` is owned and not used again, so hand it over directly
        // instead of cloning it.
        let client = RemoteTimelineClient::new(
            remote_storage,
            self.deletion_queue_client.clone(),
            self.conf,
            self.tenant_shard_id,
            timeline_id,
            self.generation,
        );
        async move {
            debug_assert_current_span_has_tenant_and_timeline_id();
            debug!("starting index part download");

            let index_part = client.download_index_file(&cancel).await;

            debug!("finished index part download");

            TimelinePreload {
                client,
                timeline_id,
                index_part,
            }
        }
    }
    pub(crate) async fn apply_timeline_archival_config(
        &self,
        timeline_id: TimelineId,
@@ -1696,6 +1700,11 @@ impl Tenant {
                    return Err(CreateTimelineError::AncestorNotActive);
                }
                if ancestor_timeline.is_archived() == Some(true) {
                    info!("tried to branch archived timeline");
                    return Err(CreateTimelineError::AncestorArchived);
                }
                if let Some(lsn) = ancestor_start_lsn.as_mut() {
                    *lsn = lsn.align();
@@ -1966,9 +1975,6 @@ impl Tenant {
                TenantState::Activating(_) | TenantState::Active | TenantState::Broken { .. } | TenantState::Stopping { .. } => {
                    panic!("caller is responsible for calling activate() only on Loading / Attaching tenants, got {state:?}", state = current_state);
                }
                TenantState::Loading => {
                    *current_state = TenantState::Activating(ActivatingFrom::Loading);
                }
                TenantState::Attaching => {
                    *current_state = TenantState::Activating(ActivatingFrom::Attaching);
                }
@@ -2149,7 +2155,7 @@ impl Tenant {
    async fn set_stopping(
        &self,
        progress: completion::Barrier,
-        allow_transition_from_loading: bool,
+        _allow_transition_from_loading: bool,
        allow_transition_from_attaching: bool,
    ) -> Result<(), SetStoppingError> {
        let mut rx = self.state.subscribe();
@@ -2164,7 +2170,6 @@ impl Tenant {
                );
                false
            }
            TenantState::Loading => allow_transition_from_loading,
            TenantState::Active | TenantState::Broken { .. } | TenantState::Stopping { .. } => true,
        })
        .await
@@ -2183,13 +2188,6 @@ impl Tenant {
                *current_state = TenantState::Stopping { progress };
                true
            }
            TenantState::Loading => {
                if !allow_transition_from_loading {
                    unreachable!("3we ensured above that we're done with activation, and, there is no re-activation")
                };
                *current_state = TenantState::Stopping { progress };
                true
            }
            TenantState::Active => {
                // FIXME: due to time-of-check vs time-of-use issues, it can happen that new timelines
                // are created after the transition to Stopping. That's harmless, as the Timelines
@@ -2245,7 +2243,7 @@ impl Tenant {
        // The load & attach routines own the tenant state until it has reached `Active`.
        // So, wait until it's done.
        rx.wait_for(|state| match state {
-            TenantState::Activating(_) | TenantState::Loading | TenantState::Attaching => {
+            TenantState::Activating(_) | TenantState::Attaching => {
                info!(
                    "waiting for {} to turn Active|Broken|Stopping",
                    <&'static str>::from(state)
@@ -2265,7 +2263,7 @@ impl Tenant {
        let reason = reason.to_string();
        self.state.send_modify(|current_state| {
            match *current_state {
-                TenantState::Activating(_) | TenantState::Loading | TenantState::Attaching => {
+                TenantState::Activating(_) | TenantState::Attaching => {
                    unreachable!("we ensured above that we're done with activation, and, there is no re-activation")
                }
                TenantState::Active => {
@@ -2309,7 +2307,7 @@ impl Tenant {
        loop {
            let current_state = receiver.borrow_and_update().clone();
            match current_state {
-                TenantState::Loading | TenantState::Attaching | TenantState::Activating(_) => {
+                TenantState::Attaching | TenantState::Activating(_) => {
                    // in these states, there's a chance that we can reach ::Active
                    self.activate_now();
                    match timeout_cancellable(timeout, &self.cancel, receiver.changed()).await {
@@ -3625,7 +3623,7 @@ impl Tenant {
        start_lsn: Lsn,
        ancestor: Option<Arc<Timeline>>,
        last_aux_file_policy: Option<AuxFilePolicy>,
-    ) -> anyhow::Result<UninitializedTimeline> {
+    ) -> anyhow::Result<UninitializedTimeline<'a>> {
        let tenant_shard_id = self.tenant_shard_id;
        let resources = self.build_timeline_resources(new_timeline_id);
@@ -4142,7 +4140,7 @@ pub(crate) mod harness {
            let walredo_mgr = Arc::new(WalRedoManager::from(TestRedoManager));
            let tenant = Arc::new(Tenant::new(
-                TenantState::Loading,
+                TenantState::Attaching,
                self.conf,
                AttachedTenantConf::try_from(LocationConf::attached_single(
                    TenantConfOpt::from(self.tenant_conf.clone()),
--- a/pageserver/src/tenant/checks.rs
+++ b/pageserver/src/tenant/checks.rs
@@ -5,6 +5,7 @@ use itertools::Itertools;
 use super::storage_layer::LayerName;
 /// Checks whether a layer map is valid (i.e., is a valid result of the current compaction algorithm if nothing goes wrong).
 ///
 /// The function checks if we can split the LSN range of a delta layer only at the LSNs of the delta layers. For example,
 ///
 /// ```plain
--- a/pageserver/src/tenant/mgr.rs
+++ b/pageserver/src/tenant/mgr.rs
@@ -30,8 +30,8 @@ use utils::{backoff, completion, crashsafe};
 use crate::config::PageServerConf;
 use crate::context::{DownloadBehavior, RequestContext};
-use crate::control_plane_client::{
+use crate::controller_upcall_client::{
-    ControlPlaneClient, ControlPlaneGenerationsApi, RetryForeverError,
+    ControlPlaneGenerationsApi, ControllerUpcallClient, RetryForeverError,
 };
 use crate::deletion_queue::DeletionQueueClient;
 use crate::http::routes::ACTIVE_TENANT_TIMEOUT;
@@ -122,7 +122,7 @@ pub(crate) enum ShardSelector {
    Known(ShardIndex),
 }
-/// A convenience for use with the re_attach ControlPlaneClient function: rather
+/// A convenience for use with the re_attach ControllerUpcallClient function: rather
 /// than the serializable struct, we build this enum that encapsulates
 /// the invariant that attached tenants always have generations.
 ///
@@ -341,7 +341,7 @@ async fn init_load_generations(
            "Emergency mode!  Tenants will be attached unsafely using their last known generation"
        );
        emergency_generations(tenant_confs)
-    } else if let Some(client) = ControlPlaneClient::new(conf, cancel) {
+    } else if let Some(client) = ControllerUpcallClient::new(conf, cancel) {
        info!("Calling control plane API to re-attach tenants");
        // If we are configured to use the control plane API, then it is the source of truth for what tenants to load.
        match client.re_attach(conf).await {
--- a/pageserver/src/tenant/storage_layer.rs
+++ b/pageserver/src/tenant/storage_layer.rs
@@ -1,13 +1,13 @@
 //! Common traits and structs for layers
 pub mod delta_layer;
 pub mod filter_iterator;
 pub mod image_layer;
 pub mod inmemory_layer;
 pub(crate) mod layer;
 mod layer_desc;
 mod layer_name;
 pub mod merge_iterator;
 pub mod split_writer;
 use crate::context::{AccessStatsBehavior, RequestContext};
--- a/pageserver/src/tenant/storage_layer/delta_layer.rs
+++ b/pageserver/src/tenant/storage_layer/delta_layer.rs
@@ -39,7 +39,7 @@ use crate::tenant::disk_btree::{
 use crate::tenant::storage_layer::layer::S3_UPLOAD_LIMIT;
 use crate::tenant::timeline::GetVectoredError;
 use crate::tenant::vectored_blob_io::{
-    BlobFlag, StreamingVectoredReadPlanner, VectoredBlobReader, VectoredRead,
+    BlobFlag, BufView, StreamingVectoredReadPlanner, VectoredBlobReader, VectoredRead,
    VectoredReadCoalesceMode, VectoredReadPlanner,
 };
 use crate::tenant::PageReconstructError;
@@ -1021,13 +1021,30 @@ impl DeltaLayerInner {
                    continue;
                }
            };
-
+            let view = BufView::new_slice(&blobs_buf.buf);
            for meta in blobs_buf.blobs.iter().rev() {
                if Some(meta.meta.key) == ignore_key_with_err {
                    continue;
                }
                let blob_read = meta.read(&view).await;
                let blob_read = match blob_read {
                    Ok(buf) => buf,
                    Err(e) => {
                        reconstruct_state.on_key_error(
                            meta.meta.key,
                            PageReconstructError::Other(anyhow!(e).context(format!(
                                "Failed to decompress blob from virtual file {}",
                                self.file.path,
                            ))),
                        );
                        ignore_key_with_err = Some(meta.meta.key);
                        continue;
                    }
                };
                let value = Value::des(&blob_read);
                let value = Value::des(&blobs_buf.buf[meta.start..meta.end]);
                let value = match value {
                    Ok(v) => v,
                    Err(e) => {
@@ -1243,21 +1260,21 @@ impl DeltaLayerInner {
                buf.reserve(read.size());
                let res = reader.read_blobs(&read, buf, ctx).await?;
                let view = BufView::new_slice(&res.buf);
                for blob in res.blobs {
                    let key = blob.meta.key;
                    let lsn = blob.meta.lsn;
-                    let data = &res.buf[blob.start..blob.end];
+
                    let data = blob.read(&view).await?;
                    #[cfg(debug_assertions)]
-                    Value::des(data)
+                    Value::des(&data)
                        .with_context(|| {
                            format!(
-                                "blob failed to deserialize for {}@{}, {}..{}: {:?}",
+                                "blob failed to deserialize for {}: {:?}",
-                                blob.meta.key,
+                                blob,
-                                blob.meta.lsn,
+                                utils::Hex(&data)
                                blob.start,
                                blob.end,
                                utils::Hex(data)
                            )
                        })
                        .unwrap();
@@ -1265,15 +1282,15 @@ impl DeltaLayerInner {
                    // is it an image or will_init walrecord?
                    // FIXME: this could be handled by threading the BlobRef to the
                    // VectoredReadBuilder
-                    let will_init = crate::repository::ValueBytes::will_init(data)
+                    let will_init = crate::repository::ValueBytes::will_init(&data)
                        .inspect_err(|_e| {
                            #[cfg(feature = "testing")]
-                            tracing::error!(data=?utils::Hex(data), err=?_e, %key, %lsn, "failed to parse will_init out of serialized value");
+                            tracing::error!(data=?utils::Hex(&data), err=?_e, %key, %lsn, "failed to parse will_init out of serialized value");
                        })
                        .unwrap_or(false);
                    per_blob_copy.clear();
-                    per_blob_copy.extend_from_slice(data);
+                    per_blob_copy.extend_from_slice(&data);
                    let (tmp, res) = writer
                        .put_value_bytes(
@@ -1538,8 +1555,11 @@ impl<'a> DeltaLayerIterator<'a> {
            .read_blobs(&plan, buf, self.ctx)
            .await?;
        let frozen_buf = blobs_buf.buf.freeze();
        let view = BufView::new_bytes(frozen_buf);
        for meta in blobs_buf.blobs.iter() {
-            let value = Value::des(&frozen_buf[meta.start..meta.end])?;
+            let blob_read = meta.read(&view).await?;
            let value = Value::des(&blob_read)?;
            next_batch.push_back((meta.meta.key, meta.meta.lsn, value));
        }
        self.key_values_batch = next_batch;
@@ -1916,9 +1936,13 @@ pub(crate) mod test {
                let blobs_buf = vectored_blob_reader
                    .read_blobs(&read, buf.take().expect("Should have a buffer"), &ctx)
                    .await?;
                let view = BufView::new_slice(&blobs_buf.buf);
                for meta in blobs_buf.blobs.iter() {
-                    let value = &blobs_buf.buf[meta.start..meta.end];
+                    let value = meta.read(&view).await?;
-                    assert_eq!(value, entries_meta.index[&(meta.meta.key, meta.meta.lsn)]);
+                    assert_eq!(
                        &value[..],
                        &entries_meta.index[&(meta.meta.key, meta.meta.lsn)]
                    );
                }
                buf = Some(blobs_buf.buf);
--- a/pageserver/src/tenant/storage_layer/filter_iterator.rs
+++ b/pageserver/src/tenant/storage_layer/filter_iterator.rs
@@ -0,0 +1,205 @@
 use std::ops::Range;
 use anyhow::bail;
 use pageserver_api::{
    key::Key,
    keyspace::{KeySpace, SparseKeySpace},
 };
 use utils::lsn::Lsn;
 use crate::repository::Value;
 use super::merge_iterator::MergeIterator;
 /// A filter iterator over merge iterators (and can be easily extended to other types of iterators).
 ///
 /// The iterator will skip any keys not included in the keyspace filter. In other words, the keyspace filter contains the keys
 /// to be retained.
 pub struct FilterIterator<'a> {
    /// Underlying merge iterator that supplies the `(Key, Lsn, Value)` items to be filtered.
    inner: MergeIterator<'a>,
    /// Key ranges to retain; `create` sorts them by start key and rejects overlapping ranges.
    retain_key_filters: Vec<Range<Key>>,
    /// Index into `retain_key_filters` of the range currently being matched; `next` only ever
    /// moves it forward.
    current_filter_idx: usize,
 }
 impl<'a> FilterIterator<'a> {
    pub fn create(
        inner: MergeIterator<'a>,
        dense_keyspace: KeySpace,
        sparse_keyspace: SparseKeySpace,
    ) -> anyhow::Result<Self> {
        let mut retain_key_filters = Vec::new();
        retain_key_filters.extend(dense_keyspace.ranges);
        retain_key_filters.extend(sparse_keyspace.0.ranges);
        retain_key_filters.sort_by(|a, b| a.start.cmp(&b.start));
        // Verify key filters are non-overlapping and sorted
        for window in retain_key_filters.windows(2) {
            if window[0].end > window[1].start {
                bail!(
                    "Key filters are overlapping: {:?} and {:?}",
                    window[0],
                    window[1]
                );
            }
        }
        Ok(Self {
            inner,
            retain_key_filters,
            current_filter_idx: 0,
        })
    }
    pub async fn next(&mut self) -> anyhow::Result<Option<(Key, Lsn, Value)>> {
        while let Some(item) = self.inner.next().await? {
            while self.current_filter_idx < self.retain_key_filters.len()
                && item.0 >= self.retain_key_filters[self.current_filter_idx].end
            {
                // [filter region]    [filter region]     [filter region]
                //                                     ^ item
                //                    ^ current filter
                self.current_filter_idx += 1;
                // [filter region]    [filter region]     [filter region]
                //                                     ^ item
                //                                        ^ current filter
            }
            if self.current_filter_idx >= self.retain_key_filters.len() {
                // We already exhausted all filters, so we should return now
                // [filter region] [filter region] [filter region]
                //                                                    ^ item
                //                                                 ^ current filter (nothing)
                return Ok(None);
            }
            if self.retain_key_filters[self.current_filter_idx].contains(&item.0) {
                // [filter region]    [filter region]     [filter region]
                //                                              ^ item
                //                                        ^ current filter
                return Ok(Some(item));
            }
            // If the key is not contained in the key retaining filters, continue to the next item.
            // [filter region]    [filter region]     [filter region]
            //                                     ^ item
            //                                        ^ current filter
        }
        Ok(None)
    }
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    use itertools::Itertools;
    use pageserver_api::key::Key;
    use utils::lsn::Lsn;
    use crate::{
        tenant::{
            harness::{TenantHarness, TIMELINE_ID},
            storage_layer::delta_layer::test::produce_delta_layer,
        },
        DEFAULT_PG_VERSION,
    };
    /// Drives `filter_iter` and `expect` in lockstep, asserting that both end at the same
    /// position and that every yielded `(key, lsn, value)` equals the expected entry.
    async fn assert_filter_iter_equal(
        filter_iter: &mut FilterIterator<'_>,
        expect: &[(Key, Lsn, Value)],
    ) {
        let mut expect_iter = expect.iter();
        loop {
            let o1 = filter_iter.next().await.unwrap();
            let o2 = expect_iter.next();
            // Both sides must run out of items at the same step.
            assert_eq!(o1.is_some(), o2.is_some());
            if o1.is_none() && o2.is_none() {
                break;
            }
            let (k1, l1, v1) = o1.unwrap();
            let (k2, l2, v2) = o2.unwrap();
            assert_eq!(&k1, k2);
            assert_eq!(l1, *l2);
            assert_eq!(&v1, v2);
        }
    }
    /// Writes one delta layer holding `N` keys and checks which of them the filter lets through
    /// for two different sets of retained key ranges.
    #[tokio::test]
    async fn filter_keyspace_iterator() {
        use crate::repository::Value;
        use bytes::Bytes;
        let harness = TenantHarness::create("filter_iterator_filter_keyspace_iterator")
            .await
            .unwrap();
        let (tenant, ctx) = harness.load().await;
        let tline = tenant
            .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)
            .await
            .unwrap();
        // Builds a key from a fixed hex template, varying only `field6`.
        fn get_key(id: u32) -> Key {
            let mut key = Key::from_hex("000000000033333333444444445500000000").unwrap();
            key.field6 = id;
            key
        }
        const N: usize = 100;
        // One image value per key id in 0..N; the LSN cycles through ten values.
        let test_deltas1 = (0..N)
            .map(|idx| {
                (
                    get_key(idx as u32),
                    Lsn(0x20 * ((idx as u64) % 10 + 1)),
                    Value::Image(Bytes::from(format!("img{idx:05}"))),
                )
            })
            .collect_vec();
        let resident_layer_1 = produce_delta_layer(&tenant, &tline, test_deltas1.clone(), &ctx)
            .await
            .unwrap();
        // Case 1: the last two ranges reach past the N keys that exist (one of them lies
        // entirely beyond the data).
        let merge_iter = MergeIterator::create(
            &[resident_layer_1.get_as_delta(&ctx).await.unwrap()],
            &[],
            &ctx,
        );
        let mut filter_iter = FilterIterator::create(
            merge_iter,
            KeySpace {
                ranges: vec![
                    get_key(5)..get_key(10),
                    get_key(20)..get_key(30),
                    get_key(90)..get_key(110),
                    get_key(1000)..get_key(2000),
                ],
            },
            SparseKeySpace(KeySpace::default()),
        )
        .unwrap();
        // Only keys that were actually written are expected, hence 90..100 for the third range.
        let mut result = Vec::new();
        result.extend(test_deltas1[5..10].iter().cloned());
        result.extend(test_deltas1[20..30].iter().cloned());
        result.extend(test_deltas1[90..100].iter().cloned());
        assert_filter_iter_equal(&mut filter_iter, &result).await;
        // Case 2: the ranges end before the data does, so keys 95..100 must not be returned.
        let merge_iter = MergeIterator::create(
            &[resident_layer_1.get_as_delta(&ctx).await.unwrap()],
            &[],
            &ctx,
        );
        let mut filter_iter = FilterIterator::create(
            merge_iter,
            KeySpace {
                ranges: vec![
                    get_key(0)..get_key(10),
                    get_key(20)..get_key(30),
                    get_key(90)..get_key(95),
                ],
            },
            SparseKeySpace(KeySpace::default()),
        )
        .unwrap();
        let mut result = Vec::new();
        result.extend(test_deltas1[0..10].iter().cloned());
        result.extend(test_deltas1[20..30].iter().cloned());
        result.extend(test_deltas1[90..95].iter().cloned());
        assert_filter_iter_equal(&mut filter_iter, &result).await;
    }
 }
--- a/pageserver/src/tenant/storage_layer/image_layer.rs
+++ b/pageserver/src/tenant/storage_layer/image_layer.rs
@@ -36,7 +36,8 @@ use crate::tenant::disk_btree::{
 };
 use crate::tenant::timeline::GetVectoredError;
 use crate::tenant::vectored_blob_io::{
-    BlobFlag, StreamingVectoredReadPlanner, VectoredBlobReader, VectoredRead, VectoredReadPlanner,
+    BlobFlag, BufView, StreamingVectoredReadPlanner, VectoredBlobReader, VectoredRead,
    VectoredReadPlanner,
 };
 use crate::tenant::PageReconstructError;
 use crate::virtual_file::owned_buffers_io::io_buf_ext::IoBufExt;
@@ -547,15 +548,15 @@ impl ImageLayerInner {
            let buf = BytesMut::with_capacity(buf_size);
            let blobs_buf = vectored_blob_reader.read_blobs(&read, buf, ctx).await?;
            let frozen_buf = blobs_buf.buf.freeze();
            let view = BufView::new_bytes(frozen_buf);
            for meta in blobs_buf.blobs.iter() {
-                let img_buf = frozen_buf.slice(meta.start..meta.end);
+                let img_buf = meta.read(&view).await?;
                key_count += 1;
                writer
-                    .put_image(meta.meta.key, img_buf, ctx)
+                    .put_image(meta.meta.key, img_buf.into_bytes(), ctx)
                    .await
                    .context(format!("Storing key {}", meta.meta.key))?;
            }
@@ -602,13 +603,28 @@ impl ImageLayerInner {
            match res {
                Ok(blobs_buf) => {
                    let frozen_buf = blobs_buf.buf.freeze();
-
+                    let view = BufView::new_bytes(frozen_buf);
                    for meta in blobs_buf.blobs.iter() {
-                        let img_buf = frozen_buf.slice(meta.start..meta.end);
+                        let img_buf = meta.read(&view).await;
                        let img_buf = match img_buf {
                            Ok(img_buf) => img_buf,
                            Err(e) => {
                                reconstruct_state.on_key_error(
                                    meta.meta.key,
                                    PageReconstructError::Other(anyhow!(e).context(format!(
                                        "Failed to decompress blob from virtual file {}",
                                        self.file.path,
                                    ))),
                                );
                                continue;
                            }
                        };
                        reconstruct_state.update_key(
                            &meta.meta.key,
                            self.lsn,
-                            Value::Image(img_buf),
+                            Value::Image(img_buf.into_bytes()),
                        );
                    }
                }
@@ -1025,10 +1041,15 @@ impl<'a> ImageLayerIterator<'a> {
        let blobs_buf = vectored_blob_reader
            .read_blobs(&plan, buf, self.ctx)
            .await?;
-        let frozen_buf: Bytes = blobs_buf.buf.freeze();
+        let frozen_buf = blobs_buf.buf.freeze();
        let view = BufView::new_bytes(frozen_buf);
        for meta in blobs_buf.blobs.iter() {
-            let img_buf = frozen_buf.slice(meta.start..meta.end);
+            let img_buf = meta.read(&view).await?;
-            next_batch.push_back((meta.meta.key, self.image_layer.lsn, Value::Image(img_buf)));
+            next_batch.push_back((
                meta.meta.key,
                self.image_layer.lsn,
                Value::Image(img_buf.into_bytes()),
            ));
        }
        self.key_values_batch = next_batch;
        Ok(())
--- a/pageserver/src/tenant/tasks.rs
+++ b/pageserver/src/tenant/tasks.rs
@@ -481,8 +481,7 @@ async fn ingest_housekeeping_loop(tenant: Arc<Tenant>, cancel: CancellationToken
                let allowed_rps = tenant.timeline_get_throttle.steady_rps();
                let delta = now - prev;
                info!(
-                    n_seconds=%format_args!("{:.3}",
+                    n_seconds=%format_args!("{:.3}", delta.as_secs_f64()),
                    delta.as_secs_f64()),
                    count_accounted = count_accounted_finish,  // don't break existing log scraping
                    count_throttled,
                    sum_throttled_usecs,
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -112,7 +112,7 @@ use pageserver_api::reltag::RelTag;
 use pageserver_api::shard::ShardIndex;
 use postgres_connection::PgConnectionConfig;
-use postgres_ffi::to_pg_timestamp;
+use postgres_ffi::{to_pg_timestamp, v14::xlog_utils, WAL_SEGMENT_SIZE};
 use utils::{
    completion,
    generation::Generation,
@@ -1337,6 +1337,10 @@ impl Timeline {
        _ctx: &RequestContext,
    ) -> anyhow::Result<LsnLease> {
        let lease = {
            // Normalize the requested LSN to be aligned, and move to the first record
            // if it points to the beginning of the page (header).
            let lsn = xlog_utils::normalize_lsn(lsn, WAL_SEGMENT_SIZE);
            let mut gc_info = self.gc_info.write().unwrap();
            let valid_until = SystemTime::now() + length;
@@ -3597,7 +3601,7 @@ impl Timeline {
                    ctx,
                )
                .await
-                .map_err(|e| FlushLayerError::from_anyhow(self, e))?;
+                .map_err(|e| FlushLayerError::from_anyhow(self, e.into()))?;
            if self.cancel.is_cancelled() {
                return Err(FlushLayerError::Cancelled);
@@ -3836,16 +3840,20 @@ impl Timeline {
        partition_size: u64,
        flags: EnumSet<CompactFlags>,
        ctx: &RequestContext,
-    ) -> anyhow::Result<((KeyPartitioning, SparseKeyPartitioning), Lsn)> {
+    ) -> Result<((KeyPartitioning, SparseKeyPartitioning), Lsn), CompactionError> {
        let Ok(mut partitioning_guard) = self.partitioning.try_lock() else {
            // NB: there are two callers, one is the compaction task, of which there is only one per struct Tenant and hence Timeline.
            // The other is the initdb optimization in flush_frozen_layer, used by `bootstrap_timeline`, which runs before `.activate()`
            // and hence before the compaction task starts.
-            anyhow::bail!("repartition() called concurrently, this should not happen");
+            return Err(CompactionError::Other(anyhow!(
                "repartition() called concurrently, this should not happen"
            )));
        };
        let ((dense_partition, sparse_partition), partition_lsn) = &*partitioning_guard;
        if lsn < *partition_lsn {
-            anyhow::bail!("repartition() called with LSN going backwards, this should not happen");
+            return Err(CompactionError::Other(anyhow!(
                "repartition() called with LSN going backwards, this should not happen"
            )));
        }
        let distance = lsn.0 - partition_lsn.0;
@@ -4447,6 +4455,12 @@ pub(crate) enum CompactionError {
    Other(anyhow::Error),
 }
 impl CompactionError {
    pub fn is_cancelled(&self) -> bool {
        matches!(self, CompactionError::ShuttingDown)
    }
 }
 impl From<CollectKeySpaceError> for CompactionError {
    fn from(err: CollectKeySpaceError) -> Self {
        match err {
--- a/pageserver/src/tenant/timeline/compaction.rs
+++ b/pageserver/src/tenant/timeline/compaction.rs
@@ -31,6 +31,7 @@ use crate::context::{AccessStatsBehavior, RequestContext, RequestContextBuilder}
 use crate::page_cache;
 use crate::tenant::checks::check_valid_layermap;
 use crate::tenant::remote_timeline_client::WaitCompletionError;
 use crate::tenant::storage_layer::filter_iterator::FilterIterator;
 use crate::tenant::storage_layer::merge_iterator::MergeIterator;
 use crate::tenant::storage_layer::split_writer::{
    SplitDeltaLayerWriter, SplitImageLayerWriter, SplitWriterResult,
@@ -389,7 +390,7 @@ impl Timeline {
                // error but continue.
                //
                // Suppress error when it's due to cancellation
-                if !self.cancel.is_cancelled() {
+                if !self.cancel.is_cancelled() && !err.is_cancelled() {
                    tracing::error!("could not compact, repartitioning keyspace failed: {err:?}");
                }
                (1, false)
@@ -1772,6 +1773,7 @@ impl Timeline {
            gc_cutoff,
            lowest_retain_lsn
        );
        // Step 1: (In the future) construct a k-merge iterator over all layers. For now, simply collect all keys + LSNs.
        // Also, verify if the layer map can be split by drawing a horizontal line at every LSN start/end split point.
        let mut lsn_split_point = BTreeSet::new(); // TODO: use a better data structure (range tree / range set?)
@@ -1820,7 +1822,12 @@ impl Timeline {
                image_layers.push(layer);
            }
        }
-        let mut merge_iter = MergeIterator::create(&delta_layers, &image_layers, ctx);
+        let (dense_ks, sparse_ks) = self.collect_gc_compaction_keyspace().await?;
        let mut merge_iter = FilterIterator::create(
            MergeIterator::create(&delta_layers, &image_layers, ctx),
            dense_ks,
            sparse_ks,
        )?;
        // Step 2: Produce images+deltas. TODO: ensure newly-produced delta does not overlap with other deltas.
        // Data of the same key.
        let mut accumulated_values = Vec::new();
--- a/pageserver/src/tenant/timeline/eviction_task.rs
+++ b/pageserver/src/tenant/timeline/eviction_task.rs
@@ -30,8 +30,8 @@ use crate::{
    pgdatadir_mapping::CollectKeySpaceError,
    task_mgr::{self, TaskKind, BACKGROUND_RUNTIME},
    tenant::{
-        storage_layer::LayerVisibilityHint, tasks::BackgroundLoopKind, timeline::EvictionError,
+        size::CalculateSyntheticSizeError, storage_layer::LayerVisibilityHint,
-        LogicalSizeCalculationCause, Tenant,
+        tasks::BackgroundLoopKind, timeline::EvictionError, LogicalSizeCalculationCause, Tenant,
    },
 };
@@ -557,6 +557,8 @@ impl Timeline {
            gather_result = gather => {
                match gather_result {
                    Ok(_) => {},
                    // It can happen sometimes that we hit this instead of the cancellation token firing above
                    Err(CalculateSyntheticSizeError::Cancelled) => {}
                    Err(e) => {
                        // We don't care about the result, but, if it failed, we should log it,
                        // since consumption metric might be hitting the cached value and
--- a/pageserver/src/tenant/vectored_blob_io.rs
+++ b/pageserver/src/tenant/vectored_blob_io.rs
@@ -16,8 +16,9 @@
 //! Note that the vectored blob api does *not* go through the page cache.
 use std::collections::BTreeMap;
 use std::ops::Deref;
-use bytes::BytesMut;
+use bytes::{Bytes, BytesMut};
 use pageserver_api::key::Key;
 use tokio::io::AsyncWriteExt;
 use tokio_epoll_uring::BoundedBuf;
@@ -35,11 +36,123 @@ pub struct BlobMeta {
    pub lsn: Lsn,
 }
-/// Blob offsets into [`VectoredBlobsBuf::buf`]
+/// A view into the vectored blobs read buffer.
 #[derive(Clone, Debug)]
 pub(crate) enum BufView<'a> {
    /// View backed by a borrowed byte slice.
    Slice(&'a [u8]),
    /// View backed by a [`bytes::Bytes`] handle.
    Bytes(bytes::Bytes),
 }
 impl<'a> BufView<'a> {
    /// Wraps a borrowed byte slice in a view.
    pub fn new_slice(slice: &'a [u8]) -> Self {
        BufView::Slice(slice)
    }
    /// Wraps a [`bytes::Bytes`] buffer in a view.
    pub fn new_bytes(bytes: bytes::Bytes) -> Self {
        BufView::Bytes(bytes)
    }
    /// Consumes the view and returns its contents as `Bytes`.
    ///
    /// A slice-backed view has to copy its bytes into a new buffer, which is O(n);
    /// a `Bytes`-backed view hands back the buffer it already holds.
    pub fn into_bytes(self) -> Bytes {
        match self {
            Self::Bytes(owned) => owned,
            Self::Slice(borrowed) => Bytes::copy_from_slice(borrowed),
        }
    }
    /// Returns a view restricted to `range`, keeping the same kind of backing storage.
    ///
    /// # Panics
    ///
    /// Panics if `range` is out of bounds for the underlying buffer.
    fn view(&self, range: std::ops::Range<usize>) -> Self {
        match self {
            Self::Bytes(owned) => Self::Bytes(owned.slice(range)),
            Self::Slice(borrowed) => Self::Slice(&borrowed[range]),
        }
    }
 }
 impl<'a> Deref for BufView<'a> {
    type Target = [u8];
    fn deref(&self) -> &Self::Target {
        match self {
            BufView::Slice(slice) => slice,
            BufView::Bytes(bytes) => bytes,
        }
    }
 }
 impl<'a> AsRef<[u8]> for BufView<'a> {
    /// Borrows the viewed bytes as a plain slice.
    fn as_ref(&self) -> &[u8] {
        match *self {
            BufView::Bytes(ref owned) => owned.as_ref(),
            BufView::Slice(borrowed) => borrowed,
        }
    }
 }
 impl<'a> From<&'a [u8]> for BufView<'a> {
    fn from(value: &'a [u8]) -> Self {
        Self::new_slice(value)
    }
 }
 impl From<Bytes> for BufView<'_> {
    fn from(value: Bytes) -> Self {
        Self::new_bytes(value)
    }
 }
 /// Blob offsets into [`VectoredBlobsBuf::buf`]. The byte range is potentially compressed,
 /// subject to [`VectoredBlob::compression_bits`].
 pub struct VectoredBlob {
-    pub start: usize,
+    /// Blob metadata.
    pub end: usize,
    pub meta: BlobMeta,
    /// Start offset.
    start: usize,
    /// End offset.
    end: usize,
    /// Compression used on the blob.
    compression_bits: u8,
 }
 impl VectoredBlob {
    /// Reads a decompressed view of the blob.
    pub(crate) async fn read<'a>(&self, buf: &BufView<'a>) -> Result<BufView<'a>, std::io::Error> {
        let view = buf.view(self.start..self.end);
        match self.compression_bits {
            BYTE_UNCOMPRESSED => Ok(view),
            BYTE_ZSTD => {
                let mut decompressed_vec = Vec::new();
                let mut decoder =
                    async_compression::tokio::write::ZstdDecoder::new(&mut decompressed_vec);
                decoder.write_all(&view).await?;
                decoder.flush().await?;
                // Zero-copy conversion from `Vec` to `Bytes`
                Ok(BufView::new_bytes(Bytes::from(decompressed_vec)))
            }
            bits => {
                let error = std::io::Error::new(
                    std::io::ErrorKind::InvalidData,
                    format!("Failed to decompress blob for {}@{}, {}..{}: invalid compression byte {bits:x}", self.meta.key, self.meta.lsn, self.start, self.end),
                );
                Err(error)
            }
        }
    }
 }
 impl std::fmt::Display for VectoredBlob {
    /// Formats the blob as `<key>@<lsn>, <start>..<end>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let (key, lsn) = (&self.meta.key, &self.meta.lsn);
        let (start, end) = (self.start, self.end);
        f.write_fmt(format_args!("{}@{}, {}..{}", key, lsn, start, end))
    }
 }
 /// Return type of [`VectoredBlobReader::read_blobs`]
@@ -514,7 +627,7 @@ impl<'a> VectoredBlobReader<'a> {
            );
        }
-        let mut buf = self
+        let buf = self
            .file
            .read_exact_at(buf.slice(0..read.size()), read.start, ctx)
            .await?
@@ -529,9 +642,6 @@ impl<'a> VectoredBlobReader<'a> {
        // of a blob is implicit: the start of the next blob if one exists
        // or the end of the read.
        // Some scratch space, put here for reusing the allocation
        let mut decompressed_vec = Vec::new();
        for (blob_start, meta) in blobs_at {
            let blob_start_in_buf = blob_start - start_offset;
            let first_len_byte = buf[blob_start_in_buf as usize];
@@ -557,35 +667,14 @@ impl<'a> VectoredBlobReader<'a> {
                )
            };
-            let start_raw = blob_start_in_buf + size_length;
+            let start = (blob_start_in_buf + size_length) as usize;
-            let end_raw = start_raw + blob_size;
+            let end = start + blob_size as usize;
            let (start, end);
            if compression_bits == BYTE_UNCOMPRESSED {
                start = start_raw as usize;
                end = end_raw as usize;
            } else if compression_bits == BYTE_ZSTD {
                let mut decoder =
                    async_compression::tokio::write::ZstdDecoder::new(&mut decompressed_vec);
                decoder
                    .write_all(&buf[start_raw as usize..end_raw as usize])
                    .await?;
                decoder.flush().await?;
                start = buf.len();
                buf.extend_from_slice(&decompressed_vec);
                end = buf.len();
                decompressed_vec.clear();
            } else {
                let error = std::io::Error::new(
                    std::io::ErrorKind::InvalidData,
                    format!("invalid compression byte {compression_bits:x}"),
                );
                return Err(error);
            }
            metas.push(VectoredBlob {
                start,
                end,
                meta: *meta,
                compression_bits,
            });
        }
@@ -1020,8 +1109,13 @@ mod tests {
            let result = vectored_blob_reader.read_blobs(&read, buf, &ctx).await?;
            assert_eq!(result.blobs.len(), 1);
            let read_blob = &result.blobs[0];
-            let read_buf = &result.buf[read_blob.start..read_blob.end];
+            let view = BufView::new_slice(&result.buf);
-            assert_eq!(blob, read_buf, "mismatch for idx={idx} at offset={offset}");
+            let read_buf = read_blob.read(&view).await?;
            assert_eq!(
                &blob[..],
                &read_buf[..],
                "mismatch for idx={idx} at offset={offset}"
            );
            buf = result.buf;
        }
        Ok(())
--- a/pageserver/src/walredo.rs
+++ b/pageserver/src/walredo.rs
@@ -205,6 +205,22 @@ impl PostgresRedoManager {
        }
    }
    /// Do a ping request-response roundtrip.
    ///
    /// Not used in production, but by Rust benchmarks.
    ///
    /// # Cancel-Safety
    ///
    /// This method is cancellation-safe.
    pub async fn ping(&self, pg_version: u32) -> Result<(), Error> {
        self.do_with_walredo_process(pg_version, |proc| async move {
            proc.ping(Duration::from_secs(1))
                .await
                .map_err(Error::Other)
        })
        .await
    }
    pub fn status(&self) -> WalRedoManagerStatus {
        WalRedoManagerStatus {
            last_redo_at: {
@@ -297,6 +313,9 @@ impl PostgresRedoManager {
        }
    }
    /// # Cancel-Safety
    ///
    /// This method is cancel-safe iff `closure` is cancel-safe.
    async fn do_with_walredo_process<
        F: FnOnce(Arc<Process>) -> Fut,
        Fut: Future<Output = Result<O, Error>>,
@@ -537,6 +556,17 @@ mod tests {
    use tracing::Instrument;
    use utils::{id::TenantId, lsn::Lsn};
    /// A ping against a v14 walredo process must succeed.
    #[tokio::test]
    async fn test_ping() {
        let harness = RedoHarness::new().unwrap();
        let outcome = harness
            .manager
            .ping(14)
            .instrument(harness.span())
            .await;
        outcome.expect("ping should work");
    }
    #[tokio::test]
    async fn short_v14_redo() {
        let expected = std::fs::read("test_data/short_v14_redo.page").unwrap();
--- a/pageserver/src/walredo/process.rs
+++ b/pageserver/src/walredo/process.rs
@@ -6,6 +6,7 @@ use self::no_leak_child::NoLeakChild;
 use crate::{
    config::PageServerConf,
    metrics::{WalRedoKillCause, WAL_REDO_PROCESS_COUNTERS, WAL_REDO_RECORD_COUNTER},
    page_cache::PAGE_SZ,
    span::debug_assert_current_span_has_tenant_id,
    walrecord::NeonWalRecord,
 };
@@ -237,6 +238,26 @@ impl WalRedoProcess {
        res
    }
    /// Do a ping request-response roundtrip.
    ///
    /// Not used in production, but by Rust benchmarks.
    ///
    /// Sends the message built by [`protocol::build_ping_msg`] and waits up to `timeout`
    /// for the response.
    ///
    /// # Errors
    ///
    /// Fails if no response arrives within `timeout`, if the underlying request fails, or
    /// if the response is not exactly [`PAGE_SZ`] bytes long.
    pub(crate) async fn ping(&self, timeout: Duration) -> anyhow::Result<()> {
        // `build_ping_msg` writes one tag byte plus a 4-byte word, so reserve 5 bytes
        // up front to avoid a reallocation on first use.
        let mut writebuf: Vec<u8> = Vec::with_capacity(5);
        protocol::build_ping_msg(&mut writebuf);
        let Ok(res) = tokio::time::timeout(timeout, self.apply_wal_records0(&writebuf)).await
        else {
            anyhow::bail!("WAL redo ping timed out");
        };
        let response = res?;
        if response.len() != PAGE_SZ {
            anyhow::bail!(
                "WAL redo ping response should respond with page-sized response: {}",
                response.len()
            );
        }
        Ok(())
    }
    /// # Cancel-Safety
    ///
    /// When not polled to completion (e.g. because in `tokio::select!` another
--- a/pageserver/src/walredo/process/protocol.rs
+++ b/pageserver/src/walredo/process/protocol.rs
@@ -55,3 +55,8 @@ pub(crate) fn build_get_page_msg(tag: BufferTag, buf: &mut Vec<u8>) {
    tag.ser_into(buf)
        .expect("serialize BufferTag should always succeed");
 }
 /// Appends a ping request to `buf`: the tag byte `b'H'` followed by the `u32` value 4.
 ///
 /// NOTE(review): the 4 is presumably the message length, counting the length word itself
 /// with no payload — confirm against the walredo process's message parser.
 pub(crate) fn build_ping_msg(buf: &mut Vec<u8>) {
    buf.put_u8(b'H');
    buf.put_u32(4);
 }
--- a/pgxn/neon/Makefile
+++ b/pgxn/neon/Makefile
@@ -9,6 +9,8 @@ OBJS = \
 	hll.o \
 	libpagestore.o \
 	neon.o \
 	neon_pgversioncompat.o \
 	neon_perf_counters.o \
 	neon_utils.o \
 	neon_walreader.o \
 	pagestore_smgr.o \
@@ -23,7 +25,18 @@ SHLIB_LINK_INTERNAL = $(libpq)
 SHLIB_LINK = -lcurl
 EXTENSION = neon
-DATA = neon--1.0.sql neon--1.0--1.1.sql neon--1.1--1.2.sql neon--1.2--1.3.sql neon--1.3--1.2.sql neon--1.2--1.1.sql neon--1.1--1.0.sql  neon--1.3--1.4.sql neon--1.4--1.3.sql
+DATA = \
 	neon--1.0.sql \
 	neon--1.0--1.1.sql \
 	neon--1.1--1.2.sql \
 	neon--1.2--1.3.sql \
 	neon--1.3--1.4.sql \
 	neon--1.4--1.5.sql \
 	neon--1.5--1.4.sql \
 	neon--1.4--1.3.sql \
 	neon--1.3--1.2.sql \
 	neon--1.2--1.1.sql \
 	neon--1.1--1.0.sql
 PGFILEDESC = "neon - cloud storage for PostgreSQL"
 EXTRA_CLEAN = \
--- a/pgxn/neon/file_cache.c
+++ b/pgxn/neon/file_cache.c
@@ -109,6 +109,7 @@ typedef struct FileCacheControl
 								 * reenabling */
 	uint32		size;			/* size of cache file in chunks */
 	uint32		used;			/* number of used chunks */
 	uint32		used_pages;		/* number of used pages */
 	uint32		limit;			/* shared copy of lfc_size_limit */
 	uint64		hits;
 	uint64		misses;
@@ -905,6 +906,10 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 				/* Cache overflow: evict least recently used chunk */
 				FileCacheEntry *victim = dlist_container(FileCacheEntry, list_node, dlist_pop_head_node(&lfc_ctl->lru));
 				for (int i = 0; i < BLOCKS_PER_CHUNK; i++)
 				{
 					lfc_ctl->used_pages -= (victim->bitmap[i >> 5] >> (i & 31)) & 1;
 				}
 				CriticalAssert(victim->access_count == 0);
 				entry->offset = victim->offset; /* grab victim's chunk */
 				hash_search_with_hash_value(lfc_hash, &victim->key, victim->hash, HASH_REMOVE, NULL);
@@ -959,6 +964,7 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 				for (int i = 0; i < blocks_in_chunk; i++)
 				{
 					lfc_ctl->used_pages += 1 - ((entry->bitmap[(chunk_offs + i) >> 5] >> ((chunk_offs + i) & 31)) & 1);
 					entry->bitmap[(chunk_offs + i) >> 5] |=
 						(1 << ((chunk_offs + i) & 31));
 				}
@@ -1051,6 +1057,11 @@ neon_get_lfc_stats(PG_FUNCTION_ARGS)
 			if (lfc_ctl)
 				value = lfc_ctl->size;
 			break;
 		case 5:
 			key = "file_cache_used_pages";
 			if (lfc_ctl)
 				value = lfc_ctl->used_pages;
 			break;
 		default:
 			SRF_RETURN_DONE(funcctx);
 	}
--- a/pgxn/neon/libpagestore.c
+++ b/pgxn/neon/libpagestore.c
@@ -30,6 +30,7 @@
 #include "utils/guc.h"
 #include "neon.h"
 #include "neon_perf_counters.h"
 #include "neon_utils.h"
 #include "pagestore_client.h"
 #include "walproposer.h"
@@ -331,6 +332,7 @@ CLEANUP_AND_DISCONNECT(PageServer *shard)
 	}
 	if (shard->conn)
 	{
 		MyNeonCounters->pageserver_disconnects_total++;
 		PQfinish(shard->conn);
 		shard->conn = NULL;
 	}
@@ -737,6 +739,8 @@ pageserver_send(shardno_t shard_no, NeonRequest *request)
 	PageServer *shard = &page_servers[shard_no];
 	PGconn	   *pageserver_conn;
 	MyNeonCounters->pageserver_requests_sent_total++;
 	/* If the connection was lost for some reason, reconnect */
 	if (shard->state == PS_Connected && PQstatus(shard->conn) == CONNECTION_BAD)
 	{
@@ -889,6 +893,7 @@ pageserver_flush(shardno_t shard_no)
 	}
 	else
 	{
 		MyNeonCounters->pageserver_send_flushes_total++;
 		if (PQflush(pageserver_conn))
 		{
 			char	   *msg = pchomp(PQerrorMessage(pageserver_conn));
@@ -922,7 +927,7 @@ check_neon_id(char **newval, void **extra, GucSource source)
 static Size
 PagestoreShmemSize(void)
 {
-	return sizeof(PagestoreShmemState);
+	return add_size(sizeof(PagestoreShmemState), NeonPerfCountersShmemSize());
 }
 static bool
@@ -941,6 +946,9 @@ PagestoreShmemInit(void)
 		memset(&pagestore_shared->shard_map, 0, sizeof(ShardMap));
 		AssignPageserverConnstring(page_server_connstring, NULL);
 	}
 	NeonPerfCountersShmemInit();
 	LWLockRelease(AddinShmemInitLock);
 	return found;
 }
--- a/pgxn/neon/neon--1.4--1.5.sql
+++ b/pgxn/neon/neon--1.4--1.5.sql
@@ -0,0 +1,39 @@
 \echo Use "ALTER EXTENSION neon UPDATE TO '1.5'" to load this file. \quit
 CREATE FUNCTION get_backend_perf_counters()
 RETURNS SETOF RECORD
 AS 'MODULE_PATHNAME', 'neon_get_backend_perf_counters'
 LANGUAGE C PARALLEL SAFE;
 CREATE FUNCTION get_perf_counters()
 RETURNS SETOF RECORD
 AS 'MODULE_PATHNAME', 'neon_get_perf_counters'
 LANGUAGE C PARALLEL SAFE;
 -- Show various metrics, for each backend. Note that the values are not reset
 -- when a backend exits. When a new backend starts with the backend ID, it will
 -- continue accumulating the values from where the old backend left. If you are
 -- only interested in the changes from your own session, store the values at the
 -- beginning of the session somewhere, and subtract them on subsequent calls.
 --
 -- For histograms, 'bucket_le' is the upper bound of the histogram bucket.
 CREATE VIEW neon_backend_perf_counters AS
  SELECT P.procno, P.pid, P.metric, P.bucket_le, P.value
  FROM get_backend_perf_counters() AS P (
    procno integer,
    pid integer,
    metric text,
    bucket_le float8,
    value float8
  );
 -- Summary across all backends. (This could also be implemented with
 -- an aggregate query over neon_backend_perf_counters view.)
 CREATE VIEW neon_perf_counters AS
  SELECT P.metric, P.bucket_le, P.value
  FROM get_perf_counters() AS P (
    metric text,
    bucket_le float8,
    value float8
  );
--- a/pgxn/neon/neon--1.5--1.4.sql
+++ b/pgxn/neon/neon--1.5--1.4.sql
@@ -0,0 +1,4 @@
 DROP VIEW IF EXISTS neon_perf_counters;
 DROP VIEW IF EXISTS neon_backend_perf_counters;
 DROP FUNCTION IF EXISTS get_perf_counters();
 DROP FUNCTION IF EXISTS get_backend_perf_counters();
--- a/pgxn/neon/neon.control
+++ b/pgxn/neon/neon.control
@@ -1,5 +1,7 @@
 # neon extension
 comment = 'cloud storage for PostgreSQL'
 # TODO: bump default version to 1.5, after we are certain that we don't
 # need to rollback the compute image
 default_version = '1.4'
 module_pathname = '$libdir/neon'
 relocatable = true
--- a/pgxn/neon/neon_perf_counters.c
+++ b/pgxn/neon/neon_perf_counters.c
@@ -0,0 +1,261 @@
 /*-------------------------------------------------------------------------
 *
 * neon_perf_counters.c
 *	  Collect statistics about Neon I/O
 *
 * Each backend has its own set of counters in shared memory.
 *
 *-------------------------------------------------------------------------
 */
 #include "postgres.h"
 #include <math.h>
 #include "funcapi.h"
 #include "miscadmin.h"
 #include "storage/proc.h"
 #include "storage/shmem.h"
 #include "utils/builtins.h"
 #include "neon_perf_counters.h"
 #include "neon_pgversioncompat.h"
 neon_per_backend_counters *neon_per_backend_counters_shared;
 /*
 * Shared memory needed for the perf counters: one neon_per_backend_counters
 * struct for each of the MaxBackends slots.
 */
 Size
 NeonPerfCountersShmemSize(void)
 {
 	return mul_size(MaxBackends, sizeof(neon_per_backend_counters));
 }
 void
 NeonPerfCountersShmemInit(void)
 {
 	bool		found;
 	neon_per_backend_counters_shared =
 		ShmemInitStruct("Neon perf counters",
 						mul_size(MaxBackends,
 								 sizeof(neon_per_backend_counters)),
 						&found);
 	Assert(found == IsUnderPostmaster);
 	if (!found)
 	{
 		/* shared memory is initialized to zeros, so nothing to do here */
 	}
 }
 /*
 * Count a GetPage wait operation.
 *
 * 'latency_us' is the wait time in microseconds. It is added to this
 * backend's sum and count, and the matching histogram bucket is incremented.
 *
 * The bucket thresholds are reported as 'bucket_le', i.e. inclusive upper
 * bounds, so an observation goes into the first bucket whose threshold is
 * greater than or equal to the latency. The last threshold is UINT64_MAX,
 * which catches everything else.
 */
 void
 inc_getpage_wait(uint64 latency_us)
 {
 	int			lo = 0;
 	int			hi = NUM_GETPAGE_WAIT_BUCKETS - 1;
 	/* Find the first bucket with threshold >= latency, with binary search */
 	while (lo < hi)
 	{
 		int			mid = (lo + hi) / 2;
 		if (latency_us <= getpage_wait_bucket_thresholds[mid])
 			hi = mid;
 		else
 			lo = mid + 1;
 	}
 	MyNeonCounters->getpage_wait_us_bucket[lo]++;
 	MyNeonCounters->getpage_wait_us_sum += latency_us;
 	MyNeonCounters->getpage_wait_us_count++;
 }
 /*
 * Support functions for the views, neon_backend_perf_counters and
 * neon_perf_counters.
 */
 /* One output row of the perf counter views. */
 typedef struct
 {
 	char	   *name;			/* metric name; NULL marks the end of an array */
 	bool		is_bucket;		/* true for histogram bucket rows */
 	double		bucket_le;		/* bucket upper bound; only set for bucket rows */
 	double		value;			/* metric value */
 } metric_t;
 /*
 * Convert one set of counters into a palloc'd array of metric_t entries,
 * terminated by an entry with name == NULL. The caller should pfree() the
 * array.
 *
 * The microsecond-based wait counters are converted to seconds, and the
 * histogram buckets are emitted cumulatively: each bucket's value includes
 * the counts of all the buckets before it.
 */
 static metric_t *
 neon_perf_counters_to_metrics(neon_per_backend_counters *counters)
 {
 #define NUM_METRICS (2 + NUM_GETPAGE_WAIT_BUCKETS + 8)
 	metric_t   *metrics = palloc((NUM_METRICS + 1) * sizeof(metric_t));
 	uint64		bucket_accum;
 	int			i = 0;
 	metrics[i].name = "getpage_wait_seconds_count";
 	metrics[i].is_bucket = false;
 	metrics[i].value = (double) counters->getpage_wait_us_count;
 	i++;
 	metrics[i].name = "getpage_wait_seconds_sum";
 	metrics[i].is_bucket = false;
 	metrics[i].value = ((double) counters->getpage_wait_us_sum) / 1000000.0;
 	i++;
 	bucket_accum = 0;
 	for (int bucketno = 0; bucketno < NUM_GETPAGE_WAIT_BUCKETS; bucketno++)
 	{
 		uint64		threshold = getpage_wait_bucket_thresholds[bucketno];
 		bucket_accum += counters->getpage_wait_us_bucket[bucketno];
 		metrics[i].name = "getpage_wait_seconds_bucket";
 		metrics[i].is_bucket = true;
 		/* the catch-all UINT64_MAX threshold is reported as +Inf */
 		metrics[i].bucket_le = (threshold == UINT64_MAX) ? INFINITY : ((double) threshold) / 1000000.0;
 		metrics[i].value = (double) bucket_accum;
 		i++;
 	}
 	metrics[i].name = "getpage_prefetch_requests_total";
 	metrics[i].is_bucket = false;
 	metrics[i].value = (double) counters->getpage_prefetch_requests_total;
 	i++;
 	metrics[i].name = "getpage_sync_requests_total";
 	metrics[i].is_bucket = false;
 	metrics[i].value = (double) counters->getpage_sync_requests_total;
 	i++;
 	metrics[i].name = "getpage_prefetch_misses_total";
 	metrics[i].is_bucket = false;
 	metrics[i].value = (double) counters->getpage_prefetch_misses_total;
 	i++;
 	metrics[i].name = "getpage_prefetch_discards_total";
 	metrics[i].is_bucket = false;
 	metrics[i].value = (double) counters->getpage_prefetch_discards_total;
 	i++;
 	metrics[i].name = "pageserver_requests_sent_total";
 	metrics[i].is_bucket = false;
 	metrics[i].value = (double) counters->pageserver_requests_sent_total;
 	i++;
 	/*
 	 * NOTE(review): the metric name says "requests_disconnects" while the
 	 * counter is pageserver_disconnects_total; confirm which name is
 	 * intended before anything starts depending on it.
 	 */
 	metrics[i].name = "pageserver_requests_disconnects_total";
 	metrics[i].is_bucket = false;
 	metrics[i].value = (double) counters->pageserver_disconnects_total;
 	i++;
 	metrics[i].name = "pageserver_send_flushes_total";
 	metrics[i].is_bucket = false;
 	metrics[i].value = (double) counters->pageserver_send_flushes_total;
 	i++;
 	metrics[i].name = "file_cache_hits_total";
 	metrics[i].is_bucket = false;
 	metrics[i].value = (double) counters->file_cache_hits_total;
 	i++;
 	Assert(i == NUM_METRICS);
 	/* NULL entry marks end of array */
 	metrics[i].name = NULL;
 	metrics[i].value = 0;
 	return metrics;
 }
 /*
 * Fill three consecutive output columns from one metric: the name, the
 * bucket upper bound (NULL unless the metric is a histogram bucket), and the
 * value.
 */
 static void
 metric_to_datums(metric_t *m, Datum *values, bool *nulls)
 {
 	/* column 0: metric name */
 	values[0] = CStringGetTextDatum(m->name);
 	nulls[0] = false;
 	/* column 1: bucket upper bound, only for histogram buckets */
 	values[1] = m->is_bucket ? Float8GetDatum(m->bucket_le) : (Datum) 0;
 	nulls[1] = !m->is_bucket;
 	/* column 2: value */
 	values[2] = Float8GetDatum(m->value);
 	nulls[2] = false;
 }
 PG_FUNCTION_INFO_V1(neon_get_backend_perf_counters);
 Datum
 neon_get_backend_perf_counters(PG_FUNCTION_ARGS)
 {
 	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
 	Datum		values[5];
 	bool		nulls[5];
 	/* We put all the tuples into a tuplestore in one go. */
 	InitMaterializedSRF(fcinfo, 0);
 	for (int procno = 0; procno < MaxBackends; procno++)
 	{
 		PGPROC	   *proc = GetPGProcByNumber(procno);
 		int			pid = proc->pid;
 		neon_per_backend_counters *counters = &neon_per_backend_counters_shared[procno];
 		metric_t   *metrics = neon_perf_counters_to_metrics(counters);
 		values[0] = Int32GetDatum(procno);
 		nulls[0] = false;
 		values[1] = Int32GetDatum(pid);
 		nulls[1] = false;
 		for (int i = 0; metrics[i].name != NULL; i++)
 		{
 			metric_to_datums(&metrics[i], &values[2], &nulls[2]);
 			tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
 		}
 		pfree(metrics);
 	}
 	return (Datum) 0;
 }
 PG_FUNCTION_INFO_V1(neon_get_perf_counters);
 Datum
 neon_get_perf_counters(PG_FUNCTION_ARGS)
 {
 	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
 	Datum		values[3];
 	bool		nulls[3];
 	Datum		getpage_wait_str;
 	neon_per_backend_counters totals = {0};
 	metric_t   *metrics;
 	/* We put all the tuples into a tuplestore in one go. */
 	InitMaterializedSRF(fcinfo, 0);
 	/* Aggregate the counters across all backends */
 	for (int procno = 0; procno < MaxBackends; procno++)
 	{
 		neon_per_backend_counters *counters = &neon_per_backend_counters_shared[procno];
 		totals.getpage_wait_us_count += counters->getpage_wait_us_count;
 		totals.getpage_wait_us_sum += counters->getpage_wait_us_sum;
 		for (int bucketno = 0; bucketno < NUM_GETPAGE_WAIT_BUCKETS; bucketno++)
 			totals.getpage_wait_us_bucket[bucketno] += counters->getpage_wait_us_bucket[bucketno];
 		totals.getpage_prefetch_requests_total += counters->getpage_prefetch_requests_total;
 		totals.getpage_sync_requests_total += counters->getpage_sync_requests_total;
 		totals.getpage_prefetch_misses_total += counters->getpage_prefetch_misses_total;
 		totals.getpage_prefetch_discards_total += counters->getpage_prefetch_discards_total;
 		totals.pageserver_requests_sent_total += counters->pageserver_requests_sent_total;
 		totals.pageserver_disconnects_total += counters->pageserver_disconnects_total;
 		totals.pageserver_send_flushes_total += counters->pageserver_send_flushes_total;
 		totals.file_cache_hits_total += counters->file_cache_hits_total;
 	}
 	metrics = neon_perf_counters_to_metrics(&totals);
 	for (int i = 0; metrics[i].name != NULL; i++)
 	{
 		metric_to_datums(&metrics[i], &values[0], &nulls[0]);
 		tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
 	}
 	pfree(metrics);
 	return (Datum) 0;
 }
--- a/pgxn/neon/neon_perf_counters.h
+++ b/pgxn/neon/neon_perf_counters.h
@@ -0,0 +1,111 @@
 /*-------------------------------------------------------------------------
 *
 * neon_perf_counters.h
 *	  Performance counters for neon storage requests
 *-------------------------------------------------------------------------
 */
 #ifndef NEON_PERF_COUNTERS_H
 #define NEON_PERF_COUNTERS_H
 #if PG_VERSION_NUM >= 170000
 #include "storage/procnumber.h"
 #else
 #include "storage/backendid.h"
 #include "storage/proc.h"
 #endif
 static const uint64 getpage_wait_bucket_thresholds[] = {
 	      20,       30,       60,       100,  /* 0      -  100 us */
 	     200,      300,      600,	   1000,  /* 100 us - 1 ms */
 	    2000,     3000,     6000,     10000,  /* 1 ms   - 10 ms */
 	   20000,    30000,    60000,    100000,  /* 10 ms  - 100 ms */
 	  200000,   300000,   600000,   1000000,  /* 100 ms - 1 s */
 	 2000000,  3000000,  6000000,  10000000,  /* 1 s - 10 s */
    20000000, 30000000, 60000000, 100000000,  /* 10 s - 100 s */
 	UINT64_MAX,
 };
 #define NUM_GETPAGE_WAIT_BUCKETS (lengthof(getpage_wait_bucket_thresholds))
 typedef struct
 {
 	/*
 	 * Histogram for how long an smgrread() request needs to wait for response
 	 * from pageserver. When prefetching is effective, these wait times can be
 	 * lower than the network latency to the pageserver, even zero, if the
 	 * page is already readily prefetched whenever we need to read a page.
 	 *
 	 * Note: we accumulate these in microseconds, because that's convenient in
 	 * the backend, but the 'neon_backend_perf_counters' view will convert
 	 * them to seconds, to make them more idiomatic as prometheus metrics.
 	 */
 	uint64		getpage_wait_us_count;
 	uint64		getpage_wait_us_sum;
 	uint64		getpage_wait_us_bucket[NUM_GETPAGE_WAIT_BUCKETS];
 	/*
 	 * Total number of speculative prefetch Getpage requests and synchronous
 	 * GetPage requests sent.
 	 */
 	uint64		getpage_prefetch_requests_total;
 	uint64		getpage_sync_requests_total;
 	/* XXX: It's not clear to me when these misses happen. */
 	uint64		getpage_prefetch_misses_total;
 	/*
 	 * Number of prefetched responses that were discarded because the
 	 * prefetched page was not needed or because it was concurrently fetched /
 	 * modified by another backend.
 	 */
 	uint64		getpage_prefetch_discards_total;
 	/*
 	 * Total number of requests sent to pageserver. (prefetch_requests_total
 	 * and sync_requests_total count only GetPage requests, this counts all
 	 * request types.)
 	 */
 	uint64		pageserver_requests_sent_total;
 	/*
 	 * Number of times the connection to the pageserver was lost and the
 	 * backend had to reconnect. Note that this doesn't count the first
 	 * connection in each backend, only reconnects.
 	 */
 	uint64		pageserver_disconnects_total;
 	/*
 	 * Number of network flushes to the pageserver. Synchronous requests are
 	 * flushed immediately, but when prefetching requests are sent in batches,
 	 * this can be smaller than pageserver_requests_sent_total.
 	 */
 	uint64		pageserver_send_flushes_total;
 	/*
 	 * Number of requests satisfied from the LFC.
 	 *
 	 * This is redundant with the server-wide file_cache_hits, but this gives
 	 * per-backend granularity, and it's handy to have this in the same place
 	 * as counters for requests that went to the pageserver. Maybe move all
 	 * the LFC stats to this struct in the future?
 	 */
 	uint64		file_cache_hits_total;
 } neon_per_backend_counters;
 /* Pointer to the shared memory array of neon_per_backend_counters structs */
 extern neon_per_backend_counters *neon_per_backend_counters_shared;
 #if PG_VERSION_NUM >= 170000
 #define MyNeonCounters (&neon_per_backend_counters_shared[MyProcNumber])
 #else
 #define MyNeonCounters (&neon_per_backend_counters_shared[MyProc->pgprocno])
 #endif
 extern void inc_getpage_wait(uint64 latency);
 extern Size NeonPerfCountersShmemSize(void);
 extern void NeonPerfCountersShmemInit(void);
 #endif							/* NEON_PERF_COUNTERS_H */
--- a/pgxn/neon/neon_pgversioncompat.c
+++ b/pgxn/neon/neon_pgversioncompat.c
@@ -0,0 +1,44 @@
 /*
 * Support functions for the compatibility macros in neon_pgversioncompat.h
 */
 #include "postgres.h"
 #include "funcapi.h"
 #include "miscadmin.h"
 #include "utils/tuplestore.h"
 #include "neon_pgversioncompat.h"
 #if PG_MAJORVERSION_NUM < 15
 /*
 * Replacement for InitMaterializedSRF(), compiled only when
 * PG_MAJORVERSION_NUM < 15.
 *
 * Sets up 'fcinfo' for returning a set in materialize mode: creates a
 * tuplestore and a tuple descriptor in the per-query memory context and
 * stores them in the ReturnSetInfo. Errors out if the caller cannot accept a
 * set or if the function's result type is not a row type.
 *
 * Note: 'flags' is not used by this implementation.
 */
 void
 InitMaterializedSRF(FunctionCallInfo fcinfo, bits32 flags)
 {
 	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
 	Tuplestorestate *tupstore;
 	MemoryContext old_context,
 				per_query_ctx;
 	TupleDesc	stored_tupdesc;
 	/* check to see if caller supports returning a tuplestore */
 	if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
 		ereport(ERROR,
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 				 errmsg("set-valued function called in context that cannot accept a set")));
 	/*
 	 * Store the tuplestore and the tuple descriptor in ReturnSetInfo.  This
 	 * must be done in the per-query memory context.
 	 */
 	per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
 	old_context = MemoryContextSwitchTo(per_query_ctx);
 	if (get_call_result_type(fcinfo, NULL, &stored_tupdesc) != TYPEFUNC_COMPOSITE)
 		elog(ERROR, "return type must be a row type");
 	tupstore = tuplestore_begin_heap(false, false, work_mem);
 	rsinfo->returnMode = SFRM_Materialize;
 	rsinfo->setResult = tupstore;
 	rsinfo->setDesc = stored_tupdesc;
 	/* switch back to the caller's memory context */
 	MemoryContextSwitchTo(old_context);
 }
 #endif
--- a/pgxn/neon/neon_pgversioncompat.h
+++ b/pgxn/neon/neon_pgversioncompat.h
@@ -6,6 +6,8 @@
 #ifndef NEON_PGVERSIONCOMPAT_H
 #define NEON_PGVERSIONCOMPAT_H
 #include "fmgr.h"
 #if PG_MAJORVERSION_NUM < 17
 #define NRelFileInfoBackendIsTemp(rinfo) (rinfo.backend != InvalidBackendId)
 #else
@@ -123,4 +125,8 @@
 #define AmAutoVacuumWorkerProcess() (IsAutoVacuumWorkerProcess())
 #endif
 #if PG_MAJORVERSION_NUM < 15
 extern void InitMaterializedSRF(FunctionCallInfo fcinfo, bits32 flags);
 #endif
 #endif							/* NEON_PGVERSIONCOMPAT_H */
--- a/pgxn/neon/pagestore_smgr.c
+++ b/pgxn/neon/pagestore_smgr.c
@@ -66,6 +66,7 @@
 #include "storage/md.h"
 #include "storage/smgr.h"
 #include "neon_perf_counters.h"
 #include "pagestore_client.h"
 #include "bitmap.h"
@@ -289,7 +290,6 @@ static PrefetchState *MyPState;
 static bool compact_prefetch_buffers(void);
 static void consume_prefetch_responses(void);
 static uint64 prefetch_register_buffer(BufferTag tag, neon_request_lsns *force_request_lsns);
 static bool prefetch_read(PrefetchRequest *slot);
 static void prefetch_do_request(PrefetchRequest *slot, neon_request_lsns *force_request_lsns);
 static bool prefetch_wait_for(uint64 ring_index);
@@ -780,21 +780,27 @@ prefetch_do_request(PrefetchRequest *slot, neon_request_lsns *force_request_lsns
 }
 /*
- * prefetch_register_buffer() - register and prefetch buffer
+ * prefetch_register_bufferv() - register and prefetch buffers
 *
 * Register that we may want the contents of BufferTag in the near future.
 * This is used when issuing a speculative prefetch request, but also when
 * performing a synchronous request and need the buffer right now.
 *
 * If force_request_lsns is not NULL, those values are sent to the
 * pageserver. If NULL, we utilize the lastWrittenLsn -infrastructure
 * to calculate the LSNs to send.
 *
 * When performing a prefetch rather than a synchronous request,
 * is_prefetch==true. Currently, it only affects how the request is accounted
 * in the perf counters.
 *
 * NOTE: this function may indirectly update MyPState->pfs_hash; which
 * invalidates any active pointers into the hash table.
 */
 static uint64
 prefetch_register_bufferv(BufferTag tag, neon_request_lsns *frlsns,
-						  BlockNumber nblocks, const bits8 *mask)
+						  BlockNumber nblocks, const bits8 *mask,
 						  bool is_prefetch)
 {
 	uint64		min_ring_index;
 	PrefetchRequest req;
@@ -815,6 +821,7 @@ Retry:
 		PrfHashEntry *entry = NULL;
 		uint64		ring_index;
 		neon_request_lsns *lsns;
 		if (PointerIsValid(mask) && !BITMAP_ISSET(mask, i))
 			continue;
@@ -858,6 +865,7 @@ Retry:
 					prefetch_set_unused(ring_index);
 					entry = NULL;
 					slot = NULL;
 					MyNeonCounters->getpage_prefetch_discards_total++;
 				}
 			}
@@ -972,6 +980,11 @@ Retry:
 		min_ring_index = Min(min_ring_index, ring_index);
 		if (is_prefetch)
 			MyNeonCounters->getpage_prefetch_requests_total++;
 		else
 			MyNeonCounters->getpage_sync_requests_total++;
 		prefetch_do_request(slot, lsns);
 	}
@@ -1000,13 +1013,6 @@ Retry:
 }
 static uint64
 prefetch_register_buffer(BufferTag tag, neon_request_lsns *force_request_lsns)
 {
 	return prefetch_register_bufferv(tag, force_request_lsns, 1, NULL);
 }
 /*
 * Note: this function can get canceled and use a long jump to the next catch
 * context. Take care.
@@ -2612,7 +2618,7 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 			lfc_present[i] = ~(lfc_present[i]);
 		ring_index = prefetch_register_bufferv(tag, NULL, iterblocks,
-											   lfc_present);
+											   lfc_present, true);
 		nblocks -= iterblocks;
 		blocknum += iterblocks;
@@ -2656,7 +2662,7 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
 	CopyNRelFileInfoToBufTag(tag, InfoFromSMgrRel(reln));
-	ring_index = prefetch_register_buffer(tag, NULL);
+	ring_index = prefetch_register_bufferv(tag, NULL, 1, NULL, true);
 	Assert(ring_index < MyPState->ring_unused &&
 		   MyPState->ring_last <= ring_index);
@@ -2747,17 +2753,20 @@ neon_read_at_lsnv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber base_block
 	 * weren't for the behaviour of the LwLsn cache that uses the highest
 	 * value of the LwLsn cache when the entry is not found.
 	 */
-	prefetch_register_bufferv(buftag, request_lsns, nblocks, mask);
+	prefetch_register_bufferv(buftag, request_lsns, nblocks, mask, false);
 	for (int i = 0; i < nblocks; i++)
 	{
 		void	   *buffer = buffers[i];
 		BlockNumber blockno = base_blockno + i;
 		neon_request_lsns *reqlsns = &request_lsns[i];
 		TimestampTz		start_ts, end_ts;
 		if (PointerIsValid(mask) && !BITMAP_ISSET(mask, i))
 			continue;
 		start_ts = GetCurrentTimestamp();
 		if (RecoveryInProgress() && MyBackendType != B_STARTUP)
 			XLogWaitForReplayOf(reqlsns[0].request_lsn);
@@ -2794,6 +2803,7 @@ Retry:
 				/* drop caches */
 				prefetch_set_unused(slot->my_ring_index);
 				pgBufferUsage.prefetch.expired += 1;
 				MyNeonCounters->getpage_prefetch_discards_total++;
 				/* make it look like a prefetch cache miss */
 				entry = NULL;
 			}
@@ -2804,8 +2814,9 @@ Retry:
 			if (entry == NULL)
 			{
 				pgBufferUsage.prefetch.misses += 1;
 				MyNeonCounters->getpage_prefetch_misses_total++;
-				ring_index = prefetch_register_bufferv(buftag, reqlsns, 1, NULL);
+				ring_index = prefetch_register_bufferv(buftag, reqlsns, 1, NULL, false);
 				Assert(ring_index != UINT64_MAX);
 				slot = GetPrfSlot(ring_index);
 			}
@@ -2860,6 +2871,9 @@ Retry:
 		/* buffer was used, clean up for later reuse */
 		prefetch_set_unused(ring_index);
 		prefetch_cleanup_trailing_unused();
 		end_ts = GetCurrentTimestamp();
 		inc_getpage_wait(end_ts >= start_ts ? (end_ts - start_ts) : 0);
 	}
 }
@@ -2913,6 +2927,7 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer
 	/* Try to read from local file cache */
 	if (lfc_read(InfoFromSMgrRel(reln), forkNum, blkno, buffer))
 	{
 		MyNeonCounters->file_cache_hits_total++;
 		return;
 	}
@@ -3097,7 +3112,7 @@ neon_readv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 				/* assume heap */
 				RmgrTable[RM_HEAP_ID].rm_mask(mdbuf_masked, blkno);
 				RmgrTable[RM_HEAP_ID].rm_mask(pageserver_masked, blkno);
-	
+
 				if (memcmp(mdbuf_masked, pageserver_masked, BLCKSZ) != 0)
 				{
 					neon_log(PANIC, "heap buffers differ at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n------ MD ------\n%s\n------ Page Server ------\n%s\n",
--- a/pgxn/neon_walredo/walredoproc.c
+++ b/pgxn/neon_walredo/walredoproc.c
@@ -24,6 +24,7 @@
 * PushPage ('P'): Copy a page image (in the payload) to buffer cache
 * ApplyRecord ('A'): Apply a WAL record (in the payload)
 * GetPage ('G'): Return a page image from buffer cache.
 * Ping ('H'): Respond with a page-sized (BLCKSZ) block of zeros.
 *
 * Currently, you only get a response to GetPage and Ping requests; the
 * response is simply a 8k page, without any headers. Errors are logged to stderr.
@@ -133,6 +134,7 @@ static void ApplyRecord(StringInfo input_message);
 static void apply_error_callback(void *arg);
 static bool redo_block_filter(XLogReaderState *record, uint8 block_id);
 static void GetPage(StringInfo input_message);
 static void Ping(StringInfo input_message);
 static ssize_t buffered_read(void *buf, size_t count);
 static void CreateFakeSharedMemoryAndSemaphores();
@@ -394,6 +396,10 @@ WalRedoMain(int argc, char *argv[])
 				GetPage(&input_message);
 				break;
 			case 'H': 			/* Ping */
 				Ping(&input_message);
 				break;
 				/*
 				 * EOF means we're done. Perform normal shutdown.
 				 */
@@ -1057,6 +1063,36 @@ GetPage(StringInfo input_message)
 }
 /*
  * Ping: answer a ping ('H') request by writing one page-sized block to stdout.
  *
  * input_message is not examined. The bytes sent come from a static const
  * block that has no initializer, i.e. BLCKSZ zero bytes. A failing write()
  * is reported with ereport(ERROR), except for EINTR, which is retried.
  */
 static void
 Ping(StringInfo input_message)
 {
 	int			tot_written;
 	/* Response: a page-sized block of zeros; the input message is not echoed back */
 	tot_written = 0;
 	do {
 		ssize_t		rc;
 		/* We don't need alignment, but it's bad practice to use char[BLCKSZ] */
 #if PG_VERSION_NUM >= 160000
 		static const PGIOAlignedBlock response;
 #else
 		static const PGAlignedBlock response;
 #endif
 		/* write() may be partial: resume from the first byte not yet sent */
 		rc = write(STDOUT_FILENO, &response.data[tot_written], BLCKSZ - tot_written);
 		if (rc < 0) {
 			/* If interrupted by signal, just retry */
 			if (errno == EINTR)
 				continue;
 			ereport(ERROR,
 					(errcode_for_file_access(),
 					 errmsg("could not write to stdout: %m")));
 		}
 		tot_written += rc;
 	} while (tot_written < BLCKSZ);
 	elog(TRACE, "Page sent back for ping");
 }
 /* Buffer used by buffered_read() */
 static char stdin_buf[16 * 1024];
 static size_t stdin_len = 0;	/* # of bytes in buffer */
--- a/proxy/Cargo.toml
+++ b/proxy/Cargo.toml
@@ -24,12 +24,12 @@ bytes = { workspace = true, features = ["serde"] }
 camino.workspace = true
 chrono.workspace = true
 clap.workspace = true
 compute_api.workspace = true
 consumption_metrics.workspace = true
 dashmap.workspace = true
 env_logger.workspace = true
 framed-websockets.workspace = true
 futures.workspace = true
 git-version.workspace = true
 hashbrown.workspace = true
 hashlink.workspace = true
 hex.workspace = true
--- a/proxy/src/auth/backend.rs
+++ b/proxy/src/auth/backend.rs
@@ -80,6 +80,14 @@ pub(crate) trait TestBackend: Send + Sync + 'static {
    fn get_allowed_ips_and_secret(
        &self,
    ) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), console::errors::GetAuthInfoError>;
    fn dyn_clone(&self) -> Box<dyn TestBackend>;
 }
 /// Boxed test backends are cloned through the object-safe `TestBackend::dyn_clone` hook.
 #[cfg(test)]
 impl Clone for Box<dyn TestBackend> {
    fn clone(&self) -> Self {
        // peel `&Box<dyn _>` down to the trait object, then dispatch dynamically
        let backend: &dyn TestBackend = &**self;
        backend.dyn_clone()
    }
 }
 impl std::fmt::Display for Backend<'_, (), ()> {
@@ -444,7 +452,7 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint, &()> {
            Self::Web(url, ()) => {
                info!("performing web authentication");
-                let info = web::authenticate(ctx, &url, client).await?;
+                let info = web::authenticate(ctx, config, &url, client).await?;
                Backend::Web(url, info)
            }
@@ -557,7 +565,7 @@ mod tests {
        stream::{PqStream, Stream},
    };
-    use super::{auth_quirks, AuthRateLimiter};
+    use super::{auth_quirks, jwt::JwkCache, AuthRateLimiter};
    struct Auth {
        ips: Vec<IpPattern>,
@@ -585,6 +593,14 @@ mod tests {
            ))
        }
        // Test stub: there is no implementation, so any call panics via `unimplemented!()`.
        async fn get_endpoint_jwks(
            &self,
            _ctx: &RequestMonitoring,
            _endpoint: crate::EndpointId,
        ) -> anyhow::Result<Vec<super::jwt::AuthRule>> {
            unimplemented!()
        }
        async fn wake_compute(
            &self,
            _ctx: &RequestMonitoring,
@@ -595,12 +611,15 @@ mod tests {
    }
    // Authentication settings for this test module; the value is built lazily on first access.
    static CONFIG: Lazy<AuthenticationConfig> = Lazy::new(|| AuthenticationConfig {
        jwks_cache: JwkCache::default(),
        thread_pool: ThreadPool::new(1),
        scram_protocol_timeout: std::time::Duration::from_secs(5),
        rate_limiter_enabled: true,
        rate_limiter: AuthRateLimiter::new(&RateBucketInfo::DEFAULT_AUTH_SET),
        rate_limit_ip_subnet: 64,
        ip_allowlist_check_enabled: true,
        is_auth_broker: false,
        accept_jwts: false,
    });
    async fn read_message(r: &mut (impl AsyncRead + Unpin), b: &mut BytesMut) -> PgMessage {
--- a/proxy/src/auth/backend/jwt.rs
+++ b/proxy/src/auth/backend/jwt.rs
@@ -1,4 +1,5 @@
 use std::{
    borrow::Cow,
    future::Future,
    sync::Arc,
    time::{Duration, SystemTime},
@@ -8,11 +9,17 @@ use anyhow::{bail, ensure, Context};
 use arc_swap::ArcSwapOption;
 use dashmap::DashMap;
 use jose_jwk::crypto::KeyInfo;
-use serde::{Deserialize, Deserializer};
+use serde::{
    de::{DeserializeSeed, IgnoredAny, Visitor},
    Deserializer,
 };
 use signature::Verifier;
 use tokio::time::Instant;
-use crate::{context::RequestMonitoring, http::parse_json_body_with_limit, EndpointId, RoleName};
+use crate::{
    context::RequestMonitoring, http::parse_json_body_with_limit, intern::RoleNameInt, EndpointId,
    RoleName,
 };
 // TODO(conrad): make these configurable.
 const CLOCK_SKEW_LEEWAY: Duration = Duration::from_secs(30);
@@ -27,18 +34,19 @@ pub(crate) trait FetchAuthRules: Clone + Send + Sync + 'static {
        &self,
        ctx: &RequestMonitoring,
        endpoint: EndpointId,
        role_name: RoleName,
    ) -> impl Future<Output = anyhow::Result<Vec<AuthRule>>> + Send;
 }
 /// A single JWT authentication rule, as produced by `FetchAuthRules::fetch_auth_rules`.
 #[derive(Debug, Clone)]
 pub(crate) struct AuthRule {
    /// Identifier of this rule.
    pub(crate) id: String,
    /// URL of the JWK set belonging to this rule.
    pub(crate) jwks_url: url::Url,
    /// Audience a token has to name; `None` means the audience is not checked.
    pub(crate) audience: Option<String>,
    /// Roles that may authenticate with keys from this rule.
    pub(crate) role_names: Vec<RoleNameInt>,
 }
 #[derive(Default)]
-pub(crate) struct JwkCache {
+pub struct JwkCache {
    client: reqwest::Client,
    map: DashMap<(EndpointId, RoleName), Arc<JwkCacheEntryLock>>,
@@ -54,18 +62,28 @@ pub(crate) struct JwkCacheEntry {
 }
 impl JwkCacheEntry {
-    fn find_jwk_and_audience(&self, key_id: &str) -> Option<(&jose_jwk::Jwk, Option<&str>)> {
+    fn find_jwk_and_audience(
-        self.key_sets.values().find_map(|key_set| {
+        &self,
-            key_set
+        key_id: &str,
-                .find_key(key_id)
+        role_name: &RoleName,
-                .map(|jwk| (jwk, key_set.audience.as_deref()))
+    ) -> Option<(&jose_jwk::Jwk, Option<&str>)> {
-        })
+        self.key_sets
            .values()
            // make sure our requested role has access to the key set
            .filter(|key_set| key_set.role_names.iter().any(|role| **role == **role_name))
            // try and find the requested key-id in the key set
            .find_map(|key_set| {
                key_set
                    .find_key(key_id)
                    .map(|jwk| (jwk, key_set.audience.as_deref()))
            })
    }
 }
 /// The JWK set of one auth rule together with the restrictions copied from that rule.
 struct KeySet {
    /// Keys associated with the rule.
    jwks: jose_jwk::JwkSet,
    /// Expected token audience taken from the rule, if it specifies one.
    audience: Option<String>,
    /// Roles that are allowed to use these keys.
    role_names: Vec<RoleNameInt>,
 }
 impl KeySet {
@@ -106,7 +124,6 @@ impl JwkCacheEntryLock {
        ctx: &RequestMonitoring,
        client: &reqwest::Client,
        endpoint: EndpointId,
        role_name: RoleName,
        auth_rules: &F,
    ) -> anyhow::Result<Arc<JwkCacheEntry>> {
        // double check that no one beat us to updating the cache.
@@ -119,11 +136,10 @@ impl JwkCacheEntryLock {
            }
        }
-        let rules = auth_rules
+        let rules = auth_rules.fetch_auth_rules(ctx, endpoint).await?;
            .fetch_auth_rules(ctx, endpoint, role_name)
            .await?;
        let mut key_sets =
            ahash::HashMap::with_capacity_and_hasher(rules.len(), ahash::RandomState::new());
        // TODO(conrad): run concurrently
        // TODO(conrad): strip the JWKs urls (should be checked by cplane as well - cloud#16284)
        for rule in rules {
@@ -151,6 +167,7 @@ impl JwkCacheEntryLock {
                                KeySet {
                                    jwks,
                                    audience: rule.audience,
                                    role_names: rule.role_names,
                                },
                            );
                        }
@@ -173,7 +190,6 @@ impl JwkCacheEntryLock {
        ctx: &RequestMonitoring,
        client: &reqwest::Client,
        endpoint: EndpointId,
        role_name: RoleName,
        fetch: &F,
    ) -> Result<Arc<JwkCacheEntry>, anyhow::Error> {
        let now = Instant::now();
@@ -183,9 +199,7 @@ impl JwkCacheEntryLock {
        let Some(cached) = guard else {
            let _paused = ctx.latency_timer_pause(crate::metrics::Waiting::Compute);
            let permit = self.acquire_permit().await;
-            return self
+            return self.renew_jwks(permit, ctx, client, endpoint, fetch).await;
                .renew_jwks(permit, ctx, client, endpoint, role_name, fetch)
                .await;
        };
        let last_update = now.duration_since(cached.last_retrieved);
@@ -196,9 +210,7 @@ impl JwkCacheEntryLock {
            let permit = self.acquire_permit().await;
            // it's been too long since we checked the keys. wait for them to update.
-            return self
+            return self.renew_jwks(permit, ctx, client, endpoint, fetch).await;
                .renew_jwks(permit, ctx, client, endpoint, role_name, fetch)
                .await;
        }
        // every 5 minutes we should spawn a job to eagerly update the token.
@@ -212,7 +224,7 @@ impl JwkCacheEntryLock {
                let ctx = ctx.clone();
                tokio::spawn(async move {
                    if let Err(e) = entry
-                        .renew_jwks(permit, &ctx, &client, endpoint, role_name, &fetch)
+                        .renew_jwks(permit, &ctx, &client, endpoint, &fetch)
                        .await
                    {
                        tracing::warn!(error=?e, "could not fetch JWKs in background job");
@@ -232,7 +244,7 @@ impl JwkCacheEntryLock {
        jwt: &str,
        client: &reqwest::Client,
        endpoint: EndpointId,
-        role_name: RoleName,
+        role_name: &RoleName,
        fetch: &F,
    ) -> Result<(), anyhow::Error> {
        // JWT compact form is defined to be
@@ -254,30 +266,26 @@ impl JwkCacheEntryLock {
        let sig = base64::decode_config(signature, base64::URL_SAFE_NO_PAD)
            .context("Provided authentication token is not a valid JWT encoding")?;
-        ensure!(header.typ == "JWT");
+        ensure!(
            header.typ == "JWT",
            "Provided authentication token is not a valid JWT encoding"
        );
        let kid = header.key_id.context("missing key id")?;
        let mut guard = self
-            .get_or_update_jwk_cache(ctx, client, endpoint.clone(), role_name.clone(), fetch)
+            .get_or_update_jwk_cache(ctx, client, endpoint.clone(), fetch)
            .await?;
        // get the key from the JWKs if possible. If not, wait for the keys to update.
        let (jwk, expected_audience) = loop {
-            match guard.find_jwk_and_audience(kid) {
+            match guard.find_jwk_and_audience(kid, role_name) {
                Some(jwk) => break jwk,
                None if guard.last_retrieved.elapsed() > MIN_RENEW => {
                    let _paused = ctx.latency_timer_pause(crate::metrics::Waiting::Compute);
                    let permit = self.acquire_permit().await;
                    guard = self
-                        .renew_jwks(
+                        .renew_jwks(permit, ctx, client, endpoint.clone(), fetch)
                            permit,
                            ctx,
                            client,
                            endpoint.clone(),
                            role_name.clone(),
                            fetch,
                        )
                        .await?;
                }
                _ => {
@@ -300,32 +308,21 @@ impl JwkCacheEntryLock {
            }
            key => bail!("unsupported key type {key:?}"),
        };
        tracing::debug!("JWT signature valid");
        let payload = base64::decode_config(payload, base64::URL_SAFE_NO_PAD)
            .context("Provided authentication token is not a valid JWT encoding")?;
        let payload = serde_json::from_slice::<JwtPayload<'_>>(&payload)
            .context("Provided authentication token is not a valid JWT encoding")?;
-        tracing::debug!(?payload, "JWT signature valid with claims");
+        let validator = JwtValidator {
            expected_audience,
            current_time: SystemTime::now(),
            clock_skew_leeway: CLOCK_SKEW_LEEWAY,
        };
-        match (expected_audience, payload.audience) {
+        let payload = validator
-            // check the audience matches
+            .deserialize(&mut serde_json::Deserializer::from_slice(&payload))?;
            (Some(aud1), Some(aud2)) => ensure!(aud1 == aud2, "invalid JWT token audience"),
            // the audience is expected but is missing
            (Some(_), None) => bail!("invalid JWT token audience"),
            // we don't care for the audience field
            (None, _) => {}
        }
-        let now = SystemTime::now();
+        tracing::debug!(?payload, "JWT claims valid");
        if let Some(exp) = payload.expiration {
            ensure!(now < exp + CLOCK_SKEW_LEEWAY);
        }
        if let Some(nbf) = payload.not_before {
            ensure!(nbf < now + CLOCK_SKEW_LEEWAY);
        }
        Ok(())
    }
@@ -336,7 +333,7 @@ impl JwkCache {
        &self,
        ctx: &RequestMonitoring,
        endpoint: EndpointId,
-        role_name: RoleName,
+        role_name: &RoleName,
        fetch: &F,
        jwt: &str,
    ) -> Result<(), anyhow::Error> {
@@ -413,37 +410,184 @@ struct JwtHeader<'a> {
    key_id: Option<&'a str>,
 }
-/// <https://datatracker.ietf.org/doc/html/rfc7519#section-4.1>
+struct JwtValidator<'a> {
-#[derive(serde::Deserialize, serde::Serialize, Debug)]
+    expected_audience: Option<&'a str>,
-struct JwtPayload<'a> {
+    current_time: SystemTime,
-    /// Audience - Recipient for which the JWT is intended
+    clock_skew_leeway: Duration,
    #[serde(rename = "aud")]
    audience: Option<&'a str>,
    /// Expiration - Time after which the JWT expires
    #[serde(deserialize_with = "numeric_date_opt", rename = "exp", default)]
    expiration: Option<SystemTime>,
    /// Not before - Time after which the JWT expires
    #[serde(deserialize_with = "numeric_date_opt", rename = "nbf", default)]
    not_before: Option<SystemTime>,
    // the following entries are only extracted for the sake of debug logging.
    /// Issuer of the JWT
    #[serde(rename = "iss")]
    issuer: Option<&'a str>,
    /// Subject of the JWT (the user)
    #[serde(rename = "sub")]
    subject: Option<&'a str>,
    /// Unique token identifier
    #[serde(rename = "jti")]
    jwt_id: Option<&'a str>,
    /// Unique session identifier
    #[serde(rename = "sid")]
    session_id: Option<&'a str>,
 }
-fn numeric_date_opt<'de, D: Deserializer<'de>>(d: D) -> Result<Option<SystemTime>, D::Error> {
+impl<'de> DeserializeSeed<'de> for JwtValidator<'_> {
-    let d = <Option<u64>>::deserialize(d)?;
+    type Value = JwtPayload<'de>;
-    Ok(d.map(|n| SystemTime::UNIX_EPOCH + Duration::from_secs(n)))
+
    fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
    where
        D: Deserializer<'de>,
    {
        /// Walks the claims object once, validating `exp`, `nbf` and `aud` as they are
        /// encountered and keeping the first `iss`, `sub`, `jti` and `sid` values so that
        /// they can be logged. Every other claim is skipped without being materialised.
        impl<'de> Visitor<'de> for JwtValidator<'_> {
            type Value = JwtPayload<'de>;
            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
                formatter.write_str("a JWT payload")
            }
            /// Returns the informational claims on success.
            ///
            /// Fails with a custom error when the token has expired, is not yet valid, or
            /// (if an audience is expected) carries no matching `aud` claim.
            fn visit_map<A>(self, mut map: A) -> Result<Self::Value, A::Error>
            where
                A: serde::de::MapAccess<'de>,
            {
                let mut payload = JwtPayload {
                    issuer: None,
                    subject: None,
                    jwt_id: None,
                    session_id: None,
                };
                // set once an `aud` claim has been seen and accepted
                let mut aud = false;
                while let Some(key) = map.next_key()? {
                    match key {
                        // only the first occurrence of each informational claim is kept;
                        // duplicates fall through to the ignore arm at the bottom.
                        "iss" if payload.issuer.is_none() => {
                            payload.issuer = Some(map.next_value()?);
                        }
                        "sub" if payload.subject.is_none() => {
                            payload.subject = Some(map.next_value()?);
                        }
                        // RFC 7519 section 4.1.7 names the JWT ID claim "jti"
                        "jti" if payload.jwt_id.is_none() => {
                            payload.jwt_id = Some(map.next_value()?);
                        }
                        "sid" if payload.session_id.is_none() => {
                            payload.session_id = Some(map.next_value()?);
                        }
                        "exp" => {
                            let exp = map.next_value::<u64>()?;
                            // checked arithmetic: `SystemTime + Duration` panics on overflow
                            // and `exp` comes straight from the token.
                            let deadline = SystemTime::UNIX_EPOCH
                                .checked_add(Duration::from_secs(exp))
                                .and_then(|exp| exp.checked_add(self.clock_skew_leeway));
                            // a deadline too large to represent cannot have passed yet
                            if deadline.map_or(false, |deadline| self.current_time > deadline) {
                                return Err(serde::de::Error::custom("JWT token has expired"));
                            }
                        }
                        "nbf" => {
                            let nbf = map.next_value::<u64>()?;
                            let not_yet_valid = match SystemTime::UNIX_EPOCH
                                .checked_add(Duration::from_secs(nbf))
                            {
                                // too large to represent, hence still in the future
                                None => true,
                                Some(nbf) => self
                                    .current_time
                                    .checked_add(self.clock_skew_leeway)
                                    .map_or(false, |latest_now| latest_now < nbf),
                            };
                            if not_yet_valid {
                                return Err(serde::de::Error::custom(
                                    "JWT token is not yet ready to use",
                                ));
                            }
                        }
                        "aud" => {
                            if let Some(expected_audience) = self.expected_audience {
                                map.next_value_seed(AudienceValidator { expected_audience })?;
                                aud = true;
                            } else {
                                // no audience is expected, so the claim is not inspected
                                map.next_value::<IgnoredAny>()?;
                            }
                        }
                        _ => map.next_value::<IgnoredAny>().map(|IgnoredAny| ())?,
                    }
                }
                // an expected audience makes the `aud` claim mandatory
                if self.expected_audience.is_some() && !aud {
                    return Err(serde::de::Error::custom("invalid JWT token audience"));
                }
                Ok(payload)
            }
        }
        deserializer.deserialize_map(self)
    }
 }
 /// Deserialization seed that checks an `aud` claim (a single string or an array of
 /// strings) against `expected_audience` without producing a value.
 struct AudienceValidator<'a> {
    /// The audience the claim has to contain.
    expected_audience: &'a str,
 }
 /// Validates an `aud` claim while it is being parsed.
 ///
 /// A string claim must equal the expected audience; an array claim must contain it.
 /// Anything else is rejected with "invalid JWT token audience" (or a type error).
 impl<'de> DeserializeSeed<'de> for AudienceValidator<'_> {
    type Value = ();
    fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
    where
        D: Deserializer<'de>,
    {
        impl<'de> Visitor<'de> for AudienceValidator<'_> {
            type Value = ();
            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
                formatter.write_str("a single string or an array of strings")
            }
            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
            where
                E: serde::de::Error,
            {
                if self.expected_audience == v {
                    Ok(())
                } else {
                    Err(E::custom("invalid JWT token audience"))
                }
            }
            fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
            where
                A: serde::de::SeqAccess<'de>,
            {
                // The sequence has to be read to its end even after a match: serde_json
                // reports "trailing characters" when a visitor leaves elements unread,
                // which would reject e.g. `["neon", "other"]` for the audience "neon".
                let mut matched = false;
                while let Some(is_match) = seq.next_element_seed(SingleAudienceValidator {
                    expected_audience: self.expected_audience,
                })? {
                    matched |= is_match;
                }
                if matched {
                    Ok(())
                } else {
                    Err(serde::de::Error::custom("invalid JWT token audience"))
                }
            }
        }
        deserializer.deserialize_any(self)
    }
 }
 /// Deserialization seed for one audience string; it yields whether that string equals
 /// `expected_audience`.
 struct SingleAudienceValidator<'a> {
    /// The audience to compare against.
    expected_audience: &'a str,
 }
 /// Reads one audience string and reports whether it is the expected one.
 /// A mismatch is `Ok(false)`, not an error; a non-string value is a type error.
 impl<'de> DeserializeSeed<'de> for SingleAudienceValidator<'_> {
    type Value = bool;
    fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
    where
        D: Deserializer<'de>,
    {
        impl<'de> Visitor<'de> for SingleAudienceValidator<'_> {
            type Value = bool;
            fn expecting(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
                f.write_str("a single audience string")
            }
            fn visit_str<E>(self, candidate: &str) -> Result<Self::Value, E>
            where
                E: serde::de::Error,
            {
                let Self { expected_audience } = self;
                Ok(expected_audience == candidate)
            }
        }
        // the seed doubles as its own visitor
        let visitor = self;
        deserializer.deserialize_any(visitor)
    }
 }
 /// Claims kept from a JWT payload.
 ///
 /// <https://datatracker.ietf.org/doc/html/rfc7519#section-4.1>
 // the following entries are only extracted for the sake of debug logging.
 #[derive(Debug)]
 // NOTE(review): presumably needed because the values are only consumed via `Debug` - confirm
 #[allow(dead_code)]
 struct JwtPayload<'a> {
    /// Issuer of the JWT
    issuer: Option<Cow<'a, str>>,
    /// Subject of the JWT (the user)
    subject: Option<Cow<'a, str>>,
    /// Unique token identifier
    jwt_id: Option<Cow<'a, str>>,
    /// Unique session identifier
    session_id: Option<Cow<'a, str>>,
 }
 struct JwkRenewalPermit<'a> {
@@ -524,6 +668,8 @@ mod tests {
    use hyper_util::rt::TokioIo;
    use rand::rngs::OsRng;
    use rsa::pkcs8::DecodePrivateKey;
    use serde::Serialize;
    use serde_json::json;
    use signature::Signer;
    use tokio::net::TcpListener;
@@ -556,23 +702,41 @@ mod tests {
    }
    /// Builds the unsigned `header.body` part of a test JWT with a fixed set of claims:
    /// valid from now for one hour, three audiences, and sample `sub`/`sid`/`jti`/`iss`.
    fn build_jwt_payload(kid: String, sig: jose_jwa::Signing) -> String {
        // seconds since the unix epoch, used as the base for `exp` and `nbf`
        let unix_now = SystemTime::UNIX_EPOCH.elapsed().unwrap().as_secs();
        let claims = typed_json::json! {{
            "exp": unix_now + 3600,
            "nbf": unix_now,
            "aud": ["audience1", "neon", "audience2"],
            "sub": "user1",
            "sid": "session1",
            "jti": "token1",
            "iss": "neon-testing",
        }};
        build_custom_jwt_payload(kid, claims, sig)
    }
    fn build_custom_jwt_payload(
        kid: String,
        body: impl Serialize,
        sig: jose_jwa::Signing,
    ) -> String {
        let header = JwtHeader {
            typ: "JWT",
            algorithm: jose_jwa::Algorithm::Signing(sig),
            key_id: Some(&kid),
        };
        let body = typed_json::json! {{
            "exp": SystemTime::now().duration_since(SystemTime::UNIX_EPOCH).unwrap().as_secs() + 3600,
        }};
        let header =
            base64::encode_config(serde_json::to_string(&header).unwrap(), URL_SAFE_NO_PAD);
-        let body = base64::encode_config(body.to_string(), URL_SAFE_NO_PAD);
+        let body = base64::encode_config(serde_json::to_string(&body).unwrap(), URL_SAFE_NO_PAD);
        format!("{header}.{body}")
    }
-    fn new_ec_jwt(kid: String, key: p256::SecretKey) -> String {
+    fn new_ec_jwt(kid: String, key: &p256::SecretKey) -> String {
        use p256::ecdsa::{Signature, SigningKey};
        let payload = build_jwt_payload(kid, jose_jwa::Signing::Es256);
@@ -582,6 +746,16 @@ mod tests {
        format!("{payload}.{sig}")
    }
    /// Creates an ES256-signed test JWT whose claims are the caller-supplied `body`.
    fn new_custom_ec_jwt(kid: String, key: &p256::SecretKey, body: impl Serialize) -> String {
        use p256::ecdsa::{Signature, SigningKey};
        // the signature covers the `header.body` text exactly as it appears in the token
        let signing_input = build_custom_jwt_payload(kid, body, jose_jwa::Signing::Es256);
        let signer = SigningKey::from(key);
        let signature: Signature = signer.sign(signing_input.as_bytes());
        let encoded_signature = base64::encode_config(signature.to_bytes(), URL_SAFE_NO_PAD);
        format!("{signing_input}.{encoded_signature}")
    }
    fn new_rsa_jwt(kid: String, key: rsa::RsaPrivateKey) -> String {
        use rsa::pkcs1v15::SigningKey;
        use rsa::signature::SignatureEncoding;
@@ -653,42 +827,34 @@ X0n5X2/pBLJzxZc62ccvZYVnctBiFs6HbSnxpuMQCfkt/BcR/ttIepBQQIW86wHL
 -----END PRIVATE KEY-----
 ";
-    #[tokio::test]
+    #[derive(Clone)]
-    async fn renew() {
+    struct Fetch(Vec<AuthRule>);
        let (rs1, jwk1) = new_rsa_jwk(RS1, "1".into());
        let (rs2, jwk2) = new_rsa_jwk(RS2, "2".into());
        let (ec1, jwk3) = new_ec_jwk("3".into());
        let (ec2, jwk4) = new_ec_jwk("4".into());
-        let jwt1 = new_rsa_jwt("1".into(), rs1);
+    impl FetchAuthRules for Fetch {
-        let jwt2 = new_rsa_jwt("2".into(), rs2);
+        async fn fetch_auth_rules(
-        let jwt3 = new_ec_jwt("3".into(), ec1);
+            &self,
-        let jwt4 = new_ec_jwt("4".into(), ec2);
+            _ctx: &RequestMonitoring,
-
+            _endpoint: EndpointId,
-        let foo_jwks = jose_jwk::JwkSet {
+        ) -> anyhow::Result<Vec<AuthRule>> {
-            keys: vec![jwk1, jwk3],
+            Ok(self.0.clone())
-        };
+        }
-        let bar_jwks = jose_jwk::JwkSet {
+    }
            keys: vec![jwk2, jwk4],
        };
    async fn jwks_server(
        router: impl for<'a> Fn(&'a str) -> Option<Vec<u8>> + Send + Sync + 'static,
    ) -> SocketAddr {
        let router = Arc::new(router);
        let service = service_fn(move |req| {
-            let foo_jwks = foo_jwks.clone();
+            let router = Arc::clone(&router);
            let bar_jwks = bar_jwks.clone();
            async move {
-                let jwks = match req.uri().path() {
+                match router(req.uri().path()) {
-                    "/foo" => &foo_jwks,
+                    Some(body) => Response::builder()
-                    "/bar" => &bar_jwks,
+                        .status(200)
-                    _ => {
+                        .body(Full::new(Bytes::from(body))),
-                        return Response::builder()
+                    None => Response::builder()
-                            .status(404)
+                        .status(404)
-                            .body(Full::new(Bytes::new()));
+                        .body(Full::new(Bytes::new())),
-                    }
+                }
                };
                let body = serde_json::to_vec(jwks).unwrap();
                Response::builder()
                    .status(200)
                    .body(Full::new(Bytes::from(body)))
            }
        });
@@ -703,50 +869,257 @@ X0n5X2/pBLJzxZc62ccvZYVnctBiFs6HbSnxpuMQCfkt/BcR/ttIepBQQIW86wHL
            }
        });
-        let client = reqwest::Client::new();
+        addr
    }
-        #[derive(Clone)]
+    #[tokio::test]
-        struct Fetch(SocketAddr);
+    async fn check_jwt_happy_path() {
        let (rs1, jwk1) = new_rsa_jwk(RS1, "rs1".into());
        let (rs2, jwk2) = new_rsa_jwk(RS2, "rs2".into());
        let (ec1, jwk3) = new_ec_jwk("ec1".into());
        let (ec2, jwk4) = new_ec_jwk("ec2".into());
-        impl FetchAuthRules for Fetch {
+        let foo_jwks = jose_jwk::JwkSet {
-            async fn fetch_auth_rules(
+            keys: vec![jwk1, jwk3],
-                &self,
+        };
-                _ctx: &RequestMonitoring,
+        let bar_jwks = jose_jwk::JwkSet {
-                _endpoint: EndpointId,
+            keys: vec![jwk2, jwk4],
-                _role_name: RoleName,
+        };
-            ) -> anyhow::Result<Vec<AuthRule>> {
+
-                Ok(vec![
+        let jwks_addr = jwks_server(move |path| match path {
-                    AuthRule {
+            "/foo" => Some(serde_json::to_vec(&foo_jwks).unwrap()),
-                        id: "foo".to_owned(),
+            "/bar" => Some(serde_json::to_vec(&bar_jwks).unwrap()),
-                        jwks_url: format!("http://{}/foo", self.0).parse().unwrap(),
+            _ => None,
-                        audience: None,
+        })
-                    },
+        .await;
-                    AuthRule {
+
-                        id: "bar".to_owned(),
+        let role_name1 = RoleName::from("anonymous");
-                        jwks_url: format!("http://{}/bar", self.0).parse().unwrap(),
+        let role_name2 = RoleName::from("authenticated");
-                        audience: None,
+
-                    },
+        let roles = vec![
-                ])
+            RoleNameInt::from(&role_name1),
-            }
+            RoleNameInt::from(&role_name2),
-        }
+        ];
        let rules = vec![
            AuthRule {
                id: "foo".to_owned(),
                jwks_url: format!("http://{jwks_addr}/foo").parse().unwrap(),
                audience: None,
                role_names: roles.clone(),
            },
            AuthRule {
                id: "bar".to_owned(),
                jwks_url: format!("http://{jwks_addr}/bar").parse().unwrap(),
                audience: None,
                role_names: roles.clone(),
            },
        ];
        let fetch = Fetch(rules);
        let jwk_cache = JwkCache::default();
        let role_name = RoleName::from("user");
        let endpoint = EndpointId::from("ep");
-        let jwk_cache = Arc::new(JwkCacheEntryLock::default());
+        let jwt1 = new_rsa_jwt("rs1".into(), rs1);
        let jwt2 = new_rsa_jwt("rs2".into(), rs2);
        let jwt3 = new_ec_jwt("ec1".into(), &ec1);
        let jwt4 = new_ec_jwt("ec2".into(), &ec2);
-        for token in [jwt1, jwt2, jwt3, jwt4] {
+        let tokens = [jwt1, jwt2, jwt3, jwt4];
-            jwk_cache
+        let role_names = [role_name1, role_name2];
-                .check_jwt(
+        for role in &role_names {
-                    &RequestMonitoring::test(),
+            for token in &tokens {
-                    &token,
+                jwk_cache
-                    &client,
+                    .check_jwt(
-                    endpoint.clone(),
+                        &RequestMonitoring::test(),
-                    role_name.clone(),
+                        endpoint.clone(),
-                    &Fetch(addr),
+                        role,
-                )
+                        &fetch,
                        token,
                    )
                    .await
                    .unwrap();
            }
        }
    }
    /// A token whose key id is known but which was signed by a different key is rejected.
    #[tokio::test]
    async fn check_jwt_invalid_signature() {
        let (_, published_jwk) = new_ec_jwk("1".into());
        let (other_key, _) = new_ec_jwk("1".into());
        // has a matching kid, but signed by the wrong key
        let forged_jwt = new_ec_jwt("1".into(), &other_key);
        let key_set = jose_jwk::JwkSet {
            keys: vec![published_jwk],
        };
        let server_addr = jwks_server(move |path| {
            if path == "/" {
                Some(serde_json::to_vec(&key_set).unwrap())
            } else {
                None
            }
        })
        .await;
        let role = RoleName::from("authenticated");
        let fetch = Fetch(vec![AuthRule {
            id: String::new(),
            jwks_url: format!("http://{server_addr}/").parse().unwrap(),
            audience: None,
            role_names: vec![RoleNameInt::from(&role)],
        }]);
        let cache = JwkCache::default();
        let endpoint = EndpointId::from("ep");
        let ctx = RequestMonitoring::test();
        let outcome = cache
            .check_jwt(&ctx, endpoint, &role, &fetch, &forged_jwt)
            .await;
        let err = outcome.unwrap_err();
        assert!(
            err.to_string().contains("signature error"),
            "expected \"signature error\", got {err:?}"
        );
    }
    /// A correctly signed token is still refused when the connecting role is not listed
    /// in the rule that owns the signing key.
    #[tokio::test]
    async fn check_jwt_unknown_role() {
        let (private_key, public_jwk) = new_rsa_jwk(RS1, "1".into());
        let token = new_rsa_jwt("1".into(), private_key);
        let key_set = jose_jwk::JwkSet {
            keys: vec![public_jwk],
        };
        let server_addr = jwks_server(move |path| {
            if path == "/" {
                Some(serde_json::to_vec(&key_set).unwrap())
            } else {
                None
            }
        })
        .await;
        let permitted_role = RoleName::from("authenticated");
        let fetch = Fetch(vec![AuthRule {
            id: String::new(),
            jwks_url: format!("http://{server_addr}/").parse().unwrap(),
            audience: None,
            role_names: vec![RoleNameInt::from(&permitted_role)],
        }]);
        let cache = JwkCache::default();
        let endpoint = EndpointId::from("ep");
        // this role_name is not accepted
        let bad_role_name = RoleName::from("cloud_admin");
        let ctx = RequestMonitoring::test();
        let outcome = cache
            .check_jwt(&ctx, endpoint, &bad_role_name, &fetch, &token)
            .await;
        let err = outcome.unwrap_err();
        assert!(
            err.to_string().contains("jwk not found"),
            "expected \"jwk not found\", got {err:?}"
        );
    }
    #[tokio::test]
    async fn check_jwt_invalid_claims() {
        let (key, jwk) = new_ec_jwk("1".into());
        let jwks = jose_jwk::JwkSet { keys: vec![jwk] };
        let jwks_addr = jwks_server(move |path| match path {
            "/" => Some(serde_json::to_vec(&jwks).unwrap()),
            _ => None,
        })
        .await;
        let now = SystemTime::now()
            .duration_since(SystemTime::UNIX_EPOCH)
            .unwrap()
            .as_secs();
        struct Test {
            body: serde_json::Value,
            error: &'static str,
        }
        let table = vec![
            Test {
                body: json! {{
                    "nbf": now + 60,
                    "aud": "neon",
                }},
                error: "JWT token is not yet ready to use",
            },
            Test {
                body: json! {{
                    "exp": now - 60,
                    "aud": ["neon"],
                }},
                error: "JWT token has expired",
            },
            Test {
                body: json! {{
                }},
                error: "invalid JWT token audience",
            },
            Test {
                body: json! {{
                    "aud": [],
                }},
                error: "invalid JWT token audience",
            },
            Test {
                body: json! {{
                    "aud": "foo",
                }},
                error: "invalid JWT token audience",
            },
            Test {
                body: json! {{
                    "aud": ["foo"],
                }},
                error: "invalid JWT token audience",
            },
            Test {
                body: json! {{
                    "aud": ["foo", "bar"],
                }},
                error: "invalid JWT token audience",
            },
        ];
        let role = RoleName::from("authenticated");
        let rules = vec![AuthRule {
            id: String::new(),
            jwks_url: format!("http://{jwks_addr}/").parse().unwrap(),
            audience: Some("neon".to_string()),
            role_names: vec![RoleNameInt::from(&role)],
        }];
        let fetch = Fetch(rules);
        let jwk_cache = JwkCache::default();
        let ep = EndpointId::from("ep");
        let ctx = RequestMonitoring::test();
        for test in table {
            let jwt = new_custom_ec_jwt("1".into(), &key, test.body);
            match jwk_cache
                .check_jwt(&ctx, ep.clone(), &role, &fetch, &jwt)
                .await
-                .unwrap();
+            {
                Err(err) if err.to_string().contains(test.error) => {}
                Err(err) => {
                    panic!("expected {:?}, got {err:?}", test.error)
                }
                Ok(()) => {
                    panic!("expected {:?}, got ok", test.error)
                }
            }
        }
    }
 }
--- a/proxy/src/auth/backend/local.rs
+++ b/proxy/src/auth/backend/local.rs
@@ -1,4 +1,4 @@
-use std::{collections::HashMap, net::SocketAddr};
+use std::net::SocketAddr;
 use anyhow::Context;
 use arc_swap::ArcSwapOption;
@@ -10,21 +10,19 @@ use crate::{
        NodeInfo,
    },
    context::RequestMonitoring,
-    intern::{BranchIdInt, BranchIdTag, EndpointIdTag, InternId, ProjectIdInt, ProjectIdTag},
+    intern::{BranchIdTag, EndpointIdTag, InternId, ProjectIdTag},
-    EndpointId, RoleName,
+    EndpointId,
 };
-use super::jwt::{AuthRule, FetchAuthRules, JwkCache};
+use super::jwt::{AuthRule, FetchAuthRules};
 /// State held by the local-proxy auth backend.
 ///
 /// Constructed by `LocalBackend::new` from the postgres socket address.
 pub struct LocalBackend {
    /// JWK cache for this backend; `new` initialises it with `JwkCache::default()`.
    pub(crate) jwks_cache: JwkCache,
    /// Node info (including the connection config) assembled in `new`.
    pub(crate) node_info: NodeInfo,
 }
 impl LocalBackend {
    pub fn new(postgres_addr: SocketAddr) -> Self {
        LocalBackend {
            jwks_cache: JwkCache::default(),
            node_info: NodeInfo {
                config: {
                    let mut cfg = ConnCfg::new();
@@ -48,26 +46,17 @@ impl LocalBackend {
 #[derive(Clone, Copy)]
 pub(crate) struct StaticAuthRules;
-pub static JWKS_ROLE_MAP: ArcSwapOption<JwksRoleSettings> = ArcSwapOption::const_empty();
+pub static JWKS_ROLE_MAP: ArcSwapOption<EndpointJwksResponse> = ArcSwapOption::const_empty();
 /// JWKS settings grouped per role, together with a project and branch ID.
 #[derive(Debug, Clone)]
 pub struct JwksRoleSettings {
    /// JWKS configuration keyed by role name.
    pub roles: HashMap<RoleName, EndpointJwksResponse>,
    /// Interned project ID — presumably the project all `roles` entries belong to; confirm against the loader.
    pub project_id: ProjectIdInt,
    /// Interned branch ID — presumably the branch all `roles` entries belong to; confirm against the loader.
    pub branch_id: BranchIdInt,
 }
 impl FetchAuthRules for StaticAuthRules {
    async fn fetch_auth_rules(
        &self,
        _ctx: &RequestMonitoring,
        _endpoint: EndpointId,
        role_name: RoleName,
    ) -> anyhow::Result<Vec<AuthRule>> {
        let mappings = JWKS_ROLE_MAP.load();
        let role_mappings = mappings
            .as_deref()
            .and_then(|m| m.roles.get(&role_name))
            .context("JWKs settings for this role were not configured")?;
        let mut rules = vec![];
        for setting in &role_mappings.jwks {
@@ -75,6 +64,7 @@ impl FetchAuthRules for StaticAuthRules {
                id: setting.id.clone(),
                jwks_url: setting.jwks_url.clone(),
                audience: setting.jwt_audience.clone(),
                role_names: setting.role_names.clone(),
            });
        }
--- a/proxy/src/auth/backend/web.rs
+++ b/proxy/src/auth/backend/web.rs
@@ -1,5 +1,6 @@
 use crate::{
    auth, compute,
    config::AuthenticationConfig,
    console::{self, provider::NodeInfo},
    context::RequestMonitoring,
    error::{ReportableError, UserFacingError},
@@ -58,6 +59,7 @@ pub(crate) fn new_psql_session_id() -> String {
 pub(super) async fn authenticate(
    ctx: &RequestMonitoring,
    auth_config: &'static AuthenticationConfig,
    link_uri: &reqwest::Url,
    client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,
 ) -> auth::Result<NodeInfo> {
@@ -89,6 +91,14 @@ pub(super) async fn authenticate(
    info!(parent: &span, "waiting for console's reply...");
    let db_info = waiter.await.map_err(WebAuthError::from)?;
    if auth_config.ip_allowlist_check_enabled {
        if let Some(allowed_ips) = &db_info.allowed_ips {
            if !auth::check_peer_addr_is_in_list(&ctx.peer_addr(), allowed_ips) {
                return Err(auth::AuthError::ip_address_not_allowed(ctx.peer_addr()));
            }
        }
    }
    client.write_message_noflush(&Be::NoticeResponse("Connecting to database."))?;
    // This config should be self-contained, because we won't
--- a/proxy/src/bin/local_proxy.rs
+++ b/proxy/src/bin/local_proxy.rs
@@ -1,34 +1,38 @@
-use std::{
+use std::{net::SocketAddr, pin::pin, str::FromStr, sync::Arc, time::Duration};
    net::SocketAddr,
    path::{Path, PathBuf},
    pin::pin,
    sync::Arc,
    time::Duration,
 };
-use anyhow::{bail, ensure};
+use anyhow::{bail, ensure, Context};
 use camino::{Utf8Path, Utf8PathBuf};
 use compute_api::spec::LocalProxySpec;
 use dashmap::DashMap;
-use futures::{future::Either, FutureExt};
+use futures::future::Either;
 use proxy::{
-    auth::backend::local::{JwksRoleSettings, LocalBackend, JWKS_ROLE_MAP},
+    auth::backend::{
        jwt::JwkCache,
        local::{LocalBackend, JWKS_ROLE_MAP},
    },
    cancellation::CancellationHandlerMain,
    config::{self, AuthenticationConfig, HttpConfig, ProxyConfig, RetryConfig},
-    console::{locks::ApiLocks, messages::JwksRoleMapping},
+    console::{
        locks::ApiLocks,
        messages::{EndpointJwksResponse, JwksSettings},
    },
    http::health_server::AppMetrics,
    intern::RoleNameInt,
    metrics::{Metrics, ThreadPoolMetrics},
    rate_limiter::{BucketRateLimiter, EndpointRateLimiter, LeakyBucketConfig, RateBucketInfo},
    scram::threadpool::ThreadPool,
    serverless::{self, cancel_set::CancelSet, GlobalConnPoolOptions},
    RoleName,
 };
 project_git_version!(GIT_VERSION);
 project_build_tag!(BUILD_TAG);
 use clap::Parser;
-use tokio::{net::TcpListener, task::JoinSet};
+use tokio::{net::TcpListener, sync::Notify, task::JoinSet};
 use tokio_util::sync::CancellationToken;
 use tracing::{error, info, warn};
-use utils::{project_build_tag, project_git_version, sentry_init::init_sentry};
+use utils::{pid_file, project_build_tag, project_git_version, sentry_init::init_sentry};
 #[global_allocator]
 static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
@@ -72,9 +76,12 @@ struct LocalProxyCliArgs {
    /// Address of the postgres server
    #[clap(long, default_value = "127.0.0.1:5432")]
    compute: SocketAddr,
-    /// File address of the local proxy config file
+    /// Path of the local proxy config file
    #[clap(long, default_value = "./localproxy.json")]
-    config_path: PathBuf,
+    config_path: Utf8PathBuf,
    /// Path of the local proxy PID file
    #[clap(long, default_value = "./localproxy.pid")]
    pid_path: Utf8PathBuf,
 }
 #[derive(clap::Args, Clone, Copy, Debug)]
@@ -126,6 +133,24 @@ async fn main() -> anyhow::Result<()> {
    let args = LocalProxyCliArgs::parse();
    let config = build_config(&args)?;
    // before we bind to any ports, write the process ID to a file
    // so that compute-ctl can find our process later
    // in order to trigger the appropriate SIGHUP on config change.
    //
    // This also claims a "lock" that makes sure only one instance
    // of local-proxy runs at a time.
    let _process_guard = loop {
        match pid_file::claim_for_current_process(&args.pid_path) {
            Ok(guard) => break guard,
            Err(e) => {
                // compute-ctl might have tried to read the pid-file to let us
                // know about some config change. We should try again.
                error!(path=?args.pid_path, "could not claim PID file guard: {e:?}");
                tokio::time::sleep(Duration::from_secs(1)).await;
            }
        }
    };
    let metrics_listener = TcpListener::bind(args.metrics).await?.into_std()?;
    let http_listener = TcpListener::bind(args.http).await?;
    let shutdown = CancellationToken::new();
@@ -139,12 +164,30 @@ async fn main() -> anyhow::Result<()> {
        16,
    ));
-    refresh_config(args.config_path.clone()).await;
+    // write the process ID to a file so that compute-ctl can find our process later
    // in order to trigger the appropriate SIGHUP on config change.
    let pid = std::process::id();
    info!("process running in PID {pid}");
    std::fs::write(args.pid_path, format!("{pid}\n")).context("writing PID to file")?;
    let mut maintenance_tasks = JoinSet::new();
-    maintenance_tasks.spawn(proxy::handle_signals(shutdown.clone(), move || {
+
-        refresh_config(args.config_path.clone()).map(Ok)
+    let refresh_config_notify = Arc::new(Notify::new());
    maintenance_tasks.spawn(proxy::handle_signals(shutdown.clone(), {
        let refresh_config_notify = Arc::clone(&refresh_config_notify);
        move || {
            refresh_config_notify.notify_one();
        }
    }));
    // trigger the first config load **after** setting up the signal hook
    // to avoid the race condition where:
    // 1. No config file registered when local-proxy starts up
    // 2. The config file is written but the signal hook is not yet received
    // 3. local-proxy completes startup but has no config loaded, despite there being a registered config.
    refresh_config_notify.notify_one();
    tokio::spawn(refresh_config_loop(args.config_path, refresh_config_notify));
    maintenance_tasks.spawn(proxy::http::health_server::task_main(
        metrics_listener,
        AppMetrics {
@@ -227,12 +270,15 @@ fn build_config(args: &LocalProxyCliArgs) -> anyhow::Result<&'static ProxyConfig
        allow_self_signed_compute: false,
        http_config,
        authentication_config: AuthenticationConfig {
            jwks_cache: JwkCache::default(),
            thread_pool: ThreadPool::new(0),
            scram_protocol_timeout: Duration::from_secs(10),
            rate_limiter_enabled: false,
            rate_limiter: BucketRateLimiter::new(vec![]),
            rate_limit_ip_subnet: 64,
            ip_allowlist_check_enabled: true,
            is_auth_broker: false,
            accept_jwts: true,
        },
        require_client_ip: false,
        handshake_timeout: Duration::from_secs(10),
@@ -245,81 +291,84 @@ fn build_config(args: &LocalProxyCliArgs) -> anyhow::Result<&'static ProxyConfig
    })))
 }
-async fn refresh_config(path: PathBuf) {
+async fn refresh_config_loop(path: Utf8PathBuf, rx: Arc<Notify>) {
-    match refresh_config_inner(&path).await {
+    loop {
-        Ok(()) => {}
+        rx.notified().await;
-        Err(e) => {
+
-            error!(error=?e, ?path, "could not read config file");
+        match refresh_config_inner(&path).await {
            Ok(()) => {}
            Err(e) => {
                error!(error=?e, ?path, "could not read config file");
            }
        }
    }
 }
-async fn refresh_config_inner(path: &Path) -> anyhow::Result<()> {
+async fn refresh_config_inner(path: &Utf8Path) -> anyhow::Result<()> {
    let bytes = tokio::fs::read(&path).await?;
-    let mut data: JwksRoleMapping = serde_json::from_slice(&bytes)?;
+    let data: LocalProxySpec = serde_json::from_slice(&bytes)?;
-    let mut settings = None;
+    let mut jwks_set = vec![];
-    for mapping in data.roles.values_mut() {
+    for jwks in data.jwks {
-        for jwks in &mut mapping.jwks {
+        let mut jwks_url = url::Url::from_str(&jwks.jwks_url).context("parsing JWKS url")?;
            ensure!(
                jwks.jwks_url.has_authority()
                    && (jwks.jwks_url.scheme() == "http" || jwks.jwks_url.scheme() == "https"),
                "Invalid JWKS url. Must be HTTP",
            );
-            ensure!(
+        ensure!(
-                jwks.jwks_url
+            jwks_url.has_authority()
-                    .host()
+                && (jwks_url.scheme() == "http" || jwks_url.scheme() == "https"),
-                    .is_some_and(|h| h != url::Host::Domain("")),
+            "Invalid JWKS url. Must be HTTP",
-                "Invalid JWKS url. No domain listed",
+        );
            );
-            // clear username, password and ports
+        ensure!(
-            jwks.jwks_url.set_username("").expect(
+            jwks_url.host().is_some_and(|h| h != url::Host::Domain("")),
            "Invalid JWKS url. No domain listed",
        );
        // clear username, password and ports
        jwks_url
            .set_username("")
            .expect("url can be a base and has a valid host and is not a file. should not error");
        jwks_url
            .set_password(None)
            .expect("url can be a base and has a valid host and is not a file. should not error");
        // local testing is hard if we need to have a specific restricted port
        if cfg!(not(feature = "testing")) {
            jwks_url.set_port(None).expect(
                "url can be a base and has a valid host and is not a file. should not error",
            );
            jwks.jwks_url.set_password(None).expect(
                "url can be a base and has a valid host and is not a file. should not error",
            );
            // local testing is hard if we need to have a specific restricted port
            if cfg!(not(feature = "testing")) {
                jwks.jwks_url.set_port(None).expect(
                    "url can be a base and has a valid host and is not a file. should not error",
                );
            }
            // clear query params
            jwks.jwks_url.set_fragment(None);
            jwks.jwks_url.query_pairs_mut().clear().finish();
            if jwks.jwks_url.scheme() != "https" {
                // local testing is hard if we need to set up https support.
                if cfg!(not(feature = "testing")) {
                    jwks.jwks_url
                        .set_scheme("https")
                        .expect("should not error to set the scheme to https if it was http");
                } else {
                    warn!(scheme = jwks.jwks_url.scheme(), "JWKS url is not HTTPS");
                }
            }
            let (pr, br) = settings.get_or_insert((jwks.project_id, jwks.branch_id));
            ensure!(
                *pr == jwks.project_id,
                "inconsistent project IDs configured"
            );
            ensure!(*br == jwks.branch_id, "inconsistent branch IDs configured");
        }
        // clear query params
        jwks_url.set_fragment(None);
        jwks_url.query_pairs_mut().clear().finish();
        if jwks_url.scheme() != "https" {
            // local testing is hard if we need to set up https support.
            if cfg!(not(feature = "testing")) {
                jwks_url
                    .set_scheme("https")
                    .expect("should not error to set the scheme to https if it was http");
            } else {
                warn!(scheme = jwks_url.scheme(), "JWKS url is not HTTPS");
            }
        }
        jwks_set.push(JwksSettings {
            id: jwks.id,
            jwks_url,
            provider_name: jwks.provider_name,
            jwt_audience: jwks.jwt_audience,
            role_names: jwks
                .role_names
                .into_iter()
                .map(RoleName::from)
                .map(|s| RoleNameInt::from(&s))
                .collect(),
        })
    }
-    if let Some((project_id, branch_id)) = settings {
+    info!("successfully loaded new config");
-        JWKS_ROLE_MAP.store(Some(Arc::new(JwksRoleSettings {
+    JWKS_ROLE_MAP.store(Some(Arc::new(EndpointJwksResponse { jwks: jwks_set })));
            roles: data.roles,
            project_id,
            branch_id,
        })));
    }
    Ok(())
 }
--- a/proxy/src/bin/pg_sni_router.rs
+++ b/proxy/src/bin/pg_sni_router.rs
@@ -133,9 +133,7 @@ async fn main() -> anyhow::Result<()> {
        proxy_listener,
        cancellation_token.clone(),
    ));
-    let signals_task = tokio::spawn(proxy::handle_signals(cancellation_token, || async {
+    let signals_task = tokio::spawn(proxy::handle_signals(cancellation_token, || {}));
        Ok(())
    }));
    // the signal task can't ever succeed.
    // the main task can error, or can succeed on cancellation.
--- a/proxy/src/bin/proxy.rs
+++ b/proxy/src/bin/proxy.rs
@@ -8,6 +8,7 @@ use aws_config::web_identity_token::WebIdentityTokenCredentialsProvider;
 use aws_config::Region;
 use futures::future::Either;
 use proxy::auth;
 use proxy::auth::backend::jwt::JwkCache;
 use proxy::auth::backend::AuthRateLimiter;
 use proxy::auth::backend::MaybeOwned;
 use proxy::cancellation::CancelMap;
@@ -102,6 +103,9 @@ struct ProxyCliArgs {
        default_value = "http://localhost:3000/authenticate_proxy_request/"
    )]
    auth_endpoint: String,
    /// if this is not local proxy, this toggles whether we accept jwt or passwords for http
    #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
    is_auth_broker: bool,
    /// path to TLS key for client postgres connections
    ///
    /// tls-key and tls-cert are for backwards compatibility, we can put all certs in one dir
@@ -382,9 +386,27 @@ async fn main() -> anyhow::Result<()> {
    info!("Starting mgmt on {mgmt_address}");
    let mgmt_listener = TcpListener::bind(mgmt_address).await?;
-    let proxy_address: SocketAddr = args.proxy.parse()?;
+    let proxy_listener = if !args.is_auth_broker {
-    info!("Starting proxy on {proxy_address}");
+        let proxy_address: SocketAddr = args.proxy.parse()?;
-    let proxy_listener = TcpListener::bind(proxy_address).await?;
+        info!("Starting proxy on {proxy_address}");
        Some(TcpListener::bind(proxy_address).await?)
    } else {
        None
    };
    // TODO: rename the argument to something like serverless.
    // It now covers more than just websockets, it also covers SQL over HTTP.
    let serverless_listener = if let Some(serverless_address) = args.wss {
        let serverless_address: SocketAddr = serverless_address.parse()?;
        info!("Starting wss on {serverless_address}");
        Some(TcpListener::bind(serverless_address).await?)
    } else if args.is_auth_broker {
        bail!("wss arg must be present for auth-broker")
    } else {
        None
    };
    let cancellation_token = CancellationToken::new();
    let cancel_map = CancelMap::default();
@@ -430,21 +452,17 @@ async fn main() -> anyhow::Result<()> {
    // client facing tasks. these will exit on error or on cancellation
    // cancellation returns Ok(())
    let mut client_tasks = JoinSet::new();
-    client_tasks.spawn(proxy::proxy::task_main(
+    if let Some(proxy_listener) = proxy_listener {
-        config,
+        client_tasks.spawn(proxy::proxy::task_main(
-        proxy_listener,
+            config,
-        cancellation_token.clone(),
+            proxy_listener,
-        cancellation_handler.clone(),
+            cancellation_token.clone(),
-        endpoint_rate_limiter.clone(),
+            cancellation_handler.clone(),
-    ));
+            endpoint_rate_limiter.clone(),
-
+        ));
-    // TODO: rename the argument to something like serverless.
+    }
    // It now covers more than just websockets, it also covers SQL over HTTP.
    if let Some(serverless_address) = args.wss {
        let serverless_address: SocketAddr = serverless_address.parse()?;
        info!("Starting wss on {serverless_address}");
        let serverless_listener = TcpListener::bind(serverless_address).await?;
    if let Some(serverless_listener) = serverless_listener {
        client_tasks.spawn(serverless::task_main(
            config,
            serverless_listener,
@@ -461,10 +479,7 @@ async fn main() -> anyhow::Result<()> {
    // maintenance tasks. these never return unless there's an error
    let mut maintenance_tasks = JoinSet::new();
-    maintenance_tasks.spawn(proxy::handle_signals(
+    maintenance_tasks.spawn(proxy::handle_signals(cancellation_token.clone(), || {}));
        cancellation_token.clone(),
        || async { Ok(()) },
    ));
    maintenance_tasks.spawn(http::health_server::task_main(
        http_listener,
        AppMetrics {
@@ -677,7 +692,7 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
    )?;
    let http_config = HttpConfig {
-        accept_websockets: true,
+        accept_websockets: !args.is_auth_broker,
        pool_options: GlobalConnPoolOptions {
            max_conns_per_endpoint: args.sql_over_http.sql_over_http_pool_max_conns_per_endpoint,
            gc_epoch: args.sql_over_http.sql_over_http_pool_gc_epoch,
@@ -692,12 +707,15 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
        max_response_size_bytes: args.sql_over_http.sql_over_http_max_response_size_bytes,
    };
    let authentication_config = AuthenticationConfig {
        jwks_cache: JwkCache::default(),
        thread_pool,
        scram_protocol_timeout: args.scram_protocol_timeout,
        rate_limiter_enabled: args.auth_rate_limit_enabled,
        rate_limiter: AuthRateLimiter::new(args.auth_rate_limit.clone()),
        rate_limit_ip_subnet: args.auth_rate_limit_ip_subnet,
        ip_allowlist_check_enabled: !args.is_private_access_proxy,
        is_auth_broker: args.is_auth_broker,
        accept_jwts: args.is_auth_broker,
    };
    let config = Box::leak(Box::new(ProxyConfig {
--- a/proxy/src/config.rs
+++ b/proxy/src/config.rs
@@ -1,5 +1,8 @@
 use crate::{
-    auth::{self, backend::AuthRateLimiter},
+    auth::{
        self,
        backend::{jwt::JwkCache, AuthRateLimiter},
    },
    console::locks::ApiLocks,
    rate_limiter::{RateBucketInfo, RateLimitAlgorithm, RateLimiterConfig},
    scram::threadpool::ThreadPool,
@@ -67,6 +70,9 @@ pub struct AuthenticationConfig {
    pub rate_limiter: AuthRateLimiter,
    pub rate_limit_ip_subnet: u8,
    pub ip_allowlist_check_enabled: bool,
    pub jwks_cache: JwkCache,
    pub is_auth_broker: bool,
    pub accept_jwts: bool,
 }
 impl TlsConfig {
@@ -250,18 +256,26 @@ impl CertResolver {
        let common_name = pem.subject().to_string();
-        // We only use non-wildcard certificates in web auth proxy so it seems okay to treat them the same as
+        // We need to get the canonical name for this certificate so we can match them against any domain names
-        // wildcard ones as we don't use SNI there. That treatment only affects certificate selection, so
+        // seen within the proxy codebase.
-        // verify-full will still check wildcard match. Old coding here just ignored non-wildcard common names
+        //
-        // and passed None instead, which blows up number of cases downstream code should handle. Proper coding
+        // In scram-proxy we use wildcard certificates only, with the database endpoint as the wildcard subdomain, taken from SNI.
-        // here should better avoid Option for common_names, and do wildcard-based certificate selection instead
+        // We need to remove the wildcard prefix for the purposes of certificate selection.
-        // of cutting off '*.' parts.
+        //
-        let common_name = if common_name.starts_with("CN=*.") {
+        // auth-broker does not use SNI and instead uses the Neon-Connection-String header.
-            common_name.strip_prefix("CN=*.").map(|s| s.to_string())
+        // Auth broker has the subdomain `apiauth` we need to remove for the purposes of validating the Neon-Connection-String.
        //
        // Console Web proxy does not use any wildcard domains and does not need any certificate selection or conn string
        // validation, so we can continue with any common name.
        let common_name = if let Some(s) = common_name.strip_prefix("CN=*.") {
            s.to_string()
        } else if let Some(s) = common_name.strip_prefix("CN=apiauth.") {
            s.to_string()
        } else if let Some(s) = common_name.strip_prefix("CN=") {
            s.to_string()
        } else {
-            common_name.strip_prefix("CN=").map(|s| s.to_string())
+            bail!("Failed to parse common name from certificate")
-        }
+        };
        .context("Failed to parse common name from certificate")?;
        let cert = Arc::new(rustls::sign::CertifiedKey::new(cert_chain, key));
--- a/proxy/src/console/messages.rs
+++ b/proxy/src/console/messages.rs
@@ -1,13 +1,11 @@
 use measured::FixedCardinalityLabel;
 use serde::{Deserialize, Serialize};
 use std::collections::HashMap;
 use std::fmt::{self, Display};
 use crate::auth::IpPattern;
-use crate::intern::{BranchIdInt, EndpointIdInt, ProjectIdInt};
+use crate::intern::{BranchIdInt, EndpointIdInt, ProjectIdInt, RoleNameInt};
 use crate::proxy::retry::CouldRetry;
 use crate::RoleName;
 /// Generic error response with human-readable description.
 /// Note that we can't always present it to user as is.
@@ -284,6 +282,8 @@ pub(crate) struct DatabaseInfo {
    /// be inconvenient for debug with local PG instance.
    pub(crate) password: Option<Box<str>>,
    pub(crate) aux: MetricsAuxInfo,
    #[serde(default)]
    pub(crate) allowed_ips: Option<Vec<IpPattern>>,
 }
 // Manually implement debug to omit sensitive info.
@@ -294,6 +294,7 @@ impl fmt::Debug for DatabaseInfo {
            .field("port", &self.port)
            .field("dbname", &self.dbname)
            .field("user", &self.user)
            .field("allowed_ips", &self.allowed_ips)
            .finish_non_exhaustive()
    }
 }
@@ -345,11 +346,6 @@ impl ColdStartInfo {
    }
 }
 /// Deserializable mapping from a role name to the JWKS settings for that role.
 #[derive(Debug, Deserialize, Clone)]
 pub struct JwksRoleMapping {
    /// JWKS settings keyed by role name.
    pub roles: HashMap<RoleName, EndpointJwksResponse>,
 }
 #[derive(Debug, Deserialize, Clone)]
 pub struct EndpointJwksResponse {
    pub jwks: Vec<JwksSettings>,
@@ -358,11 +354,10 @@ pub struct EndpointJwksResponse {
 /// A single JWKS entry as carried inside an `EndpointJwksResponse`.
 #[derive(Debug, Deserialize, Clone)]
 pub struct JwksSettings {
    /// Identifier of this JWKS entry.
    pub id: String,
    /// Interned project ID associated with this entry.
    pub project_id: ProjectIdInt,
    /// Interned branch ID associated with this entry.
    pub branch_id: BranchIdInt,
    /// URL the JWKS document is served from.
    pub jwks_url: url::Url,
    /// Name of the provider behind this JWKS.
    /// NOTE(review): not read anywhere in the visible code — confirm its consumers.
    pub provider_name: String,
    /// Optional JWT audience configured for this entry; `None` when no audience is configured.
    pub jwt_audience: Option<String>,
    /// Interned role names attached to this entry — presumably the roles allowed to
    /// authenticate with keys from this JWKS; confirm against the JWT validation code.
    pub role_names: Vec<RoleNameInt>,
 }
 #[cfg(test)]
@@ -432,6 +427,22 @@ mod tests {
            "aux": dummy_aux(),
        }))?;
        // with allowed_ips
        let dbinfo = serde_json::from_value::<DatabaseInfo>(json!({
            "host": "localhost",
            "port": 5432,
            "dbname": "postgres",
            "user": "john_doe",
            "password": "password",
            "aux": dummy_aux(),
            "allowed_ips": ["127.0.0.1"],
        }))?;
        assert_eq!(
            dbinfo.allowed_ips,
            Some(vec![IpPattern::Single("127.0.0.1".parse()?)])
        );
        Ok(())
    }
--- a/proxy/src/console/provider.rs
+++ b/proxy/src/console/provider.rs
@@ -5,7 +5,10 @@ pub mod neon;
 use super::messages::{ConsoleError, MetricsAuxInfo};
 use crate::{
    auth::{
-        backend::{ComputeCredentialKeys, ComputeUserInfo},
+        backend::{
            jwt::{AuthRule, FetchAuthRules},
            ComputeCredentialKeys, ComputeUserInfo,
        },
        IpPattern,
    },
    cache::{endpoints::EndpointsCache, project_info::ProjectInfoCacheImpl, Cached, TimedLru},
@@ -16,7 +19,7 @@ use crate::{
    intern::ProjectIdInt,
    metrics::ApiLockMetrics,
    rate_limiter::{DynamicLimiter, Outcome, RateLimiterConfig, Token},
-    scram, EndpointCacheKey,
+    scram, EndpointCacheKey, EndpointId,
 };
 use dashmap::DashMap;
 use std::{hash::Hash, sync::Arc, time::Duration};
@@ -334,6 +337,12 @@ pub(crate) trait Api {
        user_info: &ComputeUserInfo,
    ) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), errors::GetAuthInfoError>;
    /// Fetch the JWT auth rules ([`AuthRule`]) for the given `endpoint`.
    ///
    /// # Errors
    /// Returns an `anyhow` error when the implementation cannot obtain the rules.
    async fn get_endpoint_jwks(
        &self,
        ctx: &RequestMonitoring,
        endpoint: EndpointId,
    ) -> anyhow::Result<Vec<AuthRule>>;
    /// Wake up the compute node and return the corresponding connection info.
    async fn wake_compute(
        &self,
@@ -343,6 +352,7 @@ pub(crate) trait Api {
 }
 #[non_exhaustive]
 #[derive(Clone)]
 pub enum ConsoleBackend {
    /// Current Cloud API (V2).
    Console(neon::Api),
@@ -386,6 +396,20 @@ impl Api for ConsoleBackend {
        }
    }
    /// Dispatch the JWKS lookup for `endpoint` to whichever backend variant is active.
    ///
    /// Returns the auth rules produced by the underlying API, or its error unchanged.
    async fn get_endpoint_jwks(
        &self,
        ctx: &RequestMonitoring,
        endpoint: EndpointId,
    ) -> anyhow::Result<Vec<AuthRule>> {
        match self {
            Self::Console(api) => api.get_endpoint_jwks(ctx, endpoint).await,
            // The postgres-backed variant only exists in test/`testing` builds.
            #[cfg(any(test, feature = "testing"))]
            Self::Postgres(api) => api.get_endpoint_jwks(ctx, endpoint).await,
            // The test variant has no rules configured: always an empty list.
            #[cfg(test)]
            Self::Test(_api) => Ok(vec![]),
        }
    }
    async fn wake_compute(
        &self,
        ctx: &RequestMonitoring,
@@ -552,3 +576,13 @@ impl WakeComputePermit {
        res
    }
 }
 /// Lets a [`ConsoleBackend`] be used wherever JWT auth rules need to be fetched.
 impl FetchAuthRules for ConsoleBackend {
    /// Fetch the auth rules for `endpoint` by delegating to `get_endpoint_jwks`.
    async fn fetch_auth_rules(
        &self,
        ctx: &RequestMonitoring,
        endpoint: EndpointId,
    ) -> anyhow::Result<Vec<AuthRule>> {
        self.get_endpoint_jwks(ctx, endpoint).await
    }
 }
--- a/proxy/src/console/provider/mock.rs
+++ b/proxy/src/console/provider/mock.rs
@@ -4,7 +4,9 @@ use super::{
    errors::{ApiError, GetAuthInfoError, WakeComputeError},
    AuthInfo, AuthSecret, CachedNodeInfo, NodeInfo,
 };
-use crate::context::RequestMonitoring;
+use crate::{
    auth::backend::jwt::AuthRule, context::RequestMonitoring, intern::RoleNameInt, RoleName,
 };
 use crate::{auth::backend::ComputeUserInfo, compute, error::io_error, scram, url::ApiUrl};
 use crate::{auth::IpPattern, cache::Cached};
 use crate::{
@@ -118,6 +120,39 @@ impl Api {
        })
    }
    /// Load the auth rules for `endpoint` from the `neon_control_plane.endpoint_jwks` table.
    ///
    /// Opens a fresh, non-TLS postgres connection to `self.endpoint`, runs a single query
    /// filtered by endpoint ID, and converts every returned row into an [`AuthRule`].
    ///
    /// # Errors
    /// Fails if connecting or querying fails, if a stored `jwks_url` is not a valid URL,
    /// or if the spawned connection task ends with an error.
    async fn do_get_endpoint_jwks(&self, endpoint: EndpointId) -> anyhow::Result<Vec<AuthRule>> {
        let (client, connection) =
            tokio_postgres::connect(self.endpoint.as_str(), tokio_postgres::NoTls).await?;
        // The connection object drives the socket; run it in the background while we query.
        let conn_task = tokio::spawn(connection);

        let sql = "select id, jwks_url, audience, role_names from neon_control_plane.endpoint_jwks where endpoint_id = $1";
        let fetched = client.query(sql, &[&endpoint.as_str()]).await?;

        // Stops at the first row whose URL does not parse.
        let rules = fetched
            .into_iter()
            .map(|row| -> anyhow::Result<AuthRule> {
                Ok(AuthRule {
                    id: row.get("id"),
                    jwks_url: url::Url::parse(row.get("jwks_url"))?,
                    audience: row.get("audience"),
                    role_names: row
                        .get::<_, Vec<String>>("role_names")
                        .into_iter()
                        .map(|name| RoleNameInt::from(&RoleName::from(name)))
                        .collect(),
                })
            })
            .collect::<anyhow::Result<Vec<_>>>()?;

        // Dropping the client lets the connection task finish, so awaiting it cannot hang.
        drop(client);
        conn_task.await??;
        Ok(rules)
    }
    async fn do_wake_compute(&self) -> Result<NodeInfo, WakeComputeError> {
        let mut config = compute::ConnCfg::new();
        config
@@ -185,6 +220,14 @@ impl super::Api for Api {
        ))
    }
    /// Fetch the auth rules for `endpoint` by delegating to `do_get_endpoint_jwks`.
    ///
    /// The request context is not used by this implementation.
    async fn get_endpoint_jwks(
        &self,
        _ctx: &RequestMonitoring,
        endpoint: EndpointId,
    ) -> anyhow::Result<Vec<AuthRule>> {
        self.do_get_endpoint_jwks(endpoint).await
    }
    #[tracing::instrument(skip_all)]
    async fn wake_compute(
        &self,
--- a/proxy/src/console/provider/neon.rs
+++ b/proxy/src/console/provider/neon.rs
@@ -7,27 +7,33 @@ use super::{
    NodeInfo,
 };
 use crate::{
-    auth::backend::ComputeUserInfo,
+    auth::backend::{jwt::AuthRule, ComputeUserInfo},
    compute,
-    console::messages::{ColdStartInfo, Reason},
+    console::messages::{ColdStartInfo, EndpointJwksResponse, Reason},
    http,
    metrics::{CacheOutcome, Metrics},
    rate_limiter::WakeComputeRateLimiter,
-    scram, EndpointCacheKey,
+    scram, EndpointCacheKey, EndpointId,
 };
 use crate::{cache::Cached, context::RequestMonitoring};
 use ::http::{header::AUTHORIZATION, HeaderName};
 use anyhow::bail;
 use futures::TryFutureExt;
 use std::{sync::Arc, time::Duration};
 use tokio::time::Instant;
 use tokio_postgres::config::SslMode;
 use tracing::{debug, error, info, info_span, warn, Instrument};
 const X_REQUEST_ID: HeaderName = HeaderName::from_static("x-request-id");
 #[derive(Clone)]
 pub struct Api {
    endpoint: http::Endpoint,
    pub caches: &'static ApiCaches,
    pub(crate) locks: &'static ApiLocks<EndpointCacheKey>,
    pub(crate) wake_compute_endpoint_rate_limiter: Arc<WakeComputeRateLimiter>,
-    jwt: String,
+    // put in a shared ref so we don't copy secrets all over in memory
    jwt: Arc<str>,
 }
 impl Api {
@@ -38,7 +44,9 @@ impl Api {
        locks: &'static ApiLocks<EndpointCacheKey>,
        wake_compute_endpoint_rate_limiter: Arc<WakeComputeRateLimiter>,
    ) -> Self {
-        let jwt = std::env::var("NEON_PROXY_TO_CONTROLPLANE_TOKEN").unwrap_or_default();
+        let jwt = std::env::var("NEON_PROXY_TO_CONTROLPLANE_TOKEN")
            .unwrap_or_default()
            .into();
        Self {
            endpoint,
            caches,
@@ -71,9 +79,9 @@ impl Api {
        async {
            let request = self
                .endpoint
-                .get("proxy_get_role_secret")
+                .get_path("proxy_get_role_secret")
-                .header("X-Request-ID", &request_id)
+                .header(X_REQUEST_ID, &request_id)
-                .header("Authorization", format!("Bearer {}", &self.jwt))
+                .header(AUTHORIZATION, format!("Bearer {}", &self.jwt))
                .query(&[("session_id", ctx.session_id())])
                .query(&[
                    ("application_name", application_name.as_str()),
@@ -125,6 +133,61 @@ impl Api {
        .await
    }
    /// Fetches the JWKS auth rules configured for `endpoint` from the control plane.
    ///
    /// Bails with "endpoint not found" when the endpoints cache does not consider the
    /// normalized endpoint valid. Otherwise issues a `GET` to `endpoints/<endpoint>/jwks`
    /// and converts every returned entry into an [`AuthRule`]. Errors raised while
    /// building, sending or parsing the request are passed through
    /// `crate::error::log_error` before being returned.
    async fn do_get_endpoint_jwks(
        &self,
        ctx: &RequestMonitoring,
        endpoint: EndpointId,
    ) -> anyhow::Result<Vec<AuthRule>> {
        let normalized = endpoint.normalize();
        let endpoint_known = self
            .caches
            .endpoints_cache
            .is_valid(ctx, &normalized)
            .await;
        if !endpoint_known {
            bail!("endpoint not found");
        }
        let request_id = ctx.session_id().to_string();
        async {
            let builder = self.endpoint.get_with_url(|url| {
                url.path_segments_mut()
                    .push("endpoints")
                    .push(endpoint.as_str())
                    .push("jwks");
            });
            let req = builder
                .header(X_REQUEST_ID, &request_id)
                .header(AUTHORIZATION, format!("Bearer {}", &self.jwt))
                .query(&[("session_id", ctx.session_id())])
                .build()?;
            info!(url = req.url().as_str(), "sending http request");
            let started_at = Instant::now();
            // The latency timer is paused only for the duration of the control-plane call.
            let resp = {
                let _pause = ctx.latency_timer_pause(crate::metrics::Waiting::Cplane);
                self.endpoint.execute(req).await?
            };
            info!(duration = ?started_at.elapsed(), "received http response");
            let parsed = parse_body::<EndpointJwksResponse>(resp).await?;
            Ok(parsed
                .jwks
                .into_iter()
                .map(|entry| AuthRule {
                    id: entry.id,
                    jwks_url: entry.jwks_url,
                    audience: entry.jwt_audience,
                    role_names: entry.role_names,
                })
                .collect())
        }
        .map_err(crate::error::log_error)
        .instrument(info_span!("http", id = request_id))
        .await
    }
    async fn do_wake_compute(
        &self,
        ctx: &RequestMonitoring,
@@ -135,7 +198,7 @@ impl Api {
        async {
            let mut request_builder = self
                .endpoint
-                .get("proxy_wake_compute")
+                .get_path("proxy_wake_compute")
                .header("X-Request-ID", &request_id)
                .header("Authorization", format!("Bearer {}", &self.jwt))
                .query(&[("session_id", ctx.session_id())])
@@ -262,6 +325,15 @@ impl super::Api for Api {
        ))
    }
    /// Returns the JWKS auth rules for `endpoint`.
    ///
    /// Thin trait-level entry point: all work is delegated to
    /// `do_get_endpoint_jwks`, and the result is returned unchanged.
    #[tracing::instrument(skip_all)]
    async fn get_endpoint_jwks(
        &self,
        ctx: &RequestMonitoring,
        endpoint: EndpointId,
    ) -> anyhow::Result<Vec<AuthRule>> {
        self.do_get_endpoint_jwks(ctx, endpoint).await
    }
    #[tracing::instrument(skip_all)]
    async fn wake_compute(
        &self,
--- a/proxy/src/http.rs
+++ b/proxy/src/http.rs
@@ -86,9 +86,17 @@ impl Endpoint {
    /// Return a [builder](RequestBuilder) for a `GET` request,
    /// appending a single `path` segment to the base endpoint URL.
-    pub(crate) fn get(&self, path: &str) -> RequestBuilder {
+    pub(crate) fn get_path(&self, path: &str) -> RequestBuilder {
        self.get_with_url(|u| {
            u.path_segments_mut().push(path);
        })
    }
    /// Return a [builder](RequestBuilder) for a `GET` request,
    /// accepting a closure to modify the url path segments for more complex paths queries.
    pub(crate) fn get_with_url(&self, f: impl for<'a> FnOnce(&'a mut ApiUrl)) -> RequestBuilder {
        let mut url = self.endpoint.clone();
-        url.path_segments_mut().push(path);
+        f(&mut url);
        self.client.get(url.into_inner())
    }
@@ -144,7 +152,7 @@ mod tests {
        // Validate that this pattern makes sense.
        let req = endpoint
-            .get("frobnicate")
+            .get_path("frobnicate")
            .query(&[
                ("foo", Some("10")), // should be just `foo=10`
                ("bar", None),       // shouldn't be passed at all
@@ -162,7 +170,7 @@ mod tests {
        let endpoint = Endpoint::new(url, Client::new());
        let req = endpoint
-            .get("frobnicate")
+            .get_path("frobnicate")
            .query(&[("session_id", uuid::Uuid::nil())])
            .build()?;
--- a/proxy/src/intern.rs
+++ b/proxy/src/intern.rs
@@ -1,5 +1,6 @@
 use std::{
-    hash::BuildHasherDefault, marker::PhantomData, num::NonZeroUsize, ops::Index, sync::OnceLock,
+    any::type_name, hash::BuildHasherDefault, marker::PhantomData, num::NonZeroUsize, ops::Index,
    sync::OnceLock,
 };
 use lasso::{Capacity, MemoryLimits, Spur, ThreadedRodeo};
@@ -16,12 +17,21 @@ pub struct StringInterner<Id> {
    _id: PhantomData<Id>,
 }
-#[derive(PartialEq, Debug, Clone, Copy, Eq, Hash)]
+#[derive(PartialEq, Clone, Copy, Eq, Hash)]
 pub struct InternedString<Id> {
    inner: Spur,
    _id: PhantomData<Id>,
 }
 /// Debug output is a tuple named `InternedString` holding the tag's type name
 /// followed by the interned string itself.
 impl<Id: InternId> std::fmt::Debug for InternedString<Id> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut tuple = f.debug_tuple("InternedString");
        tuple.field(&type_name::<Id>());
        tuple.field(&self.as_str());
        tuple.finish()
    }
 }
 impl<Id: InternId> std::fmt::Display for InternedString<Id> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        self.as_str().fmt(f)
@@ -130,14 +140,14 @@ impl<Id: InternId> Default for StringInterner<Id> {
 }
 #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
-pub(crate) struct RoleNameTag;
+pub struct RoleNameTag;
 impl InternId for RoleNameTag {
    fn get_interner() -> &'static StringInterner<Self> {
        static ROLE_NAMES: OnceLock<StringInterner<RoleNameTag>> = OnceLock::new();
        ROLE_NAMES.get_or_init(Default::default)
    }
 }
-pub(crate) type RoleNameInt = InternedString<RoleNameTag>;
+pub type RoleNameInt = InternedString<RoleNameTag>;
 impl From<&RoleName> for RoleNameInt {
    fn from(value: &RoleName) -> Self {
        RoleNameTag::get_interner().get_or_intern(value)
--- a/proxy/src/lib.rs
+++ b/proxy/src/lib.rs
@@ -82,7 +82,7 @@
    impl_trait_overcaptures,
 )]
-use std::{convert::Infallible, future::Future};
+use std::convert::Infallible;
 use anyhow::{bail, Context};
 use intern::{EndpointIdInt, EndpointIdTag, InternId};
@@ -117,13 +117,12 @@ pub mod usage_metrics;
 pub mod waiters;
 /// Handle unix signals appropriately.
-pub async fn handle_signals<F, Fut>(
+pub async fn handle_signals<F>(
    token: CancellationToken,
    mut refresh_config: F,
 ) -> anyhow::Result<Infallible>
 where
-    F: FnMut() -> Fut,
+    F: FnMut(),
    Fut: Future<Output = anyhow::Result<()>>,
 {
    use tokio::signal::unix::{signal, SignalKind};
@@ -136,7 +135,7 @@ where
            // Hangup is commonly used for config reload.
            _ = hangup.recv() => {
                warn!("received SIGHUP");
-                refresh_config().await?;
+                refresh_config();
            }
            // Shut down the whole application.
            _ = interrupt.recv() => {
--- a/proxy/src/proxy/tests.rs
+++ b/proxy/src/proxy/tests.rs
@@ -525,6 +525,10 @@ impl TestBackend for TestConnectMechanism {
    {
        unimplemented!("not used in tests")
    }
    /// Returns a boxed clone of this backend as a `TestBackend` trait object.
    fn dyn_clone(&self) -> Box<dyn TestBackend> {
        Box::new(self.clone())
    }
 }
 fn helper_create_cached_node_info(cache: &'static NodeInfoCache) -> CachedNodeInfo {
--- a/proxy/src/scram/threadpool.rs
+++ b/proxy/src/scram/threadpool.rs
@@ -43,6 +43,13 @@ impl ThreadPool {
    pub fn new(n_workers: u8) -> Arc<Self> {
        // rayon would be nice here, but yielding in rayon does not work well afaict.
        if n_workers == 0 {
            return Arc::new(Self {
                runtime: None,
                metrics: Arc::new(ThreadPoolMetrics::new(n_workers as usize)),
            });
        }
        Arc::new_cyclic(|pool| {
            let pool = pool.clone();
            let worker_id = AtomicUsize::new(0);
--- a/proxy/src/serverless.rs
+++ b/proxy/src/serverless.rs
@@ -5,6 +5,7 @@
 mod backend;
 pub mod cancel_set;
 mod conn_pool;
 mod http_conn_pool;
 mod http_util;
 mod json;
 mod sql_over_http;
@@ -19,7 +20,8 @@ use anyhow::Context;
 use futures::future::{select, Either};
 use futures::TryFutureExt;
 use http::{Method, Response, StatusCode};
-use http_body_util::Full;
+use http_body_util::combinators::BoxBody;
 use http_body_util::{BodyExt, Empty};
 use hyper1::body::Incoming;
 use hyper_util::rt::TokioExecutor;
 use hyper_util::server::conn::auto::Builder;
@@ -81,7 +83,28 @@ pub async fn task_main(
        }
    });
    let http_conn_pool = http_conn_pool::GlobalConnPool::new(&config.http_config);
    {
        let http_conn_pool = Arc::clone(&http_conn_pool);
        tokio::spawn(async move {
            http_conn_pool.gc_worker(StdRng::from_entropy()).await;
        });
    }
    // shutdown the connection pool
    tokio::spawn({
        let cancellation_token = cancellation_token.clone();
        let http_conn_pool = http_conn_pool.clone();
        async move {
            cancellation_token.cancelled().await;
            tokio::task::spawn_blocking(move || http_conn_pool.shutdown())
                .await
                .unwrap();
        }
    });
    let backend = Arc::new(PoolingBackend {
        http_conn_pool: Arc::clone(&http_conn_pool),
        pool: Arc::clone(&conn_pool),
        config,
        endpoint_rate_limiter: Arc::clone(&endpoint_rate_limiter),
@@ -342,7 +365,7 @@ async fn request_handler(
    // used to cancel in-flight HTTP requests. not used to cancel websockets
    http_cancellation_token: CancellationToken,
    endpoint_rate_limiter: Arc<EndpointRateLimiter>,
-) -> Result<Response<Full<Bytes>>, ApiError> {
+) -> Result<Response<BoxBody<Bytes, hyper1::Error>>, ApiError> {
    let host = request
        .headers()
        .get("host")
@@ -386,7 +409,7 @@ async fn request_handler(
        );
        // Return the response so the spawned future can continue.
-        Ok(response.map(|_: http_body_util::Empty<Bytes>| Full::new(Bytes::new())))
+        Ok(response.map(|b| b.map_err(|x| match x {}).boxed()))
    } else if request.uri().path() == "/sql" && *request.method() == Method::POST {
        let ctx = RequestMonitoring::new(
            session_id,
@@ -409,7 +432,7 @@ async fn request_handler(
            )
            .header("Access-Control-Max-Age", "86400" /* 24 hours */)
            .status(StatusCode::OK) // 204 is also valid, but see: https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods/OPTIONS#status_code
-            .body(Full::new(Bytes::new()))
+            .body(Empty::new().map_err(|x| match x {}).boxed())
            .map_err(|e| ApiError::InternalServerError(e.into()))
    } else {
        json_response(StatusCode::BAD_REQUEST, "query is not supported")
--- a/proxy/src/serverless/backend.rs
+++ b/proxy/src/serverless/backend.rs
@@ -1,6 +1,8 @@
-use std::{sync::Arc, time::Duration};
+use std::{io, sync::Arc, time::Duration};
 use async_trait::async_trait;
 use hyper_util::rt::{TokioExecutor, TokioIo, TokioTimer};
 use tokio::net::{lookup_host, TcpStream};
 use tracing::{field::display, info};
 use crate::{
@@ -27,9 +29,13 @@ use crate::{
    Host,
 };
-use super::conn_pool::{poll_client, Client, ConnInfo, GlobalConnPool};
+use super::{
    conn_pool::{poll_client, Client, ConnInfo, GlobalConnPool},
    http_conn_pool::{self, poll_http2_client},
 };
 pub(crate) struct PoolingBackend {
    pub(crate) http_conn_pool: Arc<super::http_conn_pool::GlobalConnPool>,
    pub(crate) pool: Arc<GlobalConnPool<tokio_postgres::Client>>,
    pub(crate) config: &'static ProxyConfig,
    pub(crate) endpoint_rate_limiter: Arc<EndpointRateLimiter>,
@@ -103,32 +109,44 @@ impl PoolingBackend {
    pub(crate) async fn authenticate_with_jwt(
        &self,
        ctx: &RequestMonitoring,
        config: &AuthenticationConfig,
        user_info: &ComputeUserInfo,
-        jwt: &str,
+        jwt: String,
-    ) -> Result<ComputeCredentials, AuthError> {
+    ) -> Result<(), AuthError> {
        match &self.config.auth_backend {
-            crate::auth::Backend::Console(_, ()) => {
+            crate::auth::Backend::Console(console, ()) => {
-                Err(AuthError::auth_failed("JWT login is not yet supported"))
+                config
            }
            crate::auth::Backend::Web(_, ()) => Err(AuthError::auth_failed(
                "JWT login over web auth proxy is not supported",
            )),
            crate::auth::Backend::Local(cache) => {
                cache
                    .jwks_cache
                    .check_jwt(
                        ctx,
                        user_info.endpoint.clone(),
-                        user_info.user.clone(),
+                        &user_info.user,
-                        &StaticAuthRules,
+                        &**console,
-                        jwt,
+                        &jwt,
                    )
                    .await
                    .map_err(|e| AuthError::auth_failed(e.to_string()))?;
-                Ok(ComputeCredentials {
+
-                    info: user_info.clone(),
+                Ok(())
-                    keys: crate::auth::backend::ComputeCredentialKeys::None,
+            }
-                })
+            crate::auth::Backend::Web(_, ()) => Err(AuthError::auth_failed(
                "JWT login over web auth proxy is not supported",
            )),
            crate::auth::Backend::Local(_) => {
                config
                    .jwks_cache
                    .check_jwt(
                        ctx,
                        user_info.endpoint.clone(),
                        &user_info.user,
                        &StaticAuthRules,
                        &jwt,
                    )
                    .await
                    .map_err(|e| AuthError::auth_failed(e.to_string()))?;
                // todo: rewrite JWT signature with key shared somehow between local proxy and postgres
                Ok(())
            }
        }
    }
@@ -174,14 +192,55 @@ impl PoolingBackend {
        )
        .await
    }
    /// Obtains an HTTP/2 client to the local proxy for `conn_info`.
    ///
    /// A pooled connection is returned when `http_conn_pool` has one. Otherwise a new
    /// connection is opened through `connect_to_compute` using `HyperMechanism`
    /// (waking up the destination if needed).
    ///
    /// The `conn_id` span field is declared empty up front so that the
    /// `Span::record("conn_id", ..)` calls made while this span is current actually
    /// attach the id; recording a field that was never declared is silently ignored.
    #[tracing::instrument(
        fields(pid = tracing::field::Empty, conn_id = tracing::field::Empty),
        skip_all
    )]
    pub(crate) async fn connect_to_local_proxy(
        &self,
        ctx: &RequestMonitoring,
        conn_info: ConnInfo,
    ) -> Result<http_conn_pool::Client, HttpConnError> {
        info!("pool: looking for an existing connection");
        if let Some(client) = self.http_conn_pool.get(ctx, &conn_info) {
            return Ok(client);
        }
        let conn_id = uuid::Uuid::new_v4();
        tracing::Span::current().record("conn_id", display(conn_id));
        info!(%conn_id, "pool: opening a new connection '{conn_info}'");
        // No credential keys are attached for this flow.
        let backend = self
            .config
            .auth_backend
            .as_ref()
            .map(|()| ComputeCredentials {
                info: conn_info.user_info.clone(),
                keys: crate::auth::backend::ComputeCredentialKeys::None,
            });
        crate::proxy::connect_compute::connect_to_compute(
            ctx,
            &HyperMechanism {
                conn_id,
                conn_info,
                pool: self.http_conn_pool.clone(),
                locks: &self.config.connect_compute_locks,
            },
            &backend,
            false, // do not allow self signed compute for http flow
            self.config.wake_compute_retry_config,
            self.config.connect_to_compute_retry_config,
        )
        .await
    }
 }
 #[derive(Debug, thiserror::Error)]
 pub(crate) enum HttpConnError {
    #[error("pooled connection closed at inconsistent state")]
    ConnectionClosedAbruptly(#[from] tokio::sync::watch::error::SendError<uuid::Uuid>),
-    #[error("could not connection to compute")]
+    #[error("could not connection to postgres in compute")]
-    ConnectionError(#[from] tokio_postgres::Error),
+    PostgresConnectionError(#[from] tokio_postgres::Error),
    #[error("could not connection to local-proxy in compute")]
    LocalProxyConnectionError(#[from] LocalProxyConnError),
    #[error("could not get auth info")]
    GetAuthInfo(#[from] GetAuthInfoError),
@@ -193,11 +252,20 @@ pub(crate) enum HttpConnError {
    TooManyConnectionAttempts(#[from] ApiLockError),
 }
 /// Failure modes when connecting to the local-proxy in compute.
 #[derive(Debug, thiserror::Error)]
 pub(crate) enum LocalProxyConnError {
    /// An I/O error. There is no `#[from]` conversion, so callers wrap it explicitly.
    #[error("error with connection to local-proxy")]
    Io(#[source] std::io::Error),
    /// A `hyper1::Error`; `#[from]` allows conversion with `?`.
    #[error("could not establish h2 connection")]
    H2(#[from] hyper1::Error),
 }
 impl ReportableError for HttpConnError {
    fn get_error_kind(&self) -> ErrorKind {
        match self {
            HttpConnError::ConnectionClosedAbruptly(_) => ErrorKind::Compute,
-            HttpConnError::ConnectionError(p) => p.get_error_kind(),
+            HttpConnError::PostgresConnectionError(p) => p.get_error_kind(),
            HttpConnError::LocalProxyConnectionError(_) => ErrorKind::Compute,
            HttpConnError::GetAuthInfo(a) => a.get_error_kind(),
            HttpConnError::AuthError(a) => a.get_error_kind(),
            HttpConnError::WakeCompute(w) => w.get_error_kind(),
@@ -210,7 +278,8 @@ impl UserFacingError for HttpConnError {
    fn to_string_client(&self) -> String {
        match self {
            HttpConnError::ConnectionClosedAbruptly(_) => self.to_string(),
-            HttpConnError::ConnectionError(p) => p.to_string(),
+            HttpConnError::PostgresConnectionError(p) => p.to_string(),
            HttpConnError::LocalProxyConnectionError(p) => p.to_string(),
            HttpConnError::GetAuthInfo(c) => c.to_string_client(),
            HttpConnError::AuthError(c) => c.to_string_client(),
            HttpConnError::WakeCompute(c) => c.to_string_client(),
@@ -224,7 +293,8 @@ impl UserFacingError for HttpConnError {
 impl CouldRetry for HttpConnError {
    fn could_retry(&self) -> bool {
        match self {
-            HttpConnError::ConnectionError(e) => e.could_retry(),
+            HttpConnError::PostgresConnectionError(e) => e.could_retry(),
            HttpConnError::LocalProxyConnectionError(e) => e.could_retry(),
            HttpConnError::ConnectionClosedAbruptly(_) => false,
            HttpConnError::GetAuthInfo(_) => false,
            HttpConnError::AuthError(_) => false,
@@ -236,7 +306,7 @@ impl CouldRetry for HttpConnError {
 impl ShouldRetryWakeCompute for HttpConnError {
    fn should_retry_wake_compute(&self) -> bool {
        match self {
-            HttpConnError::ConnectionError(e) => e.should_retry_wake_compute(),
+            HttpConnError::PostgresConnectionError(e) => e.should_retry_wake_compute(),
            // we never checked cache validity
            HttpConnError::TooManyConnectionAttempts(_) => false,
            _ => true,
@@ -244,6 +314,38 @@ impl ShouldRetryWakeCompute for HttpConnError {
    }
 }
 impl ReportableError for LocalProxyConnError {
    /// Every local-proxy connection failure is reported as `ErrorKind::Compute`.
    fn get_error_kind(&self) -> ErrorKind {
        match self {
            LocalProxyConnError::Io(_) | LocalProxyConnError::H2(_) => ErrorKind::Compute,
        }
    }
 }
 impl UserFacingError for LocalProxyConnError {
    /// Returns one fixed client-facing message regardless of the variant.
    fn to_string_client(&self) -> String {
        String::from("Could not establish HTTP connection to the database")
    }
 }
 impl CouldRetry for LocalProxyConnError {
    /// No variant is retried.
    fn could_retry(&self) -> bool {
        match self {
            LocalProxyConnError::Io(_) | LocalProxyConnError::H2(_) => false,
        }
    }
 }
 impl ShouldRetryWakeCompute for LocalProxyConnError {
    /// No variant triggers another wake-compute attempt.
    fn should_retry_wake_compute(&self) -> bool {
        match self {
            LocalProxyConnError::Io(_) | LocalProxyConnError::H2(_) => false,
        }
    }
 }
 struct TokioMechanism {
    pool: Arc<GlobalConnPool<tokio_postgres::Client>>,
    conn_info: ConnInfo,
@@ -293,3 +395,99 @@ impl ConnectMechanism for TokioMechanism {
    fn update_connect_config(&self, _config: &mut compute::ConnCfg) {}
 }
 /// `ConnectMechanism` that opens HTTP/2 connections and registers them in the
 /// HTTP connection pool.
 struct HyperMechanism {
    // pool that new connections are handed to via `poll_http2_client`
    pool: Arc<http_conn_pool::GlobalConnPool>,
    // connection details, passed to `poll_http2_client` for each new connection
    conn_info: ConnInfo,
    // identifier assigned to the connection being opened
    conn_id: uuid::Uuid,
    /// connect_to_compute concurrency lock
    locks: &'static ApiLocks<Host>,
 }
 #[async_trait]
 impl ConnectMechanism for HyperMechanism {
    type Connection = http_conn_pool::Client;
    type ConnectError = HttpConnError;
    type Error = HttpConnError;
    /// Makes one attempt to open an HTTP/2 connection to the node's host.
    ///
    /// A per-host permit is taken from `locks` before connecting and is given the
    /// connect result via `release_result`. On success the new connection is handed
    /// to `poll_http2_client`, which returns the client.
    async fn connect_once(
        &self,
        ctx: &RequestMonitoring,
        node_info: &CachedNodeInfo,
        timeout: Duration,
    ) -> Result<Self::Connection, Self::ConnectError> {
        // The port is currently fixed instead of being read from `node_info.config`.
        const LOCAL_PROXY_PORT: u16 = 10432;
        let host = node_info.config.get_host()?;
        let permit = self.locks.get_permit(&host).await?;
        // The latency timer is paused only while the connection is being established.
        let attempt = {
            let _pause = ctx.latency_timer_pause(crate::metrics::Waiting::Compute);
            connect_http2(&host, LOCAL_PROXY_PORT, timeout).await
        };
        let (client, connection) = permit.release_result(attempt)?;
        let pooled = poll_http2_client(
            self.pool.clone(),
            ctx,
            &self.conn_info,
            client,
            connection,
            self.conn_id,
            node_info.aux.clone(),
        );
        Ok(pooled)
    }
    fn update_connect_config(&self, _config: &mut compute::ConnCfg) {}
 }
 async fn connect_http2(
    host: &str,
    port: u16,
    timeout: Duration,
 ) -> Result<(http_conn_pool::Send, http_conn_pool::Connect), LocalProxyConnError> {
    // assumption: host is an ip address so this should not actually perform any requests.
    // todo: add that assumption as a guarantee in the control-plane API.
    let mut addrs = lookup_host((host, port))
        .await
        .map_err(LocalProxyConnError::Io)?;
    let mut last_err = None;
    let stream = loop {
        let Some(addr) = addrs.next() else {
            return Err(last_err.unwrap_or_else(|| {
                LocalProxyConnError::Io(io::Error::new(
                    io::ErrorKind::InvalidInput,
                    "could not resolve any addresses",
                ))
            }));
        };
        match tokio::time::timeout(timeout, TcpStream::connect(addr)).await {
            Ok(Ok(stream)) => {
                stream.set_nodelay(true).map_err(LocalProxyConnError::Io)?;
                break stream;
            }
            Ok(Err(e)) => {
                last_err = Some(LocalProxyConnError::Io(e));
            }
            Err(e) => {
                last_err = Some(LocalProxyConnError::Io(io::Error::new(
                    io::ErrorKind::TimedOut,
                    e,
                )));
            }
        };
    };
    let (client, connection) = hyper1::client::conn::http2::Builder::new(TokioExecutor::new())
        .timer(TokioTimer::new())
        .keep_alive_interval(Duration::from_secs(20))
        .keep_alive_while_idle(true)
        .keep_alive_timeout(Duration::from_secs(5))
        .handshake(TokioIo::new(stream))
        .await?;
    Ok((client, connection))
 }
--- a/proxy/src/serverless/http_conn_pool.rs
+++ b/proxy/src/serverless/http_conn_pool.rs
@@ -0,0 +1,335 @@
 use dashmap::DashMap;
 use hyper1::client::conn::http2;
 use hyper_util::rt::{TokioExecutor, TokioIo};
 use parking_lot::RwLock;
 use rand::Rng;
 use std::collections::VecDeque;
 use std::sync::atomic::{self, AtomicUsize};
 use std::{sync::Arc, sync::Weak};
 use tokio::net::TcpStream;
 use crate::console::messages::{ColdStartInfo, MetricsAuxInfo};
 use crate::metrics::{HttpEndpointPoolsGuard, Metrics};
 use crate::usage_metrics::{Ids, MetricCounter, USAGE_METRICS};
 use crate::{context::RequestMonitoring, EndpointCacheKey};
 use tracing::{debug, error};
 use tracing::{info, info_span, Instrument};
 use super::conn_pool::ConnInfo;
 pub(crate) type Send = http2::SendRequest<hyper1::body::Incoming>;
 pub(crate) type Connect =
    http2::Connection<TokioIo<TcpStream>, hyper1::body::Incoming, TokioExecutor>;
 /// One pooled connection: the request sender together with the id it is stored
 /// under and the metrics info that accompanies it.
 #[derive(Clone)]
 struct ConnPoolEntry {
    // HTTP/2 request sender for the connection
    conn: Send,
    // identifier used by `remove_conn` to find this entry
    conn_id: uuid::Uuid,
    // metrics info handed to every `Client` created from this entry
    aux: MetricsAuxInfo,
 }
 // Per-endpoint connection pool
 // Number of open connections is limited by the `max_conns_per_endpoint`.
 // NOTE(review): no such limit is enforced in the code visible here — confirm where it applies.
 pub(crate) struct EndpointConnPool {
    // pooled connections; `get_conn_entry` rotates through them front to back
    conns: VecDeque<ConnPoolEntry>,
    // metrics guard obtained from `http_endpoint_pools` when the pool is created
    _guard: HttpEndpointPoolsGuard<'static>,
    // counter shared with the owning `GlobalConnPool`
    global_connections_count: Arc<AtomicUsize>,
 }
 impl EndpointConnPool {
    /// Returns a clone of the front entry and moves that entry to the back,
    /// or `None` when the pool holds no connections.
    fn get_conn_entry(&mut self) -> Option<ConnPoolEntry> {
        let entry = self.conns.pop_front()?;
        self.conns.push_back(entry.clone());
        Some(entry)
    }
    /// Drops every entry stored under `conn_id` and lowers the global connection
    /// counter and the opened-connections gauge by the number removed.
    ///
    /// Returns `true` when at least one entry was removed.
    fn remove_conn(&mut self, conn_id: uuid::Uuid) -> bool {
        let before = self.conns.len();
        self.conns.retain(|entry| entry.conn_id != conn_id);
        let removed = before - self.conns.len();
        if removed == 0 {
            return false;
        }
        self.global_connections_count
            .fetch_sub(removed, atomic::Ordering::Relaxed);
        Metrics::get()
            .proxy
            .http_pool_opened_connections
            .get_metric()
            .dec_by(removed as i64);
        true
    }
 }
 impl Drop for EndpointConnPool {
    /// Subtracts the connections still held by this pool from the global counter
    /// and from the opened-connections gauge.
    fn drop(&mut self) {
        let remaining = self.conns.len();
        if remaining == 0 {
            return;
        }
        self.global_connections_count
            .fetch_sub(remaining, atomic::Ordering::Relaxed);
        Metrics::get()
            .proxy
            .http_pool_opened_connections
            .get_metric()
            .dec_by(remaining as i64);
    }
 }
 /// Pool of HTTP/2 connections, keyed by endpoint.
 pub(crate) struct GlobalConnPool {
    // endpoint -> per-endpoint connection pool
    //
    // That should be a fairly contended map, so return a reference to the per-endpoint
    // pool as early as possible and release the lock.
    global_pool: DashMap<EndpointCacheKey, Arc<RwLock<EndpointConnPool>>>,
    /// Number of endpoint-connection pools
    ///
    /// [`DashMap::len`] iterates over all inner pools and acquires a read lock on each.
    /// That seems like far too much effort, so we're using a relaxed increment counter instead.
    /// It's only used for diagnostics.
    global_pool_size: AtomicUsize,
    /// Total number of connections in the pool
    global_connections_count: Arc<AtomicUsize>,
    // static HTTP configuration; `pool_options` supplies the shard count and gc epoch
    config: &'static crate::config::HttpConfig,
 }
 impl GlobalConnPool {
    /// Creates an empty pool whose map is sharded according to
    /// `config.pool_options.pool_shards`, with both counters starting at zero.
    pub(crate) fn new(config: &'static crate::config::HttpConfig) -> Arc<Self> {
        let pool = Self {
            global_pool: DashMap::with_shard_amount(config.pool_options.pool_shards),
            global_pool_size: AtomicUsize::new(0),
            global_connections_count: Arc::new(AtomicUsize::new(0)),
            config,
        };
        Arc::new(pool)
    }
    /// Empties the endpoint map, releasing the pool's own references to every
    /// per-endpoint pool.
    pub(crate) fn shutdown(&self) {
        // drops all strong references to endpoint-pools
        self.global_pool.clear();
    }
    /// Runs garbage collection forever, one randomly chosen shard per tick.
    ///
    /// The tick period is the configured gc epoch divided by the number of shards.
    pub(crate) async fn gc_worker(&self, mut rng: impl Rng) {
        // The number of shards is fixed when the map is built, so read it once.
        let shard_count = self.global_pool.shards().len();
        let period = self.config.pool_options.gc_epoch / shard_count as u32;
        let mut ticker = tokio::time::interval(period);
        loop {
            ticker.tick().await;
            let target = rng.gen_range(0..shard_count);
            self.gc(target);
        }
    }
    /// Garbage-collects one shard of the endpoint map.
    ///
    /// For every endpoint pool in the shard that is not referenced from anywhere else,
    /// closed connections are dropped, and the pool itself is discarded once it has no
    /// connections left. The global counters and the opened-connections gauge are
    /// adjusted, and the results logged, after the shard lock has been released.
    fn gc(&self, shard: usize) {
        debug!(shard, "pool: performing epoch reclamation");
        // acquire a random shard lock
        let mut shard = self.global_pool.shards()[shard].write();
        // the timer covers the time the shard write lock is held
        let timer = Metrics::get()
            .proxy
            .http_pool_reclaimation_lag_seconds
            .start_timer();
        let current_len = shard.len();
        let mut clients_removed = 0;
        shard.retain(|endpoint, x| {
            // if the current endpoint pool is unique (no other strong or weak references)
            // then it is currently not in use by any connections.
            if let Some(pool) = Arc::get_mut(x.get_mut()) {
                let EndpointConnPool { conns, .. } = pool.get_mut();
                let old_len = conns.len();
                conns.retain(|conn| !conn.conn.is_closed());
                let new_len = conns.len();
                let removed = old_len - new_len;
                clients_removed += removed;
                // we only remove this pool if it has no active connections
                if conns.is_empty() {
                    info!("pool: discarding pool for endpoint {endpoint}");
                    return false;
                }
            }
            // pools that are shared, or that still hold connections, are kept
            true
        });
        let new_len = shard.len();
        drop(shard);
        timer.observe();
        // Do logging outside of the lock.
        if clients_removed > 0 {
            // `fetch_sub` returns the previous value, so subtract again to get the new total
            let size = self
                .global_connections_count
                .fetch_sub(clients_removed, atomic::Ordering::Relaxed)
                - clients_removed;
            Metrics::get()
                .proxy
                .http_pool_opened_connections
                .get_metric()
                .dec_by(clients_removed as i64);
            info!("pool: performed global pool gc. removed {clients_removed} clients, total number of clients in pool is {size}");
        }
        // number of endpoint pools discarded from this shard
        let removed = current_len - new_len;
        if removed > 0 {
            let global_pool_size = self
                .global_pool_size
                .fetch_sub(removed, atomic::Ordering::Relaxed)
                - removed;
            info!("pool: performed global pool gc. size now {global_pool_size}");
        }
    }
    pub(crate) fn get(
        self: &Arc<Self>,
        ctx: &RequestMonitoring,
        conn_info: &ConnInfo,
    ) -> Option<Client> {
        let endpoint = conn_info.endpoint_cache_key()?;
        let endpoint_pool = self.get_or_create_endpoint_pool(&endpoint);
        let client = endpoint_pool.write().get_conn_entry()?;
        if client.conn.is_closed() {
            info!("pool: cached connection '{conn_info}' is closed, opening a new one");
            return None;
        }
        tracing::Span::current().record("conn_id", tracing::field::display(client.conn_id));
        info!(
            cold_start_info = ColdStartInfo::HttpPoolHit.as_str(),
            "pool: reusing connection '{conn_info}'"
        );
        ctx.set_cold_start_info(ColdStartInfo::HttpPoolHit);
        ctx.success();
        Some(Client::new(client.conn, client.aux))
    }
    /// Returns the connection pool for `endpoint`, inserting an empty one when the
    /// map has no entry yet. A newly inserted pool bumps `global_pool_size` and is logged.
    fn get_or_create_endpoint_pool(
        self: &Arc<Self>,
        endpoint: &EndpointCacheKey,
    ) -> Arc<RwLock<EndpointConnPool>> {
        // fast path: the pool already exists
        if let Some(existing) = self.global_pool.get(endpoint) {
            return existing.clone();
        }
        // slow path: build a candidate pool up front, then insert it only if the
        // entry is still vacant
        let candidate = Arc::new(RwLock::new(EndpointConnPool {
            conns: VecDeque::new(),
            _guard: Metrics::get().proxy.http_endpoint_pools.guard(),
            global_connections_count: self.global_connections_count.clone(),
        }));
        let mut inserted = false;
        let pool = {
            let slot = self.global_pool.entry(endpoint.clone()).or_insert_with(|| {
                inserted = true;
                candidate
            });
            slot.clone()
        };
        // log new global pool size (the map entry is no longer borrowed here)
        if inserted {
            let previous = self
                .global_pool_size
                .fetch_add(1, atomic::Ordering::Relaxed);
            let global_pool_size = previous + 1;
            info!(
                "pool: created new pool for '{endpoint}', global pool size now {global_pool_size}"
            );
        }
        pool
    }
 }
 pub(crate) fn poll_http2_client(
    global_pool: Arc<GlobalConnPool>,
    ctx: &RequestMonitoring,
    conn_info: &ConnInfo,
    client: Send,
    connection: Connect,
    conn_id: uuid::Uuid,
    aux: MetricsAuxInfo,
 ) -> Client {
    let conn_gauge = Metrics::get().proxy.db_connections.guard(ctx.protocol());
    let session_id = ctx.session_id();
    let span = info_span!(parent: None, "connection", %conn_id);
    let cold_start_info = ctx.cold_start_info();
    span.in_scope(|| {
        info!(cold_start_info = cold_start_info.as_str(), %conn_info, %session_id, "new connection");
    });
    let pool = match conn_info.endpoint_cache_key() {
        Some(endpoint) => {
            let pool = global_pool.get_or_create_endpoint_pool(&endpoint);
            pool.write().conns.push_back(ConnPoolEntry {
                conn: client.clone(),
                conn_id,
                aux: aux.clone(),
            });
            Arc::downgrade(&pool)
        }
        None => Weak::new(),
    };
    // let idle = global_pool.get_idle_timeout();
    tokio::spawn(
        async move {
            let _conn_gauge = conn_gauge;
            let res = connection.await;
            match res {
                Ok(()) => info!("connection closed"),
                Err(e) => error!(%session_id, "connection error: {}", e),
            }
            // remove from connection pool
            if let Some(pool) = pool.clone().upgrade() {
                if pool.write().remove_conn(conn_id) {
                    info!("closed connection removed");
                }
            }
        }
        .instrument(span),
    );
    Client::new(client, aux)
 }
 /// Handle returned to callers of the HTTP connection pool.
 pub(crate) struct Client {
    // HTTP/2 request sender for the underlying connection
    pub(crate) inner: Send,
    // supplies the endpoint and branch ids used by `metrics`
    aux: MetricsAuxInfo,
 }
 impl Client {
    /// Module-private constructor pairing a request sender with its metrics info.
    pub(self) fn new(inner: Send, aux: MetricsAuxInfo) -> Self {
        Self { inner, aux }
    }
    /// Calls `USAGE_METRICS.register` with this client's endpoint and branch ids
    /// and returns the resulting counter.
    pub(crate) fn metrics(&self) -> Arc<MetricCounter> {
        USAGE_METRICS.register(Ids {
            endpoint_id: self.aux.endpoint_id,
            branch_id: self.aux.branch_id,
        })
    }
 }
--- a/proxy/src/serverless/http_util.rs
+++ b/proxy/src/serverless/http_util.rs
@@ -5,13 +5,13 @@ use bytes::Bytes;
 use anyhow::Context;
 use http::{Response, StatusCode};
-use http_body_util::Full;
+use http_body_util::{combinators::BoxBody, BodyExt, Full};
 use serde::Serialize;
 use utils::http::error::ApiError;
 /// Like [`ApiError::into_response`]
-pub(crate) fn api_error_into_response(this: ApiError) -> Response<Full<Bytes>> {
+pub(crate) fn api_error_into_response(this: ApiError) -> Response<BoxBody<Bytes, hyper1::Error>> {
    match this {
        ApiError::BadRequest(err) => HttpErrorBody::response_from_msg_and_status(
            format!("{err:#?}"), // use debug printing so that we give the cause
@@ -64,17 +64,24 @@ struct HttpErrorBody {
 impl HttpErrorBody {
    /// Same as [`utils::http::error::HttpErrorBody::response_from_msg_and_status`]
-    fn response_from_msg_and_status(msg: String, status: StatusCode) -> Response<Full<Bytes>> {
+    fn response_from_msg_and_status(
        msg: String,
        status: StatusCode,
    ) -> Response<BoxBody<Bytes, hyper1::Error>> {
        HttpErrorBody { msg }.to_response(status)
    }
    /// Same as [`utils::http::error::HttpErrorBody::to_response`]
-    fn to_response(&self, status: StatusCode) -> Response<Full<Bytes>> {
+    fn to_response(&self, status: StatusCode) -> Response<BoxBody<Bytes, hyper1::Error>> {
        Response::builder()
            .status(status)
            .header(http::header::CONTENT_TYPE, "application/json")
            // we do not have nested maps with non string keys so serialization shouldn't fail
-            .body(Full::new(Bytes::from(serde_json::to_string(self).unwrap())))
+            .body(
                Full::new(Bytes::from(serde_json::to_string(self).unwrap()))
                    .map_err(|x| match x {})
                    .boxed(),
            )
            .unwrap()
    }
 }
@@ -83,14 +90,14 @@ impl HttpErrorBody {
 pub(crate) fn json_response<T: Serialize>(
    status: StatusCode,
    data: T,
-) -> Result<Response<Full<Bytes>>, ApiError> {
+) -> Result<Response<BoxBody<Bytes, hyper1::Error>>, ApiError> {
    let json = serde_json::to_string(&data)
        .context("Failed to serialize JSON response")
        .map_err(ApiError::InternalServerError)?;
    let response = Response::builder()
        .status(status)
        .header(http::header::CONTENT_TYPE, "application/json")
-        .body(Full::new(Bytes::from(json)))
+        .body(Full::new(Bytes::from(json)).map_err(|x| match x {}).boxed())
        .map_err(|e| ApiError::InternalServerError(e.into()))?;
    Ok(response)
 }
--- a/proxy/src/serverless/sql_over_http.rs
+++ b/proxy/src/serverless/sql_over_http.rs
@@ -8,6 +8,8 @@ use futures::future::Either;
 use futures::StreamExt;
 use futures::TryFutureExt;
 use http::header::AUTHORIZATION;
 use http::Method;
 use http_body_util::combinators::BoxBody;
 use http_body_util::BodyExt;
 use http_body_util::Full;
 use hyper1::body::Body;
@@ -38,9 +40,11 @@ use url::Url;
 use urlencoding;
 use utils::http::error::ApiError;
 use crate::auth::backend::ComputeCredentials;
 use crate::auth::backend::ComputeUserInfo;
 use crate::auth::endpoint_sni;
 use crate::auth::ComputeUserInfoParseError;
 use crate::config::AuthenticationConfig;
 use crate::config::ProxyConfig;
 use crate::config::TlsConfig;
 use crate::context::RequestMonitoring;
@@ -56,6 +60,7 @@ use crate::usage_metrics::MetricCounterRecorder;
 use crate::DbName;
 use crate::RoleName;
 use super::backend::LocalProxyConnError;
 use super::backend::PoolingBackend;
 use super::conn_pool::AuthData;
 use super::conn_pool::Client;
@@ -123,8 +128,8 @@ pub(crate) enum ConnInfoError {
    MissingUsername,
    #[error("invalid username: {0}")]
    InvalidUsername(#[from] std::string::FromUtf8Error),
-    #[error("missing password")]
+    #[error("missing authentication credentials: {0}")]
-    MissingPassword,
+    MissingCredentials(Credentials),
    #[error("missing hostname")]
    MissingHostname,
    #[error("invalid hostname: {0}")]
@@ -133,6 +138,14 @@ pub(crate) enum ConnInfoError {
    MalformedEndpoint,
 }
 /// The kind of credential a request was expected to carry.
 ///
 /// Used as the payload of `ConnInfoError::MissingCredentials`, whose message
 /// interpolates the `#[error]` text of the variant below.
 #[derive(Debug, thiserror::Error)]
 pub(crate) enum Credentials {
    /// A password was expected.
    #[error("required password")]
    Password,
    /// An `Authorization: Bearer <jwt>` header was expected.
    #[error("required authorization bearer token in JWT format")]
    BearerJwt,
 }
 impl ReportableError for ConnInfoError {
    fn get_error_kind(&self) -> ErrorKind {
        ErrorKind::User
@@ -146,6 +159,7 @@ impl UserFacingError for ConnInfoError {
 }
 fn get_conn_info(
    config: &'static AuthenticationConfig,
    ctx: &RequestMonitoring,
    headers: &HeaderMap,
    tls: Option<&TlsConfig>,
@@ -181,21 +195,32 @@ fn get_conn_info(
    ctx.set_user(username.clone());
    let auth = if let Some(auth) = headers.get(&AUTHORIZATION) {
        if !config.accept_jwts {
            return Err(ConnInfoError::MissingCredentials(Credentials::Password));
        }
        let auth = auth
            .to_str()
            .map_err(|_| ConnInfoError::InvalidHeader(&AUTHORIZATION))?;
        AuthData::Jwt(
            auth.strip_prefix("Bearer ")
-                .ok_or(ConnInfoError::MissingPassword)?
+                .ok_or(ConnInfoError::MissingCredentials(Credentials::BearerJwt))?
                .into(),
        )
    } else if let Some(pass) = connection_url.password() {
        // wrong credentials provided
        if config.accept_jwts {
            return Err(ConnInfoError::MissingCredentials(Credentials::BearerJwt));
        }
        AuthData::Password(match urlencoding::decode_binary(pass.as_bytes()) {
            std::borrow::Cow::Borrowed(b) => b.into(),
            std::borrow::Cow::Owned(b) => b.into(),
        })
    } else if config.accept_jwts {
        return Err(ConnInfoError::MissingCredentials(Credentials::BearerJwt));
    } else {
-        return Err(ConnInfoError::MissingPassword);
+        return Err(ConnInfoError::MissingCredentials(Credentials::Password));
    };
    let endpoint = match connection_url.host() {
@@ -247,7 +272,7 @@ pub(crate) async fn handle(
    request: Request<Incoming>,
    backend: Arc<PoolingBackend>,
    cancel: CancellationToken,
-) -> Result<Response<Full<Bytes>>, ApiError> {
+) -> Result<Response<BoxBody<Bytes, hyper1::Error>>, ApiError> {
    let result = handle_inner(cancel, config, &ctx, request, backend).await;
    let mut response = match result {
@@ -279,7 +304,7 @@ pub(crate) async fn handle(
            let mut message = e.to_string_client();
            let db_error = match &e {
-                SqlOverHttpError::ConnectCompute(HttpConnError::ConnectionError(e))
+                SqlOverHttpError::ConnectCompute(HttpConnError::PostgresConnectionError(e))
                | SqlOverHttpError::Postgres(e) => e.as_db_error(),
                _ => None,
            };
@@ -504,7 +529,7 @@ async fn handle_inner(
    ctx: &RequestMonitoring,
    request: Request<Incoming>,
    backend: Arc<PoolingBackend>,
-) -> Result<Response<Full<Bytes>>, SqlOverHttpError> {
+) -> Result<Response<BoxBody<Bytes, hyper1::Error>>, SqlOverHttpError> {
    let _requeset_gauge = Metrics::get()
        .proxy
        .connection_requests
@@ -514,18 +539,50 @@ async fn handle_inner(
        "handling interactive connection from client"
    );
-    //
+    let conn_info = get_conn_info(
-    // Determine the destination and connection params
+        &config.authentication_config,
-    //
+        ctx,
-    let headers = request.headers();
+        request.headers(),
-
+        config.tls_config.as_ref(),
-    // TLS config should be there.
+    )?;
    let conn_info = get_conn_info(ctx, headers, config.tls_config.as_ref())?;
    info!(
        user = conn_info.conn_info.user_info.user.as_str(),
        "credentials"
    );
    match conn_info.auth {
        AuthData::Jwt(jwt) if config.authentication_config.is_auth_broker => {
            handle_auth_broker_inner(config, ctx, request, conn_info.conn_info, jwt, backend).await
        }
        auth => {
            handle_db_inner(
                cancel,
                config,
                ctx,
                request,
                conn_info.conn_info,
                auth,
                backend,
            )
            .await
        }
    }
 }
 async fn handle_db_inner(
    cancel: CancellationToken,
    config: &'static ProxyConfig,
    ctx: &RequestMonitoring,
    request: Request<Incoming>,
    conn_info: ConnInfo,
    auth: AuthData,
    backend: Arc<PoolingBackend>,
 ) -> Result<Response<BoxBody<Bytes, hyper1::Error>>, SqlOverHttpError> {
    //
    // Determine the destination and connection params
    //
    let headers = request.headers();
    // Allow connection pooling only if explicitly requested
    // or if we have decided that http pool is no longer opt-in
    let allow_pool = !config.http_config.pool_options.opt_in
@@ -563,26 +620,36 @@ async fn handle_inner(
    let authenticate_and_connect = Box::pin(
        async {
-            let keys = match &conn_info.auth {
+            let keys = match auth {
                AuthData::Password(pw) => {
                    backend
                        .authenticate_with_password(
                            ctx,
                            &config.authentication_config,
-                            &conn_info.conn_info.user_info,
+                            &conn_info.user_info,
-                            pw,
+                            &pw,
                        )
                        .await?
                }
                AuthData::Jwt(jwt) => {
                    backend
-                        .authenticate_with_jwt(ctx, &conn_info.conn_info.user_info, jwt)
+                        .authenticate_with_jwt(
-                        .await?
+                            ctx,
                            &config.authentication_config,
                            &conn_info.user_info,
                            jwt,
                        )
                        .await?;
                    ComputeCredentials {
                        info: conn_info.user_info.clone(),
                        keys: crate::auth::backend::ComputeCredentialKeys::None,
                    }
                }
            };
            let client = backend
-                .connect_to_compute(ctx, conn_info.conn_info, keys, !allow_pool)
+                .connect_to_compute(ctx, conn_info, keys, !allow_pool)
                .await?;
            // not strictly necessary to mark success here,
            // but it's just insurance for if we forget it somewhere else
@@ -640,7 +707,11 @@ async fn handle_inner(
    let len = json_output.len();
    let response = response
-        .body(Full::new(Bytes::from(json_output)))
+        .body(
            Full::new(Bytes::from(json_output))
                .map_err(|x| match x {})
                .boxed(),
        )
        // only fails if invalid status code or invalid header/values are given.
        // these are not user configurable so it cannot fail dynamically
        .expect("building response payload should not fail");
@@ -656,6 +727,65 @@ async fn handle_inner(
    Ok(response)
 }
 /// Allow-list of request headers that `handle_auth_broker_inner` moves from
 /// the incoming request onto the request it sends to the local proxy.
 /// Headers not listed here are not copied.
 static HEADERS_TO_FORWARD: &[&HeaderName] = &[
    &AUTHORIZATION,
    &CONN_STRING,
    &RAW_TEXT_OUTPUT,
    &ARRAY_MODE,
    &TXN_ISOLATION_LEVEL,
    &TXN_READ_ONLY,
    &TXN_DEFERRABLE,
 ];
 /// Serves a request in auth-broker mode: checks the JWT, then relays the
 /// request over a local-proxy connection and returns that response.
 ///
 /// `handle_inner` dispatches here when the request carries `AuthData::Jwt`
 /// and `config.authentication_config.is_auth_broker` is set.
 ///
 /// # Errors
 ///
 /// Returns early (via `?`) if JWT authentication fails, if connecting to the
 /// local proxy fails, or if sending the forwarded request fails.
 async fn handle_auth_broker_inner(
    config: &'static ProxyConfig,
    ctx: &RequestMonitoring,
    request: Request<Incoming>,
    conn_info: ConnInfo,
    jwt: String,
    backend: Arc<PoolingBackend>,
 ) -> Result<Response<BoxBody<Bytes, hyper1::Error>>, SqlOverHttpError> {
    // Validate the token before any connection is attempted.
    backend
        .authenticate_with_jwt(
            ctx,
            &config.authentication_config,
            &conn_info.user_info,
            jwt,
        )
        .await
        .map_err(HttpConnError::from)?;
    let mut client = backend.connect_to_local_proxy(ctx, conn_info).await?;
    // The outgoing request always uses this fixed URI and POST; the incoming
    // request's own method and URI are not carried over.
    let local_proxy_uri = ::http::Uri::from_static("http://proxy.local/sql");
    let (mut parts, body) = request.into_parts();
    let mut req = Request::builder().method(Method::POST).uri(local_proxy_uri);
    // todo(conradludgate): maybe auth-broker should parse these and re-serialize
    // these instead just to ensure they remain normalised.
    // Only allow-listed headers are moved across; `remove` takes each value
    // out of the incoming header map rather than cloning it.
    for &h in HEADERS_TO_FORWARD {
        if let Some(hv) = parts.headers.remove(h) {
            req = req.header(h, hv);
        }
    }
    // The incoming body is passed through as-is.
    let req = req
        .body(body)
        .expect("all headers and params received via hyper should be valid for request");
    // todo: map body to count egress
    // The counter is fetched but not yet fed any data (see the todo above).
    let _metrics = client.metrics();
    // Send the request and box the response body to match the return type.
    Ok(client
        .inner
        .send_request(req)
        .await
        .map_err(LocalProxyConnError::from)
        .map_err(HttpConnError::from)?
        .map(|b| b.boxed()))
 }
 impl QueryData {
    async fn process(
        self,
@@ -705,7 +835,9 @@ impl QueryData {
                    // query failed or was cancelled.
                    Ok(Err(error)) => {
                        let db_error = match &error {
-                            SqlOverHttpError::ConnectCompute(HttpConnError::ConnectionError(e))
+                            SqlOverHttpError::ConnectCompute(
                                HttpConnError::PostgresConnectionError(e),
                            )
                            | SqlOverHttpError::Postgres(e) => e.as_db_error(),
                            _ => None,
                        };
--- a/safekeeper/Cargo.toml
+++ b/safekeeper/Cargo.toml
@@ -21,7 +21,6 @@ chrono.workspace = true
 clap = { workspace = true, features = ["derive"] }
 crc32c.workspace = true
 fail.workspace = true
 git-version.workspace = true
 hex.workspace = true
 humantime.workspace = true
 hyper.workspace = true
--- a/safekeeper/src/bin/safekeeper.rs
+++ b/safekeeper/src/bin/safekeeper.rs
@@ -374,14 +374,16 @@ type JoinTaskRes = Result<anyhow::Result<()>, JoinError>;
 async fn start_safekeeper(conf: SafeKeeperConf) -> Result<()> {
    // fsync the datadir to make sure we have a consistent state on disk.
-    let dfd = File::open(&conf.workdir).context("open datadir for syncfs")?;
+    if !conf.no_sync {
-    let started = Instant::now();
+        let dfd = File::open(&conf.workdir).context("open datadir for syncfs")?;
-    utils::crashsafe::syncfs(dfd)?;
+        let started = Instant::now();
-    let elapsed = started.elapsed();
+        utils::crashsafe::syncfs(dfd)?;
-    info!(
+        let elapsed = started.elapsed();
-        elapsed_ms = elapsed.as_millis(),
+        info!(
-        "syncfs data directory done"
+            elapsed_ms = elapsed.as_millis(),
-    );
+            "syncfs data directory done"
        );
    }
    info!("starting safekeeper WAL service on {}", conf.listen_pg_addr);
    let pg_listener = tcp_listener::bind(conf.listen_pg_addr.clone()).map_err(|e| {
--- a/Show More
+++ b/Show More