Compare commits

..

1 Commits

Author SHA1 Message Date
Cihan Demirci
f39fefd8ae dnm: test ci 2024-08-02 10:13:36 +03:00
212 changed files with 2869 additions and 6689 deletions

View File

@@ -8,9 +8,6 @@ self-hosted-runner:
- small-arm64 - small-arm64
- us-east-2 - us-east-2
config-variables: config-variables:
- BENCHMARK_PROJECT_ID_PUB
- BENCHMARK_PROJECT_ID_SUB
- REMOTE_STORAGE_AZURE_CONTAINER - REMOTE_STORAGE_AZURE_CONTAINER
- REMOTE_STORAGE_AZURE_REGION - REMOTE_STORAGE_AZURE_REGION
- SLACK_UPCOMING_RELEASE_CHANNEL_ID - SLACK_UPCOMING_RELEASE_CHANNEL_ID
- DEV_AWS_OIDC_ROLE_ARN

View File

@@ -56,10 +56,6 @@ concurrency:
jobs: jobs:
bench: bench:
if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }} if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
permissions:
contents: write
statuses: write
id-token: write # Required for OIDC authentication in azure runners
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
@@ -67,13 +63,9 @@ jobs:
- DEFAULT_PG_VERSION: 16 - DEFAULT_PG_VERSION: 16
PLATFORM: "neon-staging" PLATFORM: "neon-staging"
region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }} region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}
RUNNER: [ self-hosted, us-east-2, x64 ]
IMAGE: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
- DEFAULT_PG_VERSION: 16 - DEFAULT_PG_VERSION: 16
PLATFORM: "azure-staging" PLATFORM: "azure-staging"
region_id: 'azure-eastus2' region_id: 'azure-eastus2'
RUNNER: [ self-hosted, eastus2, x64 ]
IMAGE: neondatabase/build-tools:pinned
env: env:
TEST_PG_BENCH_DURATIONS_MATRIX: "300" TEST_PG_BENCH_DURATIONS_MATRIX: "300"
TEST_PG_BENCH_SCALES_MATRIX: "10,100" TEST_PG_BENCH_SCALES_MATRIX: "10,100"
@@ -84,21 +76,14 @@ jobs:
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }} SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
PLATFORM: ${{ matrix.PLATFORM }} PLATFORM: ${{ matrix.PLATFORM }}
runs-on: ${{ matrix.RUNNER }} runs-on: [ self-hosted, us-east-2, x64 ]
container: container:
image: ${{ matrix.IMAGE }} image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
options: --init options: --init
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- name: Configure AWS credentials # necessary on Azure runners
uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: eu-central-1
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
role-duration-seconds: 18000 # 5 hours
- name: Download Neon artifact - name: Download Neon artifact
uses: ./.github/actions/download uses: ./.github/actions/download
with: with:
@@ -162,7 +147,7 @@ jobs:
if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }} if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
env: env:
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: 16 DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }} SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
@@ -176,7 +161,6 @@ jobs:
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- name: Download Neon artifact - name: Download Neon artifact
uses: ./.github/actions/download uses: ./.github/actions/download
with: with:
@@ -184,7 +168,7 @@ jobs:
path: /tmp/neon/ path: /tmp/neon/
prefix: latest prefix: latest
- name: Run Logical Replication benchmarks - name: Run benchmark
uses: ./.github/actions/run-python-test-set uses: ./.github/actions/run-python-test-set
with: with:
build_type: ${{ env.BUILD_TYPE }} build_type: ${{ env.BUILD_TYPE }}
@@ -192,15 +176,12 @@ jobs:
run_in_parallel: false run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }} save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 5400 extra_params: -m remote_cluster --timeout 5400
pg_version: ${{ env.DEFAULT_PG_VERSION }}
env: env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
NEON_API_KEY: ${{ secrets.NEON_STAGING_API_KEY }} NEON_API_KEY: ${{ secrets.NEON_STAGING_API_KEY }}
BENCHMARK_PROJECT_ID_PUB: ${{ vars.BENCHMARK_PROJECT_ID_PUB }}
BENCHMARK_PROJECT_ID_SUB: ${{ vars.BENCHMARK_PROJECT_ID_SUB }}
- name: Run Physical Replication benchmarks - name: Run benchmark
uses: ./.github/actions/run-python-test-set uses: ./.github/actions/run-python-test-set
with: with:
build_type: ${{ env.BUILD_TYPE }} build_type: ${{ env.BUILD_TYPE }}
@@ -253,9 +234,6 @@ jobs:
id: pgbench-compare-matrix id: pgbench-compare-matrix
run: | run: |
region_id_default=${{ env.DEFAULT_REGION_ID }} region_id_default=${{ env.DEFAULT_REGION_ID }}
runner_default='["self-hosted", "us-east-2", "x64"]'
runner_azure='["self-hosted", "eastus2", "x64"]'
image_default="369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned"
matrix='{ matrix='{
"pg_version" : [ "pg_version" : [
16 16
@@ -269,19 +247,16 @@ jobs:
"neonvm-captest-new" "neonvm-captest-new"
], ],
"db_size": [ "10gb" ], "db_size": [ "10gb" ],
"runner": ['"$runner_default"'], "include": [{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-freetier", "db_size": "3gb" },
"image": [ "'"$image_default"'" ], { "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new", "db_size": "50gb" },
"include": [{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-freetier", "db_size": "3gb" ,"runner": '"$runner_default"', "image": "'"$image_default"'" }, { "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-freetier", "db_size": "3gb" },
{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new", "db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" }, { "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-new", "db_size": "10gb" },
{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new", "db_size": "50gb","runner": '"$runner_default"', "image": "'"$image_default"'" }, { "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-new", "db_size": "50gb" },
{ "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-freetier", "db_size": "3gb" ,"runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned" }, { "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-sharding-reuse", "db_size": "50gb" }]
{ "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-new", "db_size": "10gb","runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned" },
{ "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-new", "db_size": "50gb","runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned" },
{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-sharding-reuse", "db_size": "50gb","runner": '"$runner_default"', "image": "'"$image_default"'" }]
}' }'
if [ "$(date +%A)" = "Saturday" ]; then if [ "$(date +%A)" = "Saturday" ]; then
matrix=$(echo "$matrix" | jq '.include += [{ "pg_version": 14, "region_id": "'"$region_id_default"'", "platform": "rds-postgres", "db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" }]') matrix=$(echo "$matrix" | jq '.include += [{ "pg_version": 14, "region_id": "'"$region_id_default"'", "platform": "rds-postgres", "db_size": "10gb"}]')
fi fi
echo "matrix=$(echo "$matrix" | jq --compact-output '.')" >> $GITHUB_OUTPUT echo "matrix=$(echo "$matrix" | jq --compact-output '.')" >> $GITHUB_OUTPUT
@@ -324,10 +299,6 @@ jobs:
pgbench-compare: pgbench-compare:
if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }} if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
needs: [ generate-matrices ] needs: [ generate-matrices ]
permissions:
contents: write
statuses: write
id-token: write # Required for OIDC authentication in azure runners
strategy: strategy:
fail-fast: false fail-fast: false
@@ -343,9 +314,9 @@ jobs:
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }} SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
PLATFORM: ${{ matrix.platform }} PLATFORM: ${{ matrix.platform }}
runs-on: ${{ matrix.runner }} runs-on: [ self-hosted, us-east-2, x64 ]
container: container:
image: ${{ matrix.image }} image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
options: --init options: --init
# Increase timeout to 8h, default timeout is 6h # Increase timeout to 8h, default timeout is 6h
@@ -354,13 +325,6 @@ jobs:
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- name: Configure AWS credentials # necessary on Azure runners
uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: eu-central-1
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
role-duration-seconds: 18000 # 5 hours
- name: Download Neon artifact - name: Download Neon artifact
uses: ./.github/actions/download uses: ./.github/actions/download
with: with:
@@ -468,20 +432,12 @@ jobs:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
pgbench-pgvector: pgbench-pgvector:
permissions:
contents: write
statuses: write
id-token: write # Required for OIDC authentication in azure runners
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
include: include:
- PLATFORM: "neonvm-captest-pgvector" - PLATFORM: "neonvm-captest-pgvector"
RUNNER: [ self-hosted, us-east-2, x64 ]
IMAGE: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
- PLATFORM: "azure-captest-pgvector" - PLATFORM: "azure-captest-pgvector"
RUNNER: [ self-hosted, eastus2, x64 ]
IMAGE: neondatabase/build-tools:pinned
env: env:
TEST_PG_BENCH_DURATIONS_MATRIX: "15m" TEST_PG_BENCH_DURATIONS_MATRIX: "15m"
@@ -494,9 +450,9 @@ jobs:
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }} SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
PLATFORM: ${{ matrix.PLATFORM }} PLATFORM: ${{ matrix.PLATFORM }}
runs-on: ${{ matrix.RUNNER }} runs-on: [ self-hosted, us-east-2, x64 ]
container: container:
image: ${{ matrix.IMAGE }} image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
options: --init options: --init
steps: steps:
@@ -507,12 +463,12 @@ jobs:
- name: Install postgresql-16 where pytest expects it - name: Install postgresql-16 where pytest expects it
run: | run: |
cd /home/nonroot cd /home/nonroot
wget -q https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/libpq5_16.4-1.pgdg110%2B1_amd64.deb wget -q https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/libpq5_16.3-1.pgdg110%2B1_amd64.deb
wget -q https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-client-16_16.4-1.pgdg110%2B1_amd64.deb wget -q https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-client-16_16.3-1.pgdg110%2B1_amd64.deb
wget -q https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-16_16.4-1.pgdg110%2B1_amd64.deb wget -q https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-16_16.3-1.pgdg110%2B1_amd64.deb
dpkg -x libpq5_16.4-1.pgdg110+1_amd64.deb pg dpkg -x libpq5_16.3-1.pgdg110+1_amd64.deb pg
dpkg -x postgresql-client-16_16.4-1.pgdg110+1_amd64.deb pg dpkg -x postgresql-client-16_16.3-1.pgdg110+1_amd64.deb pg
dpkg -x postgresql-16_16.4-1.pgdg110+1_amd64.deb pg dpkg -x postgresql-16_16.3-1.pgdg110+1_amd64.deb pg
mkdir -p /tmp/neon/pg_install/v16/bin mkdir -p /tmp/neon/pg_install/v16/bin
ln -s /home/nonroot/pg/usr/lib/postgresql/16/bin/pgbench /tmp/neon/pg_install/v16/bin/pgbench ln -s /home/nonroot/pg/usr/lib/postgresql/16/bin/pgbench /tmp/neon/pg_install/v16/bin/pgbench
ln -s /home/nonroot/pg/usr/lib/postgresql/16/bin/psql /tmp/neon/pg_install/v16/bin/psql ln -s /home/nonroot/pg/usr/lib/postgresql/16/bin/psql /tmp/neon/pg_install/v16/bin/psql
@@ -538,13 +494,6 @@ jobs:
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
- name: Configure AWS credentials # necessary on Azure runners to read/write from/to S3
uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: eu-central-1
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
role-duration-seconds: 18000 # 5 hours
- name: Benchmark pgvector hnsw indexing - name: Benchmark pgvector hnsw indexing
uses: ./.github/actions/run-python-test-set uses: ./.github/actions/run-python-test-set
with: with:

View File

@@ -149,6 +149,8 @@ jobs:
env: env:
BUILD_TYPE: release BUILD_TYPE: release
# remove the cachepot wrapper and build without crate caches
RUSTC_WRAPPER: ""
# build with incremental compilation produce partial results # build with incremental compilation produce partial results
# so do not attempt to cache this build, also disable the incremental compilation # so do not attempt to cache this build, also disable the incremental compilation
CARGO_INCREMENTAL: 0 CARGO_INCREMENTAL: 0

View File

@@ -66,31 +66,7 @@ jobs:
ports: ports:
- 9000:9000 - 9000:9000
- 8123:8123 - 8123:8123
zookeeper:
image: quay.io/debezium/zookeeper:2.7
ports:
- 2181:2181
kafka:
image: quay.io/debezium/kafka:2.7
env:
ZOOKEEPER_CONNECT: "zookeeper:2181"
KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092
KAFKA_BROKER_ID: 1
KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
KAFKA_JMX_PORT: 9991
ports:
- 9092:9092
debezium:
image: quay.io/debezium/connect:2.7
env:
BOOTSTRAP_SERVERS: kafka:9092
GROUP_ID: 1
CONFIG_STORAGE_TOPIC: debezium-config
OFFSET_STORAGE_TOPIC: debezium-offset
STATUS_STORAGE_TOPIC: debezium-status
DEBEZIUM_CONFIG_CONNECTOR_CLASS: io.debezium.connector.postgresql.PostgresConnector
ports:
- 8083:8083
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4

View File

@@ -7,20 +7,12 @@ on:
description: 'Source tag' description: 'Source tag'
required: true required: true
type: string type: string
force:
description: 'Force the image to be pinned'
default: false
type: boolean
workflow_call: workflow_call:
inputs: inputs:
from-tag: from-tag:
description: 'Source tag' description: 'Source tag'
required: true required: true
type: string type: string
force:
description: 'Force the image to be pinned'
default: false
type: boolean
defaults: defaults:
run: run:
@@ -30,19 +22,16 @@ concurrency:
group: pin-build-tools-image-${{ inputs.from-tag }} group: pin-build-tools-image-${{ inputs.from-tag }}
cancel-in-progress: false cancel-in-progress: false
# No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job.
permissions: {} permissions: {}
jobs:
tag-image:
runs-on: ubuntu-22.04
env: env:
FROM_TAG: ${{ inputs.from-tag }} FROM_TAG: ${{ inputs.from-tag }}
TO_TAG: pinned TO_TAG: pinned
jobs:
check-manifests:
runs-on: ubuntu-22.04
outputs:
skip: ${{ steps.check-manifests.outputs.skip }}
steps: steps:
- name: Check if we really need to pin the image - name: Check if we really need to pin the image
id: check-manifests id: check-manifests
@@ -58,44 +47,27 @@ jobs:
echo "skip=${skip}" | tee -a $GITHUB_OUTPUT echo "skip=${skip}" | tee -a $GITHUB_OUTPUT
tag-image:
needs: check-manifests
# use format(..) to catch both inputs.force = true AND inputs.force = 'true'
if: needs.check-manifests.outputs.skip == 'false' || format('{0}', inputs.force) == 'true'
runs-on: ubuntu-22.04
permissions:
id-token: write # for `azure/login`
steps:
- uses: docker/login-action@v3 - uses: docker/login-action@v3
if: steps.check-manifests.outputs.skip == 'false'
with: with:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }} username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
- name: Tag build-tools with `${{ env.TO_TAG }}` in Docker Hub
if: steps.check-manifests.outputs.skip == 'false'
run: |
docker buildx imagetools create -t neondatabase/build-tools:${TO_TAG} \
neondatabase/build-tools:${FROM_TAG}
- uses: docker/login-action@v3 - uses: docker/login-action@v3
if: steps.check-manifests.outputs.skip == 'false'
with: with:
registry: 369495373322.dkr.ecr.eu-central-1.amazonaws.com registry: 369495373322.dkr.ecr.eu-central-1.amazonaws.com
username: ${{ secrets.AWS_ACCESS_KEY_DEV }} username: ${{ secrets.AWS_ACCESS_KEY_DEV }}
password: ${{ secrets.AWS_SECRET_KEY_DEV }} password: ${{ secrets.AWS_SECRET_KEY_DEV }}
- name: Azure login - name: Tag build-tools with `${{ env.TO_TAG }}` in ECR
uses: azure/login@6c251865b4e6290e7b78be643ea2d005bc51f69a # @v2.1.1 if: steps.check-manifests.outputs.skip == 'false'
with:
client-id: ${{ secrets.AZURE_DEV_CLIENT_ID }}
tenant-id: ${{ secrets.AZURE_TENANT_ID }}
subscription-id: ${{ secrets.AZURE_DEV_SUBSCRIPTION_ID }}
- name: Login to ACR
run: |
az acr login --name=neoneastus2
- name: Tag build-tools with `${{ env.TO_TAG }}` in Docker Hub, ECR, and ACR
run: | run: |
docker buildx imagetools create -t 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:${TO_TAG} \ docker buildx imagetools create -t 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:${TO_TAG} \
-t neoneastus2.azurecr.io/neondatabase/build-tools:${TO_TAG} \
-t neondatabase/build-tools:${TO_TAG} \
neondatabase/build-tools:${FROM_TAG} neondatabase/build-tools:${FROM_TAG}

View File

@@ -13,6 +13,8 @@ defaults:
env: env:
# A concurrency group that we use for e2e-tests runs, matches `concurrency.group` above with `github.repository` as a prefix # A concurrency group that we use for e2e-tests runs, matches `concurrency.group` above with `github.repository` as a prefix
E2E_CONCURRENCY_GROUP: ${{ github.repository }}-e2e-tests-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }} E2E_CONCURRENCY_GROUP: ${{ github.repository }}-e2e-tests-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
jobs: jobs:
cancel-previous-e2e-tests: cancel-previous-e2e-tests:
@@ -62,35 +64,19 @@ jobs:
needs: [ tag ] needs: [ tag ]
runs-on: ubuntu-22.04 runs-on: ubuntu-22.04
env: env:
EVENT_ACTION: ${{ github.event.action }}
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
TAG: ${{ needs.tag.outputs.build-tag }} TAG: ${{ needs.tag.outputs.build-tag }}
steps: steps:
- name: Wait for `promote-images` job to finish - name: check if ecr image are present
# It's important to have a timeout here, the script in the step can run infinitely env:
timeout-minutes: 60 AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
run: | run: |
if [ "${GITHUB_EVENT_NAME}" != "pull_request" ] || [ "${EVENT_ACTION}" != "ready_for_review" ]; then for REPO in neon compute-tools compute-node-v14 vm-compute-node-v14 compute-node-v15 vm-compute-node-v15 compute-node-v16 vm-compute-node-v16; do
exit 0 OUTPUT=$(aws ecr describe-images --repository-name ${REPO} --region eu-central-1 --query "imageDetails[?imageTags[?contains(@, '${TAG}')]]" --output text)
fi if [ "$OUTPUT" == "" ]; then
echo "$REPO with image tag $TAG not found" >> $GITHUB_OUTPUT
# For PRs we use the run id as the tag
BUILD_AND_TEST_RUN_ID=${TAG}
while true; do
conclusion=$(gh run --repo ${GITHUB_REPOSITORY} view ${BUILD_AND_TEST_RUN_ID} --json jobs --jq '.jobs[] | select(.name == "promote-images") | .conclusion')
case "$conclusion" in
success)
break
;;
failure | cancelled | skipped)
echo "The 'promote-images' job didn't succeed: '${conclusion}'. Exiting..."
exit 1 exit 1
;; fi
*)
echo "The 'promote-images' hasn't succeed yet. Waiting..."
sleep 60
;;
esac
done done
- name: Set e2e-platforms - name: Set e2e-platforms

372
Cargo.lock generated
View File

@@ -484,7 +484,7 @@ dependencies = [
"http 0.2.9", "http 0.2.9",
"http 1.1.0", "http 1.1.0",
"once_cell", "once_cell",
"p256 0.11.1", "p256",
"percent-encoding", "percent-encoding",
"ring 0.17.6", "ring 0.17.6",
"sha2", "sha2",
@@ -848,12 +848,6 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "349a06037c7bf932dd7e7d1f653678b2038b9ad46a74102f1fc7bd7872678cce" checksum = "349a06037c7bf932dd7e7d1f653678b2038b9ad46a74102f1fc7bd7872678cce"
[[package]]
name = "base16ct"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c7f02d4ea65f2c1853089ffd8d2787bdbc63de2f0d29dedbcf8ccdfa0ccd4cf"
[[package]] [[package]]
name = "base64" name = "base64"
version = "0.13.1" version = "0.13.1"
@@ -977,9 +971,9 @@ checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1"
[[package]] [[package]]
name = "bytemuck" name = "bytemuck"
version = "1.16.3" version = "1.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "102087e286b4677862ea56cf8fc58bb2cdfa8725c40ffb80fe3a008eb7f2fc83" checksum = "78834c15cb5d5efe3452d58b1e8ba890dd62d21907f867f383358198e56ebca5"
[[package]] [[package]]
name = "byteorder" name = "byteorder"
@@ -1532,10 +1526,8 @@ version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0dc92fb57ca44df6db8059111ab3af99a63d5d0f8375d9972e319a379c6bab76" checksum = "0dc92fb57ca44df6db8059111ab3af99a63d5d0f8375d9972e319a379c6bab76"
dependencies = [ dependencies = [
"generic-array",
"rand_core 0.6.4", "rand_core 0.6.4",
"subtle", "subtle",
"zeroize",
] ]
[[package]] [[package]]
@@ -1629,7 +1621,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fffa369a668c8af7dbf8b5e56c9f744fbd399949ed171606040001947de40b1c" checksum = "fffa369a668c8af7dbf8b5e56c9f744fbd399949ed171606040001947de40b1c"
dependencies = [ dependencies = [
"const-oid", "const-oid",
"pem-rfc7468",
"zeroize", "zeroize",
] ]
@@ -1729,7 +1720,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
dependencies = [ dependencies = [
"block-buffer", "block-buffer",
"const-oid",
"crypto-common", "crypto-common",
"subtle", "subtle",
] ]
@@ -1781,25 +1771,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "413301934810f597c1d19ca71c8710e99a3f1ba28a0d2ebc01551a2daeea3c5c" checksum = "413301934810f597c1d19ca71c8710e99a3f1ba28a0d2ebc01551a2daeea3c5c"
dependencies = [ dependencies = [
"der 0.6.1", "der 0.6.1",
"elliptic-curve 0.12.3", "elliptic-curve",
"rfc6979 0.3.1", "rfc6979",
"signature 1.6.4", "signature 1.6.4",
] ]
[[package]]
name = "ecdsa"
version = "0.16.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee27f32b5c5292967d2d4a9d7f1e0b0aed2c15daded5a60300e4abb9d8020bca"
dependencies = [
"der 0.7.8",
"digest",
"elliptic-curve 0.13.8",
"rfc6979 0.4.0",
"signature 2.2.0",
"spki 0.7.3",
]
[[package]] [[package]]
name = "either" name = "either"
version = "1.8.1" version = "1.8.1"
@@ -1812,36 +1788,16 @@ version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e7bb888ab5300a19b8e5bceef25ac745ad065f3c9f7efc6de1b91958110891d3" checksum = "e7bb888ab5300a19b8e5bceef25ac745ad065f3c9f7efc6de1b91958110891d3"
dependencies = [ dependencies = [
"base16ct 0.1.1", "base16ct",
"crypto-bigint 0.4.9", "crypto-bigint 0.4.9",
"der 0.6.1", "der 0.6.1",
"digest", "digest",
"ff 0.12.1", "ff",
"generic-array", "generic-array",
"group 0.12.1", "group",
"pkcs8 0.9.0", "pkcs8",
"rand_core 0.6.4", "rand_core 0.6.4",
"sec1 0.3.0", "sec1",
"subtle",
"zeroize",
]
[[package]]
name = "elliptic-curve"
version = "0.13.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5e6043086bf7973472e0c7dff2142ea0b680d30e18d9cc40f267efbf222bd47"
dependencies = [
"base16ct 0.2.0",
"crypto-bigint 0.5.5",
"digest",
"ff 0.13.0",
"generic-array",
"group 0.13.0",
"pem-rfc7468",
"pkcs8 0.10.2",
"rand_core 0.6.4",
"sec1 0.7.3",
"subtle", "subtle",
"zeroize", "zeroize",
] ]
@@ -1995,16 +1951,6 @@ dependencies = [
"subtle", "subtle",
] ]
[[package]]
name = "ff"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ded41244b729663b1e574f1b4fb731469f69f79c17667b5d776b16cda0479449"
dependencies = [
"rand_core 0.6.4",
"subtle",
]
[[package]] [[package]]
name = "filetime" name = "filetime"
version = "0.2.22" version = "0.2.22"
@@ -2202,7 +2148,6 @@ checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
dependencies = [ dependencies = [
"typenum", "typenum",
"version_check", "version_check",
"zeroize",
] ]
[[package]] [[package]]
@@ -2269,18 +2214,7 @@ version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5dfbfb3a6cfbd390d5c9564ab283a0349b9b9fcd46a706c1eb10e0db70bfbac7" checksum = "5dfbfb3a6cfbd390d5c9564ab283a0349b9b9fcd46a706c1eb10e0db70bfbac7"
dependencies = [ dependencies = [
"ff 0.12.1", "ff",
"rand_core 0.6.4",
"subtle",
]
[[package]]
name = "group"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0f9ef7462f7c099f518d754361858f86d8a07af53ba9af0fe635bbccb151a63"
dependencies = [
"ff 0.13.0",
"rand_core 0.6.4", "rand_core 0.6.4",
"subtle", "subtle",
] ]
@@ -2842,42 +2776,6 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "jose-b64"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bec69375368709666b21c76965ce67549f2d2db7605f1f8707d17c9656801b56"
dependencies = [
"base64ct",
"serde",
"subtle",
"zeroize",
]
[[package]]
name = "jose-jwa"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ab78e053fe886a351d67cf0d194c000f9d0dcb92906eb34d853d7e758a4b3a7"
dependencies = [
"serde",
]
[[package]]
name = "jose-jwk"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "280fa263807fe0782ecb6f2baadc28dffc04e00558a58e33bfdb801d11fd58e7"
dependencies = [
"jose-b64",
"jose-jwa",
"p256 0.13.2",
"p384",
"rsa",
"serde",
"zeroize",
]
[[package]] [[package]]
name = "js-sys" name = "js-sys"
version = "0.3.69" version = "0.3.69"
@@ -2937,9 +2835,6 @@ name = "lazy_static"
version = "1.4.0" version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
dependencies = [
"spin 0.5.2",
]
[[package]] [[package]]
name = "lazycell" name = "lazycell"
@@ -3309,23 +3204,6 @@ dependencies = [
"num-traits", "num-traits",
] ]
[[package]]
name = "num-bigint-dig"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc84195820f291c7697304f3cbdadd1cb7199c0efc917ff5eafd71225c136151"
dependencies = [
"byteorder",
"lazy_static",
"libm",
"num-integer",
"num-iter",
"num-traits",
"rand 0.8.5",
"smallvec",
"zeroize",
]
[[package]] [[package]]
name = "num-complex" name = "num-complex"
version = "0.4.4" version = "0.4.4"
@@ -3603,33 +3481,11 @@ version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51f44edd08f51e2ade572f141051021c5af22677e42b7dd28a88155151c33594" checksum = "51f44edd08f51e2ade572f141051021c5af22677e42b7dd28a88155151c33594"
dependencies = [ dependencies = [
"ecdsa 0.14.8", "ecdsa",
"elliptic-curve 0.12.3", "elliptic-curve",
"sha2", "sha2",
] ]
[[package]]
name = "p256"
version = "0.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c9863ad85fa8f4460f9c48cb909d38a0d689dba1f6f6988a5e3e0d31071bcd4b"
dependencies = [
"ecdsa 0.16.9",
"elliptic-curve 0.13.8",
"primeorder",
"sha2",
]
[[package]]
name = "p384"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70786f51bcc69f6a4c0360e063a4cac5419ef7c5cd5b3c99ad70f3be5ba79209"
dependencies = [
"elliptic-curve 0.13.8",
"primeorder",
]
[[package]] [[package]]
name = "pagebench" name = "pagebench"
version = "0.1.0" version = "0.1.0"
@@ -3991,15 +3847,6 @@ dependencies = [
"serde", "serde",
] ]
[[package]]
name = "pem-rfc7468"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "88b39c9bfcfc231068454382784bb460aae594343fb030d46e9f50a645418412"
dependencies = [
"base64ct",
]
[[package]] [[package]]
name = "percent-encoding" name = "percent-encoding"
version = "2.2.0" version = "2.2.0"
@@ -4016,29 +3863,6 @@ dependencies = [
"indexmap 1.9.3", "indexmap 1.9.3",
] ]
[[package]]
name = "pg_sni_router"
version = "0.1.0"
dependencies = [
"anyhow",
"clap",
"futures",
"git-version",
"itertools 0.10.5",
"pq_proto",
"proxy-core",
"proxy-sasl",
"rustls 0.22.4",
"rustls-pemfile 2.1.1",
"socket2 0.5.5",
"tokio",
"tokio-util",
"tracing",
"tracing-utils",
"utils",
"uuid",
]
[[package]] [[package]]
name = "phf" name = "phf"
version = "0.11.1" version = "0.11.1"
@@ -4089,17 +3913,6 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
[[package]]
name = "pkcs1"
version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8ffb9f10fa047879315e6625af03c164b16962a5368d724ed16323b68ace47f"
dependencies = [
"der 0.7.8",
"pkcs8 0.10.2",
"spki 0.7.3",
]
[[package]] [[package]]
name = "pkcs8" name = "pkcs8"
version = "0.9.0" version = "0.9.0"
@@ -4110,16 +3923,6 @@ dependencies = [
"spki 0.6.0", "spki 0.6.0",
] ]
[[package]]
name = "pkcs8"
version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7"
dependencies = [
"der 0.7.8",
"spki 0.7.3",
]
[[package]] [[package]]
name = "pkg-config" name = "pkg-config"
version = "0.3.27" version = "0.3.27"
@@ -4157,7 +3960,7 @@ dependencies = [
[[package]] [[package]]
name = "postgres" name = "postgres"
version = "0.19.4" version = "0.19.4"
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#20031d7a9ee1addeae6e0968e3899ae6bf01cee2" source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#cff6927e4f58b1af6ecc2ee7279df1f2ff537295"
dependencies = [ dependencies = [
"bytes", "bytes",
"fallible-iterator", "fallible-iterator",
@@ -4170,7 +3973,7 @@ dependencies = [
[[package]] [[package]]
name = "postgres-protocol" name = "postgres-protocol"
version = "0.6.4" version = "0.6.4"
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#20031d7a9ee1addeae6e0968e3899ae6bf01cee2" source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#cff6927e4f58b1af6ecc2ee7279df1f2ff537295"
dependencies = [ dependencies = [
"base64 0.20.0", "base64 0.20.0",
"byteorder", "byteorder",
@@ -4189,7 +3992,7 @@ dependencies = [
[[package]] [[package]]
name = "postgres-types" name = "postgres-types"
version = "0.2.4" version = "0.2.4"
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#20031d7a9ee1addeae6e0968e3899ae6bf01cee2" source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#cff6927e4f58b1af6ecc2ee7279df1f2ff537295"
dependencies = [ dependencies = [
"bytes", "bytes",
"fallible-iterator", "fallible-iterator",
@@ -4313,15 +4116,6 @@ dependencies = [
"syn 2.0.52", "syn 2.0.52",
] ]
[[package]]
name = "primeorder"
version = "0.13.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "353e1ca18966c16d9deb1c69278edbc5f194139612772bd9537af60ac231e1e6"
dependencies = [
"elliptic-curve 0.13.8",
]
[[package]] [[package]]
name = "proc-macro-hack" name = "proc-macro-hack"
version = "0.5.20+deprecated" version = "0.5.20+deprecated"
@@ -4436,38 +4230,9 @@ dependencies = [
[[package]] [[package]]
name = "proxy" name = "proxy"
version = "0.1.0" version = "0.1.0"
dependencies = [
"anyhow",
"aws-config",
"clap",
"futures",
"git-version",
"humantime",
"itertools 0.10.5",
"metrics",
"pq_proto",
"proxy-core",
"proxy-sasl",
"remote_storage",
"rustls 0.22.4",
"rustls-pemfile 2.1.1",
"socket2 0.5.5",
"tikv-jemallocator",
"tokio",
"tokio-util",
"tracing",
"tracing-utils",
"utils",
"uuid",
]
[[package]]
name = "proxy-core"
version = "0.1.0"
dependencies = [ dependencies = [
"ahash", "ahash",
"anyhow", "anyhow",
"arc-swap",
"async-compression", "async-compression",
"async-trait", "async-trait",
"atomic-take", "atomic-take",
@@ -4485,11 +4250,11 @@ dependencies = [
"consumption_metrics", "consumption_metrics",
"crossbeam-deque", "crossbeam-deque",
"dashmap", "dashmap",
"ecdsa 0.16.9",
"env_logger", "env_logger",
"fallible-iterator", "fallible-iterator",
"framed-websockets", "framed-websockets",
"futures", "futures",
"git-version",
"hashbrown 0.14.5", "hashbrown 0.14.5",
"hashlink", "hashlink",
"hex", "hex",
@@ -4505,14 +4270,12 @@ dependencies = [
"indexmap 2.0.1", "indexmap 2.0.1",
"ipnet", "ipnet",
"itertools 0.10.5", "itertools 0.10.5",
"jose-jwa",
"jose-jwk",
"lasso", "lasso",
"md5", "md5",
"measured", "measured",
"metrics", "metrics",
"once_cell", "once_cell",
"p256 0.13.2", "opentelemetry",
"parking_lot 0.12.1", "parking_lot 0.12.1",
"parquet", "parquet",
"parquet_derive", "parquet_derive",
@@ -4521,7 +4284,7 @@ dependencies = [
"postgres-protocol", "postgres-protocol",
"postgres_backend", "postgres_backend",
"pq_proto", "pq_proto",
"proxy-sasl", "prometheus",
"rand 0.8.5", "rand 0.8.5",
"rand_distr", "rand_distr",
"rcgen", "rcgen",
@@ -4533,7 +4296,6 @@ dependencies = [
"reqwest-retry", "reqwest-retry",
"reqwest-tracing", "reqwest-tracing",
"routerify", "routerify",
"rsa",
"rstest", "rstest",
"rustc-hash", "rustc-hash",
"rustls 0.22.4", "rustls 0.22.4",
@@ -4543,7 +4305,6 @@ dependencies = [
"serde", "serde",
"serde_json", "serde_json",
"sha2", "sha2",
"signature 2.2.0",
"smallvec", "smallvec",
"smol_str", "smol_str",
"socket2 0.5.5", "socket2 0.5.5",
@@ -4551,6 +4312,7 @@ dependencies = [
"task-local-extensions", "task-local-extensions",
"thiserror", "thiserror",
"tikv-jemalloc-ctl", "tikv-jemalloc-ctl",
"tikv-jemallocator",
"tokio", "tokio",
"tokio-postgres", "tokio-postgres",
"tokio-postgres-rustls", "tokio-postgres-rustls",
@@ -4562,7 +4324,6 @@ dependencies = [
"tracing-opentelemetry", "tracing-opentelemetry",
"tracing-subscriber", "tracing-subscriber",
"tracing-utils", "tracing-utils",
"try-lock",
"typed-json", "typed-json",
"url", "url",
"urlencoding", "urlencoding",
@@ -4573,35 +4334,6 @@ dependencies = [
"x509-parser", "x509-parser",
] ]
[[package]]
name = "proxy-sasl"
version = "0.1.0"
dependencies = [
"ahash",
"anyhow",
"base64 0.13.1",
"bytes",
"crossbeam-deque",
"hmac",
"itertools 0.10.5",
"lasso",
"measured",
"parking_lot 0.12.1",
"pbkdf2",
"postgres-protocol",
"pq_proto",
"rand 0.8.5",
"rustls 0.22.4",
"sha2",
"subtle",
"thiserror",
"tokio",
"tracing",
"uuid",
"workspace_hack",
"x509-parser",
]
[[package]] [[package]]
name = "quick-xml" name = "quick-xml"
version = "0.31.0" version = "0.31.0"
@@ -5074,16 +4806,6 @@ dependencies = [
"zeroize", "zeroize",
] ]
[[package]]
name = "rfc6979"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8dd2a808d456c4a54e300a23e9f5a67e122c3024119acbfd73e3bf664491cb2"
dependencies = [
"hmac",
"subtle",
]
[[package]] [[package]]
name = "ring" name = "ring"
version = "0.16.20" version = "0.16.20"
@@ -5144,26 +4866,6 @@ dependencies = [
"archery", "archery",
] ]
[[package]]
name = "rsa"
version = "0.9.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d0e5124fcb30e76a7e79bfee683a2746db83784b86289f6251b54b7950a0dfc"
dependencies = [
"const-oid",
"digest",
"num-bigint-dig",
"num-integer",
"num-traits",
"pkcs1",
"pkcs8 0.10.2",
"rand_core 0.6.4",
"signature 2.2.0",
"spki 0.7.3",
"subtle",
"zeroize",
]
[[package]] [[package]]
name = "rstest" name = "rstest"
version = "0.18.2" version = "0.18.2"
@@ -5492,24 +5194,10 @@ version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3be24c1842290c45df0a7bf069e0c268a747ad05a192f2fd7dcfdbc1cba40928" checksum = "3be24c1842290c45df0a7bf069e0c268a747ad05a192f2fd7dcfdbc1cba40928"
dependencies = [ dependencies = [
"base16ct 0.1.1", "base16ct",
"der 0.6.1", "der 0.6.1",
"generic-array", "generic-array",
"pkcs8 0.9.0", "pkcs8",
"subtle",
"zeroize",
]
[[package]]
name = "sec1"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3e97a565f76233a6003f9f5c54be1d9c5bdfa3eccfb189469f11ec4901c47dc"
dependencies = [
"base16ct 0.2.0",
"der 0.7.8",
"generic-array",
"pkcs8 0.10.2",
"subtle", "subtle",
"zeroize", "zeroize",
] ]
@@ -5856,7 +5544,6 @@ version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de"
dependencies = [ dependencies = [
"digest",
"rand_core 0.6.4", "rand_core 0.6.4",
] ]
@@ -6016,7 +5703,6 @@ dependencies = [
"pageserver_client", "pageserver_client",
"postgres_connection", "postgres_connection",
"r2d2", "r2d2",
"rand 0.8.5",
"reqwest 0.12.4", "reqwest 0.12.4",
"routerify", "routerify",
"scopeguard", "scopeguard",
@@ -6499,7 +6185,7 @@ dependencies = [
[[package]] [[package]]
name = "tokio-postgres" name = "tokio-postgres"
version = "0.7.7" version = "0.7.7"
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#20031d7a9ee1addeae6e0968e3899ae6bf01cee2" source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#cff6927e4f58b1af6ecc2ee7279df1f2ff537295"
dependencies = [ dependencies = [
"async-trait", "async-trait",
"byteorder", "byteorder",
@@ -6876,9 +6562,9 @@ dependencies = [
[[package]] [[package]]
name = "try-lock" name = "try-lock"
version = "0.2.5" version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed"
[[package]] [[package]]
name = "tungstenite" name = "tungstenite"
@@ -7691,17 +7377,13 @@ dependencies = [
"clap", "clap",
"clap_builder", "clap_builder",
"crossbeam-utils", "crossbeam-utils",
"crypto-bigint 0.5.5",
"der 0.7.8",
"deranged", "deranged",
"digest",
"either", "either",
"fail", "fail",
"futures-channel", "futures-channel",
"futures-executor", "futures-executor",
"futures-io", "futures-io",
"futures-util", "futures-util",
"generic-array",
"getrandom 0.2.11", "getrandom 0.2.11",
"hashbrown 0.14.5", "hashbrown 0.14.5",
"hex", "hex",
@@ -7709,7 +7391,6 @@ dependencies = [
"hyper 0.14.26", "hyper 0.14.26",
"indexmap 1.9.3", "indexmap 1.9.3",
"itertools 0.10.5", "itertools 0.10.5",
"lazy_static",
"libc", "libc",
"log", "log",
"memchr", "memchr",
@@ -7733,9 +7414,7 @@ dependencies = [
"serde", "serde",
"serde_json", "serde_json",
"sha2", "sha2",
"signature 2.2.0",
"smallvec", "smallvec",
"spki 0.7.3",
"subtle", "subtle",
"syn 1.0.109", "syn 1.0.109",
"syn 2.0.52", "syn 2.0.52",
@@ -7846,7 +7525,6 @@ version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "525b4ec142c6b68a2d10f01f7bbf6755599ca3f81ea53b8431b7dd348f5fdb2d" checksum = "525b4ec142c6b68a2d10f01f7bbf6755599ca3f81ea53b8431b7dd348f5fdb2d"
dependencies = [ dependencies = [
"serde",
"zeroize_derive", "zeroize_derive",
] ]

View File

@@ -9,10 +9,7 @@ members = [
"pageserver/ctl", "pageserver/ctl",
"pageserver/client", "pageserver/client",
"pageserver/pagebench", "pageserver/pagebench",
"proxy/core", "proxy",
"proxy/sasl",
"proxy/proxy",
"proxy/pg_sni_router",
"safekeeper", "safekeeper",
"storage_broker", "storage_broker",
"storage_controller", "storage_controller",
@@ -187,7 +184,6 @@ tracing = "0.1"
tracing-error = "0.2.0" tracing-error = "0.2.0"
tracing-opentelemetry = "0.21.0" tracing-opentelemetry = "0.21.0"
tracing-subscriber = { version = "0.3", default-features = false, features = ["smallvec", "fmt", "tracing-log", "std", "env-filter", "json"] } tracing-subscriber = { version = "0.3", default-features = false, features = ["smallvec", "fmt", "tracing-log", "std", "env-filter", "json"] }
try-lock = "0.2.5"
twox-hash = { version = "1.6.3", default-features = false } twox-hash = { version = "1.6.3", default-features = false }
typed-json = "0.1" typed-json = "0.1"
url = "2.2" url = "2.2"

View File

@@ -17,7 +17,7 @@ COPY --chown=nonroot pgxn pgxn
COPY --chown=nonroot Makefile Makefile COPY --chown=nonroot Makefile Makefile
COPY --chown=nonroot scripts/ninstall.sh scripts/ninstall.sh COPY --chown=nonroot scripts/ninstall.sh scripts/ninstall.sh
ENV BUILD_TYPE=release ENV BUILD_TYPE release
RUN set -e \ RUN set -e \
&& mold -run make -j $(nproc) -s neon-pg-ext \ && mold -run make -j $(nproc) -s neon-pg-ext \
&& rm -rf pg_install/build \ && rm -rf pg_install/build \
@@ -29,12 +29,24 @@ WORKDIR /home/nonroot
ARG GIT_VERSION=local ARG GIT_VERSION=local
ARG BUILD_TAG ARG BUILD_TAG
# Enable https://github.com/paritytech/cachepot to cache Rust crates' compilation results in Docker builds.
# Set up cachepot to use an AWS S3 bucket for cache results, to reuse it between `docker build` invocations.
# cachepot falls back to local filesystem if S3 is misconfigured, not failing the build
ARG RUSTC_WRAPPER=cachepot
ENV AWS_REGION=eu-central-1
ENV CACHEPOT_S3_KEY_PREFIX=cachepot
ARG CACHEPOT_BUCKET=neon-github-dev
#ARG AWS_ACCESS_KEY_ID
#ARG AWS_SECRET_ACCESS_KEY
COPY --from=pg-build /home/nonroot/pg_install/v14/include/postgresql/server pg_install/v14/include/postgresql/server COPY --from=pg-build /home/nonroot/pg_install/v14/include/postgresql/server pg_install/v14/include/postgresql/server
COPY --from=pg-build /home/nonroot/pg_install/v15/include/postgresql/server pg_install/v15/include/postgresql/server COPY --from=pg-build /home/nonroot/pg_install/v15/include/postgresql/server pg_install/v15/include/postgresql/server
COPY --from=pg-build /home/nonroot/pg_install/v16/include/postgresql/server pg_install/v16/include/postgresql/server COPY --from=pg-build /home/nonroot/pg_install/v16/include/postgresql/server pg_install/v16/include/postgresql/server
COPY --from=pg-build /home/nonroot/pg_install/v16/lib pg_install/v16/lib COPY --from=pg-build /home/nonroot/pg_install/v16/lib pg_install/v16/lib
COPY --chown=nonroot . . COPY --chown=nonroot . .
# Show build caching stats to check if it was used in the end.
# Has to be the part of the same RUN since cachepot daemon is killed in the end of this RUN, losing the compilation stats.
RUN set -e \ RUN set -e \
&& PQ_LIB_DIR=$(pwd)/pg_install/v16/lib RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment" cargo build \ && PQ_LIB_DIR=$(pwd)/pg_install/v16/lib RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment" cargo build \
--bin pg_sni_router \ --bin pg_sni_router \
@@ -46,7 +58,8 @@ RUN set -e \
--bin proxy \ --bin proxy \
--bin neon_local \ --bin neon_local \
--bin storage_scrubber \ --bin storage_scrubber \
--locked --release --locked --release \
&& cachepot -s
# Build final image # Build final image
# #
@@ -91,7 +104,7 @@ RUN mkdir -p /data/.neon/ && \
# When running a binary that links with libpq, default to using our most recent postgres version. Binaries # When running a binary that links with libpq, default to using our most recent postgres version. Binaries
# that want a particular postgres version will select it explicitly: this is just a default. # that want a particular postgres version will select it explicitly: this is just a default.
ENV LD_LIBRARY_PATH=/usr/local/v16/lib ENV LD_LIBRARY_PATH /usr/local/v16/lib
VOLUME ["/data"] VOLUME ["/data"]
@@ -99,5 +112,5 @@ USER neon
EXPOSE 6400 EXPOSE 6400
EXPOSE 9898 EXPOSE 9898
CMD ["/usr/local/bin/pageserver", "-D", "/data/.neon"] CMD /usr/local/bin/pageserver -D /data/.neon

View File

@@ -58,7 +58,7 @@ RUN set -e \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
# protobuf-compiler (protoc) # protobuf-compiler (protoc)
ENV PROTOC_VERSION=25.1 ENV PROTOC_VERSION 25.1
RUN curl -fsSL "https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/protoc-${PROTOC_VERSION}-linux-$(uname -m | sed 's/aarch64/aarch_64/g').zip" -o "protoc.zip" \ RUN curl -fsSL "https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/protoc-${PROTOC_VERSION}-linux-$(uname -m | sed 's/aarch64/aarch_64/g').zip" -o "protoc.zip" \
&& unzip -q protoc.zip -d protoc \ && unzip -q protoc.zip -d protoc \
&& mv protoc/bin/protoc /usr/local/bin/protoc \ && mv protoc/bin/protoc /usr/local/bin/protoc \
@@ -99,7 +99,7 @@ RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-$(uname -m).zip" -o "aws
&& rm awscliv2.zip && rm awscliv2.zip
# Mold: A Modern Linker # Mold: A Modern Linker
ENV MOLD_VERSION=v2.33.0 ENV MOLD_VERSION v2.31.0
RUN set -e \ RUN set -e \
&& git clone https://github.com/rui314/mold.git \ && git clone https://github.com/rui314/mold.git \
&& mkdir mold/build \ && mkdir mold/build \
@@ -168,7 +168,7 @@ USER nonroot:nonroot
WORKDIR /home/nonroot WORKDIR /home/nonroot
# Python # Python
ENV PYTHON_VERSION=3.9.19 \ ENV PYTHON_VERSION=3.9.18 \
PYENV_ROOT=/home/nonroot/.pyenv \ PYENV_ROOT=/home/nonroot/.pyenv \
PATH=/home/nonroot/.pyenv/shims:/home/nonroot/.pyenv/bin:/home/nonroot/.poetry/bin:$PATH PATH=/home/nonroot/.pyenv/shims:/home/nonroot/.pyenv/bin:/home/nonroot/.poetry/bin:$PATH
RUN set -e \ RUN set -e \
@@ -192,14 +192,9 @@ WORKDIR /home/nonroot
# Rust # Rust
# Please keep the version of llvm (installed above) in sync with rust llvm (`rustc --version --verbose | grep LLVM`) # Please keep the version of llvm (installed above) in sync with rust llvm (`rustc --version --verbose | grep LLVM`)
ENV RUSTC_VERSION=1.80.1 ENV RUSTC_VERSION=1.80.0
ENV RUSTUP_HOME="/home/nonroot/.rustup" ENV RUSTUP_HOME="/home/nonroot/.rustup"
ENV PATH="/home/nonroot/.cargo/bin:${PATH}" ENV PATH="/home/nonroot/.cargo/bin:${PATH}"
ARG RUSTFILT_VERSION=0.2.1
ARG CARGO_HAKARI_VERSION=0.9.30
ARG CARGO_DENY_VERSION=0.16.1
ARG CARGO_HACK_VERSION=0.6.31
ARG CARGO_NEXTEST_VERSION=0.9.72
RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && whoami && \ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && whoami && \
chmod +x rustup-init && \ chmod +x rustup-init && \
./rustup-init -y --default-toolchain ${RUSTC_VERSION} && \ ./rustup-init -y --default-toolchain ${RUSTC_VERSION} && \
@@ -208,13 +203,15 @@ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux
. "$HOME/.cargo/env" && \ . "$HOME/.cargo/env" && \
cargo --version && rustup --version && \ cargo --version && rustup --version && \
rustup component add llvm-tools-preview rustfmt clippy && \ rustup component add llvm-tools-preview rustfmt clippy && \
cargo install rustfilt --version ${RUSTFILT_VERSION} && \ cargo install --git https://github.com/paritytech/cachepot && \
cargo install cargo-hakari --version ${CARGO_HAKARI_VERSION} && \ cargo install rustfilt && \
cargo install cargo-deny --locked --version ${CARGO_DENY_VERSION} && \ cargo install cargo-hakari && \
cargo install cargo-hack --version ${CARGO_HACK_VERSION} && \ cargo install cargo-deny --locked && \
cargo install cargo-nextest --version ${CARGO_NEXTEST_VERSION} && \ cargo install cargo-hack && \
cargo install cargo-nextest && \
rm -rf /home/nonroot/.cargo/registry && \ rm -rf /home/nonroot/.cargo/registry && \
rm -rf /home/nonroot/.cargo/git rm -rf /home/nonroot/.cargo/git
ENV RUSTC_WRAPPER=cachepot
# Show versions # Show versions
RUN whoami \ RUN whoami \

View File

@@ -94,7 +94,7 @@ RUN wget https://gitlab.com/Oslandia/SFCGAL/-/archive/v1.3.10/SFCGAL-v1.3.10.tar
DESTDIR=/sfcgal make install -j $(getconf _NPROCESSORS_ONLN) && \ DESTDIR=/sfcgal make install -j $(getconf _NPROCESSORS_ONLN) && \
make clean && cp -R /sfcgal/* / make clean && cp -R /sfcgal/* /
ENV PATH="/usr/local/pgsql/bin:$PATH" ENV PATH "/usr/local/pgsql/bin:$PATH"
RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.3.tar.gz -O postgis.tar.gz && \ RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.3.tar.gz -O postgis.tar.gz && \
echo "74eb356e3f85f14233791013360881b6748f78081cc688ff9d6f0f673a762d13 postgis.tar.gz" | sha256sum --check && \ echo "74eb356e3f85f14233791013360881b6748f78081cc688ff9d6f0f673a762d13 postgis.tar.gz" | sha256sum --check && \
@@ -411,7 +411,7 @@ FROM build-deps AS timescaledb-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ARG PG_VERSION ARG PG_VERSION
ENV PATH="/usr/local/pgsql/bin:$PATH" ENV PATH "/usr/local/pgsql/bin:$PATH"
RUN case "${PG_VERSION}" in \ RUN case "${PG_VERSION}" in \
"v14" | "v15") \ "v14" | "v15") \
@@ -444,7 +444,7 @@ FROM build-deps AS pg-hint-plan-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ARG PG_VERSION ARG PG_VERSION
ENV PATH="/usr/local/pgsql/bin:$PATH" ENV PATH "/usr/local/pgsql/bin:$PATH"
RUN case "${PG_VERSION}" in \ RUN case "${PG_VERSION}" in \
"v14") \ "v14") \
@@ -480,7 +480,7 @@ RUN case "${PG_VERSION}" in \
FROM build-deps AS pg-cron-pg-build FROM build-deps AS pg-cron-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH="/usr/local/pgsql/bin/:$PATH" ENV PATH "/usr/local/pgsql/bin/:$PATH"
RUN wget https://github.com/citusdata/pg_cron/archive/refs/tags/v1.6.0.tar.gz -O pg_cron.tar.gz && \ RUN wget https://github.com/citusdata/pg_cron/archive/refs/tags/v1.6.0.tar.gz -O pg_cron.tar.gz && \
echo "383a627867d730222c272bfd25cd5e151c578d73f696d32910c7db8c665cc7db pg_cron.tar.gz" | sha256sum --check && \ echo "383a627867d730222c272bfd25cd5e151c578d73f696d32910c7db8c665cc7db pg_cron.tar.gz" | sha256sum --check && \
mkdir pg_cron-src && cd pg_cron-src && tar xzf ../pg_cron.tar.gz --strip-components=1 -C . && \ mkdir pg_cron-src && cd pg_cron-src && tar xzf ../pg_cron.tar.gz --strip-components=1 -C . && \
@@ -506,7 +506,7 @@ RUN apt-get update && \
libboost-system1.74-dev \ libboost-system1.74-dev \
libeigen3-dev libeigen3-dev
ENV PATH="/usr/local/pgsql/bin/:/usr/local/pgsql/:$PATH" ENV PATH "/usr/local/pgsql/bin/:/usr/local/pgsql/:$PATH"
RUN wget https://github.com/rdkit/rdkit/archive/refs/tags/Release_2023_03_3.tar.gz -O rdkit.tar.gz && \ RUN wget https://github.com/rdkit/rdkit/archive/refs/tags/Release_2023_03_3.tar.gz -O rdkit.tar.gz && \
echo "bdbf9a2e6988526bfeb8c56ce3cdfe2998d60ac289078e2215374288185e8c8d rdkit.tar.gz" | sha256sum --check && \ echo "bdbf9a2e6988526bfeb8c56ce3cdfe2998d60ac289078e2215374288185e8c8d rdkit.tar.gz" | sha256sum --check && \
mkdir rdkit-src && cd rdkit-src && tar xzf ../rdkit.tar.gz --strip-components=1 -C . && \ mkdir rdkit-src && cd rdkit-src && tar xzf ../rdkit.tar.gz --strip-components=1 -C . && \
@@ -546,7 +546,7 @@ RUN wget https://github.com/rdkit/rdkit/archive/refs/tags/Release_2023_03_3.tar.
FROM build-deps AS pg-uuidv7-pg-build FROM build-deps AS pg-uuidv7-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH="/usr/local/pgsql/bin/:$PATH" ENV PATH "/usr/local/pgsql/bin/:$PATH"
RUN wget https://github.com/fboulnois/pg_uuidv7/archive/refs/tags/v1.0.1.tar.gz -O pg_uuidv7.tar.gz && \ RUN wget https://github.com/fboulnois/pg_uuidv7/archive/refs/tags/v1.0.1.tar.gz -O pg_uuidv7.tar.gz && \
echo "0d0759ab01b7fb23851ecffb0bce27822e1868a4a5819bfd276101c716637a7a pg_uuidv7.tar.gz" | sha256sum --check && \ echo "0d0759ab01b7fb23851ecffb0bce27822e1868a4a5819bfd276101c716637a7a pg_uuidv7.tar.gz" | sha256sum --check && \
mkdir pg_uuidv7-src && cd pg_uuidv7-src && tar xzf ../pg_uuidv7.tar.gz --strip-components=1 -C . && \ mkdir pg_uuidv7-src && cd pg_uuidv7-src && tar xzf ../pg_uuidv7.tar.gz --strip-components=1 -C . && \
@@ -563,7 +563,7 @@ RUN wget https://github.com/fboulnois/pg_uuidv7/archive/refs/tags/v1.0.1.tar.gz
FROM build-deps AS pg-roaringbitmap-pg-build FROM build-deps AS pg-roaringbitmap-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH="/usr/local/pgsql/bin/:$PATH" ENV PATH "/usr/local/pgsql/bin/:$PATH"
RUN wget https://github.com/ChenHuajun/pg_roaringbitmap/archive/refs/tags/v0.5.4.tar.gz -O pg_roaringbitmap.tar.gz && \ RUN wget https://github.com/ChenHuajun/pg_roaringbitmap/archive/refs/tags/v0.5.4.tar.gz -O pg_roaringbitmap.tar.gz && \
echo "b75201efcb1c2d1b014ec4ae6a22769cc7a224e6e406a587f5784a37b6b5a2aa pg_roaringbitmap.tar.gz" | sha256sum --check && \ echo "b75201efcb1c2d1b014ec4ae6a22769cc7a224e6e406a587f5784a37b6b5a2aa pg_roaringbitmap.tar.gz" | sha256sum --check && \
mkdir pg_roaringbitmap-src && cd pg_roaringbitmap-src && tar xzf ../pg_roaringbitmap.tar.gz --strip-components=1 -C . && \ mkdir pg_roaringbitmap-src && cd pg_roaringbitmap-src && tar xzf ../pg_roaringbitmap.tar.gz --strip-components=1 -C . && \
@@ -580,7 +580,7 @@ RUN wget https://github.com/ChenHuajun/pg_roaringbitmap/archive/refs/tags/v0.5.4
FROM build-deps AS pg-semver-pg-build FROM build-deps AS pg-semver-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH="/usr/local/pgsql/bin/:$PATH" ENV PATH "/usr/local/pgsql/bin/:$PATH"
RUN wget https://github.com/theory/pg-semver/archive/refs/tags/v0.32.1.tar.gz -O pg_semver.tar.gz && \ RUN wget https://github.com/theory/pg-semver/archive/refs/tags/v0.32.1.tar.gz -O pg_semver.tar.gz && \
echo "fbdaf7512026d62eec03fad8687c15ed509b6ba395bff140acd63d2e4fbe25d7 pg_semver.tar.gz" | sha256sum --check && \ echo "fbdaf7512026d62eec03fad8687c15ed509b6ba395bff140acd63d2e4fbe25d7 pg_semver.tar.gz" | sha256sum --check && \
mkdir pg_semver-src && cd pg_semver-src && tar xzf ../pg_semver.tar.gz --strip-components=1 -C . && \ mkdir pg_semver-src && cd pg_semver-src && tar xzf ../pg_semver.tar.gz --strip-components=1 -C . && \
@@ -598,7 +598,7 @@ FROM build-deps AS pg-embedding-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ARG PG_VERSION ARG PG_VERSION
ENV PATH="/usr/local/pgsql/bin/:$PATH" ENV PATH "/usr/local/pgsql/bin/:$PATH"
RUN case "${PG_VERSION}" in \ RUN case "${PG_VERSION}" in \
"v14" | "v15") \ "v14" | "v15") \
export PG_EMBEDDING_VERSION=0.3.5 \ export PG_EMBEDDING_VERSION=0.3.5 \
@@ -622,7 +622,7 @@ RUN case "${PG_VERSION}" in \
FROM build-deps AS pg-anon-pg-build FROM build-deps AS pg-anon-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH="/usr/local/pgsql/bin/:$PATH" ENV PATH "/usr/local/pgsql/bin/:$PATH"
RUN wget https://github.com/neondatabase/postgresql_anonymizer/archive/refs/tags/neon_1.1.1.tar.gz -O pg_anon.tar.gz && \ RUN wget https://github.com/neondatabase/postgresql_anonymizer/archive/refs/tags/neon_1.1.1.tar.gz -O pg_anon.tar.gz && \
echo "321ea8d5c1648880aafde850a2c576e4a9e7b9933a34ce272efc839328999fa9 pg_anon.tar.gz" | sha256sum --check && \ echo "321ea8d5c1648880aafde850a2c576e4a9e7b9933a34ce272efc839328999fa9 pg_anon.tar.gz" | sha256sum --check && \
mkdir pg_anon-src && cd pg_anon-src && tar xzf ../pg_anon.tar.gz --strip-components=1 -C . && \ mkdir pg_anon-src && cd pg_anon-src && tar xzf ../pg_anon.tar.gz --strip-components=1 -C . && \
@@ -750,7 +750,7 @@ RUN wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.1.5.tar.gz -
FROM build-deps AS wal2json-pg-build FROM build-deps AS wal2json-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH="/usr/local/pgsql/bin/:$PATH" ENV PATH "/usr/local/pgsql/bin/:$PATH"
RUN wget https://github.com/eulerto/wal2json/archive/refs/tags/wal2json_2_5.tar.gz && \ RUN wget https://github.com/eulerto/wal2json/archive/refs/tags/wal2json_2_5.tar.gz && \
echo "b516653575541cf221b99cf3f8be9b6821f6dbcfc125675c85f35090f824f00e wal2json_2_5.tar.gz" | sha256sum --check && \ echo "b516653575541cf221b99cf3f8be9b6821f6dbcfc125675c85f35090f824f00e wal2json_2_5.tar.gz" | sha256sum --check && \
mkdir wal2json-src && cd wal2json-src && tar xzf ../wal2json_2_5.tar.gz --strip-components=1 -C . && \ mkdir wal2json-src && cd wal2json-src && tar xzf ../wal2json_2_5.tar.gz --strip-components=1 -C . && \
@@ -766,7 +766,7 @@ RUN wget https://github.com/eulerto/wal2json/archive/refs/tags/wal2json_2_5.tar.
FROM build-deps AS pg-ivm-build FROM build-deps AS pg-ivm-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH="/usr/local/pgsql/bin/:$PATH" ENV PATH "/usr/local/pgsql/bin/:$PATH"
RUN wget https://github.com/sraoss/pg_ivm/archive/refs/tags/v1.7.tar.gz -O pg_ivm.tar.gz && \ RUN wget https://github.com/sraoss/pg_ivm/archive/refs/tags/v1.7.tar.gz -O pg_ivm.tar.gz && \
echo "ebfde04f99203c7be4b0e873f91104090e2e83e5429c32ac242d00f334224d5e pg_ivm.tar.gz" | sha256sum --check && \ echo "ebfde04f99203c7be4b0e873f91104090e2e83e5429c32ac242d00f334224d5e pg_ivm.tar.gz" | sha256sum --check && \
mkdir pg_ivm-src && cd pg_ivm-src && tar xzf ../pg_ivm.tar.gz --strip-components=1 -C . && \ mkdir pg_ivm-src && cd pg_ivm-src && tar xzf ../pg_ivm.tar.gz --strip-components=1 -C . && \
@@ -783,7 +783,7 @@ RUN wget https://github.com/sraoss/pg_ivm/archive/refs/tags/v1.7.tar.gz -O pg_iv
FROM build-deps AS pg-partman-build FROM build-deps AS pg-partman-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH="/usr/local/pgsql/bin/:$PATH" ENV PATH "/usr/local/pgsql/bin/:$PATH"
RUN wget https://github.com/pgpartman/pg_partman/archive/refs/tags/v5.0.1.tar.gz -O pg_partman.tar.gz && \ RUN wget https://github.com/pgpartman/pg_partman/archive/refs/tags/v5.0.1.tar.gz -O pg_partman.tar.gz && \
echo "75b541733a9659a6c90dbd40fccb904a630a32880a6e3044d0c4c5f4c8a65525 pg_partman.tar.gz" | sha256sum --check && \ echo "75b541733a9659a6c90dbd40fccb904a630a32880a6e3044d0c4c5f4c8a65525 pg_partman.tar.gz" | sha256sum --check && \
mkdir pg_partman-src && cd pg_partman-src && tar xzf ../pg_partman.tar.gz --strip-components=1 -C . && \ mkdir pg_partman-src && cd pg_partman-src && tar xzf ../pg_partman.tar.gz --strip-components=1 -C . && \
@@ -933,8 +933,7 @@ COPY --from=pgjwt-pg-build /pgjwt.tar.gz /ext-src
#COPY --from=pg-tiktoken-pg-build /home/nonroot/pg_tiktoken.tar.gz /ext-src #COPY --from=pg-tiktoken-pg-build /home/nonroot/pg_tiktoken.tar.gz /ext-src
COPY --from=hypopg-pg-build /hypopg.tar.gz /ext-src COPY --from=hypopg-pg-build /hypopg.tar.gz /ext-src
COPY --from=pg-hashids-pg-build /pg_hashids.tar.gz /ext-src COPY --from=pg-hashids-pg-build /pg_hashids.tar.gz /ext-src
COPY --from=rum-pg-build /rum.tar.gz /ext-src #COPY --from=rum-pg-build /rum.tar.gz /ext-src
COPY patches/rum.patch /ext-src
#COPY --from=pgtap-pg-build /pgtap.tar.gz /ext-src #COPY --from=pgtap-pg-build /pgtap.tar.gz /ext-src
COPY --from=ip4r-pg-build /ip4r.tar.gz /ext-src COPY --from=ip4r-pg-build /ip4r.tar.gz /ext-src
COPY --from=prefix-pg-build /prefix.tar.gz /ext-src COPY --from=prefix-pg-build /prefix.tar.gz /ext-src
@@ -946,7 +945,7 @@ COPY patches/pg_hintplan.patch /ext-src
COPY --from=pg-cron-pg-build /pg_cron.tar.gz /ext-src COPY --from=pg-cron-pg-build /pg_cron.tar.gz /ext-src
COPY patches/pg_cron.patch /ext-src COPY patches/pg_cron.patch /ext-src
#COPY --from=pg-pgx-ulid-build /home/nonroot/pgx_ulid.tar.gz /ext-src #COPY --from=pg-pgx-ulid-build /home/nonroot/pgx_ulid.tar.gz /ext-src
#COPY --from=rdkit-pg-build /rdkit.tar.gz /ext-src COPY --from=rdkit-pg-build /rdkit.tar.gz /ext-src
COPY --from=pg-uuidv7-pg-build /pg_uuidv7.tar.gz /ext-src COPY --from=pg-uuidv7-pg-build /pg_uuidv7.tar.gz /ext-src
COPY --from=pg-roaringbitmap-pg-build /pg_roaringbitmap.tar.gz /ext-src COPY --from=pg-roaringbitmap-pg-build /pg_roaringbitmap.tar.gz /ext-src
COPY --from=pg-semver-pg-build /pg_semver.tar.gz /ext-src COPY --from=pg-semver-pg-build /pg_semver.tar.gz /ext-src
@@ -961,7 +960,6 @@ RUN cd /ext-src/ && for f in *.tar.gz; \
rm -rf $dname; mkdir $dname; tar xzf $f --strip-components=1 -C $dname \ rm -rf $dname; mkdir $dname; tar xzf $f --strip-components=1 -C $dname \
|| exit 1; rm -f $f; done || exit 1; rm -f $f; done
RUN cd /ext-src/pgvector-src && patch -p1 <../pgvector.patch RUN cd /ext-src/pgvector-src && patch -p1 <../pgvector.patch
RUN cd /ext-src/rum-src && patch -p1 <../rum.patch
# cmake is required for the h3 test # cmake is required for the h3 test
RUN apt-get update && apt-get install -y cmake RUN apt-get update && apt-get install -y cmake
RUN patch -p1 < /ext-src/pg_hintplan.patch RUN patch -p1 < /ext-src/pg_hintplan.patch
@@ -1034,6 +1032,6 @@ RUN apt update && \
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \ rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8 localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8
ENV LANG=en_US.utf8 ENV LANG en_US.utf8
USER postgres USER postgres
ENTRYPOINT ["/usr/local/bin/compute_ctl"] ENTRYPOINT ["/usr/local/bin/compute_ctl"]

View File

@@ -313,3 +313,5 @@ To get more familiar with this aspect, refer to:
- Read [CONTRIBUTING.md](/CONTRIBUTING.md) to learn about project code style and practices. - Read [CONTRIBUTING.md](/CONTRIBUTING.md) to learn about project code style and practices.
- To get familiar with a source tree layout, use [sourcetree.md](/docs/sourcetree.md). - To get familiar with a source tree layout, use [sourcetree.md](/docs/sourcetree.md).
- To learn more about PostgreSQL internals, check http://www.interdb.jp/pg/index.html - To learn more about PostgreSQL internals, check http://www.interdb.jp/pg/index.html
.

View File

@@ -4,11 +4,6 @@ version = "0.1.0"
edition.workspace = true edition.workspace = true
license.workspace = true license.workspace = true
[features]
default = []
# Enables test specific features.
testing = []
[dependencies] [dependencies]
anyhow.workspace = true anyhow.workspace = true
async-compression.workspace = true async-compression.workspace = true

View File

@@ -400,15 +400,7 @@ impl ComputeNode {
pub fn get_basebackup(&self, compute_state: &ComputeState, lsn: Lsn) -> Result<()> { pub fn get_basebackup(&self, compute_state: &ComputeState, lsn: Lsn) -> Result<()> {
let mut retry_period_ms = 500.0; let mut retry_period_ms = 500.0;
let mut attempts = 0; let mut attempts = 0;
const DEFAULT_ATTEMPTS: u16 = 10; let max_attempts = 10;
#[cfg(feature = "testing")]
let max_attempts = if let Ok(v) = env::var("NEON_COMPUTE_TESTING_BASEBACKUP_RETRIES") {
u16::from_str(&v).unwrap()
} else {
DEFAULT_ATTEMPTS
};
#[cfg(not(feature = "testing"))]
let max_attempts = DEFAULT_ATTEMPTS;
loop { loop {
let result = self.try_get_basebackup(compute_state, lsn); let result = self.try_get_basebackup(compute_state, lsn);
match result { match result {

View File

@@ -289,7 +289,7 @@ fn fill_remote_storage_secrets_vars(mut cmd: &mut Command) -> &mut Command {
fn fill_env_vars_prefixed_neon(mut cmd: &mut Command) -> &mut Command { fn fill_env_vars_prefixed_neon(mut cmd: &mut Command) -> &mut Command {
for (var, val) in std::env::vars() { for (var, val) in std::env::vars() {
if var.starts_with("NEON_") { if var.starts_with("NEON_PAGESERVER_") {
cmd = cmd.env(var, val); cmd = cmd.env(var, val);
} }
} }

View File

@@ -158,8 +158,6 @@ pub struct NeonStorageControllerConf {
/// Threshold for auto-splitting a tenant into shards /// Threshold for auto-splitting a tenant into shards
pub split_threshold: Option<u64>, pub split_threshold: Option<u64>,
pub max_secondary_lag_bytes: Option<u64>,
} }
impl NeonStorageControllerConf { impl NeonStorageControllerConf {
@@ -175,7 +173,6 @@ impl Default for NeonStorageControllerConf {
max_offline: Self::DEFAULT_MAX_OFFLINE_INTERVAL, max_offline: Self::DEFAULT_MAX_OFFLINE_INTERVAL,
max_warming_up: Self::DEFAULT_MAX_WARMING_UP_INTERVAL, max_warming_up: Self::DEFAULT_MAX_WARMING_UP_INTERVAL,
split_threshold: None, split_threshold: None,
max_secondary_lag_bytes: None,
} }
} }
} }

View File

@@ -383,10 +383,6 @@ impl StorageController {
args.push(format!("--split-threshold={split_threshold}")) args.push(format!("--split-threshold={split_threshold}"))
} }
if let Some(lag) = self.config.max_secondary_lag_bytes.as_ref() {
args.push(format!("--max-secondary-lag-bytes={lag}"))
}
args.push(format!( args.push(format!(
"--neon-local-repo-dir={}", "--neon-local-repo-dir={}",
self.env.base_data_dir.display() self.env.base_data_dir.display()

View File

@@ -4,7 +4,6 @@
# to your expectations and requirements. # to your expectations and requirements.
# Root options # Root options
[graph]
targets = [ targets = [
{ triple = "x86_64-unknown-linux-gnu" }, { triple = "x86_64-unknown-linux-gnu" },
{ triple = "aarch64-unknown-linux-gnu" }, { triple = "aarch64-unknown-linux-gnu" },
@@ -13,7 +12,6 @@ targets = [
] ]
all-features = false all-features = false
no-default-features = false no-default-features = false
[output]
feature-depth = 1 feature-depth = 1
# This section is considered when running `cargo deny check advisories` # This section is considered when running `cargo deny check advisories`
@@ -21,16 +19,17 @@ feature-depth = 1
# https://embarkstudios.github.io/cargo-deny/checks/advisories/cfg.html # https://embarkstudios.github.io/cargo-deny/checks/advisories/cfg.html
[advisories] [advisories]
db-urls = ["https://github.com/rustsec/advisory-db"] db-urls = ["https://github.com/rustsec/advisory-db"]
vulnerability = "deny"
unmaintained = "warn"
yanked = "warn" yanked = "warn"
notice = "warn"
[[advisories.ignore]] ignore = []
id = "RUSTSEC-2023-0071"
reason = "the marvin attack only affects private key decryption, not public key signature verification"
# This section is considered when running `cargo deny check licenses` # This section is considered when running `cargo deny check licenses`
# More documentation for the licenses section can be found here: # More documentation for the licenses section can be found here:
# https://embarkstudios.github.io/cargo-deny/checks/licenses/cfg.html # https://embarkstudios.github.io/cargo-deny/checks/licenses/cfg.html
[licenses] [licenses]
unlicensed = "deny"
allow = [ allow = [
"Apache-2.0", "Apache-2.0",
"Artistic-2.0", "Artistic-2.0",
@@ -43,6 +42,10 @@ allow = [
"OpenSSL", "OpenSSL",
"Unicode-DFS-2016", "Unicode-DFS-2016",
] ]
deny = []
copyleft = "warn"
allow-osi-fsf-free = "neither"
default = "deny"
confidence-threshold = 0.8 confidence-threshold = 0.8
exceptions = [ exceptions = [
# Zlib license has some restrictions if we decide to change sth # Zlib license has some restrictions if we decide to change sth

View File

@@ -78,7 +78,7 @@ for pg_version in 14 15 16; do
docker cp $TMPDIR/data $COMPUTE_CONTAINER_NAME:/ext-src/pg_hint_plan-src/ docker cp $TMPDIR/data $COMPUTE_CONTAINER_NAME:/ext-src/pg_hint_plan-src/
rm -rf $TMPDIR rm -rf $TMPDIR
# We are running tests now # We are running tests now
if docker exec -e SKIP=timescaledb-src,rdkit-src,postgis-src,pgx_ulid-src,pgtap-src,pg_tiktoken-src,pg_jsonschema-src,pg_graphql-src,kq_imcx-src,wal2json_2_5-src \ if docker exec -e SKIP=rum-src,timescaledb-src,rdkit-src,postgis-src,pgx_ulid-src,pgtap-src,pg_tiktoken-src,pg_jsonschema-src,pg_graphql-src,kq_imcx-src,wal2json_2_5-src \
$TEST_CONTAINER_NAME /run-tests.sh | tee testout.txt $TEST_CONTAINER_NAME /run-tests.sh | tee testout.txt
then then
cleanup cleanup

View File

@@ -1,15 +1,15 @@
#!/bin/bash #!/bin/bash
set -x set -x
cd /ext-src || exit 2 cd /ext-src
FAILED= FAILED=
LIST=$( (echo "${SKIP//","/"\n"}"; ls -d -- *-src) | sort | uniq -u) LIST=$((echo ${SKIP} | sed 's/,/\n/g'; ls -d *-src) | sort | uniq -u)
for d in ${LIST} for d in ${LIST}
do do
[ -d "${d}" ] || continue [ -d ${d} ] || continue
psql -c "select 1" >/dev/null || break psql -c "select 1" >/dev/null || break
USE_PGXS=1 make -C "${d}" installcheck || FAILED="${d} ${FAILED}" make -C ${d} installcheck || FAILED="${d} ${FAILED}"
done done
[ -z "${FAILED}" ] && exit 0 [ -z "${FAILED}" ] && exit 0
echo "${FAILED}" echo ${FAILED}
exit 1 exit 1

View File

@@ -1,18 +1,13 @@
# Summary # Summary
# Looking for `neon.tech` docs?
This page linkes to a selection of technical content about the open source code in this repository.
Please visit https://neon.tech/docs for documentation about using the Neon service, which is based on the code
in this repository.
# Architecture
[Introduction]() [Introduction]()
- [Separation of Compute and Storage](./separation-compute-storage.md) - [Separation of Compute and Storage](./separation-compute-storage.md)
# Architecture
- [Compute]() - [Compute]()
- [WAL proposer]()
- [WAL Backpressure]()
- [Postgres changes](./core_changes.md) - [Postgres changes](./core_changes.md)
- [Pageserver](./pageserver.md) - [Pageserver](./pageserver.md)
@@ -21,15 +16,33 @@ in this repository.
- [WAL Redo](./pageserver-walredo.md) - [WAL Redo](./pageserver-walredo.md)
- [Page cache](./pageserver-pagecache.md) - [Page cache](./pageserver-pagecache.md)
- [Storage](./pageserver-storage.md) - [Storage](./pageserver-storage.md)
- [Datadir mapping]()
- [Layer files]()
- [Branching]()
- [Garbage collection]()
- [Cloud Storage]()
- [Processing a GetPage request](./pageserver-processing-getpage.md) - [Processing a GetPage request](./pageserver-processing-getpage.md)
- [Processing WAL](./pageserver-processing-wal.md) - [Processing WAL](./pageserver-processing-wal.md)
- [Management API]()
- [Tenant Rebalancing]()
- [WAL Service](walservice.md) - [WAL Service](walservice.md)
- [Consensus protocol](safekeeper-protocol.md) - [Consensus protocol](safekeeper-protocol.md)
- [Management API]()
- [Rebalancing]()
- [Control Plane]()
- [Proxy]()
- [Source view](./sourcetree.md) - [Source view](./sourcetree.md)
- [docker.md](./docker.md) — Docker images and building pipeline. - [docker.md](./docker.md) — Docker images and building pipeline.
- [Error handling and logging](./error-handling.md) - [Error handling and logging](./error-handling.md)
- [Testing]()
- [Unit testing]()
- [Integration testing]()
- [Benchmarks]()
- [Glossary](./glossary.md) - [Glossary](./glossary.md)
@@ -45,6 +58,28 @@ in this repository.
# RFCs # RFCs
Major changes are documented in RFCS: - [RFCs](./rfcs/README.md)
- See [RFCs](./rfcs/README.md) for more information
- view the RFCs at https://github.com/neondatabase/neon/tree/main/docs/rfcs - [002-storage](rfcs/002-storage.md)
- [003-laptop-cli](rfcs/003-laptop-cli.md)
- [004-durability](rfcs/004-durability.md)
- [005-zenith_local](rfcs/005-zenith_local.md)
- [006-laptop-cli-v2-CLI](rfcs/006-laptop-cli-v2-CLI.md)
- [006-laptop-cli-v2-repository-structure](rfcs/006-laptop-cli-v2-repository-structure.md)
- [007-serverless-on-laptop](rfcs/007-serverless-on-laptop.md)
- [008-push-pull](rfcs/008-push-pull.md)
- [009-snapshot-first-storage-cli](rfcs/009-snapshot-first-storage-cli.md)
- [009-snapshot-first-storage](rfcs/009-snapshot-first-storage.md)
- [009-snapshot-first-storage-pitr](rfcs/009-snapshot-first-storage-pitr.md)
- [010-storage_details](rfcs/010-storage_details.md)
- [011-retention-policy](rfcs/011-retention-policy.md)
- [012-background-tasks](rfcs/012-background-tasks.md)
- [013-term-history](rfcs/013-term-history.md)
- [014-safekeepers-gossip](rfcs/014-safekeepers-gossip.md)
- [014-storage-lsm](rfcs/014-storage-lsm.md)
- [015-storage-messaging](rfcs/015-storage-messaging.md)
- [016-connection-routing](rfcs/016-connection-routing.md)
- [017-timeline-data-management](rfcs/017-timeline-data-management.md)
- [018-storage-messaging-2](rfcs/018-storage-messaging-2.md)
- [019-tenant-timeline-lifecycles](rfcs/019-tenant-timeline-lifecycles.md)
- [cluster-size-limits](rfcs/cluster-size-limits.md)

View File

@@ -107,10 +107,7 @@ impl Key {
/// As long as Neon does not support tablespace (because of lack of access to local file system), /// As long as Neon does not support tablespace (because of lack of access to local file system),
/// we can assume that only some predefined namespace OIDs are used which can fit in u16 /// we can assume that only some predefined namespace OIDs are used which can fit in u16
pub fn to_i128(&self) -> i128 { pub fn to_i128(&self) -> i128 {
assert!( assert!(self.field2 <= 0xFFFF || self.field2 == 0xFFFFFFFF || self.field2 == 0x22222222);
self.field2 <= 0xFFFF || self.field2 == 0xFFFFFFFF || self.field2 == 0x22222222,
"invalid key: {self}",
);
(((self.field1 & 0x7F) as i128) << 120) (((self.field1 & 0x7F) as i128) << 120)
| (((self.field2 & 0xFFFF) as i128) << 104) | (((self.field2 & 0xFFFF) as i128) << 104)
| ((self.field3 as i128) << 72) | ((self.field3 as i128) << 72)

View File

@@ -637,13 +637,6 @@ pub struct TenantInfo {
pub current_physical_size: Option<u64>, // physical size is only included in `tenant_status` endpoint pub current_physical_size: Option<u64>, // physical size is only included in `tenant_status` endpoint
pub attachment_status: TenantAttachmentStatus, pub attachment_status: TenantAttachmentStatus,
pub generation: u32, pub generation: u32,
/// Opaque explanation if gc is being blocked.
///
/// Only looked up for the individual tenant detail, not the listing. This is purely for
/// debugging, not included in openapi.
#[serde(skip_serializing_if = "Option::is_none")]
pub gc_blocking: Option<String>,
} }
#[derive(Serialize, Deserialize, Clone)] #[derive(Serialize, Deserialize, Clone)]
@@ -947,8 +940,6 @@ pub struct TopTenantShardsResponse {
} }
pub mod virtual_file { pub mod virtual_file {
use std::path::PathBuf;
#[derive( #[derive(
Copy, Copy,
Clone, Clone,
@@ -967,53 +958,6 @@ pub mod virtual_file {
#[cfg(target_os = "linux")] #[cfg(target_os = "linux")]
TokioEpollUring, TokioEpollUring,
} }
/// Direct IO modes for a pageserver.
#[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize, Default)]
#[serde(tag = "mode", rename_all = "kebab-case", deny_unknown_fields)]
pub enum DirectIoMode {
/// Direct IO disabled (uses usual buffered IO).
#[default]
Disabled,
/// Direct IO disabled (performs checks and perf simulations).
Evaluate {
/// Alignment check level
alignment_check: DirectIoAlignmentCheckLevel,
/// Latency padded for performance simulation.
latency_padding: DirectIoLatencyPadding,
},
/// Direct IO enabled.
Enabled {
/// Actions to perform on alignment error.
on_alignment_error: DirectIoOnAlignmentErrorAction,
},
}
#[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize, Default)]
#[serde(rename_all = "kebab-case")]
pub enum DirectIoAlignmentCheckLevel {
#[default]
Error,
Log,
None,
}
#[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize, Default)]
#[serde(rename_all = "kebab-case")]
pub enum DirectIoOnAlignmentErrorAction {
Error,
#[default]
FallbackToBuffered,
}
#[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize, Default)]
#[serde(tag = "type", rename_all = "kebab-case")]
pub enum DirectIoLatencyPadding {
/// Pad virtual file operations with IO to a fake file.
FakeFileRW { path: PathBuf },
#[default]
None,
}
} }
// Wrapped in libpq CopyData // Wrapped in libpq CopyData
@@ -1483,7 +1427,6 @@ mod tests {
current_physical_size: Some(42), current_physical_size: Some(42),
attachment_status: TenantAttachmentStatus::Attached, attachment_status: TenantAttachmentStatus::Attached,
generation: 1, generation: 1,
gc_blocking: None,
}; };
let expected_active = json!({ let expected_active = json!({
"id": original_active.id.to_string(), "id": original_active.id.to_string(),
@@ -1506,7 +1449,6 @@ mod tests {
current_physical_size: Some(42), current_physical_size: Some(42),
attachment_status: TenantAttachmentStatus::Attached, attachment_status: TenantAttachmentStatus::Attached,
generation: 1, generation: 1,
gc_blocking: None,
}; };
let expected_broken = json!({ let expected_broken = json!({
"id": original_broken.id.to_string(), "id": original_broken.id.to_string(),

View File

@@ -1,8 +1,6 @@
use std::collections::HashSet;
use utils::id::TimelineId; use utils::id::TimelineId;
#[derive(Debug, Default, PartialEq, serde::Serialize, serde::Deserialize)] #[derive(Debug, Default, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct AncestorDetached { pub struct AncestorDetached {
pub reparented_timelines: HashSet<TimelineId>, pub reparented_timelines: Vec<TimelineId>,
} }

View File

@@ -144,20 +144,7 @@ impl PgConnectionConfig {
// implement and this function is hardly a bottleneck. The function is only called around // implement and this function is hardly a bottleneck. The function is only called around
// establishing a new connection. // establishing a new connection.
#[allow(unstable_name_collisions)] #[allow(unstable_name_collisions)]
config.options( config.options(&encode_options(&self.options));
&self
.options
.iter()
.map(|s| {
if s.contains(['\\', ' ']) {
Cow::Owned(s.replace('\\', "\\\\").replace(' ', "\\ "))
} else {
Cow::Borrowed(s.as_str())
}
})
.intersperse(Cow::Borrowed(" ")) // TODO: use impl from std once it's stabilized
.collect::<String>(),
);
} }
config config
} }
@@ -178,6 +165,21 @@ impl PgConnectionConfig {
} }
} }
#[allow(unstable_name_collisions)]
fn encode_options(options: &[String]) -> String {
options
.iter()
.map(|s| {
if s.contains(['\\', ' ']) {
Cow::Owned(s.replace('\\', "\\\\").replace(' ', "\\ "))
} else {
Cow::Borrowed(s.as_str())
}
})
.intersperse(Cow::Borrowed(" ")) // TODO: use impl from std once it's stabilized
.collect::<String>()
}
impl fmt::Display for PgConnectionConfig { impl fmt::Display for PgConnectionConfig {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
// The password is intentionally hidden and not part of this display string. // The password is intentionally hidden and not part of this display string.
@@ -206,7 +208,7 @@ impl fmt::Debug for PgConnectionConfig {
#[cfg(test)] #[cfg(test)]
mod tests_pg_connection_config { mod tests_pg_connection_config {
use crate::PgConnectionConfig; use crate::{encode_options, PgConnectionConfig};
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use url::Host; use url::Host;
@@ -255,18 +257,12 @@ mod tests_pg_connection_config {
#[test] #[test]
fn test_with_options() { fn test_with_options() {
let cfg = PgConnectionConfig::new_host_port(STUB_HOST.clone(), 123).extend_options([ let options = encode_options(&[
"hello", "hello".to_owned(),
"world", "world".to_owned(),
"with space", "with space".to_owned(),
"and \\ backslashes", "and \\ backslashes".to_owned(),
]); ]);
assert_eq!(cfg.host(), &*STUB_HOST); assert_eq!(options, "hello world with\\ space and\\ \\\\\\ backslashes");
assert_eq!(cfg.port(), 123);
assert_eq!(cfg.raw_address(), "stub.host.example:123");
assert_eq!(
cfg.to_tokio_postgres_config().get_options(),
Some("hello world with\\ space and\\ \\\\\\ backslashes")
);
} }
} }

View File

@@ -128,7 +128,7 @@ pub mod circuit_breaker;
/// ///
/// ############################################################################################# /// #############################################################################################
/// TODO this macro is not the way the library is intended to be used, see <https://github.com/neondatabase/neon/issues/1565> for details. /// TODO this macro is not the way the library is intended to be used, see <https://github.com/neondatabase/neon/issues/1565> for details.
/// We used `cachepot` to reduce our current CI build times: <https://github.com/neondatabase/cloud/pull/1033#issuecomment-1100935036> /// We use `cachepot` to reduce our current CI build times: <https://github.com/neondatabase/cloud/pull/1033#issuecomment-1100935036>
/// Yet, it seems to ignore the GIT_VERSION env variable, passed to Docker build, even with build.rs that contains /// Yet, it seems to ignore the GIT_VERSION env variable, passed to Docker build, even with build.rs that contains
/// `println!("cargo:rerun-if-env-changed=GIT_VERSION");` code for cachepot cache invalidation. /// `println!("cargo:rerun-if-env-changed=GIT_VERSION");` code for cachepot cache invalidation.
/// The problem needs further investigation and regular `const` declaration instead of a macro. /// The problem needs further investigation and regular `const` declaration instead of a macro.

View File

@@ -78,9 +78,8 @@ impl Drop for GateGuard {
} }
} }
#[derive(Debug, thiserror::Error)] #[derive(Debug)]
pub enum GateError { pub enum GateError {
#[error("gate is closed")]
GateClosed, GateClosed,
} }

View File

@@ -108,7 +108,3 @@ harness = false
[[bench]] [[bench]]
name = "bench_walredo" name = "bench_walredo"
harness = false harness = false
[[bench]]
name = "bench_ingest"
harness = false

View File

@@ -1,239 +0,0 @@
use std::{env, num::NonZeroUsize};
use bytes::Bytes;
use camino::Utf8PathBuf;
use criterion::{criterion_group, criterion_main, Criterion};
use pageserver::{
config::PageServerConf,
context::{DownloadBehavior, RequestContext},
l0_flush::{L0FlushConfig, L0FlushGlobalState},
page_cache,
repository::Value,
task_mgr::TaskKind,
tenant::storage_layer::InMemoryLayer,
virtual_file,
};
use pageserver_api::{key::Key, shard::TenantShardId};
use utils::{
bin_ser::BeSer,
id::{TenantId, TimelineId},
};
// A very cheap hash for generating non-sequential keys.
fn murmurhash32(mut h: u32) -> u32 {
h ^= h >> 16;
h = h.wrapping_mul(0x85ebca6b);
h ^= h >> 13;
h = h.wrapping_mul(0xc2b2ae35);
h ^= h >> 16;
h
}
enum KeyLayout {
/// Sequential unique keys
Sequential,
/// Random unique keys
Random,
/// Random keys, but only use the bits from the mask of them
RandomReuse(u32),
}
enum WriteDelta {
Yes,
No,
}
async fn ingest(
conf: &'static PageServerConf,
put_size: usize,
put_count: usize,
key_layout: KeyLayout,
write_delta: WriteDelta,
) -> anyhow::Result<()> {
let mut lsn = utils::lsn::Lsn(1000);
let mut key = Key::from_i128(0x0);
let timeline_id = TimelineId::generate();
let tenant_id = TenantId::generate();
let tenant_shard_id = TenantShardId::unsharded(tenant_id);
tokio::fs::create_dir_all(conf.timeline_path(&tenant_shard_id, &timeline_id)).await?;
let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error);
let gate = utils::sync::gate::Gate::default();
let entered = gate.enter().unwrap();
let layer =
InMemoryLayer::create(conf, timeline_id, tenant_shard_id, lsn, entered, &ctx).await?;
let data = Value::Image(Bytes::from(vec![0u8; put_size])).ser()?;
let ctx = RequestContext::new(
pageserver::task_mgr::TaskKind::WalReceiverConnectionHandler,
pageserver::context::DownloadBehavior::Download,
);
for i in 0..put_count {
lsn += put_size as u64;
// Generate lots of keys within a single relation, which simulates the typical bulk ingest case: people
// usually care the most about write performance when they're blasting a huge batch of data into a huge table.
match key_layout {
KeyLayout::Sequential => {
// Use sequential order to illustrate the experience a user is likely to have
// when ingesting bulk data.
key.field6 = i as u32;
}
KeyLayout::Random => {
// Use random-order keys to avoid giving a false advantage to data structures that are
// faster when inserting on the end.
key.field6 = murmurhash32(i as u32);
}
KeyLayout::RandomReuse(mask) => {
// Use low bits only, to limit cardinality
key.field6 = murmurhash32(i as u32) & mask;
}
}
layer.put_value(key, lsn, &data, &ctx).await?;
}
layer.freeze(lsn + 1).await;
if matches!(write_delta, WriteDelta::Yes) {
let l0_flush_state = L0FlushGlobalState::new(L0FlushConfig::Direct {
max_concurrency: NonZeroUsize::new(1).unwrap(),
});
let (_desc, path) = layer
.write_to_disk(&ctx, None, l0_flush_state.inner())
.await?
.unwrap();
tokio::fs::remove_file(path).await?;
}
Ok(())
}
/// Wrapper to instantiate a tokio runtime
fn ingest_main(
conf: &'static PageServerConf,
put_size: usize,
put_count: usize,
key_layout: KeyLayout,
write_delta: WriteDelta,
) {
let runtime = tokio::runtime::Builder::new_current_thread()
.enable_all()
.build()
.unwrap();
runtime.block_on(async move {
let r = ingest(conf, put_size, put_count, key_layout, write_delta).await;
if let Err(e) = r {
panic!("{e:?}");
}
});
}
/// Declare a series of benchmarks for the Pageserver's ingest write path.
///
/// This benchmark does not include WAL decode: it starts at InMemoryLayer::put_value, and ends either
/// at freezing the ephemeral layer, or writing the ephemeral layer out to an L0 (depending on whether WriteDelta is set).
///
/// Genuine disk I/O is used, so expect results to differ depending on storage. However, when running on
/// a fast disk, CPU is the bottleneck at time of writing.
fn criterion_benchmark(c: &mut Criterion) {
let temp_dir_parent: Utf8PathBuf = env::current_dir().unwrap().try_into().unwrap();
let temp_dir = camino_tempfile::tempdir_in(temp_dir_parent).unwrap();
eprintln!("Data directory: {}", temp_dir.path());
let conf: &'static PageServerConf = Box::leak(Box::new(
pageserver::config::PageServerConf::dummy_conf(temp_dir.path().to_path_buf()),
));
virtual_file::init(16384, virtual_file::io_engine_for_bench());
page_cache::init(conf.page_cache_size);
{
let mut group = c.benchmark_group("ingest-small-values");
let put_size = 100usize;
let put_count = 128 * 1024 * 1024 / put_size;
group.throughput(criterion::Throughput::Bytes((put_size * put_count) as u64));
group.sample_size(10);
group.bench_function("ingest 128MB/100b seq", |b| {
b.iter(|| {
ingest_main(
conf,
put_size,
put_count,
KeyLayout::Sequential,
WriteDelta::Yes,
)
})
});
group.bench_function("ingest 128MB/100b rand", |b| {
b.iter(|| {
ingest_main(
conf,
put_size,
put_count,
KeyLayout::Random,
WriteDelta::Yes,
)
})
});
group.bench_function("ingest 128MB/100b rand-1024keys", |b| {
b.iter(|| {
ingest_main(
conf,
put_size,
put_count,
KeyLayout::RandomReuse(0x3ff),
WriteDelta::Yes,
)
})
});
group.bench_function("ingest 128MB/100b seq, no delta", |b| {
b.iter(|| {
ingest_main(
conf,
put_size,
put_count,
KeyLayout::Sequential,
WriteDelta::No,
)
})
});
}
{
let mut group = c.benchmark_group("ingest-big-values");
let put_size = 8192usize;
let put_count = 128 * 1024 * 1024 / put_size;
group.throughput(criterion::Throughput::Bytes((put_size * put_count) as u64));
group.sample_size(10);
group.bench_function("ingest 128MB/8k seq", |b| {
b.iter(|| {
ingest_main(
conf,
put_size,
put_count,
KeyLayout::Sequential,
WriteDelta::Yes,
)
})
});
group.bench_function("ingest 128MB/8k seq, no delta", |b| {
b.iter(|| {
ingest_main(
conf,
put_size,
put_count,
KeyLayout::Sequential,
WriteDelta::No,
)
})
});
}
}
criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);

View File

@@ -123,7 +123,6 @@ fn main() -> anyhow::Result<()> {
// after setting up logging, log the effective IO engine choice and read path implementations // after setting up logging, log the effective IO engine choice and read path implementations
info!(?conf.virtual_file_io_engine, "starting with virtual_file IO engine"); info!(?conf.virtual_file_io_engine, "starting with virtual_file IO engine");
info!(?conf.virtual_file_direct_io, "starting with virtual_file Direct IO settings");
info!(?conf.get_impl, "starting with get page implementation"); info!(?conf.get_impl, "starting with get page implementation");
info!(?conf.get_vectored_impl, "starting with vectored get page implementation"); info!(?conf.get_vectored_impl, "starting with vectored get page implementation");
info!(?conf.compact_level0_phase1_value_access, "starting with setting for compact_level0_phase1_value_access"); info!(?conf.compact_level0_phase1_value_access, "starting with setting for compact_level0_phase1_value_access");

View File

@@ -300,9 +300,6 @@ pub struct PageServerConf {
/// This flag is temporary and will be removed after gradual rollout. /// This flag is temporary and will be removed after gradual rollout.
/// See <https://github.com/neondatabase/neon/issues/8184>. /// See <https://github.com/neondatabase/neon/issues/8184>.
pub compact_level0_phase1_value_access: CompactL0Phase1ValueAccess, pub compact_level0_phase1_value_access: CompactL0Phase1ValueAccess,
/// Direct IO settings
pub virtual_file_direct_io: virtual_file::DirectIoMode,
} }
/// We do not want to store this in a PageServerConf because the latter may be logged /// We do not want to store this in a PageServerConf because the latter may be logged
@@ -411,8 +408,6 @@ struct PageServerConfigBuilder {
l0_flush: BuilderValue<L0FlushConfig>, l0_flush: BuilderValue<L0FlushConfig>,
compact_level0_phase1_value_access: BuilderValue<CompactL0Phase1ValueAccess>, compact_level0_phase1_value_access: BuilderValue<CompactL0Phase1ValueAccess>,
virtual_file_direct_io: BuilderValue<virtual_file::DirectIoMode>,
} }
impl PageServerConfigBuilder { impl PageServerConfigBuilder {
@@ -503,7 +498,6 @@ impl PageServerConfigBuilder {
ephemeral_bytes_per_memory_kb: Set(DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB), ephemeral_bytes_per_memory_kb: Set(DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB),
l0_flush: Set(L0FlushConfig::default()), l0_flush: Set(L0FlushConfig::default()),
compact_level0_phase1_value_access: Set(CompactL0Phase1ValueAccess::default()), compact_level0_phase1_value_access: Set(CompactL0Phase1ValueAccess::default()),
virtual_file_direct_io: Set(virtual_file::DirectIoMode::default()),
} }
} }
} }
@@ -691,10 +685,6 @@ impl PageServerConfigBuilder {
self.compact_level0_phase1_value_access = BuilderValue::Set(value); self.compact_level0_phase1_value_access = BuilderValue::Set(value);
} }
pub fn virtual_file_direct_io(&mut self, value: virtual_file::DirectIoMode) {
self.virtual_file_direct_io = BuilderValue::Set(value);
}
pub fn build(self, id: NodeId) -> anyhow::Result<PageServerConf> { pub fn build(self, id: NodeId) -> anyhow::Result<PageServerConf> {
let default = Self::default_values(); let default = Self::default_values();
@@ -753,7 +743,6 @@ impl PageServerConfigBuilder {
ephemeral_bytes_per_memory_kb, ephemeral_bytes_per_memory_kb,
l0_flush, l0_flush,
compact_level0_phase1_value_access, compact_level0_phase1_value_access,
virtual_file_direct_io,
} }
CUSTOM LOGIC CUSTOM LOGIC
{ {
@@ -1029,9 +1018,6 @@ impl PageServerConf {
"compact_level0_phase1_value_access" => { "compact_level0_phase1_value_access" => {
builder.compact_level0_phase1_value_access(utils::toml_edit_ext::deserialize_item(item).context("compact_level0_phase1_value_access")?) builder.compact_level0_phase1_value_access(utils::toml_edit_ext::deserialize_item(item).context("compact_level0_phase1_value_access")?)
} }
"virtual_file_direct_io" => {
builder.virtual_file_direct_io(utils::toml_edit_ext::deserialize_item(item).context("virtual_file_direct_io")?)
}
_ => bail!("unrecognized pageserver option '{key}'"), _ => bail!("unrecognized pageserver option '{key}'"),
} }
} }
@@ -1117,7 +1103,6 @@ impl PageServerConf {
ephemeral_bytes_per_memory_kb: defaults::DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB, ephemeral_bytes_per_memory_kb: defaults::DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB,
l0_flush: L0FlushConfig::default(), l0_flush: L0FlushConfig::default(),
compact_level0_phase1_value_access: CompactL0Phase1ValueAccess::default(), compact_level0_phase1_value_access: CompactL0Phase1ValueAccess::default(),
virtual_file_direct_io: virtual_file::DirectIoMode::default(),
} }
} }
} }
@@ -1360,7 +1345,6 @@ background_task_maximum_delay = '334 s'
ephemeral_bytes_per_memory_kb: defaults::DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB, ephemeral_bytes_per_memory_kb: defaults::DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB,
l0_flush: L0FlushConfig::default(), l0_flush: L0FlushConfig::default(),
compact_level0_phase1_value_access: CompactL0Phase1ValueAccess::default(), compact_level0_phase1_value_access: CompactL0Phase1ValueAccess::default(),
virtual_file_direct_io: virtual_file::DirectIoMode::default(),
}, },
"Correct defaults should be used when no config values are provided" "Correct defaults should be used when no config values are provided"
); );
@@ -1436,7 +1420,6 @@ background_task_maximum_delay = '334 s'
ephemeral_bytes_per_memory_kb: defaults::DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB, ephemeral_bytes_per_memory_kb: defaults::DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB,
l0_flush: L0FlushConfig::default(), l0_flush: L0FlushConfig::default(),
compact_level0_phase1_value_access: CompactL0Phase1ValueAccess::default(), compact_level0_phase1_value_access: CompactL0Phase1ValueAccess::default(),
virtual_file_direct_io: virtual_file::DirectIoMode::default(),
}, },
"Should be able to parse all basic config values correctly" "Should be able to parse all basic config values correctly"
); );

View File

@@ -308,45 +308,6 @@ paths:
application/json: application/json:
schema: schema:
type: string type: string
/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/block_gc:
parameters:
- name: tenant_shard_id
in: path
required: true
schema:
type: string
- name: timeline_id
in: path
required: true
schema:
type: string
format: hex
post:
description: Persistently add a gc blocking at the tenant level because of this timeline
responses:
"200":
description: OK
/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/unblock_gc:
parameters:
- name: tenant_shard_id
in: path
required: true
schema:
type: string
- name: timeline_id
in: path
required: true
schema:
type: string
format: hex
post:
description: Persistently remove a tenant level gc blocking for this timeline
responses:
"200":
description: OK
/v1/tenant/{tenant_shard_id}/location_config: /v1/tenant/{tenant_shard_id}/location_config:
parameters: parameters:
- name: tenant_shard_id - name: tenant_shard_id
@@ -932,7 +893,7 @@ components:
description: Whether to poll remote storage for layers to download. If false, secondary locations don't download anything. description: Whether to poll remote storage for layers to download. If false, secondary locations don't download anything.
ArchivalConfigRequest: ArchivalConfigRequest:
type: object type: object
required: required
- state - state
properties: properties:
state: state:

View File

@@ -935,7 +935,6 @@ async fn tenant_list_handler(
generation: (*gen) generation: (*gen)
.into() .into()
.expect("Tenants are always attached with a generation"), .expect("Tenants are always attached with a generation"),
gc_blocking: None,
}) })
.collect::<Vec<TenantInfo>>(); .collect::<Vec<TenantInfo>>();
@@ -987,7 +986,6 @@ async fn tenant_status(
.generation() .generation()
.into() .into()
.expect("Tenants are always attached with a generation"), .expect("Tenants are always attached with a generation"),
gc_blocking: tenant.gc_block.summary().map(|x| format!("{x:?}")),
}, },
walredo: tenant.wal_redo_manager_status(), walredo: tenant.wal_redo_manager_status(),
timelines: tenant.list_timeline_ids(), timelines: tenant.list_timeline_ids(),
@@ -1162,10 +1160,7 @@ async fn layer_map_info_handler(
let timeline = let timeline =
active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id) active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id)
.await?; .await?;
let layer_map_info = timeline let layer_map_info = timeline.layer_map_info(reset).await;
.layer_map_info(reset)
.await
.map_err(|_shutdown| ApiError::ShuttingDown)?;
json_response(StatusCode::OK, layer_map_info) json_response(StatusCode::OK, layer_map_info)
} }
@@ -1231,72 +1226,6 @@ async fn evict_timeline_layer_handler(
} }
} }
async fn timeline_gc_blocking_handler(
request: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
block_or_unblock_gc(request, true).await
}
async fn timeline_gc_unblocking_handler(
request: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
block_or_unblock_gc(request, false).await
}
/// Adding a block is `POST ../block_gc`, removing a block is `POST ../unblock_gc`.
///
/// Both are technically unsafe because they might fire off index uploads, thus they are POST.
async fn block_or_unblock_gc(
request: Request<Body>,
block: bool,
) -> Result<Response<Body>, ApiError> {
use crate::tenant::{
remote_timeline_client::WaitCompletionError, upload_queue::NotInitialized,
};
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
let state = get_state(&request);
let tenant = state
.tenant_manager
.get_attached_tenant_shard(tenant_shard_id)?;
tenant.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await?;
let timeline = tenant.get_timeline(timeline_id, true)?;
let fut = async {
if block {
timeline.block_gc(&tenant).await.map(|_| ())
} else {
timeline.unblock_gc(&tenant).await
}
};
let span = tracing::info_span!(
"block_or_unblock_gc",
tenant_id = %tenant_shard_id.tenant_id,
shard_id = %tenant_shard_id.shard_slug(),
timeline_id = %timeline_id,
block = block,
);
let res = fut.instrument(span).await;
res.map_err(|e| {
if e.is::<NotInitialized>() || e.is::<WaitCompletionError>() {
ApiError::ShuttingDown
} else {
ApiError::InternalServerError(e)
}
})?;
json_response(StatusCode::OK, ())
}
/// Get tenant_size SVG graph along with the JSON data. /// Get tenant_size SVG graph along with the JSON data.
fn synthetic_size_html_response( fn synthetic_size_html_response(
inputs: ModelInputs, inputs: ModelInputs,
@@ -2975,14 +2904,6 @@ pub fn make_router(
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/layer/:layer_file_name", "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/layer/:layer_file_name",
|r| api_handler(r, evict_timeline_layer_handler), |r| api_handler(r, evict_timeline_layer_handler),
) )
.post(
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/block_gc",
|r| api_handler(r, timeline_gc_blocking_handler),
)
.post(
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/unblock_gc",
|r| api_handler(r, timeline_gc_unblocking_handler),
)
.post("/v1/tenant/:tenant_shard_id/heatmap_upload", |r| { .post("/v1/tenant/:tenant_shard_id/heatmap_upload", |r| {
api_handler(r, secondary_upload_handler) api_handler(r, secondary_upload_handler)
}) })

View File

@@ -24,7 +24,7 @@ impl Default for L0FlushConfig {
#[derive(Clone)] #[derive(Clone)]
pub struct L0FlushGlobalState(Arc<Inner>); pub struct L0FlushGlobalState(Arc<Inner>);
pub enum Inner { pub(crate) enum Inner {
PageCached, PageCached,
Direct { semaphore: tokio::sync::Semaphore }, Direct { semaphore: tokio::sync::Semaphore },
} }
@@ -40,7 +40,7 @@ impl L0FlushGlobalState {
} }
} }
pub fn inner(&self) -> &Arc<Inner> { pub(crate) fn inner(&self) -> &Arc<Inner> {
&self.0 &self.0
} }
} }

View File

@@ -122,15 +122,11 @@ impl Listener {
} }
} }
impl Connections { impl Connections {
pub(crate) async fn shutdown(self) { pub async fn shutdown(self) {
let Self { cancel, mut tasks } = self; let Self { cancel, mut tasks } = self;
cancel.cancel(); cancel.cancel();
while let Some(res) = tasks.join_next().await { while let Some(res) = tasks.join_next().await {
Self::handle_connection_completion(res); // the logging done here mimics what was formerly done by task_mgr
}
}
fn handle_connection_completion(res: Result<anyhow::Result<()>, tokio::task::JoinError>) {
match res { match res {
Ok(Ok(())) => {} Ok(Ok(())) => {}
Ok(Err(e)) => error!("error in page_service connection task: {:?}", e), Ok(Err(e)) => error!("error in page_service connection task: {:?}", e),
@@ -138,6 +134,7 @@ impl Connections {
} }
} }
} }
}
/// ///
/// Main loop of the page service. /// Main loop of the page service.
@@ -158,19 +155,20 @@ pub async fn libpq_listener_main(
let connections_cancel = CancellationToken::new(); let connections_cancel = CancellationToken::new();
let mut connection_handler_tasks = tokio::task::JoinSet::default(); let mut connection_handler_tasks = tokio::task::JoinSet::default();
loop { // Wait for a new connection to arrive, or for server shutdown.
let accepted = tokio::select! { while let Some(res) = tokio::select! {
biased; biased;
_ = listener_cancel.cancelled() => break,
next = connection_handler_tasks.join_next(), if !connection_handler_tasks.is_empty() => {
let res = next.expect("we dont poll while empty");
Connections::handle_connection_completion(res);
continue;
}
accepted = listener.accept() => accepted,
};
match accepted { _ = listener_cancel.cancelled() => {
// We were requested to shut down.
None
}
res = listener.accept() => {
Some(res)
}
} {
match res {
Ok((socket, peer_addr)) => { Ok((socket, peer_addr)) => {
// Connection established. Spawn a new task to handle it. // Connection established. Spawn a new task to handle it.
debug!("accepted connection from {}", peer_addr); debug!("accepted connection from {}", peer_addr);

View File

@@ -56,6 +56,7 @@ impl Statvfs {
} }
pub mod mock { pub mod mock {
use anyhow::Context;
use camino::Utf8Path; use camino::Utf8Path;
use regex::Regex; use regex::Regex;
use tracing::log::info; use tracing::log::info;
@@ -134,30 +135,14 @@ pub mod mock {
{ {
continue; continue;
} }
let m = match entry.metadata() { total += entry
Ok(m) => m, .metadata()
Err(e) if is_not_found(&e) => { .with_context(|| format!("get metadata of {:?}", entry.path()))?
// some temp file which got removed right as we are walking .len();
continue;
}
Err(e) => {
return Err(anyhow::Error::new(e)
.context(format!("get metadata of {:?}", entry.path())))
}
};
total += m.len();
} }
Ok(total) Ok(total)
} }
fn is_not_found(e: &walkdir::Error) -> bool {
let Some(io_error) = e.io_error() else {
return false;
};
let kind = io_error.kind();
matches!(kind, std::io::ErrorKind::NotFound)
}
pub struct Statvfs { pub struct Statvfs {
pub blocks: u64, pub blocks: u64,
pub blocks_available: u64, pub blocks_available: u64,

View File

@@ -148,7 +148,6 @@ pub(crate) mod timeline;
pub mod size; pub mod size;
mod gc_block;
pub(crate) mod throttle; pub(crate) mod throttle;
pub(crate) use crate::span::debug_assert_current_span_has_tenant_and_timeline_id; pub(crate) use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
@@ -304,12 +303,6 @@ pub struct Tenant {
/// An ongoing timeline detach must be checked during attempts to GC or compact a timeline. /// An ongoing timeline detach must be checked during attempts to GC or compact a timeline.
ongoing_timeline_detach: std::sync::Mutex<Option<(TimelineId, utils::completion::Barrier)>>, ongoing_timeline_detach: std::sync::Mutex<Option<(TimelineId, utils::completion::Barrier)>>,
/// `index_part.json` based gc blocking reason tracking.
///
/// New gc iterations must start a new iteration by acquiring `GcBlock::start` before
/// proceeding.
pub(crate) gc_block: gc_block::GcBlock,
l0_flush_global_state: L0FlushGlobalState, l0_flush_global_state: L0FlushGlobalState,
} }
@@ -601,12 +594,6 @@ impl From<PageReconstructError> for GcError {
} }
} }
impl From<timeline::layer_manager::Shutdown> for GcError {
fn from(_: timeline::layer_manager::Shutdown) -> Self {
GcError::TimelineCancelled
}
}
#[derive(thiserror::Error, Debug)] #[derive(thiserror::Error, Debug)]
pub(crate) enum LoadConfigError { pub(crate) enum LoadConfigError {
#[error("TOML deserialization error: '{0}'")] #[error("TOML deserialization error: '{0}'")]
@@ -716,7 +703,6 @@ impl Tenant {
.read() .read()
.await .await
.layer_map() .layer_map()
.expect("currently loading, layer manager cannot be shutdown already")
.iter_historic_layers() .iter_historic_layers()
.next() .next()
.is_some(), .is_some(),
@@ -1050,8 +1036,6 @@ impl Tenant {
} }
} }
let mut gc_blocks = HashMap::new();
// For every timeline, download the metadata file, scan the local directory, // For every timeline, download the metadata file, scan the local directory,
// and build a layer map that contains an entry for each remote and local // and build a layer map that contains an entry for each remote and local
// layer file. // layer file.
@@ -1061,16 +1045,6 @@ impl Tenant {
.remove(&timeline_id) .remove(&timeline_id)
.expect("just put it in above"); .expect("just put it in above");
if let Some(blocking) = index_part.gc_blocking.as_ref() {
// could just filter these away, but it helps while testing
anyhow::ensure!(
!blocking.reasons.is_empty(),
"index_part for {timeline_id} is malformed: it should not have gc blocking with zero reasons"
);
let prev = gc_blocks.insert(timeline_id, blocking.reasons);
assert!(prev.is_none());
}
// TODO again handle early failure // TODO again handle early failure
self.load_remote_timeline( self.load_remote_timeline(
timeline_id, timeline_id,
@@ -1115,8 +1089,6 @@ impl Tenant {
// IndexPart is the source of truth. // IndexPart is the source of truth.
self.clean_up_timelines(&existent_timelines)?; self.clean_up_timelines(&existent_timelines)?;
self.gc_block.set_scanned(gc_blocks);
fail::fail_point!("attach-before-activate", |_| { fail::fail_point!("attach-before-activate", |_| {
anyhow::bail!("attach-before-activate"); anyhow::bail!("attach-before-activate");
}); });
@@ -1707,14 +1679,6 @@ impl Tenant {
} }
} }
let _guard = match self.gc_block.start().await {
Ok(guard) => guard,
Err(reasons) => {
info!("Skipping GC: {reasons}");
return Ok(GcResult::default());
}
};
self.gc_iteration_internal(target_timeline_id, horizon, pitr, cancel, ctx) self.gc_iteration_internal(target_timeline_id, horizon, pitr, cancel, ctx)
.await .await
} }
@@ -2727,7 +2691,6 @@ impl Tenant {
)), )),
tenant_conf: Arc::new(ArcSwap::from_pointee(attached_conf)), tenant_conf: Arc::new(ArcSwap::from_pointee(attached_conf)),
ongoing_timeline_detach: std::sync::Mutex::default(), ongoing_timeline_detach: std::sync::Mutex::default(),
gc_block: Default::default(),
l0_flush_global_state, l0_flush_global_state,
} }
} }
@@ -3012,6 +2975,54 @@ impl Tenant {
// because that will stall branch creation. // because that will stall branch creation.
let gc_cs = self.gc_cs.lock().await; let gc_cs = self.gc_cs.lock().await;
// Paranoia check: it is critical that GcInfo's list of child timelines is correct, to avoid incorrectly GC'ing data they
// depend on. So although GcInfo is updated continuously by Timeline::new and Timeline::drop, we also calculate it here
// and fail out if it's inaccurate.
// (this can be removed later, it's a risk mitigation for https://github.com/neondatabase/neon/pull/8427)
{
let mut all_branchpoints: BTreeMap<TimelineId, Vec<(Lsn, TimelineId)>> =
BTreeMap::new();
timelines.iter().for_each(|timeline| {
if let Some(ancestor_timeline_id) = &timeline.get_ancestor_timeline_id() {
let ancestor_children =
all_branchpoints.entry(*ancestor_timeline_id).or_default();
ancestor_children.push((timeline.get_ancestor_lsn(), timeline.timeline_id));
}
});
for timeline in &timelines {
let mut branchpoints: Vec<(Lsn, TimelineId)> = all_branchpoints
.remove(&timeline.timeline_id)
.unwrap_or_default();
branchpoints.sort_by_key(|b| b.0);
let target = timeline.gc_info.read().unwrap();
// We require that retain_lsns contains everything in `branchpoints`, but not that
// they are exactly equal: timeline deletions can race with us, so retain_lsns
// may contain some extra stuff. It is safe to have extra timelines in there, because it
// just means that we retain slightly more data than we otherwise might.
let have_branchpoints = target.retain_lsns.iter().copied().collect::<HashSet<_>>();
for b in &branchpoints {
if !have_branchpoints.contains(b) {
tracing::error!(
"Bug: `retain_lsns` is set incorrectly. Expected be {:?}, but found {:?}",
branchpoints,
target.retain_lsns
);
debug_assert!(false);
// Do not GC based on bad information!
// (ab-use an existing GcError type rather than adding a new one, since this is a
// "should never happen" check that will be removed soon).
return Err(GcError::Remote(anyhow::anyhow!(
"retain_lsns failed validation!"
)));
}
}
}
}
// Ok, we now know all the branch points. // Ok, we now know all the branch points.
// Update the GC information for each timeline. // Update the GC information for each timeline.
let mut gc_timelines = Vec::with_capacity(timelines.len()); let mut gc_timelines = Vec::with_capacity(timelines.len());
@@ -4081,7 +4092,7 @@ pub(crate) mod harness {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use std::collections::{BTreeMap, BTreeSet}; use std::collections::BTreeMap;
use super::*; use super::*;
use crate::keyspace::KeySpaceAccum; use crate::keyspace::KeySpaceAccum;
@@ -4633,10 +4644,10 @@ mod tests {
let layer_map = tline.layers.read().await; let layer_map = tline.layers.read().await;
let level0_deltas = layer_map let level0_deltas = layer_map
.layer_map()? .layer_map()
.level0_deltas() .get_level0_deltas()
.iter() .into_iter()
.map(|desc| layer_map.get_from_desc(desc)) .map(|desc| layer_map.get_from_desc(&desc))
.collect::<Vec<_>>(); .collect::<Vec<_>>();
assert!(!level0_deltas.is_empty()); assert!(!level0_deltas.is_empty());
@@ -4756,7 +4767,7 @@ mod tests {
lsn: Lsn, lsn: Lsn,
repeat: usize, repeat: usize,
key_count: usize, key_count: usize,
) -> anyhow::Result<HashMap<Key, BTreeSet<Lsn>>> { ) -> anyhow::Result<()> {
let compact = true; let compact = true;
bulk_insert_maybe_compact_gc(tenant, timeline, ctx, lsn, repeat, key_count, compact).await bulk_insert_maybe_compact_gc(tenant, timeline, ctx, lsn, repeat, key_count, compact).await
} }
@@ -4769,9 +4780,7 @@ mod tests {
repeat: usize, repeat: usize,
key_count: usize, key_count: usize,
compact: bool, compact: bool,
) -> anyhow::Result<HashMap<Key, BTreeSet<Lsn>>> { ) -> anyhow::Result<()> {
let mut inserted: HashMap<Key, BTreeSet<Lsn>> = Default::default();
let mut test_key = Key::from_hex("010000000033333333444444445500000000").unwrap(); let mut test_key = Key::from_hex("010000000033333333444444445500000000").unwrap();
let mut blknum = 0; let mut blknum = 0;
@@ -4792,7 +4801,6 @@ mod tests {
ctx, ctx,
) )
.await?; .await?;
inserted.entry(test_key).or_default().insert(lsn);
writer.finish_write(lsn); writer.finish_write(lsn);
drop(writer); drop(writer);
@@ -4817,7 +4825,7 @@ mod tests {
assert_eq!(res.layers_removed, 0, "this never removes anything"); assert_eq!(res.layers_removed, 0, "this never removes anything");
} }
Ok(inserted) Ok(())
} }
// //
@@ -4864,16 +4872,14 @@ mod tests {
.await?; .await?;
let lsn = Lsn(0x10); let lsn = Lsn(0x10);
let inserted = bulk_insert_compact_gc(&tenant, &tline, &ctx, lsn, 50, 10000).await?; bulk_insert_compact_gc(&tenant, &tline, &ctx, lsn, 50, 10000).await?;
let guard = tline.layers.read().await; let guard = tline.layers.read().await;
let lm = guard.layer_map()?; guard.layer_map().dump(true, &ctx).await?;
lm.dump(true, &ctx).await?;
let mut reads = Vec::new(); let mut reads = Vec::new();
let mut prev = None; let mut prev = None;
lm.iter_historic_layers().for_each(|desc| { guard.layer_map().iter_historic_layers().for_each(|desc| {
if !desc.is_delta() { if !desc.is_delta() {
prev = Some(desc.clone()); prev = Some(desc.clone());
return; return;
@@ -4927,39 +4933,9 @@ mod tests {
&ctx, &ctx,
) )
.await; .await;
tline
let mut expected_lsns: HashMap<Key, Lsn> = Default::default(); .validate_get_vectored_impl(&vectored_res, read, reads_lsn, &ctx)
let mut expect_missing = false; .await;
let mut key = read.start().unwrap();
while key != read.end().unwrap() {
if let Some(lsns) = inserted.get(&key) {
let expected_lsn = lsns.iter().rfind(|lsn| **lsn <= reads_lsn);
match expected_lsn {
Some(lsn) => {
expected_lsns.insert(key, *lsn);
}
None => {
expect_missing = true;
break;
}
}
} else {
expect_missing = true;
break;
}
key = key.next();
}
if expect_missing {
assert!(matches!(vectored_res, Err(GetVectoredError::MissingKey(_))));
} else {
for (key, image) in vectored_res? {
let expected_lsn = expected_lsns.get(&key).expect("determined above");
let expected_image = test_img(&format!("{} at {}", key.field6, expected_lsn));
assert_eq!(image?, expected_image);
}
}
} }
Ok(()) Ok(())
@@ -5009,6 +4985,10 @@ mod tests {
) )
.await; .await;
child_timeline
.validate_get_vectored_impl(&vectored_res, aux_keyspace, read_lsn, &ctx)
.await;
let images = vectored_res?; let images = vectored_res?;
assert!(images.is_empty()); assert!(images.is_empty());
Ok(()) Ok(())
@@ -5879,12 +5859,23 @@ mod tests {
tline.freeze_and_flush().await?; // force create a delta layer tline.freeze_and_flush().await?; // force create a delta layer
} }
let before_num_l0_delta_files = let before_num_l0_delta_files = tline
tline.layers.read().await.layer_map()?.level0_deltas().len(); .layers
.read()
.await
.layer_map()
.get_level0_deltas()
.len();
tline.compact(&cancel, EnumSet::empty(), &ctx).await?; tline.compact(&cancel, EnumSet::empty(), &ctx).await?;
let after_num_l0_delta_files = tline.layers.read().await.layer_map()?.level0_deltas().len(); let after_num_l0_delta_files = tline
.layers
.read()
.await
.layer_map()
.get_level0_deltas()
.len();
assert!(after_num_l0_delta_files < before_num_l0_delta_files, "after_num_l0_delta_files={after_num_l0_delta_files}, before_num_l0_delta_files={before_num_l0_delta_files}"); assert!(after_num_l0_delta_files < before_num_l0_delta_files, "after_num_l0_delta_files={after_num_l0_delta_files}, before_num_l0_delta_files={before_num_l0_delta_files}");
@@ -6908,10 +6899,7 @@ mod tests {
} }
let cancel = CancellationToken::new(); let cancel = CancellationToken::new();
tline tline.compact_with_gc(&cancel, &ctx).await.unwrap();
.compact_with_gc(&cancel, EnumSet::new(), &ctx)
.await
.unwrap();
for (idx, expected) in expected_result.iter().enumerate() { for (idx, expected) in expected_result.iter().enumerate() {
assert_eq!( assert_eq!(
@@ -7005,10 +6993,7 @@ mod tests {
guard.cutoffs.time = Lsn(0x40); guard.cutoffs.time = Lsn(0x40);
guard.cutoffs.space = Lsn(0x40); guard.cutoffs.space = Lsn(0x40);
} }
tline tline.compact_with_gc(&cancel, &ctx).await.unwrap();
.compact_with_gc(&cancel, EnumSet::new(), &ctx)
.await
.unwrap();
Ok(()) Ok(())
} }
@@ -7342,10 +7327,7 @@ mod tests {
} }
let cancel = CancellationToken::new(); let cancel = CancellationToken::new();
tline tline.compact_with_gc(&cancel, &ctx).await.unwrap();
.compact_with_gc(&cancel, EnumSet::new(), &ctx)
.await
.unwrap();
for idx in 0..10 { for idx in 0..10 {
assert_eq!( assert_eq!(
@@ -7371,10 +7353,7 @@ mod tests {
guard.cutoffs.time = Lsn(0x40); guard.cutoffs.time = Lsn(0x40);
guard.cutoffs.space = Lsn(0x40); guard.cutoffs.space = Lsn(0x40);
} }
tline tline.compact_with_gc(&cancel, &ctx).await.unwrap();
.compact_with_gc(&cancel, EnumSet::new(), &ctx)
.await
.unwrap();
Ok(()) Ok(())
} }
@@ -7919,28 +7898,11 @@ mod tests {
verify_result().await; verify_result().await;
let cancel = CancellationToken::new(); let cancel = CancellationToken::new();
let mut dryrun_flags = EnumSet::new(); tline.compact_with_gc(&cancel, &ctx).await.unwrap();
dryrun_flags.insert(CompactFlags::DryRun);
tline
.compact_with_gc(&cancel, dryrun_flags, &ctx)
.await
.unwrap();
// We expect layer map to be the same b/c the dry run flag, but we don't know whether there will be other background jobs
// cleaning things up, and therefore, we don't do sanity checks on the layer map during unit tests.
verify_result().await;
tline
.compact_with_gc(&cancel, EnumSet::new(), &ctx)
.await
.unwrap();
verify_result().await; verify_result().await;
// compact again // compact again
tline tline.compact_with_gc(&cancel, &ctx).await.unwrap();
.compact_with_gc(&cancel, EnumSet::new(), &ctx)
.await
.unwrap();
verify_result().await; verify_result().await;
// increase GC horizon and compact again // increase GC horizon and compact again
@@ -7950,17 +7912,11 @@ mod tests {
guard.cutoffs.time = Lsn(0x38); guard.cutoffs.time = Lsn(0x38);
guard.cutoffs.space = Lsn(0x38); guard.cutoffs.space = Lsn(0x38);
} }
tline tline.compact_with_gc(&cancel, &ctx).await.unwrap();
.compact_with_gc(&cancel, EnumSet::new(), &ctx)
.await
.unwrap();
verify_result().await; // no wals between 0x30 and 0x38, so we should obtain the same result verify_result().await; // no wals between 0x30 and 0x38, so we should obtain the same result
// not increasing the GC horizon and compact again // not increasing the GC horizon and compact again
tline tline.compact_with_gc(&cancel, &ctx).await.unwrap();
.compact_with_gc(&cancel, EnumSet::new(), &ctx)
.await
.unwrap();
verify_result().await; verify_result().await;
Ok(()) Ok(())
@@ -8141,10 +8097,7 @@ mod tests {
verify_result().await; verify_result().await;
let cancel = CancellationToken::new(); let cancel = CancellationToken::new();
branch_tline branch_tline.compact_with_gc(&cancel, &ctx).await.unwrap();
.compact_with_gc(&cancel, EnumSet::new(), &ctx)
.await
.unwrap();
verify_result().await; verify_result().await;

View File

@@ -29,7 +29,6 @@ impl EphemeralFile {
conf: &PageServerConf, conf: &PageServerConf,
tenant_shard_id: TenantShardId, tenant_shard_id: TenantShardId,
timeline_id: TimelineId, timeline_id: TimelineId,
gate_guard: utils::sync::gate::GateGuard,
ctx: &RequestContext, ctx: &RequestContext,
) -> Result<EphemeralFile, io::Error> { ) -> Result<EphemeralFile, io::Error> {
static NEXT_FILENAME: AtomicU64 = AtomicU64::new(1); static NEXT_FILENAME: AtomicU64 = AtomicU64::new(1);
@@ -52,12 +51,10 @@ impl EphemeralFile {
) )
.await?; .await?;
let prewarm = conf.l0_flush.prewarm_on_write();
Ok(EphemeralFile { Ok(EphemeralFile {
_tenant_shard_id: tenant_shard_id, _tenant_shard_id: tenant_shard_id,
_timeline_id: timeline_id, _timeline_id: timeline_id,
rw: page_caching::RW::new(file, prewarm, gate_guard), rw: page_caching::RW::new(file, conf.l0_flush.prewarm_on_write()),
}) })
} }
@@ -164,11 +161,7 @@ mod tests {
async fn test_ephemeral_blobs() -> Result<(), io::Error> { async fn test_ephemeral_blobs() -> Result<(), io::Error> {
let (conf, tenant_id, timeline_id, ctx) = harness("ephemeral_blobs")?; let (conf, tenant_id, timeline_id, ctx) = harness("ephemeral_blobs")?;
let gate = utils::sync::gate::Gate::default(); let mut file = EphemeralFile::create(conf, tenant_id, timeline_id, &ctx).await?;
let entered = gate.enter().unwrap();
let mut file = EphemeralFile::create(conf, tenant_id, timeline_id, entered, &ctx).await?;
let pos_foo = file.write_blob(b"foo", &ctx).await?; let pos_foo = file.write_blob(b"foo", &ctx).await?;
assert_eq!( assert_eq!(
@@ -222,38 +215,4 @@ mod tests {
Ok(()) Ok(())
} }
#[tokio::test]
async fn ephemeral_file_holds_gate_open() {
const FOREVER: std::time::Duration = std::time::Duration::from_secs(5);
let (conf, tenant_id, timeline_id, ctx) =
harness("ephemeral_file_holds_gate_open").unwrap();
let gate = utils::sync::gate::Gate::default();
let file = EphemeralFile::create(conf, tenant_id, timeline_id, gate.enter().unwrap(), &ctx)
.await
.unwrap();
let mut closing = tokio::task::spawn(async move {
gate.close().await;
});
// gate is entered until the ephemeral file is dropped
// do not start paused tokio-epoll-uring has a sleep loop
tokio::time::pause();
tokio::time::timeout(FOREVER, &mut closing)
.await
.expect_err("closing cannot complete before dropping");
// this is a requirement of the reset_tenant functionality: we have to be able to restart a
// tenant fast, and for that, we need all tenant_dir operations be guarded by entering a gate
drop(file);
tokio::time::timeout(FOREVER, &mut closing)
.await
.expect("closing completes right away")
.expect("closing does not panic");
}
} }

View File

@@ -18,8 +18,6 @@ use super::zero_padded_read_write;
pub struct RW { pub struct RW {
page_cache_file_id: page_cache::FileId, page_cache_file_id: page_cache::FileId,
rw: super::zero_padded_read_write::RW<PreWarmingWriter>, rw: super::zero_padded_read_write::RW<PreWarmingWriter>,
/// Gate guard is held on as long as we need to do operations in the path (delete on drop).
_gate_guard: utils::sync::gate::GateGuard,
} }
/// When we flush a block to the underlying [`crate::virtual_file::VirtualFile`], /// When we flush a block to the underlying [`crate::virtual_file::VirtualFile`],
@@ -31,11 +29,7 @@ pub enum PrewarmOnWrite {
} }
impl RW { impl RW {
pub fn new( pub fn new(file: VirtualFile, prewarm_on_write: PrewarmOnWrite) -> Self {
file: VirtualFile,
prewarm_on_write: PrewarmOnWrite,
_gate_guard: utils::sync::gate::GateGuard,
) -> Self {
let page_cache_file_id = page_cache::next_file_id(); let page_cache_file_id = page_cache::next_file_id();
Self { Self {
page_cache_file_id, page_cache_file_id,
@@ -44,7 +38,6 @@ impl RW {
file, file,
prewarm_on_write, prewarm_on_write,
)), )),
_gate_guard,
} }
} }
@@ -152,7 +145,6 @@ impl Drop for RW {
// We leave them there, [`crate::page_cache::PageCache::find_victim`] will evict them when needed. // We leave them there, [`crate::page_cache::PageCache::find_victim`] will evict them when needed.
// unlink the file // unlink the file
// we are clear to do this, because we have entered a gate
let res = std::fs::remove_file(&self.rw.as_writer().file.path); let res = std::fs::remove_file(&self.rw.as_writer().file.path);
if let Err(e) = res { if let Err(e) = res {
if e.kind() != std::io::ErrorKind::NotFound { if e.kind() != std::io::ErrorKind::NotFound {

View File

@@ -1,213 +0,0 @@
use std::collections::HashMap;
use utils::id::TimelineId;
use super::remote_timeline_client::index::GcBlockingReason;
type Storage = HashMap<TimelineId, enumset::EnumSet<GcBlockingReason>>;
#[derive(Default)]
pub(crate) struct GcBlock {
/// The timelines which have current reasons to block gc.
///
/// LOCK ORDER: this is held locked while scheduling the next index_part update. This is done
/// to keep the this field up to date with RemoteTimelineClient `upload_queue.dirty`.
reasons: std::sync::Mutex<Storage>,
blocking: tokio::sync::Mutex<()>,
}
impl GcBlock {
/// Start another gc iteration.
///
/// Returns a guard to be held for the duration of gc iteration to allow synchronizing with
/// it's ending, or if not currently possible, a value describing the reasons why not.
///
/// Cancellation safe.
pub(super) async fn start(&self) -> Result<Guard<'_>, BlockingReasons> {
let reasons = {
let g = self.reasons.lock().unwrap();
// TODO: the assumption is that this method gets called periodically. in prod, we use 1h, in
// tests, we use everything. we should warn if the gc has been consecutively blocked
// for more than 1h (within single tenant session?).
BlockingReasons::clean_and_summarize(g)
};
if let Some(reasons) = reasons {
Err(reasons)
} else {
Ok(Guard {
_inner: self.blocking.lock().await,
})
}
}
pub(crate) fn summary(&self) -> Option<BlockingReasons> {
let g = self.reasons.lock().unwrap();
BlockingReasons::summarize(&g)
}
/// Start blocking gc for this one timeline for the given reason.
///
/// This is not a guard based API but instead it mimics set API. The returned future will not
/// resolve until an existing gc round has completed.
///
/// Returns true if this block was new, false if gc was already blocked for this reason.
///
/// Cancellation safe: cancelling after first poll will keep the reason to block gc, but will
/// keep the gc blocking reason.
pub(crate) async fn insert(
&self,
timeline: &super::Timeline,
reason: GcBlockingReason,
) -> anyhow::Result<bool> {
let (added, uploaded) = {
let mut g = self.reasons.lock().unwrap();
let set = g.entry(timeline.timeline_id).or_default();
let added = set.insert(reason);
// LOCK ORDER: intentionally hold the lock, see self.reasons.
let uploaded = timeline
.remote_client
.schedule_insert_gc_block_reason(reason)?;
(added, uploaded)
};
uploaded.await?;
// ensure that any ongoing gc iteration has completed
drop(self.blocking.lock().await);
Ok(added)
}
/// Remove blocking gc for this one timeline and the given reason.
pub(crate) async fn remove(
&self,
timeline: &super::Timeline,
reason: GcBlockingReason,
) -> anyhow::Result<()> {
use std::collections::hash_map::Entry;
super::span::debug_assert_current_span_has_tenant_and_timeline_id();
let (remaining_blocks, uploaded) = {
let mut g = self.reasons.lock().unwrap();
match g.entry(timeline.timeline_id) {
Entry::Occupied(mut oe) => {
let set = oe.get_mut();
set.remove(reason);
if set.is_empty() {
oe.remove();
}
}
Entry::Vacant(_) => {
// we must still do the index_part.json update regardless, in case we had earlier
// been cancelled
}
}
let remaining_blocks = g.len();
// LOCK ORDER: intentionally hold the lock while scheduling; see self.reasons
let uploaded = timeline
.remote_client
.schedule_remove_gc_block_reason(reason)?;
(remaining_blocks, uploaded)
};
uploaded.await?;
// no need to synchronize with gc iteration again
if remaining_blocks > 0 {
tracing::info!(remaining_blocks, removed=?reason, "gc blocking removed, but gc remains blocked");
} else {
tracing::info!("gc is now unblocked for the tenant");
}
Ok(())
}
pub(crate) fn before_delete(&self, timeline: &super::Timeline) {
let unblocked = {
let mut g = self.reasons.lock().unwrap();
if g.is_empty() {
return;
}
g.remove(&timeline.timeline_id);
BlockingReasons::clean_and_summarize(g).is_none()
};
if unblocked {
tracing::info!("gc is now unblocked following deletion");
}
}
/// Initialize with the non-deleted timelines of this tenant.
pub(crate) fn set_scanned(&self, scanned: Storage) {
let mut g = self.reasons.lock().unwrap();
assert!(g.is_empty());
g.extend(scanned.into_iter().filter(|(_, v)| !v.is_empty()));
if let Some(reasons) = BlockingReasons::clean_and_summarize(g) {
tracing::info!(summary=?reasons, "initialized with gc blocked");
}
}
}
pub(super) struct Guard<'a> {
_inner: tokio::sync::MutexGuard<'a, ()>,
}
#[derive(Debug)]
pub(crate) struct BlockingReasons {
timelines: usize,
reasons: enumset::EnumSet<GcBlockingReason>,
}
impl std::fmt::Display for BlockingReasons {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"{} timelines block for {:?}",
self.timelines, self.reasons
)
}
}
impl BlockingReasons {
fn clean_and_summarize(mut g: std::sync::MutexGuard<'_, Storage>) -> Option<Self> {
let mut reasons = enumset::EnumSet::empty();
g.retain(|_key, value| {
reasons = reasons.union(*value);
!value.is_empty()
});
if !g.is_empty() {
Some(BlockingReasons {
timelines: g.len(),
reasons,
})
} else {
None
}
}
fn summarize(g: &std::sync::MutexGuard<'_, Storage>) -> Option<Self> {
if g.is_empty() {
None
} else {
let reasons = g
.values()
.fold(enumset::EnumSet::empty(), |acc, next| acc.union(*next));
Some(BlockingReasons {
timelines: g.len(),
reasons,
})
}
}
}

View File

@@ -846,8 +846,8 @@ impl LayerMap {
} }
/// Return all L0 delta layers /// Return all L0 delta layers
pub fn level0_deltas(&self) -> &Vec<Arc<PersistentLayerDesc>> { pub fn get_level0_deltas(&self) -> Vec<Arc<PersistentLayerDesc>> {
&self.l0_delta_layers self.l0_delta_layers.to_vec()
} }
/// debugging function to print out the contents of the layer map /// debugging function to print out the contents of the layer map

View File

@@ -13,7 +13,7 @@ use pageserver_api::upcall_api::ReAttachResponseTenant;
use rand::{distributions::Alphanumeric, Rng}; use rand::{distributions::Alphanumeric, Rng};
use std::borrow::Cow; use std::borrow::Cow;
use std::cmp::Ordering; use std::cmp::Ordering;
use std::collections::{BTreeMap, HashMap, HashSet}; use std::collections::{BTreeMap, HashMap};
use std::ops::Deref; use std::ops::Deref;
use std::sync::Arc; use std::sync::Arc;
use std::time::Duration; use std::time::Duration;
@@ -224,8 +224,21 @@ async fn safe_rename_tenant_dir(path: impl AsRef<Utf8Path>) -> std::io::Result<U
} }
/// See [`Self::spawn`]. /// See [`Self::spawn`].
#[derive(Clone, Default)] #[derive(Clone)]
pub struct BackgroundPurges(tokio_util::task::TaskTracker); pub struct BackgroundPurges(Arc<std::sync::Mutex<BackgroundPurgesInner>>);
enum BackgroundPurgesInner {
Open(tokio::task::JoinSet<()>),
// we use the async mutex for coalescing
ShuttingDown(Arc<tokio::sync::Mutex<tokio::task::JoinSet<()>>>),
}
impl Default for BackgroundPurges {
fn default() -> Self {
Self(Arc::new(std::sync::Mutex::new(
BackgroundPurgesInner::Open(JoinSet::new()),
)))
}
}
impl BackgroundPurges { impl BackgroundPurges {
/// When we have moved a tenant's content to a temporary directory, we may delete it lazily in /// When we have moved a tenant's content to a temporary directory, we may delete it lazily in
@@ -234,32 +247,24 @@ impl BackgroundPurges {
/// Although we are cleaning up the tenant, this task is not meant to be bound by the lifetime of the tenant in memory. /// Although we are cleaning up the tenant, this task is not meant to be bound by the lifetime of the tenant in memory.
/// Thus the [`BackgroundPurges`] type to keep track of these tasks. /// Thus the [`BackgroundPurges`] type to keep track of these tasks.
pub fn spawn(&self, tmp_path: Utf8PathBuf) { pub fn spawn(&self, tmp_path: Utf8PathBuf) {
// because on shutdown we close and wait, we are misusing TaskTracker a bit. let mut guard = self.0.lock().unwrap();
// let jset = match &mut *guard {
// so first acquire a token, then check if the tracker has been closed. the tracker might get closed BackgroundPurgesInner::Open(ref mut jset) => jset,
// right after, but at least the shutdown will wait for what we are spawning next. BackgroundPurgesInner::ShuttingDown(_) => {
let token = self.0.token(); warn!("trying to spawn background purge during shutdown, ignoring");
if self.0.is_closed() {
warn!(
%tmp_path,
"trying to spawn background purge during shutdown, ignoring"
);
return; return;
} }
let span = info_span!(parent: None, "background_purge", %tmp_path);
let task = move || {
let _token = token;
let _entered = span.entered();
if let Err(error) = std::fs::remove_dir_all(tmp_path.as_path()) {
// should we fatal_io_error here?
warn!(%error, "failed to purge tenant directory");
}
}; };
jset.spawn_on(
BACKGROUND_RUNTIME.spawn_blocking(task); async move {
if let Err(error) = fs::remove_dir_all(tmp_path.as_path()).await {
// should we fatal_io_error here?
warn!(%error, path=%tmp_path, "failed to purge tenant directory");
}
}
.instrument(info_span!(parent: None, "background_purge")),
BACKGROUND_RUNTIME.handle(),
);
} }
/// When this future completes, all background purges have completed. /// When this future completes, all background purges have completed.
@@ -273,9 +278,42 @@ impl BackgroundPurges {
/// instances of this future will continue to be correct. /// instances of this future will continue to be correct.
#[instrument(skip_all)] #[instrument(skip_all)]
pub async fn shutdown(&self) { pub async fn shutdown(&self) {
// forbid new tasks (can be called many times) let jset = {
self.0.close(); let mut guard = self.0.lock().unwrap();
self.0.wait().await; match &mut *guard {
BackgroundPurgesInner::Open(jset) => {
*guard = BackgroundPurgesInner::ShuttingDown(Arc::new(tokio::sync::Mutex::new(
std::mem::take(jset),
)))
}
BackgroundPurgesInner::ShuttingDown(_) => {
// calling shutdown multiple times is most likely a bug in pageserver shutdown code
warn!("already shutting down");
}
};
match &mut *guard {
BackgroundPurgesInner::ShuttingDown(ref mut jset) => jset.clone(),
BackgroundPurgesInner::Open(_) => {
unreachable!("above code transitions into shut down state");
}
}
};
let mut jset = jset.lock().await; // concurrent callers coalesce here
while let Some(res) = jset.join_next().await {
match res {
Ok(()) => {}
Err(e) if e.is_panic() => {
// If it panicked, the error is already logged by the panic hook.
}
Err(e) if e.is_cancelled() => {
unreachable!("we don't cancel the joinset or runtime")
}
Err(e) => {
// No idea when this can happen, but let's log it.
warn!(%e, "background purge task failed or panicked");
}
}
}
} }
} }
@@ -1729,9 +1767,14 @@ impl TenantManager {
let parent_timelines = timelines.keys().cloned().collect::<Vec<_>>(); let parent_timelines = timelines.keys().cloned().collect::<Vec<_>>();
for timeline in timelines.values() { for timeline in timelines.values() {
tracing::info!(timeline_id=%timeline.timeline_id, "Loading list of layers to hardlink"); tracing::info!(timeline_id=%timeline.timeline_id, "Loading list of layers to hardlink");
let layers = timeline.layers.read().await; let timeline_layers = timeline
.layers
.read()
.await
.likely_resident_layers()
.collect::<Vec<_>>();
for layer in layers.likely_resident_layers() { for layer in timeline_layers {
let relative_path = layer let relative_path = layer
.local_path() .local_path()
.strip_prefix(&parent_path) .strip_prefix(&parent_path)
@@ -1928,8 +1971,7 @@ impl TenantManager {
timeline_id: TimelineId, timeline_id: TimelineId,
prepared: PreparedTimelineDetach, prepared: PreparedTimelineDetach,
ctx: &RequestContext, ctx: &RequestContext,
) -> Result<HashSet<TimelineId>, anyhow::Error> { ) -> Result<Vec<TimelineId>, anyhow::Error> {
// FIXME: this is unnecessary, slotguard already has these semantics
struct RevertOnDropSlot(Option<SlotGuard>); struct RevertOnDropSlot(Option<SlotGuard>);
impl Drop for RevertOnDropSlot { impl Drop for RevertOnDropSlot {

View File

@@ -800,123 +800,6 @@ impl RemoteTimelineClient {
.context("wait completion") .context("wait completion")
} }
/// Adds a gc blocking reason for this timeline if one does not exist already.
///
/// A retryable step of timeline detach ancestor.
///
/// Returns a future which waits until the completion of the upload.
pub(crate) fn schedule_insert_gc_block_reason(
self: &Arc<Self>,
reason: index::GcBlockingReason,
) -> Result<impl std::future::Future<Output = Result<(), WaitCompletionError>>, NotInitialized>
{
let maybe_barrier = {
let mut guard = self.upload_queue.lock().unwrap();
let upload_queue = guard.initialized_mut()?;
if let index::GcBlockingReason::DetachAncestor = reason {
if upload_queue.dirty.metadata.ancestor_timeline().is_none() {
drop(guard);
panic!("cannot start detach ancestor if there is nothing to detach from");
}
}
let wanted = |x: Option<&index::GcBlocking>| x.is_some_and(|x| x.blocked_by(reason));
let current = upload_queue.dirty.gc_blocking.as_ref();
let uploaded = upload_queue.clean.0.gc_blocking.as_ref();
match (current, uploaded) {
(x, y) if wanted(x) && wanted(y) => None,
(x, y) if wanted(x) && !wanted(y) => Some(self.schedule_barrier0(upload_queue)),
// Usual case: !wanted(x) && !wanted(y)
//
// Unusual: !wanted(x) && wanted(y) which means we have two processes waiting to
// turn on and off some reason.
(x, y) => {
if !wanted(x) && wanted(y) {
// this could be avoided by having external in-memory synchronization, like
// timeline detach ancestor
warn!(?reason, op="insert", "unexpected: two racing processes to enable and disable a gc blocking reason");
}
// at this point, the metadata must always show that there is a parent
upload_queue.dirty.gc_blocking = current
.map(|x| x.with_reason(reason))
.or_else(|| Some(index::GcBlocking::started_now_for(reason)));
self.schedule_index_upload(upload_queue)?;
Some(self.schedule_barrier0(upload_queue))
}
}
};
Ok(async move {
if let Some(barrier) = maybe_barrier {
Self::wait_completion0(barrier).await?;
}
Ok(())
})
}
/// Removes a gc blocking reason for this timeline if one exists.
///
/// A retryable step of timeline detach ancestor.
///
/// Returns a future which waits until the completion of the upload.
pub(crate) fn schedule_remove_gc_block_reason(
self: &Arc<Self>,
reason: index::GcBlockingReason,
) -> Result<impl std::future::Future<Output = Result<(), WaitCompletionError>>, NotInitialized>
{
let maybe_barrier = {
let mut guard = self.upload_queue.lock().unwrap();
let upload_queue = guard.initialized_mut()?;
if let index::GcBlockingReason::DetachAncestor = reason {
if !upload_queue
.clean
.0
.lineage
.is_detached_from_original_ancestor()
{
drop(guard);
panic!("cannot complete timeline_ancestor_detach while not detached");
}
}
let wanted = |x: Option<&index::GcBlocking>| {
x.is_none() || x.is_some_and(|b| !b.blocked_by(reason))
};
let current = upload_queue.dirty.gc_blocking.as_ref();
let uploaded = upload_queue.clean.0.gc_blocking.as_ref();
match (current, uploaded) {
(x, y) if wanted(x) && wanted(y) => None,
(x, y) if wanted(x) && !wanted(y) => Some(self.schedule_barrier0(upload_queue)),
(x, y) => {
if !wanted(x) && wanted(y) {
warn!(?reason, op="remove", "unexpected: two racing processes to enable and disable a gc blocking reason (remove)");
}
upload_queue.dirty.gc_blocking =
current.as_ref().and_then(|x| x.without_reason(reason));
assert!(wanted(upload_queue.dirty.gc_blocking.as_ref()));
// FIXME: bogus ?
self.schedule_index_upload(upload_queue)?;
Some(self.schedule_barrier0(upload_queue))
}
}
};
Ok(async move {
if let Some(barrier) = maybe_barrier {
Self::wait_completion0(barrier).await?;
}
Ok(())
})
}
/// Launch an upload operation in the background; the file is added to be included in next /// Launch an upload operation in the background; the file is added to be included in next
/// `index_part.json` upload. /// `index_part.json` upload.
pub(crate) fn schedule_layer_file_upload( pub(crate) fn schedule_layer_file_upload(

View File

@@ -60,9 +60,6 @@ pub struct IndexPart {
#[serde(default)] #[serde(default)]
pub(crate) lineage: Lineage, pub(crate) lineage: Lineage,
#[serde(skip_serializing_if = "Option::is_none", default)]
pub(crate) gc_blocking: Option<GcBlocking>,
/// Describes the kind of aux files stored in the timeline. /// Describes the kind of aux files stored in the timeline.
/// ///
/// The value is modified during file ingestion when the latest wanted value communicated via tenant config is applied if it is acceptable. /// The value is modified during file ingestion when the latest wanted value communicated via tenant config is applied if it is acceptable.
@@ -88,11 +85,10 @@ impl IndexPart {
/// - 6: last_aux_file_policy is added. /// - 6: last_aux_file_policy is added.
/// - 7: metadata_bytes is no longer written, but still read /// - 7: metadata_bytes is no longer written, but still read
/// - 8: added `archived_at` /// - 8: added `archived_at`
/// - 9: +gc_blocking const LATEST_VERSION: usize = 8;
const LATEST_VERSION: usize = 9;
// Versions we may see when reading from a bucket. // Versions we may see when reading from a bucket.
pub const KNOWN_VERSIONS: &'static [usize] = &[1, 2, 3, 4, 5, 6, 7, 8, 9]; pub const KNOWN_VERSIONS: &'static [usize] = &[1, 2, 3, 4, 5, 6, 7, 8];
pub const FILE_NAME: &'static str = "index_part.json"; pub const FILE_NAME: &'static str = "index_part.json";
@@ -105,7 +101,6 @@ impl IndexPart {
deleted_at: None, deleted_at: None,
archived_at: None, archived_at: None,
lineage: Default::default(), lineage: Default::default(),
gc_blocking: None,
last_aux_file_policy: None, last_aux_file_policy: None,
} }
} }
@@ -256,64 +251,6 @@ impl Lineage {
} }
} }
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub(crate) struct GcBlocking {
pub(crate) started_at: NaiveDateTime,
pub(crate) reasons: enumset::EnumSet<GcBlockingReason>,
}
#[derive(Debug, enumset::EnumSetType, serde::Serialize, serde::Deserialize)]
#[enumset(serialize_repr = "list")]
pub(crate) enum GcBlockingReason {
Manual,
DetachAncestor,
}
impl GcBlocking {
pub(super) fn started_now_for(reason: GcBlockingReason) -> Self {
GcBlocking {
started_at: chrono::Utc::now().naive_utc(),
reasons: enumset::EnumSet::only(reason),
}
}
/// Returns true if the given reason is one of the reasons why the gc is blocked.
pub(crate) fn blocked_by(&self, reason: GcBlockingReason) -> bool {
self.reasons.contains(reason)
}
/// Returns a version of self with the given reason.
pub(super) fn with_reason(&self, reason: GcBlockingReason) -> Self {
assert!(!self.blocked_by(reason));
let mut reasons = self.reasons;
reasons.insert(reason);
Self {
started_at: self.started_at,
reasons,
}
}
/// Returns a version of self without the given reason. Assumption is that if
/// there are no more reasons, we can unblock the gc by returning `None`.
pub(super) fn without_reason(&self, reason: GcBlockingReason) -> Option<Self> {
assert!(self.blocked_by(reason));
if self.reasons.len() == 1 {
None
} else {
let mut reasons = self.reasons;
assert!(reasons.remove(reason));
assert!(!reasons.is_empty());
Some(Self {
started_at: self.started_at,
reasons,
})
}
}
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
@@ -355,7 +292,6 @@ mod tests {
deleted_at: None, deleted_at: None,
archived_at: None, archived_at: None,
lineage: Lineage::default(), lineage: Lineage::default(),
gc_blocking: None,
last_aux_file_policy: None, last_aux_file_policy: None,
}; };
@@ -399,7 +335,6 @@ mod tests {
deleted_at: None, deleted_at: None,
archived_at: None, archived_at: None,
lineage: Lineage::default(), lineage: Lineage::default(),
gc_blocking: None,
last_aux_file_policy: None, last_aux_file_policy: None,
}; };
@@ -444,7 +379,6 @@ mod tests {
deleted_at: Some(parse_naive_datetime("2023-07-31T09:00:00.123000000")), deleted_at: Some(parse_naive_datetime("2023-07-31T09:00:00.123000000")),
archived_at: None, archived_at: None,
lineage: Lineage::default(), lineage: Lineage::default(),
gc_blocking: None,
last_aux_file_policy: None, last_aux_file_policy: None,
}; };
@@ -492,7 +426,6 @@ mod tests {
deleted_at: None, deleted_at: None,
archived_at: None, archived_at: None,
lineage: Lineage::default(), lineage: Lineage::default(),
gc_blocking: None,
last_aux_file_policy: None, last_aux_file_policy: None,
}; };
@@ -535,7 +468,6 @@ mod tests {
deleted_at: Some(parse_naive_datetime("2023-07-31T09:00:00.123000000")), deleted_at: Some(parse_naive_datetime("2023-07-31T09:00:00.123000000")),
archived_at: None, archived_at: None,
lineage: Lineage::default(), lineage: Lineage::default(),
gc_blocking: None,
last_aux_file_policy: None, last_aux_file_policy: None,
}; };
@@ -581,7 +513,6 @@ mod tests {
reparenting_history: vec![TimelineId::from_str("e1bfd8c633d713d279e6fcd2bcc15b6d").unwrap()], reparenting_history: vec![TimelineId::from_str("e1bfd8c633d713d279e6fcd2bcc15b6d").unwrap()],
original_ancestor: Some((TimelineId::from_str("e2bfd8c633d713d279e6fcd2bcc15b6d").unwrap(), Lsn::from_str("0/15A7618").unwrap(), parse_naive_datetime("2024-05-07T18:52:36.322426563"))), original_ancestor: Some((TimelineId::from_str("e2bfd8c633d713d279e6fcd2bcc15b6d").unwrap(), Lsn::from_str("0/15A7618").unwrap(), parse_naive_datetime("2024-05-07T18:52:36.322426563"))),
}, },
gc_blocking: None,
last_aux_file_policy: None, last_aux_file_policy: None,
}; };
@@ -632,7 +563,6 @@ mod tests {
reparenting_history: vec![TimelineId::from_str("e1bfd8c633d713d279e6fcd2bcc15b6d").unwrap()], reparenting_history: vec![TimelineId::from_str("e1bfd8c633d713d279e6fcd2bcc15b6d").unwrap()],
original_ancestor: Some((TimelineId::from_str("e2bfd8c633d713d279e6fcd2bcc15b6d").unwrap(), Lsn::from_str("0/15A7618").unwrap(), parse_naive_datetime("2024-05-07T18:52:36.322426563"))), original_ancestor: Some((TimelineId::from_str("e2bfd8c633d713d279e6fcd2bcc15b6d").unwrap(), Lsn::from_str("0/15A7618").unwrap(), parse_naive_datetime("2024-05-07T18:52:36.322426563"))),
}, },
gc_blocking: None,
last_aux_file_policy: Some(AuxFilePolicy::V2), last_aux_file_policy: Some(AuxFilePolicy::V2),
}; };
@@ -688,7 +618,6 @@ mod tests {
deleted_at: Some(parse_naive_datetime("2023-07-31T09:00:00.123000000")), deleted_at: Some(parse_naive_datetime("2023-07-31T09:00:00.123000000")),
archived_at: None, archived_at: None,
lineage: Default::default(), lineage: Default::default(),
gc_blocking: None,
last_aux_file_policy: Default::default(), last_aux_file_policy: Default::default(),
}; };
@@ -745,7 +674,6 @@ mod tests {
deleted_at: Some(parse_naive_datetime("2023-07-31T09:00:00.123000000")), deleted_at: Some(parse_naive_datetime("2023-07-31T09:00:00.123000000")),
archived_at: Some(parse_naive_datetime("2023-04-29T09:00:00.123000000")), archived_at: Some(parse_naive_datetime("2023-04-29T09:00:00.123000000")),
lineage: Default::default(), lineage: Default::default(),
gc_blocking: None,
last_aux_file_policy: Default::default(), last_aux_file_policy: Default::default(),
}; };
@@ -753,68 +681,6 @@ mod tests {
assert_eq!(part, expected); assert_eq!(part, expected);
} }
#[test]
fn v9_indexpart_is_parsed() {
let example = r#"{
"version": 9,
"layer_metadata":{
"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9": { "file_size": 25600000 },
"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51": { "file_size": 9007199254741001 }
},
"disk_consistent_lsn":"0/16960E8",
"metadata": {
"disk_consistent_lsn": "0/16960E8",
"prev_record_lsn": "0/1696070",
"ancestor_timeline": "e45a7f37d3ee2ff17dc14bf4f4e3f52e",
"ancestor_lsn": "0/0",
"latest_gc_cutoff_lsn": "0/1696070",
"initdb_lsn": "0/1696070",
"pg_version": 14
},
"gc_blocking": {
"started_at": "2024-07-19T09:00:00.123",
"reasons": ["DetachAncestor"]
}
}"#;
let expected = IndexPart {
version: 9,
layer_metadata: HashMap::from([
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), LayerFileMetadata {
file_size: 25600000,
generation: Generation::none(),
shard: ShardIndex::unsharded()
}),
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), LayerFileMetadata {
file_size: 9007199254741001,
generation: Generation::none(),
shard: ShardIndex::unsharded()
})
]),
disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
metadata: TimelineMetadata::new(
Lsn::from_str("0/16960E8").unwrap(),
Some(Lsn::from_str("0/1696070").unwrap()),
Some(TimelineId::from_str("e45a7f37d3ee2ff17dc14bf4f4e3f52e").unwrap()),
Lsn::INVALID,
Lsn::from_str("0/1696070").unwrap(),
Lsn::from_str("0/1696070").unwrap(),
14,
).with_recalculated_checksum().unwrap(),
deleted_at: None,
lineage: Default::default(),
gc_blocking: Some(GcBlocking {
started_at: parse_naive_datetime("2024-07-19T09:00:00.123000000"),
reasons: enumset::EnumSet::from_iter([GcBlockingReason::DetachAncestor]),
}),
last_aux_file_policy: Default::default(),
archived_at: None,
};
let part = IndexPart::from_s3_bytes(example.as_bytes()).unwrap();
assert_eq!(part, expected);
}
fn parse_naive_datetime(s: &str) -> NaiveDateTime { fn parse_naive_datetime(s: &str) -> NaiveDateTime {
chrono::NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S.%f").unwrap() chrono::NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S.%f").unwrap()
} }

View File

@@ -55,7 +55,7 @@ use tokio_util::sync::CancellationToken;
use tracing::{info_span, instrument, warn, Instrument}; use tracing::{info_span, instrument, warn, Instrument};
use utils::{ use utils::{
backoff, completion::Barrier, crashsafe::path_with_suffix_extension, failpoint_support, fs_ext, backoff, completion::Barrier, crashsafe::path_with_suffix_extension, failpoint_support, fs_ext,
id::TimelineId, pausable_failpoint, serde_system_time, id::TimelineId, serde_system_time,
}; };
use super::{ use super::{
@@ -1146,14 +1146,12 @@ impl<'a> TenantDownloader<'a> {
layer: HeatMapLayer, layer: HeatMapLayer,
ctx: &RequestContext, ctx: &RequestContext,
) -> Result<Option<HeatMapLayer>, UpdateError> { ) -> Result<Option<HeatMapLayer>, UpdateError> {
// Failpoints for simulating slow remote storage // Failpoint for simulating slow remote storage
failpoint_support::sleep_millis_async!( failpoint_support::sleep_millis_async!(
"secondary-layer-download-sleep", "secondary-layer-download-sleep",
&self.secondary_state.cancel &self.secondary_state.cancel
); );
pausable_failpoint!("secondary-layer-download-pausable");
let local_path = local_layer_path( let local_path = local_layer_path(
self.conf, self.conf,
tenant_shard_id, tenant_shard_id,

View File

@@ -8,9 +8,6 @@ mod layer_desc;
mod layer_name; mod layer_name;
pub mod merge_iterator; pub mod merge_iterator;
#[cfg(test)]
pub mod split_writer;
use crate::context::{AccessStatsBehavior, RequestContext}; use crate::context::{AccessStatsBehavior, RequestContext};
use crate::repository::Value; use crate::repository::Value;
use crate::walrecord::NeonWalRecord; use crate::walrecord::NeonWalRecord;
@@ -435,6 +432,21 @@ impl ReadableLayer {
} }
} }
/// Return value from [`Layer::get_value_reconstruct_data`]
#[derive(Clone, Copy, Debug)]
pub enum ValueReconstructResult {
/// Got all the data needed to reconstruct the requested page
Complete,
/// This layer didn't contain all the required data, the caller should look up
/// the predecessor layer at the returned LSN and collect more data from there.
Continue,
/// This layer didn't contain data needed to reconstruct the page version at
/// the returned LSN. This is usually considered an error, but might be OK
/// in some circumstances.
Missing,
}
/// Layers contain a hint indicating whether they are likely to be used for reads. This is a hint rather /// Layers contain a hint indicating whether they are likely to be used for reads. This is a hint rather
/// than an authoritative value, so that we do not have to update it synchronously when changing the visibility /// than an authoritative value, so that we do not have to update it synchronously when changing the visibility
/// of layers (for example when creating a branch that makes some previously covered layers visible). It should /// of layers (for example when creating a branch that makes some previously covered layers visible). It should
@@ -539,25 +551,19 @@ impl LayerAccessStats {
self.record_residence_event_at(SystemTime::now()) self.record_residence_event_at(SystemTime::now())
} }
fn record_access_at(&self, now: SystemTime) -> bool { pub(crate) fn record_access_at(&self, now: SystemTime) {
let (mut mask, mut value) = Self::to_low_res_timestamp(Self::ATIME_SHIFT, now); let (mut mask, mut value) = Self::to_low_res_timestamp(Self::ATIME_SHIFT, now);
// A layer which is accessed must be visible. // A layer which is accessed must be visible.
mask |= 0x1 << Self::VISIBILITY_SHIFT; mask |= 0x1 << Self::VISIBILITY_SHIFT;
value |= 0x1 << Self::VISIBILITY_SHIFT; value |= 0x1 << Self::VISIBILITY_SHIFT;
let old_bits = self.write_bits(mask, value); self.write_bits(mask, value);
!matches!(
self.decode_visibility(old_bits),
LayerVisibilityHint::Visible
)
} }
/// Returns true if we modified the layer's visibility to set it to Visible implicitly pub(crate) fn record_access(&self, ctx: &RequestContext) {
/// as a result of this access
pub(crate) fn record_access(&self, ctx: &RequestContext) -> bool {
if ctx.access_stats_behavior() == AccessStatsBehavior::Skip { if ctx.access_stats_behavior() == AccessStatsBehavior::Skip {
return false; return;
} }
self.record_access_at(SystemTime::now()) self.record_access_at(SystemTime::now())

View File

@@ -36,12 +36,13 @@ use crate::tenant::block_io::{BlockBuf, BlockCursor, BlockLease, BlockReader, Fi
use crate::tenant::disk_btree::{ use crate::tenant::disk_btree::{
DiskBtreeBuilder, DiskBtreeIterator, DiskBtreeReader, VisitDirection, DiskBtreeBuilder, DiskBtreeIterator, DiskBtreeReader, VisitDirection,
}; };
use crate::tenant::storage_layer::{Layer, ValueReconstructResult, ValueReconstructState};
use crate::tenant::timeline::GetVectoredError; use crate::tenant::timeline::GetVectoredError;
use crate::tenant::vectored_blob_io::{ use crate::tenant::vectored_blob_io::{
BlobFlag, MaxVectoredReadBytes, StreamingVectoredReadPlanner, VectoredBlobReader, VectoredRead, BlobFlag, MaxVectoredReadBytes, StreamingVectoredReadPlanner, VectoredBlobReader, VectoredRead,
VectoredReadPlanner, VectoredReadPlanner,
}; };
use crate::tenant::PageReconstructError; use crate::tenant::{PageReconstructError, Timeline};
use crate::virtual_file::{self, VirtualFile}; use crate::virtual_file::{self, VirtualFile};
use crate::{walrecord, TEMP_FILE_SUFFIX}; use crate::{walrecord, TEMP_FILE_SUFFIX};
use crate::{DELTA_FILE_MAGIC, STORAGE_FORMAT_VERSION}; use crate::{DELTA_FILE_MAGIC, STORAGE_FORMAT_VERSION};
@@ -71,7 +72,10 @@ use utils::{
lsn::Lsn, lsn::Lsn,
}; };
use super::{AsLayerDesc, LayerName, PersistentLayerDesc, ValuesReconstructState}; use super::{
AsLayerDesc, LayerAccessStats, LayerName, PersistentLayerDesc, ResidentLayer,
ValuesReconstructState,
};
/// ///
/// Header stored in the beginning of the file /// Header stored in the beginning of the file
@@ -196,6 +200,7 @@ impl DeltaKey {
pub struct DeltaLayer { pub struct DeltaLayer {
path: Utf8PathBuf, path: Utf8PathBuf,
pub desc: PersistentLayerDesc, pub desc: PersistentLayerDesc,
access_stats: LayerAccessStats,
inner: OnceCell<Arc<DeltaLayerInner>>, inner: OnceCell<Arc<DeltaLayerInner>>,
} }
@@ -294,6 +299,7 @@ impl DeltaLayer {
/// not loaded already. /// not loaded already.
/// ///
async fn load(&self, ctx: &RequestContext) -> Result<&Arc<DeltaLayerInner>> { async fn load(&self, ctx: &RequestContext) -> Result<&Arc<DeltaLayerInner>> {
self.access_stats.record_access(ctx);
// Quick exit if already loaded // Quick exit if already loaded
self.inner self.inner
.get_or_try_init(|| self.load_inner(ctx)) .get_or_try_init(|| self.load_inner(ctx))
@@ -344,6 +350,7 @@ impl DeltaLayer {
summary.lsn_range, summary.lsn_range,
metadata.len(), metadata.len(),
), ),
access_stats: Default::default(),
inner: OnceCell::new(), inner: OnceCell::new(),
}) })
} }
@@ -366,6 +373,7 @@ impl DeltaLayer {
/// 3. Call `finish`. /// 3. Call `finish`.
/// ///
struct DeltaLayerWriterInner { struct DeltaLayerWriterInner {
conf: &'static PageServerConf,
pub path: Utf8PathBuf, pub path: Utf8PathBuf,
timeline_id: TimelineId, timeline_id: TimelineId,
tenant_shard_id: TenantShardId, tenant_shard_id: TenantShardId,
@@ -376,9 +384,6 @@ struct DeltaLayerWriterInner {
tree: DiskBtreeBuilder<BlockBuf, DELTA_KEY_SIZE>, tree: DiskBtreeBuilder<BlockBuf, DELTA_KEY_SIZE>,
blob_writer: BlobWriter<true>, blob_writer: BlobWriter<true>,
// Number of key-lsns in the layer.
num_keys: usize,
} }
impl DeltaLayerWriterInner { impl DeltaLayerWriterInner {
@@ -412,6 +417,7 @@ impl DeltaLayerWriterInner {
let tree_builder = DiskBtreeBuilder::new(block_buf); let tree_builder = DiskBtreeBuilder::new(block_buf);
Ok(Self { Ok(Self {
conf,
path, path,
timeline_id, timeline_id,
tenant_shard_id, tenant_shard_id,
@@ -419,7 +425,6 @@ impl DeltaLayerWriterInner {
lsn_range, lsn_range,
tree: tree_builder, tree: tree_builder,
blob_writer, blob_writer,
num_keys: 0,
}) })
} }
@@ -470,9 +475,6 @@ impl DeltaLayerWriterInner {
let delta_key = DeltaKey::from_key_lsn(&key, lsn); let delta_key = DeltaKey::from_key_lsn(&key, lsn);
let res = self.tree.append(&delta_key.0, blob_ref.0); let res = self.tree.append(&delta_key.0, blob_ref.0);
self.num_keys += 1;
(val, res.map_err(|e| anyhow::anyhow!(e))) (val, res.map_err(|e| anyhow::anyhow!(e)))
} }
@@ -486,10 +488,11 @@ impl DeltaLayerWriterInner {
async fn finish( async fn finish(
self, self,
key_end: Key, key_end: Key,
timeline: &Arc<Timeline>,
ctx: &RequestContext, ctx: &RequestContext,
) -> anyhow::Result<(PersistentLayerDesc, Utf8PathBuf)> { ) -> anyhow::Result<ResidentLayer> {
let temp_path = self.path.clone(); let temp_path = self.path.clone();
let result = self.finish0(key_end, ctx).await; let result = self.finish0(key_end, timeline, ctx).await;
if result.is_err() { if result.is_err() {
tracing::info!(%temp_path, "cleaning up temporary file after error during writing"); tracing::info!(%temp_path, "cleaning up temporary file after error during writing");
if let Err(e) = std::fs::remove_file(&temp_path) { if let Err(e) = std::fs::remove_file(&temp_path) {
@@ -502,8 +505,9 @@ impl DeltaLayerWriterInner {
async fn finish0( async fn finish0(
self, self,
key_end: Key, key_end: Key,
timeline: &Arc<Timeline>,
ctx: &RequestContext, ctx: &RequestContext,
) -> anyhow::Result<(PersistentLayerDesc, Utf8PathBuf)> { ) -> anyhow::Result<ResidentLayer> {
let index_start_blk = let index_start_blk =
((self.blob_writer.size() + PAGE_SZ as u64 - 1) / PAGE_SZ as u64) as u32; ((self.blob_writer.size() + PAGE_SZ as u64 - 1) / PAGE_SZ as u64) as u32;
@@ -568,9 +572,11 @@ impl DeltaLayerWriterInner {
// fsync the file // fsync the file
file.sync_all().await?; file.sync_all().await?;
trace!("created delta layer {}", self.path); let layer = Layer::finish_creating(self.conf, timeline, desc, &self.path)?;
Ok((desc, self.path)) trace!("created delta layer {}", layer.local_path());
Ok(layer)
} }
} }
@@ -671,20 +677,14 @@ impl DeltaLayerWriter {
pub(crate) async fn finish( pub(crate) async fn finish(
mut self, mut self,
key_end: Key, key_end: Key,
timeline: &Arc<Timeline>,
ctx: &RequestContext, ctx: &RequestContext,
) -> anyhow::Result<(PersistentLayerDesc, Utf8PathBuf)> { ) -> anyhow::Result<ResidentLayer> {
self.inner.take().unwrap().finish(key_end, ctx).await self.inner
} .take()
.unwrap()
#[cfg(test)] .finish(key_end, timeline, ctx)
pub(crate) fn num_keys(&self) -> usize { .await
self.inner.as_ref().unwrap().num_keys
}
#[cfg(test)]
pub(crate) fn estimated_size(&self) -> u64 {
let inner = self.inner.as_ref().unwrap();
inner.blob_writer.size() + inner.tree.borrow_writer().size() + PAGE_SZ as u64
} }
} }
@@ -808,6 +808,95 @@ impl DeltaLayerInner {
}) })
} }
/// Look up a single `key` in this delta layer and collect whatever is needed
/// to reconstruct its page image within `lsn_range`.
///
/// Walks the on-disk b-tree backwards starting at `lsn_range.end - 1`,
/// collecting blob offsets for this key, then reads each blob and pushes WAL
/// records into `reconstruct_state` until a page image or a will-init record
/// is found (or the range start is passed).
///
/// Returns `Complete` when no older data is needed, `Continue` when the
/// caller must keep searching older layers for a base image.
pub(super) async fn get_value_reconstruct_data(
    &self,
    key: Key,
    lsn_range: Range<Lsn>,
    reconstruct_state: &mut ValueReconstructState,
    ctx: &RequestContext,
) -> anyhow::Result<ValueReconstructResult> {
    // Until we see an image or will-init record, an older layer is needed.
    let mut need_image = true;

    // Scan the page versions backwards, starting from `lsn`.
    let block_reader = FileBlockReader::new(&self.file, self.file_id);
    let tree_reader = DiskBtreeReader::<_, DELTA_KEY_SIZE>::new(
        self.index_start_blk,
        self.index_root_blk,
        &block_reader,
    );
    // `lsn_range.end` is exclusive, so start the backwards visit at end - 1.
    let search_key = DeltaKey::from_key_lsn(&key, Lsn(lsn_range.end.0 - 1));

    // (lsn, blob position) pairs for this key, newest first.
    let mut offsets: Vec<(Lsn, u64)> = Vec::new();

    tree_reader
        .visit(
            &search_key.0,
            VisitDirection::Backwards,
            |key, value| {
                let blob_ref = BlobRef(value);
                // Stop as soon as the b-tree key no longer matches our page key.
                if key[..KEY_SIZE] != search_key.0[..KEY_SIZE] {
                    return false;
                }
                let entry_lsn = DeltaKey::extract_lsn_from_buf(key);
                // Stop once we walk past the start of the requested LSN range.
                if entry_lsn < lsn_range.start {
                    return false;
                }
                offsets.push((entry_lsn, blob_ref.pos()));

                // A will-init blob makes older entries irrelevant; returning
                // false ends the backwards visit early.
                !blob_ref.will_init()
            },
            &RequestContextBuilder::extend(ctx)
                .page_content_kind(PageContentKind::DeltaLayerBtreeNode)
                .build(),
        )
        .await?;

    let ctx = &RequestContextBuilder::extend(ctx)
        .page_content_kind(PageContentKind::DeltaLayerValue)
        .build();

    // Ok, 'offsets' now contains the offsets of all the entries we need to read
    let cursor = block_reader.block_cursor();
    // Single reusable buffer for all blob reads in this lookup.
    let mut buf = Vec::new();
    for (entry_lsn, pos) in offsets {
        cursor
            .read_blob_into_buf(pos, &mut buf, ctx)
            .await
            .with_context(|| {
                format!("Failed to read blob from virtual file {}", self.file.path)
            })?;
        let val = Value::des(&buf).with_context(|| {
            format!(
                "Failed to deserialize file blob from virtual file {}",
                self.file.path
            )
        })?;
        match val {
            Value::Image(img) => {
                // Found a full page image: reconstruction is complete.
                reconstruct_state.img = Some((entry_lsn, img));
                need_image = false;
                break;
            }
            Value::WalRecord(rec) => {
                let will_init = rec.will_init();
                reconstruct_state.records.push((entry_lsn, rec));
                if will_init {
                    // This WAL record initializes the page, so no need to go further back
                    need_image = false;
                    break;
                }
            }
        }
    }

    // If an older page image is needed to reconstruct the page, let the
    // caller know.
    if need_image {
        Ok(ValueReconstructResult::Continue)
    } else {
        Ok(ValueReconstructResult::Complete)
    }
}
// Look up the keys in the provided keyspace and update // Look up the keys in the provided keyspace and update
// the reconstruct state with whatever is found. // the reconstruct state with whatever is found.
// //
@@ -1580,9 +1669,8 @@ pub(crate) mod test {
use super::*; use super::*;
use crate::repository::Value; use crate::repository::Value;
use crate::tenant::harness::TIMELINE_ID; use crate::tenant::harness::TIMELINE_ID;
use crate::tenant::storage_layer::{Layer, ResidentLayer};
use crate::tenant::vectored_blob_io::StreamingVectoredReadPlanner; use crate::tenant::vectored_blob_io::StreamingVectoredReadPlanner;
use crate::tenant::{Tenant, Timeline}; use crate::tenant::Tenant;
use crate::{ use crate::{
context::DownloadBehavior, context::DownloadBehavior,
task_mgr::TaskKind, task_mgr::TaskKind,
@@ -1876,8 +1964,9 @@ pub(crate) mod test {
res?; res?;
} }
let (desc, path) = writer.finish(entries_meta.key_range.end, &ctx).await?; let resident = writer
let resident = Layer::finish_creating(harness.conf, &timeline, desc, &path)?; .finish(entries_meta.key_range.end, &timeline, &ctx)
.await?;
let inner = resident.get_as_delta(&ctx).await?; let inner = resident.get_as_delta(&ctx).await?;
@@ -1957,7 +2046,6 @@ pub(crate) mod test {
.await .await
.likely_resident_layers() .likely_resident_layers()
.next() .next()
.cloned()
.unwrap(); .unwrap();
{ {
@@ -2032,8 +2120,7 @@ pub(crate) mod test {
.read() .read()
.await .await
.likely_resident_layers() .likely_resident_layers()
.find(|&x| x != &initdb_layer) .find(|x| x != &initdb_layer)
.cloned()
.unwrap(); .unwrap();
// create a copy for the timeline, so we don't overwrite the file // create a copy for the timeline, so we don't overwrite the file
@@ -2068,8 +2155,7 @@ pub(crate) mod test {
.await .await
.unwrap(); .unwrap();
let (desc, path) = writer.finish(Key::MAX, ctx).await.unwrap(); let copied_layer = writer.finish(Key::MAX, &branch, ctx).await.unwrap();
let copied_layer = Layer::finish_creating(tenant.conf, &branch, desc, &path).unwrap();
copied_layer.get_as_delta(ctx).await.unwrap(); copied_layer.get_as_delta(ctx).await.unwrap();
@@ -2197,9 +2283,7 @@ pub(crate) mod test {
for (key, lsn, value) in deltas { for (key, lsn, value) in deltas {
writer.put_value(key, lsn, value, ctx).await?; writer.put_value(key, lsn, value, ctx).await?;
} }
let delta_layer = writer.finish(key_end, tline, ctx).await?;
let (desc, path) = writer.finish(key_end, ctx).await?;
let delta_layer = Layer::finish_creating(tenant.conf, tline, desc, &path)?;
Ok::<_, anyhow::Error>(delta_layer) Ok::<_, anyhow::Error>(delta_layer)
} }

View File

@@ -32,6 +32,9 @@ use crate::tenant::block_io::{BlockBuf, BlockReader, FileBlockReader};
use crate::tenant::disk_btree::{ use crate::tenant::disk_btree::{
DiskBtreeBuilder, DiskBtreeIterator, DiskBtreeReader, VisitDirection, DiskBtreeBuilder, DiskBtreeIterator, DiskBtreeReader, VisitDirection,
}; };
use crate::tenant::storage_layer::{
LayerAccessStats, ValueReconstructResult, ValueReconstructState,
};
use crate::tenant::timeline::GetVectoredError; use crate::tenant::timeline::GetVectoredError;
use crate::tenant::vectored_blob_io::{ use crate::tenant::vectored_blob_io::{
BlobFlag, MaxVectoredReadBytes, StreamingVectoredReadPlanner, VectoredBlobReader, VectoredRead, BlobFlag, MaxVectoredReadBytes, StreamingVectoredReadPlanner, VectoredBlobReader, VectoredRead,
@@ -134,6 +137,7 @@ pub struct ImageLayer {
pub desc: PersistentLayerDesc, pub desc: PersistentLayerDesc,
// This entry contains an image of all pages as of this LSN, should be the same as desc.lsn // This entry contains an image of all pages as of this LSN, should be the same as desc.lsn
pub lsn: Lsn, pub lsn: Lsn,
access_stats: LayerAccessStats,
inner: OnceCell<ImageLayerInner>, inner: OnceCell<ImageLayerInner>,
} }
@@ -251,6 +255,7 @@ impl ImageLayer {
/// not loaded already. /// not loaded already.
/// ///
async fn load(&self, ctx: &RequestContext) -> Result<&ImageLayerInner> { async fn load(&self, ctx: &RequestContext) -> Result<&ImageLayerInner> {
self.access_stats.record_access(ctx);
self.inner self.inner
.get_or_try_init(|| self.load_inner(ctx)) .get_or_try_init(|| self.load_inner(ctx))
.await .await
@@ -301,6 +306,7 @@ impl ImageLayer {
metadata.len(), metadata.len(),
), // Now we assume image layer ALWAYS covers the full range. This may change in the future. ), // Now we assume image layer ALWAYS covers the full range. This may change in the future.
lsn: summary.lsn, lsn: summary.lsn,
access_stats: Default::default(),
inner: OnceCell::new(), inner: OnceCell::new(),
}) })
} }
@@ -423,6 +429,46 @@ impl ImageLayerInner {
}) })
} }
/// Look up `key` in this image layer and, if present, store its page image
/// (at this layer's LSN) into `reconstruct_state`.
///
/// An image layer holds exactly one version per key, so the result is either
/// `Complete` (image found) or `Missing` (key not in this layer); `Continue`
/// is never returned here.
pub(super) async fn get_value_reconstruct_data(
    &self,
    key: Key,
    reconstruct_state: &mut ValueReconstructState,
    ctx: &RequestContext,
) -> anyhow::Result<ValueReconstructResult> {
    let block_reader = FileBlockReader::new(&self.file, self.file_id);
    let tree_reader =
        DiskBtreeReader::new(self.index_start_blk, self.index_root_blk, &block_reader);

    // Serialize the key into the fixed-size byte form used by the b-tree index.
    let mut keybuf: [u8; KEY_SIZE] = [0u8; KEY_SIZE];
    key.write_to_byte_slice(&mut keybuf);
    if let Some(offset) = tree_reader
        .get(
            &keybuf,
            &RequestContextBuilder::extend(ctx)
                .page_content_kind(PageContentKind::ImageLayerBtreeNode)
                .build(),
        )
        .await?
    {
        // Index hit: read the image blob at the recorded file offset.
        let blob = block_reader
            .block_cursor()
            .read_blob(
                offset,
                &RequestContextBuilder::extend(ctx)
                    .page_content_kind(PageContentKind::ImageLayerValue)
                    .build(),
            )
            .await
            .with_context(|| format!("failed to read value from offset {}", offset))?;
        let value = Bytes::from(blob);
        // The image is stamped with the layer's LSN (all pages share it).
        reconstruct_state.img = Some((self.lsn, value));
        Ok(ValueReconstructResult::Complete)
    } else {
        Ok(ValueReconstructResult::Missing)
    }
}
// Look up the keys in the provided keyspace and update // Look up the keys in the provided keyspace and update
// the reconstruct state with whatever is found. // the reconstruct state with whatever is found.
pub(super) async fn get_values_reconstruct_data( pub(super) async fn get_values_reconstruct_data(
@@ -696,21 +742,11 @@ struct ImageLayerWriterInner {
// where we have chosen their compressed form // where we have chosen their compressed form
uncompressed_bytes_chosen: u64, uncompressed_bytes_chosen: u64,
// Number of keys in the layer.
num_keys: usize,
blob_writer: BlobWriter<false>, blob_writer: BlobWriter<false>,
tree: DiskBtreeBuilder<BlockBuf, KEY_SIZE>, tree: DiskBtreeBuilder<BlockBuf, KEY_SIZE>,
#[cfg_attr(not(feature = "testing"), allow(dead_code))]
last_written_key: Key,
} }
impl ImageLayerWriterInner { impl ImageLayerWriterInner {
fn size(&self) -> u64 {
self.tree.borrow_writer().size() + self.blob_writer.size()
}
/// ///
/// Start building a new image layer. /// Start building a new image layer.
/// ///
@@ -764,8 +800,6 @@ impl ImageLayerWriterInner {
uncompressed_bytes: 0, uncompressed_bytes: 0,
uncompressed_bytes_eligible: 0, uncompressed_bytes_eligible: 0,
uncompressed_bytes_chosen: 0, uncompressed_bytes_chosen: 0,
num_keys: 0,
last_written_key: Key::MIN,
}; };
Ok(writer) Ok(writer)
@@ -786,7 +820,6 @@ impl ImageLayerWriterInner {
let compression = self.conf.image_compression; let compression = self.conf.image_compression;
let uncompressed_len = img.len() as u64; let uncompressed_len = img.len() as u64;
self.uncompressed_bytes += uncompressed_len; self.uncompressed_bytes += uncompressed_len;
self.num_keys += 1;
let (_img, res) = self let (_img, res) = self
.blob_writer .blob_writer
.write_blob_maybe_compressed(img, ctx, compression) .write_blob_maybe_compressed(img, ctx, compression)
@@ -806,11 +839,6 @@ impl ImageLayerWriterInner {
key.write_to_byte_slice(&mut keybuf); key.write_to_byte_slice(&mut keybuf);
self.tree.append(&keybuf, off)?; self.tree.append(&keybuf, off)?;
#[cfg(feature = "testing")]
{
self.last_written_key = key;
}
Ok(()) Ok(())
} }
@@ -821,7 +849,6 @@ impl ImageLayerWriterInner {
self, self,
timeline: &Arc<Timeline>, timeline: &Arc<Timeline>,
ctx: &RequestContext, ctx: &RequestContext,
end_key: Option<Key>,
) -> anyhow::Result<ResidentLayer> { ) -> anyhow::Result<ResidentLayer> {
let index_start_blk = let index_start_blk =
((self.blob_writer.size() + PAGE_SZ as u64 - 1) / PAGE_SZ as u64) as u32; ((self.blob_writer.size() + PAGE_SZ as u64 - 1) / PAGE_SZ as u64) as u32;
@@ -872,23 +899,11 @@ impl ImageLayerWriterInner {
let desc = PersistentLayerDesc::new_img( let desc = PersistentLayerDesc::new_img(
self.tenant_shard_id, self.tenant_shard_id,
self.timeline_id, self.timeline_id,
if let Some(end_key) = end_key { self.key_range.clone(),
self.key_range.start..end_key
} else {
self.key_range.clone()
},
self.lsn, self.lsn,
metadata.len(), metadata.len(),
); );
#[cfg(feature = "testing")]
if let Some(end_key) = end_key {
assert!(
self.last_written_key < end_key,
"written key violates end_key range"
);
}
// Note: Because we open the file in write-only mode, we cannot // Note: Because we open the file in write-only mode, we cannot
// reuse the same VirtualFile for reading later. That's why we don't // reuse the same VirtualFile for reading later. That's why we don't
// set inner.file here. The first read will have to re-open it. // set inner.file here. The first read will have to re-open it.
@@ -965,18 +980,6 @@ impl ImageLayerWriter {
self.inner.as_mut().unwrap().put_image(key, img, ctx).await self.inner.as_mut().unwrap().put_image(key, img, ctx).await
} }
#[cfg(test)]
/// Estimated size of the image layer.
pub(crate) fn estimated_size(&self) -> u64 {
let inner = self.inner.as_ref().unwrap();
inner.blob_writer.size() + inner.tree.borrow_writer().size() + PAGE_SZ as u64
}
#[cfg(test)]
pub(crate) fn num_keys(&self) -> usize {
self.inner.as_ref().unwrap().num_keys
}
/// ///
/// Finish writing the image layer. /// Finish writing the image layer.
/// ///
@@ -985,26 +988,7 @@ impl ImageLayerWriter {
timeline: &Arc<Timeline>, timeline: &Arc<Timeline>,
ctx: &RequestContext, ctx: &RequestContext,
) -> anyhow::Result<super::ResidentLayer> { ) -> anyhow::Result<super::ResidentLayer> {
self.inner.take().unwrap().finish(timeline, ctx, None).await self.inner.take().unwrap().finish(timeline, ctx).await
}
#[cfg(test)]
/// Finish writing the image layer with an end key, used in [`super::split_writer::SplitImageLayerWriter`]. The end key determines the end of the image layer's covered range and is exclusive.
pub(super) async fn finish_with_end_key(
mut self,
timeline: &Arc<Timeline>,
end_key: Key,
ctx: &RequestContext,
) -> anyhow::Result<super::ResidentLayer> {
self.inner
.take()
.unwrap()
.finish(timeline, ctx, Some(end_key))
.await
}
pub(crate) fn size(&self) -> u64 {
self.inner.as_ref().unwrap().size()
} }
} }

View File

@@ -10,11 +10,11 @@ use crate::page_cache::PAGE_SZ;
use crate::repository::{Key, Value}; use crate::repository::{Key, Value};
use crate::tenant::block_io::{BlockCursor, BlockReader, BlockReaderRef}; use crate::tenant::block_io::{BlockCursor, BlockReader, BlockReaderRef};
use crate::tenant::ephemeral_file::EphemeralFile; use crate::tenant::ephemeral_file::EphemeralFile;
use crate::tenant::storage_layer::ValueReconstructResult;
use crate::tenant::timeline::GetVectoredError; use crate::tenant::timeline::GetVectoredError;
use crate::tenant::PageReconstructError; use crate::tenant::{PageReconstructError, Timeline};
use crate::{l0_flush, page_cache, walrecord}; use crate::{l0_flush, page_cache, walrecord};
use anyhow::{anyhow, Result}; use anyhow::{anyhow, ensure, Result};
use camino::Utf8PathBuf;
use pageserver_api::keyspace::KeySpace; use pageserver_api::keyspace::KeySpace;
use pageserver_api::models::InMemoryLayerInfo; use pageserver_api::models::InMemoryLayerInfo;
use pageserver_api::shard::TenantShardId; use pageserver_api::shard::TenantShardId;
@@ -34,7 +34,8 @@ use std::sync::atomic::{AtomicU64, AtomicUsize};
use tokio::sync::{RwLock, RwLockWriteGuard}; use tokio::sync::{RwLock, RwLockWriteGuard};
use super::{ use super::{
DeltaLayerWriter, PersistentLayerDesc, ValueReconstructSituation, ValuesReconstructState, DeltaLayerWriter, ResidentLayer, ValueReconstructSituation, ValueReconstructState,
ValuesReconstructState,
}; };
#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)] #[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)]
@@ -54,6 +55,9 @@ pub struct InMemoryLayer {
/// Writes are only allowed when this is `None`. /// Writes are only allowed when this is `None`.
pub(crate) end_lsn: OnceLock<Lsn>, pub(crate) end_lsn: OnceLock<Lsn>,
/// Used for traversal path. Cached representation of the in-memory layer before frozen.
local_path_str: Arc<str>,
/// Used for traversal path. Cached representation of the in-memory layer after frozen. /// Used for traversal path. Cached representation of the in-memory layer after frozen.
frozen_local_path_str: OnceLock<Arc<str>>, frozen_local_path_str: OnceLock<Arc<str>>,
@@ -244,6 +248,12 @@ impl InMemoryLayer {
self.start_lsn..self.end_lsn_or_max() self.start_lsn..self.end_lsn_or_max()
} }
pub(crate) fn local_path_str(&self) -> &Arc<str> {
self.frozen_local_path_str
.get()
.unwrap_or(&self.local_path_str)
}
/// debugging function to print out the contents of the layer /// debugging function to print out the contents of the layer
/// ///
/// this is likely completely unused /// this is likely completely unused
@@ -293,6 +303,60 @@ impl InMemoryLayer {
Ok(()) Ok(())
} }
/// Look up given value in the layer.
///
/// Scans this in-memory layer's index for `key`, walking the buffered
/// versions within `lsn_range` from newest to oldest, pushing WAL records
/// into `reconstruct_state` until an image or will-init record is found.
///
/// Returns `Complete` when reconstruction needs nothing older, `Continue`
/// when the caller must consult older (on-disk) layers.
pub(crate) async fn get_value_reconstruct_data(
    &self,
    key: Key,
    lsn_range: Range<Lsn>,
    reconstruct_state: &mut ValueReconstructState,
    ctx: &RequestContext,
) -> anyhow::Result<ValueReconstructResult> {
    // This layer only covers LSNs >= start_lsn; a lower request is a caller bug.
    ensure!(lsn_range.start >= self.start_lsn);
    let mut need_image = true;

    let ctx = RequestContextBuilder::extend(ctx)
        .page_content_kind(PageContentKind::InMemoryLayer)
        .build();

    // Read lock: the layer may still be open for writes elsewhere.
    let inner = self.inner.read().await;

    let reader = inner.file.block_cursor();

    // Scan the page versions backwards, starting from `lsn`.
    if let Some(vec_map) = inner.index.get(&key) {
        let slice = vec_map.slice_range(lsn_range);
        // Iterate newest-to-oldest over the versions in range.
        for (entry_lsn, pos) in slice.iter().rev() {
            let buf = reader.read_blob(*pos, &ctx).await?;
            let value = Value::des(&buf)?;
            match value {
                Value::Image(img) => {
                    // Full page image: nothing older is needed.
                    reconstruct_state.img = Some((*entry_lsn, img));
                    return Ok(ValueReconstructResult::Complete);
                }
                Value::WalRecord(rec) => {
                    let will_init = rec.will_init();
                    reconstruct_state.records.push((*entry_lsn, rec));
                    if will_init {
                        // This WAL record initializes the page, so no need to go further back
                        need_image = false;
                        break;
                    }
                }
            }
        }
    }

    // release lock on 'inner'

    // If an older page image is needed to reconstruct the page, let the
    // caller know.
    if need_image {
        Ok(ValueReconstructResult::Continue)
    } else {
        Ok(ValueReconstructResult::Complete)
    }
}
// Look up the keys in the provided keyspace and update // Look up the keys in the provided keyspace and update
// the reconstruct state with whatever is found. // the reconstruct state with whatever is found.
// //
@@ -385,17 +449,20 @@ impl InMemoryLayer {
timeline_id: TimelineId, timeline_id: TimelineId,
tenant_shard_id: TenantShardId, tenant_shard_id: TenantShardId,
start_lsn: Lsn, start_lsn: Lsn,
gate_guard: utils::sync::gate::GateGuard,
ctx: &RequestContext, ctx: &RequestContext,
) -> Result<InMemoryLayer> { ) -> Result<InMemoryLayer> {
trace!("initializing new empty InMemoryLayer for writing on timeline {timeline_id} at {start_lsn}"); trace!("initializing new empty InMemoryLayer for writing on timeline {timeline_id} at {start_lsn}");
let file = let file = EphemeralFile::create(conf, tenant_shard_id, timeline_id, ctx).await?;
EphemeralFile::create(conf, tenant_shard_id, timeline_id, gate_guard, ctx).await?;
let key = InMemoryLayerFileId(file.page_cache_file_id()); let key = InMemoryLayerFileId(file.page_cache_file_id());
Ok(InMemoryLayer { Ok(InMemoryLayer {
file_id: key, file_id: key,
local_path_str: {
let mut buf = String::new();
inmem_layer_log_display(&mut buf, timeline_id, start_lsn, Lsn::MAX).unwrap();
buf.into()
},
frozen_local_path_str: OnceLock::new(), frozen_local_path_str: OnceLock::new(),
conf, conf,
timeline_id, timeline_id,
@@ -415,7 +482,8 @@ impl InMemoryLayer {
/// Common subroutine of the public put_wal_record() and put_page_image() functions. /// Common subroutine of the public put_wal_record() and put_page_image() functions.
/// Adds the page version to the in-memory tree /// Adds the page version to the in-memory tree
pub async fn put_value(
pub(crate) async fn put_value(
&self, &self,
key: Key, key: Key,
lsn: Lsn, lsn: Lsn,
@@ -480,6 +548,8 @@ impl InMemoryLayer {
/// Records the end_lsn for non-dropped layers. /// Records the end_lsn for non-dropped layers.
/// `end_lsn` is exclusive /// `end_lsn` is exclusive
pub async fn freeze(&self, end_lsn: Lsn) { pub async fn freeze(&self, end_lsn: Lsn) {
let inner = self.inner.write().await;
assert!( assert!(
self.start_lsn < end_lsn, self.start_lsn < end_lsn,
"{} >= {}", "{} >= {}",
@@ -497,28 +567,24 @@ impl InMemoryLayer {
}) })
.expect("frozen_local_path_str set only once"); .expect("frozen_local_path_str set only once");
#[cfg(debug_assertions)]
{
let inner = self.inner.write().await;
for vec_map in inner.index.values() { for vec_map in inner.index.values() {
for (lsn, _pos) in vec_map.as_slice() { for (lsn, _pos) in vec_map.as_slice() {
assert!(*lsn < end_lsn); assert!(*lsn < end_lsn);
} }
} }
} }
}
/// Write this frozen in-memory layer to disk. If `key_range` is set, the delta /// Write this frozen in-memory layer to disk. If `key_range` is set, the delta
/// layer will only contain the key range the user specifies, and may return `None` /// layer will only contain the key range the user specifies, and may return `None`
/// if there are no matching keys. /// if there are no matching keys.
/// ///
/// Returns a new delta layer with all the same data as this in-memory layer /// Returns a new delta layer with all the same data as this in-memory layer
pub async fn write_to_disk( pub(crate) async fn write_to_disk(
&self, &self,
timeline: &Arc<Timeline>,
ctx: &RequestContext, ctx: &RequestContext,
key_range: Option<Range<Key>>, key_range: Option<Range<Key>>,
l0_flush_global_state: &l0_flush::Inner, ) -> Result<Option<ResidentLayer>> {
) -> Result<Option<(PersistentLayerDesc, Utf8PathBuf)>> {
// Grab the lock in read-mode. We hold it over the I/O, but because this // Grab the lock in read-mode. We hold it over the I/O, but because this
// layer is not writeable anymore, no one should be trying to acquire the // layer is not writeable anymore, no one should be trying to acquire the
// write lock on it, so we shouldn't block anyone. There's one exception // write lock on it, so we shouldn't block anyone. There's one exception
@@ -530,8 +596,9 @@ impl InMemoryLayer {
// rare though, so we just accept the potential latency hit for now. // rare though, so we just accept the potential latency hit for now.
let inner = self.inner.read().await; let inner = self.inner.read().await;
let l0_flush_global_state = timeline.l0_flush_global_state.inner().clone();
use l0_flush::Inner; use l0_flush::Inner;
let _concurrency_permit = match l0_flush_global_state { let _concurrency_permit = match &*l0_flush_global_state {
Inner::PageCached => None, Inner::PageCached => None,
Inner::Direct { semaphore, .. } => Some(semaphore.acquire().await), Inner::Direct { semaphore, .. } => Some(semaphore.acquire().await),
}; };
@@ -561,7 +628,7 @@ impl InMemoryLayer {
) )
.await?; .await?;
match l0_flush_global_state { match &*l0_flush_global_state {
l0_flush::Inner::PageCached => { l0_flush::Inner::PageCached => {
let ctx = RequestContextBuilder::extend(ctx) let ctx = RequestContextBuilder::extend(ctx)
.page_content_kind(PageContentKind::InMemoryLayer) .page_content_kind(PageContentKind::InMemoryLayer)
@@ -626,7 +693,7 @@ impl InMemoryLayer {
} }
// MAX is used here because we identify L0 layers by full key range // MAX is used here because we identify L0 layers by full key range
let (desc, path) = delta_layer_writer.finish(Key::MAX, ctx).await?; let delta_layer = delta_layer_writer.finish(Key::MAX, timeline, ctx).await?;
// Hold the permit until all the IO is done, including the fsync in `delta_layer_writer.finish()``. // Hold the permit until all the IO is done, including the fsync in `delta_layer_writer.finish()``.
// //
@@ -638,6 +705,6 @@ impl InMemoryLayer {
// we dirtied when writing to the filesystem have been flushed and marked !dirty. // we dirtied when writing to the filesystem have been flushed and marked !dirty.
drop(_concurrency_permit); drop(_concurrency_permit);
Ok(Some((desc, path))) Ok(Some(delta_layer))
} }
} }

View File

@@ -24,7 +24,8 @@ use super::delta_layer::{self, DeltaEntry};
use super::image_layer::{self}; use super::image_layer::{self};
use super::{ use super::{
AsLayerDesc, ImageLayerWriter, LayerAccessStats, LayerAccessStatsReset, LayerName, AsLayerDesc, ImageLayerWriter, LayerAccessStats, LayerAccessStatsReset, LayerName,
LayerVisibilityHint, PersistentLayerDesc, ValuesReconstructState, LayerVisibilityHint, PersistentLayerDesc, ValueReconstructResult, ValueReconstructState,
ValuesReconstructState,
}; };
use utils::generation::Generation; use utils::generation::Generation;
@@ -300,6 +301,42 @@ impl Layer {
self.0.delete_on_drop(); self.0.delete_on_drop();
} }
/// Return data needed to reconstruct given page at LSN.
///
/// It is up to the caller to collect more data from the previous layer and
/// perform WAL redo, if necessary.
///
/// Ensures the layer is resident (downloading it if needed), records the
/// access for eviction/visibility accounting, validates that `key` and
/// `lsn_range` fall within this layer's advertised bounds, then delegates
/// to the concrete delta/image implementation.
///
/// # Cancellation-Safety
///
/// This method is cancellation-safe.
pub(crate) async fn get_value_reconstruct_data(
    &self,
    key: Key,
    lsn_range: Range<Lsn>,
    reconstruct_data: &mut ValueReconstructState,
    ctx: &RequestContext,
) -> anyhow::Result<ValueReconstructResult> {
    use anyhow::ensure;

    // `true`: permit downloading the layer file if it is not resident.
    let layer = self.0.get_or_maybe_download(true, Some(ctx)).await?;
    self.0.access_stats.record_access(ctx);

    if self.layer_desc().is_delta {
        // Delta layers: request must not start before the layer's LSN range.
        ensure!(lsn_range.start >= self.layer_desc().lsn_range.start);
        ensure!(self.layer_desc().key_range.contains(&key));
    } else {
        // Image layers: the single-image LSN must lie within the request.
        ensure!(self.layer_desc().key_range.contains(&key));
        ensure!(lsn_range.start >= self.layer_desc().image_layer_lsn());
        ensure!(lsn_range.end >= self.layer_desc().image_layer_lsn());
    }

    layer
        .get_value_reconstruct_data(key, lsn_range, reconstruct_data, &self.0, ctx)
        .instrument(tracing::debug_span!("get_value_reconstruct_data", layer=%self))
        .await
        .with_context(|| format!("get_value_reconstruct_data for layer {self}"))
}
pub(crate) async fn get_values_reconstruct_data( pub(crate) async fn get_values_reconstruct_data(
&self, &self,
keyspace: KeySpace, keyspace: KeySpace,
@@ -316,7 +353,7 @@ impl Layer {
other => GetVectoredError::Other(anyhow::anyhow!(other)), other => GetVectoredError::Other(anyhow::anyhow!(other)),
})?; })?;
self.record_access(ctx); self.0.access_stats.record_access(ctx);
layer layer
.get_values_reconstruct_data(keyspace, lsn_range, reconstruct_data, &self.0, ctx) .get_values_reconstruct_data(keyspace, lsn_range, reconstruct_data, &self.0, ctx)
@@ -396,18 +433,18 @@ impl Layer {
self.0.info(reset) self.0.info(reset)
} }
pub(crate) fn latest_activity(&self) -> SystemTime { pub(crate) fn access_stats(&self) -> &LayerAccessStats {
self.0.access_stats.latest_activity() &self.0.access_stats
}
pub(crate) fn visibility(&self) -> LayerVisibilityHint {
self.0.access_stats.visibility()
} }
pub(crate) fn local_path(&self) -> &Utf8Path { pub(crate) fn local_path(&self) -> &Utf8Path {
&self.0.path &self.0.path
} }
pub(crate) fn debug_str(&self) -> &Arc<str> {
&self.0.debug_str
}
pub(crate) fn metadata(&self) -> LayerFileMetadata { pub(crate) fn metadata(&self) -> LayerFileMetadata {
self.0.metadata() self.0.metadata()
} }
@@ -451,31 +488,13 @@ impl Layer {
} }
} }
/// Record an access on this layer's stats; if that access flipped the layer
/// from covered to visible, log it and add the layer's size to the timeline's
/// visible-physical-size metric.
fn record_access(&self, ctx: &RequestContext) {
    // `record_access` returns true only when visibility changed to Visible.
    if !self.0.access_stats.record_access(ctx) {
        return;
    }

    tracing::info!(
        "Layer {} became visible as a result of access",
        self.0.desc.key()
    );

    // Account the newly-visible bytes on the owning timeline, if it's alive.
    if let Some(tl) = self.0.timeline.upgrade() {
        tl.metrics
            .visible_physical_size_gauge
            .add(self.0.desc.file_size)
    }
}
pub(crate) fn set_visibility(&self, visibility: LayerVisibilityHint) { pub(crate) fn set_visibility(&self, visibility: LayerVisibilityHint) {
let old_visibility = self.0.access_stats.set_visibility(visibility.clone()); let old_visibility = self.access_stats().set_visibility(visibility.clone());
use LayerVisibilityHint::*; use LayerVisibilityHint::*;
match (old_visibility, visibility) { match (old_visibility, visibility) {
(Visible, Covered) => { (Visible, Covered) => {
// Subtract this layer's contribution to the visible size metric // Subtract this layer's contribution to the visible size metric
if let Some(tl) = self.0.timeline.upgrade() { if let Some(tl) = self.0.timeline.upgrade() {
debug_assert!(
tl.metrics.visible_physical_size_gauge.get() >= self.0.desc.file_size
);
tl.metrics tl.metrics
.visible_physical_size_gauge .visible_physical_size_gauge
.sub(self.0.desc.file_size) .sub(self.0.desc.file_size)
@@ -500,7 +519,7 @@ impl Layer {
/// ///
/// However when we want something evicted, we cannot evict it right away as there might be current /// However when we want something evicted, we cannot evict it right away as there might be current
/// reads happening on it. For example: it has been searched from [`LayerMap::search`] but not yet /// reads happening on it. For example: it has been searched from [`LayerMap::search`] but not yet
/// read with [`Layer::get_values_reconstruct_data`]. /// read with [`Layer::get_value_reconstruct_data`].
/// ///
/// [`LayerMap::search`]: crate::tenant::layer_map::LayerMap::search /// [`LayerMap::search`]: crate::tenant::layer_map::LayerMap::search
#[derive(Debug)] #[derive(Debug)]
@@ -581,6 +600,9 @@ struct LayerInner {
/// Full path to the file; unclear if this should exist anymore. /// Full path to the file; unclear if this should exist anymore.
path: Utf8PathBuf, path: Utf8PathBuf,
/// String representation of the layer, used for traversal id.
debug_str: Arc<str>,
desc: PersistentLayerDesc, desc: PersistentLayerDesc,
/// Timeline access is needed for remote timeline client and metrics. /// Timeline access is needed for remote timeline client and metrics.
@@ -693,9 +715,6 @@ impl Drop for LayerInner {
} }
if matches!(self.access_stats.visibility(), LayerVisibilityHint::Visible) { if matches!(self.access_stats.visibility(), LayerVisibilityHint::Visible) {
debug_assert!(
timeline.metrics.visible_physical_size_gauge.get() >= self.desc.file_size
);
timeline timeline
.metrics .metrics
.visible_physical_size_gauge .visible_physical_size_gauge
@@ -817,6 +836,9 @@ impl LayerInner {
LayerInner { LayerInner {
conf, conf,
debug_str: {
format!("timelines/{}/{}", timeline.timeline_id, desc.layer_name()).into()
},
path: local_path, path: local_path,
desc, desc,
timeline: Arc::downgrade(timeline), timeline: Arc::downgrade(timeline),
@@ -1737,6 +1759,28 @@ impl DownloadedLayer {
.map_err(|e| anyhow::anyhow!("layer load failed earlier: {e}")) .map_err(|e| anyhow::anyhow!("layer load failed earlier: {e}"))
} }
async fn get_value_reconstruct_data(
&self,
key: Key,
lsn_range: Range<Lsn>,
reconstruct_data: &mut ValueReconstructState,
owner: &Arc<LayerInner>,
ctx: &RequestContext,
) -> anyhow::Result<ValueReconstructResult> {
use LayerKind::*;
match self.get(owner, ctx).await? {
Delta(d) => {
d.get_value_reconstruct_data(key, lsn_range, reconstruct_data, ctx)
.await
}
Image(i) => {
i.get_value_reconstruct_data(key, reconstruct_data, ctx)
.await
}
}
}
async fn get_values_reconstruct_data( async fn get_values_reconstruct_data(
&self, &self,
keyspace: KeySpace, keyspace: KeySpace,
@@ -1835,7 +1879,7 @@ impl ResidentLayer {
// this is valid because the DownloadedLayer::kind is a OnceCell, not a // this is valid because the DownloadedLayer::kind is a OnceCell, not a
// Mutex<OnceCell>, so we cannot go and deinitialize the value with OnceCell::take // Mutex<OnceCell>, so we cannot go and deinitialize the value with OnceCell::take
// while it's being held. // while it's being held.
self.owner.record_access(ctx); owner.access_stats.record_access(ctx);
delta_layer::DeltaLayerInner::load_keys(d, ctx) delta_layer::DeltaLayerInner::load_keys(d, ctx)
.await .await

View File

@@ -39,7 +39,7 @@ async fn smoke_test() {
let layer = { let layer = {
let mut layers = { let mut layers = {
let layers = timeline.layers.read().await; let layers = timeline.layers.read().await;
layers.likely_resident_layers().cloned().collect::<Vec<_>>() layers.likely_resident_layers().collect::<Vec<_>>()
}; };
assert_eq!(layers.len(), 1); assert_eq!(layers.len(), 1);
@@ -50,26 +50,13 @@ async fn smoke_test() {
// all layers created at pageserver are like `layer`, initialized with strong // all layers created at pageserver are like `layer`, initialized with strong
// Arc<DownloadedLayer>. // Arc<DownloadedLayer>.
let controlfile_keyspace = KeySpace {
ranges: vec![CONTROLFILE_KEY..CONTROLFILE_KEY.next()],
};
let img_before = { let img_before = {
let mut data = ValuesReconstructState::default(); let mut data = ValueReconstructState::default();
layer layer
.get_values_reconstruct_data( .get_value_reconstruct_data(CONTROLFILE_KEY, Lsn(0x10)..Lsn(0x11), &mut data, &ctx)
controlfile_keyspace.clone(),
Lsn(0x10)..Lsn(0x11),
&mut data,
&ctx,
)
.await .await
.unwrap(); .unwrap();
data.keys data.img
.remove(&CONTROLFILE_KEY)
.expect("must be present")
.expect("should not error")
.img
.take() .take()
.expect("tenant harness writes the control file") .expect("tenant harness writes the control file")
}; };
@@ -87,24 +74,13 @@ async fn smoke_test() {
// on accesses when the layer is evicted, it will automatically be downloaded. // on accesses when the layer is evicted, it will automatically be downloaded.
let img_after = { let img_after = {
let mut data = ValuesReconstructState::default(); let mut data = ValueReconstructState::default();
layer layer
.get_values_reconstruct_data( .get_value_reconstruct_data(CONTROLFILE_KEY, Lsn(0x10)..Lsn(0x11), &mut data, &ctx)
controlfile_keyspace.clone(),
Lsn(0x10)..Lsn(0x11),
&mut data,
&ctx,
)
.instrument(download_span.clone()) .instrument(download_span.clone())
.await .await
.unwrap(); .unwrap();
data.keys data.img.take().unwrap()
.remove(&CONTROLFILE_KEY)
.expect("must be present")
.expect("should not error")
.img
.take()
.expect("tenant harness writes the control file")
}; };
assert_eq!(img_before, img_after); assert_eq!(img_before, img_after);
@@ -176,7 +152,7 @@ async fn smoke_test() {
{ {
let layers = &[layer]; let layers = &[layer];
let mut g = timeline.layers.write().await; let mut g = timeline.layers.write().await;
g.open_mut().unwrap().finish_gc_timeline(layers); g.finish_gc_timeline(layers);
// this just updates the remote_physical_size for demonstration purposes // this just updates the remote_physical_size for demonstration purposes
rtc.schedule_gc_update(layers).unwrap(); rtc.schedule_gc_update(layers).unwrap();
} }
@@ -216,7 +192,7 @@ async fn evict_and_wait_on_wanted_deleted() {
let layer = { let layer = {
let mut layers = { let mut layers = {
let layers = timeline.layers.read().await; let layers = timeline.layers.read().await;
layers.likely_resident_layers().cloned().collect::<Vec<_>>() layers.likely_resident_layers().collect::<Vec<_>>()
}; };
assert_eq!(layers.len(), 1); assert_eq!(layers.len(), 1);
@@ -260,7 +236,7 @@ async fn evict_and_wait_on_wanted_deleted() {
// the deletion of the layer in remote_storage happens. // the deletion of the layer in remote_storage happens.
{ {
let mut layers = timeline.layers.write().await; let mut layers = timeline.layers.write().await;
layers.open_mut().unwrap().finish_gc_timeline(&[layer]); layers.finish_gc_timeline(&[layer]);
} }
SpawnBlockingPoolHelper::consume_and_release_all_of_spawn_blocking_threads(&handle).await; SpawnBlockingPoolHelper::consume_and_release_all_of_spawn_blocking_threads(&handle).await;
@@ -301,7 +277,7 @@ fn read_wins_pending_eviction() {
let layer = { let layer = {
let mut layers = { let mut layers = {
let layers = timeline.layers.read().await; let layers = timeline.layers.read().await;
layers.likely_resident_layers().cloned().collect::<Vec<_>>() layers.likely_resident_layers().collect::<Vec<_>>()
}; };
assert_eq!(layers.len(), 1); assert_eq!(layers.len(), 1);
@@ -433,7 +409,7 @@ fn multiple_pending_evictions_scenario(name: &'static str, in_order: bool) {
let layer = { let layer = {
let mut layers = { let mut layers = {
let layers = timeline.layers.read().await; let layers = timeline.layers.read().await;
layers.likely_resident_layers().cloned().collect::<Vec<_>>() layers.likely_resident_layers().collect::<Vec<_>>()
}; };
assert_eq!(layers.len(), 1); assert_eq!(layers.len(), 1);
@@ -602,7 +578,7 @@ async fn cancelled_get_or_maybe_download_does_not_cancel_eviction() {
let layer = { let layer = {
let mut layers = { let mut layers = {
let layers = timeline.layers.read().await; let layers = timeline.layers.read().await;
layers.likely_resident_layers().cloned().collect::<Vec<_>>() layers.likely_resident_layers().collect::<Vec<_>>()
}; };
assert_eq!(layers.len(), 1); assert_eq!(layers.len(), 1);
@@ -682,7 +658,7 @@ async fn evict_and_wait_does_not_wait_for_download() {
let layer = { let layer = {
let mut layers = { let mut layers = {
let layers = timeline.layers.read().await; let layers = timeline.layers.read().await;
layers.likely_resident_layers().cloned().collect::<Vec<_>>() layers.likely_resident_layers().collect::<Vec<_>>()
}; };
assert_eq!(layers.len(), 1); assert_eq!(layers.len(), 1);
@@ -801,9 +777,9 @@ async fn eviction_cancellation_on_drop() {
let (evicted_layer, not_evicted) = { let (evicted_layer, not_evicted) = {
let mut layers = { let mut layers = {
let mut guard = timeline.layers.write().await; let mut guard = timeline.layers.write().await;
let layers = guard.likely_resident_layers().cloned().collect::<Vec<_>>(); let layers = guard.likely_resident_layers().collect::<Vec<_>>();
// remove the layers from layermap // remove the layers from layermap
guard.open_mut().unwrap().finish_gc_timeline(&layers); guard.finish_gc_timeline(&layers);
layers layers
}; };
@@ -854,7 +830,7 @@ async fn eviction_cancellation_on_drop() {
fn layer_size() { fn layer_size() {
assert_eq!(size_of::<LayerAccessStats>(), 8); assert_eq!(size_of::<LayerAccessStats>(), 8);
assert_eq!(size_of::<PersistentLayerDesc>(), 104); assert_eq!(size_of::<PersistentLayerDesc>(), 104);
assert_eq!(size_of::<LayerInner>(), 296); assert_eq!(size_of::<LayerInner>(), 312);
// it also has the utf8 path // it also has the utf8 path
} }

View File

@@ -1,454 +0,0 @@
use std::{ops::Range, sync::Arc};
use bytes::Bytes;
use pageserver_api::key::{Key, KEY_SIZE};
use utils::{id::TimelineId, lsn::Lsn, shard::TenantShardId};
use crate::tenant::storage_layer::Layer;
use crate::{config::PageServerConf, context::RequestContext, repository::Value, tenant::Timeline};
use super::{DeltaLayerWriter, ImageLayerWriter, ResidentLayer};
/// An image writer that takes images and produces multiple image layers. The interface does not
/// guarantee atomicity (i.e., if the image layer generation fails, there might be leftover files
/// to be cleaned up)
#[must_use]
pub struct SplitImageLayerWriter {
    /// Writer for the image layer currently being filled; swapped for a fresh one in
    /// `put_image` when the size estimate crosses `target_layer_size`.
    inner: ImageLayerWriter,
    /// Soft upper bound (bytes) per produced layer. The estimate ignores compression,
    /// so finished layers may come out smaller.
    target_layer_size: u64,
    /// Layers already finished; handed back to the caller by `finish`/`take`.
    generated_layers: Vec<ResidentLayer>,
    // Construction parameters, retained so replacement `ImageLayerWriter`s can be
    // created whenever a split happens.
    conf: &'static PageServerConf,
    timeline_id: TimelineId,
    tenant_shard_id: TenantShardId,
    /// All images written through this writer are at this single LSN.
    lsn: Lsn,
}
impl SplitImageLayerWriter {
    /// Creates a split image writer covering keys from `start_key` up to `Key::MAX`,
    /// producing image layers at `lsn` that each aim to stay under `target_layer_size`.
    pub async fn new(
        conf: &'static PageServerConf,
        timeline_id: TimelineId,
        tenant_shard_id: TenantShardId,
        start_key: Key,
        lsn: Lsn,
        target_layer_size: u64,
        ctx: &RequestContext,
    ) -> anyhow::Result<Self> {
        let first_writer = ImageLayerWriter::new(
            conf,
            timeline_id,
            tenant_shard_id,
            &(start_key..Key::MAX),
            lsn,
            ctx,
        )
        .await?;
        Ok(Self {
            inner: first_writer,
            target_layer_size,
            generated_layers: Vec::new(),
            conf,
            timeline_id,
            tenant_shard_id,
            lsn,
        })
    }

    /// Adds one image. If the current layer would grow past the target size, it is
    /// finished with end key `key` and a fresh layer starting at `key` takes over.
    pub async fn put_image(
        &mut self,
        key: Key,
        img: Bytes,
        tline: &Arc<Timeline>,
        ctx: &RequestContext,
    ) -> anyhow::Result<()> {
        // Upper-bound estimate of the extra space this key/image could take: compression
        // is not considered here, so the resulting layer may be smaller than the target.
        let estimated_addition = KEY_SIZE as u64 + img.len() as u64;
        let should_split = self.inner.num_keys() >= 1
            && self.inner.estimated_size() + estimated_addition >= self.target_layer_size;
        if should_split {
            let fresh_writer = ImageLayerWriter::new(
                self.conf,
                self.timeline_id,
                self.tenant_shard_id,
                &(key..Key::MAX),
                self.lsn,
                ctx,
            )
            .await?;
            let full_writer = std::mem::replace(&mut self.inner, fresh_writer);
            let finished = full_writer.finish_with_end_key(tline, key, ctx).await?;
            self.generated_layers.push(finished);
        }
        self.inner.put_image(key, img, ctx).await
    }

    /// Finishes the in-progress layer with `end_key` and returns every layer produced.
    pub(crate) async fn finish(
        self,
        tline: &Arc<Timeline>,
        ctx: &RequestContext,
        end_key: Key,
    ) -> anyhow::Result<Vec<ResidentLayer>> {
        let Self {
            mut generated_layers,
            inner,
            ..
        } = self;
        let last_layer = inner.finish_with_end_key(tline, end_key, ctx).await?;
        generated_layers.push(last_layer);
        Ok(generated_layers)
    }

    /// When split writer fails, the caller should call this function and handle partially generated layers.
    #[allow(dead_code)]
    pub(crate) async fn take(self) -> anyhow::Result<(Vec<ResidentLayer>, ImageLayerWriter)> {
        Ok((self.generated_layers, self.inner))
    }
}
/// A delta writer that takes key-lsn-values and produces multiple delta layers. The interface does not
/// guarantee atomicity (i.e., if the delta layer generation fails, there might be leftover files
/// to be cleaned up).
#[must_use]
pub struct SplitDeltaLayerWriter {
    /// Writer for the delta layer currently being filled; swapped for a fresh one in
    /// `put_value` when the size estimate crosses `target_layer_size`.
    inner: DeltaLayerWriter,
    /// Soft upper bound (bytes) per produced layer. The per-entry estimate is rough,
    /// so real layers may end up slightly larger or smaller.
    target_layer_size: u64,
    /// Layers already finished; handed back to the caller by `finish`/`take`.
    generated_layers: Vec<ResidentLayer>,
    // Construction parameters, retained so replacement `DeltaLayerWriter`s can be
    // created whenever a split happens.
    conf: &'static PageServerConf,
    timeline_id: TimelineId,
    tenant_shard_id: TenantShardId,
    /// LSN range shared by every layer produced by this writer.
    lsn_range: Range<Lsn>,
}
impl SplitDeltaLayerWriter {
    /// Creates a split delta writer covering `lsn_range`, starting at `start_key`,
    /// producing delta layers that each aim to stay under `target_layer_size`.
    pub async fn new(
        conf: &'static PageServerConf,
        timeline_id: TimelineId,
        tenant_shard_id: TenantShardId,
        start_key: Key,
        lsn_range: Range<Lsn>,
        target_layer_size: u64,
        ctx: &RequestContext,
    ) -> anyhow::Result<Self> {
        Ok(Self {
            target_layer_size,
            inner: DeltaLayerWriter::new(
                conf,
                timeline_id,
                tenant_shard_id,
                start_key,
                lsn_range.clone(),
                ctx,
            )
            .await?,
            generated_layers: Vec::new(),
            conf,
            timeline_id,
            tenant_shard_id,
            lsn_range,
        })
    }

    /// Adds one key-lsn-value. If the current layer would grow past the target size,
    /// it is finished at `key` and a fresh layer starting at `key` takes over.
    pub async fn put_value(
        &mut self,
        key: Key,
        lsn: Lsn,
        val: Value,
        tline: &Arc<Timeline>,
        ctx: &RequestContext,
    ) -> anyhow::Result<()> {
        // The current estimation is key size plus LSN size plus value size estimation. This is not an accurate
        // number, and therefore the final layer size could be a little bit larger or smaller than the target.
        let addition_size_estimation = KEY_SIZE as u64 + 8 /* LSN u64 size */ + 80 /* value size estimation */;
        if self.inner.num_keys() >= 1
            && self.inner.estimated_size() + addition_size_estimation >= self.target_layer_size
        {
            let next_delta_writer = DeltaLayerWriter::new(
                self.conf,
                self.timeline_id,
                self.tenant_shard_id,
                key,
                self.lsn_range.clone(),
                ctx,
            )
            .await?;
            let prev_delta_writer = std::mem::replace(&mut self.inner, next_delta_writer);
            let (desc, path) = prev_delta_writer.finish(key, ctx).await?;
            let delta_layer = Layer::finish_creating(self.conf, tline, desc, &path)?;
            self.generated_layers.push(delta_layer);
        }
        self.inner.put_value(key, lsn, val, ctx).await
    }

    /// Finishes the in-progress layer with `end_key` and returns every layer produced.
    pub(crate) async fn finish(
        self,
        tline: &Arc<Timeline>,
        ctx: &RequestContext,
        end_key: Key,
    ) -> anyhow::Result<Vec<ResidentLayer>> {
        // Bind `conf` in the destructuring pattern: the original read `self.conf` after
        // destructuring `self`, which is a borrow-of-partially-moved-value error (E0382).
        let Self {
            mut generated_layers,
            inner,
            conf,
            ..
        } = self;
        let (desc, path) = inner.finish(end_key, ctx).await?;
        let delta_layer = Layer::finish_creating(conf, tline, desc, &path)?;
        generated_layers.push(delta_layer);
        Ok(generated_layers)
    }

    /// When split writer fails, the caller should call this function and handle partially generated layers.
    #[allow(dead_code)]
    pub(crate) async fn take(self) -> anyhow::Result<(Vec<ResidentLayer>, DeltaLayerWriter)> {
        Ok((self.generated_layers, self.inner))
    }
}
#[cfg(test)]
mod tests {
    use crate::{
        tenant::{
            harness::{TenantHarness, TIMELINE_ID},
            storage_layer::AsLayerDesc,
        },
        DEFAULT_PG_VERSION,
    };

    use super::*;

    /// Builds a test key: fixed hex prefix with `field6` set to `id`, so consecutive
    /// ids produce consecutive keys.
    fn get_key(id: u32) -> Key {
        let mut key = Key::from_hex("000000000033333333444444445500000000").unwrap();
        key.field6 = id;
        key
    }

    /// A small 64-byte image whose text encodes `id`.
    fn get_img(id: u32) -> Bytes {
        format!("{id:064}").into()
    }

    /// An 8 KiB zero-filled image, large relative to the split thresholds used below.
    fn get_large_img() -> Bytes {
        vec![0; 8192].into()
    }

    /// One small image/value each with a 4 MiB target: no split should happen, so
    /// exactly one image layer and one delta layer are produced.
    #[tokio::test]
    async fn write_one_image() {
        let harness = TenantHarness::create("split_writer_write_one_image")
            .await
            .unwrap();
        let (tenant, ctx) = harness.load().await;

        let tline = tenant
            .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)
            .await
            .unwrap();

        let mut image_writer = SplitImageLayerWriter::new(
            tenant.conf,
            tline.timeline_id,
            tenant.tenant_shard_id,
            get_key(0),
            Lsn(0x18),
            4 * 1024 * 1024,
            &ctx,
        )
        .await
        .unwrap();

        let mut delta_writer = SplitDeltaLayerWriter::new(
            tenant.conf,
            tline.timeline_id,
            tenant.tenant_shard_id,
            get_key(0),
            Lsn(0x18)..Lsn(0x20),
            4 * 1024 * 1024,
            &ctx,
        )
        .await
        .unwrap();

        image_writer
            .put_image(get_key(0), get_img(0), &tline, &ctx)
            .await
            .unwrap();
        let layers = image_writer
            .finish(&tline, &ctx, get_key(10))
            .await
            .unwrap();
        assert_eq!(layers.len(), 1);

        delta_writer
            .put_value(
                get_key(0),
                Lsn(0x18),
                Value::Image(get_img(0)),
                &tline,
                &ctx,
            )
            .await
            .unwrap();
        let layers = delta_writer
            .finish(&tline, &ctx, get_key(10))
            .await
            .unwrap();
        assert_eq!(layers.len(), 1);
    }

    /// Writes 2000 large (8 KiB) entries with a 4 MiB target. Each image layer fits
    /// roughly 512 entries (4 MiB / 8 KiB), hence N/512 + 1 layers; the delta writer
    /// is asserted to split at the same rate. Adjacent produced layers must share
    /// exact key-range boundaries (no gaps, no MIN/MAX endpoints).
    #[tokio::test]
    async fn write_split() {
        let harness = TenantHarness::create("split_writer_write_split")
            .await
            .unwrap();
        let (tenant, ctx) = harness.load().await;

        let tline = tenant
            .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)
            .await
            .unwrap();

        let mut image_writer = SplitImageLayerWriter::new(
            tenant.conf,
            tline.timeline_id,
            tenant.tenant_shard_id,
            get_key(0),
            Lsn(0x18),
            4 * 1024 * 1024,
            &ctx,
        )
        .await
        .unwrap();
        let mut delta_writer = SplitDeltaLayerWriter::new(
            tenant.conf,
            tline.timeline_id,
            tenant.tenant_shard_id,
            get_key(0),
            Lsn(0x18)..Lsn(0x20),
            4 * 1024 * 1024,
            &ctx,
        )
        .await
        .unwrap();
        const N: usize = 2000;
        for i in 0..N {
            let i = i as u32;
            image_writer
                .put_image(get_key(i), get_large_img(), &tline, &ctx)
                .await
                .unwrap();
            delta_writer
                .put_value(
                    get_key(i),
                    Lsn(0x20),
                    Value::Image(get_large_img()),
                    &tline,
                    &ctx,
                )
                .await
                .unwrap();
        }
        let image_layers = image_writer
            .finish(&tline, &ctx, get_key(N as u32))
            .await
            .unwrap();
        let delta_layers = delta_writer
            .finish(&tline, &ctx, get_key(N as u32))
            .await
            .unwrap();
        assert_eq!(image_layers.len(), N / 512 + 1);
        assert_eq!(delta_layers.len(), N / 512 + 1);
        // The produced layers must tile the written key space: no layer touches
        // Key::MIN/Key::MAX, and each layer starts where the previous one ended.
        for idx in 0..image_layers.len() {
            assert_ne!(image_layers[idx].layer_desc().key_range.start, Key::MIN);
            assert_ne!(image_layers[idx].layer_desc().key_range.end, Key::MAX);
            assert_ne!(delta_layers[idx].layer_desc().key_range.start, Key::MIN);
            assert_ne!(delta_layers[idx].layer_desc().key_range.end, Key::MAX);
            if idx > 0 {
                assert_eq!(
                    image_layers[idx - 1].layer_desc().key_range.end,
                    image_layers[idx].layer_desc().key_range.start
                );
                assert_eq!(
                    delta_layers[idx - 1].layer_desc().key_range.end,
                    delta_layers[idx].layer_desc().key_range.start
                );
            }
        }
    }

    /// Uses a tiny 4 KiB target: a small entry followed by an 8 KiB one overflows
    /// the target, so each writer is expected to produce exactly two layers.
    #[tokio::test]
    async fn write_large_img() {
        let harness = TenantHarness::create("split_writer_write_large_img")
            .await
            .unwrap();
        let (tenant, ctx) = harness.load().await;

        let tline = tenant
            .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)
            .await
            .unwrap();

        let mut image_writer = SplitImageLayerWriter::new(
            tenant.conf,
            tline.timeline_id,
            tenant.tenant_shard_id,
            get_key(0),
            Lsn(0x18),
            4 * 1024,
            &ctx,
        )
        .await
        .unwrap();

        let mut delta_writer = SplitDeltaLayerWriter::new(
            tenant.conf,
            tline.timeline_id,
            tenant.tenant_shard_id,
            get_key(0),
            Lsn(0x18)..Lsn(0x20),
            4 * 1024,
            &ctx,
        )
        .await
        .unwrap();

        image_writer
            .put_image(get_key(0), get_img(0), &tline, &ctx)
            .await
            .unwrap();
        image_writer
            .put_image(get_key(1), get_large_img(), &tline, &ctx)
            .await
            .unwrap();
        let layers = image_writer
            .finish(&tline, &ctx, get_key(10))
            .await
            .unwrap();
        assert_eq!(layers.len(), 2);

        delta_writer
            .put_value(
                get_key(0),
                Lsn(0x18),
                Value::Image(get_img(0)),
                &tline,
                &ctx,
            )
            .await
            .unwrap();
        delta_writer
            .put_value(
                get_key(1),
                Lsn(0x1A),
                Value::Image(get_large_img()),
                &tline,
                &ctx,
            )
            .await
            .unwrap();
        let layers = delta_writer
            .finish(&tline, &ctx, get_key(10))
            .await
            .unwrap();
        assert_eq!(layers.len(), 2);
    }
}

View File

@@ -407,16 +407,9 @@ async fn gc_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
error_run_count += 1; error_run_count += 1;
let wait_duration = Duration::from_secs_f64(wait_duration); let wait_duration = Duration::from_secs_f64(wait_duration);
if matches!(e, crate::tenant::GcError::TimelineCancelled) { error!(
// Timeline was cancelled during gc. We might either be in an event "Gc failed {error_run_count} times, retrying in {wait_duration:?}: {e:?}",
// that affects the entire tenant (tenant deletion, pageserver shutdown), );
// or in one that affects the timeline only (timeline deletion).
// Therefore, don't exit the loop.
info!("Gc failed {error_run_count} times, retrying in {wait_duration:?}: {e:?}");
} else {
error!("Gc failed {error_run_count} times, retrying in {wait_duration:?}: {e:?}");
}
wait_duration wait_duration
} }
} }

File diff suppressed because it is too large Load Diff

View File

@@ -19,10 +19,8 @@ use bytes::Bytes;
use enumset::EnumSet; use enumset::EnumSet;
use fail::fail_point; use fail::fail_point;
use itertools::Itertools; use itertools::Itertools;
use pageserver_api::key::KEY_SIZE;
use pageserver_api::keyspace::ShardedRange; use pageserver_api::keyspace::ShardedRange;
use pageserver_api::shard::{ShardCount, ShardIdentity, TenantShardId}; use pageserver_api::shard::{ShardCount, ShardIdentity, TenantShardId};
use serde::Serialize;
use tokio_util::sync::CancellationToken; use tokio_util::sync::CancellationToken;
use tracing::{debug, info, info_span, trace, warn, Instrument}; use tracing::{debug, info, info_span, trace, warn, Instrument};
use utils::id::TimelineId; use utils::id::TimelineId;
@@ -43,7 +41,6 @@ use crate::virtual_file::{MaybeFatalIo, VirtualFile};
use crate::keyspace::KeySpace; use crate::keyspace::KeySpace;
use crate::repository::{Key, Value}; use crate::repository::{Key, Value};
use crate::walrecord::NeonWalRecord;
use utils::lsn::Lsn; use utils::lsn::Lsn;
@@ -76,7 +73,6 @@ impl KeyHistoryRetention {
key: Key, key: Key,
delta_writer: &mut Vec<(Key, Lsn, Value)>, delta_writer: &mut Vec<(Key, Lsn, Value)>,
mut image_writer: Option<&mut ImageLayerWriter>, mut image_writer: Option<&mut ImageLayerWriter>,
stat: &mut CompactionStatistics,
ctx: &RequestContext, ctx: &RequestContext,
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
let mut first_batch = true; let mut first_batch = true;
@@ -86,7 +82,6 @@ impl KeyHistoryRetention {
let Value::Image(img) = &logs[0].1 else { let Value::Image(img) = &logs[0].1 else {
unreachable!() unreachable!()
}; };
stat.produce_image_key(img);
if let Some(image_writer) = image_writer.as_mut() { if let Some(image_writer) = image_writer.as_mut() {
image_writer.put_image(key, img.clone(), ctx).await?; image_writer.put_image(key, img.clone(), ctx).await?;
} else { } else {
@@ -94,111 +89,24 @@ impl KeyHistoryRetention {
} }
} else { } else {
for (lsn, val) in logs { for (lsn, val) in logs {
stat.produce_key(&val);
delta_writer.push((key, lsn, val)); delta_writer.push((key, lsn, val));
} }
} }
first_batch = false; first_batch = false;
} else { } else {
for (lsn, val) in logs { for (lsn, val) in logs {
stat.produce_key(&val);
delta_writer.push((key, lsn, val)); delta_writer.push((key, lsn, val));
} }
} }
} }
let KeyLogAtLsn(above_horizon_logs) = self.above_horizon; let KeyLogAtLsn(above_horizon_logs) = self.above_horizon;
for (lsn, val) in above_horizon_logs { for (lsn, val) in above_horizon_logs {
stat.produce_key(&val);
delta_writer.push((key, lsn, val)); delta_writer.push((key, lsn, val));
} }
Ok(()) Ok(())
} }
} }
#[derive(Debug, Serialize, Default)]
struct CompactionStatisticsNumSize {
num: u64,
size: u64,
}
#[derive(Debug, Serialize, Default)]
pub struct CompactionStatistics {
delta_layer_visited: CompactionStatisticsNumSize,
image_layer_visited: CompactionStatisticsNumSize,
delta_layer_produced: CompactionStatisticsNumSize,
image_layer_produced: CompactionStatisticsNumSize,
num_delta_layer_discarded: usize,
num_image_layer_discarded: usize,
num_unique_keys_visited: usize,
wal_keys_visited: CompactionStatisticsNumSize,
image_keys_visited: CompactionStatisticsNumSize,
wal_produced: CompactionStatisticsNumSize,
image_produced: CompactionStatisticsNumSize,
}
impl CompactionStatistics {
fn estimated_size_of_value(val: &Value) -> usize {
match val {
Value::Image(img) => img.len(),
Value::WalRecord(NeonWalRecord::Postgres { rec, .. }) => rec.len(),
_ => std::mem::size_of::<NeonWalRecord>(),
}
}
fn estimated_size_of_key() -> usize {
KEY_SIZE // TODO: distinguish image layer and delta layer (count LSN in delta layer)
}
fn visit_delta_layer(&mut self, size: u64) {
self.delta_layer_visited.num += 1;
self.delta_layer_visited.size += size;
}
fn visit_image_layer(&mut self, size: u64) {
self.image_layer_visited.num += 1;
self.image_layer_visited.size += size;
}
fn on_unique_key_visited(&mut self) {
self.num_unique_keys_visited += 1;
}
fn visit_wal_key(&mut self, val: &Value) {
self.wal_keys_visited.num += 1;
self.wal_keys_visited.size +=
Self::estimated_size_of_value(val) as u64 + Self::estimated_size_of_key() as u64;
}
fn visit_image_key(&mut self, val: &Value) {
self.image_keys_visited.num += 1;
self.image_keys_visited.size +=
Self::estimated_size_of_value(val) as u64 + Self::estimated_size_of_key() as u64;
}
fn produce_key(&mut self, val: &Value) {
match val {
Value::Image(img) => self.produce_image_key(img),
Value::WalRecord(_) => self.produce_wal_key(val),
}
}
fn produce_wal_key(&mut self, val: &Value) {
self.wal_produced.num += 1;
self.wal_produced.size +=
Self::estimated_size_of_value(val) as u64 + Self::estimated_size_of_key() as u64;
}
fn produce_image_key(&mut self, val: &Bytes) {
self.image_produced.num += 1;
self.image_produced.size += val.len() as u64 + Self::estimated_size_of_key() as u64;
}
fn discard_delta_layer(&mut self) {
self.num_delta_layer_discarded += 1;
}
fn discard_image_layer(&mut self) {
self.num_image_layer_discarded += 1;
}
fn produce_delta_layer(&mut self, size: u64) {
self.delta_layer_produced.num += 1;
self.delta_layer_produced.size += size;
}
fn produce_image_layer(&mut self, size: u64) {
self.image_layer_produced.num += 1;
self.image_layer_produced.size += size;
}
}
impl Timeline { impl Timeline {
/// TODO: cancellation /// TODO: cancellation
/// ///
@@ -210,18 +118,12 @@ impl Timeline {
ctx: &RequestContext, ctx: &RequestContext,
) -> Result<bool, CompactionError> { ) -> Result<bool, CompactionError> {
if flags.contains(CompactFlags::EnhancedGcBottomMostCompaction) { if flags.contains(CompactFlags::EnhancedGcBottomMostCompaction) {
self.compact_with_gc(cancel, flags, ctx) self.compact_with_gc(cancel, ctx)
.await .await
.map_err(CompactionError::Other)?; .map_err(CompactionError::Other)?;
return Ok(false); return Ok(false);
} }
if flags.contains(CompactFlags::DryRun) {
return Err(CompactionError::Other(anyhow!(
"dry-run mode is not supported for legacy compaction for now"
)));
}
// High level strategy for compaction / image creation: // High level strategy for compaction / image creation:
// //
// 1. First, calculate the desired "partitioning" of the // 1. First, calculate the desired "partitioning" of the
@@ -371,7 +273,7 @@ impl Timeline {
); );
let layers = self.layers.read().await; let layers = self.layers.read().await;
for layer_desc in layers.layer_map()?.iter_historic_layers() { for layer_desc in layers.layer_map().iter_historic_layers() {
let layer = layers.get_from_desc(&layer_desc); let layer = layers.get_from_desc(&layer_desc);
if layer.metadata().shard.shard_count == self.shard_identity.count { if layer.metadata().shard.shard_count == self.shard_identity.count {
// This layer does not belong to a historic ancestor, no need to re-image it. // This layer does not belong to a historic ancestor, no need to re-image it.
@@ -549,9 +451,7 @@ impl Timeline {
/// ///
/// The result may be used as an input to eviction and secondary downloads to de-prioritize layers /// The result may be used as an input to eviction and secondary downloads to de-prioritize layers
/// that we know won't be needed for reads. /// that we know won't be needed for reads.
pub(super) async fn update_layer_visibility( pub(super) async fn update_layer_visibility(&self) {
&self,
) -> Result<(), super::layer_manager::Shutdown> {
let head_lsn = self.get_last_record_lsn(); let head_lsn = self.get_last_record_lsn();
// We will sweep through layers in reverse-LSN order. We only do historic layers. L0 deltas // We will sweep through layers in reverse-LSN order. We only do historic layers. L0 deltas
@@ -559,7 +459,7 @@ impl Timeline {
// Note that L0 deltas _can_ be covered by image layers, but we consider them 'visible' because we anticipate that // Note that L0 deltas _can_ be covered by image layers, but we consider them 'visible' because we anticipate that
// they will be subject to L0->L1 compaction in the near future. // they will be subject to L0->L1 compaction in the near future.
let layer_manager = self.layers.read().await; let layer_manager = self.layers.read().await;
let layer_map = layer_manager.layer_map()?; let layer_map = layer_manager.layer_map();
let readable_points = { let readable_points = {
let children = self.gc_info.read().unwrap().retain_lsns.clone(); let children = self.gc_info.read().unwrap().retain_lsns.clone();
@@ -582,7 +482,6 @@ impl Timeline {
// TODO: publish our covered KeySpace to our parent, so that when they update their visibility, they can // TODO: publish our covered KeySpace to our parent, so that when they update their visibility, they can
// avoid assuming that everything at a branch point is visible. // avoid assuming that everything at a branch point is visible.
drop(covered); drop(covered);
Ok(())
} }
/// Collect a bunch of Level 0 layer files, and compact and reshuffle them as /// Collect a bunch of Level 0 layer files, and compact and reshuffle them as
@@ -636,8 +535,12 @@ impl Timeline {
) -> Result<CompactLevel0Phase1Result, CompactionError> { ) -> Result<CompactLevel0Phase1Result, CompactionError> {
stats.read_lock_held_spawn_blocking_startup_micros = stats.read_lock_held_spawn_blocking_startup_micros =
stats.read_lock_acquisition_micros.till_now(); // set by caller stats.read_lock_acquisition_micros.till_now(); // set by caller
let layers = guard.layer_map()?; let layers = guard.layer_map();
let level0_deltas = layers.level0_deltas(); let level0_deltas = layers.get_level0_deltas();
let mut level0_deltas = level0_deltas
.into_iter()
.map(|x| guard.get_from_desc(&x))
.collect_vec();
stats.level0_deltas_count = Some(level0_deltas.len()); stats.level0_deltas_count = Some(level0_deltas.len());
// Only compact if enough layers have accumulated. // Only compact if enough layers have accumulated.
@@ -650,11 +553,6 @@ impl Timeline {
return Ok(CompactLevel0Phase1Result::default()); return Ok(CompactLevel0Phase1Result::default());
} }
let mut level0_deltas = level0_deltas
.iter()
.map(|x| guard.get_from_desc(x))
.collect::<Vec<_>>();
// Gather the files to compact in this iteration. // Gather the files to compact in this iteration.
// //
// Start with the oldest Level 0 delta file, and collect any other // Start with the oldest Level 0 delta file, and collect any other
@@ -1108,16 +1006,14 @@ impl Timeline {
|| contains_hole || contains_hole
{ {
// ... if so, flush previous layer and prepare to write new one // ... if so, flush previous layer and prepare to write new one
let (desc, path) = writer new_layers.push(
writer
.take() .take()
.unwrap() .unwrap()
.finish(prev_key.unwrap().next(), ctx) .finish(prev_key.unwrap().next(), self, ctx)
.await .await
.map_err(CompactionError::Other)?; .map_err(CompactionError::Other)?,
let new_delta = Layer::finish_creating(self.conf, self, desc, &path) );
.map_err(CompactionError::Other)?;
new_layers.push(new_delta);
writer = None; writer = None;
if contains_hole { if contains_hole {
@@ -1180,13 +1076,12 @@ impl Timeline {
prev_key = Some(key); prev_key = Some(key);
} }
if let Some(writer) = writer { if let Some(writer) = writer {
let (desc, path) = writer new_layers.push(
.finish(prev_key.unwrap().next(), ctx) writer
.finish(prev_key.unwrap().next(), self, ctx)
.await .await
.map_err(CompactionError::Other)?; .map_err(CompactionError::Other)?,
let new_delta = Layer::finish_creating(self.conf, self, desc, &path) );
.map_err(CompactionError::Other)?;
new_layers.push(new_delta);
} }
// Sync layers // Sync layers
@@ -1411,9 +1306,10 @@ impl Timeline {
// Find the top of the historical layers // Find the top of the historical layers
let end_lsn = { let end_lsn = {
let guard = self.layers.read().await; let guard = self.layers.read().await;
let layers = guard.layer_map()?; let layers = guard.layer_map();
let l0_deltas = layers.level0_deltas(); let l0_deltas = layers.get_level0_deltas();
drop(guard);
// As an optimization, if we find that there are too few L0 layers, // As an optimization, if we find that there are too few L0 layers,
// bail out early. We know that the compaction algorithm would do // bail out early. We know that the compaction algorithm would do
@@ -1745,47 +1641,38 @@ impl Timeline {
pub(crate) async fn compact_with_gc( pub(crate) async fn compact_with_gc(
self: &Arc<Self>, self: &Arc<Self>,
cancel: &CancellationToken, cancel: &CancellationToken,
flags: EnumSet<CompactFlags>,
ctx: &RequestContext, ctx: &RequestContext,
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
use std::collections::BTreeSet; use std::collections::BTreeSet;
// Block other compaction/GC tasks from running for now. GC-compaction could run along // Block other compaction/GC tasks from running for now. GC-compaction could run along
// with legacy compaction tasks in the future. Always ensure the lock order is compaction -> gc. // with legacy compaction tasks in the future.
// Note that we already acquired the compaction lock when the outer `compact` function gets called.
let gc_lock = async { let _compaction_lock = tokio::select! {
tokio::select! { guard = self.compaction_lock.lock() => guard,
guard = self.gc_lock.lock() => Ok(guard),
// TODO: refactor to CompactionError to correctly pass cancelled error // TODO: refactor to CompactionError to correctly pass cancelled error
_ = cancel.cancelled() => Err(anyhow!("cancelled")), _ = cancel.cancelled() => return Err(anyhow!("cancelled")),
}
}; };
let gc_lock = crate::timed( let _gc = tokio::select! {
gc_lock, guard = self.gc_lock.lock() => guard,
"acquires gc lock", // TODO: refactor to CompactionError to correctly pass cancelled error
std::time::Duration::from_secs(5), _ = cancel.cancelled() => return Err(anyhow!("cancelled")),
) };
.await?;
let dry_run = flags.contains(CompactFlags::DryRun); info!("running enhanced gc bottom-most compaction");
info!("running enhanced gc bottom-most compaction, dry_run={dry_run}");
scopeguard::defer! { scopeguard::defer! {
info!("done enhanced gc bottom-most compaction"); info!("done enhanced gc bottom-most compaction");
}; };
let mut stat = CompactionStatistics::default();
// Step 0: pick all delta layers + image layers below/intersect with the GC horizon. // Step 0: pick all delta layers + image layers below/intersect with the GC horizon.
// The layer selection has the following properties: // The layer selection has the following properties:
// 1. If a layer is in the selection, all layers below it are in the selection. // 1. If a layer is in the selection, all layers below it are in the selection.
// 2. Inferred from (1), for each key in the layer selection, the value can be reconstructed only with the layers in the layer selection. // 2. Inferred from (1), for each key in the layer selection, the value can be reconstructed only with the layers in the layer selection.
let (layer_selection, gc_cutoff, retain_lsns_below_horizon) = { let (layer_selection, gc_cutoff, retain_lsns_below_horizon) = {
let guard = self.layers.read().await; let guard = self.layers.read().await;
let layers = guard.layer_map()?; let layers = guard.layer_map();
let gc_info = self.gc_info.read().unwrap(); let gc_info = self.gc_info.read().unwrap();
let mut retain_lsns_below_horizon = Vec::new(); let mut retain_lsns_below_horizon = Vec::new();
let gc_cutoff = gc_info.cutoffs.select_min(); let gc_cutoff = gc_info.cutoffs.select_min();
@@ -1849,9 +1736,6 @@ impl Timeline {
let key_range = desc.get_key_range(); let key_range = desc.get_key_range();
delta_split_points.insert(key_range.start); delta_split_points.insert(key_range.start);
delta_split_points.insert(key_range.end); delta_split_points.insert(key_range.end);
stat.visit_delta_layer(desc.file_size());
} else {
stat.visit_image_layer(desc.file_size());
} }
} }
let mut delta_layers = Vec::new(); let mut delta_layers = Vec::new();
@@ -1887,8 +1771,6 @@ impl Timeline {
tline: &Arc<Timeline>, tline: &Arc<Timeline>,
lowest_retain_lsn: Lsn, lowest_retain_lsn: Lsn,
ctx: &RequestContext, ctx: &RequestContext,
stats: &mut CompactionStatistics,
dry_run: bool,
last_batch: bool, last_batch: bool,
) -> anyhow::Result<Option<FlushDeltaResult>> { ) -> anyhow::Result<Option<FlushDeltaResult>> {
// Check if we need to split the delta layer. We split at the original delta layer boundary to avoid // Check if we need to split the delta layer. We split at the original delta layer boundary to avoid
@@ -1945,7 +1827,6 @@ impl Timeline {
let layer_generation = guard.get_from_key(&delta_key).metadata().generation; let layer_generation = guard.get_from_key(&delta_key).metadata().generation;
drop(guard); drop(guard);
if layer_generation == tline.generation { if layer_generation == tline.generation {
stats.discard_delta_layer();
// TODO: depending on whether we design this compaction process to run along with // TODO: depending on whether we design this compaction process to run along with
// other compactions, there could be layer map modifications after we drop the // other compactions, there could be layer map modifications after we drop the
// layer guard, and in case it creates duplicated layer key, we will still error // layer guard, and in case it creates duplicated layer key, we will still error
@@ -1972,16 +1853,9 @@ impl Timeline {
for (key, lsn, val) in deltas { for (key, lsn, val) in deltas {
delta_layer_writer.put_value(key, lsn, val, ctx).await?; delta_layer_writer.put_value(key, lsn, val, ctx).await?;
} }
let delta_layer = delta_layer_writer
stats.produce_delta_layer(delta_layer_writer.size()); .finish(delta_key.key_range.end, tline, ctx)
if dry_run {
return Ok(None);
}
let (desc, path) = delta_layer_writer
.finish(delta_key.key_range.end, ctx)
.await?; .await?;
let delta_layer = Layer::finish_creating(tline.conf, tline, desc, &path)?;
Ok(Some(FlushDeltaResult::CreateResidentLayer(delta_layer))) Ok(Some(FlushDeltaResult::CreateResidentLayer(delta_layer)))
} }
@@ -2073,13 +1947,6 @@ impl Timeline {
let mut current_delta_split_point = 0; let mut current_delta_split_point = 0;
let mut delta_layers = Vec::new(); let mut delta_layers = Vec::new();
while let Some((key, lsn, val)) = merge_iter.next().await? { while let Some((key, lsn, val)) = merge_iter.next().await? {
if cancel.is_cancelled() {
return Err(anyhow!("cancelled")); // TODO: refactor to CompactionError and pass cancel error
}
match val {
Value::Image(_) => stat.visit_image_key(&val),
Value::WalRecord(_) => stat.visit_wal_key(&val),
}
if last_key.is_none() || last_key.as_ref() == Some(&key) { if last_key.is_none() || last_key.as_ref() == Some(&key) {
if last_key.is_none() { if last_key.is_none() {
last_key = Some(key); last_key = Some(key);
@@ -2087,7 +1954,6 @@ impl Timeline {
accumulated_values.push((key, lsn, val)); accumulated_values.push((key, lsn, val));
} else { } else {
let last_key = last_key.as_mut().unwrap(); let last_key = last_key.as_mut().unwrap();
stat.on_unique_key_visited();
let retention = self let retention = self
.generate_key_retention( .generate_key_retention(
*last_key, *last_key,
@@ -2104,7 +1970,6 @@ impl Timeline {
*last_key, *last_key,
&mut delta_values, &mut delta_values,
image_layer_writer.as_mut(), image_layer_writer.as_mut(),
&mut stat,
ctx, ctx,
) )
.await?; .await?;
@@ -2117,8 +1982,6 @@ impl Timeline {
self, self,
lowest_retain_lsn, lowest_retain_lsn,
ctx, ctx,
&mut stat,
dry_run,
false, false,
) )
.await?, .await?,
@@ -2131,7 +1994,6 @@ impl Timeline {
let last_key = last_key.expect("no keys produced during compaction"); let last_key = last_key.expect("no keys produced during compaction");
// TODO: move this part to the loop body // TODO: move this part to the loop body
stat.on_unique_key_visited();
let retention = self let retention = self
.generate_key_retention( .generate_key_retention(
last_key, last_key,
@@ -2148,7 +2010,6 @@ impl Timeline {
last_key, last_key,
&mut delta_values, &mut delta_values,
image_layer_writer.as_mut(), image_layer_writer.as_mut(),
&mut stat,
ctx, ctx,
) )
.await?; .await?;
@@ -2161,8 +2022,6 @@ impl Timeline {
self, self,
lowest_retain_lsn, lowest_retain_lsn,
ctx, ctx,
&mut stat,
dry_run,
true, true,
) )
.await?, .await?,
@@ -2170,28 +2029,12 @@ impl Timeline {
assert!(delta_values.is_empty(), "unprocessed keys"); assert!(delta_values.is_empty(), "unprocessed keys");
let image_layer = if discard_image_layer { let image_layer = if discard_image_layer {
stat.discard_image_layer();
None None
} else if let Some(writer) = image_layer_writer { } else if let Some(writer) = image_layer_writer {
stat.produce_image_layer(writer.size());
if !dry_run {
Some(writer.finish(self, ctx).await?) Some(writer.finish(self, ctx).await?)
} else { } else {
None None
}
} else {
None
}; };
info!(
"gc-compaction statistics: {}",
serde_json::to_string(&stat)?
);
if dry_run {
return Ok(());
}
info!( info!(
"produced {} delta layers and {} image layers", "produced {} delta layers and {} image layers",
delta_layers.len(), delta_layers.len(),
@@ -2215,19 +2058,14 @@ impl Timeline {
let mut layer_selection = layer_selection; let mut layer_selection = layer_selection;
layer_selection.retain(|x| !keep_layers.contains(&x.layer_desc().key())); layer_selection.retain(|x| !keep_layers.contains(&x.layer_desc().key()));
compact_to.extend(image_layer); compact_to.extend(image_layer);
// Step 3: Place back to the layer map. // Step 3: Place back to the layer map.
{ {
let mut guard = self.layers.write().await; let mut guard = self.layers.write().await;
guard guard.finish_gc_compaction(&layer_selection, &compact_to, &self.metrics)
.open_mut()?
.finish_gc_compaction(&layer_selection, &compact_to, &self.metrics)
}; };
self.remote_client self.remote_client
.schedule_compaction_update(&layer_selection, &compact_to)?; .schedule_compaction_update(&layer_selection, &compact_to)?;
drop(gc_lock);
Ok(()) Ok(())
} }
} }
@@ -2301,7 +2139,7 @@ impl CompactionJobExecutor for TimelineAdaptor {
self.flush_updates().await?; self.flush_updates().await?;
let guard = self.timeline.layers.read().await; let guard = self.timeline.layers.read().await;
let layer_map = guard.layer_map()?; let layer_map = guard.layer_map();
let result = layer_map let result = layer_map
.iter_historic_layers() .iter_historic_layers()
@@ -2424,9 +2262,9 @@ impl CompactionJobExecutor for TimelineAdaptor {
)) ))
}); });
let (desc, path) = writer.finish(prev.unwrap().0.next(), ctx).await?; let new_delta_layer = writer
let new_delta_layer = .finish(prev.unwrap().0.next(), &self.timeline, ctx)
Layer::finish_creating(self.timeline.conf, &self.timeline, desc, &path)?; .await?;
self.new_deltas.push(new_delta_layer); self.new_deltas.push(new_delta_layer);
Ok(()) Ok(())

View File

@@ -63,19 +63,10 @@ pub(super) async fn delete_local_timeline_directory(
tenant_shard_id: TenantShardId, tenant_shard_id: TenantShardId,
timeline: &Timeline, timeline: &Timeline,
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
// Always ensure the lock order is compaction -> gc. let guards = async { tokio::join!(timeline.gc_lock.lock(), timeline.compaction_lock.lock()) };
let compaction_lock = timeline.compaction_lock.lock(); let guards = crate::timed(
let compaction_lock = crate::timed( guards,
compaction_lock, "acquire gc and compaction locks",
"acquires compaction lock",
std::time::Duration::from_secs(5),
)
.await;
let gc_lock = timeline.gc_lock.lock();
let gc_lock = crate::timed(
gc_lock,
"acquires gc lock",
std::time::Duration::from_secs(5), std::time::Duration::from_secs(5),
) )
.await; .await;
@@ -116,8 +107,7 @@ pub(super) async fn delete_local_timeline_directory(
.context("fsync_pre_mark_remove")?; .context("fsync_pre_mark_remove")?;
info!("finished deleting layer files, releasing locks"); info!("finished deleting layer files, releasing locks");
drop(gc_lock); drop(guards);
drop(compaction_lock);
fail::fail_point!("timeline-delete-after-rm", |_| { fail::fail_point!("timeline-delete-after-rm", |_| {
Err(anyhow::anyhow!("failpoint: timeline-delete-after-rm"))? Err(anyhow::anyhow!("failpoint: timeline-delete-after-rm"))?
@@ -230,8 +220,6 @@ impl DeleteTimelineFlow {
// Now that the Timeline is in Stopping state, request all the related tasks to shut down. // Now that the Timeline is in Stopping state, request all the related tasks to shut down.
timeline.shutdown(super::ShutdownMode::Hard).await; timeline.shutdown(super::ShutdownMode::Hard).await;
tenant.gc_block.before_delete(&timeline);
fail::fail_point!("timeline-delete-before-index-deleted-at", |_| { fail::fail_point!("timeline-delete-before-index-deleted-at", |_| {
Err(anyhow::anyhow!( Err(anyhow::anyhow!(
"failpoint: timeline-delete-before-index-deleted-at" "failpoint: timeline-delete-before-index-deleted-at"

View File

@@ -1,4 +1,4 @@
use std::{collections::HashSet, sync::Arc}; use std::sync::Arc;
use super::{layer_manager::LayerManager, FlushLayerError, Timeline}; use super::{layer_manager::LayerManager, FlushLayerError, Timeline};
use crate::{ use crate::{
@@ -74,11 +74,6 @@ impl From<crate::tenant::upload_queue::NotInitialized> for Error {
Error::ShuttingDown Error::ShuttingDown
} }
} }
impl From<super::layer_manager::Shutdown> for Error {
fn from(_: super::layer_manager::Shutdown) -> Self {
Error::ShuttingDown
}
}
impl From<FlushLayerError> for Error { impl From<FlushLayerError> for Error {
fn from(value: FlushLayerError) -> Self { fn from(value: FlushLayerError) -> Self {
@@ -146,9 +141,50 @@ pub(super) async fn prepare(
} }
} }
let reparented_timelines = reparented_direct_children(detached, tenant)?; // detached has previously been detached; let's inspect each of the current timelines and
// report back the timelines which have been reparented by our detach
let mut all_direct_children = tenant
.timelines
.lock()
.unwrap()
.values()
.filter(|tl| matches!(tl.ancestor_timeline.as_ref(), Some(ancestor) if Arc::ptr_eq(ancestor, detached)))
.map(|tl| (tl.ancestor_lsn, tl.clone()))
.collect::<Vec<_>>();
let mut any_shutdown = false;
all_direct_children.retain(
|(_, tl)| match tl.remote_client.initialized_upload_queue() {
Ok(accessor) => accessor
.latest_uploaded_index_part()
.lineage
.is_reparented(),
Err(_shutdownalike) => {
// not 100% a shutdown, but let's bail early not to give inconsistent results in
// sharded enviroment.
any_shutdown = true;
true
}
},
);
if any_shutdown {
// it could be one or many being deleted; have client retry
return Err(Error::ShuttingDown);
}
let mut reparented = all_direct_children;
// why this instead of hashset? there is a reason, but I've forgotten it many times.
//
// maybe if this was a hashset we would not be able to distinguish some race condition.
reparented.sort_unstable_by_key(|(lsn, tl)| (*lsn, tl.timeline_id));
return Ok(Progress::Done(AncestorDetached { return Ok(Progress::Done(AncestorDetached {
reparented_timelines, reparented_timelines: reparented
.into_iter()
.map(|(_, tl)| tl.timeline_id)
.collect(),
})); }));
}; };
@@ -241,7 +277,7 @@ pub(super) async fn prepare(
// between retries, these can change if compaction or gc ran in between. this will mean // between retries, these can change if compaction or gc ran in between. this will mean
// we have to redo work. // we have to redo work.
partition_work(ancestor_lsn, &layers)? partition_work(ancestor_lsn, &layers)
}; };
// TODO: layers are already sorted by something: use that to determine how much of remote // TODO: layers are already sorted by something: use that to determine how much of remote
@@ -345,67 +381,16 @@ pub(super) async fn prepare(
Ok(Progress::Prepared(guard, prepared)) Ok(Progress::Prepared(guard, prepared))
} }
fn reparented_direct_children(
detached: &Arc<Timeline>,
tenant: &Tenant,
) -> Result<HashSet<TimelineId>, Error> {
let mut all_direct_children = tenant
.timelines
.lock()
.unwrap()
.values()
.filter_map(|tl| {
let is_direct_child = matches!(tl.ancestor_timeline.as_ref(), Some(ancestor) if Arc::ptr_eq(ancestor, detached));
if is_direct_child {
Some(tl.clone())
} else {
if let Some(timeline) = tl.ancestor_timeline.as_ref() {
assert_ne!(timeline.timeline_id, detached.timeline_id, "we cannot have two timelines with the same timeline_id live");
}
None
}
})
// Collect to avoid lock taking order problem with Tenant::timelines and
// Timeline::remote_client
.collect::<Vec<_>>();
let mut any_shutdown = false;
all_direct_children.retain(|tl| match tl.remote_client.initialized_upload_queue() {
Ok(accessor) => accessor
.latest_uploaded_index_part()
.lineage
.is_reparented(),
Err(_shutdownalike) => {
// not 100% a shutdown, but let's bail early not to give inconsistent results in
// sharded enviroment.
any_shutdown = true;
true
}
});
if any_shutdown {
// it could be one or many being deleted; have client retry
return Err(Error::ShuttingDown);
}
Ok(all_direct_children
.into_iter()
.map(|tl| tl.timeline_id)
.collect())
}
fn partition_work( fn partition_work(
ancestor_lsn: Lsn, ancestor_lsn: Lsn,
source: &LayerManager, source_layermap: &LayerManager,
) -> Result<(usize, Vec<Layer>, Vec<Layer>), Error> { ) -> (usize, Vec<Layer>, Vec<Layer>) {
let mut straddling_branchpoint = vec![]; let mut straddling_branchpoint = vec![];
let mut rest_of_historic = vec![]; let mut rest_of_historic = vec![];
let mut later_by_lsn = 0; let mut later_by_lsn = 0;
for desc in source.layer_map()?.iter_historic_layers() { for desc in source_layermap.layer_map().iter_historic_layers() {
// off by one chances here: // off by one chances here:
// - start is inclusive // - start is inclusive
// - end is exclusive // - end is exclusive
@@ -424,10 +409,10 @@ fn partition_work(
&mut rest_of_historic &mut rest_of_historic
}; };
target.push(source.get_from_desc(&desc)); target.push(source_layermap.get_from_desc(&desc));
} }
Ok((later_by_lsn, straddling_branchpoint, rest_of_historic)) (later_by_lsn, straddling_branchpoint, rest_of_historic)
} }
async fn upload_rewritten_layer( async fn upload_rewritten_layer(
@@ -503,12 +488,10 @@ async fn copy_lsn_prefix(
// reuse the key instead of adding more holes between layers by using the real // reuse the key instead of adding more holes between layers by using the real
// highest key in the layer. // highest key in the layer.
let reused_highest_key = layer.layer_desc().key_range.end; let reused_highest_key = layer.layer_desc().key_range.end;
let (desc, path) = writer let copied = writer
.finish(reused_highest_key, ctx) .finish(reused_highest_key, target_timeline, ctx)
.await .await
.map_err(CopyDeltaPrefix)?; .map_err(CopyDeltaPrefix)?;
let copied = Layer::finish_creating(target_timeline.conf, target_timeline, desc, &path)
.map_err(CopyDeltaPrefix)?;
tracing::debug!(%layer, %copied, "new layer produced"); tracing::debug!(%layer, %copied, "new layer produced");
@@ -554,12 +537,11 @@ pub(super) async fn complete(
tenant: &Tenant, tenant: &Tenant,
prepared: PreparedTimelineDetach, prepared: PreparedTimelineDetach,
_ctx: &RequestContext, _ctx: &RequestContext,
) -> Result<HashSet<TimelineId>, anyhow::Error> { ) -> Result<Vec<TimelineId>, anyhow::Error> {
let PreparedTimelineDetach { layers } = prepared; let PreparedTimelineDetach { layers } = prepared;
let ancestor = detached let ancestor = detached
.ancestor_timeline .get_ancestor_timeline()
.as_ref()
.expect("must still have a ancestor"); .expect("must still have a ancestor");
let ancestor_lsn = detached.get_ancestor_lsn(); let ancestor_lsn = detached.get_ancestor_lsn();
@@ -599,7 +581,7 @@ pub(super) async fn complete(
} }
let tl_ancestor = tl.ancestor_timeline.as_ref()?; let tl_ancestor = tl.ancestor_timeline.as_ref()?;
let is_same = Arc::ptr_eq(ancestor, tl_ancestor); let is_same = Arc::ptr_eq(&ancestor, tl_ancestor);
let is_earlier = tl.get_ancestor_lsn() <= ancestor_lsn; let is_earlier = tl.get_ancestor_lsn() <= ancestor_lsn;
let is_deleting = tl let is_deleting = tl
@@ -640,18 +622,13 @@ pub(super) async fn complete(
}); });
let reparenting_candidates = tasks.len(); let reparenting_candidates = tasks.len();
let mut reparented = HashSet::with_capacity(tasks.len()); let mut reparented = Vec::with_capacity(tasks.len());
while let Some(res) = tasks.join_next().await { while let Some(res) = tasks.join_next().await {
match res { match res {
Ok(Some(timeline)) => { Ok(Some(timeline)) => {
tracing::info!(reparented=%timeline.timeline_id, "reparenting done"); tracing::info!(reparented=%timeline.timeline_id, "reparenting done");
reparented.push((timeline.ancestor_lsn, timeline.timeline_id));
assert!(
reparented.insert(timeline.timeline_id),
"duplicate reparenting? timeline_id={}",
timeline.timeline_id
);
} }
Ok(None) => { Ok(None) => {
// lets just ignore this for now. one or all reparented timelines could had // lets just ignore this for now. one or all reparented timelines could had
@@ -673,5 +650,12 @@ pub(super) async fn complete(
tracing::info!("failed to reparent some candidates"); tracing::info!("failed to reparent some candidates");
} }
reparented.sort_unstable();
let reparented = reparented
.into_iter()
.map(|(_, timeline_id)| timeline_id)
.collect();
Ok(reparented) Ok(reparented)
} }

View File

@@ -213,11 +213,19 @@ impl Timeline {
let mut js = tokio::task::JoinSet::new(); let mut js = tokio::task::JoinSet::new();
{ {
let guard = self.layers.read().await; let guard = self.layers.read().await;
let layers = guard.layer_map();
for layer in layers.iter_historic_layers() {
let layer = guard.get_from_desc(&layer);
guard // guard against eviction while we inspect it; it might be that eviction_task and
.likely_resident_layers() // disk_usage_eviction_task both select the same layers to be evicted, and
.filter(|layer| { // seemingly free up double the space. both succeeding is of no consequence.
let last_activity_ts = layer.latest_activity();
if !layer.is_likely_resident() {
continue;
}
let last_activity_ts = layer.access_stats().latest_activity();
let no_activity_for = match now.duration_since(last_activity_ts) { let no_activity_for = match now.duration_since(last_activity_ts) {
Ok(d) => d, Ok(d) => d,
@@ -237,21 +245,19 @@ impl Timeline {
// they would be meaningless outside of the pageserver process. // they would be meaningless outside of the pageserver process.
// At the time of writing, the trade-off is that access stats are more // At the time of writing, the trade-off is that access stats are more
// valuable than detecting clock skew. // valuable than detecting clock skew.
return false; continue;
} }
}; };
no_activity_for > p.threshold if no_activity_for > p.threshold {
})
.cloned()
.for_each(|layer| {
js.spawn(async move { js.spawn(async move {
layer layer
.evict_and_wait(std::time::Duration::from_secs(5)) .evict_and_wait(std::time::Duration::from_secs(5))
.await .await
}); });
stats.candidates += 1; stats.candidates += 1;
}); }
}
}; };
let join_all = async move { let join_all = async move {

View File

@@ -1,4 +1,4 @@
use anyhow::{bail, ensure, Context}; use anyhow::{bail, ensure, Context, Result};
use itertools::Itertools; use itertools::Itertools;
use pageserver_api::shard::TenantShardId; use pageserver_api::shard::TenantShardId;
use std::{collections::HashMap, sync::Arc}; use std::{collections::HashMap, sync::Arc};
@@ -24,142 +24,39 @@ use crate::{
use super::TimelineWriterState; use super::TimelineWriterState;
/// Provides semantic APIs to manipulate the layer map. /// Provides semantic APIs to manipulate the layer map.
pub(crate) enum LayerManager { #[derive(Default)]
/// Open as in not shutdown layer manager; we still have in-memory layers and we can manipulate pub(crate) struct LayerManager {
/// the layers. layer_map: LayerMap,
Open(OpenLayerManager), layer_fmgr: LayerFileManager<Layer>,
/// Shutdown layer manager where there are no more in-memory layers and persistent layers are
/// read-only.
Closed {
layers: HashMap<PersistentLayerKey, Layer>,
},
}
impl Default for LayerManager {
fn default() -> Self {
LayerManager::Open(OpenLayerManager::default())
}
} }
impl LayerManager { impl LayerManager {
pub(crate) fn get_from_key(&self, key: &PersistentLayerKey) -> Layer { pub(crate) fn get_from_desc(&self, desc: &PersistentLayerDesc) -> Layer {
// The assumption for the `expect()` is that all code maintains the following invariant: self.layer_fmgr.get_from_desc(desc)
// A layer's descriptor is present in the LayerMap => the LayerFileManager contains a layer for the descriptor.
self.layers()
.get(key)
.with_context(|| format!("get layer from key: {key}"))
.expect("not found")
.clone()
} }
pub(crate) fn get_from_desc(&self, desc: &PersistentLayerDesc) -> Layer { pub(crate) fn get_from_key(&self, desc: &PersistentLayerKey) -> Layer {
self.get_from_key(&desc.key()) self.layer_fmgr.get_from_key(desc)
} }
/// Get an immutable reference to the layer map. /// Get an immutable reference to the layer map.
/// ///
/// We expect users only to be able to get an immutable layer map. If users want to make modifications, /// We expect users only to be able to get an immutable layer map. If users want to make modifications,
/// they should use the below semantic APIs. This design makes us step closer to immutable storage state. /// they should use the below semantic APIs. This design makes us step closer to immutable storage state.
pub(crate) fn layer_map(&self) -> Result<&LayerMap, Shutdown> { pub(crate) fn layer_map(&self) -> &LayerMap {
use LayerManager::*; &self.layer_map
match self {
Open(OpenLayerManager { layer_map, .. }) => Ok(layer_map),
Closed { .. } => Err(Shutdown),
}
} }
pub(crate) fn open_mut(&mut self) -> Result<&mut OpenLayerManager, Shutdown> {
use LayerManager::*;
match self {
Open(open) => Ok(open),
Closed { .. } => Err(Shutdown),
}
}
/// LayerManager shutdown. The in-memory layers do cleanup on drop, so we must drop them in
/// order to allow shutdown to complete.
///
/// If there was a want to flush in-memory layers, it must have happened earlier.
pub(crate) fn shutdown(&mut self, writer_state: &mut Option<TimelineWriterState>) {
use LayerManager::*;
match self {
Open(OpenLayerManager {
layer_map,
layer_fmgr: LayerFileManager(hashmap),
}) => {
let open = layer_map.open_layer.take();
let frozen = layer_map.frozen_layers.len();
let taken_writer_state = writer_state.take();
tracing::info!(open = open.is_some(), frozen, "dropped inmemory layers");
let layers = std::mem::take(hashmap);
*self = Closed { layers };
assert_eq!(open.is_some(), taken_writer_state.is_some());
}
Closed { .. } => {
tracing::debug!("ignoring multiple shutdowns on layer manager")
}
}
}
/// Sum up the historic layer sizes
pub(crate) fn layer_size_sum(&self) -> u64 {
self.layers()
.values()
.map(|l| l.layer_desc().file_size)
.sum()
}
pub(crate) fn likely_resident_layers(&self) -> impl Iterator<Item = &'_ Layer> + '_ {
self.layers().values().filter(|l| l.is_likely_resident())
}
pub(crate) fn contains(&self, layer: &Layer) -> bool {
self.contains_key(&layer.layer_desc().key())
}
pub(crate) fn contains_key(&self, key: &PersistentLayerKey) -> bool {
self.layers().contains_key(key)
}
pub(crate) fn all_persistent_layers(&self) -> Vec<PersistentLayerKey> {
self.layers().keys().cloned().collect_vec()
}
fn layers(&self) -> &HashMap<PersistentLayerKey, Layer> {
use LayerManager::*;
match self {
Open(OpenLayerManager { layer_fmgr, .. }) => &layer_fmgr.0,
Closed { layers } => layers,
}
}
}
#[derive(Default)]
pub(crate) struct OpenLayerManager {
layer_map: LayerMap,
layer_fmgr: LayerFileManager<Layer>,
}
impl std::fmt::Debug for OpenLayerManager {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("OpenLayerManager")
.field("layer_count", &self.layer_fmgr.0.len())
.finish()
}
}
#[derive(Debug, thiserror::Error)]
#[error("layer manager has been shutdown")]
pub(crate) struct Shutdown;
impl OpenLayerManager {
/// Called from `load_layer_map`. Initialize the layer manager with: /// Called from `load_layer_map`. Initialize the layer manager with:
/// 1. all on-disk layers /// 1. all on-disk layers
/// 2. next open layer (with disk disk_consistent_lsn LSN) /// 2. next open layer (with disk disk_consistent_lsn LSN)
pub(crate) fn initialize_local_layers(&mut self, layers: Vec<Layer>, next_open_layer_at: Lsn) { pub(crate) fn initialize_local_layers(
&mut self,
on_disk_layers: Vec<Layer>,
next_open_layer_at: Lsn,
) {
let mut updates = self.layer_map.batch_update(); let mut updates = self.layer_map.batch_update();
for layer in layers { for layer in on_disk_layers {
Self::insert_historic_layer(layer, &mut updates, &mut self.layer_fmgr); Self::insert_historic_layer(layer, &mut updates, &mut self.layer_fmgr);
} }
updates.flush(); updates.flush();
@@ -171,19 +68,26 @@ impl OpenLayerManager {
self.layer_map.next_open_layer_at = Some(next_open_layer_at); self.layer_map.next_open_layer_at = Some(next_open_layer_at);
} }
/// Open a new writable layer to append data if there is no open layer, otherwise return the /// Open a new writable layer to append data if there is no open layer, otherwise return the current open layer,
/// current open layer, called within `get_layer_for_write`. /// called within `get_layer_for_write`.
pub(crate) async fn get_layer_for_write( pub(crate) async fn get_layer_for_write(
&mut self, &mut self,
lsn: Lsn, lsn: Lsn,
last_record_lsn: Lsn,
conf: &'static PageServerConf, conf: &'static PageServerConf,
timeline_id: TimelineId, timeline_id: TimelineId,
tenant_shard_id: TenantShardId, tenant_shard_id: TenantShardId,
gate_guard: utils::sync::gate::GateGuard,
ctx: &RequestContext, ctx: &RequestContext,
) -> anyhow::Result<Arc<InMemoryLayer>> { ) -> Result<Arc<InMemoryLayer>> {
ensure!(lsn.is_aligned()); ensure!(lsn.is_aligned());
ensure!(
lsn > last_record_lsn,
"cannot modify relation after advancing last_record_lsn (incoming_lsn={}, last_record_lsn={})",
lsn,
last_record_lsn,
);
// Do we have a layer open for writing already? // Do we have a layer open for writing already?
let layer = if let Some(open_layer) = &self.layer_map.open_layer { let layer = if let Some(open_layer) = &self.layer_map.open_layer {
if open_layer.get_lsn_range().start > lsn { if open_layer.get_lsn_range().start > lsn {
@@ -209,15 +113,8 @@ impl OpenLayerManager {
lsn lsn
); );
let new_layer = InMemoryLayer::create( let new_layer =
conf, InMemoryLayer::create(conf, timeline_id, tenant_shard_id, start_lsn, ctx).await?;
timeline_id,
tenant_shard_id,
start_lsn,
gate_guard,
ctx,
)
.await?;
let layer = Arc::new(new_layer); let layer = Arc::new(new_layer);
self.layer_map.open_layer = Some(layer.clone()); self.layer_map.open_layer = Some(layer.clone());
@@ -271,7 +168,7 @@ impl OpenLayerManager {
froze froze
} }
/// Add image layers to the layer map, called from [`super::Timeline::create_image_layers`]. /// Add image layers to the layer map, called from `create_image_layers`.
pub(crate) fn track_new_image_layers( pub(crate) fn track_new_image_layers(
&mut self, &mut self,
image_layers: &[ResidentLayer], image_layers: &[ResidentLayer],
@@ -344,7 +241,7 @@ impl OpenLayerManager {
self.finish_compact_l0(compact_from, compact_to, metrics) self.finish_compact_l0(compact_from, compact_to, metrics)
} }
/// Called post-compaction when some previous generation image layers were trimmed. /// Called when compaction is completed.
pub(crate) fn rewrite_layers( pub(crate) fn rewrite_layers(
&mut self, &mut self,
rewrite_layers: &[(Layer, ResidentLayer)], rewrite_layers: &[(Layer, ResidentLayer)],
@@ -362,10 +259,13 @@ impl OpenLayerManager {
new_layer.layer_desc().lsn_range new_layer.layer_desc().lsn_range
); );
// Transfer visibility hint from old to new layer, since the new layer covers the same key space. This is not guaranteed to // Transfer visibilty hint from old to new layer, since the new layer covers the same key space. This is not guaranteed to
// be accurate (as the new layer may cover a different subset of the key range), but is a sensible default, and prevents // be accurate (as the new layer may cover a different subset of the key range), but is a sensible default, and prevents
// always marking rewritten layers as visible. // always marking rewritten layers as visible.
new_layer.as_ref().set_visibility(old_layer.visibility()); new_layer
.as_ref()
.access_stats()
.set_visibility(old_layer.access_stats().visibility());
// Safety: we may never rewrite the same file in-place. Callers are responsible // Safety: we may never rewrite the same file in-place. Callers are responsible
// for ensuring that they only rewrite layers after something changes the path, // for ensuring that they only rewrite layers after something changes the path,
@@ -433,6 +333,31 @@ impl OpenLayerManager {
mapping.remove(layer); mapping.remove(layer);
layer.delete_on_drop(); layer.delete_on_drop();
} }
pub(crate) fn likely_resident_layers(&self) -> impl Iterator<Item = Layer> + '_ {
// for small layer maps, we most likely have all resident, but for larger more are likely
// to be evicted assuming lots of layers correlated with longer lifespan.
self.layer_map().iter_historic_layers().filter_map(|desc| {
self.layer_fmgr
.0
.get(&desc.key())
.filter(|l| l.is_likely_resident())
.cloned()
})
}
pub(crate) fn contains(&self, layer: &Layer) -> bool {
self.layer_fmgr.contains(layer)
}
pub(crate) fn contains_key(&self, key: &PersistentLayerKey) -> bool {
self.layer_fmgr.contains_key(key)
}
pub(crate) fn all_persistent_layers(&self) -> Vec<PersistentLayerKey> {
self.layer_fmgr.0.keys().cloned().collect_vec()
}
} }
pub(crate) struct LayerFileManager<T>(HashMap<PersistentLayerKey, T>); pub(crate) struct LayerFileManager<T>(HashMap<PersistentLayerKey, T>);
@@ -444,6 +369,24 @@ impl<T> Default for LayerFileManager<T> {
} }
impl<T: AsLayerDesc + Clone> LayerFileManager<T> { impl<T: AsLayerDesc + Clone> LayerFileManager<T> {
fn get_from_key(&self, key: &PersistentLayerKey) -> T {
// The assumption for the `expect()` is that all code maintains the following invariant:
// A layer's descriptor is present in the LayerMap => the LayerFileManager contains a layer for the descriptor.
self.0
.get(key)
.with_context(|| format!("get layer from key: {}", key))
.expect("not found")
.clone()
}
fn get_from_desc(&self, desc: &PersistentLayerDesc) -> T {
self.get_from_key(&desc.key())
}
fn contains_key(&self, key: &PersistentLayerKey) -> bool {
self.0.contains_key(key)
}
pub(crate) fn insert(&mut self, layer: T) { pub(crate) fn insert(&mut self, layer: T) {
let present = self.0.insert(layer.layer_desc().key(), layer.clone()); let present = self.0.insert(layer.layer_desc().key(), layer.clone());
if present.is_some() && cfg!(debug_assertions) { if present.is_some() && cfg!(debug_assertions) {
@@ -451,6 +394,10 @@ impl<T: AsLayerDesc + Clone> LayerFileManager<T> {
} }
} }
pub(crate) fn contains(&self, layer: &T) -> bool {
self.0.contains_key(&layer.layer_desc().key())
}
pub(crate) fn remove(&mut self, layer: &T) { pub(crate) fn remove(&mut self, layer: &T) {
let present = self.0.remove(&layer.layer_desc().key()); let present = self.0.remove(&layer.layer_desc().key());
if present.is_none() && cfg!(debug_assertions) { if present.is_none() && cfg!(debug_assertions) {

View File

@@ -122,10 +122,6 @@ impl CurrentLogicalSize {
Self::Exact(_) => Accuracy::Exact, Self::Exact(_) => Accuracy::Exact,
} }
} }
pub(crate) fn is_exact(&self) -> bool {
matches!(self, Self::Exact(_))
}
} }
impl LogicalSize { impl LogicalSize {

View File

@@ -30,12 +30,10 @@ use tokio::time::Instant;
pub use pageserver_api::models::virtual_file as api; pub use pageserver_api::models::virtual_file as api;
pub(crate) mod io_engine; pub(crate) mod io_engine;
pub use io_engine::feature_test as io_engine_feature_test; pub use io_engine::feature_test as io_engine_feature_test;
pub use io_engine::io_engine_for_bench;
pub use io_engine::FeatureTestResult as IoEngineFeatureTestResult; pub use io_engine::FeatureTestResult as IoEngineFeatureTestResult;
mod metadata; mod metadata;
mod open_options; mod open_options;
use self::owned_buffers_io::write::OwnedAsyncWriter; use self::owned_buffers_io::write::OwnedAsyncWriter;
pub(crate) use api::DirectIoMode;
pub(crate) use io_engine::IoEngineKind; pub(crate) use io_engine::IoEngineKind;
pub(crate) use metadata::Metadata; pub(crate) use metadata::Metadata;
pub(crate) use open_options::*; pub(crate) use open_options::*;

View File

@@ -328,29 +328,3 @@ pub fn feature_test() -> anyhow::Result<FeatureTestResult> {
.join() .join()
.unwrap() .unwrap()
} }
/// For use in benchmark binaries only.
///
/// Benchmarks which initialize `virtual_file` need to know what engine to use, but we also
/// don't want to silently fall back to slower I/O engines in a benchmark: this could waste
/// developer time trying to figure out why it's slow.
///
/// In practice, this method will either return IoEngineKind::TokioEpollUring, or panic.
pub fn io_engine_for_bench() -> IoEngineKind {
#[cfg(not(target_os = "linux"))]
{
panic!("This benchmark does I/O and can only give a representative result on Linux");
}
#[cfg(target_os = "linux")]
{
match feature_test().unwrap() {
FeatureTestResult::PlatformPreferred(engine) => engine,
FeatureTestResult::Worse {
engine: _engine,
remark,
} => {
panic!("This benchmark does I/O can requires the preferred I/O engine: {remark}");
}
}
}
}

View File

@@ -45,7 +45,6 @@ static const char *jwt_token = NULL;
/* GUCs */ /* GUCs */
static char *ConsoleURL = NULL; static char *ConsoleURL = NULL;
static bool ForwardDDL = true; static bool ForwardDDL = true;
static bool RegressTestMode = false;
/* /*
* CURL docs say that this buffer must exist until we call curl_easy_cleanup * CURL docs say that this buffer must exist until we call curl_easy_cleanup
@@ -803,14 +802,6 @@ NeonProcessUtility(
case T_DropRoleStmt: case T_DropRoleStmt:
HandleDropRole(castNode(DropRoleStmt, parseTree)); HandleDropRole(castNode(DropRoleStmt, parseTree));
break; break;
case T_CreateTableSpaceStmt:
if (!RegressTestMode)
{
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("CREATE TABLESPACE is not supported on Neon")));
}
break;
default: default:
break; break;
} }
@@ -873,18 +864,6 @@ InitControlPlaneConnector()
NULL, NULL,
NULL); NULL);
DefineCustomBoolVariable(
"neon.regress_test_mode",
"Controls whether we are running in the regression test mode",
NULL,
&RegressTestMode,
false,
PGC_SUSET,
0,
NULL,
NULL,
NULL);
jwt_token = getenv("NEON_CONTROL_PLANE_TOKEN"); jwt_token = getenv("NEON_CONTROL_PLANE_TOKEN");
if (!jwt_token) if (!jwt_token)
{ {

184
poetry.lock generated
View File

@@ -1,103 +1,91 @@
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. # This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
[[package]]
name = "aiohappyeyeballs"
version = "2.3.5"
description = "Happy Eyeballs for asyncio"
optional = false
python-versions = ">=3.8"
files = [
{file = "aiohappyeyeballs-2.3.5-py3-none-any.whl", hash = "sha256:4d6dea59215537dbc746e93e779caea8178c866856a721c9c660d7a5a7b8be03"},
{file = "aiohappyeyeballs-2.3.5.tar.gz", hash = "sha256:6fa48b9f1317254f122a07a131a86b71ca6946ca989ce6326fff54a99a920105"},
]
[[package]] [[package]]
name = "aiohttp" name = "aiohttp"
version = "3.10.2" version = "3.9.4"
description = "Async http client/server framework (asyncio)" description = "Async http client/server framework (asyncio)"
optional = false optional = false
python-versions = ">=3.8" python-versions = ">=3.8"
files = [ files = [
{file = "aiohttp-3.10.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:95213b3d79c7e387144e9cb7b9d2809092d6ff2c044cb59033aedc612f38fb6d"}, {file = "aiohttp-3.9.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:76d32588ef7e4a3f3adff1956a0ba96faabbdee58f2407c122dd45aa6e34f372"},
{file = "aiohttp-3.10.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1aa005f060aff7124cfadaa2493f00a4e28ed41b232add5869e129a2e395935a"}, {file = "aiohttp-3.9.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:56181093c10dbc6ceb8a29dfeea1e815e1dfdc020169203d87fd8d37616f73f9"},
{file = "aiohttp-3.10.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:eabe6bf4c199687592f5de4ccd383945f485779c7ffb62a9b9f1f8a3f9756df8"}, {file = "aiohttp-3.9.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c7a5b676d3c65e88b3aca41816bf72831898fcd73f0cbb2680e9d88e819d1e4d"},
{file = "aiohttp-3.10.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:96e010736fc16d21125c7e2dc5c350cd43c528b85085c04bf73a77be328fe944"}, {file = "aiohttp-3.9.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d1df528a85fb404899d4207a8d9934cfd6be626e30e5d3a5544a83dbae6d8a7e"},
{file = "aiohttp-3.10.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:99f81f9c1529fd8e03be4a7bd7df32d14b4f856e90ef6e9cbad3415dbfa9166c"}, {file = "aiohttp-3.9.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f595db1bceabd71c82e92df212dd9525a8a2c6947d39e3c994c4f27d2fe15b11"},
{file = "aiohttp-3.10.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d611d1a01c25277bcdea06879afbc11472e33ce842322496b211319aa95441bb"}, {file = "aiohttp-3.9.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9c0b09d76e5a4caac3d27752027fbd43dc987b95f3748fad2b924a03fe8632ad"},
{file = "aiohttp-3.10.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e00191d38156e09e8c81ef3d75c0d70d4f209b8381e71622165f22ef7da6f101"}, {file = "aiohttp-3.9.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:689eb4356649ec9535b3686200b231876fb4cab4aca54e3bece71d37f50c1d13"},
{file = "aiohttp-3.10.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:74c091a5ded6cb81785de2d7a8ab703731f26de910dbe0f3934eabef4ae417cc"}, {file = "aiohttp-3.9.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a3666cf4182efdb44d73602379a66f5fdfd5da0db5e4520f0ac0dcca644a3497"},
{file = "aiohttp-3.10.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:18186a80ec5a701816adbf1d779926e1069392cf18504528d6e52e14b5920525"}, {file = "aiohttp-3.9.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:b65b0f8747b013570eea2f75726046fa54fa8e0c5db60f3b98dd5d161052004a"},
{file = "aiohttp-3.10.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:5a7ceb2a0d2280f23a02c64cd0afdc922079bb950400c3dd13a1ab2988428aac"}, {file = "aiohttp-3.9.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:a1885d2470955f70dfdd33a02e1749613c5a9c5ab855f6db38e0b9389453dce7"},
{file = "aiohttp-3.10.2-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:8bd7be6ff6c162a60cb8fce65ee879a684fbb63d5466aba3fa5b9288eb04aefa"}, {file = "aiohttp-3.9.4-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:0593822dcdb9483d41f12041ff7c90d4d1033ec0e880bcfaf102919b715f47f1"},
{file = "aiohttp-3.10.2-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:fae962b62944eaebff4f4fddcf1a69de919e7b967136a318533d82d93c3c6bd1"}, {file = "aiohttp-3.9.4-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:47f6eb74e1ecb5e19a78f4a4228aa24df7fbab3b62d4a625d3f41194a08bd54f"},
{file = "aiohttp-3.10.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:a0fde16d284efcacbe15fb0c1013f0967b6c3e379649239d783868230bf1db42"}, {file = "aiohttp-3.9.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c8b04a3dbd54de6ccb7604242fe3ad67f2f3ca558f2d33fe19d4b08d90701a89"},
{file = "aiohttp-3.10.2-cp310-cp310-win32.whl", hash = "sha256:f81cd85a0e76ec7b8e2b6636fe02952d35befda4196b8c88f3cec5b4fb512839"}, {file = "aiohttp-3.9.4-cp310-cp310-win32.whl", hash = "sha256:8a78dfb198a328bfb38e4308ca8167028920fb747ddcf086ce706fbdd23b2926"},
{file = "aiohttp-3.10.2-cp310-cp310-win_amd64.whl", hash = "sha256:54ba10eb5a3481c28282eb6afb5f709aedf53cf9c3a31875ffbdc9fc719ffd67"}, {file = "aiohttp-3.9.4-cp310-cp310-win_amd64.whl", hash = "sha256:e78da6b55275987cbc89141a1d8e75f5070e577c482dd48bd9123a76a96f0bbb"},
{file = "aiohttp-3.10.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:87fab7f948e407444c2f57088286e00e2ed0003ceaf3d8f8cc0f60544ba61d91"}, {file = "aiohttp-3.9.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c111b3c69060d2bafc446917534150fd049e7aedd6cbf21ba526a5a97b4402a5"},
{file = "aiohttp-3.10.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ec6ad66ed660d46503243cbec7b2b3d8ddfa020f984209b3b8ef7d98ce69c3f2"}, {file = "aiohttp-3.9.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:efbdd51872cf170093998c87ccdf3cb5993add3559341a8e5708bcb311934c94"},
{file = "aiohttp-3.10.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a4be88807283bd96ae7b8e401abde4ca0bab597ba73b5e9a2d98f36d451e9aac"}, {file = "aiohttp-3.9.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7bfdb41dc6e85d8535b00d73947548a748e9534e8e4fddd2638109ff3fb081df"},
{file = "aiohttp-3.10.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:01c98041f90927c2cbd72c22a164bb816fa3010a047d264969cf82e1d4bcf8d1"}, {file = "aiohttp-3.9.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2bd9d334412961125e9f68d5b73c1d0ab9ea3f74a58a475e6b119f5293eee7ba"},
{file = "aiohttp-3.10.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:54e36c67e1a9273ecafab18d6693da0fb5ac48fd48417e4548ac24a918c20998"}, {file = "aiohttp-3.9.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:35d78076736f4a668d57ade00c65d30a8ce28719d8a42471b2a06ccd1a2e3063"},
{file = "aiohttp-3.10.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7de3ddb6f424af54535424082a1b5d1ae8caf8256ebd445be68c31c662354720"}, {file = "aiohttp-3.9.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:824dff4f9f4d0f59d0fa3577932ee9a20e09edec8a2f813e1d6b9f89ced8293f"},
{file = "aiohttp-3.10.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7dd9c7db94b4692b827ce51dcee597d61a0e4f4661162424faf65106775b40e7"}, {file = "aiohttp-3.9.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:52b8b4e06fc15519019e128abedaeb56412b106ab88b3c452188ca47a25c4093"},
{file = "aiohttp-3.10.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e57e21e1167705f8482ca29cc5d02702208d8bf4aff58f766d94bcd6ead838cd"}, {file = "aiohttp-3.9.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eae569fb1e7559d4f3919965617bb39f9e753967fae55ce13454bec2d1c54f09"},
{file = "aiohttp-3.10.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:a1a50e59b720060c29e2951fd9f13c01e1ea9492e5a527b92cfe04dd64453c16"}, {file = "aiohttp-3.9.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:69b97aa5792428f321f72aeb2f118e56893371f27e0b7d05750bcad06fc42ca1"},
{file = "aiohttp-3.10.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:686c87782481fda5ee6ba572d912a5c26d9f98cc5c243ebd03f95222af3f1b0f"}, {file = "aiohttp-3.9.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:4d79aad0ad4b980663316f26d9a492e8fab2af77c69c0f33780a56843ad2f89e"},
{file = "aiohttp-3.10.2-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:dafb4abb257c0ed56dc36f4e928a7341b34b1379bd87e5a15ce5d883c2c90574"}, {file = "aiohttp-3.9.4-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:d6577140cd7db19e430661e4b2653680194ea8c22c994bc65b7a19d8ec834403"},
{file = "aiohttp-3.10.2-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:494a6f77560e02bd7d1ab579fdf8192390567fc96a603f21370f6e63690b7f3d"}, {file = "aiohttp-3.9.4-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:9860d455847cd98eb67897f5957b7cd69fbcb436dd3f06099230f16a66e66f79"},
{file = "aiohttp-3.10.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6fe8503b1b917508cc68bf44dae28823ac05e9f091021e0c41f806ebbb23f92f"}, {file = "aiohttp-3.9.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:69ff36d3f8f5652994e08bd22f093e11cfd0444cea310f92e01b45a4e46b624e"},
{file = "aiohttp-3.10.2-cp311-cp311-win32.whl", hash = "sha256:4ddb43d06ce786221c0dfd3c91b4892c318eaa36b903f7c4278e7e2fa0dd5102"}, {file = "aiohttp-3.9.4-cp311-cp311-win32.whl", hash = "sha256:e27d3b5ed2c2013bce66ad67ee57cbf614288bda8cdf426c8d8fe548316f1b5f"},
{file = "aiohttp-3.10.2-cp311-cp311-win_amd64.whl", hash = "sha256:ca2f5abcb0a9a47e56bac173c01e9f6c6e7f27534d91451c5f22e6a35a5a2093"}, {file = "aiohttp-3.9.4-cp311-cp311-win_amd64.whl", hash = "sha256:d6a67e26daa686a6fbdb600a9af8619c80a332556245fa8e86c747d226ab1a1e"},
{file = "aiohttp-3.10.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:14eb6b17f6246959fb0b035d4f4ae52caa870c4edfb6170aad14c0de5bfbf478"}, {file = "aiohttp-3.9.4-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:c5ff8ff44825736a4065d8544b43b43ee4c6dd1530f3a08e6c0578a813b0aa35"},
{file = "aiohttp-3.10.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:465e445ec348d4e4bd349edd8b22db75f025da9d7b6dc1369c48e7935b85581e"}, {file = "aiohttp-3.9.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d12a244627eba4e9dc52cbf924edef905ddd6cafc6513849b4876076a6f38b0e"},
{file = "aiohttp-3.10.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:341f8ece0276a828d95b70cd265d20e257f5132b46bf77d759d7f4e0443f2906"}, {file = "aiohttp-3.9.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:dcad56c8d8348e7e468899d2fb3b309b9bc59d94e6db08710555f7436156097f"},
{file = "aiohttp-3.10.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c01fbb87b5426381cd9418b3ddcf4fc107e296fa2d3446c18ce6c76642f340a3"}, {file = "aiohttp-3.9.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f7e69a7fd4b5ce419238388e55abd220336bd32212c673ceabc57ccf3d05b55"},
{file = "aiohttp-3.10.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2c474af073e1a6763e1c5522bbb2d85ff8318197e4c6c919b8d7886e16213345"}, {file = "aiohttp-3.9.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c4870cb049f10d7680c239b55428916d84158798eb8f353e74fa2c98980dcc0b"},
{file = "aiohttp-3.10.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d9076810a5621236e29b2204e67a68e1fe317c8727ee4c9abbfbb1083b442c38"}, {file = "aiohttp-3.9.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3b2feaf1b7031ede1bc0880cec4b0776fd347259a723d625357bb4b82f62687b"},
{file = "aiohttp-3.10.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8f515d6859e673940e08de3922b9c4a2249653b0ac181169313bd6e4b1978ac"}, {file = "aiohttp-3.9.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:939393e8c3f0a5bcd33ef7ace67680c318dc2ae406f15e381c0054dd658397de"},
{file = "aiohttp-3.10.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:655e583afc639bef06f3b2446972c1726007a21003cd0ef57116a123e44601bc"}, {file = "aiohttp-3.9.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7d2334e387b2adcc944680bebcf412743f2caf4eeebd550f67249c1c3696be04"},
{file = "aiohttp-3.10.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8da9449a575133828cc99985536552ea2dcd690e848f9d41b48d8853a149a959"}, {file = "aiohttp-3.9.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:e0198ea897680e480845ec0ffc5a14e8b694e25b3f104f63676d55bf76a82f1a"},
{file = "aiohttp-3.10.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:19073d57d0feb1865d12361e2a1f5a49cb764bf81a4024a3b608ab521568093a"}, {file = "aiohttp-3.9.4-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:e40d2cd22914d67c84824045861a5bb0fb46586b15dfe4f046c7495bf08306b2"},
{file = "aiohttp-3.10.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c8e98e1845805f184d91fda6f9ab93d7c7b0dddf1c07e0255924bfdb151a8d05"}, {file = "aiohttp-3.9.4-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:aba80e77c227f4234aa34a5ff2b6ff30c5d6a827a91d22ff6b999de9175d71bd"},
{file = "aiohttp-3.10.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:377220a5efde6f9497c5b74649b8c261d3cce8a84cb661be2ed8099a2196400a"}, {file = "aiohttp-3.9.4-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:fb68dc73bc8ac322d2e392a59a9e396c4f35cb6fdbdd749e139d1d6c985f2527"},
{file = "aiohttp-3.10.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:92f7f4a4dc9cdb5980973a74d43cdbb16286dacf8d1896b6c3023b8ba8436f8e"}, {file = "aiohttp-3.9.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:f3460a92638dce7e47062cf088d6e7663adb135e936cb117be88d5e6c48c9d53"},
{file = "aiohttp-3.10.2-cp312-cp312-win32.whl", hash = "sha256:9bb2834a6f11d65374ce97d366d6311a9155ef92c4f0cee543b2155d06dc921f"}, {file = "aiohttp-3.9.4-cp312-cp312-win32.whl", hash = "sha256:32dc814ddbb254f6170bca198fe307920f6c1308a5492f049f7f63554b88ef36"},
{file = "aiohttp-3.10.2-cp312-cp312-win_amd64.whl", hash = "sha256:518dc3cb37365255708283d1c1c54485bbacccd84f0a0fb87ed8917ba45eda5b"}, {file = "aiohttp-3.9.4-cp312-cp312-win_amd64.whl", hash = "sha256:63f41a909d182d2b78fe3abef557fcc14da50c7852f70ae3be60e83ff64edba5"},
{file = "aiohttp-3.10.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:7f98e70bbbf693086efe4b86d381efad8edac040b8ad02821453083d15ec315f"}, {file = "aiohttp-3.9.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:c3770365675f6be220032f6609a8fbad994d6dcf3ef7dbcf295c7ee70884c9af"},
{file = "aiohttp-3.10.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9f6f0b252a009e98fe84028a4ec48396a948e7a65b8be06ccfc6ef68cf1f614d"}, {file = "aiohttp-3.9.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:305edae1dea368ce09bcb858cf5a63a064f3bff4767dec6fa60a0cc0e805a1d3"},
{file = "aiohttp-3.10.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:9360e3ffc7b23565600e729e8c639c3c50d5520e05fdf94aa2bd859eef12c407"}, {file = "aiohttp-3.9.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6f121900131d116e4a93b55ab0d12ad72573f967b100e49086e496a9b24523ea"},
{file = "aiohttp-3.10.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3988044d1635c7821dd44f0edfbe47e9875427464e59d548aece447f8c22800a"}, {file = "aiohttp-3.9.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b71e614c1ae35c3d62a293b19eface83d5e4d194e3eb2fabb10059d33e6e8cbf"},
{file = "aiohttp-3.10.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:30a9d59da1543a6f1478c3436fd49ec59be3868bca561a33778b4391005e499d"}, {file = "aiohttp-3.9.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:419f009fa4cfde4d16a7fc070d64f36d70a8d35a90d71aa27670bba2be4fd039"},
{file = "aiohttp-3.10.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f9f49bdb94809ac56e09a310a62f33e5f22973d6fd351aac72a39cd551e98194"}, {file = "aiohttp-3.9.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7b39476ee69cfe64061fd77a73bf692c40021f8547cda617a3466530ef63f947"},
{file = "aiohttp-3.10.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ddfd2dca3f11c365d6857a07e7d12985afc59798458a2fdb2ffa4a0332a3fd43"}, {file = "aiohttp-3.9.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b33f34c9c7decdb2ab99c74be6443942b730b56d9c5ee48fb7df2c86492f293c"},
{file = "aiohttp-3.10.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:685c1508ec97b2cd3e120bfe309a4ff8e852e8a7460f1ef1de00c2c0ed01e33c"}, {file = "aiohttp-3.9.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c78700130ce2dcebb1a8103202ae795be2fa8c9351d0dd22338fe3dac74847d9"},
{file = "aiohttp-3.10.2-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:49904f38667c44c041a0b44c474b3ae36948d16a0398a8f8cd84e2bb3c42a069"}, {file = "aiohttp-3.9.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:268ba22d917655d1259af2d5659072b7dc11b4e1dc2cb9662fdd867d75afc6a4"},
{file = "aiohttp-3.10.2-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:352f3a4e5f11f3241a49b6a48bc5b935fabc35d1165fa0d87f3ca99c1fcca98b"}, {file = "aiohttp-3.9.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:17e7c051f53a0d2ebf33013a9cbf020bb4e098c4bc5bce6f7b0c962108d97eab"},
{file = "aiohttp-3.10.2-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:fc61f39b534c5d5903490478a0dd349df397d2284a939aa3cbaa2fb7a19b8397"}, {file = "aiohttp-3.9.4-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:7be99f4abb008cb38e144f85f515598f4c2c8932bf11b65add0ff59c9c876d99"},
{file = "aiohttp-3.10.2-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:ad2274e707be37420d0b6c3d26a8115295fe9d8e6e530fa6a42487a8ca3ad052"}, {file = "aiohttp-3.9.4-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:d58a54d6ff08d2547656356eea8572b224e6f9bbc0cf55fa9966bcaac4ddfb10"},
{file = "aiohttp-3.10.2-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:c836bf3c7512100219fe1123743fd8dd9a2b50dd7cfb0c3bb10d041309acab4b"}, {file = "aiohttp-3.9.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:7673a76772bda15d0d10d1aa881b7911d0580c980dbd16e59d7ba1422b2d83cd"},
{file = "aiohttp-3.10.2-cp38-cp38-win32.whl", hash = "sha256:53e8898adda402be03ff164b0878abe2d884e3ea03a4701e6ad55399d84b92dc"}, {file = "aiohttp-3.9.4-cp38-cp38-win32.whl", hash = "sha256:e4370dda04dc8951012f30e1ce7956a0a226ac0714a7b6c389fb2f43f22a250e"},
{file = "aiohttp-3.10.2-cp38-cp38-win_amd64.whl", hash = "sha256:7cc8f65f5b22304693de05a245b6736b14cb5bc9c8a03da6e2ae9ef15f8b458f"}, {file = "aiohttp-3.9.4-cp38-cp38-win_amd64.whl", hash = "sha256:eb30c4510a691bb87081192a394fb661860e75ca3896c01c6d186febe7c88530"},
{file = "aiohttp-3.10.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:9dfc906d656e14004c5bc672399c1cccc10db38df2b62a13fb2b6e165a81c316"}, {file = "aiohttp-3.9.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:84e90494db7df3be5e056f91412f9fa9e611fbe8ce4aaef70647297f5943b276"},
{file = "aiohttp-3.10.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:91b10208b222ddf655c3a3d5b727879d7163db12b634492df41a9182a76edaae"}, {file = "aiohttp-3.9.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7d4845f8501ab28ebfdbeab980a50a273b415cf69e96e4e674d43d86a464df9d"},
{file = "aiohttp-3.10.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9fd16b5e1a7bdd14668cd6bde60a2a29b49147a535c74f50d8177d11b38433a7"}, {file = "aiohttp-3.9.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:69046cd9a2a17245c4ce3c1f1a4ff8c70c7701ef222fce3d1d8435f09042bba1"},
{file = "aiohttp-3.10.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2bfdda4971bd79201f59adbad24ec2728875237e1c83bba5221284dbbf57bda"}, {file = "aiohttp-3.9.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b73a06bafc8dcc508420db43b4dd5850e41e69de99009d0351c4f3007960019"},
{file = "aiohttp-3.10.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:69d73f869cf29e8a373127fc378014e2b17bcfbe8d89134bc6fb06a2f67f3cb3"}, {file = "aiohttp-3.9.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:418bb0038dfafeac923823c2e63226179976c76f981a2aaad0ad5d51f2229bca"},
{file = "aiohttp-3.10.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:df59f8486507c421c0620a2c3dce81fbf1d54018dc20ff4fecdb2c106d6e6abc"}, {file = "aiohttp-3.9.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:71a8f241456b6c2668374d5d28398f8e8cdae4cce568aaea54e0f39359cd928d"},
{file = "aiohttp-3.10.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0df930015db36b460aa9badbf35eccbc383f00d52d4b6f3de2ccb57d064a6ade"}, {file = "aiohttp-3.9.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:935c369bf8acc2dc26f6eeb5222768aa7c62917c3554f7215f2ead7386b33748"},
{file = "aiohttp-3.10.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:562b1153ab7f766ee6b8b357ec777a302770ad017cf18505d34f1c088fccc448"}, {file = "aiohttp-3.9.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:74e4e48c8752d14ecfb36d2ebb3d76d614320570e14de0a3aa7a726ff150a03c"},
{file = "aiohttp-3.10.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:d984db6d855de58e0fde1ef908d48fe9a634cadb3cf715962722b4da1c40619d"}, {file = "aiohttp-3.9.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:916b0417aeddf2c8c61291238ce25286f391a6acb6f28005dd9ce282bd6311b6"},
{file = "aiohttp-3.10.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:14dc3fcb0d877911d775d511eb617a486a8c48afca0a887276e63db04d3ee920"}, {file = "aiohttp-3.9.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9b6787b6d0b3518b2ee4cbeadd24a507756ee703adbac1ab6dc7c4434b8c572a"},
{file = "aiohttp-3.10.2-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:b52a27a5c97275e254704e1049f4b96a81e67d6205f52fa37a4777d55b0e98ef"}, {file = "aiohttp-3.9.4-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:221204dbda5ef350e8db6287937621cf75e85778b296c9c52260b522231940ed"},
{file = "aiohttp-3.10.2-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:cd33d9de8cfd006a0d0fe85f49b4183c57e91d18ffb7e9004ce855e81928f704"}, {file = "aiohttp-3.9.4-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:10afd99b8251022ddf81eaed1d90f5a988e349ee7d779eb429fb07b670751e8c"},
{file = "aiohttp-3.10.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:1238fc979160bc03a92fff9ad021375ff1c8799c6aacb0d8ea1b357ea40932bb"}, {file = "aiohttp-3.9.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:2506d9f7a9b91033201be9ffe7d89c6a54150b0578803cce5cb84a943d075bc3"},
{file = "aiohttp-3.10.2-cp39-cp39-win32.whl", hash = "sha256:e2f43d238eae4f0b04f58d4c0df4615697d4ca3e9f9b1963d49555a94f0f5a04"}, {file = "aiohttp-3.9.4-cp39-cp39-win32.whl", hash = "sha256:e571fdd9efd65e86c6af2f332e0e95dad259bfe6beb5d15b3c3eca3a6eb5d87b"},
{file = "aiohttp-3.10.2-cp39-cp39-win_amd64.whl", hash = "sha256:947847f07a8f81d7b39b2d0202fd73e61962ebe17ac2d8566f260679e467da7b"}, {file = "aiohttp-3.9.4-cp39-cp39-win_amd64.whl", hash = "sha256:7d29dd5319d20aa3b7749719ac9685fbd926f71ac8c77b2477272725f882072d"},
{file = "aiohttp-3.10.2.tar.gz", hash = "sha256:4d1f694b5d6e459352e5e925a42e05bac66655bfde44d81c59992463d2897014"}, {file = "aiohttp-3.9.4.tar.gz", hash = "sha256:6ff71ede6d9a5a58cfb7b6fffc83ab5d4a63138276c771ac91ceaaddf5459644"},
] ]
[package.dependencies] [package.dependencies]
aiohappyeyeballs = ">=2.3.0"
aiosignal = ">=1.1.2" aiosignal = ">=1.1.2"
async-timeout = {version = ">=4.0,<5.0", markers = "python_version < \"3.11\""} async-timeout = {version = ">=4.0,<5.0", markers = "python_version < \"3.11\""}
attrs = ">=17.3.0" attrs = ">=17.3.0"
@@ -106,7 +94,7 @@ multidict = ">=4.5,<7.0"
yarl = ">=1.0,<2.0" yarl = ">=1.0,<2.0"
[package.extras] [package.extras]
speedups = ["Brotli", "aiodns (>=3.2.0)", "brotlicffi"] speedups = ["Brotli", "aiodns", "brotlicffi"]
[[package]] [[package]]
name = "aiopg" name = "aiopg"
@@ -1526,20 +1514,6 @@ files = [
[package.dependencies] [package.dependencies]
six = "*" six = "*"
[[package]]
name = "kafka-python"
version = "2.0.2"
description = "Pure Python client for Apache Kafka"
optional = false
python-versions = "*"
files = [
{file = "kafka-python-2.0.2.tar.gz", hash = "sha256:04dfe7fea2b63726cd6f3e79a2d86e709d608d74406638c5da33a01d45a9d7e3"},
{file = "kafka_python-2.0.2-py2.py3-none-any.whl", hash = "sha256:2d92418c7cb1c298fa6c7f0fb3519b520d0d7526ac6cb7ae2a4fc65a51a94b6e"},
]
[package.extras]
crc32c = ["crc32c"]
[[package]] [[package]]
name = "lazy-object-proxy" name = "lazy-object-proxy"
version = "1.10.0" version = "1.10.0"
@@ -3383,4 +3357,4 @@ cffi = ["cffi (>=1.11)"]
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = "^3.9" python-versions = "^3.9"
content-hash = "c09bcb333ab550958b33dbf4fec968c500d8e701fd4c96402cddbd9bb8048055" content-hash = "7cee6a8c30bc7f4bfb0a87c6bad3952dfb4da127fad853d2710a93ac3eab8a00"

View File

@@ -1,5 +1,5 @@
[package] [package]
name = "proxy-core" name = "proxy"
version = "0.1.0" version = "0.1.0"
edition.workspace = true edition.workspace = true
license.workspace = true license.workspace = true
@@ -9,11 +9,8 @@ default = []
testing = [] testing = []
[dependencies] [dependencies]
proxy-sasl = { version = "0.1", path = "../sasl" }
ahash.workspace = true ahash.workspace = true
anyhow.workspace = true anyhow.workspace = true
arc-swap.workspace = true
async-compression.workspace = true async-compression.workspace = true
async-trait.workspace = true async-trait.workspace = true
atomic-take.workspace = true atomic-take.workspace = true
@@ -33,6 +30,7 @@ dashmap.workspace = true
env_logger.workspace = true env_logger.workspace = true
framed-websockets.workspace = true framed-websockets.workspace = true
futures.workspace = true futures.workspace = true
git-version.workspace = true
hashbrown.workspace = true hashbrown.workspace = true
hashlink.workspace = true hashlink.workspace = true
hex.workspace = true hex.workspace = true
@@ -53,15 +51,17 @@ md5.workspace = true
measured = { workspace = true, features = ["lasso"] } measured = { workspace = true, features = ["lasso"] }
metrics.workspace = true metrics.workspace = true
once_cell.workspace = true once_cell.workspace = true
opentelemetry.workspace = true
parking_lot.workspace = true parking_lot.workspace = true
parquet.workspace = true parquet.workspace = true
parquet_derive.workspace = true parquet_derive.workspace = true
pin-project-lite.workspace = true pin-project-lite.workspace = true
postgres_backend.workspace = true postgres_backend.workspace = true
pq_proto.workspace = true pq_proto.workspace = true
prometheus.workspace = true
rand.workspace = true rand.workspace = true
regex.workspace = true regex.workspace = true
remote_storage = { version = "0.1", path = "../../libs/remote_storage/" } remote_storage = { version = "0.1", path = "../libs/remote_storage/" }
reqwest.workspace = true reqwest.workspace = true
reqwest-middleware = { workspace = true, features = ["json"] } reqwest-middleware = { workspace = true, features = ["json"] }
reqwest-retry.workspace = true reqwest-retry.workspace = true
@@ -73,13 +73,14 @@ rustls.workspace = true
scopeguard.workspace = true scopeguard.workspace = true
serde.workspace = true serde.workspace = true
serde_json.workspace = true serde_json.workspace = true
sha2 = { workspace = true, features = ["asm", "oid"] } sha2 = { workspace = true, features = ["asm"] }
smol_str.workspace = true smol_str.workspace = true
smallvec.workspace = true smallvec.workspace = true
socket2.workspace = true socket2.workspace = true
subtle.workspace = true subtle.workspace = true
task-local-extensions.workspace = true task-local-extensions.workspace = true
thiserror.workspace = true thiserror.workspace = true
tikv-jemallocator.workspace = true
tikv-jemalloc-ctl = { workspace = true, features = ["use_std"] } tikv-jemalloc-ctl = { workspace = true, features = ["use_std"] }
tokio-postgres.workspace = true tokio-postgres.workspace = true
tokio-postgres-rustls.workspace = true tokio-postgres-rustls.workspace = true
@@ -91,7 +92,6 @@ tracing-opentelemetry.workspace = true
tracing-subscriber.workspace = true tracing-subscriber.workspace = true
tracing-utils.workspace = true tracing-utils.workspace = true
tracing.workspace = true tracing.workspace = true
try-lock.workspace = true
typed-json.workspace = true typed-json.workspace = true
url.workspace = true url.workspace = true
urlencoding.workspace = true urlencoding.workspace = true
@@ -102,14 +102,6 @@ x509-parser.workspace = true
postgres-protocol.workspace = true postgres-protocol.workspace = true
redis.workspace = true redis.workspace = true
# jwt stuff
jose-jwa = "0.1.2"
jose-jwk = { version = "0.1.2", features = ["p256", "p384", "rsa"] }
signature = "2"
ecdsa = "0.16"
p256 = "0.13"
rsa = "0.9"
workspace_hack.workspace = true workspace_hack.workspace = true
[dev-dependencies] [dev-dependencies]

View File

@@ -1,554 +0,0 @@
use std::{future::Future, sync::Arc, time::Duration};
use anyhow::{bail, ensure, Context};
use arc_swap::ArcSwapOption;
use dashmap::DashMap;
use jose_jwk::crypto::KeyInfo;
use signature::Verifier;
use tokio::time::Instant;
use crate::{http::parse_json_body_with_limit, intern::EndpointIdInt};
// TODO(conrad): make these configurable.
// Minimum spacing between forced refreshes (presumably rate-limits fetches triggered
// by unknown key IDs — TODO confirm against callers; usage not visible here).
const MIN_RENEW: Duration = Duration::from_secs(30);
// Past this age, a background renewal is kicked off without blocking the caller.
const AUTO_RENEW: Duration = Duration::from_secs(300);
// Past this age, the cached JWKs are considered stale and callers wait for a renewal.
const MAX_RENEW: Duration = Duration::from_secs(3600);
// Upper bound on a single JWKS HTTP response body (64 KiB).
const MAX_JWK_BODY_SIZE: usize = 64 * 1024;
/// How to get the JWT auth rules.
///
/// Implementors asynchronously resolve the [`AuthRules`] (the set of JWKS URLs)
/// that should be scraped for JSON Web Keys.
pub trait FetchAuthRules: Clone + Send + Sync + 'static {
fn fetch_auth_rules(&self) -> impl Future<Output = anyhow::Result<AuthRules>> + Send;
}
/// [`FetchAuthRules`] implementation that is intended to ask the control plane
/// for an endpoint's auth rules. Currently a stub: it always returns an error.
#[derive(Clone)]
struct FetchAuthRulesFromCplane {
// Kept for the future cplane lookup; unused while this is a stub.
#[allow(dead_code)]
endpoint: EndpointIdInt,
}
impl FetchAuthRules for FetchAuthRulesFromCplane {
async fn fetch_auth_rules(&self) -> anyhow::Result<AuthRules> {
// Stub: control-plane integration is not implemented yet.
Err(anyhow::anyhow!("not yet implemented"))
}
}
/// JWT auth rules fetched via [`FetchAuthRules`].
pub struct AuthRules {
// The control plane may return multiple JWKS URLs; all of them are scraped.
jwks_urls: Vec<url::Url>,
}
/// Cache of JSON Web Key sets, keyed by endpoint.
#[derive(Default)]
pub struct JwkCache {
// Shared HTTP client used to fetch the JWKS URLs.
client: reqwest::Client,
// One cache entry (with its own renewal lock) per endpoint.
map: DashMap<EndpointIdInt, Arc<JwkCacheEntryLock>>,
}
/// Cached JWKs for one endpoint, plus the synchronization needed to renew them
/// without multiple tasks refetching at once.
pub struct JwkCacheEntryLock {
// Latest fetched snapshot; `None` until the first successful fetch.
cached: ArcSwapOption<JwkCacheEntry>,
// Single-permit semaphore: at most one renewal in flight per entry.
lookup: tokio::sync::Semaphore,
}
impl Default for JwkCacheEntryLock {
    /// Start with an empty cache and a single renewal permit available.
    fn default() -> Self {
        Self {
            lookup: tokio::sync::Semaphore::new(1),
            cached: ArcSwapOption::empty(),
        }
    }
}
/// A snapshot of the JWKs scraped for one endpoint at a point in time.
pub struct JwkCacheEntry {
/// Should refetch at least every hour to verify when old keys have been removed.
/// Should refetch when new key IDs are seen only every 5 minutes or so
last_retrieved: Instant,
/// cplane will return multiple JWKs urls that we need to scrape.
// Maps each JWKS URL to the key set it served on the last successful fetch.
key_sets: ahash::HashMap<url::Url, jose_jwk::JwkSet>,
}
impl JwkCacheEntryLock {
/// Wait until the renewal permit for this entry is available.
async fn acquire_permit<'a>(self: &'a Arc<Self>) -> JwkRenewalPermit<'a> {
JwkRenewalPermit::acquire_permit(self).await
}
/// Take the renewal permit without waiting; `None` if a renewal is already in flight.
fn try_acquire_permit<'a>(self: &'a Arc<Self>) -> Option<JwkRenewalPermit<'a>> {
JwkRenewalPermit::try_acquire_permit(self)
}
/// Fetch every JWKS URL returned by `auth_rules` and atomically replace the
/// cached entry with the merged result.
///
/// Holding `_permit` guarantees only one renewal runs per cache entry; the
/// freshness re-check below handles the case where another task finished a
/// renewal while we were waiting for the permit.
///
/// Individual URL failures (HTTP or decode) are logged and skipped, so the
/// resulting entry may contain fewer key sets than there are URLs; only the
/// `fetch_auth_rules` call itself can fail this function.
async fn renew_jwks<F: FetchAuthRules>(
&self,
_permit: JwkRenewalPermit<'_>,
client: &reqwest::Client,
auth_rules: &F,
) -> anyhow::Result<Arc<JwkCacheEntry>> {
// double check that no one beat us to updating the cache.
let now = Instant::now();
let guard = self.cached.load_full();
if let Some(cached) = guard {
let last_update = now.duration_since(cached.last_retrieved);
// NOTE(review): literal 300s — presumably meant to be AUTO_RENEW; confirm.
if last_update < Duration::from_secs(300) {
return Ok(cached);
}
}
let rules = auth_rules.fetch_auth_rules().await?;
let mut key_sets = ahash::HashMap::with_capacity_and_hasher(
rules.jwks_urls.len(),
ahash::RandomState::new(),
);
// TODO(conrad): run concurrently
for url in rules.jwks_urls {
let req = client.get(url.clone());
// TODO(conrad): eventually switch to using reqwest_middleware/`new_client_with_timeout`.
match req.send().await.and_then(|r| r.error_for_status()) {
// todo: should we re-insert JWKs if we want to keep this JWKs URL?
// I expect these failures would be quite sparse.
Err(e) => tracing::warn!(?url, error=?e, "could not fetch JWKs"),
Ok(r) => {
let resp: http::Response<reqwest::Body> = r.into();
// Body size is capped at MAX_JWK_BODY_SIZE while decoding.
match parse_json_body_with_limit::<jose_jwk::JwkSet>(
resp.into_body(),
MAX_JWK_BODY_SIZE,
)
.await
{
Err(e) => tracing::warn!(?url, error=?e, "could not decode JWKs"),
Ok(jwks) => {
key_sets.insert(url, jwks);
}
}
}
}
}
// Publish the new snapshot; readers observe it via `cached.load_full()`.
let entry = Arc::new(JwkCacheEntry {
last_retrieved: now,
key_sets,
});
self.cached.swap(Some(Arc::clone(&entry)));
Ok(entry)
}
async fn get_or_update_jwk_cache<F: FetchAuthRules>(
self: &Arc<Self>,
client: &reqwest::Client,
fetch: &F,
) -> Result<Arc<JwkCacheEntry>, anyhow::Error> {
let now = Instant::now();
let guard = self.cached.load_full();
// if we have no cached JWKs, try and get some
let Some(cached) = guard else {
let permit = self.acquire_permit().await;
return self.renew_jwks(permit, client, fetch).await;
};
let last_update = now.duration_since(cached.last_retrieved);
// check if the cached JWKs need updating.
if last_update > MAX_RENEW {
let permit = self.acquire_permit().await;
// it's been too long since we checked the keys. wait for them to update.
return self.renew_jwks(permit, client, fetch).await;
}
// every 5 minutes we should spawn a job to eagerly update the token.
if last_update > AUTO_RENEW {
if let Some(permit) = self.try_acquire_permit() {
tracing::debug!("JWKs should be renewed. Renewal permit acquired");
let permit = permit.into_owned();
let entry = self.clone();
let client = client.clone();
let fetch = fetch.clone();
tokio::spawn(async move {
if let Err(e) = entry.renew_jwks(permit, &client, &fetch).await {
tracing::warn!(error=?e, "could not fetch JWKs in background job");
}
});
} else {
tracing::debug!("JWKs should be renewed. Renewal permit already taken, skipping");
}
}
Ok(cached)
}
async fn check_jwt<F: FetchAuthRules>(
self: &Arc<Self>,
jwt: String,
client: &reqwest::Client,
fetch: &F,
) -> Result<(), anyhow::Error> {
// JWT compact form is defined to be
// <B64(Header)> || . || <B64(Payload)> || . || <B64(Signature)>
// where Signature = alg(<B64(Header)> || . || <B64(Payload)>);
let (header_payload, signature) = jwt
.rsplit_once(".")
.context("not a valid compact JWT encoding")?;
let (header, _payload) = header_payload
.split_once(".")
.context("not a valid compact JWT encoding")?;
let header = base64::decode_config(header, base64::URL_SAFE_NO_PAD)
.context("not a valid compact JWT encoding")?;
let header = serde_json::from_slice::<JWTHeader>(&header)
.context("not a valid compact JWT encoding")?;
ensure!(header.typ == "JWT");
let kid = header.kid.context("missing key id")?;
let mut guard = self.get_or_update_jwk_cache(client, fetch).await?;
// get the key from the JWKs if possible. If not, wait for the keys to update.
let jwk = loop {
let jwk = guard
.key_sets
.values()
.flat_map(|jwks| &jwks.keys)
.find(|jwk| jwk.prm.kid.as_deref() == Some(kid));
match jwk {
Some(jwk) => break jwk,
None if guard.last_retrieved.elapsed() > MIN_RENEW => {
let permit = self.acquire_permit().await;
guard = self.renew_jwks(permit, client, fetch).await?;
}
_ => {
bail!("jwk not found");
}
}
};
ensure!(
jwk.is_supported(&header.alg),
"signature algorithm not supported"
);
let sig = base64::decode_config(signature, base64::URL_SAFE_NO_PAD)
.context("not a valid compact JWT encoding")?;
match &jwk.key {
jose_jwk::Key::Ec(key) => {
verify_ec_signature(header_payload.as_bytes(), &sig, key)?;
}
jose_jwk::Key::Rsa(key) => {
verify_rsa_signature(header_payload.as_bytes(), &sig, key, &jwk.prm.alg)?;
}
key => bail!("unsupported key type {key:?}"),
};
// TODO(conrad): verify iss, exp, nbf, etc...
Ok(())
}
}
impl JwkCache {
    /// Validate `jwt` for the given endpoint, lazily creating the
    /// endpoint's cache entry on first use.
    pub async fn check_jwt(
        &self,
        endpoint: EndpointIdInt,
        jwt: String,
    ) -> Result<(), anyhow::Error> {
        // Fast path: shared (read) lookup of an existing entry.
        let entry = match self.map.get(&endpoint).as_deref() {
            Some(existing) => Arc::clone(existing),
            // Slow path: take the write lock and insert a default entry.
            None => Arc::clone(&*self.map.entry(endpoint).or_default()),
        };
        let fetch = FetchAuthRulesFromCplane { endpoint };
        entry.check_jwt(jwt, &self.client, &fetch).await
    }
}
/// Verify an ECDSA signature over `data` using the given EC JWK.
/// Only the P-256 curve is supported.
fn verify_ec_signature(data: &[u8], sig: &[u8], key: &jose_jwk::Ec) -> anyhow::Result<()> {
    use ecdsa::Signature;
    use signature::Verifier;

    match key.crv {
        jose_jwk::EcCurves::P256 => {
            let pk =
                p256::PublicKey::try_from(key).map_err(|_| anyhow::anyhow!("invalid P256 key"))?;
            let verifier = p256::ecdsa::VerifyingKey::from(&pk);
            verifier.verify(data, &Signature::from_slice(sig)?)?;
            Ok(())
        }
        key => bail!("unsupported ec key type {key:?}"),
    }
}
fn verify_rsa_signature(
data: &[u8],
sig: &[u8],
key: &jose_jwk::Rsa,
alg: &Option<jose_jwa::Algorithm>,
) -> anyhow::Result<()> {
use jose_jwa::{Algorithm, Signing};
use rsa::{
pkcs1v15::{Signature, VerifyingKey},
RsaPublicKey,
};
let key = RsaPublicKey::try_from(key).map_err(|_| anyhow::anyhow!("invalid RSA key"))?;
match alg {
Some(Algorithm::Signing(Signing::Rs256)) => {
let key = VerifyingKey::<sha2::Sha256>::new(key);
let sig = Signature::try_from(sig)?;
key.verify(data, &sig)?;
}
_ => bail!("invalid RSA signing algorithm"),
};
Ok(())
}
/// <https://datatracker.ietf.org/doc/html/rfc7515#section-4.1>
#[derive(serde::Deserialize, serde::Serialize)]
struct JWTHeader<'a> {
    /// must be "JWT"
    // NOTE(review): RFC 7515 marks "typ" optional; we require it and check
    // it case-sensitively in `check_jwt` — confirm all issuers send it.
    typ: &'a str,
    /// must be a supported alg
    alg: jose_jwa::Algorithm,
    /// key id, must be provided for our usecase
    kid: Option<&'a str>,
}
/// Token proving the holder may renew one cache entry's JWKs.
/// Dropping it hands the permit back to the entry's semaphore.
struct JwkRenewalPermit<'a> {
    // None once the permit has been moved out via `into_owned`.
    inner: Option<JwkRenewalPermitInner<'a>>,
}
enum JwkRenewalPermitInner<'a> {
    // Owns an Arc so the permit can outlive the borrow (background tasks).
    Owned(Arc<JwkCacheEntryLock>),
    Borrowed(&'a Arc<JwkCacheEntryLock>),
}
impl JwkRenewalPermit<'_> {
    /// Convert a borrowed permit into an owned (`'static`) one so it can be
    /// moved into a spawned task. `take()` empties `self.inner`, making the
    /// old wrapper's `Drop` a no-op — the permit is not double-released.
    fn into_owned(mut self) -> JwkRenewalPermit<'static> {
        JwkRenewalPermit {
            inner: self.inner.take().map(JwkRenewalPermitInner::into_owned),
        }
    }
    /// Wait for the entry's single renewal permit.
    async fn acquire_permit(from: &Arc<JwkCacheEntryLock>) -> JwkRenewalPermit {
        match from.lookup.acquire().await {
            Ok(permit) => {
                // Detach the permit from the semaphore guard; it is
                // re-added manually in `Drop`.
                permit.forget();
                JwkRenewalPermit {
                    inner: Some(JwkRenewalPermitInner::Borrowed(from)),
                }
            }
            // The semaphore is never closed by this module.
            Err(_) => panic!("semaphore should not be closed"),
        }
    }
    /// Non-blocking variant: `None` if the permit is currently held.
    fn try_acquire_permit(from: &Arc<JwkCacheEntryLock>) -> Option<JwkRenewalPermit> {
        match from.lookup.try_acquire() {
            Ok(permit) => {
                // Same forget/re-add protocol as `acquire_permit`.
                permit.forget();
                Some(JwkRenewalPermit {
                    inner: Some(JwkRenewalPermitInner::Borrowed(from)),
                })
            }
            Err(tokio::sync::TryAcquireError::NoPermits) => None,
            Err(tokio::sync::TryAcquireError::Closed) => panic!("semaphore should not be closed"),
        }
    }
}
impl JwkRenewalPermitInner<'_> {
    /// Upgrade to a `'static` permit, cloning the `Arc` if it was borrowed.
    fn into_owned(self) -> JwkRenewalPermitInner<'static> {
        let arc = match self {
            JwkRenewalPermitInner::Owned(arc) => arc,
            JwkRenewalPermitInner::Borrowed(arc) => Arc::clone(arc),
        };
        JwkRenewalPermitInner::Owned(arc)
    }
}
impl Drop for JwkRenewalPermit<'_> {
fn drop(&mut self) {
let entry = match &self.inner {
None => return,
Some(JwkRenewalPermitInner::Owned(p)) => p,
Some(JwkRenewalPermitInner::Borrowed(p)) => *p,
};
entry.lookup.add_permits(1);
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::{future::IntoFuture, net::SocketAddr, time::SystemTime};
    use base64::URL_SAFE_NO_PAD;
    use bytes::Bytes;
    use http::Response;
    use http_body_util::Full;
    use hyper1::service::service_fn;
    use hyper_util::rt::TokioIo;
    use rand::rngs::OsRng;
    use signature::Signer;
    use tokio::net::TcpListener;
    /// Generate a fresh P-256 key pair and the matching public JWK (ES256)
    /// carrying the given key id.
    fn new_ec_jwk(kid: String) -> (p256::SecretKey, jose_jwk::Jwk) {
        let sk = p256::SecretKey::random(&mut OsRng);
        let pk = sk.public_key().into();
        let jwk = jose_jwk::Jwk {
            key: jose_jwk::Key::Ec(pk),
            prm: jose_jwk::Parameters {
                kid: Some(kid),
                alg: Some(jose_jwa::Algorithm::Signing(jose_jwa::Signing::Es256)),
                ..Default::default()
            },
        };
        (sk, jwk)
    }
    /// Generate a fresh 2048-bit RSA key pair and the matching public JWK
    /// (RS256) carrying the given key id.
    fn new_rsa_jwk(kid: String) -> (rsa::RsaPrivateKey, jose_jwk::Jwk) {
        let sk = rsa::RsaPrivateKey::new(&mut OsRng, 2048).unwrap();
        let pk = sk.to_public_key().into();
        let jwk = jose_jwk::Jwk {
            key: jose_jwk::Key::Rsa(pk),
            prm: jose_jwk::Parameters {
                kid: Some(kid),
                alg: Some(jose_jwa::Algorithm::Signing(jose_jwa::Signing::Rs256)),
                ..Default::default()
            },
        };
        (sk, jwk)
    }
    /// Build the unsigned `<B64(header)>.<B64(payload)>` part of a compact
    /// JWT, with an expiry one hour in the future.
    fn build_jwt_payload(kid: String, sig: jose_jwa::Signing) -> String {
        let header = JWTHeader {
            typ: "JWT",
            alg: jose_jwa::Algorithm::Signing(sig),
            kid: Some(&kid),
        };
        let body = typed_json::json! {{
            "exp": SystemTime::now().duration_since(SystemTime::UNIX_EPOCH).unwrap().as_secs() + 3600,
        }};
        let header =
            base64::encode_config(serde_json::to_string(&header).unwrap(), URL_SAFE_NO_PAD);
        let body = base64::encode_config(body.to_string(), URL_SAFE_NO_PAD);
        format!("{header}.{body}")
    }
    /// Produce a complete ES256 compact JWT signed with `key`.
    fn new_ec_jwt(kid: String, key: p256::SecretKey) -> String {
        use p256::ecdsa::{Signature, SigningKey};
        let payload = build_jwt_payload(kid, jose_jwa::Signing::Es256);
        let sig: Signature = SigningKey::from(key).sign(payload.as_bytes());
        let sig = base64::encode_config(sig.to_bytes(), URL_SAFE_NO_PAD);
        format!("{payload}.{sig}")
    }
    /// Produce a complete RS256 compact JWT signed with `key`.
    fn new_rsa_jwt(kid: String, key: rsa::RsaPrivateKey) -> String {
        use rsa::pkcs1v15::SigningKey;
        use rsa::signature::SignatureEncoding;
        let payload = build_jwt_payload(kid, jose_jwa::Signing::Rs256);
        let sig = SigningKey::<sha2::Sha256>::new(key).sign(payload.as_bytes());
        let sig = base64::encode_config(sig.to_bytes(), URL_SAFE_NO_PAD);
        format!("{payload}.{sig}")
    }
    /// End-to-end: serve two JWKs documents from a local HTTP server and
    /// verify that tokens signed by keys from either document validate.
    #[tokio::test]
    async fn renew() {
        let (rs1, jwk1) = new_rsa_jwk("1".into());
        let (rs2, jwk2) = new_rsa_jwk("2".into());
        let (ec1, jwk3) = new_ec_jwk("3".into());
        let (ec2, jwk4) = new_ec_jwk("4".into());
        let jwt1 = new_rsa_jwt("1".into(), rs1);
        let jwt2 = new_rsa_jwt("2".into(), rs2);
        let jwt3 = new_ec_jwt("3".into(), ec1);
        let jwt4 = new_ec_jwt("4".into(), ec2);
        // Split the keys across two JWK sets to mimic cplane returning
        // multiple JWKs URLs.
        let foo_jwks = jose_jwk::JwkSet {
            keys: vec![jwk1, jwk3],
        };
        let bar_jwks = jose_jwk::JwkSet {
            keys: vec![jwk2, jwk4],
        };
        let service = service_fn(move |req| {
            let foo_jwks = foo_jwks.clone();
            let bar_jwks = bar_jwks.clone();
            async move {
                let jwks = match req.uri().path() {
                    "/foo" => &foo_jwks,
                    "/bar" => &bar_jwks,
                    _ => {
                        return Response::builder()
                            .status(404)
                            .body(Full::new(Bytes::new()));
                    }
                };
                let body = serde_json::to_vec(jwks).unwrap();
                Response::builder()
                    .status(200)
                    .body(Full::new(Bytes::from(body)))
            }
        });
        let listener = TcpListener::bind("0.0.0.0:0").await.unwrap();
        let server = hyper1::server::conn::http1::Builder::new();
        let addr = listener.local_addr().unwrap();
        // Accept loop for the throwaway HTTP server.
        tokio::spawn(async move {
            loop {
                let (s, _) = listener.accept().await.unwrap();
                let serve = server.serve_connection(TokioIo::new(s), service.clone());
                tokio::spawn(serve.into_future());
            }
        });
        let client = reqwest::Client::new();
        // Test FetchAuthRules impl pointing at both local JWKs paths.
        #[derive(Clone)]
        struct Fetch(SocketAddr);
        impl FetchAuthRules for Fetch {
            async fn fetch_auth_rules(&self) -> anyhow::Result<AuthRules> {
                Ok(AuthRules {
                    jwks_urls: vec![
                        format!("http://{}/foo", self.0).parse().unwrap(),
                        format!("http://{}/bar", self.0).parse().unwrap(),
                    ],
                })
            }
        }
        let jwk_cache = Arc::new(JwkCacheEntryLock::default());
        // All four tokens must verify: RSA and EC keys, spread across both
        // JWKs documents.
        jwk_cache
            .check_jwt(jwt1, &client, &Fetch(addr))
            .await
            .unwrap();
        jwk_cache
            .check_jwt(jwt2, &client, &Fetch(addr))
            .await
            .unwrap();
        jwk_cache
            .check_jwt(jwt3, &client, &Fetch(addr))
            .await
            .unwrap();
        jwk_cache
            .check_jwt(jwt4, &client, &Fetch(addr))
            .await
            .unwrap();
    }
}

View File

@@ -1,29 +0,0 @@
[package]
name = "pg_sni_router"
version = "0.1.0"
edition.workspace = true
license.workspace = true
[features]
default = []
testing = []
[dependencies]
proxy-sasl = { version = "0.1", path = "../sasl" }
proxy-core = { version = "0.1", path = "../core" }
anyhow.workspace = true
clap.workspace = true
futures.workspace = true
git-version.workspace = true
itertools.workspace = true
pq_proto.workspace = true
rustls-pemfile.workspace = true
rustls.workspace = true
socket2.workspace = true
tokio-util.workspace = true
tokio = { workspace = true, features = ["signal"] }
tracing-utils.workspace = true
tracing.workspace = true
utils.workspace = true
uuid.workspace = true

View File

@@ -1,34 +0,0 @@
[package]
name = "proxy"
version = "0.1.0"
edition.workspace = true
license.workspace = true
[features]
default = []
testing = []
[dependencies]
proxy-sasl = { version = "0.1", path = "../sasl" }
proxy-core = { version = "0.1", path = "../core" }
anyhow.workspace = true
aws-config.workspace = true
clap.workspace = true
futures.workspace = true
git-version.workspace = true
humantime.workspace = true
itertools.workspace = true
metrics.workspace = true
pq_proto.workspace = true
remote_storage = { version = "0.1", path = "../../libs/remote_storage/" }
rustls-pemfile.workspace = true
rustls.workspace = true
socket2.workspace = true
tikv-jemallocator.workspace = true
tokio-util.workspace = true
tokio = { workspace = true, features = ["signal"] }
tracing-utils.workspace = true
tracing.workspace = true
utils.workspace = true
uuid.workspace = true

View File

@@ -1,37 +0,0 @@
[package]
name = "proxy-sasl"
version = "0.1.0"
edition.workspace = true
license.workspace = true
[features]
default = []
testing = []
[dependencies]
ahash.workspace = true
anyhow.workspace = true
base64.workspace = true
bytes = { workspace = true, features = ["serde"] }
crossbeam-deque.workspace = true
hmac.workspace = true
itertools.workspace = true
lasso = { workspace = true, features = ["multi-threaded"] }
measured = { workspace = true, features = ["lasso"] }
parking_lot.workspace = true
pq_proto.workspace = true
rand.workspace = true
rustls.workspace = true
sha2 = { workspace = true, features = ["asm", "oid"] }
subtle.workspace = true
thiserror.workspace = true
tokio = { workspace = true, features = ["signal"] }
tracing.workspace = true
x509-parser.workspace = true
postgres-protocol.workspace = true
workspace_hack.workspace = true
[dev-dependencies]
pbkdf2 = { workspace = true, features = ["simple", "std"] }
uuid.workspace = true

View File

@@ -1,3 +0,0 @@
mod parse;
pub mod sasl;
pub mod scram;

View File

@@ -1,43 +0,0 @@
//! Small parsing helpers.
use std::ffi::CStr;
/// Split `bytes` at the first NUL byte, returning the leading C string and
/// everything after the terminator. Returns `None` if no NUL is present.
pub fn split_cstr(bytes: &[u8]) -> Option<(&CStr, &[u8])> {
    let cstr = CStr::from_bytes_until_nul(bytes).ok()?;
    // Everything past the NUL terminator belongs to the remainder.
    let rest = &bytes[cstr.to_bytes_with_nul().len()..];
    Some((cstr, rest))
}
/// See <https://doc.rust-lang.org/std/primitive.slice.html#method.split_array_ref>.
///
/// Split off a fixed-size head of `N` bytes; `None` if `bytes` is shorter.
pub fn split_at_const<const N: usize>(bytes: &[u8]) -> Option<(&[u8; N], &[u8])> {
    if bytes.len() < N {
        return None;
    }
    let (head, tail) = bytes.split_at(N);
    // `head` has exactly N bytes, so the array conversion cannot fail.
    Some((head.try_into().unwrap(), tail))
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_split_cstr() {
        // No NUL terminator anywhere -> no split possible.
        assert!(split_cstr(b"").is_none());
        assert!(split_cstr(b"foo").is_none());

        // A lone NUL yields an empty C string and an empty tail.
        let (cstr, rest) = split_cstr(b"\0").expect("uh-oh");
        assert_eq!((cstr.to_bytes(), rest), (&b""[..], &b""[..]));

        // Bytes after the first NUL come back untouched.
        let (cstr, rest) = split_cstr(b"foo\0bar").expect("uh-oh");
        assert_eq!((cstr.to_bytes(), rest), (&b"foo"[..], &b"bar"[..]));
    }

    #[test]
    fn test_split_at_const() {
        // N == 0 always succeeds; N larger than the input never does.
        assert!(split_at_const::<0>(b"").is_some());
        assert!(split_at_const::<1>(b"").is_none());
        assert!(matches!(split_at_const::<1>(b"ok"), Some((b"o", b"k"))));
    }
}

View File

@@ -38,7 +38,7 @@ pub enum AuthErrorImpl {
/// SASL protocol errors (includes [SCRAM](crate::scram)). /// SASL protocol errors (includes [SCRAM](crate::scram)).
#[error(transparent)] #[error(transparent)]
Sasl(#[from] proxy_sasl::sasl::Error), Sasl(#[from] crate::sasl::Error),
#[error("Unsupported authentication method: {0}")] #[error("Unsupported authentication method: {0}")]
BadAuthMethod(Box<str>), BadAuthMethod(Box<str>),
@@ -148,28 +148,3 @@ impl ReportableError for AuthError {
} }
} }
} }
impl UserFacingError for proxy_sasl::sasl::Error {
fn to_string_client(&self) -> String {
match self {
proxy_sasl::sasl::Error::ChannelBindingFailed(m) => m.to_string(),
proxy_sasl::sasl::Error::ChannelBindingBadMethod(m) => {
format!("unsupported channel binding method {m}")
}
_ => "authentication protocol violation".to_string(),
}
}
}
impl ReportableError for proxy_sasl::sasl::Error {
fn get_error_kind(&self) -> crate::error::ErrorKind {
match self {
proxy_sasl::sasl::Error::ChannelBindingFailed(_) => crate::error::ErrorKind::User,
proxy_sasl::sasl::Error::ChannelBindingBadMethod(_) => crate::error::ErrorKind::User,
proxy_sasl::sasl::Error::BadClientMessage(_) => crate::error::ErrorKind::User,
proxy_sasl::sasl::Error::MissingBinding => crate::error::ErrorKind::Service,
proxy_sasl::sasl::Error::Base64(_) => crate::error::ErrorKind::ControlPlane,
proxy_sasl::sasl::Error::Io(_) => crate::error::ErrorKind::ClientDisconnect,
}
}
}

View File

@@ -1,6 +1,5 @@
mod classic; mod classic;
mod hacks; mod hacks;
pub mod jwt;
mod link; mod link;
use std::net::IpAddr; use std::net::IpAddr;
@@ -9,7 +8,6 @@ use std::time::Duration;
use ipnet::{Ipv4Net, Ipv6Net}; use ipnet::{Ipv4Net, Ipv6Net};
pub use link::LinkAuthError; pub use link::LinkAuthError;
use proxy_sasl::scram;
use tokio::io::{AsyncRead, AsyncWrite}; use tokio::io::{AsyncRead, AsyncWrite};
use tokio_postgres::config::AuthKeys; use tokio_postgres::config::AuthKeys;
use tracing::{info, warn}; use tracing::{info, warn};
@@ -37,7 +35,7 @@ use crate::{
}, },
stream, url, stream, url,
}; };
use crate::{EndpointCacheKey, EndpointId, RoleName}; use crate::{scram, EndpointCacheKey, EndpointId, RoleName};
/// Alternative to [`std::borrow::Cow`] but doesn't need `T: ToOwned` as we don't need that functionality /// Alternative to [`std::borrow::Cow`] but doesn't need `T: ToOwned` as we don't need that functionality
pub enum MaybeOwned<'a, T> { pub enum MaybeOwned<'a, T> {
@@ -220,7 +218,7 @@ impl RateBucketInfo {
impl AuthenticationConfig { impl AuthenticationConfig {
pub fn check_rate_limit( pub fn check_rate_limit(
&self, &self,
ctx: &RequestMonitoring, ctx: &mut RequestMonitoring,
config: &AuthenticationConfig, config: &AuthenticationConfig,
secret: AuthSecret, secret: AuthSecret,
endpoint: &EndpointId, endpoint: &EndpointId,
@@ -245,7 +243,7 @@ impl AuthenticationConfig {
let limit_not_exceeded = self.rate_limiter.check( let limit_not_exceeded = self.rate_limiter.check(
( (
endpoint_int, endpoint_int,
MaskedIp::new(ctx.peer_addr(), config.rate_limit_ip_subnet), MaskedIp::new(ctx.peer_addr, config.rate_limit_ip_subnet),
), ),
password_weight, password_weight,
); );
@@ -276,7 +274,7 @@ impl AuthenticationConfig {
/// ///
/// All authentication flows will emit an AuthenticationOk message if successful. /// All authentication flows will emit an AuthenticationOk message if successful.
async fn auth_quirks( async fn auth_quirks(
ctx: &RequestMonitoring, ctx: &mut RequestMonitoring,
api: &impl console::Api, api: &impl console::Api,
user_info: ComputeUserInfoMaybeEndpoint, user_info: ComputeUserInfoMaybeEndpoint,
client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>, client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,
@@ -305,8 +303,8 @@ async fn auth_quirks(
let (allowed_ips, maybe_secret) = api.get_allowed_ips_and_secret(ctx, &info).await?; let (allowed_ips, maybe_secret) = api.get_allowed_ips_and_secret(ctx, &info).await?;
// check allowed list // check allowed list
if !check_peer_addr_is_in_list(&ctx.peer_addr(), &allowed_ips) { if !check_peer_addr_is_in_list(&ctx.peer_addr, &allowed_ips) {
return Err(auth::AuthError::ip_address_not_allowed(ctx.peer_addr())); return Err(auth::AuthError::ip_address_not_allowed(ctx.peer_addr));
} }
if !endpoint_rate_limiter.check(info.endpoint.clone().into(), 1) { if !endpoint_rate_limiter.check(info.endpoint.clone().into(), 1) {
@@ -358,7 +356,7 @@ async fn auth_quirks(
} }
async fn authenticate_with_secret( async fn authenticate_with_secret(
ctx: &RequestMonitoring, ctx: &mut RequestMonitoring,
secret: AuthSecret, secret: AuthSecret,
info: ComputeUserInfo, info: ComputeUserInfo,
client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>, client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,
@@ -372,8 +370,8 @@ async fn authenticate_with_secret(
let auth_outcome = let auth_outcome =
validate_password_and_exchange(&config.thread_pool, ep, &password, secret).await?; validate_password_and_exchange(&config.thread_pool, ep, &password, secret).await?;
let keys = match auth_outcome { let keys = match auth_outcome {
proxy_sasl::sasl::Outcome::Success(key) => key, crate::sasl::Outcome::Success(key) => key,
proxy_sasl::sasl::Outcome::Failure(reason) => { crate::sasl::Outcome::Failure(reason) => {
info!("auth backend failed with an error: {reason}"); info!("auth backend failed with an error: {reason}");
return Err(auth::AuthError::auth_failed(&*info.user)); return Err(auth::AuthError::auth_failed(&*info.user));
} }
@@ -423,7 +421,7 @@ impl<'a> BackendType<'a, ComputeUserInfoMaybeEndpoint, &()> {
#[tracing::instrument(fields(allow_cleartext = allow_cleartext), skip_all)] #[tracing::instrument(fields(allow_cleartext = allow_cleartext), skip_all)]
pub async fn authenticate( pub async fn authenticate(
self, self,
ctx: &RequestMonitoring, ctx: &mut RequestMonitoring,
client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>, client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,
allow_cleartext: bool, allow_cleartext: bool,
config: &'static AuthenticationConfig, config: &'static AuthenticationConfig,
@@ -469,7 +467,7 @@ impl<'a> BackendType<'a, ComputeUserInfoMaybeEndpoint, &()> {
impl BackendType<'_, ComputeUserInfo, &()> { impl BackendType<'_, ComputeUserInfo, &()> {
pub async fn get_role_secret( pub async fn get_role_secret(
&self, &self,
ctx: &RequestMonitoring, ctx: &mut RequestMonitoring,
) -> Result<CachedRoleSecret, GetAuthInfoError> { ) -> Result<CachedRoleSecret, GetAuthInfoError> {
use BackendType::*; use BackendType::*;
match self { match self {
@@ -480,7 +478,7 @@ impl BackendType<'_, ComputeUserInfo, &()> {
pub async fn get_allowed_ips_and_secret( pub async fn get_allowed_ips_and_secret(
&self, &self,
ctx: &RequestMonitoring, ctx: &mut RequestMonitoring,
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), GetAuthInfoError> { ) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), GetAuthInfoError> {
use BackendType::*; use BackendType::*;
match self { match self {
@@ -494,7 +492,7 @@ impl BackendType<'_, ComputeUserInfo, &()> {
impl ComputeConnectBackend for BackendType<'_, ComputeCredentials, NodeInfo> { impl ComputeConnectBackend for BackendType<'_, ComputeCredentials, NodeInfo> {
async fn wake_compute( async fn wake_compute(
&self, &self,
ctx: &RequestMonitoring, ctx: &mut RequestMonitoring,
) -> Result<CachedNodeInfo, console::errors::WakeComputeError> { ) -> Result<CachedNodeInfo, console::errors::WakeComputeError> {
use BackendType::*; use BackendType::*;
@@ -516,7 +514,7 @@ impl ComputeConnectBackend for BackendType<'_, ComputeCredentials, NodeInfo> {
impl ComputeConnectBackend for BackendType<'_, ComputeCredentials, &()> { impl ComputeConnectBackend for BackendType<'_, ComputeCredentials, &()> {
async fn wake_compute( async fn wake_compute(
&self, &self,
ctx: &RequestMonitoring, ctx: &mut RequestMonitoring,
) -> Result<CachedNodeInfo, console::errors::WakeComputeError> { ) -> Result<CachedNodeInfo, console::errors::WakeComputeError> {
use BackendType::*; use BackendType::*;
@@ -559,9 +557,9 @@ mod tests {
context::RequestMonitoring, context::RequestMonitoring,
proxy::NeonOptions, proxy::NeonOptions,
rate_limiter::{EndpointRateLimiter, RateBucketInfo}, rate_limiter::{EndpointRateLimiter, RateBucketInfo},
scram::{threadpool::ThreadPool, ServerSecret},
stream::{PqStream, Stream}, stream::{PqStream, Stream},
}; };
use proxy_sasl::scram::{threadpool::ThreadPool, ServerSecret};
use super::{auth_quirks, AuthRateLimiter}; use super::{auth_quirks, AuthRateLimiter};
@@ -573,7 +571,7 @@ mod tests {
impl console::Api for Auth { impl console::Api for Auth {
async fn get_role_secret( async fn get_role_secret(
&self, &self,
_ctx: &RequestMonitoring, _ctx: &mut RequestMonitoring,
_user_info: &super::ComputeUserInfo, _user_info: &super::ComputeUserInfo,
) -> Result<CachedRoleSecret, console::errors::GetAuthInfoError> { ) -> Result<CachedRoleSecret, console::errors::GetAuthInfoError> {
Ok(CachedRoleSecret::new_uncached(Some(self.secret.clone()))) Ok(CachedRoleSecret::new_uncached(Some(self.secret.clone())))
@@ -581,7 +579,7 @@ mod tests {
async fn get_allowed_ips_and_secret( async fn get_allowed_ips_and_secret(
&self, &self,
_ctx: &RequestMonitoring, _ctx: &mut RequestMonitoring,
_user_info: &super::ComputeUserInfo, _user_info: &super::ComputeUserInfo,
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), console::errors::GetAuthInfoError> ) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), console::errors::GetAuthInfoError>
{ {
@@ -593,7 +591,7 @@ mod tests {
async fn wake_compute( async fn wake_compute(
&self, &self,
_ctx: &RequestMonitoring, _ctx: &mut RequestMonitoring,
_user_info: &super::ComputeUserInfo, _user_info: &super::ComputeUserInfo,
) -> Result<CachedNodeInfo, console::errors::WakeComputeError> { ) -> Result<CachedNodeInfo, console::errors::WakeComputeError> {
unimplemented!() unimplemented!()
@@ -667,14 +665,10 @@ mod tests {
let (mut client, server) = tokio::io::duplex(1024); let (mut client, server) = tokio::io::duplex(1024);
let mut stream = PqStream::new(Stream::from_raw(server)); let mut stream = PqStream::new(Stream::from_raw(server));
let ctx = RequestMonitoring::test(); let mut ctx = RequestMonitoring::test();
let api = Auth { let api = Auth {
ips: vec![], ips: vec![],
secret: AuthSecret::Scram( secret: AuthSecret::Scram(ServerSecret::build("my-secret-password").await.unwrap()),
ServerSecret::build_test_secret("my-secret-password")
.await
.unwrap(),
),
}; };
let user_info = ComputeUserInfoMaybeEndpoint { let user_info = ComputeUserInfoMaybeEndpoint {
@@ -729,7 +723,7 @@ mod tests {
)); ));
let _creds = auth_quirks( let _creds = auth_quirks(
&ctx, &mut ctx,
&api, &api,
user_info, user_info,
&mut stream, &mut stream,
@@ -748,14 +742,10 @@ mod tests {
let (mut client, server) = tokio::io::duplex(1024); let (mut client, server) = tokio::io::duplex(1024);
let mut stream = PqStream::new(Stream::from_raw(server)); let mut stream = PqStream::new(Stream::from_raw(server));
let ctx = RequestMonitoring::test(); let mut ctx = RequestMonitoring::test();
let api = Auth { let api = Auth {
ips: vec![], ips: vec![],
secret: AuthSecret::Scram( secret: AuthSecret::Scram(ServerSecret::build("my-secret-password").await.unwrap()),
ServerSecret::build_test_secret("my-secret-password")
.await
.unwrap(),
),
}; };
let user_info = ComputeUserInfoMaybeEndpoint { let user_info = ComputeUserInfoMaybeEndpoint {
@@ -785,7 +775,7 @@ mod tests {
)); ));
let _creds = auth_quirks( let _creds = auth_quirks(
&ctx, &mut ctx,
&api, &api,
user_info, user_info,
&mut stream, &mut stream,
@@ -804,14 +794,10 @@ mod tests {
let (mut client, server) = tokio::io::duplex(1024); let (mut client, server) = tokio::io::duplex(1024);
let mut stream = PqStream::new(Stream::from_raw(server)); let mut stream = PqStream::new(Stream::from_raw(server));
let ctx = RequestMonitoring::test(); let mut ctx = RequestMonitoring::test();
let api = Auth { let api = Auth {
ips: vec![], ips: vec![],
secret: AuthSecret::Scram( secret: AuthSecret::Scram(ServerSecret::build("my-secret-password").await.unwrap()),
ServerSecret::build_test_secret("my-secret-password")
.await
.unwrap(),
),
}; };
let user_info = ComputeUserInfoMaybeEndpoint { let user_info = ComputeUserInfoMaybeEndpoint {
@@ -842,7 +828,7 @@ mod tests {
)); ));
let creds = auth_quirks( let creds = auth_quirks(
&ctx, &mut ctx,
&api, &api,
user_info, user_info,
&mut stream, &mut stream,

View File

@@ -5,14 +5,14 @@ use crate::{
config::AuthenticationConfig, config::AuthenticationConfig,
console::AuthSecret, console::AuthSecret,
context::RequestMonitoring, context::RequestMonitoring,
sasl,
stream::{PqStream, Stream}, stream::{PqStream, Stream},
}; };
use proxy_sasl::sasl;
use tokio::io::{AsyncRead, AsyncWrite}; use tokio::io::{AsyncRead, AsyncWrite};
use tracing::{info, warn}; use tracing::{info, warn};
pub(super) async fn authenticate( pub(super) async fn authenticate(
ctx: &RequestMonitoring, ctx: &mut RequestMonitoring,
creds: ComputeUserInfo, creds: ComputeUserInfo,
client: &mut PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>, client: &mut PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,
config: &'static AuthenticationConfig, config: &'static AuthenticationConfig,
@@ -27,7 +27,7 @@ pub(super) async fn authenticate(
} }
AuthSecret::Scram(secret) => { AuthSecret::Scram(secret) => {
info!("auth endpoint chooses SCRAM"); info!("auth endpoint chooses SCRAM");
let scram = auth::Scram(&secret, ctx); let scram = auth::Scram(&secret, &mut *ctx);
let auth_outcome = tokio::time::timeout( let auth_outcome = tokio::time::timeout(
config.scram_protocol_timeout, config.scram_protocol_timeout,

View File

@@ -7,9 +7,9 @@ use crate::{
console::AuthSecret, console::AuthSecret,
context::RequestMonitoring, context::RequestMonitoring,
intern::EndpointIdInt, intern::EndpointIdInt,
sasl,
stream::{self, Stream}, stream::{self, Stream},
}; };
use proxy_sasl::sasl;
use tokio::io::{AsyncRead, AsyncWrite}; use tokio::io::{AsyncRead, AsyncWrite};
use tracing::{info, warn}; use tracing::{info, warn};
@@ -18,7 +18,7 @@ use tracing::{info, warn};
/// These properties are benefical for serverless JS workers, so we /// These properties are benefical for serverless JS workers, so we
/// use this mechanism for websocket connections. /// use this mechanism for websocket connections.
pub async fn authenticate_cleartext( pub async fn authenticate_cleartext(
ctx: &RequestMonitoring, ctx: &mut RequestMonitoring,
info: ComputeUserInfo, info: ComputeUserInfo,
client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>, client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,
secret: AuthSecret, secret: AuthSecret,
@@ -28,7 +28,7 @@ pub async fn authenticate_cleartext(
ctx.set_auth_method(crate::context::AuthMethod::Cleartext); ctx.set_auth_method(crate::context::AuthMethod::Cleartext);
// pause the timer while we communicate with the client // pause the timer while we communicate with the client
let paused = ctx.latency_timer_pause(crate::metrics::Waiting::Client); let paused = ctx.latency_timer.pause(crate::metrics::Waiting::Client);
let ep = EndpointIdInt::from(&info.endpoint); let ep = EndpointIdInt::from(&info.endpoint);
@@ -60,7 +60,7 @@ pub async fn authenticate_cleartext(
/// Similar to [`authenticate_cleartext`], but there's a specific password format, /// Similar to [`authenticate_cleartext`], but there's a specific password format,
/// and passwords are not yet validated (we don't know how to validate them!) /// and passwords are not yet validated (we don't know how to validate them!)
pub async fn password_hack_no_authentication( pub async fn password_hack_no_authentication(
ctx: &RequestMonitoring, ctx: &mut RequestMonitoring,
info: ComputeUserInfoNoEndpoint, info: ComputeUserInfoNoEndpoint,
client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>, client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,
) -> auth::Result<ComputeCredentials> { ) -> auth::Result<ComputeCredentials> {
@@ -68,7 +68,7 @@ pub async fn password_hack_no_authentication(
ctx.set_auth_method(crate::context::AuthMethod::Cleartext); ctx.set_auth_method(crate::context::AuthMethod::Cleartext);
// pause the timer while we communicate with the client // pause the timer while we communicate with the client
let _paused = ctx.latency_timer_pause(crate::metrics::Waiting::Client); let _paused = ctx.latency_timer.pause(crate::metrics::Waiting::Client);
let payload = AuthFlow::new(client) let payload = AuthFlow::new(client)
.begin(auth::PasswordHack) .begin(auth::PasswordHack)

View File

@@ -57,7 +57,7 @@ pub fn new_psql_session_id() -> String {
} }
pub(super) async fn authenticate( pub(super) async fn authenticate(
ctx: &RequestMonitoring, ctx: &mut RequestMonitoring,
link_uri: &reqwest::Url, link_uri: &reqwest::Url,
client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>, client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,
) -> auth::Result<NodeInfo> { ) -> auth::Result<NodeInfo> {

View File

@@ -84,7 +84,7 @@ pub fn endpoint_sni(
impl ComputeUserInfoMaybeEndpoint { impl ComputeUserInfoMaybeEndpoint {
pub fn parse( pub fn parse(
ctx: &RequestMonitoring, ctx: &mut RequestMonitoring,
params: &StartupMessageParams, params: &StartupMessageParams,
sni: Option<&str>, sni: Option<&str>,
common_names: Option<&HashSet<String>>, common_names: Option<&HashSet<String>>,
@@ -249,8 +249,8 @@ mod tests {
fn parse_bare_minimum() -> anyhow::Result<()> { fn parse_bare_minimum() -> anyhow::Result<()> {
// According to postgresql, only `user` should be required. // According to postgresql, only `user` should be required.
let options = StartupMessageParams::new([("user", "john_doe")]); let options = StartupMessageParams::new([("user", "john_doe")]);
let ctx = RequestMonitoring::test(); let mut ctx = RequestMonitoring::test();
let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, None, None)?; let user_info = ComputeUserInfoMaybeEndpoint::parse(&mut ctx, &options, None, None)?;
assert_eq!(user_info.user, "john_doe"); assert_eq!(user_info.user, "john_doe");
assert_eq!(user_info.endpoint_id, None); assert_eq!(user_info.endpoint_id, None);
@@ -264,8 +264,8 @@ mod tests {
("database", "world"), // should be ignored ("database", "world"), // should be ignored
("foo", "bar"), // should be ignored ("foo", "bar"), // should be ignored
]); ]);
let ctx = RequestMonitoring::test(); let mut ctx = RequestMonitoring::test();
let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, None, None)?; let user_info = ComputeUserInfoMaybeEndpoint::parse(&mut ctx, &options, None, None)?;
assert_eq!(user_info.user, "john_doe"); assert_eq!(user_info.user, "john_doe");
assert_eq!(user_info.endpoint_id, None); assert_eq!(user_info.endpoint_id, None);
@@ -279,9 +279,9 @@ mod tests {
let sni = Some("foo.localhost"); let sni = Some("foo.localhost");
let common_names = Some(["localhost".into()].into()); let common_names = Some(["localhost".into()].into());
let ctx = RequestMonitoring::test(); let mut ctx = RequestMonitoring::test();
let user_info = let user_info =
ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, sni, common_names.as_ref())?; ComputeUserInfoMaybeEndpoint::parse(&mut ctx, &options, sni, common_names.as_ref())?;
assert_eq!(user_info.user, "john_doe"); assert_eq!(user_info.user, "john_doe");
assert_eq!(user_info.endpoint_id.as_deref(), Some("foo")); assert_eq!(user_info.endpoint_id.as_deref(), Some("foo"));
assert_eq!(user_info.options.get_cache_key("foo"), "foo"); assert_eq!(user_info.options.get_cache_key("foo"), "foo");
@@ -296,8 +296,8 @@ mod tests {
("options", "-ckey=1 project=bar -c geqo=off"), ("options", "-ckey=1 project=bar -c geqo=off"),
]); ]);
let ctx = RequestMonitoring::test(); let mut ctx = RequestMonitoring::test();
let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, None, None)?; let user_info = ComputeUserInfoMaybeEndpoint::parse(&mut ctx, &options, None, None)?;
assert_eq!(user_info.user, "john_doe"); assert_eq!(user_info.user, "john_doe");
assert_eq!(user_info.endpoint_id.as_deref(), Some("bar")); assert_eq!(user_info.endpoint_id.as_deref(), Some("bar"));
@@ -311,8 +311,8 @@ mod tests {
("options", "-ckey=1 endpoint=bar -c geqo=off"), ("options", "-ckey=1 endpoint=bar -c geqo=off"),
]); ]);
let ctx = RequestMonitoring::test(); let mut ctx = RequestMonitoring::test();
let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, None, None)?; let user_info = ComputeUserInfoMaybeEndpoint::parse(&mut ctx, &options, None, None)?;
assert_eq!(user_info.user, "john_doe"); assert_eq!(user_info.user, "john_doe");
assert_eq!(user_info.endpoint_id.as_deref(), Some("bar")); assert_eq!(user_info.endpoint_id.as_deref(), Some("bar"));
@@ -329,8 +329,8 @@ mod tests {
), ),
]); ]);
let ctx = RequestMonitoring::test(); let mut ctx = RequestMonitoring::test();
let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, None, None)?; let user_info = ComputeUserInfoMaybeEndpoint::parse(&mut ctx, &options, None, None)?;
assert_eq!(user_info.user, "john_doe"); assert_eq!(user_info.user, "john_doe");
assert!(user_info.endpoint_id.is_none()); assert!(user_info.endpoint_id.is_none());
@@ -344,8 +344,8 @@ mod tests {
("options", "-ckey=1 endpoint=bar project=foo -c geqo=off"), ("options", "-ckey=1 endpoint=bar project=foo -c geqo=off"),
]); ]);
let ctx = RequestMonitoring::test(); let mut ctx = RequestMonitoring::test();
let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, None, None)?; let user_info = ComputeUserInfoMaybeEndpoint::parse(&mut ctx, &options, None, None)?;
assert_eq!(user_info.user, "john_doe"); assert_eq!(user_info.user, "john_doe");
assert!(user_info.endpoint_id.is_none()); assert!(user_info.endpoint_id.is_none());
@@ -359,9 +359,9 @@ mod tests {
let sni = Some("baz.localhost"); let sni = Some("baz.localhost");
let common_names = Some(["localhost".into()].into()); let common_names = Some(["localhost".into()].into());
let ctx = RequestMonitoring::test(); let mut ctx = RequestMonitoring::test();
let user_info = let user_info =
ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, sni, common_names.as_ref())?; ComputeUserInfoMaybeEndpoint::parse(&mut ctx, &options, sni, common_names.as_ref())?;
assert_eq!(user_info.user, "john_doe"); assert_eq!(user_info.user, "john_doe");
assert_eq!(user_info.endpoint_id.as_deref(), Some("baz")); assert_eq!(user_info.endpoint_id.as_deref(), Some("baz"));
@@ -374,16 +374,16 @@ mod tests {
let common_names = Some(["a.com".into(), "b.com".into()].into()); let common_names = Some(["a.com".into(), "b.com".into()].into());
let sni = Some("p1.a.com"); let sni = Some("p1.a.com");
let ctx = RequestMonitoring::test(); let mut ctx = RequestMonitoring::test();
let user_info = let user_info =
ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, sni, common_names.as_ref())?; ComputeUserInfoMaybeEndpoint::parse(&mut ctx, &options, sni, common_names.as_ref())?;
assert_eq!(user_info.endpoint_id.as_deref(), Some("p1")); assert_eq!(user_info.endpoint_id.as_deref(), Some("p1"));
let common_names = Some(["a.com".into(), "b.com".into()].into()); let common_names = Some(["a.com".into(), "b.com".into()].into());
let sni = Some("p1.b.com"); let sni = Some("p1.b.com");
let ctx = RequestMonitoring::test(); let mut ctx = RequestMonitoring::test();
let user_info = let user_info =
ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, sni, common_names.as_ref())?; ComputeUserInfoMaybeEndpoint::parse(&mut ctx, &options, sni, common_names.as_ref())?;
assert_eq!(user_info.endpoint_id.as_deref(), Some("p1")); assert_eq!(user_info.endpoint_id.as_deref(), Some("p1"));
Ok(()) Ok(())
@@ -397,8 +397,9 @@ mod tests {
let sni = Some("second.localhost"); let sni = Some("second.localhost");
let common_names = Some(["localhost".into()].into()); let common_names = Some(["localhost".into()].into());
let ctx = RequestMonitoring::test(); let mut ctx = RequestMonitoring::test();
let err = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, sni, common_names.as_ref()) let err =
ComputeUserInfoMaybeEndpoint::parse(&mut ctx, &options, sni, common_names.as_ref())
.expect_err("should fail"); .expect_err("should fail");
match err { match err {
InconsistentProjectNames { domain, option } => { InconsistentProjectNames { domain, option } => {
@@ -416,8 +417,9 @@ mod tests {
let sni = Some("project.localhost"); let sni = Some("project.localhost");
let common_names = Some(["example.com".into()].into()); let common_names = Some(["example.com".into()].into());
let ctx = RequestMonitoring::test(); let mut ctx = RequestMonitoring::test();
let err = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, sni, common_names.as_ref()) let err =
ComputeUserInfoMaybeEndpoint::parse(&mut ctx, &options, sni, common_names.as_ref())
.expect_err("should fail"); .expect_err("should fail");
match err { match err {
UnknownCommonName { cn } => { UnknownCommonName { cn } => {
@@ -436,9 +438,9 @@ mod tests {
let sni = Some("project.localhost"); let sni = Some("project.localhost");
let common_names = Some(["localhost".into()].into()); let common_names = Some(["localhost".into()].into());
let ctx = RequestMonitoring::test(); let mut ctx = RequestMonitoring::test();
let user_info = let user_info =
ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, sni, common_names.as_ref())?; ComputeUserInfoMaybeEndpoint::parse(&mut ctx, &options, sni, common_names.as_ref())?;
assert_eq!(user_info.endpoint_id.as_deref(), Some("project")); assert_eq!(user_info.endpoint_id.as_deref(), Some("project"));
assert_eq!( assert_eq!(
user_info.options.get_cache_key("project"), user_info.options.get_cache_key("project"),

View File

@@ -2,17 +2,16 @@
use super::{backend::ComputeCredentialKeys, AuthErrorImpl, PasswordHackPayload}; use super::{backend::ComputeCredentialKeys, AuthErrorImpl, PasswordHackPayload};
use crate::{ use crate::{
config::TlsServerEndPoint,
console::AuthSecret, console::AuthSecret,
context::RequestMonitoring, context::RequestMonitoring,
intern::EndpointIdInt, intern::EndpointIdInt,
sasl,
scram::{self, threadpool::ThreadPool},
stream::{PqStream, Stream}, stream::{PqStream, Stream},
}; };
use postgres_protocol::authentication::sasl::{SCRAM_SHA_256, SCRAM_SHA_256_PLUS}; use postgres_protocol::authentication::sasl::{SCRAM_SHA_256, SCRAM_SHA_256_PLUS};
use pq_proto::{BeAuthenticationSaslMessage, BeMessage, BeMessage as Be}; use pq_proto::{BeAuthenticationSaslMessage, BeMessage, BeMessage as Be};
use proxy_sasl::{
sasl,
scram::{self, threadpool::ThreadPool, TlsServerEndPoint},
};
use std::{io, sync::Arc}; use std::{io, sync::Arc};
use tokio::io::{AsyncRead, AsyncWrite}; use tokio::io::{AsyncRead, AsyncWrite};
use tracing::info; use tracing::info;
@@ -28,7 +27,7 @@ pub trait AuthMethod {
pub struct Begin; pub struct Begin;
/// Use [SCRAM](crate::scram)-based auth in [`AuthFlow`]. /// Use [SCRAM](crate::scram)-based auth in [`AuthFlow`].
pub struct Scram<'a>(pub &'a scram::ServerSecret, pub &'a RequestMonitoring); pub struct Scram<'a>(pub &'a scram::ServerSecret, pub &'a mut RequestMonitoring);
impl AuthMethod for Scram<'_> { impl AuthMethod for Scram<'_> {
#[inline(always)] #[inline(always)]
@@ -57,7 +56,7 @@ impl AuthMethod for PasswordHack {
/// Use clear-text password auth called `password` in docs /// Use clear-text password auth called `password` in docs
/// <https://www.postgresql.org/docs/current/auth-password.html> /// <https://www.postgresql.org/docs/current/auth-password.html>
pub struct CleartextPassword { pub struct CleartextPassword {
pub pool: Arc<ThreadPool<EndpointIdInt>>, pub pool: Arc<ThreadPool>,
pub endpoint: EndpointIdInt, pub endpoint: EndpointIdInt,
pub secret: AuthSecret, pub secret: AuthSecret,
} }
@@ -156,7 +155,7 @@ impl<S: AsyncRead + AsyncWrite + Unpin> AuthFlow<'_, S, Scram<'_>> {
let Scram(secret, ctx) = self.state; let Scram(secret, ctx) = self.state;
// pause the timer while we communicate with the client // pause the timer while we communicate with the client
let _paused = ctx.latency_timer_pause(crate::metrics::Waiting::Client); let _paused = ctx.latency_timer.pause(crate::metrics::Waiting::Client);
// Initial client message contains the chosen auth method's name. // Initial client message contains the chosen auth method's name.
let msg = self.stream.read_password_message().await?; let msg = self.stream.read_password_message().await?;
@@ -169,13 +168,15 @@ impl<S: AsyncRead + AsyncWrite + Unpin> AuthFlow<'_, S, Scram<'_>> {
} }
match sasl.method { match sasl.method {
SCRAM_SHA_256 => ctx.set_auth_method(crate::context::AuthMethod::ScramSha256), SCRAM_SHA_256 => ctx.auth_method = Some(crate::context::AuthMethod::ScramSha256),
SCRAM_SHA_256_PLUS => ctx.set_auth_method(crate::context::AuthMethod::ScramSha256Plus), SCRAM_SHA_256_PLUS => {
ctx.auth_method = Some(crate::context::AuthMethod::ScramSha256Plus)
}
_ => {} _ => {}
} }
info!("client chooses {}", sasl.method); info!("client chooses {}", sasl.method);
let outcome = sasl::SaslStream::new(&mut self.stream.framed, sasl.message) let outcome = sasl::SaslStream::new(self.stream, sasl.message)
.authenticate(scram::Exchange::new( .authenticate(scram::Exchange::new(
secret, secret,
rand::random, rand::random,
@@ -192,7 +193,7 @@ impl<S: AsyncRead + AsyncWrite + Unpin> AuthFlow<'_, S, Scram<'_>> {
} }
pub(crate) async fn validate_password_and_exchange( pub(crate) async fn validate_password_and_exchange(
pool: &ThreadPool<EndpointIdInt>, pool: &ThreadPool,
endpoint: EndpointIdInt, endpoint: EndpointIdInt,
password: &[u8], password: &[u8],
secret: AuthSecret, secret: AuthSecret,
@@ -207,8 +208,7 @@ pub(crate) async fn validate_password_and_exchange(
} }
// perform scram authentication as both client and server to validate the keys // perform scram authentication as both client and server to validate the keys
AuthSecret::Scram(scram_secret) => { AuthSecret::Scram(scram_secret) => {
let outcome = let outcome = crate::scram::exchange(pool, endpoint, &scram_secret, password).await?;
proxy_sasl::scram::exchange(pool, endpoint, &scram_secret, password).await?;
let client_key = match outcome { let client_key = match outcome {
sasl::Outcome::Success(client_key) => client_key, sasl::Outcome::Success(client_key) => client_key,

View File

@@ -7,18 +7,17 @@ use std::{net::SocketAddr, sync::Arc};
use futures::future::Either; use futures::future::Either;
use itertools::Itertools; use itertools::Itertools;
use proxy_core::context::RequestMonitoring; use proxy::config::TlsServerEndPoint;
use proxy_core::metrics::Metrics; use proxy::context::RequestMonitoring;
use proxy_core::proxy::{copy_bidirectional_client_compute, run_until_cancelled, ErrorSource}; use proxy::metrics::{Metrics, ThreadPoolMetrics};
use proxy_sasl::scram::threadpool::ThreadPoolMetrics; use proxy::proxy::{copy_bidirectional_client_compute, run_until_cancelled, ErrorSource};
use proxy_sasl::scram::TlsServerEndPoint;
use rustls::pki_types::PrivateKeyDer; use rustls::pki_types::PrivateKeyDer;
use tokio::net::TcpListener; use tokio::net::TcpListener;
use anyhow::{anyhow, bail, ensure, Context}; use anyhow::{anyhow, bail, ensure, Context};
use clap::Arg; use clap::Arg;
use futures::TryFutureExt; use futures::TryFutureExt;
use proxy_core::stream::{PqStream, Stream}; use proxy::stream::{PqStream, Stream};
use tokio::io::{AsyncRead, AsyncWrite}; use tokio::io::{AsyncRead, AsyncWrite};
use tokio_util::sync::CancellationToken; use tokio_util::sync::CancellationToken;
@@ -63,7 +62,7 @@ fn cli() -> clap::Command {
#[tokio::main] #[tokio::main]
async fn main() -> anyhow::Result<()> { async fn main() -> anyhow::Result<()> {
let _logging_guard = proxy_core::logging::init().await?; let _logging_guard = proxy::logging::init().await?;
let _panic_hook_guard = utils::logging::replace_panic_hook_with_tracing_panic_hook(); let _panic_hook_guard = utils::logging::replace_panic_hook_with_tracing_panic_hook();
let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]); let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]);
@@ -134,14 +133,14 @@ async fn main() -> anyhow::Result<()> {
proxy_listener, proxy_listener,
cancellation_token.clone(), cancellation_token.clone(),
)); ));
let signals_task = tokio::spawn(proxy_core::handle_signals(cancellation_token)); let signals_task = tokio::spawn(proxy::handle_signals(cancellation_token));
// the signal task cant ever succeed. // the signal task cant ever succeed.
// the main task can error, or can succeed on cancellation. // the main task can error, or can succeed on cancellation.
// we want to immediately exit on either of these cases // we want to immediately exit on either of these cases
let signal = match futures::future::select(signals_task, main).await { let signal = match futures::future::select(signals_task, main).await {
Either::Left((res, _)) => proxy_core::flatten_err(res)?, Either::Left((res, _)) => proxy::flatten_err(res)?,
Either::Right((res, _)) => return proxy_core::flatten_err(res), Either::Right((res, _)) => return proxy::flatten_err(res),
}; };
// maintenance tasks return `Infallible` success values, this is an impossible value // maintenance tasks return `Infallible` success values, this is an impossible value
@@ -181,7 +180,7 @@ async fn task_main(
let ctx = RequestMonitoring::new( let ctx = RequestMonitoring::new(
session_id, session_id,
peer_addr.ip(), peer_addr.ip(),
proxy_core::metrics::Protocol::SniRouter, proxy::metrics::Protocol::SniRouter,
"sni", "sni",
); );
handle_client(ctx, dest_suffix, tls_config, tls_server_end_point, socket).await handle_client(ctx, dest_suffix, tls_config, tls_server_end_point, socket).await
@@ -206,7 +205,7 @@ async fn task_main(
const ERR_INSECURE_CONNECTION: &str = "connection is insecure (try using `sslmode=require`)"; const ERR_INSECURE_CONNECTION: &str = "connection is insecure (try using `sslmode=require`)";
async fn ssl_handshake<S: AsyncRead + AsyncWrite + Unpin>( async fn ssl_handshake<S: AsyncRead + AsyncWrite + Unpin>(
ctx: &RequestMonitoring, ctx: &mut RequestMonitoring,
raw_stream: S, raw_stream: S,
tls_config: Arc<rustls::ServerConfig>, tls_config: Arc<rustls::ServerConfig>,
tls_server_end_point: TlsServerEndPoint, tls_server_end_point: TlsServerEndPoint,
@@ -250,20 +249,20 @@ async fn ssl_handshake<S: AsyncRead + AsyncWrite + Unpin>(
"unexpected startup packet, rejecting connection" "unexpected startup packet, rejecting connection"
); );
stream stream
.throw_error_str(ERR_INSECURE_CONNECTION, proxy_core::error::ErrorKind::User) .throw_error_str(ERR_INSECURE_CONNECTION, proxy::error::ErrorKind::User)
.await? .await?
} }
} }
} }
async fn handle_client( async fn handle_client(
ctx: RequestMonitoring, mut ctx: RequestMonitoring,
dest_suffix: Arc<String>, dest_suffix: Arc<String>,
tls_config: Arc<rustls::ServerConfig>, tls_config: Arc<rustls::ServerConfig>,
tls_server_end_point: TlsServerEndPoint, tls_server_end_point: TlsServerEndPoint,
stream: impl AsyncRead + AsyncWrite + Unpin, stream: impl AsyncRead + AsyncWrite + Unpin,
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
let mut tls_stream = ssl_handshake(&ctx, stream, tls_config, tls_server_end_point).await?; let mut tls_stream = ssl_handshake(&mut ctx, stream, tls_config, tls_server_end_point).await?;
// Cut off first part of the SNI domain // Cut off first part of the SNI domain
// We receive required destination details in the format of // We receive required destination details in the format of

View File

@@ -5,39 +5,38 @@ use aws_config::meta::region::RegionProviderChain;
use aws_config::profile::ProfileFileCredentialsProvider; use aws_config::profile::ProfileFileCredentialsProvider;
use aws_config::provider_config::ProviderConfig; use aws_config::provider_config::ProviderConfig;
use aws_config::web_identity_token::WebIdentityTokenCredentialsProvider; use aws_config::web_identity_token::WebIdentityTokenCredentialsProvider;
use aws_config::Region;
use futures::future::Either; use futures::future::Either;
use proxy_core::auth; use proxy::auth;
use proxy_core::auth::backend::AuthRateLimiter; use proxy::auth::backend::AuthRateLimiter;
use proxy_core::auth::backend::MaybeOwned; use proxy::auth::backend::MaybeOwned;
use proxy_core::cancellation::CancelMap; use proxy::cancellation::CancelMap;
use proxy_core::cancellation::CancellationHandler; use proxy::cancellation::CancellationHandler;
use proxy_core::config::remote_storage_from_toml; use proxy::config::remote_storage_from_toml;
use proxy_core::config::AuthenticationConfig; use proxy::config::AuthenticationConfig;
use proxy_core::config::CacheOptions; use proxy::config::CacheOptions;
use proxy_core::config::HttpConfig; use proxy::config::HttpConfig;
use proxy_core::config::ProjectInfoCacheOptions; use proxy::config::ProjectInfoCacheOptions;
use proxy_core::console; use proxy::console;
use proxy_core::context::parquet::ParquetUploadArgs; use proxy::context::parquet::ParquetUploadArgs;
use proxy_core::http; use proxy::http;
use proxy_core::http::health_server::AppMetrics; use proxy::http::health_server::AppMetrics;
use proxy_core::metrics::Metrics; use proxy::metrics::Metrics;
use proxy_core::rate_limiter::EndpointRateLimiter; use proxy::rate_limiter::EndpointRateLimiter;
use proxy_core::rate_limiter::LeakyBucketConfig; use proxy::rate_limiter::LeakyBucketConfig;
use proxy_core::rate_limiter::RateBucketInfo; use proxy::rate_limiter::RateBucketInfo;
use proxy_core::rate_limiter::WakeComputeRateLimiter; use proxy::rate_limiter::WakeComputeRateLimiter;
use proxy_core::redis::cancellation_publisher::RedisPublisherClient; use proxy::redis::cancellation_publisher::RedisPublisherClient;
use proxy_core::redis::connection_with_credentials_provider::ConnectionWithCredentialsProvider; use proxy::redis::connection_with_credentials_provider::ConnectionWithCredentialsProvider;
use proxy_core::redis::elasticache; use proxy::redis::elasticache;
use proxy_core::redis::notifications; use proxy::redis::notifications;
use proxy_core::serverless::cancel_set::CancelSet; use proxy::scram::threadpool::ThreadPool;
use proxy_core::serverless::GlobalConnPoolOptions; use proxy::serverless::cancel_set::CancelSet;
use proxy_core::usage_metrics; use proxy::serverless::GlobalConnPoolOptions;
use proxy::usage_metrics;
use anyhow::bail; use anyhow::bail;
use proxy_core::config::{self, ProxyConfig}; use proxy::config::{self, ProxyConfig};
use proxy_core::serverless; use proxy::serverless;
use proxy_sasl::scram::threadpool::ThreadPool;
use remote_storage::RemoteStorageConfig; use remote_storage::RemoteStorageConfig;
use std::net::SocketAddr; use std::net::SocketAddr;
use std::pin::pin; use std::pin::pin;
@@ -268,7 +267,7 @@ struct SqlOverHttpArgs {
#[tokio::main] #[tokio::main]
async fn main() -> anyhow::Result<()> { async fn main() -> anyhow::Result<()> {
let _logging_guard = proxy_core::logging::init().await?; let _logging_guard = proxy::logging::init().await?;
let _panic_hook_guard = utils::logging::replace_panic_hook_with_tracing_panic_hook(); let _panic_hook_guard = utils::logging::replace_panic_hook_with_tracing_panic_hook();
let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]); let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]);
@@ -279,7 +278,7 @@ async fn main() -> anyhow::Result<()> {
build_tag: BUILD_TAG, build_tag: BUILD_TAG,
}); });
let jemalloc = match proxy_core::jemalloc::MetricRecorder::new() { let jemalloc = match proxy::jemalloc::MetricRecorder::new() {
Ok(t) => Some(t), Ok(t) => Some(t),
Err(e) => { Err(e) => {
tracing::error!(error = ?e, "could not start jemalloc metrics loop"); tracing::error!(error = ?e, "could not start jemalloc metrics loop");
@@ -291,10 +290,9 @@ async fn main() -> anyhow::Result<()> {
let config = build_config(&args)?; let config = build_config(&args)?;
info!("Authentication backend: {}", config.auth_backend); info!("Authentication backend: {}", config.auth_backend);
info!("Using region: {}", args.aws_region); info!("Using region: {}", config.aws_region);
let region_provider = let region_provider = RegionProviderChain::default_provider().or_else(&*config.aws_region); // Replace with your Redis region if needed
RegionProviderChain::default_provider().or_else(Region::new(args.aws_region.clone()));
let provider_conf = let provider_conf =
ProviderConfig::without_region().with_region(region_provider.region().await); ProviderConfig::without_region().with_region(region_provider.region().await);
let aws_credentials_provider = { let aws_credentials_provider = {
@@ -320,7 +318,7 @@ async fn main() -> anyhow::Result<()> {
}; };
let elasticache_credentials_provider = Arc::new(elasticache::CredentialsProvider::new( let elasticache_credentials_provider = Arc::new(elasticache::CredentialsProvider::new(
elasticache::AWSIRSAConfig::new( elasticache::AWSIRSAConfig::new(
args.aws_region.clone(), config.aws_region.clone(),
args.redis_cluster_name, args.redis_cluster_name,
args.redis_user_id, args.redis_user_id,
), ),
@@ -378,14 +376,11 @@ async fn main() -> anyhow::Result<()> {
let cancel_map = CancelMap::default(); let cancel_map = CancelMap::default();
let redis_rps_limit = Vec::leak(args.redis_rps_limit.clone());
RateBucketInfo::validate(redis_rps_limit)?;
let redis_publisher = match &regional_redis_client { let redis_publisher = match &regional_redis_client {
Some(redis_publisher) => Some(Arc::new(Mutex::new(RedisPublisherClient::new( Some(redis_publisher) => Some(Arc::new(Mutex::new(RedisPublisherClient::new(
redis_publisher.clone(), redis_publisher.clone(),
args.region.clone(), args.region.clone(),
redis_rps_limit, &config.redis_rps_limit,
)?))), )?))),
None => None, None => None,
}; };
@@ -394,7 +389,7 @@ async fn main() -> anyhow::Result<()> {
>::new( >::new(
cancel_map.clone(), cancel_map.clone(),
redis_publisher, redis_publisher,
proxy_core::metrics::CancellationSource::FromClient, proxy::metrics::CancellationSource::FromClient,
)); ));
// bit of a hack - find the min rps and max rps supported and turn it into // bit of a hack - find the min rps and max rps supported and turn it into
@@ -419,7 +414,7 @@ async fn main() -> anyhow::Result<()> {
// client facing tasks. these will exit on error or on cancellation // client facing tasks. these will exit on error or on cancellation
// cancellation returns Ok(()) // cancellation returns Ok(())
let mut client_tasks = JoinSet::new(); let mut client_tasks = JoinSet::new();
client_tasks.spawn(proxy_core::proxy::task_main( client_tasks.spawn(proxy::proxy::task_main(
config, config,
proxy_listener, proxy_listener,
cancellation_token.clone(), cancellation_token.clone(),
@@ -443,20 +438,20 @@ async fn main() -> anyhow::Result<()> {
)); ));
} }
client_tasks.spawn(proxy_core::context::parquet::worker( client_tasks.spawn(proxy::context::parquet::worker(
cancellation_token.clone(), cancellation_token.clone(),
args.parquet_upload, args.parquet_upload,
)); ));
// maintenance tasks. these never return unless there's an error // maintenance tasks. these never return unless there's an error
let mut maintenance_tasks = JoinSet::new(); let mut maintenance_tasks = JoinSet::new();
maintenance_tasks.spawn(proxy_core::handle_signals(cancellation_token.clone())); maintenance_tasks.spawn(proxy::handle_signals(cancellation_token.clone()));
maintenance_tasks.spawn(http::health_server::task_main( maintenance_tasks.spawn(http::health_server::task_main(
http_listener, http_listener,
AppMetrics { AppMetrics {
jemalloc, jemalloc,
neon_metrics, neon_metrics,
proxy: proxy_core::metrics::Metrics::get(), proxy: proxy::metrics::Metrics::get(),
}, },
)); ));
maintenance_tasks.spawn(console::mgmt::task_main(mgmt_listener)); maintenance_tasks.spawn(console::mgmt::task_main(mgmt_listener));
@@ -471,7 +466,7 @@ async fn main() -> anyhow::Result<()> {
} }
if let auth::BackendType::Console(api, _) = &config.auth_backend { if let auth::BackendType::Console(api, _) = &config.auth_backend {
if let proxy_core::console::provider::ConsoleBackend::Console(api) = &**api { if let proxy::console::provider::ConsoleBackend::Console(api) = &**api {
match (redis_notifications_client, regional_redis_client.clone()) { match (redis_notifications_client, regional_redis_client.clone()) {
(None, None) => {} (None, None) => {}
(client1, client2) => { (client1, client2) => {
@@ -516,11 +511,11 @@ async fn main() -> anyhow::Result<()> {
.await .await
{ {
// exit immediately on maintenance task completion // exit immediately on maintenance task completion
Either::Left((Some(res), _)) => break proxy_core::flatten_err(res)?, Either::Left((Some(res), _)) => break proxy::flatten_err(res)?,
// exit with error immediately if all maintenance tasks have ceased (should be caught by branch above) // exit with error immediately if all maintenance tasks have ceased (should be caught by branch above)
Either::Left((None, _)) => bail!("no maintenance tasks running. invalid state"), Either::Left((None, _)) => bail!("no maintenance tasks running. invalid state"),
// exit immediately on client task error // exit immediately on client task error
Either::Right((Some(res), _)) => proxy_core::flatten_err(res)?, Either::Right((Some(res), _)) => proxy::flatten_err(res)?,
// exit if all our client tasks have shutdown gracefully // exit if all our client tasks have shutdown gracefully
Either::Right((None, _)) => return Ok(()), Either::Right((None, _)) => return Ok(()),
} }
@@ -607,7 +602,7 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
timeout, timeout,
epoch, epoch,
&Metrics::get().wake_compute_lock, &Metrics::get().wake_compute_lock,
))); )?));
tokio::spawn(locks.garbage_collect_worker()); tokio::spawn(locks.garbage_collect_worker());
let url = args.auth_endpoint.parse()?; let url = args.auth_endpoint.parse()?;
@@ -658,9 +653,10 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
timeout, timeout,
epoch, epoch,
&Metrics::get().proxy.connect_compute_lock, &Metrics::get().proxy.connect_compute_lock,
); )?;
let http_config = HttpConfig { let http_config = HttpConfig {
request_timeout: args.sql_over_http.sql_over_http_timeout,
pool_options: GlobalConnPoolOptions { pool_options: GlobalConnPoolOptions {
max_conns_per_endpoint: args.sql_over_http.sql_over_http_pool_max_conns_per_endpoint, max_conns_per_endpoint: args.sql_over_http.sql_over_http_pool_max_conns_per_endpoint,
gc_epoch: args.sql_over_http.sql_over_http_pool_gc_epoch, gc_epoch: args.sql_over_http.sql_over_http_pool_gc_epoch,
@@ -680,6 +676,9 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
rate_limit_ip_subnet: args.auth_rate_limit_ip_subnet, rate_limit_ip_subnet: args.auth_rate_limit_ip_subnet,
}; };
let mut redis_rps_limit = args.redis_rps_limit.clone();
RateBucketInfo::validate(&mut redis_rps_limit)?;
let config = Box::leak(Box::new(ProxyConfig { let config = Box::leak(Box::new(ProxyConfig {
tls_config, tls_config,
auth_backend, auth_backend,
@@ -688,8 +687,11 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
http_config, http_config,
authentication_config, authentication_config,
require_client_ip: args.require_client_ip, require_client_ip: args.require_client_ip,
disable_ip_check_for_http: args.disable_ip_check_for_http,
redis_rps_limit,
handshake_timeout: args.handshake_timeout, handshake_timeout: args.handshake_timeout,
region: args.region.clone(), region: args.region.clone(),
aws_region: args.aws_region.clone(),
wake_compute_retry_config: config::RetryConfig::parse(&args.wake_compute_retry)?, wake_compute_retry_config: config::RetryConfig::parse(&args.wake_compute_retry)?,
connect_compute_locks, connect_compute_locks,
connect_to_compute_retry_config: config::RetryConfig::parse( connect_to_compute_retry_config: config::RetryConfig::parse(
@@ -707,7 +709,7 @@ mod tests {
use std::time::Duration; use std::time::Duration;
use clap::Parser; use clap::Parser;
use proxy_core::rate_limiter::RateBucketInfo; use proxy::rate_limiter::RateBucketInfo;
#[test] #[test]
fn parse_endpoint_rps_limit() { fn parse_endpoint_rps_limit() {

View File

@@ -68,7 +68,7 @@ impl EndpointsCache {
ready: AtomicBool::new(false), ready: AtomicBool::new(false),
} }
} }
pub async fn is_valid(&self, ctx: &RequestMonitoring, endpoint: &EndpointId) -> bool { pub async fn is_valid(&self, ctx: &mut RequestMonitoring, endpoint: &EndpointId) -> bool {
if !self.ready.load(Ordering::Acquire) { if !self.ready.load(Ordering::Acquire) {
return true; return true;
} }

View File

@@ -371,8 +371,7 @@ impl Cache for ProjectInfoCacheImpl {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use crate::ProjectId; use crate::{scram::ServerSecret, ProjectId};
use proxy_sasl::scram::ServerSecret;
#[tokio::test] #[tokio::test]
async fn test_project_info_cache_settings() { async fn test_project_info_cache_settings() {

View File

@@ -103,12 +103,8 @@ impl ConnCfg {
/// Reuse password or auth keys from the other config. /// Reuse password or auth keys from the other config.
pub fn reuse_password(&mut self, other: Self) { pub fn reuse_password(&mut self, other: Self) {
if let Some(password) = other.get_password() { if let Some(password) = other.get_auth() {
self.password(password); self.auth(password);
}
if let Some(keys) = other.get_auth_keys() {
self.auth_keys(keys);
} }
} }
@@ -124,48 +120,64 @@ impl ConnCfg {
/// Apply startup message params to the connection config. /// Apply startup message params to the connection config.
pub fn set_startup_params(&mut self, params: &StartupMessageParams) { pub fn set_startup_params(&mut self, params: &StartupMessageParams) {
let mut client_encoding = false;
for (k, v) in params.iter() {
match k {
"user" => {
// Only set `user` if it's not present in the config. // Only set `user` if it's not present in the config.
// Link auth flow takes username from the console's response. // Link auth flow takes username from the console's response.
if let (None, Some(user)) = (self.get_user(), params.get("user")) { if self.get_user().is_none() {
self.user(user); self.user(v);
} }
// Only set `dbname` if it's not present in the config.
// Link auth flow takes dbname from the console's response.
if let (None, Some(dbname)) = (self.get_dbname(), params.get("database")) {
self.dbname(dbname);
}
// Don't add `options` if they were only used for specifying a project.
// Connection pools don't support `options`, because they affect backend startup.
if let Some(options) = filtered_options(params) {
self.options(&options);
}
if let Some(app_name) = params.get("application_name") {
self.application_name(app_name);
}
// TODO: This is especially ugly...
if let Some(replication) = params.get("replication") {
use tokio_postgres::config::ReplicationMode;
match replication {
"true" | "on" | "yes" | "1" => {
self.replication_mode(ReplicationMode::Physical);
} }
"database" => { "database" => {
self.replication_mode(ReplicationMode::Logical); // Only set `dbname` if it's not present in the config.
// Link auth flow takes dbname from the console's response.
if self.get_dbname().is_none() {
self.dbname(v);
} }
_other => {} }
"options" => {
// Don't add `options` if they were only used for specifying a project.
// Connection pools don't support `options`, because they affect backend startup.
if let Some(options) = filtered_options(v) {
self.options(&options);
} }
} }
// TODO: extend the list of the forwarded startup parameters. // the special ones in tokio-postgres that we don't want being set by the user
// Currently, tokio-postgres doesn't allow us to pass "dbname" => {}
// arbitrary parameters, but the ones above are a good start. "password" => {}
// "sslmode" => {}
// This and the reverse params problem can be better addressed "host" => {}
// in a bespoke connection machinery (a new library for that sake). "port" => {}
"connect_timeout" => {}
"keepalives" => {}
"keepalives_idle" => {}
"keepalives_interval" => {}
"keepalives_retries" => {}
"target_session_attrs" => {}
"channel_binding" => {}
"max_backend_message_size" => {}
"client_encoding" => {
client_encoding = true;
// only error should be from bad null bytes,
// but we've already checked for those.
_ = self.param("client_encoding", v);
}
_ => {
// only error should be from bad null bytes,
// but we've already checked for those.
_ = self.param(k, v);
}
}
}
if !client_encoding {
// for compatibility since we removed it from tokio-postgres
self.param("client_encoding", "UTF8").unwrap();
}
} }
} }
@@ -276,12 +288,12 @@ impl ConnCfg {
/// Connect to a corresponding compute node. /// Connect to a corresponding compute node.
pub async fn connect( pub async fn connect(
&self, &self,
ctx: &RequestMonitoring, ctx: &mut RequestMonitoring,
allow_self_signed_compute: bool, allow_self_signed_compute: bool,
aux: MetricsAuxInfo, aux: MetricsAuxInfo,
timeout: Duration, timeout: Duration,
) -> Result<PostgresConnection, ConnectionError> { ) -> Result<PostgresConnection, ConnectionError> {
let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Compute); let pause = ctx.latency_timer.pause(crate::metrics::Waiting::Compute);
let (socket_addr, stream, host) = self.connect_raw(timeout).await?; let (socket_addr, stream, host) = self.connect_raw(timeout).await?;
drop(pause); drop(pause);
@@ -304,14 +316,14 @@ impl ConnCfg {
)?; )?;
// connect_raw() will not use TLS if sslmode is "disable" // connect_raw() will not use TLS if sslmode is "disable"
let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Compute); let pause = ctx.latency_timer.pause(crate::metrics::Waiting::Compute);
let (client, connection) = self.0.connect_raw(stream, tls).await?; let (client, connection) = self.0.connect_raw(stream, tls).await?;
drop(pause); drop(pause);
tracing::Span::current().record("pid", tracing::field::display(client.get_process_id())); tracing::Span::current().record("pid", tracing::field::display(client.get_process_id()));
let stream = connection.stream.into_inner(); let stream = connection.stream.into_inner();
info!( info!(
cold_start_info = ctx.cold_start_info().as_str(), cold_start_info = ctx.cold_start_info.as_str(),
"connected to compute node at {host} ({socket_addr}) sslmode={:?}", "connected to compute node at {host} ({socket_addr}) sslmode={:?}",
self.0.get_ssl_mode() self.0.get_ssl_mode()
); );
@@ -330,7 +342,7 @@ impl ConnCfg {
params, params,
cancel_closure, cancel_closure,
aux, aux,
_guage: Metrics::get().proxy.db_connections.guard(ctx.protocol()), _guage: Metrics::get().proxy.db_connections.guard(ctx.protocol),
}; };
Ok(connection) Ok(connection)
@@ -338,10 +350,9 @@ impl ConnCfg {
} }
/// Retrieve `options` from a startup message, dropping all proxy-secific flags. /// Retrieve `options` from a startup message, dropping all proxy-secific flags.
fn filtered_options(params: &StartupMessageParams) -> Option<String> { fn filtered_options(options: &str) -> Option<String> {
#[allow(unstable_name_collisions)] #[allow(unstable_name_collisions)]
let options: String = params let options: String = StartupMessageParams::parse_options_raw(options)
.options_raw()?
.filter(|opt| parse_endpoint_param(opt).is_none() && neon_option(opt).is_none()) .filter(|opt| parse_endpoint_param(opt).is_none() && neon_option(opt).is_none())
.intersperse(" ") // TODO: use impl from std once it's stabilized .intersperse(" ") // TODO: use impl from std once it's stabilized
.collect(); .collect();
@@ -413,27 +424,23 @@ mod tests {
#[test] #[test]
fn test_filtered_options() { fn test_filtered_options() {
// Empty options is unlikely to be useful anyway. // Empty options is unlikely to be useful anyway.
let params = StartupMessageParams::new([("options", "")]); assert_eq!(filtered_options(""), None);
assert_eq!(filtered_options(&params), None);
// It's likely that clients will only use options to specify endpoint/project. // It's likely that clients will only use options to specify endpoint/project.
let params = StartupMessageParams::new([("options", "project=foo")]); let params = "project=foo";
assert_eq!(filtered_options(&params), None); assert_eq!(filtered_options(params), None);
// Same, because unescaped whitespaces are no-op. // Same, because unescaped whitespaces are no-op.
let params = StartupMessageParams::new([("options", " project=foo ")]); let params = " project=foo ";
assert_eq!(filtered_options(&params).as_deref(), None); assert_eq!(filtered_options(params), None);
let params = StartupMessageParams::new([("options", r"\ project=foo \ ")]); let params = r"\ project=foo \ ";
assert_eq!(filtered_options(&params).as_deref(), Some(r"\ \ ")); assert_eq!(filtered_options(params).as_deref(), Some(r"\ \ "));
let params = StartupMessageParams::new([("options", "project = foo")]); let params = "project = foo";
assert_eq!(filtered_options(&params).as_deref(), Some("project = foo")); assert_eq!(filtered_options(params).as_deref(), Some("project = foo"));
let params = StartupMessageParams::new([( let params = "project = foo neon_endpoint_type:read_write neon_lsn:0/2";
"options", assert_eq!(filtered_options(params).as_deref(), Some("project = foo"));
"project = foo neon_endpoint_type:read_write neon_lsn:0/2",
)]);
assert_eq!(filtered_options(&params).as_deref(), Some("project = foo"));
} }
} }

View File

@@ -1,26 +1,27 @@
use crate::{ use crate::{
auth::{self, backend::AuthRateLimiter}, auth::{self, backend::AuthRateLimiter},
console::locks::ApiLocks, console::locks::ApiLocks,
intern::EndpointIdInt,
rate_limiter::{RateBucketInfo, RateLimitAlgorithm, RateLimiterConfig}, rate_limiter::{RateBucketInfo, RateLimitAlgorithm, RateLimiterConfig},
scram::threadpool::ThreadPool,
serverless::{cancel_set::CancelSet, GlobalConnPoolOptions}, serverless::{cancel_set::CancelSet, GlobalConnPoolOptions},
Host, Host,
}; };
use anyhow::{bail, ensure, Context, Ok}; use anyhow::{bail, ensure, Context, Ok};
use itertools::Itertools; use itertools::Itertools;
use proxy_sasl::scram::{threadpool::ThreadPool, TlsServerEndPoint};
use remote_storage::RemoteStorageConfig; use remote_storage::RemoteStorageConfig;
use rustls::{ use rustls::{
crypto::ring::sign, crypto::ring::sign,
pki_types::{CertificateDer, PrivateKeyDer}, pki_types::{CertificateDer, PrivateKeyDer},
}; };
use sha2::{Digest, Sha256};
use std::{ use std::{
collections::{HashMap, HashSet}, collections::{HashMap, HashSet},
str::FromStr, str::FromStr,
sync::Arc, sync::Arc,
time::Duration, time::Duration,
}; };
use tracing::{error, info};
use x509_parser::oid_registry;
pub struct ProxyConfig { pub struct ProxyConfig {
pub tls_config: Option<TlsConfig>, pub tls_config: Option<TlsConfig>,
@@ -30,8 +31,11 @@ pub struct ProxyConfig {
pub http_config: HttpConfig, pub http_config: HttpConfig,
pub authentication_config: AuthenticationConfig, pub authentication_config: AuthenticationConfig,
pub require_client_ip: bool, pub require_client_ip: bool,
pub disable_ip_check_for_http: bool,
pub redis_rps_limit: Vec<RateBucketInfo>,
pub region: String, pub region: String,
pub handshake_timeout: Duration, pub handshake_timeout: Duration,
pub aws_region: String,
pub wake_compute_retry_config: RetryConfig, pub wake_compute_retry_config: RetryConfig,
pub connect_compute_locks: ApiLocks<Host>, pub connect_compute_locks: ApiLocks<Host>,
pub connect_to_compute_retry_config: RetryConfig, pub connect_to_compute_retry_config: RetryConfig,
@@ -51,13 +55,14 @@ pub struct TlsConfig {
} }
pub struct HttpConfig { pub struct HttpConfig {
pub request_timeout: tokio::time::Duration,
pub pool_options: GlobalConnPoolOptions, pub pool_options: GlobalConnPoolOptions,
pub cancel_set: CancelSet, pub cancel_set: CancelSet,
pub client_conn_threshold: u64, pub client_conn_threshold: u64,
} }
pub struct AuthenticationConfig { pub struct AuthenticationConfig {
pub thread_pool: Arc<ThreadPool<EndpointIdInt>>, pub thread_pool: Arc<ThreadPool>,
pub scram_protocol_timeout: tokio::time::Duration, pub scram_protocol_timeout: tokio::time::Duration,
pub rate_limiter_enabled: bool, pub rate_limiter_enabled: bool,
pub rate_limiter: AuthRateLimiter, pub rate_limiter: AuthRateLimiter,
@@ -125,6 +130,66 @@ pub fn configure_tls(
}) })
} }
/// Channel binding parameter
///
/// <https://www.rfc-editor.org/rfc/rfc5929#section-4>
/// Description: The hash of the TLS server's certificate as it
/// appears, octet for octet, in the server's Certificate message. Note
/// that the Certificate message contains a certificate_list, in which
/// the first element is the server's certificate.
///
/// The hash function is to be selected as follows:
///
/// * if the certificate's signatureAlgorithm uses a single hash
/// function, and that hash function is either MD5 or SHA-1, then use SHA-256;
///
/// * if the certificate's signatureAlgorithm uses a single hash
/// function and that hash function neither MD5 nor SHA-1, then use
/// the hash function associated with the certificate's
/// signatureAlgorithm;
///
/// * if the certificate's signatureAlgorithm uses no hash functions or
/// uses multiple hash functions, then this channel binding type's
/// channel bindings are undefined at this time (updates to is channel
/// binding type may occur to address this issue if it ever arises).
#[derive(Debug, Clone, Copy)]
pub enum TlsServerEndPoint {
Sha256([u8; 32]),
Undefined,
}
impl TlsServerEndPoint {
pub fn new(cert: &CertificateDer) -> anyhow::Result<Self> {
let sha256_oids = [
// I'm explicitly not adding MD5 or SHA1 here... They're bad.
oid_registry::OID_SIG_ECDSA_WITH_SHA256,
oid_registry::OID_PKCS1_SHA256WITHRSA,
];
let pem = x509_parser::parse_x509_certificate(cert)
.context("Failed to parse PEM object from cerficiate")?
.1;
info!(subject = %pem.subject, "parsing TLS certificate");
let reg = oid_registry::OidRegistry::default().with_all_crypto();
let oid = pem.signature_algorithm.oid();
let alg = reg.get(oid);
if sha256_oids.contains(oid) {
let tls_server_end_point: [u8; 32] = Sha256::new().chain_update(cert).finalize().into();
info!(subject = %pem.subject, signature_algorithm = alg.map(|a| a.description()), tls_server_end_point = %base64::encode(tls_server_end_point), "determined channel binding");
Ok(Self::Sha256(tls_server_end_point))
} else {
error!(subject = %pem.subject, signature_algorithm = alg.map(|a| a.description()), "unknown channel binding");
Ok(Self::Undefined)
}
}
pub fn supported(&self) -> bool {
!matches!(self, TlsServerEndPoint::Undefined)
}
}
#[derive(Default, Debug)] #[derive(Default, Debug)]
pub struct CertResolver { pub struct CertResolver {
certs: HashMap<String, (Arc<rustls::sign::CertifiedKey>, TlsServerEndPoint)>, certs: HashMap<String, (Arc<rustls::sign::CertifiedKey>, TlsServerEndPoint)>,

View File

@@ -16,10 +16,9 @@ use crate::{
intern::ProjectIdInt, intern::ProjectIdInt,
metrics::ApiLockMetrics, metrics::ApiLockMetrics,
rate_limiter::{DynamicLimiter, Outcome, RateLimiterConfig, Token}, rate_limiter::{DynamicLimiter, Outcome, RateLimiterConfig, Token},
EndpointCacheKey, scram, EndpointCacheKey,
}; };
use dashmap::DashMap; use dashmap::DashMap;
use proxy_sasl::scram;
use std::{hash::Hash, sync::Arc, time::Duration}; use std::{hash::Hash, sync::Arc, time::Duration};
use tokio::time::Instant; use tokio::time::Instant;
use tracing::info; use tracing::info;
@@ -293,7 +292,7 @@ pub struct NodeInfo {
impl NodeInfo { impl NodeInfo {
pub async fn connect( pub async fn connect(
&self, &self,
ctx: &RequestMonitoring, ctx: &mut RequestMonitoring,
timeout: Duration, timeout: Duration,
) -> Result<compute::PostgresConnection, compute::ConnectionError> { ) -> Result<compute::PostgresConnection, compute::ConnectionError> {
self.config self.config
@@ -331,20 +330,20 @@ pub(crate) trait Api {
/// We still have to mock the scram to avoid leaking information that user doesn't exist. /// We still have to mock the scram to avoid leaking information that user doesn't exist.
async fn get_role_secret( async fn get_role_secret(
&self, &self,
ctx: &RequestMonitoring, ctx: &mut RequestMonitoring,
user_info: &ComputeUserInfo, user_info: &ComputeUserInfo,
) -> Result<CachedRoleSecret, errors::GetAuthInfoError>; ) -> Result<CachedRoleSecret, errors::GetAuthInfoError>;
async fn get_allowed_ips_and_secret( async fn get_allowed_ips_and_secret(
&self, &self,
ctx: &RequestMonitoring, ctx: &mut RequestMonitoring,
user_info: &ComputeUserInfo, user_info: &ComputeUserInfo,
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), errors::GetAuthInfoError>; ) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), errors::GetAuthInfoError>;
/// Wake up the compute node and return the corresponding connection info. /// Wake up the compute node and return the corresponding connection info.
async fn wake_compute( async fn wake_compute(
&self, &self,
ctx: &RequestMonitoring, ctx: &mut RequestMonitoring,
user_info: &ComputeUserInfo, user_info: &ComputeUserInfo,
) -> Result<CachedNodeInfo, errors::WakeComputeError>; ) -> Result<CachedNodeInfo, errors::WakeComputeError>;
} }
@@ -364,7 +363,7 @@ pub enum ConsoleBackend {
impl Api for ConsoleBackend { impl Api for ConsoleBackend {
async fn get_role_secret( async fn get_role_secret(
&self, &self,
ctx: &RequestMonitoring, ctx: &mut RequestMonitoring,
user_info: &ComputeUserInfo, user_info: &ComputeUserInfo,
) -> Result<CachedRoleSecret, errors::GetAuthInfoError> { ) -> Result<CachedRoleSecret, errors::GetAuthInfoError> {
use ConsoleBackend::*; use ConsoleBackend::*;
@@ -379,7 +378,7 @@ impl Api for ConsoleBackend {
async fn get_allowed_ips_and_secret( async fn get_allowed_ips_and_secret(
&self, &self,
ctx: &RequestMonitoring, ctx: &mut RequestMonitoring,
user_info: &ComputeUserInfo, user_info: &ComputeUserInfo,
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), errors::GetAuthInfoError> { ) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), errors::GetAuthInfoError> {
use ConsoleBackend::*; use ConsoleBackend::*;
@@ -394,7 +393,7 @@ impl Api for ConsoleBackend {
async fn wake_compute( async fn wake_compute(
&self, &self,
ctx: &RequestMonitoring, ctx: &mut RequestMonitoring,
user_info: &ComputeUserInfo, user_info: &ComputeUserInfo,
) -> Result<CachedNodeInfo, errors::WakeComputeError> { ) -> Result<CachedNodeInfo, errors::WakeComputeError> {
use ConsoleBackend::*; use ConsoleBackend::*;
@@ -470,15 +469,15 @@ impl<K: Hash + Eq + Clone> ApiLocks<K> {
timeout: Duration, timeout: Duration,
epoch: std::time::Duration, epoch: std::time::Duration,
metrics: &'static ApiLockMetrics, metrics: &'static ApiLockMetrics,
) -> Self { ) -> prometheus::Result<Self> {
Self { Ok(Self {
name, name,
node_locks: DashMap::with_shard_amount(shards), node_locks: DashMap::with_shard_amount(shards),
config, config,
timeout, timeout,
epoch, epoch,
metrics, metrics,
} })
} }
pub async fn get_permit(&self, key: &K) -> Result<WakeComputePermit, ApiLockError> { pub async fn get_permit(&self, key: &K) -> Result<WakeComputePermit, ApiLockError> {

View File

@@ -5,7 +5,7 @@ use super::{
AuthInfo, AuthSecret, CachedNodeInfo, NodeInfo, AuthInfo, AuthSecret, CachedNodeInfo, NodeInfo,
}; };
use crate::context::RequestMonitoring; use crate::context::RequestMonitoring;
use crate::{auth::backend::ComputeUserInfo, compute, error::io_error, url::ApiUrl}; use crate::{auth::backend::ComputeUserInfo, compute, error::io_error, scram, url::ApiUrl};
use crate::{auth::IpPattern, cache::Cached}; use crate::{auth::IpPattern, cache::Cached};
use crate::{ use crate::{
console::{ console::{
@@ -15,7 +15,6 @@ use crate::{
BranchId, EndpointId, ProjectId, BranchId, EndpointId, ProjectId,
}; };
use futures::TryFutureExt; use futures::TryFutureExt;
use proxy_sasl::scram;
use std::{str::FromStr, sync::Arc}; use std::{str::FromStr, sync::Arc};
use thiserror::Error; use thiserror::Error;
use tokio_postgres::{config::SslMode, Client}; use tokio_postgres::{config::SslMode, Client};
@@ -159,7 +158,7 @@ impl super::Api for Api {
#[tracing::instrument(skip_all)] #[tracing::instrument(skip_all)]
async fn get_role_secret( async fn get_role_secret(
&self, &self,
_ctx: &RequestMonitoring, _ctx: &mut RequestMonitoring,
user_info: &ComputeUserInfo, user_info: &ComputeUserInfo,
) -> Result<CachedRoleSecret, GetAuthInfoError> { ) -> Result<CachedRoleSecret, GetAuthInfoError> {
Ok(CachedRoleSecret::new_uncached( Ok(CachedRoleSecret::new_uncached(
@@ -169,7 +168,7 @@ impl super::Api for Api {
async fn get_allowed_ips_and_secret( async fn get_allowed_ips_and_secret(
&self, &self,
_ctx: &RequestMonitoring, _ctx: &mut RequestMonitoring,
user_info: &ComputeUserInfo, user_info: &ComputeUserInfo,
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), GetAuthInfoError> { ) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), GetAuthInfoError> {
Ok(( Ok((
@@ -183,7 +182,7 @@ impl super::Api for Api {
#[tracing::instrument(skip_all)] #[tracing::instrument(skip_all)]
async fn wake_compute( async fn wake_compute(
&self, &self,
_ctx: &RequestMonitoring, _ctx: &mut RequestMonitoring,
_user_info: &ComputeUserInfo, _user_info: &ComputeUserInfo,
) -> Result<CachedNodeInfo, WakeComputeError> { ) -> Result<CachedNodeInfo, WakeComputeError> {
self.do_wake_compute().map_ok(Cached::new_uncached).await self.do_wake_compute().map_ok(Cached::new_uncached).await

View File

@@ -13,11 +13,10 @@ use crate::{
http, http,
metrics::{CacheOutcome, Metrics}, metrics::{CacheOutcome, Metrics},
rate_limiter::WakeComputeRateLimiter, rate_limiter::WakeComputeRateLimiter,
EndpointCacheKey, scram, EndpointCacheKey,
}; };
use crate::{cache::Cached, context::RequestMonitoring}; use crate::{cache::Cached, context::RequestMonitoring};
use futures::TryFutureExt; use futures::TryFutureExt;
use proxy_sasl::scram;
use std::{sync::Arc, time::Duration}; use std::{sync::Arc, time::Duration};
use tokio::time::Instant; use tokio::time::Instant;
use tokio_postgres::config::SslMode; use tokio_postgres::config::SslMode;
@@ -58,7 +57,7 @@ impl Api {
async fn do_get_auth_info( async fn do_get_auth_info(
&self, &self,
ctx: &RequestMonitoring, ctx: &mut RequestMonitoring,
user_info: &ComputeUserInfo, user_info: &ComputeUserInfo,
) -> Result<AuthInfo, GetAuthInfoError> { ) -> Result<AuthInfo, GetAuthInfoError> {
if !self if !self
@@ -70,7 +69,7 @@ impl Api {
info!("endpoint is not valid, skipping the request"); info!("endpoint is not valid, skipping the request");
return Ok(AuthInfo::default()); return Ok(AuthInfo::default());
} }
let request_id = ctx.session_id().to_string(); let request_id = ctx.session_id.to_string();
let application_name = ctx.console_application_name(); let application_name = ctx.console_application_name();
async { async {
let request = self let request = self
@@ -78,7 +77,7 @@ impl Api {
.get("proxy_get_role_secret") .get("proxy_get_role_secret")
.header("X-Request-ID", &request_id) .header("X-Request-ID", &request_id)
.header("Authorization", format!("Bearer {}", &self.jwt)) .header("Authorization", format!("Bearer {}", &self.jwt))
.query(&[("session_id", ctx.session_id())]) .query(&[("session_id", ctx.session_id)])
.query(&[ .query(&[
("application_name", application_name.as_str()), ("application_name", application_name.as_str()),
("project", user_info.endpoint.as_str()), ("project", user_info.endpoint.as_str()),
@@ -88,7 +87,7 @@ impl Api {
info!(url = request.url().as_str(), "sending http request"); info!(url = request.url().as_str(), "sending http request");
let start = Instant::now(); let start = Instant::now();
let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Cplane); let pause = ctx.latency_timer.pause(crate::metrics::Waiting::Cplane);
let response = self.endpoint.execute(request).await?; let response = self.endpoint.execute(request).await?;
drop(pause); drop(pause);
info!(duration = ?start.elapsed(), "received http response"); info!(duration = ?start.elapsed(), "received http response");
@@ -131,10 +130,10 @@ impl Api {
async fn do_wake_compute( async fn do_wake_compute(
&self, &self,
ctx: &RequestMonitoring, ctx: &mut RequestMonitoring,
user_info: &ComputeUserInfo, user_info: &ComputeUserInfo,
) -> Result<NodeInfo, WakeComputeError> { ) -> Result<NodeInfo, WakeComputeError> {
let request_id = ctx.session_id().to_string(); let request_id = ctx.session_id.to_string();
let application_name = ctx.console_application_name(); let application_name = ctx.console_application_name();
async { async {
let mut request_builder = self let mut request_builder = self
@@ -142,7 +141,7 @@ impl Api {
.get("proxy_wake_compute") .get("proxy_wake_compute")
.header("X-Request-ID", &request_id) .header("X-Request-ID", &request_id)
.header("Authorization", format!("Bearer {}", &self.jwt)) .header("Authorization", format!("Bearer {}", &self.jwt))
.query(&[("session_id", ctx.session_id())]) .query(&[("session_id", ctx.session_id)])
.query(&[ .query(&[
("application_name", application_name.as_str()), ("application_name", application_name.as_str()),
("project", user_info.endpoint.as_str()), ("project", user_info.endpoint.as_str()),
@@ -157,7 +156,7 @@ impl Api {
info!(url = request.url().as_str(), "sending http request"); info!(url = request.url().as_str(), "sending http request");
let start = Instant::now(); let start = Instant::now();
let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Cplane); let pause = ctx.latency_timer.pause(crate::metrics::Waiting::Cplane);
let response = self.endpoint.execute(request).await?; let response = self.endpoint.execute(request).await?;
drop(pause); drop(pause);
info!(duration = ?start.elapsed(), "received http response"); info!(duration = ?start.elapsed(), "received http response");
@@ -193,7 +192,7 @@ impl super::Api for Api {
#[tracing::instrument(skip_all)] #[tracing::instrument(skip_all)]
async fn get_role_secret( async fn get_role_secret(
&self, &self,
ctx: &RequestMonitoring, ctx: &mut RequestMonitoring,
user_info: &ComputeUserInfo, user_info: &ComputeUserInfo,
) -> Result<CachedRoleSecret, GetAuthInfoError> { ) -> Result<CachedRoleSecret, GetAuthInfoError> {
let normalized_ep = &user_info.endpoint.normalize(); let normalized_ep = &user_info.endpoint.normalize();
@@ -227,7 +226,7 @@ impl super::Api for Api {
async fn get_allowed_ips_and_secret( async fn get_allowed_ips_and_secret(
&self, &self,
ctx: &RequestMonitoring, ctx: &mut RequestMonitoring,
user_info: &ComputeUserInfo, user_info: &ComputeUserInfo,
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), GetAuthInfoError> { ) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), GetAuthInfoError> {
let normalized_ep = &user_info.endpoint.normalize(); let normalized_ep = &user_info.endpoint.normalize();
@@ -269,7 +268,7 @@ impl super::Api for Api {
#[tracing::instrument(skip_all)] #[tracing::instrument(skip_all)]
async fn wake_compute( async fn wake_compute(
&self, &self,
ctx: &RequestMonitoring, ctx: &mut RequestMonitoring,
user_info: &ComputeUserInfo, user_info: &ComputeUserInfo,
) -> Result<CachedNodeInfo, WakeComputeError> { ) -> Result<CachedNodeInfo, WakeComputeError> {
let key = user_info.endpoint_cache_key(); let key = user_info.endpoint_cache_key();

View File

@@ -7,14 +7,13 @@ use smol_str::SmolStr;
use std::net::IpAddr; use std::net::IpAddr;
use tokio::sync::mpsc; use tokio::sync::mpsc;
use tracing::{field::display, info, info_span, Span}; use tracing::{field::display, info, info_span, Span};
use try_lock::TryLock;
use uuid::Uuid; use uuid::Uuid;
use crate::{ use crate::{
console::messages::{ColdStartInfo, MetricsAuxInfo}, console::messages::{ColdStartInfo, MetricsAuxInfo},
error::ErrorKind, error::ErrorKind,
intern::{BranchIdInt, ProjectIdInt}, intern::{BranchIdInt, ProjectIdInt},
metrics::{ConnectOutcome, InvalidEndpointsGroup, LatencyTimer, Metrics, Protocol, Waiting}, metrics::{ConnectOutcome, InvalidEndpointsGroup, LatencyTimer, Metrics, Protocol},
DbName, EndpointId, RoleName, DbName, EndpointId, RoleName,
}; };
@@ -29,15 +28,7 @@ pub static LOG_CHAN_DISCONNECT: OnceCell<mpsc::WeakUnboundedSender<RequestData>>
/// ///
/// This data should **not** be used for connection logic, only for observability and limiting purposes. /// This data should **not** be used for connection logic, only for observability and limiting purposes.
/// All connection logic should instead use strongly typed state machines, not a bunch of Options. /// All connection logic should instead use strongly typed state machines, not a bunch of Options.
pub struct RequestMonitoring( pub struct RequestMonitoring {
/// To allow easier use of the ctx object, we have interior mutability.
/// I would typically use a RefCell but that would break the `Send` requirements
/// so we need something with thread-safety. `TryLock` is a cheap alternative
/// that offers similar semantics to a `RefCell` but with synchronisation.
TryLock<RequestMonitoringInner>,
);
struct RequestMonitoringInner {
pub peer_addr: IpAddr, pub peer_addr: IpAddr,
pub session_id: Uuid, pub session_id: Uuid,
pub protocol: Protocol, pub protocol: Protocol,
@@ -94,7 +85,7 @@ impl RequestMonitoring {
role = tracing::field::Empty, role = tracing::field::Empty,
); );
let inner = RequestMonitoringInner { Self {
peer_addr, peer_addr,
session_id, session_id,
protocol, protocol,
@@ -119,9 +110,7 @@ impl RequestMonitoring {
disconnect_sender: LOG_CHAN_DISCONNECT.get().and_then(|tx| tx.upgrade()), disconnect_sender: LOG_CHAN_DISCONNECT.get().and_then(|tx| tx.upgrade()),
latency_timer: LatencyTimer::new(protocol), latency_timer: LatencyTimer::new(protocol),
disconnect_timestamp: None, disconnect_timestamp: None,
}; }
Self(TryLock::new(inner))
} }
#[cfg(test)] #[cfg(test)]
@@ -130,177 +119,48 @@ impl RequestMonitoring {
} }
pub fn console_application_name(&self) -> String { pub fn console_application_name(&self) -> String {
let this = self.0.try_lock().expect("should not deadlock");
format!( format!(
"{}/{}", "{}/{}",
this.application.as_deref().unwrap_or_default(), self.application.as_deref().unwrap_or_default(),
this.protocol self.protocol
) )
} }
pub fn set_rejected(&self, rejected: bool) { pub fn set_rejected(&mut self, rejected: bool) {
let mut this = self.0.try_lock().expect("should not deadlock"); self.rejected = Some(rejected);
this.rejected = Some(rejected);
} }
pub fn set_cold_start_info(&self, info: ColdStartInfo) { pub fn set_cold_start_info(&mut self, info: ColdStartInfo) {
self.0
.try_lock()
.expect("should not deadlock")
.set_cold_start_info(info);
}
pub fn set_db_options(&self, options: StartupMessageParams) {
let mut this = self.0.try_lock().expect("should not deadlock");
this.set_application(options.get("application_name").map(SmolStr::from));
if let Some(user) = options.get("user") {
this.set_user(user.into());
}
if let Some(dbname) = options.get("database") {
this.set_dbname(dbname.into());
}
this.pg_options = Some(options);
}
pub fn set_project(&self, x: MetricsAuxInfo) {
let mut this = self.0.try_lock().expect("should not deadlock");
if this.endpoint_id.is_none() {
this.set_endpoint_id(x.endpoint_id.as_str().into())
}
this.branch = Some(x.branch_id);
this.project = Some(x.project_id);
this.set_cold_start_info(x.cold_start_info);
}
pub fn set_project_id(&self, project_id: ProjectIdInt) {
let mut this = self.0.try_lock().expect("should not deadlock");
this.project = Some(project_id);
}
pub fn set_endpoint_id(&self, endpoint_id: EndpointId) {
self.0
.try_lock()
.expect("should not deadlock")
.set_endpoint_id(endpoint_id);
}
pub fn set_dbname(&self, dbname: DbName) {
self.0
.try_lock()
.expect("should not deadlock")
.set_dbname(dbname);
}
pub fn set_user(&self, user: RoleName) {
self.0
.try_lock()
.expect("should not deadlock")
.set_user(user);
}
pub fn set_auth_method(&self, auth_method: AuthMethod) {
let mut this = self.0.try_lock().expect("should not deadlock");
this.auth_method = Some(auth_method);
}
pub fn has_private_peer_addr(&self) -> bool {
self.0
.try_lock()
.expect("should not deadlock")
.has_private_peer_addr()
}
pub fn set_error_kind(&self, kind: ErrorKind) {
let mut this = self.0.try_lock().expect("should not deadlock");
// Do not record errors from the private address to metrics.
if !this.has_private_peer_addr() {
Metrics::get().proxy.errors_total.inc(kind);
}
if let Some(ep) = &this.endpoint_id {
let metric = &Metrics::get().proxy.endpoints_affected_by_errors;
let label = metric.with_labels(kind);
metric.get_metric(label).measure(ep);
}
this.error_kind = Some(kind);
}
/// Marks this request as having completed successfully.
pub fn set_success(&self) {
    self.0.try_lock().expect("should not deadlock").success = true;
}
/// Emits the connection outcome via the inner state's logging routine.
pub fn log_connect(&self) {
    let mut inner = self.0.try_lock().expect("should not deadlock");
    inner.log_connect();
}
/// Returns the protocol this request arrived over.
pub fn protocol(&self) -> Protocol {
    let inner = self.0.try_lock().expect("should not deadlock");
    inner.protocol
}
/// Returns a clone of the tracing span attached to this request.
pub fn span(&self) -> Span {
    let inner = self.0.try_lock().expect("should not deadlock");
    inner.span.clone()
}
/// Returns the session id of this request.
pub fn session_id(&self) -> Uuid {
    let inner = self.0.try_lock().expect("should not deadlock");
    inner.session_id
}
/// Returns the peer's IP address.
pub fn peer_addr(&self) -> IpAddr {
    let inner = self.0.try_lock().expect("should not deadlock");
    inner.peer_addr
}
/// Returns the recorded cold-start information for this request.
pub fn cold_start_info(&self) -> ColdStartInfo {
    let inner = self.0.try_lock().expect("should not deadlock");
    inner.cold_start_info
}
/// Starts a latency-timer pause; the returned guard resumes the timer
/// (crediting the paused interval) when dropped.
pub fn latency_timer_pause(&self, waiting_for: Waiting) -> LatencyTimerPause {
    let start = tokio::time::Instant::now();
    LatencyTimerPause { ctx: self, start, waiting_for }
}
/// Reports success to the latency timer.
pub fn success(&self) {
    let mut inner = self.0.try_lock().expect("should not deadlock");
    inner.latency_timer.success()
}
}
/// Guard that represents a paused interval of the request latency timer.
///
/// Created by `RequestMonitoring::latency_timer_pause`; its `Drop` impl
/// reports the interval since `start` back to the latency timer.
pub struct LatencyTimerPause<'a> {
    // Request context whose latency timer is paused.
    ctx: &'a RequestMonitoring,
    // Moment the pause began.
    start: tokio::time::Instant,
    // What the request is waiting for while paused.
    waiting_for: Waiting,
}
impl Drop for LatencyTimerPause<'_> {
    /// Resumes the latency timer, crediting the interval since `start`.
    fn drop(&mut self) {
        let mut inner = self.ctx.0.try_lock().expect("should not deadlock");
        inner.latency_timer.unpause(self.start, self.waiting_for);
    }
}
impl RequestMonitoringInner {
fn set_cold_start_info(&mut self, info: ColdStartInfo) {
self.cold_start_info = info; self.cold_start_info = info;
self.latency_timer.cold_start_info(info); self.latency_timer.cold_start_info(info);
} }
fn set_endpoint_id(&mut self, endpoint_id: EndpointId) { pub fn set_db_options(&mut self, options: StartupMessageParams) {
self.set_application(options.get("application_name").map(SmolStr::from));
if let Some(user) = options.get("user") {
self.set_user(user.into());
}
if let Some(dbname) = options.get("database") {
self.set_dbname(dbname.into());
}
self.pg_options = Some(options);
}
pub fn set_project(&mut self, x: MetricsAuxInfo) {
if self.endpoint_id.is_none() {
self.set_endpoint_id(x.endpoint_id.as_str().into())
}
self.branch = Some(x.branch_id);
self.project = Some(x.project_id);
self.set_cold_start_info(x.cold_start_info);
}
pub fn set_project_id(&mut self, project_id: ProjectIdInt) {
self.project = Some(project_id);
}
pub fn set_endpoint_id(&mut self, endpoint_id: EndpointId) {
if self.endpoint_id.is_none() { if self.endpoint_id.is_none() {
self.span.record("ep", display(&endpoint_id)); self.span.record("ep", display(&endpoint_id));
let metric = &Metrics::get().proxy.connecting_endpoints; let metric = &Metrics::get().proxy.connecting_endpoints;
@@ -316,23 +176,44 @@ impl RequestMonitoringInner {
} }
} }
fn set_dbname(&mut self, dbname: DbName) { pub fn set_dbname(&mut self, dbname: DbName) {
self.dbname = Some(dbname); self.dbname = Some(dbname);
} }
fn set_user(&mut self, user: RoleName) { pub fn set_user(&mut self, user: RoleName) {
self.span.record("role", display(&user)); self.span.record("role", display(&user));
self.user = Some(user); self.user = Some(user);
} }
fn has_private_peer_addr(&self) -> bool { pub fn set_auth_method(&mut self, auth_method: AuthMethod) {
self.auth_method = Some(auth_method);
}
pub fn has_private_peer_addr(&self) -> bool {
match self.peer_addr { match self.peer_addr {
IpAddr::V4(ip) => ip.is_private(), IpAddr::V4(ip) => ip.is_private(),
_ => false, _ => false,
} }
} }
fn log_connect(&mut self) { pub fn set_error_kind(&mut self, kind: ErrorKind) {
// Do not record errors from the private address to metrics.
if !self.has_private_peer_addr() {
Metrics::get().proxy.errors_total.inc(kind);
}
if let Some(ep) = &self.endpoint_id {
let metric = &Metrics::get().proxy.endpoints_affected_by_errors;
let label = metric.with_labels(kind);
metric.get_metric(label).measure(ep);
}
self.error_kind = Some(kind);
}
pub fn set_success(&mut self) {
self.success = true;
}
pub fn log_connect(&mut self) {
let outcome = if self.success { let outcome = if self.success {
ConnectOutcome::Success ConnectOutcome::Success
} else { } else {
@@ -375,7 +256,7 @@ impl RequestMonitoringInner {
} }
} }
impl Drop for RequestMonitoringInner { impl Drop for RequestMonitoring {
fn drop(&mut self) { fn drop(&mut self) {
if self.sender.is_some() { if self.sender.is_some() {
self.log_connect(); self.log_connect();

View File

@@ -23,7 +23,7 @@ use utils::backoff;
use crate::{config::remote_storage_from_toml, context::LOG_CHAN_DISCONNECT}; use crate::{config::remote_storage_from_toml, context::LOG_CHAN_DISCONNECT};
use super::{RequestMonitoringInner, LOG_CHAN}; use super::{RequestMonitoring, LOG_CHAN};
#[derive(clap::Args, Clone, Debug)] #[derive(clap::Args, Clone, Debug)]
pub struct ParquetUploadArgs { pub struct ParquetUploadArgs {
@@ -118,8 +118,8 @@ impl<'a> serde::Serialize for Options<'a> {
} }
} }
impl From<&RequestMonitoringInner> for RequestData { impl From<&RequestMonitoring> for RequestData {
fn from(value: &RequestMonitoringInner) -> Self { fn from(value: &RequestMonitoring) -> Self {
Self { Self {
session_id: value.session_id, session_id: value.session_id,
peer_addr: value.peer_addr.to_string(), peer_addr: value.peer_addr.to_string(),

View File

@@ -6,12 +6,6 @@ pub mod health_server;
use std::time::Duration; use std::time::Duration;
use anyhow::bail;
use bytes::Bytes;
use http_body_util::BodyExt;
use hyper1::body::Body;
use serde::de::DeserializeOwned;
pub use reqwest::{Request, Response, StatusCode}; pub use reqwest::{Request, Response, StatusCode};
pub use reqwest_middleware::{ClientWithMiddleware, Error}; pub use reqwest_middleware::{ClientWithMiddleware, Error};
pub use reqwest_retry::{policies::ExponentialBackoff, RetryTransientMiddleware}; pub use reqwest_retry::{policies::ExponentialBackoff, RetryTransientMiddleware};
@@ -102,33 +96,6 @@ impl Endpoint {
} }
} }
pub async fn parse_json_body_with_limit<D: DeserializeOwned>(
mut b: impl Body<Data = Bytes, Error = reqwest::Error> + Unpin,
limit: usize,
) -> anyhow::Result<D> {
// We could use `b.limited().collect().await.to_bytes()` here
// but this ends up being slightly more efficient as far as I can tell.
// check the lower bound of the size hint.
// in reqwest, this value is influenced by the Content-Length header.
let lower_bound = match usize::try_from(b.size_hint().lower()) {
Ok(bound) if bound <= limit => bound,
_ => bail!("content length exceeds limit"),
};
let mut bytes = Vec::with_capacity(lower_bound);
while let Some(frame) = b.frame().await.transpose()? {
if let Ok(data) = frame.into_data() {
if bytes.len() + data.len() > limit {
bail!("content length exceeds limit")
}
bytes.extend_from_slice(&data);
}
}
Ok(serde_json::from_slice::<D>(&bytes)?)
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;

Some files were not shown because too many files have changed in this diff Show More