diff --git a/.dockerignore b/.dockerignore index 9e2d2e7108..7ead48db7c 100644 --- a/.dockerignore +++ b/.dockerignore @@ -24,3 +24,4 @@ !storage_controller/ !vendor/postgres-*/ !workspace_hack/ +!build_tools/patches diff --git a/.github/actions/run-python-test-set/action.yml b/.github/actions/run-python-test-set/action.yml index 9a0261d430..0eddfe5da6 100644 --- a/.github/actions/run-python-test-set/action.yml +++ b/.github/actions/run-python-test-set/action.yml @@ -121,6 +121,8 @@ runs: export DEFAULT_PG_VERSION=${PG_VERSION#v} export LD_LIBRARY_PATH=${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/lib export BENCHMARK_CONNSTR=${BENCHMARK_CONNSTR:-} + export ASAN_OPTIONS=detect_leaks=0:detect_stack_use_after_return=0:abort_on_error=1:strict_string_checks=1:check_initialization_order=1:strict_init_order=1 + export UBSAN_OPTIONS=abort_on_error=1:print_stacktrace=1 if [ "${BUILD_TYPE}" = "remote" ]; then export REMOTE_ENV=1 diff --git a/.github/workflows/_build-and-test-locally.yml b/.github/workflows/_build-and-test-locally.yml index 1dec8106b4..86a791497c 100644 --- a/.github/workflows/_build-and-test-locally.yml +++ b/.github/workflows/_build-and-test-locally.yml @@ -23,6 +23,11 @@ on: description: 'a json object of postgres versions and lfc states to run regression tests on' required: true type: string + sanitizers: + description: 'enabled or disabled' + required: false + default: 'disabled' + type: string defaults: run: @@ -87,6 +92,7 @@ jobs: - name: Set env variables env: ARCH: ${{ inputs.arch }} + SANITIZERS: ${{ inputs.sanitizers }} run: | CARGO_FEATURES="--features testing" if [[ $BUILD_TYPE == "debug" && $ARCH == 'x64' ]]; then @@ -99,8 +105,14 @@ jobs: cov_prefix="" CARGO_FLAGS="--locked --release" fi + if [[ $SANITIZERS == 'enabled' ]]; then + make_vars="WITH_SANITIZERS=yes" + else + make_vars="" + fi { echo "cov_prefix=${cov_prefix}" + echo "make_vars=${make_vars}" echo "CARGO_FEATURES=${CARGO_FEATURES}" echo "CARGO_FLAGS=${CARGO_FLAGS}" echo "CARGO_HOME=${GITHUB_WORKSPACE}/.cargo" @@ -136,35 +148,39 @@ jobs: - name: Build postgres v14 if: steps.cache_pg_14.outputs.cache-hit != 'true' - run: mold -run make postgres-v14 -j$(nproc) + run: mold -run make ${make_vars} postgres-v14 -j$(nproc) - name: Build postgres v15 if: steps.cache_pg_15.outputs.cache-hit != 'true' - run: mold -run make postgres-v15 -j$(nproc) + run: mold -run make ${make_vars} postgres-v15 -j$(nproc) - name: Build postgres v16 if: steps.cache_pg_16.outputs.cache-hit != 'true' - run: mold -run make postgres-v16 -j$(nproc) + run: mold -run make ${make_vars} postgres-v16 -j$(nproc) - name: Build postgres v17 if: steps.cache_pg_17.outputs.cache-hit != 'true' - run: mold -run make postgres-v17 -j$(nproc) + run: mold -run make ${make_vars} postgres-v17 -j$(nproc) - name: Build neon extensions - run: mold -run make neon-pg-ext -j$(nproc) + run: mold -run make ${make_vars} neon-pg-ext -j$(nproc) - name: Build walproposer-lib - run: mold -run make walproposer-lib -j$(nproc) + run: mold -run make ${make_vars} walproposer-lib -j$(nproc) - name: Run cargo build + env: + WITH_TESTS: ${{ inputs.sanitizers != 'enabled' && '--tests' || '' }} run: | - ${cov_prefix} mold -run cargo build $CARGO_FLAGS $CARGO_FEATURES --bins --tests + export ASAN_OPTIONS=detect_leaks=0 + ${cov_prefix} mold -run cargo build $CARGO_FLAGS $CARGO_FEATURES --bins ${WITH_TESTS} # Do install *before* running rust tests because they might recompile the # binaries with different features/flags. 
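Note: the ASAN_OPTIONS / UBSAN_OPTIONS values exported in run-python-test-set/action.yml above (and the build-time ASAN_OPTIONS=detect_leaks=0) are plain colon-separated key=value lists. A small illustrative sketch, not part of this change, showing how those exact strings decompose and what the individual flags mean:

    # Purely illustrative helper; it just rebuilds the option strings exported above.
    ASAN_FLAGS = {
        "detect_leaks": "0",                   # silence LeakSanitizer reports
        "detect_stack_use_after_return": "0",  # skip fake-stack (use-after-return) checks
        "abort_on_error": "1",                 # abort() on the first report instead of exiting
        "strict_string_checks": "1",
        "check_initialization_order": "1",
        "strict_init_order": "1",
    }
    UBSAN_FLAGS = {
        "abort_on_error": "1",
        "print_stacktrace": "1",
    }

    def render_options(flags: dict[str, str]) -> str:
        # ASan/UBSan parse their *_OPTIONS env vars as key=value pairs joined by ':'.
        return ":".join(f"{key}={value}" for key, value in flags.items())

    assert render_options(UBSAN_FLAGS) == "abort_on_error=1:print_stacktrace=1"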
- name: Install rust binaries env: ARCH: ${{ inputs.arch }} + SANITIZERS: ${{ inputs.sanitizers }} run: | # Install target binaries mkdir -p /tmp/neon/bin/ @@ -179,7 +195,7 @@ jobs: done # Install test executables and write list of all binaries (for code coverage) - if [[ $BUILD_TYPE == "debug" && $ARCH == 'x64' ]]; then + if [[ $BUILD_TYPE == "debug" && $ARCH == 'x64' && $SANITIZERS != 'enabled' ]]; then # Keep bloated coverage data files away from the rest of the artifact mkdir -p /tmp/coverage/ @@ -212,6 +228,7 @@ jobs: role-duration-seconds: 18000 # 5 hours - name: Run rust tests + if: ${{ inputs.sanitizers != 'enabled' }} env: NEXTEST_RETRIES: 3 run: | @@ -273,6 +290,7 @@ jobs: DATABASE_URL: postgresql://localhost:1235/storage_controller POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install run: | + export ASAN_OPTIONS=detect_leaks=0 /tmp/neon/bin/neon_local init /tmp/neon/bin/neon_local storage_controller start @@ -319,7 +337,7 @@ jobs: - name: Pytest regression tests continue-on-error: ${{ matrix.lfc_state == 'with-lfc' && inputs.build-type == 'debug' }} uses: ./.github/actions/run-python-test-set - timeout-minutes: 60 + timeout-minutes: ${{ inputs.sanitizers != 'enabled' && 60 || 180 }} with: build_type: ${{ inputs.build-type }} test_selection: regress @@ -337,6 +355,7 @@ jobs: PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring PAGESERVER_GET_VECTORED_CONCURRENT_IO: sidecar-task USE_LFC: ${{ matrix.lfc_state == 'with-lfc' && 'true' || 'false' }} + SANITIZERS: ${{ inputs.sanitizers }} # Temporary disable this step until we figure out why it's so flaky # Ref https://github.com/neondatabase/neon/issues/4540 diff --git a/.github/workflows/_push-to-acr.yml b/.github/workflows/_push-to-acr.yml deleted file mode 100644 index c304172ff7..0000000000 --- a/.github/workflows/_push-to-acr.yml +++ /dev/null @@ -1,56 +0,0 @@ -name: Push images to ACR -on: - workflow_call: - inputs: - client_id: - description: Client ID of Azure managed identity or Entra app - required: true - type: string - image_tag: - description: Tag for the container image - required: true - type: string - images: - description: Images to push - required: true - type: string - registry_name: - description: Name of the container registry - required: true - type: string - subscription_id: - description: Azure subscription ID - required: true - type: string - tenant_id: - description: Azure tenant ID - required: true - type: string - -jobs: - push-to-acr: - runs-on: ubuntu-22.04 - permissions: - contents: read # This is required for actions/checkout - id-token: write # This is required for Azure Login to work. 
- - steps: - - name: Azure login - uses: azure/login@6c251865b4e6290e7b78be643ea2d005bc51f69a # @v2.1.1 - with: - client-id: ${{ inputs.client_id }} - subscription-id: ${{ inputs.subscription_id }} - tenant-id: ${{ inputs.tenant_id }} - - - name: Login to ACR - run: | - az acr login --name=${{ inputs.registry_name }} - - - name: Copy docker images to ACR ${{ inputs.registry_name }} - run: | - images='${{ inputs.images }}' - for image in ${images}; do - docker buildx imagetools create \ - -t ${{ inputs.registry_name }}.azurecr.io/neondatabase/${image}:${{ inputs.image_tag }} \ - neondatabase/${image}:${{ inputs.image_tag }} - done diff --git a/.github/workflows/_push-to-container-registry.yml b/.github/workflows/_push-to-container-registry.yml new file mode 100644 index 0000000000..3c97c8a67a --- /dev/null +++ b/.github/workflows/_push-to-container-registry.yml @@ -0,0 +1,101 @@ +name: Push images to Container Registry +on: + workflow_call: + inputs: + # Example: {"docker.io/neondatabase/neon:13196061314":["369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:13196061314","neoneastus2.azurecr.io/neondatabase/neon:13196061314"]} + image-map: + description: JSON map of images, mapping from a source image to an array of target images that should be pushed. + required: true + type: string + aws-region: + description: AWS region to log in to. Required when pushing to ECR. + required: false + type: string + aws-account-ids: + description: Comma separated AWS account IDs to log in to for pushing to ECR. Required when pushing to ECR. + required: false + type: string + azure-client-id: + description: Client ID of Azure managed identity or Entra app. Required when pushing to ACR. + required: false + type: string + azure-subscription-id: + description: Azure subscription ID. Required when pushing to ACR. + required: false + type: string + azure-tenant-id: + description: Azure tenant ID. Required when pushing to ACR. + required: false + type: string + acr-registry-name: + description: ACR registry name. Required when pushing to ACR. + required: false + type: string + secrets: + docker-hub-username: + description: Docker Hub username. Required when pushing to Docker Hub. + required: false + docker-hub-password: + description: Docker Hub password. Required when pushing to Docker Hub. + required: false + aws-role-to-assume: + description: AWS role to assume. Required when pushing to ECR. 
+ required: false + +permissions: {} + +defaults: + run: + shell: bash -euo pipefail {0} + +jobs: + push-to-container-registry: + runs-on: ubuntu-22.04 + permissions: + id-token: write # Required for aws/azure login + steps: + - uses: actions/checkout@v4 + with: + sparse-checkout: scripts/push_with_image_map.py + sparse-checkout-cone-mode: false + + - name: Print image-map + run: echo '${{ inputs.image-map }}' | jq + + - name: Configure AWS credentials + if: contains(inputs.image-map, 'amazonaws.com/') + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: "${{ inputs.aws-region }}" + role-to-assume: "${{ secrets.aws-role-to-assume }}" + role-duration-seconds: 3600 + + - name: Login to ECR + if: contains(inputs.image-map, 'amazonaws.com/') + uses: aws-actions/amazon-ecr-login@v2 + with: + registries: "${{ inputs.aws-account-ids }}" + + - name: Configure Azure credentials + if: contains(inputs.image-map, 'azurecr.io/') + uses: azure/login@6c251865b4e6290e7b78be643ea2d005bc51f69a # @v2.1.1 + with: + client-id: ${{ inputs.azure-client-id }} + subscription-id: ${{ inputs.azure-subscription-id }} + tenant-id: ${{ inputs.azure-tenant-id }} + + - name: Login to ACR + if: contains(inputs.image-map, 'azurecr.io/') + run: | + az acr login --name=${{ inputs.acr-registry-name }} + + - name: Log in to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.docker-hub-username }} + password: ${{ secrets.docker-hub-password }} + + - name: Copy docker images to target registries + run: python scripts/push_with_image_map.py + env: + IMAGE_MAP: ${{ inputs.image-map }} diff --git a/.github/workflows/approved-for-ci-run.yml b/.github/workflows/approved-for-ci-run.yml index fc2f36c74b..f4e1e2e96c 100644 --- a/.github/workflows/approved-for-ci-run.yml +++ b/.github/workflows/approved-for-ci-run.yml @@ -67,9 +67,9 @@ jobs: - uses: actions/checkout@v4 with: - ref: main + ref: ${{ github.event.pull_request.head.sha }} token: ${{ secrets.CI_ACCESS_TOKEN }} - + - name: Look for existing PR id: get-pr env: @@ -77,7 +77,7 @@ jobs: run: | ALREADY_CREATED="$(gh pr --repo ${GITHUB_REPOSITORY} list --head ${BRANCH} --base main --json number --jq '.[].number')" echo "ALREADY_CREATED=${ALREADY_CREATED}" >> ${GITHUB_OUTPUT} - + - name: Get changed labels id: get-labels if: steps.get-pr.outputs.ALREADY_CREATED != '' @@ -94,10 +94,6 @@ jobs: echo "LABELS_TO_ADD=${LABELS_TO_ADD}" >> ${GITHUB_OUTPUT} echo "LABELS_TO_REMOVE=${LABELS_TO_REMOVE}" >> ${GITHUB_OUTPUT} - - uses: actions/checkout@v4 - with: - ref: ${{ github.event.pull_request.head.sha }} - - run: git checkout -b "${BRANCH}" - run: git push --force origin "${BRANCH}" @@ -105,7 +101,7 @@ jobs: - name: Create a Pull Request for CI run (if required) if: steps.get-pr.outputs.ALREADY_CREATED == '' - env: + env: GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }} run: | cat << EOF > body.md @@ -142,7 +138,7 @@ jobs: - run: git push --force origin "${BRANCH}" if: steps.get-pr.outputs.ALREADY_CREATED != '' - + cleanup: # Close PRs and delete branchs if the original PR is closed. 
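Note: the new _push-to-container-registry.yml workflow above delegates the actual copying to scripts/push_with_image_map.py, which is not part of this hunk; its input is the image-map JSON produced by scripts/generate_image_maps.py in build_and_test.yml below. A minimal sketch of what such a script plausibly does, inferred from the IMAGE_MAP example in the workflow comment and from the `docker buildx imagetools create` steps this diff removes (the real script may differ):

    import json
    import os
    import subprocess

    def main() -> None:
        # IMAGE_MAP maps one source image to a list of target images, e.g.
        # {"docker.io/neondatabase/neon:123": ["369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:123"]}
        image_map: dict[str, list[str]] = json.loads(os.environ["IMAGE_MAP"])
        for source, targets in image_map.items():
            for target in targets:
                # Same mechanism as the removed per-registry steps: copy the
                # (multi-arch) manifest server-side instead of pulling layers.
                subprocess.run(
                    ["docker", "buildx", "imagetools", "create", "-t", target, source],
                    check=True,
                )

    if __name__ == "__main__":
        main()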
diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 5a4bdecb99..88cb395958 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -497,7 +497,7 @@ jobs: trigger-e2e-tests: if: ${{ !github.event.pull_request.draft || contains( github.event.pull_request.labels.*.name, 'run-e2e-tests-in-draft') || github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute' }} - needs: [ check-permissions, promote-images-dev, tag ] + needs: [ check-permissions, push-neon-image-dev, push-compute-image-dev, tag ] uses: ./.github/workflows/trigger-e2e-tests.yml secrets: inherit @@ -571,21 +571,6 @@ jobs: neondatabase/neon:${{ needs.tag.outputs.build-tag }}-bookworm-x64 \ neondatabase/neon:${{ needs.tag.outputs.build-tag }}-bookworm-arm64 - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v4 - with: - aws-region: eu-central-1 - role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - role-duration-seconds: 3600 - - - name: Login to Amazon Dev ECR - uses: aws-actions/amazon-ecr-login@v2 - - - name: Push multi-arch image to ECR - run: | - docker buildx imagetools create -t 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{ needs.tag.outputs.build-tag }} \ - neondatabase/neon:${{ needs.tag.outputs.build-tag }} - compute-node-image-arch: needs: [ check-permissions, build-build-tools-image, tag ] permissions: @@ -632,16 +617,6 @@ jobs: username: ${{ secrets.NEON_DOCKERHUB_USERNAME }} password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v4 - with: - aws-region: eu-central-1 - role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - role-duration-seconds: 3600 - - - name: Login to Amazon Dev ECR - uses: aws-actions/amazon-ecr-login@v2 - - uses: docker/login-action@v3 with: registry: cache.neon.build @@ -729,21 +704,6 @@ jobs: neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-x64 \ neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-arm64 - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v4 - with: - aws-region: eu-central-1 - role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - role-duration-seconds: 3600 - - - name: Login to Amazon Dev ECR - uses: aws-actions/amazon-ecr-login@v2 - - - name: Push multi-arch compute-node-${{ matrix.version.pg }} image to ECR - run: | - docker buildx imagetools create -t 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }} \ - neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }} - vm-compute-node-image: needs: [ check-permissions, tag, compute-node-image ] runs-on: [ self-hosted, large ] @@ -876,133 +836,109 @@ jobs: docker compose --profile test-extensions -f ./docker-compose/docker-compose.yml logs || true docker compose --profile test-extensions -f ./docker-compose/docker-compose.yml down - promote-images-dev: - needs: [ check-permissions, tag, vm-compute-node-image, neon-image ] + generate-image-maps: + needs: [ tag ] runs-on: ubuntu-22.04 - - permissions: - id-token: write # aws-actions/configure-aws-credentials - statuses: write - contents: read - - env: - VERSIONS: v14 v15 v16 v17 - + outputs: + neon-dev: ${{ steps.generate.outputs.neon-dev }} + 
neon-prod: ${{ steps.generate.outputs.neon-prod }} + compute-dev: ${{ steps.generate.outputs.compute-dev }} + compute-prod: ${{ steps.generate.outputs.compute-prod }} steps: - - uses: docker/login-action@v3 + - uses: actions/checkout@v4 with: - username: ${{ secrets.NEON_DOCKERHUB_USERNAME }} - password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} + sparse-checkout: scripts/generate_image_maps.py + sparse-checkout-cone-mode: false - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v4 - with: - aws-region: eu-central-1 - role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - role-duration-seconds: 3600 + - name: Generate Image Maps + id: generate + run: python scripts/generate_image_maps.py + env: + BUILD_TAG: "${{ needs.tag.outputs.build-tag }}" + BRANCH: "${{ github.ref_name }}" + DEV_ACR: "${{ vars.AZURE_DEV_REGISTRY_NAME }}" + PROD_ACR: "${{ vars.AZURE_PROD_REGISTRY_NAME }}" - - name: Login to Amazon Dev ECR - uses: aws-actions/amazon-ecr-login@v2 - - - name: Copy vm-compute-node images to ECR - run: | - for version in ${VERSIONS}; do - docker buildx imagetools create -t 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-${version}:${{ needs.tag.outputs.build-tag }} \ - neondatabase/vm-compute-node-${version}:${{ needs.tag.outputs.build-tag }} - done - - promote-images-prod: - needs: [ check-permissions, tag, test-images, promote-images-dev ] - runs-on: ubuntu-22.04 - if: github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute' - - permissions: - id-token: write # aws-actions/configure-aws-credentials - statuses: write - contents: read - - env: - VERSIONS: v14 v15 v16 v17 - - steps: - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v4 - with: - aws-region: eu-central-1 - role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - role-duration-seconds: 3600 - - - name: Login to Amazon Dev ECR - uses: aws-actions/amazon-ecr-login@v2 - - - uses: docker/login-action@v3 - with: - username: ${{ secrets.NEON_DOCKERHUB_USERNAME }} - password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} - - - name: Add latest tag to images - if: github.ref_name == 'main' - run: | - for repo in neondatabase 369495373322.dkr.ecr.eu-central-1.amazonaws.com; do - docker buildx imagetools create -t $repo/neon:latest \ - $repo/neon:${{ needs.tag.outputs.build-tag }} - - for version in ${VERSIONS}; do - docker buildx imagetools create -t $repo/compute-node-${version}:latest \ - $repo/compute-node-${version}:${{ needs.tag.outputs.build-tag }} - - docker buildx imagetools create -t $repo/vm-compute-node-${version}:latest \ - $repo/vm-compute-node-${version}:${{ needs.tag.outputs.build-tag }} - done - done - docker buildx imagetools create -t neondatabase/neon-test-extensions-v16:latest \ - neondatabase/neon-test-extensions-v16:${{ needs.tag.outputs.build-tag }} - - - name: Configure AWS-prod credentials - if: github.ref_name == 'release'|| github.ref_name == 'release-proxy' || github.ref_name == 'release-compute' - uses: aws-actions/configure-aws-credentials@v4 - with: - aws-region: eu-central-1 - mask-aws-account-id: true - role-to-assume: ${{ secrets.PROD_GHA_OIDC_ROLE }} - - - name: Login to prod ECR - uses: docker/login-action@v3 - if: github.ref_name == 'release'|| github.ref_name == 'release-proxy' || github.ref_name == 'release-compute' - with: - registry: 093970136003.dkr.ecr.eu-central-1.amazonaws.com - - - name: Copy all images to prod ECR - if: github.ref_name == 'release' || 
github.ref_name == 'release-proxy' || github.ref_name == 'release-compute' - run: | - for image in neon {vm-,}compute-node-{v14,v15,v16,v17}; do - docker buildx imagetools create -t 093970136003.dkr.ecr.eu-central-1.amazonaws.com/${image}:${{ needs.tag.outputs.build-tag }} \ - 369495373322.dkr.ecr.eu-central-1.amazonaws.com/${image}:${{ needs.tag.outputs.build-tag }} - done - - push-to-acr-dev: - if: github.ref_name == 'main' - needs: [ tag, promote-images-dev ] - uses: ./.github/workflows/_push-to-acr.yml + push-neon-image-dev: + needs: [ generate-image-maps, neon-image ] + uses: ./.github/workflows/_push-to-container-registry.yml with: - client_id: ${{ vars.AZURE_DEV_CLIENT_ID }} - image_tag: ${{ needs.tag.outputs.build-tag }} - images: neon vm-compute-node-v14 vm-compute-node-v15 vm-compute-node-v16 vm-compute-node-v17 compute-node-v14 compute-node-v15 compute-node-v16 compute-node-v17 - registry_name: ${{ vars.AZURE_DEV_REGISTRY_NAME }} - subscription_id: ${{ vars.AZURE_DEV_SUBSCRIPTION_ID }} - tenant_id: ${{ vars.AZURE_TENANT_ID }} + image-map: '${{ needs.generate-image-maps.outputs.neon-dev }}' + aws-region: eu-central-1 + aws-account-ids: "369495373322" + azure-client-id: ${{ vars.AZURE_DEV_CLIENT_ID }} + azure-subscription-id: ${{ vars.AZURE_DEV_SUBSCRIPTION_ID }} + azure-tenant-id: ${{ vars.AZURE_TENANT_ID }} + acr-registry-name: ${{ vars.AZURE_DEV_REGISTRY_NAME }} + secrets: + aws-role-to-assume: "${{ vars.DEV_AWS_OIDC_ROLE_ARN }}" + docker-hub-username: ${{ secrets.NEON_DOCKERHUB_USERNAME }} + docker-hub-password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} - push-to-acr-prod: + push-compute-image-dev: + needs: [ generate-image-maps, vm-compute-node-image ] + uses: ./.github/workflows/_push-to-container-registry.yml + with: + image-map: '${{ needs.generate-image-maps.outputs.compute-dev }}' + aws-region: eu-central-1 + aws-account-ids: "369495373322" + azure-client-id: ${{ vars.AZURE_DEV_CLIENT_ID }} + azure-subscription-id: ${{ vars.AZURE_DEV_SUBSCRIPTION_ID }} + azure-tenant-id: ${{ vars.AZURE_TENANT_ID }} + acr-registry-name: ${{ vars.AZURE_DEV_REGISTRY_NAME }} + secrets: + aws-role-to-assume: "${{ vars.DEV_AWS_OIDC_ROLE_ARN }}" + docker-hub-username: ${{ secrets.NEON_DOCKERHUB_USERNAME }} + docker-hub-password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} + + push-neon-image-prod: if: github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute' - needs: [ tag, promote-images-prod ] - uses: ./.github/workflows/_push-to-acr.yml + needs: [ generate-image-maps, neon-image, test-images ] + uses: ./.github/workflows/_push-to-container-registry.yml with: - client_id: ${{ vars.AZURE_PROD_CLIENT_ID }} - image_tag: ${{ needs.tag.outputs.build-tag }} - images: neon vm-compute-node-v14 vm-compute-node-v15 vm-compute-node-v16 vm-compute-node-v17 compute-node-v14 compute-node-v15 compute-node-v16 compute-node-v17 - registry_name: ${{ vars.AZURE_PROD_REGISTRY_NAME }} - subscription_id: ${{ vars.AZURE_PROD_SUBSCRIPTION_ID }} - tenant_id: ${{ vars.AZURE_TENANT_ID }} + image-map: '${{ needs.generate-image-maps.outputs.neon-prod }}' + aws-region: eu-central-1 + aws-account-ids: "093970136003" + azure-client-id: ${{ vars.AZURE_PROD_CLIENT_ID }} + azure-subscription-id: ${{ vars.AZURE_PROD_SUBSCRIPTION_ID }} + azure-tenant-id: ${{ vars.AZURE_TENANT_ID }} + acr-registry-name: ${{ vars.AZURE_PROD_REGISTRY_NAME }} + secrets: + aws-role-to-assume: "${{ secrets.PROD_GHA_OIDC_ROLE }}" + docker-hub-username: ${{ secrets.NEON_DOCKERHUB_USERNAME }} + 
docker-hub-password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} + + push-compute-image-prod: + if: github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute' + needs: [ generate-image-maps, vm-compute-node-image, test-images ] + uses: ./.github/workflows/_push-to-container-registry.yml + with: + image-map: '${{ needs.generate-image-maps.outputs.compute-prod }}' + aws-region: eu-central-1 + aws-account-ids: "093970136003" + azure-client-id: ${{ vars.AZURE_PROD_CLIENT_ID }} + azure-subscription-id: ${{ vars.AZURE_PROD_SUBSCRIPTION_ID }} + azure-tenant-id: ${{ vars.AZURE_TENANT_ID }} + acr-registry-name: ${{ vars.AZURE_PROD_REGISTRY_NAME }} + secrets: + aws-role-to-assume: "${{ secrets.PROD_GHA_OIDC_ROLE }}" + docker-hub-username: ${{ secrets.NEON_DOCKERHUB_USERNAME }} + docker-hub-password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} + + # This is a bit of a special case so we're not using a generated image map. + add-latest-tag-to-neon-extensions-test-image: + if: github.ref_name == 'main' + needs: [ tag, compute-node-image ] + uses: ./.github/workflows/_push-to-container-registry.yml + with: + image-map: | + { + "docker.io/neondatabase/neon-test-extensions-v16:${{ needs.tag.outputs.build-tag }}": ["docker.io/neondatabase/neon-test-extensions-v16:latest"], + "docker.io/neondatabase/neon-test-extensions-v17:${{ needs.tag.outputs.build-tag }}": ["docker.io/neondatabase/neon-test-extensions-v17:latest"] + } + secrets: + docker-hub-username: ${{ secrets.NEON_DOCKERHUB_USERNAME }} + docker-hub-password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} trigger-custom-extensions-build-and-wait: needs: [ check-permissions, tag ] @@ -1084,7 +1020,7 @@ jobs: exit 1 deploy: - needs: [ check-permissions, promote-images-prod, tag, build-and-test-locally, trigger-custom-extensions-build-and-wait, push-to-acr-dev, push-to-acr-prod ] + needs: [ check-permissions, push-neon-image-prod, push-compute-image-prod, tag, build-and-test-locally, trigger-custom-extensions-build-and-wait ] # `!failure() && !cancelled()` is required because the workflow depends on the job that can be skipped: `push-to-acr-dev` and `push-to-acr-prod` if: (github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute') && !failure() && !cancelled() permissions: @@ -1337,7 +1273,7 @@ jobs: done pin-build-tools-image: - needs: [ build-build-tools-image, promote-images-prod, build-and-test-locally ] + needs: [ build-build-tools-image, push-compute-image-prod, push-neon-image-prod, build-and-test-locally ] if: github.ref_name == 'main' uses: ./.github/workflows/pin-build-tools-image.yml with: @@ -1362,7 +1298,8 @@ jobs: - check-codestyle-rust - check-dependencies-rust - files-changed - - promote-images-dev + - push-compute-image-dev + - push-neon-image-dev - test-images - trigger-custom-extensions-build-and-wait runs-on: ubuntu-22.04 @@ -1379,6 +1316,7 @@ jobs: || needs.check-codestyle-python.result == 'skipped' || needs.check-codestyle-rust.result == 'skipped' || needs.files-changed.result == 'skipped' - || needs.promote-images-dev.result == 'skipped' + || needs.push-compute-image-dev.result == 'skipped' + || needs.push-neon-image-dev.result == 'skipped' || needs.test-images.result == 'skipped' || needs.trigger-custom-extensions-build-and-wait.result == 'skipped' diff --git a/.github/workflows/build_and_test_with_sanitizers.yml b/.github/workflows/build_and_test_with_sanitizers.yml new file mode 100644 index 0000000000..2bc938509f --- 
/dev/null +++ b/.github/workflows/build_and_test_with_sanitizers.yml @@ -0,0 +1,134 @@ +name: Build and Test with Sanitizers + +on: + schedule: + # * is a special character in YAML so you have to quote this string + # ┌───────────── minute (0 - 59) + # │ ┌───────────── hour (0 - 23) + # │ │ ┌───────────── day of the month (1 - 31) + # │ │ │ ┌───────────── month (1 - 12 or JAN-DEC) + # │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT) + - cron: '0 1 * * *' # run once a day, timezone is utc + workflow_dispatch: + +defaults: + run: + shell: bash -euxo pipefail {0} + +concurrency: + # Allow only one workflow per any non-`main` branch. + group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }} + cancel-in-progress: true + +env: + RUST_BACKTRACE: 1 + COPT: '-Werror' + +jobs: + tag: + runs-on: [ self-hosted, small ] + container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned + outputs: + build-tag: ${{steps.build-tag.outputs.tag}} + + steps: + # Need `fetch-depth: 0` to count the number of commits in the branch + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Get build tag + run: | + echo run:$GITHUB_RUN_ID + echo ref:$GITHUB_REF_NAME + echo rev:$(git rev-list --count HEAD) + if [[ "$GITHUB_REF_NAME" == "main" ]]; then + echo "tag=$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT + elif [[ "$GITHUB_REF_NAME" == "release" ]]; then + echo "tag=release-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT + elif [[ "$GITHUB_REF_NAME" == "release-proxy" ]]; then + echo "tag=release-proxy-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT + elif [[ "$GITHUB_REF_NAME" == "release-compute" ]]; then + echo "tag=release-compute-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT + else + echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release', 'release-proxy', 'release-compute'" + echo "tag=$GITHUB_RUN_ID" >> $GITHUB_OUTPUT + fi + shell: bash + id: build-tag + + build-build-tools-image: + uses: ./.github/workflows/build-build-tools-image.yml + secrets: inherit + + build-and-test-locally: + needs: [ tag, build-build-tools-image ] + strategy: + fail-fast: false + matrix: + arch: [ x64, arm64 ] + build-type: [ release ] + uses: ./.github/workflows/_build-and-test-locally.yml + with: + arch: ${{ matrix.arch }} + build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm + build-tag: ${{ needs.tag.outputs.build-tag }} + build-type: ${{ matrix.build-type }} + test-cfg: '[{"pg_version":"v17"}]' + sanitizers: enabled + secrets: inherit + + + create-test-report: + needs: [ build-and-test-locally, build-build-tools-image ] + if: ${{ !cancelled() }} + permissions: + id-token: write # aws-actions/configure-aws-credentials + statuses: write + contents: write + pull-requests: write + outputs: + report-url: ${{ steps.create-allure-report.outputs.report-url }} + + runs-on: [ self-hosted, small ] + container: + image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm + credentials: + username: ${{ secrets.NEON_DOCKERHUB_USERNAME }} + password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} + options: --init + + steps: + - uses: actions/checkout@v4 + + - name: Create Allure report + if: ${{ !cancelled() }} + id: create-allure-report + uses: ./.github/actions/allure-report-generate + with: + store-test-results-into-db: true + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + env: + REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }} + + - uses: 
actions/github-script@v7 + if: ${{ !cancelled() }} + with: + # Retry script for 5XX server errors: https://github.com/actions/github-script#retries + retries: 5 + script: | + const report = { + reportUrl: "${{ steps.create-allure-report.outputs.report-url }}", + reportJsonUrl: "${{ steps.create-allure-report.outputs.report-json-url }}", + } + + const coverage = {} + + const script = require("./scripts/comment-test-report.js") + await script({ + github, + context, + fetch, + report, + coverage, + }) diff --git a/.github/workflows/trigger-e2e-tests.yml b/.github/workflows/trigger-e2e-tests.yml index 31696248b0..27ed1e4cff 100644 --- a/.github/workflows/trigger-e2e-tests.yml +++ b/.github/workflows/trigger-e2e-tests.yml @@ -15,7 +15,14 @@ env: E2E_CONCURRENCY_GROUP: ${{ github.repository }}-e2e-tests-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }} jobs: + check-permissions: + if: ${{ !contains(github.event.pull_request.labels.*.name, 'run-no-ci') }} + uses: ./.github/workflows/check-permissions.yml + with: + github-event-name: ${{ github.event_name }} + cancel-previous-e2e-tests: + needs: [ check-permissions ] if: github.event_name == 'pull_request' runs-on: ubuntu-22.04 @@ -29,6 +36,7 @@ jobs: --field concurrency_group="${{ env.E2E_CONCURRENCY_GROUP }}" tag: + needs: [ check-permissions ] runs-on: ubuntu-22.04 outputs: build-tag: ${{ steps.build-tag.outputs.tag }} @@ -68,7 +76,7 @@ jobs: GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }} TAG: ${{ needs.tag.outputs.build-tag }} steps: - - name: Wait for `promote-images-dev` job to finish + - name: Wait for `push-{neon,compute}-image-dev` job to finish # It's important to have a timeout here, the script in the step can run infinitely timeout-minutes: 60 run: | @@ -79,20 +87,20 @@ jobs: # For PRs we use the run id as the tag BUILD_AND_TEST_RUN_ID=${TAG} while true; do - conclusion=$(gh run --repo ${GITHUB_REPOSITORY} view ${BUILD_AND_TEST_RUN_ID} --json jobs --jq '.jobs[] | select(.name == "promote-images-dev") | .conclusion') - case "$conclusion" in - success) - break - ;; - failure | cancelled | skipped) - echo "The 'promote-images-dev' job didn't succeed: '${conclusion}'. Exiting..." - exit 1 - ;; - *) - echo "The 'promote-images-dev' hasn't succeed yet. Waiting..." - sleep 60 - ;; - esac + gh run --repo ${GITHUB_REPOSITORY} view ${BUILD_AND_TEST_RUN_ID} --json jobs --jq '[.jobs[] | select((.name | startswith("push-neon-image-dev")) or (.name | startswith("push-compute-image-dev"))) | {"name": .name, "conclusion": .conclusion, "url": .url}]' > jobs.json + if [ $(jq '[.[] | select(.conclusion == "success")]' jobs.json) -eq 2 ]; then + break + fi + jq -c '.[]' jobs.json | while read -r job; do + case $(echo $job | jq .conclusion) in + failure | cancelled | skipped) + echo "The '$(echo $job | jq .name)' job didn't succeed: '$(echo $job | jq .conclusion)'. See log in '$(echo $job | jq .url)' Exiting..." + exit 1 + ;; + esac + done + echo "The 'push-{neon,compute}-image-dev' jobs haven't succeeded yet. Waiting..." 
+ sleep 60 done - name: Set e2e-platforms diff --git a/Cargo.lock b/Cargo.lock index de1b1218ca..407c8170bb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -300,9 +300,9 @@ checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" [[package]] name = "aws-config" -version = "1.5.15" +version = "1.5.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc47e70fc35d054c8fcd296d47a61711f043ac80534a10b4f741904f81e73a90" +checksum = "9b49afaa341e8dd8577e1a2200468f98956d6eda50bcf4a53246cc00174ba924" dependencies = [ "aws-credential-types", "aws-runtime", @@ -311,7 +311,7 @@ dependencies = [ "aws-sdk-sts", "aws-smithy-async", "aws-smithy-http", - "aws-smithy-json", + "aws-smithy-json 0.60.7", "aws-smithy-runtime", "aws-smithy-runtime-api", "aws-smithy-types", @@ -342,9 +342,9 @@ dependencies = [ [[package]] name = "aws-runtime" -version = "1.5.4" +version = "1.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bee7643696e7fdd74c10f9eb42848a87fe469d35eae9c3323f80aa98f350baac" +checksum = "b5ac934720fbb46206292d2c75b57e67acfc56fe7dfd34fb9a02334af08409ea" dependencies = [ "aws-credential-types", "aws-sigv4", @@ -376,7 +376,7 @@ dependencies = [ "aws-runtime", "aws-smithy-async", "aws-smithy-http", - "aws-smithy-json", + "aws-smithy-json 0.61.1", "aws-smithy-query", "aws-smithy-runtime", "aws-smithy-runtime-api", @@ -399,7 +399,7 @@ dependencies = [ "aws-runtime", "aws-smithy-async", "aws-smithy-http", - "aws-smithy-json", + "aws-smithy-json 0.61.1", "aws-smithy-runtime", "aws-smithy-runtime-api", "aws-smithy-types", @@ -424,7 +424,7 @@ dependencies = [ "aws-smithy-checksums", "aws-smithy-eventstream", "aws-smithy-http", - "aws-smithy-json", + "aws-smithy-json 0.61.1", "aws-smithy-runtime", "aws-smithy-runtime-api", "aws-smithy-types", @@ -447,15 +447,15 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.57.0" +version = "1.50.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c54bab121fe1881a74c338c5f723d1592bf3b53167f80268a1274f404e1acc38" +checksum = "05ca43a4ef210894f93096039ef1d6fa4ad3edfabb3be92b80908b9f2e4b4eab" dependencies = [ "aws-credential-types", "aws-runtime", "aws-smithy-async", "aws-smithy-http", - "aws-smithy-json", + "aws-smithy-json 0.61.1", "aws-smithy-runtime", "aws-smithy-runtime-api", "aws-smithy-types", @@ -469,15 +469,15 @@ dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.58.0" +version = "1.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c8234fd024f7ac61c4e44ea008029bde934250f371efe7d4a39708397b1080c" +checksum = "abaf490c2e48eed0bb8e2da2fb08405647bd7f253996e0f93b981958ea0f73b0" dependencies = [ "aws-credential-types", "aws-runtime", "aws-smithy-async", "aws-smithy-http", - "aws-smithy-json", + "aws-smithy-json 0.61.1", "aws-smithy-runtime", "aws-smithy-runtime-api", "aws-smithy-types", @@ -491,15 +491,15 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.58.0" +version = "1.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba60e1d519d6f23a9df712c04fdeadd7872ac911c84b2f62a8bda92e129b7962" +checksum = "b68fde0d69c8bfdc1060ea7da21df3e39f6014da316783336deff0a9ec28f4bf" dependencies = [ "aws-credential-types", "aws-runtime", "aws-smithy-async", "aws-smithy-http", - "aws-smithy-json", + "aws-smithy-json 0.61.1", "aws-smithy-query", "aws-smithy-runtime", "aws-smithy-runtime-api", @@ -514,9 +514,9 @@ dependencies = [ [[package]] name = "aws-sigv4" -version = 
"1.2.7" +version = "1.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "690118821e46967b3c4501d67d7d52dd75106a9c54cf36cefa1985cedbe94e05" +checksum = "7d3820e0c08d0737872ff3c7c1f21ebbb6693d832312d6152bf18ef50a5471c2" dependencies = [ "aws-credential-types", "aws-smithy-eventstream", @@ -543,9 +543,9 @@ dependencies = [ [[package]] name = "aws-smithy-async" -version = "1.2.4" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa59d1327d8b5053c54bf2eaae63bf629ba9e904434d0835a28ed3c0ed0a614e" +checksum = "62220bc6e97f946ddd51b5f1361f78996e704677afc518a4ff66b7a72ea1378c" dependencies = [ "futures-util", "pin-project-lite", @@ -575,9 +575,9 @@ dependencies = [ [[package]] name = "aws-smithy-eventstream" -version = "0.60.6" +version = "0.60.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b18559a41e0c909b77625adf2b8c50de480a8041e5e4a3f5f7d177db70abc5a" +checksum = "cef7d0a272725f87e51ba2bf89f8c21e4df61b9e49ae1ac367a6d69916ef7c90" dependencies = [ "aws-smithy-types", "bytes", @@ -586,9 +586,9 @@ dependencies = [ [[package]] name = "aws-smithy-http" -version = "0.60.12" +version = "0.60.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7809c27ad8da6a6a68c454e651d4962479e81472aa19ae99e59f9aba1f9713cc" +checksum = "5c8bc3e8fdc6b8d07d976e301c02fe553f72a39b7a9fea820e023268467d7ab6" dependencies = [ "aws-smithy-eventstream", "aws-smithy-runtime-api", @@ -607,9 +607,18 @@ dependencies = [ [[package]] name = "aws-smithy-json" -version = "0.61.2" +version = "0.60.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "623a51127f24c30776c8b374295f2df78d92517386f77ba30773f15a30ce1422" +checksum = "4683df9469ef09468dad3473d129960119a0d3593617542b7d52086c8486f2d6" +dependencies = [ + "aws-smithy-types", +] + +[[package]] +name = "aws-smithy-json" +version = "0.61.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee4e69cc50921eb913c6b662f8d909131bb3e6ad6cb6090d3a39b66fc5c52095" dependencies = [ "aws-smithy-types", ] @@ -626,9 +635,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime" -version = "1.7.7" +version = "1.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "865f7050bbc7107a6c98a397a9fcd9413690c27fa718446967cf03b2d3ac517e" +checksum = "9f20685047ca9d6f17b994a07f629c813f08b5bce65523e47124879e60103d45" dependencies = [ "aws-smithy-async", "aws-smithy-http", @@ -670,9 +679,9 @@ dependencies = [ [[package]] name = "aws-smithy-types" -version = "1.2.12" +version = "1.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a28f6feb647fb5e0d5b50f0472c19a7db9462b74e2fec01bb0b44eedcc834e97" +checksum = "4fbd94a32b3a7d55d3806fe27d98d3ad393050439dd05eb53ece36ec5e3d3510" dependencies = [ "base64-simd", "bytes", @@ -705,9 +714,9 @@ dependencies = [ [[package]] name = "aws-types" -version = "1.3.4" +version = "1.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0df5a18c4f951c645300d365fec53a61418bcf4650f604f85fe2a665bfaa0c2" +checksum = "5221b91b3e441e6675310829fd8984801b772cb1546ef6c0e54dec9f1ac13fef" dependencies = [ "aws-credential-types", "aws-smithy-async", @@ -777,7 +786,7 @@ dependencies = [ [[package]] name = "azure_core" version = "0.21.0" -source = "git+https://github.com/neondatabase/azure-sdk-for-rust.git?branch=neon#66e77bdd87bf87e773acf3b0c84b532c1124367d" +source = 
"git+https://github.com/neondatabase/azure-sdk-for-rust.git?branch=neon#c36ed4c039bb3d59b5a1705f2cc337636c73b541" dependencies = [ "async-trait", "base64 0.22.1", @@ -806,7 +815,7 @@ dependencies = [ [[package]] name = "azure_identity" version = "0.21.0" -source = "git+https://github.com/neondatabase/azure-sdk-for-rust.git?branch=neon#66e77bdd87bf87e773acf3b0c84b532c1124367d" +source = "git+https://github.com/neondatabase/azure-sdk-for-rust.git?branch=neon#c36ed4c039bb3d59b5a1705f2cc337636c73b541" dependencies = [ "async-lock", "async-trait", @@ -825,7 +834,7 @@ dependencies = [ [[package]] name = "azure_storage" version = "0.21.0" -source = "git+https://github.com/neondatabase/azure-sdk-for-rust.git?branch=neon#66e77bdd87bf87e773acf3b0c84b532c1124367d" +source = "git+https://github.com/neondatabase/azure-sdk-for-rust.git?branch=neon#c36ed4c039bb3d59b5a1705f2cc337636c73b541" dependencies = [ "RustyXML", "async-lock", @@ -843,7 +852,7 @@ dependencies = [ [[package]] name = "azure_storage_blobs" version = "0.21.0" -source = "git+https://github.com/neondatabase/azure-sdk-for-rust.git?branch=neon#66e77bdd87bf87e773acf3b0c84b532c1124367d" +source = "git+https://github.com/neondatabase/azure-sdk-for-rust.git?branch=neon#c36ed4c039bb3d59b5a1705f2cc337636c73b541" dependencies = [ "RustyXML", "azure_core", @@ -863,7 +872,7 @@ dependencies = [ [[package]] name = "azure_svc_blobstorage" version = "0.21.0" -source = "git+https://github.com/neondatabase/azure-sdk-for-rust.git?branch=neon#66e77bdd87bf87e773acf3b0c84b532c1124367d" +source = "git+https://github.com/neondatabase/azure-sdk-for-rust.git?branch=neon#c36ed4c039bb3d59b5a1705f2cc337636c73b541" dependencies = [ "azure_core", "bytes", @@ -1424,6 +1433,7 @@ dependencies = [ "comfy-table", "compute_api", "futures", + "http-utils", "humantime", "humantime-serde", "hyper 0.14.30", @@ -2748,6 +2758,38 @@ dependencies = [ "url", ] +[[package]] +name = "http-utils" +version = "0.1.0" +dependencies = [ + "anyhow", + "backtrace", + "bytes", + "fail", + "flate2", + "hyper 0.14.30", + "inferno 0.12.0", + "itertools 0.10.5", + "jemalloc_pprof", + "metrics", + "once_cell", + "pprof", + "regex", + "routerify", + "serde", + "serde_json", + "serde_path_to_error", + "thiserror 1.0.69", + "tokio", + "tokio-stream", + "tokio-util", + "tracing", + "url", + "utils", + "uuid", + "workspace_hack", +] + [[package]] name = "httparse" version = "1.8.0" @@ -4102,6 +4144,7 @@ dependencies = [ "futures", "hex", "hex-literal", + "http-utils", "humantime", "humantime-serde", "hyper 0.14.30", @@ -4202,6 +4245,7 @@ dependencies = [ "anyhow", "bytes", "futures", + "http-utils", "pageserver_api", "postgres", "reqwest", @@ -4908,6 +4952,7 @@ dependencies = [ "hostname", "http 1.1.0", "http-body-util", + "http-utils", "humantime", "humantime-serde", "hyper 0.14.30", @@ -5755,6 +5800,7 @@ dependencies = [ "futures", "hex", "http 1.1.0", + "http-utils", "humantime", "hyper 0.14.30", "itertools 0.10.5", @@ -5819,6 +5865,7 @@ dependencies = [ name = "safekeeper_client" version = "0.1.0" dependencies = [ + "http-utils", "reqwest", "safekeeper_api", "serde", @@ -6401,6 +6448,7 @@ dependencies = [ "fail", "futures", "hex", + "http-utils", "humantime", "hyper 0.14.30", "itertools 0.10.5", @@ -7565,48 +7613,38 @@ dependencies = [ "criterion", "diatomic-waker", "fail", - "flate2", "futures", "git-version", "hex", "hex-literal", "humantime", - "hyper 0.14.30", "inferno 0.12.0", - "itertools 0.10.5", - "jemalloc_pprof", "jsonwebtoken", "metrics", "nix 0.27.1", "once_cell", "pin-project-lite", 
"postgres_connection", - "pprof", "pq_proto", "rand 0.8.5", "regex", - "routerify", "scopeguard", "sentry", "serde", "serde_assert", "serde_json", - "serde_path_to_error", "serde_with", "signal-hook", "strum", "strum_macros", "thiserror 1.0.69", "tokio", - "tokio-stream", "tokio-tar", "tokio-util", "toml_edit", "tracing", "tracing-error", "tracing-subscriber", - "url", - "uuid", "walkdir", ] @@ -8201,6 +8239,7 @@ dependencies = [ "tracing-core", "tracing-log", "url", + "uuid", "zerocopy", "zeroize", "zstd", diff --git a/Cargo.toml b/Cargo.toml index 76b54ae1d8..7228623c6b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,7 @@ members = [ "storage_scrubber", "workspace_hack", "libs/compute_api", + "libs/http-utils", "libs/pageserver_api", "libs/postgres_ffi", "libs/safekeeper_api", @@ -229,6 +230,7 @@ azure_storage_blobs = { git = "https://github.com/neondatabase/azure-sdk-for-rus ## Local libraries compute_api = { version = "0.1", path = "./libs/compute_api/" } consumption_metrics = { version = "0.1", path = "./libs/consumption_metrics/" } +http-utils = { version = "0.1", path = "./libs/http-utils/" } metrics = { version = "0.1", path = "./libs/metrics/" } pageserver = { path = "./pageserver" } pageserver_api = { version = "0.1", path = "./libs/pageserver_api/" } diff --git a/Dockerfile b/Dockerfile index 7ba54c8ca5..b399bcf7e4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,6 +10,28 @@ ARG STABLE_PG_VERSION=16 ARG DEBIAN_VERSION=bookworm ARG DEBIAN_FLAVOR=${DEBIAN_VERSION}-slim +# Here are the INDEX DIGESTS for the images we use. +# You can get them following next steps for now: +# 1. Get an authentication token from DockerHub: +# TOKEN=$(curl -s "https://auth.docker.io/token?service=registry.docker.io&scope=repository:library/debian:pull" | jq -r .token) +# 2. Using that token, query index for the given tag: +# curl -s -H "Authorization: Bearer $TOKEN" \ +# -H "Accept: application/vnd.docker.distribution.manifest.list.v2+json" \ +# "https://registry.hub.docker.com/v2/library/debian/manifests/bullseye-slim" \ +# -I | grep -i docker-content-digest +# 3. As a next step, TODO(fedordikarev): create script and schedule workflow to run these checks +# and updates on regular bases and in automated way. +ARG BOOKWORM_SLIM_SHA=sha256:40b107342c492725bc7aacbe93a49945445191ae364184a6d24fedb28172f6f7 +ARG BULLSEYE_SLIM_SHA=sha256:e831d9a884d63734fe3dd9c491ed9a5a3d4c6a6d32c5b14f2067357c49b0b7e1 + +# Here we use ${var/search/replace} syntax, to check +# if base image is one of the images, we pin image index for. +# If var will match one the known images, we will replace it with the known sha. +# If no match, than value will be unaffected, and will process with no-pinned image. +ARG BASE_IMAGE_SHA=debian:${DEBIAN_FLAVOR} +ARG BASE_IMAGE_SHA=${BASE_IMAGE_SHA/debian:bookworm-slim/debian@$BOOKWORM_SLIM_SHA} +ARG BASE_IMAGE_SHA=${BASE_IMAGE_SHA/debian:bullseye-slim/debian@$BULLSEYE_SLIM_SHA} + # Build Postgres FROM $REPOSITORY/$IMAGE:$TAG AS pg-build WORKDIR /home/nonroot @@ -59,7 +81,7 @@ RUN set -e \ # Build final image # -FROM debian:${DEBIAN_FLAVOR} +FROM $BASE_IMAGE_SHA ARG DEFAULT_PG_VERSION WORKDIR /data @@ -112,4 +134,3 @@ EXPOSE 6400 EXPOSE 9898 CMD ["/usr/local/bin/pageserver", "-D", "/data/.neon"] - diff --git a/Makefile b/Makefile index d1238caebf..42ee643bb5 100644 --- a/Makefile +++ b/Makefile @@ -10,18 +10,29 @@ ICU_PREFIX_DIR := /usr/local/icu # environment variable. 
# BUILD_TYPE ?= debug +WITH_SANITIZERS ?= no ifeq ($(BUILD_TYPE),release) PG_CONFIGURE_OPTS = --enable-debug --with-openssl PG_CFLAGS = -O2 -g3 $(CFLAGS) + PG_LDFLAGS = $(LDFLAGS) # Unfortunately, `--profile=...` is a nightly feature CARGO_BUILD_FLAGS += --release else ifeq ($(BUILD_TYPE),debug) PG_CONFIGURE_OPTS = --enable-debug --with-openssl --enable-cassert --enable-depend PG_CFLAGS = -O0 -g3 $(CFLAGS) + PG_LDFLAGS = $(LDFLAGS) else $(error Bad build type '$(BUILD_TYPE)', see Makefile for options) endif +ifeq ($(WITH_SANITIZERS),yes) + PG_CFLAGS += -fsanitize=address -fsanitize=undefined -fno-sanitize-recover + COPT += -Wno-error # to avoid failing on warnings induced by sanitizers + PG_LDFLAGS = -fsanitize=address -fsanitize=undefined -static-libasan -static-libubsan $(LDFLAGS) + export CC := gcc + export ASAN_OPTIONS := detect_leaks=0 +endif + ifeq ($(shell test -e /home/nonroot/.docker_build && echo -n yes),yes) # Exclude static build openssl, icu for local build (MacOS, Linux) # Only keep for build type release and debug @@ -33,7 +44,9 @@ endif UNAME_S := $(shell uname -s) ifeq ($(UNAME_S),Linux) # Seccomp BPF is only available for Linux - PG_CONFIGURE_OPTS += --with-libseccomp + ifneq ($(WITH_SANITIZERS),yes) + PG_CONFIGURE_OPTS += --with-libseccomp + endif else ifeq ($(UNAME_S),Darwin) PG_CFLAGS += -DUSE_PREFETCH ifndef DISABLE_HOMEBREW @@ -106,7 +119,7 @@ $(POSTGRES_INSTALL_DIR)/build/%/config.status: EXTRA_VERSION=$$(cd $(ROOT_PROJECT_DIR)/vendor/postgres-$$VERSION && git rev-parse HEAD); \ (cd $(POSTGRES_INSTALL_DIR)/build/$$VERSION && \ env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-$$VERSION/configure \ - CFLAGS='$(PG_CFLAGS)' \ + CFLAGS='$(PG_CFLAGS)' LDFLAGS='$(PG_LDFLAGS)' \ $(PG_CONFIGURE_OPTS) --with-extra-version=" ($$EXTRA_VERSION)" \ --prefix=$(abspath $(POSTGRES_INSTALL_DIR))/$$VERSION > configure.log) diff --git a/build-tools.Dockerfile b/build-tools.Dockerfile index 3ade57b175..fa72ca1bc2 100644 --- a/build-tools.Dockerfile +++ b/build-tools.Dockerfile @@ -1,6 +1,29 @@ ARG DEBIAN_VERSION=bookworm +ARG DEBIAN_FLAVOR=${DEBIAN_VERSION}-slim -FROM debian:bookworm-slim AS pgcopydb_builder +# Here are the INDEX DIGESTS for the images we use. +# You can get them following next steps for now: +# 1. Get an authentication token from DockerHub: +# TOKEN=$(curl -s "https://auth.docker.io/token?service=registry.docker.io&scope=repository:library/debian:pull" | jq -r .token) +# 2. Using that token, query index for the given tag: +# curl -s -H "Authorization: Bearer $TOKEN" \ +# -H "Accept: application/vnd.docker.distribution.manifest.list.v2+json" \ +# "https://registry.hub.docker.com/v2/library/debian/manifests/bullseye-slim" \ +# -I | grep -i docker-content-digest +# 3. As a next step, TODO(fedordikarev): create script and schedule workflow to run these checks +# and updates on regular bases and in automated way. +ARG BOOKWORM_SLIM_SHA=sha256:40b107342c492725bc7aacbe93a49945445191ae364184a6d24fedb28172f6f7 +ARG BULLSEYE_SLIM_SHA=sha256:e831d9a884d63734fe3dd9c491ed9a5a3d4c6a6d32c5b14f2067357c49b0b7e1 + +# Here we use ${var/search/replace} syntax, to check +# if base image is one of the images, we pin image index for. +# If var will match one the known images, we will replace it with the known sha. +# If no match, than value will be unaffected, and will process with no-pinned image. 
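Note: the comment block above (repeated across Dockerfile, build-tools.Dockerfile and compute-node.Dockerfile in this diff) documents the manual lookup behind the pinned BOOKWORM_SLIM_SHA / BULLSEYE_SLIM_SHA index digests, and its TODO proposes automating it. A rough sketch of that lookup, assuming the same endpoints and headers as the curl commands in the comment (the eventual script may look different):

    import json
    import urllib.request

    def debian_index_digest(tag: str) -> str:
        # Hypothetical helper mirroring the comment's two curl steps.
        token_url = (
            "https://auth.docker.io/token"
            "?service=registry.docker.io&scope=repository:library/debian:pull"
        )
        with urllib.request.urlopen(token_url) as resp:
            token = json.load(resp)["token"]

        req = urllib.request.Request(
            f"https://registry.hub.docker.com/v2/library/debian/manifests/{tag}",
            method="HEAD",  # like `curl -I`, we only need the response headers
            headers={
                "Authorization": f"Bearer {token}",
                "Accept": "application/vnd.docker.distribution.manifest.list.v2+json",
            },
        )
        with urllib.request.urlopen(req) as resp:
            # e.g. "sha256:40b10734..." -> candidate value for BOOKWORM_SLIM_SHA
            return resp.headers["docker-content-digest"]

    if __name__ == "__main__":
        print(debian_index_digest("bookworm-slim"))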
+ARG BASE_IMAGE_SHA=debian:${DEBIAN_FLAVOR} +ARG BASE_IMAGE_SHA=${BASE_IMAGE_SHA/debian:bookworm-slim/debian@$BOOKWORM_SLIM_SHA} +ARG BASE_IMAGE_SHA=${BASE_IMAGE_SHA/debian:bullseye-slim/debian@$BULLSEYE_SLIM_SHA} + +FROM $BASE_IMAGE_SHA AS pgcopydb_builder ARG DEBIAN_VERSION # Use strict mode for bash to catch errors early @@ -9,9 +32,11 @@ SHELL ["/bin/bash", "-euo", "pipefail", "-c"] # By default, /bin/sh used in debian images will treat '\n' as eol, # but as we use bash as SHELL, and built-in echo in bash requires '-e' flag for that. RUN echo 'Acquire::Retries "5";' > /etc/apt/apt.conf.d/80-retries && \ - echo -e "retry_connrefused = on\ntimeout=15\ntries=5\n" > /root/.wgetrc && \ + echo -e "retry_connrefused=on\ntimeout=15\ntries=5\nretry-on-host-error=on\n" > /root/.wgetrc && \ echo -e "--retry-connrefused\n--connect-timeout 15\n--retry 5\n--max-time 300\n" > /root/.curlrc +COPY build_tools/patches/pgcopydbv017.patch /pgcopydbv017.patch + RUN if [ "${DEBIAN_VERSION}" = "bookworm" ]; then \ set -e && \ apt update && \ @@ -44,6 +69,7 @@ RUN if [ "${DEBIAN_VERSION}" = "bookworm" ]; then \ mkdir /tmp/pgcopydb && \ tar -xzf /tmp/pgcopydb.tar.gz -C /tmp/pgcopydb --strip-components=1 && \ cd /tmp/pgcopydb && \ + patch -p1 < /pgcopydbv017.patch && \ make -s clean && \ make -s -j12 install && \ libpq_path=$(find /lib /usr/lib -name "libpq.so.5" | head -n 1) && \ @@ -55,7 +81,7 @@ RUN if [ "${DEBIAN_VERSION}" = "bookworm" ]; then \ mkdir -p mkdir -p /pgcopydb/lib && touch /pgcopydb/lib/libpq.so.5; \ fi -FROM debian:${DEBIAN_VERSION}-slim AS build_tools +FROM $BASE_IMAGE_SHA AS build_tools ARG DEBIAN_VERSION # Add nonroot user @@ -72,7 +98,7 @@ COPY --from=pgcopydb_builder /usr/lib/postgresql/16/bin/pgcopydb /pgcopydb/bin/p COPY --from=pgcopydb_builder /pgcopydb/lib/libpq.so.5 /pgcopydb/lib/libpq.so.5 RUN echo 'Acquire::Retries "5";' > /etc/apt/apt.conf.d/80-retries && \ - echo -e "retry_connrefused = on\ntimeout=15\ntries=5\n" > /root/.wgetrc && \ + echo -e "retry_connrefused=on\ntimeout=15\ntries=5\nretry-on-host-error=on\n" > /root/.wgetrc && \ echo -e "--retry-connrefused\n--connect-timeout 15\n--retry 5\n--max-time 300\n" > /root/.curlrc # System deps @@ -135,7 +161,8 @@ RUN curl -fsSL \ --output sql_exporter.tar.gz \ && mkdir /tmp/sql_exporter \ && tar xzvf sql_exporter.tar.gz -C /tmp/sql_exporter --strip-components=1 \ - && mv /tmp/sql_exporter/sql_exporter /usr/local/bin/sql_exporter + && mv /tmp/sql_exporter/sql_exporter /usr/local/bin/sql_exporter \ + && rm sql_exporter.tar.gz # protobuf-compiler (protoc) ENV PROTOC_VERSION=25.1 diff --git a/build_tools/patches/pgcopydbv017.patch b/build_tools/patches/pgcopydbv017.patch new file mode 100644 index 0000000000..4e68793afc --- /dev/null +++ b/build_tools/patches/pgcopydbv017.patch @@ -0,0 +1,57 @@ +diff --git a/src/bin/pgcopydb/copydb.c b/src/bin/pgcopydb/copydb.c +index d730b03..69a9be9 100644 +--- a/src/bin/pgcopydb/copydb.c ++++ b/src/bin/pgcopydb/copydb.c +@@ -44,6 +44,7 @@ GUC dstSettings[] = { + { "synchronous_commit", "'off'" }, + { "statement_timeout", "0" }, + { "lock_timeout", "0" }, ++ { "idle_in_transaction_session_timeout", "0" }, + { NULL, NULL }, + }; + +diff --git a/src/bin/pgcopydb/pgsql.c b/src/bin/pgcopydb/pgsql.c +index 94f2f46..e051ba8 100644 +--- a/src/bin/pgcopydb/pgsql.c ++++ b/src/bin/pgcopydb/pgsql.c +@@ -2319,6 +2319,11 @@ pgsql_execute_log_error(PGSQL *pgsql, + + LinesBuffer lbuf = { 0 }; + ++ if (message != NULL){ ++ // make sure message is writable by splitLines ++ message = strdup(message); ++ } ++ + if 
(!splitLines(&lbuf, message)) + { + /* errors have already been logged */ +@@ -2332,6 +2337,7 @@ pgsql_execute_log_error(PGSQL *pgsql, + PQbackendPID(pgsql->connection), + lbuf.lines[lineNumber]); + } ++ free(message); // free copy of message we created above + + if (pgsql->logSQL) + { +@@ -3174,11 +3180,18 @@ pgcopy_log_error(PGSQL *pgsql, PGresult *res, const char *context) + /* errors have already been logged */ + return; + } +- + if (res != NULL) + { + char *sqlstate = PQresultErrorField(res, PG_DIAG_SQLSTATE); +- strlcpy(pgsql->sqlstate, sqlstate, sizeof(pgsql->sqlstate)); ++ if (sqlstate == NULL) ++ { ++ // PQresultErrorField returned NULL! ++ pgsql->sqlstate[0] = '\0'; // Set to an empty string to avoid segfault ++ } ++ else ++ { ++ strlcpy(pgsql->sqlstate, sqlstate, sizeof(pgsql->sqlstate)); ++ } + } + + char *endpoint = diff --git a/compute/compute-node.Dockerfile b/compute/compute-node.Dockerfile index 43910f2622..6814aadcb9 100644 --- a/compute/compute-node.Dockerfile +++ b/compute/compute-node.Dockerfile @@ -83,7 +83,28 @@ ARG TAG=pinned ARG BUILD_TAG ARG DEBIAN_VERSION=bookworm ARG DEBIAN_FLAVOR=${DEBIAN_VERSION}-slim -ARG ALPINE_CURL_VERSION=8.11.1 + +# Here are the INDEX DIGESTS for the images we use. +# You can get them following next steps for now: +# 1. Get an authentication token from DockerHub: +# TOKEN=$(curl -s "https://auth.docker.io/token?service=registry.docker.io&scope=repository:library/debian:pull" | jq -r .token) +# 2. Using that token, query index for the given tag: +# curl -s -H "Authorization: Bearer $TOKEN" \ +# -H "Accept: application/vnd.docker.distribution.manifest.list.v2+json" \ +# "https://registry.hub.docker.com/v2/library/debian/manifests/bullseye-slim" \ +# -I | grep -i docker-content-digest +# 3. As a next step, TODO(fedordikarev): create script and schedule workflow to run these checks +# and updates on regular bases and in automated way. +ARG BOOKWORM_SLIM_SHA=sha256:40b107342c492725bc7aacbe93a49945445191ae364184a6d24fedb28172f6f7 +ARG BULLSEYE_SLIM_SHA=sha256:e831d9a884d63734fe3dd9c491ed9a5a3d4c6a6d32c5b14f2067357c49b0b7e1 + +# Here we use ${var/search/replace} syntax, to check +# if base image is one of the images, we pin image index for. +# If var will match one the known images, we will replace it with the known sha. +# If no match, than value will be unaffected, and will process with no-pinned image. +ARG BASE_IMAGE_SHA=debian:${DEBIAN_FLAVOR} +ARG BASE_IMAGE_SHA=${BASE_IMAGE_SHA/debian:bookworm-slim/debian@$BOOKWORM_SLIM_SHA} +ARG BASE_IMAGE_SHA=${BASE_IMAGE_SHA/debian:bullseye-slim/debian@$BULLSEYE_SLIM_SHA} # By default, build all PostgreSQL extensions. For quick local testing when you don't # care about the extensions, pass EXTENSIONS=none or EXTENSIONS=minimal @@ -94,7 +115,7 @@ ARG EXTENSIONS=all # Layer "build-deps" # ######################################################################################### -FROM debian:$DEBIAN_FLAVOR AS build-deps +FROM $BASE_IMAGE_SHA AS build-deps ARG DEBIAN_VERSION # Use strict mode for bash to catch errors early @@ -103,7 +124,7 @@ SHELL ["/bin/bash", "-euo", "pipefail", "-c"] # By default, /bin/sh used in debian images will treat '\n' as eol, # but as we use bash as SHELL, and built-in echo in bash requires '-e' flag for that. 
RUN echo 'Acquire::Retries "5";' > /etc/apt/apt.conf.d/80-retries && \ - echo -e "retry_connrefused = on\ntimeout=15\ntries=5\n" > /root/.wgetrc && \ + echo -e "retry_connrefused = on\ntimeout=15\ntries=5\nretry-on-host-error=on\n" > /root/.wgetrc && \ echo -e "--retry-connrefused\n--connect-timeout 15\n--retry 5\n--max-time 300\n" > /root/.curlrc RUN case $DEBIAN_VERSION in \ @@ -127,7 +148,7 @@ RUN case $DEBIAN_VERSION in \ apt install --no-install-recommends --no-install-suggests -y \ ninja-build git autoconf automake libtool build-essential bison flex libreadline-dev \ zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget ca-certificates pkg-config libssl-dev \ - libicu-dev libxslt1-dev liblz4-dev libzstd-dev zstd \ + libicu-dev libxslt1-dev liblz4-dev libzstd-dev zstd curl unzip \ $VERSION_INSTALLS \ && apt clean && rm -rf /var/lib/apt/lists/* @@ -139,11 +160,11 @@ RUN case $DEBIAN_VERSION in \ ######################################################################################### FROM build-deps AS pg-build ARG PG_VERSION -COPY vendor/postgres-${PG_VERSION} postgres +COPY vendor/postgres-${PG_VERSION:?} postgres RUN cd postgres && \ export CONFIGURE_CMD="./configure CFLAGS='-O2 -g3' --enable-debug --with-openssl --with-uuid=ossp \ --with-icu --with-libxml --with-libxslt --with-lz4" && \ - if [ "${PG_VERSION}" != "v14" ]; then \ + if [ "${PG_VERSION:?}" != "v14" ]; then \ # zstd is available only from PG15 export CONFIGURE_CMD="${CONFIGURE_CMD} --with-zstd"; \ fi && \ @@ -237,7 +258,7 @@ RUN case "${DEBIAN_VERSION}" in \ # Postgis 3.5.0 supports v17 WORKDIR /ext-src -RUN case "${PG_VERSION}" in \ +RUN case "${PG_VERSION:?}" in \ "v17") \ export POSTGIS_VERSION=3.5.0 \ export POSTGIS_CHECKSUM=ca698a22cc2b2b3467ac4e063b43a28413f3004ddd505bdccdd74c56a647f510 \ @@ -312,7 +333,7 @@ FROM build-deps AS pgrouting-src ARG DEBIAN_VERSION ARG PG_VERSION WORKDIR /ext-src -RUN case "${PG_VERSION}" in \ +RUN case "${PG_VERSION:?}" in \ "v17") \ export PGROUTING_VERSION=3.6.2 \ export PGROUTING_CHECKSUM=f4a1ed79d6f714e52548eca3bb8e5593c6745f1bde92eb5fb858efd8984dffa2 \ @@ -358,7 +379,7 @@ COPY compute/patches/plv8-3.1.10.patch . 
# # Use new version only for v17 # because since v3.2, plv8 doesn't include plcoffee and plls extensions -RUN case "${PG_VERSION}" in \ +RUN case "${PG_VERSION:?}" in \ "v17") \ export PLV8_TAG=v3.2.3 \ ;; \ @@ -372,7 +393,7 @@ RUN case "${PG_VERSION}" in \ git clone --recurse-submodules --depth 1 --branch ${PLV8_TAG} https://github.com/plv8/plv8.git plv8-src && \ tar -czf plv8.tar.gz --exclude .git plv8-src && \ cd plv8-src && \ - if [[ "${PG_VERSION}" < "v17" ]]; then patch -p1 < /ext-src/plv8-3.1.10.patch; fi + if [[ "${PG_VERSION:?}" < "v17" ]]; then patch -p1 < /ext-src/plv8-3.1.10.patch; fi FROM pg-build AS plv8-build ARG PG_VERSION @@ -392,7 +413,7 @@ RUN \ find /usr/local/pgsql/ -name "plv8-*.so" | xargs strip && \ # don't break computes with installed old version of plv8 cd /usr/local/pgsql/lib/ && \ - case "${PG_VERSION}" in \ + case "${PG_VERSION:?}" in \ "v17") \ ln -s plv8-3.2.3.so plv8-3.1.8.so && \ ln -s plv8-3.2.3.so plv8-3.1.5.so && \ @@ -729,7 +750,7 @@ FROM build-deps AS timescaledb-src ARG PG_VERSION WORKDIR /ext-src -RUN case "${PG_VERSION}" in \ +RUN case "${PG_VERSION:?}" in \ "v14" | "v15") \ export TIMESCALEDB_VERSION=2.10.1 \ export TIMESCALEDB_CHECKSUM=6fca72a6ed0f6d32d2b3523951ede73dc5f9b0077b38450a029a5f411fdb8c73 \ @@ -767,7 +788,7 @@ ARG PG_VERSION # version-specific, has separate releases for each version WORKDIR /ext-src -RUN case "${PG_VERSION}" in \ +RUN case "${PG_VERSION:?}" in \ "v14") \ export PG_HINT_PLAN_VERSION=14_1_4_1 \ export PG_HINT_PLAN_CHECKSUM=c3501becf70ead27f70626bce80ea401ceac6a77e2083ee5f3ff1f1444ec1ad1 \ @@ -843,7 +864,7 @@ ARG PG_VERSION # https://github.com/rdkit/rdkit/releases/tag/Release_2024_09_1 WORKDIR /ext-src -RUN case "${PG_VERSION}" in \ +RUN case "${PG_VERSION:?}" in \ "v17") \ export RDKIT_VERSION=Release_2024_09_1 \ export RDKIT_CHECKSUM=034c00d6e9de323506834da03400761ed8c3721095114369d06805409747a60f \ @@ -970,7 +991,7 @@ ARG PG_VERSION # # last release v0.40.0 - Jul 22, 2024 WORKDIR /ext-src -RUN case "${PG_VERSION}" in \ +RUN case "${PG_VERSION:?}" in \ "v17") \ export SEMVER_VERSION=0.40.0 \ export SEMVER_CHECKSUM=3e50bcc29a0e2e481e7b6d2bc937cadc5f5869f55d983b5a1aafeb49f5425cfc \ @@ -1006,7 +1027,7 @@ ARG PG_VERSION # This is our extension, support stopped in favor of pgvector # TODO: deprecate it WORKDIR /ext-src -RUN case "${PG_VERSION}" in \ +RUN case "${PG_VERSION:?}" in \ "v14" | "v15") \ export PG_EMBEDDING_VERSION=0.3.5 \ export PG_EMBEDDING_CHECKSUM=0e95b27b8b6196e2cf0a0c9ec143fe2219b82e54c5bb4ee064e76398cbe69ae9 \ @@ -1039,7 +1060,7 @@ ARG PG_VERSION # This is an experimental extension, never got to real production. # !Do not remove! It can be present in shared_preload_libraries and compute will fail to start if library is not found. WORKDIR /ext-src -RUN case "${PG_VERSION}" in "v17") \ +RUN case "${PG_VERSION:?}" in "v17") \ echo "postgresql_anonymizer does not yet support PG17" && exit 0;; \ esac && \ wget https://github.com/neondatabase/postgresql_anonymizer/archive/refs/tags/neon_1.1.1.tar.gz -O pg_anon.tar.gz && \ @@ -1091,7 +1112,7 @@ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux FROM pg-build-nonroot-with-cargo AS rust-extensions-build ARG PG_VERSION -RUN case "${PG_VERSION}" in \ +RUN case "${PG_VERSION:?}" in \ 'v17') \ echo 'v17 is not supported yet by pgrx. 
Quit' && exit 0;; \ esac && \ @@ -1270,7 +1291,7 @@ FROM build-deps AS pgx_ulid-src ARG PG_VERSION WORKDIR /ext-src -RUN case "${PG_VERSION}" in \ +RUN case "${PG_VERSION:?}" in \ "v14" | "v15" | "v16") \ ;; \ *) \ @@ -1302,7 +1323,7 @@ FROM build-deps AS pgx_ulid-pgrx12-src ARG PG_VERSION WORKDIR /ext-src -RUN case "${PG_VERSION}" in \ +RUN case "${PG_VERSION:?}" in \ "v17") \ ;; \ *) \ @@ -1430,8 +1451,8 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) && \ FROM build-deps AS pg_mooncake-src ARG PG_VERSION WORKDIR /ext-src -RUN wget https://github.com/Mooncake-Labs/pg_mooncake/releases/download/v0.1.1/pg_mooncake-0.1.1.tar.gz -O pg_mooncake.tar.gz && \ - echo "a2d16eff7948dde64f072609ca5d2962d6b4d07cb89d45952add473529c55f55 pg_mooncake.tar.gz" | sha256sum --check && \ +RUN wget https://github.com/Mooncake-Labs/pg_mooncake/releases/download/v0.1.2/pg_mooncake-0.1.2.tar.gz -O pg_mooncake.tar.gz && \ + echo "4550473784fcdd2e1e18062bc01eb9c286abd27cdf5e11a4399be6c0a426ba90 pg_mooncake.tar.gz" | sha256sum --check && \ mkdir pg_mooncake-src && cd pg_mooncake-src && tar xzf ../pg_mooncake.tar.gz --strip-components=1 -C . && \ echo "make -f pg_mooncake-src/Makefile.build installcheck TEST_DIR=./test SQL_DIR=./sql SRC_DIR=./src" > neon-test.sh && \ chmod a+x neon-test.sh @@ -1578,7 +1599,15 @@ ENV BUILD_TAG=$BUILD_TAG USER nonroot # Copy entire project to get Cargo.* files with proper dependencies for the whole project COPY --chown=nonroot . . -RUN mold -run cargo build --locked --profile release-line-debug-size-lto --bin compute_ctl --bin fast_import --bin local_proxy +RUN --mount=type=cache,uid=1000,target=/home/nonroot/.cargo/registry \ + --mount=type=cache,uid=1000,target=/home/nonroot/.cargo/git \ + --mount=type=cache,uid=1000,target=/home/nonroot/target \ + mold -run cargo build --locked --profile release-line-debug-size-lto --bin compute_ctl --bin fast_import --bin local_proxy && \ + mkdir target-bin && \ + cp target/release-line-debug-size-lto/compute_ctl \ + target/release-line-debug-size-lto/fast_import \ + target/release-line-debug-size-lto/local_proxy \ + target-bin ######################################################################################### # @@ -1586,7 +1615,7 @@ RUN mold -run cargo build --locked --profile release-line-debug-size-lto --bin c # ######################################################################################### -FROM debian:$DEBIAN_FLAVOR AS pgbouncer +FROM $BASE_IMAGE_SHA AS pgbouncer RUN set -e \ && echo 'Acquire::Retries "5";' > /etc/apt/apt.conf.d/80-retries \ && apt update \ @@ -1607,7 +1636,7 @@ RUN set -e \ && git clone --recurse-submodules --depth 1 --branch ${PGBOUNCER_TAG} https://github.com/pgbouncer/pgbouncer.git pgbouncer \ && cd pgbouncer \ && ./autogen.sh \ - && LDFLAGS=-static ./configure --prefix=/usr/local/pgbouncer --without-openssl \ + && ./configure --prefix=/usr/local/pgbouncer --without-openssl \ && make -j $(nproc) dist_man_MANS= \ && make install dist_man_MANS= @@ -1616,13 +1645,12 @@ RUN set -e \ # Layer "exporters" # ######################################################################################### -FROM alpine/curl:${ALPINE_CURL_VERSION} AS exporters +FROM build-deps AS exporters ARG TARGETARCH # Keep sql_exporter version same as in build-tools.Dockerfile and # test_runner/regress/test_compute_metrics.py # See comment on the top of the file regading `echo`, `-e` and `\n` -RUN echo -e "--retry-connrefused\n--connect-timeout 15\n--retry 5\n--max-time 300\n" > /root/.curlrc; \ - if [ "$TARGETARCH" = "amd64" ]; then\ +RUN 
if [ "$TARGETARCH" = "amd64" ]; then\ postgres_exporter_sha256='027e75dda7af621237ff8f5ac66b78a40b0093595f06768612b92b1374bd3105';\ pgbouncer_exporter_sha256='c9f7cf8dcff44f0472057e9bf52613d93f3ffbc381ad7547a959daa63c5e84ac';\ sql_exporter_sha256='38e439732bbf6e28ca4a94d7bc3686d3fa1abdb0050773d5617a9efdb9e64d08';\ @@ -1641,6 +1669,29 @@ RUN echo -e "--retry-connrefused\n--connect-timeout 15\n--retry 5\n--max-time 30 && echo "${pgbouncer_exporter_sha256} pgbouncer_exporter" | sha256sum -c -\ && echo "${sql_exporter_sha256} sql_exporter" | sha256sum -c - +######################################################################################### +# +# Layer "awscli" +# +######################################################################################### +FROM build-deps AS awscli +ARG TARGETARCH +RUN set -ex; \ + if [ "${TARGETARCH}" = "amd64" ]; then \ + TARGETARCH_ALT="x86_64"; \ + CHECKSUM="c9a9df3770a3ff9259cb469b6179e02829687a464e0824d5c32d378820b53a00"; \ + elif [ "${TARGETARCH}" = "arm64" ]; then \ + TARGETARCH_ALT="aarch64"; \ + CHECKSUM="8181730be7891582b38b028112e81b4899ca817e8c616aad807c9e9d1289223a"; \ + else \ + echo "Unsupported architecture: ${TARGETARCH}"; exit 1; \ + fi; \ + curl --retry 5 -L "https://awscli.amazonaws.com/awscli-exe-linux-${TARGETARCH_ALT}-2.17.5.zip" -o /tmp/awscliv2.zip; \ + echo "${CHECKSUM} /tmp/awscliv2.zip" | sha256sum -c -; \ + unzip /tmp/awscliv2.zip -d /tmp/awscliv2; \ + /tmp/awscliv2/aws/install; \ + rm -rf /tmp/awscliv2.zip /tmp/awscliv2 + ######################################################################################### # # Clean up postgres folder before inclusion @@ -1673,7 +1724,7 @@ USER nonroot COPY --chown=nonroot compute compute -RUN make PG_VERSION="${PG_VERSION}" -C compute +RUN make PG_VERSION="${PG_VERSION:?}" -C compute ######################################################################################### # @@ -1706,8 +1757,8 @@ COPY --from=hll-src /ext-src/ /ext-src/ COPY --from=plpgsql_check-src /ext-src/ /ext-src/ #COPY --from=timescaledb-src /ext-src/ /ext-src/ COPY --from=pg_hint_plan-src /ext-src/ /ext-src/ -COPY compute/patches/pg_hint_plan_${PG_VERSION}.patch /ext-src -RUN cd /ext-src/pg_hint_plan-src && patch -p1 < /ext-src/pg_hint_plan_${PG_VERSION}.patch +COPY compute/patches/pg_hint_plan_${PG_VERSION:?}.patch /ext-src +RUN cd /ext-src/pg_hint_plan-src && patch -p1 < /ext-src/pg_hint_plan_${PG_VERSION:?}.patch COPY --from=pg_cron-src /ext-src/ /ext-src/ #COPY --from=pgx_ulid-src /ext-src/ /ext-src/ #COPY --from=pgx_ulid-pgrx12-src /ext-src/ /ext-src/ @@ -1736,51 +1787,12 @@ ENV PGDATABASE=postgres # Put it all together into the final image # ######################################################################################### -FROM debian:$DEBIAN_FLAVOR +FROM $BASE_IMAGE_SHA ARG DEBIAN_VERSION # Use strict mode for bash to catch errors early SHELL ["/bin/bash", "-euo", "pipefail", "-c"] -# Add user postgres -RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \ - echo "postgres:test_console_pass" | chpasswd && \ - mkdir /var/db/postgres/compute && mkdir /var/db/postgres/specs && \ - mkdir /var/db/postgres/pgbouncer && \ - chown -R postgres:postgres /var/db/postgres && \ - chmod 0750 /var/db/postgres/compute && \ - chmod 0750 /var/db/postgres/pgbouncer && \ - echo '/usr/local/lib' >> /etc/ld.so.conf && /sbin/ldconfig && \ - # create folder for file cache - mkdir -p -m 777 /neon/cache - -COPY --from=postgres-cleanup-layer --chown=postgres /usr/local/pgsql /usr/local -COPY --from=compute-tools 
--chown=postgres /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl -COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/fast_import /usr/local/bin/fast_import - -# pgbouncer and its config -COPY --from=pgbouncer /usr/local/pgbouncer/bin/pgbouncer /usr/local/bin/pgbouncer -COPY --chmod=0666 --chown=postgres compute/etc/pgbouncer.ini /etc/pgbouncer.ini - -# local_proxy and its config -COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/local_proxy /usr/local/bin/local_proxy -RUN mkdir -p /etc/local_proxy && chown postgres:postgres /etc/local_proxy - -# Metrics exporter binaries and configuration files -COPY --from=exporters ./postgres_exporter /bin/postgres_exporter -COPY --from=exporters ./pgbouncer_exporter /bin/pgbouncer_exporter -COPY --from=exporters ./sql_exporter /bin/sql_exporter - -COPY --chown=postgres compute/etc/postgres_exporter.yml /etc/postgres_exporter.yml - -COPY --from=sql_exporter_preprocessor --chmod=0644 /home/nonroot/compute/etc/sql_exporter.yml /etc/sql_exporter.yml -COPY --from=sql_exporter_preprocessor --chmod=0644 /home/nonroot/compute/etc/neon_collector.yml /etc/neon_collector.yml -COPY --from=sql_exporter_preprocessor --chmod=0644 /home/nonroot/compute/etc/sql_exporter_autoscaling.yml /etc/sql_exporter_autoscaling.yml -COPY --from=sql_exporter_preprocessor --chmod=0644 /home/nonroot/compute/etc/neon_collector_autoscaling.yml /etc/neon_collector_autoscaling.yml - -# Create remote extension download directory -RUN mkdir /usr/local/download_extensions && chown -R postgres:postgres /usr/local/download_extensions - # Install: # libreadline8 for psql # liblz4-1 for lz4 @@ -1790,10 +1802,9 @@ RUN mkdir /usr/local/download_extensions && chown -R postgres:postgres /usr/loca # libzstd1 for zstd # libboost* for rdkit # ca-certificates for communicating with s3 by compute_ctl - +# libevent for pgbouncer RUN echo 'Acquire::Retries "5";' > /etc/apt/apt.conf.d/80-retries && \ echo -e "retry_connrefused = on\ntimeout=15\ntries=5\n" > /root/.wgetrc - RUN apt update && \ case $DEBIAN_VERSION in \ # Version-specific installs for Bullseye (PG14-PG16): @@ -1828,33 +1839,57 @@ RUN apt update && \ libxslt1.1 \ libzstd1 \ libcurl4 \ + libevent-2.1-7 \ locales \ procps \ ca-certificates \ - curl \ - unzip \ $VERSION_INSTALLS && \ apt clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \ localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8 -# aws cli is used by fast_import (curl and unzip above are at this time only used for this installation step) -ARG TARGETARCH -RUN set -ex; \ - if [ "${TARGETARCH}" = "amd64" ]; then \ - TARGETARCH_ALT="x86_64"; \ - CHECKSUM="c9a9df3770a3ff9259cb469b6179e02829687a464e0824d5c32d378820b53a00"; \ - elif [ "${TARGETARCH}" = "arm64" ]; then \ - TARGETARCH_ALT="aarch64"; \ - CHECKSUM="8181730be7891582b38b028112e81b4899ca817e8c616aad807c9e9d1289223a"; \ - else \ - echo "Unsupported architecture: ${TARGETARCH}"; exit 1; \ - fi; \ - curl --retry 5 -L "https://awscli.amazonaws.com/awscli-exe-linux-${TARGETARCH_ALT}-2.17.5.zip" -o /tmp/awscliv2.zip; \ - echo "${CHECKSUM} /tmp/awscliv2.zip" | sha256sum -c -; \ - unzip /tmp/awscliv2.zip -d /tmp/awscliv2; \ - /tmp/awscliv2/aws/install; \ - rm -rf /tmp/awscliv2.zip /tmp/awscliv2; \ - true +# Add user postgres +RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \ + echo "postgres:test_console_pass" | chpasswd && \ + mkdir /var/db/postgres/compute && mkdir 
/var/db/postgres/specs && \ + mkdir /var/db/postgres/pgbouncer && \ + chown -R postgres:postgres /var/db/postgres && \ + chmod 0750 /var/db/postgres/compute && \ + chmod 0750 /var/db/postgres/pgbouncer && \ + # create folder for file cache + mkdir -p -m 777 /neon/cache && \ + # Create remote extension download directory + mkdir /usr/local/download_extensions && \ + chown -R postgres:postgres /usr/local/download_extensions + +# aws cli is used by fast_import +COPY --from=awscli /usr/local/aws-cli /usr/local/aws-cli + +# pgbouncer and its config +COPY --from=pgbouncer /usr/local/pgbouncer/bin/pgbouncer /usr/local/bin/pgbouncer +COPY --chmod=0666 --chown=postgres compute/etc/pgbouncer.ini /etc/pgbouncer.ini + +COPY --from=postgres-cleanup-layer --chown=postgres /usr/local/pgsql /usr/local +COPY --from=compute-tools --chown=postgres /home/nonroot/target-bin/compute_ctl /usr/local/bin/compute_ctl +COPY --from=compute-tools --chown=postgres /home/nonroot/target-bin/fast_import /usr/local/bin/fast_import + +# local_proxy and its config +COPY --from=compute-tools --chown=postgres /home/nonroot/target-bin/local_proxy /usr/local/bin/local_proxy +RUN mkdir -p /etc/local_proxy && chown postgres:postgres /etc/local_proxy + +# Metrics exporter binaries and configuration files +COPY --from=exporters ./postgres_exporter /bin/postgres_exporter +COPY --from=exporters ./pgbouncer_exporter /bin/pgbouncer_exporter +COPY --from=exporters ./sql_exporter /bin/sql_exporter + +COPY --chown=postgres compute/etc/postgres_exporter.yml /etc/postgres_exporter.yml + +COPY --from=sql_exporter_preprocessor --chmod=0644 /home/nonroot/compute/etc/sql_exporter.yml /etc/sql_exporter.yml +COPY --from=sql_exporter_preprocessor --chmod=0644 /home/nonroot/compute/etc/neon_collector.yml /etc/neon_collector.yml +COPY --from=sql_exporter_preprocessor --chmod=0644 /home/nonroot/compute/etc/sql_exporter_autoscaling.yml /etc/sql_exporter_autoscaling.yml +COPY --from=sql_exporter_preprocessor --chmod=0644 /home/nonroot/compute/etc/neon_collector_autoscaling.yml /etc/neon_collector_autoscaling.yml + +# Make the libraries we built available +RUN echo '/usr/local/lib' >> /etc/ld.so.conf && /sbin/ldconfig ENV LANG=en_US.utf8 USER postgres diff --git a/compute/patches/pg_hint_plan_v16.patch b/compute/patches/pg_hint_plan_v16.patch index 4039a036df..1fc3ffa609 100644 --- a/compute/patches/pg_hint_plan_v16.patch +++ b/compute/patches/pg_hint_plan_v16.patch @@ -6,16 +6,16 @@ index da723b8..5328114 100644 ---- -- No.A-1-1-3 CREATE EXTENSION pg_hint_plan; -+LOG: Sending request to compute_ctl: http://localhost:3080/extension_server/pg_hint_plan ++LOG: Sending request to compute_ctl: http://localhost:3081/extension_server/pg_hint_plan -- No.A-1-2-3 DROP EXTENSION pg_hint_plan; -- No.A-1-1-4 CREATE SCHEMA other_schema; CREATE EXTENSION pg_hint_plan SCHEMA other_schema; -+LOG: Sending request to compute_ctl: http://localhost:3080/extension_server/pg_hint_plan ++LOG: Sending request to compute_ctl: http://localhost:3081/extension_server/pg_hint_plan ERROR: extension "pg_hint_plan" must be installed in schema "hint_plan" CREATE EXTENSION pg_hint_plan; -+LOG: Sending request to compute_ctl: http://localhost:3080/extension_server/pg_hint_plan ++LOG: Sending request to compute_ctl: http://localhost:3081/extension_server/pg_hint_plan DROP SCHEMA other_schema; ---- ---- No. 
A-5-1 comment pattern @@ -35,7 +35,7 @@ index d372459..6282afe 100644 SET client_min_messages TO LOG; SET pg_hint_plan.enable_hint TO on; CREATE EXTENSION file_fdw; -+LOG: Sending request to compute_ctl: http://localhost:3080/extension_server/file_fdw ++LOG: Sending request to compute_ctl: http://localhost:3081/extension_server/file_fdw CREATE SERVER file_server FOREIGN DATA WRAPPER file_fdw; CREATE USER MAPPING FOR PUBLIC SERVER file_server; CREATE FOREIGN TABLE ft1 (id int, val int) SERVER file_server OPTIONS (format 'csv', filename :'filename'); diff --git a/compute/patches/pg_hint_plan_v17.patch b/compute/patches/pg_hint_plan_v17.patch index dbf4e470ea..3442a094eb 100644 --- a/compute/patches/pg_hint_plan_v17.patch +++ b/compute/patches/pg_hint_plan_v17.patch @@ -6,16 +6,16 @@ index e7d68a1..65a056c 100644 ---- -- No.A-1-1-3 CREATE EXTENSION pg_hint_plan; -+LOG: Sending request to compute_ctl: http://localhost:3080/extension_server/pg_hint_plan ++LOG: Sending request to compute_ctl: http://localhost:3081/extension_server/pg_hint_plan -- No.A-1-2-3 DROP EXTENSION pg_hint_plan; -- No.A-1-1-4 CREATE SCHEMA other_schema; CREATE EXTENSION pg_hint_plan SCHEMA other_schema; -+LOG: Sending request to compute_ctl: http://localhost:3080/extension_server/pg_hint_plan ++LOG: Sending request to compute_ctl: http://localhost:3081/extension_server/pg_hint_plan ERROR: extension "pg_hint_plan" must be installed in schema "hint_plan" CREATE EXTENSION pg_hint_plan; -+LOG: Sending request to compute_ctl: http://localhost:3080/extension_server/pg_hint_plan ++LOG: Sending request to compute_ctl: http://localhost:3081/extension_server/pg_hint_plan DROP SCHEMA other_schema; ---- ---- No. A-5-1 comment pattern @@ -168,7 +168,7 @@ index 017fa4b..98d989b 100644 SET client_min_messages TO LOG; SET pg_hint_plan.enable_hint TO on; CREATE EXTENSION file_fdw; -+LOG: Sending request to compute_ctl: http://localhost:3080/extension_server/file_fdw ++LOG: Sending request to compute_ctl: http://localhost:3081/extension_server/file_fdw CREATE SERVER file_server FOREIGN DATA WRAPPER file_fdw; CREATE USER MAPPING FOR PUBLIC SERVER file_server; CREATE FOREIGN TABLE ft1 (id int, val int) SERVER file_server OPTIONS (format 'csv', filename :'filename'); diff --git a/compute/vm-image-spec-bookworm.yaml b/compute/vm-image-spec-bookworm.yaml index 005143fff3..568f0b0444 100644 --- a/compute/vm-image-spec-bookworm.yaml +++ b/compute/vm-image-spec-bookworm.yaml @@ -47,7 +47,9 @@ files: # Allow postgres user (which is what compute_ctl runs as) to run /neonvm/bin/resize-swap # and /neonvm/bin/set-disk-quota as root without requiring entering a password (NOPASSWD), # regardless of hostname (ALL) - postgres ALL=(root) NOPASSWD: /neonvm/bin/resize-swap, /neonvm/bin/set-disk-quota + # + # Also allow it to shut down the VM. The fast_import job does that when it's finished. + postgres ALL=(root) NOPASSWD: /neonvm/bin/resize-swap, /neonvm/bin/set-disk-quota, /neonvm/bin/poweroff - filename: cgconfig.conf content: | # Configuration for cgroups in VM compute nodes @@ -72,8 +74,8 @@ build: | # At time of migration to bookworm (2024-10-09), debian has a version of libcgroup/cgroup-tools 2.0.2, # and it _probably_ can be used as-is. However, we'll build it ourselves to minimise the changeset # for debian version migration. 
- # - FROM debian:bookworm-slim as libcgroup-builder + ARG BOOKWORM_SLIM_SHA=sha256:40b107342c492725bc7aacbe93a49945445191ae364184a6d24fedb28172f6f7 + FROM debian@$BOOKWORM_SLIM_SHA as libcgroup-builder ENV LIBCGROUP_VERSION=v2.0.3 RUN set -exu \ diff --git a/compute/vm-image-spec-bullseye.yaml b/compute/vm-image-spec-bullseye.yaml index 2fe50c3a45..124c40cf5d 100644 --- a/compute/vm-image-spec-bullseye.yaml +++ b/compute/vm-image-spec-bullseye.yaml @@ -68,7 +68,8 @@ build: | # At time of writing (2023-03-14), debian bullseye has a version of cgroup-tools (technically # libcgroup) that doesn't support cgroup v2 (version 0.41-11). Unfortunately, the vm-monitor # requires cgroup v2, so we'll build cgroup-tools ourselves. - FROM debian:bullseye-slim as libcgroup-builder + ARG BULLSEYE_SLIM_SHA=sha256:e831d9a884d63734fe3dd9c491ed9a5a3d4c6a6d32c5b14f2067357c49b0b7e1 + FROM debian@$BULLSEYE_SLIM_SHA as libcgroup-builder ENV LIBCGROUP_VERSION=v2.0.3 RUN set -exu \ diff --git a/compute_tools/src/bin/compute_ctl.rs b/compute_tools/src/bin/compute_ctl.rs index 47fc9cb7fe..df47adda6c 100644 --- a/compute_tools/src/bin/compute_ctl.rs +++ b/compute_tools/src/bin/compute_ctl.rs @@ -41,12 +41,14 @@ use std::process::exit; use std::str::FromStr; use std::sync::atomic::Ordering; use std::sync::{mpsc, Arc, Condvar, Mutex, RwLock}; +use std::time::SystemTime; use std::{thread, time::Duration}; use anyhow::{Context, Result}; use chrono::Utc; use clap::Parser; use compute_tools::disk_quota::set_disk_quota; +use compute_tools::http::server::Server; use compute_tools::lsn_lease::launch_lsn_lease_bg_task_for_static; use signal_hook::consts::{SIGQUIT, SIGTERM}; use signal_hook::{consts::SIGINT, iterator::Signals}; @@ -61,7 +63,6 @@ use compute_tools::compute::{ }; use compute_tools::configurator::launch_configurator; use compute_tools::extension_server::get_pg_version_string; -use compute_tools::http::launch_http_server; use compute_tools::logger::*; use compute_tools::monitor::launch_monitor; use compute_tools::params::*; @@ -85,6 +86,19 @@ fn parse_remote_ext_config(arg: &str) -> Result { } } +/// Generate a compute ID if one is not supplied. This exists to keep forward +/// compatibility tests working, but will be removed in a future iteration. +fn generate_compute_id() -> String { + let now = SystemTime::now(); + + format!( + "compute-{}", + now.duration_since(SystemTime::UNIX_EPOCH) + .unwrap() + .as_secs() + ) +} + #[derive(Parser)] #[command(rename_all = "kebab-case")] struct Cli { @@ -94,8 +108,20 @@ struct Cli { #[arg(short = 'r', long, value_parser = parse_remote_ext_config)] pub remote_ext_config: Option, - #[arg(long, default_value_t = 3080)] - pub http_port: u16, + /// The port to bind the external listening HTTP server to. Clients running + /// outside the compute will talk to the compute through this port. Keep + /// the previous name for this argument around for a smoother release + /// with the control plane. + /// + /// TODO: Remove the alias after the control plane release which teaches the + /// control plane about the renamed argument. + #[arg(long, alias = "http-port", default_value_t = 3080)] + pub external_http_port: u16, + + /// The port to bind the internal listening HTTP server to. Clients like + /// the neon extension (for installing remote extensions) and local_proxy. 
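+    /// These clients talk to compute_ctl over this port. When not specified, it
+    /// defaults to the external HTTP port plus one.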
+ #[arg(long)] + pub internal_http_port: Option, #[arg(short = 'D', long, value_name = "DATADIR")] pub pgdata: String, @@ -130,17 +156,26 @@ struct Cli { #[arg(short = 'S', long, group = "spec-path")] pub spec_path: Option, - #[arg(short = 'i', long, group = "compute-id", conflicts_with_all = ["spec", "spec-path"])] - pub compute_id: Option, + #[arg(short = 'i', long, group = "compute-id", default_value = generate_compute_id())] + pub compute_id: String, - #[arg(short = 'p', long, conflicts_with_all = ["spec", "spec-path"], requires = "compute-id", value_name = "CONTROL_PLANE_API_BASE_URL")] + #[arg(short = 'p', long, conflicts_with_all = ["spec", "spec-path"], value_name = "CONTROL_PLANE_API_BASE_URL")] pub control_plane_uri: Option, } fn main() -> Result<()> { let cli = Cli::parse(); - let build_tag = init()?; + // For historical reasons, the main thread that processes the spec and launches postgres + // is synchronous, but we always have this tokio runtime available and we "enter" it so + // that you can use tokio::spawn() and tokio::runtime::Handle::current().block_on(...) + // from all parts of compute_ctl. + let runtime = tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build()?; + let _rt_guard = runtime.enter(); + + let build_tag = runtime.block_on(init())?; let scenario = failpoint_support::init(); @@ -172,8 +207,8 @@ fn main() -> Result<()> { deinit_and_exit(wait_pg_result); } -fn init() -> Result { - init_tracing_and_logging(DEFAULT_LOG_LEVEL)?; +async fn init() -> Result { + init_tracing_and_logging(DEFAULT_LOG_LEVEL).await?; let mut signals = Signals::new([SIGINT, SIGTERM, SIGQUIT])?; thread::spawn(move || { @@ -259,20 +294,11 @@ fn try_spec_from_cli(cli: &Cli) -> Result { }); } - if cli.compute_id.is_none() { - panic!( - "compute spec should be provided by one of the following ways: \ - --spec OR --spec-path OR --control-plane-uri and --compute-id" - ); - }; if cli.control_plane_uri.is_none() { - panic!("must specify both --control-plane-uri and --compute-id or none"); + panic!("must specify --control-plane-uri"); }; - match get_spec_from_control_plane( - cli.control_plane_uri.as_ref().unwrap(), - cli.compute_id.as_ref().unwrap(), - ) { + match get_spec_from_control_plane(cli.control_plane_uri.as_ref().unwrap(), &cli.compute_id) { Ok(spec) => Ok(CliSpecParams { spec, live_config_allowed: true, @@ -319,13 +345,15 @@ fn wait_spec( let tokio_conn_conf = tokio_postgres::config::Config::from_str(connstr.as_str()) .context("cannot build tokio postgres config from connstr")?; let compute_node = ComputeNode { + compute_id: cli.compute_id.clone(), connstr, conn_conf, tokio_conn_conf, pgdata: cli.pgdata.clone(), pgbin: cli.pgbin.clone(), pgversion: get_pg_version_string(&cli.pgbin), - http_port: cli.http_port, + external_http_port: cli.external_http_port, + internal_http_port: cli.internal_http_port.unwrap_or(cli.external_http_port + 1), live_config_allowed, state: Mutex::new(new_state), state_changed: Condvar::new(), @@ -343,10 +371,13 @@ fn wait_spec( compute.prewarm_postgres()?; } - // Launch http service first, so that we can serve control-plane requests - // while configuration is still in progress. - let _http_handle = - launch_http_server(cli.http_port, &compute).expect("cannot launch http endpoint thread"); + // Launch the external HTTP server first, so that we can serve control plane + // requests while configuration is still in progress. 
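As an aside, the runtime arrangement described in the comment at the top of main() can be sketched standalone roughly like this (a minimal, hypothetical example assuming only the tokio crate; it is not part of the patch):

use tokio::runtime::{Builder, Handle};

fn main() {
    // Build a single multi-thread runtime up front...
    let runtime = Builder::new_multi_thread()
        .enable_all()
        .build()
        .expect("failed to build tokio runtime");
    // ...and enter it, so this otherwise synchronous thread has a runtime
    // context: tokio::spawn() and Handle::current() now work from here.
    let _guard = runtime.enter();

    // Fire-and-forget async work spawned from synchronous code.
    let _task = tokio::spawn(async {
        // background work would go here
    });

    // Block the synchronous thread on an async result when one is needed.
    let value = Handle::current().block_on(async { 21 * 2 });
    assert_eq!(value, 42);
}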
+ Server::External(cli.external_http_port).launch(&compute); + + // The internal HTTP server could be launched later, but there isn't much + // sense in waiting. + Server::Internal(cli.internal_http_port.unwrap_or(cli.external_http_port + 1)).launch(&compute); if !spec_set { // No spec provided, hang waiting for it. @@ -484,21 +515,6 @@ fn start_postgres( use std::env; use tokio_util::sync::CancellationToken; - // Note: it seems like you can make a runtime in an inner scope and - // if you start a task in it it won't be dropped. However, make it - // in the outermost scope just to be safe. - let rt = if env::var_os("AUTOSCALING").is_some() { - Some( - tokio::runtime::Builder::new_multi_thread() - .worker_threads(4) - .enable_all() - .build() - .expect("failed to create tokio runtime for monitor") - ) - } else { - None - }; - // This token is used internally by the monitor to clean up all threads let token = CancellationToken::new(); @@ -509,16 +525,19 @@ fn start_postgres( Some(cli.filecache_connstr.clone()) }; - let vm_monitor = rt.as_ref().map(|rt| { - rt.spawn(vm_monitor::start( + let vm_monitor = if env::var_os("AUTOSCALING").is_some() { + let vm_monitor = tokio::spawn(vm_monitor::start( Box::leak(Box::new(vm_monitor::Args { cgroup: Some(cli.cgroup.clone()), pgconnstr, addr: cli.vm_monitor_addr.clone(), })), token.clone(), - )) - }); + )); + Some(vm_monitor) + } else { + None + }; } } @@ -528,8 +547,6 @@ fn start_postgres( delay_exit, compute, #[cfg(target_os = "linux")] - rt, - #[cfg(target_os = "linux")] token, #[cfg(target_os = "linux")] vm_monitor, @@ -537,15 +554,13 @@ fn start_postgres( )) } -type PostgresHandle = (std::process::Child, std::thread::JoinHandle<()>); +type PostgresHandle = (std::process::Child, tokio::task::JoinHandle>); struct StartPostgresResult { delay_exit: bool, // passed through from WaitSpecResult compute: Arc, - #[cfg(target_os = "linux")] - rt: Option, #[cfg(target_os = "linux")] token: tokio_util::sync::CancellationToken, #[cfg(target_os = "linux")] @@ -564,10 +579,10 @@ fn wait_postgres(pg: Option) -> Result { .expect("failed to start waiting on Postgres process"); PG_PID.store(0, Ordering::SeqCst); - // Process has exited, so we can join the logs thread. - let _ = logs_handle - .join() - .map_err(|e| tracing::error!("log thread panicked: {:?}", e)); + // Process has exited. Wait for the log collecting task to finish. + let _ = tokio::runtime::Handle::current() + .block_on(logs_handle) + .map_err(|e| tracing::error!("log task panicked: {:?}", e)); info!("Postgres exited with code {}, shutting down", ecode); exit_code = ecode.code() @@ -588,8 +603,6 @@ fn cleanup_after_postgres_exit( vm_monitor, #[cfg(target_os = "linux")] token, - #[cfg(target_os = "linux")] - rt, }: StartPostgresResult, ) -> Result { // Terminate the vm_monitor so it releases the file watcher on @@ -602,10 +615,6 @@ fn cleanup_after_postgres_exit( token.cancel(); // Kills the actual task running the monitor handle.abort(); - - // If handle is some, rt must have been used to produce it, and - // hence is also some - rt.unwrap().shutdown_timeout(Duration::from_secs(2)); } } } diff --git a/compute_tools/src/bin/fast_import.rs b/compute_tools/src/bin/fast_import.rs index c8440afb64..27cf1c2317 100644 --- a/compute_tools/src/bin/fast_import.rs +++ b/compute_tools/src/bin/fast_import.rs @@ -60,6 +60,16 @@ struct Args { pg_lib_dir: Utf8PathBuf, #[clap(long)] pg_port: Option, // port to run postgres on, 5432 is default + + /// Number of CPUs in the system. 
This is used to configure # of + /// parallel worker processes, for index creation. + #[clap(long, env = "NEON_IMPORTER_NUM_CPUS")] + num_cpus: Option, + + /// Amount of RAM in the system. This is used to configure shared_buffers + /// and maintenance_work_mem. + #[clap(long, env = "NEON_IMPORTER_MEMORY_MB")] + memory_mb: Option, } #[serde_with::serde_as] @@ -202,7 +212,16 @@ pub(crate) async fn main() -> anyhow::Result<()> { .await .context("initdb")?; - let nproc = num_cpus::get(); + // If the caller didn't specify CPU / RAM to use for sizing, default to + // number of CPUs in the system, and pretty arbitrarily, 256 MB of RAM. + let nproc = args.num_cpus.unwrap_or_else(num_cpus::get); + let memory_mb = args.memory_mb.unwrap_or(256); + + // Somewhat arbitrarily, use 10 % of memory for shared buffer cache, 70% for + // maintenance_work_mem (i.e. for sorting during index creation), and leave the rest + // available for misc other stuff that PostgreSQL uses memory for. + let shared_buffers_mb = ((memory_mb as f32) * 0.10) as usize; + let maintenance_work_mem_mb = ((memory_mb as f32) * 0.70) as usize; // // Launch postgres process @@ -212,12 +231,15 @@ pub(crate) async fn main() -> anyhow::Result<()> { .arg(&pgdata_dir) .args(["-p", &format!("{pg_port}")]) .args(["-c", "wal_level=minimal"]) - .args(["-c", "shared_buffers=10GB"]) + .args(["-c", &format!("shared_buffers={shared_buffers_mb}MB")]) .args(["-c", "max_wal_senders=0"]) .args(["-c", "fsync=off"]) .args(["-c", "full_page_writes=off"]) .args(["-c", "synchronous_commit=off"]) - .args(["-c", "maintenance_work_mem=8388608"]) + .args([ + "-c", + &format!("maintenance_work_mem={maintenance_work_mem_mb}MB"), + ]) .args(["-c", &format!("max_parallel_maintenance_workers={nproc}")]) .args(["-c", &format!("max_parallel_workers={nproc}")]) .args(["-c", &format!("max_parallel_workers_per_gather={nproc}")]) @@ -231,6 +253,14 @@ pub(crate) async fn main() -> anyhow::Result<()> { ]) .env_clear() .env("LD_LIBRARY_PATH", &pg_lib_dir) + .env( + "ASAN_OPTIONS", + std::env::var("ASAN_OPTIONS").unwrap_or_default(), + ) + .env( + "UBSAN_OPTIONS", + std::env::var("UBSAN_OPTIONS").unwrap_or_default(), + ) .stdout(std::process::Stdio::piped()) .stderr(std::process::Stdio::piped()) .spawn() diff --git a/compute_tools/src/catalog.rs b/compute_tools/src/catalog.rs index 4a297cfacf..28b10ce21c 100644 --- a/compute_tools/src/catalog.rs +++ b/compute_tools/src/catalog.rs @@ -140,5 +140,34 @@ pub async fn get_database_schema( warn!("pg_dump stderr: {}", line) } }); - Ok(initial_stream.chain(stdout_reader.map(|res| res.map(|b| b.freeze())))) + + #[allow(dead_code)] + struct SchemaStream { + // We keep a reference to the child process to ensure it stays alive + // while the stream is being consumed. 
When SchemaStream is dropped, + // cmd will be dropped, which triggers kill_on_drop and terminates pg_dump + cmd: tokio::process::Child, + stream: S, + } + + impl Stream for SchemaStream + where + S: Stream> + Unpin, + { + type Item = Result; + + fn poll_next( + mut self: std::pin::Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + ) -> std::task::Poll> { + Stream::poll_next(std::pin::Pin::new(&mut self.stream), cx) + } + } + + let schema_stream = SchemaStream { + cmd, + stream: initial_stream.chain(stdout_reader.map(|res| res.map(|b| b.freeze()))), + }; + + Ok(schema_stream) } diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index fd76e404c6..d323ea3dcd 100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -9,7 +9,6 @@ use std::str::FromStr; use std::sync::atomic::AtomicU32; use std::sync::atomic::Ordering; use std::sync::{Arc, Condvar, Mutex, RwLock}; -use std::thread; use std::time::Duration; use std::time::Instant; @@ -59,6 +58,8 @@ pub static PG_PID: AtomicU32 = AtomicU32::new(0); /// Compute node info shared across several `compute_ctl` threads. pub struct ComputeNode { + /// The ID of the compute + pub compute_id: String, // Url type maintains proper escaping pub connstr: url::Url, // We connect to Postgres from many different places, so build configs once @@ -81,8 +82,10 @@ pub struct ComputeNode { /// - we push spec and it does configuration /// - but then it is restarted without any spec again pub live_config_allowed: bool, - /// The port that the compute's HTTP server listens on - pub http_port: u16, + /// The port that the compute's external HTTP server listens on + pub external_http_port: u16, + /// The port that the compute's internal HTTP server listens on + pub internal_http_port: u16, /// Volatile part of the `ComputeNode`, which should be used under `Mutex`. /// To allow HTTP API server to serving status requests, while configuration /// is in progress, lock should be held only for short periods of time to do @@ -546,11 +549,7 @@ impl ComputeNode { pub fn check_safekeepers_synced(&self, compute_state: &ComputeState) -> Result> { let start_time = Utc::now(); - // Run actual work with new tokio runtime - let rt = tokio::runtime::Builder::new_current_thread() - .enable_all() - .build() - .expect("failed to create rt"); + let rt = tokio::runtime::Handle::current(); let result = rt.block_on(self.check_safekeepers_synced_async(compute_state)); // Record runtime @@ -597,9 +596,9 @@ impl ComputeNode { SYNC_SAFEKEEPERS_PID.store(0, Ordering::SeqCst); // Process has exited, so we can join the logs thread. - let _ = logs_handle - .join() - .map_err(|e| tracing::error!("log thread panicked: {:?}", e)); + let _ = tokio::runtime::Handle::current() + .block_on(logs_handle) + .map_err(|e| tracing::error!("log task panicked: {:?}", e)); if !sync_output.status.success() { anyhow::bail!( @@ -634,7 +633,7 @@ impl ComputeNode { config::write_postgres_conf( &pgdata_path.join("postgresql.conf"), &pspec.spec, - self.http_port, + self.internal_http_port, )?; // Syncing safekeepers is only safe with primary nodes: if a primary @@ -784,7 +783,7 @@ impl ComputeNode { pub fn start_postgres( &self, storage_auth_token: Option, - ) -> Result<(std::process::Child, std::thread::JoinHandle<()>)> { + ) -> Result<(std::process::Child, tokio::task::JoinHandle>)> { let pgdata_path = Path::new(&self.pgdata); // Run postgres as a child process. 
@@ -800,7 +799,7 @@ impl ComputeNode { .expect("cannot start postgres process"); PG_PID.store(pg.id(), Ordering::SeqCst); - // Start a thread to collect logs from stderr. + // Start a task to collect logs from stderr. let stderr = pg.stderr.take().expect("stderr should be captured"); let logs_handle = handle_postgres_logs(stderr); @@ -809,20 +808,28 @@ impl ComputeNode { Ok((pg, logs_handle)) } - /// Do post configuration of the already started Postgres. This function spawns a background thread to + /// Do post configuration of the already started Postgres. This function spawns a background task to /// configure the database after applying the compute spec. Currently, it upgrades the neon extension /// version. In the future, it may upgrade all 3rd-party extensions. #[instrument(skip_all)] pub fn post_apply_config(&self) -> Result<()> { - let conf = self.get_conn_conf(Some("compute_ctl:post_apply_config")); - thread::spawn(move || { - let func = || { - let mut client = conf.connect(NoTls)?; + let conf = self.get_tokio_conn_conf(Some("compute_ctl:post_apply_config")); + tokio::spawn(async move { + let res = async { + let (mut client, connection) = conf.connect(NoTls).await?; + tokio::spawn(async move { + if let Err(e) = connection.await { + eprintln!("connection error: {}", e); + } + }); + handle_neon_extension_upgrade(&mut client) + .await .context("handle_neon_extension_upgrade")?; Ok::<_, anyhow::Error>(()) - }; - if let Err(err) = func() { + } + .await; + if let Err(err) = res { error!("error while post_apply_config: {err:#}"); } }); @@ -919,13 +926,10 @@ impl ComputeNode { conf: Arc, concurrency: usize, ) -> Result<()> { - let rt = tokio::runtime::Builder::new_multi_thread() - .enable_all() - .build()?; - info!("Applying config with max {} concurrency", concurrency); debug!("Config: {:?}", spec); + let rt = tokio::runtime::Handle::current(); rt.block_on(async { // Proceed with post-startup configuration. Note, that order of operations is important. let client = Self::get_maintenance_client(&conf).await?; @@ -1319,14 +1323,18 @@ impl ComputeNode { } // Run migrations separately to not hold up cold starts - thread::spawn(move || { - let conf = conf.as_ref().clone(); - let mut conf = postgres::config::Config::from(conf); + tokio::spawn(async move { + let mut conf = conf.as_ref().clone(); conf.application_name("compute_ctl:migrations"); - match conf.connect(NoTls) { - Ok(mut client) => { - if let Err(e) = handle_migrations(&mut client) { + match conf.connect(NoTls).await { + Ok((mut client, connection)) => { + tokio::spawn(async move { + if let Err(e) = connection.await { + eprintln!("connection error: {}", e); + } + }); + if let Err(e) = handle_migrations(&mut client).await { error!("Failed to run migrations: {}", e); } } @@ -1363,16 +1371,11 @@ impl ComputeNode { if let Some(ref pgbouncer_settings) = spec.pgbouncer_settings { info!("tuning pgbouncer"); - let rt = tokio::runtime::Builder::new_current_thread() - .enable_all() - .build() - .expect("failed to create rt"); - - // Spawn a thread to do the tuning, + // Spawn a background task to do the tuning, // so that we don't block the main thread that starts Postgres. 
let pgbouncer_settings = pgbouncer_settings.clone(); - let _handle = thread::spawn(move || { - let res = rt.block_on(tune_pgbouncer(pgbouncer_settings)); + tokio::spawn(async move { + let res = tune_pgbouncer(pgbouncer_settings).await; if let Err(err) = res { error!("error while tuning pgbouncer: {err:?}"); } @@ -1382,41 +1385,42 @@ impl ComputeNode { if let Some(ref local_proxy) = spec.local_proxy_config { info!("configuring local_proxy"); - // Spawn a thread to do the configuration, + // Spawn a background task to do the configuration, // so that we don't block the main thread that starts Postgres. let local_proxy = local_proxy.clone(); - let _handle = Some(thread::spawn(move || { + tokio::spawn(async move { if let Err(err) = local_proxy::configure(&local_proxy) { error!("error while configuring local_proxy: {err:?}"); } - })); + }); } // Write new config let pgdata_path = Path::new(&self.pgdata); let postgresql_conf_path = pgdata_path.join("postgresql.conf"); - config::write_postgres_conf(&postgresql_conf_path, &spec, self.http_port)?; + config::write_postgres_conf(&postgresql_conf_path, &spec, self.internal_http_port)?; - let max_concurrent_connections = spec.reconfigure_concurrency; + if !spec.skip_pg_catalog_updates { + let max_concurrent_connections = spec.reconfigure_concurrency; + // Temporarily reset max_cluster_size in config + // to avoid the possibility of hitting the limit, while we are reconfiguring: + // creating new extensions, roles, etc. + config::with_compute_ctl_tmp_override(pgdata_path, "neon.max_cluster_size=-1", || { + self.pg_reload_conf()?; - // Temporarily reset max_cluster_size in config - // to avoid the possibility of hitting the limit, while we are reconfiguring: - // creating new extensions, roles, etc. - config::with_compute_ctl_tmp_override(pgdata_path, "neon.max_cluster_size=-1", || { - self.pg_reload_conf()?; + if spec.mode == ComputeMode::Primary { + let mut conf = tokio_postgres::Config::from_str(self.connstr.as_str()).unwrap(); + conf.application_name("apply_config"); + let conf = Arc::new(conf); - if spec.mode == ComputeMode::Primary { - let mut conf = tokio_postgres::Config::from_str(self.connstr.as_str()).unwrap(); - conf.application_name("apply_config"); - let conf = Arc::new(conf); + let spec = Arc::new(spec.clone()); - let spec = Arc::new(spec.clone()); + self.apply_spec_sql(spec, conf, max_concurrent_connections)?; + } - self.apply_spec_sql(spec, conf, max_concurrent_connections)?; - } - - Ok(()) - })?; + Ok(()) + })?; + } self.pg_reload_conf()?; @@ -1431,7 +1435,9 @@ impl ComputeNode { } #[instrument(skip_all)] - pub fn start_compute(&self) -> Result<(std::process::Child, std::thread::JoinHandle<()>)> { + pub fn start_compute( + &self, + ) -> Result<(std::process::Child, tokio::task::JoinHandle>)> { let compute_state = self.state.lock().unwrap().clone(); let pspec = compute_state.pspec.as_ref().expect("spec must be set"); info!( @@ -1446,16 +1452,11 @@ impl ComputeNode { if let Some(pgbouncer_settings) = &pspec.spec.pgbouncer_settings { info!("tuning pgbouncer"); - let rt = tokio::runtime::Builder::new_current_thread() - .enable_all() - .build() - .expect("failed to create rt"); - - // Spawn a thread to do the tuning, + // Spawn a background task to do the tuning, // so that we don't block the main thread that starts Postgres. 
let pgbouncer_settings = pgbouncer_settings.clone(); - let _handle = thread::spawn(move || { - let res = rt.block_on(tune_pgbouncer(pgbouncer_settings)); + let _handle = tokio::spawn(async move { + let res = tune_pgbouncer(pgbouncer_settings).await; if let Err(err) = res { error!("error while tuning pgbouncer: {err:?}"); } @@ -1465,10 +1466,10 @@ impl ComputeNode { if let Some(local_proxy) = &pspec.spec.local_proxy_config { info!("configuring local_proxy"); - // Spawn a thread to do the configuration, + // Spawn a background task to do the configuration, // so that we don't block the main thread that starts Postgres. let local_proxy = local_proxy.clone(); - let _handle = thread::spawn(move || { + let _handle = tokio::spawn(async move { if let Err(err) = local_proxy::configure(&local_proxy) { error!("error while configuring local_proxy: {err:?}"); } @@ -1487,7 +1488,8 @@ impl ComputeNode { extension_server::create_control_files(remote_extensions, &self.pgbin); let library_load_start_time = Utc::now(); - let remote_ext_metrics = self.prepare_preload_libraries(&pspec.spec)?; + let rt = tokio::runtime::Handle::current(); + let remote_ext_metrics = rt.block_on(self.prepare_preload_libraries(&pspec.spec))?; let library_load_time = Utc::now() .signed_duration_since(library_load_start_time) @@ -1542,7 +1544,7 @@ impl ComputeNode { self.post_apply_config()?; let conf = self.get_conn_conf(None); - thread::spawn(move || { + tokio::task::spawn_blocking(|| { let res = get_installed_extensions(conf); match res { Ok(extensions) => { @@ -1891,7 +1893,6 @@ LIMIT 100", Ok(ext_version) } - #[tokio::main] pub async fn prepare_preload_libraries( &self, spec: &ComputeSpec, diff --git a/compute_tools/src/configurator.rs b/compute_tools/src/configurator.rs index a2043529a1..d88f26ca20 100644 --- a/compute_tools/src/configurator.rs +++ b/compute_tools/src/configurator.rs @@ -51,9 +51,12 @@ fn configurator_main_loop(compute: &Arc) { pub fn launch_configurator(compute: &Arc) -> thread::JoinHandle<()> { let compute = Arc::clone(compute); + let runtime = tokio::runtime::Handle::current(); + thread::Builder::new() .name("compute-configurator".into()) .spawn(move || { + let _rt_guard = runtime.enter(); configurator_main_loop(&compute); info!("configurator thread is exited"); }) diff --git a/compute_tools/src/http/mod.rs b/compute_tools/src/http/mod.rs index a596bea504..93eb6ef5b7 100644 --- a/compute_tools/src/http/mod.rs +++ b/compute_tools/src/http/mod.rs @@ -4,11 +4,9 @@ use http::{header::CONTENT_TYPE, StatusCode}; use serde::Serialize; use tracing::error; -pub use server::launch_http_server; - mod extract; mod routes; -mod server; +pub mod server; /// Convenience response builder for JSON responses struct JsonResponse; diff --git a/compute_tools/src/http/routes/failpoints.rs b/compute_tools/src/http/routes/failpoints.rs index 2ec4511676..836417d784 100644 --- a/compute_tools/src/http/routes/failpoints.rs +++ b/compute_tools/src/http/routes/failpoints.rs @@ -1,7 +1,21 @@ use axum::response::{IntoResponse, Response}; use http::StatusCode; +use serde::{Deserialize, Serialize}; use tracing::info; -use utils::failpoint_support::{apply_failpoint, ConfigureFailpointsRequest}; +use utils::failpoint_support::apply_failpoint; + +pub type ConfigureFailpointsRequest = Vec; + +/// Information for configuring a single fail point +#[derive(Debug, Serialize, Deserialize)] +pub struct FailpointConfig { + /// Name of the fail point + pub name: String, + /// List of actions to take, using the format described in `fail::cfg` + /// + 
/// We also support `actions = "exit"` to cause the fail point to immediately exit. + pub actions: String, +} use crate::http::{extract::Json, JsonResponse}; diff --git a/compute_tools/src/http/server.rs b/compute_tools/src/http/server.rs index e41ed9df2d..a523ecd96f 100644 --- a/compute_tools/src/http/server.rs +++ b/compute_tools/src/http/server.rs @@ -1,7 +1,7 @@ use std::{ + fmt::Display, net::{IpAddr, Ipv6Addr, SocketAddr}, sync::Arc, - thread, time::Duration, }; @@ -26,46 +26,65 @@ use super::routes::{ }; use crate::compute::ComputeNode; -async fn handle_404() -> Response { - StatusCode::NOT_FOUND.into_response() -} - const X_REQUEST_ID: &str = "x-request-id"; -/// This middleware function allows compute_ctl to generate its own request ID -/// if one isn't supplied. The control plane will always send one as a UUID. The -/// neon Postgres extension on the other hand does not send one. -async fn maybe_add_request_id_header(mut request: Request, next: Next) -> Response { - let headers = request.headers_mut(); - - if headers.get(X_REQUEST_ID).is_none() { - headers.append(X_REQUEST_ID, Uuid::new_v4().to_string().parse().unwrap()); - } - - next.run(request).await +/// `compute_ctl` has two servers: internal and external. The internal server +/// binds to the loopback interface and handles communication from clients on +/// the compute. The external server is what receives communication from the +/// control plane, the metrics scraper, etc. We make the distinction because +/// certain routes in `compute_ctl` only need to be exposed to local processes +/// like Postgres via the neon extension and local_proxy. +#[derive(Clone, Copy, Debug)] +pub enum Server { + Internal(u16), + External(u16), } -/// Run the HTTP server and wait on it forever. -#[tokio::main] -async fn serve(port: u16, compute: Arc) { - let mut app = Router::new() - .route("/check_writability", post(check_writability::is_writable)) - .route("/configure", post(configure::configure)) - .route("/database_schema", get(database_schema::get_schema_dump)) - .route("/dbs_and_roles", get(dbs_and_roles::get_catalog_objects)) - .route( - "/extension_server/{*filename}", - post(extension_server::download_extension), - ) - .route("/extensions", post(extensions::install_extension)) - .route("/grants", post(grants::add_grant)) - .route("/insights", get(insights::get_insights)) - .route("/metrics", get(metrics::get_metrics)) - .route("/metrics.json", get(metrics_json::get_metrics)) - .route("/status", get(status::get_status)) - .route("/terminate", post(terminate::terminate)) - .fallback(handle_404) - .layer( +impl Display for Server { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Server::Internal(_) => f.write_str("internal"), + Server::External(_) => f.write_str("external"), + } + } +} + +impl From for Router> { + fn from(server: Server) -> Self { + let mut router = Router::>::new(); + + router = match server { + Server::Internal(_) => { + router = router + .route( + "/extension_server/{*filename}", + post(extension_server::download_extension), + ) + .route("/extensions", post(extensions::install_extension)) + .route("/grants", post(grants::add_grant)); + + // Add in any testing support + if cfg!(feature = "testing") { + use super::routes::failpoints; + + router = router.route("/failpoints", post(failpoints::configure_failpoints)); + } + + router + } + Server::External(_) => router + .route("/check_writability", post(check_writability::is_writable)) + .route("/configure", post(configure::configure)) 
+ .route("/database_schema", get(database_schema::get_schema_dump)) + .route("/dbs_and_roles", get(dbs_and_roles::get_catalog_objects)) + .route("/insights", get(insights::get_insights)) + .route("/metrics", get(metrics::get_metrics)) + .route("/metrics.json", get(metrics_json::get_metrics)) + .route("/status", get(status::get_status)) + .route("/terminate", post(terminate::terminate)), + }; + + router.fallback(Server::handle_404).method_not_allowed_fallback(Server::handle_405).layer( ServiceBuilder::new() // Add this middleware since we assume the request ID exists .layer(middleware::from_fn(maybe_add_request_id_header)) @@ -105,45 +124,88 @@ async fn serve(port: u16, compute: Arc) { ) .layer(PropagateRequestIdLayer::x_request_id()), ) - .with_state(compute); + } +} - // Add in any testing support - if cfg!(feature = "testing") { - use super::routes::failpoints; - - app = app.route("/failpoints", post(failpoints::configure_failpoints)) +impl Server { + async fn handle_404() -> impl IntoResponse { + StatusCode::NOT_FOUND } - // This usually binds to both IPv4 and IPv6 on Linux, see - // https://github.com/rust-lang/rust/pull/34440 for more information - let addr = SocketAddr::new(IpAddr::from(Ipv6Addr::UNSPECIFIED), port); - let listener = match TcpListener::bind(&addr).await { - Ok(listener) => listener, - Err(e) => { - error!( - "failed to bind the compute_ctl HTTP server to port {}: {}", - port, e - ); - return; + async fn handle_405() -> impl IntoResponse { + StatusCode::METHOD_NOT_ALLOWED + } + + async fn listener(&self) -> Result { + let addr = SocketAddr::new(self.ip(), self.port()); + let listener = TcpListener::bind(&addr).await?; + + Ok(listener) + } + + fn ip(&self) -> IpAddr { + match self { + // TODO: Change this to Ipv6Addr::LOCALHOST when the GitHub runners + // allow binding to localhost + Server::Internal(_) => IpAddr::from(Ipv6Addr::UNSPECIFIED), + Server::External(_) => IpAddr::from(Ipv6Addr::UNSPECIFIED), } - }; - - if let Ok(local_addr) = listener.local_addr() { - info!("compute_ctl HTTP server listening on {}", local_addr); - } else { - info!("compute_ctl HTTP server listening on port {}", port); } - if let Err(e) = axum::serve(listener, app).await { - error!("compute_ctl HTTP server error: {}", e); + fn port(self) -> u16 { + match self { + Server::Internal(port) => port, + Server::External(port) => port, + } + } + + async fn serve(self, compute: Arc) { + let listener = self.listener().await.unwrap_or_else(|e| { + // If we can't bind, the compute cannot operate correctly + panic!( + "failed to bind the compute_ctl {} HTTP server to {}: {}", + self, + SocketAddr::new(self.ip(), self.port()), + e + ); + }); + + if tracing::enabled!(tracing::Level::INFO) { + let local_addr = match listener.local_addr() { + Ok(local_addr) => local_addr, + Err(_) => SocketAddr::new(self.ip(), self.port()), + }; + + info!( + "compute_ctl {} HTTP server listening at {}", + self, local_addr + ); + } + + let router = Router::from(self).with_state(compute); + + if let Err(e) = axum::serve(listener, router).await { + error!("compute_ctl {} HTTP server error: {}", self, e); + } + } + + pub fn launch(self, compute: &Arc) { + let state = Arc::clone(compute); + + info!("Launching the {} server", self); + + tokio::spawn(self.serve(state)); } } -/// Launch a separate HTTP server thread and return its `JoinHandle`. 
-pub fn launch_http_server(port: u16, state: &Arc) -> Result> { - let state = Arc::clone(state); +/// This middleware function allows compute_ctl to generate its own request ID +/// if one isn't supplied. The control plane will always send one as a UUID. The +/// neon Postgres extension on the other hand does not send one. +async fn maybe_add_request_id_header(mut request: Request, next: Next) -> Response { + let headers = request.headers_mut(); + if headers.get(X_REQUEST_ID).is_none() { + headers.append(X_REQUEST_ID, Uuid::new_v4().to_string().parse().unwrap()); + } - Ok(thread::Builder::new() - .name("http-server".into()) - .spawn(move || serve(port, state))?) + next.run(request).await } diff --git a/compute_tools/src/logger.rs b/compute_tools/src/logger.rs index 00be5c13f9..3749dfc844 100644 --- a/compute_tools/src/logger.rs +++ b/compute_tools/src/logger.rs @@ -11,7 +11,7 @@ use tracing_subscriber::prelude::*; /// set `OTEL_EXPORTER_OTLP_ENDPOINT=http://jaeger:4318`. See /// `tracing-utils` package description. /// -pub fn init_tracing_and_logging(default_log_level: &str) -> anyhow::Result<()> { +pub async fn init_tracing_and_logging(default_log_level: &str) -> anyhow::Result<()> { // Initialize Logging let env_filter = tracing_subscriber::EnvFilter::try_from_default_env() .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new(default_log_level)); @@ -22,7 +22,7 @@ pub fn init_tracing_and_logging(default_log_level: &str) -> anyhow::Result<()> { .with_writer(std::io::stderr); // Initialize OpenTelemetry - let otlp_layer = tracing_utils::init_tracing_without_runtime("compute_ctl"); + let otlp_layer = tracing_utils::init_tracing("compute_ctl").await; // Put it all together tracing_subscriber::registry() diff --git a/compute_tools/src/migration.rs b/compute_tools/src/migration.rs index 7b7b042d84..c5e05822c0 100644 --- a/compute_tools/src/migration.rs +++ b/compute_tools/src/migration.rs @@ -1,6 +1,6 @@ use anyhow::{Context, Result}; use fail::fail_point; -use postgres::{Client, Transaction}; +use tokio_postgres::{Client, Transaction}; use tracing::{error, info}; use crate::metrics::DB_MIGRATION_FAILED; @@ -21,10 +21,11 @@ impl<'m> MigrationRunner<'m> { } /// Get the current value neon_migration.migration_id - fn get_migration_id(&mut self) -> Result { + async fn get_migration_id(&mut self) -> Result { let row = self .client - .query_one("SELECT id FROM neon_migration.migration_id", &[])?; + .query_one("SELECT id FROM neon_migration.migration_id", &[]) + .await?; Ok(row.get::<&str, i64>("id")) } @@ -34,7 +35,7 @@ impl<'m> MigrationRunner<'m> { /// This function has a fail point called compute-migration, which can be /// used if you would like to fail the application of a series of migrations /// at some point. 
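For illustration, in a build with the `testing` feature this failpoint can be driven through the internal server's `/failpoints` route shown earlier. Assuming the internal HTTP server listens on its default port (the external port plus one, i.e. 3081), POSTing a JSON body such as

[{"name": "compute-migration", "actions": "exit"}]

to http://localhost:3081/failpoints configures it; the `actions` string follows the `fail::cfg` format, and "exit" is the extra action mentioned in the request type above.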
- fn update_migration_id(txn: &mut Transaction, migration_id: i64) -> Result<()> { + async fn update_migration_id(txn: &mut Transaction<'_>, migration_id: i64) -> Result<()> { // We use this fail point in order to check that failing in the // middle of applying a series of migrations fails in an expected // manner @@ -59,31 +60,38 @@ impl<'m> MigrationRunner<'m> { "UPDATE neon_migration.migration_id SET id = $1", &[&migration_id], ) + .await .with_context(|| format!("update neon_migration.migration_id to {migration_id}"))?; Ok(()) } /// Prepare the migrations the target database for handling migrations - fn prepare_database(&mut self) -> Result<()> { + async fn prepare_database(&mut self) -> Result<()> { self.client - .simple_query("CREATE SCHEMA IF NOT EXISTS neon_migration")?; - self.client.simple_query("CREATE TABLE IF NOT EXISTS neon_migration.migration_id (key INT NOT NULL PRIMARY KEY, id bigint NOT NULL DEFAULT 0)")?; - self.client.simple_query( - "INSERT INTO neon_migration.migration_id VALUES (0, 0) ON CONFLICT DO NOTHING", - )?; + .simple_query("CREATE SCHEMA IF NOT EXISTS neon_migration") + .await?; + self.client.simple_query("CREATE TABLE IF NOT EXISTS neon_migration.migration_id (key INT NOT NULL PRIMARY KEY, id bigint NOT NULL DEFAULT 0)").await?; self.client - .simple_query("ALTER SCHEMA neon_migration OWNER TO cloud_admin")?; + .simple_query( + "INSERT INTO neon_migration.migration_id VALUES (0, 0) ON CONFLICT DO NOTHING", + ) + .await?; self.client - .simple_query("REVOKE ALL ON SCHEMA neon_migration FROM PUBLIC")?; + .simple_query("ALTER SCHEMA neon_migration OWNER TO cloud_admin") + .await?; + self.client + .simple_query("REVOKE ALL ON SCHEMA neon_migration FROM PUBLIC") + .await?; Ok(()) } /// Run an individual migration in a separate transaction block. - fn run_migration(client: &mut Client, migration_id: i64, migration: &str) -> Result<()> { + async fn run_migration(client: &mut Client, migration_id: i64, migration: &str) -> Result<()> { let mut txn = client .transaction() + .await .with_context(|| format!("begin transaction for migration {migration_id}"))?; if migration.starts_with("-- SKIP") { @@ -92,35 +100,38 @@ impl<'m> MigrationRunner<'m> { // Even though we are skipping the migration, updating the // migration ID should help keep logic easy to understand when // trying to understand the state of a cluster. - Self::update_migration_id(&mut txn, migration_id)?; + Self::update_migration_id(&mut txn, migration_id).await?; } else { info!("Running migration id={}:\n{}\n", migration_id, migration); txn.simple_query(migration) + .await .with_context(|| format!("apply migration {migration_id}"))?; - Self::update_migration_id(&mut txn, migration_id)?; + Self::update_migration_id(&mut txn, migration_id).await?; } txn.commit() + .await .with_context(|| format!("commit transaction for migration {migration_id}"))?; Ok(()) } /// Run the configured set of migrations - pub fn run_migrations(mut self) -> Result<()> { + pub async fn run_migrations(mut self) -> Result<()> { self.prepare_database() + .await .context("prepare database to handle migrations")?; - let mut current_migration = self.get_migration_id()? as usize; + let mut current_migration = self.get_migration_id().await? 
as usize; while current_migration < self.migrations.len() { // The index lags the migration ID by 1, so the current migration // ID is also the next index let migration_id = (current_migration + 1) as i64; let migration = self.migrations[current_migration]; - match Self::run_migration(self.client, migration_id, migration) { + match Self::run_migration(self.client, migration_id, migration).await { Ok(_) => { info!("Finished migration id={}", migration_id); } diff --git a/compute_tools/src/pg_helpers.rs b/compute_tools/src/pg_helpers.rs index e03b410699..86fcf99085 100644 --- a/compute_tools/src/pg_helpers.rs +++ b/compute_tools/src/pg_helpers.rs @@ -7,7 +7,6 @@ use std::os::unix::fs::PermissionsExt; use std::path::Path; use std::process::Child; use std::str::FromStr; -use std::thread::JoinHandle; use std::time::{Duration, Instant}; use anyhow::{bail, Result}; @@ -16,6 +15,7 @@ use ini::Ini; use notify::{RecursiveMode, Watcher}; use postgres::config::Config; use tokio::io::AsyncBufReadExt; +use tokio::task::JoinHandle; use tokio::time::timeout; use tokio_postgres; use tokio_postgres::NoTls; @@ -477,23 +477,13 @@ pub async fn tune_pgbouncer(pgbouncer_config: HashMap) -> Result Ok(()) } -/// Spawn a thread that will read Postgres logs from `stderr`, join multiline logs +/// Spawn a task that will read Postgres logs from `stderr`, join multiline logs /// and send them to the logger. In the future we may also want to add context to /// these logs. -pub fn handle_postgres_logs(stderr: std::process::ChildStderr) -> JoinHandle<()> { - std::thread::spawn(move || { - let runtime = tokio::runtime::Builder::new_current_thread() - .enable_all() - .build() - .expect("failed to build tokio runtime"); - - let res = runtime.block_on(async move { - let stderr = tokio::process::ChildStderr::from_std(stderr)?; - handle_postgres_logs_async(stderr).await - }); - if let Err(e) = res { - tracing::error!("error while processing postgres logs: {}", e); - } +pub fn handle_postgres_logs(stderr: std::process::ChildStderr) -> JoinHandle> { + tokio::spawn(async move { + let stderr = tokio::process::ChildStderr::from_std(stderr)?; + handle_postgres_logs_async(stderr).await }) } diff --git a/compute_tools/src/spec.rs b/compute_tools/src/spec.rs index 37d5d3a1a6..73950cd95a 100644 --- a/compute_tools/src/spec.rs +++ b/compute_tools/src/spec.rs @@ -1,8 +1,8 @@ use anyhow::{anyhow, bail, Result}; -use postgres::Client; use reqwest::StatusCode; use std::fs::File; use std::path::Path; +use tokio_postgres::Client; use tracing::{error, info, instrument, warn}; use crate::config; @@ -166,17 +166,17 @@ pub fn add_standby_signal(pgdata_path: &Path) -> Result<()> { } #[instrument(skip_all)] -pub fn handle_neon_extension_upgrade(client: &mut Client) -> Result<()> { +pub async fn handle_neon_extension_upgrade(client: &mut Client) -> Result<()> { info!("handle neon extension upgrade"); let query = "ALTER EXTENSION neon UPDATE"; info!("update neon extension version with query: {}", query); - client.simple_query(query)?; + client.simple_query(query).await?; Ok(()) } #[instrument(skip_all)] -pub fn handle_migrations(client: &mut Client) -> Result<()> { +pub async fn handle_migrations(client: &mut Client) -> Result<()> { info!("handle migrations"); // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
@@ -206,7 +206,9 @@ pub fn handle_migrations(client: &mut Client) -> Result<()> { ), ]; - MigrationRunner::new(client, &migrations).run_migrations()?; + MigrationRunner::new(client, &migrations) + .run_migrations() + .await?; Ok(()) } @@ -214,7 +216,7 @@ pub fn handle_migrations(client: &mut Client) -> Result<()> { /// Connect to the database as superuser and pre-create anon extension /// if it is present in shared_preload_libraries #[instrument(skip_all)] -pub fn handle_extension_anon( +pub async fn handle_extension_anon( spec: &ComputeSpec, db_owner: &str, db_client: &mut Client, @@ -227,7 +229,7 @@ pub fn handle_extension_anon( if !grants_only { // check if extension is already initialized using anon.is_initialized() let query = "SELECT anon.is_initialized()"; - match db_client.query(query, &[]) { + match db_client.query(query, &[]).await { Ok(rows) => { if !rows.is_empty() { let is_initialized: bool = rows[0].get(0); @@ -249,7 +251,7 @@ pub fn handle_extension_anon( // Users cannot create it themselves, because superuser is required. let mut query = "CREATE EXTENSION IF NOT EXISTS anon CASCADE"; info!("creating anon extension with query: {}", query); - match db_client.query(query, &[]) { + match db_client.query(query, &[]).await { Ok(_) => {} Err(e) => { error!("anon extension creation failed with error: {}", e); @@ -259,7 +261,7 @@ pub fn handle_extension_anon( // check that extension is installed query = "SELECT extname FROM pg_extension WHERE extname = 'anon'"; - let rows = db_client.query(query, &[])?; + let rows = db_client.query(query, &[]).await?; if rows.is_empty() { error!("anon extension is not installed"); return Ok(()); @@ -268,7 +270,7 @@ pub fn handle_extension_anon( // Initialize anon extension // This also requires superuser privileges, so users cannot do it themselves. query = "SELECT anon.init()"; - match db_client.query(query, &[]) { + match db_client.query(query, &[]).await { Ok(_) => {} Err(e) => { error!("anon.init() failed with error: {}", e); @@ -279,7 +281,7 @@ pub fn handle_extension_anon( // check that extension is installed, if not bail early let query = "SELECT extname FROM pg_extension WHERE extname = 'anon'"; - match db_client.query(query, &[]) { + match db_client.query(query, &[]).await { Ok(rows) => { if rows.is_empty() { error!("anon extension is not installed"); @@ -294,12 +296,12 @@ pub fn handle_extension_anon( let query = format!("GRANT ALL ON SCHEMA anon TO {}", db_owner); info!("granting anon extension permissions with query: {}", query); - db_client.simple_query(&query)?; + db_client.simple_query(&query).await?; // Grant permissions to db_owner to use anon extension functions let query = format!("GRANT ALL ON ALL FUNCTIONS IN SCHEMA anon TO {}", db_owner); info!("granting anon extension permissions with query: {}", query); - db_client.simple_query(&query)?; + db_client.simple_query(&query).await?; // This is needed, because some functions are defined as SECURITY DEFINER. 
// In Postgres SECURITY DEFINER functions are executed with the privileges @@ -314,16 +316,16 @@ pub fn handle_extension_anon( where nsp.nspname = 'anon';", db_owner); info!("change anon extension functions owner to db owner"); - db_client.simple_query(&query)?; + db_client.simple_query(&query).await?; // affects views as well let query = format!("GRANT ALL ON ALL TABLES IN SCHEMA anon TO {}", db_owner); info!("granting anon extension permissions with query: {}", query); - db_client.simple_query(&query)?; + db_client.simple_query(&query).await?; let query = format!("GRANT ALL ON ALL SEQUENCES IN SCHEMA anon TO {}", db_owner); info!("granting anon extension permissions with query: {}", query); - db_client.simple_query(&query)?; + db_client.simple_query(&query).await?; } } diff --git a/control_plane/Cargo.toml b/control_plane/Cargo.toml index f718102847..162c49ec7c 100644 --- a/control_plane/Cargo.toml +++ b/control_plane/Cargo.toml @@ -33,6 +33,7 @@ postgres_backend.workspace = true safekeeper_api.workspace = true postgres_connection.workspace = true storage_broker.workspace = true +http-utils.workspace = true utils.workspace = true whoami.workspace = true diff --git a/control_plane/src/background_process.rs b/control_plane/src/background_process.rs index af312d73a7..c668e68402 100644 --- a/control_plane/src/background_process.rs +++ b/control_plane/src/background_process.rs @@ -261,7 +261,13 @@ fn fill_rust_env_vars(cmd: &mut Command) -> &mut Command { let mut filled_cmd = cmd.env_clear().env("RUST_BACKTRACE", backtrace_setting); // Pass through these environment variables to the command - for var in ["LLVM_PROFILE_FILE", "FAILPOINTS", "RUST_LOG"] { + for var in [ + "LLVM_PROFILE_FILE", + "FAILPOINTS", + "RUST_LOG", + "ASAN_OPTIONS", + "UBSAN_OPTIONS", + ] { if let Some(val) = std::env::var_os(var) { filled_cmd = filled_cmd.env(var, val); } diff --git a/control_plane/src/bin/neon_local.rs b/control_plane/src/bin/neon_local.rs index ba67ffa2dd..02d793400a 100644 --- a/control_plane/src/bin/neon_local.rs +++ b/control_plane/src/bin/neon_local.rs @@ -552,8 +552,10 @@ struct EndpointCreateCmdArgs { lsn: Option, #[clap(long)] pg_port: Option, + #[clap(long, alias = "http-port")] + external_http_port: Option, #[clap(long)] - http_port: Option, + internal_http_port: Option, #[clap(long = "pageserver-id")] endpoint_pageserver_id: Option, @@ -1353,7 +1355,8 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res tenant_id, timeline_id, args.pg_port, - args.http_port, + args.external_http_port, + args.internal_http_port, args.pg_version, mode, !args.update_catalog, diff --git a/control_plane/src/endpoint.rs b/control_plane/src/endpoint.rs index bc86d09103..3b2634204c 100644 --- a/control_plane/src/endpoint.rs +++ b/control_plane/src/endpoint.rs @@ -37,6 +37,8 @@ //! ``` //! 
use std::collections::BTreeMap; +use std::net::IpAddr; +use std::net::Ipv4Addr; use std::net::SocketAddr; use std::net::TcpStream; use std::path::PathBuf; @@ -73,7 +75,8 @@ pub struct EndpointConf { timeline_id: TimelineId, mode: ComputeMode, pg_port: u16, - http_port: u16, + external_http_port: u16, + internal_http_port: u16, pg_version: u32, skip_pg_catalog_updates: bool, drop_subscriptions_before_start: bool, @@ -128,7 +131,7 @@ impl ComputeControlPlane { 1 + self .endpoints .values() - .map(|ep| std::cmp::max(ep.pg_address.port(), ep.http_address.port())) + .map(|ep| std::cmp::max(ep.pg_address.port(), ep.external_http_address.port())) .max() .unwrap_or(self.base_port) } @@ -140,18 +143,27 @@ impl ComputeControlPlane { tenant_id: TenantId, timeline_id: TimelineId, pg_port: Option, - http_port: Option, + external_http_port: Option, + internal_http_port: Option, pg_version: u32, mode: ComputeMode, skip_pg_catalog_updates: bool, drop_subscriptions_before_start: bool, ) -> Result> { let pg_port = pg_port.unwrap_or_else(|| self.get_port()); - let http_port = http_port.unwrap_or_else(|| self.get_port() + 1); + let external_http_port = external_http_port.unwrap_or_else(|| self.get_port() + 1); + let internal_http_port = internal_http_port.unwrap_or_else(|| external_http_port + 1); let ep = Arc::new(Endpoint { endpoint_id: endpoint_id.to_owned(), - pg_address: SocketAddr::new("127.0.0.1".parse().unwrap(), pg_port), - http_address: SocketAddr::new("127.0.0.1".parse().unwrap(), http_port), + pg_address: SocketAddr::new(IpAddr::from(Ipv4Addr::LOCALHOST), pg_port), + external_http_address: SocketAddr::new( + IpAddr::from(Ipv4Addr::UNSPECIFIED), + external_http_port, + ), + internal_http_address: SocketAddr::new( + IpAddr::from(Ipv4Addr::LOCALHOST), + internal_http_port, + ), env: self.env.clone(), timeline_id, mode, @@ -176,7 +188,8 @@ impl ComputeControlPlane { tenant_id, timeline_id, mode, - http_port, + external_http_port, + internal_http_port, pg_port, pg_version, skip_pg_catalog_updates, @@ -230,9 +243,10 @@ pub struct Endpoint { pub timeline_id: TimelineId, pub mode: ComputeMode, - // port and address of the Postgres server and `compute_ctl`'s HTTP API + // port and address of the Postgres server and `compute_ctl`'s HTTP APIs pub pg_address: SocketAddr, - pub http_address: SocketAddr, + pub external_http_address: SocketAddr, + pub internal_http_address: SocketAddr, // postgres major version in the format: 14, 15, etc. 
pg_version: u32, @@ -287,8 +301,15 @@ impl Endpoint { serde_json::from_slice(&std::fs::read(entry.path().join("endpoint.json"))?)?; Ok(Endpoint { - pg_address: SocketAddr::new("127.0.0.1".parse().unwrap(), conf.pg_port), - http_address: SocketAddr::new("127.0.0.1".parse().unwrap(), conf.http_port), + pg_address: SocketAddr::new(IpAddr::from(Ipv4Addr::LOCALHOST), conf.pg_port), + external_http_address: SocketAddr::new( + IpAddr::from(Ipv4Addr::UNSPECIFIED), + conf.external_http_port, + ), + internal_http_address: SocketAddr::new( + IpAddr::from(Ipv4Addr::LOCALHOST), + conf.internal_http_port, + ), endpoint_id, env: env.clone(), timeline_id: conf.timeline_id, @@ -650,24 +671,51 @@ impl Endpoint { println!("Also at '{}'", conn_str); } let mut cmd = Command::new(self.env.neon_distrib_dir.join("compute_ctl")); - cmd.args(["--http-port", &self.http_address.port().to_string()]) - .args(["--pgdata", self.pgdata().to_str().unwrap()]) - .args(["--connstr", &conn_str]) - .args([ - "--spec-path", - self.endpoint_path().join("spec.json").to_str().unwrap(), - ]) - .args([ - "--pgbin", - self.env - .pg_bin_dir(self.pg_version)? - .join("postgres") - .to_str() - .unwrap(), - ]) - .stdin(std::process::Stdio::null()) - .stderr(logfile.try_clone()?) - .stdout(logfile); + //cmd.args([ + // "--external-http-port", + // &self.external_http_address.port().to_string(), + //]) + //.args([ + // "--internal-http-port", + // &self.internal_http_address.port().to_string(), + //]) + cmd.args([ + "--http-port", + &self.external_http_address.port().to_string(), + ]) + .args(["--pgdata", self.pgdata().to_str().unwrap()]) + .args(["--connstr", &conn_str]) + .args([ + "--spec-path", + self.endpoint_path().join("spec.json").to_str().unwrap(), + ]) + .args([ + "--pgbin", + self.env + .pg_bin_dir(self.pg_version)? + .join("postgres") + .to_str() + .unwrap(), + ]) + // TODO: It would be nice if we generated compute IDs with the same + // algorithm as the real control plane. + // + // TODO: Add this back when + // https://github.com/neondatabase/neon/pull/10747 is merged. + // + //.args([ + // "--compute-id", + // &format!( + // "compute-{}", + // SystemTime::now() + // .duration_since(UNIX_EPOCH) + // .unwrap() + // .as_secs() + // ), + //]) + .stdin(std::process::Stdio::null()) + .stderr(logfile.try_clone()?) 
+ .stdout(logfile); if let Some(remote_ext_config) = remote_ext_config { cmd.args(["--remote-ext-config", remote_ext_config]); @@ -754,8 +802,8 @@ impl Endpoint { reqwest::Method::GET, format!( "http://{}:{}/status", - self.http_address.ip(), - self.http_address.port() + self.external_http_address.ip(), + self.external_http_address.port() ), ) .send() @@ -828,8 +876,8 @@ impl Endpoint { let response = client .post(format!( "http://{}:{}/configure", - self.http_address.ip(), - self.http_address.port() + self.external_http_address.ip(), + self.external_http_address.port() )) .header(CONTENT_TYPE.as_str(), "application/json") .body(format!( diff --git a/control_plane/src/pageserver.rs b/control_plane/src/pageserver.rs index dd37bfc407..28d130d9e0 100644 --- a/control_plane/src/pageserver.rs +++ b/control_plane/src/pageserver.rs @@ -357,6 +357,16 @@ impl PageServerNode { .map(serde_json::from_str) .transpose() .context("Failed to parse 'compaction_algorithm' json")?, + compaction_l0_first: settings + .remove("compaction_l0_first") + .map(|x| x.parse::()) + .transpose() + .context("Failed to parse 'compaction_l0_first' as a bool")?, + compaction_l0_semaphore: settings + .remove("compaction_l0_semaphore") + .map(|x| x.parse::()) + .transpose() + .context("Failed to parse 'compaction_l0_semaphore' as a bool")?, l0_flush_delay_threshold: settings .remove("l0_flush_delay_threshold") .map(|x| x.parse::()) diff --git a/control_plane/src/safekeeper.rs b/control_plane/src/safekeeper.rs index f0c3722925..ce7751fb14 100644 --- a/control_plane/src/safekeeper.rs +++ b/control_plane/src/safekeeper.rs @@ -17,8 +17,10 @@ use camino::Utf8PathBuf; use postgres_connection::PgConnectionConfig; use reqwest::{IntoUrl, Method}; use thiserror::Error; + +use http_utils::error::HttpErrorBody; use utils::auth::{Claims, Scope}; -use utils::{http::error::HttpErrorBody, id::NodeId}; +use utils::id::NodeId; use crate::{ background_process, diff --git a/control_plane/src/storage_controller.rs b/control_plane/src/storage_controller.rs index c41ff22d15..9a2d30c861 100644 --- a/control_plane/src/storage_controller.rs +++ b/control_plane/src/storage_controller.rs @@ -221,7 +221,17 @@ impl StorageController { "-p", &format!("{}", postgres_port), ]; - let exitcode = Command::new(bin_path).args(args).spawn()?.wait().await?; + let pg_lib_dir = self.get_pg_lib_dir().await.unwrap(); + let envs = [ + ("LD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()), + ("DYLD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()), + ]; + let exitcode = Command::new(bin_path) + .args(args) + .envs(envs) + .spawn()? 
+        .wait()
+        .await?;

         Ok(exitcode.success())
     }
@@ -242,6 +252,11 @@ impl StorageController {
         let pg_bin_dir = self.get_pg_bin_dir().await?;
         let createdb_path = pg_bin_dir.join("createdb");
+        let pg_lib_dir = self.get_pg_lib_dir().await.unwrap();
+        let envs = [
+            ("LD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
+            ("DYLD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
+        ];
         let output = Command::new(&createdb_path)
             .args([
                 "-h",
@@ -254,6 +269,7 @@ impl StorageController {
                 &username(),
                 DB_NAME,
             ])
+            .envs(envs)
             .output()
             .await
             .expect("Failed to spawn createdb");
diff --git a/docker-compose/ext-src/pgjwt-src/test-upgrade.sh b/docker-compose/ext-src/pgjwt-src/test-upgrade.sh
index b7158d2340..efb8bfc184 100755
--- a/docker-compose/ext-src/pgjwt-src/test-upgrade.sh
+++ b/docker-compose/ext-src/pgjwt-src/test-upgrade.sh
@@ -2,4 +2,4 @@
 set -ex
 cd "$(dirname ${0})"
patch -p1 ),
@@ -313,9 +313,9 @@ similarly, in the first version it is ok to trigger it manually).

#### Schema

`safekeepers` table mirroring current `nodes` should be added, except that for
-`scheduling_policy` field (seems like `status` is a better name for it): it is enough
-to have at least in the beginning only 3 fields: 1) `active` 2) `offline` 3)
-`decomissioned`.
+`scheduling_policy`: at least in the beginning it is enough to have only 3
+values: 1) `active` 2) `paused` (initially only means that new timelines are
+not assigned there) 3) `decomissioned` (node is removed).

`timelines` table:
```
table! {
    timelines (tenant_id, timeline_id) {
        timeline_id -> Varchar,
        tenant_id -> Varchar,
+       start_lsn -> pg_lsn,
        generation -> Int4,
        sk_set -> Array, // list of safekeeper ids
-       new_sk_set -> Nullable>, // list of safekeeper ids, null if not joint conf
+       new_sk_set -> Nullable>, // list of safekeeper ids, null if not joint conf
        cplane_notified_generation -> Int4,
+       deleted_at -> Nullable,
    }
}
```

+`start_lsn` is needed to create the timeline on safekeepers properly, see
+below. We might also want to add ancestor_timeline_id to preserve the
+hierarchy, but for this RFC it is not needed.
+
#### API

Node management is similar to pageserver:
-1) POST `/control/v1/safekeepers` upserts safekeeper.
+1) POST `/control/v1/safekeepers` inserts safekeeper.
2) GET `/control/v1/safekeepers` lists safekeepers.
3) GET `/control/v1/safekeepers/:node_id` gets safekeeper.
4) PUT `/control/v1/safekepers/:node_id/status` changes status to e.g.
@@ -345,25 +351,15 @@ Node management is similar to pageserver:
Safekeeper deploy scripts should register safekeeper at storage_contorller as
they currently do with cplane, under the same id.

-Timeline creation/deletion: already existing POST `tenant/:tenant_id/timeline`
-would 1) choose initial set of safekeepers; 2) write to the db initial
-`Configuration` with `INSERT ON CONFLICT DO NOTHING` returning existing row in
-case of conflict; 3) create timeline on the majority of safekeepers (already
-created is ok).
+Timeline creation/deletion will work through already existing POST and DELETE
+`tenant/:tenant_id/timeline`. Cplane is expected to retry both until they
+succeed. See the next section for the implementation details.

-We don't want to block timeline creation when one safekeeper is down. Currently
-this is solved by compute implicitly creating timeline on any safekeeper it is
-connected to. This creates ugly timeline state on safekeeper when timeline is
-created, but start LSN is not defined yet.
It would be nice to remove this; to
-do that, controller can in the background retry to create timeline on
-safekeeper(s) which missed that during initial creation call. It can do that
-through `pull_timeline` from majority so it doesn't need to remember
-`parent_lsn` in its db.
-
-Timeline deletion removes the row from the db and forwards deletion to the
-current configuration members. Without additional actions deletions might leak,
-see below on this; initially let's ignore these, reporting to cplane success if
-at least one safekeeper deleted the timeline (this will remove s3 data).
+We don't want to block timeline creation/deletion when one safekeeper is down.
+Currently this is worked around by compute implicitly creating the timeline on
+any safekeeper it is connected to. This creates an ugly timeline state on the
+safekeeper when the timeline is created but its start LSN is not defined yet.
+The next section describes dealing with this.

Tenant deletion repeats timeline deletion for all timelines.

@@ -395,26 +391,6 @@ Similar call should be added for the tenant.
It would be great to have some way of subscribing to the results (apart from
looking at logs/metrics).

-Migration is executed as described above. One subtlety is that (local) deletion on
-source safekeeper might fail, which is not a problem if we are going to
-decomission the node but leaves garbage otherwise. I'd propose in the first version
-1) Don't attempt deletion at all if node status is `offline`.
-2) If it failed, just issue warning.
-And add PUT `/control/v1/safekeepers/:node_id/scrub` endpoint which would find and
-remove garbage timelines for manual use. It will 1) list all timelines on the
-safekeeper 2) compare each one against configuration storage: if timeline
-doesn't exist at all (had been deleted), it can be deleted. Otherwise, it can
-be deleted under generation number if node is not member of current generation.
-
-Automating this is untrivial; we'd need to register all potential missing
-deletions in the same transaction
-which switches configurations. Similarly when timeline is fully deleted to
-prevent cplane operation from blocking when some safekeeper is not available
-deletion should be also registered.
-
-One more task pool should infinitely retry notifying control plane about changed
-safekeeper sets.
-
3) GET `/control/v1/tenant/:tenant_id/timeline/:timeline_id/` should return
current in memory state of the timeline and pending `MigrationRequest`, if any.

@@ -423,12 +399,153 @@ safekeeper sets.
migration by switching configuration from the joint to the one with (previous)
`sk_set` under CAS (incrementing generation as always).

+#### API implementation and reconciliation
+
+For timeline creation/deletion we want to preserve the basic assumption that an
+unreachable minority (1 sk of 3) doesn't block their completion, but eventually
+we want to finish creation/deletion on nodes which missed it (unless they are
+removed). Similarly for migration; it may and should finish even though excluded
+members missed their exclusion. For example, such a pending exclusion on node C
+after migration ABC -> ABD must not prevent the next migration ABD -> ABE. As
+another example, if some node missed timeline creation it clearly must not block
+migration from it. Hence it is natural to have a per-safekeeper background
+reconciler which retries these ops until they succeed.
+There are 3 possible operation types, and the type is defined by the timeline
+state (membership configuration and whether it is deleted) and the safekeeper
+id: we may need to create the timeline on the sk (node added), locally delete
+it (node excluded, somewhat similar to detach) or globally delete it (timeline
+is deleted).
+
+Next, on storage controller restart these pending operations can in principle
+be figured out by comparing safekeepers' state against storcon state. But it
+seems better to me to materialize them in the database; it is not expensive,
+avoids these startup scans which themselves can fail, etc., and makes it very
+easy to see outstanding work directly at the source of truth -- the db. So we
+can add a table `safekeeper_timeline_pending_ops`:
+```
+table! {
+    // timeline_id, sk_id is primary key
+    safekeeper_timeline_pending_ops (sk_id, tenant_id, timeline_id) {
+        sk_id -> int8,
+        tenant_id -> Varchar,
+        timeline_id -> Varchar,
+        generation -> Int4,
+        op_type -> Varchar,
+    }
+}
+```
+
+`op_type` can be `include` (seed from peers and ensure generation is up to
+date), `exclude` (remove locally) and `delete`. The field is actually not
+strictly needed, as it can be computed from the current configuration, but it
+gives more explicit observability.
+
+`generation` is necessary there because after an op is done the reconciler must
+remove it without removing another row with a higher gen which in theory might
+appear.
+
+Any insert of a row should overwrite (remove) all rows with the same sk and
+timeline id but lower `generation`, as the next op makes the previous one
+obsolete. Insertion of `op_type` `delete` overwrites all rows.
+
+About `exclude`: rather than adding an explicit safekeeper http endpoint, it is
+reasonable to reuse the membership switch endpoint: if the safekeeper is not a
+member of the configuration it locally removes the timeline on the switch. In
+this case 404 should also be considered an 'ok' answer by the caller.
+
+So, the main loop of the per-sk reconciler reads
+`safekeeper_timeline_pending_ops` joined with the timeline configuration to get
+the current conf (with generation `n`) for the safekeeper and does the jobs,
+infinitely retrying failures:
+1) If node is member (`include`):
+   - Check if timeline exists on it, if not, call pull_timeline on it from
+     other members
+   - Call switch configuration to the current
+2) If node is not member (`exclude`):
+   - Call switch configuration to the current, 404 is ok.
+3) If timeline is deleted (`delete`), call delete.
+
+In cases 1 and 2 remove the `safekeeper_timeline_pending_ops` entry for the sk
+and timeline with generation <= `n` if `op_type` is not `delete`. In case 3
+also remove the `safekeeper_timeline_pending_ops` entry, plus remove the
+`timelines` entry if there is nothing left in `safekeeper_timeline_pending_ops`
+for the timeline.
+
+Let's consider in detail how the APIs can be implemented from this angle.
+
+Timeline creation. It is assumed that cplane retries it until success, so all
+actions must be idempotent. Now, a tricky point here is the timeline start LSN.
+For the initial (tenant creation) call cplane doesn't know it. However, setting
+start_lsn on safekeepers during creation is a good thing -- it provides a
+guarantee that walproposer can always find a common point in the WAL histories
+of the safekeeper and its own, and so absence of it would be a clear sign of
+corruption. The following sequence works:
+1) Create timeline (or observe that it exists) on pageserver,
+   figuring out last_record_lsn in response.
+2) Choose safekeepers and insert (ON CONFLICT DO NOTHING) timeline row into the
+   db.
+   Note that the last_record_lsn returned on the previous step is movable, as
+   it changes once ingestion starts; the insert must not overwrite it (as well
+   as other fields like the membership conf). On the contrary, start_lsn used
+   in the next step must be set to the value in the db.
+   cplane_notified_generation can be set to 1 (initial generation) in the
+   insert to avoid notifying cplane about the initial conf, as cplane will
+   receive it in the timeline creation request anyway.
+3) Issue timeline creation calls to at least a majority of safekeepers. Using
+   a majority here is not necessary but handy because it guarantees that any
+   live majority will have at least one sk with the created timeline, and so
+   the reconciliation task can use pull_timeline shared with migration instead
+   of a special create-timeline init case. Of course, if the timeline already
+   exists the call is ignored.
+4) For the minority of safekeepers which could have missed creation, insert
+   entries into `safekeeper_timeline_pending_ops`. We won't miss this insertion
+   because the response to cplane is sent only after it has happened, and
+   cplane retries the call until a 200 response.
+
+   There is a small question of how the request handler (timeline creation in
+   this case) would interact with the per-sk reconciler. As always I prefer to
+   do the simplest possible thing, and here it seems to be just waking it up so
+   it re-reads the db for work to do. Passing work in memory is faster, but
+   that shouldn't matter, and the path to scan the db for work will exist
+   anyway, so it is simpler to reuse it.
+
+For pg version / wal segment size: while we may persist them in the `timelines`
+table, it is not necessary, as the initial creation at step 3 can take them
+from the pageserver or the cplane creation call, and later pull_timeline will
+carry them around.
+
+Timeline migration.
+1) CAS to the db to create the joint conf, and in the same transaction create
+   `safekeeper_timeline_pending_ops` `include` entries to initialize new
+   members as well as deliver this conf to the current ones; poke per-sk
+   reconcilers to work on it. Any conf change should also poke the cplane
+   notifier task(s).
+2) Once it becomes possible per the algorithm description above, get out of the
+   joint conf with another CAS. The task should get wakeups from per-sk
+   reconcilers because a conf switch is required for advancement; however,
+   retries should be sleep based as well, since LSN advancement might be
+   needed, though in the happy path it isn't. To see whether a further
+   transition is possible, on wakeup the migration executor polls safekeepers
+   per the algorithm. The CAS creating the new conf with only new members
+   should again insert entries into `safekeeper_timeline_pending_ops` to switch
+   them there, as well as `exclude` rows to remove the timeline from the old
+   members.
+
+Timeline deletion: just set `deleted_at` on the timeline row and insert
+`safekeeper_timeline_pending_ops` entries in the same xact; the rest is done by
+per-sk reconcilers.
+
+When a node is removed (set to `decomissioned`),
+`safekeeper_timeline_pending_ops` for it must be cleared in the same
+transaction.
+
+One more task pool should infinitely retry notifying control plane about
+changed safekeeper sets (trying to make `cplane_notified_generation` equal
+`generation`).
+
#### Dealing with multiple instances of storage_controller

Operations described above executed concurrently might create some errors but
do not prevent progress, so while we normally don't want to run multiple
instances of storage_controller it is fine to have it temporarily, e.g. during
redeploy.
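The concurrency story above leans on every configuration change being a generation-guarded compare-and-swap. As a purely illustrative aid (not part of the change set), the sketch below shows what such a guarded switch plus the `safekeeper_timeline_pending_ops` bookkeeping could look like in plain SQL: table and column names follow the schema sketched earlier, the `$n` parameters are placeholders, and a real storage_controller would express this through its Diesel layer rather than raw SQL.

```
-- Illustrative sketch only: generation-guarded ("CAS") switch out of a joint
-- conf, plus pending-op bookkeeping, in one transaction.
-- Placeholders: $1 tenant_id, $2 timeline_id, $3 expected current generation,
--               $4 new sk_set, $5 id of a safekeeper being excluded.
BEGIN;

-- Advance the configuration only if nobody else advanced it first.
UPDATE timelines
SET generation = $3 + 1,
    sk_set = $4,
    new_sk_set = NULL
WHERE tenant_id = $1
  AND timeline_id = $2
  AND generation = $3;
-- If this updated 0 rows, another controller instance won the race:
-- roll back and re-read the current configuration instead of proceeding.

-- A new op supersedes pending rows with lower generations for this timeline.
DELETE FROM safekeeper_timeline_pending_ops
WHERE tenant_id = $1
  AND timeline_id = $2
  AND generation <= $3;

-- Ask the per-safekeeper reconciler to remove the timeline from the excluded node.
INSERT INTO safekeeper_timeline_pending_ops (sk_id, tenant_id, timeline_id, generation, op_type)
VALUES ($5, $1, $2, $3 + 1, 'exclude');

COMMIT;
```

Because each mutation only applies while the expected generation still matches, two controller instances racing on the same timeline can at worst have one of them fail the CAS and re-read the db, which is exactly the "errors but no lost progress" behaviour described above.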
+To harden against some controller instance creating work in
+`safekeeper_timeline_pending_ops` and then disappearing without anyone picking
+up the job, per-sk reconcilers should, apart from explicit wakeups, scan for
+work periodically. It is possible to remove that, though, if all db updates are
+protected with a leadership token/term -- then such scans are needed only after
+leadership is acquired.
+
Any interactions with db update in-memory controller state, e.g. if migration
request failed because different one is in progress, controller remembers that
and tries to finish it.

@@ -545,7 +662,7 @@ Aurora does this but similarly I don't think this is needed.

We should use Compute <-> safekeeper protocol change to include other (long
yearned) modifications:
-- send data in network order to make arm work.
+- send data in network order without putting whole structs on the wire, to be arch independent
- remove term_start_lsn from AppendRequest
- add horizon to TermHistory
- add to ProposerGreeting number of connection from this wp to sk
diff --git a/libs/compute_api/src/spec.rs b/libs/compute_api/src/spec.rs
index 2fc95c47c6..767a34bcbc 100644
--- a/libs/compute_api/src/spec.rs
+++ b/libs/compute_api/src/spec.rs
@@ -207,11 +207,11 @@ impl RemoteExtSpec {
            if !self
                .public_extensions
                .as_ref()
-                .is_some_and(|exts| exts.iter().any(|e| e == ext_name))
+                .is_some_and(|exts| exts.iter().any(|e| e == real_ext_name))
                && !self
                    .custom_extensions
                    .as_ref()
-                    .is_some_and(|exts| exts.iter().any(|e| e == ext_name))
+                    .is_some_and(|exts| exts.iter().any(|e| e == real_ext_name))
            {
                return Err(anyhow::anyhow!("extension {} is not found", real_ext_name));
            }
@@ -414,7 +414,7 @@ mod tests {
            "public_extensions": ["ext"],
            "custom_extensions": [],
            "library_index": {
-                "ext": "ext"
+                "extlib": "ext",
            },
            "extension_data": {
                "ext": {
@@ -430,6 +430,12 @@ mod tests {
        rspec
            .get_ext("ext", false, "latest", "v17")
            .expect("Extension should be found");
+
+        // test library index for the case when library name
+        // doesn't match the extension name
+        rspec
+            .get_ext("extlib", true, "latest", "v17")
+            .expect("Library should be found");
    }

    #[test]
diff --git a/libs/http-utils/Cargo.toml b/libs/http-utils/Cargo.toml
new file mode 100644
index 0000000000..d72e4bd012
--- /dev/null
+++ b/libs/http-utils/Cargo.toml
@@ -0,0 +1,37 @@
+[package]
+name = "http-utils"
+version = "0.1.0"
+edition.workspace = true
+license.workspace = true
+
+[dependencies]
+anyhow.workspace = true
+backtrace.workspace = true
+bytes.workspace = true
+inferno.workspace = true
+fail.workspace = true
+flate2.workspace = true
+hyper0.workspace = true
+itertools.workspace = true
+jemalloc_pprof.workspace = true
+once_cell.workspace = true
+pprof.workspace = true
+regex.workspace = true
+routerify.workspace = true
+serde.workspace = true
+serde_json.workspace = true
+serde_path_to_error.workspace = true
+thiserror.workspace = true
+tracing.workspace = true
+tokio.workspace = true
+tokio-util.workspace = true
+url.workspace = true
+uuid.workspace = true
+
+# to use tokio channels as streams, this is faster to compile than async_stream
+# why is it only here? no other crate should use it, streams are rarely needed.
+tokio-stream = { version = "0.1.14" } + +metrics.workspace = true +utils.workspace = true +workspace_hack = { version = "0.1", path = "../../workspace_hack" } diff --git a/libs/utils/src/http/endpoint.rs b/libs/http-utils/src/endpoint.rs similarity index 99% rename from libs/utils/src/http/endpoint.rs rename to libs/http-utils/src/endpoint.rs index 9f38373ca0..be97b341d1 100644 --- a/libs/utils/src/http/endpoint.rs +++ b/libs/http-utils/src/endpoint.rs @@ -1,7 +1,6 @@ -use crate::auth::{AuthError, Claims, SwappableJwtAuth}; -use crate::http::error::{api_error_handler, route_error_handler, ApiError}; -use crate::http::request::{get_query_param, parse_query_param}; +use crate::error::{api_error_handler, route_error_handler, ApiError}; use crate::pprof; +use crate::request::{get_query_param, parse_query_param}; use ::pprof::protos::Message as _; use ::pprof::ProfilerGuardBuilder; use anyhow::{anyhow, Context}; @@ -19,6 +18,7 @@ use tokio::sync::{mpsc, Mutex, Notify}; use tokio_stream::wrappers::ReceiverStream; use tokio_util::io::ReaderStream; use tracing::{debug, info, info_span, warn, Instrument}; +use utils::auth::{AuthError, Claims, SwappableJwtAuth}; use std::future::Future; use std::io::Write as _; @@ -718,9 +718,9 @@ pub fn check_permission_with( #[cfg(test)] mod tests { use super::*; - use futures::future::poll_fn; use hyper::service::Service; use routerify::RequestServiceBuilder; + use std::future::poll_fn; use std::net::{IpAddr, SocketAddr}; #[tokio::test] diff --git a/libs/utils/src/http/error.rs b/libs/http-utils/src/error.rs similarity index 93% rename from libs/utils/src/http/error.rs rename to libs/http-utils/src/error.rs index 02fc9e3b99..746305caec 100644 --- a/libs/utils/src/http/error.rs +++ b/libs/http-utils/src/error.rs @@ -5,6 +5,8 @@ use std::error::Error as StdError; use thiserror::Error; use tracing::{error, info, warn}; +use utils::auth::AuthError; + #[derive(Debug, Error)] pub enum ApiError { #[error("Bad request: {0:#?}")] @@ -96,6 +98,15 @@ impl ApiError { } } +impl From for ApiError { + fn from(_value: AuthError) -> Self { + // Don't pass on the value of the AuthError as a precautionary measure. + // Being intentionally vague in public error communication hurts debugability + // but it is more secure. + ApiError::Forbidden("JWT authentication error".to_string()) + } +} + #[derive(Serialize, Deserialize)] pub struct HttpErrorBody { pub msg: String, diff --git a/libs/http-utils/src/failpoints.rs b/libs/http-utils/src/failpoints.rs new file mode 100644 index 0000000000..8a1e0c8cf0 --- /dev/null +++ b/libs/http-utils/src/failpoints.rs @@ -0,0 +1,50 @@ +use crate::error::ApiError; +use crate::json::{json_request, json_response}; + +use hyper::{Body, Request, Response, StatusCode}; +use serde::{Deserialize, Serialize}; +use tokio_util::sync::CancellationToken; + +use utils::failpoint_support::apply_failpoint; + +pub type ConfigureFailpointsRequest = Vec; + +/// Information for configuring a single fail point +#[derive(Debug, Serialize, Deserialize)] +pub struct FailpointConfig { + /// Name of the fail point + pub name: String, + /// List of actions to take, using the format described in `fail::cfg` + /// + /// We also support `actions = "exit"` to cause the fail point to immediately exit. + pub actions: String, +} + +/// Configure failpoints through http. 
+pub async fn failpoints_handler( + mut request: Request, + _cancel: CancellationToken, +) -> Result, ApiError> { + if !fail::has_failpoints() { + return Err(ApiError::BadRequest(anyhow::anyhow!( + "Cannot manage failpoints because neon was compiled without failpoints support" + ))); + } + + let failpoints: ConfigureFailpointsRequest = json_request(&mut request).await?; + for fp in failpoints { + tracing::info!("cfg failpoint: {} {}", fp.name, fp.actions); + + // We recognize one extra "action" that's not natively recognized + // by the failpoints crate: exit, to immediately kill the process + let cfg_result = apply_failpoint(&fp.name, &fp.actions); + + if let Err(err_msg) = cfg_result { + return Err(ApiError::BadRequest(anyhow::anyhow!( + "Failed to configure failpoints: {err_msg}" + ))); + } + } + + json_response(StatusCode::OK, ()) +} diff --git a/libs/utils/src/http/json.rs b/libs/http-utils/src/json.rs similarity index 100% rename from libs/utils/src/http/json.rs rename to libs/http-utils/src/json.rs diff --git a/libs/utils/src/http/mod.rs b/libs/http-utils/src/lib.rs similarity index 82% rename from libs/utils/src/http/mod.rs rename to libs/http-utils/src/lib.rs index 74ed6bb5b2..ae6a27aaa8 100644 --- a/libs/utils/src/http/mod.rs +++ b/libs/http-utils/src/lib.rs @@ -1,8 +1,12 @@ pub mod endpoint; pub mod error; +pub mod failpoints; pub mod json; +pub mod pprof; pub mod request; +extern crate hyper0 as hyper; + /// Current fast way to apply simple http routing in various Neon binaries. /// Re-exported for sake of uniform approach, that could be later replaced with better alternatives, if needed. pub use routerify::{ext::RequestExt, RouterBuilder, RouterService}; diff --git a/libs/utils/src/pprof.rs b/libs/http-utils/src/pprof.rs similarity index 100% rename from libs/utils/src/pprof.rs rename to libs/http-utils/src/pprof.rs diff --git a/libs/utils/src/http/request.rs b/libs/http-utils/src/request.rs similarity index 100% rename from libs/utils/src/http/request.rs rename to libs/http-utils/src/request.rs diff --git a/libs/pageserver_api/src/config.rs b/libs/pageserver_api/src/config.rs index a0b5feea94..79f068a47b 100644 --- a/libs/pageserver_api/src/config.rs +++ b/libs/pageserver_api/src/config.rs @@ -121,6 +121,7 @@ pub struct ConfigToml { pub wal_receiver_protocol: PostgresClientProtocol, pub page_service_pipelining: PageServicePipeliningConfig, pub get_vectored_concurrent_io: GetVectoredConcurrentIo, + pub enable_read_path_debugging: Option, } #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)] @@ -262,6 +263,11 @@ pub struct TenantConfigToml { /// size exceeds `compaction_upper_limit * checkpoint_distance`. pub compaction_upper_limit: usize, pub compaction_algorithm: crate::models::CompactionAlgorithmSettings, + /// If true, compact down L0 across all tenant timelines before doing regular compaction. + pub compaction_l0_first: bool, + /// If true, use a separate semaphore (i.e. concurrency limit) for the L0 compaction pass. Only + /// has an effect if `compaction_l0_first` is `true`. + pub compaction_l0_semaphore: bool, /// Level0 delta layer threshold at which to delay layer flushes for compaction backpressure, /// such that they take 2x as long, and start waiting for layer flushes during ephemeral layer /// rolls. 
This helps compaction keep up with WAL ingestion, and avoids read amplification @@ -490,7 +496,7 @@ impl Default for ConfigToml { NonZeroUsize::new(DEFAULT_MAX_VECTORED_READ_BYTES).unwrap(), )), image_compression: (DEFAULT_IMAGE_COMPRESSION), - timeline_offloading: false, + timeline_offloading: true, ephemeral_bytes_per_memory_kb: (DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB), l0_flush: None, virtual_file_io_mode: None, @@ -510,6 +516,11 @@ impl Default for ConfigToml { } else { GetVectoredConcurrentIo::SidecarTask }, + enable_read_path_debugging: if cfg!(test) || cfg!(feature = "testing") { + Some(true) + } else { + None + }, } } } @@ -537,6 +548,8 @@ pub mod tenant_conf_defaults { // most of our pageservers. Compaction ~50 layers requires about 2GB memory (could be reduced later by optimizing L0 hole // calculation to avoid loading all keys into the memory). So with this config, we can get a maximum peak compaction usage of 18GB. pub const DEFAULT_COMPACTION_UPPER_LIMIT: usize = 50; + pub const DEFAULT_COMPACTION_L0_FIRST: bool = false; + pub const DEFAULT_COMPACTION_L0_SEMAPHORE: bool = true; pub const DEFAULT_COMPACTION_ALGORITHM: crate::models::CompactionAlgorithm = crate::models::CompactionAlgorithm::Legacy; @@ -586,6 +599,8 @@ impl Default for TenantConfigToml { compaction_algorithm: crate::models::CompactionAlgorithmSettings { kind: DEFAULT_COMPACTION_ALGORITHM, }, + compaction_l0_first: DEFAULT_COMPACTION_L0_FIRST, + compaction_l0_semaphore: DEFAULT_COMPACTION_L0_SEMAPHORE, l0_flush_delay_threshold: None, l0_flush_stall_threshold: None, l0_flush_wait_upload: DEFAULT_L0_FLUSH_WAIT_UPLOAD, @@ -616,7 +631,7 @@ impl Default for TenantConfigToml { image_creation_preempt_threshold: DEFAULT_IMAGE_CREATION_PREEMPT_THRESHOLD, lsn_lease_length: LsnLease::DEFAULT_LENGTH, lsn_lease_length_for_ts: LsnLease::DEFAULT_LENGTH_FOR_TS, - timeline_offloading: false, + timeline_offloading: true, wal_receiver_protocol_override: None, rel_size_v2_enabled: None, gc_compaction_enabled: DEFAULT_GC_COMPACTION_ENABLED, diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs index 19beb37ab3..6dbfbec345 100644 --- a/libs/pageserver_api/src/models.rs +++ b/libs/pageserver_api/src/models.rs @@ -464,6 +464,10 @@ pub struct TenantConfigPatch { #[serde(skip_serializing_if = "FieldPatch::is_noop")] pub compaction_algorithm: FieldPatch, #[serde(skip_serializing_if = "FieldPatch::is_noop")] + pub compaction_l0_first: FieldPatch, + #[serde(skip_serializing_if = "FieldPatch::is_noop")] + pub compaction_l0_semaphore: FieldPatch, + #[serde(skip_serializing_if = "FieldPatch::is_noop")] pub l0_flush_delay_threshold: FieldPatch, #[serde(skip_serializing_if = "FieldPatch::is_noop")] pub l0_flush_stall_threshold: FieldPatch, @@ -529,6 +533,8 @@ pub struct TenantConfig { pub compaction_upper_limit: Option, // defer parsing compaction_algorithm, like eviction_policy pub compaction_algorithm: Option, + pub compaction_l0_first: Option, + pub compaction_l0_semaphore: Option, pub l0_flush_delay_threshold: Option, pub l0_flush_stall_threshold: Option, pub l0_flush_wait_upload: Option, @@ -567,6 +573,8 @@ impl TenantConfig { mut compaction_threshold, mut compaction_upper_limit, mut compaction_algorithm, + mut compaction_l0_first, + mut compaction_l0_semaphore, mut l0_flush_delay_threshold, mut l0_flush_stall_threshold, mut l0_flush_wait_upload, @@ -606,6 +614,10 @@ impl TenantConfig { .compaction_upper_limit .apply(&mut compaction_upper_limit); patch.compaction_algorithm.apply(&mut compaction_algorithm); + 
patch.compaction_l0_first.apply(&mut compaction_l0_first); + patch + .compaction_l0_semaphore + .apply(&mut compaction_l0_semaphore); patch .l0_flush_delay_threshold .apply(&mut l0_flush_delay_threshold); @@ -669,6 +681,8 @@ impl TenantConfig { compaction_threshold, compaction_upper_limit, compaction_algorithm, + compaction_l0_first, + compaction_l0_semaphore, l0_flush_delay_threshold, l0_flush_stall_threshold, l0_flush_wait_upload, diff --git a/libs/postgres_ffi/wal_craft/src/lib.rs b/libs/postgres_ffi/wal_craft/src/lib.rs index 9524a5149b..77dff4ac99 100644 --- a/libs/postgres_ffi/wal_craft/src/lib.rs +++ b/libs/postgres_ffi/wal_craft/src/lib.rs @@ -76,7 +76,15 @@ impl Conf { let mut cmd = Command::new(path); cmd.env_clear() .env("LD_LIBRARY_PATH", self.pg_lib_dir()?) - .env("DYLD_LIBRARY_PATH", self.pg_lib_dir()?); + .env("DYLD_LIBRARY_PATH", self.pg_lib_dir()?) + .env( + "ASAN_OPTIONS", + std::env::var("ASAN_OPTIONS").unwrap_or_default(), + ) + .env( + "UBSAN_OPTIONS", + std::env::var("UBSAN_OPTIONS").unwrap_or_default(), + ); Ok(cmd) } diff --git a/libs/postgres_initdb/src/lib.rs b/libs/postgres_initdb/src/lib.rs index 2f072354fb..ed54696861 100644 --- a/libs/postgres_initdb/src/lib.rs +++ b/libs/postgres_initdb/src/lib.rs @@ -64,6 +64,14 @@ pub async fn do_run_initdb(args: RunInitdbArgs<'_>) -> Result<(), Error> { .env_clear() .env("LD_LIBRARY_PATH", library_search_path) .env("DYLD_LIBRARY_PATH", library_search_path) + .env( + "ASAN_OPTIONS", + std::env::var("ASAN_OPTIONS").unwrap_or_default(), + ) + .env( + "UBSAN_OPTIONS", + std::env::var("UBSAN_OPTIONS").unwrap_or_default(), + ) .stdin(std::process::Stdio::null()) // stdout invocation produces the same output every time, we don't need it .stdout(std::process::Stdio::null()) diff --git a/libs/remote_storage/src/config.rs b/libs/remote_storage/src/config.rs index dae141bf77..ff34158c9c 100644 --- a/libs/remote_storage/src/config.rs +++ b/libs/remote_storage/src/config.rs @@ -7,7 +7,7 @@ use serde::{Deserialize, Serialize}; use crate::{ DEFAULT_MAX_KEYS_PER_LIST_RESPONSE, DEFAULT_REMOTE_STORAGE_AZURE_CONCURRENCY_LIMIT, - DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT, + DEFAULT_REMOTE_STORAGE_LOCALFS_CONCURRENCY_LIMIT, DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT, }; /// External backup storage configuration, enough for creating a client for that storage. @@ -45,11 +45,11 @@ impl RemoteStorageKind { impl RemoteStorageConfig { /// Helper to fetch the configured concurrency limit. - pub fn concurrency_limit(&self) -> Option { + pub fn concurrency_limit(&self) -> usize { match &self.storage { - RemoteStorageKind::LocalFs { .. } => None, - RemoteStorageKind::AwsS3(c) => Some(c.concurrency_limit.into()), - RemoteStorageKind::AzureContainer(c) => Some(c.concurrency_limit.into()), + RemoteStorageKind::LocalFs { .. } => DEFAULT_REMOTE_STORAGE_LOCALFS_CONCURRENCY_LIMIT, + RemoteStorageKind::AwsS3(c) => c.concurrency_limit.into(), + RemoteStorageKind::AzureContainer(c) => c.concurrency_limit.into(), } } } diff --git a/libs/remote_storage/src/lib.rs b/libs/remote_storage/src/lib.rs index 7a864151ec..69b522d63e 100644 --- a/libs/remote_storage/src/lib.rs +++ b/libs/remote_storage/src/lib.rs @@ -65,6 +65,12 @@ pub const DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT: usize = 100; /// Here, a limit of max 20k concurrent connections was noted. /// pub const DEFAULT_REMOTE_STORAGE_AZURE_CONCURRENCY_LIMIT: usize = 100; +/// Set this limit analogously to the S3 limit. 
+/// +/// The local filesystem backend doesn't enforce a concurrency limit itself, but this also bounds +/// the upload queue concurrency. Some tests create thousands of uploads, which slows down the +/// quadratic scheduling of the upload queue, and there is no point spawning so many Tokio tasks. +pub const DEFAULT_REMOTE_STORAGE_LOCALFS_CONCURRENCY_LIMIT: usize = 100; /// No limits on the client side, which currenltly means 1000 for AWS S3. /// pub const DEFAULT_MAX_KEYS_PER_LIST_RESPONSE: Option = None; diff --git a/libs/utils/Cargo.toml b/libs/utils/Cargo.toml index edb451a02c..0f10300959 100644 --- a/libs/utils/Cargo.toml +++ b/libs/utils/Cargo.toml @@ -21,23 +21,17 @@ bytes.workspace = true camino.workspace = true chrono.workspace = true diatomic-waker.workspace = true -flate2.workspace = true git-version.workspace = true hex = { workspace = true, features = ["serde"] } humantime.workspace = true -hyper0 = { workspace = true, features = ["full"] } inferno.workspace = true -itertools.workspace = true fail.workspace = true futures = { workspace = true } -jemalloc_pprof.workspace = true jsonwebtoken.workspace = true nix.workspace = true once_cell.workspace = true pin-project-lite.workspace = true -pprof.workspace = true regex.workspace = true -routerify.workspace = true serde.workspace = true serde_with.workspace = true serde_json.workspace = true @@ -54,8 +48,6 @@ rand.workspace = true scopeguard.workspace = true strum.workspace = true strum_macros.workspace = true -url.workspace = true -uuid.workspace = true walkdir.workspace = true pq_proto.workspace = true @@ -64,12 +56,6 @@ metrics.workspace = true const_format.workspace = true -# to use tokio channels as streams, this is faster to compile than async_stream -# why is it only here? no other crate should use it, streams are rarely needed. -tokio-stream = { version = "0.1.14" } - -serde_path_to_error.workspace = true - [dev-dependencies] byteorder.workspace = true bytes.workspace = true diff --git a/libs/utils/scripts/restore_from_wal.sh b/libs/utils/scripts/restore_from_wal.sh index a8615c2337..f394d4c58d 100755 --- a/libs/utils/scripts/restore_from_wal.sh +++ b/libs/utils/scripts/restore_from_wal.sh @@ -39,7 +39,7 @@ function initdb_with_args { ;; esac - eval env -i LD_LIBRARY_PATH="$PG_BIN"/../lib "${cmd[*]}" + eval env -i LD_LIBRARY_PATH="$PG_BIN"/../lib ASAN_OPTIONS="${ASAN_OPTIONS-}" UBSAN_OPTIONS="${UBSAN_OPTIONS-}" "${cmd[*]}" } rm -fr "$DATA_DIR" diff --git a/libs/utils/src/auth.rs b/libs/utils/src/auth.rs index f7acc61ac1..4bfd0ab055 100644 --- a/libs/utils/src/auth.rs +++ b/libs/utils/src/auth.rs @@ -10,7 +10,7 @@ use jsonwebtoken::{ }; use serde::{Deserialize, Serialize}; -use crate::{http::error::ApiError, id::TenantId}; +use crate::id::TenantId; /// Algorithm to use. We require EdDSA. const STORAGE_TOKEN_ALGORITHM: Algorithm = Algorithm::EdDSA; @@ -90,15 +90,6 @@ impl Display for AuthError { } } -impl From for ApiError { - fn from(_value: AuthError) -> Self { - // Don't pass on the value of the AuthError as a precautionary measure. - // Being intentionally vague in public error communication hurts debugability - // but it is more secure. 
- ApiError::Forbidden("JWT authentication error".to_string()) - } -} - pub struct JwtAuth { decoding_keys: Vec, validation: Validation, diff --git a/libs/utils/src/backoff.rs b/libs/utils/src/backoff.rs index 096c7e5854..e6503fe377 100644 --- a/libs/utils/src/backoff.rs +++ b/libs/utils/src/backoff.rs @@ -1,4 +1,5 @@ use std::fmt::{Debug, Display}; +use std::time::Duration; use futures::Future; use tokio_util::sync::CancellationToken; @@ -29,6 +30,11 @@ pub async fn exponential_backoff( } } +pub fn exponential_backoff_duration(n: u32, base_increment: f64, max_seconds: f64) -> Duration { + let seconds = exponential_backoff_duration_seconds(n, base_increment, max_seconds); + Duration::from_secs_f64(seconds) +} + pub fn exponential_backoff_duration_seconds(n: u32, base_increment: f64, max_seconds: f64) -> f64 { if n == 0 { 0.0 diff --git a/libs/utils/src/failpoint_support.rs b/libs/utils/src/failpoint_support.rs index 272c6ebb26..fc998ad9a9 100644 --- a/libs/utils/src/failpoint_support.rs +++ b/libs/utils/src/failpoint_support.rs @@ -1,13 +1,6 @@ //! Failpoint support code shared between pageserver and safekeepers. -use crate::http::{ - error::ApiError, - json::{json_request, json_response}, -}; -use hyper::{Body, Request, Response, StatusCode}; -use serde::{Deserialize, Serialize}; use tokio_util::sync::CancellationToken; -use tracing::*; /// Declare a failpoint that can use to `pause` failpoint action. /// We don't want to block the executor thread, hence, spawn_blocking + await. @@ -184,45 +177,3 @@ fn exit_failpoint() { tracing::info!("Exit requested by failpoint"); std::process::exit(1); } - -pub type ConfigureFailpointsRequest = Vec; - -/// Information for configuring a single fail point -#[derive(Debug, Serialize, Deserialize)] -pub struct FailpointConfig { - /// Name of the fail point - pub name: String, - /// List of actions to take, using the format described in `fail::cfg` - /// - /// We also support `actions = "exit"` to cause the fail point to immediately exit. - pub actions: String, -} - -/// Configure failpoints through http. -pub async fn failpoints_handler( - mut request: Request, - _cancel: CancellationToken, -) -> Result, ApiError> { - if !fail::has_failpoints() { - return Err(ApiError::BadRequest(anyhow::anyhow!( - "Cannot manage failpoints because neon was compiled without failpoints support" - ))); - } - - let failpoints: ConfigureFailpointsRequest = json_request(&mut request).await?; - for fp in failpoints { - info!("cfg failpoint: {} {}", fp.name, fp.actions); - - // We recognize one extra "action" that's not natively recognized - // by the failpoints crate: exit, to immediately kill the process - let cfg_result = apply_failpoint(&fp.name, &fp.actions); - - if let Err(err_msg) = cfg_result { - return Err(ApiError::BadRequest(anyhow::anyhow!( - "Failed to configure failpoints: {err_msg}" - ))); - } - } - - json_response(StatusCode::OK, ()) -} diff --git a/libs/utils/src/lib.rs b/libs/utils/src/lib.rs index 1fb18e9e9a..820ff2d5ea 100644 --- a/libs/utils/src/lib.rs +++ b/libs/utils/src/lib.rs @@ -2,8 +2,6 @@ //! between other crates in this repository. 
#![deny(clippy::undocumented_unsafe_blocks)] -extern crate hyper0 as hyper; - pub mod backoff; /// `Lsn` type implements common tasks on Log Sequence Numbers @@ -33,9 +31,6 @@ pub mod shard; mod hex; pub use hex::Hex; -// http endpoint utils -pub mod http; - // definition of the Generation type for pageserver attachment APIs pub mod generation; @@ -96,8 +91,6 @@ pub mod circuit_breaker; pub mod try_rcu; -pub mod pprof; - pub mod guard_arc_swap; // Re-export used in macro. Avoids adding git-version as dep in target crates. diff --git a/libs/utils/src/logging.rs b/libs/utils/src/logging.rs index 753f05b6fd..4a6069294d 100644 --- a/libs/utils/src/logging.rs +++ b/libs/utils/src/logging.rs @@ -8,19 +8,22 @@ use strum_macros::{EnumString, VariantNames}; /// Logs a critical error, similarly to `tracing::error!`. This will: /// /// * Emit an ERROR log message with prefix "CRITICAL:" and a backtrace. +/// * Trigger a pageable alert (via the metric below). /// * Increment libmetrics_tracing_event_count{level="critical"}, and indirectly level="error". -/// * Trigger a pageable alert (via the metric above). /// * In debug builds, panic the process. +/// +/// When including errors in the message, please use {err:?} to include the error cause and original +/// backtrace. #[macro_export] macro_rules! critical { - ($($arg:tt)*) => { + ($($arg:tt)*) => {{ if cfg!(debug_assertions) { panic!($($arg)*); } $crate::logging::TRACING_EVENT_COUNT_METRIC.inc_critical(); let backtrace = std::backtrace::Backtrace::capture(); tracing::error!("CRITICAL: {}\n{backtrace}", format!($($arg)*)); - }; + }}; } #[derive(EnumString, strum_macros::Display, VariantNames, Eq, PartialEq, Debug, Clone, Copy)] diff --git a/pageserver/Cargo.toml b/pageserver/Cargo.toml index 6e4eaa0efd..41ac3b69b8 100644 --- a/pageserver/Cargo.toml +++ b/pageserver/Cargo.toml @@ -79,6 +79,7 @@ pq_proto.workspace = true remote_storage.workspace = true storage_broker.workspace = true tenant_size_model.workspace = true +http-utils.workspace = true utils.workspace = true workspace_hack.workspace = true reqwest.workspace = true diff --git a/pageserver/client/Cargo.toml b/pageserver/client/Cargo.toml index f582d307a7..db77a395e0 100644 --- a/pageserver/client/Cargo.toml +++ b/pageserver/client/Cargo.toml @@ -11,6 +11,7 @@ testing = [ "pageserver_api/testing" ] pageserver_api.workspace = true thiserror.workspace = true reqwest = { workspace = true, features = [ "stream" ] } +http-utils.workspace = true utils.workspace = true serde.workspace = true workspace_hack = { version = "0.1", path = "../../workspace_hack" } diff --git a/pageserver/client/src/mgmt_api.rs b/pageserver/client/src/mgmt_api.rs index 0359bfcd0b..da7ec5abce 100644 --- a/pageserver/client/src/mgmt_api.rs +++ b/pageserver/client/src/mgmt_api.rs @@ -1,11 +1,12 @@ use std::{collections::HashMap, error::Error as _}; use bytes::Bytes; -use detach_ancestor::AncestorDetached; -use pageserver_api::{models::*, shard::TenantShardId}; use reqwest::{IntoUrl, Method, StatusCode}; + +use detach_ancestor::AncestorDetached; +use http_utils::error::HttpErrorBody; +use pageserver_api::{models::*, shard::TenantShardId}; use utils::{ - http::error::HttpErrorBody, id::{TenantId, TimelineId}, lsn::Lsn, }; diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs index 5764728505..fa098e9364 100644 --- a/pageserver/src/bin/pageserver.rs +++ b/pageserver/src/bin/pageserver.rs @@ -592,7 +592,7 @@ fn start_pageserver( let router = http::make_router(router_state, launch_ts, http_auth.clone())? 
.build() .map_err(|err| anyhow!(err))?; - let service = utils::http::RouterService::new(router).unwrap(); + let service = http_utils::RouterService::new(router).unwrap(); let server = hyper0::Server::from_tcp(http_listener)? .serve(service) .with_graceful_shutdown({ diff --git a/pageserver/src/config.rs b/pageserver/src/config.rs index ce480c70a0..c5368f6806 100644 --- a/pageserver/src/config.rs +++ b/pageserver/src/config.rs @@ -193,6 +193,10 @@ pub struct PageServerConf { pub page_service_pipelining: pageserver_api::config::PageServicePipeliningConfig, pub get_vectored_concurrent_io: pageserver_api::config::GetVectoredConcurrentIo, + + /// Enable read path debugging. If enabled, read key errors will print a backtrace of the layer + /// files read. + pub enable_read_path_debugging: bool, } /// Token for authentication to safekeepers @@ -355,6 +359,7 @@ impl PageServerConf { wal_receiver_protocol, page_service_pipelining, get_vectored_concurrent_io, + enable_read_path_debugging, } = config_toml; let mut conf = PageServerConf { @@ -440,6 +445,7 @@ impl PageServerConf { .unwrap_or_default(), virtual_file_io_mode: virtual_file_io_mode.unwrap_or(virtual_file::IoMode::preferred()), no_sync: no_sync.unwrap_or(false), + enable_read_path_debugging: enable_read_path_debugging.unwrap_or(false), }; // ------------------------------------------------------------ diff --git a/pageserver/src/deletion_queue.rs b/pageserver/src/deletion_queue.rs index 1d508f5fe9..a2395b0dca 100644 --- a/pageserver/src/deletion_queue.rs +++ b/pageserver/src/deletion_queue.rs @@ -8,7 +8,6 @@ use std::time::Duration; use crate::controller_upcall_client::ControlPlaneGenerationsApi; use crate::metrics; -use crate::tenant::remote_timeline_client::remote_layer_path; use crate::tenant::remote_timeline_client::remote_timeline_path; use crate::tenant::remote_timeline_client::LayerFileMetadata; use crate::virtual_file::MaybeFatalIo; @@ -463,45 +462,18 @@ impl DeletionQueueClient { /// /// The `current_generation` is the generation of this pageserver's current attachment. The /// generations in `layers` are the generations in which those layers were written. - pub(crate) async fn push_layers( + pub(crate) fn push_layers( &self, tenant_shard_id: TenantShardId, timeline_id: TimelineId, current_generation: Generation, layers: Vec<(LayerName, LayerFileMetadata)>, ) -> Result<(), DeletionQueueError> { - if current_generation.is_none() { - debug!("Enqueuing deletions in legacy mode, skipping queue"); + // None generations are not valid for attached tenants: they must always be attached in + // a known generation. None generations are still permitted for layers in the index because + // they may be historical. + assert!(!current_generation.is_none()); - let mut layer_paths = Vec::new(); - for (layer, meta) in layers { - layer_paths.push(remote_layer_path( - &tenant_shard_id.tenant_id, - &timeline_id, - meta.shard, - &layer, - meta.generation, - )); - } - self.push_immediate(layer_paths).await?; - return self.flush_immediate().await; - } - - self.push_layers_sync(tenant_shard_id, timeline_id, current_generation, layers) - } - - /// When a Tenant has a generation, push_layers is always synchronous because - /// the ListValidator channel is an unbounded channel. 
- /// - /// This can be merged into push_layers when we remove the Generation-less mode - /// support (``) - pub(crate) fn push_layers_sync( - &self, - tenant_shard_id: TenantShardId, - timeline_id: TimelineId, - current_generation: Generation, - layers: Vec<(LayerName, LayerFileMetadata)>, - ) -> Result<(), DeletionQueueError> { metrics::DELETION_QUEUE .keys_submitted .inc_by(layers.len() as u64); @@ -957,14 +929,12 @@ mod test { // File should still be there after we push it to the queue (we haven't pushed enough to flush anything) info!("Pushing"); - client - .push_layers( - tenant_shard_id, - TIMELINE_ID, - now_generation, - [(layer_file_name_1.clone(), layer_metadata)].to_vec(), - ) - .await?; + client.push_layers( + tenant_shard_id, + TIMELINE_ID, + now_generation, + [(layer_file_name_1.clone(), layer_metadata)].to_vec(), + )?; assert_remote_files(&[&remote_layer_file_name_1], &remote_timeline_path); assert_local_files(&[], &deletion_prefix); @@ -1017,14 +987,12 @@ mod test { assert_remote_files(&[&remote_layer_name], &remote_timeline_path); tracing::debug!("Pushing..."); - client - .push_layers( - tenant_shard_id, - TIMELINE_ID, - stale_generation, - [(EXAMPLE_LAYER_NAME.clone(), layer_metadata.clone())].to_vec(), - ) - .await?; + client.push_layers( + tenant_shard_id, + TIMELINE_ID, + stale_generation, + [(EXAMPLE_LAYER_NAME.clone(), layer_metadata.clone())].to_vec(), + )?; // We enqueued the operation in a stale generation: it should have failed validation tracing::debug!("Flushing..."); @@ -1032,14 +1000,12 @@ mod test { assert_remote_files(&[&remote_layer_name], &remote_timeline_path); tracing::debug!("Pushing..."); - client - .push_layers( - tenant_shard_id, - TIMELINE_ID, - latest_generation, - [(EXAMPLE_LAYER_NAME.clone(), layer_metadata.clone())].to_vec(), - ) - .await?; + client.push_layers( + tenant_shard_id, + TIMELINE_ID, + latest_generation, + [(EXAMPLE_LAYER_NAME.clone(), layer_metadata.clone())].to_vec(), + )?; // We enqueued the operation in a fresh generation: it should have passed validation tracing::debug!("Flushing..."); @@ -1074,28 +1040,24 @@ mod test { // generation gets that treatment) let remote_layer_file_name_historical = ctx.write_remote_layer(EXAMPLE_LAYER_NAME, layer_generation)?; - client - .push_layers( - tenant_shard_id, - TIMELINE_ID, - now_generation.previous(), - [(EXAMPLE_LAYER_NAME.clone(), layer_metadata.clone())].to_vec(), - ) - .await?; + client.push_layers( + tenant_shard_id, + TIMELINE_ID, + now_generation.previous(), + [(EXAMPLE_LAYER_NAME.clone(), layer_metadata.clone())].to_vec(), + )?; // Inject a deletion in the generation before generation_now: after restart, // this deletion should get executed, because we execute deletions in the // immediately previous generation on the same node. 
let remote_layer_file_name_previous = ctx.write_remote_layer(EXAMPLE_LAYER_NAME_ALT, layer_generation)?; - client - .push_layers( - tenant_shard_id, - TIMELINE_ID, - now_generation, - [(EXAMPLE_LAYER_NAME_ALT.clone(), layer_metadata.clone())].to_vec(), - ) - .await?; + client.push_layers( + tenant_shard_id, + TIMELINE_ID, + now_generation, + [(EXAMPLE_LAYER_NAME_ALT.clone(), layer_metadata.clone())].to_vec(), + )?; client.flush().await?; assert_remote_files( @@ -1139,6 +1101,7 @@ pub(crate) mod mock { use tracing::info; use super::*; + use crate::tenant::remote_timeline_client::remote_layer_path; use std::sync::atomic::{AtomicUsize, Ordering}; pub struct ConsumerState { diff --git a/pageserver/src/disk_usage_eviction_task.rs b/pageserver/src/disk_usage_eviction_task.rs index ca44fbe6ae..738a783813 100644 --- a/pageserver/src/disk_usage_eviction_task.rs +++ b/pageserver/src/disk_usage_eviction_task.rs @@ -61,6 +61,7 @@ use crate::{ remote_timeline_client::LayerFileMetadata, secondary::SecondaryTenant, storage_layer::{AsLayerDesc, EvictionError, Layer, LayerName, LayerVisibilityHint}, + tasks::sleep_random, }, CancellableTask, DiskUsageEvictionTask, }; @@ -210,14 +211,8 @@ async fn disk_usage_eviction_task( info!("disk usage based eviction task finishing"); }; - use crate::tenant::tasks::random_init_delay; - { - if random_init_delay(task_config.period, &cancel) - .await - .is_err() - { - return; - } + if sleep_random(task_config.period, &cancel).await.is_err() { + return; } let mut iteration_no = 0; diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 94f7510a4a..bd196621c1 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -13,6 +13,12 @@ use enumset::EnumSet; use futures::future::join_all; use futures::StreamExt; use futures::TryFutureExt; +use http_utils::endpoint::{ + profile_cpu_handler, profile_heap_handler, prometheus_metrics_handler, request_span, +}; +use http_utils::failpoints::failpoints_handler; +use http_utils::request::must_parse_query_param; +use http_utils::request::{get_request_param, must_get_query_param, parse_query_param}; use humantime::format_rfc3339; use hyper::header; use hyper::StatusCode; @@ -60,13 +66,6 @@ use tokio::time::Instant; use tokio_util::io::StreamReader; use tokio_util::sync::CancellationToken; use tracing::*; -use utils::auth::JwtAuth; -use utils::failpoint_support::failpoints_handler; -use utils::http::endpoint::{ - profile_cpu_handler, profile_heap_handler, prometheus_metrics_handler, request_span, -}; -use utils::http::request::must_parse_query_param; -use utils::http::request::{get_request_param, must_get_query_param, parse_query_param}; use crate::config::PageServerConf; use crate::context::{DownloadBehavior, RequestContext}; @@ -104,6 +103,13 @@ use crate::tenant::OffloadedTimeline; use crate::tenant::{LogicalSizeCalculationCause, PageReconstructError}; use crate::DEFAULT_PG_VERSION; use crate::{disk_usage_eviction_task, tenant}; +use http_utils::{ + endpoint::{self, attach_openapi_ui, auth_middleware, check_permission_with}, + error::{ApiError, HttpErrorBody}, + json::{json_request, json_request_maybe, json_response}, + request::parse_request_param, + RequestExt, RouterBuilder, +}; use pageserver_api::models::{ StatusResponse, TenantConfigRequest, TenantInfo, TimelineCreateRequest, TimelineGcRequest, TimelineInfo, @@ -111,13 +117,6 @@ use pageserver_api::models::{ use utils::{ auth::SwappableJwtAuth, generation::Generation, - http::{ - endpoint::{self, attach_openapi_ui, auth_middleware, 
check_permission_with}, - error::{ApiError, HttpErrorBody}, - json::{json_request, json_request_maybe, json_response}, - request::parse_request_param, - RequestExt, RouterBuilder, - }, id::{TenantId, TimelineId}, lsn::Lsn, }; @@ -561,7 +560,7 @@ async fn reload_auth_validation_keys_handler( let key_path = config.auth_validation_public_key_path.as_ref().unwrap(); info!("Reloading public key(s) for verifying JWT tokens from {key_path:?}"); - match JwtAuth::from_key_path(key_path) { + match utils::auth::JwtAuth::from_key_path(key_path) { Ok(new_auth) => { shared_auth.swap(new_auth); json_response(StatusCode::OK, ()) @@ -2152,6 +2151,7 @@ async fn timeline_compact_handler( let state = get_state(&request); let mut flags = EnumSet::empty(); + flags |= CompactFlags::NoYield; // run compaction to completion if Some(true) == parse_query_param::<_, bool>(&request, "force_l0_compaction")? { flags |= CompactFlags::ForceL0Compaction; diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index 6ab1178a7b..983a3079e4 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -6,7 +6,7 @@ use std::sync::{Arc, Mutex}; use std::task::{Context, Poll}; use std::time::{Duration, Instant}; -use enum_map::EnumMap; +use enum_map::{Enum as _, EnumMap}; use futures::Future; use metrics::{ register_counter_vec, register_gauge_vec, register_histogram, register_histogram_vec, @@ -104,7 +104,7 @@ pub(crate) static STORAGE_TIME_COUNT_PER_TIMELINE: Lazy = Lazy::n .expect("failed to define a metric") }); -// Buckets for background operations like compaction, GC, size calculation +// Buckets for background operation duration in seconds, like compaction, GC, size calculation. const STORAGE_OP_BUCKETS: &[f64] = &[0.010, 0.100, 1.0, 10.0, 100.0, 1000.0]; pub(crate) static STORAGE_TIME_GLOBAL: Lazy = Lazy::new(|| { @@ -236,7 +236,7 @@ pub(crate) static GET_VECTORED_LATENCY: Lazy = Lazy::new(|| GetVectoredLatency { map: EnumMap::from_array(std::array::from_fn(|task_kind_idx| { - let task_kind = ::from_usize(task_kind_idx); + let task_kind = TaskKind::from_usize(task_kind_idx); if GetVectoredLatency::TRACKED_TASK_KINDS.contains(&task_kind) { let task_kind = task_kind.into(); @@ -259,7 +259,7 @@ pub(crate) static SCAN_LATENCY: Lazy = Lazy::new(|| { ScanLatency { map: EnumMap::from_array(std::array::from_fn(|task_kind_idx| { - let task_kind = ::from_usize(task_kind_idx); + let task_kind = TaskKind::from_usize(task_kind_idx); if ScanLatency::TRACKED_TASK_KINDS.contains(&task_kind) { let task_kind = task_kind.into(); @@ -300,10 +300,10 @@ static PAGE_CACHE_READ_ACCESSES: Lazy = Lazy::new(|| { pub(crate) static PAGE_CACHE: Lazy = Lazy::new(|| PageCacheMetrics { map: EnumMap::from_array(std::array::from_fn(|task_kind| { - let task_kind = ::from_usize(task_kind); + let task_kind = TaskKind::from_usize(task_kind); let task_kind: &'static str = task_kind.into(); EnumMap::from_array(std::array::from_fn(|content_kind| { - let content_kind = ::from_usize(content_kind); + let content_kind = PageContentKind::from_usize(content_kind); let content_kind: &'static str = content_kind.into(); PageCacheMetricsForTaskKind { read_accesses_immutable: { @@ -1366,10 +1366,7 @@ impl SmgrOpTimer { /// The first callers receives Some, subsequent ones None. /// /// See [`SmgrOpTimerState`] for more context. 
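Editor's note: the metrics changes above replace the UFCS-style `<TaskKind as enum_map::Enum<...>>::from_usize` calls with plain `TaskKind::from_usize`, building each map via `EnumMap::from_array(std::array::from_fn(..))`. A standalone sketch of that pattern with a toy enum (the enum and labels are illustrative, not pageserver types):

```rust
use enum_map::{Enum, EnumMap};

#[derive(Enum, Clone, Copy, Debug)]
enum Kind {
    Compaction,
    Gc,
    Eviction,
}

fn main() {
    // Build one entry per variant, indexed by the variant's usize position,
    // mirroring the `from_array(std::array::from_fn(..))` pattern in metrics.rs.
    let labels: EnumMap<Kind, String> = EnumMap::from_array(std::array::from_fn(|i| {
        let kind = Kind::from_usize(i);
        format!("{kind:?}")
    }));
    assert_eq!(labels[Kind::Gc], "Gc");
}
```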
- pub(crate) fn observe_execution_end_flush_start( - &mut self, - at: Instant, - ) -> Option { + pub(crate) fn observe_execution_end(&mut self, at: Instant) -> Option { // NB: unlike the other observe_* methods, this one take()s. #[allow(clippy::question_mark)] // maintain similar code pattern. let Some(mut inner) = self.0.take() else { @@ -1403,7 +1400,6 @@ impl SmgrOpTimer { .. } = inner; Some(SmgrOpFlushInProgress { - flush_started_at: at, global_micros: global_flush_in_progress_micros, per_timeline_micros: per_timeline_flush_in_progress_micros, }) @@ -1419,7 +1415,6 @@ impl SmgrOpTimer { /// add another `observe_*` method to [`SmgrOpTimer`], follow the existing pattern there, /// and remove this struct from the code base. pub(crate) struct SmgrOpFlushInProgress { - flush_started_at: Instant, global_micros: IntCounter, per_timeline_micros: IntCounter, } @@ -1438,12 +1433,13 @@ impl Drop for SmgrOpTimer { self.observe_throttle_start(now); self.observe_throttle_done(ThrottleResult::NotThrottled { end: now }); self.observe_execution_start(now); - self.observe_execution_end_flush_start(now); + let maybe_flush_timer = self.observe_execution_end(now); + drop(maybe_flush_timer); } } impl SmgrOpFlushInProgress { - pub(crate) async fn measure(mut self, mut fut: Fut) -> O + pub(crate) async fn measure(self, mut started_at: Instant, mut fut: Fut) -> O where Fut: std::future::Future, { @@ -1455,12 +1451,12 @@ impl SmgrOpFlushInProgress { let mut observe_guard = scopeguard::guard( || { let now = Instant::now(); - let elapsed = now - self.flush_started_at; + let elapsed = now - started_at; self.global_micros .inc_by(u64::try_from(elapsed.as_micros()).unwrap()); self.per_timeline_micros .inc_by(u64::try_from(elapsed.as_micros()).unwrap()); - self.flush_started_at = now; + started_at = now; }, |mut observe| { observe(); @@ -1913,7 +1909,7 @@ pub(crate) static COMPUTE_COMMANDS_COUNTERS: Lazy = Lazy ComputeCommandCounters { map: EnumMap::from_array(std::array::from_fn(|i| { - let command = ::from_usize(i); + let command = ComputeCommandKind::from_usize(i); let command_str: &'static str = command.into(); inner.with_label_values(&[command_str]) })), @@ -2213,11 +2209,13 @@ pub(crate) static TENANT_TASK_EVENTS: Lazy = Lazy::new(|| { pub struct BackgroundLoopSemaphoreMetrics { counters: EnumMap, - durations: EnumMap, + durations: EnumMap, + waiting_tasks: EnumMap, + running_tasks: EnumMap, } -pub(crate) static BACKGROUND_LOOP_SEMAPHORE: Lazy = Lazy::new( - || { +pub(crate) static BACKGROUND_LOOP_SEMAPHORE: Lazy = + Lazy::new(|| { let counters = register_int_counter_pair_vec!( "pageserver_background_loop_semaphore_wait_start_count", "Counter for background loop concurrency-limiting semaphore acquire calls started", @@ -2227,45 +2225,101 @@ pub(crate) static BACKGROUND_LOOP_SEMAPHORE: Lazy::from_usize(i); + counters: EnumMap::from_array(std::array::from_fn(|i| { + let kind = BackgroundLoopKind::from_usize(i); counters.with_label_values(&[kind.into()]) })), - durations: enum_map::EnumMap::from_array(std::array::from_fn(|i| { - let kind = ::from_usize(i); + durations: EnumMap::from_array(std::array::from_fn(|i| { + let kind = BackgroundLoopKind::from_usize(i); durations.with_label_values(&[kind.into()]) })), + waiting_tasks: EnumMap::from_array(std::array::from_fn(|i| { + let kind = BackgroundLoopKind::from_usize(i); + waiting_tasks.with_label_values(&[kind.into()]) + })), + running_tasks: EnumMap::from_array(std::array::from_fn(|i| { + let kind = BackgroundLoopKind::from_usize(i); + 
running_tasks.with_label_values(&[kind.into()]) + })), } - }, -); + }); impl BackgroundLoopSemaphoreMetrics { - pub(crate) fn measure_acquisition(&self, task: BackgroundLoopKind) -> impl Drop + '_ { - struct Record<'a> { - metrics: &'a BackgroundLoopSemaphoreMetrics, - task: BackgroundLoopKind, - _counter_guard: metrics::IntCounterPairGuard, - start: Instant, - } - impl Drop for Record<'_> { - fn drop(&mut self) { - let elapsed = self.start.elapsed().as_secs_f64(); - self.metrics.durations[self.task].inc_by(elapsed); - } - } - Record { - metrics: self, + /// Starts recording semaphore metrics. Call `acquired()` on the returned recorder when the + /// semaphore is acquired, and drop it when the task completes or is cancelled. + pub(crate) fn record( + &self, + task: BackgroundLoopKind, + ) -> BackgroundLoopSemaphoreMetricsRecorder { + BackgroundLoopSemaphoreMetricsRecorder::start(self, task) + } +} + +/// Records metrics for a background task. +pub struct BackgroundLoopSemaphoreMetricsRecorder<'a> { + metrics: &'a BackgroundLoopSemaphoreMetrics, + task: BackgroundLoopKind, + start: Instant, + wait_counter_guard: Option, +} + +impl<'a> BackgroundLoopSemaphoreMetricsRecorder<'a> { + /// Starts recording semaphore metrics, by recording wait time and incrementing + /// `wait_start_count` and `waiting_tasks`. + fn start(metrics: &'a BackgroundLoopSemaphoreMetrics, task: BackgroundLoopKind) -> Self { + metrics.waiting_tasks[task].inc(); + Self { + metrics, task, - _counter_guard: self.counters[task].guard(), start: Instant::now(), + wait_counter_guard: Some(metrics.counters[task].guard()), + } + } + + /// Signals that the semaphore has been acquired, and updates relevant metrics. + pub fn acquired(&mut self) -> Duration { + let waited = self.start.elapsed(); + self.wait_counter_guard.take().expect("already acquired"); + self.metrics.durations[self.task].observe(waited.as_secs_f64()); + self.metrics.waiting_tasks[self.task].dec(); + self.metrics.running_tasks[self.task].inc(); + waited + } +} + +impl Drop for BackgroundLoopSemaphoreMetricsRecorder<'_> { + /// The task either completed or was cancelled. + fn drop(&mut self) { + if self.wait_counter_guard.take().is_some() { + // Waiting. + self.metrics.durations[self.task].observe(self.start.elapsed().as_secs_f64()); + self.metrics.waiting_tasks[self.task].dec(); + } else { + // Running. 
+ self.metrics.running_tasks[self.task].dec(); } } } @@ -2514,7 +2568,7 @@ pub(crate) static WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM: Lazy = pub(crate) struct WalRedoProcessCounters { pub(crate) started: IntCounter, - pub(crate) killed_by_cause: enum_map::EnumMap, + pub(crate) killed_by_cause: EnumMap, pub(crate) active_stderr_logger_tasks_started: IntCounter, pub(crate) active_stderr_logger_tasks_finished: IntCounter, } @@ -2556,7 +2610,7 @@ impl Default for WalRedoProcessCounters { Self { started, killed_by_cause: EnumMap::from_array(std::array::from_fn(|i| { - let cause = ::from_usize(i); + let cause = WalRedoKillCause::from_usize(i); let cause_str: &'static str = cause.into(); killed.with_label_values(&[cause_str]) })), diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index e103338c7c..972dad34d4 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -489,7 +489,6 @@ impl timeline::handle::TenantManager for TenantManagerWrappe let timeline = tenant_shard .get_timeline(timeline_id, true) .map_err(GetActiveTimelineError::Timeline)?; - set_tracing_field_shard_id(&timeline); Ok(timeline) } } @@ -774,11 +773,11 @@ impl PageServerHandler { let batched_msg = match neon_fe_msg { PagestreamFeMessage::Exists(req) => { - let span = tracing::info_span!(parent: parent_span, "handle_get_rel_exists_request", rel = %req.rel, req_lsn = %req.hdr.request_lsn); let shard = timeline_handles .get(tenant_id, timeline_id, ShardSelector::Zero) - .instrument(span.clone()) // sets `shard_id` field .await?; + debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id(); + let span = tracing::info_span!(parent: &parent_span, "handle_get_rel_exists_request", rel = %req.rel, req_lsn = %req.hdr.request_lsn, shard_id = %shard.tenant_shard_id.shard_slug()); let timer = record_op_start_and_throttle( &shard, metrics::SmgrQueryType::GetRelExists, @@ -793,11 +792,10 @@ impl PageServerHandler { } } PagestreamFeMessage::Nblocks(req) => { - let span = tracing::info_span!(parent: parent_span, "handle_get_nblocks_request", rel = %req.rel, req_lsn = %req.hdr.request_lsn); let shard = timeline_handles .get(tenant_id, timeline_id, ShardSelector::Zero) - .instrument(span.clone()) // sets `shard_id` field .await?; + let span = tracing::info_span!(parent: &parent_span, "handle_get_nblocks_request", rel = %req.rel, req_lsn = %req.hdr.request_lsn, shard_id = %shard.tenant_shard_id.shard_slug()); let timer = record_op_start_and_throttle( &shard, metrics::SmgrQueryType::GetRelSize, @@ -812,11 +810,10 @@ impl PageServerHandler { } } PagestreamFeMessage::DbSize(req) => { - let span = tracing::info_span!(parent: parent_span, "handle_db_size_request", dbnode = %req.dbnode, req_lsn = %req.hdr.request_lsn); let shard = timeline_handles .get(tenant_id, timeline_id, ShardSelector::Zero) - .instrument(span.clone()) // sets `shard_id` field .await?; + let span = tracing::info_span!(parent: &parent_span, "handle_db_size_request", dbnode = %req.dbnode, req_lsn = %req.hdr.request_lsn, shard_id = %shard.tenant_shard_id.shard_slug()); let timer = record_op_start_and_throttle( &shard, metrics::SmgrQueryType::GetDbSize, @@ -831,11 +828,10 @@ impl PageServerHandler { } } PagestreamFeMessage::GetSlruSegment(req) => { - let span = tracing::info_span!(parent: parent_span, "handle_get_slru_segment_request", kind = %req.kind, segno = %req.segno, req_lsn = %req.hdr.request_lsn); let shard = timeline_handles .get(tenant_id, timeline_id, ShardSelector::Zero) - .instrument(span.clone()) // sets 
`shard_id` field .await?; + let span = tracing::info_span!(parent: &parent_span, "handle_get_slru_segment_request", kind = %req.kind, segno = %req.segno, req_lsn = %req.hdr.request_lsn, shard_id = %shard.tenant_shard_id.shard_slug()); let timer = record_op_start_and_throttle( &shard, metrics::SmgrQueryType::GetSlruSegment, @@ -850,12 +846,20 @@ impl PageServerHandler { } } PagestreamFeMessage::GetPage(req) => { - let span = tracing::info_span!(parent: parent_span, "handle_get_page_at_lsn_request_batched", req_lsn = %req.hdr.request_lsn); + // avoid a somewhat costly Span::record() by constructing the entire span in one go. + macro_rules! mkspan { + (before shard routing) => {{ + tracing::info_span!(parent: &parent_span, "handle_get_page_request", rel = %req.rel, blkno = %req.blkno, req_lsn = %req.hdr.request_lsn) + }}; + ($shard_id:expr) => {{ + tracing::info_span!(parent: &parent_span, "handle_get_page_request", rel = %req.rel, blkno = %req.blkno, req_lsn = %req.hdr.request_lsn, shard_id = %$shard_id) + }}; + } macro_rules! respond_error { - ($error:expr) => {{ + ($span:expr, $error:expr) => {{ let error = BatchedFeMessage::RespondError { - span, + span: $span, error: BatchedPageStreamError { req: req.hdr, err: $error, @@ -868,27 +872,35 @@ impl PageServerHandler { let key = rel_block_to_key(req.rel, req.blkno); let shard = match timeline_handles .get(tenant_id, timeline_id, ShardSelector::Page(key)) - .instrument(span.clone()) // sets `shard_id` field .await { Ok(tl) => tl, - Err(GetActiveTimelineError::Tenant(GetActiveTenantError::NotFound(_))) => { - // We already know this tenant exists in general, because we resolved it at - // start of connection. Getting a NotFound here indicates that the shard containing - // the requested page is not present on this node: the client's knowledge of shard->pageserver - // mapping is out of date. - // - // Closing the connection by returning ``::Reconnect` has the side effect of rate-limiting above message, via - // client's reconnect backoff, as well as hopefully prompting the client to load its updated configuration - // and talk to a different pageserver. - return respond_error!(PageStreamError::Reconnect( - "getpage@lsn request routed to wrong shard".into() - )); - } Err(e) => { - return respond_error!(e.into()); + let span = mkspan!(before shard routing); + match e { + GetActiveTimelineError::Tenant(GetActiveTenantError::NotFound(_)) => { + // We already know this tenant exists in general, because we resolved it at + // start of connection. Getting a NotFound here indicates that the shard containing + // the requested page is not present on this node: the client's knowledge of shard->pageserver + // mapping is out of date. + // + // Closing the connection by returning ``::Reconnect` has the side effect of rate-limiting above message, via + // client's reconnect backoff, as well as hopefully prompting the client to load its updated configuration + // and talk to a different pageserver. 
+ return respond_error!( + span, + PageStreamError::Reconnect( + "getpage@lsn request routed to wrong shard".into() + ) + ); + } + e => { + return respond_error!(span, e.into()); + } + } } }; + let span = mkspan!(shard.tenant_shard_id.shard_slug()); let timer = record_op_start_and_throttle( &shard, @@ -910,7 +922,7 @@ impl PageServerHandler { { Ok(lsn) => lsn, Err(e) => { - return respond_error!(e); + return respond_error!(span, e); } }; BatchedFeMessage::GetPage { @@ -922,11 +934,10 @@ impl PageServerHandler { } #[cfg(feature = "testing")] PagestreamFeMessage::Test(req) => { - let span = tracing::info_span!(parent: parent_span, "handle_test_request"); let shard = timeline_handles .get(tenant_id, timeline_id, ShardSelector::Zero) - .instrument(span.clone()) // sets `shard_id` field .await?; + let span = tracing::info_span!(parent: &parent_span, "handle_test_request", shard_id = %shard.tenant_shard_id.shard_slug()); let timer = record_op_start_and_throttle(&shard, metrics::SmgrQueryType::Test, received_at) .await?; @@ -1063,7 +1074,7 @@ impl PageServerHandler { }; // invoke handler function - let (handler_results, span): ( + let (mut handler_results, span): ( Vec>, _, ) = match batch { @@ -1190,11 +1201,49 @@ impl PageServerHandler { } }; + // We purposefully don't count flush time into the smgr operation timer. + // + // The reason is that current compute client will not perform protocol processing + // if the postgres backend process is doing things other than `->smgr_read()`. + // This is especially the case for prefetch. + // + // If the compute doesn't read from the connection, eventually TCP will backpressure + // all the way into our flush call below. + // + // The timer's underlying metric is used for a storage-internal latency SLO and + // we don't want to include latency in it that we can't control. + // And as pointed out above, in this case, we don't control the time that flush will take. + // + // We put each response in the batch onto the wire in a separate pgb_writer.flush() + // call, which (all unmeasured) adds syscall overhead but reduces time to first byte + // and avoids building up a "giant" contiguous userspace buffer to hold the entire response. + // TODO: vectored socket IO would be great, but pgb_writer doesn't support that. + let flush_timers = { + let flushing_start_time = Instant::now(); + let mut flush_timers = Vec::with_capacity(handler_results.len()); + for handler_result in &mut handler_results { + let flush_timer = match handler_result { + Ok((_, timer)) => Some( + timer + .observe_execution_end(flushing_start_time) + .expect("we are the first caller"), + ), + Err(_) => { + // TODO: measure errors + None + } + }; + flush_timers.push(flush_timer); + } + assert_eq!(flush_timers.len(), handler_results.len()); + flush_timers + }; + // Map handler result to protocol behavior. // Some handler errors cause exit from pagestream protocol. // Other handler errors are sent back as an error message and we stay in pagestream protocol. 
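Editor's note: the comment block above explains why flush time is excluded from the smgr operation timer and tracked via a separate flush-in-progress counter. A simplified, standalone sketch of the underlying "record elapsed time even if the future is dropped" pattern, using `scopeguard` and a plain Prometheus counter; the function name and counter are illustrative, and the real `SmgrOpFlushInProgress::measure` observes incrementally rather than once at the end:

```rust
use std::future::Future;
use std::time::Instant;

use prometheus::IntCounter;

/// Measures how long `flush_fut` takes and adds the elapsed microseconds to
/// `flush_micros`, even if the future is cancelled (dropped) mid-flush.
async fn measure_flush<F, O>(flush_micros: &IntCounter, started_at: Instant, flush_fut: F) -> O
where
    F: Future<Output = O>,
{
    // The guard fires on drop: either on normal completion below, or when the
    // whole future is dropped early (e.g. client disconnect).
    let guard = scopeguard::guard(started_at, |started_at| {
        let elapsed = started_at.elapsed();
        flush_micros.inc_by(u64::try_from(elapsed.as_micros()).unwrap());
    });
    let out = flush_fut.await;
    drop(guard); // record exactly once
    out
}
```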
- for handler_result in handler_results { - let (response_msg, timer) = match handler_result { + for (handler_result, flushing_timer) in handler_results.into_iter().zip(flush_timers) { + let response_msg = match handler_result { Err(e) => match &e.err { PageStreamError::Shutdown => { // If we fail to fulfil a request during shutdown, which may be _because_ of @@ -1218,16 +1267,14 @@ impl PageServerHandler { span.in_scope(|| { error!("error reading relation or page version: {full:#}") }); - ( - PagestreamBeMessage::Error(PagestreamErrorResponse { - req: e.req, - message: e.err.to_string(), - }), - None, // TODO: measure errors - ) + + PagestreamBeMessage::Error(PagestreamErrorResponse { + req: e.req, + message: e.err.to_string(), + }) } }, - Ok((response_msg, timer)) => (response_msg, Some(timer)), + Ok((response_msg, _op_timer_already_observed)) => response_msg, }; // @@ -1238,30 +1285,12 @@ impl PageServerHandler { &response_msg.serialize(protocol_version), ))?; - // We purposefully don't count flush time into the timer. - // - // The reason is that current compute client will not perform protocol processing - // if the postgres backend process is doing things other than `->smgr_read()`. - // This is especially the case for prefetch. - // - // If the compute doesn't read from the connection, eventually TCP will backpressure - // all the way into our flush call below. - // - // The timer's underlying metric is used for a storage-internal latency SLO and - // we don't want to include latency in it that we can't control. - // And as pointed out above, in this case, we don't control the time that flush will take. - let flushing_timer = timer.map(|mut timer| { - timer - .observe_execution_end_flush_start(Instant::now()) - .expect("we are the first caller") - }); - // what we want to do let flush_fut = pgb_writer.flush(); // metric for how long flushing takes let flush_fut = match flushing_timer { Some(flushing_timer) => { - futures::future::Either::Left(flushing_timer.measure(flush_fut)) + futures::future::Either::Left(flushing_timer.measure(Instant::now(), flush_fut)) } None => futures::future::Either::Right(flush_fut), }; @@ -1280,8 +1309,6 @@ impl PageServerHandler { } Ok(()) } - // and log the info! line inside the request span - .instrument(span.clone()) .await?; } Ok(()) @@ -1342,7 +1369,7 @@ impl PageServerHandler { .take() .expect("implementation error: timeline_handles should not be locked"); - let request_span = info_span!("request", shard_id = tracing::field::Empty); + let request_span = info_span!("request"); let ((pgb_reader, timeline_handles), result) = match self.pipelining_config.clone() { PageServicePipeliningConfig::Pipelined(pipelining_config) => { self.handle_pagerequests_pipelined( @@ -1692,7 +1719,7 @@ impl PageServerHandler { // to distinguish a misbehaving client (asking for old LSN) from a storage issue (data missing at a legitimate LSN). if request_lsn < **latest_gc_cutoff_lsn && !timeline.is_gc_blocked_by_lsn_lease_deadline() { let gc_info = &timeline.gc_info.read().unwrap(); - if !gc_info.leases.contains_key(&request_lsn) { + if !gc_info.lsn_covered_by_lease(request_lsn) { return Err( PageStreamError::BadRequest(format!( "tried to request a page version that was garbage collected. 
requested at {} gc cutoff {}", @@ -2036,6 +2063,13 @@ impl PageServerHandler { .unwrap() .get(tenant_id, timeline_id, ShardSelector::Zero) .await?; + set_tracing_field_shard_id(&timeline); + + if timeline.is_archived() == Some(true) { + // TODO after a grace period, turn this log line into a hard error + tracing::warn!("timeline {tenant_id}/{timeline_id} is archived, but got basebackup request for it."); + //return Err(QueryError::NotFound("timeline is archived".into())) + } let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn(); if let Some(lsn) = lsn { diff --git a/pageserver/src/pgdatadir_mapping.rs b/pageserver/src/pgdatadir_mapping.rs index dcbf62b56c..00f332d797 100644 --- a/pageserver/src/pgdatadir_mapping.rs +++ b/pageserver/src/pgdatadir_mapping.rs @@ -612,11 +612,18 @@ impl Timeline { pausable_failpoint!("find-lsn-for-timestamp-pausable"); let gc_cutoff_lsn_guard = self.get_latest_gc_cutoff_lsn(); + let gc_cutoff_planned = { + let gc_info = self.gc_info.read().unwrap(); + gc_info.min_cutoff() + }; + // Usually the planned cutoff is newer than the cutoff of the last gc run, + // but let's be defensive. + let gc_cutoff = gc_cutoff_planned.max(*gc_cutoff_lsn_guard); // We use this method to figure out the branching LSN for the new branch, but the // GC cutoff could be before the branching point and we cannot create a new branch // with LSN < `ancestor_lsn`. Thus, pick the maximum of these two to be // on the safe side. - let min_lsn = std::cmp::max(*gc_cutoff_lsn_guard, self.get_ancestor_lsn()); + let min_lsn = std::cmp::max(gc_cutoff, self.get_ancestor_lsn()); let max_lsn = self.get_last_record_lsn(); // LSNs are always 8-byte aligned. low/mid/high represent the diff --git a/pageserver/src/task_mgr.rs b/pageserver/src/task_mgr.rs index 622738022a..cc93a06ccd 100644 --- a/pageserver/src/task_mgr.rs +++ b/pageserver/src/task_mgr.rs @@ -328,8 +328,8 @@ pub enum TaskKind { // Eviction. One per timeline. Eviction, - // Ingest housekeeping (flushing ephemeral layers on time threshold or disk pressure) - IngestHousekeeping, + // Tenant housekeeping (flush idle ephemeral layers, shut down idle walredo, etc.). + TenantHousekeeping, /// See [`crate::disk_usage_eviction_task`]. DiskUsageEviction, diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index c1b408ed72..4c65991e45 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -20,6 +20,7 @@ use chrono::NaiveDateTime; use enumset::EnumSet; use futures::stream::FuturesUnordered; use futures::StreamExt; +use itertools::Itertools as _; use pageserver_api::models; use pageserver_api::models::CompactInfoResponse; use pageserver_api::models::LsnLease; @@ -51,10 +52,13 @@ use timeline::compaction::GcCompactionQueue; use timeline::import_pgdata; use timeline::offload::offload_timeline; use timeline::offload::OffloadError; +use timeline::CompactFlags; use timeline::CompactOptions; +use timeline::CompactionError; use timeline::ShutdownMode; use tokio::io::BufReader; use tokio::sync::watch; +use tokio::sync::Notify; use tokio::task::JoinSet; use tokio_util::sync::CancellationToken; use tracing::*; @@ -349,6 +353,9 @@ pub struct Tenant { /// Overhead of mutex is acceptable because compaction is done with a multi-second period. compaction_circuit_breaker: std::sync::Mutex, + /// Signals the tenant compaction loop that there is L0 compaction work to be done. + pub(crate) l0_compaction_trigger: Arc, + /// Scheduled gc-compaction tasks. 
scheduled_compaction_tasks: std::sync::Mutex>>, @@ -1690,12 +1697,7 @@ impl Tenant { timeline_id, index_part, remote_metadata, - TimelineResources { - remote_client, - pagestream_throttle: self.pagestream_throttle.clone(), - pagestream_throttle_metrics: self.pagestream_throttle_metrics.clone(), - l0_flush_global_state: self.l0_flush_global_state.clone(), - }, + self.get_timeline_resources_for(remote_client), LoadTimelineCause::Attach, ctx, ) @@ -2898,150 +2900,194 @@ impl Tenant { .await } - /// Perform one compaction iteration. - /// This function is periodically called by compactor task. - /// Also it can be explicitly requested per timeline through page server - /// api's 'compact' command. + /// Performs one compaction iteration. Called periodically from the compaction loop. Returns + /// whether another compaction is needed, if we still have pending work or if we yield for + /// immediate L0 compaction. /// - /// Returns whether we have pending compaction task. + /// Compaction can also be explicitly requested for a timeline via the HTTP API. async fn compaction_iteration( self: &Arc, cancel: &CancellationToken, ctx: &RequestContext, - ) -> Result { - // Don't start doing work during shutdown, or when broken, we do not need those in the logs + ) -> Result { + // Don't compact inactive tenants. if !self.is_active() { - return Ok(CompactionOutcome::Done); + return Ok(CompactionOutcome::Skipped); } - { - let conf = self.tenant_conf.load(); - - // Note that compaction usually requires deletions, but we don't respect - // may_delete_layers_hint here: that is because tenants in AttachedMulti - // should proceed with compaction even if they can't do deletion, to avoid - // accumulating dangerously deep stacks of L0 layers. Deletions will be - // enqueued inside RemoteTimelineClient, and executed layer if/when we transition - // to AttachedSingle state. - if !conf.location.may_upload_layers_hint() { - info!("Skipping compaction in location state {:?}", conf.location); - return Ok(CompactionOutcome::Done); - } + // Don't compact tenants that can't upload layers. We don't check `may_delete_layers_hint`, + // since we need to compact L0 even in AttachedMulti to bound read amplification. + let location = self.tenant_conf.load().location; + if !location.may_upload_layers_hint() { + info!("skipping compaction in location state {location:?}"); + return Ok(CompactionOutcome::Skipped); } - // Scan through the hashmap and collect a list of all the timelines, - // while holding the lock. Then drop the lock and actually perform the - // compactions. We don't want to block everything else while the - // compaction runs. 
- let timelines_to_compact_or_offload; - { - let timelines = self.timelines.lock().unwrap(); - timelines_to_compact_or_offload = timelines - .iter() - .filter_map(|(timeline_id, timeline)| { - let (is_active, (can_offload, _)) = - (timeline.is_active(), timeline.can_offload()); - let has_no_unoffloaded_children = { - !timelines - .iter() - .any(|(_id, tl)| tl.get_ancestor_timeline_id() == Some(*timeline_id)) - }; - let config_allows_offload = self.conf.timeline_offloading - || self - .tenant_conf - .load() - .tenant_conf - .timeline_offloading - .unwrap_or_default(); - let can_offload = - can_offload && has_no_unoffloaded_children && config_allows_offload; - if (is_active, can_offload) == (false, false) { - None - } else { - Some((*timeline_id, timeline.clone(), (is_active, can_offload))) - } - }) - .collect::>(); - drop(timelines); - } - - // Before doing any I/O work, check our circuit breaker + // Don't compact if the circuit breaker is tripped. if self.compaction_circuit_breaker.lock().unwrap().is_broken() { - info!("Skipping compaction due to previous failures"); - return Ok(CompactionOutcome::Done); + info!("skipping compaction due to previous failures"); + return Ok(CompactionOutcome::Skipped); } - let mut has_pending_task = false; + // Collect all timelines to compact, along with offload instructions and L0 counts. + let mut compact: Vec> = Vec::new(); + let mut offload: HashSet = HashSet::new(); + let mut l0_counts: HashMap = HashMap::new(); - for (timeline_id, timeline, (can_compact, can_offload)) in &timelines_to_compact_or_offload { - // pending_task_left == None: cannot compact, maybe still pending tasks - // pending_task_left == Some(Pending): compaction task left - // pending_task_left == Some(Done): no compaction task left - let pending_task_left = if *can_compact { - let compaction_outcome = timeline - .compact(cancel, EnumSet::empty(), ctx) - .instrument(info_span!("compact_timeline", %timeline_id)) - .await - .inspect_err(|e| match e { - timeline::CompactionError::ShuttingDown => (), - timeline::CompactionError::Offload(_) => { - // Failures to offload timelines do not trip the circuit breaker, because - // they do not do lots of writes the way compaction itself does: it is cheap - // to retry, and it would be bad to stop all compaction because of an issue with offloading. - } - timeline::CompactionError::Other(e) => { - self.compaction_circuit_breaker - .lock() - .unwrap() - .fail(&CIRCUIT_BREAKERS_BROKEN, e); - } - })?; - if let CompactionOutcome::Pending = compaction_outcome { - Some(CompactionOutcome::Pending) - } else { - let queue = { - let guard = self.scheduled_compaction_tasks.lock().unwrap(); - guard.get(timeline_id).cloned() - }; - if let Some(queue) = queue { - let outcome = queue - .iteration(cancel, ctx, &self.gc_block, timeline) - .await?; - Some(outcome) - } else { - Some(CompactionOutcome::Done) - } + let offload_enabled = self.get_timeline_offloading_enabled(); + let timelines = self.timelines.lock().unwrap(); + for (&timeline_id, timeline) in timelines.iter() { + // Skip inactive timelines. 
+ if !timeline.is_active() { + continue; } - } else { - None - }; - has_pending_task |= pending_task_left == Some(CompactionOutcome::Pending); - if pending_task_left == Some(CompactionOutcome::Done) && *can_offload { - pausable_failpoint!("before-timeline-auto-offload"); - match offload_timeline(self, timeline) - .instrument(info_span!("offload_timeline", %timeline_id)) - .await - { - Err(OffloadError::NotArchived) => { - // Ignore this, we likely raced with unarchival - Ok(()) - } - other => other, - }?; + + // Schedule the timeline for compaction. + compact.push(timeline.clone()); + + // Schedule the timeline for offloading if eligible. + let can_offload = offload_enabled + && timeline.can_offload().0 + && !timelines + .iter() + .any(|(_, tli)| tli.get_ancestor_timeline_id() == Some(timeline_id)); + if can_offload { + offload.insert(timeline_id); + } + } + } // release timelines lock + + for timeline in &compact { + // Collect L0 counts. Can't await while holding lock above. + if let Ok(lm) = timeline.layers.read().await.layer_map() { + l0_counts.insert(timeline.timeline_id, lm.level0_deltas().len()); } } + // Pass 1: L0 compaction across all timelines, in order of L0 count. We prioritize this to + // bound read amplification. + // + // TODO: this may spin on one or more ingest-heavy timelines, starving out image/GC + // compaction and offloading. We leave that as a potential problem to solve later. Consider + // splitting L0 and image/GC compaction to separate background jobs. + if self.get_compaction_l0_first() { + let compaction_threshold = self.get_compaction_threshold(); + let compact_l0 = compact + .iter() + .map(|tli| (tli, l0_counts.get(&tli.timeline_id).copied().unwrap_or(0))) + .filter(|&(_, l0)| l0 >= compaction_threshold) + .sorted_by_key(|&(_, l0)| l0) + .rev() + .map(|(tli, _)| tli.clone()) + .collect_vec(); + + let mut has_pending_l0 = false; + for timeline in compact_l0 { + let outcome = timeline + .compact(cancel, CompactFlags::OnlyL0Compaction.into(), ctx) + .instrument(info_span!("compact_timeline", timeline_id = %timeline.timeline_id)) + .await + .inspect_err(|err| self.maybe_trip_compaction_breaker(err))?; + match outcome { + CompactionOutcome::Done => {} + CompactionOutcome::Skipped => {} + CompactionOutcome::Pending => has_pending_l0 = true, + CompactionOutcome::YieldForL0 => has_pending_l0 = true, + } + } + if has_pending_l0 { + return Ok(CompactionOutcome::YieldForL0); // do another pass + } + } + + // Pass 2: image compaction and timeline offloading. If any timelines have accumulated + // more L0 layers, they may also be compacted here. + // + // NB: image compaction may yield if there is pending L0 compaction. + // + // TODO: it will only yield if there is pending L0 compaction on the same timeline. If a + // different timeline needs compaction, it won't. It should check `l0_compaction_trigger`. + // We leave this for a later PR. + // + // TODO: consider ordering timelines by some priority, e.g. time since last full compaction, + // amount of L1 delta debt or garbage, offload-eligible timelines first, etc. + let mut has_pending = false; + for timeline in compact { + if !timeline.is_active() { + continue; + } + + let mut outcome = timeline + .compact(cancel, EnumSet::default(), ctx) + .instrument(info_span!("compact_timeline", timeline_id = %timeline.timeline_id)) + .await + .inspect_err(|err| self.maybe_trip_compaction_breaker(err))?; + + // If we're done compacting, check the scheduled GC compaction queue for more work. 
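Editor's note: pass 1 above selects timelines at or above the compaction threshold and visits them in descending L0-count order via `sorted_by_key(..).rev()`. A standalone sketch of that ordering using plain tuples instead of timeline handles (the ids, counts, and threshold in the test are illustrative):

```rust
use itertools::Itertools as _;

/// Returns timeline ids ordered for L0-first compaction: only timelines with
/// `l0 >= threshold`, highest L0 count first.
fn order_for_l0_compaction(l0_counts: &[(u32, usize)], threshold: usize) -> Vec<u32> {
    l0_counts
        .iter()
        .filter(|(_, l0)| *l0 >= threshold)
        .sorted_by_key(|(_, l0)| *l0)
        .rev()
        .map(|(id, _)| *id)
        .collect()
}

#[test]
fn highest_l0_first() {
    let counts = [(1, 3), (2, 25), (3, 12)];
    // Timeline 1 is below the threshold; 2 has the most L0 layers, so it goes first.
    assert_eq!(order_for_l0_compaction(&counts, 10), vec![2, 3]);
}
```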
+ if outcome == CompactionOutcome::Done { + let queue = self + .scheduled_compaction_tasks + .lock() + .unwrap() + .get(&timeline.timeline_id) + .cloned(); + if let Some(queue) = queue { + outcome = queue + .iteration(cancel, ctx, &self.gc_block, &timeline) + .await?; + } + } + + // If we're done compacting, offload the timeline if requested. + if outcome == CompactionOutcome::Done && offload.contains(&timeline.timeline_id) { + pausable_failpoint!("before-timeline-auto-offload"); + offload_timeline(self, &timeline) + .instrument(info_span!("offload_timeline", timeline_id = %timeline.timeline_id)) + .await + .or_else(|err| match err { + // Ignore this, we likely raced with unarchival. + OffloadError::NotArchived => Ok(()), + err => Err(err), + })?; + } + + match outcome { + CompactionOutcome::Done => {} + CompactionOutcome::Skipped => {} + CompactionOutcome::Pending => has_pending = true, + // This mostly makes sense when the L0-only pass above is enabled, since there's + // otherwise no guarantee that we'll start with the timeline that has high L0. + CompactionOutcome::YieldForL0 => return Ok(CompactionOutcome::YieldForL0), + } + } + + // Success! Untrip the breaker if necessary. self.compaction_circuit_breaker .lock() .unwrap() .success(&CIRCUIT_BREAKERS_UNBROKEN); - Ok(if has_pending_task { - CompactionOutcome::Pending - } else { - CompactionOutcome::Done - }) + match has_pending { + true => Ok(CompactionOutcome::Pending), + false => Ok(CompactionOutcome::Done), + } + } + + /// Trips the compaction circuit breaker if appropriate. + pub(crate) fn maybe_trip_compaction_breaker(&self, err: &CompactionError) { + match err { + CompactionError::ShuttingDown => (), + // Offload failures don't trip the circuit breaker, since they're cheap to retry and + // shouldn't block compaction. + CompactionError::Offload(_) => {} + CompactionError::Other(err) => { + self.compaction_circuit_breaker + .lock() + .unwrap() + .fail(&CIRCUIT_BREAKERS_BROKEN, err); + } + } } /// Cancel scheduled compaction tasks @@ -3088,32 +3134,28 @@ impl Tenant { Ok(rx) } - // Call through to all timelines to freeze ephemeral layers if needed. Usually - // this happens during ingest: this background housekeeping is for freezing layers - // that are open but haven't been written to for some time. - async fn ingest_housekeeping(&self) { - // Scan through the hashmap and collect a list of all the timelines, - // while holding the lock. Then drop the lock and actually perform the - // compactions. We don't want to block everything else while the - // compaction runs. - let timelines = { - self.timelines - .lock() - .unwrap() - .values() - .filter_map(|timeline| { - if timeline.is_active() { - Some(timeline.clone()) - } else { - None - } - }) - .collect::>() - }; + /// Performs periodic housekeeping, via the tenant housekeeping background task. + async fn housekeeping(&self) { + // Call through to all timelines to freeze ephemeral layers as needed. This usually happens + // during ingest, but we don't want idle timelines to hold open layers for too long. + let timelines = self + .timelines + .lock() + .unwrap() + .values() + .filter(|tli| tli.is_active()) + .cloned() + .collect_vec(); - for timeline in &timelines { + for timeline in timelines { timeline.maybe_freeze_ephemeral_layer().await; } + + // Shut down walredo if idle. 
+ const WALREDO_IDLE_TIMEOUT: Duration = Duration::from_secs(180); + if let Some(ref walredo_mgr) = self.walredo_mgr { + walredo_mgr.maybe_quiesce(WALREDO_IDLE_TIMEOUT); + } } pub fn timeline_has_no_attached_children(&self, timeline_id: TimelineId) -> bool { @@ -3823,6 +3865,13 @@ impl Tenant { .unwrap_or(self.conf.default_tenant_conf.compaction_upper_limit) } + pub fn get_compaction_l0_first(&self) -> bool { + let tenant_conf = self.tenant_conf.load().tenant_conf.clone(); + tenant_conf + .compaction_l0_first + .unwrap_or(self.conf.default_tenant_conf.compaction_l0_first) + } + pub fn get_gc_horizon(&self) -> u64 { let tenant_conf = self.tenant_conf.load().tenant_conf.clone(); tenant_conf @@ -3877,6 +3926,16 @@ impl Tenant { .unwrap_or(self.conf.default_tenant_conf.lsn_lease_length) } + pub fn get_timeline_offloading_enabled(&self) -> bool { + if self.conf.timeline_offloading { + return true; + } + let tenant_conf = self.tenant_conf.load().tenant_conf.clone(); + tenant_conf + .timeline_offloading + .unwrap_or(self.conf.default_tenant_conf.timeline_offloading) + } + /// Generate an up-to-date TenantManifest based on the state of this Tenant. fn build_tenant_manifest(&self) -> TenantManifest { let timelines_offloaded = self.timelines_offloaded.lock().unwrap(); @@ -4115,6 +4174,7 @@ impl Tenant { // use an extremely long backoff. Some(Duration::from_secs(3600 * 24)), )), + l0_compaction_trigger: Arc::new(Notify::new()), scheduled_compaction_tasks: Mutex::new(Default::default()), activate_now_sem: tokio::sync::Semaphore::new(0), attach_wal_lag_cooldown: Arc::new(std::sync::OnceLock::new()), @@ -4642,22 +4702,26 @@ impl Tenant { // check against last actual 'latest_gc_cutoff' first let latest_gc_cutoff_lsn = src_timeline.get_latest_gc_cutoff_lsn(); - src_timeline - .check_lsn_is_in_scope(start_lsn, &latest_gc_cutoff_lsn) - .context(format!( - "invalid branch start lsn: less than latest GC cutoff {}", - *latest_gc_cutoff_lsn, - )) - .map_err(CreateTimelineError::AncestorLsn)?; - - // and then the planned GC cutoff { let gc_info = src_timeline.gc_info.read().unwrap(); - let cutoff = gc_info.min_cutoff(); - if start_lsn < cutoff { - return Err(CreateTimelineError::AncestorLsn(anyhow::anyhow!( - "invalid branch start lsn: less than planned GC cutoff {cutoff}" - ))); + let planned_cutoff = gc_info.min_cutoff(); + if gc_info.lsn_covered_by_lease(start_lsn) { + tracing::info!("skipping comparison of {start_lsn} with gc cutoff {} and planned gc cutoff {planned_cutoff} due to lsn lease", *latest_gc_cutoff_lsn); + } else { + src_timeline + .check_lsn_is_in_scope(start_lsn, &latest_gc_cutoff_lsn) + .context(format!( + "invalid branch start lsn: less than latest GC cutoff {}", + *latest_gc_cutoff_lsn, + )) + .map_err(CreateTimelineError::AncestorLsn)?; + + // and then the planned GC cutoff + if start_lsn < planned_cutoff { + return Err(CreateTimelineError::AncestorLsn(anyhow::anyhow!( + "invalid branch start lsn: less than planned GC cutoff {planned_cutoff}" + ))); + } } } @@ -5019,12 +5083,19 @@ impl Tenant { ) } - /// Call this before constructing a timeline, to build its required structures + /// Builds required resources for a new timeline. fn build_timeline_resources(&self, timeline_id: TimelineId) -> TimelineResources { + let remote_client = self.build_timeline_remote_client(timeline_id); + self.get_timeline_resources_for(remote_client) + } + + /// Builds timeline resources for the given remote client. 
+ fn get_timeline_resources_for(&self, remote_client: RemoteTimelineClient) -> TimelineResources { TimelineResources { - remote_client: self.build_timeline_remote_client(timeline_id), + remote_client, pagestream_throttle: self.pagestream_throttle.clone(), pagestream_throttle_metrics: self.pagestream_throttle_metrics.clone(), + l0_compaction_trigger: self.l0_compaction_trigger.clone(), l0_flush_global_state: self.l0_flush_global_state.clone(), } } @@ -5470,6 +5541,8 @@ pub(crate) mod harness { compaction_threshold: Some(tenant_conf.compaction_threshold), compaction_upper_limit: Some(tenant_conf.compaction_upper_limit), compaction_algorithm: Some(tenant_conf.compaction_algorithm), + compaction_l0_first: Some(tenant_conf.compaction_l0_first), + compaction_l0_semaphore: Some(tenant_conf.compaction_l0_semaphore), l0_flush_delay_threshold: tenant_conf.l0_flush_delay_threshold, l0_flush_stall_threshold: tenant_conf.l0_flush_stall_threshold, l0_flush_wait_upload: Some(tenant_conf.l0_flush_wait_upload), @@ -7697,6 +7770,18 @@ mod tests { } tline.freeze_and_flush().await?; + // Force layers to L1 + tline + .compact( + &cancel, + { + let mut flags = EnumSet::new(); + flags.insert(CompactFlags::ForceL0Compaction); + flags + }, + &ctx, + ) + .await?; if iter % 5 == 0 { let (_, before_delta_file_accessed) = @@ -7709,6 +7794,7 @@ mod tests { let mut flags = EnumSet::new(); flags.insert(CompactFlags::ForceImageLayerCreation); flags.insert(CompactFlags::ForceRepartition); + flags.insert(CompactFlags::ForceL0Compaction); flags }, &ctx, @@ -8155,6 +8241,8 @@ mod tests { let cancel = CancellationToken::new(); + // Image layer creation happens on the disk_consistent_lsn so we need to force set it now. + tline.force_set_disk_consistent_lsn(Lsn(0x40)); tline .compact( &cancel, @@ -8168,8 +8256,7 @@ mod tests { ) .await .unwrap(); - - // Image layers are created at last_record_lsn + // Image layers are created at repartition LSN let images = tline .inspect_image_layers(Lsn(0x40), &ctx, io_concurrency.clone()) .await diff --git a/pageserver/src/tenant/config.rs b/pageserver/src/tenant/config.rs index 972837dc44..7fdfd736ad 100644 --- a/pageserver/src/tenant/config.rs +++ b/pageserver/src/tenant/config.rs @@ -285,6 +285,14 @@ pub struct TenantConfOpt { #[serde(default)] pub compaction_algorithm: Option, + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default)] + pub compaction_l0_first: Option, + + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default)] + pub compaction_l0_semaphore: Option, + #[serde(skip_serializing_if = "Option::is_none")] #[serde(default)] pub l0_flush_delay_threshold: Option, @@ -416,6 +424,12 @@ impl TenantConfOpt { .as_ref() .unwrap_or(&global_conf.compaction_algorithm) .clone(), + compaction_l0_first: self + .compaction_l0_first + .unwrap_or(global_conf.compaction_l0_first), + compaction_l0_semaphore: self + .compaction_l0_semaphore + .unwrap_or(global_conf.compaction_l0_semaphore), l0_flush_delay_threshold: self .l0_flush_delay_threshold .or(global_conf.l0_flush_delay_threshold), @@ -466,7 +480,7 @@ impl TenantConfOpt { .lsn_lease_length_for_ts .unwrap_or(global_conf.lsn_lease_length_for_ts), timeline_offloading: self - .lazy_slru_download + .timeline_offloading .unwrap_or(global_conf.timeline_offloading), wal_receiver_protocol_override: self .wal_receiver_protocol_override @@ -493,6 +507,8 @@ impl TenantConfOpt { mut compaction_threshold, mut compaction_upper_limit, mut compaction_algorithm, + mut compaction_l0_first, + mut compaction_l0_semaphore, mut 
l0_flush_delay_threshold, mut l0_flush_stall_threshold, mut l0_flush_wait_upload, @@ -538,6 +554,10 @@ impl TenantConfOpt { .compaction_upper_limit .apply(&mut compaction_upper_limit); patch.compaction_algorithm.apply(&mut compaction_algorithm); + patch.compaction_l0_first.apply(&mut compaction_l0_first); + patch + .compaction_l0_semaphore + .apply(&mut compaction_l0_semaphore); patch .l0_flush_delay_threshold .apply(&mut l0_flush_delay_threshold); @@ -619,6 +639,8 @@ impl TenantConfOpt { compaction_threshold, compaction_upper_limit, compaction_algorithm, + compaction_l0_first, + compaction_l0_semaphore, l0_flush_delay_threshold, l0_flush_stall_threshold, l0_flush_wait_upload, @@ -681,6 +703,8 @@ impl From for models::TenantConfig { compaction_period: value.compaction_period.map(humantime), compaction_threshold: value.compaction_threshold, compaction_upper_limit: value.compaction_upper_limit, + compaction_l0_first: value.compaction_l0_first, + compaction_l0_semaphore: value.compaction_l0_semaphore, l0_flush_delay_threshold: value.l0_flush_delay_threshold, l0_flush_stall_threshold: value.l0_flush_stall_threshold, l0_flush_wait_upload: value.l0_flush_wait_upload, diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs index dfa89a765c..22ee560dbf 100644 --- a/pageserver/src/tenant/mgr.rs +++ b/pageserver/src/tenant/mgr.rs @@ -2816,8 +2816,8 @@ where } use { - crate::tenant::gc_result::GcResult, pageserver_api::models::TimelineGcRequest, - utils::http::error::ApiError, + crate::tenant::gc_result::GcResult, http_utils::error::ApiError, + pageserver_api::models::TimelineGcRequest, }; #[cfg(test)] diff --git a/pageserver/src/tenant/remote_timeline_client.rs b/pageserver/src/tenant/remote_timeline_client.rs index bcba6d1f62..713efbb9a4 100644 --- a/pageserver/src/tenant/remote_timeline_client.rs +++ b/pageserver/src/tenant/remote_timeline_client.rs @@ -437,8 +437,7 @@ impl RemoteTimelineClient { .conf .remote_storage_config .as_ref() - .and_then(|r| r.concurrency_limit()) - .unwrap_or(0); + .map_or(0, |r| r.concurrency_limit()); let mut upload_queue = self.upload_queue.lock().unwrap(); upload_queue.initialize_with_current_remote_index_part(index_part, inprogress_limit)?; self.update_remote_physical_size_gauge(Some(index_part)); @@ -461,8 +460,7 @@ impl RemoteTimelineClient { .conf .remote_storage_config .as_ref() - .and_then(|r| r.concurrency_limit()) - .unwrap_or(0); + .map_or(0, |r| r.concurrency_limit()); let mut upload_queue = self.upload_queue.lock().unwrap(); upload_queue.initialize_empty_remote(local_metadata, inprogress_limit)?; self.update_remote_physical_size_gauge(None); @@ -484,8 +482,7 @@ impl RemoteTimelineClient { .conf .remote_storage_config .as_ref() - .and_then(|r| r.concurrency_limit()) - .unwrap_or(0); + .map_or(0, |r| r.concurrency_limit()); let mut upload_queue = self.upload_queue.lock().unwrap(); upload_queue.initialize_with_current_remote_index_part(index_part, inprogress_limit)?; @@ -520,7 +517,7 @@ impl RemoteTimelineClient { if let Ok(queue) = queue_locked.initialized_mut() { let blocked_deletions = std::mem::take(&mut queue.blocked_deletions); for d in blocked_deletions { - if let Err(e) = self.deletion_queue_client.push_layers_sync( + if let Err(e) = self.deletion_queue_client.push_layers( self.tenant_shard_id, self.timeline_id, self.generation, @@ -2154,7 +2151,6 @@ impl RemoteTimelineClient { self.generation, delete.layers.clone(), ) - .await .map_err(|e| anyhow::anyhow!(e)) } } diff --git a/pageserver/src/tenant/storage_layer.rs 
b/pageserver/src/tenant/storage_layer.rs index 3800852ccc..f9f843ef6b 100644 --- a/pageserver/src/tenant/storage_layer.rs +++ b/pageserver/src/tenant/storage_layer.rs @@ -44,7 +44,7 @@ pub(crate) use layer::{EvictionError, Layer, ResidentLayer}; use self::inmemory_layer::InMemoryLayerFileId; -use super::timeline::GetVectoredError; +use super::timeline::{GetVectoredError, ReadPath}; use super::PageReconstructError; pub fn range_overlaps(a: &Range, b: &Range) -> bool @@ -262,6 +262,8 @@ pub(crate) struct ValuesReconstructState { pub(crate) io_concurrency: IoConcurrency, num_active_ios: Arc, + + pub(crate) read_path: Option, } /// The level of IO concurrency to be used on the read path @@ -609,6 +611,7 @@ impl ValuesReconstructState { delta_layers_visited: 0, io_concurrency, num_active_ios: Arc::new(AtomicUsize::new(0)), + read_path: None, } } diff --git a/pageserver/src/tenant/storage_layer/layer.rs b/pageserver/src/tenant/storage_layer/layer.rs index 92313afba7..40282defd4 100644 --- a/pageserver/src/tenant/storage_layer/layer.rs +++ b/pageserver/src/tenant/storage_layer/layer.rs @@ -353,7 +353,6 @@ impl Layer { /// while the guard exists. /// /// Returns None if the layer is currently evicted or becoming evicted. - #[cfg(test)] pub(crate) async fn keep_resident(&self) -> Option { let downloaded = self.0.inner.get().and_then(|rowe| rowe.get())?; @@ -530,7 +529,6 @@ impl ResidentOrWantedEvicted { /// This is not used on the read path (anything that calls /// [`LayerInner::get_or_maybe_download`]) because it was decided that reads always win /// evictions, and part of that winning is using [`ResidentOrWantedEvicted::get_and_upgrade`]. - #[cfg(test)] fn get(&self) -> Option> { match self { ResidentOrWantedEvicted::Resident(strong) => Some(strong.clone()), diff --git a/pageserver/src/tenant/tasks.rs b/pageserver/src/tenant/tasks.rs index d65f099182..029444e973 100644 --- a/pageserver/src/tenant/tasks.rs +++ b/pageserver/src/tenant/tasks.rs @@ -1,53 +1,83 @@ -//! This module contains functions to serve per-tenant background processes, -//! such as compaction and GC +//! This module contains per-tenant background processes, e.g. compaction and GC. 
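// Illustrative sketch (not part of this patch): the general pattern the module below uses to
// cap concurrent background work at roughly 3/4 of the Tokio worker threads, so a few workers
// stay free for short-lived tasks. The worker-thread count of 8 is an assumed value for the
// example only.
use std::cmp::max;
use std::future::Future;

use once_cell::sync::Lazy;
use tokio::sync::Semaphore;

static BACKGROUND_PERMITS: Lazy<Semaphore> = Lazy::new(|| {
    let total_threads = 8; // hypothetical worker-thread count, for illustration only
    let permits = max(1, total_threads * 3 / 4);
    assert!(permits < total_threads, "leave threads free for other work");
    Semaphore::new(permits)
});

async fn run_with_permit<F: Future<Output = ()>>(job: F) {
    // The permit is released when `_permit` is dropped at the end of the scope.
    let _permit = BACKGROUND_PERMITS
        .acquire()
        .await
        .expect("semaphore is never closed");
    job.await;
}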
-use std::ops::ControlFlow; -use std::str::FromStr; +use std::cmp::max; +use std::future::Future; +use std::ops::{ControlFlow, RangeInclusive}; +use std::pin::pin; use std::sync::Arc; use std::time::{Duration, Instant}; +use once_cell::sync::Lazy; +use rand::Rng; +use scopeguard::defer; +use tokio::sync::{Semaphore, SemaphorePermit}; +use tokio_util::sync::CancellationToken; +use tracing::*; + use crate::context::{DownloadBehavior, RequestContext}; -use crate::metrics::TENANT_TASK_EVENTS; -use crate::task_mgr; -use crate::task_mgr::{TaskKind, BACKGROUND_RUNTIME}; +use crate::metrics::{self, BackgroundLoopSemaphoreMetricsRecorder, TENANT_TASK_EVENTS}; +use crate::task_mgr::{self, TaskKind, BACKGROUND_RUNTIME, TOKIO_WORKER_THREADS}; use crate::tenant::throttle::Stats; use crate::tenant::timeline::compaction::CompactionOutcome; use crate::tenant::timeline::CompactionError; use crate::tenant::{Tenant, TenantState}; -use rand::Rng; -use tokio_util::sync::CancellationToken; -use tracing::*; -use utils::{backoff, completion, pausable_failpoint}; +use pageserver_api::config::tenant_conf_defaults::DEFAULT_COMPACTION_PERIOD; +use utils::backoff::exponential_backoff_duration; +use utils::completion::Barrier; +use utils::pausable_failpoint; -static CONCURRENT_BACKGROUND_TASKS: once_cell::sync::Lazy = - once_cell::sync::Lazy::new(|| { - let total_threads = task_mgr::TOKIO_WORKER_THREADS.get(); - let permits = usize::max( - 1, - // while a lot of the work is done on spawn_blocking, we still do - // repartitioning in the async context. this should give leave us some workers - // unblocked to be blocked on other work, hopefully easing any outside visible - // effects of restarts. - // - // 6/8 is a guess; previously we ran with unlimited 8 and more from - // spawn_blocking. - (total_threads * 3).checked_div(4).unwrap_or(0), - ); - assert_ne!(permits, 0, "we will not be adding in permits later"); - assert!( - permits < total_threads, - "need threads avail for shorter work" - ); - tokio::sync::Semaphore::new(permits) - }); +/// Semaphore limiting concurrent background tasks (across all tenants). +/// +/// We use 3/4 Tokio threads, to avoid blocking all threads in case we do any CPU-heavy work. +static CONCURRENT_BACKGROUND_TASKS: Lazy = Lazy::new(|| { + let total_threads = TOKIO_WORKER_THREADS.get(); + let permits = max(1, (total_threads * 3).checked_div(4).unwrap_or(0)); + assert_ne!(permits, 0, "we will not be adding in permits later"); + assert!(permits < total_threads, "need threads for other work"); + Semaphore::new(permits) +}); -#[derive(Debug, PartialEq, Eq, Clone, Copy, strum_macros::IntoStaticStr, enum_map::Enum)] +/// Semaphore limiting concurrent L0 compaction tasks (across all tenants). This is only used if +/// both `compaction_l0_semaphore` and `compaction_l0_first` are enabled. +/// +/// This is a separate semaphore from background tasks, because L0 compaction needs to be responsive +/// to avoid high read amp during heavy write workloads. Regular image/GC compaction is less +/// important (e.g. due to page images in delta layers) and can wait for other background tasks. +/// +/// We use 3/4 Tokio threads, to avoid blocking all threads in case we do any CPU-heavy work. Note +/// that this runs on the same Tokio runtime as `CONCURRENT_BACKGROUND_TASKS`, and shares the same +/// thread pool. 
+static CONCURRENT_L0_COMPACTION_TASKS: Lazy = Lazy::new(|| { + let total_threads = TOKIO_WORKER_THREADS.get(); + let permits = max(1, (total_threads * 3).checked_div(4).unwrap_or(0)); + assert_ne!(permits, 0, "we will not be adding in permits later"); + assert!(permits < total_threads, "need threads for other work"); + Semaphore::new(permits) +}); + +/// Background jobs. +/// +/// NB: not all of these acquire a CONCURRENT_BACKGROUND_TASKS semaphore permit, only the ones that +/// do any significant IO or CPU work. +#[derive( + Debug, + PartialEq, + Eq, + Clone, + Copy, + strum_macros::IntoStaticStr, + strum_macros::Display, + enum_map::Enum, +)] #[strum(serialize_all = "snake_case")] pub(crate) enum BackgroundLoopKind { + /// L0Compaction runs as a separate pass within the Compaction loop, not a separate loop. It is + /// used to request the `CONCURRENT_L0_COMPACTION_TASKS` semaphore and associated metrics. + L0Compaction, Compaction, Gc, Eviction, - IngestHouseKeeping, + TenantHouseKeeping, ConsumptionMetricsCollectMetrics, ConsumptionMetricsSyntheticSizeWorker, InitialLogicalSizeCalculation, @@ -55,36 +85,41 @@ pub(crate) enum BackgroundLoopKind { SecondaryDownload, } -impl BackgroundLoopKind { - fn as_static_str(&self) -> &'static str { - self.into() - } +pub struct BackgroundLoopSemaphorePermit<'a> { + _permit: SemaphorePermit<'static>, + _recorder: BackgroundLoopSemaphoreMetricsRecorder<'a>, } -/// Cancellation safe. -pub(crate) async fn concurrent_background_tasks_rate_limit_permit( +/// Acquires a semaphore permit, to limit concurrent background jobs. +pub(crate) async fn acquire_concurrency_permit( loop_kind: BackgroundLoopKind, _ctx: &RequestContext, -) -> tokio::sync::SemaphorePermit<'static> { - let _guard = crate::metrics::BACKGROUND_LOOP_SEMAPHORE.measure_acquisition(loop_kind); +) -> BackgroundLoopSemaphorePermit<'static> { + let mut recorder = metrics::BACKGROUND_LOOP_SEMAPHORE.record(loop_kind); if loop_kind == BackgroundLoopKind::InitialLogicalSizeCalculation { pausable_failpoint!("initial-size-calculation-permit-pause"); } // TODO: assert that we run on BACKGROUND_RUNTIME; requires tokio_unstable Handle::id(); - match CONCURRENT_BACKGROUND_TASKS.acquire().await { - Ok(permit) => permit, - Err(_closed) => unreachable!("we never close the semaphore"), + let semaphore = match loop_kind { + BackgroundLoopKind::L0Compaction => &CONCURRENT_L0_COMPACTION_TASKS, + _ => &CONCURRENT_BACKGROUND_TASKS, + }; + let permit = semaphore.acquire().await.expect("should never close"); + + recorder.acquired(); + + BackgroundLoopSemaphorePermit { + _permit: permit, + _recorder: recorder, } } -/// Start per tenant background loops: compaction and gc. -pub fn start_background_loops( - tenant: &Arc, - background_jobs_can_start: Option<&completion::Barrier>, -) { +/// Start per tenant background loops: compaction, GC, and ingest housekeeping. +pub fn start_background_loops(tenant: &Arc, can_start: Option<&Barrier>) { let tenant_shard_id = tenant.tenant_shard_id; + task_mgr::spawn( BACKGROUND_RUNTIME.handle(), TaskKind::Compaction, @@ -93,13 +128,15 @@ pub fn start_background_loops( &format!("compactor for tenant {tenant_shard_id}"), { let tenant = Arc::clone(tenant); - let background_jobs_can_start = background_jobs_can_start.cloned(); + let can_start = can_start.cloned(); async move { - let cancel = task_mgr::shutdown_token(); + let cancel = task_mgr::shutdown_token(); // NB: must be in async context tokio::select! 
{ - _ = cancel.cancelled() => { return Ok(()) }, - _ = completion::Barrier::maybe_wait(background_jobs_can_start) => {} + _ = cancel.cancelled() => return Ok(()), + _ = Barrier::maybe_wait(can_start) => {} }; + TENANT_TASK_EVENTS.with_label_values(&["start"]).inc(); + defer!(TENANT_TASK_EVENTS.with_label_values(&["stop"]).inc()); compaction_loop(tenant, cancel) // If you rename this span, change the RUST_LOG env variable in test_runner/performance/test_branch_creation.py .instrument(info_span!("compaction_loop", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug())) @@ -108,6 +145,7 @@ pub fn start_background_loops( } }, ); + task_mgr::spawn( BACKGROUND_RUNTIME.handle(), TaskKind::GarbageCollector, @@ -116,13 +154,15 @@ pub fn start_background_loops( &format!("garbage collector for tenant {tenant_shard_id}"), { let tenant = Arc::clone(tenant); - let background_jobs_can_start = background_jobs_can_start.cloned(); + let can_start = can_start.cloned(); async move { - let cancel = task_mgr::shutdown_token(); + let cancel = task_mgr::shutdown_token(); // NB: must be in async context tokio::select! { - _ = cancel.cancelled() => { return Ok(()) }, - _ = completion::Barrier::maybe_wait(background_jobs_can_start) => {} + _ = cancel.cancelled() => return Ok(()), + _ = Barrier::maybe_wait(can_start) => {} }; + TENANT_TASK_EVENTS.with_label_values(&["start"]).inc(); + defer!(TENANT_TASK_EVENTS.with_label_values(&["stop"]).inc()); gc_loop(tenant, cancel) .instrument(info_span!("gc_loop", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug())) .await; @@ -133,21 +173,23 @@ pub fn start_background_loops( task_mgr::spawn( BACKGROUND_RUNTIME.handle(), - TaskKind::IngestHousekeeping, + TaskKind::TenantHousekeeping, tenant_shard_id, None, - &format!("ingest housekeeping for tenant {tenant_shard_id}"), + &format!("housekeeping for tenant {tenant_shard_id}"), { let tenant = Arc::clone(tenant); - let background_jobs_can_start = background_jobs_can_start.cloned(); + let can_start = can_start.cloned(); async move { - let cancel = task_mgr::shutdown_token(); + let cancel = task_mgr::shutdown_token(); // NB: must be in async context tokio::select! { - _ = cancel.cancelled() => { return Ok(()) }, - _ = completion::Barrier::maybe_wait(background_jobs_can_start) => {} + _ = cancel.cancelled() => return Ok(()), + _ = Barrier::maybe_wait(can_start) => {} }; - ingest_housekeeping_loop(tenant, cancel) - .instrument(info_span!("ingest_housekeeping_loop", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug())) + TENANT_TASK_EVENTS.with_label_values(&["start"]).inc(); + defer!(TENANT_TASK_EVENTS.with_label_values(&["stop"]).inc()); + tenant_housekeeping_loop(tenant, cancel) + .instrument(info_span!("tenant_housekeeping_loop", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug())) .await; Ok(()) } @@ -155,372 +197,292 @@ pub fn start_background_loops( ); } -/// -/// Compaction task's main loop -/// +/// Compaction task's main loop. 
async fn compaction_loop(tenant: Arc, cancel: CancellationToken) { + const BASE_BACKOFF_SECS: f64 = 1.0; const MAX_BACKOFF_SECS: f64 = 300.0; - // How many errors we have seen consequtively - let mut error_run_count = 0; + const RECHECK_CONFIG_INTERVAL: Duration = Duration::from_secs(10); - TENANT_TASK_EVENTS.with_label_values(&["start"]).inc(); - async { - let ctx = RequestContext::todo_child(TaskKind::Compaction, DownloadBehavior::Download); - let mut first = true; - loop { + let ctx = RequestContext::todo_child(TaskKind::Compaction, DownloadBehavior::Download); + let mut period = tenant.get_compaction_period(); + let mut error_run = 0; // consecutive errors + + // Stagger the compaction loop across tenants. + if wait_for_active_tenant(&tenant, &cancel).await.is_break() { + return; + } + if sleep_random(period, &cancel).await.is_err() { + return; + } + + loop { + // Recheck that we're still active. + if wait_for_active_tenant(&tenant, &cancel).await.is_break() { + return; + } + + // Refresh the period. If compaction is disabled, check again in a bit. + period = tenant.get_compaction_period(); + if period == Duration::ZERO { + #[cfg(not(feature = "testing"))] + info!("automatic compaction is disabled"); tokio::select! { - _ = cancel.cancelled() => { - return; - }, - tenant_wait_result = wait_for_active_tenant(&tenant) => match tenant_wait_result { - ControlFlow::Break(()) => return, - ControlFlow::Continue(()) => (), - }, + _ = tokio::time::sleep(RECHECK_CONFIG_INTERVAL) => {}, + _ = cancel.cancelled() => return, } + continue; + } - let period = tenant.get_compaction_period(); + // Wait for the next compaction run. + let backoff = exponential_backoff_duration(error_run, BASE_BACKOFF_SECS, MAX_BACKOFF_SECS); + tokio::select! { + _ = tokio::time::sleep(backoff), if error_run > 0 => {}, + _ = tokio::time::sleep(period), if error_run == 0 => {}, + _ = tenant.l0_compaction_trigger.notified(), if error_run == 0 => {}, + _ = cancel.cancelled() => return, + } - // TODO: we shouldn't need to await to find tenant and this could be moved outside of - // loop, #3501. There are also additional "allowed_errors" in tests. - if first { - first = false; - if random_init_delay(period, &cancel).await.is_err() { - break; + // Run compaction. + let iteration = Iteration { + started_at: Instant::now(), + period, + kind: BackgroundLoopKind::Compaction, + }; + let IterationResult { output, elapsed } = iteration + .run(tenant.compaction_iteration(&cancel, &ctx)) + .await; + + match output { + Ok(outcome) => { + error_run = 0; + // If there's more compaction work, L0 or not, schedule an immediate run. + match outcome { + CompactionOutcome::Done => {} + CompactionOutcome::Skipped => {} + CompactionOutcome::YieldForL0 => tenant.l0_compaction_trigger.notify_one(), + CompactionOutcome::Pending => tenant.l0_compaction_trigger.notify_one(), } } - let sleep_duration; - if period == Duration::ZERO { - #[cfg(not(feature = "testing"))] - info!("automatic compaction is disabled"); - // check again in 10 seconds, in case it's been enabled again. 
- sleep_duration = Duration::from_secs(10) - } else { - let iteration = Iteration { - started_at: Instant::now(), - period, - kind: BackgroundLoopKind::Compaction, - }; - - // Run compaction - let IterationResult { output, elapsed } = iteration - .run(tenant.compaction_iteration(&cancel, &ctx)) - .await; - match output { - Ok(outcome) => { - error_run_count = 0; - // schedule the next compaction immediately in case there is a pending compaction task - sleep_duration = if let CompactionOutcome::Pending = outcome { - Duration::from_secs(1) - } else { - period - }; - } - Err(e) => { - let wait_duration = backoff::exponential_backoff_duration_seconds( - error_run_count + 1, - 1.0, - MAX_BACKOFF_SECS, - ); - error_run_count += 1; - let wait_duration = Duration::from_secs_f64(wait_duration); - log_compaction_error( - &e, - error_run_count, - &wait_duration, - cancel.is_cancelled(), - ); - sleep_duration = wait_duration; - } - } - - // the duration is recorded by performance tests by enabling debug in this function - tracing::debug!( - elapsed_ms = elapsed.as_millis(), - "compaction iteration complete" - ); - }; - - // Perhaps we did no work and the walredo process has been idle for some time: - // give it a chance to shut down to avoid leaving walredo process running indefinitely. - // TODO: move this to a separate task (housekeeping loop) that isn't affected by the back-off, - // so we get some upper bound guarantee on when walredo quiesce / this throttling reporting here happens. - if let Some(walredo_mgr) = &tenant.walredo_mgr { - walredo_mgr.maybe_quiesce(period * 10); - } - - // Sleep - if tokio::time::timeout(sleep_duration, cancel.cancelled()) - .await - .is_ok() - { - break; + Err(err) => { + error_run += 1; + let backoff = + exponential_backoff_duration(error_run, BASE_BACKOFF_SECS, MAX_BACKOFF_SECS); + log_compaction_error(&err, error_run, backoff, cancel.is_cancelled()); + continue; } } + + // NB: this log entry is recorded by performance tests. 
+ debug!( + elapsed_ms = elapsed.as_millis(), + "compaction iteration complete" + ); } - .await; - TENANT_TASK_EVENTS.with_label_values(&["stop"]).inc(); } fn log_compaction_error( - e: &CompactionError, - error_run_count: u32, - sleep_duration: &std::time::Duration, + err: &CompactionError, + error_count: u32, + sleep_duration: Duration, task_cancelled: bool, ) { use crate::tenant::upload_queue::NotInitialized; use crate::tenant::PageReconstructError; use CompactionError::*; - enum LooksLike { - Info, - Error, - } + let level = match err { + ShuttingDown => return, + Offload(_) => Level::ERROR, + _ if task_cancelled => Level::INFO, + Other(err) => { + let root_cause = err.root_cause(); - let decision = match e { - ShuttingDown => None, - Offload(_) => Some(LooksLike::Error), - _ if task_cancelled => Some(LooksLike::Info), - Other(e) => { - let root_cause = e.root_cause(); - - let is_stopping = { - let upload_queue = root_cause - .downcast_ref::() - .is_some_and(|e| e.is_stopping()); - - let timeline = root_cause - .downcast_ref::() - .is_some_and(|e| e.is_stopping()); - - upload_queue || timeline - }; + let upload_queue = root_cause + .downcast_ref::() + .is_some_and(|e| e.is_stopping()); + let timeline = root_cause + .downcast_ref::() + .is_some_and(|e| e.is_stopping()); + let is_stopping = upload_queue || timeline; if is_stopping { - Some(LooksLike::Info) + Level::INFO } else { - Some(LooksLike::Error) + Level::ERROR } } }; - match decision { - Some(LooksLike::Info) => info!( - "Compaction failed {error_run_count} times, retrying in {sleep_duration:?}: {e:#}", - ), - Some(LooksLike::Error) => error!( - "Compaction failed {error_run_count} times, retrying in {sleep_duration:?}: {e:?}", - ), - None => {} + match level { + Level::ERROR => { + error!("Compaction failed {error_count} times, retrying in {sleep_duration:?}: {err:#}") + } + Level::INFO => { + info!("Compaction failed {error_count} times, retrying in {sleep_duration:?}: {err:#}") + } + level => unimplemented!("unexpected level {level:?}"), } } -/// -/// GC task's main loop -/// +/// GC task's main loop. async fn gc_loop(tenant: Arc, cancel: CancellationToken) { const MAX_BACKOFF_SECS: f64 = 300.0; - // How many errors we have seen consequtively - let mut error_run_count = 0; + let mut error_run = 0; // consecutive errors - TENANT_TASK_EVENTS.with_label_values(&["start"]).inc(); - async { - // GC might require downloading, to find the cutoff LSN that corresponds to the - // cutoff specified as time. - let ctx = - RequestContext::todo_child(TaskKind::GarbageCollector, DownloadBehavior::Download); + // GC might require downloading, to find the cutoff LSN that corresponds to the + // cutoff specified as time. + let ctx = RequestContext::todo_child(TaskKind::GarbageCollector, DownloadBehavior::Download); + let mut first = true; - let mut first = true; - loop { - tokio::select! 
{ - _ = cancel.cancelled() => { - return; - }, - tenant_wait_result = wait_for_active_tenant(&tenant) => match tenant_wait_result { - ControlFlow::Break(()) => return, - ControlFlow::Continue(()) => (), - }, - } + loop { + if wait_for_active_tenant(&tenant, &cancel).await.is_break() { + return; + } - let period = tenant.get_gc_period(); + let period = tenant.get_gc_period(); - if first { - first = false; - - let delays = async { - random_init_delay(period, &cancel).await?; - Ok::<_, Cancelled>(()) - }; - - if delays.await.is_err() { - break; - } - } - - let gc_horizon = tenant.get_gc_horizon(); - let sleep_duration; - if period == Duration::ZERO || gc_horizon == 0 { - #[cfg(not(feature = "testing"))] - info!("automatic GC is disabled"); - // check again in 10 seconds, in case it's been enabled again. - sleep_duration = Duration::from_secs(10); - } else { - let iteration = Iteration { - started_at: Instant::now(), - period, - kind: BackgroundLoopKind::Gc, - }; - // Run gc - let IterationResult { output, elapsed: _ } = - iteration.run(tenant.gc_iteration(None, gc_horizon, tenant.get_pitr_interval(), &cancel, &ctx)) - .await; - match output { - Ok(_) => { - error_run_count = 0; - sleep_duration = period; - } - Err(crate::tenant::GcError::TenantCancelled) => { - return; - } - Err(e) => { - let wait_duration = backoff::exponential_backoff_duration_seconds( - error_run_count + 1, - 1.0, - MAX_BACKOFF_SECS, - ); - error_run_count += 1; - let wait_duration = Duration::from_secs_f64(wait_duration); - - if matches!(e, crate::tenant::GcError::TimelineCancelled) { - // Timeline was cancelled during gc. We might either be in an event - // that affects the entire tenant (tenant deletion, pageserver shutdown), - // or in one that affects the timeline only (timeline deletion). - // Therefore, don't exit the loop. - info!("Gc failed {error_run_count} times, retrying in {wait_duration:?}: {e:?}"); - } else { - error!("Gc failed {error_run_count} times, retrying in {wait_duration:?}: {e:?}"); - } - - sleep_duration = wait_duration; - } - } - }; - - if tokio::time::timeout(sleep_duration, cancel.cancelled()) - .await - .is_ok() - { + if first { + first = false; + if sleep_random(period, &cancel).await.is_err() { break; } } - } - .await; - TENANT_TASK_EVENTS.with_label_values(&["stop"]).inc(); -} - -async fn ingest_housekeeping_loop(tenant: Arc, cancel: CancellationToken) { - TENANT_TASK_EVENTS.with_label_values(&["start"]).inc(); - async { - let mut last_throttle_flag_reset_at = Instant::now(); - loop { - tokio::select! { - _ = cancel.cancelled() => { - return; - }, - tenant_wait_result = wait_for_active_tenant(&tenant) => match tenant_wait_result { - ControlFlow::Break(()) => return, - ControlFlow::Continue(()) => (), - }, - } - - // We run ingest housekeeping with the same frequency as compaction: it is not worth - // having a distinct setting. But we don't run it in the same task, because compaction - // blocks on acquiring the background job semaphore. 
- let period = tenant.get_compaction_period(); - - // If compaction period is set to zero (to disable it), then we will use a reasonable default - let period = if period == Duration::ZERO { - humantime::Duration::from_str( - pageserver_api::config::tenant_conf_defaults::DEFAULT_COMPACTION_PERIOD, - ) - .unwrap() - .into() - } else { - period - }; - - // Jitter the period by +/- 5% - let period = - rand::thread_rng().gen_range((period * (95)) / 100..(period * (105)) / 100); - - // Always sleep first: we do not need to do ingest housekeeping early in the lifetime of - // a tenant, since it won't have started writing any ephemeral files yet. - if tokio::time::timeout(period, cancel.cancelled()) - .await - .is_ok() - { - break; - } + let gc_horizon = tenant.get_gc_horizon(); + let sleep_duration; + if period == Duration::ZERO || gc_horizon == 0 { + #[cfg(not(feature = "testing"))] + info!("automatic GC is disabled"); + // check again in 10 seconds, in case it's been enabled again. + sleep_duration = Duration::from_secs(10); + } else { let iteration = Iteration { started_at: Instant::now(), period, - kind: BackgroundLoopKind::IngestHouseKeeping, + kind: BackgroundLoopKind::Gc, }; - iteration.run(tenant.ingest_housekeeping()).await; - - // TODO: rename the background loop kind to something more generic, like, tenant housekeeping. - // Or just spawn another background loop for this throttle, it's not like it's super costly. - info_span!(parent: None, "pagestream_throttle", tenant_id=%tenant.tenant_shard_id, shard_id=%tenant.tenant_shard_id.shard_slug()).in_scope(|| { - let now = Instant::now(); - let prev = std::mem::replace(&mut last_throttle_flag_reset_at, now); - let Stats { count_accounted_start, count_accounted_finish, count_throttled, sum_throttled_usecs} = tenant.pagestream_throttle.reset_stats(); - if count_throttled == 0 { + // Run gc + let IterationResult { output, elapsed: _ } = iteration + .run(tenant.gc_iteration( + None, + gc_horizon, + tenant.get_pitr_interval(), + &cancel, + &ctx, + )) + .await; + match output { + Ok(_) => { + error_run = 0; + sleep_duration = period; + } + Err(crate::tenant::GcError::TenantCancelled) => { return; } - let allowed_rps = tenant.pagestream_throttle.steady_rps(); - let delta = now - prev; - info!( - n_seconds=%format_args!("{:.3}", delta.as_secs_f64()), - count_accounted = count_accounted_finish, // don't break existing log scraping - count_throttled, - sum_throttled_usecs, - count_accounted_start, // log after pre-existing fields to not break existing log scraping - allowed_rps=%format_args!("{allowed_rps:.0}"), - "shard was throttled in the last n_seconds" - ); - }); - } - } - .await; - TENANT_TASK_EVENTS.with_label_values(&["stop"]).inc(); -} + Err(e) => { + error_run += 1; + let wait_duration = + exponential_backoff_duration(error_run, 1.0, MAX_BACKOFF_SECS); -async fn wait_for_active_tenant(tenant: &Arc) -> ControlFlow<()> { - // if the tenant has a proper status already, no need to wait for anything - if tenant.current_state() == TenantState::Active { - ControlFlow::Continue(()) - } else { - let mut tenant_state_updates = tenant.subscribe_for_state_updates(); - loop { - match tenant_state_updates.changed().await { - Ok(()) => { - let new_state = &*tenant_state_updates.borrow(); - match new_state { - TenantState::Active => { - debug!("Tenant state changed to active, continuing the task loop"); - return ControlFlow::Continue(()); - } - state => { - debug!("Not running the task loop, tenant is not active: {state:?}"); - continue; - } + if 
matches!(e, crate::tenant::GcError::TimelineCancelled) { + // Timeline was cancelled during gc. We might either be in an event + // that affects the entire tenant (tenant deletion, pageserver shutdown), + // or in one that affects the timeline only (timeline deletion). + // Therefore, don't exit the loop. + info!("Gc failed {error_run} times, retrying in {wait_duration:?}: {e:?}"); + } else { + error!("Gc failed {error_run} times, retrying in {wait_duration:?}: {e:?}"); } - } - Err(_sender_dropped_error) => { - return ControlFlow::Break(()); + + sleep_duration = wait_duration; } } + }; + + if tokio::time::timeout(sleep_duration, cancel.cancelled()) + .await + .is_ok() + { + break; + } + } +} + +/// Tenant housekeeping's main loop. +async fn tenant_housekeeping_loop(tenant: Arc, cancel: CancellationToken) { + let mut last_throttle_flag_reset_at = Instant::now(); + loop { + if wait_for_active_tenant(&tenant, &cancel).await.is_break() { + return; + } + + // Use the same period as compaction; it's not worth a separate setting. But if it's set to + // zero (to disable compaction), then use a reasonable default. Jitter it by 5%. + let period = match tenant.get_compaction_period() { + Duration::ZERO => humantime::parse_duration(DEFAULT_COMPACTION_PERIOD).unwrap(), + period => period, + }; + + let Ok(period) = sleep_jitter(period, period * 5 / 100, &cancel).await else { + break; + }; + + // Do tenant housekeeping. + let iteration = Iteration { + started_at: Instant::now(), + period, + kind: BackgroundLoopKind::TenantHouseKeeping, + }; + iteration.run(tenant.housekeeping()).await; + + // Log any getpage throttling. + info_span!(parent: None, "pagestream_throttle", tenant_id=%tenant.tenant_shard_id, shard_id=%tenant.tenant_shard_id.shard_slug()).in_scope(|| { + let now = Instant::now(); + let prev = std::mem::replace(&mut last_throttle_flag_reset_at, now); + let Stats { count_accounted_start, count_accounted_finish, count_throttled, sum_throttled_usecs} = tenant.pagestream_throttle.reset_stats(); + if count_throttled == 0 { + return; + } + let allowed_rps = tenant.pagestream_throttle.steady_rps(); + let delta = now - prev; + info!( + n_seconds=%format_args!("{:.3}", delta.as_secs_f64()), + count_accounted = count_accounted_finish, // don't break existing log scraping + count_throttled, + sum_throttled_usecs, + count_accounted_start, // log after pre-existing fields to not break existing log scraping + allowed_rps=%format_args!("{allowed_rps:.0}"), + "shard was throttled in the last n_seconds" + ); + }); + } +} + +/// Waits until the tenant becomes active, or returns `ControlFlow::Break()` to shut down. +async fn wait_for_active_tenant( + tenant: &Arc, + cancel: &CancellationToken, +) -> ControlFlow<()> { + if tenant.current_state() == TenantState::Active { + return ControlFlow::Continue(()); + } + + let mut update_rx = tenant.subscribe_for_state_updates(); + loop { + tokio::select! { + _ = cancel.cancelled() => return ControlFlow::Break(()), + result = update_rx.changed() => if result.is_err() { + return ControlFlow::Break(()); + } + } + + match &*update_rx.borrow() { + TenantState::Active => { + debug!("Tenant state changed to active, continuing the task loop"); + return ControlFlow::Continue(()); + } + state => debug!("Not running the task loop, tenant is not active: {state:?}"), } } } @@ -529,26 +491,41 @@ async fn wait_for_active_tenant(tenant: &Arc) -> ControlFlow<()> { #[error("cancelled")] pub(crate) struct Cancelled; -/// Provide a random delay for background task initialization. 
+/// Sleeps for a random interval up to the given max value. /// /// This delay prevents a thundering herd of background tasks and will likely keep them running on /// different periods for more stable load. -pub(crate) async fn random_init_delay( - period: Duration, +pub(crate) async fn sleep_random( + max: Duration, cancel: &CancellationToken, -) -> Result<(), Cancelled> { - if period == Duration::ZERO { - return Ok(()); - } +) -> Result { + sleep_random_range(Duration::ZERO..=max, cancel).await +} - let d = { - let mut rng = rand::thread_rng(); - rng.gen_range(Duration::ZERO..=period) - }; - match tokio::time::timeout(d, cancel.cancelled()).await { - Ok(_) => Err(Cancelled), - Err(_) => Ok(()), +/// Sleeps for a random interval in the given range. Returns the duration. +pub(crate) async fn sleep_random_range( + interval: RangeInclusive, + cancel: &CancellationToken, +) -> Result { + let delay = rand::thread_rng().gen_range(interval); + if delay == Duration::ZERO { + return Ok(delay); } + tokio::select! { + _ = cancel.cancelled() => Err(Cancelled), + _ = tokio::time::sleep(delay) => Ok(delay), + } +} + +/// Sleeps for an interval with a random jitter. +pub(crate) async fn sleep_jitter( + duration: Duration, + jitter: Duration, + cancel: &CancellationToken, +) -> Result { + let from = duration.saturating_sub(jitter); + let to = duration.saturating_add(jitter); + sleep_random_range(from..=to, cancel).await } struct Iteration { @@ -564,42 +541,25 @@ struct IterationResult { impl Iteration { #[instrument(skip_all)] - pub(crate) async fn run(self, fut: Fut) -> IterationResult - where - Fut: std::future::Future, - { - let Self { - started_at, - period, - kind, - } = self; - - let mut fut = std::pin::pin!(fut); + pub(crate) async fn run, O>(self, fut: F) -> IterationResult { + let mut fut = pin!(fut); // Wrap `fut` into a future that logs a message every `period` so that we get a // very obvious breadcrumb in the logs _while_ a slow iteration is happening. - let liveness_logger = async move { - loop { - match tokio::time::timeout(period, &mut fut).await { - Ok(x) => return x, - Err(_) => { - // info level as per the same rationale why warn_when_period_overrun is info - // => https://github.com/neondatabase/neon/pull/5724 - info!("still running"); - } - } + let output = loop { + match tokio::time::timeout(self.period, &mut fut).await { + Ok(r) => break r, + Err(_) => info!("still running"), } }; - - let output = liveness_logger.await; - - let elapsed = started_at.elapsed(); - warn_when_period_overrun(elapsed, period, kind); + let elapsed = self.started_at.elapsed(); + warn_when_period_overrun(elapsed, self.period, self.kind); IterationResult { output, elapsed } } } -/// Attention: the `task` and `period` beocme labels of a pageserver-wide prometheus metric. + +// NB: the `task` and `period` are used for metrics labels. 
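// Illustrative sketch (not part of this patch): the cancellable, jittered sleep pattern that
// `sleep_random`/`sleep_jitter` above implement, restated here as a self-contained function.
// A random delay staggers the per-tenant loops so they do not all wake at the same instant.
use std::time::Duration;

use rand::Rng;
use tokio_util::sync::CancellationToken;

/// Sleeps for a random duration in `[period - jitter, period + jitter]`, or returns `Err(())`
/// if cancelled first. The real code returns a dedicated `Cancelled` error type instead of `()`.
async fn staggered_sleep(
    period: Duration,
    jitter: Duration,
    cancel: &CancellationToken,
) -> Result<Duration, ()> {
    let from = period.saturating_sub(jitter);
    let to = period.saturating_add(jitter);
    let delay = rand::thread_rng().gen_range(from..=to);
    tokio::select! {
        _ = cancel.cancelled() => Err(()),
        _ = tokio::time::sleep(delay) => Ok(delay),
    }
}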
pub(crate) fn warn_when_period_overrun( elapsed: Duration, period: Duration, @@ -616,8 +576,8 @@ pub(crate) fn warn_when_period_overrun( ?task, "task iteration took longer than the configured period" ); - crate::metrics::BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT - .with_label_values(&[task.as_static_str(), &format!("{}", period.as_secs())]) + metrics::BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT + .with_label_values(&[task.into(), &format!("{}", period.as_secs())]) .inc(); } } diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index b6a349a209..aa71ccbbab 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -45,13 +45,12 @@ use rand::Rng; use remote_storage::DownloadError; use serde_with::serde_as; use storage_broker::BrokerClientChannel; +use tokio::runtime::Handle; use tokio::sync::mpsc::Sender; -use tokio::{ - runtime::Handle, - sync::{oneshot, watch}, -}; +use tokio::sync::{oneshot, watch, Notify}; use tokio_util::sync::CancellationToken; use tracing::*; +use utils::critical; use utils::rate_limit::RateLimit; use utils::{ fs_ext, @@ -226,6 +225,7 @@ pub struct TimelineResources { pub remote_client: RemoteTimelineClient, pub pagestream_throttle: Arc, pub pagestream_throttle_metrics: Arc, + pub l0_compaction_trigger: Arc, pub l0_flush_global_state: l0_flush::L0FlushGlobalState, } @@ -425,6 +425,9 @@ pub struct Timeline { /// If true, the last compaction failed. compaction_failed: AtomicBool, + /// Notifies the tenant compaction loop that there is pending L0 compaction work. + l0_compaction_trigger: Arc, + /// Make sure we only have one running gc at a time. /// /// Must only be taken in two places: @@ -531,6 +534,9 @@ impl GcInfo { pub(super) fn remove_child_offloaded(&mut self, child_id: TimelineId) -> bool { self.remove_child_maybe_offloaded(child_id, MaybeOffloaded::Yes) } + pub(crate) fn lsn_covered_by_lease(&self, lsn: Lsn) -> bool { + self.leases.contains_key(&lsn) + } } /// The `GcInfo` component describing which Lsns need to be retained. Functionally, this @@ -622,6 +628,71 @@ impl From for GetVectoredError { } } +/// A layer identifier when used in the [`ReadPath`] structure. This enum is for observability purposes +/// only and not used by the "real read path". 
+pub enum ReadPathLayerId { + PersistentLayer(PersistentLayerKey), + InMemoryLayer(Range), +} + +impl std::fmt::Display for ReadPathLayerId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ReadPathLayerId::PersistentLayer(key) => write!(f, "{}", key), + ReadPathLayerId::InMemoryLayer(range) => { + write!(f, "in-mem {}..{}", range.start, range.end) + } + } + } +} +pub struct ReadPath { + keyspace: KeySpace, + lsn: Lsn, + path: Vec<(ReadPathLayerId, KeySpace, Range)>, +} + +impl ReadPath { + pub fn new(keyspace: KeySpace, lsn: Lsn) -> Self { + Self { + keyspace, + lsn, + path: Vec::new(), + } + } + + pub fn record_layer_visit( + &mut self, + layer_to_read: &ReadableLayer, + keyspace_to_read: &KeySpace, + lsn_range: &Range, + ) { + let id = match layer_to_read { + ReadableLayer::PersistentLayer(layer) => { + ReadPathLayerId::PersistentLayer(layer.layer_desc().key()) + } + ReadableLayer::InMemoryLayer(layer) => { + ReadPathLayerId::InMemoryLayer(layer.get_lsn_range()) + } + }; + self.path + .push((id, keyspace_to_read.clone(), lsn_range.clone())); + } +} + +impl std::fmt::Display for ReadPath { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + writeln!(f, "Read path for {} at lsn {}:", self.keyspace, self.lsn)?; + for (idx, (layer_id, keyspace, lsn_range)) in self.path.iter().enumerate() { + writeln!( + f, + "{}: {} {}..{} {}", + idx, layer_id, lsn_range.start, lsn_range.end, keyspace + )?; + } + Ok(()) + } +} + #[derive(thiserror::Error)] pub struct MissingKeyError { key: Key, @@ -629,6 +700,8 @@ pub struct MissingKeyError { cont_lsn: Lsn, request_lsn: Lsn, ancestor_lsn: Option, + /// Debug information about the read path if there's an error + read_path: Option, backtrace: Option, } @@ -645,10 +718,15 @@ impl std::fmt::Display for MissingKeyError { "could not find data for key {} (shard {:?}) at LSN {}, request LSN {}", self.key, self.shard, self.cont_lsn, self.request_lsn )?; + if let Some(ref ancestor_lsn) = self.ancestor_lsn { write!(f, ", ancestor {}", ancestor_lsn)?; } + if let Some(ref read_path) = self.read_path { + write!(f, "\n{}", read_path)?; + } + if let Some(ref backtrace) = self.backtrace { write!(f, "\n{}", backtrace)?; } @@ -798,8 +876,12 @@ pub(crate) enum CompactFlags { ForceRepartition, ForceImageLayerCreation, ForceL0Compaction, + OnlyL0Compaction, EnhancedGcBottomMostCompaction, DryRun, + /// Disables compaction yielding e.g. due to high L0 count. This is set e.g. when requesting + /// compaction via HTTP API. 
+ NoYield, } #[serde_with::serde_as] @@ -1065,6 +1147,7 @@ impl Timeline { request_lsn: lsn, ancestor_lsn: None, backtrace: None, + read_path: None, })), } } @@ -1191,6 +1274,13 @@ impl Timeline { reconstruct_state: &mut ValuesReconstructState, ctx: &RequestContext, ) -> Result>, GetVectoredError> { + let read_path = if self.conf.enable_read_path_debugging { + Some(ReadPath::new(keyspace.clone(), lsn)) + } else { + None + }; + reconstruct_state.read_path = read_path; + let traversal_res: Result<(), _> = self .get_vectored_reconstruct_data(keyspace.clone(), lsn, reconstruct_state, ctx) .await; @@ -1467,6 +1557,7 @@ impl Timeline { let lsn = xlog_utils::normalize_lsn(lsn, WAL_SEGMENT_SIZE); let mut gc_info = self.gc_info.write().unwrap(); + let planned_cutoff = gc_info.min_cutoff(); let valid_until = SystemTime::now() + length; @@ -1487,7 +1578,7 @@ impl Timeline { existing_lease.clone() } Entry::Vacant(vacant) => { - // Reject already GC-ed LSN (lsn < latest_gc_cutoff) if we are in AttachedSingle and + // Reject already GC-ed LSN if we are in AttachedSingle and // not blocked by the lsn lease deadline. let validate = { let conf = self.tenant_conf.load(); @@ -1498,7 +1589,10 @@ impl Timeline { if init || validate { let latest_gc_cutoff_lsn = self.get_latest_gc_cutoff_lsn(); if lsn < *latest_gc_cutoff_lsn { - bail!("tried to request a page version that was garbage collected. requested at {} gc cutoff {}", lsn, *latest_gc_cutoff_lsn); + bail!("tried to request an lsn lease for an lsn below the latest gc cutoff. requested at {} gc cutoff {}", lsn, *latest_gc_cutoff_lsn); + } + if lsn < planned_cutoff { + bail!("tried to request an lsn lease for an lsn below the planned gc cutoff. requested at {} planned gc cutoff {}", lsn, planned_cutoff); } } @@ -1700,35 +1794,48 @@ impl Timeline { .await } - /// Outermost timeline compaction operation; downloads needed layers. Returns whether we have pending - /// compaction tasks. + /// Outermost timeline compaction operation; downloads needed layers. + /// + /// NB: the cancellation token is usually from a background task, but can also come from a + /// request task. pub(crate) async fn compact_with_options( self: &Arc, cancel: &CancellationToken, options: CompactOptions, ctx: &RequestContext, ) -> Result { - // most likely the cancellation token is from background task, but in tests it could be the - // request task as well. + // Acquire the compaction lock and task semaphore. + // + // L0-only compaction uses a separate semaphore (if enabled) to make sure it isn't starved + // out by other background tasks (including image compaction). We request this via + // `BackgroundLoopKind::L0Compaction`. + // + // If this is a regular compaction pass, and L0-only compaction is enabled in the config, + // then we should yield for immediate L0 compaction if necessary while we're waiting for the + // background task semaphore. There's no point yielding otherwise, since we'd just end up + // right back here. 
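// Illustrative sketch (not part of this patch): how a compaction pass can wait for a semaphore
// permit while staying responsive to an L0 "work pending" notification, as the comment above
// describes. Whichever future completes first wins the `select!`; names here are hypothetical.
use tokio::sync::{Notify, Semaphore};

enum Acquired<'a> {
    Permit(tokio::sync::SemaphorePermit<'a>),
    YieldForL0,
}

async fn acquire_or_yield<'a>(semaphore: &'a Semaphore, l0_trigger: &Notify) -> Acquired<'a> {
    tokio::select! {
        permit = semaphore.acquire() => {
            Acquired::Permit(permit.expect("semaphore is never closed"))
        }
        // Pending L0 work: stop waiting and let the caller yield to an L0-only pass.
        _ = l0_trigger.notified() => Acquired::YieldForL0,
    }
}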
+ let is_l0_only = options.flags.contains(CompactFlags::OnlyL0Compaction); + let semaphore_kind = match is_l0_only && self.get_compaction_l0_semaphore() { + true => BackgroundLoopKind::L0Compaction, + false => BackgroundLoopKind::Compaction, + }; + let yield_for_l0 = !is_l0_only + && self.get_compaction_l0_first() + && !options.flags.contains(CompactFlags::NoYield); - let prepare = async move { + let acquire = async move { let guard = self.compaction_lock.lock().await; - - let permit = super::tasks::concurrent_background_tasks_rate_limit_permit( - BackgroundLoopKind::Compaction, - ctx, - ) - .await; - + let permit = super::tasks::acquire_concurrency_permit(semaphore_kind, ctx).await; (guard, permit) }; - // this wait probably never needs any "long time spent" logging, because we already nag if - // compaction task goes over it's period (20s) which is quite often in production. let (_guard, _permit) = tokio::select! { - tuple = prepare => { tuple }, - _ = self.cancel.cancelled() => return Ok(CompactionOutcome::Done), - _ = cancel.cancelled() => return Ok(CompactionOutcome::Done), + (guard, permit) = acquire => (guard, permit), + _ = self.l0_compaction_trigger.notified(), if yield_for_l0 => { + return Ok(CompactionOutcome::YieldForL0); + } + _ = self.cancel.cancelled() => return Ok(CompactionOutcome::Skipped), + _ = cancel.cancelled() => return Ok(CompactionOutcome::Skipped), }; let last_record_lsn = self.get_last_record_lsn(); @@ -1736,7 +1843,7 @@ impl Timeline { // Last record Lsn could be zero in case the timeline was just created if !last_record_lsn.is_valid() { warn!("Skipping compaction for potentially just initialized timeline, it has invalid last record lsn: {last_record_lsn}"); - return Ok(CompactionOutcome::Done); + return Ok(CompactionOutcome::Skipped); } let result = match self.get_compaction_algorithm_settings().kind { @@ -2238,6 +2345,20 @@ impl Timeline { .unwrap_or(self.conf.default_tenant_conf.compaction_upper_limit) } + pub fn get_compaction_l0_first(&self) -> bool { + let tenant_conf = self.tenant_conf.load().tenant_conf.clone(); + tenant_conf + .compaction_l0_first + .unwrap_or(self.conf.default_tenant_conf.compaction_l0_first) + } + + pub fn get_compaction_l0_semaphore(&self) -> bool { + let tenant_conf = self.tenant_conf.load().tenant_conf.clone(); + tenant_conf + .compaction_l0_semaphore + .unwrap_or(self.conf.default_tenant_conf.compaction_l0_semaphore) + } + fn get_l0_flush_delay_threshold(&self) -> Option { // Disable L0 flushes by default. This and compaction needs further tuning. const DEFAULT_L0_FLUSH_DELAY_FACTOR: usize = 0; // TODO: default to e.g. 
3 @@ -2579,6 +2700,7 @@ impl Timeline { compaction_lock: tokio::sync::Mutex::default(), compaction_failed: AtomicBool::default(), + l0_compaction_trigger: resources.l0_compaction_trigger, gc_lock: tokio::sync::Mutex::default(), standby_horizon: AtomicLsn::new(0), @@ -2628,7 +2750,7 @@ impl Timeline { return; } FlushLoopState::Exited => { - warn!( + info!( "ignoring attempt to restart exited flush_loop {}/{}", self.tenant_shard_id, self.timeline_id ); @@ -3052,7 +3174,7 @@ impl Timeline { let self_ref = &self; let skip_concurrency_limiter = &skip_concurrency_limiter; async move { - let wait_for_permit = super::tasks::concurrent_background_tasks_rate_limit_permit( + let wait_for_permit = super::tasks::acquire_concurrency_permit( BackgroundLoopKind::InitialLogicalSizeCalculation, background_ctx, ); @@ -3498,6 +3620,7 @@ impl Timeline { request_lsn, ancestor_lsn: Some(timeline.ancestor_lsn), backtrace: None, + read_path: std::mem::take(&mut reconstruct_state.read_path), })); } @@ -3616,6 +3739,9 @@ impl Timeline { } if let Some((layer_to_read, keyspace_to_read, lsn_range)) = fringe.next_layer() { + if let Some(ref mut read_path) = reconstruct_state.read_path { + read_path.record_layer_visit(&layer_to_read, &keyspace_to_read, &lsn_range); + } let next_cont_lsn = lsn_range.start; layer_to_read .get_values_reconstruct_data( @@ -3916,6 +4042,12 @@ impl Timeline { } let flush_duration = flush_timer.stop_and_record(); + // Notify the tenant compaction loop if L0 compaction is needed. + let l0_count = *watch_l0.borrow(); + if l0_count >= self.get_compaction_threshold() { + self.l0_compaction_trigger.notify_one(); + } + // Delay the next flush to backpressure if compaction can't keep up. We delay by the // flush duration such that the flush takes 2x as long. This is propagated up to WAL // ingestion by having ephemeral layer rolls wait for flushes. @@ -4088,6 +4220,7 @@ impl Timeline { ImageLayerCreationMode::Initial, ctx, LastImageLayerCreationStatus::Initial, + false, // don't yield for L0, we're flushing L0 ) .await?; debug_assert!( @@ -4660,6 +4793,7 @@ impl Timeline { mode: ImageLayerCreationMode, ctx: &RequestContext, last_status: LastImageLayerCreationStatus, + yield_for_l0: bool, ) -> Result<(Vec, LastImageLayerCreationStatus), CreateImageLayersError> { let timer = self.metrics.create_images_time_histo.start_timer(); @@ -4856,7 +4990,7 @@ impl Timeline { if let ImageLayerCreationMode::Try = mode { // We have at least made some progress - if batch_image_writer.pending_layer_num() >= 1 { + if yield_for_l0 && batch_image_writer.pending_layer_num() >= 1 { // The `Try` mode is currently only used on the compaction path. We want to avoid // image layer generation taking too long time and blocking L0 compaction. 
So in this // mode, we also inspect the current number of L0 layers and skip image layer generation @@ -5804,10 +5938,11 @@ impl Timeline { let img = match res { Ok(img) => img, Err(walredo::Error::Cancelled) => return Err(PageReconstructError::Cancelled), - Err(walredo::Error::Other(e)) => { + Err(walredo::Error::Other(err)) => { + critical!("walredo failure during page reconstruction: {err:?}"); return Err(PageReconstructError::WalRedo( - e.context("reconstruct a page image"), - )) + err.context("reconstruct a page image"), + )); } }; Ok(img) diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index cfde070442..5b915c50d3 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -10,8 +10,8 @@ use std::sync::Arc; use super::layer_manager::LayerManager; use super::{ - CompactFlags, CompactOptions, CreateImageLayersError, DurationRecorder, ImageLayerCreationMode, - LastImageLayerCreationStatus, RecordedDuration, Timeline, + CompactFlags, CompactOptions, CreateImageLayersError, DurationRecorder, GetVectoredError, + ImageLayerCreationMode, LastImageLayerCreationStatus, RecordedDuration, Timeline, }; use anyhow::{anyhow, bail, Context}; @@ -26,6 +26,7 @@ use pageserver_api::shard::{ShardCount, ShardIdentity, TenantShardId}; use serde::Serialize; use tokio_util::sync::CancellationToken; use tracing::{debug, info, info_span, trace, warn, Instrument}; +use utils::critical; use utils::id::TimelineId; use crate::context::{AccessStatsBehavior, RequestContext, RequestContextBuilder}; @@ -608,6 +609,11 @@ pub enum CompactionOutcome { /// Still has pending layers to be compacted after this round. Ideally, the scheduler /// should immediately schedule another compaction. Pending, + /// A timeline needs L0 compaction. Yield and schedule an immediate L0 compaction pass (only + /// guaranteed when `compaction_l0_first` is enabled). + YieldForL0, + /// Compaction was skipped, because the timeline is ineligible for compaction. + Skipped, } impl Timeline { @@ -686,10 +692,25 @@ impl Timeline { // Define partitioning schema if needed + let l0_l1_boundary_lsn = { + // We do the repartition on the L0-L1 boundary. All data below the boundary + // are compacted by L0 with low read amplification, thus making the `repartition` + // function run fast. + let guard = self.layers.read().await; + let l0_min_lsn = guard + .layer_map()? + .level0_deltas() + .iter() + .map(|l| l.get_lsn_range().start) + .min() + .unwrap_or(self.get_disk_consistent_lsn()); + l0_min_lsn.max(self.get_ancestor_lsn()) + }; + // 1. L0 Compact - let l0_compaction_outcome = { + let l0_outcome = { let timer = self.metrics.compact_time_histo.start_timer(); - let l0_compaction_outcome = self + let l0_outcome = self .compact_level0( target_file_size, options.flags.contains(CompactFlags::ForceL0Compaction), @@ -697,83 +718,103 @@ impl Timeline { ) .await?; timer.stop_and_record(); - l0_compaction_outcome + l0_outcome }; - if let CompactionOutcome::Pending = l0_compaction_outcome { - // Yield and do not do any other kind of compaction. True means - // that we have pending L0 compaction tasks and the compaction scheduler - // will prioritize compacting this tenant/timeline again. - info!("skipping image layer generation and shard ancestor compaction due to L0 compaction did not include all layers."); - return Ok(CompactionOutcome::Pending); + if options.flags.contains(CompactFlags::OnlyL0Compaction) { + return Ok(l0_outcome); } - // 2. 
Repartition and create image layers if necessary - let partition_count = match self - .repartition( - self.get_last_record_lsn(), // TODO: use L0-L1 boundary - self.get_compaction_target_size(), - options.flags, - ctx, - ) - .await + // Yield if we have pending L0 compaction. The scheduler will do another pass. + if (l0_outcome == CompactionOutcome::Pending || l0_outcome == CompactionOutcome::YieldForL0) + && !options.flags.contains(CompactFlags::NoYield) { - Ok(((dense_partitioning, sparse_partitioning), lsn)) => { - // Disables access_stats updates, so that the files we read remain candidates for eviction after we're done with them - let image_ctx = RequestContextBuilder::extend(ctx) - .access_stats_behavior(AccessStatsBehavior::Skip) - .build(); + info!("image/ancestor compaction yielding for L0 compaction"); + return Ok(CompactionOutcome::YieldForL0); + } - let mut partitioning = dense_partitioning; - partitioning - .parts - .extend(sparse_partitioning.into_dense().parts); + if l0_l1_boundary_lsn < self.partitioning.read().1 { + // We never go backwards when repartition and create image layers. + info!("skipping image layer generation because repartition LSN is greater than L0-L1 boundary LSN."); + } else { + // 2. Repartition and create image layers if necessary + match self + .repartition( + l0_l1_boundary_lsn, + self.get_compaction_target_size(), + options.flags, + ctx, + ) + .await + { + Ok(((dense_partitioning, sparse_partitioning), lsn)) => { + // Disables access_stats updates, so that the files we read remain candidates for eviction after we're done with them + let image_ctx = RequestContextBuilder::extend(ctx) + .access_stats_behavior(AccessStatsBehavior::Skip) + .build(); - // 3. Create new image layers for partitions that have been modified "enough". - let (image_layers, outcome) = self - .create_image_layers( - &partitioning, - lsn, - if options - .flags - .contains(CompactFlags::ForceImageLayerCreation) - { - ImageLayerCreationMode::Force - } else { - ImageLayerCreationMode::Try - }, - &image_ctx, - self.last_image_layer_creation_status - .load() - .as_ref() - .clone(), - ) - .await?; + let mut partitioning = dense_partitioning; + partitioning + .parts + .extend(sparse_partitioning.into_dense().parts); - self.last_image_layer_creation_status - .store(Arc::new(outcome.clone())); + // 3. Create new image layers for partitions that have been modified "enough". + let (image_layers, outcome) = self + .create_image_layers( + &partitioning, + lsn, + if options + .flags + .contains(CompactFlags::ForceImageLayerCreation) + { + ImageLayerCreationMode::Force + } else { + ImageLayerCreationMode::Try + }, + &image_ctx, + self.last_image_layer_creation_status + .load() + .as_ref() + .clone(), + !options.flags.contains(CompactFlags::NoYield), + ) + .await + .inspect_err(|err| { + if let CreateImageLayersError::GetVectoredError( + GetVectoredError::MissingKey(_), + ) = err + { + critical!("missing key during compaction: {err:?}"); + } + })?; - self.upload_new_image_layers(image_layers)?; - if let LastImageLayerCreationStatus::Incomplete { .. } = outcome { - // Yield and do not do any other kind of compaction. - info!("skipping shard ancestor compaction due to pending image layer generation tasks (preempted by L0 compaction)."); - return Ok(CompactionOutcome::Pending); + self.last_image_layer_creation_status + .store(Arc::new(outcome.clone())); + + self.upload_new_image_layers(image_layers)?; + if let LastImageLayerCreationStatus::Incomplete { .. 
} = outcome { + // Yield and do not do any other kind of compaction. + info!("skipping shard ancestor compaction due to pending image layer generation tasks (preempted by L0 compaction)."); + return Ok(CompactionOutcome::YieldForL0); + } } - partitioning.parts.len() - } - Err(err) => { - // no partitioning? This is normal, if the timeline was just created - // as an empty timeline. Also in unit tests, when we use the timeline - // as a simple key-value store, ignoring the datadir layout. Log the - // error but continue. - // - // Suppress error when it's due to cancellation - if !self.cancel.is_cancelled() && !err.is_cancelled() { - tracing::error!("could not compact, repartitioning keyspace failed: {err:?}"); + Err(err) => { + // no partitioning? This is normal, if the timeline was just created + // as an empty timeline. Also in unit tests, when we use the timeline + // as a simple key-value store, ignoring the datadir layout. Log the + // error but continue. + // + // Suppress error when it's due to cancellation + if !self.cancel.is_cancelled() && !err.is_cancelled() { + tracing::error!( + "could not compact, repartitioning keyspace failed: {err:?}" + ); + } } - 1 - } - }; + }; + } + + let partition_count = self.partitioning.read().0 .0.parts.len(); // 4. Shard ancestor compaction @@ -2229,8 +2270,11 @@ impl Timeline { split_key_ranges.push((start, end)); } split_key_ranges.sort(); - let guard = self.layers.read().await; - let layer_map = guard.layer_map()?; + let all_layers = { + let guard = self.layers.read().await; + let layer_map = guard.layer_map()?; + layer_map.iter_historic_layers().collect_vec() + }; let mut current_start = None; let ranges_num = split_key_ranges.len(); for (idx, (start, end)) in split_key_ranges.into_iter().enumerate() { @@ -2242,14 +2286,23 @@ impl Timeline { // We have already processed this partition. continue; } - let res = layer_map.range_search(start..end, compact_below_lsn); - let total_size = res.found.keys().map(|x| x.layer.file_size()).sum::(); + let overlapping_layers = { + let mut desc = Vec::new(); + for layer in all_layers.iter() { + if overlaps_with(&layer.get_key_range(), &(start..end)) + && layer.get_lsn_range().start <= compact_below_lsn + { + desc.push(layer.clone()); + } + } + desc + }; + let total_size = overlapping_layers.iter().map(|x| x.file_size).sum::(); if total_size > sub_compaction_max_job_size_mb * 1024 * 1024 || ranges_num == idx + 1 { // Try to extend the compaction range so that we include at least one full layer file. - let extended_end = res - .found - .keys() - .map(|layer| layer.layer.key_range.end) + let extended_end = overlapping_layers + .iter() + .map(|layer| layer.key_range.end) .min(); // It is possible that the search range does not contain any layer files when we reach the end of the loop. // In this case, we simply use the specified key range end. 
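// Illustrative sketch (not part of this patch): the half-open range overlap test and size
// accumulation used above when planning sub-compaction jobs. `overlaps_with` here is a local
// restatement for the example; the real helper lives elsewhere in the crate.
use std::ops::Range;

fn overlaps_with<T: Ord>(a: &Range<T>, b: &Range<T>) -> bool {
    // Two half-open ranges overlap iff each one starts before the other ends.
    a.start < b.end && b.start < a.end
}

/// Sums the file sizes of layers whose key range overlaps the candidate job range.
fn job_size(layers: &[(Range<u64>, u64)], job_range: &Range<u64>) -> u64 {
    layers
        .iter()
        .filter(|(key_range, _size)| overlaps_with(key_range, job_range))
        .map(|(_key_range, size)| *size)
        .sum()
}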
@@ -2276,7 +2329,6 @@ impl Timeline { current_start = Some(end); } } - drop(guard); Ok(compact_jobs) } diff --git a/pageserver/src/tenant/timeline/delete.rs b/pageserver/src/tenant/timeline/delete.rs index 3c828c8a9e..93b7efedb8 100644 --- a/pageserver/src/tenant/timeline/delete.rs +++ b/pageserver/src/tenant/timeline/delete.rs @@ -17,13 +17,11 @@ use crate::{ metadata::TimelineMetadata, remote_timeline_client::{PersistIndexPartWithDeletedFlagError, RemoteTimelineClient}, CreateTimelineCause, DeleteTimelineError, MaybeDeletedIndexPart, Tenant, - TenantManifestError, TimelineOrOffloaded, + TenantManifestError, Timeline, TimelineOrOffloaded, }, virtual_file::MaybeFatalIo, }; -use super::{Timeline, TimelineResources}; - /// Mark timeline as deleted in S3 so we won't pick it up next time /// during attach or pageserver restart. /// See comment in persist_index_part_with_deleted_flag. @@ -296,12 +294,7 @@ impl DeleteTimelineFlow { timeline_id, local_metadata, None, // Ancestor is not needed for deletion. - TimelineResources { - remote_client, - pagestream_throttle: tenant.pagestream_throttle.clone(), - pagestream_throttle_metrics: tenant.pagestream_throttle_metrics.clone(), - l0_flush_global_state: tenant.l0_flush_global_state.clone(), - }, + tenant.get_timeline_resources_for(remote_client), // Important. We dont pass ancestor above because it can be missing. // Thus we need to skip the validation here. CreateTimelineCause::Delete, @@ -341,6 +334,13 @@ impl DeleteTimelineFlow { let tenant_shard_id = timeline.tenant_shard_id(); let timeline_id = timeline.timeline_id(); + // Take a tenant gate guard, because timeline deletion needs access to the tenant to update its manifest. + let Ok(tenant_guard) = tenant.gate.enter() else { + // It is safe to simply skip here, because we only schedule background work once the timeline is durably marked for deletion. + info!("Tenant is shutting down, timeline deletion will be resumed when it next starts"); + return; + }; + task_mgr::spawn( task_mgr::BACKGROUND_RUNTIME.handle(), TaskKind::TimelineDeletionWorker, @@ -348,6 +348,8 @@ impl DeleteTimelineFlow { Some(timeline_id), "timeline_delete", async move { + let _guard = tenant_guard; + if let Err(err) = Self::background(guard, conf, &tenant, &timeline, remote_client).await { // Only log as an error if it's not a cancellation. 
if matches!(err, DeleteTimelineError::Cancelled) { diff --git a/pageserver/src/tenant/timeline/detach_ancestor.rs b/pageserver/src/tenant/timeline/detach_ancestor.rs index f8bc4352e2..e0084d3eef 100644 --- a/pageserver/src/tenant/timeline/detach_ancestor.rs +++ b/pageserver/src/tenant/timeline/detach_ancestor.rs @@ -6,17 +6,20 @@ use crate::{ task_mgr::TaskKind, tenant::{ remote_timeline_client::index::GcBlockingReason::DetachAncestor, - storage_layer::{AsLayerDesc as _, DeltaLayerWriter, Layer, ResidentLayer}, + storage_layer::{ + layer::local_layer_path, AsLayerDesc as _, DeltaLayerWriter, Layer, ResidentLayer, + }, Tenant, }, virtual_file::{MaybeFatalIo, VirtualFile}, }; use anyhow::Context; +use http_utils::error::ApiError; use pageserver_api::{models::detach_ancestor::AncestorDetached, shard::ShardIdentity}; use tokio::sync::Semaphore; use tokio_util::sync::CancellationToken; use tracing::Instrument; -use utils::{completion, generation::Generation, http::error::ApiError, id::TimelineId, lsn::Lsn}; +use utils::{completion, generation::Generation, id::TimelineId, lsn::Lsn}; #[derive(Debug, thiserror::Error)] pub(crate) enum Error { @@ -351,18 +354,7 @@ pub(super) async fn prepare( // FIXME: the fsync should be mandatory, after both rewrites and copies if wrote_any { - let timeline_dir = VirtualFile::open( - &detached - .conf - .timeline_path(&detached.tenant_shard_id, &detached.timeline_id), - ctx, - ) - .await - .fatal_err("VirtualFile::open for timeline dir fsync"); - timeline_dir - .sync_all() - .await - .fatal_err("VirtualFile::sync_all timeline dir"); + fsync_timeline_dir(detached, ctx).await; } } @@ -376,7 +368,7 @@ pub(super) async fn prepare( tasks.spawn( async move { let _permit = limiter.acquire().await; - let owned = remote_copy( + let (owned, did_hardlink) = remote_copy( &adopted, &timeline, timeline.generation, @@ -384,16 +376,20 @@ pub(super) async fn prepare( &timeline.cancel, ) .await?; - tracing::info!(layer=%owned, "remote copied"); - Ok(owned) + tracing::info!(layer=%owned, did_hard_link=%did_hardlink, "remote copied"); + Ok((owned, did_hardlink)) } .in_current_span(), ); } + let mut should_fsync = false; while let Some(res) = tasks.join_next().await { match res { - Ok(Ok(owned)) => { + Ok(Ok((owned, did_hardlink))) => { + if did_hardlink { + should_fsync = true; + } new_layers.push(owned); } Ok(Err(failed)) => { @@ -403,7 +399,10 @@ pub(super) async fn prepare( } } - // TODO: fsync directory again if we hardlinked something + // fsync directory again if we hardlinked something + if should_fsync { + fsync_timeline_dir(detached, ctx).await; + } let prepared = PreparedTimelineDetach { layers: new_layers }; @@ -629,35 +628,52 @@ async fn copy_lsn_prefix( } } -/// Creates a new Layer instance for the adopted layer, and ensures it is found from the remote -/// storage on successful return without the adopted layer being added to `index_part.json`. +/// Creates a new Layer instance for the adopted layer, and ensures it is found in the remote +/// storage on successful return. without the adopted layer being added to `index_part.json`. 
+/// Returns (Layer, did hardlink) async fn remote_copy( adopted: &Layer, adoptee: &Arc, generation: Generation, shard_identity: ShardIdentity, cancel: &CancellationToken, -) -> Result { - // depending if Layer::keep_resident we could hardlink - +) -> Result<(Layer, bool), Error> { let mut metadata = adopted.metadata(); debug_assert!(metadata.generation <= generation); metadata.generation = generation; metadata.shard = shard_identity.shard_index(); - let owned = crate::tenant::storage_layer::Layer::for_evicted( - adoptee.conf, - adoptee, - adopted.layer_desc().layer_name(), - metadata, - ); + let conf = adoptee.conf; + let file_name = adopted.layer_desc().layer_name(); - adoptee + // depending if Layer::keep_resident, do a hardlink + let did_hardlink; + let owned = if let Some(adopted_resident) = adopted.keep_resident().await { + let adopted_path = adopted_resident.local_path(); + let adoptee_path = local_layer_path( + conf, + &adoptee.tenant_shard_id, + &adoptee.timeline_id, + &file_name, + &metadata.generation, + ); + std::fs::hard_link(adopted_path, &adoptee_path) + .map_err(|e| Error::launder(e.into(), Error::Prepare))?; + did_hardlink = true; + Layer::for_resident(conf, adoptee, adoptee_path, file_name, metadata).drop_eviction_guard() + } else { + did_hardlink = false; + Layer::for_evicted(conf, adoptee, file_name, metadata) + }; + + let layer = adoptee .remote_client .copy_timeline_layer(adopted, &owned, cancel) .await .map(move |()| owned) - .map_err(|e| Error::launder(e, Error::Prepare)) + .map_err(|e| Error::launder(e, Error::Prepare))?; + + Ok((layer, did_hardlink)) } pub(crate) enum DetachingAndReparenting { @@ -1001,3 +1017,16 @@ fn check_no_archived_children_of_ancestor( } Ok(()) } + +async fn fsync_timeline_dir(timeline: &Timeline, ctx: &RequestContext) { + let path = &timeline + .conf + .timeline_path(&timeline.tenant_shard_id, &timeline.timeline_id); + let timeline_dir = VirtualFile::open(&path, ctx) + .await + .fatal_err("VirtualFile::open for timeline dir fsync"); + timeline_dir + .sync_all() + .await + .fatal_err("VirtualFile::sync_all timeline dir"); +} diff --git a/pageserver/src/tenant/timeline/eviction_task.rs b/pageserver/src/tenant/timeline/eviction_task.rs index 26c2861b93..77c33349e0 100644 --- a/pageserver/src/tenant/timeline/eviction_task.rs +++ b/pageserver/src/tenant/timeline/eviction_task.rs @@ -30,8 +30,11 @@ use crate::{ pgdatadir_mapping::CollectKeySpaceError, task_mgr::{self, TaskKind, BACKGROUND_RUNTIME}, tenant::{ - size::CalculateSyntheticSizeError, storage_layer::LayerVisibilityHint, - tasks::BackgroundLoopKind, timeline::EvictionError, LogicalSizeCalculationCause, Tenant, + size::CalculateSyntheticSizeError, + storage_layer::LayerVisibilityHint, + tasks::{sleep_random, BackgroundLoopKind, BackgroundLoopSemaphorePermit}, + timeline::EvictionError, + LogicalSizeCalculationCause, Tenant, }, }; @@ -80,8 +83,6 @@ impl Timeline { #[instrument(skip_all, fields(tenant_id = %self.tenant_shard_id.tenant_id, shard_id = %self.tenant_shard_id.shard_slug(), timeline_id = %self.timeline_id))] async fn eviction_task(self: Arc, tenant: Arc) { - use crate::tenant::tasks::random_init_delay; - // acquire the gate guard only once within a useful span let Ok(guard) = self.gate.enter() else { return; @@ -94,7 +95,7 @@ impl Timeline { EvictionPolicy::OnlyImitiate(lat) => lat.period, EvictionPolicy::NoEviction => Duration::from_secs(10), }; - if random_init_delay(period, &self.cancel).await.is_err() { + if sleep_random(period, &self.cancel).await.is_err() { return; } } @@ 
-330,11 +331,9 @@ impl Timeline { &self, cancel: &CancellationToken, ctx: &RequestContext, - ) -> ControlFlow<(), tokio::sync::SemaphorePermit<'static>> { - let acquire_permit = crate::tenant::tasks::concurrent_background_tasks_rate_limit_permit( - BackgroundLoopKind::Eviction, - ctx, - ); + ) -> ControlFlow<(), BackgroundLoopSemaphorePermit<'static>> { + let acquire_permit = + crate::tenant::tasks::acquire_concurrency_permit(BackgroundLoopKind::Eviction, ctx); tokio::select! { permit = acquire_permit => ControlFlow::Continue(permit), @@ -374,7 +373,7 @@ impl Timeline { p: &EvictionPolicyLayerAccessThreshold, cancel: &CancellationToken, gate: &GateGuard, - permit: tokio::sync::SemaphorePermit<'static>, + permit: BackgroundLoopSemaphorePermit<'static>, ctx: &RequestContext, ) -> ControlFlow<()> { if !self.tenant_shard_id.is_shard_zero() { diff --git a/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs b/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs index de917377cb..23db4f88d2 100644 --- a/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs +++ b/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs @@ -39,7 +39,7 @@ use crate::{ use postgres_backend::is_expected_io_error; use postgres_connection::PgConnectionConfig; use postgres_ffi::waldecoder::WalStreamDecoder; -use utils::{id::NodeId, lsn::Lsn, postgres_client::PostgresClientProtocol}; +use utils::{critical, id::NodeId, lsn::Lsn, postgres_client::PostgresClientProtocol}; use utils::{pageserver_feedback::PageserverFeedback, sync::gate::GateError}; /// Status of the connection. @@ -393,6 +393,13 @@ pub(super) async fn handle_walreceiver_connection( .await .with_context(|| { format!("could not ingest record at {local_next_record_lsn}") + }) + .inspect_err(|err| { + // TODO: we can't differentiate cancellation errors with + // anyhow::Error, so just ignore it if we're cancelled. + if !cancellation.is_cancelled() { + critical!("{err:?}") + } })?; uncommitted_records += 1; @@ -520,6 +527,13 @@ pub(super) async fn handle_walreceiver_connection( .await .with_context(|| { format!("could not ingest record at {next_record_lsn}") + }) + .inspect_err(|err| { + // TODO: we can't differentiate cancellation errors with + // anyhow::Error, so just ignore it if we're cancelled. + if !cancellation.is_cancelled() { + critical!("{err:?}") + } })?; if !ingested { tracing::debug!("ingest: filtered out record @ LSN {next_record_lsn}"); diff --git a/pageserver/src/walredo/process.rs b/pageserver/src/walredo/process.rs index 7e9477cfbc..bf30b92ea5 100644 --- a/pageserver/src/walredo/process.rs +++ b/pageserver/src/walredo/process.rs @@ -79,6 +79,14 @@ impl WalRedoProcess { .env_clear() .env("LD_LIBRARY_PATH", &pg_lib_dir_path) .env("DYLD_LIBRARY_PATH", &pg_lib_dir_path) + .env( + "ASAN_OPTIONS", + std::env::var("ASAN_OPTIONS").unwrap_or_default(), + ) + .env( + "UBSAN_OPTIONS", + std::env::var("UBSAN_OPTIONS").unwrap_or_default(), + ) // NB: The redo process is not trusted after we sent it the first // walredo work. Before that, it is trusted. 
Specifically, we trust // it to diff --git a/pgxn/neon/file_cache.c b/pgxn/neon/file_cache.c index 01da61f84b..a61dc9f4c6 100644 --- a/pgxn/neon/file_cache.c +++ b/pgxn/neon/file_cache.c @@ -509,47 +509,44 @@ lfc_cache_containsv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, CriticalAssert(BufTagGetRelNumber(&tag) != InvalidRelFileNumber); - tag.blockNum = (blkno + i) & ~(BLOCKS_PER_CHUNK - 1); + tag.blockNum = blkno & ~(BLOCKS_PER_CHUNK - 1); hash = get_hash_value(lfc_hash, &tag); - chunk_offs = (blkno + i) & (BLOCKS_PER_CHUNK - 1); + chunk_offs = blkno & (BLOCKS_PER_CHUNK - 1); LWLockAcquire(lfc_lock, LW_SHARED); + if (!LFC_ENABLED()) + { + LWLockRelease(lfc_lock); + return 0; + } while (true) { - int this_chunk = Min(nblocks, BLOCKS_PER_CHUNK - chunk_offs); - if (LFC_ENABLED()) - { - entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, NULL); + int this_chunk = Min(nblocks - i, BLOCKS_PER_CHUNK - chunk_offs); + entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, NULL); - if (entry != NULL) + if (entry != NULL) + { + for (; chunk_offs < BLOCKS_PER_CHUNK && i < nblocks; chunk_offs++, i++) { - for (; chunk_offs < BLOCKS_PER_CHUNK && i < nblocks; chunk_offs++, i++) + if ((entry->bitmap[chunk_offs >> 5] & + ((uint32)1 << (chunk_offs & 31))) != 0) { - if ((entry->bitmap[chunk_offs >> 5] & - ((uint32)1 << (chunk_offs & 31))) != 0) - { - BITMAP_SET(bitmap, i); - found++; - } + BITMAP_SET(bitmap, i); + found++; } } - else - { - i += this_chunk; - } } else { - LWLockRelease(lfc_lock); - return found; + i += this_chunk; } /* * Break out of the iteration before doing expensive stuff for * a next iteration */ - if (i + 1 >= nblocks) + if (i >= nblocks) break; /* diff --git a/pgxn/neon/pagestore_smgr.c b/pgxn/neon/pagestore_smgr.c index 012bd479bc..8051970176 100644 --- a/pgxn/neon/pagestore_smgr.c +++ b/pgxn/neon/pagestore_smgr.c @@ -3011,7 +3011,7 @@ neon_read_at_lsnv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber base_block start_ts = GetCurrentTimestamp(); if (RecoveryInProgress() && MyBackendType != B_STARTUP) - XLogWaitForReplayOf(reqlsns[0].request_lsn); + XLogWaitForReplayOf(reqlsns->request_lsn); /* * Try to find prefetched page in the list of received pages. diff --git a/poetry.lock b/poetry.lock index c471d3e69c..fd200159b9 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1030,52 +1030,56 @@ files = [ [[package]] name = "cryptography" -version = "43.0.1" +version = "44.0.1" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." 
optional = false -python-versions = ">=3.7" +python-versions = "!=3.9.0,!=3.9.1,>=3.7" groups = ["main"] files = [ - {file = "cryptography-43.0.1-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:8385d98f6a3bf8bb2d65a73e17ed87a3ba84f6991c155691c51112075f9ffc5d"}, - {file = "cryptography-43.0.1-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:27e613d7077ac613e399270253259d9d53872aaf657471473ebfc9a52935c062"}, - {file = "cryptography-43.0.1-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:68aaecc4178e90719e95298515979814bda0cbada1256a4485414860bd7ab962"}, - {file = "cryptography-43.0.1-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:de41fd81a41e53267cb020bb3a7212861da53a7d39f863585d13ea11049cf277"}, - {file = "cryptography-43.0.1-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:f98bf604c82c416bc829e490c700ca1553eafdf2912a91e23a79d97d9801372a"}, - {file = "cryptography-43.0.1-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:61ec41068b7b74268fa86e3e9e12b9f0c21fcf65434571dbb13d954bceb08042"}, - {file = "cryptography-43.0.1-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:014f58110f53237ace6a408b5beb6c427b64e084eb451ef25a28308270086494"}, - {file = "cryptography-43.0.1-cp37-abi3-win32.whl", hash = "sha256:2bd51274dcd59f09dd952afb696bf9c61a7a49dfc764c04dd33ef7a6b502a1e2"}, - {file = "cryptography-43.0.1-cp37-abi3-win_amd64.whl", hash = "sha256:666ae11966643886c2987b3b721899d250855718d6d9ce41b521252a17985f4d"}, - {file = "cryptography-43.0.1-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:ac119bb76b9faa00f48128b7f5679e1d8d437365c5d26f1c2c3f0da4ce1b553d"}, - {file = "cryptography-43.0.1-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1bbcce1a551e262dfbafb6e6252f1ae36a248e615ca44ba302df077a846a8806"}, - {file = "cryptography-43.0.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58d4e9129985185a06d849aa6df265bdd5a74ca6e1b736a77959b498e0505b85"}, - {file = "cryptography-43.0.1-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:d03a475165f3134f773d1388aeb19c2d25ba88b6a9733c5c590b9ff7bbfa2e0c"}, - {file = "cryptography-43.0.1-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:511f4273808ab590912a93ddb4e3914dfd8a388fed883361b02dea3791f292e1"}, - {file = "cryptography-43.0.1-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:80eda8b3e173f0f247f711eef62be51b599b5d425c429b5d4ca6a05e9e856baa"}, - {file = "cryptography-43.0.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:38926c50cff6f533f8a2dae3d7f19541432610d114a70808f0926d5aaa7121e4"}, - {file = "cryptography-43.0.1-cp39-abi3-win32.whl", hash = "sha256:a575913fb06e05e6b4b814d7f7468c2c660e8bb16d8d5a1faf9b33ccc569dd47"}, - {file = "cryptography-43.0.1-cp39-abi3-win_amd64.whl", hash = "sha256:d75601ad10b059ec832e78823b348bfa1a59f6b8d545db3a24fd44362a1564cb"}, - {file = "cryptography-43.0.1-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:ea25acb556320250756e53f9e20a4177515f012c9eaea17eb7587a8c4d8ae034"}, - {file = "cryptography-43.0.1-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:c1332724be35d23a854994ff0b66530119500b6053d0bd3363265f7e5e77288d"}, - {file = "cryptography-43.0.1-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:fba1007b3ef89946dbbb515aeeb41e30203b004f0b4b00e5e16078b518563289"}, - {file = "cryptography-43.0.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:5b43d1ea6b378b54a1dc99dd8a2b5be47658fe9a7ce0a58ff0b55f4b43ef2b84"}, - {file = 
"cryptography-43.0.1-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:88cce104c36870d70c49c7c8fd22885875d950d9ee6ab54df2745f83ba0dc365"}, - {file = "cryptography-43.0.1-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:9d3cdb25fa98afdd3d0892d132b8d7139e2c087da1712041f6b762e4f807cc96"}, - {file = "cryptography-43.0.1-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:e710bf40870f4db63c3d7d929aa9e09e4e7ee219e703f949ec4073b4294f6172"}, - {file = "cryptography-43.0.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7c05650fe8023c5ed0d46793d4b7d7e6cd9c04e68eabe5b0aeea836e37bdcec2"}, - {file = "cryptography-43.0.1.tar.gz", hash = "sha256:203e92a75716d8cfb491dc47c79e17d0d9207ccffcbcb35f598fbe463ae3444d"}, + {file = "cryptography-44.0.1-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:bf688f615c29bfe9dfc44312ca470989279f0e94bb9f631f85e3459af8efc009"}, + {file = "cryptography-44.0.1-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dd7c7e2d71d908dc0f8d2027e1604102140d84b155e658c20e8ad1304317691f"}, + {file = "cryptography-44.0.1-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:887143b9ff6bad2b7570da75a7fe8bbf5f65276365ac259a5d2d5147a73775f2"}, + {file = "cryptography-44.0.1-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:322eb03ecc62784536bc173f1483e76747aafeb69c8728df48537eb431cd1911"}, + {file = "cryptography-44.0.1-cp37-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:21377472ca4ada2906bc313168c9dc7b1d7ca417b63c1c3011d0c74b7de9ae69"}, + {file = "cryptography-44.0.1-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:df978682c1504fc93b3209de21aeabf2375cb1571d4e61907b3e7a2540e83026"}, + {file = "cryptography-44.0.1-cp37-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:eb3889330f2a4a148abead555399ec9a32b13b7c8ba969b72d8e500eb7ef84cd"}, + {file = "cryptography-44.0.1-cp37-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:8e6a85a93d0642bd774460a86513c5d9d80b5c002ca9693e63f6e540f1815ed0"}, + {file = "cryptography-44.0.1-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:6f76fdd6fd048576a04c5210d53aa04ca34d2ed63336d4abd306d0cbe298fddf"}, + {file = "cryptography-44.0.1-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:6c8acf6f3d1f47acb2248ec3ea261171a671f3d9428e34ad0357148d492c7864"}, + {file = "cryptography-44.0.1-cp37-abi3-win32.whl", hash = "sha256:24979e9f2040c953a94bf3c6782e67795a4c260734e5264dceea65c8f4bae64a"}, + {file = "cryptography-44.0.1-cp37-abi3-win_amd64.whl", hash = "sha256:fd0ee90072861e276b0ff08bd627abec29e32a53b2be44e41dbcdf87cbee2b00"}, + {file = "cryptography-44.0.1-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:a2d8a7045e1ab9b9f803f0d9531ead85f90c5f2859e653b61497228b18452008"}, + {file = "cryptography-44.0.1-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b8272f257cf1cbd3f2e120f14c68bff2b6bdfcc157fafdee84a1b795efd72862"}, + {file = "cryptography-44.0.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1e8d181e90a777b63f3f0caa836844a1182f1f265687fac2115fcf245f5fbec3"}, + {file = "cryptography-44.0.1-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:436df4f203482f41aad60ed1813811ac4ab102765ecae7a2bbb1dbb66dcff5a7"}, + {file = "cryptography-44.0.1-cp39-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:4f422e8c6a28cf8b7f883eb790695d6d45b0c385a2583073f3cec434cc705e1a"}, + {file = "cryptography-44.0.1-cp39-abi3-manylinux_2_28_x86_64.whl", hash = 
"sha256:72198e2b5925155497a5a3e8c216c7fb3e64c16ccee11f0e7da272fa93b35c4c"}, + {file = "cryptography-44.0.1-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:2a46a89ad3e6176223b632056f321bc7de36b9f9b93b2cc1cccf935a3849dc62"}, + {file = "cryptography-44.0.1-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:53f23339864b617a3dfc2b0ac8d5c432625c80014c25caac9082314e9de56f41"}, + {file = "cryptography-44.0.1-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:888fcc3fce0c888785a4876ca55f9f43787f4c5c1cc1e2e0da71ad481ff82c5b"}, + {file = "cryptography-44.0.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:00918d859aa4e57db8299607086f793fa7813ae2ff5a4637e318a25ef82730f7"}, + {file = "cryptography-44.0.1-cp39-abi3-win32.whl", hash = "sha256:9b336599e2cb77b1008cb2ac264b290803ec5e8e89d618a5e978ff5eb6f715d9"}, + {file = "cryptography-44.0.1-cp39-abi3-win_amd64.whl", hash = "sha256:e403f7f766ded778ecdb790da786b418a9f2394f36e8cc8b796cc056ab05f44f"}, + {file = "cryptography-44.0.1-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:1f9a92144fa0c877117e9748c74501bea842f93d21ee00b0cf922846d9d0b183"}, + {file = "cryptography-44.0.1-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:610a83540765a8d8ce0f351ce42e26e53e1f774a6efb71eb1b41eb01d01c3d12"}, + {file = "cryptography-44.0.1-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:5fed5cd6102bb4eb843e3315d2bf25fede494509bddadb81e03a859c1bc17b83"}, + {file = "cryptography-44.0.1-pp310-pypy310_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:f4daefc971c2d1f82f03097dc6f216744a6cd2ac0f04c68fb935ea2ba2a0d420"}, + {file = "cryptography-44.0.1-pp310-pypy310_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:94f99f2b943b354a5b6307d7e8d19f5c423a794462bde2bf310c770ba052b1c4"}, + {file = "cryptography-44.0.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d9c5b9f698a83c8bd71e0f4d3f9f839ef244798e5ffe96febfa9714717db7af7"}, + {file = "cryptography-44.0.1.tar.gz", hash = "sha256:f51f5705ab27898afda1aaa430f34ad90dc117421057782022edf0600bec5f14"}, ] [package.dependencies] cffi = {version = ">=1.12", markers = "platform_python_implementation != \"PyPy\""} [package.extras] -docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=1.1.1)"] -docstest = ["pyenchant (>=1.6.11)", "readme-renderer", "sphinxcontrib-spelling (>=4.0.1)"] -nox = ["nox"] -pep8test = ["check-sdist", "click", "mypy", "ruff"] -sdist = ["build"] +docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=3.0.0)"] +docstest = ["pyenchant (>=3)", "readme-renderer (>=30.0)", "sphinxcontrib-spelling (>=7.3.1)"] +nox = ["nox (>=2024.4.15)", "nox[uv] (>=2024.3.2)"] +pep8test = ["check-sdist", "click (>=8.0.1)", "mypy (>=1.4)", "ruff (>=0.3.6)"] +sdist = ["build (>=1.0.0)"] ssh = ["bcrypt (>=3.1.5)"] -test = ["certifi", "cryptography-vectors (==43.0.1)", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"] +test = ["certifi (>=2024)", "cryptography-vectors (==44.0.1)", "pretend (>=0.7)", "pytest (>=7.4.0)", "pytest-benchmark (>=4.0)", "pytest-cov (>=2.10.1)", "pytest-xdist (>=3.5.0)"] test-randomorder = ["pytest-randomly"] [[package]] diff --git a/proxy/Cargo.toml b/proxy/Cargo.toml index d7880ea7b9..3aa6ac3a76 100644 --- a/proxy/Cargo.toml +++ b/proxy/Cargo.toml @@ -37,6 +37,7 @@ hex.workspace = true hmac.workspace = true hostname.workspace = true http.workspace = true +http-utils.workspace = true humantime.workspace = true humantime-serde.workspace = true hyper0.workspace = true diff --git a/proxy/README.md b/proxy/README.md index ecd54fbbd8..1156bfd352 100644 --- 
a/proxy/README.md +++ b/proxy/README.md @@ -37,8 +37,8 @@ To play with it locally one may start proxy over a local postgres installation If both postgres and proxy are running you may send a SQL query: ```console -curl -k -X POST 'https://proxy.localtest.me:4444/sql' \ - -H 'Neon-Connection-String: postgres://stas:pass@proxy.localtest.me:4444/postgres' \ +curl -k -X POST 'https://proxy.local.neon.build:4444/sql' \ + -H 'Neon-Connection-String: postgres://stas:pass@proxy.local.neon.build:4444/postgres' \ -H 'Content-Type: application/json' \ --data '{ "query":"SELECT $1::int[] as arr, $2::jsonb as obj, 42 as num", @@ -104,7 +104,7 @@ cases where it is hard to use rows represented as objects (e.g. when several fie ## Test proxy locally -Proxy determines project name from the subdomain, request to the `round-rice-566201.somedomain.tld` will be routed to the project named `round-rice-566201`. Unfortunately, `/etc/hosts` does not support domain wildcards, so we can use *.localtest.me` which resolves to `127.0.0.1`. +Proxy determines project name from the subdomain, request to the `round-rice-566201.somedomain.tld` will be routed to the project named `round-rice-566201`. Unfortunately, `/etc/hosts` does not support domain wildcards, so we can use *.local.neon.build` which resolves to `127.0.0.1`. We will need to have a postgres instance. Assuming that we have set up docker we can set it up as follows: ```sh @@ -125,7 +125,7 @@ docker exec -it proxy-postgres psql -U postgres -c "CREATE ROLE proxy WITH SUPER Let's create self-signed certificate by running: ```sh -openssl req -new -x509 -days 365 -nodes -text -out server.crt -keyout server.key -subj "/CN=*.localtest.me" +openssl req -new -x509 -days 365 -nodes -text -out server.crt -keyout server.key -subj "/CN=*.local.neon.build" ``` Then we need to build proxy with 'testing' feature and run, e.g.: @@ -136,5 +136,5 @@ RUST_LOG=proxy cargo run -p proxy --bin proxy --features testing -- --auth-backe Now from client you can start a new session: ```sh -PGSSLROOTCERT=./server.crt psql "postgresql://proxy:password@endpoint.localtest.me:4432/postgres?sslmode=verify-full" +PGSSLROOTCERT=./server.crt psql "postgresql://proxy:password@endpoint.local.neon.build:4432/postgres?sslmode=verify-full" ``` diff --git a/proxy/src/auth/backend/mod.rs b/proxy/src/auth/backend/mod.rs index 7ef096207a..dc595844c5 100644 --- a/proxy/src/auth/backend/mod.rs +++ b/proxy/src/auth/backend/mod.rs @@ -108,6 +108,10 @@ impl Backend<'_, T> { Self::Local(_) => panic!("Local backend has no API"), } } + + pub(crate) fn is_local_proxy(&self) -> bool { + matches!(self, Self::Local(_)) + } } impl<'a, T> Backend<'a, T> { diff --git a/proxy/src/bin/local_proxy.rs b/proxy/src/bin/local_proxy.rs index 7a855bf54b..8f225dc1e0 100644 --- a/proxy/src/bin/local_proxy.rs +++ b/proxy/src/bin/local_proxy.rs @@ -1,416 +1,7 @@ -use std::net::SocketAddr; -use std::pin::pin; -use std::str::FromStr; -use std::sync::Arc; -use std::time::Duration; - -use anyhow::{bail, ensure, Context}; -use camino::{Utf8Path, Utf8PathBuf}; -use compute_api::spec::LocalProxySpec; -use futures::future::Either; -use proxy::auth::backend::jwt::JwkCache; -use proxy::auth::backend::local::{LocalBackend, JWKS_ROLE_MAP}; -use proxy::auth::{self}; -use proxy::cancellation::CancellationHandler; -use proxy::config::{ - self, AuthenticationConfig, ComputeConfig, HttpConfig, ProxyConfig, RetryConfig, -}; -use proxy::control_plane::locks::ApiLocks; -use proxy::control_plane::messages::{EndpointJwksResponse, JwksSettings}; -use 
proxy::http::health_server::AppMetrics; -use proxy::intern::RoleNameInt; -use proxy::metrics::{Metrics, ThreadPoolMetrics}; -use proxy::rate_limiter::{ - BucketRateLimiter, EndpointRateLimiter, LeakyBucketConfig, RateBucketInfo, -}; -use proxy::scram::threadpool::ThreadPool; -use proxy::serverless::cancel_set::CancelSet; -use proxy::serverless::{self, GlobalConnPoolOptions}; -use proxy::tls::client_config::compute_client_config_with_root_certs; -use proxy::types::RoleName; -use proxy::url::ApiUrl; - -project_git_version!(GIT_VERSION); -project_build_tag!(BUILD_TAG); - -use clap::Parser; -use thiserror::Error; -use tokio::net::TcpListener; -use tokio::sync::Notify; -use tokio::task::JoinSet; -use tokio_util::sync::CancellationToken; -use tracing::{debug, error, info, warn}; -use utils::sentry_init::init_sentry; -use utils::{pid_file, project_build_tag, project_git_version}; - #[global_allocator] static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; -/// Neon proxy/router -#[derive(Parser)] -#[command(version = GIT_VERSION, about)] -struct LocalProxyCliArgs { - /// listen for incoming metrics connections on ip:port - #[clap(long, default_value = "127.0.0.1:7001")] - metrics: String, - /// listen for incoming http connections on ip:port - #[clap(long)] - http: String, - /// timeout for the TLS handshake - #[clap(long, default_value = "15s", value_parser = humantime::parse_duration)] - handshake_timeout: tokio::time::Duration, - /// lock for `connect_compute` api method. example: "shards=32,permits=4,epoch=10m,timeout=1s". (use `permits=0` to disable). - #[clap(long, default_value = config::ConcurrencyLockOptions::DEFAULT_OPTIONS_CONNECT_COMPUTE_LOCK)] - connect_compute_lock: String, - #[clap(flatten)] - sql_over_http: SqlOverHttpArgs, - /// User rate limiter max number of requests per second. - /// - /// Provided in the form `@`. - /// Can be given multiple times for different bucket sizes. - #[clap(long, default_values_t = RateBucketInfo::DEFAULT_ENDPOINT_SET)] - user_rps_limit: Vec, - /// Whether the auth rate limiter actually takes effect (for testing) - #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)] - auth_rate_limit_enabled: bool, - /// Authentication rate limiter max number of hashes per second. - #[clap(long, default_values_t = RateBucketInfo::DEFAULT_AUTH_SET)] - auth_rate_limit: Vec, - /// The IP subnet to use when considering whether two IP addresses are considered the same. - #[clap(long, default_value_t = 64)] - auth_rate_limit_ip_subnet: u8, - /// Whether to retry the connection to the compute node - #[clap(long, default_value = config::RetryConfig::CONNECT_TO_COMPUTE_DEFAULT_VALUES)] - connect_to_compute_retry: String, - /// Address of the postgres server - #[clap(long, default_value = "127.0.0.1:5432")] - postgres: SocketAddr, - /// Address of the compute-ctl api service - #[clap(long, default_value = "http://127.0.0.1:3080/")] - compute_ctl: ApiUrl, - /// Path of the local proxy config file - #[clap(long, default_value = "./local_proxy.json")] - config_path: Utf8PathBuf, - /// Path of the local proxy PID file - #[clap(long, default_value = "./local_proxy.pid")] - pid_path: Utf8PathBuf, -} - -#[derive(clap::Args, Clone, Copy, Debug)] -struct SqlOverHttpArgs { - /// How many connections to pool for each endpoint. 
Excess connections are discarded - #[clap(long, default_value_t = 200)] - sql_over_http_pool_max_total_conns: usize, - - /// How long pooled connections should remain idle for before closing - #[clap(long, default_value = "5m", value_parser = humantime::parse_duration)] - sql_over_http_idle_timeout: tokio::time::Duration, - - #[clap(long, default_value_t = 100)] - sql_over_http_client_conn_threshold: u64, - - #[clap(long, default_value_t = 16)] - sql_over_http_cancel_set_shards: usize, - - #[clap(long, default_value_t = 10 * 1024 * 1024)] // 10 MiB - sql_over_http_max_request_size_bytes: usize, - - #[clap(long, default_value_t = 10 * 1024 * 1024)] // 10 MiB - sql_over_http_max_response_size_bytes: usize, -} - #[tokio::main] async fn main() -> anyhow::Result<()> { - let _logging_guard = proxy::logging::init_local_proxy()?; - let _panic_hook_guard = utils::logging::replace_panic_hook_with_tracing_panic_hook(); - let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]); - - Metrics::install(Arc::new(ThreadPoolMetrics::new(0))); - - // TODO: refactor these to use labels - debug!("Version: {GIT_VERSION}"); - debug!("Build_tag: {BUILD_TAG}"); - let neon_metrics = ::metrics::NeonMetrics::new(::metrics::BuildInfo { - revision: GIT_VERSION, - build_tag: BUILD_TAG, - }); - - let jemalloc = match proxy::jemalloc::MetricRecorder::new() { - Ok(t) => Some(t), - Err(e) => { - tracing::error!(error = ?e, "could not start jemalloc metrics loop"); - None - } - }; - - let args = LocalProxyCliArgs::parse(); - let config = build_config(&args)?; - let auth_backend = build_auth_backend(&args)?; - - // before we bind to any ports, write the process ID to a file - // so that compute-ctl can find our process later - // in order to trigger the appropriate SIGHUP on config change. - // - // This also claims a "lock" that makes sure only one instance - // of local_proxy runs at a time. - let _process_guard = loop { - match pid_file::claim_for_current_process(&args.pid_path) { - Ok(guard) => break guard, - Err(e) => { - // compute-ctl might have tried to read the pid-file to let us - // know about some config change. We should try again. - error!(path=?args.pid_path, "could not claim PID file guard: {e:?}"); - tokio::time::sleep(Duration::from_secs(1)).await; - } - } - }; - - let metrics_listener = TcpListener::bind(args.metrics).await?.into_std()?; - let http_listener = TcpListener::bind(args.http).await?; - let shutdown = CancellationToken::new(); - - // todo: should scale with CU - let endpoint_rate_limiter = Arc::new(EndpointRateLimiter::new_with_shards( - LeakyBucketConfig { - rps: 10.0, - max: 100.0, - }, - 16, - )); - - let mut maintenance_tasks = JoinSet::new(); - - let refresh_config_notify = Arc::new(Notify::new()); - maintenance_tasks.spawn(proxy::signals::handle(shutdown.clone(), { - let refresh_config_notify = Arc::clone(&refresh_config_notify); - move || { - refresh_config_notify.notify_one(); - } - })); - - // trigger the first config load **after** setting up the signal hook - // to avoid the race condition where: - // 1. No config file registered when local_proxy starts up - // 2. The config file is written but the signal hook is not yet received - // 3. local_proxy completes startup but has no config loaded, despite there being a registerd config. 
- refresh_config_notify.notify_one(); - tokio::spawn(refresh_config_loop(args.config_path, refresh_config_notify)); - - maintenance_tasks.spawn(proxy::http::health_server::task_main( - metrics_listener, - AppMetrics { - jemalloc, - neon_metrics, - proxy: proxy::metrics::Metrics::get(), - }, - )); - - let task = serverless::task_main( - config, - auth_backend, - http_listener, - shutdown.clone(), - Arc::new(CancellationHandler::new(&config.connect_to_compute, None)), - endpoint_rate_limiter, - ); - - match futures::future::select(pin!(maintenance_tasks.join_next()), pin!(task)).await { - // exit immediately on maintenance task completion - Either::Left((Some(res), _)) => match proxy::error::flatten_err(res)? {}, - // exit with error immediately if all maintenance tasks have ceased (should be caught by branch above) - Either::Left((None, _)) => bail!("no maintenance tasks running. invalid state"), - // exit immediately on client task error - Either::Right((res, _)) => res?, - } - - Ok(()) -} - -/// ProxyConfig is created at proxy startup, and lives forever. -fn build_config(args: &LocalProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> { - let config::ConcurrencyLockOptions { - shards, - limiter, - epoch, - timeout, - } = args.connect_compute_lock.parse()?; - info!( - ?limiter, - shards, - ?epoch, - "Using NodeLocks (connect_compute)" - ); - let connect_compute_locks = ApiLocks::new( - "connect_compute_lock", - limiter, - shards, - timeout, - epoch, - &Metrics::get().proxy.connect_compute_lock, - )?; - - let http_config = HttpConfig { - accept_websockets: false, - pool_options: GlobalConnPoolOptions { - gc_epoch: Duration::from_secs(60), - pool_shards: 2, - idle_timeout: args.sql_over_http.sql_over_http_idle_timeout, - opt_in: false, - - max_conns_per_endpoint: args.sql_over_http.sql_over_http_pool_max_total_conns, - max_total_conns: args.sql_over_http.sql_over_http_pool_max_total_conns, - }, - cancel_set: CancelSet::new(args.sql_over_http.sql_over_http_cancel_set_shards), - client_conn_threshold: args.sql_over_http.sql_over_http_client_conn_threshold, - max_request_size_bytes: args.sql_over_http.sql_over_http_max_request_size_bytes, - max_response_size_bytes: args.sql_over_http.sql_over_http_max_response_size_bytes, - }; - - let compute_config = ComputeConfig { - retry: RetryConfig::parse(RetryConfig::CONNECT_TO_COMPUTE_DEFAULT_VALUES)?, - tls: Arc::new(compute_client_config_with_root_certs()?), - timeout: Duration::from_secs(2), - }; - - Ok(Box::leak(Box::new(ProxyConfig { - tls_config: None, - metric_collection: None, - http_config, - authentication_config: AuthenticationConfig { - jwks_cache: JwkCache::default(), - thread_pool: ThreadPool::new(0), - scram_protocol_timeout: Duration::from_secs(10), - rate_limiter_enabled: false, - rate_limiter: BucketRateLimiter::new(vec![]), - rate_limit_ip_subnet: 64, - ip_allowlist_check_enabled: true, - is_vpc_acccess_proxy: false, - is_auth_broker: false, - accept_jwts: true, - console_redirect_confirmation_timeout: Duration::ZERO, - }, - proxy_protocol_v2: config::ProxyProtocolV2::Rejected, - handshake_timeout: Duration::from_secs(10), - region: "local".into(), - wake_compute_retry_config: RetryConfig::parse(RetryConfig::WAKE_COMPUTE_DEFAULT_VALUES)?, - connect_compute_locks, - connect_to_compute: compute_config, - }))) -} - -/// auth::Backend is created at proxy startup, and lives forever. 
-fn build_auth_backend( - args: &LocalProxyCliArgs, -) -> anyhow::Result<&'static auth::Backend<'static, ()>> { - let auth_backend = proxy::auth::Backend::Local(proxy::auth::backend::MaybeOwned::Owned( - LocalBackend::new(args.postgres, args.compute_ctl.clone()), - )); - - Ok(Box::leak(Box::new(auth_backend))) -} - -#[derive(Error, Debug)] -enum RefreshConfigError { - #[error(transparent)] - Read(#[from] std::io::Error), - #[error(transparent)] - Parse(#[from] serde_json::Error), - #[error(transparent)] - Validate(anyhow::Error), -} - -async fn refresh_config_loop(path: Utf8PathBuf, rx: Arc) { - let mut init = true; - loop { - rx.notified().await; - - match refresh_config_inner(&path).await { - Ok(()) => {} - // don't log for file not found errors if this is the first time we are checking - // for computes that don't use local_proxy, this is not an error. - Err(RefreshConfigError::Read(e)) - if init && e.kind() == std::io::ErrorKind::NotFound => - { - debug!(error=?e, ?path, "could not read config file"); - } - Err(e) => { - error!(error=?e, ?path, "could not read config file"); - } - } - - init = false; - } -} - -async fn refresh_config_inner(path: &Utf8Path) -> Result<(), RefreshConfigError> { - let bytes = tokio::fs::read(&path).await?; - let data: LocalProxySpec = serde_json::from_slice(&bytes)?; - - let mut jwks_set = vec![]; - - fn parse_jwks_settings(jwks: compute_api::spec::JwksSettings) -> anyhow::Result { - let mut jwks_url = url::Url::from_str(&jwks.jwks_url).context("parsing JWKS url")?; - - ensure!( - jwks_url.has_authority() - && (jwks_url.scheme() == "http" || jwks_url.scheme() == "https"), - "Invalid JWKS url. Must be HTTP", - ); - - ensure!( - jwks_url.host().is_some_and(|h| h != url::Host::Domain("")), - "Invalid JWKS url. No domain listed", - ); - - // clear username, password and ports - jwks_url - .set_username("") - .expect("url can be a base and has a valid host and is not a file. should not error"); - jwks_url - .set_password(None) - .expect("url can be a base and has a valid host and is not a file. should not error"); - // local testing is hard if we need to have a specific restricted port - if cfg!(not(feature = "testing")) { - jwks_url.set_port(None).expect( - "url can be a base and has a valid host and is not a file. should not error", - ); - } - - // clear query params - jwks_url.set_fragment(None); - jwks_url.query_pairs_mut().clear().finish(); - - if jwks_url.scheme() != "https" { - // local testing is hard if we need to set up https support. 
- if cfg!(not(feature = "testing")) { - jwks_url - .set_scheme("https") - .expect("should not error to set the scheme to https if it was http"); - } else { - warn!(scheme = jwks_url.scheme(), "JWKS url is not HTTPS"); - } - } - - Ok(JwksSettings { - id: jwks.id, - jwks_url, - provider_name: jwks.provider_name, - jwt_audience: jwks.jwt_audience, - role_names: jwks - .role_names - .into_iter() - .map(RoleName::from) - .map(|s| RoleNameInt::from(&s)) - .collect(), - }) - } - - for jwks in data.jwks.into_iter().flatten() { - jwks_set.push(parse_jwks_settings(jwks).map_err(RefreshConfigError::Validate)?); - } - - info!("successfully loaded new config"); - JWKS_ROLE_MAP.store(Some(Arc::new(EndpointJwksResponse { jwks: jwks_set }))); - - Ok(()) + proxy::binary::local_proxy::run().await } diff --git a/proxy/src/bin/pg_sni_router.rs b/proxy/src/bin/pg_sni_router.rs index 97d870a83a..0c3326af85 100644 --- a/proxy/src/bin/pg_sni_router.rs +++ b/proxy/src/bin/pg_sni_router.rs @@ -1,299 +1,10 @@ -/// A stand-alone program that routes connections, e.g. from -/// `aaa--bbb--1234.external.domain` to `aaa.bbb.internal.domain:1234`. -/// -/// This allows connecting to pods/services running in the same Kubernetes cluster from -/// the outside. Similar to an ingress controller for HTTPS. -use std::{net::SocketAddr, sync::Arc}; - -use anyhow::{anyhow, bail, ensure, Context}; -use clap::Arg; -use futures::future::Either; -use futures::TryFutureExt; -use itertools::Itertools; -use proxy::context::RequestContext; -use proxy::metrics::{Metrics, ThreadPoolMetrics}; -use proxy::protocol2::ConnectionInfo; -use proxy::proxy::{copy_bidirectional_client_compute, run_until_cancelled, ErrorSource}; -use proxy::stream::{PqStream, Stream}; -use proxy::tls::TlsServerEndPoint; -use rustls::crypto::ring; -use rustls::pki_types::PrivateKeyDer; -use tokio::io::{AsyncRead, AsyncWrite}; -use tokio::net::TcpListener; -use tokio_util::sync::CancellationToken; -use tracing::{error, info, Instrument}; -use utils::project_git_version; -use utils::sentry_init::init_sentry; - -project_git_version!(GIT_VERSION); - -fn cli() -> clap::Command { - clap::Command::new("Neon proxy/router") - .version(GIT_VERSION) - .arg( - Arg::new("listen") - .short('l') - .long("listen") - .help("listen for incoming client connections on ip:port") - .default_value("127.0.0.1:4432"), - ) - .arg( - Arg::new("tls-key") - .short('k') - .long("tls-key") - .help("path to TLS key for client postgres connections") - .required(true), - ) - .arg( - Arg::new("tls-cert") - .short('c') - .long("tls-cert") - .help("path to TLS cert for client postgres connections") - .required(true), - ) - .arg( - Arg::new("dest") - .short('d') - .long("destination") - .help("append this domain zone to the SNI hostname to get the destination address") - .required(true), - ) -} +//! A stand-alone program that routes connections, e.g. from +//! `aaa--bbb--1234.external.domain` to `aaa.bbb.internal.domain:1234`. +//! +//! This allows connecting to pods/services running in the same Kubernetes cluster from +//! the outside. Similar to an ingress controller for HTTPS. 
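Aside on the routing scheme described in the module docs above (implemented by the code this commit moves into `proxy::binary::pg_sni_router`): the left-most SNI label encodes `service--namespace--port`, which is mapped to an in-cluster address `service.namespace.<dest-suffix>:port`. A minimal sketch of that mapping, assuming the same naming convention and the `anyhow` crate already used by this file; the extra length check on the label parts is an addition for the sketch, not taken from the original.

```rust
use anyhow::{anyhow, Context};

/// Map an SNI hostname like `aaa--bbb--1234.external.domain` onto the
/// in-cluster destination `aaa.bbb.<dest_suffix>:1234`, where `dest_suffix`
/// is the destination zone the router is started with.
fn sni_to_destination(sni: &str, dest_suffix: &str) -> anyhow::Result<String> {
    // Only the left-most label encodes the service, namespace and port.
    let label = sni.split_once('.').context("invalid SNI")?.0;
    let parts: Vec<&str> = label.splitn(3, "--").collect();
    if parts.len() != 3 {
        return Err(anyhow!("expected service--namespace--port, got {label}"));
    }
    let port: u16 = parts[2].parse().context("invalid port")?;
    Ok(format!("{}.{}.{}:{}", parts[0], parts[1], dest_suffix, port))
}

fn main() -> anyhow::Result<()> {
    let dest = sni_to_destination("aaa--bbb--1234.external.domain", "internal.domain")?;
    assert_eq!(dest, "aaa.bbb.internal.domain:1234");
    println!("{dest}");
    Ok(())
}
```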
#[tokio::main] async fn main() -> anyhow::Result<()> { - let _logging_guard = proxy::logging::init().await?; - let _panic_hook_guard = utils::logging::replace_panic_hook_with_tracing_panic_hook(); - let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]); - - Metrics::install(Arc::new(ThreadPoolMetrics::new(0))); - - let args = cli().get_matches(); - let destination: String = args.get_one::("dest").unwrap().parse()?; - - // Configure TLS - let (tls_config, tls_server_end_point): (Arc, TlsServerEndPoint) = match ( - args.get_one::("tls-key"), - args.get_one::("tls-cert"), - ) { - (Some(key_path), Some(cert_path)) => { - let key = { - let key_bytes = std::fs::read(key_path).context("TLS key file")?; - - let mut keys = - rustls_pemfile::pkcs8_private_keys(&mut &key_bytes[..]).collect_vec(); - - ensure!(keys.len() == 1, "keys.len() = {} (should be 1)", keys.len()); - PrivateKeyDer::Pkcs8( - keys.pop() - .unwrap() - .context(format!("Failed to read TLS keys at '{key_path}'"))?, - ) - }; - - let cert_chain_bytes = std::fs::read(cert_path) - .context(format!("Failed to read TLS cert file at '{cert_path}.'"))?; - - let cert_chain: Vec<_> = { - rustls_pemfile::certs(&mut &cert_chain_bytes[..]) - .try_collect() - .with_context(|| { - format!("Failed to read TLS certificate chain from bytes from file at '{cert_path}'.") - })? - }; - - // needed for channel bindings - let first_cert = cert_chain.first().context("missing certificate")?; - let tls_server_end_point = TlsServerEndPoint::new(first_cert)?; - - let tls_config = - rustls::ServerConfig::builder_with_provider(Arc::new(ring::default_provider())) - .with_protocol_versions(&[&rustls::version::TLS13, &rustls::version::TLS12]) - .context("ring should support TLS1.2 and TLS1.3")? - .with_no_client_auth() - .with_single_cert(cert_chain, key)? - .into(); - - (tls_config, tls_server_end_point) - } - _ => bail!("tls-key and tls-cert must be specified"), - }; - - // Start listening for incoming client connections - let proxy_address: SocketAddr = args.get_one::("listen").unwrap().parse()?; - info!("Starting sni router on {proxy_address}"); - let proxy_listener = TcpListener::bind(proxy_address).await?; - - let cancellation_token = CancellationToken::new(); - - let main = tokio::spawn(task_main( - Arc::new(destination), - tls_config, - tls_server_end_point, - proxy_listener, - cancellation_token.clone(), - )); - let signals_task = tokio::spawn(proxy::signals::handle(cancellation_token, || {})); - - // the signal task cant ever succeed. - // the main task can error, or can succeed on cancellation. - // we want to immediately exit on either of these cases - let signal = match futures::future::select(signals_task, main).await { - Either::Left((res, _)) => proxy::error::flatten_err(res)?, - Either::Right((res, _)) => return proxy::error::flatten_err(res), - }; - - // maintenance tasks return `Infallible` success values, this is an impossible value - // so this match statically ensures that there are no possibilities for that value - match signal {} -} - -async fn task_main( - dest_suffix: Arc, - tls_config: Arc, - tls_server_end_point: TlsServerEndPoint, - listener: tokio::net::TcpListener, - cancellation_token: CancellationToken, -) -> anyhow::Result<()> { - // When set for the server socket, the keepalive setting - // will be inherited by all accepted client sockets. 
- socket2::SockRef::from(&listener).set_keepalive(true)?; - - let connections = tokio_util::task::task_tracker::TaskTracker::new(); - - while let Some(accept_result) = - run_until_cancelled(listener.accept(), &cancellation_token).await - { - let (socket, peer_addr) = accept_result?; - - let session_id = uuid::Uuid::new_v4(); - let tls_config = Arc::clone(&tls_config); - let dest_suffix = Arc::clone(&dest_suffix); - - connections.spawn( - async move { - socket - .set_nodelay(true) - .context("failed to set socket option")?; - - info!(%peer_addr, "serving"); - let ctx = RequestContext::new( - session_id, - ConnectionInfo { - addr: peer_addr, - extra: None, - }, - proxy::metrics::Protocol::SniRouter, - "sni", - ); - handle_client(ctx, dest_suffix, tls_config, tls_server_end_point, socket).await - } - .unwrap_or_else(|e| { - // Acknowledge that the task has finished with an error. - error!("per-client task finished with an error: {e:#}"); - }) - .instrument(tracing::info_span!("handle_client", ?session_id)), - ); - } - - connections.close(); - drop(listener); - - connections.wait().await; - - info!("all client connections have finished"); - Ok(()) -} - -const ERR_INSECURE_CONNECTION: &str = "connection is insecure (try using `sslmode=require`)"; - -async fn ssl_handshake( - ctx: &RequestContext, - raw_stream: S, - tls_config: Arc, - tls_server_end_point: TlsServerEndPoint, -) -> anyhow::Result> { - let mut stream = PqStream::new(Stream::from_raw(raw_stream)); - - let msg = stream.read_startup_packet().await?; - use pq_proto::FeStartupPacket::*; - - match msg { - SslRequest { direct: false } => { - stream - .write_message(&pq_proto::BeMessage::EncryptionResponse(true)) - .await?; - - // Upgrade raw stream into a secure TLS-backed stream. - // NOTE: We've consumed `tls`; this fact will be used later. - - let (raw, read_buf) = stream.into_inner(); - // TODO: Normally, client doesn't send any data before - // server says TLS handshake is ok and read_buf is empty. - // However, you could imagine pipelining of postgres - // SSLRequest + TLS ClientHello in one hunk similar to - // pipelining in our node js driver. We should probably - // support that by chaining read_buf with the stream. - if !read_buf.is_empty() { - bail!("data is sent before server replied with EncryptionResponse"); - } - - Ok(Stream::Tls { - tls: Box::new( - raw.upgrade(tls_config, !ctx.has_private_peer_addr()) - .await?, - ), - tls_server_end_point, - }) - } - unexpected => { - info!( - ?unexpected, - "unexpected startup packet, rejecting connection" - ); - stream - .throw_error_str(ERR_INSECURE_CONNECTION, proxy::error::ErrorKind::User) - .await? - } - } -} - -async fn handle_client( - ctx: RequestContext, - dest_suffix: Arc, - tls_config: Arc, - tls_server_end_point: TlsServerEndPoint, - stream: impl AsyncRead + AsyncWrite + Unpin, -) -> anyhow::Result<()> { - let mut tls_stream = ssl_handshake(&ctx, stream, tls_config, tls_server_end_point).await?; - - // Cut off first part of the SNI domain - // We receive required destination details in the format of - // `{k8s_service_name}--{k8s_namespace}--{port}.non-sni-domain` - let sni = tls_stream.sni_hostname().ok_or(anyhow!("SNI missing"))?; - let dest: Vec<&str> = sni - .split_once('.') - .context("invalid SNI")? 
- .0 - .splitn(3, "--") - .collect(); - let port = dest[2].parse::().context("invalid port")?; - let destination = format!("{}.{}.{}:{}", dest[0], dest[1], dest_suffix, port); - - info!("destination: {}", destination); - - let mut client = tokio::net::TcpStream::connect(destination).await?; - - // doesn't yet matter as pg-sni-router doesn't report analytics logs - ctx.set_success(); - ctx.log_connect(); - - // Starting from here we only proxy the client's traffic. - info!("performing the proxy pass..."); - - match copy_bidirectional_client_compute(&mut tls_stream, &mut client).await { - Ok(_) => Ok(()), - Err(ErrorSource::Client(err)) => Err(err).context("client"), - Err(ErrorSource::Compute(err)) => Err(err).context("compute"), - } + proxy::binary::pg_sni_router::run().await } diff --git a/proxy/src/bin/proxy.rs b/proxy/src/bin/proxy.rs index de685a82c6..7d4b44841d 100644 --- a/proxy/src/bin/proxy.rs +++ b/proxy/src/bin/proxy.rs @@ -1,831 +1,7 @@ -use std::net::SocketAddr; -use std::pin::pin; -use std::sync::Arc; -use std::time::Duration; - -use anyhow::bail; -use futures::future::Either; -use proxy::auth::backend::jwt::JwkCache; -use proxy::auth::backend::{AuthRateLimiter, ConsoleRedirectBackend, MaybeOwned}; -use proxy::cancellation::{handle_cancel_messages, CancellationHandler}; -use proxy::config::{ - self, remote_storage_from_toml, AuthenticationConfig, CacheOptions, ComputeConfig, HttpConfig, - ProjectInfoCacheOptions, ProxyConfig, ProxyProtocolV2, -}; -use proxy::context::parquet::ParquetUploadArgs; -use proxy::http::health_server::AppMetrics; -use proxy::metrics::Metrics; -use proxy::rate_limiter::{ - EndpointRateLimiter, LeakyBucketConfig, RateBucketInfo, WakeComputeRateLimiter, -}; -use proxy::redis::connection_with_credentials_provider::ConnectionWithCredentialsProvider; -use proxy::redis::kv_ops::RedisKVClient; -use proxy::redis::{elasticache, notifications}; -use proxy::scram::threadpool::ThreadPool; -use proxy::serverless::cancel_set::CancelSet; -use proxy::serverless::GlobalConnPoolOptions; -use proxy::tls::client_config::compute_client_config_with_root_certs; -use proxy::{auth, control_plane, http, serverless, usage_metrics}; -use remote_storage::RemoteStorageConfig; -use tokio::net::TcpListener; -use tokio::task::JoinSet; -use tokio_util::sync::CancellationToken; -use tracing::{info, warn, Instrument}; -use utils::sentry_init::init_sentry; -use utils::{project_build_tag, project_git_version}; - -project_git_version!(GIT_VERSION); -project_build_tag!(BUILD_TAG); - -use clap::{Parser, ValueEnum}; - #[global_allocator] static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; -#[derive(Clone, Debug, ValueEnum)] -enum AuthBackendType { - #[value(name("cplane-v1"), alias("control-plane"))] - ControlPlaneV1, - - #[value(name("link"), alias("control-redirect"))] - ConsoleRedirect, - - #[cfg(feature = "testing")] - Postgres, -} - -/// Neon proxy/router -#[derive(Parser)] -#[command(version = GIT_VERSION, about)] -struct ProxyCliArgs { - /// Name of the region this proxy is deployed in - #[clap(long, default_value_t = String::new())] - region: String, - /// listen for incoming client connections on ip:port - #[clap(short, long, default_value = "127.0.0.1:4432")] - proxy: String, - #[clap(value_enum, long, default_value_t = AuthBackendType::ConsoleRedirect)] - auth_backend: AuthBackendType, - /// listen for management callback connection on ip:port - #[clap(short, long, default_value = "127.0.0.1:7000")] - mgmt: String, - /// listen for incoming http connections 
(metrics, etc) on ip:port - #[clap(long, default_value = "127.0.0.1:7001")] - http: String, - /// listen for incoming wss connections on ip:port - #[clap(long)] - wss: Option, - /// redirect unauthenticated users to the given uri in case of console redirect auth - #[clap(short, long, default_value = "http://localhost:3000/psql_session/")] - uri: String, - /// cloud API endpoint for authenticating users - #[clap( - short, - long, - default_value = "http://localhost:3000/authenticate_proxy_request/" - )] - auth_endpoint: String, - /// JWT used to connect to control plane. - #[clap( - long, - value_name = "JWT", - default_value = "", - env = "NEON_PROXY_TO_CONTROLPLANE_TOKEN" - )] - control_plane_token: Arc, - /// if this is not local proxy, this toggles whether we accept jwt or passwords for http - #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)] - is_auth_broker: bool, - /// path to TLS key for client postgres connections - /// - /// tls-key and tls-cert are for backwards compatibility, we can put all certs in one dir - #[clap(short = 'k', long, alias = "ssl-key")] - tls_key: Option, - /// path to TLS cert for client postgres connections - /// - /// tls-key and tls-cert are for backwards compatibility, we can put all certs in one dir - #[clap(short = 'c', long, alias = "ssl-cert")] - tls_cert: Option, - /// Allow writing TLS session keys to the given file pointed to by the environment variable `SSLKEYLOGFILE`. - #[clap(long, alias = "allow-ssl-keylogfile")] - allow_tls_keylogfile: bool, - /// path to directory with TLS certificates for client postgres connections - #[clap(long)] - certs_dir: Option, - /// timeout for the TLS handshake - #[clap(long, default_value = "15s", value_parser = humantime::parse_duration)] - handshake_timeout: tokio::time::Duration, - /// http endpoint to receive periodic metric updates - #[clap(long)] - metric_collection_endpoint: Option, - /// how often metrics should be sent to a collection endpoint - #[clap(long)] - metric_collection_interval: Option, - /// cache for `wake_compute` api method (use `size=0` to disable) - #[clap(long, default_value = config::CacheOptions::CACHE_DEFAULT_OPTIONS)] - wake_compute_cache: String, - /// lock for `wake_compute` api method. example: "shards=32,permits=4,epoch=10m,timeout=1s". (use `permits=0` to disable). - #[clap(long, default_value = config::ConcurrencyLockOptions::DEFAULT_OPTIONS_WAKE_COMPUTE_LOCK)] - wake_compute_lock: String, - /// lock for `connect_compute` api method. example: "shards=32,permits=4,epoch=10m,timeout=1s". (use `permits=0` to disable). - #[clap(long, default_value = config::ConcurrencyLockOptions::DEFAULT_OPTIONS_CONNECT_COMPUTE_LOCK)] - connect_compute_lock: String, - #[clap(flatten)] - sql_over_http: SqlOverHttpArgs, - /// timeout for scram authentication protocol - #[clap(long, default_value = "15s", value_parser = humantime::parse_duration)] - scram_protocol_timeout: tokio::time::Duration, - /// size of the threadpool for password hashing - #[clap(long, default_value_t = 4)] - scram_thread_pool_size: u8, - /// Endpoint rate limiter max number of requests per second. - /// - /// Provided in the form `@`. - /// Can be given multiple times for different bucket sizes. - #[clap(long, default_values_t = RateBucketInfo::DEFAULT_ENDPOINT_SET)] - endpoint_rps_limit: Vec, - /// Wake compute rate limiter max number of requests per second. 
- #[clap(long, default_values_t = RateBucketInfo::DEFAULT_SET)] - wake_compute_limit: Vec, - /// Whether the auth rate limiter actually takes effect (for testing) - #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)] - auth_rate_limit_enabled: bool, - /// Authentication rate limiter max number of hashes per second. - #[clap(long, default_values_t = RateBucketInfo::DEFAULT_AUTH_SET)] - auth_rate_limit: Vec, - /// The IP subnet to use when considering whether two IP addresses are considered the same. - #[clap(long, default_value_t = 64)] - auth_rate_limit_ip_subnet: u8, - /// Redis rate limiter max number of requests per second. - #[clap(long, default_values_t = RateBucketInfo::DEFAULT_REDIS_SET)] - redis_rps_limit: Vec, - /// Cancellation channel size (max queue size for redis kv client) - #[clap(long, default_value = "1024")] - cancellation_ch_size: usize, - /// cache for `allowed_ips` (use `size=0` to disable) - #[clap(long, default_value = config::CacheOptions::CACHE_DEFAULT_OPTIONS)] - allowed_ips_cache: String, - /// cache for `role_secret` (use `size=0` to disable) - #[clap(long, default_value = config::CacheOptions::CACHE_DEFAULT_OPTIONS)] - role_secret_cache: String, - /// redis url for notifications (if empty, redis_host:port will be used for both notifications and streaming connections) - #[clap(long)] - redis_notifications: Option, - /// what from the available authentications type to use for the regional redis we have. Supported are "irsa" and "plain". - #[clap(long, default_value = "irsa")] - redis_auth_type: String, - /// redis host for streaming connections (might be different from the notifications host) - #[clap(long)] - redis_host: Option, - /// redis port for streaming connections (might be different from the notifications host) - #[clap(long)] - redis_port: Option, - /// redis cluster name, used in aws elasticache - #[clap(long)] - redis_cluster_name: Option, - /// redis user_id, used in aws elasticache - #[clap(long)] - redis_user_id: Option, - /// aws region to retrieve credentials - #[clap(long, default_value_t = String::new())] - aws_region: String, - /// cache for `project_info` (use `size=0` to disable) - #[clap(long, default_value = config::ProjectInfoCacheOptions::CACHE_DEFAULT_OPTIONS)] - project_info_cache: String, - /// cache for all valid endpoints - #[clap(long, default_value = config::EndpointCacheConfig::CACHE_DEFAULT_OPTIONS)] - endpoint_cache_config: String, - #[clap(flatten)] - parquet_upload: ParquetUploadArgs, - - /// interval for backup metric collection - #[clap(long, default_value = "10m", value_parser = humantime::parse_duration)] - metric_backup_collection_interval: std::time::Duration, - /// remote storage configuration for backup metric collection - /// Encoded as toml (same format as pageservers), eg - /// `{bucket_name='the-bucket',bucket_region='us-east-1',prefix_in_bucket='proxy',endpoint='http://minio:9000'}` - #[clap(long, value_parser = remote_storage_from_toml)] - metric_backup_collection_remote_storage: Option, - /// chunk size for backup metric collection - /// Size of each event is no more than 400 bytes, so 2**22 is about 200MB before the compression. 
- #[clap(long, default_value = "4194304")] - metric_backup_collection_chunk_size: usize, - /// Whether to retry the connection to the compute node - #[clap(long, default_value = config::RetryConfig::CONNECT_TO_COMPUTE_DEFAULT_VALUES)] - connect_to_compute_retry: String, - /// Whether to retry the wake_compute request - #[clap(long, default_value = config::RetryConfig::WAKE_COMPUTE_DEFAULT_VALUES)] - wake_compute_retry: String, - - /// Configure if this is a private access proxy for the POC: In that case the proxy will ignore the IP allowlist - #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)] - is_private_access_proxy: bool, - - /// Configure whether all incoming requests have a Proxy Protocol V2 packet. - // TODO(conradludgate): switch default to rejected or required once we've updated all deployments - #[clap(value_enum, long, default_value_t = ProxyProtocolV2::Supported)] - proxy_protocol_v2: ProxyProtocolV2, - - /// Time the proxy waits for the webauth session to be confirmed by the control plane. - // TODO: rename to `console_redirect_confirmation_timeout`. - #[clap(long, default_value = "2m", value_parser = humantime::parse_duration)] - webauth_confirmation_timeout: std::time::Duration, -} - -#[derive(clap::Args, Clone, Copy, Debug)] -struct SqlOverHttpArgs { - /// timeout for http connection requests - #[clap(long, default_value = "15s", value_parser = humantime::parse_duration)] - sql_over_http_timeout: tokio::time::Duration, - - /// Whether the SQL over http pool is opt-in - #[clap(long, default_value_t = true, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)] - sql_over_http_pool_opt_in: bool, - - /// How many connections to pool for each endpoint. Excess connections are discarded - #[clap(long, default_value_t = 20)] - sql_over_http_pool_max_conns_per_endpoint: usize, - - /// How many connections to pool for each endpoint. Excess connections are discarded - #[clap(long, default_value_t = 20000)] - sql_over_http_pool_max_total_conns: usize, - - /// How long pooled connections should remain idle for before closing - #[clap(long, default_value = "5m", value_parser = humantime::parse_duration)] - sql_over_http_idle_timeout: tokio::time::Duration, - - /// Duration each shard will wait on average before a GC sweep. - /// A longer time will causes sweeps to take longer but will interfere less frequently. - #[clap(long, default_value = "10m", value_parser = humantime::parse_duration)] - sql_over_http_pool_gc_epoch: tokio::time::Duration, - - /// How many shards should the global pool have. Must be a power of two. 
- /// More shards will introduce less contention for pool operations, but can - /// increase memory used by the pool - #[clap(long, default_value_t = 128)] - sql_over_http_pool_shards: usize, - - #[clap(long, default_value_t = 10000)] - sql_over_http_client_conn_threshold: u64, - - #[clap(long, default_value_t = 64)] - sql_over_http_cancel_set_shards: usize, - - #[clap(long, default_value_t = 10 * 1024 * 1024)] // 10 MiB - sql_over_http_max_request_size_bytes: usize, - - #[clap(long, default_value_t = 10 * 1024 * 1024)] // 10 MiB - sql_over_http_max_response_size_bytes: usize, -} - #[tokio::main] async fn main() -> anyhow::Result<()> { - let _logging_guard = proxy::logging::init().await?; - let _panic_hook_guard = utils::logging::replace_panic_hook_with_tracing_panic_hook(); - let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]); - - // TODO: refactor these to use labels - info!("Version: {GIT_VERSION}"); - info!("Build_tag: {BUILD_TAG}"); - let neon_metrics = ::metrics::NeonMetrics::new(::metrics::BuildInfo { - revision: GIT_VERSION, - build_tag: BUILD_TAG, - }); - - let jemalloc = match proxy::jemalloc::MetricRecorder::new() { - Ok(t) => Some(t), - Err(e) => { - tracing::error!(error = ?e, "could not start jemalloc metrics loop"); - None - } - }; - - let args = ProxyCliArgs::parse(); - let config = build_config(&args)?; - let auth_backend = build_auth_backend(&args)?; - - match auth_backend { - Either::Left(auth_backend) => info!("Authentication backend: {auth_backend}"), - Either::Right(auth_backend) => info!("Authentication backend: {auth_backend:?}"), - }; - info!("Using region: {}", args.aws_region); - - // TODO: untangle the config args - let regional_redis_client = match (args.redis_auth_type.as_str(), &args.redis_notifications) { - ("plain", redis_url) => match redis_url { - None => { - bail!("plain auth requires redis_notifications to be set"); - } - Some(url) => Some( - ConnectionWithCredentialsProvider::new_with_static_credentials(url.to_string()), - ), - }, - ("irsa", _) => match (&args.redis_host, args.redis_port) { - (Some(host), Some(port)) => Some( - ConnectionWithCredentialsProvider::new_with_credentials_provider( - host.to_string(), - port, - elasticache::CredentialsProvider::new( - args.aws_region, - args.redis_cluster_name, - args.redis_user_id, - ) - .await, - ), - ), - (None, None) => { - warn!("irsa auth requires redis-host and redis-port to be set, continuing without regional_redis_client"); - None - } - _ => { - bail!("redis-host and redis-port must be specified together"); - } - }, - _ => { - bail!("unknown auth type given"); - } - }; - - let redis_notifications_client = if let Some(url) = args.redis_notifications { - Some(ConnectionWithCredentialsProvider::new_with_static_credentials(url.to_string())) - } else { - regional_redis_client.clone() - }; - - // Check that we can bind to address before further initialization - let http_address: SocketAddr = args.http.parse()?; - info!("Starting http on {http_address}"); - let http_listener = TcpListener::bind(http_address).await?.into_std()?; - - let mgmt_address: SocketAddr = args.mgmt.parse()?; - info!("Starting mgmt on {mgmt_address}"); - let mgmt_listener = TcpListener::bind(mgmt_address).await?; - - let proxy_listener = if !args.is_auth_broker { - let proxy_address: SocketAddr = args.proxy.parse()?; - info!("Starting proxy on {proxy_address}"); - - Some(TcpListener::bind(proxy_address).await?) - } else { - None - }; - - // TODO: rename the argument to something like serverless. 
- // It now covers more than just websockets, it also covers SQL over HTTP. - let serverless_listener = if let Some(serverless_address) = args.wss { - let serverless_address: SocketAddr = serverless_address.parse()?; - info!("Starting wss on {serverless_address}"); - Some(TcpListener::bind(serverless_address).await?) - } else if args.is_auth_broker { - bail!("wss arg must be present for auth-broker") - } else { - None - }; - - let cancellation_token = CancellationToken::new(); - - let redis_rps_limit = Vec::leak(args.redis_rps_limit.clone()); - RateBucketInfo::validate(redis_rps_limit)?; - - let redis_kv_client = regional_redis_client - .as_ref() - .map(|redis_publisher| RedisKVClient::new(redis_publisher.clone(), redis_rps_limit)); - - // channel size should be higher than redis client limit to avoid blocking - let cancel_ch_size = args.cancellation_ch_size; - let (tx_cancel, rx_cancel) = tokio::sync::mpsc::channel(cancel_ch_size); - let cancellation_handler = Arc::new(CancellationHandler::new( - &config.connect_to_compute, - Some(tx_cancel), - )); - - // bit of a hack - find the min rps and max rps supported and turn it into - // leaky bucket config instead - let max = args - .endpoint_rps_limit - .iter() - .map(|x| x.rps()) - .max_by(f64::total_cmp) - .unwrap_or(EndpointRateLimiter::DEFAULT.max); - let rps = args - .endpoint_rps_limit - .iter() - .map(|x| x.rps()) - .min_by(f64::total_cmp) - .unwrap_or(EndpointRateLimiter::DEFAULT.rps); - let endpoint_rate_limiter = Arc::new(EndpointRateLimiter::new_with_shards( - LeakyBucketConfig { rps, max }, - 64, - )); - - // client facing tasks. these will exit on error or on cancellation - // cancellation returns Ok(()) - let mut client_tasks = JoinSet::new(); - match auth_backend { - Either::Left(auth_backend) => { - if let Some(proxy_listener) = proxy_listener { - client_tasks.spawn(proxy::proxy::task_main( - config, - auth_backend, - proxy_listener, - cancellation_token.clone(), - cancellation_handler.clone(), - endpoint_rate_limiter.clone(), - )); - } - - if let Some(serverless_listener) = serverless_listener { - client_tasks.spawn(serverless::task_main( - config, - auth_backend, - serverless_listener, - cancellation_token.clone(), - cancellation_handler.clone(), - endpoint_rate_limiter.clone(), - )); - } - } - Either::Right(auth_backend) => { - if let Some(proxy_listener) = proxy_listener { - client_tasks.spawn(proxy::console_redirect_proxy::task_main( - config, - auth_backend, - proxy_listener, - cancellation_token.clone(), - cancellation_handler.clone(), - )); - } - } - } - - client_tasks.spawn(proxy::context::parquet::worker( - cancellation_token.clone(), - args.parquet_upload, - )); - - // maintenance tasks. these never return unless there's an error - let mut maintenance_tasks = JoinSet::new(); - maintenance_tasks.spawn(proxy::signals::handle(cancellation_token.clone(), || {})); - maintenance_tasks.spawn(http::health_server::task_main( - http_listener, - AppMetrics { - jemalloc, - neon_metrics, - proxy: proxy::metrics::Metrics::get(), - }, - )); - maintenance_tasks.spawn(control_plane::mgmt::task_main(mgmt_listener)); - - if let Some(metrics_config) = &config.metric_collection { - // TODO: Add gc regardles of the metric collection being enabled. 
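The hunk above wires cancellation through a bounded mpsc channel whose capacity (`--cancellation-ch-size`, default 1024) is kept above the Redis rate limit so enqueueing a cancel key does not stall client-facing tasks. A rough sketch of that producer/consumer shape, with plain types standing in for `CancellationHandler` and `RedisKVClient`:

```rust
use tokio::sync::mpsc;

#[tokio::main]
async fn main() {
    // Bounded queue: producers only block if the consumer falls this far behind.
    let (tx_cancel, mut rx_cancel) = mpsc::channel::<String>(1024);

    // Consumer: stands in for handle_cancel_messages + the rate-limited Redis writer.
    let consumer = tokio::spawn(async move {
        while let Some(key) = rx_cancel.recv().await {
            println!("forwarding cancel key {key}");
        }
    });

    // Producer: stands in for a per-connection cancellation handler.
    tx_cancel.send("cancel:1234".to_string()).await.unwrap();
    drop(tx_cancel); // closing all senders lets the consumer exit

    consumer.await.unwrap();
}
```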
- maintenance_tasks.spawn(usage_metrics::task_main(metrics_config)); - } - - if let Either::Left(auth::Backend::ControlPlane(api, _)) = &auth_backend { - if let proxy::control_plane::client::ControlPlaneClient::ProxyV1(api) = &**api { - match (redis_notifications_client, regional_redis_client.clone()) { - (None, None) => {} - (client1, client2) => { - let cache = api.caches.project_info.clone(); - if let Some(client) = client1 { - maintenance_tasks.spawn(notifications::task_main( - client, - cache.clone(), - args.region.clone(), - )); - } - if let Some(client) = client2 { - maintenance_tasks.spawn(notifications::task_main( - client, - cache.clone(), - args.region.clone(), - )); - } - maintenance_tasks.spawn(async move { cache.clone().gc_worker().await }); - } - } - - if let Some(mut redis_kv_client) = redis_kv_client { - maintenance_tasks.spawn(async move { - redis_kv_client.try_connect().await?; - handle_cancel_messages(&mut redis_kv_client, rx_cancel).await - }); - } - - if let Some(regional_redis_client) = regional_redis_client { - let cache = api.caches.endpoints_cache.clone(); - let con = regional_redis_client; - let span = tracing::info_span!("endpoints_cache"); - maintenance_tasks.spawn( - async move { cache.do_read(con, cancellation_token.clone()).await } - .instrument(span), - ); - } - } - } - - let maintenance = loop { - // get one complete task - match futures::future::select( - pin!(maintenance_tasks.join_next()), - pin!(client_tasks.join_next()), - ) - .await - { - // exit immediately on maintenance task completion - Either::Left((Some(res), _)) => break proxy::error::flatten_err(res)?, - // exit with error immediately if all maintenance tasks have ceased (should be caught by branch above) - Either::Left((None, _)) => bail!("no maintenance tasks running. invalid state"), - // exit immediately on client task error - Either::Right((Some(res), _)) => proxy::error::flatten_err(res)?, - // exit if all our client tasks have shutdown gracefully - Either::Right((None, _)) => return Ok(()), - } - }; - - // maintenance tasks return Infallible success values, this is an impossible value - // so this match statically ensures that there are no possibilities for that value - match maintenance {} -} - -/// ProxyConfig is created at proxy startup, and lives forever. 
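The `match maintenance {}` at the end of the removed main relies on maintenance tasks being typed as returning `Infallible` on success, a value that can never be constructed. A compact sketch of why that compiles, using anyhow as the surrounding code does:

```rust
use std::convert::Infallible;

// A task that can only ever fail: its Ok type is uninhabited.
fn maintenance_task() -> Result<Infallible, anyhow::Error> {
    // A real task would loop forever; here it fails immediately.
    anyhow::bail!("health server exited unexpectedly")
}

fn main() -> anyhow::Result<()> {
    let never: Infallible = maintenance_task()?;
    // No arms needed: Infallible has no variants, so this match is
    // statically exhaustive and this point is provably unreachable.
    match never {}
}
```

The empty match is exhaustive by construction, so the only ways out of the loop are an error from a maintenance task or the graceful client-task shutdown handled in the branch above it.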
-fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> { - let thread_pool = ThreadPool::new(args.scram_thread_pool_size); - Metrics::install(thread_pool.metrics.clone()); - - let tls_config = match (&args.tls_key, &args.tls_cert) { - (Some(key_path), Some(cert_path)) => Some(config::configure_tls( - key_path, - cert_path, - args.certs_dir.as_ref(), - args.allow_tls_keylogfile, - )?), - (None, None) => None, - _ => bail!("either both or neither tls-key and tls-cert must be specified"), - }; - - let backup_metric_collection_config = config::MetricBackupCollectionConfig { - interval: args.metric_backup_collection_interval, - remote_storage_config: args.metric_backup_collection_remote_storage.clone(), - chunk_size: args.metric_backup_collection_chunk_size, - }; - - let metric_collection = match ( - &args.metric_collection_endpoint, - &args.metric_collection_interval, - ) { - (Some(endpoint), Some(interval)) => Some(config::MetricCollectionConfig { - endpoint: endpoint.parse()?, - interval: humantime::parse_duration(interval)?, - backup_metric_collection_config, - }), - (None, None) => None, - _ => bail!( - "either both or neither metric-collection-endpoint \ - and metric-collection-interval must be specified" - ), - }; - - let config::ConcurrencyLockOptions { - shards, - limiter, - epoch, - timeout, - } = args.connect_compute_lock.parse()?; - info!( - ?limiter, - shards, - ?epoch, - "Using NodeLocks (connect_compute)" - ); - let connect_compute_locks = control_plane::locks::ApiLocks::new( - "connect_compute_lock", - limiter, - shards, - timeout, - epoch, - &Metrics::get().proxy.connect_compute_lock, - )?; - - let http_config = HttpConfig { - accept_websockets: !args.is_auth_broker, - pool_options: GlobalConnPoolOptions { - max_conns_per_endpoint: args.sql_over_http.sql_over_http_pool_max_conns_per_endpoint, - gc_epoch: args.sql_over_http.sql_over_http_pool_gc_epoch, - pool_shards: args.sql_over_http.sql_over_http_pool_shards, - idle_timeout: args.sql_over_http.sql_over_http_idle_timeout, - opt_in: args.sql_over_http.sql_over_http_pool_opt_in, - max_total_conns: args.sql_over_http.sql_over_http_pool_max_total_conns, - }, - cancel_set: CancelSet::new(args.sql_over_http.sql_over_http_cancel_set_shards), - client_conn_threshold: args.sql_over_http.sql_over_http_client_conn_threshold, - max_request_size_bytes: args.sql_over_http.sql_over_http_max_request_size_bytes, - max_response_size_bytes: args.sql_over_http.sql_over_http_max_response_size_bytes, - }; - let authentication_config = AuthenticationConfig { - jwks_cache: JwkCache::default(), - thread_pool, - scram_protocol_timeout: args.scram_protocol_timeout, - rate_limiter_enabled: args.auth_rate_limit_enabled, - rate_limiter: AuthRateLimiter::new(args.auth_rate_limit.clone()), - rate_limit_ip_subnet: args.auth_rate_limit_ip_subnet, - ip_allowlist_check_enabled: !args.is_private_access_proxy, - is_vpc_acccess_proxy: args.is_private_access_proxy, - is_auth_broker: args.is_auth_broker, - accept_jwts: args.is_auth_broker, - console_redirect_confirmation_timeout: args.webauth_confirmation_timeout, - }; - - let compute_config = ComputeConfig { - retry: config::RetryConfig::parse(&args.connect_to_compute_retry)?, - tls: Arc::new(compute_client_config_with_root_certs()?), - timeout: Duration::from_secs(2), - }; - - let config = ProxyConfig { - tls_config, - metric_collection, - http_config, - authentication_config, - proxy_protocol_v2: args.proxy_protocol_v2, - handshake_timeout: args.handshake_timeout, - region: 
args.region.clone(), - wake_compute_retry_config: config::RetryConfig::parse(&args.wake_compute_retry)?, - connect_compute_locks, - connect_to_compute: compute_config, - }; - - let config = Box::leak(Box::new(config)); - - tokio::spawn(config.connect_compute_locks.garbage_collect_worker()); - - Ok(config) -} - -/// auth::Backend is created at proxy startup, and lives forever. -fn build_auth_backend( - args: &ProxyCliArgs, -) -> anyhow::Result, &'static ConsoleRedirectBackend>> { - match &args.auth_backend { - AuthBackendType::ControlPlaneV1 => { - let wake_compute_cache_config: CacheOptions = args.wake_compute_cache.parse()?; - let project_info_cache_config: ProjectInfoCacheOptions = - args.project_info_cache.parse()?; - let endpoint_cache_config: config::EndpointCacheConfig = - args.endpoint_cache_config.parse()?; - - info!("Using NodeInfoCache (wake_compute) with options={wake_compute_cache_config:?}"); - info!( - "Using AllowedIpsCache (wake_compute) with options={project_info_cache_config:?}" - ); - info!("Using EndpointCacheConfig with options={endpoint_cache_config:?}"); - let caches = Box::leak(Box::new(control_plane::caches::ApiCaches::new( - wake_compute_cache_config, - project_info_cache_config, - endpoint_cache_config, - ))); - - let config::ConcurrencyLockOptions { - shards, - limiter, - epoch, - timeout, - } = args.wake_compute_lock.parse()?; - info!(?limiter, shards, ?epoch, "Using NodeLocks (wake_compute)"); - let locks = Box::leak(Box::new(control_plane::locks::ApiLocks::new( - "wake_compute_lock", - limiter, - shards, - timeout, - epoch, - &Metrics::get().wake_compute_lock, - )?)); - tokio::spawn(locks.garbage_collect_worker()); - - let url: proxy::url::ApiUrl = args.auth_endpoint.parse()?; - - let endpoint = http::Endpoint::new(url, http::new_client()); - - let mut wake_compute_rps_limit = args.wake_compute_limit.clone(); - RateBucketInfo::validate(&mut wake_compute_rps_limit)?; - let wake_compute_endpoint_rate_limiter = - Arc::new(WakeComputeRateLimiter::new(wake_compute_rps_limit)); - - let api = control_plane::client::cplane_proxy_v1::NeonControlPlaneClient::new( - endpoint, - args.control_plane_token.clone(), - caches, - locks, - wake_compute_endpoint_rate_limiter, - ); - - let api = control_plane::client::ControlPlaneClient::ProxyV1(api); - let auth_backend = auth::Backend::ControlPlane(MaybeOwned::Owned(api), ()); - let config = Box::leak(Box::new(auth_backend)); - - Ok(Either::Left(config)) - } - - #[cfg(feature = "testing")] - AuthBackendType::Postgres => { - let url = args.auth_endpoint.parse()?; - let api = control_plane::client::mock::MockControlPlane::new( - url, - !args.is_private_access_proxy, - ); - let api = control_plane::client::ControlPlaneClient::PostgresMock(api); - - let auth_backend = auth::Backend::ControlPlane(MaybeOwned::Owned(api), ()); - - let config = Box::leak(Box::new(auth_backend)); - - Ok(Either::Left(config)) - } - - AuthBackendType::ConsoleRedirect => { - let wake_compute_cache_config: CacheOptions = args.wake_compute_cache.parse()?; - let project_info_cache_config: ProjectInfoCacheOptions = - args.project_info_cache.parse()?; - let endpoint_cache_config: config::EndpointCacheConfig = - args.endpoint_cache_config.parse()?; - - info!("Using NodeInfoCache (wake_compute) with options={wake_compute_cache_config:?}"); - info!( - "Using AllowedIpsCache (wake_compute) with options={project_info_cache_config:?}" - ); - info!("Using EndpointCacheConfig with options={endpoint_cache_config:?}"); - let caches = 
Box::leak(Box::new(control_plane::caches::ApiCaches::new( - wake_compute_cache_config, - project_info_cache_config, - endpoint_cache_config, - ))); - - let config::ConcurrencyLockOptions { - shards, - limiter, - epoch, - timeout, - } = args.wake_compute_lock.parse()?; - info!(?limiter, shards, ?epoch, "Using NodeLocks (wake_compute)"); - let locks = Box::leak(Box::new(control_plane::locks::ApiLocks::new( - "wake_compute_lock", - limiter, - shards, - timeout, - epoch, - &Metrics::get().wake_compute_lock, - )?)); - - let url = args.uri.clone().parse()?; - let ep_url: proxy::url::ApiUrl = args.auth_endpoint.parse()?; - let endpoint = http::Endpoint::new(ep_url, http::new_client()); - let mut wake_compute_rps_limit = args.wake_compute_limit.clone(); - RateBucketInfo::validate(&mut wake_compute_rps_limit)?; - let wake_compute_endpoint_rate_limiter = - Arc::new(WakeComputeRateLimiter::new(wake_compute_rps_limit)); - - // Since we use only get_allowed_ips_and_secret() wake_compute_endpoint_rate_limiter - // and locks are not used in ConsoleRedirectBackend, - // but they are required by the NeonControlPlaneClient - let api = control_plane::client::cplane_proxy_v1::NeonControlPlaneClient::new( - endpoint, - args.control_plane_token.clone(), - caches, - locks, - wake_compute_endpoint_rate_limiter, - ); - - let backend = ConsoleRedirectBackend::new(url, api); - let config = Box::leak(Box::new(backend)); - - Ok(Either::Right(config)) - } - } -} - -#[cfg(test)] -mod tests { - use std::time::Duration; - - use clap::Parser; - use proxy::rate_limiter::RateBucketInfo; - - #[test] - fn parse_endpoint_rps_limit() { - let config = super::ProxyCliArgs::parse_from([ - "proxy", - "--endpoint-rps-limit", - "100@1s", - "--endpoint-rps-limit", - "20@30s", - ]); - - assert_eq!( - config.endpoint_rps_limit, - vec![ - RateBucketInfo::new(100, Duration::from_secs(1)), - RateBucketInfo::new(20, Duration::from_secs(30)), - ] - ); - } + proxy::binary::proxy::run().await } diff --git a/proxy/src/binary/local_proxy.rs b/proxy/src/binary/local_proxy.rs new file mode 100644 index 0000000000..e0d8515375 --- /dev/null +++ b/proxy/src/binary/local_proxy.rs @@ -0,0 +1,410 @@ +use std::net::SocketAddr; +use std::pin::pin; +use std::str::FromStr; +use std::sync::Arc; +use std::time::Duration; + +use crate::auth::backend::jwt::JwkCache; +use crate::auth::backend::local::{LocalBackend, JWKS_ROLE_MAP}; +use crate::auth::{self}; +use crate::cancellation::CancellationHandler; +use crate::config::{ + self, AuthenticationConfig, ComputeConfig, HttpConfig, ProxyConfig, RetryConfig, +}; +use crate::control_plane::locks::ApiLocks; +use crate::control_plane::messages::{EndpointJwksResponse, JwksSettings}; +use crate::http::health_server::AppMetrics; +use crate::intern::RoleNameInt; +use crate::metrics::{Metrics, ThreadPoolMetrics}; +use crate::rate_limiter::{ + BucketRateLimiter, EndpointRateLimiter, LeakyBucketConfig, RateBucketInfo, +}; +use crate::scram::threadpool::ThreadPool; +use crate::serverless::cancel_set::CancelSet; +use crate::serverless::{self, GlobalConnPoolOptions}; +use crate::tls::client_config::compute_client_config_with_root_certs; +use crate::types::RoleName; +use crate::url::ApiUrl; +use anyhow::{bail, ensure, Context}; +use camino::{Utf8Path, Utf8PathBuf}; +use compute_api::spec::LocalProxySpec; +use futures::future::Either; + +project_git_version!(GIT_VERSION); +project_build_tag!(BUILD_TAG); + +use clap::Parser; +use thiserror::Error; +use tokio::net::TcpListener; +use tokio::sync::Notify; +use tokio::task::JoinSet; 
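Both the removed `build_config`/`build_auth_backend` above and the new binaries below hand out configuration as `&'static` references by leaking a one-time allocation at startup. A minimal sketch of that pattern with an illustrative `Config` type:

```rust
// Sketch of the startup pattern: build the config once, leak it, and pass
// the resulting &'static reference into any number of spawned tasks.
// The memory is intentionally never freed; it lives as long as the process.
struct Config {
    region: String,
}

fn build_config(region: &str) -> &'static Config {
    Box::leak(Box::new(Config {
        region: region.to_string(),
    }))
}

fn main() {
    let config: &'static Config = build_config("local");
    // A &'static reference is Copy, so it moves into threads/tasks freely.
    let handle = std::thread::spawn(move || println!("region = {}", config.region));
    handle.join().unwrap();
}
```

The leak is deliberate: a plain `&'static` reference is cheaper to thread through spawned tasks than an `Arc`, and the config never needs to be dropped.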
+use tokio_util::sync::CancellationToken; +use tracing::{debug, error, info, warn}; +use utils::sentry_init::init_sentry; +use utils::{pid_file, project_build_tag, project_git_version}; + +/// Neon proxy/router +#[derive(Parser)] +#[command(version = GIT_VERSION, about)] +struct LocalProxyCliArgs { + /// listen for incoming metrics connections on ip:port + #[clap(long, default_value = "127.0.0.1:7001")] + metrics: String, + /// listen for incoming http connections on ip:port + #[clap(long)] + http: String, + /// timeout for the TLS handshake + #[clap(long, default_value = "15s", value_parser = humantime::parse_duration)] + handshake_timeout: tokio::time::Duration, + /// lock for `connect_compute` api method. example: "shards=32,permits=4,epoch=10m,timeout=1s". (use `permits=0` to disable). + #[clap(long, default_value = config::ConcurrencyLockOptions::DEFAULT_OPTIONS_CONNECT_COMPUTE_LOCK)] + connect_compute_lock: String, + #[clap(flatten)] + sql_over_http: SqlOverHttpArgs, + /// User rate limiter max number of requests per second. + /// + /// Provided in the form `@`. + /// Can be given multiple times for different bucket sizes. + #[clap(long, default_values_t = RateBucketInfo::DEFAULT_ENDPOINT_SET)] + user_rps_limit: Vec, + /// Whether the auth rate limiter actually takes effect (for testing) + #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)] + auth_rate_limit_enabled: bool, + /// Authentication rate limiter max number of hashes per second. + #[clap(long, default_values_t = RateBucketInfo::DEFAULT_AUTH_SET)] + auth_rate_limit: Vec, + /// The IP subnet to use when considering whether two IP addresses are considered the same. + #[clap(long, default_value_t = 64)] + auth_rate_limit_ip_subnet: u8, + /// Whether to retry the connection to the compute node + #[clap(long, default_value = config::RetryConfig::CONNECT_TO_COMPUTE_DEFAULT_VALUES)] + connect_to_compute_retry: String, + /// Address of the postgres server + #[clap(long, default_value = "127.0.0.1:5432")] + postgres: SocketAddr, + /// Address of the internal compute-ctl api service + #[clap(long, default_value = "http://127.0.0.1:3081/")] + compute_ctl: ApiUrl, + /// Path of the local proxy config file + #[clap(long, default_value = "./local_proxy.json")] + config_path: Utf8PathBuf, + /// Path of the local proxy PID file + #[clap(long, default_value = "./local_proxy.pid")] + pid_path: Utf8PathBuf, +} + +#[derive(clap::Args, Clone, Copy, Debug)] +struct SqlOverHttpArgs { + /// How many connections to pool for each endpoint. 
Excess connections are discarded + #[clap(long, default_value_t = 200)] + sql_over_http_pool_max_total_conns: usize, + + /// How long pooled connections should remain idle for before closing + #[clap(long, default_value = "5m", value_parser = humantime::parse_duration)] + sql_over_http_idle_timeout: tokio::time::Duration, + + #[clap(long, default_value_t = 100)] + sql_over_http_client_conn_threshold: u64, + + #[clap(long, default_value_t = 16)] + sql_over_http_cancel_set_shards: usize, + + #[clap(long, default_value_t = 10 * 1024 * 1024)] // 10 MiB + sql_over_http_max_request_size_bytes: usize, + + #[clap(long, default_value_t = 10 * 1024 * 1024)] // 10 MiB + sql_over_http_max_response_size_bytes: usize, +} + +pub async fn run() -> anyhow::Result<()> { + let _logging_guard = crate::logging::init_local_proxy()?; + let _panic_hook_guard = utils::logging::replace_panic_hook_with_tracing_panic_hook(); + let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]); + + Metrics::install(Arc::new(ThreadPoolMetrics::new(0))); + + // TODO: refactor these to use labels + debug!("Version: {GIT_VERSION}"); + debug!("Build_tag: {BUILD_TAG}"); + let neon_metrics = ::metrics::NeonMetrics::new(::metrics::BuildInfo { + revision: GIT_VERSION, + build_tag: BUILD_TAG, + }); + + let jemalloc = match crate::jemalloc::MetricRecorder::new() { + Ok(t) => Some(t), + Err(e) => { + tracing::error!(error = ?e, "could not start jemalloc metrics loop"); + None + } + }; + + let args = LocalProxyCliArgs::parse(); + let config = build_config(&args)?; + let auth_backend = build_auth_backend(&args); + + // before we bind to any ports, write the process ID to a file + // so that compute-ctl can find our process later + // in order to trigger the appropriate SIGHUP on config change. + // + // This also claims a "lock" that makes sure only one instance + // of local_proxy runs at a time. + let _process_guard = loop { + match pid_file::claim_for_current_process(&args.pid_path) { + Ok(guard) => break guard, + Err(e) => { + // compute-ctl might have tried to read the pid-file to let us + // know about some config change. We should try again. + error!(path=?args.pid_path, "could not claim PID file guard: {e:?}"); + tokio::time::sleep(Duration::from_secs(1)).await; + } + } + }; + + let metrics_listener = TcpListener::bind(args.metrics).await?.into_std()?; + let http_listener = TcpListener::bind(args.http).await?; + let shutdown = CancellationToken::new(); + + // todo: should scale with CU + let endpoint_rate_limiter = Arc::new(EndpointRateLimiter::new_with_shards( + LeakyBucketConfig { + rps: 10.0, + max: 100.0, + }, + 16, + )); + + let mut maintenance_tasks = JoinSet::new(); + + let refresh_config_notify = Arc::new(Notify::new()); + maintenance_tasks.spawn(crate::signals::handle(shutdown.clone(), { + let refresh_config_notify = Arc::clone(&refresh_config_notify); + move || { + refresh_config_notify.notify_one(); + } + })); + + // trigger the first config load **after** setting up the signal hook + // to avoid the race condition where: + // 1. No config file registered when local_proxy starts up + // 2. The config file is written but the signal hook is not yet received + // 3. local_proxy completes startup but has no config loaded, despite there being a registerd config. 
+ refresh_config_notify.notify_one(); + tokio::spawn(refresh_config_loop(args.config_path, refresh_config_notify)); + + maintenance_tasks.spawn(crate::http::health_server::task_main( + metrics_listener, + AppMetrics { + jemalloc, + neon_metrics, + proxy: crate::metrics::Metrics::get(), + }, + )); + + let task = serverless::task_main( + config, + auth_backend, + http_listener, + shutdown.clone(), + Arc::new(CancellationHandler::new(&config.connect_to_compute, None)), + endpoint_rate_limiter, + ); + + match futures::future::select(pin!(maintenance_tasks.join_next()), pin!(task)).await { + // exit immediately on maintenance task completion + Either::Left((Some(res), _)) => match crate::error::flatten_err(res)? {}, + // exit with error immediately if all maintenance tasks have ceased (should be caught by branch above) + Either::Left((None, _)) => bail!("no maintenance tasks running. invalid state"), + // exit immediately on client task error + Either::Right((res, _)) => res?, + } + + Ok(()) +} + +/// ProxyConfig is created at proxy startup, and lives forever. +fn build_config(args: &LocalProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> { + let config::ConcurrencyLockOptions { + shards, + limiter, + epoch, + timeout, + } = args.connect_compute_lock.parse()?; + info!( + ?limiter, + shards, + ?epoch, + "Using NodeLocks (connect_compute)" + ); + let connect_compute_locks = ApiLocks::new( + "connect_compute_lock", + limiter, + shards, + timeout, + epoch, + &Metrics::get().proxy.connect_compute_lock, + ); + + let http_config = HttpConfig { + accept_websockets: false, + pool_options: GlobalConnPoolOptions { + gc_epoch: Duration::from_secs(60), + pool_shards: 2, + idle_timeout: args.sql_over_http.sql_over_http_idle_timeout, + opt_in: false, + + max_conns_per_endpoint: args.sql_over_http.sql_over_http_pool_max_total_conns, + max_total_conns: args.sql_over_http.sql_over_http_pool_max_total_conns, + }, + cancel_set: CancelSet::new(args.sql_over_http.sql_over_http_cancel_set_shards), + client_conn_threshold: args.sql_over_http.sql_over_http_client_conn_threshold, + max_request_size_bytes: args.sql_over_http.sql_over_http_max_request_size_bytes, + max_response_size_bytes: args.sql_over_http.sql_over_http_max_response_size_bytes, + }; + + let compute_config = ComputeConfig { + retry: RetryConfig::parse(RetryConfig::CONNECT_TO_COMPUTE_DEFAULT_VALUES)?, + tls: Arc::new(compute_client_config_with_root_certs()?), + timeout: Duration::from_secs(2), + }; + + Ok(Box::leak(Box::new(ProxyConfig { + tls_config: None, + metric_collection: None, + http_config, + authentication_config: AuthenticationConfig { + jwks_cache: JwkCache::default(), + thread_pool: ThreadPool::new(0), + scram_protocol_timeout: Duration::from_secs(10), + rate_limiter_enabled: false, + rate_limiter: BucketRateLimiter::new(vec![]), + rate_limit_ip_subnet: 64, + ip_allowlist_check_enabled: true, + is_vpc_acccess_proxy: false, + is_auth_broker: false, + accept_jwts: true, + console_redirect_confirmation_timeout: Duration::ZERO, + }, + proxy_protocol_v2: config::ProxyProtocolV2::Rejected, + handshake_timeout: Duration::from_secs(10), + region: "local".into(), + wake_compute_retry_config: RetryConfig::parse(RetryConfig::WAKE_COMPUTE_DEFAULT_VALUES)?, + connect_compute_locks, + connect_to_compute: compute_config, + }))) +} + +/// auth::Backend is created at proxy startup, and lives forever. 
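The reload logic above installs the SIGHUP hook first and only then calls `notify_one()` to arm the initial load, closing the race described in the comment. A small sketch of why a `tokio::sync::Notify` permit sent before the loop starts waiting is not lost:

```rust
use std::sync::Arc;
use tokio::sync::Notify;

#[tokio::main]
async fn main() {
    let refresh = Arc::new(Notify::new());

    // Stands in for the SIGHUP handler registered in maintenance_tasks.
    let on_sighup = Arc::clone(&refresh);

    // Arm the first load *after* the hook exists but *before* the loop runs.
    // Notify stores a permit when nobody is waiting yet, so this is not lost.
    refresh.notify_one();

    let worker = tokio::spawn({
        let refresh = Arc::clone(&refresh);
        async move {
            // Stands in for refresh_config_loop / refresh_config_inner.
            refresh.notified().await;
            println!("initial config load");
        }
    });
    worker.await.unwrap();

    // A later SIGHUP would simply trigger the next iteration.
    on_sighup.notify_one();
}
```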
+fn build_auth_backend(args: &LocalProxyCliArgs) -> &'static auth::Backend<'static, ()> { + let auth_backend = crate::auth::Backend::Local(crate::auth::backend::MaybeOwned::Owned( + LocalBackend::new(args.postgres, args.compute_ctl.clone()), + )); + + Box::leak(Box::new(auth_backend)) +} + +#[derive(Error, Debug)] +enum RefreshConfigError { + #[error(transparent)] + Read(#[from] std::io::Error), + #[error(transparent)] + Parse(#[from] serde_json::Error), + #[error(transparent)] + Validate(anyhow::Error), +} + +async fn refresh_config_loop(path: Utf8PathBuf, rx: Arc) { + let mut init = true; + loop { + rx.notified().await; + + match refresh_config_inner(&path).await { + Ok(()) => {} + // don't log for file not found errors if this is the first time we are checking + // for computes that don't use local_proxy, this is not an error. + Err(RefreshConfigError::Read(e)) + if init && e.kind() == std::io::ErrorKind::NotFound => + { + debug!(error=?e, ?path, "could not read config file"); + } + Err(e) => { + error!(error=?e, ?path, "could not read config file"); + } + } + + init = false; + } +} + +async fn refresh_config_inner(path: &Utf8Path) -> Result<(), RefreshConfigError> { + let bytes = tokio::fs::read(&path).await?; + let data: LocalProxySpec = serde_json::from_slice(&bytes)?; + + let mut jwks_set = vec![]; + + fn parse_jwks_settings(jwks: compute_api::spec::JwksSettings) -> anyhow::Result { + let mut jwks_url = url::Url::from_str(&jwks.jwks_url).context("parsing JWKS url")?; + + ensure!( + jwks_url.has_authority() + && (jwks_url.scheme() == "http" || jwks_url.scheme() == "https"), + "Invalid JWKS url. Must be HTTP", + ); + + ensure!( + jwks_url.host().is_some_and(|h| h != url::Host::Domain("")), + "Invalid JWKS url. No domain listed", + ); + + // clear username, password and ports + jwks_url + .set_username("") + .expect("url can be a base and has a valid host and is not a file. should not error"); + jwks_url + .set_password(None) + .expect("url can be a base and has a valid host and is not a file. should not error"); + // local testing is hard if we need to have a specific restricted port + if cfg!(not(feature = "testing")) { + jwks_url.set_port(None).expect( + "url can be a base and has a valid host and is not a file. should not error", + ); + } + + // clear query params + jwks_url.set_fragment(None); + jwks_url.query_pairs_mut().clear().finish(); + + if jwks_url.scheme() != "https" { + // local testing is hard if we need to set up https support. + if cfg!(not(feature = "testing")) { + jwks_url + .set_scheme("https") + .expect("should not error to set the scheme to https if it was http"); + } else { + warn!(scheme = jwks_url.scheme(), "JWKS url is not HTTPS"); + } + } + + Ok(JwksSettings { + id: jwks.id, + jwks_url, + _provider_name: jwks.provider_name, + jwt_audience: jwks.jwt_audience, + role_names: jwks + .role_names + .into_iter() + .map(RoleName::from) + .map(|s| RoleNameInt::from(&s)) + .collect(), + }) + } + + for jwks in data.jwks.into_iter().flatten() { + jwks_set.push(parse_jwks_settings(jwks).map_err(RefreshConfigError::Validate)?); + } + + info!("successfully loaded new config"); + JWKS_ROLE_MAP.store(Some(Arc::new(EndpointJwksResponse { jwks: jwks_set }))); + + Ok(()) +} diff --git a/proxy/src/binary/mod.rs b/proxy/src/binary/mod.rs new file mode 100644 index 0000000000..dc07d3e675 --- /dev/null +++ b/proxy/src/binary/mod.rs @@ -0,0 +1,7 @@ +//! All binaries have the body of their main() defined here, so that the code +//! 
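`parse_jwks_settings` above normalises whatever JWKS URL the control plane hands over. A stand-alone sketch of the same url-crate calls, simplified by omitting the `cfg(feature = "testing")` escape hatches and asserting properties rather than an exact string:

```rust
use anyhow::{ensure, Context};
use url::Url;

// Sketch: require an http(s) URL with a host, strip credentials, port,
// query and fragment, and force the scheme to https.
fn sanitize_jwks_url(raw: &str) -> anyhow::Result<Url> {
    let mut url = Url::parse(raw).context("parsing JWKS url")?;

    ensure!(
        url.has_authority() && (url.scheme() == "http" || url.scheme() == "https"),
        "Invalid JWKS url. Must be HTTP",
    );
    ensure!(
        url.host().is_some_and(|h| h != url::Host::Domain("")),
        "Invalid JWKS url. No domain listed",
    );

    url.set_username("").expect("http(s) URLs accept a username");
    url.set_password(None).expect("http(s) URLs accept a password");
    url.set_port(None).expect("http(s) URLs accept a port");
    url.set_fragment(None);
    url.query_pairs_mut().clear().finish();
    url.set_scheme("https").expect("http to https is a valid scheme change");

    Ok(url)
}

fn main() -> anyhow::Result<()> {
    let url = sanitize_jwks_url("http://user:pw@auth.example.com:8443/jwks?x=1#frag")?;
    assert_eq!(url.scheme(), "https");
    assert_eq!(url.host_str(), Some("auth.example.com"));
    assert_eq!(url.port(), None);
    assert_eq!(url.username(), "");
    assert!(url.query().map_or(true, |q| q.is_empty()));
    Ok(())
}
```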
is also covered by code style configs in lib.rs and the unused-code check is +//! more effective when practically all modules are private to the lib. + +pub mod local_proxy; +pub mod pg_sni_router; +pub mod proxy; diff --git a/proxy/src/binary/pg_sni_router.rs b/proxy/src/binary/pg_sni_router.rs new file mode 100644 index 0000000000..235e9674c6 --- /dev/null +++ b/proxy/src/binary/pg_sni_router.rs @@ -0,0 +1,304 @@ +/// A stand-alone program that routes connections, e.g. from +/// `aaa--bbb--1234.external.domain` to `aaa.bbb.internal.domain:1234`. +/// +/// This allows connecting to pods/services running in the same Kubernetes cluster from +/// the outside. Similar to an ingress controller for HTTPS. +use std::{net::SocketAddr, sync::Arc}; + +use crate::context::RequestContext; +use crate::metrics::{Metrics, ThreadPoolMetrics}; +use crate::protocol2::ConnectionInfo; +use crate::proxy::{copy_bidirectional_client_compute, run_until_cancelled, ErrorSource}; +use crate::stream::{PqStream, Stream}; +use crate::tls::TlsServerEndPoint; +use anyhow::{anyhow, bail, ensure, Context}; +use clap::Arg; +use futures::future::Either; +use futures::TryFutureExt; +use itertools::Itertools; +use rustls::crypto::ring; +use rustls::pki_types::PrivateKeyDer; +use tokio::io::{AsyncRead, AsyncWrite}; +use tokio::net::TcpListener; +use tokio_util::sync::CancellationToken; +use tracing::{error, info, Instrument}; +use utils::project_git_version; +use utils::sentry_init::init_sentry; + +project_git_version!(GIT_VERSION); + +fn cli() -> clap::Command { + clap::Command::new("Neon proxy/router") + .version(GIT_VERSION) + .arg( + Arg::new("listen") + .short('l') + .long("listen") + .help("listen for incoming client connections on ip:port") + .default_value("127.0.0.1:4432"), + ) + .arg( + Arg::new("tls-key") + .short('k') + .long("tls-key") + .help("path to TLS key for client postgres connections") + .required(true), + ) + .arg( + Arg::new("tls-cert") + .short('c') + .long("tls-cert") + .help("path to TLS cert for client postgres connections") + .required(true), + ) + .arg( + Arg::new("dest") + .short('d') + .long("destination") + .help("append this domain zone to the SNI hostname to get the destination address") + .required(true), + ) +} + +pub async fn run() -> anyhow::Result<()> { + let _logging_guard = crate::logging::init().await?; + let _panic_hook_guard = utils::logging::replace_panic_hook_with_tracing_panic_hook(); + let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]); + + Metrics::install(Arc::new(ThreadPoolMetrics::new(0))); + + let args = cli().get_matches(); + let destination: String = args + .get_one::("dest") + .expect("string argument defined") + .parse()?; + + // Configure TLS + let (tls_config, tls_server_end_point): (Arc, TlsServerEndPoint) = match ( + args.get_one::("tls-key"), + args.get_one::("tls-cert"), + ) { + (Some(key_path), Some(cert_path)) => { + let key = { + let key_bytes = std::fs::read(key_path).context("TLS key file")?; + + let mut keys = + rustls_pemfile::pkcs8_private_keys(&mut &key_bytes[..]).collect_vec(); + + ensure!(keys.len() == 1, "keys.len() = {} (should be 1)", keys.len()); + PrivateKeyDer::Pkcs8( + keys.pop() + .expect("keys should not be empty") + .context(format!("Failed to read TLS keys at '{key_path}'"))?, + ) + }; + + let cert_chain_bytes = std::fs::read(cert_path) + .context(format!("Failed to read TLS cert file at '{cert_path}.'"))?; + + let cert_chain: Vec<_> = { + rustls_pemfile::certs(&mut &cert_chain_bytes[..]) + .try_collect() + .with_context(|| { + 
format!("Failed to read TLS certificate chain from bytes from file at '{cert_path}'.") + })? + }; + + // needed for channel bindings + let first_cert = cert_chain.first().context("missing certificate")?; + let tls_server_end_point = TlsServerEndPoint::new(first_cert)?; + + let tls_config = + rustls::ServerConfig::builder_with_provider(Arc::new(ring::default_provider())) + .with_protocol_versions(&[&rustls::version::TLS13, &rustls::version::TLS12]) + .context("ring should support TLS1.2 and TLS1.3")? + .with_no_client_auth() + .with_single_cert(cert_chain, key)? + .into(); + + (tls_config, tls_server_end_point) + } + _ => bail!("tls-key and tls-cert must be specified"), + }; + + // Start listening for incoming client connections + let proxy_address: SocketAddr = args + .get_one::("listen") + .expect("string argument defined") + .parse()?; + info!("Starting sni router on {proxy_address}"); + let proxy_listener = TcpListener::bind(proxy_address).await?; + + let cancellation_token = CancellationToken::new(); + + let main = tokio::spawn(task_main( + Arc::new(destination), + tls_config, + tls_server_end_point, + proxy_listener, + cancellation_token.clone(), + )); + let signals_task = tokio::spawn(crate::signals::handle(cancellation_token, || {})); + + // the signal task cant ever succeed. + // the main task can error, or can succeed on cancellation. + // we want to immediately exit on either of these cases + let signal = match futures::future::select(signals_task, main).await { + Either::Left((res, _)) => crate::error::flatten_err(res)?, + Either::Right((res, _)) => return crate::error::flatten_err(res), + }; + + // maintenance tasks return `Infallible` success values, this is an impossible value + // so this match statically ensures that there are no possibilities for that value + match signal {} +} + +async fn task_main( + dest_suffix: Arc, + tls_config: Arc, + tls_server_end_point: TlsServerEndPoint, + listener: tokio::net::TcpListener, + cancellation_token: CancellationToken, +) -> anyhow::Result<()> { + // When set for the server socket, the keepalive setting + // will be inherited by all accepted client sockets. + socket2::SockRef::from(&listener).set_keepalive(true)?; + + let connections = tokio_util::task::task_tracker::TaskTracker::new(); + + while let Some(accept_result) = + run_until_cancelled(listener.accept(), &cancellation_token).await + { + let (socket, peer_addr) = accept_result?; + + let session_id = uuid::Uuid::new_v4(); + let tls_config = Arc::clone(&tls_config); + let dest_suffix = Arc::clone(&dest_suffix); + + connections.spawn( + async move { + socket + .set_nodelay(true) + .context("failed to set socket option")?; + + info!(%peer_addr, "serving"); + let ctx = RequestContext::new( + session_id, + ConnectionInfo { + addr: peer_addr, + extra: None, + }, + crate::metrics::Protocol::SniRouter, + "sni", + ); + handle_client(ctx, dest_suffix, tls_config, tls_server_end_point, socket).await + } + .unwrap_or_else(|e| { + // Acknowledge that the task has finished with an error. 
+ error!("per-client task finished with an error: {e:#}"); + }) + .instrument(tracing::info_span!("handle_client", ?session_id)), + ); + } + + connections.close(); + drop(listener); + + connections.wait().await; + + info!("all client connections have finished"); + Ok(()) +} + +const ERR_INSECURE_CONNECTION: &str = "connection is insecure (try using `sslmode=require`)"; + +async fn ssl_handshake( + ctx: &RequestContext, + raw_stream: S, + tls_config: Arc, + tls_server_end_point: TlsServerEndPoint, +) -> anyhow::Result> { + let mut stream = PqStream::new(Stream::from_raw(raw_stream)); + + let msg = stream.read_startup_packet().await?; + use pq_proto::FeStartupPacket::SslRequest; + + match msg { + SslRequest { direct: false } => { + stream + .write_message(&pq_proto::BeMessage::EncryptionResponse(true)) + .await?; + + // Upgrade raw stream into a secure TLS-backed stream. + // NOTE: We've consumed `tls`; this fact will be used later. + + let (raw, read_buf) = stream.into_inner(); + // TODO: Normally, client doesn't send any data before + // server says TLS handshake is ok and read_buf is empty. + // However, you could imagine pipelining of postgres + // SSLRequest + TLS ClientHello in one hunk similar to + // pipelining in our node js driver. We should probably + // support that by chaining read_buf with the stream. + if !read_buf.is_empty() { + bail!("data is sent before server replied with EncryptionResponse"); + } + + Ok(Stream::Tls { + tls: Box::new( + raw.upgrade(tls_config, !ctx.has_private_peer_addr()) + .await?, + ), + tls_server_end_point, + }) + } + unexpected => { + info!( + ?unexpected, + "unexpected startup packet, rejecting connection" + ); + stream + .throw_error_str(ERR_INSECURE_CONNECTION, crate::error::ErrorKind::User) + .await? + } + } +} + +async fn handle_client( + ctx: RequestContext, + dest_suffix: Arc, + tls_config: Arc, + tls_server_end_point: TlsServerEndPoint, + stream: impl AsyncRead + AsyncWrite + Unpin, +) -> anyhow::Result<()> { + let mut tls_stream = ssl_handshake(&ctx, stream, tls_config, tls_server_end_point).await?; + + // Cut off first part of the SNI domain + // We receive required destination details in the format of + // `{k8s_service_name}--{k8s_namespace}--{port}.non-sni-domain` + let sni = tls_stream.sni_hostname().ok_or(anyhow!("SNI missing"))?; + let dest: Vec<&str> = sni + .split_once('.') + .context("invalid SNI")? + .0 + .splitn(3, "--") + .collect(); + let port = dest[2].parse::().context("invalid port")?; + let destination = format!("{}.{}.{}:{}", dest[0], dest[1], dest_suffix, port); + + info!("destination: {}", destination); + + let mut client = tokio::net::TcpStream::connect(destination).await?; + + // doesn't yet matter as pg-sni-router doesn't report analytics logs + ctx.set_success(); + ctx.log_connect(); + + // Starting from here we only proxy the client's traffic. 
+ info!("performing the proxy pass..."); + + match copy_bidirectional_client_compute(&mut tls_stream, &mut client).await { + Ok(_) => Ok(()), + Err(ErrorSource::Client(err)) => Err(err).context("client"), + Err(ErrorSource::Compute(err)) => Err(err).context("compute"), + } +} diff --git a/proxy/src/binary/proxy.rs b/proxy/src/binary/proxy.rs new file mode 100644 index 0000000000..e38c49ca10 --- /dev/null +++ b/proxy/src/binary/proxy.rs @@ -0,0 +1,827 @@ +use std::net::SocketAddr; +use std::pin::pin; +use std::sync::Arc; +use std::time::Duration; + +use crate::auth::backend::jwt::JwkCache; +use crate::auth::backend::{AuthRateLimiter, ConsoleRedirectBackend, MaybeOwned}; +use crate::cancellation::{handle_cancel_messages, CancellationHandler}; +use crate::config::{ + self, remote_storage_from_toml, AuthenticationConfig, CacheOptions, ComputeConfig, HttpConfig, + ProjectInfoCacheOptions, ProxyConfig, ProxyProtocolV2, +}; +use crate::context::parquet::ParquetUploadArgs; +use crate::http::health_server::AppMetrics; +use crate::metrics::Metrics; +use crate::rate_limiter::{ + EndpointRateLimiter, LeakyBucketConfig, RateBucketInfo, WakeComputeRateLimiter, +}; +use crate::redis::connection_with_credentials_provider::ConnectionWithCredentialsProvider; +use crate::redis::kv_ops::RedisKVClient; +use crate::redis::{elasticache, notifications}; +use crate::scram::threadpool::ThreadPool; +use crate::serverless::cancel_set::CancelSet; +use crate::serverless::GlobalConnPoolOptions; +use crate::tls::client_config::compute_client_config_with_root_certs; +use crate::{auth, control_plane, http, serverless, usage_metrics}; +use anyhow::bail; +use futures::future::Either; +use remote_storage::RemoteStorageConfig; +use tokio::net::TcpListener; +use tokio::task::JoinSet; +use tokio_util::sync::CancellationToken; +use tracing::{info, warn, Instrument}; +use utils::sentry_init::init_sentry; +use utils::{project_build_tag, project_git_version}; + +project_git_version!(GIT_VERSION); +project_build_tag!(BUILD_TAG); + +use clap::{Parser, ValueEnum}; + +#[derive(Clone, Debug, ValueEnum)] +enum AuthBackendType { + #[value(name("cplane-v1"), alias("control-plane"))] + ControlPlaneV1, + + #[value(name("link"), alias("control-redirect"))] + ConsoleRedirect, + + #[cfg(any(test, feature = "testing"))] + Postgres, +} + +/// Neon proxy/router +#[derive(Parser)] +#[command(version = GIT_VERSION, about)] +struct ProxyCliArgs { + /// Name of the region this proxy is deployed in + #[clap(long, default_value_t = String::new())] + region: String, + /// listen for incoming client connections on ip:port + #[clap(short, long, default_value = "127.0.0.1:4432")] + proxy: String, + #[clap(value_enum, long, default_value_t = AuthBackendType::ConsoleRedirect)] + auth_backend: AuthBackendType, + /// listen for management callback connection on ip:port + #[clap(short, long, default_value = "127.0.0.1:7000")] + mgmt: String, + /// listen for incoming http connections (metrics, etc) on ip:port + #[clap(long, default_value = "127.0.0.1:7001")] + http: String, + /// listen for incoming wss connections on ip:port + #[clap(long)] + wss: Option, + /// redirect unauthenticated users to the given uri in case of console redirect auth + #[clap(short, long, default_value = "http://localhost:3000/psql_session/")] + uri: String, + /// cloud API endpoint for authenticating users + #[clap( + short, + long, + default_value = "http://localhost:3000/authenticate_proxy_request/" + )] + auth_endpoint: String, + /// JWT used to connect to control plane. 
+ #[clap( + long, + value_name = "JWT", + default_value = "", + env = "NEON_PROXY_TO_CONTROLPLANE_TOKEN" + )] + control_plane_token: Arc, + /// if this is not local proxy, this toggles whether we accept jwt or passwords for http + #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)] + is_auth_broker: bool, + /// path to TLS key for client postgres connections + /// + /// tls-key and tls-cert are for backwards compatibility, we can put all certs in one dir + #[clap(short = 'k', long, alias = "ssl-key")] + tls_key: Option, + /// path to TLS cert for client postgres connections + /// + /// tls-key and tls-cert are for backwards compatibility, we can put all certs in one dir + #[clap(short = 'c', long, alias = "ssl-cert")] + tls_cert: Option, + /// Allow writing TLS session keys to the given file pointed to by the environment variable `SSLKEYLOGFILE`. + #[clap(long, alias = "allow-ssl-keylogfile")] + allow_tls_keylogfile: bool, + /// path to directory with TLS certificates for client postgres connections + #[clap(long)] + certs_dir: Option, + /// timeout for the TLS handshake + #[clap(long, default_value = "15s", value_parser = humantime::parse_duration)] + handshake_timeout: tokio::time::Duration, + /// http endpoint to receive periodic metric updates + #[clap(long)] + metric_collection_endpoint: Option, + /// how often metrics should be sent to a collection endpoint + #[clap(long)] + metric_collection_interval: Option, + /// cache for `wake_compute` api method (use `size=0` to disable) + #[clap(long, default_value = config::CacheOptions::CACHE_DEFAULT_OPTIONS)] + wake_compute_cache: String, + /// lock for `wake_compute` api method. example: "shards=32,permits=4,epoch=10m,timeout=1s". (use `permits=0` to disable). + #[clap(long, default_value = config::ConcurrencyLockOptions::DEFAULT_OPTIONS_WAKE_COMPUTE_LOCK)] + wake_compute_lock: String, + /// lock for `connect_compute` api method. example: "shards=32,permits=4,epoch=10m,timeout=1s". (use `permits=0` to disable). + #[clap(long, default_value = config::ConcurrencyLockOptions::DEFAULT_OPTIONS_CONNECT_COMPUTE_LOCK)] + connect_compute_lock: String, + #[clap(flatten)] + sql_over_http: SqlOverHttpArgs, + /// timeout for scram authentication protocol + #[clap(long, default_value = "15s", value_parser = humantime::parse_duration)] + scram_protocol_timeout: tokio::time::Duration, + /// size of the threadpool for password hashing + #[clap(long, default_value_t = 4)] + scram_thread_pool_size: u8, + /// Endpoint rate limiter max number of requests per second. + /// + /// Provided in the form `@`. + /// Can be given multiple times for different bucket sizes. + #[clap(long, default_values_t = RateBucketInfo::DEFAULT_ENDPOINT_SET)] + endpoint_rps_limit: Vec, + /// Wake compute rate limiter max number of requests per second. + #[clap(long, default_values_t = RateBucketInfo::DEFAULT_SET)] + wake_compute_limit: Vec, + /// Whether the auth rate limiter actually takes effect (for testing) + #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)] + auth_rate_limit_enabled: bool, + /// Authentication rate limiter max number of hashes per second. + #[clap(long, default_values_t = RateBucketInfo::DEFAULT_AUTH_SET)] + auth_rate_limit: Vec, + /// The IP subnet to use when considering whether two IP addresses are considered the same. 
+ #[clap(long, default_value_t = 64)] + auth_rate_limit_ip_subnet: u8, + /// Redis rate limiter max number of requests per second. + #[clap(long, default_values_t = RateBucketInfo::DEFAULT_REDIS_SET)] + redis_rps_limit: Vec, + /// Cancellation channel size (max queue size for redis kv client) + #[clap(long, default_value = "1024")] + cancellation_ch_size: usize, + /// cache for `allowed_ips` (use `size=0` to disable) + #[clap(long, default_value = config::CacheOptions::CACHE_DEFAULT_OPTIONS)] + allowed_ips_cache: String, + /// cache for `role_secret` (use `size=0` to disable) + #[clap(long, default_value = config::CacheOptions::CACHE_DEFAULT_OPTIONS)] + role_secret_cache: String, + /// redis url for notifications (if empty, redis_host:port will be used for both notifications and streaming connections) + #[clap(long)] + redis_notifications: Option, + /// what from the available authentications type to use for the regional redis we have. Supported are "irsa" and "plain". + #[clap(long, default_value = "irsa")] + redis_auth_type: String, + /// redis host for streaming connections (might be different from the notifications host) + #[clap(long)] + redis_host: Option, + /// redis port for streaming connections (might be different from the notifications host) + #[clap(long)] + redis_port: Option, + /// redis cluster name, used in aws elasticache + #[clap(long)] + redis_cluster_name: Option, + /// redis user_id, used in aws elasticache + #[clap(long)] + redis_user_id: Option, + /// aws region to retrieve credentials + #[clap(long, default_value_t = String::new())] + aws_region: String, + /// cache for `project_info` (use `size=0` to disable) + #[clap(long, default_value = config::ProjectInfoCacheOptions::CACHE_DEFAULT_OPTIONS)] + project_info_cache: String, + /// cache for all valid endpoints + #[clap(long, default_value = config::EndpointCacheConfig::CACHE_DEFAULT_OPTIONS)] + endpoint_cache_config: String, + #[clap(flatten)] + parquet_upload: ParquetUploadArgs, + + /// interval for backup metric collection + #[clap(long, default_value = "10m", value_parser = humantime::parse_duration)] + metric_backup_collection_interval: std::time::Duration, + /// remote storage configuration for backup metric collection + /// Encoded as toml (same format as pageservers), eg + /// `{bucket_name='the-bucket',bucket_region='us-east-1',prefix_in_bucket='proxy',endpoint='http://minio:9000'}` + #[clap(long, value_parser = remote_storage_from_toml)] + metric_backup_collection_remote_storage: Option, + /// chunk size for backup metric collection + /// Size of each event is no more than 400 bytes, so 2**22 is about 200MB before the compression. + #[clap(long, default_value = "4194304")] + metric_backup_collection_chunk_size: usize, + /// Whether to retry the connection to the compute node + #[clap(long, default_value = config::RetryConfig::CONNECT_TO_COMPUTE_DEFAULT_VALUES)] + connect_to_compute_retry: String, + /// Whether to retry the wake_compute request + #[clap(long, default_value = config::RetryConfig::WAKE_COMPUTE_DEFAULT_VALUES)] + wake_compute_retry: String, + + /// Configure if this is a private access proxy for the POC: In that case the proxy will ignore the IP allowlist + #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)] + is_private_access_proxy: bool, + + /// Configure whether all incoming requests have a Proxy Protocol V2 packet. 
+ // TODO(conradludgate): switch default to rejected or required once we've updated all deployments + #[clap(value_enum, long, default_value_t = ProxyProtocolV2::Supported)] + proxy_protocol_v2: ProxyProtocolV2, + + /// Time the proxy waits for the webauth session to be confirmed by the control plane. + // TODO: rename to `console_redirect_confirmation_timeout`. + #[clap(long, default_value = "2m", value_parser = humantime::parse_duration)] + webauth_confirmation_timeout: std::time::Duration, +} + +#[derive(clap::Args, Clone, Copy, Debug)] +struct SqlOverHttpArgs { + /// timeout for http connection requests + #[clap(long, default_value = "15s", value_parser = humantime::parse_duration)] + sql_over_http_timeout: tokio::time::Duration, + + /// Whether the SQL over http pool is opt-in + #[clap(long, default_value_t = true, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)] + sql_over_http_pool_opt_in: bool, + + /// How many connections to pool for each endpoint. Excess connections are discarded + #[clap(long, default_value_t = 20)] + sql_over_http_pool_max_conns_per_endpoint: usize, + + /// How many connections to pool for each endpoint. Excess connections are discarded + #[clap(long, default_value_t = 20000)] + sql_over_http_pool_max_total_conns: usize, + + /// How long pooled connections should remain idle for before closing + #[clap(long, default_value = "5m", value_parser = humantime::parse_duration)] + sql_over_http_idle_timeout: tokio::time::Duration, + + /// Duration each shard will wait on average before a GC sweep. + /// A longer time will causes sweeps to take longer but will interfere less frequently. + #[clap(long, default_value = "10m", value_parser = humantime::parse_duration)] + sql_over_http_pool_gc_epoch: tokio::time::Duration, + + /// How many shards should the global pool have. Must be a power of two. 
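Several boolean flags in the structs above (`is_auth_broker`, `auth_rate_limit_enabled`, `is_private_access_proxy`, `sql_over_http_pool_opt_in`) share the same clap pattern: `BoolishValueParser` plus `ArgAction::Set`, so the flag always takes an explicit value instead of acting as a presence switch. A minimal sketch of how that parses:

```rust
use clap::Parser;

// Sketch of the boolean-flag pattern used above: the flag takes a value
// ("true"/"false", "yes"/"no", "1"/"0", "on"/"off") and defaults to false
// when omitted.
#[derive(Parser, Debug)]
struct Args {
    #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
    is_auth_broker: bool,
}

fn main() {
    let args = Args::parse_from(["demo", "--is-auth-broker", "yes"]);
    assert!(args.is_auth_broker);

    // Omitting the flag falls back to the declared default.
    let args = Args::parse_from(["demo"]);
    assert!(!args.is_auth_broker);
}
```

Requiring an explicit value keeps deployments unambiguous: the flag can be pinned to `false` in one environment and `true` in another without relying on presence/absence semantics.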
+ /// More shards will introduce less contention for pool operations, but can + /// increase memory used by the pool + #[clap(long, default_value_t = 128)] + sql_over_http_pool_shards: usize, + + #[clap(long, default_value_t = 10000)] + sql_over_http_client_conn_threshold: u64, + + #[clap(long, default_value_t = 64)] + sql_over_http_cancel_set_shards: usize, + + #[clap(long, default_value_t = 10 * 1024 * 1024)] // 10 MiB + sql_over_http_max_request_size_bytes: usize, + + #[clap(long, default_value_t = 10 * 1024 * 1024)] // 10 MiB + sql_over_http_max_response_size_bytes: usize, +} + +pub async fn run() -> anyhow::Result<()> { + let _logging_guard = crate::logging::init().await?; + let _panic_hook_guard = utils::logging::replace_panic_hook_with_tracing_panic_hook(); + let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]); + + // TODO: refactor these to use labels + info!("Version: {GIT_VERSION}"); + info!("Build_tag: {BUILD_TAG}"); + let neon_metrics = ::metrics::NeonMetrics::new(::metrics::BuildInfo { + revision: GIT_VERSION, + build_tag: BUILD_TAG, + }); + + let jemalloc = match crate::jemalloc::MetricRecorder::new() { + Ok(t) => Some(t), + Err(e) => { + tracing::error!(error = ?e, "could not start jemalloc metrics loop"); + None + } + }; + + let args = ProxyCliArgs::parse(); + let config = build_config(&args)?; + let auth_backend = build_auth_backend(&args)?; + + match auth_backend { + Either::Left(auth_backend) => info!("Authentication backend: {auth_backend}"), + Either::Right(auth_backend) => info!("Authentication backend: {auth_backend:?}"), + }; + info!("Using region: {}", args.aws_region); + + // TODO: untangle the config args + let regional_redis_client = match (args.redis_auth_type.as_str(), &args.redis_notifications) { + ("plain", redis_url) => match redis_url { + None => { + bail!("plain auth requires redis_notifications to be set"); + } + Some(url) => Some( + ConnectionWithCredentialsProvider::new_with_static_credentials(url.to_string()), + ), + }, + ("irsa", _) => match (&args.redis_host, args.redis_port) { + (Some(host), Some(port)) => Some( + ConnectionWithCredentialsProvider::new_with_credentials_provider( + host.to_string(), + port, + elasticache::CredentialsProvider::new( + args.aws_region, + args.redis_cluster_name, + args.redis_user_id, + ) + .await, + ), + ), + (None, None) => { + warn!("irsa auth requires redis-host and redis-port to be set, continuing without regional_redis_client"); + None + } + _ => { + bail!("redis-host and redis-port must be specified together"); + } + }, + _ => { + bail!("unknown auth type given"); + } + }; + + let redis_notifications_client = if let Some(url) = args.redis_notifications { + Some(ConnectionWithCredentialsProvider::new_with_static_credentials(url)) + } else { + regional_redis_client.clone() + }; + + // Check that we can bind to address before further initialization + let http_address: SocketAddr = args.http.parse()?; + info!("Starting http on {http_address}"); + let http_listener = TcpListener::bind(http_address).await?.into_std()?; + + let mgmt_address: SocketAddr = args.mgmt.parse()?; + info!("Starting mgmt on {mgmt_address}"); + let mgmt_listener = TcpListener::bind(mgmt_address).await?; + + let proxy_listener = if args.is_auth_broker { + None + } else { + let proxy_address: SocketAddr = args.proxy.parse()?; + info!("Starting proxy on {proxy_address}"); + + Some(TcpListener::bind(proxy_address).await?) + }; + + // TODO: rename the argument to something like serverless. 
+ // It now covers more than just websockets, it also covers SQL over HTTP. + let serverless_listener = if let Some(serverless_address) = args.wss { + let serverless_address: SocketAddr = serverless_address.parse()?; + info!("Starting wss on {serverless_address}"); + Some(TcpListener::bind(serverless_address).await?) + } else if args.is_auth_broker { + bail!("wss arg must be present for auth-broker") + } else { + None + }; + + let cancellation_token = CancellationToken::new(); + + let redis_rps_limit = Vec::leak(args.redis_rps_limit.clone()); + RateBucketInfo::validate(redis_rps_limit)?; + + let redis_kv_client = regional_redis_client + .as_ref() + .map(|redis_publisher| RedisKVClient::new(redis_publisher.clone(), redis_rps_limit)); + + // channel size should be higher than redis client limit to avoid blocking + let cancel_ch_size = args.cancellation_ch_size; + let (tx_cancel, rx_cancel) = tokio::sync::mpsc::channel(cancel_ch_size); + let cancellation_handler = Arc::new(CancellationHandler::new( + &config.connect_to_compute, + Some(tx_cancel), + )); + + // bit of a hack - find the min rps and max rps supported and turn it into + // leaky bucket config instead + let max = args + .endpoint_rps_limit + .iter() + .map(|x| x.rps()) + .max_by(f64::total_cmp) + .unwrap_or(EndpointRateLimiter::DEFAULT.max); + let rps = args + .endpoint_rps_limit + .iter() + .map(|x| x.rps()) + .min_by(f64::total_cmp) + .unwrap_or(EndpointRateLimiter::DEFAULT.rps); + let endpoint_rate_limiter = Arc::new(EndpointRateLimiter::new_with_shards( + LeakyBucketConfig { rps, max }, + 64, + )); + + // client facing tasks. these will exit on error or on cancellation + // cancellation returns Ok(()) + let mut client_tasks = JoinSet::new(); + match auth_backend { + Either::Left(auth_backend) => { + if let Some(proxy_listener) = proxy_listener { + client_tasks.spawn(crate::proxy::task_main( + config, + auth_backend, + proxy_listener, + cancellation_token.clone(), + cancellation_handler.clone(), + endpoint_rate_limiter.clone(), + )); + } + + if let Some(serverless_listener) = serverless_listener { + client_tasks.spawn(serverless::task_main( + config, + auth_backend, + serverless_listener, + cancellation_token.clone(), + cancellation_handler.clone(), + endpoint_rate_limiter.clone(), + )); + } + } + Either::Right(auth_backend) => { + if let Some(proxy_listener) = proxy_listener { + client_tasks.spawn(crate::console_redirect_proxy::task_main( + config, + auth_backend, + proxy_listener, + cancellation_token.clone(), + cancellation_handler.clone(), + )); + } + } + } + + client_tasks.spawn(crate::context::parquet::worker( + cancellation_token.clone(), + args.parquet_upload, + )); + + // maintenance tasks. these never return unless there's an error + let mut maintenance_tasks = JoinSet::new(); + maintenance_tasks.spawn(crate::signals::handle(cancellation_token.clone(), || {})); + maintenance_tasks.spawn(http::health_server::task_main( + http_listener, + AppMetrics { + jemalloc, + neon_metrics, + proxy: crate::metrics::Metrics::get(), + }, + )); + maintenance_tasks.spawn(control_plane::mgmt::task_main(mgmt_listener)); + + if let Some(metrics_config) = &config.metric_collection { + // TODO: Add gc regardles of the metric collection being enabled. 
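Earlier in this hunk, the smallest and largest `rps()` values from the configured `--endpoint-rps-limit` buckets are folded into a single `LeakyBucketConfig { rps, max }`. As a rough illustration of what those two numbers control, here is a hypothetical, self-contained leaky-bucket admission check (not the proxy's actual `EndpointRateLimiter`): `rps` is the steady drain rate and `max` is the burst capacity.

```rust
use std::time::Instant;

/// Hypothetical leaky-bucket meter.
struct LeakyBucket {
    rps: f64,
    max: f64,
    level: f64,
    last: Instant,
}

impl LeakyBucket {
    fn new(rps: f64, max: f64) -> Self {
        Self { rps, max, level: 0.0, last: Instant::now() }
    }

    /// Returns true if a request of the given cost is admitted.
    fn check(&mut self, cost: f64) -> bool {
        let now = Instant::now();
        // Drain the bucket at `rps` units per second since the last check.
        let drained = self.rps * now.duration_since(self.last).as_secs_f64();
        self.level = (self.level - drained).max(0.0);
        self.last = now;
        if self.level + cost > self.max {
            return false; // burst capacity exceeded: reject
        }
        self.level += cost;
        true
    }
}

fn main() {
    // Admit bursts up to `max`; sustain roughly `rps` requests per second.
    let mut bucket = LeakyBucket::new(100.0, 200.0);
    assert!(bucket.check(1.0));
}
```

Requests are admitted while the bucket has room; sustained load above `rps` eventually fills it to `max`, after which further requests are rejected until it drains.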
+ maintenance_tasks.spawn(usage_metrics::task_main(metrics_config)); + } + + #[cfg_attr(not(any(test, feature = "testing")), expect(irrefutable_let_patterns))] + if let Either::Left(auth::Backend::ControlPlane(api, ())) = &auth_backend { + if let crate::control_plane::client::ControlPlaneClient::ProxyV1(api) = &**api { + match (redis_notifications_client, regional_redis_client.clone()) { + (None, None) => {} + (client1, client2) => { + let cache = api.caches.project_info.clone(); + if let Some(client) = client1 { + maintenance_tasks.spawn(notifications::task_main( + client, + cache.clone(), + args.region.clone(), + )); + } + if let Some(client) = client2 { + maintenance_tasks.spawn(notifications::task_main( + client, + cache.clone(), + args.region.clone(), + )); + } + maintenance_tasks.spawn(async move { cache.clone().gc_worker().await }); + } + } + + if let Some(mut redis_kv_client) = redis_kv_client { + maintenance_tasks.spawn(async move { + redis_kv_client.try_connect().await?; + handle_cancel_messages(&mut redis_kv_client, rx_cancel).await + }); + } + + if let Some(regional_redis_client) = regional_redis_client { + let cache = api.caches.endpoints_cache.clone(); + let con = regional_redis_client; + let span = tracing::info_span!("endpoints_cache"); + maintenance_tasks.spawn( + async move { cache.do_read(con, cancellation_token.clone()).await } + .instrument(span), + ); + } + } + } + + let maintenance = loop { + // get one complete task + match futures::future::select( + pin!(maintenance_tasks.join_next()), + pin!(client_tasks.join_next()), + ) + .await + { + // exit immediately on maintenance task completion + Either::Left((Some(res), _)) => break crate::error::flatten_err(res)?, + // exit with error immediately if all maintenance tasks have ceased (should be caught by branch above) + Either::Left((None, _)) => bail!("no maintenance tasks running. invalid state"), + // exit immediately on client task error + Either::Right((Some(res), _)) => crate::error::flatten_err(res)?, + // exit if all our client tasks have shutdown gracefully + Either::Right((None, _)) => return Ok(()), + } + }; + + // maintenance tasks return Infallible success values, this is an impossible value + // so this match statically ensures that there are no possibilities for that value + match maintenance {} +} + +/// ProxyConfig is created at proxy startup, and lives forever. 
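The doc comment above ("created at proxy startup, and lives forever") is realized with `Box::leak` inside `build_config` below: the config is built once and intentionally leaked, so every task can hold a plain `&'static` reference without reference counting. A tiny sketch of the idiom with a hypothetical `Config` type:

```rust
/// Hypothetical stand-in for the long-lived proxy configuration.
struct Config {
    region: String,
}

fn build_static_config(region: &str) -> &'static Config {
    let config = Config { region: region.to_string() };
    // Deliberate leak: the allocation is never freed, which is acceptable for
    // a value that must outlive the whole process anyway.
    Box::leak(Box::new(config))
}

fn main() {
    let config: &'static Config = build_static_config("us-east-1");
    println!("region = {}", config.region);
}
```

The leak is harmless here because the process exits before the memory would ever need to be reclaimed.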
+fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> { + let thread_pool = ThreadPool::new(args.scram_thread_pool_size); + Metrics::install(thread_pool.metrics.clone()); + + let tls_config = match (&args.tls_key, &args.tls_cert) { + (Some(key_path), Some(cert_path)) => Some(config::configure_tls( + key_path, + cert_path, + args.certs_dir.as_ref(), + args.allow_tls_keylogfile, + )?), + (None, None) => None, + _ => bail!("either both or neither tls-key and tls-cert must be specified"), + }; + + let backup_metric_collection_config = config::MetricBackupCollectionConfig { + remote_storage_config: args.metric_backup_collection_remote_storage.clone(), + chunk_size: args.metric_backup_collection_chunk_size, + }; + + let metric_collection = match ( + &args.metric_collection_endpoint, + &args.metric_collection_interval, + ) { + (Some(endpoint), Some(interval)) => Some(config::MetricCollectionConfig { + endpoint: endpoint.parse()?, + interval: humantime::parse_duration(interval)?, + backup_metric_collection_config, + }), + (None, None) => None, + _ => bail!( + "either both or neither metric-collection-endpoint \ + and metric-collection-interval must be specified" + ), + }; + + let config::ConcurrencyLockOptions { + shards, + limiter, + epoch, + timeout, + } = args.connect_compute_lock.parse()?; + info!( + ?limiter, + shards, + ?epoch, + "Using NodeLocks (connect_compute)" + ); + let connect_compute_locks = control_plane::locks::ApiLocks::new( + "connect_compute_lock", + limiter, + shards, + timeout, + epoch, + &Metrics::get().proxy.connect_compute_lock, + ); + + let http_config = HttpConfig { + accept_websockets: !args.is_auth_broker, + pool_options: GlobalConnPoolOptions { + max_conns_per_endpoint: args.sql_over_http.sql_over_http_pool_max_conns_per_endpoint, + gc_epoch: args.sql_over_http.sql_over_http_pool_gc_epoch, + pool_shards: args.sql_over_http.sql_over_http_pool_shards, + idle_timeout: args.sql_over_http.sql_over_http_idle_timeout, + opt_in: args.sql_over_http.sql_over_http_pool_opt_in, + max_total_conns: args.sql_over_http.sql_over_http_pool_max_total_conns, + }, + cancel_set: CancelSet::new(args.sql_over_http.sql_over_http_cancel_set_shards), + client_conn_threshold: args.sql_over_http.sql_over_http_client_conn_threshold, + max_request_size_bytes: args.sql_over_http.sql_over_http_max_request_size_bytes, + max_response_size_bytes: args.sql_over_http.sql_over_http_max_response_size_bytes, + }; + let authentication_config = AuthenticationConfig { + jwks_cache: JwkCache::default(), + thread_pool, + scram_protocol_timeout: args.scram_protocol_timeout, + rate_limiter_enabled: args.auth_rate_limit_enabled, + rate_limiter: AuthRateLimiter::new(args.auth_rate_limit.clone()), + rate_limit_ip_subnet: args.auth_rate_limit_ip_subnet, + ip_allowlist_check_enabled: !args.is_private_access_proxy, + is_vpc_acccess_proxy: args.is_private_access_proxy, + is_auth_broker: args.is_auth_broker, + accept_jwts: args.is_auth_broker, + console_redirect_confirmation_timeout: args.webauth_confirmation_timeout, + }; + + let compute_config = ComputeConfig { + retry: config::RetryConfig::parse(&args.connect_to_compute_retry)?, + tls: Arc::new(compute_client_config_with_root_certs()?), + timeout: Duration::from_secs(2), + }; + + let config = ProxyConfig { + tls_config, + metric_collection, + http_config, + authentication_config, + proxy_protocol_v2: args.proxy_protocol_v2, + handshake_timeout: args.handshake_timeout, + region: args.region.clone(), + wake_compute_retry_config: 
config::RetryConfig::parse(&args.wake_compute_retry)?, + connect_compute_locks, + connect_to_compute: compute_config, + }; + + let config = Box::leak(Box::new(config)); + + tokio::spawn(config.connect_compute_locks.garbage_collect_worker()); + + Ok(config) +} + +/// auth::Backend is created at proxy startup, and lives forever. +fn build_auth_backend( + args: &ProxyCliArgs, +) -> anyhow::Result, &'static ConsoleRedirectBackend>> { + match &args.auth_backend { + AuthBackendType::ControlPlaneV1 => { + let wake_compute_cache_config: CacheOptions = args.wake_compute_cache.parse()?; + let project_info_cache_config: ProjectInfoCacheOptions = + args.project_info_cache.parse()?; + let endpoint_cache_config: config::EndpointCacheConfig = + args.endpoint_cache_config.parse()?; + + info!("Using NodeInfoCache (wake_compute) with options={wake_compute_cache_config:?}"); + info!( + "Using AllowedIpsCache (wake_compute) with options={project_info_cache_config:?}" + ); + info!("Using EndpointCacheConfig with options={endpoint_cache_config:?}"); + let caches = Box::leak(Box::new(control_plane::caches::ApiCaches::new( + wake_compute_cache_config, + project_info_cache_config, + endpoint_cache_config, + ))); + + let config::ConcurrencyLockOptions { + shards, + limiter, + epoch, + timeout, + } = args.wake_compute_lock.parse()?; + info!(?limiter, shards, ?epoch, "Using NodeLocks (wake_compute)"); + let locks = Box::leak(Box::new(control_plane::locks::ApiLocks::new( + "wake_compute_lock", + limiter, + shards, + timeout, + epoch, + &Metrics::get().wake_compute_lock, + ))); + tokio::spawn(locks.garbage_collect_worker()); + + let url: crate::url::ApiUrl = args.auth_endpoint.parse()?; + + let endpoint = http::Endpoint::new(url, http::new_client()); + + let mut wake_compute_rps_limit = args.wake_compute_limit.clone(); + RateBucketInfo::validate(&mut wake_compute_rps_limit)?; + let wake_compute_endpoint_rate_limiter = + Arc::new(WakeComputeRateLimiter::new(wake_compute_rps_limit)); + + let api = control_plane::client::cplane_proxy_v1::NeonControlPlaneClient::new( + endpoint, + args.control_plane_token.clone(), + caches, + locks, + wake_compute_endpoint_rate_limiter, + ); + + let api = control_plane::client::ControlPlaneClient::ProxyV1(api); + let auth_backend = auth::Backend::ControlPlane(MaybeOwned::Owned(api), ()); + let config = Box::leak(Box::new(auth_backend)); + + Ok(Either::Left(config)) + } + + #[cfg(any(test, feature = "testing"))] + AuthBackendType::Postgres => { + let url = args.auth_endpoint.parse()?; + let api = control_plane::client::mock::MockControlPlane::new( + url, + !args.is_private_access_proxy, + ); + let api = control_plane::client::ControlPlaneClient::PostgresMock(api); + + let auth_backend = auth::Backend::ControlPlane(MaybeOwned::Owned(api), ()); + + let config = Box::leak(Box::new(auth_backend)); + + Ok(Either::Left(config)) + } + + AuthBackendType::ConsoleRedirect => { + let wake_compute_cache_config: CacheOptions = args.wake_compute_cache.parse()?; + let project_info_cache_config: ProjectInfoCacheOptions = + args.project_info_cache.parse()?; + let endpoint_cache_config: config::EndpointCacheConfig = + args.endpoint_cache_config.parse()?; + + info!("Using NodeInfoCache (wake_compute) with options={wake_compute_cache_config:?}"); + info!( + "Using AllowedIpsCache (wake_compute) with options={project_info_cache_config:?}" + ); + info!("Using EndpointCacheConfig with options={endpoint_cache_config:?}"); + let caches = Box::leak(Box::new(control_plane::caches::ApiCaches::new( + 
wake_compute_cache_config, + project_info_cache_config, + endpoint_cache_config, + ))); + + let config::ConcurrencyLockOptions { + shards, + limiter, + epoch, + timeout, + } = args.wake_compute_lock.parse()?; + info!(?limiter, shards, ?epoch, "Using NodeLocks (wake_compute)"); + let locks = Box::leak(Box::new(control_plane::locks::ApiLocks::new( + "wake_compute_lock", + limiter, + shards, + timeout, + epoch, + &Metrics::get().wake_compute_lock, + ))); + + let url = args.uri.clone().parse()?; + let ep_url: crate::url::ApiUrl = args.auth_endpoint.parse()?; + let endpoint = http::Endpoint::new(ep_url, http::new_client()); + let mut wake_compute_rps_limit = args.wake_compute_limit.clone(); + RateBucketInfo::validate(&mut wake_compute_rps_limit)?; + let wake_compute_endpoint_rate_limiter = + Arc::new(WakeComputeRateLimiter::new(wake_compute_rps_limit)); + + // Since we use only get_allowed_ips_and_secret() wake_compute_endpoint_rate_limiter + // and locks are not used in ConsoleRedirectBackend, + // but they are required by the NeonControlPlaneClient + let api = control_plane::client::cplane_proxy_v1::NeonControlPlaneClient::new( + endpoint, + args.control_plane_token.clone(), + caches, + locks, + wake_compute_endpoint_rate_limiter, + ); + + let backend = ConsoleRedirectBackend::new(url, api); + let config = Box::leak(Box::new(backend)); + + Ok(Either::Right(config)) + } + } +} + +#[cfg(test)] +mod tests { + use std::time::Duration; + + use crate::rate_limiter::RateBucketInfo; + use clap::Parser; + + #[test] + fn parse_endpoint_rps_limit() { + let config = super::ProxyCliArgs::parse_from([ + "proxy", + "--endpoint-rps-limit", + "100@1s", + "--endpoint-rps-limit", + "20@30s", + ]); + + assert_eq!( + config.endpoint_rps_limit, + vec![ + RateBucketInfo::new(100, Duration::from_secs(1)), + RateBucketInfo::new(20, Duration::from_secs(30)), + ] + ); + } +} diff --git a/proxy/src/cancellation.rs b/proxy/src/cancellation.rs index 4d919f374a..e84f1676e2 100644 --- a/proxy/src/cancellation.rs +++ b/proxy/src/cancellation.rs @@ -69,17 +69,35 @@ pub async fn handle_cancel_messages( value, resp_tx, _guard, - expire: _, + expire, } => { + let res = client.hset(&key, field, value).await; if let Some(resp_tx) = resp_tx { - resp_tx - .send(client.hset(key, field, value).await) - .inspect_err(|e| { - tracing::debug!("failed to send StoreCancelKey response: {:?}", e); - }) - .ok(); + if res.is_ok() { + resp_tx + .send(client.expire(key, expire).await) + .inspect_err(|e| { + tracing::debug!( + "failed to send StoreCancelKey response: {:?}", + e + ); + }) + .ok(); + } else { + resp_tx + .send(res) + .inspect_err(|e| { + tracing::debug!( + "failed to send StoreCancelKey response: {:?}", + e + ); + }) + .ok(); + } + } else if res.is_ok() { + drop(client.expire(key, expire).await); } else { - drop(client.hset(key, field, value).await); + tracing::warn!("failed to store cancel key: {:?}", res); } } CancelKeyOp::GetCancelData { @@ -436,7 +454,7 @@ impl Session { &self.key } - // Send the store key op to the cancellation handler + // Send the store key op to the cancellation handler and set TTL for the key pub(crate) async fn write_cancel_key( &self, cancel_closure: CancelClosure, diff --git a/proxy/src/compute_ctl/mod.rs b/proxy/src/compute_ctl/mod.rs index 60fdf107d4..ab3179afb2 100644 --- a/proxy/src/compute_ctl/mod.rs +++ b/proxy/src/compute_ctl/mod.rs @@ -42,14 +42,14 @@ pub enum Privilege { #[derive(Error, Debug)] pub enum ComputeCtlError { #[error("connection error: {0}")] - ConnectionError(#[source] 
reqwest_middleware::Error), + Connection(#[source] reqwest_middleware::Error), #[error("request error [{status}]: {body:?}")] - RequestError { + Request { status: StatusCode, body: Option, }, #[error("response parsing error: {0}")] - ResponseError(#[source] reqwest::Error), + Response(#[source] reqwest::Error), } impl ComputeCtlApi { @@ -89,14 +89,14 @@ impl ComputeCtlApi { .json(req) .send() .await - .map_err(ComputeCtlError::ConnectionError)?; + .map_err(ComputeCtlError::Connection)?; let status = resp.status(); if status.is_client_error() || status.is_server_error() { let body = resp.json().await.ok(); - return Err(ComputeCtlError::RequestError { status, body }); + return Err(ComputeCtlError::Request { status, body }); } - resp.json().await.map_err(ComputeCtlError::ResponseError) + resp.json().await.map_err(ComputeCtlError::Response) } } diff --git a/proxy/src/config.rs b/proxy/src/config.rs index 1dcd37712e..460e0cff54 100644 --- a/proxy/src/config.rs +++ b/proxy/src/config.rs @@ -151,7 +151,6 @@ impl FromStr for EndpointCacheConfig { } #[derive(Debug)] pub struct MetricBackupCollectionConfig { - pub interval: Duration, pub remote_storage_config: Option, pub chunk_size: usize, } diff --git a/proxy/src/context/parquet.rs b/proxy/src/context/parquet.rs index 4f1dd39d92..0537ae6a62 100644 --- a/proxy/src/context/parquet.rs +++ b/proxy/src/context/parquet.rs @@ -187,6 +187,10 @@ pub async fn worker( let rx = futures::stream::poll_fn(move |cx| rx.poll_recv(cx)); let rx = rx.map(RequestData::from); + let storage = GenericRemoteStorage::from_config(&remote_storage_config) + .await + .context("remote storage init")?; + let properties = WriterProperties::builder() .set_data_page_size_limit(config.parquet_upload_page_size) .set_compression(config.parquet_upload_compression); @@ -220,18 +224,18 @@ pub async fn worker( let rx_disconnect = futures::stream::poll_fn(move |cx| rx_disconnect.poll_recv(cx)); let rx_disconnect = rx_disconnect.map(RequestData::from); + let storage_disconnect = + GenericRemoteStorage::from_config(&disconnect_events_storage_config) + .await + .context("remote storage for disconnect events init")?; let parquet_config_disconnect = parquet_config.clone(); tokio::try_join!( - worker_inner(remote_storage_config, rx, parquet_config), - worker_inner( - disconnect_events_storage_config, - rx_disconnect, - parquet_config_disconnect - ) + worker_inner(storage, rx, parquet_config), + worker_inner(storage_disconnect, rx_disconnect, parquet_config_disconnect) ) .map(|_| ()) } else { - worker_inner(remote_storage_config, rx, parquet_config).await + worker_inner(storage, rx, parquet_config).await } } @@ -247,32 +251,18 @@ struct ParquetConfig { test_remote_failures: u64, } -impl ParquetConfig { - async fn storage( - &self, - storage_config: &RemoteStorageConfig, - ) -> anyhow::Result { - let storage = GenericRemoteStorage::from_config(storage_config) - .await - .context("remote storage init")?; - - #[cfg(any(test, feature = "testing"))] - if self.test_remote_failures > 0 { - return Ok(GenericRemoteStorage::unreliable_wrapper( - storage, - self.test_remote_failures, - )); - } - - Ok(storage) - } -} - async fn worker_inner( - storage_config: RemoteStorageConfig, + storage: GenericRemoteStorage, rx: impl Stream, config: ParquetConfig, ) -> anyhow::Result<()> { + #[cfg(any(test, feature = "testing"))] + let storage = if config.test_remote_failures > 0 { + GenericRemoteStorage::unreliable_wrapper(storage, config.test_remote_failures) + } else { + storage + }; + let mut rx = std::pin::pin!(rx); 
let mut rows = Vec::with_capacity(config.rows_per_group); @@ -295,7 +285,7 @@ async fn worker_inner( } if len > config.file_size || force { last_upload = time::Instant::now(); - let file = upload_parquet(w, len, &storage_config, &config).await?; + let file = upload_parquet(w, len, &storage).await?; w = SerializedFileWriter::new(file, schema.clone(), config.propeties.clone())?; len = 0; } @@ -308,7 +298,7 @@ async fn worker_inner( } if !w.flushed_row_groups().is_empty() { - let _rtchk: Writer = upload_parquet(w, len, &storage_config, &config).await?; + let _rtchk: Writer = upload_parquet(w, len, &storage).await?; } Ok(()) @@ -350,8 +340,7 @@ where async fn upload_parquet( mut w: SerializedFileWriter>, len: i64, - storage_config: &RemoteStorageConfig, - config: &ParquetConfig, + storage: &GenericRemoteStorage, ) -> anyhow::Result> { let len_uncompressed = w .flushed_row_groups() @@ -388,15 +377,6 @@ async fn upload_parquet( size, compression, "uploading request parquet file" ); - // A bug in azure-sdk means that the identity-token-file that expires after - // 1 hour is not refreshed. This identity-token is used to fetch the actual azure storage - // tokens that last for 24 hours. After this 24 hour period, azure-sdk tries to refresh - // the storage token, but the identity token has now expired. - // - // - // To work around this, we recreate the storage every time. - let storage = config.storage(storage_config).await?; - let year = now.year(); let month = now.month(); let day = now.day(); @@ -451,8 +431,8 @@ mod tests { use rand::rngs::StdRng; use rand::{Rng, SeedableRng}; use remote_storage::{ - RemoteStorageConfig, RemoteStorageKind, S3Config, DEFAULT_MAX_KEYS_PER_LIST_RESPONSE, - DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT, + GenericRemoteStorage, RemoteStorageConfig, RemoteStorageKind, S3Config, + DEFAULT_MAX_KEYS_PER_LIST_RESPONSE, DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT, }; use tokio::sync::mpsc; use tokio::time; @@ -579,11 +559,12 @@ mod tests { timeout: std::time::Duration::from_secs(120), small_timeout: std::time::Duration::from_secs(30), }; - - worker_inner(remote_storage_config, rx, config) + let storage = GenericRemoteStorage::from_config(&remote_storage_config) .await .unwrap(); + worker_inner(storage, rx, config).await.unwrap(); + let mut files = WalkDir::new(tmpdir.as_std_path()) .into_iter() .filter_map(|entry| entry.ok()) diff --git a/proxy/src/control_plane/client/mod.rs b/proxy/src/control_plane/client/mod.rs index a06943726e..c28ff4789d 100644 --- a/proxy/src/control_plane/client/mod.rs +++ b/proxy/src/control_plane/client/mod.rs @@ -212,15 +212,15 @@ impl ApiLocks { timeout: Duration, epoch: std::time::Duration, metrics: &'static ApiLockMetrics, - ) -> prometheus::Result { - Ok(Self { + ) -> Self { + Self { name, node_locks: ClashMap::with_shard_amount(shards), config, timeout, epoch, metrics, - }) + } } pub(crate) async fn get_permit(&self, key: &K) -> Result { diff --git a/proxy/src/control_plane/messages.rs b/proxy/src/control_plane/messages.rs index 5883d02b92..8d6b2e96f5 100644 --- a/proxy/src/control_plane/messages.rs +++ b/proxy/src/control_plane/messages.rs @@ -361,7 +361,8 @@ pub struct EndpointJwksResponse { pub struct JwksSettings { pub id: String, pub jwks_url: url::Url, - pub provider_name: String, + #[serde(rename = "provider_name")] + pub _provider_name: String, pub jwt_audience: Option, pub role_names: Vec, } diff --git a/proxy/src/http/health_server.rs b/proxy/src/http/health_server.rs index 6ca091feb7..141f319567 100644 --- 
a/proxy/src/http/health_server.rs +++ b/proxy/src/http/health_server.rs @@ -3,16 +3,16 @@ use std::net::TcpListener; use std::sync::{Arc, Mutex}; use anyhow::{anyhow, bail}; +use http_utils::endpoint::{self, request_span}; +use http_utils::error::ApiError; +use http_utils::json::json_response; +use http_utils::{RouterBuilder, RouterService}; use hyper0::header::CONTENT_TYPE; use hyper0::{Body, Request, Response, StatusCode}; use measured::text::BufferedTextEncoder; use measured::MetricGroup; use metrics::NeonMetrics; use tracing::{info, info_span}; -use utils::http::endpoint::{self, request_span}; -use utils::http::error::ApiError; -use utils::http::json::json_response; -use utils::http::{RouterBuilder, RouterService}; use crate::ext::{LockExt, TaskExt}; use crate::jemalloc; diff --git a/proxy/src/lib.rs b/proxy/src/lib.rs index c56474edd7..a9e5fbc85b 100644 --- a/proxy/src/lib.rs +++ b/proxy/src/lib.rs @@ -72,34 +72,36 @@ // List of temporarily allowed lints to unblock beta/nightly. #![allow(unknown_lints)] -pub mod auth; -pub mod cache; -pub mod cancellation; -pub mod compute; -pub mod compute_ctl; -pub mod config; -pub mod console_redirect_proxy; -pub mod context; -pub mod control_plane; -pub mod error; +pub mod binary; + +mod auth; +mod cache; +mod cancellation; +mod compute; +mod compute_ctl; +mod config; +mod console_redirect_proxy; +mod context; +mod control_plane; +mod error; mod ext; -pub mod http; -pub mod intern; -pub mod jemalloc; -pub mod logging; -pub mod metrics; -pub mod parse; -pub mod protocol2; -pub mod proxy; -pub mod rate_limiter; -pub mod redis; -pub mod sasl; -pub mod scram; -pub mod serverless; -pub mod signals; -pub mod stream; -pub mod tls; -pub mod types; -pub mod url; -pub mod usage_metrics; -pub mod waiters; +mod http; +mod intern; +mod jemalloc; +mod logging; +mod metrics; +mod parse; +mod protocol2; +mod proxy; +mod rate_limiter; +mod redis; +mod sasl; +mod scram; +mod serverless; +mod signals; +mod stream; +mod tls; +mod types; +mod url; +mod usage_metrics; +mod waiters; diff --git a/proxy/src/metrics.rs b/proxy/src/metrics.rs index 25bcc81108..f3447e063e 100644 --- a/proxy/src/metrics.rs +++ b/proxy/src/metrics.rs @@ -205,7 +205,7 @@ pub enum Protocol { } impl Protocol { - pub fn as_str(&self) -> &'static str { + pub fn as_str(self) -> &'static str { match self { Protocol::Http => "http", Protocol::Ws => "ws", @@ -385,6 +385,7 @@ pub enum Waiting { #[derive(FixedCardinalityLabel, Copy, Clone)] #[label(singleton = "kind")] +#[allow(clippy::enum_variant_names)] pub enum RedisMsgKind { HSet, HSetMultiple, diff --git a/proxy/src/redis/cancellation_publisher.rs b/proxy/src/redis/cancellation_publisher.rs index 30d8b83e60..186fece4b2 100644 --- a/proxy/src/redis/cancellation_publisher.rs +++ b/proxy/src/redis/cancellation_publisher.rs @@ -5,9 +5,6 @@ use pq_proto::CancelKeyData; use tokio::sync::Mutex; use uuid::Uuid; -use super::connection_with_credentials_provider::ConnectionWithCredentialsProvider; -use crate::rate_limiter::{GlobalRateLimiter, RateBucketInfo}; - pub trait CancellationPublisherMut: Send + Sync + 'static { #[allow(async_fn_in_trait)] async fn try_publish( @@ -79,36 +76,3 @@ impl CancellationPublisher for Arc> { .await } } - -pub struct RedisPublisherClient { - #[allow(dead_code)] - client: ConnectionWithCredentialsProvider, - _region_id: String, - _limiter: GlobalRateLimiter, -} - -impl RedisPublisherClient { - pub fn new( - client: ConnectionWithCredentialsProvider, - region_id: String, - info: &'static [RateBucketInfo], - ) -> anyhow::Result { 
- Ok(Self { - client, - _region_id: region_id, - _limiter: GlobalRateLimiter::new(info.into()), - }) - } - - #[allow(dead_code)] - pub(crate) async fn try_connect(&mut self) -> anyhow::Result<()> { - match self.client.connect().await { - Ok(()) => {} - Err(e) => { - tracing::error!("failed to connect to redis: {e}"); - return Err(e); - } - } - Ok(()) - } -} diff --git a/proxy/src/serverless/backend.rs b/proxy/src/serverless/backend.rs index 0fb4a8a6cc..edc2935618 100644 --- a/proxy/src/serverless/backend.rs +++ b/proxy/src/serverless/backend.rs @@ -400,9 +400,9 @@ fn create_random_jwk() -> (SigningKey, jose_jwk::Key) { pub(crate) enum HttpConnError { #[error("pooled connection closed at inconsistent state")] ConnectionClosedAbruptly(#[from] tokio::sync::watch::error::SendError), - #[error("could not connection to postgres in compute")] + #[error("could not connect to postgres in compute")] PostgresConnectionError(#[from] postgres_client::Error), - #[error("could not connection to local-proxy in compute")] + #[error("could not connect to local-proxy in compute")] LocalProxyConnectionError(#[from] LocalProxyConnError), #[error("could not parse JWT payload")] JwtPayloadError(serde_json::Error), diff --git a/proxy/src/serverless/http_util.rs b/proxy/src/serverless/http_util.rs index d5c948777c..95a28663a5 100644 --- a/proxy/src/serverless/http_util.rs +++ b/proxy/src/serverless/http_util.rs @@ -6,8 +6,8 @@ use bytes::Bytes; use http::{Response, StatusCode}; use http_body_util::combinators::BoxBody; use http_body_util::{BodyExt, Full}; +use http_utils::error::ApiError; use serde::Serialize; -use utils::http::error::ApiError; /// Like [`ApiError::into_response`] pub(crate) fn api_error_into_response(this: ApiError) -> Response> { @@ -59,14 +59,14 @@ pub(crate) fn api_error_into_response(this: ApiError) -> Response Response> { Response::builder() .status(status) @@ -92,7 +92,7 @@ impl HttpErrorBody { } } -/// Same as [`utils::http::json::json_response`] +/// Same as [`http_utils::json::json_response`] pub(crate) fn json_response( status: StatusCode, data: T, diff --git a/proxy/src/serverless/mod.rs b/proxy/src/serverless/mod.rs index 6888772362..8289500159 100644 --- a/proxy/src/serverless/mod.rs +++ b/proxy/src/serverless/mod.rs @@ -28,6 +28,7 @@ use futures::TryFutureExt; use http::{Method, Response, StatusCode}; use http_body_util::combinators::BoxBody; use http_body_util::{BodyExt, Empty}; +use http_utils::error::ApiError; use hyper::body::Incoming; use hyper_util::rt::TokioExecutor; use hyper_util::server::conn::auto::Builder; @@ -41,7 +42,6 @@ use tokio_rustls::TlsAcceptor; use tokio_util::sync::CancellationToken; use tokio_util::task::TaskTracker; use tracing::{info, warn, Instrument}; -use utils::http::error::ApiError; use crate::cancellation::CancellationHandler; use crate::config::{ProxyConfig, ProxyProtocolV2}; diff --git a/proxy/src/serverless/sql_over_http.rs b/proxy/src/serverless/sql_over_http.rs index 3e42787a09..5982fe225d 100644 --- a/proxy/src/serverless/sql_over_http.rs +++ b/proxy/src/serverless/sql_over_http.rs @@ -8,21 +8,22 @@ use http::header::AUTHORIZATION; use http::Method; use http_body_util::combinators::BoxBody; use http_body_util::{BodyExt, Full}; +use http_utils::error::ApiError; use hyper::body::Incoming; use hyper::http::{HeaderName, HeaderValue}; use hyper::{header, HeaderMap, Request, Response, StatusCode}; +use indexmap::IndexMap; use postgres_client::error::{DbError, ErrorPosition, SqlState}; use postgres_client::{GenericClient, IsolationLevel, NoTls, 
ReadyForQueryStatus, Transaction}; use pq_proto::StartupMessageParamsBuilder; use serde::Serialize; +use serde_json::value::RawValue; use serde_json::Value; use tokio::time::{self, Instant}; use tokio_util::sync::CancellationToken; use tracing::{debug, error, info}; use typed_json::json; use url::Url; -use urlencoding; -use utils::http::error::ApiError; use uuid::Uuid; use super::backend::{LocalProxyConnError, PoolingBackend}; @@ -249,6 +250,50 @@ pub(crate) async fn handle( let mut response = match result { Ok(r) => { ctx.set_success(); + + // Handling the error response from local proxy here + if config.authentication_config.is_auth_broker && r.status().is_server_error() { + let status = r.status(); + + let body_bytes = r + .collect() + .await + .map_err(|e| { + ApiError::InternalServerError(anyhow::Error::msg(format!( + "could not collect http body: {e}" + ))) + })? + .to_bytes(); + + if let Ok(mut json_map) = + serde_json::from_slice::>(&body_bytes) + { + let message = json_map.get("message"); + if let Some(message) = message { + let msg: String = match serde_json::from_str(message.get()) { + Ok(msg) => msg, + Err(_) => { + "Unable to parse the response message from server".to_string() + } + }; + + error!("Error response from local_proxy: {status} {msg}"); + + json_map.retain(|key, _| !key.starts_with("neon:")); // remove all the neon-related keys + + let resp_json = serde_json::to_string(&json_map) + .unwrap_or("failed to serialize the response message".to_string()); + + return json_response(status, resp_json); + } + } + + error!("Unable to parse the response message from local_proxy"); + return json_response( + status, + json!({ "message": "Unable to parse the response message from server".to_string() }), + ); + } r } Err(e @ SqlOverHttpError::Cancelled(_)) => { @@ -618,8 +663,6 @@ async fn handle_db_inner( let authenticate_and_connect = Box::pin( async { - let is_local_proxy = matches!(backend.auth_backend, crate::auth::Backend::Local(_)); - let keys = match auth { AuthData::Password(pw) => { backend @@ -634,7 +677,9 @@ async fn handle_db_inner( }; let client = match keys.keys { - ComputeCredentialKeys::JwtPayload(payload) if is_local_proxy => { + ComputeCredentialKeys::JwtPayload(payload) + if backend.auth_backend.is_local_proxy() => + { let mut client = backend.connect_to_local_postgres(ctx, conn_info).await?; let (cli_inner, _dsc) = client.client_inner(); cli_inner.set_jwt_session(&payload).await?; diff --git a/safekeeper/Cargo.toml b/safekeeper/Cargo.toml index 0eb511f1cc..d12ebc1030 100644 --- a/safekeeper/Cargo.toml +++ b/safekeeper/Cargo.toml @@ -63,6 +63,7 @@ sha2.workspace = true sd-notify.workspace = true storage_broker.workspace = true tokio-stream.workspace = true +http-utils.workspace = true utils.workspace = true wal_decoder.workspace = true env_logger.workspace = true diff --git a/safekeeper/client/Cargo.toml b/safekeeper/client/Cargo.toml index 6c5a52de3a..0b660aaf32 100644 --- a/safekeeper/client/Cargo.toml +++ b/safekeeper/client/Cargo.toml @@ -5,6 +5,7 @@ edition.workspace = true license.workspace = true [dependencies] +http-utils.workspace = true safekeeper_api.workspace = true thiserror.workspace = true reqwest = { workspace = true, features = [ "stream" ] } diff --git a/safekeeper/client/src/mgmt_api.rs b/safekeeper/client/src/mgmt_api.rs index f65bfaa6d5..df049f3eba 100644 --- a/safekeeper/client/src/mgmt_api.rs +++ b/safekeeper/client/src/mgmt_api.rs @@ -3,11 +3,11 @@ //! Partially copied from pageserver client; some parts might be better to be //! united. 
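Stepping back to the `proxy/src/serverless/sql_over_http.rs` hunk above: on the auth-broker path the proxy parses the error body returned by local_proxy, drops the internal `neon:`-prefixed keys, and re-serializes the remainder for the client. A simplified sketch of that filtering step using `serde_json::Map` and `Value` (the hunk itself uses `IndexMap` with `RawValue`; this is only an illustration of the idea):

```rust
use serde_json::{Map, Value};

/// Drop internal `neon:`-prefixed keys from a JSON object and re-serialize it.
fn strip_internal_keys(body: &[u8]) -> serde_json::Result<String> {
    let mut map: Map<String, Value> = serde_json::from_slice(body)?;
    // Remove all the neon-related keys before forwarding the error downstream.
    map.retain(|key, _| !key.starts_with("neon:"));
    serde_json::to_string(&map)
}

fn main() -> serde_json::Result<()> {
    let body = br#"{"message":"boom","neon:request_id":"abc"}"#;
    assert_eq!(strip_internal_keys(body)?, r#"{"message":"boom"}"#);
    Ok(())
}
```

Only the non-internal fields, such as `message`, survive and are returned to the caller.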
+use http_utils::error::HttpErrorBody; use reqwest::{IntoUrl, Method, StatusCode}; use safekeeper_api::models::{TimelineCreateRequest, TimelineStatus}; use std::error::Error as _; use utils::{ - http::error::HttpErrorBody, id::{NodeId, TenantId, TimelineId}, logging::SecretString, }; diff --git a/safekeeper/src/http/mod.rs b/safekeeper/src/http/mod.rs index d82a713f8a..6e160b7a5e 100644 --- a/safekeeper/src/http/mod.rs +++ b/safekeeper/src/http/mod.rs @@ -14,7 +14,7 @@ pub async fn task_main( let router = make_router(conf, global_timelines) .build() .map_err(|err| anyhow::anyhow!(err))?; - let service = utils::http::RouterService::new(router).unwrap(); + let service = http_utils::RouterService::new(router).unwrap(); let server = hyper::Server::from_tcp(http_listener)?; server.serve(service).await?; Ok(()) // unreachable diff --git a/safekeeper/src/http/routes.rs b/safekeeper/src/http/routes.rs index 7ec08ecf9a..a64bf1ddd8 100644 --- a/safekeeper/src/http/routes.rs +++ b/safekeeper/src/http/routes.rs @@ -1,3 +1,4 @@ +use http_utils::failpoints::failpoints_handler; use hyper::{Body, Request, Response, StatusCode}; use safekeeper_api::models; use safekeeper_api::models::AcceptorStateStatus; @@ -17,25 +18,23 @@ use tokio::task; use tokio_stream::wrappers::ReceiverStream; use tokio_util::sync::CancellationToken; use tracing::{info_span, Instrument}; -use utils::failpoint_support::failpoints_handler; -use utils::http::endpoint::{ + +use http_utils::endpoint::{ profile_cpu_handler, profile_heap_handler, prometheus_metrics_handler, request_span, - ChannelWriter, }; -use utils::http::request::parse_query_param; +use http_utils::{ + endpoint::{self, auth_middleware, check_permission_with, ChannelWriter}, + error::ApiError, + json::{json_request, json_response}, + request::{ensure_no_body, parse_query_param, parse_request_param}, + RequestExt, RouterBuilder, +}; use postgres_ffi::WAL_SEGMENT_SIZE; use safekeeper_api::models::{SkTimelineInfo, TimelineCopyRequest}; use safekeeper_api::models::{TimelineCreateRequest, TimelineTermBumpRequest}; use utils::{ auth::SwappableJwtAuth, - http::{ - endpoint::{self, auth_middleware, check_permission_with}, - error::ApiError, - json::{json_request, json_response}, - request::{ensure_no_body, parse_request_param}, - RequestExt, RouterBuilder, - }, id::{TenantId, TenantTimelineId, TimelineId}, lsn::Lsn, }; diff --git a/safekeeper/src/send_interpreted_wal.rs b/safekeeper/src/send_interpreted_wal.rs index ea09ce364d..5916675c3f 100644 --- a/safekeeper/src/send_interpreted_wal.rs +++ b/safekeeper/src/send_interpreted_wal.rs @@ -15,7 +15,8 @@ use tokio::io::{AsyncRead, AsyncWrite}; use tokio::sync::mpsc::error::SendError; use tokio::task::JoinHandle; use tokio::time::MissedTickBehavior; -use tracing::{info_span, Instrument}; +use tracing::{error, info, info_span, Instrument}; +use utils::critical; use utils::lsn::Lsn; use utils::postgres_client::Compression; use utils::postgres_client::InterpretedFormat; @@ -120,6 +121,20 @@ pub enum InterpretedWalReaderError { WalStreamClosed, } +enum CurrentPositionUpdate { + Reset(Lsn), + NotReset(Lsn), +} + +impl CurrentPositionUpdate { + fn current_position(&self) -> Lsn { + match self { + CurrentPositionUpdate::Reset(lsn) => *lsn, + CurrentPositionUpdate::NotReset(lsn) => *lsn, + } + } +} + impl InterpretedWalReaderState { fn current_position(&self) -> Option { match self { @@ -129,6 +144,26 @@ impl InterpretedWalReaderState { InterpretedWalReaderState::Done => None, } } + + // Reset the current position of the WAL reader if the 
requested starting position + // of the new shard is smaller than the current value. + fn maybe_reset(&mut self, new_shard_start_pos: Lsn) -> CurrentPositionUpdate { + match self { + InterpretedWalReaderState::Running { + current_position, .. + } => { + if new_shard_start_pos < *current_position { + *current_position = new_shard_start_pos; + CurrentPositionUpdate::Reset(*current_position) + } else { + CurrentPositionUpdate::NotReset(*current_position) + } + } + InterpretedWalReaderState::Done => { + panic!("maybe_reset called on finished reader") + } + } + } } pub(crate) struct AttachShardNotification { @@ -179,11 +214,10 @@ impl InterpretedWalReader { metric.dec(); } - let res = reader.run_impl(start_pos).await; - if let Err(ref err) = res { - tracing::error!("Task finished with error: {err}"); - } - res + reader + .run_impl(start_pos) + .await + .inspect_err(|err| critical!("failed to read WAL record: {err:?}")) } .instrument(info_span!("interpreted wal reader")), ); @@ -239,11 +273,10 @@ impl InterpretedWalReader { metric.dec(); } - let res = self.run_impl(start_pos).await; - if let Err(err) = res { - tracing::error!("Interpreted wal reader encountered error: {err}"); + if let Err(err) = self.run_impl(start_pos).await { + critical!("failed to read WAL record: {err:?}"); } else { - tracing::info!("Interpreted wal reader exiting"); + info!("interpreted wal reader exiting"); } Err(CopyStreamHandlerEnd::Other(anyhow!( @@ -410,15 +443,24 @@ impl InterpretedWalReader { }; senders.push(ShardSenderState { sender_id: new_sender_id, tx: sender, next_record_lsn: start_pos}); - let current_pos = self.state.read().unwrap().current_position().unwrap(); - if start_pos < current_pos { - self.wal_stream.reset(start_pos).await; - wal_decoder = WalStreamDecoder::new(start_pos, self.pg_version); - } + + // If the shard is subscribing below the current position the we need + // to update the cursor that tracks where we are at in the WAL + // ([`Self::state`]) and reset the WAL stream itself + // (`[Self::wal_stream`]). This must be done atomically from the POV of + // anything outside the select statement. 
+ let position_reset = self.state.write().unwrap().maybe_reset(start_pos); + match position_reset { + CurrentPositionUpdate::Reset(to) => { + self.wal_stream.reset(to).await; + wal_decoder = WalStreamDecoder::new(to, self.pg_version); + }, + CurrentPositionUpdate::NotReset(_) => {} + }; tracing::info!( "Added shard sender {} with start_pos={} current_pos={}", - ShardSenderId::new(shard_id, new_sender_id), start_pos, current_pos + ShardSenderId::new(shard_id, new_sender_id), start_pos, position_reset.current_position() ); } } @@ -584,7 +626,7 @@ mod tests { .unwrap(); let resident_tli = tli.wal_residence_guard().await.unwrap(); - let end_watch = Env::write_wal(tli, start_lsn, SIZE, MSG_COUNT) + let end_watch = Env::write_wal(tli, start_lsn, SIZE, MSG_COUNT, None) .await .unwrap(); let end_pos = end_watch.get(); @@ -715,7 +757,6 @@ mod tests { const MSG_COUNT: usize = 200; const PG_VERSION: u32 = 17; const SHARD_COUNT: u8 = 2; - const ATTACHED_SHARDS: u8 = 4; let start_lsn = Lsn::from_str("0/149FD18").unwrap(); let env = Env::new(true).unwrap(); @@ -725,9 +766,11 @@ mod tests { .unwrap(); let resident_tli = tli.wal_residence_guard().await.unwrap(); - let end_watch = Env::write_wal(tli, start_lsn, SIZE, MSG_COUNT) - .await - .unwrap(); + let mut next_record_lsns = Vec::default(); + let end_watch = + Env::write_wal(tli, start_lsn, SIZE, MSG_COUNT, Some(&mut next_record_lsns)) + .await + .unwrap(); let end_pos = end_watch.get(); let streaming_wal_reader = StreamingWalReader::new( @@ -746,38 +789,71 @@ mod tests { ) .unwrap(); - let (tx, rx) = tokio::sync::mpsc::channel::(MSG_COUNT * 2); - let mut batch_receivers = vec![rx]; + struct Sender { + tx: Option>, + rx: tokio::sync::mpsc::Receiver, + shard: ShardIdentity, + start_lsn: Lsn, + received_next_record_lsns: Vec, + } + impl Sender { + fn new(start_lsn: Lsn, shard: ShardIdentity) -> Self { + let (tx, rx) = tokio::sync::mpsc::channel::(MSG_COUNT * 2); + Self { + tx: Some(tx), + rx, + shard, + start_lsn, + received_next_record_lsns: Vec::default(), + } + } + } + + assert!(next_record_lsns.len() > 7); + let start_lsns = vec![ + next_record_lsns[5], + next_record_lsns[1], + next_record_lsns[3], + ]; + let mut senders = start_lsns + .into_iter() + .map(|lsn| Sender::new(lsn, shard_0)) + .collect::>(); + + let first_sender = senders.first_mut().unwrap(); let handle = InterpretedWalReader::spawn( streaming_wal_reader, - start_lsn, - tx, - shard_0, + first_sender.start_lsn, + first_sender.tx.take().unwrap(), + first_sender.shard, PG_VERSION, &Some("pageserver".to_string()), ); - for _ in 0..(ATTACHED_SHARDS - 1) { - let (tx, rx) = tokio::sync::mpsc::channel::(MSG_COUNT * 2); - handle.fanout(shard_0, tx, start_lsn).unwrap(); - batch_receivers.push(rx); + for sender in senders.iter_mut().skip(1) { + handle + .fanout(sender.shard, sender.tx.take().unwrap(), sender.start_lsn) + .unwrap(); } - loop { - let batch = batch_receivers.first_mut().unwrap().recv().await.unwrap(); - for rx in batch_receivers.iter_mut().skip(1) { - let other_batch = rx.recv().await.unwrap(); - - assert_eq!(batch.wal_end_lsn, other_batch.wal_end_lsn); - assert_eq!( - batch.available_wal_end_lsn, - other_batch.available_wal_end_lsn + for sender in senders.iter_mut() { + loop { + let batch = sender.rx.recv().await.unwrap(); + tracing::info!( + "Sender with start_lsn={} received batch ending at {} with {} records", + sender.start_lsn, + batch.wal_end_lsn, + batch.records.records.len() ); - } - if batch.wal_end_lsn == batch.available_wal_end_lsn { - break; + for rec in 
batch.records.records { + sender.received_next_record_lsns.push(rec.next_record_lsn); + } + + if batch.wal_end_lsn == batch.available_wal_end_lsn { + break; + } } } @@ -792,5 +868,20 @@ mod tests { } assert!(done); + + for sender in senders { + tracing::info!( + "Validating records received by sender with start_lsn={}", + sender.start_lsn + ); + + assert!(sender.received_next_record_lsns.is_sorted()); + let expected = next_record_lsns + .iter() + .filter(|lsn| **lsn > sender.start_lsn) + .copied() + .collect::>(); + assert_eq!(sender.received_next_record_lsns, expected); + } } } diff --git a/safekeeper/src/test_utils.rs b/safekeeper/src/test_utils.rs index 4e851c5b3d..79ceddd366 100644 --- a/safekeeper/src/test_utils.rs +++ b/safekeeper/src/test_utils.rs @@ -122,6 +122,7 @@ impl Env { start_lsn: Lsn, msg_size: usize, msg_count: usize, + mut next_record_lsns: Option<&mut Vec>, ) -> anyhow::Result { let (msg_tx, msg_rx) = tokio::sync::mpsc::channel(receive_wal::MSG_QUEUE_SIZE); let (reply_tx, mut reply_rx) = tokio::sync::mpsc::channel(receive_wal::REPLY_QUEUE_SIZE); @@ -130,7 +131,7 @@ impl Env { WalAcceptor::spawn(tli.wal_residence_guard().await?, msg_rx, reply_tx, Some(0)); - let prefix = c"p"; + let prefix = c"neon-file:"; let prefixlen = prefix.to_bytes_with_nul().len(); assert!(msg_size >= prefixlen); let message = vec![0; msg_size - prefixlen]; @@ -139,6 +140,9 @@ impl Env { &mut WalGenerator::new(LogicalMessageGenerator::new(prefix, &message), start_lsn); for _ in 0..msg_count { let (lsn, record) = walgen.next().unwrap(); + if let Some(ref mut lsns) = next_record_lsns { + lsns.push(lsn); + } let req = AppendRequest { h: AppendRequestHeader { diff --git a/safekeeper/src/timeline.rs b/safekeeper/src/timeline.rs index 5eb0bd7146..4341f13824 100644 --- a/safekeeper/src/timeline.rs +++ b/safekeeper/src/timeline.rs @@ -14,6 +14,7 @@ use tokio_util::sync::CancellationToken; use utils::id::TenantId; use utils::sync::gate::Gate; +use http_utils::error::ApiError; use std::cmp::max; use std::ops::{Deref, DerefMut}; use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; @@ -22,7 +23,6 @@ use std::time::Duration; use tokio::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard}; use tokio::{sync::watch, time::Instant}; use tracing::*; -use utils::http::error::ApiError; use utils::{ id::{NodeId, TenantTimelineId}, lsn::Lsn, @@ -592,6 +592,8 @@ impl Timeline { assert!(self.cancel.is_cancelled()); assert!(self.gate.close_complete()); + info!("deleting timeline {} from disk", self.ttid); + // Close associated FDs. Nobody will be able to touch timeline data once // it is cancelled, so WAL storage won't be opened again. shared_state.sk.close_wal_store(); diff --git a/safekeeper/src/timelines_global_map.rs b/safekeeper/src/timelines_global_map.rs index 01c6aff6c3..1ff6a72bce 100644 --- a/safekeeper/src/timelines_global_map.rs +++ b/safekeeper/src/timelines_global_map.rs @@ -475,6 +475,8 @@ impl GlobalTimelines { info!("deleting timeline {}, only_local={}", ttid, only_local); timeline.shutdown().await; + info!("timeline {ttid} shut down for deletion"); + // Take a lock and finish the deletion holding this mutex. 
let mut shared_state = timeline.write_shared_state().await; diff --git a/safekeeper/src/wal_reader_stream.rs b/safekeeper/src/wal_reader_stream.rs index adac6067da..a0dd571a34 100644 --- a/safekeeper/src/wal_reader_stream.rs +++ b/safekeeper/src/wal_reader_stream.rs @@ -246,7 +246,7 @@ mod tests { .unwrap(); let resident_tli = tli.wal_residence_guard().await.unwrap(); - let end_watch = Env::write_wal(tli, start_lsn, SIZE, MSG_COUNT) + let end_watch = Env::write_wal(tli, start_lsn, SIZE, MSG_COUNT, None) .await .unwrap(); let end_pos = end_watch.get(); diff --git a/scripts/generate_image_maps.py b/scripts/generate_image_maps.py new file mode 100644 index 0000000000..a2f553d290 --- /dev/null +++ b/scripts/generate_image_maps.py @@ -0,0 +1,58 @@ +import itertools +import json +import os + +build_tag = os.environ["BUILD_TAG"] +branch = os.environ["BRANCH"] +dev_acr = os.environ["DEV_ACR"] +prod_acr = os.environ["PROD_ACR"] + +components = { + "neon": ["neon"], + "compute": [ + "compute-node-v14", + "compute-node-v15", + "compute-node-v16", + "compute-node-v17", + "vm-compute-node-v14", + "vm-compute-node-v15", + "vm-compute-node-v16", + "vm-compute-node-v17", + ], +} + +registries = { + "dev": [ + "docker.io/neondatabase", + "369495373322.dkr.ecr.eu-central-1.amazonaws.com", + f"{dev_acr}.azurecr.io/neondatabase", + ], + "prod": [ + "093970136003.dkr.ecr.eu-central-1.amazonaws.com", + f"{prod_acr}.azurecr.io/neondatabase", + ], +} + +outputs: dict[str, dict[str, list[str]]] = {} + +target_tags = [build_tag, "latest"] if branch == "main" else [build_tag] +target_stages = ["dev", "prod"] if branch.startswith("release") else ["dev"] + +for component_name, component_images in components.items(): + for stage in target_stages: + outputs[f"{component_name}-{stage}"] = dict( + [ + ( + f"docker.io/neondatabase/{component_image}:{build_tag}", + [ + f"{combo[0]}/{component_image}:{combo[1]}" + for combo in itertools.product(registries[stage], target_tags) + ], + ) + for component_image in component_images + ] + ) + +with open(os.environ["GITHUB_OUTPUT"], "a") as f: + for key, value in outputs.items(): + f.write(f"{key}={json.dumps(value)}\n") diff --git a/scripts/ingest_regress_test_result-new-format.py b/scripts/ingest_regress_test_result-new-format.py index ad2baf56bb..3a5cdf013a 100644 --- a/scripts/ingest_regress_test_result-new-format.py +++ b/scripts/ingest_regress_test_result-new-format.py @@ -32,6 +32,7 @@ CREATE TABLE IF NOT EXISTS results ( flaky BOOLEAN NOT NULL, arch arch DEFAULT 'X64', lfc BOOLEAN DEFAULT false NOT NULL, + sanitizers BOOLEAN DEFAULT false NOT NULL, build_type TEXT NOT NULL, pg_version INT NOT NULL, run_id BIGINT NOT NULL, @@ -39,7 +40,7 @@ CREATE TABLE IF NOT EXISTS results ( reference TEXT NOT NULL, revision CHAR(40) NOT NULL, raw JSONB COMPRESSION lz4 NOT NULL, - UNIQUE (parent_suite, suite, name, arch, build_type, pg_version, started_at, stopped_at, run_id) + UNIQUE (parent_suite, suite, name, arch, lfc, sanitizers, build_type, pg_version, started_at, stopped_at, run_id) ); """ @@ -56,6 +57,7 @@ class Row: flaky: bool arch: str lfc: bool + sanitizers: bool build_type: str pg_version: int run_id: int @@ -135,6 +137,7 @@ def ingest_test_result( } arch = parameters.get("arch", "UNKNOWN").strip("'") lfc = parameters.get("lfc", "without-lfc").strip("'") == "with-lfc" + sanitizers = parameters.get("sanitizers", "disabled").strip("'") == "enabled" build_type, pg_version, unparametrized_name = parse_test_name(test["name"]) labels = {label["name"]: label["value"] for label in 
test["labels"]} @@ -149,6 +152,7 @@ def ingest_test_result( flaky=test["flaky"] or test["retriesStatusChange"], arch=arch, lfc=lfc, + sanitizers=sanitizers, build_type=build_type, pg_version=pg_version, run_id=run_id, diff --git a/scripts/push_with_image_map.py b/scripts/push_with_image_map.py new file mode 100644 index 0000000000..c68f6ad407 --- /dev/null +++ b/scripts/push_with_image_map.py @@ -0,0 +1,22 @@ +import json +import os +import subprocess + +image_map = os.getenv("IMAGE_MAP") +if not image_map: + raise ValueError("IMAGE_MAP environment variable is not set") + +try: + parsed_image_map: dict[str, list[str]] = json.loads(image_map) +except json.JSONDecodeError as e: + raise ValueError("Failed to parse IMAGE_MAP as JSON") from e + +for source, targets in parsed_image_map.items(): + for target in targets: + cmd = ["docker", "buildx", "imagetools", "create", "-t", target, source] + print(f"Running: {' '.join(cmd)}") + result = subprocess.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + + if result.returncode != 0: + print(f"Error: {result.stdout}") + raise RuntimeError(f"Command failed: {' '.join(cmd)}") diff --git a/storage_controller/Cargo.toml b/storage_controller/Cargo.toml index 63f43cdf62..91d8098cb9 100644 --- a/storage_controller/Cargo.toml +++ b/storage_controller/Cargo.toml @@ -55,6 +55,7 @@ diesel-async = { version = "0.5.2", features = ["postgres", "bb8", "async-connec diesel_migrations = { version = "2.2.0" } scoped-futures = "0.1.4" +http-utils = { path = "../libs/http-utils/" } utils = { path = "../libs/utils/" } metrics = { path = "../libs/metrics/" } control_plane = { path = "../control_plane" } diff --git a/storage_controller/src/http.rs b/storage_controller/src/http.rs index ac890b008f..1a56116cad 100644 --- a/storage_controller/src/http.rs +++ b/storage_controller/src/http.rs @@ -8,6 +8,14 @@ use crate::reconciler::ReconcileError; use crate::service::{LeadershipStatus, Service, RECONCILE_TIMEOUT, STARTUP_RECONCILE_TIMEOUT}; use anyhow::Context; use futures::Future; +use http_utils::{ + endpoint::{self, auth_middleware, check_permission_with, request_span}, + error::ApiError, + failpoints::failpoints_handler, + json::{json_request, json_response}, + request::{must_get_query_param, parse_query_param, parse_request_param}, + RequestExt, RouterBuilder, +}; use hyper::header::CONTENT_TYPE; use hyper::{Body, Request, Response}; use hyper::{StatusCode, Uri}; @@ -29,20 +37,7 @@ use std::sync::Arc; use std::time::{Duration, Instant}; use tokio_util::sync::CancellationToken; use utils::auth::{Scope, SwappableJwtAuth}; -use utils::failpoint_support::failpoints_handler; -use utils::http::endpoint::{auth_middleware, check_permission_with, request_span}; -use utils::http::request::{must_get_query_param, parse_query_param, parse_request_param}; -use utils::id::{TenantId, TimelineId}; - -use utils::{ - http::{ - endpoint::{self}, - error::ApiError, - json::{json_request, json_response}, - RequestExt, RouterBuilder, - }, - id::NodeId, -}; +use utils::id::{NodeId, TenantId, TimelineId}; use pageserver_api::controller_api::{ NodeAvailability, NodeConfigureRequest, NodeRegisterRequest, TenantPolicyRequest, diff --git a/storage_controller/src/main.rs b/storage_controller/src/main.rs index 659c088d51..07279a67ff 100644 --- a/storage_controller/src/main.rs +++ b/storage_controller/src/main.rs @@ -320,7 +320,7 @@ async fn async_main() -> anyhow::Result<()> { let router = make_router(service.clone(), auth, build_info) .build() .map_err(|err| anyhow!(err))?; - let 
router_service = utils::http::RouterService::new(router).unwrap(); + let router_service = http_utils::RouterService::new(router).unwrap(); // Start HTTP server let server_shutdown = CancellationToken::new(); diff --git a/storage_controller/src/peer_client.rs b/storage_controller/src/peer_client.rs index ee4eb55294..1a15bae365 100644 --- a/storage_controller/src/peer_client.rs +++ b/storage_controller/src/peer_client.rs @@ -6,9 +6,10 @@ use std::error::Error as _; use std::time::Duration; use tokio_util::sync::CancellationToken; +use http_utils::error::HttpErrorBody; use hyper::Uri; use reqwest::{StatusCode, Url}; -use utils::{backoff, http::error::HttpErrorBody}; +use utils::backoff; #[derive(Debug, Clone)] pub(crate) struct PeerClient { diff --git a/storage_controller/src/scheduler.rs b/storage_controller/src/scheduler.rs index f9e72862ae..106a7b2699 100644 --- a/storage_controller/src/scheduler.rs +++ b/storage_controller/src/scheduler.rs @@ -1,9 +1,10 @@ use crate::{metrics::NodeLabelGroup, node::Node, tenant_shard::TenantShard}; +use http_utils::error::ApiError; use itertools::Itertools; use pageserver_api::{controller_api::AvailabilityZone, models::PageserverUtilization}; use serde::Serialize; use std::{collections::HashMap, fmt::Debug}; -use utils::{http::error::ApiError, id::NodeId}; +use utils::id::NodeId; /// Scenarios in which we cannot find a suitable location for a tenant shard #[derive(thiserror::Error, Debug)] diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index 4028cd7023..6829663a4c 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -61,6 +61,7 @@ use reqwest::StatusCode; use tracing::{instrument, Instrument}; use crate::pageserver_client::PageserverClient; +use http_utils::error::ApiError; use pageserver_api::{ models::{ self, LocationConfig, LocationConfigListResponse, LocationConfigMode, @@ -81,7 +82,6 @@ use utils::{ completion::Barrier, failpoint_support, generation::Generation, - http::error::ApiError, id::{NodeId, TenantId, TimelineId}, pausable_failpoint, sync::gate::Gate, diff --git a/test_runner/fixtures/compute_reconfigure.py b/test_runner/fixtures/compute_reconfigure.py index 33f01f80fb..425abef935 100644 --- a/test_runner/fixtures/compute_reconfigure.py +++ b/test_runner/fixtures/compute_reconfigure.py @@ -69,7 +69,10 @@ def compute_reconfigure_listener(make_httpserver: HTTPServer): # This causes the endpoint to query storage controller for its location, which # is redundant since we already have it here, but this avoids extending the # neon_local CLI to take full lists of locations - reconfigure_threads.submit(lambda workload=workload: workload.reconfigure()) # type: ignore[misc] + fut = reconfigure_threads.submit(lambda workload=workload: workload.reconfigure()) # type: ignore[misc] + + # To satisfy semantics of notify-attach API, we must wait for the change to be applied before returning 200 + fut.result() return Response(status=200) diff --git a/test_runner/fixtures/endpoint/http.py b/test_runner/fixtures/endpoint/http.py index 6e8210e978..cdc162fca2 100644 --- a/test_runner/fixtures/endpoint/http.py +++ b/test_runner/fixtures/endpoint/http.py @@ -9,21 +9,23 @@ from requests.adapters import HTTPAdapter class EndpointHttpClient(requests.Session): def __init__( self, - port: int, + external_port: int, + internal_port: int, ): super().__init__() - self.port = port + self.external_port: int = external_port + self.internal_port: int = internal_port self.mount("http://", HTTPAdapter()) def 
dbs_and_roles(self): - res = self.get(f"http://localhost:{self.port}/dbs_and_roles") + res = self.get(f"http://localhost:{self.external_port}/dbs_and_roles") res.raise_for_status() return res.json() def database_schema(self, database: str): res = self.get( - f"http://localhost:{self.port}/database_schema?database={urllib.parse.quote(database, safe='')}" + f"http://localhost:{self.external_port}/database_schema?database={urllib.parse.quote(database, safe='')}" ) res.raise_for_status() return res.text @@ -34,20 +36,20 @@ class EndpointHttpClient(requests.Session): "version": version, "database": database, } - res = self.post(f"http://localhost:{self.port}/extensions", json=body) + res = self.post(f"http://localhost:{self.internal_port}/extensions", json=body) res.raise_for_status() return res.json() def set_role_grants(self, database: str, role: str, schema: str, privileges: list[str]): res = self.post( - f"http://localhost:{self.port}/grants", + f"http://localhost:{self.internal_port}/grants", json={"database": database, "schema": schema, "role": role, "privileges": privileges}, ) res.raise_for_status() return res.json() def metrics(self) -> str: - res = self.get(f"http://localhost:{self.port}/metrics") + res = self.get(f"http://localhost:{self.external_port}/metrics") res.raise_for_status() return res.text @@ -62,5 +64,5 @@ class EndpointHttpClient(requests.Session): } ) - res = self.post(f"http://localhost:{self.port}/failpoints", json=body) + res = self.post(f"http://localhost:{self.internal_port}/failpoints", json=body) res.raise_for_status() diff --git a/test_runner/fixtures/neon_cli.py b/test_runner/fixtures/neon_cli.py index 33d422c590..97a5a36814 100644 --- a/test_runner/fixtures/neon_cli.py +++ b/test_runner/fixtures/neon_cli.py @@ -478,7 +478,8 @@ class NeonLocalCli(AbstractNeonCli): self, branch_name: str, pg_port: int, - http_port: int, + external_http_port: int, + internal_http_port: int, tenant_id: TenantId, pg_version: PgVersion, endpoint_id: str | None = None, @@ -486,6 +487,7 @@ class NeonLocalCli(AbstractNeonCli): lsn: Lsn | None = None, pageserver_id: int | None = None, allow_multiple=False, + update_catalog: bool = False, ) -> subprocess.CompletedProcess[str]: args = [ "endpoint", @@ -501,8 +503,10 @@ class NeonLocalCli(AbstractNeonCli): args.extend(["--lsn", str(lsn)]) if pg_port is not None: args.extend(["--pg-port", str(pg_port)]) - if http_port is not None: - args.extend(["--http-port", str(http_port)]) + if external_http_port is not None: + args.extend(["--external-http-port", str(external_http_port)]) + if internal_http_port is not None: + args.extend(["--internal-http-port", str(internal_http_port)]) if endpoint_id is not None: args.append(endpoint_id) if hot_standby: @@ -511,6 +515,8 @@ class NeonLocalCli(AbstractNeonCli): args.extend(["--pageserver-id", str(pageserver_id)]) if allow_multiple: args.extend(["--allow-multiple"]) + if update_catalog: + args.extend(["--update-catalog"]) res = self.raw_cli(args) res.check_returncode() diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 7c4991ffab..2fa82754ef 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -3345,7 +3345,7 @@ class NeonProxy(PgProtocol): metric_collection_interval: str | None = None, ): host = "127.0.0.1" - domain = "proxy.localtest.me" # resolves to 127.0.0.1 + domain = "proxy.local.neon.build" # resolves to 127.0.0.1 super().__init__(dsn=auth_backend.default_conn_url, host=domain, port=proxy_port) 
self.domain = domain @@ -3368,7 +3368,7 @@ class NeonProxy(PgProtocol): # generate key of it doesn't exist crt_path = self.test_output_dir / "proxy.crt" key_path = self.test_output_dir / "proxy.key" - generate_proxy_tls_certs("*.localtest.me", key_path, crt_path) + generate_proxy_tls_certs("*.local.neon.build", key_path, crt_path) args = [ str(self.neon_binpath / "proxy"), @@ -3569,7 +3569,7 @@ class NeonAuthBroker: external_http_port: int, auth_backend: NeonAuthBroker.ProxyV1, ): - self.domain = "apiauth.localtest.me" # resolves to 127.0.0.1 + self.domain = "apiauth.local.neon.build" # resolves to 127.0.0.1 self.host = "127.0.0.1" self.http_port = http_port self.external_http_port = external_http_port @@ -3586,7 +3586,7 @@ class NeonAuthBroker: # generate key of it doesn't exist crt_path = self.test_output_dir / "proxy.crt" key_path = self.test_output_dir / "proxy.key" - generate_proxy_tls_certs("apiauth.localtest.me", key_path, crt_path) + generate_proxy_tls_certs("apiauth.local.neon.build", key_path, crt_path) args = [ str(self.neon_binpath / "proxy"), @@ -3807,7 +3807,8 @@ class Endpoint(PgProtocol, LogUtils): env: NeonEnv, tenant_id: TenantId, pg_port: int, - http_port: int, + external_http_port: int, + internal_http_port: int, check_stop_result: bool = True, ): super().__init__(host="localhost", port=pg_port, user="cloud_admin", dbname="postgres") @@ -3817,7 +3818,8 @@ class Endpoint(PgProtocol, LogUtils): self.pgdata_dir: Path | None = None # Path to computenode PGDATA self.tenant_id = tenant_id self.pg_port = pg_port - self.http_port = http_port + self.external_http_port = external_http_port + self.internal_http_port = internal_http_port self.check_stop_result = check_stop_result # passed to endpoint create and endpoint reconfigure self.active_safekeepers: list[int] = list(map(lambda sk: sk.id, env.safekeepers)) @@ -3834,7 +3836,8 @@ class Endpoint(PgProtocol, LogUtils): self, auth_token: str | None = None, retries: Retry | None = None ) -> EndpointHttpClient: return EndpointHttpClient( - port=self.http_port, + external_port=self.external_http_port, + internal_port=self.internal_http_port, ) def create( @@ -3846,6 +3849,7 @@ class Endpoint(PgProtocol, LogUtils): config_lines: list[str] | None = None, pageserver_id: int | None = None, allow_multiple: bool = False, + update_catalog: bool = False, ) -> Self: """ Create a new Postgres endpoint. 
@@ -3866,10 +3870,12 @@ class Endpoint(PgProtocol, LogUtils): lsn=lsn, hot_standby=hot_standby, pg_port=self.pg_port, - http_port=self.http_port, + external_http_port=self.external_http_port, + internal_http_port=self.internal_http_port, pg_version=self.env.pg_version, pageserver_id=pageserver_id, allow_multiple=allow_multiple, + update_catalog=update_catalog, ) path = Path("endpoints") / self.endpoint_id / "pgdata" self.pgdata_dir = self.env.repo_dir / path @@ -4258,7 +4264,8 @@ class EndpointFactory: self.env, tenant_id=tenant_id or self.env.initial_tenant, pg_port=self.env.port_distributor.get_port(), - http_port=self.env.port_distributor.get_port(), + external_http_port=self.env.port_distributor.get_port(), + internal_http_port=self.env.port_distributor.get_port(), ) self.num_instances += 1 self.endpoints.append(ep) @@ -4283,12 +4290,14 @@ class EndpointFactory: hot_standby: bool = False, config_lines: list[str] | None = None, pageserver_id: int | None = None, + update_catalog: bool = False, ) -> Endpoint: ep = Endpoint( self.env, tenant_id=tenant_id or self.env.initial_tenant, pg_port=self.env.port_distributor.get_port(), - http_port=self.env.port_distributor.get_port(), + external_http_port=self.env.port_distributor.get_port(), + internal_http_port=self.env.port_distributor.get_port(), ) endpoint_id = endpoint_id or self.env.generate_endpoint_id() @@ -4303,6 +4312,7 @@ class EndpointFactory: hot_standby=hot_standby, config_lines=config_lines, pageserver_id=pageserver_id, + update_catalog=update_catalog, ) def stop_all(self, fail_on_error=True) -> Self: @@ -5122,12 +5132,14 @@ def wait_for_last_flush_lsn( timeline: TimelineId, pageserver_id: int | None = None, auth_token: str | None = None, + last_flush_lsn: Lsn | None = None, ) -> Lsn: """Wait for pageserver to catch up the latest flush LSN, returns the last observed lsn.""" shards = tenant_get_shards(env, tenant, pageserver_id) - last_flush_lsn = Lsn(endpoint.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0]) + if last_flush_lsn is None: + last_flush_lsn = Lsn(endpoint.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0]) results = [] for tenant_shard_id, pageserver in shards: diff --git a/test_runner/fixtures/parametrize.py b/test_runner/fixtures/parametrize.py index 1acb1af23b..c33342c89e 100644 --- a/test_runner/fixtures/parametrize.py +++ b/test_runner/fixtures/parametrize.py @@ -124,5 +124,8 @@ def pytest_runtest_makereport(*args, **kwargs): allure.dynamic.parameter( "__lfc", "with-lfc" if os.getenv("USE_LFC") != "false" else "without-lfc" ) + allure.dynamic.parameter( + "__sanitizers", "enabled" if os.getenv("SANITIZERS") == "enabled" else "disabled" + ) yield diff --git a/test_runner/fixtures/remote_storage.py b/test_runner/fixtures/remote_storage.py index d969971a35..4df2b2df2b 100644 --- a/test_runner/fixtures/remote_storage.py +++ b/test_runner/fixtures/remote_storage.py @@ -282,18 +282,35 @@ class S3Storage: def timeline_path(self, tenant_id: TenantShardId | TenantId, timeline_id: TimelineId) -> str: return f"{self.tenant_path(tenant_id)}/timelines/{timeline_id}" + def get_latest_generation_key(self, prefix: str, suffix: str, keys: list[str]) -> str: + """ + Gets the latest generation key from a list of keys. 
+ + + @param keys: A list of keys of different generations, which start with `prefix` + """ + + def parse_gen(key: str) -> int: + shortname = key.split("/")[-1] + generation_str = shortname.removeprefix(prefix).removesuffix(suffix) + try: + return int(generation_str, base=16) + except ValueError: + log.info(f"Ignoring non-matching key: {key}") + return -1 + + if len(keys) == 0: + raise IndexError("No keys found") + + return max(keys, key=parse_gen) + def get_latest_index_key(self, index_keys: list[str]) -> str: """ Gets the latest index file key. @param index_keys: A list of index keys of different generations. """ - - def parse_gen(index_key: str) -> int: - parts = index_key.split("index_part.json-") - return int(parts[-1], base=16) if len(parts) == 2 else -1 - - return max(index_keys, key=parse_gen) + key = self.get_latest_generation_key(prefix="index_part.json-", suffix="", keys=index_keys) + return key def download_index_part(self, index_key: str) -> IndexPartDump: """ @@ -306,6 +323,29 @@ class S3Storage: log.info(f"index_part.json: {body}") return IndexPartDump.from_json(json.loads(body)) + def download_tenant_manifest(self, tenant_id: TenantId) -> dict[str, Any] | None: + tenant_prefix = self.tenant_path(tenant_id) + + objects = self.client.list_objects_v2(Bucket=self.bucket_name, Prefix=f"{tenant_prefix}/")[ + "Contents" + ] + keys = [obj["Key"] for obj in objects if obj["Key"].find("tenant-manifest") != -1] + try: + manifest_key = self.get_latest_generation_key("tenant-manifest-", ".json", keys) + except IndexError: + log.info( + f"No manifest found for tenant {tenant_id}, this is normal if it didn't offload anything yet" + ) + return None + + response = self.client.get_object(Bucket=self.bucket_name, Key=manifest_key) + body = response["Body"].read().decode("utf-8") + log.info(f"Downloaded manifest {manifest_key}: {body}") + + manifest = json.loads(body) + assert isinstance(manifest, dict) + return manifest + def heatmap_key(self, tenant_id: TenantId) -> str: return f"{self.tenant_path(tenant_id)}/{TENANT_HEATMAP_FILE_NAME}" diff --git a/test_runner/fixtures/workload.py b/test_runner/fixtures/workload.py index eea0ec2b95..1947a9c3fb 100644 --- a/test_runner/fixtures/workload.py +++ b/test_runner/fixtures/workload.py @@ -53,6 +53,8 @@ class Workload: self._endpoint: Endpoint | None = None self._endpoint_opts = endpoint_opts or {} + self._configured_pageserver: int | None = None + def branch( self, timeline_id: TimelineId, @@ -92,8 +94,12 @@ class Workload: **self._endpoint_opts, ) self._endpoint.start(pageserver_id=pageserver_id) + self._configured_pageserver = pageserver_id else: - self._endpoint.reconfigure(pageserver_id=pageserver_id) + if self._configured_pageserver != pageserver_id: + self._configured_pageserver = pageserver_id + self._endpoint.reconfigure(pageserver_id=pageserver_id) + self._endpoint_config = pageserver_id connstring = self._endpoint.safe_psql( "SELECT setting FROM pg_settings WHERE name='neon.pageserver_connstring'" @@ -122,6 +128,7 @@ class Workload: def write_rows(self, n: int, pageserver_id: int | None = None, upload: bool = True): endpoint = self.endpoint(pageserver_id) + start = self.expect_rows end = start + n - 1 self.expect_rows += n diff --git a/test_runner/performance/test_ingest_logical_message.py b/test_runner/performance/test_ingest_logical_message.py index d3118eb15a..b55cb68b64 100644 --- a/test_runner/performance/test_ingest_logical_message.py +++ b/test_runner/performance/test_ingest_logical_message.py @@ -76,6 +76,9 @@ def 
test_ingest_logical_message( log.info("Waiting for Pageserver to catch up") wait_for_last_record_lsn(client, env.initial_tenant, env.initial_timeline, end_lsn) + recover_to_lsn = Lsn(endpoint.safe_psql("select pg_current_wal_lsn()")[0][0]) + endpoint.stop() + # Now that all data is ingested, delete and recreate the tenant in the pageserver. This will # reingest all the WAL from the safekeeper without any other constraints. This gives us a # baseline of how fast the pageserver can ingest this WAL in isolation. @@ -88,7 +91,13 @@ def test_ingest_logical_message( with zenbenchmark.record_duration("pageserver_recover_ingest"): log.info("Recovering WAL into pageserver") client.timeline_create(env.pg_version, env.initial_tenant, env.initial_timeline) - wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, env.initial_timeline) + wait_for_last_flush_lsn( + env, endpoint, env.initial_tenant, env.initial_timeline, last_flush_lsn=recover_to_lsn + ) + + # Check endpoint can start, i.e. we really recovered + endpoint.start() + wait_for_last_flush_lsn(env, endpoint, env.initial_tenant, env.initial_timeline) # Emit metrics. wal_written_mb = round((end_lsn - start_lsn) / (1024 * 1024)) diff --git a/test_runner/performance/test_lazy_startup.py b/test_runner/performance/test_lazy_startup.py index 704073fe3b..3bf3ef890f 100644 --- a/test_runner/performance/test_lazy_startup.py +++ b/test_runner/performance/test_lazy_startup.py @@ -79,7 +79,9 @@ def test_lazy_startup(slru: str, neon_env_builder: NeonEnvBuilder, zenbenchmark: assert sum == 1000000 # Get metrics - metrics = requests.get(f"http://localhost:{endpoint.http_port}/metrics.json").json() + metrics = requests.get( + f"http://localhost:{endpoint.external_http_port}/metrics.json" + ).json() durations = { "wait_for_spec_ms": f"{slru}_{i}_wait_for_spec", "sync_safekeepers_ms": f"{slru}_{i}_sync_safekeepers", diff --git a/test_runner/performance/test_perf_ingest_using_pgcopydb.py b/test_runner/performance/test_perf_ingest_using_pgcopydb.py index f0a0c1f5a2..da62422fca 100644 --- a/test_runner/performance/test_perf_ingest_using_pgcopydb.py +++ b/test_runner/performance/test_perf_ingest_using_pgcopydb.py @@ -136,7 +136,7 @@ def run_command_and_log_output(command, log_file_path: Path): "LD_LIBRARY_PATH": f"{os.getenv('PGCOPYDB_LIB_PATH')}:{os.getenv('PG_16_LIB_PATH')}", "PGCOPYDB_SOURCE_PGURI": cast(str, os.getenv("BENCHMARK_INGEST_SOURCE_CONNSTR")), "PGCOPYDB_TARGET_PGURI": cast(str, os.getenv("BENCHMARK_INGEST_TARGET_CONNSTR")), - "PGOPTIONS": "-c maintenance_work_mem=8388608 -c max_parallel_maintenance_workers=7", + "PGOPTIONS": "-c idle_in_transaction_session_timeout=0 -c maintenance_work_mem=8388608 -c max_parallel_maintenance_workers=7", } # Combine the current environment with custom variables env = os.environ.copy() diff --git a/test_runner/performance/test_startup.py b/test_runner/performance/test_startup.py index d051717e92..60d8b5be30 100644 --- a/test_runner/performance/test_startup.py +++ b/test_runner/performance/test_startup.py @@ -56,7 +56,9 @@ def test_startup_simple(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenc endpoint.safe_psql("select 1;") # Get metrics - metrics = requests.get(f"http://localhost:{endpoint.http_port}/metrics.json").json() + metrics = requests.get( + f"http://localhost:{endpoint.external_http_port}/metrics.json" + ).json() durations = { "wait_for_spec_ms": f"{i}_wait_for_spec", "sync_safekeepers_ms": f"{i}_sync_safekeepers", diff --git a/test_runner/regress/test_attach_tenant_config.py 
b/test_runner/regress/test_attach_tenant_config.py index a4b9eabf8e..07600dd911 100644 --- a/test_runner/regress/test_attach_tenant_config.py +++ b/test_runner/regress/test_attach_tenant_config.py @@ -140,9 +140,11 @@ def test_fully_custom_config(positive_env: NeonEnv): "compaction_period": "1h", "compaction_threshold": 13, "compaction_upper_limit": 100, + "compaction_l0_first": False, + "compaction_l0_semaphore": False, "l0_flush_delay_threshold": 25, "l0_flush_stall_threshold": 42, - "l0_flush_wait_upload": True, + "l0_flush_wait_upload": False, "compaction_target_size": 1048576, "checkpoint_distance": 10000, "checkpoint_timeout": "13m", @@ -175,7 +177,7 @@ def test_fully_custom_config(positive_env: NeonEnv): "image_layer_creation_check_threshold": 1, "lsn_lease_length": "1m", "lsn_lease_length_for_ts": "5s", - "timeline_offloading": True, + "timeline_offloading": False, "wal_receiver_protocol_override": { "type": "interpreted", "args": {"format": "bincode", "compression": {"zstd": {"level": 1}}}, diff --git a/test_runner/regress/test_compaction.py b/test_runner/regress/test_compaction.py index f3347b594e..f10872590c 100644 --- a/test_runner/regress/test_compaction.py +++ b/test_runner/regress/test_compaction.py @@ -689,9 +689,7 @@ def test_pageserver_compaction_circuit_breaker(neon_env_builder: NeonEnvBuilder) env.pageserver.http_client().configure_failpoints((FAILPOINT, "return")) # Write some data to trigger compaction - workload.write_rows(1024, upload=False) - workload.write_rows(1024, upload=False) - workload.write_rows(1024, upload=False) + workload.write_rows(32768, upload=False) def assert_broken(): env.pageserver.assert_log_contains(BROKEN_LOG) diff --git a/test_runner/regress/test_compatibility.py b/test_runner/regress/test_compatibility.py index cdc6c0053d..823f2185e4 100644 --- a/test_runner/regress/test_compatibility.py +++ b/test_runner/regress/test_compatibility.py @@ -314,7 +314,10 @@ def test_forward_compatibility( def check_neon_works(env: NeonEnv, test_output_dir: Path, sql_dump_path: Path, repo_dir: Path): - ep = env.endpoints.create_start("main") + ep = env.endpoints.create("main") + ep_env = {"LD_LIBRARY_PATH": str(env.pg_distrib_dir / f"v{env.pg_version}/lib")} + ep.start(env=ep_env) + connstr = ep.connstr() pg_bin = PgBin(test_output_dir, env.pg_distrib_dir, env.pg_version) @@ -363,7 +366,7 @@ def check_neon_works(env: NeonEnv, test_output_dir: Path, sql_dump_path: Path, r ) # Timeline exists again: restart the endpoint - ep.start() + ep.start(env=ep_env) pg_bin.run_capture( ["pg_dumpall", f"--dbname={connstr}", f"--file={test_output_dir / 'dump-from-wal.sql'}"] @@ -471,6 +474,14 @@ HISTORIC_DATA_SETS = [ PgVersion.V16, "https://neon-github-public-dev.s3.eu-central-1.amazonaws.com/compatibility-data-snapshots/2024-07-18-pgv16.tar.zst", ), + # This dataset was created on a pageserver running modern code at the time of capture, but configured with no generation. 
This + # is our regression test that we can load data written without generations in layer file names & indices + HistoricDataSet( + "2025-02-07-nogenerations", + TenantId("e1411ca6562d6ff62419f693a5695d67"), + PgVersion.V17, + "https://neon-github-public-dev.s3.eu-central-1.amazonaws.com/compatibility-data-snapshots/2025-02-07-pgv17-nogenerations.tar.zst", + ), ] diff --git a/test_runner/regress/test_compute_catalog.py b/test_runner/regress/test_compute_catalog.py index 50a922a616..3a08671bbf 100644 --- a/test_runner/regress/test_compute_catalog.py +++ b/test_runner/regress/test_compute_catalog.py @@ -82,7 +82,7 @@ def test_compute_catalog(neon_simple_env: NeonEnv): ddl = client.database_schema(database=test_db["name"]) # Check that it looks like a valid PostgreSQL dump - assert "-- PostgreSQL database dump" in ddl + assert "-- PostgreSQL database dump complete" in ddl # Check that it doesn't contain health_check and migration traces. # They are only created in system `postgres` database, so by checking diff --git a/test_runner/regress/test_compute_reconfigure.py b/test_runner/regress/test_compute_reconfigure.py new file mode 100644 index 0000000000..6619548811 --- /dev/null +++ b/test_runner/regress/test_compute_reconfigure.py @@ -0,0 +1,62 @@ +from __future__ import annotations + +from fixtures.neon_fixtures import NeonEnv +from fixtures.utils import wait_until + + +def test_compute_reconfigure(neon_simple_env: NeonEnv): + """ + Test that we can change postgresql.conf settings even if + skip_pg_catalog_updates=True is set. + """ + env = neon_simple_env + + TEST_LOG_LINE_PREFIX = "%m [%p] [test_compute_reconfigure]: " + + endpoint = env.endpoints.create_start("main") + + # Check that the log line prefix is not set + # or different from TEST_LOG_LINE_PREFIX + with endpoint.cursor() as cursor: + cursor.execute("SHOW log_line_prefix;") + row = cursor.fetchone() + assert row is not None + assert row[0] != TEST_LOG_LINE_PREFIX + + endpoint.respec_deep( + **{ + "skip_pg_catalog_updates": True, + "cluster": { + "settings": [ + { + "name": "log_line_prefix", + "vartype": "string", + "value": TEST_LOG_LINE_PREFIX, + } + ] + }, + } + ) + endpoint.reconfigure() + + # Check that in logs we see that it was actually reconfigured, + # not restarted or something else. + endpoint.log_contains("INFO request{method=POST uri=/configure") + + # In /configure we only send SIGHUP at the end, so in theory + # it doesn't necessarily mean that Postgres already reloaded + # the new config; and it may race in some envs. + # So we wait until we see the log line that the config was changed. 
+ def check_logs(): + endpoint.log_contains( + f'[test_compute_reconfigure]: LOG: parameter "log_line_prefix" changed to "{TEST_LOG_LINE_PREFIX}"' + ) + + wait_until(check_logs) + + # Check that the log line prefix is set + with endpoint.cursor() as cursor: + cursor.execute("SHOW log_line_prefix;") + row = cursor.fetchone() + assert row is not None + assert row[0] == TEST_LOG_LINE_PREFIX diff --git a/test_runner/regress/test_layers_from_future.py b/test_runner/regress/test_layers_from_future.py index 5e06a1d47f..3ac4ed1a3e 100644 --- a/test_runner/regress/test_layers_from_future.py +++ b/test_runner/regress/test_layers_from_future.py @@ -20,6 +20,9 @@ from fixtures.remote_storage import LocalFsStorage, RemoteStorageKind from fixtures.utils import query_scalar, wait_until +@pytest.mark.skip( + reason="We won't create future layers any more after https://github.com/neondatabase/neon/pull/10548" +) @pytest.mark.parametrize( "attach_mode", ["default_generation", "same_generation"], @@ -172,7 +175,7 @@ def test_issue_5878(neon_env_builder: NeonEnvBuilder, attach_mode: str): # force removal of layers from the future tenant_conf = ps_http.tenant_config(tenant_id) generation_before_detach = get_generation_number() - env.pageserver.tenant_detach(tenant_id) + env.pageserver.http_client().tenant_detach(tenant_id) failpoint_deletion_queue = "deletion-queue-before-execute-pause" ps_http.configure_failpoints((failpoint_deletion_queue, "pause")) diff --git a/test_runner/regress/test_neon_local_cli.py b/test_runner/regress/test_neon_local_cli.py index 80e26d9432..8d9aab6848 100644 --- a/test_runner/regress/test_neon_local_cli.py +++ b/test_runner/regress/test_neon_local_cli.py @@ -17,11 +17,13 @@ def test_neon_cli_basics(neon_env_builder: NeonEnvBuilder, port_distributor: Por main_branch_name = "main" pg_port = port_distributor.get_port() - http_port = port_distributor.get_port() + external_http_port = port_distributor.get_port() + internal_http_port = port_distributor.get_port() env.neon_cli.endpoint_create( main_branch_name, pg_port, - http_port, + external_http_port, + internal_http_port, endpoint_id="ep-basic-main", tenant_id=env.initial_tenant, pg_version=env.pg_version, @@ -35,11 +37,13 @@ def test_neon_cli_basics(neon_env_builder: NeonEnvBuilder, port_distributor: Por new_branch_name=branch_name, ) pg_port = port_distributor.get_port() - http_port = port_distributor.get_port() + external_http_port = port_distributor.get_port() + internal_http_port = port_distributor.get_port() env.neon_cli.endpoint_create( branch_name, pg_port, - http_port, + external_http_port, + internal_http_port, endpoint_id=f"ep-{branch_name}", tenant_id=env.initial_tenant, pg_version=env.pg_version, @@ -59,23 +63,27 @@ def test_neon_two_primary_endpoints_fail( branch_name = "main" pg_port = port_distributor.get_port() - http_port = port_distributor.get_port() + external_http_port = port_distributor.get_port() + internal_http_port = port_distributor.get_port() env.neon_cli.endpoint_create( branch_name, pg_port, - http_port, + external_http_port, + internal_http_port, endpoint_id="ep1", tenant_id=env.initial_tenant, pg_version=env.pg_version, ) pg_port = port_distributor.get_port() - http_port = port_distributor.get_port() + external_http_port = port_distributor.get_port() + internal_http_port = port_distributor.get_port() # ep1 is not running so create will succeed env.neon_cli.endpoint_create( branch_name, pg_port, - http_port, + external_http_port, + internal_http_port, endpoint_id="ep2", tenant_id=env.initial_tenant, 
pg_version=env.pg_version, diff --git a/test_runner/regress/test_pageserver_generations.py b/test_runner/regress/test_pageserver_generations.py index 7e5bb45242..fa1cd61206 100644 --- a/test_runner/regress/test_pageserver_generations.py +++ b/test_runner/regress/test_pageserver_generations.py @@ -12,7 +12,6 @@ of the pageserver are: from __future__ import annotations import os -import re import time from enum import StrEnum @@ -29,7 +28,6 @@ from fixtures.pageserver.common_types import parse_layer_file_name from fixtures.pageserver.http import PageserverApiException from fixtures.pageserver.utils import ( assert_tenant_state, - list_prefix, wait_for_last_record_lsn, wait_for_upload, ) @@ -124,109 +122,6 @@ def assert_deletion_queue(ps_http, size_fn) -> None: assert size_fn(v) is True -def test_generations_upgrade(neon_env_builder: NeonEnvBuilder): - """ - Validate behavior when a pageserver is run without generation support enabled, - then started again after activating it: - - Before upgrade, no objects should have generation suffixes - - After upgrade, the bucket should contain a mixture. - - In both cases, postgres I/O should work. - """ - neon_env_builder.enable_pageserver_remote_storage( - RemoteStorageKind.MOCK_S3, - ) - - env = neon_env_builder.init_configs() - env.broker.start() - for sk in env.safekeepers: - sk.start() - env.storage_controller.start() - - # We will start a pageserver with no control_plane_api set, so it won't be able to self-register - env.storage_controller.node_register(env.pageserver) - - def remove_control_plane_api_field(config): - return config.pop("control_plane_api") - - control_plane_api = env.pageserver.edit_config_toml(remove_control_plane_api_field) - env.pageserver.start() - env.storage_controller.node_configure(env.pageserver.id, {"availability": "Active"}) - - env.create_tenant( - tenant_id=env.initial_tenant, conf=TENANT_CONF, timeline_id=env.initial_timeline - ) - - generate_uploads_and_deletions(env, pageserver=env.pageserver) - - def parse_generation_suffix(key): - m = re.match(".+-([0-9a-zA-Z]{8})$", key) - if m is None: - return None - else: - log.info(f"match: {m}") - log.info(f"group: {m.group(1)}") - return int(m.group(1), 16) - - assert neon_env_builder.pageserver_remote_storage is not None - pre_upgrade_keys = list( - [ - o["Key"] - for o in list_prefix(neon_env_builder.pageserver_remote_storage, delimiter="")[ - "Contents" - ] - ] - ) - for key in pre_upgrade_keys: - assert parse_generation_suffix(key) is None - - env.pageserver.stop() - # Starting without the override that disabled control_plane_api - env.pageserver.patch_config_toml_nonrecursive( - { - "control_plane_api": control_plane_api, - } - ) - env.pageserver.start() - - generate_uploads_and_deletions(env, pageserver=env.pageserver, init=False) - - legacy_objects: list[str] = [] - suffixed_objects = [] - post_upgrade_keys = list( - [ - o["Key"] - for o in list_prefix(neon_env_builder.pageserver_remote_storage, delimiter="")[ - "Contents" - ] - ] - ) - for key in post_upgrade_keys: - log.info(f"post-upgrade key: {key}") - if parse_generation_suffix(key) is not None: - suffixed_objects.append(key) - else: - legacy_objects.append(key) - - # Bucket now contains a mixture of suffixed and non-suffixed objects - assert len(suffixed_objects) > 0 - assert len(legacy_objects) > 0 - - # Flush through deletions to get a clean state for scrub: we are implicitly validating - # that our generations-enabled pageserver was able to do deletions of layers - # from earlier which don't have a 
generation. - env.pageserver.http_client().deletion_queue_flush(execute=True) - - assert get_deletion_queue_unexpected_errors(env.pageserver.http_client()) == 0 - - # Having written a mixture of generation-aware and legacy index_part.json, - # ensure the scrubber handles the situation as expected. - healthy, metadata_summary = env.storage_scrubber.scan_metadata() - assert metadata_summary["tenant_count"] == 1 # Scrubber should have seen our timeline - assert metadata_summary["timeline_count"] == 1 - assert metadata_summary["timeline_shard_count"] == 1 - assert healthy - - def test_deferred_deletion(neon_env_builder: NeonEnvBuilder): neon_env_builder.enable_pageserver_remote_storage( RemoteStorageKind.MOCK_S3, diff --git a/test_runner/regress/test_pageserver_restart.py b/test_runner/regress/test_pageserver_restart.py index 835ccbd5d4..21cb780c06 100644 --- a/test_runner/regress/test_pageserver_restart.py +++ b/test_runner/regress/test_pageserver_restart.py @@ -3,6 +3,7 @@ from __future__ import annotations import random from contextlib import closing +import psycopg2.errors as pgerr import pytest from fixtures.log_helper import log from fixtures.neon_fixtures import NeonEnvBuilder @@ -226,3 +227,43 @@ def test_pageserver_chaos(neon_env_builder: NeonEnvBuilder, shard_count: int | N # so instead, do a fast shutdown for this one test. # See https://github.com/neondatabase/neon/issues/8709 env.stop(immediate=True) + + +def test_pageserver_lost_and_transaction_aborted(neon_env_builder: NeonEnvBuilder): + """ + If the pageserver is unavailable during a transaction abort and the target relation is + not present in the cache, we abort the transaction in the ABORT state, which triggers a SIGABRT. + This is _expected_ behaviour. + """ + env = neon_env_builder.init_start() + endpoint = env.endpoints.create_start("main", config_lines=["neon.relsize_hash_size=0"]) + with closing(endpoint.connect()) as conn, conn.cursor() as cur: + cur.execute("CREATE DATABASE test") + with ( + pytest.raises((pgerr.InterfaceError, pgerr.InternalError)), + endpoint.connect(dbname="test") as conn, + conn.cursor() as cur, + ): + cur.execute("create table t(b box)") + env.pageserver.stop() + cur.execute("create index ti on t using gist(b)") + + +def test_pageserver_lost_and_transaction_committed(neon_env_builder: NeonEnvBuilder): + """ + If the pageserver is unavailable during a transaction commit and the target relation is + not present in the cache, we abort the transaction in the COMMIT state, which triggers a SIGABRT. + This is _expected_ behaviour. + """ + env = neon_env_builder.init_start() + endpoint = env.endpoints.create_start("main", config_lines=["neon.relsize_hash_size=0"]) + with closing(endpoint.connect()) as conn, conn.cursor() as cur: + cur.execute("CREATE DATABASE test") + with ( + pytest.raises((pgerr.InterfaceError, pgerr.InternalError)), + endpoint.connect(dbname="test") as conn, + conn.cursor() as cur, + ): + cur.execute("create table t(t boolean)") + env.pageserver.stop() + cur.execute("drop table t") diff --git a/test_runner/regress/test_pg_regress.py b/test_runner/regress/test_pg_regress.py index 2877f14e0e..c5ae669dce 100644 --- a/test_runner/regress/test_pg_regress.py +++ b/test_runner/regress/test_pg_regress.py @@ -120,7 +120,7 @@ def post_checks(env: NeonEnv, test_output_dir: Path, db_name: str, endpoint: End # Run the main PostgreSQL regression tests, in src/test/regress. 
# -@pytest.mark.timeout(900) # Contains many sub-tests, is slow in debug builds +@pytest.mark.timeout(3000) # Contains many sub-tests, is slow in debug builds @pytest.mark.parametrize("shard_count", [None, 4]) def test_pg_regress( neon_env_builder: NeonEnvBuilder, @@ -194,7 +194,7 @@ def test_pg_regress( # Run the PostgreSQL "isolation" tests, in src/test/isolation. # -@pytest.mark.timeout(600) # Contains many sub-tests, is slow in debug builds +@pytest.mark.timeout(1500) # Contains many sub-tests, is slow in debug builds @pytest.mark.parametrize("shard_count", [None, 4]) def test_isolation( neon_env_builder: NeonEnvBuilder, @@ -222,6 +222,8 @@ def test_isolation( "max_prepared_transactions=100", # Enable the test mode, so that we don't need to patch the test cases. "neon.regress_test_mode = true", + # Stack size should be increased for tests to pass with asan. + "max_stack_depth = 4MB", ], ) endpoint.safe_psql(f"CREATE DATABASE {DBNAME}") @@ -417,7 +419,7 @@ def test_tx_abort_with_many_relations( try: # Rollback phase should be fast: this is one WAL record that we should process efficiently fut = exec.submit(rollback_and_wait) - fut.result(timeout=5) + fut.result(timeout=15) except: exec.shutdown(wait=False, cancel_futures=True) raise diff --git a/test_runner/regress/test_proxy.py b/test_runner/regress/test_proxy.py index d8df2efc78..3c7fd0b897 100644 --- a/test_runner/regress/test_proxy.py +++ b/test_runner/regress/test_proxy.py @@ -57,7 +57,7 @@ def test_proxy_select_1(static_proxy: NeonProxy): assert out[0][0] == 1 # with SNI - out = static_proxy.safe_psql("select 42", host="generic-project-name.localtest.me") + out = static_proxy.safe_psql("select 42", host="generic-project-name.local.neon.build") assert out[0][0] == 42 @@ -234,7 +234,7 @@ def test_sql_over_http_serverless_driver(static_proxy: NeonProxy): connstr = f"postgresql://http:http@{static_proxy.domain}:{static_proxy.proxy_port}/postgres" response = requests.post( - f"https://api.localtest.me:{static_proxy.external_http_port}/sql", + f"https://api.local.neon.build:{static_proxy.external_http_port}/sql", data=json.dumps({"query": "select 42 as answer", "params": []}), headers={"Content-Type": "application/sql", "Neon-Connection-String": connstr}, verify=str(static_proxy.test_output_dir / "proxy.crt"), diff --git a/test_runner/regress/test_proxy_allowed_ips.py b/test_runner/regress/test_proxy_allowed_ips.py index 902da1942e..c59da8c6b0 100644 --- a/test_runner/regress/test_proxy_allowed_ips.py +++ b/test_runner/regress/test_proxy_allowed_ips.py @@ -35,7 +35,7 @@ async def test_proxy_psql_allowed_ips(static_proxy: NeonProxy, vanilla_pg: Vanil check_cannot_connect(query="select 1", sslsni=0, options="endpoint=private-project") # with SNI - check_cannot_connect(query="select 1", host="private-project.localtest.me") + check_cannot_connect(query="select 1", host="private-project.local.neon.build") # no SNI, deprecated `options=project` syntax (before we had several endpoint in project) out = static_proxy.safe_psql(query="select 1", sslsni=0, options="project=generic-project") @@ -46,7 +46,7 @@ async def test_proxy_psql_allowed_ips(static_proxy: NeonProxy, vanilla_pg: Vanil assert out[0][0] == 1 # with SNI - out = static_proxy.safe_psql(query="select 1", host="generic-project.localtest.me") + out = static_proxy.safe_psql(query="select 1", host="generic-project.local.neon.build") assert out[0][0] == 1 diff --git a/test_runner/regress/test_sharding.py b/test_runner/regress/test_sharding.py index 86a6b7428b..8910873690 100644 --- 
a/test_runner/regress/test_sharding.py +++ b/test_runner/regress/test_sharding.py @@ -91,7 +91,7 @@ def test_sharding_smoke( workload.init() sizes_before = get_sizes() - workload.write_rows(256) + workload.write_rows(65536) # Test that we can read data back from a sharded tenant workload.validate() @@ -1368,6 +1368,7 @@ def test_sharding_split_failures( workload = Workload(env, tenant_id, timeline_id) workload.init() workload.write_rows(100) + compute_reconfigure_listener.register_workload(workload) # Put the environment into a failing state (exact meaning depends on `failure`) failure.apply(env) @@ -1546,6 +1547,9 @@ def test_sharding_backpressure(neon_env_builder: NeonEnvBuilder): # Tip: set to 100MB to make the test fail "max_replication_write_lag=1MB", ], + # We need `neon` extension for calling backpressure functions, + # this flag instructs `compute_ctl` to pre-install it. + "update_catalog": True, }, ) workload.init() @@ -1810,3 +1814,14 @@ def test_sharding_gc( shard_gc_cutoff_lsn = Lsn(shard_index["metadata_bytes"]["latest_gc_cutoff_lsn"]) log.info(f"Shard {shard_number} cutoff LSN: {shard_gc_cutoff_lsn}") assert shard_gc_cutoff_lsn == shard_0_gc_cutoff_lsn + + for ps in env.pageservers: + # This is not okay, but it's not a scrubber bug: it's a pageserver issue that is exposed by + # the specific pattern of aggressive checkpointing+image layer generation + GC that this test does. + # TODO: remove when https://github.com/neondatabase/neon/issues/10720 is fixed + ps.allowed_errors.extend( + [ + ".*could not find data for key 020000000000000000000000000000000000.*", + ".*could not ingest record.*", + ] + ) diff --git a/test_runner/regress/test_sni_router.py b/test_runner/regress/test_sni_router.py index 2a26fef59a..3487542d6e 100644 --- a/test_runner/regress/test_sni_router.py +++ b/test_runner/regress/test_sni_router.py @@ -116,7 +116,7 @@ def test_pg_sni_router( test_output_dir: Path, ): generate_tls_cert( - "endpoint.namespace.localtest.me", + "endpoint.namespace.local.neon.build", test_output_dir / "router.crt", test_output_dir / "router.key", ) @@ -130,7 +130,7 @@ def test_pg_sni_router( with PgSniRouter( neon_binpath=neon_binpath, port=router_port, - destination="localtest.me", + destination="local.neon.build", tls_cert=test_output_dir / "router.crt", tls_key=test_output_dir / "router.key", test_output_dir=test_output_dir, @@ -141,7 +141,7 @@ def test_pg_sni_router( "select 1", dbname="postgres", sslmode="require", - host=f"endpoint--namespace--{pg_port}.localtest.me", + host=f"endpoint--namespace--{pg_port}.local.neon.build", hostaddr="127.0.0.1", ) assert out[0][0] == 1 diff --git a/test_runner/regress/test_storage_controller.py b/test_runner/regress/test_storage_controller.py index 11a4d09202..2750826aec 100644 --- a/test_runner/regress/test_storage_controller.py +++ b/test_runner/regress/test_storage_controller.py @@ -2139,12 +2139,18 @@ def test_tenant_import(neon_env_builder: NeonEnvBuilder, shard_count, remote_sto workload.validate() -def test_graceful_cluster_restart(neon_env_builder: NeonEnvBuilder): +@pytest.mark.parametrize("num_azs", [1, 2]) +def test_graceful_cluster_restart(neon_env_builder: NeonEnvBuilder, num_azs: int): """ Graceful reststart of storage controller clusters use the drain and fill hooks in order to migrate attachments away from pageservers before restarting. In practice, Ansible will drive this process. 
+ + Test is parametrized on the number of AZs to exercise the AZ-driven behavior + of reliably moving shards back to their home AZ, and the behavior for AZ-agnostic + tenants where we fill based on a target shard count. """ + neon_env_builder.num_azs = num_azs neon_env_builder.num_pageservers = 2 env = neon_env_builder.init_configs() env.start() @@ -2174,8 +2180,15 @@ def test_graceful_cluster_restart(neon_env_builder: NeonEnvBuilder): min_shard_count = min(shard_counts.values()) max_shard_count = max(shard_counts.values()) - flake_factor = 5 / 100 - assert max_shard_count - min_shard_count <= int(total_shards * flake_factor) + if num_azs == 1: + # AZ-agnostic case: we expect all nodes to have the same number of shards, within some bound + flake_factor = 5 / 100 + assert max_shard_count - min_shard_count <= int(total_shards * flake_factor) + else: + # AZ-driven case: we expect tenants to have been round-robin allocated to AZs, + # and after the restart they should all be back in their home AZ, so difference + # should be at most a single shard's tenants + assert max_shard_count - min_shard_count <= shard_count_per_tenant # Perform a graceful rolling restart for ps in env.pageservers: diff --git a/test_runner/regress/test_storage_scrubber.py b/test_runner/regress/test_storage_scrubber.py index 0f4e5688a9..b8253fb125 100644 --- a/test_runner/regress/test_storage_scrubber.py +++ b/test_runner/regress/test_storage_scrubber.py @@ -312,6 +312,17 @@ def test_scrubber_physical_gc_ancestors(neon_env_builder: NeonEnvBuilder, shard_ drop_local_state(env, tenant_id) workload.validate() + for ps in env.pageservers: + # This is not okay, but it's not a scrubber bug: it's a pageserver issue that is exposed by + # the specific pattern of aggressive checkpointing+image layer generation + GC that this test does. + # TODO: remove when https://github.com/neondatabase/neon/issues/10720 is fixed + ps.allowed_errors.extend( + [ + ".*could not find data for key 020000000000000000000000000000000000.*", + ".*could not ingest record.*", + ] + ) + def test_scrubber_physical_gc_timeline_deletion(neon_env_builder: NeonEnvBuilder): """ diff --git a/test_runner/regress/test_subscriber_restart.py b/test_runner/regress/test_subscriber_restart.py index 7d4f66d044..8ad7282ea2 100644 --- a/test_runner/regress/test_subscriber_restart.py +++ b/test_runner/regress/test_subscriber_restart.py @@ -3,12 +3,14 @@ from __future__ import annotations import threading import time +import pytest from fixtures.neon_fixtures import NeonEnv from fixtures.utils import wait_until # This test checks of logical replication subscriber is able to correctly restart replication without receiving duplicates. 
# It requires tracking information about replication origins at page server side +@pytest.mark.timeout(900) # This test is slow with sanitizers enabled, especially on ARM def test_subscriber_restart(neon_simple_env: NeonEnv): env = neon_simple_env env.create_branch("publisher") diff --git a/test_runner/regress/test_timeline_archive.py b/test_runner/regress/test_timeline_archive.py index 306e971657..2706ddf2f0 100644 --- a/test_runner/regress/test_timeline_archive.py +++ b/test_runner/regress/test_timeline_archive.py @@ -139,9 +139,9 @@ def test_timeline_archive(neon_env_builder: NeonEnvBuilder, shard_count: int): @pytest.mark.parametrize("manual_offload", [False, True]) def test_timeline_offloading(neon_env_builder: NeonEnvBuilder, manual_offload: bool): - if not manual_offload: - # (automatic) timeline offloading defaults to false for now - neon_env_builder.pageserver_config_override = "timeline_offloading = true" + if manual_offload: + # (automatic) timeline offloading defaults to true + neon_env_builder.pageserver_config_override = "timeline_offloading = false" env = neon_env_builder.init_start() ps_http = env.pageserver.http_client() @@ -396,8 +396,6 @@ def test_timeline_archival_chaos(neon_env_builder: NeonEnvBuilder): with tenant migrations and timeline deletions. """ - # Offloading is off by default at time of writing: remove this line when it's on by default - neon_env_builder.pageserver_config_override = "timeline_offloading = true" neon_env_builder.storage_controller_config = {"heartbeat_interval": "100msec"} neon_env_builder.enable_pageserver_remote_storage(s3_storage()) @@ -554,8 +552,33 @@ def test_timeline_archival_chaos(neon_env_builder: NeonEnvBuilder): log.info(f"Timeline {state.timeline_id} is still active") shutdown.wait(0.5) elif state.timeline_id in offloaded_ids: - log.info(f"Timeline {state.timeline_id} is now offloaded") - state.offloaded = True + log.info(f"Timeline {state.timeline_id} is now offloaded in memory") + + # Hack: when we see something offloaded in the API, it doesn't guarantee that the offload + # is persistent (it is marked offloaded first, then that is persisted to the tenant manifest). + # So we wait until we see the manifest update before considering it offloaded, that way + # subsequent checks that it doesn't revert to active on a restart will pass reliably. 
+ time.sleep(0.1) + assert isinstance(env.pageserver_remote_storage, S3Storage) + manifest = env.pageserver_remote_storage.download_tenant_manifest( + tenant_id + ) + if manifest is None: + log.info( + f"Timeline {state.timeline_id} is not yet offloaded persistently (no manifest)" + ) + elif str(state.timeline_id) in [ + t["timeline_id"] for t in manifest["offloaded_timelines"] + ]: + log.info( + f"Timeline {state.timeline_id} is now offloaded persistently" + ) + state.offloaded = True + else: + log.info( + f"Timeline {state.timeline_id} is not yet offloaded persistently (manifest: {manifest})" + ) + break else: # Timeline is neither offloaded nor active, this is unexpected: the pageserver @@ -969,8 +992,6 @@ def test_timeline_offload_race_unarchive( Ensure that unarchive and timeline offload don't race each other """ # Regression test for issue https://github.com/neondatabase/neon/issues/10220 - # (automatic) timeline offloading defaults to false for now - neon_env_builder.pageserver_config_override = "timeline_offloading = true" failpoint = "before-timeline-auto-offload" diff --git a/test_runner/regress/test_wal_acceptor_async.py b/test_runner/regress/test_wal_acceptor_async.py index b32b028fa1..936c774657 100644 --- a/test_runner/regress/test_wal_acceptor_async.py +++ b/test_runner/regress/test_wal_acceptor_async.py @@ -268,7 +268,8 @@ def endpoint_create_start( env, tenant_id=env.initial_tenant, pg_port=env.port_distributor.get_port(), - http_port=env.port_distributor.get_port(), + external_http_port=env.port_distributor.get_port(), + internal_http_port=env.port_distributor.get_port(), # In these tests compute has high probability of terminating on its own # before our stop() due to lost consensus leadership. check_stop_result=False, diff --git a/test_runner/websocket_tunnel.py b/test_runner/websocket_tunnel.py index facdb19140..069852468d 100755 --- a/test_runner/websocket_tunnel.py +++ b/test_runner/websocket_tunnel.py @@ -13,12 +13,12 @@ # postgres -D data -p3000 # # ## Launch proxy with WSS enabled: -# openssl req -new -x509 -days 365 -nodes -text -out server.crt -keyout server.key -subj '/CN=*.neon.localtest.me' +# openssl req -new -x509 -days 365 -nodes -text -out server.crt -keyout server.key -subj '/CN=*.local.neon.build' # ./target/debug/proxy --wss 127.0.0.1:40433 --http 127.0.0.1:28080 --mgmt 127.0.0.1:9099 --proxy 127.0.0.1:4433 --tls-key server.key --tls-cert server.crt --auth-backend postgres # # ## Launch the tunnel: # -# poetry run ./test_runner/websocket_tunnel.py --ws-port 40433 --ws-url "wss://ep-test.neon.localtest.me" +# poetry run ./test_runner/websocket_tunnel.py --ws-port 40433 --ws-url "wss://ep-test.local.neon.build" # # ## Now you can connect with psql: # psql "postgresql://heikki@localhost:40433/postgres" diff --git a/vendor/postgres-v16 b/vendor/postgres-v16 index 86d9ea96eb..13cf5d06c9 160000 --- a/vendor/postgres-v16 +++ b/vendor/postgres-v16 @@ -1 +1 @@ -Subproject commit 86d9ea96ebb9088eac62f57f1f5ace68e70e0d1c +Subproject commit 13cf5d06c98a8e9b0590ce6cdfd193a08d0a7792 diff --git a/vendor/postgres-v17 b/vendor/postgres-v17 index 8dfd5a7030..4c45d78ad5 160000 --- a/vendor/postgres-v17 +++ b/vendor/postgres-v17 @@ -1 +1 @@ -Subproject commit 8dfd5a7030d3e8a98b60265ebe045788892ac7f3 +Subproject commit 4c45d78ad587e4bcb4a5a7ef6931b88c6a3d575d diff --git a/vendor/revisions.json b/vendor/revisions.json index efddaef46a..5f60e1d690 100644 --- a/vendor/revisions.json +++ b/vendor/revisions.json @@ -1,11 +1,11 @@ { "v17": [ "17.2", - 
"8dfd5a7030d3e8a98b60265ebe045788892ac7f3" + "4c45d78ad587e4bcb4a5a7ef6931b88c6a3d575d" ], "v16": [ "16.6", - "86d9ea96ebb9088eac62f57f1f5ace68e70e0d1c" + "13cf5d06c98a8e9b0590ce6cdfd193a08d0a7792" ], "v15": [ "15.10", diff --git a/workspace_hack/Cargo.toml b/workspace_hack/Cargo.toml index 2c65401154..1b7c376560 100644 --- a/workspace_hack/Cargo.toml +++ b/workspace_hack/Cargo.toml @@ -42,7 +42,7 @@ half = { version = "2", default-features = false, features = ["num-traits"] } hashbrown = { version = "0.14", features = ["raw"] } hex = { version = "0.4", features = ["serde"] } hmac = { version = "0.12", default-features = false, features = ["reset"] } -hyper-582f2526e08bb6a0 = { package = "hyper", version = "0.14", features = ["full"] } +hyper-582f2526e08bb6a0 = { package = "hyper", version = "0.14", features = ["client", "http1", "http2", "runtime", "server", "stream"] } hyper-dff4ba8e3ae991db = { package = "hyper", version = "1", features = ["full"] } hyper-util = { version = "0.1", features = ["client-legacy", "http1", "http2", "server", "service"] } indexmap-dff4ba8e3ae991db = { package = "indexmap", version = "1", default-features = false, features = ["std"] } @@ -94,6 +94,7 @@ tracing = { version = "0.1", features = ["log"] } tracing-core = { version = "0.1" } tracing-log = { version = "0.2" } url = { version = "2", features = ["serde"] } +uuid = { version = "1", features = ["serde", "v4", "v7"] } zerocopy = { version = "0.7", features = ["derive", "simd"] } zeroize = { version = "1", features = ["derive", "serde"] } zstd = { version = "0.13" }