diff --git a/.github/actionlint.yml b/.github/actionlint.yml
index 2b96ce95da..1e6c2d0aa2 100644
--- a/.github/actionlint.yml
+++ b/.github/actionlint.yml
@@ -28,3 +28,7 @@ config-variables:
   - DEV_AWS_OIDC_ROLE_MANAGE_BENCHMARK_EC2_VMS_ARN
   - SLACK_ON_CALL_STORAGE_STAGING_STREAM
   - SLACK_CICD_CHANNEL_ID
+  - SLACK_STORAGE_CHANNEL_ID
+  - NEON_DEV_AWS_ACCOUNT_ID
+  - NEON_PROD_AWS_ACCOUNT_ID
+  - AWS_ECR_REGION
diff --git a/.github/actions/neon-project-create/action.yml b/.github/actions/neon-project-create/action.yml
index c9f6b0832e..a393aa6106 100644
--- a/.github/actions/neon-project-create/action.yml
+++ b/.github/actions/neon-project-create/action.yml
@@ -19,7 +19,11 @@ inputs:
     default: '[1, 1]'
   # settings below only needed if you want the project to be sharded from the beginning
   shard_split_project:
-    description: 'by default new projects are not shard-split, specify true to shard-split'
+    description: 'by default new projects are not shard-split initiailly, but only when shard-split threshold is reached, specify true to explicitly shard-split initially'
+    required: false
+    default: 'false'
+  disable_sharding:
+    description: 'by default new projects use storage controller default policy to shard-split when shard-split threshold is reached, specify true to explicitly disable sharding'
     required: false
     default: 'false'
   admin_api_key:
@@ -107,6 +111,21 @@ runs:
             -H "Accept: application/json" -H "Content-Type: application/json" -H "Authorization: Bearer ${ADMIN_API_KEY}" \
             -d "{\"new_shard_count\": $SHARD_COUNT, \"new_stripe_size\": $STRIPE_SIZE}"
         fi
+        if [ "${DISABLE_SHARDING}" = "true" ]; then
+          # determine tenant ID
+          TENANT_ID=`${PSQL} ${dsn} -t -A -c "SHOW neon.tenant_id"`
+
+          echo "Explicitly disabling shard-splitting for project ${project_id} with tenant_id ${TENANT_ID}"
+
+          echo "Sending PUT request to https://${API_HOST}/regions/${REGION_ID}/api/v1/admin/storage/proxy/control/v1/tenant/${TENANT_ID}/policy"
+          echo "with body {\"scheduling\": \"Essential\"}"
+
+          # we need an ADMIN API KEY to invoke storage controller API for shard splitting (bash -u above checks that the variable is set)
+          curl -X PUT \
+            "https://${API_HOST}/regions/${REGION_ID}/api/v1/admin/storage/proxy/control/v1/tenant/${TENANT_ID}/policy" \
+            -H "Accept: application/json" -H "Content-Type: application/json" -H "Authorization: Bearer ${ADMIN_API_KEY}" \
+            -d "{\"scheduling\": \"Essential\"}"
+        fi
 
       env:
         API_HOST: ${{ inputs.api_host }}
@@ -116,6 +135,7 @@ runs:
         MIN_CU: ${{ fromJSON(inputs.compute_units)[0] }}
         MAX_CU: ${{ fromJSON(inputs.compute_units)[1] }}
         SHARD_SPLIT_PROJECT: ${{ inputs.shard_split_project }}
+        DISABLE_SHARDING: ${{ inputs.disable_sharding }}
         ADMIN_API_KEY: ${{ inputs.admin_api_key }}
         SHARD_COUNT: ${{ inputs.shard_count }}
         STRIPE_SIZE: ${{ inputs.stripe_size }}
diff --git a/.github/workflows/_build-and-test-locally.yml b/.github/workflows/_build-and-test-locally.yml
index 86a791497c..3740e6dc9c 100644
--- a/.github/workflows/_build-and-test-locally.yml
+++ b/.github/workflows/_build-and-test-locally.yml
@@ -348,6 +348,10 @@ jobs:
           rerun_failed: true
           pg_version: ${{ matrix.pg_version }}
           aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+          # `--session-timeout` is equal to (timeout-minutes - 10 minutes) * 60 seconds.
+          # Attempt to stop tests gracefully to generate test reports
+          # until they are forcibly stopped by the stricter `timeout-minutes` limit.
+          extra_params: --session-timeout=${{ inputs.sanitizers != 'enabled' && 3000 || 10200 }}
         env:
           TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
           CHECK_ONDISK_DATA_COMPATIBILITY: nonempty
diff --git a/.github/workflows/_push-to-container-registry.yml b/.github/workflows/_push-to-container-registry.yml
index 3c97c8a67a..c938f62ad5 100644
--- a/.github/workflows/_push-to-container-registry.yml
+++ b/.github/workflows/_push-to-container-registry.yml
@@ -2,7 +2,7 @@ name: Push images to Container Registry
 on:
   workflow_call:
     inputs:
-      # Example: {"docker.io/neondatabase/neon:13196061314":["369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:13196061314","neoneastus2.azurecr.io/neondatabase/neon:13196061314"]}
+      # Example: {"docker.io/neondatabase/neon:13196061314":["${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/neon:13196061314","neoneastus2.azurecr.io/neondatabase/neon:13196061314"]}
       image-map:
         description: JSON map of images, mapping from a source image to an array of target images that should be pushed.
         required: true
diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 88cb395958..8f3392ceea 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -68,7 +68,7 @@ jobs:
   tag:
     needs: [ check-permissions ]
     runs-on: [ self-hosted, small ]
-    container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned
+    container: ${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/base:pinned
     outputs:
       build-tag: ${{steps.build-tag.outputs.tag}}
 
@@ -263,8 +263,9 @@ jobs:
           echo "json=$(jq --compact-output '.' /tmp/benchmark_durations.json)" >> $GITHUB_OUTPUT
 
   benchmarks:
-    if: github.ref_name == 'main' || contains(github.event.pull_request.labels.*.name, 'run-benchmarks')
-    needs: [ check-permissions, build-and-test-locally, build-build-tools-image, get-benchmarks-durations ]
+    # `!failure() && !cancelled()` is required because the workflow depends on the job that can be skipped: `deploy` in PRs
+    if: github.ref_name == 'main' || (contains(github.event.pull_request.labels.*.name, 'run-benchmarks') && !failure() && !cancelled())
+    needs: [ check-permissions, build-build-tools-image, get-benchmarks-durations, deploy ]
     permissions:
       id-token: write # aws-actions/configure-aws-credentials
       statuses: write
@@ -858,14 +859,17 @@ jobs:
           BRANCH: "${{ github.ref_name }}"
           DEV_ACR: "${{ vars.AZURE_DEV_REGISTRY_NAME }}"
           PROD_ACR: "${{ vars.AZURE_PROD_REGISTRY_NAME }}"
+          DEV_AWS: "${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}"
+          PROD_AWS: "${{ vars.NEON_PROD_AWS_ACCOUNT_ID }}"
+          AWS_REGION: "${{ vars.AWS_ECR_REGION }}"
 
   push-neon-image-dev:
     needs: [ generate-image-maps, neon-image ]
     uses: ./.github/workflows/_push-to-container-registry.yml
     with:
       image-map: '${{ needs.generate-image-maps.outputs.neon-dev }}'
-      aws-region: eu-central-1
-      aws-account-ids: "369495373322"
+      aws-region: ${{ vars.AWS_ECR_REGION }}
+      aws-account-ids: "${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}"
       azure-client-id: ${{ vars.AZURE_DEV_CLIENT_ID }}
       azure-subscription-id: ${{ vars.AZURE_DEV_SUBSCRIPTION_ID }}
       azure-tenant-id: ${{ vars.AZURE_TENANT_ID }}
@@ -880,8 +884,8 @@ jobs:
     uses: ./.github/workflows/_push-to-container-registry.yml
     with:
       image-map: '${{ needs.generate-image-maps.outputs.compute-dev }}'
-      aws-region: eu-central-1
-      aws-account-ids: "369495373322"
+      aws-region: ${{ vars.AWS_ECR_REGION }}
+      aws-account-ids: "${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}"
       azure-client-id: ${{ vars.AZURE_DEV_CLIENT_ID }}
       azure-subscription-id: ${{ vars.AZURE_DEV_SUBSCRIPTION_ID }}
       azure-tenant-id: ${{ vars.AZURE_TENANT_ID }}
@@ -897,8 +901,8 @@ jobs:
     uses: ./.github/workflows/_push-to-container-registry.yml
     with:
       image-map: '${{ needs.generate-image-maps.outputs.neon-prod }}'
-      aws-region: eu-central-1
-      aws-account-ids: "093970136003"
+      aws-region: ${{ vars.AWS_ECR_REGION }}
+      aws-account-ids: "${{ vars.NEON_PROD_AWS_ACCOUNT_ID }}"
       azure-client-id: ${{ vars.AZURE_PROD_CLIENT_ID }}
       azure-subscription-id: ${{ vars.AZURE_PROD_SUBSCRIPTION_ID }}
       azure-tenant-id: ${{ vars.AZURE_TENANT_ID }}
@@ -914,8 +918,8 @@ jobs:
     uses: ./.github/workflows/_push-to-container-registry.yml
     with:
       image-map: '${{ needs.generate-image-maps.outputs.compute-prod }}'
-      aws-region: eu-central-1
-      aws-account-ids: "093970136003"
+      aws-region: ${{ vars.AWS_ECR_REGION }}
+      aws-account-ids: "${{ vars.NEON_PROD_AWS_ACCOUNT_ID }}"
       azure-client-id: ${{ vars.AZURE_PROD_CLIENT_ID }}
       azure-subscription-id: ${{ vars.AZURE_PROD_SUBSCRIPTION_ID }}
       azure-tenant-id: ${{ vars.AZURE_TENANT_ID }}
@@ -1028,7 +1032,7 @@ jobs:
       statuses: write
       contents: write
     runs-on: [ self-hosted, small ]
-    container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
+    container: ${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/ansible:latest
     steps:
       - uses: actions/checkout@v4
 
@@ -1177,6 +1181,22 @@ jobs:
             exit 1
           fi
 
+  notify-storage-release-deploy-failure:
+    needs: [ deploy ]
+    # We want this to run even if (transitive) dependencies are skipped, because deploy should really be successful on release branch workflow runs.
+    if: github.ref_name == 'release' && needs.deploy.result != 'success' && always()
+    runs-on: ubuntu-22.04
+    steps:
+      - name: Post release-deploy failure to team-storage slack channel
+        uses: slackapi/slack-github-action@v2
+        with:
+          method: chat.postMessage
+          token: ${{ secrets.SLACK_BOT_TOKEN }}
+          payload: |
+            channel: ${{ vars.SLACK_STORAGE_CHANNEL_ID }}
+            text: |
+              🔴 @oncall-storage: deploy job on release branch had unexpected status "${{ needs.deploy.result }}" <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>.
+
   # The job runs on `release` branch and copies compatibility data and Neon artifact from the last *release PR* to the latest directory
   promote-compatibility-data:
     needs: [ deploy ]
@@ -1273,7 +1293,7 @@ jobs:
           done
 
   pin-build-tools-image:
-    needs: [ build-build-tools-image, push-compute-image-prod, push-neon-image-prod, build-and-test-locally ]
+    needs: [ build-build-tools-image, test-images, build-and-test-locally ]
     if: github.ref_name == 'main'
     uses: ./.github/workflows/pin-build-tools-image.yml
     with:
diff --git a/.github/workflows/build_and_test_with_sanitizers.yml b/.github/workflows/build_and_test_with_sanitizers.yml
index 2bc938509f..e40b02b5d2 100644
--- a/.github/workflows/build_and_test_with_sanitizers.yml
+++ b/.github/workflows/build_and_test_with_sanitizers.yml
@@ -27,7 +27,7 @@ env:
 jobs:
   tag:
     runs-on: [ self-hosted, small ]
-    container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned
+    container: ${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/base:pinned
     outputs:
       build-tag: ${{steps.build-tag.outputs.tag}}
 
diff --git a/.github/workflows/force-test-extensions-upgrade.yml b/.github/workflows/force-test-extensions-upgrade.yml
new file mode 100644
index 0000000000..71c5158ef6
--- /dev/null
+++ b/.github/workflows/force-test-extensions-upgrade.yml
@@ -0,0 +1,76 @@
+name: Force Test Upgrading of Extension
+on:
+  schedule:
+    # * is a special character in YAML so you have to quote this string
+    #          ┌───────────── minute (0 - 59)
+    #          │ ┌───────────── hour (0 - 23)
+    #          │ │ ┌───────────── day of the month (1 - 31)
+    #          │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
+    #          │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
+    - cron:  '45 2 * * *' # run once a day, timezone is utc
+  workflow_dispatch: # adds ability to run this manually
+
+defaults:
+  run:
+    shell: bash -euxo pipefail {0}
+
+concurrency:
+  # Allow only one workflow
+  group: ${{ github.workflow }}
+  cancel-in-progress: true
+
+permissions:
+  id-token: write # aws-actions/configure-aws-credentials
+  statuses: write
+  contents: read
+
+jobs:
+  regress:
+    strategy:
+      fail-fast: false
+      matrix:
+        pg-version: [16, 17]
+
+    runs-on: small
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: false
+
+      - name: Get the last compute release tag
+        id: get-last-compute-release-tag
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          tag=$(gh api -q '[.[].tag_name | select(startswith("release-compute"))][0]'\
+            -H "Accept: application/vnd.github+json" \
+            -H "X-GitHub-Api-Version: 2022-11-28" \
+            "/repos/${GITHUB_REPOSITORY}/releases")
+          echo tag=${tag} >> ${GITHUB_OUTPUT}
+
+      - name: Test extension upgrade
+        timeout-minutes: 20
+        env:
+          NEWTAG: latest
+          OLDTAG: ${{ steps.get-last-compute-release-tag.outputs.tag }}
+          PG_VERSION: ${{ matrix.pg-version }}
+          FORCE_ALL_UPGRADE_TESTS: true
+        run: ./docker-compose/test_extensions_upgrade.sh
+
+      - name: Print logs and clean up
+        if: always()
+        run: |
+          docker compose --profile test-extensions -f ./docker-compose/docker-compose.yml logs || true
+          docker compose --profile test-extensions -f ./docker-compose/docker-compose.yml down
+
+      - name: Post to the Slack channel
+        if: ${{ github.event.schedule && failure() }}
+        uses: slackapi/slack-github-action@v1
+        with:
+          channel-id: ${{ vars.SLACK_ON_CALL_QA_STAGING_STREAM }}
+          slack-message: |
+            Test upgrading of extensions: ${{ job.status }}
+            <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
+        env:
+          SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
diff --git a/.github/workflows/ingest_benchmark.yml b/.github/workflows/ingest_benchmark.yml
index 7b303fa37a..c20c5890f9 100644
--- a/.github/workflows/ingest_benchmark.yml
+++ b/.github/workflows/ingest_benchmark.yml
@@ -32,18 +32,27 @@ jobs:
           - target_project: new_empty_project_stripe_size_2048 
             stripe_size: 2048 # 16 MiB
             postgres_version: 16
+            disable_sharding: false
           - target_project: new_empty_project_stripe_size_32768
             stripe_size: 32768 # 256 MiB # note that this is different from null because using null will shard_split the project only if it reaches the threshold
                                # while here it is sharded from the beginning with a shard size of 256 MiB
+            disable_sharding: false
             postgres_version: 16
           - target_project: new_empty_project
             stripe_size: null # run with neon defaults which will shard split only when reaching the threshold
+            disable_sharding: false
             postgres_version: 16
           - target_project: new_empty_project
             stripe_size: null # run with neon defaults which will shard split only when reaching the threshold
+            disable_sharding: false
             postgres_version: 17
           - target_project: large_existing_project
             stripe_size: null # cannot re-shared or choose different stripe size for existing, already sharded project
+            disable_sharding: false
+            postgres_version: 16
+          - target_project: new_empty_project_unsharded
+            stripe_size: null # run with neon defaults which will shard split only when reaching the threshold
+            disable_sharding: true
             postgres_version: 16
       max-parallel: 1 # we want to run each stripe size sequentially to be able to compare the results
     permissions:
@@ -96,6 +105,7 @@ jobs:
         admin_api_key: ${{ secrets.NEON_STAGING_ADMIN_API_KEY }} 
         shard_count: 8
         stripe_size: ${{ matrix.stripe_size }}
+        disable_sharding: ${{ matrix.disable_sharding }} 
 
     - name: Initialize Neon project
       if: ${{ startsWith(matrix.target_project, 'new_empty_project') }}
diff --git a/.github/workflows/pin-build-tools-image.yml b/.github/workflows/pin-build-tools-image.yml
index 626de2b0e0..b305b662ee 100644
--- a/.github/workflows/pin-build-tools-image.yml
+++ b/.github/workflows/pin-build-tools-image.yml
@@ -33,10 +33,6 @@ concurrency:
 # No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job.
 permissions: {}
 
-env:
-  FROM_TAG: ${{ inputs.from-tag }}
-  TO_TAG: pinned
-
 jobs:
   check-manifests:
     runs-on: ubuntu-22.04
@@ -46,11 +42,14 @@ jobs:
     steps:
       - name: Check if we really need to pin the image
         id: check-manifests
+        env:
+          FROM_TAG: ${{ inputs.from-tag }}
+          TO_TAG: pinned
         run: |
-          docker manifest inspect neondatabase/build-tools:${FROM_TAG} > ${FROM_TAG}.json
-          docker manifest inspect neondatabase/build-tools:${TO_TAG}   > ${TO_TAG}.json
+          docker manifest inspect "docker.io/neondatabase/build-tools:${FROM_TAG}" > "${FROM_TAG}.json"
+          docker manifest inspect "docker.io/neondatabase/build-tools:${TO_TAG}"   > "${TO_TAG}.json"
 
-          if diff ${FROM_TAG}.json ${TO_TAG}.json; then
+          if diff "${FROM_TAG}.json" "${TO_TAG}.json"; then
             skip=true
           else
             skip=false
@@ -64,55 +63,34 @@ jobs:
     # use format(..) to catch both inputs.force = true AND inputs.force = 'true'
     if: needs.check-manifests.outputs.skip == 'false' || format('{0}', inputs.force) == 'true'
 
-    runs-on: ubuntu-22.04
-
     permissions:
-      id-token: write # for `azure/login` and aws auth
+      id-token: write  # Required for aws/azure login
 
-    steps:
-      - uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
-          password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
-
-      - name: Configure AWS credentials
-        uses: aws-actions/configure-aws-credentials@v4
-        with:
-          aws-region: eu-central-1
-          role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
-          role-duration-seconds: 3600
-
-      - name: Login to Amazon Dev ECR
-        uses: aws-actions/amazon-ecr-login@v2
-
-      - name: Azure login
-        uses: azure/login@6c251865b4e6290e7b78be643ea2d005bc51f69a  # @v2.1.1
-        with:
-          client-id: ${{ secrets.AZURE_DEV_CLIENT_ID }}
-          tenant-id: ${{ secrets.AZURE_TENANT_ID }}
-          subscription-id: ${{ secrets.AZURE_DEV_SUBSCRIPTION_ID }}
-
-      - name: Login to ACR
-        run: |
-          az acr login --name=neoneastus2
-
-      - name: Tag build-tools with `${{ env.TO_TAG }}` in Docker Hub, ECR, and ACR
-        env:
-          DEFAULT_DEBIAN_VERSION: bookworm
-        run: |
-          for debian_version in bullseye bookworm; do
-            tags=()
-
-            tags+=("-t" "neondatabase/build-tools:${TO_TAG}-${debian_version}")
-            tags+=("-t" "369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:${TO_TAG}-${debian_version}")
-            tags+=("-t" "neoneastus2.azurecr.io/neondatabase/build-tools:${TO_TAG}-${debian_version}")
-
-            if [ "${debian_version}" == "${DEFAULT_DEBIAN_VERSION}" ]; then
-              tags+=("-t" "neondatabase/build-tools:${TO_TAG}")
-              tags+=("-t" "369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:${TO_TAG}")
-              tags+=("-t" "neoneastus2.azurecr.io/neondatabase/build-tools:${TO_TAG}")
-            fi
-
-            docker buildx imagetools create "${tags[@]}" \
-                                              neondatabase/build-tools:${FROM_TAG}-${debian_version}
-          done
+    uses: ./.github/workflows/_push-to-container-registry.yml
+    with:
+      image-map: |
+        {
+          "docker.io/neondatabase/build-tools:${{ inputs.from-tag }}-bullseye": [
+            "docker.io/neondatabase/build-tools:pinned-bullseye",
+            "${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/build-tools:pinned-bullseye",
+            "${{ vars.AZURE_DEV_REGISTRY_NAME }}.azurecr.io/neondatabase/build-tools:pinned-bullseye"
+          ],
+          "docker.io/neondatabase/build-tools:${{ inputs.from-tag }}-bookworm": [
+            "docker.io/neondatabase/build-tools:pinned-bookworm",
+            "docker.io/neondatabase/build-tools:pinned",
+            "${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/build-tools:pinned-bookworm",
+            "${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/build-tools:pinned",
+            "${{ vars.AZURE_DEV_REGISTRY_NAME }}.azurecr.io/neondatabase/build-tools:pinned-bookworm",
+            "${{ vars.AZURE_DEV_REGISTRY_NAME }}.azurecr.io/neondatabase/build-tools:pinned"
+          ]
+        }
+      aws-region: ${{ vars.AWS_ECR_REGION }}
+      aws-account-ids: "${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}"
+      azure-client-id: ${{ vars.AZURE_DEV_CLIENT_ID }}
+      azure-subscription-id: ${{ vars.AZURE_DEV_SUBSCRIPTION_ID }}
+      azure-tenant-id: ${{ vars.AZURE_TENANT_ID }}
+      acr-registry-name: ${{ vars.AZURE_DEV_REGISTRY_NAME }}
+    secrets:
+      aws-role-to-assume: "${{ vars.DEV_AWS_OIDC_ROLE_ARN }}"
+      docker-hub-username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
+      docker-hub-password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
diff --git a/.github/workflows/regenerate-pg-setting.yml b/.github/workflows/regenerate-pg-setting.yml
new file mode 100644
index 0000000000..1e9d2ec5e2
--- /dev/null
+++ b/.github/workflows/regenerate-pg-setting.yml
@@ -0,0 +1,41 @@
+name: Regenerate Postgres Settings
+
+on:
+  pull_request:
+    types:
+      - opened
+      - synchronize
+      - reopened
+    paths:
+      - pgxn/neon/**.c
+      - vendor/postgres-v*
+      - vendor/revisions.json
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref }}
+  cancel-in-progress: true
+
+permissions:
+  pull-requests: write
+
+jobs:
+  regenerate-pg-settings:
+    runs-on: ubuntu-22.04
+
+    steps:
+      - name: Add comment
+        uses: thollander/actions-comment-pull-request@v3
+        with:
+          comment-tag: ${{ github.job }}
+          pr-number: ${{ github.event.number }}
+          message: |
+            If this PR added a GUC in the Postgres fork or `neon` extension,
+            please regenerate the Postgres settings in the `cloud` repo:
+
+            ```
+            make NEON_WORKDIR=path/to/neon/checkout \
+              -C goapp/internal/shareddomain/postgres generate
+            ```
+
+            If you're an external contributor, a Neon employee will assist in
+            making sure this step is done.
diff --git a/.github/workflows/trigger-e2e-tests.yml b/.github/workflows/trigger-e2e-tests.yml
index 27ed1e4cff..be6a7a7901 100644
--- a/.github/workflows/trigger-e2e-tests.yml
+++ b/.github/workflows/trigger-e2e-tests.yml
@@ -88,7 +88,7 @@ jobs:
           BUILD_AND_TEST_RUN_ID=${TAG}
           while true; do
             gh run --repo ${GITHUB_REPOSITORY} view ${BUILD_AND_TEST_RUN_ID} --json jobs --jq '[.jobs[] | select((.name | startswith("push-neon-image-dev")) or (.name | startswith("push-compute-image-dev"))) | {"name": .name, "conclusion": .conclusion, "url": .url}]' > jobs.json
-            if [ $(jq '[.[] | select(.conclusion == "success")]' jobs.json) -eq 2 ]; then
+            if [ $(jq '[.[] | select(.conclusion == "success")] | length' jobs.json) -eq 2 ]; then
               break
             fi
             jq -c '.[]' jobs.json | while read -r job; do
diff --git a/Cargo.lock b/Cargo.lock
index 407c8170bb..12232eaece 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -786,7 +786,7 @@ dependencies = [
 [[package]]
 name = "azure_core"
 version = "0.21.0"
-source = "git+https://github.com/neondatabase/azure-sdk-for-rust.git?branch=neon#c36ed4c039bb3d59b5a1705f2cc337636c73b541"
+source = "git+https://github.com/neondatabase/azure-sdk-for-rust.git?branch=neon#f64bd57262ced51afce5d8909c06dcb11a6dd85a"
 dependencies = [
  "async-trait",
  "base64 0.22.1",
@@ -815,7 +815,7 @@ dependencies = [
 [[package]]
 name = "azure_identity"
 version = "0.21.0"
-source = "git+https://github.com/neondatabase/azure-sdk-for-rust.git?branch=neon#c36ed4c039bb3d59b5a1705f2cc337636c73b541"
+source = "git+https://github.com/neondatabase/azure-sdk-for-rust.git?branch=neon#f64bd57262ced51afce5d8909c06dcb11a6dd85a"
 dependencies = [
  "async-lock",
  "async-trait",
@@ -834,7 +834,7 @@ dependencies = [
 [[package]]
 name = "azure_storage"
 version = "0.21.0"
-source = "git+https://github.com/neondatabase/azure-sdk-for-rust.git?branch=neon#c36ed4c039bb3d59b5a1705f2cc337636c73b541"
+source = "git+https://github.com/neondatabase/azure-sdk-for-rust.git?branch=neon#f64bd57262ced51afce5d8909c06dcb11a6dd85a"
 dependencies = [
  "RustyXML",
  "async-lock",
@@ -852,7 +852,7 @@ dependencies = [
 [[package]]
 name = "azure_storage_blobs"
 version = "0.21.0"
-source = "git+https://github.com/neondatabase/azure-sdk-for-rust.git?branch=neon#c36ed4c039bb3d59b5a1705f2cc337636c73b541"
+source = "git+https://github.com/neondatabase/azure-sdk-for-rust.git?branch=neon#f64bd57262ced51afce5d8909c06dcb11a6dd85a"
 dependencies = [
  "RustyXML",
  "azure_core",
@@ -872,7 +872,7 @@ dependencies = [
 [[package]]
 name = "azure_svc_blobstorage"
 version = "0.21.0"
-source = "git+https://github.com/neondatabase/azure-sdk-for-rust.git?branch=neon#c36ed4c039bb3d59b5a1705f2cc337636c73b541"
+source = "git+https://github.com/neondatabase/azure-sdk-for-rust.git?branch=neon#f64bd57262ced51afce5d8909c06dcb11a6dd85a"
 dependencies = [
  "azure_core",
  "bytes",
@@ -1029,12 +1029,6 @@ dependencies = [
  "generic-array",
 ]
 
-[[package]]
-name = "boxcar"
-version = "0.2.8"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2721c3c5a6f0e7f7e607125d963fedeb765f545f67adc9d71ed934693881eb42"
-
 [[package]]
 name = "bstr"
 version = "1.5.0"
@@ -1293,6 +1287,7 @@ version = "0.1.0"
 dependencies = [
  "anyhow",
  "chrono",
+ "jsonwebtoken",
  "regex",
  "remote_storage",
  "serde",
@@ -1308,6 +1303,7 @@ dependencies = [
  "aws-config",
  "aws-sdk-kms",
  "aws-sdk-s3",
+ "aws-smithy-types",
  "axum",
  "base64 0.13.1",
  "bytes",
@@ -1329,7 +1325,6 @@ dependencies = [
  "opentelemetry_sdk",
  "postgres",
  "postgres_initdb",
- "prometheus",
  "regex",
  "remote_storage",
  "reqwest",
@@ -1348,13 +1343,13 @@ dependencies = [
  "tower 0.5.2",
  "tower-http",
  "tracing",
- "tracing-opentelemetry",
  "tracing-subscriber",
  "tracing-utils",
  "url",
  "utils",
  "uuid",
  "vm_monitor",
+ "walkdir",
  "workspace_hack",
  "zstd",
 ]
@@ -4927,7 +4922,6 @@ dependencies = [
  "aws-sdk-iam",
  "aws-sigv4",
  "base64 0.13.1",
- "boxcar",
  "bstr",
  "bytes",
  "camino",
@@ -4979,7 +4973,6 @@ dependencies = [
  "postgres-protocol2",
  "postgres_backend",
  "pq_proto",
- "prometheus",
  "rand 0.8.5",
  "rand_distr",
  "rcgen",
@@ -5004,7 +4997,6 @@ dependencies = [
  "smallvec",
  "smol_str",
  "socket2",
- "strum",
  "strum_macros",
  "subtle",
  "thiserror 1.0.69",
@@ -5019,7 +5011,6 @@ dependencies = [
  "tracing",
  "tracing-log",
  "tracing-opentelemetry",
- "tracing-serde",
  "tracing-subscriber",
  "tracing-utils",
  "try-lock",
@@ -6460,10 +6451,13 @@ dependencies = [
  "pageserver_client",
  "postgres_connection",
  "rand 0.8.5",
+ "regex",
  "reqwest",
  "routerify",
  "rustls 0.23.18",
  "rustls-native-certs 0.8.0",
+ "safekeeper_api",
+ "safekeeper_client",
  "scoped-futures",
  "scopeguard",
  "serde",
@@ -6471,6 +6465,7 @@ dependencies = [
  "strum",
  "strum_macros",
  "thiserror 1.0.69",
+ "tikv-jemallocator",
  "tokio",
  "tokio-postgres",
  "tokio-postgres-rustls",
@@ -7024,14 +7019,11 @@ dependencies = [
 name = "tokio-postgres2"
 version = "0.1.0"
 dependencies = [
- "async-trait",
- "byteorder",
  "bytes",
  "fallible-iterator",
  "futures-util",
  "log",
  "parking_lot 0.12.1",
- "percent-encoding",
  "phf",
  "pin-project-lite",
  "postgres-protocol2",
@@ -7618,7 +7610,6 @@ dependencies = [
  "hex",
  "hex-literal",
  "humantime",
- "inferno 0.12.0",
  "jsonwebtoken",
  "metrics",
  "nix 0.27.1",
diff --git a/Dockerfile b/Dockerfile
index b399bcf7e4..83ad86badb 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -50,6 +50,14 @@ RUN set -e \
     && rm -rf pg_install/build \
     && tar -C pg_install -czf /home/nonroot/postgres_install.tar.gz .
 
+# Prepare cargo-chef recipe
+FROM $REPOSITORY/$IMAGE:$TAG AS plan
+WORKDIR /home/nonroot
+
+COPY --chown=nonroot . .
+
+RUN cargo chef prepare --recipe-path recipe.json
+
 # Build neon binaries
 FROM $REPOSITORY/$IMAGE:$TAG AS build
 WORKDIR /home/nonroot
@@ -63,9 +71,15 @@ COPY --from=pg-build /home/nonroot/pg_install/v16/include/postgresql/server pg_i
 COPY --from=pg-build /home/nonroot/pg_install/v17/include/postgresql/server pg_install/v17/include/postgresql/server
 COPY --from=pg-build /home/nonroot/pg_install/v16/lib                       pg_install/v16/lib
 COPY --from=pg-build /home/nonroot/pg_install/v17/lib                       pg_install/v17/lib
+COPY --from=plan     /home/nonroot/recipe.json                              recipe.json
+
+ARG ADDITIONAL_RUSTFLAGS=""
+
+RUN set -e \
+    && RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment -Cforce-frame-pointers=yes ${ADDITIONAL_RUSTFLAGS}" cargo chef cook --locked --release --recipe-path recipe.json
+
 COPY --chown=nonroot . .
 
-ARG ADDITIONAL_RUSTFLAGS
 RUN set -e \
     && RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment -Cforce-frame-pointers=yes ${ADDITIONAL_RUSTFLAGS}" cargo build \
       --bin pg_sni_router  \
diff --git a/build-tools.Dockerfile b/build-tools.Dockerfile
index fa72ca1bc2..317eded26e 100644
--- a/build-tools.Dockerfile
+++ b/build-tools.Dockerfile
@@ -300,6 +300,7 @@ ARG CARGO_HAKARI_VERSION=0.9.33
 ARG CARGO_DENY_VERSION=0.16.2
 ARG CARGO_HACK_VERSION=0.6.33
 ARG CARGO_NEXTEST_VERSION=0.9.85
+ARG CARGO_CHEF_VERSION=0.1.71
 ARG CARGO_DIESEL_CLI_VERSION=2.2.6
 RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && whoami && \
 	chmod +x rustup-init && \
@@ -314,6 +315,7 @@ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux
     cargo install cargo-deny --locked --version ${CARGO_DENY_VERSION} && \
     cargo install cargo-hack          --version ${CARGO_HACK_VERSION} && \
     cargo install cargo-nextest       --version ${CARGO_NEXTEST_VERSION} && \
+    cargo install cargo-chef --locked --version ${CARGO_CHEF_VERSION} && \
     cargo install diesel_cli          --version ${CARGO_DIESEL_CLI_VERSION} \
                                       --features postgres-bundled --no-default-features && \
     rm -rf /home/nonroot/.cargo/registry && \
diff --git a/compute/compute-node.Dockerfile b/compute/compute-node.Dockerfile
index 6814aadcb9..0b3001613d 100644
--- a/compute/compute-node.Dockerfile
+++ b/compute/compute-node.Dockerfile
@@ -148,7 +148,7 @@ RUN case $DEBIAN_VERSION in \
     apt install --no-install-recommends --no-install-suggests -y \
     ninja-build git autoconf automake libtool build-essential bison flex libreadline-dev \
     zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget ca-certificates pkg-config libssl-dev \
-    libicu-dev libxslt1-dev liblz4-dev libzstd-dev zstd curl unzip \
+    libicu-dev libxslt1-dev liblz4-dev libzstd-dev zstd curl unzip g++ \
     $VERSION_INSTALLS \
     && apt clean && rm -rf /var/lib/apt/lists/*
 
@@ -1464,6 +1464,31 @@ RUN make release -j $(getconf _NPROCESSORS_ONLN) && \
     make install -j $(getconf _NPROCESSORS_ONLN) && \
     echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_mooncake.control
 
+#########################################################################################
+#
+# Layer "pg-duckdb-pg-build"
+# compile pg_duckdb extension
+#
+#########################################################################################
+FROM build-deps AS pg_duckdb-src
+WORKDIR /ext-src
+COPY compute/patches/pg_duckdb_v031.patch .
+# pg_duckdb build requires source dir to be a git repo to get submodules
+# allow neon_superuser to execute some functions that in pg_duckdb are available to superuser only: 
+# - extension management function duckdb.install_extension()
+# - access to duckdb.extensions table and its sequence
+RUN git clone --depth 1 --branch v0.3.1 https://github.com/duckdb/pg_duckdb.git pg_duckdb-src && \
+    cd pg_duckdb-src && \
+    git submodule update --init --recursive && \
+    patch -p1 < /ext-src/pg_duckdb_v031.patch
+
+FROM pg-build AS pg_duckdb-build
+ARG PG_VERSION
+COPY --from=pg_duckdb-src /ext-src/ /ext-src/
+WORKDIR /ext-src/pg_duckdb-src
+RUN make install -j $(getconf _NPROCESSORS_ONLN) && \
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_duckdb.control 
+        
 #########################################################################################
 #
 # Layer "pg_repack"
@@ -1484,6 +1509,73 @@ WORKDIR /ext-src/pg_repack-src
 RUN make -j $(getconf _NPROCESSORS_ONLN) && \
     make -j $(getconf _NPROCESSORS_ONLN) install
 
+
+#########################################################################################
+#
+# Layer "pgaudit"
+# compile pgaudit extension
+#
+#########################################################################################
+
+FROM build-deps AS pgaudit-src
+ARG PG_VERSION
+WORKDIR /ext-src
+RUN case "${PG_VERSION}" in \
+    "v14") \
+    export PGAUDIT_VERSION=1.6.2 \
+    export PGAUDIT_CHECKSUM=1f350d70a0cbf488c0f2b485e3a5c9b11f78ad9e3cbb95ef6904afa1eb3187eb \
+    ;; \
+    "v15") \
+    export PGAUDIT_VERSION=1.7.0 \
+    export PGAUDIT_CHECKSUM=8f4a73e451c88c567e516e6cba7dc1e23bc91686bb6f1f77f8f3126d428a8bd8 \
+    ;; \
+    "v16") \
+    export PGAUDIT_VERSION=16.0 \
+    export PGAUDIT_CHECKSUM=d53ef985f2d0b15ba25c512c4ce967dce07b94fd4422c95bd04c4c1a055fe738 \
+    ;; \
+    "v17") \
+    export PGAUDIT_VERSION=17.0 \
+    export PGAUDIT_CHECKSUM=7d0d08d030275d525f36cd48b38c6455f1023da863385badff0cec44965bfd8c \
+    ;; \
+    *) \
+    echo "pgaudit is not supported on this PostgreSQL version" && exit 1;; \
+    esac && \
+    wget https://github.com/pgaudit/pgaudit/archive/refs/tags/${PGAUDIT_VERSION}.tar.gz -O pgaudit.tar.gz && \
+    echo "${PGAUDIT_CHECKSUM} pgaudit.tar.gz" | sha256sum --check && \
+    mkdir pgaudit-src && cd pgaudit-src && tar xzf ../pgaudit.tar.gz --strip-components=1 -C .
+
+FROM pg-build AS pgaudit-build
+COPY --from=pgaudit-src /ext-src/ /ext-src/
+WORKDIR /ext-src/pgaudit-src
+RUN make install USE_PGXS=1 -j $(getconf _NPROCESSORS_ONLN)
+
+#########################################################################################
+#
+# Layer "pgauditlogtofile"
+# compile pgauditlogtofile extension
+#
+#########################################################################################
+
+FROM build-deps AS pgauditlogtofile-src
+ARG PG_VERSION
+WORKDIR /ext-src
+RUN case "${PG_VERSION}" in \
+    "v14" | "v15" | "v16" | "v17") \
+    export PGAUDITLOGTOFILE_VERSION=v1.6.4 \
+    export PGAUDITLOGTOFILE_CHECKSUM=ef801eb09c26aaa935c0dabd92c81eb9ebe338930daa9674d420a280c6bc2d70 \
+    ;; \
+    *) \
+    echo "pgauditlogtofile is not supported on this PostgreSQL version" && exit 1;; \
+    esac && \
+    wget https://github.com/fmbiete/pgauditlogtofile/archive/refs/tags/${PGAUDITLOGTOFILE_VERSION}.tar.gz -O pgauditlogtofile.tar.gz && \
+    echo "${PGAUDITLOGTOFILE_CHECKSUM} pgauditlogtofile.tar.gz" | sha256sum --check && \
+    mkdir pgauditlogtofile-src && cd pgauditlogtofile-src && tar xzf ../pgauditlogtofile.tar.gz --strip-components=1 -C .
+
+FROM pg-build AS pgauditlogtofile-build
+COPY --from=pgauditlogtofile-src /ext-src/ /ext-src/
+WORKDIR /ext-src/pgauditlogtofile-src
+RUN make install USE_PGXS=1 -j $(getconf _NPROCESSORS_ONLN)
+
 #########################################################################################
 #
 # Layer "neon-ext-build"
@@ -1577,7 +1669,10 @@ COPY --from=pg_anon-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg_ivm-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg_partman-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg_mooncake-build /usr/local/pgsql/ /usr/local/pgsql/
+COPY --from=pg_duckdb-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg_repack-build /usr/local/pgsql/ /usr/local/pgsql/
+COPY --from=pgaudit-build /usr/local/pgsql/ /usr/local/pgsql/
+COPY --from=pgauditlogtofile-build /usr/local/pgsql/ /usr/local/pgsql/
 
 #########################################################################################
 #
@@ -1669,29 +1764,6 @@ RUN if [ "$TARGETARCH" = "amd64" ]; then\
     && echo "${pgbouncer_exporter_sha256} pgbouncer_exporter" | sha256sum -c -\
     && echo "${sql_exporter_sha256} sql_exporter" | sha256sum -c -
 
-#########################################################################################
-#
-# Layer "awscli"
-#
-#########################################################################################
-FROM build-deps AS awscli
-ARG TARGETARCH
-RUN set -ex; \
-    if [ "${TARGETARCH}" = "amd64" ]; then \
-        TARGETARCH_ALT="x86_64"; \
-        CHECKSUM="c9a9df3770a3ff9259cb469b6179e02829687a464e0824d5c32d378820b53a00"; \
-    elif [ "${TARGETARCH}" = "arm64" ]; then \
-        TARGETARCH_ALT="aarch64"; \
-        CHECKSUM="8181730be7891582b38b028112e81b4899ca817e8c616aad807c9e9d1289223a"; \
-    else \
-        echo "Unsupported architecture: ${TARGETARCH}"; exit 1; \
-    fi; \
-    curl --retry 5 -L "https://awscli.amazonaws.com/awscli-exe-linux-${TARGETARCH_ALT}-2.17.5.zip" -o /tmp/awscliv2.zip; \
-    echo "${CHECKSUM}  /tmp/awscliv2.zip" | sha256sum -c -; \
-    unzip /tmp/awscliv2.zip -d /tmp/awscliv2; \
-    /tmp/awscliv2/aws/install; \
-    rm -rf /tmp/awscliv2.zip /tmp/awscliv2
-
 #########################################################################################
 #
 # Clean up postgres folder before inclusion
@@ -1750,7 +1822,7 @@ COPY --from=pg_graphql-src /ext-src/ /ext-src/
 COPY --from=hypopg-src /ext-src/ /ext-src/
 COPY --from=pg_hashids-src /ext-src/ /ext-src/
 COPY --from=rum-src /ext-src/ /ext-src/
-#COPY --from=pgtap-src /ext-src/ /ext-src/
+COPY --from=pgtap-src /ext-src/ /ext-src/
 COPY --from=ip4r-src /ext-src/ /ext-src/
 COPY --from=prefix-src /ext-src/ /ext-src/
 COPY --from=hll-src /ext-src/ /ext-src/
@@ -1775,11 +1847,14 @@ COPY --from=pg_partman-src /ext-src/ /ext-src/
 #COPY --from=pg_repack-src /ext-src/ /ext-src/
 
 COPY --chmod=755 docker-compose/run-tests.sh /run-tests.sh
+RUN apt-get update && apt-get install -y libtap-parser-sourcehandler-pgtap-perl\
+   && apt clean && rm -rf /ext-src/*.tar.gz /var/lib/apt/lists/*
 ENV PATH=/usr/local/pgsql/bin:$PATH
 ENV PGHOST=compute
 ENV PGPORT=55433
 ENV PGUSER=cloud_admin
 ENV PGDATABASE=postgres
+ENV PG_VERSION=${PG_VERSION:?}
 
 #########################################################################################
 #
@@ -1861,9 +1936,6 @@ RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
     mkdir /usr/local/download_extensions && \
     chown -R postgres:postgres /usr/local/download_extensions
 
-# aws cli is used by fast_import
-COPY --from=awscli /usr/local/aws-cli /usr/local/aws-cli
-
 # pgbouncer and its config
 COPY --from=pgbouncer         /usr/local/pgbouncer/bin/pgbouncer /usr/local/bin/pgbouncer
 COPY --chmod=0666 --chown=postgres compute/etc/pgbouncer.ini /etc/pgbouncer.ini
diff --git a/compute/patches/pg_duckdb_v031.patch b/compute/patches/pg_duckdb_v031.patch
new file mode 100644
index 0000000000..a7e188d69e
--- /dev/null
+++ b/compute/patches/pg_duckdb_v031.patch
@@ -0,0 +1,11 @@
+diff --git a/sql/pg_duckdb--0.2.0--0.3.0.sql b/sql/pg_duckdb--0.2.0--0.3.0.sql
+index d777d76..af60106 100644
+--- a/sql/pg_duckdb--0.2.0--0.3.0.sql
++++ b/sql/pg_duckdb--0.2.0--0.3.0.sql
+@@ -1056,3 +1056,6 @@ GRANT ALL ON FUNCTION duckdb.cache(TEXT, TEXT) TO PUBLIC;
+ GRANT ALL ON FUNCTION duckdb.cache_info() TO PUBLIC;
+ GRANT ALL ON FUNCTION duckdb.cache_delete(TEXT) TO PUBLIC;
+ GRANT ALL ON PROCEDURE duckdb.recycle_ddb() TO PUBLIC;
++GRANT ALL ON FUNCTION duckdb.install_extension(TEXT) TO neon_superuser;
++GRANT ALL ON TABLE duckdb.extensions TO neon_superuser;
++GRANT ALL ON SEQUENCE duckdb.extensions_table_seq TO neon_superuser;
diff --git a/compute/vm-image-spec-bullseye.yaml b/compute/vm-image-spec-bullseye.yaml
index 124c40cf5d..6617c98599 100644
--- a/compute/vm-image-spec-bullseye.yaml
+++ b/compute/vm-image-spec-bullseye.yaml
@@ -47,7 +47,9 @@ files:
       # Allow postgres user (which is what compute_ctl runs as) to run /neonvm/bin/resize-swap
       # and /neonvm/bin/set-disk-quota as root without requiring entering a password (NOPASSWD),
       # regardless of hostname (ALL)
-      postgres ALL=(root) NOPASSWD: /neonvm/bin/resize-swap, /neonvm/bin/set-disk-quota
+      #
+      # Also allow it to shut down the VM. The fast_import job does that when it's finished.
+      postgres ALL=(root) NOPASSWD: /neonvm/bin/resize-swap, /neonvm/bin/set-disk-quota, /neonvm/bin/poweroff
   - filename: cgconfig.conf
     content: |
       # Configuration for cgroups in VM compute nodes
diff --git a/compute_tools/Cargo.toml b/compute_tools/Cargo.toml
index b04f364cbb..c276996df5 100644
--- a/compute_tools/Cargo.toml
+++ b/compute_tools/Cargo.toml
@@ -14,6 +14,7 @@ base64.workspace = true
 aws-config.workspace = true
 aws-sdk-s3.workspace = true
 aws-sdk-kms.workspace = true
+aws-smithy-types.workspace = true
 anyhow.workspace = true
 axum = { workspace = true, features = [] }
 camino.workspace = true
@@ -46,13 +47,12 @@ tokio-postgres.workspace = true
 tokio-util.workspace = true
 tokio-stream.workspace = true
 tracing.workspace = true
-tracing-opentelemetry.workspace = true
 tracing-subscriber.workspace = true
 tracing-utils.workspace = true
 thiserror.workspace = true
 url.workspace = true
 uuid.workspace = true
-prometheus.workspace = true
+walkdir.workspace = true
 
 postgres_initdb.workspace = true
 compute_api.workspace = true
diff --git a/compute_tools/src/bin/compute_ctl.rs b/compute_tools/src/bin/compute_ctl.rs
index df47adda6c..a8803ec793 100644
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -55,7 +55,7 @@ use signal_hook::{consts::SIGINT, iterator::Signals};
 use tracing::{error, info, warn};
 use url::Url;
 
-use compute_api::responses::ComputeStatus;
+use compute_api::responses::{ComputeCtlConfig, ComputeStatus};
 use compute_api::spec::ComputeSpec;
 
 use compute_tools::compute::{
@@ -281,6 +281,7 @@ fn try_spec_from_cli(cli: &Cli) -> Result<CliSpecParams> {
         info!("got spec from cli argument {}", spec_json);
         return Ok(CliSpecParams {
             spec: Some(serde_json::from_str(spec_json)?),
+            compute_ctl_config: ComputeCtlConfig::default(),
             live_config_allowed: false,
         });
     }
@@ -290,6 +291,7 @@ fn try_spec_from_cli(cli: &Cli) -> Result<CliSpecParams> {
         let file = File::open(Path::new(spec_path))?;
         return Ok(CliSpecParams {
             spec: Some(serde_json::from_reader(file)?),
+            compute_ctl_config: ComputeCtlConfig::default(),
             live_config_allowed: true,
         });
     }
@@ -299,8 +301,9 @@ fn try_spec_from_cli(cli: &Cli) -> Result<CliSpecParams> {
     };
 
     match get_spec_from_control_plane(cli.control_plane_uri.as_ref().unwrap(), &cli.compute_id) {
-        Ok(spec) => Ok(CliSpecParams {
-            spec,
+        Ok(resp) => Ok(CliSpecParams {
+            spec: resp.0,
+            compute_ctl_config: resp.1,
             live_config_allowed: true,
         }),
         Err(e) => {
@@ -317,6 +320,8 @@ fn try_spec_from_cli(cli: &Cli) -> Result<CliSpecParams> {
 struct CliSpecParams {
     /// If a spec was provided via CLI or file, the [`ComputeSpec`]
     spec: Option<ComputeSpec>,
+    #[allow(dead_code)]
+    compute_ctl_config: ComputeCtlConfig,
     live_config_allowed: bool,
 }
 
@@ -326,6 +331,7 @@ fn wait_spec(
     CliSpecParams {
         spec,
         live_config_allowed,
+        compute_ctl_config: _,
     }: CliSpecParams,
 ) -> Result<Arc<ComputeNode>> {
     let mut new_state = ComputeState::new();
diff --git a/compute_tools/src/bin/fast_import.rs b/compute_tools/src/bin/fast_import.rs
index 27cf1c2317..585f3e4e1d 100644
--- a/compute_tools/src/bin/fast_import.rs
+++ b/compute_tools/src/bin/fast_import.rs
@@ -25,10 +25,10 @@
 //! docker push localhost:3030/localregistry/compute-node-v14:latest
 //! ```
 
-use anyhow::Context;
+use anyhow::{bail, Context};
 use aws_config::BehaviorVersion;
 use camino::{Utf8Path, Utf8PathBuf};
-use clap::Parser;
+use clap::{Parser, Subcommand};
 use compute_tools::extension_server::{get_pg_version, PostgresMajorVersion};
 use nix::unistd::Pid;
 use tracing::{error, info, info_span, warn, Instrument};
@@ -44,32 +44,59 @@ mod s3_uri;
 const PG_WAIT_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(600);
 const PG_WAIT_RETRY_INTERVAL: std::time::Duration = std::time::Duration::from_millis(300);
 
+#[derive(Subcommand, Debug)]
+enum Command {
+    /// Runs local postgres (neon binary), restores into it,
+    /// uploads pgdata to s3 to be consumed by pageservers
+    Pgdata {
+        /// Raw connection string to the source database. Used only in tests,
+        /// real scenario uses encrypted connection string in spec.json from s3.
+        #[clap(long)]
+        source_connection_string: Option<String>,
+        /// If specified, will not shut down the local postgres after the import. Used in local testing
+        #[clap(short, long)]
+        interactive: bool,
+        /// Port to run postgres on. Default is 5432.
+        #[clap(long, default_value_t = 5432)]
+        pg_port: u16, // port to run postgres on, 5432 is default
+
+        /// Number of CPUs in the system. This is used to configure # of
+        /// parallel worker processes, for index creation.
+        #[clap(long, env = "NEON_IMPORTER_NUM_CPUS")]
+        num_cpus: Option<usize>,
+
+        /// Amount of RAM in the system. This is used to configure shared_buffers
+        /// and maintenance_work_mem.
+        #[clap(long, env = "NEON_IMPORTER_MEMORY_MB")]
+        memory_mb: Option<usize>,
+    },
+
+    /// Runs pg_dump-pg_restore from source to destination without running local postgres.
+    DumpRestore {
+        /// Raw connection string to the source database. Used only in tests,
+        /// real scenario uses encrypted connection string in spec.json from s3.
+        #[clap(long)]
+        source_connection_string: Option<String>,
+        /// Raw connection string to the destination database. Used only in tests,
+        /// real scenario uses encrypted connection string in spec.json from s3.
+        #[clap(long)]
+        destination_connection_string: Option<String>,
+    },
+}
+
 #[derive(clap::Parser)]
 struct Args {
-    #[clap(long)]
+    #[clap(long, env = "NEON_IMPORTER_WORKDIR")]
     working_directory: Utf8PathBuf,
     #[clap(long, env = "NEON_IMPORTER_S3_PREFIX")]
     s3_prefix: Option<s3_uri::S3Uri>,
-    #[clap(long)]
-    source_connection_string: Option<String>,
-    #[clap(short, long)]
-    interactive: bool,
-    #[clap(long)]
+    #[clap(long, env = "NEON_IMPORTER_PG_BIN_DIR")]
     pg_bin_dir: Utf8PathBuf,
-    #[clap(long)]
+    #[clap(long, env = "NEON_IMPORTER_PG_LIB_DIR")]
     pg_lib_dir: Utf8PathBuf,
-    #[clap(long)]
-    pg_port: Option<u16>, // port to run postgres on, 5432 is default
 
-    /// Number of CPUs in the system. This is used to configure # of
-    /// parallel worker processes, for index creation.
-    #[clap(long, env = "NEON_IMPORTER_NUM_CPUS")]
-    num_cpus: Option<usize>,
-
-    /// Amount of RAM in the system. This is used to configure shared_buffers
-    /// and maintenance_work_mem.
-    #[clap(long, env = "NEON_IMPORTER_MEMORY_MB")]
-    memory_mb: Option<usize>,
+    #[clap(subcommand)]
+    command: Command,
 }
 
 #[serde_with::serde_as]
@@ -78,6 +105,8 @@ struct Spec {
     encryption_secret: EncryptionSecret,
     #[serde_as(as = "serde_with::base64::Base64")]
     source_connstring_ciphertext_base64: Vec<u8>,
+    #[serde_as(as = "Option<serde_with::base64::Base64>")]
+    destination_connstring_ciphertext_base64: Option<Vec<u8>>,
 }
 
 #[derive(serde::Deserialize)]
@@ -93,192 +122,150 @@ const DEFAULT_LOCALE: &str = if cfg!(target_os = "macos") {
     "C.UTF-8"
 };
 
-#[tokio::main]
-pub(crate) async fn main() -> anyhow::Result<()> {
-    utils::logging::init(
-        utils::logging::LogFormat::Plain,
-        utils::logging::TracingErrorLayerEnablement::EnableWithRustLogFilter,
-        utils::logging::Output::Stdout,
-    )?;
-
-    info!("starting");
-
-    let args = Args::parse();
-
-    // Validate arguments
-    if args.s3_prefix.is_none() && args.source_connection_string.is_none() {
-        anyhow::bail!("either s3_prefix or source_connection_string must be specified");
-    }
-    if args.s3_prefix.is_some() && args.source_connection_string.is_some() {
-        anyhow::bail!("only one of s3_prefix or source_connection_string can be specified");
-    }
-
-    let working_directory = args.working_directory;
-    let pg_bin_dir = args.pg_bin_dir;
-    let pg_lib_dir = args.pg_lib_dir;
-    let pg_port = args.pg_port.unwrap_or_else(|| {
-        info!("pg_port not specified, using default 5432");
-        5432
-    });
-
-    // Initialize AWS clients only if s3_prefix is specified
-    let (aws_config, kms_client) = if args.s3_prefix.is_some() {
-        let config = aws_config::load_defaults(BehaviorVersion::v2024_03_28()).await;
-        let kms = aws_sdk_kms::Client::new(&config);
-        (Some(config), Some(kms))
-    } else {
-        (None, None)
-    };
-
-    // Get source connection string either from S3 spec or direct argument
-    let source_connection_string = if let Some(s3_prefix) = &args.s3_prefix {
-        let spec: Spec = {
-            let spec_key = s3_prefix.append("/spec.json");
-            let s3_client = aws_sdk_s3::Client::new(aws_config.as_ref().unwrap());
-            let object = s3_client
-                .get_object()
-                .bucket(&spec_key.bucket)
-                .key(spec_key.key)
-                .send()
-                .await
-                .context("get spec from s3")?
-                .body
-                .collect()
-                .await
-                .context("download spec body")?;
-            serde_json::from_slice(&object.into_bytes()).context("parse spec as json")?
-        };
-
-        match spec.encryption_secret {
-            EncryptionSecret::KMS { key_id } => {
-                let mut output = kms_client
-                    .unwrap()
-                    .decrypt()
-                    .key_id(key_id)
-                    .ciphertext_blob(aws_sdk_s3::primitives::Blob::new(
-                        spec.source_connstring_ciphertext_base64,
-                    ))
-                    .send()
-                    .await
-                    .context("decrypt source connection string")?;
-                let plaintext = output
-                    .plaintext
-                    .take()
-                    .context("get plaintext source connection string")?;
-                String::from_utf8(plaintext.into_inner())
-                    .context("parse source connection string as utf8")?
-            }
-        }
-    } else {
-        args.source_connection_string.unwrap()
-    };
-
-    match tokio::fs::create_dir(&working_directory).await {
-        Ok(()) => {}
-        Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => {
-            if !is_directory_empty(&working_directory)
-                .await
-                .context("check if working directory is empty")?
-            {
-                anyhow::bail!("working directory is not empty");
-            } else {
-                // ok
-            }
-        }
-        Err(e) => return Err(anyhow::Error::new(e).context("create working directory")),
-    }
-
-    let pgdata_dir = working_directory.join("pgdata");
-    tokio::fs::create_dir(&pgdata_dir)
+async fn decode_connstring(
+    kms_client: &aws_sdk_kms::Client,
+    key_id: &String,
+    connstring_ciphertext_base64: Vec<u8>,
+) -> Result<String, anyhow::Error> {
+    let mut output = kms_client
+        .decrypt()
+        .key_id(key_id)
+        .ciphertext_blob(aws_sdk_s3::primitives::Blob::new(
+            connstring_ciphertext_base64,
+        ))
+        .send()
         .await
-        .context("create pgdata directory")?;
+        .context("decrypt connection string")?;
 
-    let pgbin = pg_bin_dir.join("postgres");
-    let pg_version = match get_pg_version(pgbin.as_ref()) {
-        PostgresMajorVersion::V14 => 14,
-        PostgresMajorVersion::V15 => 15,
-        PostgresMajorVersion::V16 => 16,
-        PostgresMajorVersion::V17 => 17,
-    };
-    let superuser = "cloud_admin"; // XXX: this shouldn't be hard-coded
-    postgres_initdb::do_run_initdb(postgres_initdb::RunInitdbArgs {
-        superuser,
-        locale: DEFAULT_LOCALE, // XXX: this shouldn't be hard-coded,
-        pg_version,
-        initdb_bin: pg_bin_dir.join("initdb").as_ref(),
-        library_search_path: &pg_lib_dir, // TODO: is this right? Prob works in compute image, not sure about neon_local.
-        pgdata: &pgdata_dir,
-    })
-    .await
-    .context("initdb")?;
+    let plaintext = output
+        .plaintext
+        .take()
+        .context("get plaintext connection string")?;
 
-    // If the caller didn't specify CPU / RAM to use for sizing, default to
-    // number of CPUs in the system, and pretty arbitrarily, 256 MB of RAM.
-    let nproc = args.num_cpus.unwrap_or_else(num_cpus::get);
-    let memory_mb = args.memory_mb.unwrap_or(256);
+    String::from_utf8(plaintext.into_inner()).context("parse connection string as utf8")
+}
 
-    // Somewhat arbitrarily, use 10 % of memory for shared buffer cache, 70% for
-    // maintenance_work_mem (i.e. for sorting during index creation), and leave the rest
-    // available for misc other stuff that PostgreSQL uses memory for.
-    let shared_buffers_mb = ((memory_mb as f32) * 0.10) as usize;
-    let maintenance_work_mem_mb = ((memory_mb as f32) * 0.70) as usize;
+struct PostgresProcess {
+    pgdata_dir: Utf8PathBuf,
+    pg_bin_dir: Utf8PathBuf,
+    pgbin: Utf8PathBuf,
+    pg_lib_dir: Utf8PathBuf,
+    postgres_proc: Option<tokio::process::Child>,
+}
 
-    //
-    // Launch postgres process
-    //
-    let mut postgres_proc = tokio::process::Command::new(pgbin)
-        .arg("-D")
-        .arg(&pgdata_dir)
-        .args(["-p", &format!("{pg_port}")])
-        .args(["-c", "wal_level=minimal"])
-        .args(["-c", &format!("shared_buffers={shared_buffers_mb}MB")])
-        .args(["-c", "max_wal_senders=0"])
-        .args(["-c", "fsync=off"])
-        .args(["-c", "full_page_writes=off"])
-        .args(["-c", "synchronous_commit=off"])
-        .args([
-            "-c",
-            &format!("maintenance_work_mem={maintenance_work_mem_mb}MB"),
-        ])
-        .args(["-c", &format!("max_parallel_maintenance_workers={nproc}")])
-        .args(["-c", &format!("max_parallel_workers={nproc}")])
-        .args(["-c", &format!("max_parallel_workers_per_gather={nproc}")])
-        .args(["-c", &format!("max_worker_processes={nproc}")])
-        .args([
-            "-c",
-            &format!(
-                "effective_io_concurrency={}",
-                if cfg!(target_os = "macos") { 0 } else { 100 }
-            ),
-        ])
-        .env_clear()
-        .env("LD_LIBRARY_PATH", &pg_lib_dir)
-        .env(
-            "ASAN_OPTIONS",
-            std::env::var("ASAN_OPTIONS").unwrap_or_default(),
+impl PostgresProcess {
+    fn new(pgdata_dir: Utf8PathBuf, pg_bin_dir: Utf8PathBuf, pg_lib_dir: Utf8PathBuf) -> Self {
+        Self {
+            pgdata_dir,
+            pgbin: pg_bin_dir.join("postgres"),
+            pg_bin_dir,
+            pg_lib_dir,
+            postgres_proc: None,
+        }
+    }
+
+    async fn prepare(&self, initdb_user: &str) -> Result<(), anyhow::Error> {
+        tokio::fs::create_dir(&self.pgdata_dir)
+            .await
+            .context("create pgdata directory")?;
+
+        let pg_version = match get_pg_version(self.pgbin.as_ref()) {
+            PostgresMajorVersion::V14 => 14,
+            PostgresMajorVersion::V15 => 15,
+            PostgresMajorVersion::V16 => 16,
+            PostgresMajorVersion::V17 => 17,
+        };
+        postgres_initdb::do_run_initdb(postgres_initdb::RunInitdbArgs {
+            superuser: initdb_user,
+            locale: DEFAULT_LOCALE, // XXX: this shouldn't be hard-coded,
+            pg_version,
+            initdb_bin: self.pg_bin_dir.join("initdb").as_ref(),
+            library_search_path: &self.pg_lib_dir, // TODO: is this right? Prob works in compute image, not sure about neon_local.
+            pgdata: &self.pgdata_dir,
+        })
+        .await
+        .context("initdb")
+    }
+
+    async fn start(
+        &mut self,
+        initdb_user: &str,
+        port: u16,
+        nproc: usize,
+        memory_mb: usize,
+    ) -> Result<&tokio::process::Child, anyhow::Error> {
+        self.prepare(initdb_user).await?;
+
+        // Somewhat arbitrarily, use 10 % of memory for shared buffer cache, 70% for
+        // maintenance_work_mem (i.e. for sorting during index creation), and leave the rest
+        // available for misc other stuff that PostgreSQL uses memory for.
+        let shared_buffers_mb = ((memory_mb as f32) * 0.10) as usize;
+        let maintenance_work_mem_mb = ((memory_mb as f32) * 0.70) as usize;
+
+        //
+        // Launch postgres process
+        //
+        let mut proc = tokio::process::Command::new(&self.pgbin)
+            .arg("-D")
+            .arg(&self.pgdata_dir)
+            .args(["-p", &format!("{port}")])
+            .args(["-c", "wal_level=minimal"])
+            .args(["-c", &format!("shared_buffers={shared_buffers_mb}MB")])
+            .args(["-c", "max_wal_senders=0"])
+            .args(["-c", "fsync=off"])
+            .args(["-c", "full_page_writes=off"])
+            .args(["-c", "synchronous_commit=off"])
+            .args([
+                "-c",
+                &format!("maintenance_work_mem={maintenance_work_mem_mb}MB"),
+            ])
+            .args(["-c", &format!("max_parallel_maintenance_workers={nproc}")])
+            .args(["-c", &format!("max_parallel_workers={nproc}")])
+            .args(["-c", &format!("max_parallel_workers_per_gather={nproc}")])
+            .args(["-c", &format!("max_worker_processes={nproc}")])
+            .args(["-c", "effective_io_concurrency=100"])
+            .env_clear()
+            .env("LD_LIBRARY_PATH", &self.pg_lib_dir)
+            .env(
+                "ASAN_OPTIONS",
+                std::env::var("ASAN_OPTIONS").unwrap_or_default(),
+            )
+            .env(
+                "UBSAN_OPTIONS",
+                std::env::var("UBSAN_OPTIONS").unwrap_or_default(),
+            )
+            .stdout(std::process::Stdio::piped())
+            .stderr(std::process::Stdio::piped())
+            .spawn()
+            .context("spawn postgres")?;
+
+        info!("spawned postgres, waiting for it to become ready");
+        tokio::spawn(
+            child_stdio_to_log::relay_process_output(proc.stdout.take(), proc.stderr.take())
+                .instrument(info_span!("postgres")),
+        );
+
+        self.postgres_proc = Some(proc);
+        Ok(self.postgres_proc.as_ref().unwrap())
+    }
+
+    async fn shutdown(&mut self) -> Result<(), anyhow::Error> {
+        let proc: &mut tokio::process::Child = self.postgres_proc.as_mut().unwrap();
+        info!("shutdown postgres");
+        nix::sys::signal::kill(
+            Pid::from_raw(i32::try_from(proc.id().unwrap()).expect("convert child pid to i32")),
+            nix::sys::signal::SIGTERM,
         )
-        .env(
-            "UBSAN_OPTIONS",
-            std::env::var("UBSAN_OPTIONS").unwrap_or_default(),
-        )
-        .stdout(std::process::Stdio::piped())
-        .stderr(std::process::Stdio::piped())
-        .spawn()
-        .context("spawn postgres")?;
-
-    info!("spawned postgres, waiting for it to become ready");
-    tokio::spawn(
-        child_stdio_to_log::relay_process_output(
-            postgres_proc.stdout.take(),
-            postgres_proc.stderr.take(),
-        )
-        .instrument(info_span!("postgres")),
-    );
+        .context("signal postgres to shut down")?;
+        proc.wait()
+            .await
+            .context("wait for postgres to shut down")
+            .map(|_| ())
+    }
+}
 
+async fn wait_until_ready(connstring: String, create_dbname: String) {
     // Create neondb database in the running postgres
-    let restore_pg_connstring =
-        format!("host=localhost port={pg_port} user={superuser} dbname=postgres");
-
     let start_time = std::time::Instant::now();
 
     loop {
@@ -289,7 +276,12 @@ pub(crate) async fn main() -> anyhow::Result<()> {
             std::process::exit(1);
         }
 
-        match tokio_postgres::connect(&restore_pg_connstring, tokio_postgres::NoTls).await {
+        match tokio_postgres::connect(
+            &connstring.replace("dbname=neondb", "dbname=postgres"),
+            tokio_postgres::NoTls,
+        )
+        .await
+        {
             Ok((client, connection)) => {
                 // Spawn the connection handling task to maintain the connection
                 tokio::spawn(async move {
@@ -298,9 +290,12 @@ pub(crate) async fn main() -> anyhow::Result<()> {
                     }
                 });
 
-                match client.simple_query("CREATE DATABASE neondb;").await {
+                match client
+                    .simple_query(format!("CREATE DATABASE {create_dbname};").as_str())
+                    .await
+                {
                     Ok(_) => {
-                        info!("created neondb database");
+                        info!("created {} database", create_dbname);
                         break;
                     }
                     Err(e) => {
@@ -324,10 +319,16 @@ pub(crate) async fn main() -> anyhow::Result<()> {
             }
         }
     }
+}
 
-    let restore_pg_connstring = restore_pg_connstring.replace("dbname=postgres", "dbname=neondb");
-
-    let dumpdir = working_directory.join("dumpdir");
+async fn run_dump_restore(
+    workdir: Utf8PathBuf,
+    pg_bin_dir: Utf8PathBuf,
+    pg_lib_dir: Utf8PathBuf,
+    source_connstring: String,
+    destination_connstring: String,
+) -> Result<(), anyhow::Error> {
+    let dumpdir = workdir.join("dumpdir");
 
     let common_args = [
         // schema mapping (prob suffices to specify them on one side)
@@ -356,10 +357,18 @@ pub(crate) async fn main() -> anyhow::Result<()> {
             .arg("--no-sync")
             // POSITIONAL args
             // source db (db name included in connection string)
-            .arg(&source_connection_string)
+            .arg(&source_connstring)
             // how we run it
             .env_clear()
             .env("LD_LIBRARY_PATH", &pg_lib_dir)
+            .env(
+                "ASAN_OPTIONS",
+                std::env::var("ASAN_OPTIONS").unwrap_or_default(),
+            )
+            .env(
+                "UBSAN_OPTIONS",
+                std::env::var("UBSAN_OPTIONS").unwrap_or_default(),
+            )
             .kill_on_drop(true)
             .stdout(std::process::Stdio::piped())
             .stderr(std::process::Stdio::piped())
@@ -376,24 +385,31 @@ pub(crate) async fn main() -> anyhow::Result<()> {
         let st = pg_dump.wait().await.context("wait for pg_dump")?;
         info!(status=?st, "pg_dump exited");
         if !st.success() {
-            warn!(status=%st, "pg_dump failed, restore will likely fail as well");
+            error!(status=%st, "pg_dump failed, restore will likely fail as well");
+            bail!("pg_dump failed");
         }
     }
 
-    // TODO: do it in a streaming way, plenty of internal research done on this already
+    // TODO: maybe do it in a streaming way, plenty of internal research done on this already
     // TODO: do the unlogged table trick
-
-    info!("restore from working directory into vanilla postgres");
     {
         let mut pg_restore = tokio::process::Command::new(pg_bin_dir.join("pg_restore"))
             .args(&common_args)
             .arg("-d")
-            .arg(&restore_pg_connstring)
+            .arg(&destination_connstring)
             // POSITIONAL args
             .arg(&dumpdir)
             // how we run it
             .env_clear()
             .env("LD_LIBRARY_PATH", &pg_lib_dir)
+            .env(
+                "ASAN_OPTIONS",
+                std::env::var("ASAN_OPTIONS").unwrap_or_default(),
+            )
+            .env(
+                "UBSAN_OPTIONS",
+                std::env::var("UBSAN_OPTIONS").unwrap_or_default(),
+            )
             .kill_on_drop(true)
             .stdout(std::process::Stdio::piped())
             .stderr(std::process::Stdio::piped())
@@ -411,48 +427,259 @@ pub(crate) async fn main() -> anyhow::Result<()> {
         let st = pg_restore.wait().await.context("wait for pg_restore")?;
         info!(status=?st, "pg_restore exited");
         if !st.success() {
-            warn!(status=%st, "pg_restore failed, restore will likely fail as well");
-        }
-    }
-
-    // If interactive mode, wait for Ctrl+C
-    if args.interactive {
-        info!("Running in interactive mode. Press Ctrl+C to shut down.");
-        tokio::signal::ctrl_c().await.context("wait for ctrl-c")?;
-    }
-
-    info!("shutdown postgres");
-    {
-        nix::sys::signal::kill(
-            Pid::from_raw(
-                i32::try_from(postgres_proc.id().unwrap()).expect("convert child pid to i32"),
-            ),
-            nix::sys::signal::SIGTERM,
-        )
-        .context("signal postgres to shut down")?;
-        postgres_proc
-            .wait()
-            .await
-            .context("wait for postgres to shut down")?;
-    }
-
-    // Only sync if s3_prefix was specified
-    if let Some(s3_prefix) = args.s3_prefix {
-        info!("upload pgdata");
-        aws_s3_sync::sync(Utf8Path::new(&pgdata_dir), &s3_prefix.append("/pgdata/"))
-            .await
-            .context("sync dump directory to destination")?;
-
-        info!("write status");
-        {
-            let status_dir = working_directory.join("status");
-            std::fs::create_dir(&status_dir).context("create status directory")?;
-            let status_file = status_dir.join("pgdata");
-            std::fs::write(&status_file, serde_json::json!({"done": true}).to_string())
-                .context("write status file")?;
-            aws_s3_sync::sync(&status_dir, &s3_prefix.append("/status/"))
-                .await
-                .context("sync status directory to destination")?;
+            error!(status=%st, "pg_restore failed, restore will likely fail as well");
+            bail!("pg_restore failed");
+        }
+    }
+
+    Ok(())
+}
+
+#[allow(clippy::too_many_arguments)]
+async fn cmd_pgdata(
+    s3_client: Option<aws_sdk_s3::Client>,
+    kms_client: Option<aws_sdk_kms::Client>,
+    maybe_s3_prefix: Option<s3_uri::S3Uri>,
+    maybe_spec: Option<Spec>,
+    source_connection_string: Option<String>,
+    interactive: bool,
+    pg_port: u16,
+    workdir: Utf8PathBuf,
+    pg_bin_dir: Utf8PathBuf,
+    pg_lib_dir: Utf8PathBuf,
+    num_cpus: Option<usize>,
+    memory_mb: Option<usize>,
+) -> Result<(), anyhow::Error> {
+    if maybe_spec.is_none() && source_connection_string.is_none() {
+        bail!("spec must be provided for pgdata command");
+    }
+    if maybe_spec.is_some() && source_connection_string.is_some() {
+        bail!("only one of spec or source_connection_string can be provided");
+    }
+
+    let source_connection_string = if let Some(spec) = maybe_spec {
+        match spec.encryption_secret {
+            EncryptionSecret::KMS { key_id } => {
+                decode_connstring(
+                    kms_client.as_ref().unwrap(),
+                    &key_id,
+                    spec.source_connstring_ciphertext_base64,
+                )
+                .await?
+            }
+        }
+    } else {
+        source_connection_string.unwrap()
+    };
+
+    let superuser = "cloud_admin";
+    let destination_connstring = format!(
+        "host=localhost port={} user={} dbname=neondb",
+        pg_port, superuser
+    );
+
+    let pgdata_dir = workdir.join("pgdata");
+    let mut proc = PostgresProcess::new(pgdata_dir.clone(), pg_bin_dir.clone(), pg_lib_dir.clone());
+    let nproc = num_cpus.unwrap_or_else(num_cpus::get);
+    let memory_mb = memory_mb.unwrap_or(256);
+    proc.start(superuser, pg_port, nproc, memory_mb).await?;
+    wait_until_ready(destination_connstring.clone(), "neondb".to_string()).await;
+
+    run_dump_restore(
+        workdir.clone(),
+        pg_bin_dir,
+        pg_lib_dir,
+        source_connection_string,
+        destination_connstring,
+    )
+    .await?;
+
+    // If interactive mode, wait for Ctrl+C
+    if interactive {
+        info!("Running in interactive mode. Press Ctrl+C to shut down.");
+        tokio::signal::ctrl_c().await.context("wait for ctrl-c")?;
+    }
+
+    proc.shutdown().await?;
+
+    // Only sync if s3_prefix was specified
+    if let Some(s3_prefix) = maybe_s3_prefix {
+        info!("upload pgdata");
+        aws_s3_sync::upload_dir_recursive(
+            s3_client.as_ref().unwrap(),
+            Utf8Path::new(&pgdata_dir),
+            &s3_prefix.append("/pgdata/"),
+        )
+        .await
+        .context("sync dump directory to destination")?;
+
+        info!("write status");
+        {
+            let status_dir = workdir.join("status");
+            std::fs::create_dir(&status_dir).context("create status directory")?;
+            let status_file = status_dir.join("pgdata");
+            std::fs::write(&status_file, serde_json::json!({"done": true}).to_string())
+                .context("write status file")?;
+            aws_s3_sync::upload_dir_recursive(
+                s3_client.as_ref().unwrap(),
+                &status_dir,
+                &s3_prefix.append("/status/"),
+            )
+            .await
+            .context("sync status directory to destination")?;
+        }
+    }
+
+    Ok(())
+}
+
+async fn cmd_dumprestore(
+    kms_client: Option<aws_sdk_kms::Client>,
+    maybe_spec: Option<Spec>,
+    source_connection_string: Option<String>,
+    destination_connection_string: Option<String>,
+    workdir: Utf8PathBuf,
+    pg_bin_dir: Utf8PathBuf,
+    pg_lib_dir: Utf8PathBuf,
+) -> Result<(), anyhow::Error> {
+    let (source_connstring, destination_connstring) = if let Some(spec) = maybe_spec {
+        match spec.encryption_secret {
+            EncryptionSecret::KMS { key_id } => {
+                let source = decode_connstring(
+                    kms_client.as_ref().unwrap(),
+                    &key_id,
+                    spec.source_connstring_ciphertext_base64,
+                )
+                .await?;
+
+                let dest = if let Some(dest_ciphertext) =
+                    spec.destination_connstring_ciphertext_base64
+                {
+                    decode_connstring(kms_client.as_ref().unwrap(), &key_id, dest_ciphertext)
+                        .await?
+                } else {
+                    bail!("destination connection string must be provided in spec for dump_restore command");
+                };
+
+                (source, dest)
+            }
+        }
+    } else {
+        (
+            source_connection_string.unwrap(),
+            if let Some(val) = destination_connection_string {
+                val
+            } else {
+                bail!("destination connection string must be provided for dump_restore command");
+            },
+        )
+    };
+
+    run_dump_restore(
+        workdir,
+        pg_bin_dir,
+        pg_lib_dir,
+        source_connstring,
+        destination_connstring,
+    )
+    .await
+}
+
+#[tokio::main]
+pub(crate) async fn main() -> anyhow::Result<()> {
+    utils::logging::init(
+        utils::logging::LogFormat::Json,
+        utils::logging::TracingErrorLayerEnablement::EnableWithRustLogFilter,
+        utils::logging::Output::Stdout,
+    )?;
+
+    info!("starting");
+
+    let args = Args::parse();
+
+    // Initialize AWS clients only if s3_prefix is specified
+    let (s3_client, kms_client) = if args.s3_prefix.is_some() {
+        let config = aws_config::load_defaults(BehaviorVersion::v2024_03_28()).await;
+        let s3_client = aws_sdk_s3::Client::new(&config);
+        let kms = aws_sdk_kms::Client::new(&config);
+        (Some(s3_client), Some(kms))
+    } else {
+        (None, None)
+    };
+
+    let spec: Option<Spec> = if let Some(s3_prefix) = &args.s3_prefix {
+        let spec_key = s3_prefix.append("/spec.json");
+        let object = s3_client
+            .as_ref()
+            .unwrap()
+            .get_object()
+            .bucket(&spec_key.bucket)
+            .key(spec_key.key)
+            .send()
+            .await
+            .context("get spec from s3")?
+            .body
+            .collect()
+            .await
+            .context("download spec body")?;
+        serde_json::from_slice(&object.into_bytes()).context("parse spec as json")?
+    } else {
+        None
+    };
+
+    match tokio::fs::create_dir(&args.working_directory).await {
+        Ok(()) => {}
+        Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => {
+            if !is_directory_empty(&args.working_directory)
+                .await
+                .context("check if working directory is empty")?
+            {
+                bail!("working directory is not empty");
+            } else {
+                // ok
+            }
+        }
+        Err(e) => return Err(anyhow::Error::new(e).context("create working directory")),
+    }
+
+    match args.command {
+        Command::Pgdata {
+            source_connection_string,
+            interactive,
+            pg_port,
+            num_cpus,
+            memory_mb,
+        } => {
+            cmd_pgdata(
+                s3_client,
+                kms_client,
+                args.s3_prefix,
+                spec,
+                source_connection_string,
+                interactive,
+                pg_port,
+                args.working_directory,
+                args.pg_bin_dir,
+                args.pg_lib_dir,
+                num_cpus,
+                memory_mb,
+            )
+            .await?;
+        }
+        Command::DumpRestore {
+            source_connection_string,
+            destination_connection_string,
+        } => {
+            cmd_dumprestore(
+                kms_client,
+                spec,
+                source_connection_string,
+                destination_connection_string,
+                args.working_directory,
+                args.pg_bin_dir,
+                args.pg_lib_dir,
+            )
+            .await?;
         }
     }
 
diff --git a/compute_tools/src/bin/fast_import/aws_s3_sync.rs b/compute_tools/src/bin/fast_import/aws_s3_sync.rs
index 5fa58c8f87..1be10b36d6 100644
--- a/compute_tools/src/bin/fast_import/aws_s3_sync.rs
+++ b/compute_tools/src/bin/fast_import/aws_s3_sync.rs
@@ -1,24 +1,102 @@
-use anyhow::Context;
-use camino::Utf8Path;
+use camino::{Utf8Path, Utf8PathBuf};
+use tokio::task::JoinSet;
+use walkdir::WalkDir;
 
 use super::s3_uri::S3Uri;
 
-pub(crate) async fn sync(local: &Utf8Path, remote: &S3Uri) -> anyhow::Result<()> {
-    let mut builder = tokio::process::Command::new("aws");
-    builder
-        .arg("s3")
-        .arg("sync")
-        .arg(local.as_str())
-        .arg(remote.to_string());
-    let st = builder
-        .spawn()
-        .context("spawn aws s3 sync")?
-        .wait()
-        .await
-        .context("wait for aws s3 sync")?;
-    if st.success() {
-        Ok(())
-    } else {
-        Err(anyhow::anyhow!("aws s3 sync failed"))
+use tracing::{info, warn};
+
+const MAX_PARALLEL_UPLOADS: usize = 10;
+
+/// Upload all files from 'local' to 'remote'
+pub(crate) async fn upload_dir_recursive(
+    s3_client: &aws_sdk_s3::Client,
+    local: &Utf8Path,
+    remote: &S3Uri,
+) -> anyhow::Result<()> {
+    // Recursively scan directory
+    let mut dirwalker = WalkDir::new(local)
+        .into_iter()
+        .map(|entry| {
+            let entry = entry?;
+            let file_type = entry.file_type();
+            let path = <&Utf8Path>::try_from(entry.path())?.to_path_buf();
+            Ok((file_type, path))
+        })
+        .filter_map(|e: anyhow::Result<(std::fs::FileType, Utf8PathBuf)>| {
+            match e {
+                Ok((file_type, path)) if file_type.is_file() => Some(Ok(path)),
+                Ok((file_type, _path)) if file_type.is_dir() => {
+                    // The WalkDir iterator will recurse into directories, but we don't want
+                    // to do anything with directories as such. There's no concept of uploading
+                    // an empty directory to S3.
+                    None
+                }
+                Ok((file_type, path)) if file_type.is_symlink() => {
+                    // huh, didn't expect a symlink. Can't upload that to S3. Warn and skip.
+                    warn!("cannot upload symlink ({})", path);
+                    None
+                }
+                Ok((_file_type, path)) => {
+                    // should not happen
+                    warn!("directory entry has unexpected type ({})", path);
+                    None
+                }
+                Err(e) => Some(Err(e)),
+            }
+        });
+
+    // Spawn upload tasks for each file, keeping MAX_PARALLEL_UPLOADS active in
+    // parallel.
+    let mut joinset = JoinSet::new();
+    loop {
+        // Could we upload more?
+        while joinset.len() < MAX_PARALLEL_UPLOADS {
+            if let Some(full_local_path) = dirwalker.next() {
+                let full_local_path = full_local_path?;
+                let relative_local_path = full_local_path
+                    .strip_prefix(local)
+                    .expect("all paths start from the walkdir root");
+                let remote_path = remote.append(relative_local_path.as_str());
+                info!(
+                    "starting upload of {} to {}",
+                    &full_local_path, &remote_path
+                );
+                let upload_task = upload_file(s3_client.clone(), full_local_path, remote_path);
+                joinset.spawn(upload_task);
+            } else {
+                info!("draining upload tasks");
+                break;
+            }
+        }
+
+        // Wait for an upload to complete
+        if let Some(res) = joinset.join_next().await {
+            let _ = res?;
+        } else {
+            // all done!
+            break;
+        }
     }
+    Ok(())
+}
+
+pub(crate) async fn upload_file(
+    s3_client: aws_sdk_s3::Client,
+    local_path: Utf8PathBuf,
+    remote: S3Uri,
+) -> anyhow::Result<()> {
+    use aws_smithy_types::byte_stream::ByteStream;
+    let stream = ByteStream::from_path(&local_path).await?;
+
+    let _result = s3_client
+        .put_object()
+        .bucket(remote.bucket)
+        .key(&remote.key)
+        .body(stream)
+        .send()
+        .await?;
+    info!("upload of {} to {} finished", &local_path, &remote.key);
+
+    Ok(())
 }
diff --git a/compute_tools/src/spec.rs b/compute_tools/src/spec.rs
index 73950cd95a..6f28bd9733 100644
--- a/compute_tools/src/spec.rs
+++ b/compute_tools/src/spec.rs
@@ -11,7 +11,9 @@ use crate::migration::MigrationRunner;
 use crate::params::PG_HBA_ALL_MD5;
 use crate::pg_helpers::*;
 
-use compute_api::responses::{ControlPlaneComputeStatus, ControlPlaneSpecResponse};
+use compute_api::responses::{
+    ComputeCtlConfig, ControlPlaneComputeStatus, ControlPlaneSpecResponse,
+};
 use compute_api::spec::ComputeSpec;
 
 // Do control plane request and return response if any. In case of error it
@@ -73,14 +75,13 @@ fn do_control_plane_request(
 pub fn get_spec_from_control_plane(
     base_uri: &str,
     compute_id: &str,
-) -> Result<Option<ComputeSpec>> {
+) -> Result<(Option<ComputeSpec>, ComputeCtlConfig)> {
     let cp_uri = format!("{base_uri}/compute/api/v2/computes/{compute_id}/spec");
     let jwt: String = match std::env::var("NEON_CONTROL_PLANE_TOKEN") {
         Ok(v) => v,
         Err(_) => "".to_string(),
     };
     let mut attempt = 1;
-    let mut spec: Result<Option<ComputeSpec>> = Ok(None);
 
     info!("getting spec from control plane: {}", cp_uri);
 
@@ -90,7 +91,7 @@ pub fn get_spec_from_control_plane(
     // - no spec for compute yet (Empty state) -> return Ok(None)
     // - got spec -> return Ok(Some(spec))
     while attempt < 4 {
-        spec = match do_control_plane_request(&cp_uri, &jwt) {
+        let result = match do_control_plane_request(&cp_uri, &jwt) {
             Ok(spec_resp) => {
                 CPLANE_REQUESTS_TOTAL
                     .with_label_values(&[
@@ -99,10 +100,10 @@ pub fn get_spec_from_control_plane(
                     ])
                     .inc();
                 match spec_resp.status {
-                    ControlPlaneComputeStatus::Empty => Ok(None),
+                    ControlPlaneComputeStatus::Empty => Ok((None, spec_resp.compute_ctl_config)),
                     ControlPlaneComputeStatus::Attached => {
                         if let Some(spec) = spec_resp.spec {
-                            Ok(Some(spec))
+                            Ok((Some(spec), spec_resp.compute_ctl_config))
                         } else {
                             bail!("compute is attached, but spec is empty")
                         }
@@ -121,10 +122,10 @@ pub fn get_spec_from_control_plane(
             }
         };
 
-        if let Err(e) = &spec {
+        if let Err(e) = &result {
             error!("attempt {} to get spec failed with: {}", attempt, e);
         } else {
-            return spec;
+            return result;
         }
 
         attempt += 1;
@@ -132,7 +133,9 @@ pub fn get_spec_from_control_plane(
     }
 
     // All attempts failed, return error.
-    spec
+    Err(anyhow::anyhow!(
+        "Exhausted all attempts to retrieve the spec from the control plane"
+    ))
 }
 
 /// Check `pg_hba.conf` and update if needed to allow external connections.
diff --git a/control_plane/src/endpoint.rs b/control_plane/src/endpoint.rs
index 3b2634204c..c3c8229c38 100644
--- a/control_plane/src/endpoint.rs
+++ b/control_plane/src/endpoint.rs
@@ -48,6 +48,8 @@ use std::sync::Arc;
 use std::time::Duration;
 
 use anyhow::{anyhow, bail, Context, Result};
+use compute_api::requests::ConfigurationRequest;
+use compute_api::responses::ComputeCtlConfig;
 use compute_api::spec::Database;
 use compute_api::spec::PgIdent;
 use compute_api::spec::RemoteExtSpec;
@@ -880,10 +882,13 @@ impl Endpoint {
                 self.external_http_address.port()
             ))
             .header(CONTENT_TYPE.as_str(), "application/json")
-            .body(format!(
-                "{{\"spec\":{}}}",
-                serde_json::to_string_pretty(&spec)?
-            ))
+            .body(
+                serde_json::to_string(&ConfigurationRequest {
+                    spec,
+                    compute_ctl_config: ComputeCtlConfig::default(),
+                })
+                .unwrap(),
+            )
             .send()
             .await?;
 
diff --git a/control_plane/src/storage_controller.rs b/control_plane/src/storage_controller.rs
index 9a2d30c861..0fadb9c5fe 100644
--- a/control_plane/src/storage_controller.rs
+++ b/control_plane/src/storage_controller.rs
@@ -838,7 +838,10 @@ impl StorageController {
         self.dispatch(
             Method::PUT,
             format!("control/v1/tenant/{tenant_shard_id}/migrate"),
-            Some(TenantShardMigrateRequest { node_id }),
+            Some(TenantShardMigrateRequest {
+                node_id,
+                migration_config: None,
+            }),
         )
         .await
     }
diff --git a/control_plane/storcon_cli/src/main.rs b/control_plane/storcon_cli/src/main.rs
index 985fe6b3b1..3c574efc63 100644
--- a/control_plane/storcon_cli/src/main.rs
+++ b/control_plane/storcon_cli/src/main.rs
@@ -22,7 +22,7 @@ use pageserver_api::{
 };
 use pageserver_client::mgmt_api::{self};
 use reqwest::{Method, StatusCode, Url};
-use utils::id::{NodeId, TenantId};
+use utils::id::{NodeId, TenantId, TimelineId};
 
 use pageserver_api::controller_api::{
     NodeConfigureRequest, NodeRegisterRequest, NodeSchedulingPolicy, PlacementPolicy,
@@ -239,6 +239,19 @@ enum Command {
         #[arg(long)]
         scheduling_policy: SkSchedulingPolicyArg,
     },
+    /// Downloads any missing heatmap layers for all shard for a given timeline
+    DownloadHeatmapLayers {
+        /// Tenant ID or tenant shard ID. When an unsharded tenant ID is specified,
+        /// the operation is performed on all shards. When a sharded tenant ID is
+        /// specified, the operation is only performed on the specified shard.
+        #[arg(long)]
+        tenant_shard_id: TenantShardId,
+        #[arg(long)]
+        timeline_id: TimelineId,
+        /// Optional: Maximum download concurrency (default is 16)
+        #[arg(long)]
+        concurrency: Option<usize>,
+    },
 }
 
 #[derive(Parser)]
@@ -609,7 +622,10 @@ async fn main() -> anyhow::Result<()> {
             tenant_shard_id,
             node,
         } => {
-            let req = TenantShardMigrateRequest { node_id: node };
+            let req = TenantShardMigrateRequest {
+                node_id: node,
+                migration_config: None,
+            };
 
             storcon_client
                 .dispatch::<TenantShardMigrateRequest, TenantShardMigrateResponse>(
@@ -623,7 +639,10 @@ async fn main() -> anyhow::Result<()> {
             tenant_shard_id,
             node,
         } => {
-            let req = TenantShardMigrateRequest { node_id: node };
+            let req = TenantShardMigrateRequest {
+                node_id: node,
+                migration_config: None,
+            };
 
             storcon_client
                 .dispatch::<TenantShardMigrateRequest, TenantShardMigrateResponse>(
@@ -1082,7 +1101,10 @@ async fn main() -> anyhow::Result<()> {
                             .dispatch::<TenantShardMigrateRequest, TenantShardMigrateResponse>(
                                 Method::PUT,
                                 format!("control/v1/tenant/{}/migrate", mv.tenant_shard_id),
-                                Some(TenantShardMigrateRequest { node_id: mv.to }),
+                                Some(TenantShardMigrateRequest {
+                                    node_id: mv.to,
+                                    migration_config: None,
+                                }),
                             )
                             .await
                             .map_err(|e| (mv.tenant_shard_id, mv.from, mv.to, e))
@@ -1238,6 +1260,24 @@ async fn main() -> anyhow::Result<()> {
                 String::from(scheduling_policy)
             );
         }
+        Command::DownloadHeatmapLayers {
+            tenant_shard_id,
+            timeline_id,
+            concurrency,
+        } => {
+            let mut path = format!(
+                "/v1/tenant/{}/timeline/{}/download_heatmap_layers",
+                tenant_shard_id, timeline_id,
+            );
+
+            if let Some(c) = concurrency {
+                path = format!("{path}?concurrency={c}");
+            }
+
+            storcon_client
+                .dispatch::<(), ()>(Method::POST, path, None)
+                .await?;
+        }
     }
 
     Ok(())
diff --git a/docker-compose/docker_compose_test.sh b/docker-compose/docker_compose_test.sh
index c4ff86ab66..dd520d4986 100755
--- a/docker-compose/docker_compose_test.sh
+++ b/docker-compose/docker_compose_test.sh
@@ -71,7 +71,7 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
         cat ../compute/patches/contrib_pg${pg_version}.patch | docker exec -i $TEST_CONTAINER_NAME bash -c "(cd /postgres && patch -p1)"
         # We are running tests now
         rm -f testout.txt testout_contrib.txt
-        docker exec -e USE_PGXS=1 -e SKIP=timescaledb-src,rdkit-src,postgis-src,pgx_ulid-src,pgtap-src,pg_tiktoken-src,pg_jsonschema-src,kq_imcx-src,wal2json_2_5-src \
+        docker exec -e USE_PGXS=1 -e SKIP=timescaledb-src,rdkit-src,postgis-src,pgx_ulid-src,pg_tiktoken-src,pg_jsonschema-src,kq_imcx-src,wal2json_2_5-src \
         $TEST_CONTAINER_NAME /run-tests.sh /ext-src | tee testout.txt && EXT_SUCCESS=1 || EXT_SUCCESS=0
         docker exec -e SKIP=start-scripts,postgres_fdw,ltree_plpython,jsonb_plpython,jsonb_plperl,hstore_plpython,hstore_plperl,dblink,bool_plperl \
         $TEST_CONTAINER_NAME /run-tests.sh /postgres/contrib | tee testout_contrib.txt && CONTRIB_SUCCESS=1 || CONTRIB_SUCCESS=0
diff --git a/docker-compose/ext-src/pg_semver-src/test-upgrade.patch b/docker-compose/ext-src/pg_semver-src/test-upgrade-16.patch
similarity index 100%
rename from docker-compose/ext-src/pg_semver-src/test-upgrade.patch
rename to docker-compose/ext-src/pg_semver-src/test-upgrade-16.patch
diff --git a/docker-compose/ext-src/pg_semver-src/test-upgrade-17.patch b/docker-compose/ext-src/pg_semver-src/test-upgrade-17.patch
new file mode 100644
index 0000000000..2d0bf280db
--- /dev/null
+++ b/docker-compose/ext-src/pg_semver-src/test-upgrade-17.patch
@@ -0,0 +1,24 @@
+diff --git a/test/sql/base.sql b/test/sql/base.sql
+index 53adb30..2eed91b 100644
+--- a/test/sql/base.sql
++++ b/test/sql/base.sql
+@@ -2,7 +2,6 @@
+ BEGIN;
+ 
+ \i test/pgtap-core.sql
+-CREATE EXTENSION semver;
+ 
+ SELECT plan(334);
+ --SELECT * FROM no_plan();
+diff --git a/test/sql/corpus.sql b/test/sql/corpus.sql
+index c0fe98e..39cdd2e 100644
+--- a/test/sql/corpus.sql
++++ b/test/sql/corpus.sql
+@@ -4,7 +4,6 @@ BEGIN;
+ -- Test the SemVer corpus from https://regex101.com/r/Ly7O1x/3/.
+ 
+ \i test/pgtap-core.sql
+-CREATE EXTENSION semver;
+ 
+ SELECT plan(76);
+ --SELECT * FROM no_plan();
diff --git a/docker-compose/ext-src/pg_semver-src/test-upgrade.sh b/docker-compose/ext-src/pg_semver-src/test-upgrade.sh
index e1541f272a..18b2848fd1 100755
--- a/docker-compose/ext-src/pg_semver-src/test-upgrade.sh
+++ b/docker-compose/ext-src/pg_semver-src/test-upgrade.sh
@@ -1,6 +1,7 @@
 #!/bin/sh
 set -ex
 cd "$(dirname ${0})"
-patch -p1 <test-upgrade.patch
+patch -p1 <test-upgrade-${PG_VERSION}.patch
+psql -d contrib_regression -c "DROP EXTENSION IF EXISTS pgtap"
 PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
 ${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin'    --inputdir=test --dbname=contrib_regression base corpus
\ No newline at end of file
diff --git a/docker-compose/ext-src/pgtap-src/test-upgrade.patch b/docker-compose/ext-src/pgtap-src/test-upgrade.patch
new file mode 100644
index 0000000000..a4c46e93ce
--- /dev/null
+++ b/docker-compose/ext-src/pgtap-src/test-upgrade.patch
@@ -0,0 +1,28 @@
+diff --git a/Makefile b/Makefile
+index f255fe6..0a0fa65 100644
+--- a/Makefile
++++ b/Makefile
+@@ -346,7 +346,7 @@ test: test-serial test-parallel
+ TB_DIR = test/build
+ GENERATED_SCHEDULE_DEPS = $(TB_DIR)/all_tests $(TB_DIR)/exclude_tests
+ REGRESS = --schedule $(TB_DIR)/run.sch # Set this again just to be safe
+-REGRESS_OPTS = --inputdir=test --max-connections=$(PARALLEL_CONN) --schedule $(SETUP_SCH) $(REGRESS_CONF)
++REGRESS_OPTS = --use-existing --dbname=pgtap_regression --inputdir=test --max-connections=$(PARALLEL_CONN) --schedule $(SETUP_SCH) $(REGRESS_CONF)
+ SETUP_SCH = test/schedule/main.sch # schedule to use for test setup; this can be forcibly changed by some targets!
+ IGNORE_TESTS = $(notdir $(EXCLUDE_TEST_FILES:.sql=))
+ PARALLEL_TESTS = $(filter-out $(IGNORE_TESTS),$(filter-out $(SERIAL_TESTS),$(ALL_TESTS)))
+diff --git a/test/schedule/create.sql b/test/schedule/create.sql
+index ba355ed..7e250f5 100644
+--- a/test/schedule/create.sql
++++ b/test/schedule/create.sql
+@@ -1,3 +1,2 @@
+ \unset ECHO
+ \i test/psql.sql
+-CREATE EXTENSION pgtap;
+diff --git a/test/schedule/main.sch b/test/schedule/main.sch
+index a8a5fbc..0463fc4 100644
+--- a/test/schedule/main.sch
++++ b/test/schedule/main.sch
+@@ -1,2 +1 @@
+-test: build
+ test: create
diff --git a/docker-compose/ext-src/pgtap-src/test-upgrade.sh b/docker-compose/ext-src/pgtap-src/test-upgrade.sh
new file mode 100755
index 0000000000..5f59c636aa
--- /dev/null
+++ b/docker-compose/ext-src/pgtap-src/test-upgrade.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+set -ex
+cd "$(dirname ${0})"
+patch -p1 <test-upgrade.patch
+make installcheck
\ No newline at end of file
diff --git a/docker-compose/ext-src/plv8-src/test-upgrade.sh b/docker-compose/ext-src/plv8-src/test-upgrade.sh
index 6514d4fe92..325540b85d 100755
--- a/docker-compose/ext-src/plv8-src/test-upgrade.sh
+++ b/docker-compose/ext-src/plv8-src/test-upgrade.sh
@@ -2,4 +2,5 @@
 set -ex
 cd "$(dirname ${0})"
 PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
-${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin'  --use-existing --dbname=contrib_regression plv8 plv8-errors scalar_args inline json startup_pre startup varparam json_conv jsonb_conv window guc es6 arraybuffer composites currentresource startup_perms bytea find_function_perms memory_limits reset show array_spread regression dialect bigint procedure
\ No newline at end of file
+REGRESS="$(make -n installcheck | awk '{print substr($0,index($0,"init-extension")+15);}')"
+${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin'  --use-existing --dbname=contrib_regression ${REGRESS}
\ No newline at end of file
diff --git a/docker-compose/test_extensions_upgrade.sh b/docker-compose/test_extensions_upgrade.sh
index 08b1a60f2d..4a9024569b 100755
--- a/docker-compose/test_extensions_upgrade.sh
+++ b/docker-compose/test_extensions_upgrade.sh
@@ -11,6 +11,7 @@ if [ -z ${OLDTAG+x} ] || [ -z ${NEWTAG+x} ] || [ -z "${OLDTAG}" ] || [ -z "${NEW
   exit 1
 fi
 export PG_VERSION=${PG_VERSION:-16}
+export PG_TEST_VERSION=${PG_VERSION}
 function wait_for_ready {
   TIME=0
   while ! docker compose logs compute_is_ready | grep -q "accepting connections" && [ ${TIME} -le 300 ] ; do
@@ -41,7 +42,8 @@ EXTENSIONS='[
 {"extname": "roaringbitmap", "extdir": "pg_roaringbitmap-src"},
 {"extname": "semver", "extdir": "pg_semver-src"},
 {"extname": "pg_ivm", "extdir": "pg_ivm-src"},
-{"extname": "pgjwt", "extdir": "pgjwt-src"}
+{"extname": "pgjwt", "extdir": "pgjwt-src"},
+{"extname": "pgtap", "extdir": "pgtap-src"}
 ]'
 EXTNAMES=$(echo ${EXTENSIONS} | jq -r '.[].extname' | paste -sd ' ' -)
 TAG=${NEWTAG} docker compose --profile test-extensions up --quiet-pull --build -d
@@ -57,9 +59,15 @@ wait_for_ready
 docker compose cp  ext-src neon-test-extensions:/
 docker compose exec neon-test-extensions psql -c "DROP DATABASE IF EXISTS contrib_regression"
 docker compose exec neon-test-extensions psql -c "CREATE DATABASE contrib_regression"
+docker compose exec neon-test-extensions psql -c "CREATE DATABASE pgtap_regression"
+docker compose exec neon-test-extensions psql -d pgtap_regression -c "CREATE EXTENSION pgtap"
 create_extensions "${EXTNAMES}"
-query="select pge.extname from pg_extension pge join (select key as extname, value as extversion from json_each_text('${new_vers}')) x on pge.extname=x.extname and pge.extversion <> x.extversion"
-exts=$(docker compose exec neon-test-extensions psql -Aqt -d contrib_regression -c "$query")
+if [ "${FORCE_ALL_UPGRADE_TESTS:-false}" = true ]; then
+  exts="${EXTNAMES}"
+else
+  query="select pge.extname from pg_extension pge join (select key as extname, value as extversion from json_each_text('${new_vers}')) x on pge.extname=x.extname and pge.extversion <> x.extversion"
+  exts=$(docker compose exec neon-test-extensions psql -Aqt -d contrib_regression -c "$query")
+fi
 if [ -z "${exts}" ]; then
   echo "No extensions were upgraded"
 else
@@ -87,7 +95,10 @@ else
       exit 1
     fi
     docker compose exec neon-test-extensions psql -d contrib_regression -c "\dx ${ext}"
-    docker compose exec neon-test-extensions sh -c /ext-src/${EXTDIR}/test-upgrade.sh
+    if ! docker compose exec neon-test-extensions sh -c /ext-src/${EXTDIR}/test-upgrade.sh; then
+      docker  compose exec neon-test-extensions  cat /ext-src/${EXTDIR}/regression.diffs
+      exit 1
+    fi
     docker compose exec neon-test-extensions psql -d contrib_regression -c "alter extension ${ext} update"
     docker compose exec neon-test-extensions psql -d contrib_regression -c "\dx ${ext}"
   done
diff --git a/libs/compute_api/Cargo.toml b/libs/compute_api/Cargo.toml
index c0ec40a6c2..c11a1b6688 100644
--- a/libs/compute_api/Cargo.toml
+++ b/libs/compute_api/Cargo.toml
@@ -7,6 +7,7 @@ license.workspace = true
 [dependencies]
 anyhow.workspace = true
 chrono.workspace = true
+jsonwebtoken.workspace = true
 serde.workspace = true
 serde_json.workspace = true
 regex.workspace = true
diff --git a/libs/compute_api/src/requests.rs b/libs/compute_api/src/requests.rs
index fc3757d981..0c256cae2e 100644
--- a/libs/compute_api/src/requests.rs
+++ b/libs/compute_api/src/requests.rs
@@ -1,18 +1,20 @@
 //! Structs representing the JSON formats used in the compute_ctl's HTTP API.
 use crate::{
     privilege::Privilege,
+    responses::ComputeCtlConfig,
     spec::{ComputeSpec, ExtVersion, PgIdent},
 };
-use serde::Deserialize;
+use serde::{Deserialize, Serialize};
 
 /// Request of the /configure API
 ///
 /// We now pass only `spec` in the configuration request, but later we can
 /// extend it and something like `restart: bool` or something else. So put
 /// `spec` into a struct initially to be more flexible in the future.
-#[derive(Deserialize, Debug)]
+#[derive(Debug, Deserialize, Serialize)]
 pub struct ConfigurationRequest {
     pub spec: ComputeSpec,
+    pub compute_ctl_config: ComputeCtlConfig,
 }
 
 #[derive(Deserialize, Debug)]
diff --git a/libs/compute_api/src/responses.rs b/libs/compute_api/src/responses.rs
index 5286e0e61d..a6248019d9 100644
--- a/libs/compute_api/src/responses.rs
+++ b/libs/compute_api/src/responses.rs
@@ -3,6 +3,7 @@
 use std::fmt::Display;
 
 use chrono::{DateTime, Utc};
+use jsonwebtoken::jwk::JwkSet;
 use serde::{Deserialize, Serialize, Serializer};
 
 use crate::{
@@ -135,13 +136,27 @@ pub struct CatalogObjects {
     pub databases: Vec<Database>,
 }
 
+#[derive(Debug, Deserialize, Serialize)]
+pub struct ComputeCtlConfig {
+    pub jwks: JwkSet,
+}
+
+impl Default for ComputeCtlConfig {
+    fn default() -> Self {
+        Self {
+            jwks: JwkSet {
+                keys: Vec::default(),
+            },
+        }
+    }
+}
+
 /// Response of the `/computes/{compute_id}/spec` control-plane API.
-/// This is not actually a compute API response, so consider moving
-/// to a different place.
 #[derive(Deserialize, Debug)]
 pub struct ControlPlaneSpecResponse {
     pub spec: Option<ComputeSpec>,
     pub status: ControlPlaneComputeStatus,
+    pub compute_ctl_config: ComputeCtlConfig,
 }
 
 #[derive(Deserialize, Clone, Copy, Debug, PartialEq, Eq)]
diff --git a/libs/http-utils/src/pprof.rs b/libs/http-utils/src/pprof.rs
index dd57f9ed4b..fe1cc10838 100644
--- a/libs/http-utils/src/pprof.rs
+++ b/libs/http-utils/src/pprof.rs
@@ -2,7 +2,6 @@ use anyhow::bail;
 use flate2::write::{GzDecoder, GzEncoder};
 use flate2::Compression;
 use itertools::Itertools as _;
-use once_cell::sync::Lazy;
 use pprof::protos::{Function, Line, Location, Message as _, Profile};
 use regex::Regex;
 
@@ -58,38 +57,30 @@ pub fn symbolize(mut profile: Profile) -> anyhow::Result<Profile> {
 
         // Resolve the line and function for each location.
         backtrace::resolve(loc.address as *mut c_void, |symbol| {
-            let Some(symname) = symbol.name() else {
+            let Some(symbol_name) = symbol.name() else {
                 return;
             };
-            let mut name = symname.to_string();
 
-            // Strip the Rust monomorphization suffix from the symbol name.
-            static SUFFIX_REGEX: Lazy<Regex> =
-                Lazy::new(|| Regex::new("::h[0-9a-f]{16}$").expect("invalid regex"));
-            if let Some(m) = SUFFIX_REGEX.find(&name) {
-                name.truncate(m.start());
-            }
-
-            let function_id = match functions.get(&name) {
-                Some(function) => function.id,
-                None => {
-                    let id = functions.len() as u64 + 1;
-                    let system_name = String::from_utf8_lossy(symname.as_bytes());
+            let function_name = format!("{symbol_name:#}");
+            let functions_len = functions.len();
+            let function_id = functions
+                .entry(function_name)
+                .or_insert_with_key(|function_name| {
+                    let function_id = functions_len as u64 + 1;
+                    let system_name = String::from_utf8_lossy(symbol_name.as_bytes());
                     let filename = symbol
                         .filename()
                         .map(|path| path.to_string_lossy())
                         .unwrap_or(Cow::Borrowed(""));
-                    let function = Function {
-                        id,
-                        name: string_id(&name),
+                    Function {
+                        id: function_id,
+                        name: string_id(function_name),
                         system_name: string_id(&system_name),
                         filename: string_id(&filename),
                         ..Default::default()
-                    };
-                    functions.insert(name, function);
-                    id
-                }
-            };
+                    }
+                })
+                .id;
             loc.line.push(Line {
                 function_id,
                 line: symbol.lineno().unwrap_or(0) as i64,
diff --git a/libs/pageserver_api/src/config.rs b/libs/pageserver_api/src/config.rs
index 79f068a47b..0f33bcf45b 100644
--- a/libs/pageserver_api/src/config.rs
+++ b/libs/pageserver_api/src/config.rs
@@ -351,7 +351,7 @@ pub struct TenantConfigToml {
 
     /// Enable rel_size_v2 for this tenant. Once enabled, the tenant will persist this information into
     /// `index_part.json`, and it cannot be reversed.
-    pub rel_size_v2_enabled: Option<bool>,
+    pub rel_size_v2_enabled: bool,
 
     // gc-compaction related configs
     /// Enable automatic gc-compaction trigger on this tenant.
@@ -544,10 +544,11 @@ pub mod tenant_conf_defaults {
     pub const DEFAULT_COMPACTION_PERIOD: &str = "20 s";
     pub const DEFAULT_COMPACTION_THRESHOLD: usize = 10;
 
-    // This value needs to be tuned to avoid OOM. We have 3/4 of the total CPU threads to do background works, that's 16*3/4=9 on
-    // most of our pageservers. Compaction ~50 layers requires about 2GB memory (could be reduced later by optimizing L0 hole
-    // calculation to avoid loading all keys into the memory). So with this config, we can get a maximum peak compaction usage of 18GB.
-    pub const DEFAULT_COMPACTION_UPPER_LIMIT: usize = 50;
+    // This value needs to be tuned to avoid OOM. We have 3/4*CPUs threads for L0 compaction, that's
+    // 3/4*16=9 on most of our pageservers. Compacting 20 layers requires about 1 GB memory (could
+    // be reduced later by optimizing L0 hole calculation to avoid loading all keys into memory). So
+    // with this config, we can get a maximum peak compaction usage of 9 GB.
+    pub const DEFAULT_COMPACTION_UPPER_LIMIT: usize = 20;
     pub const DEFAULT_COMPACTION_L0_FIRST: bool = false;
     pub const DEFAULT_COMPACTION_L0_SEMAPHORE: bool = true;
 
@@ -633,7 +634,7 @@ impl Default for TenantConfigToml {
             lsn_lease_length_for_ts: LsnLease::DEFAULT_LENGTH_FOR_TS,
             timeline_offloading: true,
             wal_receiver_protocol_override: None,
-            rel_size_v2_enabled: None,
+            rel_size_v2_enabled: false,
             gc_compaction_enabled: DEFAULT_GC_COMPACTION_ENABLED,
             gc_compaction_initial_threshold_kb: DEFAULT_GC_COMPACTION_INITIAL_THRESHOLD_KB,
             gc_compaction_ratio_percent: DEFAULT_GC_COMPACTION_RATIO_PERCENT,
diff --git a/libs/pageserver_api/src/controller_api.rs b/libs/pageserver_api/src/controller_api.rs
index 78e080981a..42f6e47e63 100644
--- a/libs/pageserver_api/src/controller_api.rs
+++ b/libs/pageserver_api/src/controller_api.rs
@@ -182,6 +182,18 @@ pub struct TenantDescribeResponseShard {
 #[derive(Serialize, Deserialize, Debug)]
 pub struct TenantShardMigrateRequest {
     pub node_id: NodeId,
+    #[serde(default)]
+    pub migration_config: Option<MigrationConfig>,
+}
+
+#[derive(Serialize, Deserialize, Debug)]
+pub struct MigrationConfig {
+    #[serde(default)]
+    #[serde(with = "humantime_serde")]
+    pub secondary_warmup_timeout: Option<Duration>,
+    #[serde(default)]
+    #[serde(with = "humantime_serde")]
+    pub secondary_download_request_timeout: Option<Duration>,
 }
 
 #[derive(Serialize, Clone, Debug)]
diff --git a/libs/pageserver_api/src/key.rs b/libs/pageserver_api/src/key.rs
index dbd45da314..b88a2e46a1 100644
--- a/libs/pageserver_api/src/key.rs
+++ b/libs/pageserver_api/src/key.rs
@@ -1,10 +1,12 @@
 use anyhow::{bail, Result};
 use byteorder::{ByteOrder, BE};
+use bytes::Bytes;
 use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
 use postgres_ffi::Oid;
 use postgres_ffi::RepOriginId;
 use serde::{Deserialize, Serialize};
 use std::{fmt, ops::Range};
+use utils::const_assert;
 
 use crate::reltag::{BlockNumber, RelTag, SlruKind};
 
@@ -49,6 +51,64 @@ pub const AUX_KEY_PREFIX: u8 = 0x62;
 /// The key prefix of ReplOrigin keys.
 pub const REPL_ORIGIN_KEY_PREFIX: u8 = 0x63;
 
+/// The key prefix of db directory keys.
+pub const DB_DIR_KEY_PREFIX: u8 = 0x64;
+
+/// The key prefix of rel directory keys.
+pub const REL_DIR_KEY_PREFIX: u8 = 0x65;
+
+#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
+pub enum RelDirExists {
+    Exists,
+    Removed,
+}
+
+#[derive(Debug)]
+pub struct DecodeError;
+
+impl fmt::Display for DecodeError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "invalid marker")
+    }
+}
+
+impl std::error::Error for DecodeError {}
+
+impl RelDirExists {
+    /// The value of the rel directory keys that indicates the existence of a relation.
+    const REL_EXISTS_MARKER: Bytes = Bytes::from_static(b"r");
+
+    pub fn encode(&self) -> Bytes {
+        match self {
+            Self::Exists => Self::REL_EXISTS_MARKER.clone(),
+            Self::Removed => SPARSE_TOMBSTONE_MARKER.clone(),
+        }
+    }
+
+    pub fn decode_option(data: Option<impl AsRef<[u8]>>) -> Result<Self, DecodeError> {
+        match data {
+            Some(marker) if marker.as_ref() == Self::REL_EXISTS_MARKER => Ok(Self::Exists),
+            // Any other marker is invalid
+            Some(_) => Err(DecodeError),
+            None => Ok(Self::Removed),
+        }
+    }
+
+    pub fn decode(data: impl AsRef<[u8]>) -> Result<Self, DecodeError> {
+        let data = data.as_ref();
+        if data == Self::REL_EXISTS_MARKER {
+            Ok(Self::Exists)
+        } else if data == SPARSE_TOMBSTONE_MARKER {
+            Ok(Self::Removed)
+        } else {
+            Err(DecodeError)
+        }
+    }
+}
+
+/// A tombstone in the sparse keyspace, which is an empty buffer.
+pub const SPARSE_TOMBSTONE_MARKER: Bytes = Bytes::from_static(b"");
+
 /// Check if the key falls in the range of metadata keys.
 pub const fn is_metadata_key_slice(key: &[u8]) -> bool {
     key[0] >= METADATA_KEY_BEGIN_PREFIX && key[0] < METADATA_KEY_END_PREFIX
@@ -110,6 +170,24 @@ impl Key {
         }
     }
 
+    pub fn rel_dir_sparse_key_range() -> Range<Self> {
+        Key {
+            field1: REL_DIR_KEY_PREFIX,
+            field2: 0,
+            field3: 0,
+            field4: 0,
+            field5: 0,
+            field6: 0,
+        }..Key {
+            field1: REL_DIR_KEY_PREFIX + 1,
+            field2: 0,
+            field3: 0,
+            field4: 0,
+            field5: 0,
+            field6: 0,
+        }
+    }
+
     /// This function checks more extensively what keys we can take on the write path.
     /// If a key beginning with 00 does not have a global/default tablespace OID, it
     /// will be rejected on the write path.
@@ -440,6 +518,36 @@ pub fn rel_dir_to_key(spcnode: Oid, dbnode: Oid) -> Key {
     }
 }
 
+#[inline(always)]
+pub fn rel_tag_sparse_key(spcnode: Oid, dbnode: Oid, relnode: Oid, forknum: u8) -> Key {
+    Key {
+        field1: REL_DIR_KEY_PREFIX,
+        field2: spcnode,
+        field3: dbnode,
+        field4: relnode,
+        field5: forknum,
+        field6: 1,
+    }
+}
+
+pub fn rel_tag_sparse_key_range(spcnode: Oid, dbnode: Oid) -> Range<Key> {
+    Key {
+        field1: REL_DIR_KEY_PREFIX,
+        field2: spcnode,
+        field3: dbnode,
+        field4: 0,
+        field5: 0,
+        field6: 0,
+    }..Key {
+        field1: REL_DIR_KEY_PREFIX,
+        field2: spcnode,
+        field3: dbnode,
+        field4: u32::MAX,
+        field5: u8::MAX,
+        field6: u32::MAX,
+    } // it's fine to exclude the last key b/c we only use field6 == 1
+}
+
 #[inline(always)]
 pub fn rel_block_to_key(rel: RelTag, blknum: BlockNumber) -> Key {
     Key {
@@ -734,9 +842,9 @@ impl Key {
         self.field1 == RELATION_SIZE_PREFIX
     }
 
-    pub fn sparse_non_inherited_keyspace() -> Range<Key> {
+    pub const fn sparse_non_inherited_keyspace() -> Range<Key> {
         // The two keys are adjacent; if we will have non-adjancent keys in the future, we should return a keyspace
-        debug_assert_eq!(AUX_KEY_PREFIX + 1, REPL_ORIGIN_KEY_PREFIX);
+        const_assert!(AUX_KEY_PREFIX + 1 == REPL_ORIGIN_KEY_PREFIX);
         Key {
             field1: AUX_KEY_PREFIX,
             field2: 0,
diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs
index 6dbfbec345..dd7bea2916 100644
--- a/libs/pageserver_api/src/models.rs
+++ b/libs/pageserver_api/src/models.rs
@@ -1080,8 +1080,7 @@ pub struct TenantInfo {
 
     /// Opaque explanation if gc is being blocked.
     ///
-    /// Only looked up for the individual tenant detail, not the listing. This is purely for
-    /// debugging, not included in openapi.
+    /// Only looked up for the individual tenant detail, not the listing.
     #[serde(skip_serializing_if = "Option::is_none")]
     pub gc_blocking: Option<String>,
 }
@@ -1136,7 +1135,26 @@ pub struct TimelineInfo {
     pub ancestor_lsn: Option<Lsn>,
     pub last_record_lsn: Lsn,
     pub prev_record_lsn: Option<Lsn>,
+
+    /// Legacy field for compat with control plane.  Synonym of `min_readable_lsn`.
+    /// TODO: remove once control plane no longer reads it.
     pub latest_gc_cutoff_lsn: Lsn,
+
+    /// The LSN up to which GC has advanced: older data may still exist but it is not available for clients.
+    /// This LSN is not suitable for deciding where to create branches etc: use [`TimelineInfo::min_readable_lsn`] instead,
+    /// as it is easier to reason about.
+    #[serde(default)]
+    pub applied_gc_cutoff_lsn: Lsn,
+
+    /// The upper bound of data which is either already GC'ed, or elegible to be GC'ed at any time based on PITR interval.
+    /// This LSN represents the "end of history" for this timeline, and callers should use it to figure out the oldest
+    /// LSN at which it is legal to create a branch or ephemeral endpoint.
+    ///
+    /// Note that holders of valid LSN leases may be able to create branches and read pages earlier
+    /// than this LSN, but new leases may not be taken out earlier than this LSN.
+    #[serde(default)]
+    pub min_readable_lsn: Lsn,
+
     pub disk_consistent_lsn: Lsn,
 
     /// The LSN that we have succesfully uploaded to remote storage
diff --git a/libs/postgres_backend/src/lib.rs b/libs/postgres_backend/src/lib.rs
index 8c024375c1..f74b229ac4 100644
--- a/libs/postgres_backend/src/lib.rs
+++ b/libs/postgres_backend/src/lib.rs
@@ -9,6 +9,8 @@ use bytes::Bytes;
 use serde::{Deserialize, Serialize};
 use std::io::ErrorKind;
 use std::net::SocketAddr;
+use std::os::fd::AsRawFd;
+use std::os::fd::RawFd;
 use std::pin::Pin;
 use std::sync::Arc;
 use std::task::{ready, Poll};
@@ -268,6 +270,7 @@ impl<IO: AsyncRead + AsyncWrite + Unpin> MaybeWriteOnly<IO> {
 }
 
 pub struct PostgresBackend<IO> {
+    pub socket_fd: RawFd,
     framed: MaybeWriteOnly<IO>,
 
     pub state: ProtoState,
@@ -293,9 +296,11 @@ impl PostgresBackend<tokio::net::TcpStream> {
         tls_config: Option<Arc<rustls::ServerConfig>>,
     ) -> io::Result<Self> {
         let peer_addr = socket.peer_addr()?;
+        let socket_fd = socket.as_raw_fd();
         let stream = MaybeTlsStream::Unencrypted(socket);
 
         Ok(Self {
+            socket_fd,
             framed: MaybeWriteOnly::Full(Framed::new(stream)),
             state: ProtoState::Initialization,
             auth_type,
@@ -307,6 +312,7 @@ impl PostgresBackend<tokio::net::TcpStream> {
 
 impl<IO: AsyncRead + AsyncWrite + Unpin> PostgresBackend<IO> {
     pub fn new_from_io(
+        socket_fd: RawFd,
         socket: IO,
         peer_addr: SocketAddr,
         auth_type: AuthType,
@@ -315,6 +321,7 @@ impl<IO: AsyncRead + AsyncWrite + Unpin> PostgresBackend<IO> {
         let stream = MaybeTlsStream::Unencrypted(socket);
 
         Ok(Self {
+            socket_fd,
             framed: MaybeWriteOnly::Full(Framed::new(stream)),
             state: ProtoState::Initialization,
             auth_type,
diff --git a/libs/proxy/tokio-postgres2/Cargo.toml b/libs/proxy/tokio-postgres2/Cargo.toml
index ade0ffc9f6..161c6b8309 100644
--- a/libs/proxy/tokio-postgres2/Cargo.toml
+++ b/libs/proxy/tokio-postgres2/Cargo.toml
@@ -5,18 +5,15 @@ edition = "2021"
 license = "MIT/Apache-2.0"
 
 [dependencies]
-async-trait.workspace = true
 bytes.workspace = true
-byteorder.workspace = true
 fallible-iterator.workspace = true
 futures-util = { workspace = true, features = ["sink"] }
 log = "0.4"
 parking_lot.workspace = true
-percent-encoding = "2.0"
 pin-project-lite.workspace = true
 phf = "0.11"
 postgres-protocol2 = { path = "../postgres-protocol2" }
 postgres-types2 = { path = "../postgres-types2" }
 tokio = { workspace = true, features = ["io-util", "time", "net"] }
 tokio-util = { workspace = true, features = ["codec"] }
-serde = { workspace = true, features = ["derive"] }
\ No newline at end of file
+serde = { workspace = true, features = ["derive"] }
diff --git a/libs/proxy/tokio-postgres2/src/client.rs b/libs/proxy/tokio-postgres2/src/client.rs
index 9bbbd4c260..46151ab924 100644
--- a/libs/proxy/tokio-postgres2/src/client.rs
+++ b/libs/proxy/tokio-postgres2/src/client.rs
@@ -10,8 +10,8 @@ use crate::simple_query::SimpleQueryStream;
 use crate::types::{Oid, ToSql, Type};
 
 use crate::{
-    prepare, query, simple_query, slice_iter, CancelToken, Error, ReadyForQueryStatus, Row,
-    SimpleQueryMessage, Statement, ToStatement, Transaction, TransactionBuilder,
+    query, simple_query, slice_iter, CancelToken, Error, ReadyForQueryStatus, Row,
+    SimpleQueryMessage, Statement, Transaction, TransactionBuilder,
 };
 use bytes::BytesMut;
 use fallible_iterator::FallibleIterator;
@@ -54,18 +54,18 @@ impl Responses {
 }
 
 /// A cache of type info and prepared statements for fetching type info
-/// (corresponding to the queries in the [prepare] module).
+/// (corresponding to the queries in the [crate::prepare] module).
 #[derive(Default)]
 struct CachedTypeInfo {
     /// A statement for basic information for a type from its
-    /// OID. Corresponds to [TYPEINFO_QUERY](prepare::TYPEINFO_QUERY) (or its
+    /// OID. Corresponds to [TYPEINFO_QUERY](crate::prepare::TYPEINFO_QUERY) (or its
     /// fallback).
     typeinfo: Option<Statement>,
     /// A statement for getting information for a composite type from its OID.
-    /// Corresponds to [TYPEINFO_QUERY](prepare::TYPEINFO_COMPOSITE_QUERY).
+    /// Corresponds to [TYPEINFO_QUERY](crate::prepare::TYPEINFO_COMPOSITE_QUERY).
     typeinfo_composite: Option<Statement>,
     /// A statement for getting information for a composite type from its OID.
-    /// Corresponds to [TYPEINFO_QUERY](prepare::TYPEINFO_COMPOSITE_QUERY) (or
+    /// Corresponds to [TYPEINFO_QUERY](crate::prepare::TYPEINFO_COMPOSITE_QUERY) (or
     /// its fallback).
     typeinfo_enum: Option<Statement>,
 
@@ -190,26 +190,6 @@ impl Client {
         &self.inner
     }
 
-    /// Creates a new prepared statement.
-    ///
-    /// Prepared statements can be executed repeatedly, and may contain query parameters (indicated by `$1`, `$2`, etc),
-    /// which are set when executed. Prepared statements can only be used with the connection that created them.
-    pub async fn prepare(&self, query: &str) -> Result<Statement, Error> {
-        self.prepare_typed(query, &[]).await
-    }
-
-    /// Like `prepare`, but allows the types of query parameters to be explicitly specified.
-    ///
-    /// The list of types may be smaller than the number of parameters - the types of the remaining parameters will be
-    /// inferred. For example, `client.prepare_typed(query, &[])` is equivalent to `client.prepare(query)`.
-    pub async fn prepare_typed(
-        &self,
-        query: &str,
-        parameter_types: &[Type],
-    ) -> Result<Statement, Error> {
-        prepare::prepare(&self.inner, query, parameter_types).await
-    }
-
     /// Executes a statement, returning a vector of the resulting rows.
     ///
     /// A statement may contain parameters, specified by `$n`, where `n` is the index of the parameter of the list
@@ -222,14 +202,11 @@ impl Client {
     /// # Panics
     ///
     /// Panics if the number of parameters provided does not match the number expected.
-    pub async fn query<T>(
+    pub async fn query(
         &self,
-        statement: &T,
+        statement: Statement,
         params: &[&(dyn ToSql + Sync)],
-    ) -> Result<Vec<Row>, Error>
-    where
-        T: ?Sized + ToStatement,
-    {
+    ) -> Result<Vec<Row>, Error> {
         self.query_raw(statement, slice_iter(params))
             .await?
             .try_collect()
@@ -250,13 +227,15 @@ impl Client {
     /// Panics if the number of parameters provided does not match the number expected.
     ///
     /// [`query`]: #method.query
-    pub async fn query_raw<'a, T, I>(&self, statement: &T, params: I) -> Result<RowStream, Error>
+    pub async fn query_raw<'a, I>(
+        &self,
+        statement: Statement,
+        params: I,
+    ) -> Result<RowStream, Error>
     where
-        T: ?Sized + ToStatement,
         I: IntoIterator<Item = &'a (dyn ToSql + Sync)>,
         I::IntoIter: ExactSizeIterator,
     {
-        let statement = statement.__convert().into_statement(self).await?;
         query::query(&self.inner, statement, params).await
     }
 
@@ -271,55 +250,6 @@ impl Client {
         query::query_txt(&self.inner, statement, params).await
     }
 
-    /// Executes a statement, returning the number of rows modified.
-    ///
-    /// A statement may contain parameters, specified by `$n`, where `n` is the index of the parameter of the list
-    /// provided, 1-indexed.
-    ///
-    /// The `statement` argument can either be a `Statement`, or a raw query string. If the same statement will be
-    /// repeatedly executed (perhaps with different query parameters), consider preparing the statement up front
-    /// with the `prepare` method.
-    ///
-    /// If the statement does not modify any rows (e.g. `SELECT`), 0 is returned.
-    ///
-    /// # Panics
-    ///
-    /// Panics if the number of parameters provided does not match the number expected.
-    pub async fn execute<T>(
-        &self,
-        statement: &T,
-        params: &[&(dyn ToSql + Sync)],
-    ) -> Result<u64, Error>
-    where
-        T: ?Sized + ToStatement,
-    {
-        self.execute_raw(statement, slice_iter(params)).await
-    }
-
-    /// The maximally flexible version of [`execute`].
-    ///
-    /// A statement may contain parameters, specified by `$n`, where `n` is the index of the parameter of the list
-    /// provided, 1-indexed.
-    ///
-    /// The `statement` argument can either be a `Statement`, or a raw query string. If the same statement will be
-    /// repeatedly executed (perhaps with different query parameters), consider preparing the statement up front
-    /// with the `prepare` method.
-    ///
-    /// # Panics
-    ///
-    /// Panics if the number of parameters provided does not match the number expected.
-    ///
-    /// [`execute`]: #method.execute
-    pub async fn execute_raw<'a, T, I>(&self, statement: &T, params: I) -> Result<u64, Error>
-    where
-        T: ?Sized + ToStatement,
-        I: IntoIterator<Item = &'a (dyn ToSql + Sync)>,
-        I::IntoIter: ExactSizeIterator,
-    {
-        let statement = statement.__convert().into_statement(self).await?;
-        query::execute(self.inner(), statement, params).await
-    }
-
     /// Executes a sequence of SQL statements using the simple query protocol, returning the resulting rows.
     ///
     /// Statements should be separated by semicolons. If an error occurs, execution of the sequence will stop at that
diff --git a/libs/proxy/tokio-postgres2/src/generic_client.rs b/libs/proxy/tokio-postgres2/src/generic_client.rs
index 768213f8ed..042b5a675e 100644
--- a/libs/proxy/tokio-postgres2/src/generic_client.rs
+++ b/libs/proxy/tokio-postgres2/src/generic_client.rs
@@ -1,7 +1,8 @@
+#![allow(async_fn_in_trait)]
+
 use crate::query::RowStream;
 use crate::types::Type;
 use crate::{Client, Error, Transaction};
-use async_trait::async_trait;
 use postgres_protocol2::Oid;
 
 mod private {
@@ -11,7 +12,6 @@ mod private {
 /// A trait allowing abstraction over connections and transactions.
 ///
 /// This trait is "sealed", and cannot be implemented outside of this crate.
-#[async_trait]
 pub trait GenericClient: private::Sealed {
     /// Like `Client::query_raw_txt`.
     async fn query_raw_txt<S, I>(&self, statement: &str, params: I) -> Result<RowStream, Error>
@@ -26,7 +26,6 @@ pub trait GenericClient: private::Sealed {
 
 impl private::Sealed for Client {}
 
-#[async_trait]
 impl GenericClient for Client {
     async fn query_raw_txt<S, I>(&self, statement: &str, params: I) -> Result<RowStream, Error>
     where
@@ -39,14 +38,12 @@ impl GenericClient for Client {
 
     /// Query for type information
     async fn get_type(&self, oid: Oid) -> Result<Type, Error> {
-        self.get_type(oid).await
+        crate::prepare::get_type(self.inner(), oid).await
     }
 }
 
 impl private::Sealed for Transaction<'_> {}
 
-#[async_trait]
-#[allow(clippy::needless_lifetimes)]
 impl GenericClient for Transaction<'_> {
     async fn query_raw_txt<S, I>(&self, statement: &str, params: I) -> Result<RowStream, Error>
     where
diff --git a/libs/proxy/tokio-postgres2/src/lib.rs b/libs/proxy/tokio-postgres2/src/lib.rs
index 9155dd8279..7426279167 100644
--- a/libs/proxy/tokio-postgres2/src/lib.rs
+++ b/libs/proxy/tokio-postgres2/src/lib.rs
@@ -14,7 +14,6 @@ pub use crate::row::{Row, SimpleQueryRow};
 pub use crate::simple_query::SimpleQueryStream;
 pub use crate::statement::{Column, Statement};
 pub use crate::tls::NoTls;
-pub use crate::to_statement::ToStatement;
 pub use crate::transaction::Transaction;
 pub use crate::transaction_builder::{IsolationLevel, TransactionBuilder};
 use crate::types::ToSql;
@@ -65,7 +64,6 @@ pub mod row;
 mod simple_query;
 mod statement;
 pub mod tls;
-mod to_statement;
 mod transaction;
 mod transaction_builder;
 pub mod types;
diff --git a/libs/proxy/tokio-postgres2/src/prepare.rs b/libs/proxy/tokio-postgres2/src/prepare.rs
index da0c755c5b..58bbb26cbc 100644
--- a/libs/proxy/tokio-postgres2/src/prepare.rs
+++ b/libs/proxy/tokio-postgres2/src/prepare.rs
@@ -1,7 +1,6 @@
 use crate::client::InnerClient;
 use crate::codec::FrontendMessage;
 use crate::connection::RequestMessages;
-use crate::error::SqlState;
 use crate::types::{Field, Kind, Oid, Type};
 use crate::{query, slice_iter};
 use crate::{Column, Error, Statement};
@@ -13,7 +12,6 @@ use postgres_protocol2::message::backend::Message;
 use postgres_protocol2::message::frontend;
 use std::future::Future;
 use std::pin::Pin;
-use std::sync::atomic::{AtomicUsize, Ordering};
 use std::sync::Arc;
 
 pub(crate) const TYPEINFO_QUERY: &str = "\
@@ -24,14 +22,6 @@ INNER JOIN pg_catalog.pg_namespace n ON t.typnamespace = n.oid
 WHERE t.oid = $1
 ";
 
-// Range types weren't added until Postgres 9.2, so pg_range may not exist
-const TYPEINFO_FALLBACK_QUERY: &str = "\
-SELECT t.typname, t.typtype, t.typelem, NULL::OID, t.typbasetype, n.nspname, t.typrelid
-FROM pg_catalog.pg_type t
-INNER JOIN pg_catalog.pg_namespace n ON t.typnamespace = n.oid
-WHERE t.oid = $1
-";
-
 const TYPEINFO_ENUM_QUERY: &str = "\
 SELECT enumlabel
 FROM pg_catalog.pg_enum
@@ -39,14 +29,6 @@ WHERE enumtypid = $1
 ORDER BY enumsortorder
 ";
 
-// Postgres 9.0 didn't have enumsortorder
-const TYPEINFO_ENUM_FALLBACK_QUERY: &str = "\
-SELECT enumlabel
-FROM pg_catalog.pg_enum
-WHERE enumtypid = $1
-ORDER BY oid
-";
-
 pub(crate) const TYPEINFO_COMPOSITE_QUERY: &str = "\
 SELECT attname, atttypid
 FROM pg_catalog.pg_attribute
@@ -56,15 +38,13 @@ AND attnum > 0
 ORDER BY attnum
 ";
 
-static NEXT_ID: AtomicUsize = AtomicUsize::new(0);
-
 pub async fn prepare(
     client: &Arc<InnerClient>,
+    name: &'static str,
     query: &str,
     types: &[Type],
 ) -> Result<Statement, Error> {
-    let name = format!("s{}", NEXT_ID.fetch_add(1, Ordering::SeqCst));
-    let buf = encode(client, &name, query, types)?;
+    let buf = encode(client, name, query, types)?;
     let mut responses = client.send(RequestMessages::Single(FrontendMessage::Raw(buf)))?;
 
     match responses.next().await? {
@@ -105,10 +85,11 @@ pub async fn prepare(
 
 fn prepare_rec<'a>(
     client: &'a Arc<InnerClient>,
+    name: &'static str,
     query: &'a str,
     types: &'a [Type],
 ) -> Pin<Box<dyn Future<Output = Result<Statement, Error>> + 'a + Send>> {
-    Box::pin(prepare(client, query, types))
+    Box::pin(prepare(client, name, query, types))
 }
 
 fn encode(client: &InnerClient, name: &str, query: &str, types: &[Type]) -> Result<Bytes, Error> {
@@ -192,13 +173,8 @@ async fn typeinfo_statement(client: &Arc<InnerClient>) -> Result<Statement, Erro
         return Ok(stmt);
     }
 
-    let stmt = match prepare_rec(client, TYPEINFO_QUERY, &[]).await {
-        Ok(stmt) => stmt,
-        Err(ref e) if e.code() == Some(&SqlState::UNDEFINED_TABLE) => {
-            prepare_rec(client, TYPEINFO_FALLBACK_QUERY, &[]).await?
-        }
-        Err(e) => return Err(e),
-    };
+    let typeinfo = "neon_proxy_typeinfo";
+    let stmt = prepare_rec(client, typeinfo, TYPEINFO_QUERY, &[]).await?;
 
     client.set_typeinfo(&stmt);
     Ok(stmt)
@@ -219,13 +195,8 @@ async fn typeinfo_enum_statement(client: &Arc<InnerClient>) -> Result<Statement,
         return Ok(stmt);
     }
 
-    let stmt = match prepare_rec(client, TYPEINFO_ENUM_QUERY, &[]).await {
-        Ok(stmt) => stmt,
-        Err(ref e) if e.code() == Some(&SqlState::UNDEFINED_COLUMN) => {
-            prepare_rec(client, TYPEINFO_ENUM_FALLBACK_QUERY, &[]).await?
-        }
-        Err(e) => return Err(e),
-    };
+    let typeinfo = "neon_proxy_typeinfo_enum";
+    let stmt = prepare_rec(client, typeinfo, TYPEINFO_ENUM_QUERY, &[]).await?;
 
     client.set_typeinfo_enum(&stmt);
     Ok(stmt)
@@ -255,7 +226,8 @@ async fn typeinfo_composite_statement(client: &Arc<InnerClient>) -> Result<State
         return Ok(stmt);
     }
 
-    let stmt = prepare_rec(client, TYPEINFO_COMPOSITE_QUERY, &[]).await?;
+    let typeinfo = "neon_proxy_typeinfo_composite";
+    let stmt = prepare_rec(client, typeinfo, TYPEINFO_COMPOSITE_QUERY, &[]).await?;
 
     client.set_typeinfo_composite(&stmt);
     Ok(stmt)
diff --git a/libs/proxy/tokio-postgres2/src/query.rs b/libs/proxy/tokio-postgres2/src/query.rs
index 534195a707..e21631c85d 100644
--- a/libs/proxy/tokio-postgres2/src/query.rs
+++ b/libs/proxy/tokio-postgres2/src/query.rs
@@ -157,49 +157,6 @@ where
     })
 }
 
-pub async fn execute<'a, I>(
-    client: &InnerClient,
-    statement: Statement,
-    params: I,
-) -> Result<u64, Error>
-where
-    I: IntoIterator<Item = &'a (dyn ToSql + Sync)>,
-    I::IntoIter: ExactSizeIterator,
-{
-    let buf = if log_enabled!(Level::Debug) {
-        let params = params.into_iter().collect::<Vec<_>>();
-        debug!(
-            "executing statement {} with parameters: {:?}",
-            statement.name(),
-            BorrowToSqlParamsDebug(params.as_slice()),
-        );
-        encode(client, &statement, params)?
-    } else {
-        encode(client, &statement, params)?
-    };
-    let mut responses = start(client, buf).await?;
-
-    let mut rows = 0;
-    loop {
-        match responses.next().await? {
-            Message::DataRow(_) => {}
-            Message::CommandComplete(body) => {
-                rows = body
-                    .tag()
-                    .map_err(Error::parse)?
-                    .rsplit(' ')
-                    .next()
-                    .unwrap()
-                    .parse()
-                    .unwrap_or(0);
-            }
-            Message::EmptyQueryResponse => rows = 0,
-            Message::ReadyForQuery(_) => return Ok(rows),
-            _ => return Err(Error::unexpected_message()),
-        }
-    }
-}
-
 async fn start(client: &InnerClient, buf: Bytes) -> Result<Responses, Error> {
     let mut responses = client.send(RequestMessages::Single(FrontendMessage::Raw(buf)))?;
 
diff --git a/libs/proxy/tokio-postgres2/src/statement.rs b/libs/proxy/tokio-postgres2/src/statement.rs
index 22e160fc05..591872fbc5 100644
--- a/libs/proxy/tokio-postgres2/src/statement.rs
+++ b/libs/proxy/tokio-postgres2/src/statement.rs
@@ -13,7 +13,7 @@ use std::{
 
 struct StatementInner {
     client: Weak<InnerClient>,
-    name: String,
+    name: &'static str,
     params: Vec<Type>,
     columns: Vec<Column>,
 }
@@ -22,7 +22,7 @@ impl Drop for StatementInner {
     fn drop(&mut self) {
         if let Some(client) = self.client.upgrade() {
             let buf = client.with_buf(|buf| {
-                frontend::close(b'S', &self.name, buf).unwrap();
+                frontend::close(b'S', self.name, buf).unwrap();
                 frontend::sync(buf);
                 buf.split().freeze()
             });
@@ -40,7 +40,7 @@ pub struct Statement(Arc<StatementInner>);
 impl Statement {
     pub(crate) fn new(
         inner: &Arc<InnerClient>,
-        name: String,
+        name: &'static str,
         params: Vec<Type>,
         columns: Vec<Column>,
     ) -> Statement {
@@ -55,14 +55,14 @@ impl Statement {
     pub(crate) fn new_anonymous(params: Vec<Type>, columns: Vec<Column>) -> Statement {
         Statement(Arc::new(StatementInner {
             client: Weak::new(),
-            name: String::new(),
+            name: "<anonymous>",
             params,
             columns,
         }))
     }
 
     pub(crate) fn name(&self) -> &str {
-        &self.0.name
+        self.0.name
     }
 
     /// Returns the expected types of the statement's parameters.
diff --git a/libs/proxy/tokio-postgres2/src/to_statement.rs b/libs/proxy/tokio-postgres2/src/to_statement.rs
deleted file mode 100644
index 7e12992728..0000000000
--- a/libs/proxy/tokio-postgres2/src/to_statement.rs
+++ /dev/null
@@ -1,57 +0,0 @@
-use crate::to_statement::private::{Sealed, ToStatementType};
-use crate::Statement;
-
-mod private {
-    use crate::{Client, Error, Statement};
-
-    pub trait Sealed {}
-
-    pub enum ToStatementType<'a> {
-        Statement(&'a Statement),
-        Query(&'a str),
-    }
-
-    impl ToStatementType<'_> {
-        pub async fn into_statement(self, client: &Client) -> Result<Statement, Error> {
-            match self {
-                ToStatementType::Statement(s) => Ok(s.clone()),
-                ToStatementType::Query(s) => client.prepare(s).await,
-            }
-        }
-    }
-}
-
-/// A trait abstracting over prepared and unprepared statements.
-///
-/// Many methods are generic over this bound, so that they support both a raw query string as well as a statement which
-/// was prepared previously.
-///
-/// This trait is "sealed" and cannot be implemented by anything outside this crate.
-pub trait ToStatement: Sealed {
-    #[doc(hidden)]
-    fn __convert(&self) -> ToStatementType<'_>;
-}
-
-impl ToStatement for Statement {
-    fn __convert(&self) -> ToStatementType<'_> {
-        ToStatementType::Statement(self)
-    }
-}
-
-impl Sealed for Statement {}
-
-impl ToStatement for str {
-    fn __convert(&self) -> ToStatementType<'_> {
-        ToStatementType::Query(self)
-    }
-}
-
-impl Sealed for str {}
-
-impl ToStatement for String {
-    fn __convert(&self) -> ToStatementType<'_> {
-        ToStatementType::Query(self)
-    }
-}
-
-impl Sealed for String {}
diff --git a/libs/safekeeper_api/src/membership.rs b/libs/safekeeper_api/src/membership.rs
index a39fda526f..8b14a4f290 100644
--- a/libs/safekeeper_api/src/membership.rs
+++ b/libs/safekeeper_api/src/membership.rs
@@ -9,13 +9,43 @@ use anyhow::bail;
 use serde::{Deserialize, Serialize};
 use utils::id::NodeId;
 
-/// Number uniquely identifying safekeeper configuration.
-/// Note: it is a part of sk control file.
-pub type Generation = u32;
 /// 1 is the first valid generation, 0 is used as
 /// a placeholder before we fully migrate to generations.
-pub const INVALID_GENERATION: Generation = 0;
-pub const INITIAL_GENERATION: Generation = 1;
+pub const INVALID_GENERATION: SafekeeperGeneration = SafekeeperGeneration::new(0);
+pub const INITIAL_GENERATION: SafekeeperGeneration = SafekeeperGeneration::new(1);
+
+/// Number uniquely identifying safekeeper configuration.
+/// Note: it is a part of sk control file.
+///
+/// Like tenant generations, but for safekeepers.
+#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
+pub struct SafekeeperGeneration(u32);
+
+impl SafekeeperGeneration {
+    pub const fn new(v: u32) -> Self {
+        Self(v)
+    }
+
+    #[track_caller]
+    pub fn previous(&self) -> Option<Self> {
+        Some(Self(self.0.checked_sub(1)?))
+    }
+
+    #[track_caller]
+    pub fn next(&self) -> Self {
+        Self(self.0 + 1)
+    }
+
+    pub fn into_inner(self) -> u32 {
+        self.0
+    }
+}
+
+impl Display for SafekeeperGeneration {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", self.0)
+    }
+}
 
 /// Membership is defined by ids so e.g. walproposer uses them to figure out
 /// quorums, but we also carry host and port to give wp idea where to connect.
@@ -89,7 +119,7 @@ impl Display for MemberSet {
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
 pub struct Configuration {
     /// Unique id.
-    pub generation: Generation,
+    pub generation: SafekeeperGeneration,
     /// Current members of the configuration.
     pub members: MemberSet,
     /// Some means it is a joint conf.
diff --git a/libs/safekeeper_api/src/models.rs b/libs/safekeeper_api/src/models.rs
index 30418b0efd..41ccdaa428 100644
--- a/libs/safekeeper_api/src/models.rs
+++ b/libs/safekeeper_api/src/models.rs
@@ -282,3 +282,18 @@ pub struct TimelineTermBumpResponse {
 pub struct SafekeeperUtilization {
     pub timeline_count: u64,
 }
+
+/// pull_timeline request body.
+#[derive(Debug, Deserialize, Serialize)]
+pub struct PullTimelineRequest {
+    pub tenant_id: TenantId,
+    pub timeline_id: TimelineId,
+    pub http_hosts: Vec<String>,
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+pub struct PullTimelineResponse {
+    // Donor safekeeper host
+    pub safekeeper_host: String,
+    // TODO: add more fields?
+}
diff --git a/libs/utils/Cargo.toml b/libs/utils/Cargo.toml
index 0f10300959..62e0f4cfba 100644
--- a/libs/utils/Cargo.toml
+++ b/libs/utils/Cargo.toml
@@ -24,11 +24,10 @@ diatomic-waker.workspace = true
 git-version.workspace = true
 hex = { workspace = true, features = ["serde"] }
 humantime.workspace = true
-inferno.workspace = true
 fail.workspace = true
 futures = { workspace = true }
 jsonwebtoken.workspace = true
-nix.workspace = true
+nix = {workspace = true, features = [ "ioctl" ] }
 once_cell.workspace = true
 pin-project-lite.workspace = true
 regex.workspace = true
diff --git a/libs/utils/src/bin_ser.rs b/libs/utils/src/bin_ser.rs
index 42b45eeea0..4d173d0726 100644
--- a/libs/utils/src/bin_ser.rs
+++ b/libs/utils/src/bin_ser.rs
@@ -286,6 +286,11 @@ mod tests {
     const SHORT2_ENC_LE: &[u8] = &[8, 0, 0, 3, 7];
     const SHORT2_ENC_LE_TRAILING: &[u8] = &[8, 0, 0, 3, 7, 0xff, 0xff, 0xff];
 
+    #[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
+    struct NewTypeStruct(u32);
+    const NT1: NewTypeStruct = NewTypeStruct(414243);
+    const NT1_INNER: u32 = 414243;
+
     #[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
     pub struct LongMsg {
         pub tag: u8,
@@ -408,4 +413,42 @@ mod tests {
         let msg2 = LongMsg::des(&encoded).unwrap();
         assert_eq!(msg, msg2);
     }
+
+    #[test]
+    /// Ensure that newtype wrappers around u32 don't change the serialization format
+    fn be_nt() {
+        use super::BeSer;
+
+        assert_eq!(NT1.serialized_size().unwrap(), 4);
+
+        let msg = NT1;
+
+        let encoded = msg.ser().unwrap();
+        let expected = hex_literal::hex!("0006 5223");
+        assert_eq!(encoded, expected);
+
+        assert_eq!(encoded, NT1_INNER.ser().unwrap());
+
+        let msg2 = NewTypeStruct::des(&encoded).unwrap();
+        assert_eq!(msg, msg2);
+    }
+
+    #[test]
+    /// Ensure that newtype wrappers around u32 don't change the serialization format
+    fn le_nt() {
+        use super::LeSer;
+
+        assert_eq!(NT1.serialized_size().unwrap(), 4);
+
+        let msg = NT1;
+
+        let encoded = msg.ser().unwrap();
+        let expected = hex_literal::hex!("2352 0600");
+        assert_eq!(encoded, expected);
+
+        assert_eq!(encoded, NT1_INNER.ser().unwrap());
+
+        let msg2 = NewTypeStruct::des(&encoded).unwrap();
+        assert_eq!(msg, msg2);
+    }
 }
diff --git a/libs/utils/src/lib.rs b/libs/utils/src/lib.rs
index 820ff2d5ea..9389a27bf3 100644
--- a/libs/utils/src/lib.rs
+++ b/libs/utils/src/lib.rs
@@ -93,6 +93,9 @@ pub mod try_rcu;
 
 pub mod guard_arc_swap;
 
+#[cfg(target_os = "linux")]
+pub mod linux_socket_ioctl;
+
 // Re-export used in macro. Avoids adding git-version as dep in target crates.
 #[doc(hidden)]
 pub use git_version;
diff --git a/libs/utils/src/linux_socket_ioctl.rs b/libs/utils/src/linux_socket_ioctl.rs
new file mode 100644
index 0000000000..5ae0e86af8
--- /dev/null
+++ b/libs/utils/src/linux_socket_ioctl.rs
@@ -0,0 +1,35 @@
+//! Linux-specific socket ioctls.
+//!
+//! <https://elixir.bootlin.com/linux/v6.1.128/source/include/uapi/linux/sockios.h#L25-L27>
+
+use std::{
+    io,
+    mem::MaybeUninit,
+    os::{fd::RawFd, raw::c_int},
+};
+
+use nix::libc::{FIONREAD, TIOCOUTQ};
+
+unsafe fn do_ioctl(socket_fd: RawFd, cmd: nix::libc::Ioctl) -> io::Result<c_int> {
+    let mut inq: MaybeUninit<c_int> = MaybeUninit::uninit();
+    let err = nix::libc::ioctl(socket_fd, cmd, inq.as_mut_ptr());
+    if err == 0 {
+        Ok(inq.assume_init())
+    } else {
+        Err(io::Error::last_os_error())
+    }
+}
+
+/// # Safety
+///
+/// Caller must ensure that `socket_fd` is a valid TCP socket file descriptor.
+pub unsafe fn inq(socket_fd: RawFd) -> io::Result<c_int> {
+    do_ioctl(socket_fd, FIONREAD)
+}
+
+/// # Safety
+///
+/// Caller must ensure that `socket_fd` is a valid TCP socket file descriptor.
+pub unsafe fn outq(socket_fd: RawFd) -> io::Result<c_int> {
+    do_ioctl(socket_fd, TIOCOUTQ)
+}
diff --git a/libs/utils/src/shard.rs b/libs/utils/src/shard.rs
index 6352ea9f92..d98284f969 100644
--- a/libs/utils/src/shard.rs
+++ b/libs/utils/src/shard.rs
@@ -117,6 +117,10 @@ impl TenantShardId {
         )
     }
 
+    pub fn range(&self) -> RangeInclusive<Self> {
+        RangeInclusive::new(*self, *self)
+    }
+
     pub fn shard_slug(&self) -> impl std::fmt::Display + '_ {
         ShardSlug(self)
     }
diff --git a/pageserver/client/src/mgmt_api.rs b/pageserver/client/src/mgmt_api.rs
index da7ec5abce..bb0f64ca32 100644
--- a/pageserver/client/src/mgmt_api.rs
+++ b/pageserver/client/src/mgmt_api.rs
@@ -477,6 +477,26 @@ impl Client {
         self.request(Method::POST, &uri, ()).await.map(|_| ())
     }
 
+    pub async fn timeline_download_heatmap_layers(
+        &self,
+        tenant_shard_id: TenantShardId,
+        timeline_id: TimelineId,
+        concurrency: Option<usize>,
+    ) -> Result<()> {
+        let mut path = reqwest::Url::parse(&format!(
+            "{}/v1/tenant/{}/timeline/{}/download_heatmap_layers",
+            self.mgmt_api_endpoint, tenant_shard_id, timeline_id
+        ))
+        .expect("Cannot build URL");
+
+        if let Some(concurrency) = concurrency {
+            path.query_pairs_mut()
+                .append_pair("concurrency", &format!("{}", concurrency));
+        }
+
+        self.request(Method::POST, path, ()).await.map(|_| ())
+    }
+
     pub async fn tenant_reset(&self, tenant_shard_id: TenantShardId) -> Result<()> {
         let uri = format!(
             "{}/v1/tenant/{}/reset",
diff --git a/pageserver/src/basebackup.rs b/pageserver/src/basebackup.rs
index a6087920fd..e03b1bbe96 100644
--- a/pageserver/src/basebackup.rs
+++ b/pageserver/src/basebackup.rs
@@ -13,7 +13,7 @@
 use anyhow::{anyhow, Context};
 use bytes::{BufMut, Bytes, BytesMut};
 use fail::fail_point;
-use pageserver_api::key::Key;
+use pageserver_api::key::{rel_block_to_key, Key};
 use postgres_ffi::pg_constants;
 use std::fmt::Write as FmtWrite;
 use std::time::{Instant, SystemTime};
@@ -42,8 +42,8 @@ use utils::lsn::Lsn;
 pub enum BasebackupError {
     #[error("basebackup pageserver error {0:#}")]
     Server(#[from] anyhow::Error),
-    #[error("basebackup client error {0:#}")]
-    Client(#[source] io::Error),
+    #[error("basebackup client error {0:#} when {1}")]
+    Client(#[source] io::Error, &'static str),
 }
 
 /// Create basebackup with non-rel data in it.
@@ -234,7 +234,7 @@ where
         self.ar
             .append(&header, self.buf.as_slice())
             .await
-            .map_err(BasebackupError::Client)?;
+            .map_err(|e| BasebackupError::Client(e, "flush"))?;
 
         self.total_blocks += nblocks;
         debug!("Added to basebackup slru {} relsize {}", segname, nblocks);
@@ -273,9 +273,9 @@ where
         for dir in subdirs.iter() {
             let header = new_tar_header_dir(dir)?;
             self.ar
-                .append(&header, &mut io::empty())
+                .append(&header, io::empty())
                 .await
-                .context("could not add directory to basebackup tarball")?;
+                .map_err(|e| BasebackupError::Client(e, "send_tarball"))?;
         }
 
         // Send config files.
@@ -286,13 +286,13 @@ where
                 self.ar
                     .append(&header, data)
                     .await
-                    .context("could not add config file to basebackup tarball")?;
+                    .map_err(|e| BasebackupError::Client(e, "send_tarball,pg_hba.conf"))?;
             } else {
                 let header = new_tar_header(filepath, 0)?;
                 self.ar
-                    .append(&header, &mut io::empty())
+                    .append(&header, io::empty())
                     .await
-                    .context("could not add config file to basebackup tarball")?;
+                    .map_err(|e| BasebackupError::Client(e, "send_tarball,add_config_file"))?;
             }
         }
         if !lazy_slru_download {
@@ -406,7 +406,7 @@ where
             self.ar
                 .append(&header, &*content)
                 .await
-                .context("could not add aux file to basebackup tarball")?;
+                .map_err(|e| BasebackupError::Client(e, "send_tarball,add_aux_file"))?;
         }
 
         if min_restart_lsn != Lsn::MAX {
@@ -419,7 +419,7 @@ where
             self.ar
                 .append(&header, &data[..])
                 .await
-                .context("could not add restart.lsn file to basebackup tarball")?;
+                .map_err(|e| BasebackupError::Client(e, "send_tarball,restart.lsn"))?;
         }
         for xid in self
             .timeline
@@ -451,9 +451,9 @@ where
             let crc32 = crc32c::crc32c(&content);
             content.extend_from_slice(&crc32.to_le_bytes());
             let header = new_tar_header("pg_logical/replorigin_checkpoint", content.len() as u64)?;
-            self.ar.append(&header, &*content).await.context(
-                "could not add pg_logical/replorigin_checkpoint file to basebackup tarball",
-            )?;
+            self.ar.append(&header, &*content).await.map_err(|e| {
+                BasebackupError::Client(e, "send_tarball,pg_logical/replorigin_checkpoint")
+            })?;
         }
 
         fail_point!("basebackup-before-control-file", |_| {
@@ -464,7 +464,10 @@ where
 
         // Generate pg_control and bootstrap WAL segment.
         self.add_pgcontrol_file().await?;
-        self.ar.finish().await.map_err(BasebackupError::Client)?;
+        self.ar
+            .finish()
+            .await
+            .map_err(|e| BasebackupError::Client(e, "send_tarball,finish"))?;
         debug!("all tarred up!");
         Ok(())
     }
@@ -482,9 +485,9 @@ where
             let file_name = dst.to_segfile_name(0);
             let header = new_tar_header(&file_name, 0)?;
             self.ar
-                .append(&header, &mut io::empty())
+                .append(&header, io::empty())
                 .await
-                .map_err(BasebackupError::Client)?;
+                .map_err(|e| BasebackupError::Client(e, "add_rel,empty"))?;
             return Ok(());
         }
 
@@ -498,13 +501,9 @@ where
             for blknum in startblk..endblk {
                 let img = self
                     .timeline
-                    .get_rel_page_at_lsn(
-                        src,
-                        blknum,
-                        Version::Lsn(self.lsn),
-                        self.ctx,
-                        self.io_concurrency.clone(),
-                    )
+                    // TODO: investigate using get_vectored for the entire startblk..endblk range.
+                    // But this code path is not on the critical path for most basebackups (?).
+                    .get(rel_block_to_key(src, blknum), self.lsn, self.ctx)
                     .await
                     .map_err(|e| BasebackupError::Server(e.into()))?;
                 segment_data.extend_from_slice(&img[..]);
@@ -515,7 +514,7 @@ where
             self.ar
                 .append(&header, segment_data.as_slice())
                 .await
-                .map_err(BasebackupError::Client)?;
+                .map_err(|e| BasebackupError::Client(e, "add_rel,segment"))?;
 
             seg += 1;
             startblk = endblk;
@@ -566,7 +565,7 @@ where
             self.ar
                 .append(&header, pg_version_str.as_bytes())
                 .await
-                .map_err(BasebackupError::Client)?;
+                .map_err(|e| BasebackupError::Client(e, "add_dbdir,PG_VERSION"))?;
 
             info!("timeline.pg_version {}", self.timeline.pg_version);
 
@@ -576,7 +575,7 @@ where
                 self.ar
                     .append(&header, &img[..])
                     .await
-                    .map_err(BasebackupError::Client)?;
+                    .map_err(|e| BasebackupError::Client(e, "add_dbdir,global/pg_filenode.map"))?;
             } else {
                 warn!("global/pg_filenode.map is missing");
             }
@@ -612,9 +611,9 @@ where
             let path = format!("base/{}", dbnode);
             let header = new_tar_header_dir(&path)?;
             self.ar
-                .append(&header, &mut io::empty())
+                .append(&header, io::empty())
                 .await
-                .map_err(BasebackupError::Client)?;
+                .map_err(|e| BasebackupError::Client(e, "add_dbdir,base"))?;
 
             if let Some(img) = relmap_img {
                 let dst_path = format!("base/{}/PG_VERSION", dbnode);
@@ -627,14 +626,14 @@ where
                 self.ar
                     .append(&header, pg_version_str.as_bytes())
                     .await
-                    .map_err(BasebackupError::Client)?;
+                    .map_err(|e| BasebackupError::Client(e, "add_dbdir,base/PG_VERSION"))?;
 
                 let relmap_path = format!("base/{}/pg_filenode.map", dbnode);
                 let header = new_tar_header(&relmap_path, img.len() as u64)?;
                 self.ar
                     .append(&header, &img[..])
                     .await
-                    .map_err(BasebackupError::Client)?;
+                    .map_err(|e| BasebackupError::Client(e, "add_dbdir,base/pg_filenode.map"))?;
             }
         };
         Ok(())
@@ -663,7 +662,7 @@ where
         self.ar
             .append(&header, &buf[..])
             .await
-            .map_err(BasebackupError::Client)?;
+            .map_err(|e| BasebackupError::Client(e, "add_twophase_file"))?;
 
         Ok(())
     }
@@ -693,7 +692,7 @@ where
                 zenith_signal.as_bytes(),
             )
             .await
-            .map_err(BasebackupError::Client)?;
+            .map_err(|e| BasebackupError::Client(e, "add_pgcontrol_file,zenith.signal"))?;
 
         let checkpoint_bytes = self
             .timeline
@@ -718,7 +717,7 @@ where
         self.ar
             .append(&header, &pg_control_bytes[..])
             .await
-            .map_err(BasebackupError::Client)?;
+            .map_err(|e| BasebackupError::Client(e, "add_pgcontrol_file,pg_control"))?;
 
         //send wal segment
         let segno = self.lsn.segment_number(WAL_SEGMENT_SIZE);
@@ -742,7 +741,7 @@ where
         self.ar
             .append(&header, &wal_seg[..])
             .await
-            .map_err(BasebackupError::Client)?;
+            .map_err(|e| BasebackupError::Client(e, "add_pgcontrol_file,wal_segment"))?;
         Ok(())
     }
 }
diff --git a/pageserver/src/context.rs b/pageserver/src/context.rs
index 8f2177fe5b..da9c095a15 100644
--- a/pageserver/src/context.rs
+++ b/pageserver/src/context.rs
@@ -98,6 +98,7 @@ pub struct RequestContext {
     download_behavior: DownloadBehavior,
     access_stats_behavior: AccessStatsBehavior,
     page_content_kind: PageContentKind,
+    read_path_debug: bool,
 }
 
 /// The kind of access to the page cache.
@@ -155,6 +156,7 @@ impl RequestContextBuilder {
                 download_behavior: DownloadBehavior::Download,
                 access_stats_behavior: AccessStatsBehavior::Update,
                 page_content_kind: PageContentKind::Unknown,
+                read_path_debug: false,
             },
         }
     }
@@ -168,6 +170,7 @@ impl RequestContextBuilder {
                 download_behavior: original.download_behavior,
                 access_stats_behavior: original.access_stats_behavior,
                 page_content_kind: original.page_content_kind,
+                read_path_debug: original.read_path_debug,
             },
         }
     }
@@ -191,6 +194,11 @@ impl RequestContextBuilder {
         self
     }
 
+    pub(crate) fn read_path_debug(mut self, b: bool) -> Self {
+        self.inner.read_path_debug = b;
+        self
+    }
+
     pub fn build(self) -> RequestContext {
         self.inner
     }
@@ -291,4 +299,8 @@ impl RequestContext {
     pub(crate) fn page_content_kind(&self) -> PageContentKind {
         self.page_content_kind
     }
+
+    pub(crate) fn read_path_debug(&self) -> bool {
+        self.read_path_debug
+    }
 }
diff --git a/pageserver/src/http/openapi_spec.yml b/pageserver/src/http/openapi_spec.yml
index 4b976e7f6f..12252739fd 100644
--- a/pageserver/src/http/openapi_spec.yml
+++ b/pageserver/src/http/openapi_spec.yml
@@ -824,6 +824,38 @@ paths:
               schema:
                 $ref: "#/components/schemas/TenantConfigResponse"
 
+  /v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/download_heatmap_layers:
+    parameters:
+      - name: tenant_shard_id
+        in: path
+        required: true
+        schema:
+          type: string
+      - name: timeline_id
+        in: path
+        required: true
+        schema:
+          type: string
+      - name: concurrency
+        description: Maximum number of concurrent downloads (capped at remote storage concurrency)
+        in: query
+        required: false
+        schema:
+          type: integer
+    post:
+      description: |
+        Download all layers in the specified timeline's heatmap. The `tenant_shard_id` parameter
+        may be used to target all shards of a tenant when the unsharded form is used, or a specific
+        tenant shard with the sharded form.
+      responses:
+        "200":
+          description: Success
+    delete:
+      description: Stop any on-going background downloads of heatmap layers for the specified timeline.
+      responses:
+        "200":
+          description: Success
+
   /v1/utilization:
     get:
       description: |
@@ -882,6 +914,8 @@ components:
               properties:
                 reason:
                   type: string
+        gc_blocking:
+          type: string
 
     TenantCreateRequest:
       allOf:
@@ -1080,9 +1114,15 @@ components:
           type: integer
         state:
           type: string
+        min_readable_lsn:
+          type: string
+          format: hex
         latest_gc_cutoff_lsn:
           type: string
           format: hex
+        applied_gc_cutoff_lsn:
+          type: string
+          format: hex
 
     SyntheticSizeResponse:
       type: object
diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs
index bd196621c1..56a84a98a8 100644
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -68,6 +68,7 @@ use tokio_util::sync::CancellationToken;
 use tracing::*;
 
 use crate::config::PageServerConf;
+use crate::context::RequestContextBuilder;
 use crate::context::{DownloadBehavior, RequestContext};
 use crate::deletion_queue::DeletionQueueClient;
 use crate::pgdatadir_mapping::LsnForTimestamp;
@@ -482,6 +483,11 @@ async fn build_timeline_info_common(
 
     let (pitr_history_size, within_ancestor_pitr) = timeline.get_pitr_history_stats();
 
+    let min_readable_lsn = std::cmp::max(
+        timeline.get_gc_cutoff_lsn(),
+        *timeline.get_applied_gc_cutoff_lsn(),
+    );
+
     let info = TimelineInfo {
         tenant_id: timeline.tenant_shard_id,
         timeline_id: timeline.timeline_id,
@@ -493,7 +499,12 @@ async fn build_timeline_info_common(
         initdb_lsn,
         last_record_lsn,
         prev_record_lsn: Some(timeline.get_prev_record_lsn()),
-        latest_gc_cutoff_lsn: *timeline.get_latest_gc_cutoff_lsn(),
+        // Externally, expose the lowest LSN that can be used to create a branch as the "GC cutoff", although internally
+        // we distinguish between the "planned" GC cutoff (PITR point) and the "latest" GC cutoff (where we
+        // actually trimmed data to), which can pass each other when PITR is changed.
+        latest_gc_cutoff_lsn: min_readable_lsn,
+        min_readable_lsn,
+        applied_gc_cutoff_lsn: *timeline.get_applied_gc_cutoff_lsn(),
         current_logical_size: current_logical_size.size_dont_care_about_accuracy(),
         current_logical_size_is_accurate: match current_logical_size.accuracy() {
             tenant::timeline::logical_size::Accuracy::Approximate => false,
@@ -1453,6 +1464,59 @@ async fn timeline_layer_scan_disposable_keys(
     )
 }
 
+async fn timeline_download_heatmap_layers_handler(
+    request: Request<Body>,
+    _cancel: CancellationToken,
+) -> Result<Response<Body>, ApiError> {
+    // Only used in the case where remote storage is not configured.
+    const DEFAULT_MAX_CONCURRENCY: usize = 100;
+    // A conservative default.
+    const DEFAULT_CONCURRENCY: usize = 16;
+
+    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
+    let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
+
+    let desired_concurrency =
+        parse_query_param(&request, "concurrency")?.unwrap_or(DEFAULT_CONCURRENCY);
+
+    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
+
+    let state = get_state(&request);
+    let timeline =
+        active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id)
+            .await?;
+
+    let max_concurrency = get_config(&request)
+        .remote_storage_config
+        .as_ref()
+        .map(|c| c.concurrency_limit())
+        .unwrap_or(DEFAULT_MAX_CONCURRENCY);
+    let concurrency = std::cmp::min(max_concurrency, desired_concurrency);
+
+    timeline.start_heatmap_layers_download(concurrency).await?;
+
+    json_response(StatusCode::ACCEPTED, ())
+}
+
+async fn timeline_shutdown_download_heatmap_layers_handler(
+    request: Request<Body>,
+    _cancel: CancellationToken,
+) -> Result<Response<Body>, ApiError> {
+    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
+    let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
+
+    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
+
+    let state = get_state(&request);
+    let timeline =
+        active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id)
+            .await?;
+
+    timeline.stop_and_drain_heatmap_layers_download().await;
+
+    json_response(StatusCode::OK, ())
+}
+
 async fn layer_download_handler(
     request: Request<Body>,
     _cancel: CancellationToken,
@@ -2331,6 +2395,7 @@ async fn timeline_checkpoint_handler(
                     match e {
                         CompactionError::ShuttingDown => ApiError::ShuttingDown,
                         CompactionError::Offload(e) => ApiError::InternalServerError(anyhow::anyhow!(e)),
+                        CompactionError::CollectKeySpaceError(e) => ApiError::InternalServerError(anyhow::anyhow!(e)),
                         CompactionError::Other(e) => ApiError::InternalServerError(e)
                     }
                 )?;
@@ -2508,14 +2573,30 @@ async fn deletion_queue_flush(
     }
 }
 
-/// Try if `GetPage@Lsn` is successful, useful for manual debugging.
 async fn getpage_at_lsn_handler(
+    request: Request<Body>,
+    cancel: CancellationToken,
+) -> Result<Response<Body>, ApiError> {
+    getpage_at_lsn_handler_inner(false, request, cancel).await
+}
+
+async fn touchpage_at_lsn_handler(
+    request: Request<Body>,
+    cancel: CancellationToken,
+) -> Result<Response<Body>, ApiError> {
+    getpage_at_lsn_handler_inner(true, request, cancel).await
+}
+
+/// Try if `GetPage@Lsn` is successful, useful for manual debugging.
+async fn getpage_at_lsn_handler_inner(
+    touch: bool,
     request: Request<Body>,
     _cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
     let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
     let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
-    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
+    // Require pageserver admin permission for this API instead of only tenant-level token.
+    check_permission(&request, None)?;
     let state = get_state(&request);
 
     struct Key(pageserver_api::key::Key);
@@ -2530,22 +2611,29 @@ async fn getpage_at_lsn_handler(
 
     let key: Key = parse_query_param(&request, "key")?
         .ok_or_else(|| ApiError::BadRequest(anyhow!("missing 'key' query parameter")))?;
-    let lsn: Lsn = parse_query_param(&request, "lsn")?
-        .ok_or_else(|| ApiError::BadRequest(anyhow!("missing 'lsn' query parameter")))?;
+    let lsn: Option<Lsn> = parse_query_param(&request, "lsn")?;
 
     async {
         let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
+        // Enable read path debugging
+        let ctx = RequestContextBuilder::extend(&ctx).read_path_debug(true).build();
         let timeline = active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id).await?;
 
+        // Use last_record_lsn if no lsn is provided
+        let lsn = lsn.unwrap_or_else(|| timeline.get_last_record_lsn());
         let page = timeline.get(key.0, lsn, &ctx).await?;
 
-        Result::<_, ApiError>::Ok(
-            Response::builder()
-                .status(StatusCode::OK)
-                .header(header::CONTENT_TYPE, "application/octet-stream")
-                .body(hyper::Body::from(page))
-                .unwrap(),
-        )
+        if touch {
+            json_response(StatusCode::OK, ())
+        } else {
+            Result::<_, ApiError>::Ok(
+                Response::builder()
+                    .status(StatusCode::OK)
+                    .header(header::CONTENT_TYPE, "application/octet-stream")
+                    .body(hyper::Body::from(page))
+                    .unwrap(),
+            )
+        }
     }
     .instrument(info_span!("timeline_get", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))
     .await
@@ -3616,6 +3704,14 @@ pub fn make_router(
             "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/layer",
             |r| api_handler(r, layer_map_info_handler),
         )
+        .post(
+            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/download_heatmap_layers",
+            |r| api_handler(r, timeline_download_heatmap_layers_handler),
+        )
+        .delete(
+            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/download_heatmap_layers",
+            |r| api_handler(r, timeline_shutdown_download_heatmap_layers_handler),
+        )
         .get(
             "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/layer/:layer_file_name",
             |r| api_handler(r, layer_download_handler),
@@ -3672,6 +3768,10 @@ pub fn make_router(
             "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/getpage",
             |r| testing_api_handler("getpage@lsn", r, getpage_at_lsn_handler),
         )
+        .get(
+            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/touchpage",
+            |r| api_handler(r, touchpage_at_lsn_handler),
+        )
         .get(
             "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/keyspace",
             |r| api_handler(r, timeline_collect_keyspace),
diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs
index 983a3079e4..e1c26b0684 100644
--- a/pageserver/src/metrics.rs
+++ b/pageserver/src/metrics.rs
@@ -1,5 +1,6 @@
 use std::collections::HashMap;
 use std::num::NonZeroUsize;
+use std::os::fd::RawFd;
 use std::pin::Pin;
 use std::sync::atomic::AtomicU64;
 use std::sync::{Arc, Mutex};
@@ -129,7 +130,7 @@ pub(crate) static LAYERS_PER_READ: Lazy<HistogramVec> = Lazy::new(|| {
         "Layers visited to serve a single read (read amplification). In a batch, all visited layers count towards every read.",
         &["tenant_id", "shard_id", "timeline_id"],
         // Low resolution to reduce cardinality.
-        vec![1.0, 5.0, 10.0, 25.0, 50.0, 100.0],
+        vec![4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0],
     )
     .expect("failed to define a metric")
 });
@@ -1439,27 +1440,66 @@ impl Drop for SmgrOpTimer {
 }
 
 impl SmgrOpFlushInProgress {
-    pub(crate) async fn measure<Fut, O>(self, mut started_at: Instant, mut fut: Fut) -> O
+    /// The caller must guarantee that `socket_fd`` outlives this function.
+    pub(crate) async fn measure<Fut, O>(
+        self,
+        started_at: Instant,
+        mut fut: Fut,
+        socket_fd: RawFd,
+    ) -> O
     where
         Fut: std::future::Future<Output = O>,
     {
         let mut fut = std::pin::pin!(fut);
 
-        // Whenever observe_guard gets called, or dropped,
-        // it adds the time elapsed since its last call to metrics.
-        // Last call is tracked in `now`.
+        let mut logged = false;
+        let mut last_counter_increment_at = started_at;
         let mut observe_guard = scopeguard::guard(
-            || {
+            |is_timeout| {
                 let now = Instant::now();
-                let elapsed = now - started_at;
-                self.global_micros
-                    .inc_by(u64::try_from(elapsed.as_micros()).unwrap());
-                self.per_timeline_micros
-                    .inc_by(u64::try_from(elapsed.as_micros()).unwrap());
-                started_at = now;
+
+                // Increment counter
+                {
+                    let elapsed_since_last_observe = now - last_counter_increment_at;
+                    self.global_micros
+                        .inc_by(u64::try_from(elapsed_since_last_observe.as_micros()).unwrap());
+                    self.per_timeline_micros
+                        .inc_by(u64::try_from(elapsed_since_last_observe.as_micros()).unwrap());
+                    last_counter_increment_at = now;
+                }
+
+                // Log something on every timeout, and on completion but only if we hit a timeout.
+                if is_timeout || logged {
+                    logged = true;
+                    let elapsed_total = now - started_at;
+                    let msg = if is_timeout {
+                        "slow flush ongoing"
+                    } else {
+                        "slow flush completed or cancelled"
+                    };
+
+                    let (inq, outq) = {
+                        // SAFETY: caller guarantees that `socket_fd` outlives this function.
+                        #[cfg(target_os = "linux")]
+                        unsafe {
+                            (
+                                utils::linux_socket_ioctl::inq(socket_fd).unwrap_or(-2),
+                                utils::linux_socket_ioctl::outq(socket_fd).unwrap_or(-2),
+                            )
+                        }
+                        #[cfg(not(target_os = "linux"))]
+                        {
+                            _ = socket_fd; // appease unused lint on macOS
+                            (-1, -1)
+                        }
+                    };
+
+                    let elapsed_total_secs = format!("{:.6}", elapsed_total.as_secs_f64());
+                    tracing::info!(elapsed_total_secs, inq, outq, msg);
+                }
             },
             |mut observe| {
-                observe();
+                observe(false);
             },
         );
 
@@ -1467,7 +1507,7 @@ impl SmgrOpFlushInProgress {
             match tokio::time::timeout(Duration::from_secs(10), &mut fut).await {
                 Ok(v) => return v,
                 Err(_timeout) => {
-                    (*observe_guard)();
+                    (*observe_guard)(true);
                 }
             }
         }
diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs
index 972dad34d4..0c8da6f2a8 100644
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -73,6 +73,7 @@ use pageserver_api::models::PageTraceEvent;
 use pageserver_api::reltag::SlruKind;
 use postgres_ffi::pg_constants::DEFAULTTABLESPACE_OID;
 use postgres_ffi::BLCKSZ;
+use std::os::fd::AsRawFd;
 
 /// How long we may wait for a [`crate::tenant::mgr::TenantSlot::InProgress`]` and/or a [`crate::tenant::Tenant`] which
 /// is not yet in state [`TenantState::Active`].
@@ -236,7 +237,7 @@ pub async fn libpq_listener_main(
 
 type ConnectionHandlerResult = anyhow::Result<()>;
 
-#[instrument(skip_all, fields(peer_addr))]
+#[instrument(skip_all, fields(peer_addr, application_name))]
 #[allow(clippy::too_many_arguments)]
 async fn page_service_conn_main(
     conf: &'static PageServerConf,
@@ -257,6 +258,8 @@ async fn page_service_conn_main(
         .set_nodelay(true)
         .context("could not set TCP_NODELAY")?;
 
+    let socket_fd = socket.as_raw_fd();
+
     let peer_addr = socket.peer_addr().context("get peer address")?;
     tracing::Span::current().record("peer_addr", field::display(peer_addr));
 
@@ -305,7 +308,7 @@ async fn page_service_conn_main(
         cancel.clone(),
         gate_guard,
     );
-    let pgbackend = PostgresBackend::new_from_io(socket, peer_addr, auth_type, None)?;
+    let pgbackend = PostgresBackend::new_from_io(socket_fd, socket, peer_addr, auth_type, None)?;
 
     match pgbackend.run(&mut conn_handler, &cancel).await {
         Ok(()) => {
@@ -914,7 +917,7 @@ impl PageServerHandler {
                     &shard,
                     req.hdr.request_lsn,
                     req.hdr.not_modified_since,
-                    &shard.get_latest_gc_cutoff_lsn(),
+                    &shard.get_applied_gc_cutoff_lsn(),
                     ctx,
                 )
                 // TODO: if we actually need to wait for lsn here, it delays the entire batch which doesn't need to wait
@@ -1286,12 +1289,15 @@ impl PageServerHandler {
             ))?;
 
             // what we want to do
+            let socket_fd = pgb_writer.socket_fd;
             let flush_fut = pgb_writer.flush();
             // metric for how long flushing takes
             let flush_fut = match flushing_timer {
-                Some(flushing_timer) => {
-                    futures::future::Either::Left(flushing_timer.measure(Instant::now(), flush_fut))
-                }
+                Some(flushing_timer) => futures::future::Either::Left(flushing_timer.measure(
+                    Instant::now(),
+                    flush_fut,
+                    socket_fd,
+                )),
                 None => futures::future::Either::Right(flush_fut),
             };
             // do it while respecting cancellation
@@ -1793,6 +1799,13 @@ impl PageServerHandler {
                 .as_millis()
                 .to_string()
         });
+
+        info!(
+            "acquired lease for {} until {}",
+            lsn,
+            valid_until_str.as_deref().unwrap_or("<unknown>")
+        );
+
         let bytes = valid_until_str.as_ref().map(|x| x.as_bytes());
 
         pgb.write_message_noflush(&BeMessage::RowDescription(&[RowDescriptor::text_col(
@@ -1810,7 +1823,7 @@ impl PageServerHandler {
         req: &PagestreamExistsRequest,
         ctx: &RequestContext,
     ) -> Result<PagestreamBeMessage, PageStreamError> {
-        let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
+        let latest_gc_cutoff_lsn = timeline.get_applied_gc_cutoff_lsn();
         let lsn = Self::wait_or_get_last_lsn(
             timeline,
             req.hdr.request_lsn,
@@ -1837,7 +1850,7 @@ impl PageServerHandler {
         req: &PagestreamNblocksRequest,
         ctx: &RequestContext,
     ) -> Result<PagestreamBeMessage, PageStreamError> {
-        let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
+        let latest_gc_cutoff_lsn = timeline.get_applied_gc_cutoff_lsn();
         let lsn = Self::wait_or_get_last_lsn(
             timeline,
             req.hdr.request_lsn,
@@ -1864,7 +1877,7 @@ impl PageServerHandler {
         req: &PagestreamDbSizeRequest,
         ctx: &RequestContext,
     ) -> Result<PagestreamBeMessage, PageStreamError> {
-        let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
+        let latest_gc_cutoff_lsn = timeline.get_applied_gc_cutoff_lsn();
         let lsn = Self::wait_or_get_last_lsn(
             timeline,
             req.hdr.request_lsn,
@@ -1954,7 +1967,7 @@ impl PageServerHandler {
         req: &PagestreamGetSlruSegmentRequest,
         ctx: &RequestContext,
     ) -> Result<PagestreamBeMessage, PageStreamError> {
-        let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
+        let latest_gc_cutoff_lsn = timeline.get_applied_gc_cutoff_lsn();
         let lsn = Self::wait_or_get_last_lsn(
             timeline,
             req.hdr.request_lsn,
@@ -2050,7 +2063,8 @@ impl PageServerHandler {
     {
         fn map_basebackup_error(err: BasebackupError) -> QueryError {
             match err {
-                BasebackupError::Client(e) => QueryError::Disconnected(ConnectionError::Io(e)),
+                // TODO: passthrough the error site to the final error message?
+                BasebackupError::Client(e, _) => QueryError::Disconnected(ConnectionError::Io(e)),
                 BasebackupError::Server(e) => QueryError::Other(e),
             }
         }
@@ -2071,7 +2085,7 @@ impl PageServerHandler {
             //return Err(QueryError::NotFound("timeline is archived".into()))
         }
 
-        let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
+        let latest_gc_cutoff_lsn = timeline.get_applied_gc_cutoff_lsn();
         if let Some(lsn) = lsn {
             // Backup was requested at a particular LSN. Wait for it to arrive.
             info!("waiting for {}", lsn);
@@ -2151,10 +2165,12 @@ impl PageServerHandler {
                 .await
                 .map_err(map_basebackup_error)?;
             }
-            writer
-                .flush()
-                .await
-                .map_err(|e| map_basebackup_error(BasebackupError::Client(e)))?;
+            writer.flush().await.map_err(|e| {
+                map_basebackup_error(BasebackupError::Client(
+                    e,
+                    "handle_basebackup_request,flush",
+                ))
+            })?;
         }
 
         pgb.write_message_noflush(&BeMessage::CopyDone)
@@ -2454,9 +2470,16 @@ where
     fn startup(
         &mut self,
         _pgb: &mut PostgresBackend<IO>,
-        _sm: &FeStartupPacket,
+        sm: &FeStartupPacket,
     ) -> Result<(), QueryError> {
         fail::fail_point!("ps::connection-start::startup-packet");
+
+        if let FeStartupPacket::StartupMessage { params, .. } = sm {
+            if let Some(app_name) = params.get("application_name") {
+                Span::current().record("application_name", field::display(app_name));
+            }
+        };
+
         Ok(())
     }
 
diff --git a/pageserver/src/pgdatadir_mapping.rs b/pageserver/src/pgdatadir_mapping.rs
index 00f332d797..ae2762bd1e 100644
--- a/pageserver/src/pgdatadir_mapping.rs
+++ b/pageserver/src/pgdatadir_mapping.rs
@@ -23,13 +23,14 @@ use anyhow::{ensure, Context};
 use bytes::{Buf, Bytes, BytesMut};
 use enum_map::Enum;
 use itertools::Itertools;
-use pageserver_api::key::Key;
 use pageserver_api::key::{
     dbdir_key_range, rel_block_to_key, rel_dir_to_key, rel_key_range, rel_size_to_key,
-    relmap_file_key, repl_origin_key, repl_origin_key_range, slru_block_to_key, slru_dir_to_key,
-    slru_segment_key_range, slru_segment_size_to_key, twophase_file_key, twophase_key_range,
-    CompactKey, AUX_FILES_KEY, CHECKPOINT_KEY, CONTROLFILE_KEY, DBDIR_KEY, TWOPHASEDIR_KEY,
+    rel_tag_sparse_key_range, relmap_file_key, repl_origin_key, repl_origin_key_range,
+    slru_block_to_key, slru_dir_to_key, slru_segment_key_range, slru_segment_size_to_key,
+    twophase_file_key, twophase_key_range, CompactKey, RelDirExists, AUX_FILES_KEY, CHECKPOINT_KEY,
+    CONTROLFILE_KEY, DBDIR_KEY, TWOPHASEDIR_KEY,
 };
+use pageserver_api::key::{rel_tag_sparse_key, Key};
 use pageserver_api::keyspace::SparseKeySpace;
 use pageserver_api::record::NeonWalRecord;
 use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind};
@@ -490,12 +491,33 @@ impl Timeline {
         if !dbdirs.contains_key(&(tag.spcnode, tag.dbnode)) {
             return Ok(false);
         }
-        // fetch directory listing
+
+        // Read path: first read the new reldir keyspace. Early return if the relation exists.
+        // Otherwise, read the old reldir keyspace.
+        // TODO: if IndexPart::rel_size_migration is `Migrated`, we only need to read from v2.
+
+        if self.get_rel_size_v2_enabled() {
+            // fetch directory listing (new)
+            let key = rel_tag_sparse_key(tag.spcnode, tag.dbnode, tag.relnode, tag.forknum);
+            let buf = RelDirExists::decode_option(version.sparse_get(self, key, ctx).await?)
+                .map_err(|_| PageReconstructError::Other(anyhow::anyhow!("invalid reldir key")))?;
+            let exists_v2 = buf == RelDirExists::Exists;
+            // Fast path: if the relation exists in the new format, return true.
+            // TODO: we should have a verification mode that checks both keyspaces
+            // to ensure the relation only exists in one of them.
+            if exists_v2 {
+                return Ok(true);
+            }
+        }
+
+        // fetch directory listing (old)
+
         let key = rel_dir_to_key(tag.spcnode, tag.dbnode);
         let buf = version.get(self, key, ctx).await?;
 
         let dir = RelDirectory::des(&buf)?;
-        Ok(dir.rels.contains(&(tag.relnode, tag.forknum)))
+        let exists_v1 = dir.rels.contains(&(tag.relnode, tag.forknum));
+        Ok(exists_v1)
     }
 
     /// Get a list of all existing relations in given tablespace and database.
@@ -513,12 +535,12 @@ impl Timeline {
         version: Version<'_>,
         ctx: &RequestContext,
     ) -> Result<HashSet<RelTag>, PageReconstructError> {
-        // fetch directory listing
+        // fetch directory listing (old)
         let key = rel_dir_to_key(spcnode, dbnode);
         let buf = version.get(self, key, ctx).await?;
 
         let dir = RelDirectory::des(&buf)?;
-        let rels: HashSet<RelTag> =
+        let rels_v1: HashSet<RelTag> =
             HashSet::from_iter(dir.rels.iter().map(|(relnode, forknum)| RelTag {
                 spcnode,
                 dbnode,
@@ -526,6 +548,46 @@ impl Timeline {
                 forknum: *forknum,
             }));
 
+        if !self.get_rel_size_v2_enabled() {
+            return Ok(rels_v1);
+        }
+
+        // scan directory listing (new), merge with the old results
+        let key_range = rel_tag_sparse_key_range(spcnode, dbnode);
+        let io_concurrency = IoConcurrency::spawn_from_conf(
+            self.conf,
+            self.gate
+                .enter()
+                .map_err(|_| PageReconstructError::Cancelled)?,
+        );
+        let results = self
+            .scan(
+                KeySpace::single(key_range),
+                version.get_lsn(),
+                ctx,
+                io_concurrency,
+            )
+            .await?;
+        let mut rels = rels_v1;
+        for (key, val) in results {
+            let val = RelDirExists::decode(&val?)
+                .map_err(|_| PageReconstructError::Other(anyhow::anyhow!("invalid reldir key")))?;
+            assert_eq!(key.field6, 1);
+            assert_eq!(key.field2, spcnode);
+            assert_eq!(key.field3, dbnode);
+            let tag = RelTag {
+                spcnode,
+                dbnode,
+                relnode: key.field4,
+                forknum: key.field5,
+            };
+            if val == RelDirExists::Removed {
+                debug_assert!(!rels.contains(&tag), "removed reltag in v2");
+                continue;
+            }
+            let did_not_contain = rels.insert(tag);
+            debug_assert!(did_not_contain, "duplicate reltag in v2");
+        }
         Ok(rels)
     }
 
@@ -611,7 +673,7 @@ impl Timeline {
     ) -> Result<LsnForTimestamp, PageReconstructError> {
         pausable_failpoint!("find-lsn-for-timestamp-pausable");
 
-        let gc_cutoff_lsn_guard = self.get_latest_gc_cutoff_lsn();
+        let gc_cutoff_lsn_guard = self.get_applied_gc_cutoff_lsn();
         let gc_cutoff_planned = {
             let gc_info = self.gc_info.read().unwrap();
             gc_info.min_cutoff()
@@ -1144,7 +1206,11 @@ impl Timeline {
 
         let dense_keyspace = result.to_keyspace();
         let sparse_keyspace = SparseKeySpace(KeySpace {
-            ranges: vec![Key::metadata_aux_key_range(), repl_origin_key_range()],
+            ranges: vec![
+                Key::metadata_aux_key_range(),
+                repl_origin_key_range(),
+                Key::rel_dir_sparse_key_range(),
+            ],
         });
 
         if cfg!(debug_assertions) {
@@ -1274,12 +1340,22 @@ pub struct DatadirModification<'a> {
 
     /// For special "directory" keys that store key-value maps, track the size of the map
     /// if it was updated in this modification.
-    pending_directory_entries: Vec<(DirectoryKind, usize)>,
+    pending_directory_entries: Vec<(DirectoryKind, MetricsUpdate)>,
 
     /// An **approximation** of how many metadata bytes will be written to the EphemeralFile.
     pending_metadata_bytes: usize,
 }
 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum MetricsUpdate {
+    /// Set the metrics to this value
+    Set(u64),
+    /// Increment the metrics by this value
+    Add(u64),
+    /// Decrement the metrics by this value
+    Sub(u64),
+}
+
 impl DatadirModification<'_> {
     // When a DatadirModification is committed, we do a monolithic serialization of all its contents.  WAL records can
     // contain multiple pages, so the pageserver's record-based batch size isn't sufficient to bound this allocation: we
@@ -1359,7 +1435,8 @@ impl DatadirModification<'_> {
         let buf = DbDirectory::ser(&DbDirectory {
             dbdirs: HashMap::new(),
         })?;
-        self.pending_directory_entries.push((DirectoryKind::Db, 0));
+        self.pending_directory_entries
+            .push((DirectoryKind::Db, MetricsUpdate::Set(0)));
         self.put(DBDIR_KEY, Value::Image(buf.into()));
 
         let buf = if self.tline.pg_version >= 17 {
@@ -1372,7 +1449,7 @@ impl DatadirModification<'_> {
             })
         }?;
         self.pending_directory_entries
-            .push((DirectoryKind::TwoPhase, 0));
+            .push((DirectoryKind::TwoPhase, MetricsUpdate::Set(0)));
         self.put(TWOPHASEDIR_KEY, Value::Image(buf.into()));
 
         let buf: Bytes = SlruSegmentDirectory::ser(&SlruSegmentDirectory::default())?.into();
@@ -1382,17 +1459,23 @@ impl DatadirModification<'_> {
         // harmless but they'd just be dropped on later compaction.
         if self.tline.tenant_shard_id.is_shard_zero() {
             self.put(slru_dir_to_key(SlruKind::Clog), empty_dir.clone());
-            self.pending_directory_entries
-                .push((DirectoryKind::SlruSegment(SlruKind::Clog), 0));
+            self.pending_directory_entries.push((
+                DirectoryKind::SlruSegment(SlruKind::Clog),
+                MetricsUpdate::Set(0),
+            ));
             self.put(
                 slru_dir_to_key(SlruKind::MultiXactMembers),
                 empty_dir.clone(),
             );
-            self.pending_directory_entries
-                .push((DirectoryKind::SlruSegment(SlruKind::Clog), 0));
+            self.pending_directory_entries.push((
+                DirectoryKind::SlruSegment(SlruKind::Clog),
+                MetricsUpdate::Set(0),
+            ));
             self.put(slru_dir_to_key(SlruKind::MultiXactOffsets), empty_dir);
-            self.pending_directory_entries
-                .push((DirectoryKind::SlruSegment(SlruKind::MultiXactOffsets), 0));
+            self.pending_directory_entries.push((
+                DirectoryKind::SlruSegment(SlruKind::MultiXactOffsets),
+                MetricsUpdate::Set(0),
+            ));
         }
 
         Ok(())
@@ -1658,10 +1741,16 @@ impl DatadirModification<'_> {
         }
         if r.is_none() {
             // Create RelDirectory
+            // TODO: if we have fully migrated to v2, no need to create this directory
             let buf = RelDirectory::ser(&RelDirectory {
                 rels: HashSet::new(),
             })?;
-            self.pending_directory_entries.push((DirectoryKind::Rel, 0));
+            self.pending_directory_entries
+                .push((DirectoryKind::Rel, MetricsUpdate::Set(0)));
+            if self.tline.get_rel_size_v2_enabled() {
+                self.pending_directory_entries
+                    .push((DirectoryKind::RelV2, MetricsUpdate::Set(0)));
+            }
             self.put(
                 rel_dir_to_key(spcnode, dbnode),
                 Value::Image(Bytes::from(buf)),
@@ -1685,8 +1774,10 @@ impl DatadirModification<'_> {
             if !dir.xids.insert(xid) {
                 anyhow::bail!("twophase file for xid {} already exists", xid);
             }
-            self.pending_directory_entries
-                .push((DirectoryKind::TwoPhase, dir.xids.len()));
+            self.pending_directory_entries.push((
+                DirectoryKind::TwoPhase,
+                MetricsUpdate::Set(dir.xids.len() as u64),
+            ));
             Bytes::from(TwoPhaseDirectoryV17::ser(&dir)?)
         } else {
             let xid = xid as u32;
@@ -1694,8 +1785,10 @@ impl DatadirModification<'_> {
             if !dir.xids.insert(xid) {
                 anyhow::bail!("twophase file for xid {} already exists", xid);
             }
-            self.pending_directory_entries
-                .push((DirectoryKind::TwoPhase, dir.xids.len()));
+            self.pending_directory_entries.push((
+                DirectoryKind::TwoPhase,
+                MetricsUpdate::Set(dir.xids.len() as u64),
+            ));
             Bytes::from(TwoPhaseDirectory::ser(&dir)?)
         };
         self.put(TWOPHASEDIR_KEY, Value::Image(newdirbuf));
@@ -1744,8 +1837,10 @@ impl DatadirModification<'_> {
         let mut dir = DbDirectory::des(&buf)?;
         if dir.dbdirs.remove(&(spcnode, dbnode)).is_some() {
             let buf = DbDirectory::ser(&dir)?;
-            self.pending_directory_entries
-                .push((DirectoryKind::Db, dir.dbdirs.len()));
+            self.pending_directory_entries.push((
+                DirectoryKind::Db,
+                MetricsUpdate::Set(dir.dbdirs.len() as u64),
+            ));
             self.put(DBDIR_KEY, Value::Image(buf.into()));
         } else {
             warn!(
@@ -1778,39 +1873,85 @@ impl DatadirModification<'_> {
         // tablespace.  Create the reldir entry for it if so.
         let mut dbdir = DbDirectory::des(&self.get(DBDIR_KEY, ctx).await.context("read db")?)
             .context("deserialize db")?;
-        let rel_dir_key = rel_dir_to_key(rel.spcnode, rel.dbnode);
-        let mut rel_dir =
+
+        let dbdir_exists =
             if let hash_map::Entry::Vacant(e) = dbdir.dbdirs.entry((rel.spcnode, rel.dbnode)) {
                 // Didn't exist. Update dbdir
                 e.insert(false);
                 let buf = DbDirectory::ser(&dbdir).context("serialize db")?;
-                self.pending_directory_entries
-                    .push((DirectoryKind::Db, dbdir.dbdirs.len()));
+                self.pending_directory_entries.push((
+                    DirectoryKind::Db,
+                    MetricsUpdate::Set(dbdir.dbdirs.len() as u64),
+                ));
                 self.put(DBDIR_KEY, Value::Image(buf.into()));
-
-                // and create the RelDirectory
-                RelDirectory::default()
+                false
             } else {
-                // reldir already exists, fetch it
-                RelDirectory::des(&self.get(rel_dir_key, ctx).await.context("read db")?)
-                    .context("deserialize db")?
+                true
             };
 
+        let rel_dir_key = rel_dir_to_key(rel.spcnode, rel.dbnode);
+        let mut rel_dir = if !dbdir_exists {
+            // Create the RelDirectory
+            RelDirectory::default()
+        } else {
+            // reldir already exists, fetch it
+            RelDirectory::des(&self.get(rel_dir_key, ctx).await.context("read db")?)
+                .context("deserialize db")?
+        };
+
         // Add the new relation to the rel directory entry, and write it back
         if !rel_dir.rels.insert((rel.relnode, rel.forknum)) {
             return Err(RelationError::AlreadyExists);
         }
 
-        self.pending_directory_entries
-            .push((DirectoryKind::Rel, rel_dir.rels.len()));
-
-        self.put(
-            rel_dir_key,
-            Value::Image(Bytes::from(
-                RelDirectory::ser(&rel_dir).context("serialize")?,
-            )),
-        );
-
+        if self.tline.get_rel_size_v2_enabled() {
+            let sparse_rel_dir_key =
+                rel_tag_sparse_key(rel.spcnode, rel.dbnode, rel.relnode, rel.forknum);
+            // check if the rel_dir_key exists in v2
+            let val = self
+                .sparse_get(sparse_rel_dir_key, ctx)
+                .await
+                .map_err(|e| RelationError::Other(e.into()))?;
+            let val = RelDirExists::decode_option(val)
+                .map_err(|_| RelationError::Other(anyhow::anyhow!("invalid reldir key")))?;
+            if val == RelDirExists::Exists {
+                return Err(RelationError::AlreadyExists);
+            }
+            self.put(
+                sparse_rel_dir_key,
+                Value::Image(RelDirExists::Exists.encode()),
+            );
+            if !dbdir_exists {
+                self.pending_directory_entries
+                    .push((DirectoryKind::Rel, MetricsUpdate::Set(0)));
+                self.pending_directory_entries
+                    .push((DirectoryKind::RelV2, MetricsUpdate::Set(0)));
+                // We don't write `rel_dir_key -> rel_dir.rels` back to the storage in the v2 path unless it's the initial creation.
+                // TODO: if we have fully migrated to v2, no need to create this directory. Otherwise, there
+                // will be key not found errors if we don't create an empty one for rel_size_v2.
+                self.put(
+                    rel_dir_key,
+                    Value::Image(Bytes::from(
+                        RelDirectory::ser(&RelDirectory::default()).context("serialize")?,
+                    )),
+                );
+            }
+            self.pending_directory_entries
+                .push((DirectoryKind::RelV2, MetricsUpdate::Add(1)));
+        } else {
+            if !dbdir_exists {
+                self.pending_directory_entries
+                    .push((DirectoryKind::Rel, MetricsUpdate::Set(0)))
+            }
+            self.pending_directory_entries
+                .push((DirectoryKind::Rel, MetricsUpdate::Add(1)));
+            self.put(
+                rel_dir_key,
+                Value::Image(Bytes::from(
+                    RelDirectory::ser(&rel_dir).context("serialize")?,
+                )),
+            );
+        }
         // Put size
         let size_key = rel_size_to_key(rel);
         let buf = nblocks.to_le_bytes();
@@ -1896,9 +2037,34 @@ impl DatadirModification<'_> {
 
             let mut dirty = false;
             for rel_tag in rel_tags {
-                if dir.rels.remove(&(rel_tag.relnode, rel_tag.forknum)) {
+                let found = if dir.rels.remove(&(rel_tag.relnode, rel_tag.forknum)) {
+                    self.pending_directory_entries
+                        .push((DirectoryKind::Rel, MetricsUpdate::Sub(1)));
                     dirty = true;
+                    true
+                } else if self.tline.get_rel_size_v2_enabled() {
+                    // The rel is not found in the old reldir key, so we need to check the new sparse keyspace.
+                    // Note that a relation can only exist in one of the two keyspaces (guaranteed by the ingestion
+                    // logic).
+                    let key =
+                        rel_tag_sparse_key(spc_node, db_node, rel_tag.relnode, rel_tag.forknum);
+                    let val = RelDirExists::decode_option(self.sparse_get(key, ctx).await?)
+                        .map_err(|_| RelationError::Other(anyhow::anyhow!("invalid reldir key")))?;
+                    if val == RelDirExists::Exists {
+                        self.pending_directory_entries
+                            .push((DirectoryKind::RelV2, MetricsUpdate::Sub(1)));
+                        // put tombstone
+                        self.put(key, Value::Image(RelDirExists::Removed.encode()));
+                        // no need to set dirty to true
+                        true
+                    } else {
+                        false
+                    }
+                } else {
+                    false
+                };
 
+                if found {
                     // update logical size
                     let size_key = rel_size_to_key(rel_tag);
                     let old_size = self.get(size_key, ctx).await?.get_u32_le();
@@ -1914,8 +2080,6 @@ impl DatadirModification<'_> {
 
             if dirty {
                 self.put(dir_key, Value::Image(Bytes::from(RelDirectory::ser(&dir)?)));
-                self.pending_directory_entries
-                    .push((DirectoryKind::Rel, dir.rels.len()));
             }
         }
 
@@ -1939,8 +2103,10 @@ impl DatadirModification<'_> {
         if !dir.segments.insert(segno) {
             anyhow::bail!("slru segment {kind:?}/{segno} already exists");
         }
-        self.pending_directory_entries
-            .push((DirectoryKind::SlruSegment(kind), dir.segments.len()));
+        self.pending_directory_entries.push((
+            DirectoryKind::SlruSegment(kind),
+            MetricsUpdate::Set(dir.segments.len() as u64),
+        ));
         self.put(
             dir_key,
             Value::Image(Bytes::from(SlruSegmentDirectory::ser(&dir)?)),
@@ -1987,8 +2153,10 @@ impl DatadirModification<'_> {
         if !dir.segments.remove(&segno) {
             warn!("slru segment {:?}/{} does not exist", kind, segno);
         }
-        self.pending_directory_entries
-            .push((DirectoryKind::SlruSegment(kind), dir.segments.len()));
+        self.pending_directory_entries.push((
+            DirectoryKind::SlruSegment(kind),
+            MetricsUpdate::Set(dir.segments.len() as u64),
+        ));
         self.put(
             dir_key,
             Value::Image(Bytes::from(SlruSegmentDirectory::ser(&dir)?)),
@@ -2020,8 +2188,10 @@ impl DatadirModification<'_> {
             if !dir.xids.remove(&xid) {
                 warn!("twophase file for xid {} does not exist", xid);
             }
-            self.pending_directory_entries
-                .push((DirectoryKind::TwoPhase, dir.xids.len()));
+            self.pending_directory_entries.push((
+                DirectoryKind::TwoPhase,
+                MetricsUpdate::Set(dir.xids.len() as u64),
+            ));
             Bytes::from(TwoPhaseDirectoryV17::ser(&dir)?)
         } else {
             let xid: u32 = u32::try_from(xid)?;
@@ -2030,8 +2200,10 @@ impl DatadirModification<'_> {
             if !dir.xids.remove(&xid) {
                 warn!("twophase file for xid {} does not exist", xid);
             }
-            self.pending_directory_entries
-                .push((DirectoryKind::TwoPhase, dir.xids.len()));
+            self.pending_directory_entries.push((
+                DirectoryKind::TwoPhase,
+                MetricsUpdate::Set(dir.xids.len() as u64),
+            ));
             Bytes::from(TwoPhaseDirectory::ser(&dir)?)
         };
         self.put(TWOPHASEDIR_KEY, Value::Image(newdirbuf));
@@ -2147,7 +2319,7 @@ impl DatadirModification<'_> {
         }
 
         for (kind, count) in std::mem::take(&mut self.pending_directory_entries) {
-            writer.update_directory_entries_count(kind, count as u64);
+            writer.update_directory_entries_count(kind, count);
         }
 
         Ok(())
@@ -2233,7 +2405,7 @@ impl DatadirModification<'_> {
         }
 
         for (kind, count) in std::mem::take(&mut self.pending_directory_entries) {
-            writer.update_directory_entries_count(kind, count as u64);
+            writer.update_directory_entries_count(kind, count);
         }
 
         self.pending_metadata_bytes = 0;
@@ -2297,6 +2469,22 @@ impl DatadirModification<'_> {
         self.tline.get(key, lsn, ctx).await
     }
 
+    /// Get a key from the sparse keyspace. Automatically converts the missing key error
+    /// and the empty value into None.
+    async fn sparse_get(
+        &self,
+        key: Key,
+        ctx: &RequestContext,
+    ) -> Result<Option<Bytes>, PageReconstructError> {
+        let val = self.get(key, ctx).await;
+        match val {
+            Ok(val) if val.is_empty() => Ok(None),
+            Ok(val) => Ok(Some(val)),
+            Err(PageReconstructError::MissingKey(_)) => Ok(None),
+            Err(e) => Err(e),
+        }
+    }
+
     fn put(&mut self, key: Key, val: Value) {
         if Self::is_data_key(&key) {
             self.put_data(key.to_compact(), val)
@@ -2379,6 +2567,23 @@ impl Version<'_> {
         }
     }
 
+    /// Get a key from the sparse keyspace. Automatically converts the missing key error
+    /// and the empty value into None.
+    async fn sparse_get(
+        &self,
+        timeline: &Timeline,
+        key: Key,
+        ctx: &RequestContext,
+    ) -> Result<Option<Bytes>, PageReconstructError> {
+        let val = self.get(timeline, key, ctx).await;
+        match val {
+            Ok(val) if val.is_empty() => Ok(None),
+            Ok(val) => Ok(Some(val)),
+            Err(PageReconstructError::MissingKey(_)) => Ok(None),
+            Err(e) => Err(e),
+        }
+    }
+
     fn get_lsn(&self) -> Lsn {
         match self {
             Version::Lsn(lsn) => *lsn,
@@ -2438,6 +2643,7 @@ pub(crate) enum DirectoryKind {
     Rel,
     AuxFiles,
     SlruSegment(SlruKind),
+    RelV2,
 }
 
 impl DirectoryKind {
diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs
index 4c65991e45..efb35625f2 100644
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -40,6 +40,8 @@ use remote_timeline_client::manifest::{
 use remote_timeline_client::UploadQueueNotReadyError;
 use remote_timeline_client::FAILED_REMOTE_OP_RETRIES;
 use remote_timeline_client::FAILED_UPLOAD_WARN_THRESHOLD;
+use secondary::heatmap::HeatMapTenant;
+use secondary::heatmap::HeatMapTimeline;
 use std::collections::BTreeMap;
 use std::fmt;
 use std::future::Future;
@@ -55,6 +57,7 @@ use timeline::offload::OffloadError;
 use timeline::CompactFlags;
 use timeline::CompactOptions;
 use timeline::CompactionError;
+use timeline::PreviousHeatmap;
 use timeline::ShutdownMode;
 use tokio::io::BufReader;
 use tokio::sync::watch;
@@ -262,6 +265,7 @@ struct TimelinePreload {
     timeline_id: TimelineId,
     client: RemoteTimelineClient,
     index_part: Result<MaybeDeletedIndexPart, DownloadError>,
+    previous_heatmap: Option<PreviousHeatmap>,
 }
 
 pub(crate) struct TenantPreload {
@@ -1128,6 +1132,7 @@ impl Tenant {
         resources: TimelineResources,
         mut index_part: IndexPart,
         metadata: TimelineMetadata,
+        previous_heatmap: Option<PreviousHeatmap>,
         ancestor: Option<Arc<Timeline>>,
         cause: LoadTimelineCause,
         ctx: &RequestContext,
@@ -1158,6 +1163,7 @@ impl Tenant {
         let timeline = self.create_timeline_struct(
             timeline_id,
             &metadata,
+            previous_heatmap,
             ancestor.clone(),
             resources,
             CreateTimelineCause::Load,
@@ -1557,8 +1563,18 @@ impl Tenant {
             }
         }
 
+        // TODO(vlad): Could go to S3 if the secondary is freezing cold and hasn't even
+        // pulled the first heatmap. Not entirely necessary since the storage controller
+        // will kick the secondary in any case and cause a download.
+        let maybe_heatmap_at = self.read_on_disk_heatmap().await;
+
         let timelines = self
-            .load_timelines_metadata(remote_timeline_ids, remote_storage, cancel)
+            .load_timelines_metadata(
+                remote_timeline_ids,
+                remote_storage,
+                maybe_heatmap_at,
+                cancel,
+            )
             .await?;
 
         Ok(TenantPreload {
@@ -1571,6 +1587,26 @@ impl Tenant {
         })
     }
 
+    async fn read_on_disk_heatmap(&self) -> Option<(HeatMapTenant, std::time::Instant)> {
+        let on_disk_heatmap_path = self.conf.tenant_heatmap_path(&self.tenant_shard_id);
+        match tokio::fs::read_to_string(on_disk_heatmap_path).await {
+            Ok(heatmap) => match serde_json::from_str::<HeatMapTenant>(&heatmap) {
+                Ok(heatmap) => Some((heatmap, std::time::Instant::now())),
+                Err(err) => {
+                    error!("Failed to deserialize old heatmap: {err}");
+                    None
+                }
+            },
+            Err(err) => match err.kind() {
+                std::io::ErrorKind::NotFound => None,
+                _ => {
+                    error!("Unexpected IO error reading old heatmap: {err}");
+                    None
+                }
+            },
+        }
+    }
+
     ///
     /// Background task that downloads all data for a tenant and brings it to Active state.
     ///
@@ -1658,7 +1694,10 @@ impl Tenant {
             match index_part {
                 MaybeDeletedIndexPart::IndexPart(index_part) => {
                     timeline_ancestors.insert(timeline_id, index_part.metadata.clone());
-                    remote_index_and_client.insert(timeline_id, (index_part, preload.client));
+                    remote_index_and_client.insert(
+                        timeline_id,
+                        (index_part, preload.client, preload.previous_heatmap),
+                    );
                 }
                 MaybeDeletedIndexPart::Deleted(index_part) => {
                     info!(
@@ -1677,7 +1716,7 @@ impl Tenant {
         // layer file.
         let sorted_timelines = tree_sort_timelines(timeline_ancestors, |m| m.ancestor_timeline())?;
         for (timeline_id, remote_metadata) in sorted_timelines {
-            let (index_part, remote_client) = remote_index_and_client
+            let (index_part, remote_client, previous_heatmap) = remote_index_and_client
                 .remove(&timeline_id)
                 .expect("just put it in above");
 
@@ -1697,6 +1736,7 @@ impl Tenant {
                     timeline_id,
                     index_part,
                     remote_metadata,
+                    previous_heatmap,
                     self.get_timeline_resources_for(remote_client),
                     LoadTimelineCause::Attach,
                     ctx,
@@ -1846,11 +1886,13 @@ impl Tenant {
     }
 
     #[instrument(skip_all, fields(timeline_id=%timeline_id))]
+    #[allow(clippy::too_many_arguments)]
     async fn load_remote_timeline(
         self: &Arc<Self>,
         timeline_id: TimelineId,
         index_part: IndexPart,
         remote_metadata: TimelineMetadata,
+        previous_heatmap: Option<PreviousHeatmap>,
         resources: TimelineResources,
         cause: LoadTimelineCause,
         ctx: &RequestContext,
@@ -1880,6 +1922,7 @@ impl Tenant {
             resources,
             index_part,
             remote_metadata,
+            previous_heatmap,
             ancestor,
             cause,
             ctx,
@@ -1891,14 +1934,29 @@ impl Tenant {
         self: &Arc<Tenant>,
         timeline_ids: HashSet<TimelineId>,
         remote_storage: &GenericRemoteStorage,
+        heatmap: Option<(HeatMapTenant, std::time::Instant)>,
         cancel: CancellationToken,
     ) -> anyhow::Result<HashMap<TimelineId, TimelinePreload>> {
+        let mut timeline_heatmaps = heatmap.map(|h| (h.0.into_timelines_index(), h.1));
+
         let mut part_downloads = JoinSet::new();
         for timeline_id in timeline_ids {
             let cancel_clone = cancel.clone();
+
+            let previous_timeline_heatmap = timeline_heatmaps.as_mut().and_then(|hs| {
+                hs.0.remove(&timeline_id).map(|h| PreviousHeatmap::Active {
+                    heatmap: h,
+                    read_at: hs.1,
+                })
+            });
             part_downloads.spawn(
-                self.load_timeline_metadata(timeline_id, remote_storage.clone(), cancel_clone)
-                    .instrument(info_span!("download_index_part", %timeline_id)),
+                self.load_timeline_metadata(
+                    timeline_id,
+                    remote_storage.clone(),
+                    previous_timeline_heatmap,
+                    cancel_clone,
+                )
+                .instrument(info_span!("download_index_part", %timeline_id)),
             );
         }
 
@@ -1946,6 +2004,7 @@ impl Tenant {
         self: &Arc<Tenant>,
         timeline_id: TimelineId,
         remote_storage: GenericRemoteStorage,
+        previous_heatmap: Option<PreviousHeatmap>,
         cancel: CancellationToken,
     ) -> impl Future<Output = TimelinePreload> {
         let client = self.build_timeline_client(timeline_id, remote_storage);
@@ -1961,6 +2020,7 @@ impl Tenant {
                 client,
                 timeline_id,
                 index_part,
+                previous_heatmap,
             }
         }
     }
@@ -2072,7 +2132,12 @@ impl Tenant {
             })?;
 
         let timeline_preload = self
-            .load_timeline_metadata(timeline_id, self.remote_storage.clone(), cancel.clone())
+            .load_timeline_metadata(
+                timeline_id,
+                self.remote_storage.clone(),
+                None,
+                cancel.clone(),
+            )
             .await;
 
         let index_part = match timeline_preload.index_part {
@@ -2106,6 +2171,7 @@ impl Tenant {
             timeline_id,
             index_part,
             remote_metadata,
+            None,
             timeline_resources,
             LoadTimelineCause::Unoffload,
             &ctx,
@@ -2821,7 +2887,7 @@ impl Tenant {
         };
         let metadata = index_part.metadata.clone();
         self
-            .load_remote_timeline(timeline_id, index_part, metadata, resources, LoadTimelineCause::ImportPgdata{
+            .load_remote_timeline(timeline_id, index_part, metadata, None, resources, LoadTimelineCause::ImportPgdata{
                 create_guard: timeline_create_guard, activate, }, &ctx)
             .await?
             .ready_to_activate()
@@ -3035,6 +3101,9 @@ impl Tenant {
                 if let Some(queue) = queue {
                     outcome = queue
                         .iteration(cancel, ctx, &self.gc_block, &timeline)
+                        .instrument(
+                            info_span!("gc_compact_timeline", timeline_id = %timeline.timeline_id),
+                        )
                         .await?;
                 }
             }
@@ -3081,6 +3150,12 @@ impl Tenant {
             // Offload failures don't trip the circuit breaker, since they're cheap to retry and
             // shouldn't block compaction.
             CompactionError::Offload(_) => {}
+            CompactionError::CollectKeySpaceError(err) => {
+                self.compaction_circuit_breaker
+                    .lock()
+                    .unwrap()
+                    .fail(&CIRCUIT_BREAKERS_BROKEN, err);
+            }
             CompactionError::Other(err) => {
                 self.compaction_circuit_breaker
                     .lock()
@@ -3858,6 +3933,13 @@ impl Tenant {
             .unwrap_or(self.conf.default_tenant_conf.compaction_threshold)
     }
 
+    pub fn get_rel_size_v2_enabled(&self) -> bool {
+        let tenant_conf = self.tenant_conf.load().tenant_conf.clone();
+        tenant_conf
+            .rel_size_v2_enabled
+            .unwrap_or(self.conf.default_tenant_conf.rel_size_v2_enabled)
+    }
+
     pub fn get_compaction_upper_limit(&self) -> usize {
         let tenant_conf = self.tenant_conf.load().tenant_conf.clone();
         tenant_conf
@@ -4030,6 +4112,7 @@ impl Tenant {
         &self,
         new_timeline_id: TimelineId,
         new_metadata: &TimelineMetadata,
+        previous_heatmap: Option<PreviousHeatmap>,
         ancestor: Option<Arc<Timeline>>,
         resources: TimelineResources,
         cause: CreateTimelineCause,
@@ -4053,6 +4136,7 @@ impl Tenant {
             self.conf,
             Arc::clone(&self.tenant_conf),
             new_metadata,
+            previous_heatmap,
             ancestor,
             new_timeline_id,
             self.tenant_shard_id,
@@ -4695,24 +4779,24 @@ impl Tenant {
         // We check it against both the planned GC cutoff stored in 'gc_info',
         // and the 'latest_gc_cutoff' of the last GC that was performed.  The
         // planned GC cutoff in 'gc_info' is normally larger than
-        // 'latest_gc_cutoff_lsn', but beware of corner cases like if you just
+        // 'applied_gc_cutoff_lsn', but beware of corner cases like if you just
         // changed the GC settings for the tenant to make the PITR window
         // larger, but some of the data was already removed by an earlier GC
         // iteration.
 
         // check against last actual 'latest_gc_cutoff' first
-        let latest_gc_cutoff_lsn = src_timeline.get_latest_gc_cutoff_lsn();
+        let applied_gc_cutoff_lsn = src_timeline.get_applied_gc_cutoff_lsn();
         {
             let gc_info = src_timeline.gc_info.read().unwrap();
             let planned_cutoff = gc_info.min_cutoff();
             if gc_info.lsn_covered_by_lease(start_lsn) {
-                tracing::info!("skipping comparison of {start_lsn} with gc cutoff {} and planned gc cutoff {planned_cutoff} due to lsn lease", *latest_gc_cutoff_lsn);
+                tracing::info!("skipping comparison of {start_lsn} with gc cutoff {} and planned gc cutoff {planned_cutoff} due to lsn lease", *applied_gc_cutoff_lsn);
             } else {
                 src_timeline
-                    .check_lsn_is_in_scope(start_lsn, &latest_gc_cutoff_lsn)
+                    .check_lsn_is_in_scope(start_lsn, &applied_gc_cutoff_lsn)
                     .context(format!(
                         "invalid branch start lsn: less than latest GC cutoff {}",
-                        *latest_gc_cutoff_lsn,
+                        *applied_gc_cutoff_lsn,
                     ))
                     .map_err(CreateTimelineError::AncestorLsn)?;
 
@@ -4751,7 +4835,7 @@ impl Tenant {
             dst_prev,
             Some(src_id),
             start_lsn,
-            *src_timeline.latest_gc_cutoff_lsn.read(), // FIXME: should we hold onto this guard longer?
+            *src_timeline.applied_gc_cutoff_lsn.read(), // FIXME: should we hold onto this guard longer?
             src_timeline.initdb_lsn,
             src_timeline.pg_version,
         );
@@ -5124,6 +5208,7 @@ impl Tenant {
             .create_timeline_struct(
                 new_timeline_id,
                 new_metadata,
+                None,
                 ancestor,
                 resources,
                 CreateTimelineCause::Load,
@@ -5571,7 +5656,7 @@ pub(crate) mod harness {
                 lsn_lease_length_for_ts: Some(tenant_conf.lsn_lease_length_for_ts),
                 timeline_offloading: Some(tenant_conf.timeline_offloading),
                 wal_receiver_protocol_override: tenant_conf.wal_receiver_protocol_override,
-                rel_size_v2_enabled: tenant_conf.rel_size_v2_enabled,
+                rel_size_v2_enabled: Some(tenant_conf.rel_size_v2_enabled),
                 gc_compaction_enabled: Some(tenant_conf.gc_compaction_enabled),
                 gc_compaction_initial_threshold_kb: Some(
                     tenant_conf.gc_compaction_initial_threshold_kb,
@@ -6130,8 +6215,8 @@ mod tests {
         make_some_layers(tline.as_ref(), Lsn(0x20), &ctx).await?;
 
         repo.gc_iteration(Some(TIMELINE_ID), 0x10, Duration::ZERO)?;
-        let latest_gc_cutoff_lsn = tline.get_latest_gc_cutoff_lsn();
-        assert!(*latest_gc_cutoff_lsn > Lsn(0x25));
+        let applied_gc_cutoff_lsn = tline.get_applied_gc_cutoff_lsn();
+        assert!(*applied_gc_cutoff_lsn > Lsn(0x25));
         match tline.get(*TEST_KEY, Lsn(0x25)) {
             Ok(_) => panic!("request for page should have failed"),
             Err(err) => assert!(err.to_string().contains("not found at")),
@@ -7770,18 +7855,6 @@ mod tests {
             }
 
             tline.freeze_and_flush().await?;
-            // Force layers to L1
-            tline
-                .compact(
-                    &cancel,
-                    {
-                        let mut flags = EnumSet::new();
-                        flags.insert(CompactFlags::ForceL0Compaction);
-                        flags
-                    },
-                    &ctx,
-                )
-                .await?;
 
             if iter % 5 == 0 {
                 let (_, before_delta_file_accessed) =
@@ -7794,7 +7867,6 @@ mod tests {
                             let mut flags = EnumSet::new();
                             flags.insert(CompactFlags::ForceImageLayerCreation);
                             flags.insert(CompactFlags::ForceRepartition);
-                            flags.insert(CompactFlags::ForceL0Compaction);
                             flags
                         },
                         &ctx,
@@ -8241,8 +8313,6 @@ mod tests {
 
         let cancel = CancellationToken::new();
 
-        // Image layer creation happens on the disk_consistent_lsn so we need to force set it now.
-        tline.force_set_disk_consistent_lsn(Lsn(0x40));
         tline
             .compact(
                 &cancel,
@@ -8256,7 +8326,8 @@ mod tests {
             )
             .await
             .unwrap();
-        // Image layers are created at repartition LSN
+
+        // Image layers are created at last_record_lsn
         let images = tline
             .inspect_image_layers(Lsn(0x40), &ctx, io_concurrency.clone())
             .await
@@ -8427,7 +8498,7 @@ mod tests {
             .await?;
         {
             tline
-                .latest_gc_cutoff_lsn
+                .applied_gc_cutoff_lsn
                 .lock_for_write()
                 .store_and_unlock(Lsn(0x30))
                 .wait()
@@ -8535,7 +8606,7 @@ mod tests {
         // increase GC horizon and compact again
         {
             tline
-                .latest_gc_cutoff_lsn
+                .applied_gc_cutoff_lsn
                 .lock_for_write()
                 .store_and_unlock(Lsn(0x40))
                 .wait()
@@ -8703,8 +8774,8 @@ mod tests {
 
         // Force set disk consistent lsn so we can get the cutoff at `end_lsn`.
         info!(
-            "latest_gc_cutoff_lsn: {}",
-            *timeline.get_latest_gc_cutoff_lsn()
+            "applied_gc_cutoff_lsn: {}",
+            *timeline.get_applied_gc_cutoff_lsn()
         );
         timeline.force_set_disk_consistent_lsn(end_lsn);
 
@@ -8730,7 +8801,7 @@ mod tests {
 
         // Make lease on a already GC-ed LSN.
         // 0/80 does not have a valid lease + is below latest_gc_cutoff
-        assert!(Lsn(0x80) < *timeline.get_latest_gc_cutoff_lsn());
+        assert!(Lsn(0x80) < *timeline.get_applied_gc_cutoff_lsn());
         timeline
             .init_lsn_lease(Lsn(0x80), timeline.get_lsn_lease_length(), &ctx)
             .expect_err("lease request on GC-ed LSN should fail");
@@ -8921,7 +8992,7 @@ mod tests {
         };
         {
             tline
-                .latest_gc_cutoff_lsn
+                .applied_gc_cutoff_lsn
                 .lock_for_write()
                 .store_and_unlock(Lsn(0x30))
                 .wait()
@@ -9008,7 +9079,7 @@ mod tests {
         // increase GC horizon and compact again
         {
             tline
-                .latest_gc_cutoff_lsn
+                .applied_gc_cutoff_lsn
                 .lock_for_write()
                 .store_and_unlock(Lsn(0x40))
                 .wait()
@@ -9461,7 +9532,7 @@ mod tests {
             .await?;
         {
             tline
-                .latest_gc_cutoff_lsn
+                .applied_gc_cutoff_lsn
                 .lock_for_write()
                 .store_and_unlock(Lsn(0x30))
                 .wait()
@@ -9608,7 +9679,7 @@ mod tests {
         // increase GC horizon and compact again
         {
             tline
-                .latest_gc_cutoff_lsn
+                .applied_gc_cutoff_lsn
                 .lock_for_write()
                 .store_and_unlock(Lsn(0x38))
                 .wait()
@@ -9709,7 +9780,7 @@ mod tests {
             .await?;
         {
             tline
-                .latest_gc_cutoff_lsn
+                .applied_gc_cutoff_lsn
                 .lock_for_write()
                 .store_and_unlock(Lsn(0x30))
                 .wait()
@@ -9960,7 +10031,7 @@ mod tests {
 
         {
             parent_tline
-                .latest_gc_cutoff_lsn
+                .applied_gc_cutoff_lsn
                 .lock_for_write()
                 .store_and_unlock(Lsn(0x10))
                 .wait()
@@ -9980,7 +10051,7 @@ mod tests {
 
         {
             branch_tline
-                .latest_gc_cutoff_lsn
+                .applied_gc_cutoff_lsn
                 .lock_for_write()
                 .store_and_unlock(Lsn(0x50))
                 .wait()
@@ -10336,7 +10407,7 @@ mod tests {
 
         {
             tline
-                .latest_gc_cutoff_lsn
+                .applied_gc_cutoff_lsn
                 .lock_for_write()
                 .store_and_unlock(Lsn(0x30))
                 .wait()
@@ -10721,7 +10792,7 @@ mod tests {
             .await?;
         {
             tline
-                .latest_gc_cutoff_lsn
+                .applied_gc_cutoff_lsn
                 .lock_for_write()
                 .store_and_unlock(Lsn(0x30))
                 .wait()
@@ -10972,7 +11043,7 @@ mod tests {
             .await?;
         {
             tline
-                .latest_gc_cutoff_lsn
+                .applied_gc_cutoff_lsn
                 .lock_for_write()
                 .store_and_unlock(Lsn(0x30))
                 .wait()
diff --git a/pageserver/src/tenant/config.rs b/pageserver/src/tenant/config.rs
index 7fdfd736ad..c6bcfdf2fb 100644
--- a/pageserver/src/tenant/config.rs
+++ b/pageserver/src/tenant/config.rs
@@ -485,7 +485,9 @@ impl TenantConfOpt {
             wal_receiver_protocol_override: self
                 .wal_receiver_protocol_override
                 .or(global_conf.wal_receiver_protocol_override),
-            rel_size_v2_enabled: self.rel_size_v2_enabled.or(global_conf.rel_size_v2_enabled),
+            rel_size_v2_enabled: self
+                .rel_size_v2_enabled
+                .unwrap_or(global_conf.rel_size_v2_enabled),
             gc_compaction_enabled: self
                 .gc_compaction_enabled
                 .unwrap_or(global_conf.gc_compaction_enabled),
diff --git a/pageserver/src/tenant/metadata.rs b/pageserver/src/tenant/metadata.rs
index d281eb305f..15c6955260 100644
--- a/pageserver/src/tenant/metadata.rs
+++ b/pageserver/src/tenant/metadata.rs
@@ -130,7 +130,10 @@ struct TimelineMetadataBodyV2 {
     prev_record_lsn: Option<Lsn>,
     ancestor_timeline: Option<TimelineId>,
     ancestor_lsn: Lsn,
+
+    // The LSN at which GC was last executed.  Synonym of [`Timeline::applied_gc_cutoff_lsn`].
     latest_gc_cutoff_lsn: Lsn,
+
     initdb_lsn: Lsn,
     pg_version: u32,
 }
diff --git a/pageserver/src/tenant/secondary/heatmap.rs b/pageserver/src/tenant/secondary/heatmap.rs
index 4a8e66d38a..0fa10ca294 100644
--- a/pageserver/src/tenant/secondary/heatmap.rs
+++ b/pageserver/src/tenant/secondary/heatmap.rs
@@ -1,4 +1,4 @@
-use std::time::SystemTime;
+use std::{collections::HashMap, time::SystemTime};
 
 use crate::tenant::{remote_timeline_client::index::LayerFileMetadata, storage_layer::LayerName};
 
@@ -8,7 +8,7 @@ use serde_with::{serde_as, DisplayFromStr, TimestampSeconds};
 use utils::{generation::Generation, id::TimelineId};
 
 #[derive(Serialize, Deserialize)]
-pub(super) struct HeatMapTenant {
+pub(crate) struct HeatMapTenant {
     /// Generation of the attached location that uploaded the heatmap: this is not required
     /// for correctness, but acts as a hint to secondary locations in order to detect thrashing
     /// in the unlikely event that two attached locations are both uploading conflicting heatmaps.
@@ -25,8 +25,17 @@ pub(super) struct HeatMapTenant {
     pub(super) upload_period_ms: Option<u128>,
 }
 
+impl HeatMapTenant {
+    pub(crate) fn into_timelines_index(self) -> HashMap<TimelineId, HeatMapTimeline> {
+        self.timelines
+            .into_iter()
+            .map(|htl| (htl.timeline_id, htl))
+            .collect()
+    }
+}
+
 #[serde_as]
-#[derive(Serialize, Deserialize)]
+#[derive(Serialize, Deserialize, Clone)]
 pub(crate) struct HeatMapTimeline {
     #[serde_as(as = "DisplayFromStr")]
     pub(crate) timeline_id: TimelineId,
@@ -35,13 +44,13 @@ pub(crate) struct HeatMapTimeline {
 }
 
 #[serde_as]
-#[derive(Serialize, Deserialize)]
+#[derive(Serialize, Deserialize, Clone)]
 pub(crate) struct HeatMapLayer {
     pub(crate) name: LayerName,
     pub(crate) metadata: LayerFileMetadata,
 
     #[serde_as(as = "TimestampSeconds<i64>")]
-    pub(super) access_time: SystemTime,
+    pub(crate) access_time: SystemTime,
     // TODO: an actual 'heat' score that would let secondary locations prioritize downloading
     // the hottest layers, rather than trying to simply mirror whatever layers are on-disk on the primary.
 }
diff --git a/pageserver/src/tenant/size.rs b/pageserver/src/tenant/size.rs
index 6c3276ea3c..1e84a9d9dc 100644
--- a/pageserver/src/tenant/size.rs
+++ b/pageserver/src/tenant/size.rs
@@ -394,7 +394,7 @@ pub(super) async fn gather_inputs(
             ancestor_lsn,
             last_record: last_record_lsn,
             // this is not used above, because it might not have updated recently enough
-            latest_gc_cutoff: *timeline.get_latest_gc_cutoff_lsn(),
+            latest_gc_cutoff: *timeline.get_applied_gc_cutoff_lsn(),
             next_pitr_cutoff,
             retention_param_cutoff,
             lease_points,
diff --git a/pageserver/src/tenant/storage_layer/layer.rs b/pageserver/src/tenant/storage_layer/layer.rs
index 40282defd4..0bf606cf0a 100644
--- a/pageserver/src/tenant/storage_layer/layer.rs
+++ b/pageserver/src/tenant/storage_layer/layer.rs
@@ -136,6 +136,22 @@ pub(crate) fn local_layer_path(
     }
 }
 
+pub(crate) enum LastEviction {
+    Never,
+    At(std::time::Instant),
+    Evicting,
+}
+
+impl LastEviction {
+    pub(crate) fn happened_after(&self, timepoint: std::time::Instant) -> bool {
+        match self {
+            LastEviction::Never => false,
+            LastEviction::At(evicted_at) => evicted_at > &timepoint,
+            LastEviction::Evicting => true,
+        }
+    }
+}
+
 impl Layer {
     /// Creates a layer value for a file we know to not be resident.
     pub(crate) fn for_evicted(
@@ -405,6 +421,17 @@ impl Layer {
         self.0.metadata()
     }
 
+    pub(crate) fn last_evicted_at(&self) -> LastEviction {
+        match self.0.last_evicted_at.try_lock() {
+            Ok(lock) => match *lock {
+                None => LastEviction::Never,
+                Some(at) => LastEviction::At(at),
+            },
+            Err(std::sync::TryLockError::WouldBlock) => LastEviction::Evicting,
+            Err(std::sync::TryLockError::Poisoned(p)) => panic!("Lock poisoned: {p}"),
+        }
+    }
+
     pub(crate) fn get_timeline_id(&self) -> Option<TimelineId> {
         self.0
             .timeline
@@ -656,7 +683,9 @@ struct LayerInner {
 
     /// When the Layer was last evicted but has not been downloaded since.
     ///
-    /// This is used solely for updating metrics. See [`LayerImplMetrics::redownload_after`].
+    /// This is used for skipping evicted layers from the previous heatmap (see
+    /// `[Timeline::generate_heatmap]`) and for updating metrics
+    /// (see [`LayerImplMetrics::redownload_after`]).
     last_evicted_at: std::sync::Mutex<Option<std::time::Instant>>,
 
     #[cfg(test)]
diff --git a/pageserver/src/tenant/tasks.rs b/pageserver/src/tenant/tasks.rs
index 029444e973..5e63f59fd8 100644
--- a/pageserver/src/tenant/tasks.rs
+++ b/pageserver/src/tenant/tasks.rs
@@ -287,6 +287,7 @@ fn log_compaction_error(
     sleep_duration: Duration,
     task_cancelled: bool,
 ) {
+    use crate::pgdatadir_mapping::CollectKeySpaceError;
     use crate::tenant::upload_queue::NotInitialized;
     use crate::tenant::PageReconstructError;
     use CompactionError::*;
@@ -294,6 +295,8 @@ fn log_compaction_error(
     let level = match err {
         ShuttingDown => return,
         Offload(_) => Level::ERROR,
+        CollectKeySpaceError(CollectKeySpaceError::Cancelled) => Level::INFO,
+        CollectKeySpaceError(_) => Level::ERROR,
         _ if task_cancelled => Level::INFO,
         Other(err) => {
             let root_cause = err.root_cause();
diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs
index aa71ccbbab..48c208d5d7 100644
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -4,6 +4,7 @@ pub mod delete;
 pub(crate) mod detach_ancestor;
 mod eviction_task;
 pub(crate) mod handle;
+mod heatmap_layers_downloader;
 pub(crate) mod import_pgdata;
 mod init;
 pub mod layer_manager;
@@ -21,6 +22,7 @@ use chrono::{DateTime, Utc};
 use compaction::CompactionOutcome;
 use enumset::EnumSet;
 use fail::fail_point;
+use futures::FutureExt;
 use futures::{stream::FuturesUnordered, StreamExt};
 use handle::ShardTimelineId;
 use layer_manager::Shutdown;
@@ -117,7 +119,7 @@ use pageserver_api::config::tenant_conf_defaults::DEFAULT_PITR_INTERVAL;
 use crate::config::PageServerConf;
 use crate::keyspace::{KeyPartitioning, KeySpace};
 use crate::metrics::{TimelineMetrics, DELTAS_PER_READ_GLOBAL, LAYERS_PER_READ_GLOBAL};
-use crate::pgdatadir_mapping::CalculateLogicalSizeError;
+use crate::pgdatadir_mapping::{CalculateLogicalSizeError, MetricsUpdate};
 use crate::tenant::config::TenantConfOpt;
 use pageserver_api::reltag::RelTag;
 use pageserver_api::shard::ShardIndex;
@@ -150,16 +152,15 @@ use super::{
     config::TenantConf, storage_layer::LayerVisibilityHint, upload_queue::NotInitialized,
     MaybeOffloaded,
 };
-use super::{debug_assert_current_span_has_tenant_and_timeline_id, AttachedTenantConf};
+use super::{
+    debug_assert_current_span_has_tenant_and_timeline_id, AttachedTenantConf, HeatMapTimeline,
+};
 use super::{remote_timeline_client::index::IndexPart, storage_layer::LayerFringe};
 use super::{
     remote_timeline_client::RemoteTimelineClient, remote_timeline_client::WaitCompletionError,
     storage_layer::ReadableLayer,
 };
-use super::{
-    secondary::heatmap::{HeatMapLayer, HeatMapTimeline},
-    GcError,
-};
+use super::{secondary::heatmap::HeatMapLayer, GcError};
 
 #[cfg(test)]
 use pageserver_api::value::Value;
@@ -328,6 +329,7 @@ pub struct Timeline {
     // in `crate::page_service` writes these metrics.
     pub(crate) query_metrics: crate::metrics::SmgrQueryTimePerTimeline,
 
+    directory_metrics_inited: [AtomicBool; DirectoryKind::KINDS_NUM],
     directory_metrics: [AtomicU64; DirectoryKind::KINDS_NUM],
 
     /// Ensures layers aren't frozen by checkpointer between
@@ -352,8 +354,11 @@ pub struct Timeline {
     /// to be notified when layer flushing has finished, subscribe to the layer_flush_done channel
     layer_flush_done_tx: tokio::sync::watch::Sender<(u64, Result<(), FlushLayerError>)>,
 
-    // Needed to ensure that we can't create a branch at a point that was already garbage collected
-    pub latest_gc_cutoff_lsn: Rcu<Lsn>,
+    // The LSN at which we have executed GC: whereas [`Self::gc_info`] records the LSN at which
+    // we _intend_ to GC (i.e. the PITR cutoff), this LSN records where we actually last did it.
+    // Because PITR interval is mutable, it's possible for this LSN to be earlier or later than
+    // the planned GC cutoff.
+    pub applied_gc_cutoff_lsn: Rcu<Lsn>,
 
     pub(crate) gc_compaction_layer_update_lock: tokio::sync::RwLock<()>,
 
@@ -462,6 +467,20 @@ pub struct Timeline {
 
     /// If Some, collects GetPage metadata for an ongoing PageTrace.
     pub(crate) page_trace: ArcSwapOption<Sender<PageTraceEvent>>,
+
+    previous_heatmap: ArcSwapOption<PreviousHeatmap>,
+
+    /// May host a background Tokio task which downloads all the layers from the current
+    /// heatmap on demand.
+    heatmap_layers_downloader: Mutex<Option<heatmap_layers_downloader::HeatmapLayersDownloader>>,
+}
+
+pub(crate) enum PreviousHeatmap {
+    Active {
+        heatmap: HeatMapTimeline,
+        read_at: std::time::Instant,
+    },
+    Obsolete,
 }
 
 pub type TimelineDeleteProgress = Arc<tokio::sync::Mutex<DeleteTimelineFlow>>;
@@ -1077,9 +1096,15 @@ impl Timeline {
         (history, gc_info.within_ancestor_pitr)
     }
 
-    /// Lock and get timeline's GC cutoff
-    pub(crate) fn get_latest_gc_cutoff_lsn(&self) -> RcuReadGuard<Lsn> {
-        self.latest_gc_cutoff_lsn.read()
+    /// Read timeline's GC cutoff: this is the LSN at which GC has started to happen
+    pub(crate) fn get_applied_gc_cutoff_lsn(&self) -> RcuReadGuard<Lsn> {
+        self.applied_gc_cutoff_lsn.read()
+    }
+
+    /// Read timeline's planned GC cutoff: this is the logical end of history that users
+    /// are allowed to read (based on configured PITR), even if physically we have more history.
+    pub(crate) fn get_gc_cutoff_lsn(&self) -> Lsn {
+        self.gc_info.read().unwrap().cutoffs.time
     }
 
     /// Look up given page version.
@@ -1274,7 +1299,7 @@ impl Timeline {
         reconstruct_state: &mut ValuesReconstructState,
         ctx: &RequestContext,
     ) -> Result<BTreeMap<Key, Result<Bytes, PageReconstructError>>, GetVectoredError> {
-        let read_path = if self.conf.enable_read_path_debugging {
+        let read_path = if self.conf.enable_read_path_debugging || ctx.read_path_debug() {
             Some(ReadPath::new(keyspace.clone(), lsn))
         } else {
             None
@@ -1587,7 +1612,7 @@ impl Timeline {
                     };
 
                     if init || validate {
-                        let latest_gc_cutoff_lsn = self.get_latest_gc_cutoff_lsn();
+                        let latest_gc_cutoff_lsn = self.get_applied_gc_cutoff_lsn();
                         if lsn < *latest_gc_cutoff_lsn {
                             bail!("tried to request an lsn lease for an lsn below the latest gc cutoff. requested at {} gc cutoff {}", lsn, *latest_gc_cutoff_lsn);
                         }
@@ -1857,7 +1882,7 @@ impl Timeline {
         // Signal compaction failure to avoid L0 flush stalls when it's broken.
         match result {
             Ok(_) => self.compaction_failed.store(false, AtomicOrdering::Relaxed),
-            Err(CompactionError::Other(_)) => {
+            Err(CompactionError::Other(_)) | Err(CompactionError::CollectKeySpaceError(_)) => {
                 self.compaction_failed.store(true, AtomicOrdering::Relaxed)
             }
             // Don't change the current value on offload failure or shutdown. We don't want to
@@ -2020,6 +2045,11 @@ impl Timeline {
         tracing::debug!("Cancelling CancellationToken");
         self.cancel.cancel();
 
+        // If we have a background task downloading heatmap layers stop it.
+        // The background downloads are sensitive to timeline cancellation (done above),
+        // so the drain will be immediate.
+        self.stop_and_drain_heatmap_layers_download().await;
+
         // Ensure Prevent new page service requests from starting.
         self.handles.shutdown();
 
@@ -2337,6 +2367,14 @@ impl Timeline {
             .unwrap_or(self.conf.default_tenant_conf.compaction_threshold)
     }
 
+    pub(crate) fn get_rel_size_v2_enabled(&self) -> bool {
+        let tenant_conf = self.tenant_conf.load();
+        tenant_conf
+            .tenant_conf
+            .rel_size_v2_enabled
+            .unwrap_or(self.conf.default_tenant_conf.rel_size_v2_enabled)
+    }
+
     fn get_compaction_upper_limit(&self) -> usize {
         let tenant_conf = self.tenant_conf.load();
         tenant_conf
@@ -2559,6 +2597,7 @@ impl Timeline {
         conf: &'static PageServerConf,
         tenant_conf: Arc<ArcSwap<AttachedTenantConf>>,
         metadata: &TimelineMetadata,
+        previous_heatmap: Option<PreviousHeatmap>,
         ancestor: Option<Arc<Timeline>>,
         timeline_id: TimelineId,
         tenant_shard_id: TenantShardId,
@@ -2645,6 +2684,7 @@ impl Timeline {
                 ),
 
                 directory_metrics: array::from_fn(|_| AtomicU64::new(0)),
+                directory_metrics_inited: array::from_fn(|_| AtomicBool::new(false)),
 
                 flush_loop_state: Mutex::new(FlushLoopState::NotStarted),
 
@@ -2659,7 +2699,7 @@ impl Timeline {
                     LastImageLayerCreationStatus::default(),
                 )),
 
-                latest_gc_cutoff_lsn: Rcu::new(metadata.latest_gc_cutoff_lsn()),
+                applied_gc_cutoff_lsn: Rcu::new(metadata.latest_gc_cutoff_lsn()),
                 initdb_lsn: metadata.initdb_lsn(),
 
                 current_logical_size: if disk_consistent_lsn.is_valid() {
@@ -2721,6 +2761,10 @@ impl Timeline {
                 create_idempotency,
 
                 page_trace: Default::default(),
+
+                previous_heatmap: ArcSwapOption::from_pointee(previous_heatmap),
+
+                heatmap_layers_downloader: Mutex::new(None),
             };
 
             result.repartition_threshold =
@@ -3409,8 +3453,42 @@ impl Timeline {
         }
     }
 
-    pub(crate) fn update_directory_entries_count(&self, kind: DirectoryKind, count: u64) {
-        self.directory_metrics[kind.offset()].store(count, AtomicOrdering::Relaxed);
+    pub(crate) fn update_directory_entries_count(&self, kind: DirectoryKind, count: MetricsUpdate) {
+        // TODO: this directory metrics is not correct -- we could have multiple reldirs in the system
+        // for each of the database, but we only store one value, and therefore each pgdirmodification
+        // would overwrite the previous value if they modify different databases.
+
+        match count {
+            MetricsUpdate::Set(count) => {
+                self.directory_metrics[kind.offset()].store(count, AtomicOrdering::Relaxed);
+                self.directory_metrics_inited[kind.offset()].store(true, AtomicOrdering::Relaxed);
+            }
+            MetricsUpdate::Add(count) => {
+                // TODO: these operations are not atomic; but we only have one writer to the metrics, so
+                // it's fine.
+                if self.directory_metrics_inited[kind.offset()].load(AtomicOrdering::Relaxed) {
+                    // The metrics has been initialized with `MetricsUpdate::Set` before, so we can add/sub
+                    // the value reliably.
+                    self.directory_metrics[kind.offset()].fetch_add(count, AtomicOrdering::Relaxed);
+                }
+                // Otherwise, ignore this update
+            }
+            MetricsUpdate::Sub(count) => {
+                // TODO: these operations are not atomic; but we only have one writer to the metrics, so
+                // it's fine.
+                if self.directory_metrics_inited[kind.offset()].load(AtomicOrdering::Relaxed) {
+                    // The metrics has been initialized with `MetricsUpdate::Set` before.
+                    // The operation could overflow so we need to normalize the value.
+                    let prev_val =
+                        self.directory_metrics[kind.offset()].load(AtomicOrdering::Relaxed);
+                    let res = prev_val.saturating_sub(count);
+                    self.directory_metrics[kind.offset()].store(res, AtomicOrdering::Relaxed);
+                }
+                // Otherwise, ignore this update
+            }
+        };
+
+        // TODO: remove this, there's no place in the code that updates this aux metrics.
         let aux_metric =
             self.directory_metrics[DirectoryKind::AuxFiles.offset()].load(AtomicOrdering::Relaxed);
 
@@ -3459,12 +3537,52 @@ impl Timeline {
 
         let guard = self.layers.read().await;
 
+        // Firstly, if there's any heatmap left over from when this location
+        // was a secondary, take that into account. Keep layers that are:
+        // * present in the layer map
+        // * visible
+        // * non-resident
+        // * not evicted since we read the heatmap
+        //
+        // Without this, a new cold, attached location would clobber the previous
+        // heatamp.
+        let previous_heatmap = self.previous_heatmap.load();
+        let visible_non_resident = match previous_heatmap.as_deref() {
+            Some(PreviousHeatmap::Active { heatmap, read_at }) => {
+                Some(heatmap.layers.iter().filter_map(|hl| {
+                    let desc: PersistentLayerDesc = hl.name.clone().into();
+                    let layer = guard.try_get_from_key(&desc.key())?;
+
+                    if layer.visibility() == LayerVisibilityHint::Covered {
+                        return None;
+                    }
+
+                    if layer.is_likely_resident() {
+                        return None;
+                    }
+
+                    if layer.last_evicted_at().happened_after(*read_at) {
+                        return None;
+                    }
+
+                    Some((desc, hl.metadata.clone(), hl.access_time))
+                }))
+            }
+            Some(PreviousHeatmap::Obsolete) => None,
+            None => None,
+        };
+
+        // Secondly, all currently visible, resident layers are included.
         let resident = guard.likely_resident_layers().filter_map(|layer| {
             match layer.visibility() {
                 LayerVisibilityHint::Visible => {
                     // Layer is visible to one or more read LSNs: elegible for inclusion in layer map
                     let last_activity_ts = layer.latest_activity();
-                    Some((layer.layer_desc(), layer.metadata(), last_activity_ts))
+                    Some((
+                        layer.layer_desc().clone(),
+                        layer.metadata(),
+                        last_activity_ts,
+                    ))
                 }
                 LayerVisibilityHint::Covered => {
                     // Layer is resident but unlikely to be read: not elegible for inclusion in heatmap.
@@ -3473,7 +3591,18 @@ impl Timeline {
             }
         });
 
-        let mut layers = resident.collect::<Vec<_>>();
+        let mut layers = match visible_non_resident {
+            Some(non_resident) => {
+                let mut non_resident = non_resident.peekable();
+                if non_resident.peek().is_none() {
+                    self.previous_heatmap
+                        .store(Some(PreviousHeatmap::Obsolete.into()));
+                }
+
+                non_resident.chain(resident).collect::<Vec<_>>()
+            }
+            None => resident.collect::<Vec<_>>(),
+        };
 
         // Sort layers in order of which to download first.  For a large set of layers to download, we
         // want to prioritize those layers which are most likely to still be in the resident many minutes
@@ -3577,7 +3706,9 @@ impl Timeline {
             // space. If that's not the case, we had at least one key encounter a gap in the image layer
             // and stop the search as a result of that.
             let mut removed = keyspace.remove_overlapping_with(&image_covered_keyspace);
-            // Do not fire missing key error for sparse keys.
+            // Do not fire missing key error and end early for sparse keys. Note that we hava already removed
+            // non-inherited keyspaces before, so we can safely do a full `SPARSE_RANGE` remove instead of
+            // figuring out what is the inherited key range and do a fine-grained pruning.
             removed.remove_overlapping_with(&KeySpace {
                 ranges: vec![SPARSE_RANGE],
             });
@@ -3662,7 +3793,7 @@ impl Timeline {
         // the timeline, then it will remove layers that are required for fulfilling
         // the current get request (read-path cannot "look back" and notice the new
         // image layer).
-        let _gc_cutoff_holder = timeline.get_latest_gc_cutoff_lsn();
+        let _gc_cutoff_holder = timeline.get_applied_gc_cutoff_lsn();
 
         // See `compaction::compact_with_gc` for why we need this.
         let _guard = timeline.gc_compaction_layer_update_lock.read().await;
@@ -4349,7 +4480,7 @@ impl Timeline {
         let update = crate::tenant::metadata::MetadataUpdate::new(
             disk_consistent_lsn,
             ondisk_prev_record_lsn,
-            *self.latest_gc_cutoff_lsn.read(),
+            *self.applied_gc_cutoff_lsn.read(),
         );
 
         fail_point!("checkpoint-before-saving-metadata", |x| bail!(
@@ -4474,7 +4605,10 @@ impl Timeline {
             ));
         }
 
-        let (dense_ks, sparse_ks) = self.collect_keyspace(lsn, ctx).await?;
+        let (dense_ks, sparse_ks) = self
+            .collect_keyspace(lsn, ctx)
+            .await
+            .map_err(CompactionError::CollectKeySpaceError)?;
         let dense_partitioning = dense_ks.partition(&self.shard_identity, partition_size);
         let sparse_partitioning = SparseKeyPartitioning {
             parts: vec![sparse_ks],
@@ -4995,20 +5129,26 @@ impl Timeline {
                     // image layer generation taking too long time and blocking L0 compaction. So in this
                     // mode, we also inspect the current number of L0 layers and skip image layer generation
                     // if there are too many of them.
-                    let num_of_l0_layers = {
-                        let layers = self.layers.read().await;
-                        layers.layer_map()?.level0_deltas().len()
-                    };
                     let image_preempt_threshold = self.get_image_creation_preempt_threshold()
                         * self.get_compaction_threshold();
-                    if image_preempt_threshold != 0 && num_of_l0_layers >= image_preempt_threshold {
-                        tracing::info!(
-                        "preempt image layer generation at {lsn} when processing partition {}..{}: too many L0 layers {}",
-                        partition.start().unwrap(), partition.end().unwrap(), num_of_l0_layers
-                    );
-                        last_partition_processed = Some(partition.clone());
-                        all_generated = false;
-                        break;
+                    // TODO: currently we do not respect `get_image_creation_preempt_threshold` and always yield
+                    // when there is a single timeline with more than L0 threshold L0 layers. As long as the
+                    // `get_image_creation_preempt_threshold` is set to a value greater than 0, we will yield for L0 compaction.
+                    if image_preempt_threshold != 0 {
+                        let should_yield = self
+                            .l0_compaction_trigger
+                            .notified()
+                            .now_or_never()
+                            .is_some();
+                        if should_yield {
+                            tracing::info!(
+                                "preempt image layer generation at {lsn} when processing partition {}..{}: too many L0 layers",
+                                partition.start().unwrap(), partition.end().unwrap()
+                            );
+                            last_partition_processed = Some(partition.clone());
+                            all_generated = false;
+                            break;
+                        }
                     }
                 }
             }
@@ -5037,14 +5177,16 @@ impl Timeline {
             .map(|l| l.metadata().file_size)
             .sum::<u64>();
 
-        info!(
-            "created {} image layers ({} bytes) in {}s, processed {} out of {} partitions",
-            image_layers.len(),
-            total_layer_size,
-            duration.as_secs_f64(),
-            partition_processed,
-            total_partitions
-        );
+        if !image_layers.is_empty() {
+            info!(
+                "created {} image layers ({} bytes) in {}s, processed {} out of {} partitions",
+                image_layers.len(),
+                total_layer_size,
+                duration.as_secs_f64(),
+                partition_processed,
+                total_partitions
+            );
+        }
 
         Ok((
             image_layers,
@@ -5187,6 +5329,8 @@ pub(crate) enum CompactionError {
     #[error("Failed to offload timeline: {0}")]
     Offload(OffloadError),
     /// Compaction cannot be done right now; page reconstruction and so on.
+    #[error("Failed to collect keyspace: {0}")]
+    CollectKeySpaceError(CollectKeySpaceError),
     #[error(transparent)]
     Other(anyhow::Error),
 }
@@ -5577,7 +5721,7 @@ impl Timeline {
                 // PITR interval is set & we didn't look up a timestamp successfully.  Conservatively assume PITR
                 // cannot advance beyond what was already GC'd, and respect space-based retention
                 GcCutoffs {
-                    time: *self.get_latest_gc_cutoff_lsn(),
+                    time: *self.get_applied_gc_cutoff_lsn(),
                     space: space_cutoff,
                 }
             }
@@ -5698,7 +5842,7 @@ impl Timeline {
         let mut result: GcResult = GcResult::default();
 
         // Nothing to GC. Return early.
-        let latest_gc_cutoff = *self.get_latest_gc_cutoff_lsn();
+        let latest_gc_cutoff = *self.get_applied_gc_cutoff_lsn();
         if latest_gc_cutoff >= new_gc_cutoff {
             info!(
                 "Nothing to GC: new_gc_cutoff_lsn {new_gc_cutoff}, latest_gc_cutoff_lsn {latest_gc_cutoff}",
@@ -5712,7 +5856,7 @@ impl Timeline {
         //
         // The GC cutoff should only ever move forwards.
         let waitlist = {
-            let write_guard = self.latest_gc_cutoff_lsn.lock_for_write();
+            let write_guard = self.applied_gc_cutoff_lsn.lock_for_write();
             if *write_guard > new_gc_cutoff {
                 return Err(GcError::BadLsn {
                     why: format!(
@@ -6652,18 +6796,32 @@ fn is_send() {
 
 #[cfg(test)]
 mod tests {
+    use std::sync::Arc;
+
     use pageserver_api::key::Key;
     use pageserver_api::value::Value;
+    use tracing::Instrument;
     use utils::{id::TimelineId, lsn::Lsn};
 
     use crate::tenant::{
         harness::{test_img, TenantHarness},
         layer_map::LayerMap,
-        storage_layer::{Layer, LayerName},
+        storage_layer::{Layer, LayerName, LayerVisibilityHint},
         timeline::{DeltaLayerTestDesc, EvictionError},
-        Timeline,
+        PreviousHeatmap, Timeline,
     };
 
+    use super::HeatMapTimeline;
+
+    fn assert_heatmaps_have_same_layers(lhs: &HeatMapTimeline, rhs: &HeatMapTimeline) {
+        assert_eq!(lhs.layers.len(), rhs.layers.len());
+        let lhs_rhs = lhs.layers.iter().zip(rhs.layers.iter());
+        for (l, r) in lhs_rhs {
+            assert_eq!(l.name, r.name);
+            assert_eq!(l.metadata, r.metadata);
+        }
+    }
+
     #[tokio::test]
     async fn test_heatmap_generation() {
         let harness = TenantHarness::create("heatmap_generation").await.unwrap();
@@ -6737,7 +6895,7 @@ mod tests {
         assert_eq!(heatmap.layers.last().unwrap().name, l0_delta.layer_name());
 
         let mut last_lsn = Lsn::MAX;
-        for layer in heatmap.layers {
+        for layer in &heatmap.layers {
             // Covered layer should be omitted
             assert!(layer.name != covered_delta.layer_name());
 
@@ -6752,6 +6910,144 @@ mod tests {
                 last_lsn = layer_lsn;
             }
         }
+
+        // Evict all the layers and stash the old heatmap in the timeline.
+        // This simulates a migration to a cold secondary location.
+
+        let guard = timeline.layers.read().await;
+        let mut all_layers = Vec::new();
+        let forever = std::time::Duration::from_secs(120);
+        for layer in guard.likely_resident_layers() {
+            all_layers.push(layer.clone());
+            layer.evict_and_wait(forever).await.unwrap();
+        }
+        drop(guard);
+
+        timeline
+            .previous_heatmap
+            .store(Some(Arc::new(PreviousHeatmap::Active {
+                heatmap: heatmap.clone(),
+                read_at: std::time::Instant::now(),
+            })));
+
+        // Generate a new heatmap and assert that it contains the same layers as the old one.
+        let post_migration_heatmap = timeline.generate_heatmap().await.unwrap();
+        assert_heatmaps_have_same_layers(&heatmap, &post_migration_heatmap);
+
+        // Download each layer one by one. Generate the heatmap at each step and check
+        // that it's stable.
+        for layer in all_layers {
+            if layer.visibility() == LayerVisibilityHint::Covered {
+                continue;
+            }
+
+            eprintln!("Downloading {layer} and re-generating heatmap");
+
+            let _resident = layer
+                .download_and_keep_resident()
+                .instrument(tracing::info_span!(
+                    parent: None,
+                    "download_layer",
+                    tenant_id = %timeline.tenant_shard_id.tenant_id,
+                    shard_id = %timeline.tenant_shard_id.shard_slug(),
+                    timeline_id = %timeline.timeline_id
+                ))
+                .await
+                .unwrap();
+
+            let post_download_heatmap = timeline.generate_heatmap().await.unwrap();
+            assert_heatmaps_have_same_layers(&heatmap, &post_download_heatmap);
+        }
+
+        // Everything from the post-migration heatmap is now resident.
+        // Check that we drop it from memory.
+        assert!(matches!(
+            timeline.previous_heatmap.load().as_deref(),
+            Some(PreviousHeatmap::Obsolete)
+        ));
+    }
+
+    #[tokio::test]
+    async fn test_previous_heatmap_obsoletion() {
+        let harness = TenantHarness::create("heatmap_previous_heatmap_obsoletion")
+            .await
+            .unwrap();
+
+        let l0_delta = DeltaLayerTestDesc::new(
+            Lsn(0x20)..Lsn(0x30),
+            Key::from_hex("000000000000000000000000000000000000").unwrap()
+                ..Key::from_hex("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF").unwrap(),
+            vec![(
+                Key::from_hex("720000000033333333444444445500000000").unwrap(),
+                Lsn(0x25),
+                Value::Image(test_img("foo")),
+            )],
+        );
+
+        let image_layer = (
+            Lsn(0x40),
+            vec![(
+                Key::from_hex("620000000033333333444444445500000000").unwrap(),
+                test_img("bar"),
+            )],
+        );
+
+        let delta_layers = vec![l0_delta];
+        let image_layers = vec![image_layer];
+
+        let (tenant, ctx) = harness.load().await;
+        let timeline = tenant
+            .create_test_timeline_with_layers(
+                TimelineId::generate(),
+                Lsn(0x10),
+                14,
+                &ctx,
+                delta_layers,
+                image_layers,
+                Lsn(0x100),
+            )
+            .await
+            .unwrap();
+
+        // Layer visibility is an input to heatmap generation, so refresh it first
+        timeline.update_layer_visibility().await.unwrap();
+
+        let heatmap = timeline
+            .generate_heatmap()
+            .await
+            .expect("Infallible while timeline is not shut down");
+
+        // Both layers should be in the heatmap
+        assert!(!heatmap.layers.is_empty());
+
+        // Now simulate a migration.
+        timeline
+            .previous_heatmap
+            .store(Some(Arc::new(PreviousHeatmap::Active {
+                heatmap: heatmap.clone(),
+                read_at: std::time::Instant::now(),
+            })));
+
+        // Evict all the layers in the previous heatmap
+        let guard = timeline.layers.read().await;
+        let forever = std::time::Duration::from_secs(120);
+        for layer in guard.likely_resident_layers() {
+            layer.evict_and_wait(forever).await.unwrap();
+        }
+        drop(guard);
+
+        // Generate a new heatmap and check that the previous heatmap
+        // has been marked obsolete.
+        let post_eviction_heatmap = timeline
+            .generate_heatmap()
+            .await
+            .expect("Infallible while timeline is not shut down");
+
+        assert!(post_eviction_heatmap.layers.is_empty());
+        assert!(matches!(
+            timeline.previous_heatmap.load().as_deref(),
+            Some(PreviousHeatmap::Obsolete)
+        ));
     }
 
     #[tokio::test]
diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs
index 5b915c50d3..4e4f906d78 100644
--- a/pageserver/src/tenant/timeline/compaction.rs
+++ b/pageserver/src/tenant/timeline/compaction.rs
@@ -11,7 +11,8 @@ use std::sync::Arc;
 use super::layer_manager::LayerManager;
 use super::{
     CompactFlags, CompactOptions, CreateImageLayersError, DurationRecorder, GetVectoredError,
-    ImageLayerCreationMode, LastImageLayerCreationStatus, RecordedDuration, Timeline,
+    ImageLayerCreationMode, LastImageLayerCreationStatus, PageReconstructError, RecordedDuration,
+    Timeline,
 };
 
 use anyhow::{anyhow, bail, Context};
@@ -31,6 +32,7 @@ use utils::id::TimelineId;
 
 use crate::context::{AccessStatsBehavior, RequestContext, RequestContextBuilder};
 use crate::page_cache;
+use crate::pgdatadir_mapping::CollectKeySpaceError;
 use crate::statvfs::Statvfs;
 use crate::tenant::checks::check_valid_layermap;
 use crate::tenant::gc_block::GcBlock;
@@ -301,18 +303,12 @@ impl GcCompactionQueue {
                         let mut guard = self.inner.lock().unwrap();
                         guard.gc_guards.insert(id, gc_guard);
                     }
-                    let _ = timeline
-                        .compact_with_options(cancel, options, ctx)
-                        .instrument(info_span!("scheduled_compact_timeline", %timeline.timeline_id))
-                        .await?;
+                    let _ = timeline.compact_with_options(cancel, options, ctx).await?;
                     self.notify_and_unblock(id);
                 }
             }
             GcCompactionQueueItem::SubCompactionJob(options) => {
-                let _ = timeline
-                    .compact_with_options(cancel, options, ctx)
-                    .instrument(info_span!("scheduled_compact_timeline", %timeline.timeline_id))
-                    .await?;
+                let _ = timeline.compact_with_options(cancel, options, ctx).await?;
             }
             GcCompactionQueueItem::Notify(id) => {
                 self.notify_and_unblock(id);
@@ -692,21 +688,6 @@ impl Timeline {
 
         // Define partitioning schema if needed
 
-        let l0_l1_boundary_lsn = {
-            // We do the repartition on the L0-L1 boundary. All data below the boundary
-            // are compacted by L0 with low read amplification, thus making the `repartition`
-            // function run fast.
-            let guard = self.layers.read().await;
-            let l0_min_lsn = guard
-                .layer_map()?
-                .level0_deltas()
-                .iter()
-                .map(|l| l.get_lsn_range().start)
-                .min()
-                .unwrap_or(self.get_disk_consistent_lsn());
-            l0_min_lsn.max(self.get_ancestor_lsn())
-        };
-
         // 1. L0 Compact
         let l0_outcome = {
             let timer = self.metrics.compact_time_histo.start_timer();
@@ -733,86 +714,89 @@ impl Timeline {
             return Ok(CompactionOutcome::YieldForL0);
         }
 
-        if l0_l1_boundary_lsn < self.partitioning.read().1 {
-            // We never go backwards when repartition and create image layers.
-            info!("skipping image layer generation because repartition LSN is greater than L0-L1 boundary LSN.");
-        } else {
-            // 2. Repartition and create image layers if necessary
-            match self
-                .repartition(
-                    l0_l1_boundary_lsn,
-                    self.get_compaction_target_size(),
-                    options.flags,
-                    ctx,
-                )
-                .await
-            {
-                Ok(((dense_partitioning, sparse_partitioning), lsn)) => {
-                    // Disables access_stats updates, so that the files we read remain candidates for eviction after we're done with them
-                    let image_ctx = RequestContextBuilder::extend(ctx)
-                        .access_stats_behavior(AccessStatsBehavior::Skip)
-                        .build();
+        // 2. Repartition and create image layers if necessary
+        match self
+            .repartition(
+                self.get_last_record_lsn(),
+                self.get_compaction_target_size(),
+                options.flags,
+                ctx,
+            )
+            .await
+        {
+            Ok(((dense_partitioning, sparse_partitioning), lsn)) => {
+                // Disables access_stats updates, so that the files we read remain candidates for eviction after we're done with them
+                let image_ctx = RequestContextBuilder::extend(ctx)
+                    .access_stats_behavior(AccessStatsBehavior::Skip)
+                    .build();
 
-                    let mut partitioning = dense_partitioning;
-                    partitioning
-                        .parts
-                        .extend(sparse_partitioning.into_dense().parts);
+                let mut partitioning = dense_partitioning;
+                partitioning
+                    .parts
+                    .extend(sparse_partitioning.into_dense().parts);
 
-                    // 3. Create new image layers for partitions that have been modified "enough".
-                    let (image_layers, outcome) = self
-                        .create_image_layers(
-                            &partitioning,
-                            lsn,
-                            if options
-                                .flags
-                                .contains(CompactFlags::ForceImageLayerCreation)
-                            {
-                                ImageLayerCreationMode::Force
-                            } else {
-                                ImageLayerCreationMode::Try
-                            },
-                            &image_ctx,
-                            self.last_image_layer_creation_status
-                                .load()
-                                .as_ref()
-                                .clone(),
-                            !options.flags.contains(CompactFlags::NoYield),
-                        )
-                        .await
-                        .inspect_err(|err| {
-                            if let CreateImageLayersError::GetVectoredError(
-                                GetVectoredError::MissingKey(_),
-                            ) = err
-                            {
-                                critical!("missing key during compaction: {err:?}");
-                            }
-                        })?;
+                // 3. Create new image layers for partitions that have been modified "enough".
+                let (image_layers, outcome) = self
+                    .create_image_layers(
+                        &partitioning,
+                        lsn,
+                        if options
+                            .flags
+                            .contains(CompactFlags::ForceImageLayerCreation)
+                        {
+                            ImageLayerCreationMode::Force
+                        } else {
+                            ImageLayerCreationMode::Try
+                        },
+                        &image_ctx,
+                        self.last_image_layer_creation_status
+                            .load()
+                            .as_ref()
+                            .clone(),
+                        !options.flags.contains(CompactFlags::NoYield),
+                    )
+                    .await
+                    .inspect_err(|err| {
+                        if let CreateImageLayersError::GetVectoredError(
+                            GetVectoredError::MissingKey(_),
+                        ) = err
+                        {
+                            critical!("missing key during compaction: {err:?}");
+                        }
+                    })?;
 
-                    self.last_image_layer_creation_status
-                        .store(Arc::new(outcome.clone()));
+                self.last_image_layer_creation_status
+                    .store(Arc::new(outcome.clone()));
 
-                    self.upload_new_image_layers(image_layers)?;
-                    if let LastImageLayerCreationStatus::Incomplete { .. } = outcome {
-                        // Yield and do not do any other kind of compaction.
-                        info!("skipping shard ancestor compaction due to pending image layer generation tasks (preempted by L0 compaction).");
-                        return Ok(CompactionOutcome::YieldForL0);
-                    }
+                self.upload_new_image_layers(image_layers)?;
+                if let LastImageLayerCreationStatus::Incomplete { .. } = outcome {
+                    // Yield and do not do any other kind of compaction.
+                    info!("skipping shard ancestor compaction due to pending image layer generation tasks (preempted by L0 compaction).");
+                    return Ok(CompactionOutcome::YieldForL0);
                 }
-                Err(err) => {
-                    // no partitioning? This is normal, if the timeline was just created
-                    // as an empty timeline. Also in unit tests, when we use the timeline
-                    // as a simple key-value store, ignoring the datadir layout. Log the
-                    // error but continue.
-                    //
-                    // Suppress error when it's due to cancellation
-                    if !self.cancel.is_cancelled() && !err.is_cancelled() {
+            }
+            Err(err) => {
+                // no partitioning? This is normal, if the timeline was just created
+                // as an empty timeline. Also in unit tests, when we use the timeline
+                // as a simple key-value store, ignoring the datadir layout. Log the
+                // error but continue.
+                //
+                // Suppress error when it's due to cancellation
+                if !self.cancel.is_cancelled() && !err.is_cancelled() {
+                    if let CompactionError::CollectKeySpaceError(
+                        CollectKeySpaceError::Decode(_)
+                        | CollectKeySpaceError::PageRead(PageReconstructError::MissingKey(_)),
+                    ) = err
+                    {
+                        critical!("could not compact, repartitioning keyspace failed: {err:?}");
+                    } else {
                         tracing::error!(
                             "could not compact, repartitioning keyspace failed: {err:?}"
                         );
                     }
                 }
-            };
-        }
+            }
+        };
 
         let partition_count = self.partitioning.read().0 .0.parts.len();
 
@@ -852,7 +836,7 @@ impl Timeline {
         //
         // Holding this read guard also blocks [`Self::gc_timeline`] from entering while we
         // are rewriting layers.
-        let latest_gc_cutoff = self.get_latest_gc_cutoff_lsn();
+        let latest_gc_cutoff = self.get_applied_gc_cutoff_lsn();
 
         tracing::info!(
             "latest_gc_cutoff: {}, pitr cutoff {}",
@@ -2202,7 +2186,7 @@ impl Timeline {
 
         // TODO: ensure the child branches will not use anything below the watermark, or consider
         // them when computing the watermark.
-        gc_cutoff_lsn.min(*self.get_latest_gc_cutoff_lsn())
+        gc_cutoff_lsn.min(*self.get_applied_gc_cutoff_lsn())
     }
 
     /// Split a gc-compaction job into multiple compaction jobs. The split is based on the key range and the estimated size of the compaction job.
diff --git a/pageserver/src/tenant/timeline/delete.rs b/pageserver/src/tenant/timeline/delete.rs
index 93b7efedb8..841b2fa1c7 100644
--- a/pageserver/src/tenant/timeline/delete.rs
+++ b/pageserver/src/tenant/timeline/delete.rs
@@ -294,6 +294,7 @@ impl DeleteTimelineFlow {
                 timeline_id,
                 local_metadata,
                 None, // Ancestor is not needed for deletion.
+                None, // Previous heatmap is not needed for deletion
                 tenant.get_timeline_resources_for(remote_client),
                 // Important. We dont pass ancestor above because it can be missing.
                 // Thus we need to skip the validation here.
diff --git a/pageserver/src/tenant/timeline/heatmap_layers_downloader.rs b/pageserver/src/tenant/timeline/heatmap_layers_downloader.rs
new file mode 100644
index 0000000000..0ba9753e85
--- /dev/null
+++ b/pageserver/src/tenant/timeline/heatmap_layers_downloader.rs
@@ -0,0 +1,162 @@
+//! Timeline utility module to hydrate everything from the current heatmap.
+//!
+//! Provides utilities to spawn and abort a background task where the downloads happen.
+//! See /v1/tenant/:tenant_shard_id/timeline/:timeline_id/download_heatmap_layers.
+
+use futures::StreamExt;
+use http_utils::error::ApiError;
+use std::sync::{Arc, Mutex};
+use tokio_util::sync::CancellationToken;
+use utils::sync::gate::Gate;
+
+use super::Timeline;
+
+// This status is not strictly necessary now, but gives us a nice place
+// to store progress information if we ever wish to expose it.
+pub(super) enum HeatmapLayersDownloadStatus {
+    InProgress,
+    Complete,
+}
+
+pub(super) struct HeatmapLayersDownloader {
+    handle: tokio::task::JoinHandle<()>,
+    status: Arc<Mutex<HeatmapLayersDownloadStatus>>,
+    cancel: CancellationToken,
+    downloads_guard: Arc<Gate>,
+}
+
+impl HeatmapLayersDownloader {
+    fn new(
+        timeline: Arc<Timeline>,
+        concurrency: usize,
+    ) -> Result<HeatmapLayersDownloader, ApiError> {
+        let tl_guard = timeline.gate.enter().map_err(|_| ApiError::Cancelled)?;
+
+        let cancel = timeline.cancel.child_token();
+        let downloads_guard = Arc::new(Gate::default());
+
+        let status = Arc::new(Mutex::new(HeatmapLayersDownloadStatus::InProgress));
+
+        let handle = tokio::task::spawn({
+            let status = status.clone();
+            let downloads_guard = downloads_guard.clone();
+            let cancel = cancel.clone();
+
+            async move {
+                let _guard = tl_guard;
+
+                scopeguard::defer! {
+                    *status.lock().unwrap() = HeatmapLayersDownloadStatus::Complete;
+                }
+
+                let Some(heatmap) = timeline.generate_heatmap().await else {
+                    tracing::info!("Heatmap layers download failed to generate heatmap");
+                    return;
+                };
+
+                tracing::info!(
+                    resident_size=%timeline.resident_physical_size(),
+                    heatmap_layers=%heatmap.layers.len(),
+                    "Starting heatmap layers download"
+                );
+
+                let stream = futures::stream::iter(heatmap.layers.into_iter().filter_map(
+                    |layer| {
+                        let tl = timeline.clone();
+                        let dl_guard = match downloads_guard.enter() {
+                            Ok(g) => g,
+                            Err(_) => {
+                                // [`Self::shutdown`] was called. Don't spawn any more downloads.
+                                return None;
+                            }
+                        };
+
+                        Some(async move {
+                            let _dl_guard = dl_guard;
+
+                            let res = tl.download_layer(&layer.name).await;
+                            if let Err(err) = res {
+                                if !err.is_cancelled() {
+                                    tracing::warn!(layer=%layer.name,"Failed to download heatmap layer: {err}")
+                                }
+                            }
+                        })
+                    }
+                )).buffered(concurrency);
+
+                tokio::select! {
+                    _ = stream.collect::<()>() => {
+                        tracing::info!(
+                            resident_size=%timeline.resident_physical_size(),
+                            "Heatmap layers download completed"
+                        );
+                    },
+                    _ = cancel.cancelled() => {
+                        tracing::info!("Heatmap layers download cancelled");
+                    }
+                }
+            }
+        });
+
+        Ok(Self {
+            status,
+            handle,
+            cancel,
+            downloads_guard,
+        })
+    }
+
+    fn is_complete(&self) -> bool {
+        matches!(
+            *self.status.lock().unwrap(),
+            HeatmapLayersDownloadStatus::Complete
+        )
+    }
+
+    /// Drive any in-progress downloads to completion and stop spawning any new ones.
+    ///
+    /// This has two callers and they behave differently
+    /// 1. [`Timeline::shutdown`]: the drain will be immediate since downloads themselves
+    ///    are sensitive to timeline cancellation.
+    ///
+    /// 2. Endpoint handler in [`crate::http::routes`]: the drain will wait for any in-progress
+    ///    downloads to complete.
+    async fn stop_and_drain(self) {
+        // Counterintuitive: close the guard before cancelling.
+        // Something needs to poll the already created download futures to completion.
+        // If we cancel first, then the underlying task exits and we lost
+        // the poller.
+        self.downloads_guard.close().await;
+        self.cancel.cancel();
+        if let Err(err) = self.handle.await {
+            tracing::warn!("Failed to join heatmap layer downloader task: {err}");
+        }
+    }
+}
+
+impl Timeline {
+    pub(crate) async fn start_heatmap_layers_download(
+        self: &Arc<Self>,
+        concurrency: usize,
+    ) -> Result<(), ApiError> {
+        let mut locked = self.heatmap_layers_downloader.lock().unwrap();
+        if locked.as_ref().map(|dl| dl.is_complete()).unwrap_or(true) {
+            let dl = HeatmapLayersDownloader::new(self.clone(), concurrency)?;
+            *locked = Some(dl);
+            Ok(())
+        } else {
+            Err(ApiError::Conflict("Already running".to_string()))
+        }
+    }
+
+    pub(crate) async fn stop_and_drain_heatmap_layers_download(&self) {
+        // This can race with the start of a new downloader and lead to a situation
+        // where one donloader is shutting down and another one is in-flight.
+        // The only impact is that we'd end up using more remote storage semaphore
+        // units than expected.
+        let downloader = self.heatmap_layers_downloader.lock().unwrap().take();
+        if let Some(dl) = downloader {
+            dl.stop_and_drain().await;
+        }
+    }
+}
diff --git a/pageserver/src/tenant/timeline/offload.rs b/pageserver/src/tenant/timeline/offload.rs
index 3b5bf8290c..93e5a1100d 100644
--- a/pageserver/src/tenant/timeline/offload.rs
+++ b/pageserver/src/tenant/timeline/offload.rs
@@ -7,7 +7,9 @@ use super::Timeline;
 use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
 use crate::tenant::remote_timeline_client::ShutdownIfArchivedError;
 use crate::tenant::timeline::delete::{make_timeline_delete_guard, TimelineDeleteGuardKind};
-use crate::tenant::{OffloadedTimeline, Tenant, TenantManifestError, TimelineOrOffloaded};
+use crate::tenant::{
+    DeleteTimelineError, OffloadedTimeline, Tenant, TenantManifestError, TimelineOrOffloaded,
+};
 
 #[derive(thiserror::Error, Debug)]
 pub(crate) enum OffloadError {
@@ -37,12 +39,25 @@ pub(crate) async fn offload_timeline(
     debug_assert_current_span_has_tenant_and_timeline_id();
     tracing::info!("offloading archived timeline");
 
-    let (timeline, guard) = make_timeline_delete_guard(
+    let delete_guard_res = make_timeline_delete_guard(
         tenant,
         timeline.timeline_id,
         TimelineDeleteGuardKind::Offload,
-    )
-    .map_err(|e| OffloadError::Other(anyhow::anyhow!(e)))?;
+    );
+    if let Err(DeleteTimelineError::HasChildren(children)) = delete_guard_res {
+        let is_archived = timeline.is_archived();
+        if is_archived == Some(true) {
+            tracing::error!("timeline is archived but has non-archived children: {children:?}");
+            return Err(OffloadError::NotArchived);
+        }
+        tracing::info!(
+            ?is_archived,
+            "timeline is not archived and has unarchived children"
+        );
+        return Err(OffloadError::NotArchived);
+    };
+    let (timeline, guard) =
+        delete_guard_res.map_err(|e| OffloadError::Other(anyhow::anyhow!(e)))?;
 
     let TimelineOrOffloaded::Timeline(timeline) = timeline else {
         tracing::error!("timeline already offloaded, but given timeline object");
diff --git a/pageserver/src/virtual_file.rs b/pageserver/src/virtual_file.rs
index 9d539198c7..c966ad813f 100644
--- a/pageserver/src/virtual_file.rs
+++ b/pageserver/src/virtual_file.rs
@@ -496,7 +496,8 @@ pub(crate) fn is_fatal_io_error(e: &std::io::Error) -> bool {
 /// bad storage or bad configuration, and we can't fix that from inside
 /// a running process.
 pub(crate) fn on_fatal_io_error(e: &std::io::Error, context: &str) -> ! {
-    tracing::error!("Fatal I/O error: {e}: {context})");
+    let backtrace = std::backtrace::Backtrace::force_capture();
+    tracing::error!("Fatal I/O error: {e}: {context})\n{backtrace}");
     std::process::abort();
 }
 
@@ -947,13 +948,18 @@ impl VirtualFileInner {
     where
         Buf: tokio_epoll_uring::IoBufMut + Send,
     {
-        let file_guard = match self.lock_file().await {
+        let file_guard = match self
+            .lock_file()
+            .await
+            .maybe_fatal_err("lock_file inside VirtualFileInner::read_at")
+        {
             Ok(file_guard) => file_guard,
             Err(e) => return (buf, Err(e)),
         };
 
         observe_duration!(StorageIoOperation::Read, {
             let ((_file_guard, buf), res) = io_engine::get().read_at(file_guard, offset, buf).await;
+            let res = res.maybe_fatal_err("io_engine read_at inside VirtualFileInner::read_at");
             if let Ok(size) = res {
                 STORAGE_IO_SIZE
                     .with_label_values(&[
diff --git a/pgxn/neon/extension_server.c b/pgxn/neon/extension_server.c
index e38af08f89..0331f961b4 100644
--- a/pgxn/neon/extension_server.c
+++ b/pgxn/neon/extension_server.c
@@ -14,10 +14,12 @@
 
 #include "utils/guc.h"
 
-#include "extension_server.h" 
+#include "extension_server.h"
 #include "neon_utils.h"
 
 static int	extension_server_port = 0;
+static int	extension_server_request_timeout = 60;
+static int	extension_server_connect_timeout = 60;
 
 static download_extension_file_hook_type prev_download_extension_file_hook = NULL;
 
@@ -34,19 +36,18 @@ static download_extension_file_hook_type prev_download_extension_file_hook = NUL
 static bool
 neon_download_extension_file_http(const char *filename, bool is_library)
 {
-	static CURL	   *handle = NULL;
-
 	CURLcode	res;
-	char	   *compute_ctl_url;
 	bool		ret = false;
+	CURL	   *handle = NULL;
+	char	   *compute_ctl_url;
 
-	if (handle == NULL)
-	{
-		handle = alloc_curl_handle();
+	handle = alloc_curl_handle();
 
-		curl_easy_setopt(handle, CURLOPT_CUSTOMREQUEST, "POST");
-		curl_easy_setopt(handle, CURLOPT_TIMEOUT, 3L /* seconds */ );
-	}
+	curl_easy_setopt(handle, CURLOPT_CUSTOMREQUEST, "POST");
+	if (extension_server_request_timeout > 0)
+		curl_easy_setopt(handle, CURLOPT_TIMEOUT, (long)extension_server_request_timeout /* seconds */ );
+	if (extension_server_connect_timeout > 0)
+		curl_easy_setopt(handle, CURLOPT_CONNECTTIMEOUT, (long)extension_server_connect_timeout /* seconds */ );
 
 	compute_ctl_url = psprintf("http://localhost:%d/extension_server/%s%s",
 							   extension_server_port, filename, is_library ? "?is_library=true" : "");
@@ -57,6 +58,8 @@ neon_download_extension_file_http(const char *filename, bool is_library)
 
 	/* Perform the request, res will get the return code */
 	res = curl_easy_perform(handle);
+	curl_easy_cleanup(handle);
+
 	/* Check for errors */
 	if (res == CURLE_OK)
 	{
@@ -88,6 +91,24 @@ pg_init_extension_server()
 							0,	/* no flags required */
 							NULL, NULL, NULL);
 
+	DefineCustomIntVariable("neon.extension_server_request_timeout",
+							"timeout for fetching extensions in seconds",
+							NULL,
+							&extension_server_request_timeout,
+							60, 0, INT_MAX,
+							PGC_SUSET,
+							GUC_UNIT_S,
+							NULL, NULL, NULL);
+
+	DefineCustomIntVariable("neon.extension_server_connect_timeout",
+							"timeout for connecting to the extension server in seconds",
+							NULL,
+							&extension_server_connect_timeout,
+							60, 0, INT_MAX,
+							PGC_SUSET,
+							GUC_UNIT_S,
+							NULL, NULL, NULL);
+
 	/* set download_extension_file_hook */
 	prev_download_extension_file_hook = download_extension_file_hook;
 	download_extension_file_hook = neon_download_extension_file_http;
diff --git a/pgxn/neon/hll.c b/pgxn/neon/hll.c
index 1f53c8fd36..bbaad09f5f 100644
--- a/pgxn/neon/hll.c
+++ b/pgxn/neon/hll.c
@@ -122,8 +122,8 @@ addSHLL(HyperLogLogState *cState, uint32 hash)
 	index = hash >> HLL_C_BITS;
 
 	/* Compute the rank of the remaining 32 - "k" (registerWidth) bits */
-	count = rho(hash << HLL_BIT_WIDTH, HLL_C_BITS);
-
+	count = rho(hash << HLL_BIT_WIDTH, HLL_C_BITS) - 1;
+	Assert(count <= HLL_C_BITS);
 	cState->regs[index][count] = now;
 }
 
@@ -136,7 +136,7 @@ getMaximum(const TimestampTz* reg, TimestampTz since)
 	{
 		if (reg[i] >= since)
 		{
-			max = i;
+			max = i + 1;
 		}
 	}
 
diff --git a/pgxn/neon/libpagestore.c b/pgxn/neon/libpagestore.c
index 22aeb2e2d6..fc1aecd340 100644
--- a/pgxn/neon/libpagestore.c
+++ b/pgxn/neon/libpagestore.c
@@ -378,8 +378,9 @@ pageserver_connect(shardno_t shard_no, int elevel)
 	{
 	case PS_Disconnected:
 	{
-		const char *keywords[3];
-		const char *values[3];
+		const char *keywords[4];
+		const char *values[4];
+		char pid_str[16];
 		int			n_pgsql_params;
 		TimestampTz	now;
 		int64		us_since_last_attempt;
@@ -424,14 +425,30 @@ pageserver_connect(shardno_t shard_no, int elevel)
 		 * can override the password from the env variable. Seems useful, although
 		 * we don't currently use that capability anywhere.
 		 */
-		keywords[0] = "dbname";
-		values[0] = connstr;
-		n_pgsql_params = 1;
+		n_pgsql_params = 0;
+
+		/*
+		 * Pageserver logs include this in the connection's tracing span.
+		 * This allows for reasier log correlation between compute and pageserver.
+		 */
+		keywords[n_pgsql_params] = "application_name";
+		{
+			int ret = snprintf(pid_str, sizeof(pid_str), "%d", MyProcPid);
+			if (ret < 0 || ret >= (int)(sizeof(pid_str)))
+				elog(FATAL, "stack-allocated buffer too small to hold pid");
+		}
+		/* lifetime: PQconnectStartParams strdups internally */
+		values[n_pgsql_params] = (const char*) pid_str;
+		n_pgsql_params++;
+
+		keywords[n_pgsql_params] = "dbname";
+		values[n_pgsql_params] = connstr;
+		n_pgsql_params++;
 
 		if (neon_auth_token)
 		{
-			keywords[1] = "password";
-			values[1] = neon_auth_token;
+			keywords[n_pgsql_params] = "password";
+			values[n_pgsql_params] = neon_auth_token;
 			n_pgsql_params++;
 		}
 
diff --git a/pgxn/neon/pagestore_smgr.c b/pgxn/neon/pagestore_smgr.c
index 8051970176..f1087a8ccb 100644
--- a/pgxn/neon/pagestore_smgr.c
+++ b/pgxn/neon/pagestore_smgr.c
@@ -3765,7 +3765,7 @@ neon_dbsize(Oid dbNode)
  *	neon_truncate() -- Truncate relation to specified number of blocks.
  */
 static void
-neon_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
+neon_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber old_blocks, BlockNumber nblocks)
 {
 	XLogRecPtr	lsn;
 
@@ -3780,7 +3780,7 @@ neon_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
 
 		case RELPERSISTENCE_TEMP:
 		case RELPERSISTENCE_UNLOGGED:
-			mdtruncate(reln, forknum, nblocks);
+			mdtruncate(reln, forknum, old_blocks, nblocks);
 			return;
 
 		default:
@@ -3818,7 +3818,7 @@ neon_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
 
 #ifdef DEBUG_COMPARE_LOCAL
 	if (IS_LOCAL_REL(reln))
-		mdtruncate(reln, forknum, nblocks);
+		mdtruncate(reln, forknum, old_blocks, nblocks);
 #endif
 }
 
diff --git a/pgxn/neon_walredo/inmem_smgr.c b/pgxn/neon_walredo/inmem_smgr.c
index a45e8f5c4a..74cd5ac601 100644
--- a/pgxn/neon_walredo/inmem_smgr.c
+++ b/pgxn/neon_walredo/inmem_smgr.c
@@ -96,7 +96,7 @@ static void inmem_writeback(SMgrRelation reln, ForkNumber forknum,
 							BlockNumber blocknum, BlockNumber nblocks);
 static BlockNumber inmem_nblocks(SMgrRelation reln, ForkNumber forknum);
 static void inmem_truncate(SMgrRelation reln, ForkNumber forknum,
-						   BlockNumber nblocks);
+						   BlockNumber old_blocks, BlockNumber nblocks);
 static void inmem_immedsync(SMgrRelation reln, ForkNumber forknum);
 #if PG_MAJORVERSION_NUM >= 17
 static void inmem_registersync(SMgrRelation reln, ForkNumber forknum);
@@ -345,7 +345,7 @@ inmem_nblocks(SMgrRelation reln, ForkNumber forknum)
  *	inmem_truncate() -- Truncate relation to specified number of blocks.
  */
 static void
-inmem_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
+inmem_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber old_blocks, BlockNumber nblocks)
 {
 }
 
diff --git a/poetry.lock b/poetry.lock
index fd200159b9..d66c3aae7a 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -412,6 +412,7 @@ files = [
 
 [package.dependencies]
 botocore-stubs = "*"
+mypy-boto3-kms = {version = ">=1.26.0,<1.27.0", optional = true, markers = "extra == \"kms\""}
 mypy-boto3-s3 = {version = ">=1.26.0,<1.27.0", optional = true, markers = "extra == \"s3\""}
 types-s3transfer = "*"
 typing-extensions = ">=4.1.0"
@@ -2022,6 +2023,18 @@ install-types = ["pip"]
 mypyc = ["setuptools (>=50)"]
 reports = ["lxml"]
 
+[[package]]
+name = "mypy-boto3-kms"
+version = "1.26.147"
+description = "Type annotations for boto3.KMS 1.26.147 service generated with mypy-boto3-builder 7.14.5"
+optional = false
+python-versions = ">=3.7"
+groups = ["main"]
+files = [
+    {file = "mypy-boto3-kms-1.26.147.tar.gz", hash = "sha256:816a4d1bb0585e1b9620a3f96c1d69a06f53b7b5621858579dd77c60dbb5fa5c"},
+    {file = "mypy_boto3_kms-1.26.147-py3-none-any.whl", hash = "sha256:493f0db674a25c88769f5cb8ab8ac00d3dda5dfc903d5cda34c990ee64689f79"},
+]
+
 [[package]]
 name = "mypy-boto3-s3"
 version = "1.26.0.post1"
@@ -2758,18 +2771,18 @@ pytest = ">=5,<8"
 
 [[package]]
 name = "pytest-timeout"
-version = "2.1.0"
+version = "2.3.1"
 description = "pytest plugin to abort hanging tests"
 optional = false
-python-versions = ">=3.6"
+python-versions = ">=3.7"
 groups = ["main"]
 files = [
-    {file = "pytest-timeout-2.1.0.tar.gz", hash = "sha256:c07ca07404c612f8abbe22294b23c368e2e5104b521c1790195561f37e1ac3d9"},
-    {file = "pytest_timeout-2.1.0-py3-none-any.whl", hash = "sha256:f6f50101443ce70ad325ceb4473c4255e9d74e3c7cd0ef827309dfa4c0d975c6"},
+    {file = "pytest-timeout-2.3.1.tar.gz", hash = "sha256:12397729125c6ecbdaca01035b9e5239d4db97352320af155b3f5de1ba5165d9"},
+    {file = "pytest_timeout-2.3.1-py3-none-any.whl", hash = "sha256:68188cb703edfc6a18fad98dc25a3c61e9f24d644b0b70f33af545219fc7813e"},
 ]
 
 [package.dependencies]
-pytest = ">=5.0.0"
+pytest = ">=7.0.0"
 
 [[package]]
 name = "pytest-xdist"
@@ -3807,4 +3820,4 @@ cffi = ["cffi (>=1.11)"]
 [metadata]
 lock-version = "2.1"
 python-versions = "^3.11"
-content-hash = "4dc3165fe22c0e0f7a030ea0f8a680ae2ff74561d8658c393abbe9112caaf5d7"
+content-hash = "00ddc42c32e235b6171845fc066dcab078282ed832cd464d5e8a0afa959dd04a"
diff --git a/proxy/Cargo.toml b/proxy/Cargo.toml
index 3aa6ac3a76..6a381bf094 100644
--- a/proxy/Cargo.toml
+++ b/proxy/Cargo.toml
@@ -19,7 +19,6 @@ aws-config.workspace = true
 aws-sdk-iam.workspace = true
 aws-sigv4.workspace = true
 base64.workspace = true
-boxcar = "0.2.8"
 bstr.workspace = true
 bytes = { workspace = true, features = ["serde"] }
 camino.workspace = true
@@ -63,7 +62,6 @@ postgres_backend.workspace = true
 postgres-client = { package = "tokio-postgres2", path = "../libs/proxy/tokio-postgres2" }
 postgres-protocol = { package = "postgres-protocol2", path = "../libs/proxy/postgres-protocol2" }
 pq_proto.workspace = true
-prometheus.workspace = true
 rand.workspace = true
 regex.workspace = true
 remote_storage = { version = "0.1", path = "../libs/remote_storage/" }
@@ -81,7 +79,6 @@ sha2 = { workspace = true, features = ["asm", "oid"] }
 smol_str.workspace = true
 smallvec.workspace = true
 socket2.workspace = true
-strum.workspace = true
 strum_macros.workspace = true
 subtle.workspace = true
 thiserror.workspace = true
@@ -95,7 +92,6 @@ tracing-subscriber.workspace = true
 tracing-utils.workspace = true
 tracing.workspace = true
 tracing-log.workspace = true
-tracing-serde.workspace = true
 tracing-opentelemetry.workspace = true
 try-lock.workspace = true
 typed-json.workspace = true
diff --git a/proxy/src/auth/backend/console_redirect.rs b/proxy/src/auth/backend/console_redirect.rs
index 9be29c38c9..7503b4eac9 100644
--- a/proxy/src/auth/backend/console_redirect.rs
+++ b/proxy/src/auth/backend/console_redirect.rs
@@ -140,9 +140,8 @@ async fn authenticate(
     let (psql_session_id, waiter) = loop {
         let psql_session_id = new_psql_session_id();
 
-        match control_plane::mgmt::get_waiter(&psql_session_id) {
-            Ok(waiter) => break (psql_session_id, waiter),
-            Err(_e) => continue,
+        if let Ok(waiter) = control_plane::mgmt::get_waiter(&psql_session_id) {
+            break (psql_session_id, waiter);
         }
     };
 
diff --git a/proxy/src/auth/backend/jwt.rs b/proxy/src/auth/backend/jwt.rs
index e05a693cee..5d032c0deb 100644
--- a/proxy/src/auth/backend/jwt.rs
+++ b/proxy/src/auth/backend/jwt.rs
@@ -220,11 +220,11 @@ async fn fetch_jwks(
 }
 
 impl JwkCacheEntryLock {
-    async fn acquire_permit<'a>(self: &'a Arc<Self>) -> JwkRenewalPermit<'a> {
+    async fn acquire_permit(self: &Arc<Self>) -> JwkRenewalPermit<'_> {
         JwkRenewalPermit::acquire_permit(self).await
     }
 
-    fn try_acquire_permit<'a>(self: &'a Arc<Self>) -> Option<JwkRenewalPermit<'a>> {
+    fn try_acquire_permit(self: &Arc<Self>) -> Option<JwkRenewalPermit<'_>> {
         JwkRenewalPermit::try_acquire_permit(self)
     }
 
@@ -393,7 +393,7 @@ impl JwkCacheEntryLock {
                 verify_rsa_signature(header_payload.as_bytes(), &sig, key, &header.algorithm)?;
             }
             key => return Err(JwtError::UnsupportedKeyType(key.into())),
-        };
+        }
 
         tracing::debug!(?payload, "JWT signature valid with claims");
 
@@ -510,7 +510,7 @@ fn verify_rsa_signature(
             key.verify(data, &sig)?;
         }
         _ => return Err(JwtError::InvalidRsaSigningAlgorithm),
-    };
+    }
 
     Ok(())
 }
diff --git a/proxy/src/binary/local_proxy.rs b/proxy/src/binary/local_proxy.rs
index e0d8515375..4ab11f828c 100644
--- a/proxy/src/binary/local_proxy.rs
+++ b/proxy/src/binary/local_proxy.rs
@@ -4,6 +4,20 @@ use std::str::FromStr;
 use std::sync::Arc;
 use std::time::Duration;
 
+use anyhow::{bail, ensure, Context};
+use camino::{Utf8Path, Utf8PathBuf};
+use clap::Parser;
+use compute_api::spec::LocalProxySpec;
+use futures::future::Either;
+use thiserror::Error;
+use tokio::net::TcpListener;
+use tokio::sync::Notify;
+use tokio::task::JoinSet;
+use tokio_util::sync::CancellationToken;
+use tracing::{debug, error, info, warn};
+use utils::sentry_init::init_sentry;
+use utils::{pid_file, project_build_tag, project_git_version};
+
 use crate::auth::backend::jwt::JwkCache;
 use crate::auth::backend::local::{LocalBackend, JWKS_ROLE_MAP};
 use crate::auth::{self};
@@ -25,24 +39,10 @@ use crate::serverless::{self, GlobalConnPoolOptions};
 use crate::tls::client_config::compute_client_config_with_root_certs;
 use crate::types::RoleName;
 use crate::url::ApiUrl;
-use anyhow::{bail, ensure, Context};
-use camino::{Utf8Path, Utf8PathBuf};
-use compute_api::spec::LocalProxySpec;
-use futures::future::Either;
 
 project_git_version!(GIT_VERSION);
 project_build_tag!(BUILD_TAG);
 
-use clap::Parser;
-use thiserror::Error;
-use tokio::net::TcpListener;
-use tokio::sync::Notify;
-use tokio::task::JoinSet;
-use tokio_util::sync::CancellationToken;
-use tracing::{debug, error, info, warn};
-use utils::sentry_init::init_sentry;
-use utils::{pid_file, project_build_tag, project_git_version};
-
 /// Neon proxy/router
 #[derive(Parser)]
 #[command(version = GIT_VERSION, about)]
diff --git a/proxy/src/binary/pg_sni_router.rs b/proxy/src/binary/pg_sni_router.rs
index 235e9674c6..94e771a61c 100644
--- a/proxy/src/binary/pg_sni_router.rs
+++ b/proxy/src/binary/pg_sni_router.rs
@@ -5,12 +5,6 @@
 /// the outside. Similar to an ingress controller for HTTPS.
 use std::{net::SocketAddr, sync::Arc};
 
-use crate::context::RequestContext;
-use crate::metrics::{Metrics, ThreadPoolMetrics};
-use crate::protocol2::ConnectionInfo;
-use crate::proxy::{copy_bidirectional_client_compute, run_until_cancelled, ErrorSource};
-use crate::stream::{PqStream, Stream};
-use crate::tls::TlsServerEndPoint;
 use anyhow::{anyhow, bail, ensure, Context};
 use clap::Arg;
 use futures::future::Either;
@@ -25,6 +19,13 @@ use tracing::{error, info, Instrument};
 use utils::project_git_version;
 use utils::sentry_init::init_sentry;
 
+use crate::context::RequestContext;
+use crate::metrics::{Metrics, ThreadPoolMetrics};
+use crate::protocol2::ConnectionInfo;
+use crate::proxy::{copy_bidirectional_client_compute, run_until_cancelled, ErrorSource};
+use crate::stream::{PqStream, Stream};
+use crate::tls::TlsServerEndPoint;
+
 project_git_version!(GIT_VERSION);
 
 fn cli() -> clap::Command {
diff --git a/proxy/src/binary/proxy.rs b/proxy/src/binary/proxy.rs
index e38c49ca10..b72799df54 100644
--- a/proxy/src/binary/proxy.rs
+++ b/proxy/src/binary/proxy.rs
@@ -3,6 +3,16 @@ use std::pin::pin;
 use std::sync::Arc;
 use std::time::Duration;
 
+use anyhow::bail;
+use futures::future::Either;
+use remote_storage::RemoteStorageConfig;
+use tokio::net::TcpListener;
+use tokio::task::JoinSet;
+use tokio_util::sync::CancellationToken;
+use tracing::{info, warn, Instrument};
+use utils::sentry_init::init_sentry;
+use utils::{project_build_tag, project_git_version};
+
 use crate::auth::backend::jwt::JwkCache;
 use crate::auth::backend::{AuthRateLimiter, ConsoleRedirectBackend, MaybeOwned};
 use crate::cancellation::{handle_cancel_messages, CancellationHandler};
@@ -24,15 +34,6 @@ use crate::serverless::cancel_set::CancelSet;
 use crate::serverless::GlobalConnPoolOptions;
 use crate::tls::client_config::compute_client_config_with_root_certs;
 use crate::{auth, control_plane, http, serverless, usage_metrics};
-use anyhow::bail;
-use futures::future::Either;
-use remote_storage::RemoteStorageConfig;
-use tokio::net::TcpListener;
-use tokio::task::JoinSet;
-use tokio_util::sync::CancellationToken;
-use tracing::{info, warn, Instrument};
-use utils::sentry_init::init_sentry;
-use utils::{project_build_tag, project_git_version};
 
 project_git_version!(GIT_VERSION);
 project_build_tag!(BUILD_TAG);
@@ -303,7 +304,7 @@ pub async fn run() -> anyhow::Result<()> {
     match auth_backend {
         Either::Left(auth_backend) => info!("Authentication backend: {auth_backend}"),
         Either::Right(auth_backend) => info!("Authentication backend: {auth_backend:?}"),
-    };
+    }
     info!("Using region: {}", args.aws_region);
 
     // TODO: untangle the config args
@@ -803,9 +804,10 @@ fn build_auth_backend(
 mod tests {
     use std::time::Duration;
 
-    use crate::rate_limiter::RateBucketInfo;
     use clap::Parser;
 
+    use crate::rate_limiter::RateBucketInfo;
+
     #[test]
     fn parse_endpoint_rps_limit() {
         let config = super::ProxyCliArgs::parse_from([
diff --git a/proxy/src/cache/endpoints.rs b/proxy/src/cache/endpoints.rs
index b5c42cd23d..8ec1a4648b 100644
--- a/proxy/src/cache/endpoints.rs
+++ b/proxy/src/cache/endpoints.rs
@@ -242,7 +242,7 @@ impl EndpointsCache {
                             });
                             tracing::error!("error parsing value {value:?}: {err:?}");
                         }
-                    };
+                    }
                 }
                 if total.is_power_of_two() {
                     tracing::debug!("endpoints read {}", total);
diff --git a/proxy/src/cancellation.rs b/proxy/src/cancellation.rs
index e84f1676e2..1f9c8a48b7 100644
--- a/proxy/src/cancellation.rs
+++ b/proxy/src/cancellation.rs
@@ -501,7 +501,7 @@ impl Session {
             _guard: Metrics::get()
                 .proxy
                 .cancel_channel_size
-                .guard(RedisMsgKind::HSet),
+                .guard(RedisMsgKind::HDel),
         };
 
         let _ = tx.send_timeout(op, REDIS_SEND_TIMEOUT).await.map_err(|e| {
diff --git a/proxy/src/compute.rs b/proxy/src/compute.rs
index d71465765f..5447a4a4c0 100644
--- a/proxy/src/compute.rs
+++ b/proxy/src/compute.rs
@@ -137,8 +137,8 @@ impl ConnCfg {
             match k {
                 // Only set `user` if it's not present in the config.
                 // Console redirect auth flow takes username from the console's response.
-                "user" if self.user_is_set() => continue,
-                "database" if self.db_is_set() => continue,
+                "user" if self.user_is_set() => {}
+                "database" if self.db_is_set() => {}
                 "options" => {
                     if let Some(options) = filtered_options(v) {
                         self.set_param(k, &options);
diff --git a/proxy/src/console_redirect_proxy.rs b/proxy/src/console_redirect_proxy.rs
index c4548a7ddd..1044f5f8e2 100644
--- a/proxy/src/console_redirect_proxy.rs
+++ b/proxy/src/console_redirect_proxy.rs
@@ -82,7 +82,7 @@ pub async fn task_main(
                     error!("per-client task finished with an error: failed to set socket option: {e:#}");
                     return;
                 }
-            };
+            }
 
             let ctx = RequestContext::new(
                 session_id,
diff --git a/proxy/src/control_plane/mod.rs b/proxy/src/control_plane/mod.rs
index f92e4f3f60..89ec4f9b33 100644
--- a/proxy/src/control_plane/mod.rs
+++ b/proxy/src/control_plane/mod.rs
@@ -19,8 +19,7 @@ use crate::cache::{Cached, TimedLru};
 use crate::config::ComputeConfig;
 use crate::context::RequestContext;
 use crate::control_plane::messages::{ControlPlaneErrorMessage, MetricsAuxInfo};
-use crate::intern::AccountIdInt;
-use crate::intern::ProjectIdInt;
+use crate::intern::{AccountIdInt, ProjectIdInt};
 use crate::types::{EndpointCacheKey, EndpointId};
 use crate::{compute, scram};
 
diff --git a/proxy/src/logging.rs b/proxy/src/logging.rs
index 97c9f5a59c..fbd4811b54 100644
--- a/proxy/src/logging.rs
+++ b/proxy/src/logging.rs
@@ -7,9 +7,8 @@ use chrono::{DateTime, Utc};
 use opentelemetry::trace::TraceContextExt;
 use scopeguard::defer;
 use serde::ser::{SerializeMap, Serializer};
-use tracing::span;
 use tracing::subscriber::Interest;
-use tracing::{callsite, Event, Metadata, Span, Subscriber};
+use tracing::{callsite, span, Event, Metadata, Span, Subscriber};
 use tracing_opentelemetry::OpenTelemetrySpanExt;
 use tracing_subscriber::filter::{EnvFilter, LevelFilter};
 use tracing_subscriber::fmt::format::{Format, Full};
diff --git a/proxy/src/protocol2.rs b/proxy/src/protocol2.rs
index 0dc97b7097..74a15d9bf4 100644
--- a/proxy/src/protocol2.rs
+++ b/proxy/src/protocol2.rs
@@ -119,7 +119,7 @@ pub(crate) async fn read_proxy_protocol<T: AsyncRead + Unpin>(
         // if no more bytes available then exit
         if bytes_read == 0 {
             return Ok((ChainRW { inner: read, buf }, ConnectHeader::Missing));
-        };
+        }
 
         // check if we have enough bytes to continue
         if let Some(header) = buf.try_get::<ProxyProtocolV2Header>() {
@@ -169,7 +169,7 @@ fn process_proxy_payload(
                 header.version_and_command
             ),
         )),
-    };
+    }
 
     let size_err =
         "invalid proxy protocol length. payload not large enough to fit requested IP addresses";
diff --git a/proxy/src/proxy/connect_compute.rs b/proxy/src/proxy/connect_compute.rs
index dd145e6bb2..26fb1754bf 100644
--- a/proxy/src/proxy/connect_compute.rs
+++ b/proxy/src/proxy/connect_compute.rs
@@ -198,7 +198,7 @@ where
 
                 warn!(error = ?e, num_retries, retriable = true, COULD_NOT_CONNECT);
             }
-        };
+        }
 
         let wait_duration = retry_after(num_retries, compute.retry);
         num_retries += 1;
diff --git a/proxy/src/proxy/mod.rs b/proxy/src/proxy/mod.rs
index 8a407c8119..2a406fcb34 100644
--- a/proxy/src/proxy/mod.rs
+++ b/proxy/src/proxy/mod.rs
@@ -118,7 +118,7 @@ pub async fn task_main(
                     error!("per-client task finished with an error: failed to set socket option: {e:#}");
                     return;
                 }
-            };
+            }
 
             let ctx = RequestContext::new(
                 session_id,
diff --git a/proxy/src/redis/notifications.rs b/proxy/src/redis/notifications.rs
index 1a7024588a..5f9f2509e2 100644
--- a/proxy/src/redis/notifications.rs
+++ b/proxy/src/redis/notifications.rs
@@ -169,7 +169,7 @@ impl<C: ProjectInfoCache + Send + Sync + 'static> MessageHandler<C> {
                         });
                         tracing::error!("broken message: {e}");
                     }
-                };
+                }
                 return Ok(());
             }
             Ok(msg) => msg,
@@ -180,7 +180,7 @@ impl<C: ProjectInfoCache + Send + Sync + 'static> MessageHandler<C> {
                 match serde_json::from_str::<NotificationHeader>(&payload) {
                     Ok(header) => tracing::error!(topic = header.topic, "broken message: {e}"),
                     Err(_) => tracing::error!("broken message: {e}"),
-                };
+                }
                 return Ok(());
             }
         };
diff --git a/proxy/src/serverless/backend.rs b/proxy/src/serverless/backend.rs
index edc2935618..f35c375ba2 100644
--- a/proxy/src/serverless/backend.rs
+++ b/proxy/src/serverless/backend.rs
@@ -372,7 +372,7 @@ impl PoolingBackend {
             debug!("setting up backend session state");
 
             // initiates the auth session
-            if let Err(e) = client.execute("select auth.init()", &[]).await {
+            if let Err(e) = client.batch_execute("select auth.init();").await {
                 discard.discard();
                 return Err(e.into());
             }
@@ -651,7 +651,7 @@ async fn connect_http2(
                     e,
                 )));
             }
-        };
+        }
     };
 
     let (client, connection) = hyper::client::conn::http2::Builder::new(TokioExecutor::new())
diff --git a/proxy/src/serverless/local_conn_pool.rs b/proxy/src/serverless/local_conn_pool.rs
index fe33f0ff65..137a2d6377 100644
--- a/proxy/src/serverless/local_conn_pool.rs
+++ b/proxy/src/serverless/local_conn_pool.rs
@@ -23,7 +23,6 @@ use indexmap::IndexMap;
 use jose_jwk::jose_b64::base64ct::{Base64UrlUnpadded, Encoding};
 use parking_lot::RwLock;
 use postgres_client::tls::NoTlsStream;
-use postgres_client::types::ToSql;
 use postgres_client::AsyncMessage;
 use serde_json::value::RawValue;
 use tokio::net::TcpStream;
@@ -280,14 +279,13 @@ impl ClientInnerCommon<postgres_client::Client> {
             local_data.jti += 1;
             let token = resign_jwt(&local_data.key, payload, local_data.jti)?;
 
-            // initiates the auth session
+            // discard all cannot run in a transaction. must be executed alone.
             self.inner.batch_execute("discard all").await?;
-            self.inner
-                .execute(
-                    "select auth.jwt_session_init($1)",
-                    &[&&*token as &(dyn ToSql + Sync)],
-                )
-                .await?;
+
+            // initiates the auth session
+            // this is safe from query injections as the jwt format free of any escape characters.
+            let query = format!("select auth.jwt_session_init('{token}')");
+            self.inner.batch_execute(&query).await?;
 
             let pid = self.inner.get_process_id();
             info!(pid, jti = local_data.jti, "user session state init");
diff --git a/pyproject.toml b/pyproject.toml
index e299c421e9..92a660c233 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,12 +17,12 @@ Jinja2 = "^3.1.5"
 types-requests = "^2.31.0.0"
 types-psycopg2 = "^2.9.21.20241019"
 boto3 = "^1.34.11"
-boto3-stubs = {extras = ["s3"], version = "^1.26.16"}
+boto3-stubs = {extras = ["s3", "kms"], version = "^1.26.16"}
 moto = {extras = ["server"], version = "^5.0.6"}
 backoff = "^2.2.1"
 pytest-lazy-fixture = "^0.6.3"
 prometheus-client = "^0.14.1"
-pytest-timeout = "^2.1.0"
+pytest-timeout = "^2.3.1"
 Werkzeug = "^3.0.6"
 pytest-order = "^1.1.0"
 allure-pytest = "^2.13.2"
diff --git a/pytest.ini b/pytest.ini
index 7197b078c6..237066b1f6 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -11,7 +11,7 @@ markers =
 testpaths =
     test_runner
 minversion = 6.0
-log_format = %(asctime)s.%(msecs)-3d %(levelname)s [%(filename)s:%(lineno)d] %(message)s
+log_format = %(asctime)s.%(msecs)03d %(levelname)s [%(filename)s:%(lineno)d] %(message)s
 log_date_format = %Y-%m-%d %H:%M:%S
 log_cli = true
 timeout = 300
diff --git a/safekeeper/client/src/mgmt_api.rs b/safekeeper/client/src/mgmt_api.rs
index df049f3eba..5c305769dd 100644
--- a/safekeeper/client/src/mgmt_api.rs
+++ b/safekeeper/client/src/mgmt_api.rs
@@ -5,7 +5,10 @@
 
 use http_utils::error::HttpErrorBody;
 use reqwest::{IntoUrl, Method, StatusCode};
-use safekeeper_api::models::{TimelineCreateRequest, TimelineStatus};
+use safekeeper_api::models::{
+    PullTimelineRequest, PullTimelineResponse, SafekeeperUtilization, TimelineCreateRequest,
+    TimelineStatus,
+};
 use std::error::Error as _;
 use utils::{
     id::{NodeId, TenantId, TimelineId},
@@ -32,6 +35,9 @@ pub enum Error {
     /// Status is not ok; parsed error in body as `HttpErrorBody`.
     #[error("safekeeper API: {1}")]
     ApiError(StatusCode, String),
+
+    #[error("Cancelled")]
+    Cancelled,
 }
 
 pub type Result<T> = std::result::Result<T, Error>;
@@ -85,6 +91,12 @@ impl Client {
         resp.json().await.map_err(Error::ReceiveBody)
     }
 
+    pub async fn pull_timeline(&self, req: &PullTimelineRequest) -> Result<PullTimelineResponse> {
+        let uri = format!("{}/v1/pull_timeline", self.mgmt_api_endpoint);
+        let resp = self.post(&uri, req).await?;
+        resp.json().await.map_err(Error::ReceiveBody)
+    }
+
     pub async fn delete_timeline(
         &self,
         tenant_id: TenantId,
@@ -124,9 +136,10 @@ impl Client {
         self.get(&uri).await
     }
 
-    pub async fn utilization(&self) -> Result<reqwest::Response> {
-        let uri = format!("{}/v1/utilization/", self.mgmt_api_endpoint);
-        self.get(&uri).await
+    pub async fn utilization(&self) -> Result<SafekeeperUtilization> {
+        let uri = format!("{}/v1/utilization", self.mgmt_api_endpoint);
+        let resp = self.get(&uri).await?;
+        resp.json().await.map_err(Error::ReceiveBody)
     }
 
     async fn post<B: serde::Serialize, U: IntoUrl>(
diff --git a/safekeeper/src/control_file.rs b/safekeeper/src/control_file.rs
index e92ca881e1..35aebfd8ad 100644
--- a/safekeeper/src/control_file.rs
+++ b/safekeeper/src/control_file.rs
@@ -235,7 +235,7 @@ impl Storage for FileStorage {
 #[cfg(test)]
 mod test {
     use super::*;
-    use safekeeper_api::membership::{Configuration, MemberSet};
+    use safekeeper_api::membership::{Configuration, MemberSet, SafekeeperGeneration};
     use tokio::fs;
     use utils::lsn::Lsn;
 
@@ -246,7 +246,7 @@ mod test {
         let tempdir = camino_tempfile::tempdir()?;
         let mut state = TimelinePersistentState::empty();
         state.mconf = Configuration {
-            generation: 42,
+            generation: SafekeeperGeneration::new(42),
             members: MemberSet::empty(),
             new_members: None,
         };
diff --git a/safekeeper/src/http/routes.rs b/safekeeper/src/http/routes.rs
index a64bf1ddd8..cd2ac5f44c 100644
--- a/safekeeper/src/http/routes.rs
+++ b/safekeeper/src/http/routes.rs
@@ -2,6 +2,7 @@ use http_utils::failpoints::failpoints_handler;
 use hyper::{Body, Request, Response, StatusCode};
 use safekeeper_api::models;
 use safekeeper_api::models::AcceptorStateStatus;
+use safekeeper_api::models::PullTimelineRequest;
 use safekeeper_api::models::SafekeeperStatus;
 use safekeeper_api::models::TermSwitchApiEntry;
 use safekeeper_api::models::TimelineStatus;
@@ -230,7 +231,7 @@ async fn timeline_delete_handler(mut request: Request<Body>) -> Result<Response<
 async fn timeline_pull_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
     check_permission(&request, None)?;
 
-    let data: pull_timeline::Request = json_request(&mut request).await?;
+    let data: PullTimelineRequest = json_request(&mut request).await?;
     let conf = get_conf(&request);
     let global_timelines = get_global_timelines(&request);
 
@@ -626,7 +627,7 @@ pub fn make_router(
                 failpoints_handler(r, cancel).await
             })
         })
-        .get("/v1/uzilization", |r| request_span(r, utilization_handler))
+        .get("/v1/utilization", |r| request_span(r, utilization_handler))
         .delete("/v1/tenant/:tenant_id", |r| {
             request_span(r, tenant_delete_handler)
         })
diff --git a/safekeeper/src/pull_timeline.rs b/safekeeper/src/pull_timeline.rs
index f2d8e4c85f..4827b73074 100644
--- a/safekeeper/src/pull_timeline.rs
+++ b/safekeeper/src/pull_timeline.rs
@@ -4,10 +4,13 @@ use camino::Utf8PathBuf;
 use chrono::{DateTime, Utc};
 use futures::{SinkExt, StreamExt, TryStreamExt};
 use postgres_ffi::{XLogFileName, XLogSegNo, PG_TLI};
-use safekeeper_api::{models::TimelineStatus, Term};
+use safekeeper_api::{
+    models::{PullTimelineRequest, PullTimelineResponse, TimelineStatus},
+    Term,
+};
 use safekeeper_client::mgmt_api;
 use safekeeper_client::mgmt_api::Client;
-use serde::{Deserialize, Serialize};
+use serde::Deserialize;
 use std::{
     cmp::min,
     io::{self, ErrorKind},
@@ -33,7 +36,7 @@ use crate::{
 };
 use utils::{
     crashsafe::fsync_async_opt,
-    id::{NodeId, TenantId, TenantTimelineId, TimelineId},
+    id::{NodeId, TenantTimelineId},
     logging::SecretString,
     lsn::Lsn,
     pausable_failpoint,
@@ -378,21 +381,6 @@ impl WalResidentTimeline {
     }
 }
 
-/// pull_timeline request body.
-#[derive(Debug, Deserialize)]
-pub struct Request {
-    pub tenant_id: TenantId,
-    pub timeline_id: TimelineId,
-    pub http_hosts: Vec<String>,
-}
-
-#[derive(Debug, Serialize)]
-pub struct Response {
-    // Donor safekeeper host
-    pub safekeeper_host: String,
-    // TODO: add more fields?
-}
-
 /// Response for debug dump request.
 #[derive(Debug, Deserialize)]
 pub struct DebugDumpResponse {
@@ -405,10 +393,10 @@ pub struct DebugDumpResponse {
 
 /// Find the most advanced safekeeper and pull timeline from it.
 pub async fn handle_request(
-    request: Request,
+    request: PullTimelineRequest,
     sk_auth_token: Option<SecretString>,
     global_timelines: Arc<GlobalTimelines>,
-) -> Result<Response> {
+) -> Result<PullTimelineResponse> {
     let existing_tli = global_timelines.get(TenantTimelineId::new(
         request.tenant_id,
         request.timeline_id,
@@ -460,7 +448,7 @@ async fn pull_timeline(
     host: String,
     sk_auth_token: Option<SecretString>,
     global_timelines: Arc<GlobalTimelines>,
-) -> Result<Response> {
+) -> Result<PullTimelineResponse> {
     let ttid = TenantTimelineId::new(status.tenant_id, status.timeline_id);
     info!(
         "pulling timeline {} from safekeeper {}, commit_lsn={}, flush_lsn={}, term={}, epoch={}",
@@ -535,7 +523,7 @@ async fn pull_timeline(
         .load_temp_timeline(ttid, &tli_dir_path, false)
         .await?;
 
-    Ok(Response {
+    Ok(PullTimelineResponse {
         safekeeper_host: host,
     })
 }
diff --git a/safekeeper/src/safekeeper.rs b/safekeeper/src/safekeeper.rs
index 45e19c31b6..f816f8459a 100644
--- a/safekeeper/src/safekeeper.rs
+++ b/safekeeper/src/safekeeper.rs
@@ -1004,7 +1004,7 @@ mod tests {
 
     use postgres_ffi::{XLogSegNo, WAL_SEGMENT_SIZE};
     use safekeeper_api::{
-        membership::{Configuration, MemberSet, SafekeeperId},
+        membership::{Configuration, MemberSet, SafekeeperGeneration, SafekeeperId},
         ServerInfo,
     };
 
@@ -1303,7 +1303,7 @@ mod tests {
             tenant_id,
             timeline_id,
             mconf: Configuration {
-                generation: 42,
+                generation: SafekeeperGeneration::new(42),
                 members: MemberSet::new(vec![SafekeeperId {
                     id: NodeId(1),
                     host: "hehe.org".to_owned(),
diff --git a/safekeeper/src/wal_backup.rs b/safekeeper/src/wal_backup.rs
index 8517fa0344..2f6b91cf47 100644
--- a/safekeeper/src/wal_backup.rs
+++ b/safekeeper/src/wal_backup.rs
@@ -310,9 +310,12 @@ impl WalBackupTask {
                     retry_attempt = 0;
                 }
                 Err(e) => {
+                    // We might have managed to upload some segment even though
+                    // some later in the range failed, so log backup_lsn
+                    // separately.
                     error!(
-                        "failed while offloading range {}-{}: {:?}",
-                        backup_lsn, commit_lsn, e
+                        "failed while offloading range {}-{}, backup_lsn {}: {:?}",
+                        backup_lsn, commit_lsn, backup_lsn, e
                     );
 
                     retry_attempt = retry_attempt.saturating_add(1);
@@ -338,6 +341,13 @@ async fn backup_lsn_range(
     let start_lsn = *backup_lsn;
     let segments = get_segments(start_lsn, end_lsn, wal_seg_size);
 
+    info!(
+        "offloading segnos {:?} of range [{}-{})",
+        segments.iter().map(|&s| s.seg_no).collect::<Vec<_>>(),
+        start_lsn,
+        end_lsn,
+    );
+
     // Pool of concurrent upload tasks. We use `FuturesOrdered` to
     // preserve order of uploads, and update `backup_lsn` only after
     // all previous uploads are finished.
@@ -374,10 +384,10 @@ async fn backup_lsn_range(
     }
 
     info!(
-        "offloaded segnos {:?} up to {}, previous backup_lsn {}",
+        "offloaded segnos {:?} of range [{}-{})",
         segments.iter().map(|&s| s.seg_no).collect::<Vec<_>>(),
-        end_lsn,
         start_lsn,
+        end_lsn,
     );
     Ok(())
 }
diff --git a/safekeeper/src/wal_service.rs b/safekeeper/src/wal_service.rs
index 1ebcb060e7..e5ccbb3230 100644
--- a/safekeeper/src/wal_service.rs
+++ b/safekeeper/src/wal_service.rs
@@ -13,6 +13,8 @@ use tokio_util::sync::CancellationToken;
 use tracing::*;
 use utils::{auth::Scope, measured_stream::MeasuredStream};
 
+use std::os::fd::AsRawFd;
+
 use crate::metrics::TrafficMetrics;
 use crate::SafeKeeperConf;
 use crate::{handler::SafekeeperPostgresHandler, GlobalTimelines};
@@ -62,6 +64,7 @@ async fn handle_socket(
     global_timelines: Arc<GlobalTimelines>,
 ) -> Result<(), QueryError> {
     socket.set_nodelay(true)?;
+    let socket_fd = socket.as_raw_fd();
     let peer_addr = socket.peer_addr()?;
 
     // Set timeout on reading from the socket. It prevents hanged up connection
@@ -107,7 +110,7 @@ async fn handle_socket(
         auth_pair,
         global_timelines,
     );
-    let pgbackend = PostgresBackend::new_from_io(socket, peer_addr, auth_type, None)?;
+    let pgbackend = PostgresBackend::new_from_io(socket_fd, socket, peer_addr, auth_type, None)?;
     // libpq protocol between safekeeper and walproposer / pageserver
     // We don't use shutdown.
     pgbackend
diff --git a/scripts/generate_image_maps.py b/scripts/generate_image_maps.py
index a2f553d290..915eb33673 100644
--- a/scripts/generate_image_maps.py
+++ b/scripts/generate_image_maps.py
@@ -6,6 +6,9 @@ build_tag = os.environ["BUILD_TAG"]
 branch = os.environ["BRANCH"]
 dev_acr = os.environ["DEV_ACR"]
 prod_acr = os.environ["PROD_ACR"]
+dev_aws = os.environ["DEV_AWS"]
+prod_aws = os.environ["PROD_AWS"]
+aws_region = os.environ["AWS_REGION"]
 
 components = {
     "neon": ["neon"],
@@ -24,11 +27,11 @@ components = {
 registries = {
     "dev": [
         "docker.io/neondatabase",
-        "369495373322.dkr.ecr.eu-central-1.amazonaws.com",
+        f"{dev_aws}.dkr.ecr.{aws_region}.amazonaws.com",
         f"{dev_acr}.azurecr.io/neondatabase",
     ],
     "prod": [
-        "093970136003.dkr.ecr.eu-central-1.amazonaws.com",
+        f"{prod_aws}.dkr.ecr.{aws_region}.amazonaws.com",
         f"{prod_acr}.azurecr.io/neondatabase",
     ],
 }
diff --git a/storage_controller/Cargo.toml b/storage_controller/Cargo.toml
index 91d8098cb9..73dc1a5c10 100644
--- a/storage_controller/Cargo.toml
+++ b/storage_controller/Cargo.toml
@@ -32,6 +32,10 @@ postgres_connection.workspace = true
 rand.workspace = true
 reqwest = { workspace = true, features = ["stream"] }
 routerify.workspace = true
+safekeeper_api.workspace = true
+safekeeper_client.workspace = true
+tikv-jemallocator.workspace = true
+regex.workspace = true
 rustls-native-certs.workspace = true
 serde.workspace = true
 serde_json.workspace = true
diff --git a/storage_controller/src/heartbeater.rs b/storage_controller/src/heartbeater.rs
index b7e66d33eb..52b6110667 100644
--- a/storage_controller/src/heartbeater.rs
+++ b/storage_controller/src/heartbeater.rs
@@ -1,23 +1,30 @@
 use futures::{stream::FuturesUnordered, StreamExt};
+use safekeeper_api::models::SafekeeperUtilization;
+use safekeeper_client::mgmt_api;
 use std::{
     collections::HashMap,
+    fmt::Debug,
+    future::Future,
     sync::Arc,
     time::{Duration, Instant},
 };
 use tokio_util::sync::CancellationToken;
 
-use pageserver_api::{controller_api::NodeAvailability, models::PageserverUtilization};
+use pageserver_api::{
+    controller_api::{NodeAvailability, SkSchedulingPolicy},
+    models::PageserverUtilization,
+};
 
 use thiserror::Error;
-use utils::id::NodeId;
+use utils::{id::NodeId, logging::SecretString};
 
-use crate::node::Node;
+use crate::{node::Node, safekeeper::Safekeeper};
 
-struct HeartbeaterTask {
-    receiver: tokio::sync::mpsc::UnboundedReceiver<HeartbeatRequest>,
+struct HeartbeaterTask<Server, State> {
+    receiver: tokio::sync::mpsc::UnboundedReceiver<HeartbeatRequest<Server, State>>,
     cancel: CancellationToken,
 
-    state: HashMap<NodeId, PageserverState>,
+    state: HashMap<NodeId, State>,
 
     max_offline_interval: Duration,
     max_warming_up_interval: Duration,
@@ -36,8 +43,17 @@ pub(crate) enum PageserverState {
     Offline,
 }
 
+#[derive(Debug, Clone)]
+pub(crate) enum SafekeeperState {
+    Available {
+        last_seen_at: Instant,
+        utilization: SafekeeperUtilization,
+    },
+    Offline,
+}
+
 #[derive(Debug)]
-pub(crate) struct AvailablityDeltas(pub Vec<(NodeId, PageserverState)>);
+pub(crate) struct AvailablityDeltas<State>(pub Vec<(NodeId, State)>);
 
 #[derive(Debug, Error)]
 pub(crate) enum HeartbeaterError {
@@ -45,23 +61,28 @@ pub(crate) enum HeartbeaterError {
     Cancel,
 }
 
-struct HeartbeatRequest {
-    pageservers: Arc<HashMap<NodeId, Node>>,
-    reply: tokio::sync::oneshot::Sender<Result<AvailablityDeltas, HeartbeaterError>>,
+struct HeartbeatRequest<Server, State> {
+    servers: Arc<HashMap<NodeId, Server>>,
+    reply: tokio::sync::oneshot::Sender<Result<AvailablityDeltas<State>, HeartbeaterError>>,
 }
 
-pub(crate) struct Heartbeater {
-    sender: tokio::sync::mpsc::UnboundedSender<HeartbeatRequest>,
+pub(crate) struct Heartbeater<Server, State> {
+    sender: tokio::sync::mpsc::UnboundedSender<HeartbeatRequest<Server, State>>,
 }
 
-impl Heartbeater {
+#[allow(private_bounds)]
+impl<Server: Send + Sync + 'static, State: Debug + Send + 'static> Heartbeater<Server, State>
+where
+    HeartbeaterTask<Server, State>: HeartBeat<Server, State>,
+{
     pub(crate) fn new(
         jwt_token: Option<String>,
         max_offline_interval: Duration,
         max_warming_up_interval: Duration,
         cancel: CancellationToken,
     ) -> Self {
-        let (sender, receiver) = tokio::sync::mpsc::unbounded_channel::<HeartbeatRequest>();
+        let (sender, receiver) =
+            tokio::sync::mpsc::unbounded_channel::<HeartbeatRequest<Server, State>>();
         let mut heartbeater = HeartbeaterTask::new(
             receiver,
             jwt_token,
@@ -76,12 +97,12 @@ impl Heartbeater {
 
     pub(crate) async fn heartbeat(
         &self,
-        pageservers: Arc<HashMap<NodeId, Node>>,
-    ) -> Result<AvailablityDeltas, HeartbeaterError> {
+        servers: Arc<HashMap<NodeId, Server>>,
+    ) -> Result<AvailablityDeltas<State>, HeartbeaterError> {
         let (sender, receiver) = tokio::sync::oneshot::channel();
         self.sender
             .send(HeartbeatRequest {
-                pageservers,
+                servers,
                 reply: sender,
             })
             .map_err(|_| HeartbeaterError::Cancel)?;
@@ -93,9 +114,12 @@ impl Heartbeater {
     }
 }
 
-impl HeartbeaterTask {
+impl<Server, State: Debug> HeartbeaterTask<Server, State>
+where
+    HeartbeaterTask<Server, State>: HeartBeat<Server, State>,
+{
     fn new(
-        receiver: tokio::sync::mpsc::UnboundedReceiver<HeartbeatRequest>,
+        receiver: tokio::sync::mpsc::UnboundedReceiver<HeartbeatRequest<Server, State>>,
         jwt_token: Option<String>,
         max_offline_interval: Duration,
         max_warming_up_interval: Duration,
@@ -110,15 +134,19 @@ impl HeartbeaterTask {
             jwt_token,
         }
     }
-
     async fn run(&mut self) {
         loop {
             tokio::select! {
                 request = self.receiver.recv() => {
                     match request {
                         Some(req) => {
-                            let res = self.heartbeat(req.pageservers).await;
-                            req.reply.send(res).unwrap();
+                            if req.reply.is_closed() {
+                                // Prevent a possibly infinite buildup of the receiver channel, if requests arrive faster than we can handle them
+                                continue;
+                            }
+                            let res = self.heartbeat(req.servers).await;
+                            // Ignore the return value in order to not panic if the heartbeat function's future was cancelled
+                            _ = req.reply.send(res);
                         },
                         None => { return; }
                     }
@@ -127,11 +155,20 @@ impl HeartbeaterTask {
             }
         }
     }
+}
 
+pub(crate) trait HeartBeat<Server, State> {
+    fn heartbeat(
+        &mut self,
+        pageservers: Arc<HashMap<NodeId, Server>>,
+    ) -> impl Future<Output = Result<AvailablityDeltas<State>, HeartbeaterError>> + Send;
+}
+
+impl HeartBeat<Node, PageserverState> for HeartbeaterTask<Node, PageserverState> {
     async fn heartbeat(
         &mut self,
         pageservers: Arc<HashMap<NodeId, Node>>,
-    ) -> Result<AvailablityDeltas, HeartbeaterError> {
+    ) -> Result<AvailablityDeltas<PageserverState>, HeartbeaterError> {
         let mut new_state = HashMap::new();
 
         let mut heartbeat_futs = FuturesUnordered::new();
@@ -272,3 +309,130 @@ impl HeartbeaterTask {
         Ok(AvailablityDeltas(deltas))
     }
 }
+
+impl HeartBeat<Safekeeper, SafekeeperState> for HeartbeaterTask<Safekeeper, SafekeeperState> {
+    async fn heartbeat(
+        &mut self,
+        safekeepers: Arc<HashMap<NodeId, Safekeeper>>,
+    ) -> Result<AvailablityDeltas<SafekeeperState>, HeartbeaterError> {
+        let mut new_state = HashMap::new();
+
+        let mut heartbeat_futs = FuturesUnordered::new();
+        for (node_id, sk) in &*safekeepers {
+            if sk.scheduling_policy() == SkSchedulingPolicy::Decomissioned {
+                continue;
+            }
+            heartbeat_futs.push({
+                let jwt_token = self
+                    .jwt_token
+                    .as_ref()
+                    .map(|t| SecretString::from(t.to_owned()));
+                let cancel = self.cancel.clone();
+
+                async move {
+                    let response = sk
+                        .with_client_retries(
+                            |client| async move { client.get_utilization().await },
+                            &jwt_token,
+                            3,
+                            3,
+                            Duration::from_secs(1),
+                            &cancel,
+                        )
+                        .await;
+
+                    let status = match response {
+                        Ok(utilization) => SafekeeperState::Available {
+                            last_seen_at: Instant::now(),
+                            utilization,
+                        },
+                        Err(mgmt_api::Error::Cancelled) => {
+                            // This indicates cancellation of the request.
+                            // We ignore the node in this case.
+                            return None;
+                        }
+                        Err(e) => {
+                            tracing::info!(
+                                "Marking safekeeper {} at as offline: {e}",
+                                sk.base_url()
+                            );
+                            SafekeeperState::Offline
+                        }
+                    };
+
+                    Some((*node_id, status))
+                }
+            });
+
+            loop {
+                let maybe_status = tokio::select! {
+                    next = heartbeat_futs.next() => {
+                        match next {
+                            Some(result) => result,
+                            None => { break; }
+                        }
+                    },
+                    _ = self.cancel.cancelled() => { return Err(HeartbeaterError::Cancel); }
+                };
+
+                if let Some((node_id, status)) = maybe_status {
+                    new_state.insert(node_id, status);
+                }
+            }
+        }
+
+        let mut offline = 0;
+        for state in new_state.values() {
+            match state {
+                SafekeeperState::Offline { .. } => offline += 1,
+                SafekeeperState::Available { .. } => {}
+            }
+        }
+
+        tracing::info!(
+            "Heartbeat round complete for {} safekeepers, {} offline",
+            new_state.len(),
+            offline
+        );
+
+        let mut deltas = Vec::new();
+        let now = Instant::now();
+        for (node_id, sk_state) in new_state.iter_mut() {
+            use std::collections::hash_map::Entry::*;
+            let entry = self.state.entry(*node_id);
+
+            let mut needs_update = false;
+            match entry {
+                Occupied(ref occ) => match (occ.get(), &sk_state) {
+                    (SafekeeperState::Offline, SafekeeperState::Offline) => {}
+                    (SafekeeperState::Available { last_seen_at, .. }, SafekeeperState::Offline) => {
+                        if now - *last_seen_at >= self.max_offline_interval {
+                            deltas.push((*node_id, sk_state.clone()));
+                            needs_update = true;
+                        }
+                    }
+                    _ => {
+                        deltas.push((*node_id, sk_state.clone()));
+                        needs_update = true;
+                    }
+                },
+                Vacant(_) => {
+                    // This is a new node. Don't generate a delta for it.
+                    deltas.push((*node_id, sk_state.clone()));
+                }
+            }
+
+            match entry {
+                Occupied(mut occ) if needs_update => {
+                    (*occ.get_mut()) = sk_state.clone();
+                }
+                Vacant(vac) => {
+                    vac.insert(sk_state.clone());
+                }
+                _ => {}
+            }
+        }
+
+        Ok(AvailablityDeltas(deltas))
+    }
+}
diff --git a/storage_controller/src/http.rs b/storage_controller/src/http.rs
index 1a56116cad..1cc61a12e8 100644
--- a/storage_controller/src/http.rs
+++ b/storage_controller/src/http.rs
@@ -9,7 +9,10 @@ use crate::service::{LeadershipStatus, Service, RECONCILE_TIMEOUT, STARTUP_RECON
 use anyhow::Context;
 use futures::Future;
 use http_utils::{
-    endpoint::{self, auth_middleware, check_permission_with, request_span},
+    endpoint::{
+        self, auth_middleware, check_permission_with, profile_cpu_handler, profile_heap_handler,
+        request_span,
+    },
     error::ApiError,
     failpoints::failpoints_handler,
     json::{json_request, json_response},
@@ -54,7 +57,7 @@ pub struct HttpState {
     service: Arc<crate::service::Service>,
     auth: Option<Arc<SwappableJwtAuth>>,
     neon_metrics: NeonMetrics,
-    allowlist_routes: Vec<Uri>,
+    allowlist_routes: &'static [&'static str],
 }
 
 impl HttpState {
@@ -63,15 +66,17 @@ impl HttpState {
         auth: Option<Arc<SwappableJwtAuth>>,
         build_info: BuildInfo,
     ) -> Self {
-        let allowlist_routes = ["/status", "/ready", "/metrics"]
-            .iter()
-            .map(|v| v.parse().unwrap())
-            .collect::<Vec<_>>();
         Self {
             service,
             auth,
             neon_metrics: NeonMetrics::new(build_info),
-            allowlist_routes,
+            allowlist_routes: &[
+                "/status",
+                "/ready",
+                "/metrics",
+                "/profile/cpu",
+                "/profile/heap",
+            ],
         }
     }
 }
@@ -516,6 +521,35 @@ async fn handle_tenant_timeline_block_unblock_gc(
     json_response(StatusCode::OK, ())
 }
 
+async fn handle_tenant_timeline_download_heatmap_layers(
+    service: Arc<Service>,
+    req: Request<Body>,
+) -> Result<Response<Body>, ApiError> {
+    let tenant_shard_id: TenantShardId = parse_request_param(&req, "tenant_shard_id")?;
+
+    check_permissions(&req, Scope::PageServerApi)?;
+
+    let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?;
+    let concurrency: Option<usize> = parse_query_param(&req, "concurrency")?;
+
+    service
+        .tenant_timeline_download_heatmap_layers(tenant_shard_id, timeline_id, concurrency)
+        .await?;
+
+    json_response(StatusCode::OK, ())
+}
+
+// For metric labels where we would like to include the approximate path, but exclude high-cardinality fields like query parameters
+// and tenant/timeline IDs.  Since we are proxying to arbitrary paths, we don't have routing templates to
+// compare to, so we can just filter out our well known ID format with regexes.
+fn path_without_ids(path: &str) -> String {
+    static ID_REGEX: std::sync::OnceLock<regex::Regex> = std::sync::OnceLock::new();
+    ID_REGEX
+        .get_or_init(|| regex::Regex::new(r"([0-9a-fA-F]{32}(-[0-9]{4})?|\?.*)").unwrap())
+        .replace_all(path, "")
+        .to_string()
+}
+
 async fn handle_tenant_timeline_passthrough(
     service: Arc<Service>,
     req: Request<Body>,
@@ -551,10 +585,7 @@ async fn handle_tenant_timeline_passthrough(
         .metrics_group
         .storage_controller_passthrough_request_latency;
 
-    // This is a bit awkward. We remove the param from the request
-    // and join the words by '_' to get a label for the request.
-    let just_path = path.replace(&tenant_shard_str, "");
-    let path_label = just_path
+    let path_label = path_without_ids(&path)
         .split('/')
         .filter(|token| !token.is_empty())
         .collect::<Vec<_>>()
@@ -1390,23 +1421,26 @@ pub fn prologue_leadership_status_check_middleware<
         let state = get_state(&req);
         let leadership_status = state.service.get_leadership_status();
 
-        enum AllowedRoutes<'a> {
+        enum AllowedRoutes {
             All,
-            Some(Vec<&'a str>),
+            Some(&'static [&'static str]),
         }
 
         let allowed_routes = match leadership_status {
             LeadershipStatus::Leader => AllowedRoutes::All,
             LeadershipStatus::SteppedDown => AllowedRoutes::All,
-            LeadershipStatus::Candidate => {
-                AllowedRoutes::Some(["/ready", "/status", "/metrics"].to_vec())
-            }
+            LeadershipStatus::Candidate => AllowedRoutes::Some(&[
+                "/ready",
+                "/status",
+                "/metrics",
+                "/profile/cpu",
+                "/profile/heap",
+            ]),
         };
 
-        let uri = req.uri().to_string();
         match allowed_routes {
             AllowedRoutes::All => Ok(req),
-            AllowedRoutes::Some(allowed) if allowed.contains(&uri.as_str()) => Ok(req),
+            AllowedRoutes::Some(allowed) if allowed.contains(&req.uri().path()) => Ok(req),
             _ => {
                 tracing::info!(
                     "Request {} not allowed due to current leadership state",
@@ -1515,7 +1549,8 @@ enum ForwardOutcome {
 
 /// Potentially forward the request to the current storage controler leader.
 /// More specifically we forward when:
-/// 1. Request is not one of ["/control/v1/step_down", "/status", "/ready", "/metrics"]
+/// 1. Request is not one of:
+///    ["/control/v1/step_down", "/status", "/ready", "/metrics", "/profile/cpu", "/profile/heap"]
 /// 2. Current instance is in [`LeadershipStatus::SteppedDown`] state
 /// 3. There is a leader in the database to forward to
 /// 4. Leader from step (3) is not the current instance
@@ -1536,10 +1571,17 @@ enum ForwardOutcome {
 /// Hence, if we are in the edge case scenario the leader persisted in the database is the
 /// stepped down instance that received the request. Condition (4) above covers this scenario.
 async fn maybe_forward(req: Request<Body>) -> ForwardOutcome {
-    const NOT_FOR_FORWARD: [&str; 4] = ["/control/v1/step_down", "/status", "/ready", "/metrics"];
+    const NOT_FOR_FORWARD: &[&str] = &[
+        "/control/v1/step_down",
+        "/status",
+        "/ready",
+        "/metrics",
+        "/profile/cpu",
+        "/profile/heap",
+    ];
 
-    let uri = req.uri().to_string();
-    let uri_for_forward = !NOT_FOR_FORWARD.contains(&uri.as_str());
+    let uri = req.uri();
+    let uri_for_forward = !NOT_FOR_FORWARD.contains(&uri.path());
 
     // Fast return before trying to take any Service locks, if we will never forward anyway
     if !uri_for_forward {
@@ -1739,7 +1781,7 @@ pub fn make_router(
     if auth.is_some() {
         router = router.middleware(auth_middleware(|request| {
             let state = get_state(request);
-            if state.allowlist_routes.contains(request.uri()) {
+            if state.allowlist_routes.contains(&request.uri().path()) {
                 None
             } else {
                 state.auth.as_deref()
@@ -1752,13 +1794,19 @@ pub fn make_router(
         .get("/metrics", |r| {
             named_request_span(r, measured_metrics_handler, RequestName("metrics"))
         })
-        // Non-prefixed generic endpoints (status, metrics)
+        // Non-prefixed generic endpoints (status, metrics, profiling)
         .get("/status", |r| {
             named_request_span(r, handle_status, RequestName("status"))
         })
         .get("/ready", |r| {
             named_request_span(r, handle_ready, RequestName("ready"))
         })
+        .get("/profile/cpu", |r| {
+            named_request_span(r, profile_cpu_handler, RequestName("profile_cpu"))
+        })
+        .get("/profile/heap", |r| {
+            named_request_span(r, profile_heap_handler, RequestName("profile_heap"))
+        })
         // Upcalls for the pageserver: point the pageserver's `control_plane_api` config to this prefix
         .post("/upcall/v1/re-attach", |r| {
             named_request_span(r, handle_re_attach, RequestName("upcall_v1_reattach"))
@@ -2070,6 +2118,16 @@ pub fn make_router(
                 )
             },
         )
+        .post(
+            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/download_heatmap_layers",
+            |r| {
+                tenant_service_handler(
+                    r,
+                    handle_tenant_timeline_download_heatmap_layers,
+                    RequestName("v1_tenant_timeline_download_heatmap_layers"),
+                )
+            },
+        )
         // Tenant detail GET passthrough to shard zero:
         .get("/v1/tenant/:tenant_id", |r| {
             tenant_service_handler(
@@ -2089,3 +2147,16 @@ pub fn make_router(
             )
         })
 }
+
+#[cfg(test)]
+mod test {
+
+    use super::path_without_ids;
+
+    #[test]
+    fn test_path_without_ids() {
+        assert_eq!(path_without_ids("/v1/tenant/1a2b3344556677881122334455667788/timeline/AA223344556677881122334455667788"), "/v1/tenant//timeline/");
+        assert_eq!(path_without_ids("/v1/tenant/1a2b3344556677881122334455667788-0108/timeline/AA223344556677881122334455667788"), "/v1/tenant//timeline/");
+        assert_eq!(path_without_ids("/v1/tenant/1a2b3344556677881122334455667788-0108/timeline/AA223344556677881122334455667788?parameter=foo"), "/v1/tenant//timeline/");
+    }
+}
diff --git a/storage_controller/src/lib.rs b/storage_controller/src/lib.rs
index f5823935e1..5f2c081927 100644
--- a/storage_controller/src/lib.rs
+++ b/storage_controller/src/lib.rs
@@ -17,6 +17,8 @@ mod pageserver_client;
 mod peer_client;
 pub mod persistence;
 mod reconciler;
+mod safekeeper;
+mod safekeeper_client;
 mod scheduler;
 mod schema;
 pub mod service;
diff --git a/storage_controller/src/main.rs b/storage_controller/src/main.rs
index 07279a67ff..9a9958f7a6 100644
--- a/storage_controller/src/main.rs
+++ b/storage_controller/src/main.rs
@@ -12,7 +12,8 @@ use storage_controller::persistence::Persistence;
 use storage_controller::service::chaos_injector::ChaosInjector;
 use storage_controller::service::{
     Config, Service, HEARTBEAT_INTERVAL_DEFAULT, LONG_RECONCILE_THRESHOLD_DEFAULT,
-    MAX_OFFLINE_INTERVAL_DEFAULT, MAX_WARMING_UP_INTERVAL_DEFAULT, RECONCILER_CONCURRENCY_DEFAULT,
+    MAX_OFFLINE_INTERVAL_DEFAULT, MAX_WARMING_UP_INTERVAL_DEFAULT,
+    PRIORITY_RECONCILER_CONCURRENCY_DEFAULT, RECONCILER_CONCURRENCY_DEFAULT,
 };
 use tokio::signal::unix::SignalKind;
 use tokio_util::sync::CancellationToken;
@@ -26,6 +27,16 @@ use utils::{project_build_tag, project_git_version, tcp_listener};
 project_git_version!(GIT_VERSION);
 project_build_tag!(BUILD_TAG);
 
+#[global_allocator]
+static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
+
+/// Configure jemalloc to profile heap allocations by sampling stack traces every 2 MB (1 << 21).
+/// This adds roughly 3% overhead for allocations on average, which is acceptable considering
+/// performance-sensitive code will avoid allocations as far as possible anyway.
+#[allow(non_upper_case_globals)]
+#[export_name = "malloc_conf"]
+pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:21\0";
+
 #[derive(Parser)]
 #[command(author, version, about, long_about = None)]
 #[command(arg_required_else_help(true))]
@@ -75,10 +86,14 @@ struct Cli {
     #[arg(long)]
     split_threshold: Option<u64>,
 
-    /// Maximum number of reconcilers that may run in parallel
+    /// Maximum number of normal-priority reconcilers that may run in parallel
     #[arg(long)]
     reconciler_concurrency: Option<usize>,
 
+    /// Maximum number of high-priority reconcilers that may run in parallel
+    #[arg(long)]
+    priority_reconciler_concurrency: Option<usize>,
+
     /// How long to wait for the initial database connection to be available.
     #[arg(long, default_value = "5s")]
     db_connect_timeout: humantime::Duration,
@@ -289,6 +304,9 @@ async fn async_main() -> anyhow::Result<()> {
         reconciler_concurrency: args
             .reconciler_concurrency
             .unwrap_or(RECONCILER_CONCURRENCY_DEFAULT),
+        priority_reconciler_concurrency: args
+            .priority_reconciler_concurrency
+            .unwrap_or(PRIORITY_RECONCILER_CONCURRENCY_DEFAULT),
         split_threshold: args.split_threshold,
         neon_local_repo_dir: args.neon_local_repo_dir,
         max_secondary_lag_bytes: args.max_secondary_lag_bytes,
diff --git a/storage_controller/src/metrics.rs b/storage_controller/src/metrics.rs
index 4164e3dc2b..6d67e0d130 100644
--- a/storage_controller/src/metrics.rs
+++ b/storage_controller/src/metrics.rs
@@ -80,6 +80,11 @@ pub(crate) struct StorageControllerMetricGroup {
     pub(crate) storage_controller_pageserver_request_error:
         measured::CounterVec<PageserverRequestLabelGroupSet>,
 
+    /// Count of HTTP requests to the safekeeper that resulted in an error,
+    /// broken down by the safekeeper node id, request name and method
+    pub(crate) storage_controller_safekeeper_request_error:
+        measured::CounterVec<PageserverRequestLabelGroupSet>,
+
     /// Latency of HTTP requests to the pageserver, broken down by pageserver
     /// node id, request name and method. This include both successful and unsuccessful
     /// requests.
@@ -87,6 +92,13 @@ pub(crate) struct StorageControllerMetricGroup {
     pub(crate) storage_controller_pageserver_request_latency:
         measured::HistogramVec<PageserverRequestLabelGroupSet, 5>,
 
+    /// Latency of HTTP requests to the safekeeper, broken down by safekeeper
+    /// node id, request name and method. This include both successful and unsuccessful
+    /// requests.
+    #[metric(metadata = histogram::Thresholds::exponential_buckets(0.1, 2.0))]
+    pub(crate) storage_controller_safekeeper_request_latency:
+        measured::HistogramVec<PageserverRequestLabelGroupSet, 5>,
+
     /// Count of pass-through HTTP requests to the pageserver that resulted in an error,
     /// broken down by the pageserver node id, request name and method
     pub(crate) storage_controller_passthrough_request_error:
diff --git a/storage_controller/src/pageserver_client.rs b/storage_controller/src/pageserver_client.rs
index 141ff6f720..645cbdfce1 100644
--- a/storage_controller/src/pageserver_client.rs
+++ b/storage_controller/src/pageserver_client.rs
@@ -280,6 +280,22 @@ impl PageserverClient {
         )
     }
 
+    pub(crate) async fn timeline_download_heatmap_layers(
+        &self,
+        tenant_shard_id: TenantShardId,
+        timeline_id: TimelineId,
+        concurrency: Option<usize>,
+    ) -> Result<()> {
+        measured_request!(
+            "download_heatmap_layers",
+            crate::metrics::Method::Post,
+            &self.node_id_label,
+            self.inner
+                .timeline_download_heatmap_layers(tenant_shard_id, timeline_id, concurrency)
+                .await
+        )
+    }
+
     pub(crate) async fn get_utilization(&self) -> Result<PageserverUtilization> {
         measured_request!(
             "utilization",
diff --git a/storage_controller/src/persistence.rs b/storage_controller/src/persistence.rs
index c4e5b39589..67b60eadf3 100644
--- a/storage_controller/src/persistence.rs
+++ b/storage_controller/src/persistence.rs
@@ -1185,23 +1185,6 @@ impl Persistence {
         Ok(safekeepers)
     }
 
-    pub(crate) async fn safekeeper_get(
-        &self,
-        id: i64,
-    ) -> Result<SafekeeperPersistence, DatabaseError> {
-        use crate::schema::safekeepers::dsl::{id as id_column, safekeepers};
-        self.with_conn(move |conn| {
-            Box::pin(async move {
-                Ok(safekeepers
-                    .filter(id_column.eq(&id))
-                    .select(SafekeeperPersistence::as_select())
-                    .get_result(conn)
-                    .await?)
-            })
-        })
-        .await
-    }
-
     pub(crate) async fn safekeeper_upsert(
         &self,
         record: SafekeeperUpsert,
@@ -1554,6 +1537,21 @@ pub(crate) struct SafekeeperPersistence {
 }
 
 impl SafekeeperPersistence {
+    pub(crate) fn from_upsert(
+        upsert: SafekeeperUpsert,
+        scheduling_policy: SkSchedulingPolicy,
+    ) -> Self {
+        crate::persistence::SafekeeperPersistence {
+            id: upsert.id,
+            region_id: upsert.region_id,
+            version: upsert.version,
+            host: upsert.host,
+            port: upsert.port,
+            http_port: upsert.http_port,
+            availability_zone_id: upsert.availability_zone_id,
+            scheduling_policy: String::from(scheduling_policy),
+        }
+    }
     pub(crate) fn as_describe_response(&self) -> Result<SafekeeperDescribeResponse, DatabaseError> {
         let scheduling_policy =
             SkSchedulingPolicy::from_str(&self.scheduling_policy).map_err(|e| {
diff --git a/storage_controller/src/reconciler.rs b/storage_controller/src/reconciler.rs
index 58bc0ba1cd..48f0804926 100644
--- a/storage_controller/src/reconciler.rs
+++ b/storage_controller/src/reconciler.rs
@@ -1,7 +1,7 @@
 use crate::pageserver_client::PageserverClient;
 use crate::persistence::Persistence;
 use crate::{compute_hook, service};
-use pageserver_api::controller_api::{AvailabilityZone, PlacementPolicy};
+use pageserver_api::controller_api::{AvailabilityZone, MigrationConfig, PlacementPolicy};
 use pageserver_api::models::{
     LocationConfig, LocationConfigMode, LocationConfigSecondary, TenantConfig, TenantWaitLsnRequest,
 };
@@ -91,9 +91,10 @@ pub(crate) struct ReconcilerConfigBuilder {
 }
 
 impl ReconcilerConfigBuilder {
-    pub(crate) fn new() -> Self {
+    /// Priority is special: you must pick one thoughtfully, do not just use 'normal' as the default
+    pub(crate) fn new(priority: ReconcilerPriority) -> Self {
         Self {
-            config: ReconcilerConfig::default(),
+            config: ReconcilerConfig::new(priority),
         }
     }
 
@@ -129,8 +130,18 @@ impl ReconcilerConfigBuilder {
     }
 }
 
-#[derive(Default, Debug, Copy, Clone)]
+// Higher priorities are used for user-facing tasks, so that a long backlog of housekeeping work (e.g. reconciling on startup, rescheduling
+// things on node changes) does not starve user-facing tasks.
+#[derive(Debug, Copy, Clone)]
+pub(crate) enum ReconcilerPriority {
+    Normal,
+    High,
+}
+
+#[derive(Debug, Copy, Clone)]
 pub(crate) struct ReconcilerConfig {
+    pub(crate) priority: ReconcilerPriority,
+
     // During live migration give up on warming-up the secondary
     // after this timeout.
     secondary_warmup_timeout: Option<Duration>,
@@ -145,6 +156,18 @@ pub(crate) struct ReconcilerConfig {
 }
 
 impl ReconcilerConfig {
+    /// Configs are always constructed with an explicit priority, to force callers to think about whether
+    /// the operation they're scheduling is high-priority or not. Normal priority is not a safe default, because
+    /// scheduling something user-facing at normal priority can result in it getting starved out by background work.
+    pub(crate) fn new(priority: ReconcilerPriority) -> Self {
+        Self {
+            priority,
+            secondary_warmup_timeout: None,
+            secondary_download_request_timeout: None,
+            tenant_creation_hint: false,
+        }
+    }
+
     pub(crate) fn get_secondary_warmup_timeout(&self) -> Duration {
         const SECONDARY_WARMUP_TIMEOUT_DEFAULT: Duration = Duration::from_secs(300);
         self.secondary_warmup_timeout
@@ -162,6 +185,24 @@ impl ReconcilerConfig {
     }
 }
 
+impl From<&MigrationConfig> for ReconcilerConfig {
+    fn from(value: &MigrationConfig) -> Self {
+        // Run reconciler at high priority because MigrationConfig comes from human requests that should
+        // be presumed urgent.
+        let mut builder = ReconcilerConfigBuilder::new(ReconcilerPriority::High);
+
+        if let Some(timeout) = value.secondary_warmup_timeout {
+            builder = builder.secondary_warmup_timeout(timeout)
+        }
+
+        if let Some(timeout) = value.secondary_download_request_timeout {
+            builder = builder.secondary_download_request_timeout(timeout)
+        }
+
+        builder.build()
+    }
+}
+
 /// RAII resource units granted to a Reconciler, which it should keep alive until it finishes doing I/O
 pub(crate) struct ReconcileUnits {
     _sem_units: tokio::sync::OwnedSemaphorePermit,
diff --git a/storage_controller/src/safekeeper.rs b/storage_controller/src/safekeeper.rs
new file mode 100644
index 0000000000..53cd8a908b
--- /dev/null
+++ b/storage_controller/src/safekeeper.rs
@@ -0,0 +1,147 @@
+use std::{str::FromStr, time::Duration};
+
+use pageserver_api::controller_api::{SafekeeperDescribeResponse, SkSchedulingPolicy};
+use reqwest::StatusCode;
+use safekeeper_client::mgmt_api;
+use tokio_util::sync::CancellationToken;
+use utils::{backoff, id::NodeId, logging::SecretString};
+
+use crate::{
+    heartbeater::SafekeeperState,
+    persistence::{DatabaseError, SafekeeperPersistence},
+    safekeeper_client::SafekeeperClient,
+};
+
+#[derive(Clone)]
+pub struct Safekeeper {
+    pub(crate) skp: SafekeeperPersistence,
+    cancel: CancellationToken,
+    listen_http_addr: String,
+    listen_http_port: u16,
+    scheduling_policy: SkSchedulingPolicy,
+    id: NodeId,
+    availability: SafekeeperState,
+}
+
+impl Safekeeper {
+    pub(crate) fn from_persistence(skp: SafekeeperPersistence, cancel: CancellationToken) -> Self {
+        let scheduling_policy = SkSchedulingPolicy::from_str(&skp.scheduling_policy).unwrap();
+        Self {
+            cancel,
+            listen_http_addr: skp.host.clone(),
+            listen_http_port: skp.http_port as u16,
+            id: NodeId(skp.id as u64),
+            skp,
+            availability: SafekeeperState::Offline,
+            scheduling_policy,
+        }
+    }
+    pub(crate) fn base_url(&self) -> String {
+        format!("http://{}:{}", self.listen_http_addr, self.listen_http_port)
+    }
+
+    pub(crate) fn get_id(&self) -> NodeId {
+        self.id
+    }
+    pub(crate) fn describe_response(&self) -> Result<SafekeeperDescribeResponse, DatabaseError> {
+        self.skp.as_describe_response()
+    }
+    pub(crate) fn set_availability(&mut self, availability: SafekeeperState) {
+        self.availability = availability;
+    }
+    pub(crate) fn scheduling_policy(&self) -> SkSchedulingPolicy {
+        self.scheduling_policy
+    }
+    pub(crate) fn set_scheduling_policy(&mut self, scheduling_policy: SkSchedulingPolicy) {
+        self.scheduling_policy = scheduling_policy;
+        self.skp.scheduling_policy = String::from(scheduling_policy);
+    }
+    /// Perform an operation (which is given a [`SafekeeperClient`]) with retries
+    pub(crate) async fn with_client_retries<T, O, F>(
+        &self,
+        mut op: O,
+        jwt: &Option<SecretString>,
+        warn_threshold: u32,
+        max_retries: u32,
+        timeout: Duration,
+        cancel: &CancellationToken,
+    ) -> mgmt_api::Result<T>
+    where
+        O: FnMut(SafekeeperClient) -> F,
+        F: std::future::Future<Output = mgmt_api::Result<T>>,
+    {
+        fn is_fatal(e: &mgmt_api::Error) -> bool {
+            use mgmt_api::Error::*;
+            match e {
+                ReceiveBody(_) | ReceiveErrorBody(_) => false,
+                ApiError(StatusCode::SERVICE_UNAVAILABLE, _)
+                | ApiError(StatusCode::GATEWAY_TIMEOUT, _)
+                | ApiError(StatusCode::REQUEST_TIMEOUT, _) => false,
+                ApiError(_, _) => true,
+                Cancelled => true,
+            }
+        }
+
+        backoff::retry(
+            || {
+                let http_client = reqwest::ClientBuilder::new()
+                    .timeout(timeout)
+                    .build()
+                    .expect("Failed to construct HTTP client");
+
+                let client = SafekeeperClient::from_client(
+                    self.get_id(),
+                    http_client,
+                    self.base_url(),
+                    jwt.clone(),
+                );
+
+                let node_cancel_fut = self.cancel.cancelled();
+
+                let op_fut = op(client);
+
+                async {
+                    tokio::select! {
+                        r = op_fut=> {r},
+                        _ = node_cancel_fut => {
+                        Err(mgmt_api::Error::Cancelled)
+                    }}
+                }
+            },
+            is_fatal,
+            warn_threshold,
+            max_retries,
+            &format!(
+                "Call to safekeeper {} ({}:{}) management API",
+                self.id, self.listen_http_addr, self.listen_http_port
+            ),
+            cancel,
+        )
+        .await
+        .unwrap_or(Err(mgmt_api::Error::Cancelled))
+    }
+
+    pub(crate) fn update_from_record(&mut self, record: crate::persistence::SafekeeperUpsert) {
+        let crate::persistence::SafekeeperUpsert {
+            active: _,
+            availability_zone_id: _,
+            host,
+            http_port,
+            id,
+            port: _,
+            region_id: _,
+            version: _,
+        } = record.clone();
+        if id != self.id.0 as i64 {
+            // The way the function is called ensures this. If we regress on that, it's a bug.
+            panic!(
+                "id can't be changed via update_from_record function: {id} != {}",
+                self.id.0
+            );
+        }
+        self.skp =
+            crate::persistence::SafekeeperPersistence::from_upsert(record, self.scheduling_policy);
+        self.listen_http_port = http_port as u16;
+        self.listen_http_addr = host;
+    }
+}
diff --git a/storage_controller/src/safekeeper_client.rs b/storage_controller/src/safekeeper_client.rs
new file mode 100644
index 0000000000..f234ab3429
--- /dev/null
+++ b/storage_controller/src/safekeeper_client.rs
@@ -0,0 +1,121 @@
+use crate::metrics::PageserverRequestLabelGroup;
+use safekeeper_api::models::{
+    PullTimelineRequest, PullTimelineResponse, SafekeeperUtilization, TimelineCreateRequest,
+    TimelineStatus,
+};
+use safekeeper_client::mgmt_api::{Client, Result};
+use utils::{
+    id::{NodeId, TenantId, TimelineId},
+    logging::SecretString,
+};
+
+/// Thin wrapper around [`safekeeper_client::mgmt_api::Client`]. It allows the storage
+/// controller to collect metrics in a non-intrusive manner.
+///
+/// Analogous to [`crate::pageserver_client::PageserverClient`].
+#[derive(Debug, Clone)]
+pub(crate) struct SafekeeperClient {
+    inner: Client,
+    node_id_label: String,
+}
+
+macro_rules! measured_request {
+    ($name:literal, $method:expr, $node_id: expr, $invoke:expr) => {{
+        let labels = PageserverRequestLabelGroup {
+            pageserver_id: $node_id,
+            path: $name,
+            method: $method,
+        };
+
+        let latency = &crate::metrics::METRICS_REGISTRY
+            .metrics_group
+            .storage_controller_safekeeper_request_latency;
+        let _timer_guard = latency.start_timer(labels.clone());
+
+        let res = $invoke;
+
+        if res.is_err() {
+            let error_counters = &crate::metrics::METRICS_REGISTRY
+                .metrics_group
+                .storage_controller_pageserver_request_error;
+            error_counters.inc(labels)
+        }
+
+        res
+    }};
+}
+
+impl SafekeeperClient {
+    #[allow(dead_code)]
+    pub(crate) fn new(
+        node_id: NodeId,
+        mgmt_api_endpoint: String,
+        jwt: Option<SecretString>,
+    ) -> Self {
+        Self {
+            inner: Client::from_client(reqwest::Client::new(), mgmt_api_endpoint, jwt),
+            node_id_label: node_id.0.to_string(),
+        }
+    }
+
+    pub(crate) fn from_client(
+        node_id: NodeId,
+        raw_client: reqwest::Client,
+        mgmt_api_endpoint: String,
+        jwt: Option<SecretString>,
+    ) -> Self {
+        Self {
+            inner: Client::from_client(raw_client, mgmt_api_endpoint, jwt),
+            node_id_label: node_id.0.to_string(),
+        }
+    }
+
+    #[allow(dead_code)]
+    pub(crate) async fn create_timeline(
+        &self,
+        req: &TimelineCreateRequest,
+    ) -> Result<TimelineStatus> {
+        measured_request!(
+            "create_timeline",
+            crate::metrics::Method::Post,
+            &self.node_id_label,
+            self.inner.create_timeline(req).await
+        )
+    }
+
+    #[allow(dead_code)]
+    pub(crate) async fn delete_timeline(
+        &self,
+        tenant_id: TenantId,
+        timeline_id: TimelineId,
+    ) -> Result<TimelineStatus> {
+        measured_request!(
+            "delete_timeline",
+            crate::metrics::Method::Delete,
+            &self.node_id_label,
+            self.inner.delete_timeline(tenant_id, timeline_id).await
+        )
+    }
+
+    #[allow(dead_code)]
+    pub(crate) async fn pull_timeline(
+        &self,
+        req: &PullTimelineRequest,
+    ) -> Result<PullTimelineResponse> {
+        measured_request!(
+            "pull_timeline",
+            crate::metrics::Method::Post,
+            &self.node_id_label,
+            self.inner.pull_timeline(req).await
+        )
+    }
+
+    pub(crate) async fn get_utilization(&self) -> Result<SafekeeperUtilization> {
+        measured_request!(
+            "utilization",
+            crate::metrics::Method::Get,
+            &self.node_id_label,
+            self.inner.utilization().await
+        )
+    }
+}
diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs
index 6829663a4c..fc6d2f3d29 100644
--- a/storage_controller/src/service.rs
+++ b/storage_controller/src/service.rs
@@ -2,6 +2,7 @@ pub mod chaos_injector;
 mod context_iterator;
 
 use hyper::Uri;
+use safekeeper_api::models::SafekeeperUtilization;
 use std::{
     borrow::Cow,
     cmp::Ordering,
@@ -20,6 +21,7 @@ use crate::{
     },
     compute_hook::{self, NotifyError},
     drain_utils::{self, TenantShardDrain, TenantShardIterator},
+    heartbeater::SafekeeperState,
     id_lock_map::{trace_exclusive_lock, trace_shared_lock, IdLockMap, TracingExclusiveGuard},
     leadership::Leadership,
     metrics,
@@ -28,7 +30,11 @@ use crate::{
         AbortShardSplitStatus, ControllerPersistence, DatabaseResult, MetadataHealthPersistence,
         ShardGenerationState, TenantFilter,
     },
-    reconciler::{ReconcileError, ReconcileUnits, ReconcilerConfig, ReconcilerConfigBuilder},
+    reconciler::{
+        ReconcileError, ReconcileUnits, ReconcilerConfig, ReconcilerConfigBuilder,
+        ReconcilerPriority,
+    },
+    safekeeper::Safekeeper,
     scheduler::{MaySchedule, ScheduleContext, ScheduleError, ScheduleMode},
     tenant_shard::{
         MigrateAttachment, ObservedStateDelta, ReconcileNeeded, ReconcilerStatus,
@@ -76,7 +82,7 @@ use pageserver_api::{
     },
 };
 use pageserver_client::{mgmt_api, BlockUnblock};
-use tokio::sync::mpsc::error::TrySendError;
+use tokio::sync::{mpsc::error::TrySendError, TryAcquireError};
 use tokio_util::sync::CancellationToken;
 use utils::{
     completion::Barrier,
@@ -156,6 +162,7 @@ enum TenantOperations {
     TimelineDetachAncestor,
     TimelineGcBlockUnblock,
     DropDetached,
+    DownloadHeatmapLayers,
 }
 
 #[derive(Clone, strum_macros::Display)]
@@ -192,6 +199,7 @@ pub(crate) enum LeadershipStatus {
 }
 
 pub const RECONCILER_CONCURRENCY_DEFAULT: usize = 128;
+pub const PRIORITY_RECONCILER_CONCURRENCY_DEFAULT: usize = 256;
 
 // Depth of the channel used to enqueue shards for reconciliation when they can't do it immediately.
 // This channel is finite-size to avoid using excessive memory if we get into a state where reconciles are finishing more slowly
@@ -206,6 +214,8 @@ struct ServiceState {
 
     nodes: Arc<HashMap<NodeId, Node>>,
 
+    safekeepers: Arc<HashMap<NodeId, Safekeeper>>,
+
     scheduler: Scheduler,
 
     /// Ongoing background operation on the cluster if any is running.
@@ -272,6 +282,7 @@ fn passthrough_api_error(node: &Node, e: mgmt_api::Error) -> ApiError {
 impl ServiceState {
     fn new(
         nodes: HashMap<NodeId, Node>,
+        safekeepers: HashMap<NodeId, Safekeeper>,
         tenants: BTreeMap<TenantShardId, TenantShard>,
         scheduler: Scheduler,
         delayed_reconcile_rx: tokio::sync::mpsc::Receiver<TenantShardId>,
@@ -283,6 +294,7 @@ impl ServiceState {
             leadership_status: initial_leadership_status,
             tenants,
             nodes: Arc::new(nodes),
+            safekeepers: Arc::new(safekeepers),
             scheduler,
             ongoing_operation: None,
             delayed_reconcile_rx,
@@ -299,6 +311,23 @@ impl ServiceState {
         (&mut self.nodes, &mut self.tenants, &mut self.scheduler)
     }
 
+    #[allow(clippy::type_complexity)]
+    fn parts_mut_sk(
+        &mut self,
+    ) -> (
+        &mut Arc<HashMap<NodeId, Node>>,
+        &mut Arc<HashMap<NodeId, Safekeeper>>,
+        &mut BTreeMap<TenantShardId, TenantShard>,
+        &mut Scheduler,
+    ) {
+        (
+            &mut self.nodes,
+            &mut self.safekeepers,
+            &mut self.tenants,
+            &mut self.scheduler,
+        )
+    }
+
     fn get_leadership_status(&self) -> LeadershipStatus {
         self.leadership_status
     }
@@ -342,9 +371,12 @@ pub struct Config {
     /// and/or upon handling the re-attach request from a node.
     pub max_warming_up_interval: Duration,
 
-    /// How many Reconcilers may be spawned concurrently
+    /// How many normal-priority Reconcilers may be spawned concurrently
     pub reconciler_concurrency: usize,
 
+    /// How many high-priority Reconcilers may be spawned concurrently
+    pub priority_reconciler_concurrency: usize,
+
     /// How large must a shard grow in bytes before we split it?
     /// None disables auto-splitting.
     pub split_threshold: Option<u64>,
@@ -397,7 +429,8 @@ pub struct Service {
     compute_hook: Arc<ComputeHook>,
     result_tx: tokio::sync::mpsc::UnboundedSender<ReconcileResultRequest>,
 
-    heartbeater: Heartbeater,
+    heartbeater_ps: Heartbeater<Node, PageserverState>,
+    heartbeater_sk: Heartbeater<Safekeeper, SafekeeperState>,
 
     // Channel for background cleanup from failed operations that require cleanup, such as shard split
     abort_tx: tokio::sync::mpsc::UnboundedSender<TenantShardSplitAbort>,
@@ -411,9 +444,14 @@ pub struct Service {
     // that transition it to/from Active.
     node_op_locks: IdLockMap<NodeId, NodeOperations>,
 
-    // Limit how many Reconcilers we will spawn concurrently
+    // Limit how many Reconcilers we will spawn concurrently for normal-priority tasks such as background reconciliations
+    // and reconciliation on startup.
     reconciler_concurrency: Arc<tokio::sync::Semaphore>,
 
+    // Limit how many Reconcilers we will spawn concurrently for high-priority tasks such as tenant/timeline CRUD, which
+    // a human user might be waiting for.
+    priority_reconciler_concurrency: Arc<tokio::sync::Semaphore>,
+
     /// Queue of tenants who are waiting for concurrency limits to permit them to reconcile
     /// Send into this queue to promptly attempt to reconcile this shard next time units are available.
     ///
@@ -607,7 +645,8 @@ impl Service {
             let locked = self.inner.read().unwrap();
             locked.nodes.clone()
         };
-        let mut nodes_online = self.initial_heartbeat_round(all_nodes.keys()).await;
+        let (mut nodes_online, mut sks_online) =
+            self.initial_heartbeat_round(all_nodes.keys()).await;
 
         // List of tenants for which we will attempt to notify compute of their location at startup
         let mut compute_notifications = Vec::new();
@@ -616,7 +655,7 @@ impl Service {
         tracing::info!("Populating tenant shards' states from initial pageserver scan...");
         let shard_count = {
             let mut locked = self.inner.write().unwrap();
-            let (nodes, tenants, scheduler) = locked.parts_mut();
+            let (nodes, safekeepers, tenants, scheduler) = locked.parts_mut_sk();
 
             // Mark nodes online if they responded to us: nodes are offline by default after a restart.
             let mut new_nodes = (**nodes).clone();
@@ -628,6 +667,17 @@ impl Service {
             }
             *nodes = Arc::new(new_nodes);
 
+            let mut new_sks = (**safekeepers).clone();
+            for (node_id, node) in new_sks.iter_mut() {
+                if let Some((utilization, last_seen_at)) = sks_online.remove(node_id) {
+                    node.set_availability(SafekeeperState::Available {
+                        utilization,
+                        last_seen_at,
+                    });
+                }
+            }
+            *safekeepers = Arc::new(new_sks);
+
             for (tenant_shard_id, observed_state) in observed.0 {
                 let Some(tenant_shard) = tenants.get_mut(&tenant_shard_id) else {
                     for node_id in observed_state.locations.keys() {
@@ -736,7 +786,10 @@ impl Service {
     async fn initial_heartbeat_round<'a>(
         &self,
         node_ids: impl Iterator<Item = &'a NodeId>,
-    ) -> HashMap<NodeId, PageserverUtilization> {
+    ) -> (
+        HashMap<NodeId, PageserverUtilization>,
+        HashMap<NodeId, (SafekeeperUtilization, Instant)>,
+    ) {
         assert!(!self.startup_complete.is_ready());
 
         let all_nodes = {
@@ -756,14 +809,21 @@ impl Service {
             }
         }
 
+        let all_sks = {
+            let locked = self.inner.read().unwrap();
+            locked.safekeepers.clone()
+        };
+
         tracing::info!("Sending initial heartbeats...");
-        let res = self
-            .heartbeater
-            .heartbeat(Arc::new(nodes_to_heartbeat))
-            .await;
+        // Put a small, but reasonable timeout to get the initial heartbeats of the safekeepers to avoid a storage controller downtime
+        const SK_TIMEOUT: Duration = Duration::from_secs(5);
+        let (res_ps, res_sk) = tokio::join!(
+            self.heartbeater_ps.heartbeat(Arc::new(nodes_to_heartbeat)),
+            tokio::time::timeout(SK_TIMEOUT, self.heartbeater_sk.heartbeat(all_sks))
+        );
 
         let mut online_nodes = HashMap::new();
-        if let Ok(deltas) = res {
+        if let Ok(deltas) = res_ps {
             for (node_id, status) in deltas.0 {
                 match status {
                     PageserverState::Available { utilization, .. } => {
@@ -777,7 +837,22 @@ impl Service {
             }
         }
 
-        online_nodes
+        let mut online_sks = HashMap::new();
+        if let Ok(Ok(deltas)) = res_sk {
+            for (node_id, status) in deltas.0 {
+                match status {
+                    SafekeeperState::Available {
+                        utilization,
+                        last_seen_at,
+                    } => {
+                        online_sks.insert(node_id, (utilization, last_seen_at));
+                    }
+                    SafekeeperState::Offline => {}
+                }
+            }
+        }
+
+        (online_nodes, online_sks)
     }
 
     /// Used during [`Self::startup_reconcile`]: issue GETs to all nodes concurrently, with a deadline.
@@ -957,12 +1032,11 @@ impl Service {
                 let reconciles_spawned = self.reconcile_all();
                 if reconciles_spawned == 0 {
                     // Run optimizer only when we didn't find any other work to do
-                    let optimizations = self.optimize_all().await;
-                    if optimizations == 0 {
-                        // Run new splits only when no optimizations are pending
-                        self.autosplit_tenants().await;
-                    }
+                    self.optimize_all().await;
                 }
+                // Always attempt autosplits. Sharding is crucial for bulk ingest performance, so we
+                // must be responsive when new projects begin ingesting and reach the threshold.
+                self.autosplit_tenants().await;
             }
               _ = self.reconcilers_cancel.cancelled() => return
             }
@@ -984,8 +1058,18 @@ impl Service {
                 locked.nodes.clone()
             };
 
-            let res = self.heartbeater.heartbeat(nodes).await;
-            if let Ok(deltas) = res {
+            let safekeepers = {
+                let locked = self.inner.read().unwrap();
+                locked.safekeepers.clone()
+            };
+
+            const SK_TIMEOUT: Duration = Duration::from_secs(3);
+            let (res_ps, res_sk) = tokio::join!(
+                self.heartbeater_ps.heartbeat(nodes),
+                tokio::time::timeout(SK_TIMEOUT, self.heartbeater_sk.heartbeat(safekeepers))
+            );
+
+            if let Ok(deltas) = res_ps {
                 let mut to_handle = Vec::default();
 
                 for (node_id, state) in deltas.0 {
@@ -1086,6 +1170,18 @@ impl Service {
                     }
                 }
             }
+            if let Ok(Ok(deltas)) = res_sk {
+                let mut locked = self.inner.write().unwrap();
+                let mut safekeepers = (*locked.safekeepers).clone();
+                for (id, state) in deltas.0 {
+                    let Some(sk) = safekeepers.get_mut(&id) else {
+                        tracing::info!("Couldn't update safekeeper safekeeper state for id {id} from heartbeat={state:?}");
+                        continue;
+                    };
+                    sk.set_availability(state);
+                }
+                locked.safekeepers = Arc::new(safekeepers);
+            }
         }
     }
 
@@ -1184,12 +1280,15 @@ impl Service {
         }
 
         // Maybe some other work can proceed now that this job finished.
+        //
+        // Only bother with this if we have some semaphore units available in the normal-priority semaphore (these
+        // reconciles are scheduled at `[ReconcilerPriority::Normal]`).
         if self.reconciler_concurrency.available_permits() > 0 {
             while let Ok(tenant_shard_id) = locked.delayed_reconcile_rx.try_recv() {
                 let (nodes, tenants, _scheduler) = locked.parts_mut();
                 if let Some(shard) = tenants.get_mut(&tenant_shard_id) {
                     shard.delayed_reconcile = false;
-                    self.maybe_reconcile_shard(shard, nodes);
+                    self.maybe_reconcile_shard(shard, nodes, ReconcilerPriority::Normal);
                 }
 
                 if self.reconciler_concurrency.available_permits() == 0 {
@@ -1311,6 +1410,17 @@ impl Service {
             .storage_controller_pageserver_nodes
             .set(nodes.len() as i64);
 
+        tracing::info!("Loading safekeepers from database...");
+        let safekeepers = persistence
+            .list_safekeepers()
+            .await?
+            .into_iter()
+            .map(|skp| Safekeeper::from_persistence(skp, CancellationToken::new()))
+            .collect::<Vec<_>>();
+        let safekeepers: HashMap<NodeId, Safekeeper> =
+            safekeepers.into_iter().map(|n| (n.get_id(), n)).collect();
+        tracing::info!("Loaded {} safekeepers from database.", safekeepers.len());
+
         tracing::info!("Loading shards from database...");
         let mut tenant_shard_persistence = persistence.load_active_tenant_shards().await?;
         tracing::info!(
@@ -1437,7 +1547,14 @@ impl Service {
         let cancel = CancellationToken::new();
         let reconcilers_cancel = cancel.child_token();
 
-        let heartbeater = Heartbeater::new(
+        let heartbeater_ps = Heartbeater::new(
+            config.jwt_token.clone(),
+            config.max_offline_interval,
+            config.max_warming_up_interval,
+            cancel.clone(),
+        );
+
+        let heartbeater_sk = Heartbeater::new(
             config.jwt_token.clone(),
             config.max_offline_interval,
             config.max_warming_up_interval,
@@ -1453,6 +1570,7 @@ impl Service {
         let this = Arc::new(Self {
             inner: Arc::new(std::sync::RwLock::new(ServiceState::new(
                 nodes,
+                safekeepers,
                 tenants,
                 scheduler,
                 delayed_reconcile_rx,
@@ -1462,10 +1580,14 @@ impl Service {
             persistence,
             compute_hook: Arc::new(ComputeHook::new(config.clone())),
             result_tx,
-            heartbeater,
+            heartbeater_ps,
+            heartbeater_sk,
             reconciler_concurrency: Arc::new(tokio::sync::Semaphore::new(
                 config.reconciler_concurrency,
             )),
+            priority_reconciler_concurrency: Arc::new(tokio::sync::Semaphore::new(
+                config.priority_reconciler_concurrency,
+            )),
             delayed_reconcile_tx,
             abort_tx,
             startup_complete: startup_complete.clone(),
@@ -2238,7 +2360,7 @@ impl Service {
         let waiters = {
             let mut locked = self.inner.write().unwrap();
             let (nodes, tenants, _scheduler) = locked.parts_mut();
-            let config = ReconcilerConfigBuilder::new()
+            let config = ReconcilerConfigBuilder::new(ReconcilerPriority::High)
                 .tenant_creation_hint(true)
                 .build();
             tenants
@@ -2713,7 +2835,8 @@ impl Service {
 
                         shard.schedule(scheduler, &mut schedule_context)?;
 
-                        let maybe_waiter = self.maybe_reconcile_shard(shard, nodes);
+                        let maybe_waiter =
+                            self.maybe_reconcile_shard(shard, nodes, ReconcilerPriority::High);
                         if let Some(waiter) = maybe_waiter {
                             waiters.push(waiter);
                         }
@@ -2834,7 +2957,9 @@ impl Service {
             let (nodes, tenants, _scheduler) = locked.parts_mut();
             for (_shard_id, shard) in tenants.range_mut(TenantShardId::tenant_range(tenant_id)) {
                 shard.config = config.clone();
-                if let Some(waiter) = self.maybe_reconcile_shard(shard, nodes) {
+                if let Some(waiter) =
+                    self.maybe_reconcile_shard(shard, nodes, ReconcilerPriority::High)
+                {
                     waiters.push(waiter);
                 }
             }
@@ -3116,7 +3241,9 @@ impl Service {
                 debug_assert!(shard.intent.get_attached().is_none());
                 debug_assert!(shard.intent.get_secondary().is_empty());
 
-                if let Some(waiter) = self.maybe_reconcile_shard(shard, nodes) {
+                if let Some(waiter) =
+                    self.maybe_reconcile_shard(shard, nodes, ReconcilerPriority::High)
+                {
                     detach_waiters.push(waiter);
                 }
             }
@@ -3268,7 +3395,7 @@ impl Service {
 
             // In case scheduling is being switched back on, try it now.
             shard.schedule(scheduler, &mut schedule_context).ok();
-            self.maybe_reconcile_shard(shard, nodes);
+            self.maybe_reconcile_shard(shard, nodes, ReconcilerPriority::High);
         }
 
         Ok(())
@@ -3635,6 +3762,61 @@ impl Service {
         Ok(())
     }
 
+    pub(crate) async fn tenant_timeline_download_heatmap_layers(
+        &self,
+        tenant_shard_id: TenantShardId,
+        timeline_id: TimelineId,
+        concurrency: Option<usize>,
+    ) -> Result<(), ApiError> {
+        let _tenant_lock = trace_shared_lock(
+            &self.tenant_op_locks,
+            tenant_shard_id.tenant_id,
+            TenantOperations::DownloadHeatmapLayers,
+        )
+        .await;
+
+        let targets = {
+            let locked = self.inner.read().unwrap();
+            let mut targets = Vec::new();
+
+            // If the request got an unsharded tenant id, then apply
+            // the operation to all shards. Otherwise, apply it to a specific shard.
+            let shards_range = if tenant_shard_id.is_unsharded() {
+                TenantShardId::tenant_range(tenant_shard_id.tenant_id)
+            } else {
+                tenant_shard_id.range()
+            };
+
+            for (tenant_shard_id, shard) in locked.tenants.range(shards_range) {
+                if let Some(node_id) = shard.intent.get_attached() {
+                    let node = locked
+                        .nodes
+                        .get(node_id)
+                        .expect("Pageservers may not be deleted while referenced");
+
+                    targets.push((*tenant_shard_id, node.clone()));
+                }
+            }
+            targets
+        };
+
+        self.tenant_for_shards_api(
+            targets,
+            |tenant_shard_id, client| async move {
+                client
+                    .timeline_download_heatmap_layers(tenant_shard_id, timeline_id, concurrency)
+                    .await
+            },
+            1,
+            1,
+            SHORT_RECONCILE_TIMEOUT,
+            &self.cancel,
+        )
+        .await;
+
+        Ok(())
+    }
+
     /// Helper for concurrently calling a pageserver API on a number of shards, such as timeline creation.
     ///
     /// On success, the returned vector contains exactly the same number of elements as the input `locations`.
@@ -4317,7 +4499,7 @@ impl Service {
                     tracing::warn!("Failed to schedule {tenant_shard_id} during shard abort: {e}")
                 }
 
-                self.maybe_reconcile_shard(shard, nodes);
+                self.maybe_reconcile_shard(shard, nodes, ReconcilerPriority::High);
             }
 
             // We don't expect any new_shard_count shards to exist here, but drop them just in case
@@ -4483,7 +4665,11 @@ impl Service {
                         tracing::warn!("Failed to schedule child shard {child}: {e}");
                     }
                     // In the background, attach secondary locations for the new shards
-                    if let Some(waiter) = self.maybe_reconcile_shard(&mut child_state, nodes) {
+                    if let Some(waiter) = self.maybe_reconcile_shard(
+                        &mut child_state,
+                        nodes,
+                        ReconcilerPriority::High,
+                    ) {
                         waiters.push(waiter);
                     }
 
@@ -4848,7 +5034,9 @@ impl Service {
                 shard.intent.clear_secondary(scheduler);
 
                 // Run Reconciler to execute detach fo secondary locations.
-                if let Some(waiter) = self.maybe_reconcile_shard(shard, nodes) {
+                if let Some(waiter) =
+                    self.maybe_reconcile_shard(shard, nodes, ReconcilerPriority::High)
+                {
                     waiters.push(waiter);
                 }
             }
@@ -5114,7 +5302,12 @@ impl Service {
                 shard.sequence = shard.sequence.next();
             }
 
-            self.maybe_reconcile_shard(shard, nodes)
+            let reconciler_config = match migrate_req.migration_config {
+                Some(cfg) => (&cfg).into(),
+                None => ReconcilerConfig::new(ReconcilerPriority::High),
+            };
+
+            self.maybe_configured_reconcile_shard(shard, nodes, reconciler_config)
         };
 
         if let Some(waiter) = waiter {
@@ -5177,7 +5370,7 @@ impl Service {
                 );
             }
 
-            self.maybe_reconcile_shard(shard, nodes)
+            self.maybe_reconcile_shard(shard, nodes, ReconcilerPriority::High)
         };
 
         if let Some(waiter) = waiter {
@@ -5589,7 +5782,7 @@ impl Service {
                             )
                         }
 
-                        self.maybe_reconcile_shard(shard, nodes);
+                        self.maybe_reconcile_shard(shard, nodes, ReconcilerPriority::Normal);
                     }
 
                     // Here we remove an existing observed location for the node we're removing, and it will
@@ -5958,7 +6151,14 @@ impl Service {
                                     tracing::warn!(%tenant_shard_id, "Scheduling error when marking pageserver {} offline: {e}", node_id);
                                 }
                                 Ok(()) => {
-                                    if self.maybe_reconcile_shard(tenant_shard, nodes).is_some() {
+                                    if self
+                                        .maybe_reconcile_shard(
+                                            tenant_shard,
+                                            nodes,
+                                            ReconcilerPriority::Normal,
+                                        )
+                                        .is_some()
+                                    {
                                         tenants_affected += 1;
                                     };
                                 }
@@ -5989,7 +6189,11 @@ impl Service {
 
                     if let Some(observed_loc) = tenant_shard.observed.locations.get_mut(&node_id) {
                         if observed_loc.conf.is_none() {
-                            self.maybe_reconcile_shard(tenant_shard, nodes);
+                            self.maybe_reconcile_shard(
+                                tenant_shard,
+                                nodes,
+                                ReconcilerPriority::Normal,
+                            );
                         }
                     }
                 }
@@ -6353,8 +6557,36 @@ impl Service {
         &self,
         shard: &mut TenantShard,
         nodes: &Arc<HashMap<NodeId, Node>>,
+        priority: ReconcilerPriority,
     ) -> Option<ReconcilerWaiter> {
-        self.maybe_configured_reconcile_shard(shard, nodes, ReconcilerConfig::default())
+        self.maybe_configured_reconcile_shard(shard, nodes, ReconcilerConfig::new(priority))
+    }
+
+    /// Before constructing a Reconciler, acquire semaphore units from the appropriate concurrency limit (depends on priority)
+    fn get_reconciler_units(
+        &self,
+        priority: ReconcilerPriority,
+    ) -> Result<ReconcileUnits, TryAcquireError> {
+        let units = match priority {
+            ReconcilerPriority::Normal => self.reconciler_concurrency.clone().try_acquire_owned(),
+            ReconcilerPriority::High => {
+                match self
+                    .priority_reconciler_concurrency
+                    .clone()
+                    .try_acquire_owned()
+                {
+                    Ok(u) => Ok(u),
+                    Err(TryAcquireError::NoPermits) => {
+                        // If the high priority semaphore is exhausted, then high priority tasks may steal units from
+                        // the normal priority semaphore.
+                        self.reconciler_concurrency.clone().try_acquire_owned()
+                    }
+                    Err(e) => Err(e),
+                }
+            }
+        };
+
+        units.map(ReconcileUnits::new)
     }
 
     /// Wrap [`TenantShard`] reconciliation methods with acquisition of [`Gate`] and [`ReconcileUnits`],
@@ -6374,8 +6606,8 @@ impl Service {
             }
         };
 
-        let units = match self.reconciler_concurrency.clone().try_acquire_owned() {
-            Ok(u) => ReconcileUnits::new(u),
+        let units = match self.get_reconciler_units(reconciler_config.priority) {
+            Ok(u) => u,
             Err(_) => {
                 tracing::info!(tenant_id=%shard.tenant_shard_id.tenant_id, shard_id=%shard.tenant_shard_id.shard_slug(),
                     "Concurrency limited: enqueued for reconcile later");
@@ -6468,7 +6700,10 @@ impl Service {
 
             // Eventual consistency: if an earlier reconcile job failed, and the shard is still
             // dirty, spawn another rone
-            if self.maybe_reconcile_shard(shard, &pageservers).is_some() {
+            if self
+                .maybe_reconcile_shard(shard, &pageservers, ReconcilerPriority::Normal)
+                .is_some()
+            {
                 reconciles_spawned += 1;
             } else if shard.delayed_reconcile {
                 // Shard wanted to reconcile but for some reason couldn't.
@@ -6554,7 +6789,10 @@ impl Service {
             tracing::info!(tenant_shard_id=%tenant_shard_id, "Applying optimization: {optimization:?}");
             if shard.apply_optimization(scheduler, optimization) {
                 optimizations_applied += 1;
-                if self.maybe_reconcile_shard(shard, nodes).is_some() {
+                if self
+                    .maybe_reconcile_shard(shard, nodes, ReconcilerPriority::Normal)
+                    .is_some()
+                {
                     reconciles_spawned += 1;
                 }
             }
@@ -7104,7 +7342,7 @@ impl Service {
         // to not stall the operation when a cold secondary is encountered.
         const SECONDARY_WARMUP_TIMEOUT: Duration = Duration::from_secs(20);
         const SECONDARY_DOWNLOAD_REQUEST_TIMEOUT: Duration = Duration::from_secs(5);
-        let reconciler_config = ReconcilerConfigBuilder::new()
+        let reconciler_config = ReconcilerConfigBuilder::new(ReconcilerPriority::Normal)
             .secondary_warmup_timeout(SECONDARY_WARMUP_TIMEOUT)
             .secondary_download_request_timeout(SECONDARY_DOWNLOAD_REQUEST_TIMEOUT)
             .build();
@@ -7437,7 +7675,7 @@ impl Service {
     ) -> Result<(), OperationError> {
         const SECONDARY_WARMUP_TIMEOUT: Duration = Duration::from_secs(20);
         const SECONDARY_DOWNLOAD_REQUEST_TIMEOUT: Duration = Duration::from_secs(5);
-        let reconciler_config = ReconcilerConfigBuilder::new()
+        let reconciler_config = ReconcilerConfigBuilder::new(ReconcilerPriority::Normal)
             .secondary_warmup_timeout(SECONDARY_WARMUP_TIMEOUT)
             .secondary_download_request_timeout(SECONDARY_DOWNLOAD_REQUEST_TIMEOUT)
             .build();
@@ -7661,29 +7899,54 @@ impl Service {
     pub(crate) async fn safekeepers_list(
         &self,
     ) -> Result<Vec<SafekeeperDescribeResponse>, DatabaseError> {
-        self.persistence
-            .list_safekeepers()
-            .await?
-            .into_iter()
-            .map(|v| v.as_describe_response())
-            .collect::<Result<Vec<_>, _>>()
+        let locked = self.inner.read().unwrap();
+        let mut list = locked
+            .safekeepers
+            .iter()
+            .map(|sk| sk.1.describe_response())
+            .collect::<Result<Vec<_>, _>>()?;
+        list.sort_by_key(|v| v.id);
+        Ok(list)
     }
 
     pub(crate) async fn get_safekeeper(
         &self,
         id: i64,
     ) -> Result<SafekeeperDescribeResponse, DatabaseError> {
-        self.persistence
-            .safekeeper_get(id)
-            .await
-            .and_then(|v| v.as_describe_response())
+        let locked = self.inner.read().unwrap();
+        let sk = locked
+            .safekeepers
+            .get(&NodeId(id as u64))
+            .ok_or(diesel::result::Error::NotFound)?;
+        sk.describe_response()
     }
 
     pub(crate) async fn upsert_safekeeper(
         &self,
         record: crate::persistence::SafekeeperUpsert,
     ) -> Result<(), DatabaseError> {
-        self.persistence.safekeeper_upsert(record).await
+        let node_id = NodeId(record.id as u64);
+        self.persistence.safekeeper_upsert(record.clone()).await?;
+        {
+            let mut locked = self.inner.write().unwrap();
+            let mut safekeepers = (*locked.safekeepers).clone();
+            match safekeepers.entry(node_id) {
+                std::collections::hash_map::Entry::Occupied(mut entry) => {
+                    entry.get_mut().update_from_record(record);
+                }
+                std::collections::hash_map::Entry::Vacant(entry) => {
+                    entry.insert(Safekeeper::from_persistence(
+                        crate::persistence::SafekeeperPersistence::from_upsert(
+                            record,
+                            SkSchedulingPolicy::Pause,
+                        ),
+                        CancellationToken::new(),
+                    ));
+                }
+            }
+            locked.safekeepers = Arc::new(safekeepers);
+        }
+        Ok(())
     }
 
     pub(crate) async fn set_safekeeper_scheduling_policy(
@@ -7693,7 +7956,20 @@ impl Service {
     ) -> Result<(), DatabaseError> {
         self.persistence
             .set_safekeeper_scheduling_policy(id, scheduling_policy)
-            .await
+            .await?;
+        let node_id = NodeId(id as u64);
+        // After the change has been persisted successfully, update the in-memory state
+        {
+            let mut locked = self.inner.write().unwrap();
+            let mut safekeepers = (*locked.safekeepers).clone();
+            let sk = safekeepers
+                .get_mut(&node_id)
+                .ok_or(DatabaseError::Logical("Not found".to_string()))?;
+            sk.set_scheduling_policy(scheduling_policy);
+
+            locked.safekeepers = Arc::new(safekeepers);
+        }
+        Ok(())
     }
 
     pub(crate) async fn update_shards_preferred_azs(
diff --git a/storage_controller/src/service/chaos_injector.rs b/storage_controller/src/service/chaos_injector.rs
index 91d7183fde..aa0ee0df5a 100644
--- a/storage_controller/src/service/chaos_injector.rs
+++ b/storage_controller/src/service/chaos_injector.rs
@@ -88,7 +88,11 @@ impl ChaosInjector {
 
         shard.intent.demote_attached(scheduler, old_location);
         shard.intent.promote_attached(scheduler, new_location);
-        self.service.maybe_reconcile_shard(shard, nodes);
+        self.service.maybe_reconcile_shard(
+            shard,
+            nodes,
+            crate::reconciler::ReconcilerPriority::Normal,
+        );
     }
 
     async fn inject_chaos(&mut self) {
diff --git a/test_runner/fixtures/fast_import.py b/test_runner/fixtures/fast_import.py
index 33248132ab..d674be99de 100644
--- a/test_runner/fixtures/fast_import.py
+++ b/test_runner/fixtures/fast_import.py
@@ -4,8 +4,10 @@ import subprocess
 import tempfile
 from collections.abc import Iterator
 from pathlib import Path
+from typing import cast
 
 import pytest
+from _pytest.config import Config
 
 from fixtures.log_helper import log
 from fixtures.neon_cli import AbstractNeonCli
@@ -23,6 +25,7 @@ class FastImport(AbstractNeonCli):
         pg_distrib_dir: Path,
         pg_version: PgVersion,
         workdir: Path,
+        cleanup: bool = True,
     ):
         if extra_env is None:
             env_vars = {}
@@ -47,12 +50,43 @@ class FastImport(AbstractNeonCli):
         if not workdir.exists():
             raise Exception(f"Working directory '{workdir}' does not exist")
         self.workdir = workdir
+        self.cleanup = cleanup
+
+    def run_pgdata(
+        self,
+        s3prefix: str | None = None,
+        pg_port: int | None = None,
+        source_connection_string: str | None = None,
+        interactive: bool = False,
+    ):
+        return self.run(
+            "pgdata",
+            s3prefix=s3prefix,
+            pg_port=pg_port,
+            source_connection_string=source_connection_string,
+            interactive=interactive,
+        )
+
+    def run_dump_restore(
+        self,
+        s3prefix: str | None = None,
+        source_connection_string: str | None = None,
+        destination_connection_string: str | None = None,
+    ):
+        return self.run(
+            "dump-restore",
+            s3prefix=s3prefix,
+            source_connection_string=source_connection_string,
+            destination_connection_string=destination_connection_string,
+        )
 
     def run(
         self,
-        pg_port: int,
-        source_connection_string: str | None = None,
+        command: str,
         s3prefix: str | None = None,
+        pg_port: int | None = None,
+        source_connection_string: str | None = None,
+        destination_connection_string: str | None = None,
         interactive: bool = False,
     ) -> subprocess.CompletedProcess[str]:
         if self.cmd is not None:
@@ -60,13 +94,17 @@ class FastImport(AbstractNeonCli):
         args = [
             f"--pg-bin-dir={self.pg_bin}",
             f"--pg-lib-dir={self.pg_lib}",
-            f"--pg-port={pg_port}",
             f"--working-directory={self.workdir}",
         ]
-        if source_connection_string is not None:
-            args.append(f"--source-connection-string={source_connection_string}")
         if s3prefix is not None:
             args.append(f"--s3-prefix={s3prefix}")
+        args.append(command)
+        if pg_port is not None:
+            args.append(f"--pg-port={pg_port}")
+        if source_connection_string is not None:
+            args.append(f"--source-connection-string={source_connection_string}")
+        if destination_connection_string is not None:
+            args.append(f"--destination-connection-string={destination_connection_string}")
         if interactive:
             args.append("--interactive")
 
@@ -77,7 +115,7 @@ class FastImport(AbstractNeonCli):
         return self
 
     def __exit__(self, *args):
-        if self.workdir.exists():
+        if self.workdir.exists() and self.cleanup:
             shutil.rmtree(self.workdir)
 
 
@@ -87,9 +125,17 @@ def fast_import(
     test_output_dir: Path,
     neon_binpath: Path,
     pg_distrib_dir: Path,
+    pytestconfig: Config,
 ) -> Iterator[FastImport]:
-    workdir = Path(tempfile.mkdtemp())
-    with FastImport(None, neon_binpath, pg_distrib_dir, pg_version, workdir) as fi:
+    workdir = Path(tempfile.mkdtemp(dir=test_output_dir, prefix="fast_import_"))
+    with FastImport(
+        None,
+        neon_binpath,
+        pg_distrib_dir,
+        pg_version,
+        workdir,
+        cleanup=not cast(bool, pytestconfig.getoption("--preserve-database-files")),
+    ) as fi:
         yield fi
 
         if fi.cmd is None:
diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py
index 2fa82754ef..58c5dbfd29 100644
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -3,6 +3,7 @@ from __future__ import annotations
 import abc
 import asyncio
 import concurrent.futures
+import dataclasses
 import filecmp
 import json
 import os
@@ -26,6 +27,7 @@ from urllib.parse import quote, urlparse
 
 import asyncpg
 import backoff
+import boto3
 import httpx
 import psycopg2
 import psycopg2.sql
@@ -36,6 +38,8 @@ from _pytest.config import Config
 from _pytest.config.argparsing import Parser
 from _pytest.fixtures import FixtureRequest
 from jwcrypto import jwk
+from mypy_boto3_kms import KMSClient
+from mypy_boto3_s3 import S3Client
 
 # Type-related stuff
 from psycopg2.extensions import connection as PgConnection
@@ -92,7 +96,7 @@ from fixtures.utils import (
     ATTACHMENT_NAME_REGEX,
     COMPONENT_BINARIES,
     USE_LFC,
-    allure_add_grafana_links,
+    allure_add_grafana_link,
     assert_no_errors,
     get_dir_size,
     print_gc_result,
@@ -198,6 +202,30 @@ def mock_s3_server(port_distributor: PortDistributor) -> Iterator[MockS3Server]:
     mock_s3_server.kill()
 
 
+@pytest.fixture(scope="session")
+def mock_kms(mock_s3_server: MockS3Server) -> Iterator[KMSClient]:
+    yield boto3.client(
+        "kms",
+        endpoint_url=mock_s3_server.endpoint(),
+        region_name=mock_s3_server.region(),
+        aws_access_key_id=mock_s3_server.access_key(),
+        aws_secret_access_key=mock_s3_server.secret_key(),
+        aws_session_token=mock_s3_server.session_token(),
+    )
+
+
+@pytest.fixture(scope="session")
+def mock_s3_client(mock_s3_server: MockS3Server) -> Iterator[S3Client]:
+    yield boto3.client(
+        "s3",
+        endpoint_url=mock_s3_server.endpoint(),
+        region_name=mock_s3_server.region(),
+        aws_access_key_id=mock_s3_server.access_key(),
+        aws_secret_access_key=mock_s3_server.secret_key(),
+        aws_session_token=mock_s3_server.session_token(),
+    )
+
+
 class PgProtocol:
     """Reusable connection logic"""
 
@@ -463,6 +491,7 @@ class NeonEnvBuilder:
         self.test_may_use_compatibility_snapshot_binaries = False
         self.version_combination = combination
         self.mixdir = self.test_output_dir / "mixdir_neon"
+
         if self.version_combination is not None:
             assert (
                 self.compatibility_neon_binpath is not None
@@ -674,6 +703,11 @@ class NeonEnvBuilder:
 
     def _mix_versions(self):
         assert self.version_combination is not None, "version combination must be set"
+
+        # Always use a newer version of `neon_local`
+        (self.mixdir / "neon_local").hardlink_to(self.neon_binpath / "neon_local")
+        self.neon_local_binpath = self.mixdir
+
         for component, paths in COMPONENT_BINARIES.items():
             directory = (
                 self.neon_binpath
@@ -682,10 +716,11 @@ class NeonEnvBuilder:
             )
             for filename in paths:
                 destination = self.mixdir / filename
-                destination.symlink_to(directory / filename)
+                destination.hardlink_to(directory / filename)
+        self.neon_binpath = self.mixdir
+
         if self.version_combination["compute"] == "old":
             self.pg_distrib_dir = self.compatibility_pg_distrib_dir
-        self.neon_binpath = self.mixdir
 
     def overlay_mount(self, ident: str, srcdir: Path, dstdir: Path):
         """
@@ -1675,6 +1710,12 @@ class StorageControllerLeadershipStatus(StrEnum):
     CANDIDATE = "candidate"
 
 
+@dataclass
+class StorageControllerMigrationConfig:
+    secondary_warmup_timeout: str | None
+    secondary_download_request_timeout: str | None
+
+
 class NeonStorageController(MetricsGetter, LogUtils):
     def __init__(self, env: NeonEnv, port: int, auth_enabled: bool):
         self.env = env
@@ -2068,11 +2109,20 @@ class NeonStorageController(MetricsGetter, LogUtils):
         shards: list[TenantShardId] = body["new_shards"]
         return shards
 
-    def tenant_shard_migrate(self, tenant_shard_id: TenantShardId, dest_ps_id: int):
+    def tenant_shard_migrate(
+        self,
+        tenant_shard_id: TenantShardId,
+        dest_ps_id: int,
+        config: StorageControllerMigrationConfig | None = None,
+    ):
+        payload = {"tenant_shard_id": str(tenant_shard_id), "node_id": dest_ps_id}
+        if config is not None:
+            payload["migration_config"] = dataclasses.asdict(config)
+
         self.request(
             "PUT",
             f"{self.api}/control/v1/tenant/{tenant_shard_id}/migrate",
-            json={"tenant_shard_id": str(tenant_shard_id), "node_id": dest_ps_id},
+            json=payload,
             headers=self.headers(TokenScope.ADMIN),
         )
         log.info(f"Migrated tenant {tenant_shard_id} to pageserver {dest_ps_id}")
@@ -2417,6 +2467,14 @@ class NeonStorageController(MetricsGetter, LogUtils):
         response.raise_for_status()
         return [TenantShardId.parse(tid) for tid in response.json()["updated"]]
 
+    def download_heatmap_layers(self, tenant_shard_id: TenantShardId, timeline_id: TimelineId):
+        response = self.request(
+            "POST",
+            f"{self.api}/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/download_heatmap_layers",
+            headers=self.headers(TokenScope.ADMIN),
+        )
+        response.raise_for_status()
+
     def __enter__(self) -> Self:
         return self
 
@@ -3197,7 +3255,7 @@ def remote_pg(
     end_ms = int(datetime.utcnow().timestamp() * 1000)
     if is_neon:
         # Add 10s margin to the start and end times
-        allure_add_grafana_links(
+        allure_add_grafana_link(
             host,
             timeline_id,
             start_ms - 10_000,
@@ -4972,8 +5030,13 @@ def check_restored_datadir_content(
 
     restored_files = list_files_to_compare(restored_dir_path)
 
+    # pg_notify files are always ignored
+    pgdata_files = [f for f in pgdata_files if not f.startswith("pg_notify")]
+    restored_files = [f for f in restored_files if not f.startswith("pg_notify")]
+
+    # pg_xact and pg_multixact files are optional in basebackup: depending on our configuration they
+    # may be omitted and loaded on demand.
     if pgdata_files != restored_files:
-        # filter pg_xact and multixact files which are downloaded on demand
         pgdata_files = [
             f
             for f in pgdata_files
diff --git a/test_runner/fixtures/utils.py b/test_runner/fixtures/utils.py
index e160c617cd..84d62fb877 100644
--- a/test_runner/fixtures/utils.py
+++ b/test_runner/fixtures/utils.py
@@ -52,11 +52,11 @@ COMPONENT_BINARIES = {
 # Disable auto-formatting for better readability
 # fmt: off
 VERSIONS_COMBINATIONS = (
-    {"storage_controller": "new", "storage_broker": "new", "compute": "new", "safekeeper": "new", "pageserver": "new"},
-    {"storage_controller": "new", "storage_broker": "new", "compute": "old", "safekeeper": "old", "pageserver": "old"},
-    {"storage_controller": "new", "storage_broker": "new", "compute": "old", "safekeeper": "old", "pageserver": "new"},
-    {"storage_controller": "new", "storage_broker": "new", "compute": "old", "safekeeper": "new", "pageserver": "new"},
-    {"storage_controller": "old", "storage_broker": "old", "compute": "new", "safekeeper": "new", "pageserver": "new"},
+    {"storage_controller": "new", "storage_broker": "new", "compute": "new", "safekeeper": "new", "pageserver": "new"}, # combination: nnnnn
+    {"storage_controller": "new", "storage_broker": "new", "compute": "old", "safekeeper": "old", "pageserver": "old"}, # combination: ooonn
+    {"storage_controller": "new", "storage_broker": "new", "compute": "old", "safekeeper": "old", "pageserver": "new"}, # combination: ononn
+    {"storage_controller": "new", "storage_broker": "new", "compute": "old", "safekeeper": "new", "pageserver": "new"}, # combination: onnnn
+    {"storage_controller": "old", "storage_broker": "old", "compute": "new", "safekeeper": "new", "pageserver": "new"}, # combination: nnnoo
 )
 # fmt: on
 
@@ -64,6 +64,8 @@ VERSIONS_COMBINATIONS = (
 # If it is not set or set to a value not equal to "false", LFC is enabled by default.
 USE_LFC = os.environ.get("USE_LFC") != "false"
 
+WITH_SANITIZERS = os.environ.get("SANITIZERS") == "enabled"
+
 
 def subprocess_capture(
     capture_dir: Path,
@@ -310,62 +312,46 @@ def allure_attach_from_dir(dir: Path, preserve_database_files: bool = False):
 
 
 GRAFANA_URL = "https://neonprod.grafana.net"
-GRAFANA_EXPLORE_URL = f"{GRAFANA_URL}/explore"
-GRAFANA_TIMELINE_INSPECTOR_DASHBOARD_URL = f"{GRAFANA_URL}/d/8G011dlnk/timeline-inspector"
-LOGS_STAGING_DATASOURCE_ID = "xHHYY0dVz"
+GRAFANA_DASHBOARD_URL = f"{GRAFANA_URL}/d/cdya0okb81zwga/cross-service-endpoint-debugging"
 
 
-def allure_add_grafana_links(host: str, timeline_id: TimelineId, start_ms: int, end_ms: int):
-    """Add links to server logs in Grafana to Allure report"""
-    links: dict[str, str] = {}
-    # We expect host to be in format like ep-divine-night-159320.us-east-2.aws.neon.build
+def allure_add_grafana_link(host: str, timeline_id: TimelineId, start_ms: int, end_ms: int):
+    """
+    Add a link to the cross-service endpoint debugging dashboard in Grafana to Allure report.
+
+    Args:
+        host (str): The host string in the format 'ep-<endpoint_id>.<region_id>.<domain>'.
+        timeline_id (TimelineId): The timeline identifier for the Grafana dashboard.
+            (currently ignored but may be needed in future verions of the dashboard)
+        start_ms (int): The start time in milliseconds for the Grafana dashboard.
+        end_ms (int): The end time in milliseconds for the Grafana dashboard.
+
+    Example:
+        Given
+        host = ''
+        timeline_id = '996926d1f5ddbe7381b8840083f8fc9a'
+
+        The generated link would be something like:
+        https://neonprod.grafana.net/d/cdya0okb81zwga/cross-service-endpoint-debugging?orgId=1&from=2025-02-17T21:10:00.000Z&to=2025-02-17T21:20:00.000Z&timezone=utc&var-env=dev%7Cstaging&var-input_endpoint_id=ep-holy-mouse-w2u462gi
+
+    """
+    # We expect host to be in format like ep-holy-mouse-w2u462gi.us-east-2.aws.neon.build
     endpoint_id, region_id, _ = host.split(".", 2)
 
-    expressions = {
-        "compute logs": f'{{app="compute-node-{endpoint_id}", neon_region="{region_id}"}}',
-        "k8s events": f'{{job="integrations/kubernetes/eventhandler"}} |~ "name=compute-node-{endpoint_id}-"',
-        "console logs": f'{{neon_service="console", neon_region="{region_id}"}} | json | endpoint_id = "{endpoint_id}"',
-        "proxy logs": f'{{neon_service="proxy-scram", neon_region="{region_id}"}}',
+    params = {
+        "orgId": 1,
+        "from": start_ms,
+        "to": end_ms,
+        "timezone": "utc",
+        "var-env": "dev|staging",
+        "var-input_endpoint_id": endpoint_id,
     }
 
-    params: dict[str, Any] = {
-        "datasource": LOGS_STAGING_DATASOURCE_ID,
-        "queries": [
-            {
-                "expr": "<PUT AN EXPRESSION HERE>",
-                "refId": "A",
-                "datasource": {"type": "loki", "uid": LOGS_STAGING_DATASOURCE_ID},
-                "editorMode": "code",
-                "queryType": "range",
-            }
-        ],
-        "range": {
-            "from": str(start_ms),
-            "to": str(end_ms),
-        },
-    }
-    for name, expr in expressions.items():
-        params["queries"][0]["expr"] = expr
-        query_string = urlencode({"orgId": 1, "left": json.dumps(params)})
-        links[name] = f"{GRAFANA_EXPLORE_URL}?{query_string}"
+    query_string = urlencode(params)
+    link = f"{GRAFANA_DASHBOARD_URL}?{query_string}"
 
-    timeline_qs = urlencode(
-        {
-            "orgId": 1,
-            "var-environment": "victoria-metrics-aws-dev",
-            "var-timeline_id": timeline_id,
-            "var-endpoint_id": endpoint_id,
-            "var-log_datasource": "grafanacloud-neonstaging-logs",
-            "from": start_ms,
-            "to": end_ms,
-        }
-    )
-    link = f"{GRAFANA_TIMELINE_INSPECTOR_DASHBOARD_URL}?{timeline_qs}"
-    links["Timeline Inspector"] = link
-
-    for name, link in links.items():
-        allure.dynamic.link(link, name=name)
-        log.info(f"{name}: {link}")
+    allure.dynamic.link(link, name="Cross-Service Endpoint Debugging")
+    log.info(f"Cross-Service Endpoint Debugging: {link}")
 
 
 def start_in_background(
diff --git a/test_runner/regress/test_compaction.py b/test_runner/regress/test_compaction.py
index f10872590c..c091cd0869 100644
--- a/test_runner/regress/test_compaction.py
+++ b/test_runner/regress/test_compaction.py
@@ -236,9 +236,7 @@ def test_pageserver_gc_compaction_smoke(neon_env_builder: NeonEnvBuilder, with_b
     wait_until(compaction_finished, timeout=60)
 
     # ensure gc_compaction is scheduled and it's actually running (instead of skipping due to no layers picked)
-    env.pageserver.assert_log_contains(
-        "scheduled_compact_timeline.*picked .* layers for compaction"
-    )
+    env.pageserver.assert_log_contains("gc_compact_timeline.*picked .* layers for compaction")
 
     log.info("Validating at workload end ...")
     workload.validate(env.pageserver.id)
@@ -300,6 +298,8 @@ def test_pageserver_gc_compaction_idempotent(
     workload.churn_rows(row_count, env.pageserver.id)
     env.create_branch("child_branch")  # so that we have a retain_lsn
     workload.churn_rows(row_count, env.pageserver.id)
+    env.create_branch("child_branch_2")  # so that we have another retain_lsn
+    workload.churn_rows(row_count, env.pageserver.id)
     # compact 3 times if mode is before_restart
     n_compactions = 3 if compaction_mode == "before_restart" else 1
     ps_http.timeline_compact(
@@ -315,10 +315,6 @@ def test_pageserver_gc_compaction_idempotent(
             body={
                 "scheduled": True,
                 "sub_compaction": True,
-                "compact_key_range": {
-                    "start": "000000000000000000000000000000000000",
-                    "end": "030000000000000000000000000000000000",
-                },
                 "sub_compaction_max_job_size_mb": 16,
             },
         )
@@ -336,19 +332,13 @@ def test_pageserver_gc_compaction_idempotent(
                 body={
                     "scheduled": True,
                     "sub_compaction": True,
-                    "compact_key_range": {
-                        "start": "000000000000000000000000000000000000",
-                        "end": "030000000000000000000000000000000000",
-                    },
                     "sub_compaction_max_job_size_mb": 16,
                 },
             )
             wait_until(compaction_finished, timeout=60)
 
     # ensure gc_compaction is scheduled and it's actually running (instead of skipping due to no layers picked)
-    env.pageserver.assert_log_contains(
-        "scheduled_compact_timeline.*picked .* layers for compaction"
-    )
+    env.pageserver.assert_log_contains("gc_compact_timeline.*picked .* layers for compaction")
 
     # ensure we hit the duplicated layer key warning at least once: we did two compactions consecutively,
     # and the second one should have hit the duplicated layer key warning.
@@ -466,9 +456,7 @@ def test_pageserver_gc_compaction_interrupt(neon_env_builder: NeonEnvBuilder):
     wait_until(compaction_finished, timeout=60)
 
     # ensure gc_compaction is scheduled and it's actually running (instead of skipping due to no layers picked)
-    env.pageserver.assert_log_contains(
-        "scheduled_compact_timeline.*picked .* layers for compaction"
-    )
+    env.pageserver.assert_log_contains("gc_compact_timeline.*picked .* layers for compaction")
 
     log.info("Validating at workload end ...")
     workload.validate(env.pageserver.id)
diff --git a/test_runner/regress/test_endpoint_crash.py b/test_runner/regress/test_endpoint_crash.py
index 0217cd0d03..03bfd1cb8d 100644
--- a/test_runner/regress/test_endpoint_crash.py
+++ b/test_runner/regress/test_endpoint_crash.py
@@ -2,6 +2,8 @@ from __future__ import annotations
 
 import pytest
 from fixtures.neon_fixtures import NeonEnvBuilder
+from fixtures.pg_version import PgVersion
+from fixtures.utils import WITH_SANITIZERS, run_only_on_postgres
 
 
 @pytest.mark.parametrize(
@@ -23,3 +25,20 @@ def test_endpoint_crash(neon_env_builder: NeonEnvBuilder, sql_func: str):
     endpoint.safe_psql("CREATE EXTENSION neon_test_utils;")
     with pytest.raises(Exception, match="This probably means the server terminated abnormally"):
         endpoint.safe_psql(f"SELECT {sql_func}();")
+
+
+@run_only_on_postgres([PgVersion.V17], "Currently, build vith sanitizers is possible with v17 only")
+def test_sanitizers(neon_env_builder: NeonEnvBuilder):
+    """
+    Test that undefined behavior leads to endpoint abort with sanitizers enabled
+    """
+    env = neon_env_builder.init_start()
+    env.create_branch("test_ubsan")
+    endpoint = env.endpoints.create_start("test_ubsan")
+
+    # Test case based on https://www.postgresql.org/message-id/17167-028026e4ca333817@postgresql.org
+    if not WITH_SANITIZERS:
+        endpoint.safe_psql("SELECT 1::int4 << 128")
+    else:
+        with pytest.raises(Exception, match="This probably means the server terminated abnormally"):
+            endpoint.safe_psql("SELECT 1::int4 << 128")
diff --git a/test_runner/regress/test_import_pgdata.py b/test_runner/regress/test_import_pgdata.py
index 6b35f3c6bb..71e0d16edd 100644
--- a/test_runner/regress/test_import_pgdata.py
+++ b/test_runner/regress/test_import_pgdata.py
@@ -1,7 +1,9 @@
+import base64
 import json
 import re
 import time
 from enum import Enum
+from pathlib import Path
 
 import psycopg2
 import psycopg2.errors
@@ -14,8 +16,12 @@ from fixtures.pageserver.http import (
     ImportPgdataIdemptencyKey,
     PageserverApiException,
 )
+from fixtures.pg_version import PgVersion
 from fixtures.port_distributor import PortDistributor
-from fixtures.remote_storage import RemoteStorageKind
+from fixtures.remote_storage import MockS3Server, RemoteStorageKind
+from mypy_boto3_kms import KMSClient
+from mypy_boto3_kms.type_defs import EncryptResponseTypeDef
+from mypy_boto3_s3 import S3Client
 from pytest_httpserver import HTTPServer
 from werkzeug.wrappers.request import Request
 from werkzeug.wrappers.response import Response
@@ -103,13 +109,15 @@ def test_pgdata_import_smoke(
     while True:
         relblock_size = vanilla_pg.safe_psql_scalar("select pg_relation_size('t')")
         log.info(
-            f"relblock size: {relblock_size/8192} pages (target: {target_relblock_size//8192}) pages"
+            f"relblock size: {relblock_size / 8192} pages (target: {target_relblock_size // 8192}) pages"
         )
         if relblock_size >= target_relblock_size:
             break
         addrows = int((target_relblock_size - relblock_size) // 8192)
         assert addrows >= 1, "forward progress"
-        vanilla_pg.safe_psql(f"insert into t select generate_series({nrows+1}, {nrows + addrows})")
+        vanilla_pg.safe_psql(
+            f"insert into t select generate_series({nrows + 1}, {nrows + addrows})"
+        )
         nrows += addrows
     expect_nrows = nrows
     expect_sum = (
@@ -231,14 +239,14 @@ def test_pgdata_import_smoke(
     shard_zero_http = shard_zero_ps.http_client()
     shard_zero_timeline_info = shard_zero_http.timeline_detail(shard_zero["shard_id"], timeline_id)
     initdb_lsn = Lsn(shard_zero_timeline_info["initdb_lsn"])
-    latest_gc_cutoff_lsn = Lsn(shard_zero_timeline_info["latest_gc_cutoff_lsn"])
+    min_readable_lsn = Lsn(shard_zero_timeline_info["min_readable_lsn"])
     last_record_lsn = Lsn(shard_zero_timeline_info["last_record_lsn"])
     disk_consistent_lsn = Lsn(shard_zero_timeline_info["disk_consistent_lsn"])
     _remote_consistent_lsn = Lsn(shard_zero_timeline_info["remote_consistent_lsn"])
     remote_consistent_lsn_visible = Lsn(shard_zero_timeline_info["remote_consistent_lsn_visible"])
     # assert remote_consistent_lsn_visible == remote_consistent_lsn TODO: this fails initially and after restart, presumably because `UploadQueue::clean.1` is still `None`
     assert remote_consistent_lsn_visible == disk_consistent_lsn
-    assert initdb_lsn == latest_gc_cutoff_lsn
+    assert initdb_lsn == min_readable_lsn
     assert disk_consistent_lsn == initdb_lsn + 8
     assert last_record_lsn == disk_consistent_lsn
     # TODO: assert these values are the same everywhere
@@ -332,6 +340,224 @@ def test_pgdata_import_smoke(
         br_initdb_endpoint.safe_psql("select * from othertable")
 
 
+def test_fast_import_with_pageserver_ingest(
+    test_output_dir,
+    vanilla_pg: VanillaPostgres,
+    port_distributor: PortDistributor,
+    fast_import: FastImport,
+    pg_distrib_dir: Path,
+    pg_version: PgVersion,
+    mock_s3_server: MockS3Server,
+    mock_kms: KMSClient,
+    mock_s3_client: S3Client,
+    neon_env_builder: NeonEnvBuilder,
+    make_httpserver: HTTPServer,
+):
+    # Prepare KMS and S3
+    key_response = mock_kms.create_key(
+        Description="Test key",
+        KeyUsage="ENCRYPT_DECRYPT",
+        Origin="AWS_KMS",
+    )
+    key_id = key_response["KeyMetadata"]["KeyId"]
+
+    def encrypt(x: str) -> EncryptResponseTypeDef:
+        return mock_kms.encrypt(KeyId=key_id, Plaintext=x)
+
+    # Start source postgres and ingest data
+    vanilla_pg.start()
+    vanilla_pg.safe_psql("CREATE TABLE foo (a int); INSERT INTO foo SELECT generate_series(1, 10);")
+
+    # Setup pageserver and fake cplane for import progress
+    def handler(request: Request) -> Response:
+        log.info(f"control plane request: {request.json}")
+        return Response(json.dumps({}), status=200)
+
+    cplane_mgmt_api_server = make_httpserver
+    cplane_mgmt_api_server.expect_request(re.compile(".*")).respond_with_handler(handler)
+
+    neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.MOCK_S3)
+    env = neon_env_builder.init_start()
+
+    env.pageserver.patch_config_toml_nonrecursive(
+        {
+            "import_pgdata_upcall_api": f"http://{cplane_mgmt_api_server.host}:{cplane_mgmt_api_server.port}/path/to/mgmt/api",
+            # because import_pgdata code uses this endpoint, not the one in common remote storage config
+            # TODO: maybe use common remote_storage config in pageserver?
+            "import_pgdata_aws_endpoint_url": env.s3_mock_server.endpoint(),
+        }
+    )
+    env.pageserver.stop()
+    env.pageserver.start()
+
+    # Encrypt connstrings and put spec into S3
+    source_connstring_encrypted = encrypt(vanilla_pg.connstr())
+    spec = {
+        "encryption_secret": {"KMS": {"key_id": key_id}},
+        "source_connstring_ciphertext_base64": base64.b64encode(
+            source_connstring_encrypted["CiphertextBlob"]
+        ).decode("utf-8"),
+        "project_id": "someproject",
+        "branch_id": "somebranch",
+    }
+
+    bucket = "test-bucket"
+    key_prefix = "test-prefix"
+    mock_s3_client.create_bucket(Bucket=bucket)
+    mock_s3_client.put_object(Bucket=bucket, Key=f"{key_prefix}/spec.json", Body=json.dumps(spec))
+
+    # Create timeline with import_pgdata
+    tenant_id = TenantId.generate()
+    env.storage_controller.tenant_create(tenant_id)
+
+    timeline_id = TimelineId.generate()
+    log.info("starting import")
+    start = time.monotonic()
+
+    idempotency = ImportPgdataIdemptencyKey.random()
+    log.info(f"idempotency key {idempotency}")
+    # TODO: teach neon_local CLI about the idempotency & 429 error so we can run inside the loop
+    # and check for 429
+
+    import_branch_name = "imported"
+    env.storage_controller.timeline_create(
+        tenant_id,
+        {
+            "new_timeline_id": str(timeline_id),
+            "import_pgdata": {
+                "idempotency_key": str(idempotency),
+                "location": {
+                    "AwsS3": {
+                        "region": env.s3_mock_server.region(),
+                        "bucket": bucket,
+                        "key": key_prefix,
+                    }
+                },
+            },
+        },
+    )
+    env.neon_cli.mappings_map_branch(import_branch_name, tenant_id, timeline_id)
+
+    # Run fast_import
+    if fast_import.extra_env is None:
+        fast_import.extra_env = {}
+    fast_import.extra_env["AWS_ACCESS_KEY_ID"] = mock_s3_server.access_key()
+    fast_import.extra_env["AWS_SECRET_ACCESS_KEY"] = mock_s3_server.secret_key()
+    fast_import.extra_env["AWS_SESSION_TOKEN"] = mock_s3_server.session_token()
+    fast_import.extra_env["AWS_REGION"] = mock_s3_server.region()
+    fast_import.extra_env["AWS_ENDPOINT_URL"] = mock_s3_server.endpoint()
+    fast_import.extra_env["RUST_LOG"] = "aws_config=debug,aws_sdk_kms=debug"
+    pg_port = port_distributor.get_port()
+    fast_import.run_pgdata(pg_port=pg_port, s3prefix=f"s3://{bucket}/{key_prefix}")
+    vanilla_pg.stop()
+
+    def validate_vanilla_equivalence(ep):
+        res = ep.safe_psql("SELECT count(*), sum(a) FROM foo;", dbname="neondb")
+        assert res[0] == (10, 55), f"got result: {res}"
+
+    # Sanity check that data in pgdata is expected:
+    pgbin = PgBin(test_output_dir, fast_import.pg_distrib_dir, fast_import.pg_version)
+    with VanillaPostgres(
+        fast_import.workdir / "pgdata", pgbin, pg_port, False
+    ) as new_pgdata_vanilla_pg:
+        new_pgdata_vanilla_pg.start()
+
+        # database name and user are hardcoded in fast_import binary, and they are different from normal vanilla postgres
+        conn = PgProtocol(dsn=f"postgresql://cloud_admin@localhost:{pg_port}/neondb")
+        validate_vanilla_equivalence(conn)
+
+    # Poll pageserver statuses in s3
+    while True:
+        locations = env.storage_controller.locate(tenant_id)
+        active_count = 0
+        for location in locations:
+            shard_id = TenantShardId.parse(location["shard_id"])
+            ps = env.get_pageserver(location["node_id"])
+            try:
+                detail = ps.http_client().timeline_detail(shard_id, timeline_id)
+                log.info(f"timeline {tenant_id}/{timeline_id} detail: {detail}")
+                state = detail["state"]
+                log.info(f"shard {shard_id} state: {state}")
+                if state == "Active":
+                    active_count += 1
+            except PageserverApiException as e:
+                if e.status_code == 404:
+                    log.info("not found, import is in progress")
+                    continue
+                elif e.status_code == 429:
+                    log.info("import is in progress")
+                    continue
+                else:
+                    raise
+
+            if state == "Active":
+                key = f"{key_prefix}/status/shard-{shard_id.shard_index}"
+                shard_status_file_contents = (
+                    mock_s3_client.get_object(Bucket=bucket, Key=key)["Body"].read().decode("utf-8")
+                )
+                shard_status = json.loads(shard_status_file_contents)
+                assert shard_status["done"] is True
+
+        if active_count == len(locations):
+            log.info("all shards are active")
+            break
+        time.sleep(0.5)
+
+    import_duration = time.monotonic() - start
+    log.info(f"import complete; duration={import_duration:.2f}s")
+
+    ep = env.endpoints.create_start(branch_name=import_branch_name, tenant_id=tenant_id)
+
+    # check that data is there
+    validate_vanilla_equivalence(ep)
+
+    # check that we can do basic ops
+
+    ep.safe_psql("create table othertable(values text)", dbname="neondb")
+    rw_lsn = Lsn(ep.safe_psql_scalar("select pg_current_wal_flush_lsn()"))
+    ep.stop()
+
+    # ... at the tip
+    _ = env.create_branch(
+        new_branch_name="br-tip",
+        ancestor_branch_name=import_branch_name,
+        tenant_id=tenant_id,
+        ancestor_start_lsn=rw_lsn,
+    )
+    br_tip_endpoint = env.endpoints.create_start(
+        branch_name="br-tip", endpoint_id="br-tip-ro", tenant_id=tenant_id
+    )
+    validate_vanilla_equivalence(br_tip_endpoint)
+    br_tip_endpoint.safe_psql("select * from othertable", dbname="neondb")
+    br_tip_endpoint.stop()
+
+    # ... at the initdb lsn
+    locations = env.storage_controller.locate(tenant_id)
+    [shard_zero] = [
+        loc for loc in locations if TenantShardId.parse(loc["shard_id"]).shard_number == 0
+    ]
+    shard_zero_ps = env.get_pageserver(shard_zero["node_id"])
+    shard_zero_timeline_info = shard_zero_ps.http_client().timeline_detail(
+        shard_zero["shard_id"], timeline_id
+    )
+    initdb_lsn = Lsn(shard_zero_timeline_info["initdb_lsn"])
+    _ = env.create_branch(
+        new_branch_name="br-initdb",
+        ancestor_branch_name=import_branch_name,
+        tenant_id=tenant_id,
+        ancestor_start_lsn=initdb_lsn,
+    )
+    br_initdb_endpoint = env.endpoints.create_start(
+        branch_name="br-initdb", endpoint_id="br-initdb-ro", tenant_id=tenant_id
+    )
+    validate_vanilla_equivalence(br_initdb_endpoint)
+    with pytest.raises(psycopg2.errors.UndefinedTable):
+        br_initdb_endpoint.safe_psql("select * from othertable", dbname="neondb")
+    br_initdb_endpoint.stop()
+
+    env.pageserver.stop(immediate=True)
+
+
 def test_fast_import_binary(
     test_output_dir,
     vanilla_pg: VanillaPostgres,
@@ -342,7 +568,7 @@ def test_fast_import_binary(
     vanilla_pg.safe_psql("CREATE TABLE foo (a int); INSERT INTO foo SELECT generate_series(1, 10);")
 
     pg_port = port_distributor.get_port()
-    fast_import.run(pg_port, vanilla_pg.connstr())
+    fast_import.run_pgdata(pg_port=pg_port, source_connection_string=vanilla_pg.connstr())
     vanilla_pg.stop()
 
     pgbin = PgBin(test_output_dir, fast_import.pg_distrib_dir, fast_import.pg_version)
@@ -358,6 +584,118 @@ def test_fast_import_binary(
         assert res[0][0] == 10
 
 
+def test_fast_import_restore_to_connstring(
+    test_output_dir,
+    vanilla_pg: VanillaPostgres,
+    port_distributor: PortDistributor,
+    fast_import: FastImport,
+    pg_distrib_dir: Path,
+    pg_version: PgVersion,
+):
+    vanilla_pg.start()
+    vanilla_pg.safe_psql("CREATE TABLE foo (a int); INSERT INTO foo SELECT generate_series(1, 10);")
+
+    pgdatadir = test_output_dir / "destination-pgdata"
+    pg_bin = PgBin(test_output_dir, pg_distrib_dir, pg_version)
+    port = port_distributor.get_port()
+    with VanillaPostgres(pgdatadir, pg_bin, port) as destination_vanilla_pg:
+        destination_vanilla_pg.configure(["shared_preload_libraries='neon_rmgr'"])
+        destination_vanilla_pg.start()
+
+        # create another database & role and try to restore there
+        destination_vanilla_pg.safe_psql("""
+            CREATE ROLE testrole WITH
+                LOGIN
+                PASSWORD 'testpassword'
+                NOSUPERUSER
+                NOCREATEDB
+                NOCREATEROLE;
+        """)
+        destination_vanilla_pg.safe_psql("CREATE DATABASE testdb OWNER testrole;")
+
+        destination_connstring = destination_vanilla_pg.connstr(
+            dbname="testdb", user="testrole", password="testpassword"
+        )
+        fast_import.run_dump_restore(
+            source_connection_string=vanilla_pg.connstr(),
+            destination_connection_string=destination_connstring,
+        )
+        vanilla_pg.stop()
+        conn = PgProtocol(dsn=destination_connstring)
+        res = conn.safe_psql("SELECT count(*) FROM foo;")
+        log.info(f"Result: {res}")
+        assert res[0][0] == 10
+
+
+def test_fast_import_restore_to_connstring_from_s3_spec(
+    test_output_dir,
+    vanilla_pg: VanillaPostgres,
+    port_distributor: PortDistributor,
+    fast_import: FastImport,
+    pg_distrib_dir: Path,
+    pg_version: PgVersion,
+    mock_s3_server: MockS3Server,
+    mock_kms: KMSClient,
+    mock_s3_client: S3Client,
+):
+    # Prepare KMS and S3
+    key_response = mock_kms.create_key(
+        Description="Test key",
+        KeyUsage="ENCRYPT_DECRYPT",
+        Origin="AWS_KMS",
+    )
+    key_id = key_response["KeyMetadata"]["KeyId"]
+
+    def encrypt(x: str) -> EncryptResponseTypeDef:
+        return mock_kms.encrypt(KeyId=key_id, Plaintext=x)
+
+    # Start source postgres and ingest data
+    vanilla_pg.start()
+    vanilla_pg.safe_psql("CREATE TABLE foo (a int); INSERT INTO foo SELECT generate_series(1, 10);")
+
+    # Start target postgres
+    pgdatadir = test_output_dir / "destination-pgdata"
+    pg_bin = PgBin(test_output_dir, pg_distrib_dir, pg_version)
+    port = port_distributor.get_port()
+    with VanillaPostgres(pgdatadir, pg_bin, port) as destination_vanilla_pg:
+        destination_vanilla_pg.configure(["shared_preload_libraries='neon_rmgr'"])
+        destination_vanilla_pg.start()
+
+        # Encrypt connstrings and put spec into S3
+        source_connstring_encrypted = encrypt(vanilla_pg.connstr())
+        destination_connstring_encrypted = encrypt(destination_vanilla_pg.connstr())
+        spec = {
+            "encryption_secret": {"KMS": {"key_id": key_id}},
+            "source_connstring_ciphertext_base64": base64.b64encode(
+                source_connstring_encrypted["CiphertextBlob"]
+            ).decode("utf-8"),
+            "destination_connstring_ciphertext_base64": base64.b64encode(
+                destination_connstring_encrypted["CiphertextBlob"]
+            ).decode("utf-8"),
+        }
+
+        mock_s3_client.create_bucket(Bucket="test-bucket")
+        mock_s3_client.put_object(
+            Bucket="test-bucket", Key="test-prefix/spec.json", Body=json.dumps(spec)
+        )
+
+        # Run fast_import
+        if fast_import.extra_env is None:
+            fast_import.extra_env = {}
+        fast_import.extra_env["AWS_ACCESS_KEY_ID"] = mock_s3_server.access_key()
+        fast_import.extra_env["AWS_SECRET_ACCESS_KEY"] = mock_s3_server.secret_key()
+        fast_import.extra_env["AWS_SESSION_TOKEN"] = mock_s3_server.session_token()
+        fast_import.extra_env["AWS_REGION"] = mock_s3_server.region()
+        fast_import.extra_env["AWS_ENDPOINT_URL"] = mock_s3_server.endpoint()
+        fast_import.extra_env["RUST_LOG"] = "aws_config=debug,aws_sdk_kms=debug"
+        fast_import.run_dump_restore(s3prefix="s3://test-bucket/test-prefix")
+        vanilla_pg.stop()
+
+        res = destination_vanilla_pg.safe_psql("SELECT count(*) FROM foo;")
+        log.info(f"Result: {res}")
+        assert res[0][0] == 10
+
+
 # TODO: Maybe test with pageserver?
 # 1. run whole neon env
 # 2. create timeline with some s3 path???
diff --git a/test_runner/regress/test_layers_from_future.py b/test_runner/regress/test_layers_from_future.py
index 3ac4ed1a3e..872d3dc4cf 100644
--- a/test_runner/regress/test_layers_from_future.py
+++ b/test_runner/regress/test_layers_from_future.py
@@ -20,9 +20,6 @@ from fixtures.remote_storage import LocalFsStorage, RemoteStorageKind
 from fixtures.utils import query_scalar, wait_until
 
 
-@pytest.mark.skip(
-    reason="We won't create future layers any more after https://github.com/neondatabase/neon/pull/10548"
-)
 @pytest.mark.parametrize(
     "attach_mode",
     ["default_generation", "same_generation"],
diff --git a/test_runner/regress/test_lfc_resize.py b/test_runner/regress/test_lfc_resize.py
index 8762e6525b..ea7d38a3d9 100644
--- a/test_runner/regress/test_lfc_resize.py
+++ b/test_runner/regress/test_lfc_resize.py
@@ -72,6 +72,11 @@ def test_lfc_resize(neon_simple_env: NeonEnv, pg_bin: PgBin):
 
     thread.join()
 
+    # Fill LFC: seqscan should fetch the whole table in cache.
+    # It is needed for further correct evaluation of LFC file size
+    # (a sparse chunk of LFC takes less than 1 MB on disk).
+    cur.execute("select sum(abalance) from pgbench_accounts")
+
     # Before shrinking the cache, check that it really is large now
     (lfc_file_size, lfc_file_blocks) = get_lfc_size()
     assert int(lfc_file_blocks) > 128 * 1024
diff --git a/test_runner/regress/test_pageserver_secondary.py b/test_runner/regress/test_pageserver_secondary.py
index 590093d23c..602d493ae6 100644
--- a/test_runner/regress/test_pageserver_secondary.py
+++ b/test_runner/regress/test_pageserver_secondary.py
@@ -10,14 +10,18 @@ from typing import TYPE_CHECKING
 import pytest
 from fixtures.common_types import TenantId, TenantShardId, TimelineId
 from fixtures.log_helper import log
-from fixtures.neon_fixtures import NeonEnvBuilder, NeonPageserver
+from fixtures.neon_fixtures import (
+    NeonEnvBuilder,
+    NeonPageserver,
+    StorageControllerMigrationConfig,
+)
 from fixtures.pageserver.common_types import parse_layer_file_name
 from fixtures.pageserver.utils import (
     assert_prefix_empty,
     wait_for_upload_queue_empty,
 )
 from fixtures.remote_storage import LocalFsStorage, RemoteStorageKind, S3Storage, s3_storage
-from fixtures.utils import skip_in_debug_build, wait_until
+from fixtures.utils import run_only_on_default_postgres, skip_in_debug_build, wait_until
 from fixtures.workload import Workload
 from werkzeug.wrappers.request import Request
 from werkzeug.wrappers.response import Response
@@ -889,3 +893,106 @@ def test_slow_secondary_downloads(neon_env_builder: NeonEnvBuilder, via_controll
     assert progress_3["heatmap_mtime"] is not None
     assert progress_3["layers_total"] == progress_3["layers_downloaded"]
     assert progress_3["bytes_total"] == progress_3["bytes_downloaded"]
+
+
+@skip_in_debug_build("only run with release build")
+@run_only_on_default_postgres("PG version is not interesting here")
+def test_migration_to_cold_secondary(neon_env_builder: NeonEnvBuilder):
+    neon_env_builder.num_pageservers = 2
+    neon_env_builder.enable_pageserver_remote_storage(
+        remote_storage_kind=RemoteStorageKind.MOCK_S3,
+    )
+
+    tenant_conf = TENANT_CONF.copy()
+    tenant_conf["heatmap_period"] = "0s"
+
+    env = neon_env_builder.init_configs()
+    env.start()
+
+    assert isinstance(env.pageserver_remote_storage, S3Storage)  # Satisfy linter
+
+    tenant_id = TenantId.generate()
+    timeline_id = TimelineId.generate()
+    env.create_tenant(tenant_id, timeline_id, conf=tenant_conf, placement_policy='{"Attached":1}')
+
+    env.storage_controller.reconcile_until_idle()
+
+    attached_to_id = env.storage_controller.locate(tenant_id)[0]["node_id"]
+    ps_attached = env.get_pageserver(attached_to_id)
+    ps_secondary = next(p for p in env.pageservers if p != ps_attached)
+
+    # Generate a bunch of small layers (we will apply a slowdown failpoint that works on a per-layer basis)
+    workload = Workload(env, tenant_id, timeline_id)
+    workload.init()
+    workload.write_rows(128, upload=True)
+    workload.write_rows(128, upload=True)
+    workload.write_rows(128, upload=True)
+    workload.write_rows(128, upload=True)
+    workload.stop()
+
+    # Expect lots of layers
+    assert len(ps_attached.list_layers(tenant_id, timeline_id)) > 10
+
+    # Simulate large data by making layer downloads artifically slow
+    for ps in env.pageservers:
+        ps.http_client().configure_failpoints([("secondary-layer-download-sleep", "return(1000)")])
+
+    # Upload a heatmap, so that secondaries have something to download
+    ps_attached.http_client().tenant_heatmap_upload(tenant_id)
+    heatmap_before_migration = env.pageserver_remote_storage.heatmap_content(tenant_id)
+
+    # This has no chance to succeed: we have lots of layers and each one takes at least 1000ms.
+    # However, it pulls the heatmap, which will be important later.
+    http_client = env.storage_controller.pageserver_api()
+    (status, progress) = http_client.tenant_secondary_download(tenant_id, wait_ms=4000)
+    assert status == 202
+    assert progress["heatmap_mtime"] is not None
+    assert progress["layers_downloaded"] > 0
+    assert progress["bytes_downloaded"] > 0
+    assert progress["layers_total"] > progress["layers_downloaded"]
+    assert progress["bytes_total"] > progress["bytes_downloaded"]
+
+    env.storage_controller.allowed_errors.extend(
+        [
+            ".*Timed out.*downloading layers.*",
+        ]
+    )
+
+    # Use a custom configuration that gives up earlier than usual.
+    # We can't hydrate everything anyway because of the failpoints.
+    config = StorageControllerMigrationConfig(
+        secondary_warmup_timeout="5s", secondary_download_request_timeout="2s"
+    )
+    env.storage_controller.tenant_shard_migrate(
+        TenantShardId(tenant_id, shard_number=0, shard_count=0), ps_secondary.id, config
+    )
+
+    env.storage_controller.reconcile_until_idle()
+    assert env.storage_controller.locate(tenant_id)[0]["node_id"] == ps_secondary.id
+
+    ps_secondary.http_client().tenant_heatmap_upload(tenant_id)
+    heatmap_after_migration = env.pageserver_remote_storage.heatmap_content(tenant_id)
+
+    assert len(heatmap_before_migration["timelines"][0]["layers"]) > 0
+
+    # The new layer map should contain all the layers in the pre-migration one
+    # and a new in memory layer
+    after_migration_heatmap_layers_count = len(heatmap_after_migration["timelines"][0]["layers"])
+    assert (
+        len(heatmap_before_migration["timelines"][0]["layers"]) + 1
+        == after_migration_heatmap_layers_count
+    )
+
+    log.info(f"Heatmap size after cold migration is {after_migration_heatmap_layers_count}")
+
+    env.storage_controller.download_heatmap_layers(
+        TenantShardId(tenant_id, shard_number=0, shard_count=0), timeline_id
+    )
+
+    def all_layers_downloaded():
+        local_layers_count = len(ps_secondary.list_layers(tenant_id, timeline_id))
+
+        log.info(f"{local_layers_count=} {after_migration_heatmap_layers_count=}")
+        assert local_layers_count == after_migration_heatmap_layers_count
+
+    wait_until(all_layers_downloaded)
diff --git a/test_runner/regress/test_pg_regress.py b/test_runner/regress/test_pg_regress.py
index c5ae669dce..411888efbc 100644
--- a/test_runner/regress/test_pg_regress.py
+++ b/test_runner/regress/test_pg_regress.py
@@ -261,7 +261,7 @@ def test_isolation(
         pg_bin.run(pg_isolation_regress_command, env=env_vars, cwd=runpath)
 
     # This fails with a mismatch on `pg_multixact/offsets/0000`
-    # post_checks(env, test_output_dir, DBNAME, endpoint)
+    post_checks(env, test_output_dir, DBNAME, endpoint)
 
 
 # Run extra Neon-specific pg_regress-based tests. The tests and their
diff --git a/test_runner/regress/test_readonly_node.py b/test_runner/regress/test_readonly_node.py
index c13bea7ee1..fe970a868c 100644
--- a/test_runner/regress/test_readonly_node.py
+++ b/test_runner/regress/test_readonly_node.py
@@ -287,7 +287,7 @@ def test_readonly_node_gc(neon_env_builder: NeonEnvBuilder):
                 offset=offset,
             )
 
-        # Do some update so we can increment latest_gc_cutoff
+        # Do some update so we can increment gc_cutoff
         generate_updates_on_main(env, ep_main, i, end=100)
 
     # Wait for the existing lease to expire.
diff --git a/test_runner/regress/test_relations.py b/test_runner/regress/test_relations.py
new file mode 100644
index 0000000000..3e29c92a96
--- /dev/null
+++ b/test_runner/regress/test_relations.py
@@ -0,0 +1,68 @@
+from __future__ import annotations
+
+from fixtures.neon_fixtures import (
+    NeonEnvBuilder,
+)
+
+
+def test_pageserver_reldir_v2(
+    neon_env_builder: NeonEnvBuilder,
+):
+    env = neon_env_builder.init_start(
+        initial_tenant_conf={
+            "rel_size_v2_enabled": "false",
+        }
+    )
+
+    endpoint = env.endpoints.create_start("main")
+    # Create a relation in v1
+    endpoint.safe_psql("CREATE TABLE foo1 (id INTEGER PRIMARY KEY, val text)")
+    endpoint.safe_psql("CREATE TABLE foo2 (id INTEGER PRIMARY KEY, val text)")
+
+    # Switch to v2
+    env.pageserver.http_client().update_tenant_config(
+        env.initial_tenant,
+        {
+            "rel_size_v2_enabled": True,
+        },
+    )
+
+    # Check if both relations are still accessible
+    endpoint.safe_psql("SELECT * FROM foo1")
+    endpoint.safe_psql("SELECT * FROM foo2")
+
+    # Restart the endpoint
+    endpoint.stop()
+    endpoint.start()
+
+    # Check if both relations are still accessible again after restart
+    endpoint.safe_psql("SELECT * FROM foo1")
+    endpoint.safe_psql("SELECT * FROM foo2")
+
+    # Create a relation in v2
+    endpoint.safe_psql("CREATE TABLE foo3 (id INTEGER PRIMARY KEY, val text)")
+    # Delete a relation in v1
+    endpoint.safe_psql("DROP TABLE foo1")
+
+    # Check if both relations are still accessible
+    endpoint.safe_psql("SELECT * FROM foo2")
+    endpoint.safe_psql("SELECT * FROM foo3")
+
+    # Restart the endpoint
+    endpoint.stop()
+    # This will acquire a basebackup, which lists all relations.
+    endpoint.start()
+
+    # Check if both relations are still accessible
+    endpoint.safe_psql("DROP TABLE IF EXISTS foo1")
+    endpoint.safe_psql("SELECT * FROM foo2")
+    endpoint.safe_psql("SELECT * FROM foo3")
+
+    endpoint.safe_psql("DROP TABLE foo3")
+    endpoint.stop()
+    endpoint.start()
+
+    # Check if relations are still accessible
+    endpoint.safe_psql("DROP TABLE IF EXISTS foo1")
+    endpoint.safe_psql("SELECT * FROM foo2")
+    endpoint.safe_psql("DROP TABLE IF EXISTS foo3")
diff --git a/test_runner/regress/test_sharding.py b/test_runner/regress/test_sharding.py
index 8910873690..f58bbcd3c0 100644
--- a/test_runner/regress/test_sharding.py
+++ b/test_runner/regress/test_sharding.py
@@ -1821,7 +1821,7 @@ def test_sharding_gc(
         # TODO: remove when https://github.com/neondatabase/neon/issues/10720 is fixed
         ps.allowed_errors.extend(
             [
-                ".*could not find data for key 020000000000000000000000000000000000.*",
+                ".*could not find data for key.*",
                 ".*could not ingest record.*",
             ]
         )
diff --git a/test_runner/regress/test_storage_controller.py b/test_runner/regress/test_storage_controller.py
index 2750826aec..1d95312140 100644
--- a/test_runner/regress/test_storage_controller.py
+++ b/test_runner/regress/test_storage_controller.py
@@ -3189,15 +3189,17 @@ def test_safekeeper_deployment_time_update(neon_env_builder: NeonEnvBuilder):
 
     assert len(target.get_safekeepers()) == 0
 
+    sk_0 = env.safekeepers[0]
+
     body = {
         "active": True,
         "id": fake_id,
         "created_at": "2023-10-25T09:11:25Z",
         "updated_at": "2024-08-28T11:32:43Z",
         "region_id": "aws-us-east-2",
-        "host": "safekeeper-333.us-east-2.aws.neon.build",
-        "port": 6401,
-        "http_port": 7676,
+        "host": "localhost",
+        "port": sk_0.port.pg,
+        "http_port": sk_0.port.http,
         "version": 5957,
         "availability_zone_id": "us-east-2b",
     }
@@ -3236,11 +3238,26 @@ def test_safekeeper_deployment_time_update(neon_env_builder: NeonEnvBuilder):
     newest_info = target.get_safekeeper(inserted["id"])
     assert newest_info
     assert newest_info["scheduling_policy"] == "Pause"
-    target.safekeeper_scheduling_policy(inserted["id"], "Decomissioned")
+    target.safekeeper_scheduling_policy(inserted["id"], "Active")
     newest_info = target.get_safekeeper(inserted["id"])
     assert newest_info
-    assert newest_info["scheduling_policy"] == "Decomissioned"
+    assert newest_info["scheduling_policy"] == "Active"
     # Ensure idempotency
+    target.safekeeper_scheduling_policy(inserted["id"], "Active")
+    newest_info = target.get_safekeeper(inserted["id"])
+    assert newest_info
+    assert newest_info["scheduling_policy"] == "Active"
+    # change back to paused again
+    target.safekeeper_scheduling_policy(inserted["id"], "Pause")
+
+    def storcon_heartbeat():
+        assert env.storage_controller.log_contains(
+            "Heartbeat round complete for 1 safekeepers, 0 offline"
+        )
+
+    wait_until(storcon_heartbeat)
+
+    # Now decomission it
     target.safekeeper_scheduling_policy(inserted["id"], "Decomissioned")
 
 
diff --git a/test_runner/regress/test_storage_scrubber.py b/test_runner/regress/test_storage_scrubber.py
index b8253fb125..d44c176b35 100644
--- a/test_runner/regress/test_storage_scrubber.py
+++ b/test_runner/regress/test_storage_scrubber.py
@@ -318,7 +318,7 @@ def test_scrubber_physical_gc_ancestors(neon_env_builder: NeonEnvBuilder, shard_
         # TODO: remove when https://github.com/neondatabase/neon/issues/10720 is fixed
         ps.allowed_errors.extend(
             [
-                ".*could not find data for key 020000000000000000000000000000000000.*",
+                ".*could not find data for key.*",
                 ".*could not ingest record.*",
             ]
         )
diff --git a/test_runner/regress/test_tenants.py b/test_runner/regress/test_tenants.py
index b4c968b217..afe444f227 100644
--- a/test_runner/regress/test_tenants.py
+++ b/test_runner/regress/test_tenants.py
@@ -481,7 +481,8 @@ def test_pageserver_metrics_many_relations(neon_env_builder: NeonEnvBuilder):
     counts = timeline_detail["directory_entries_counts"]
     assert counts
     log.info(f"directory counts: {counts}")
-    assert counts[2] > COUNT_AT_LEAST_EXPECTED
+    # We need to add up reldir v1 + v2 counts
+    assert counts[2] + counts[7] > COUNT_AT_LEAST_EXPECTED
 
 
 def test_timelines_parallel_endpoints(neon_simple_env: NeonEnv):
diff --git a/test_runner/regress/test_wal_acceptor.py b/test_runner/regress/test_wal_acceptor.py
index 2b6a267bdf..c5045fe4a4 100644
--- a/test_runner/regress/test_wal_acceptor.py
+++ b/test_runner/regress/test_wal_acceptor.py
@@ -566,10 +566,14 @@ def test_wal_backup(neon_env_builder: NeonEnvBuilder):
     assert_prefix_empty(neon_env_builder.safekeepers_remote_storage, prefix)
 
 
-def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder):
+# This test is flaky, probably because PUTs of local fs storage are not atomic.
+# Let's keep both remote storage kinds for a while to see if this is the case.
+# https://github.com/neondatabase/neon/issues/10761
+@pytest.mark.parametrize("remote_storage_kind", [s3_storage(), RemoteStorageKind.LOCAL_FS])
+def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storage_kind: RemoteStorageKind):
     neon_env_builder.num_safekeepers = 3
 
-    neon_env_builder.enable_safekeeper_remote_storage(default_remote_storage())
+    neon_env_builder.enable_safekeeper_remote_storage(remote_storage_kind)
 
     env = neon_env_builder.init_start()
     tenant_id = env.initial_tenant
@@ -1441,6 +1445,7 @@ def test_peer_recovery(neon_env_builder: NeonEnvBuilder):
 
     # roughly fills one segment
     endpoint.safe_psql("insert into t select generate_series(1,250000), 'payload'")
+    lsn = Lsn(endpoint.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0])
 
     endpoint.stop()  # stop compute
 
@@ -1469,7 +1474,15 @@ def test_peer_recovery(neon_env_builder: NeonEnvBuilder):
         "flush_lsn to get aligned",
     )
 
-    cmp_sk_wal([sk1, sk2], tenant_id, timeline_id)
+    sk1_digest = sk1.http_client().timeline_digest(
+        tenant_id, timeline_id, sk1.get_timeline_start_lsn(tenant_id, timeline_id), lsn
+    )
+
+    sk2_digest = sk1.http_client().timeline_digest(
+        tenant_id, timeline_id, sk2.get_timeline_start_lsn(tenant_id, timeline_id), lsn
+    )
+
+    assert sk1_digest == sk2_digest
 
     # stop one of safekeepers which weren't recovering and insert a bit more to check we can commit
     env.safekeepers[2].stop()
diff --git a/vendor/postgres-v14 b/vendor/postgres-v14
index c0aedfd3ca..6254ab9b44 160000
--- a/vendor/postgres-v14
+++ b/vendor/postgres-v14
@@ -1 +1 @@
-Subproject commit c0aedfd3cac447510a2db843b561f0c52901b679
+Subproject commit 6254ab9b4496c3e481bc037ae69d859bbc2bdd7d
diff --git a/vendor/postgres-v15 b/vendor/postgres-v15
index 355a7c69d3..81e2eef061 160000
--- a/vendor/postgres-v15
+++ b/vendor/postgres-v15
@@ -1 +1 @@
-Subproject commit 355a7c69d3f907f3612eb406cc7b9c2f55d59b59
+Subproject commit 81e2eef0616c65c2233c75b06f25766ae4c080c4
diff --git a/vendor/postgres-v16 b/vendor/postgres-v16
index 13cf5d06c9..9422247c58 160000
--- a/vendor/postgres-v16
+++ b/vendor/postgres-v16
@@ -1 +1 @@
-Subproject commit 13cf5d06c98a8e9b0590ce6cdfd193a08d0a7792
+Subproject commit 9422247c582e7c1a08a4855d04af0874f8df2f34
diff --git a/vendor/postgres-v17 b/vendor/postgres-v17
index 4c45d78ad5..a8fea8b4be 160000
--- a/vendor/postgres-v17
+++ b/vendor/postgres-v17
@@ -1 +1 @@
-Subproject commit 4c45d78ad587e4bcb4a5a7ef6931b88c6a3d575d
+Subproject commit a8fea8b4be43039f0782347c88a9b9b25f50c9d8
diff --git a/vendor/revisions.json b/vendor/revisions.json
index 5f60e1d690..72d97d7f6a 100644
--- a/vendor/revisions.json
+++ b/vendor/revisions.json
@@ -1,18 +1,18 @@
 {
   "v17": [
-    "17.2",
-    "4c45d78ad587e4bcb4a5a7ef6931b88c6a3d575d"
+    "17.4",
+    "a8fea8b4be43039f0782347c88a9b9b25f50c9d8"
   ],
   "v16": [
-    "16.6",
-    "13cf5d06c98a8e9b0590ce6cdfd193a08d0a7792"
+    "16.8",
+    "9422247c582e7c1a08a4855d04af0874f8df2f34"
   ],
   "v15": [
-    "15.10",
-    "355a7c69d3f907f3612eb406cc7b9c2f55d59b59"
+    "15.12",
+    "81e2eef0616c65c2233c75b06f25766ae4c080c4"
   ],
   "v14": [
-    "14.15",
-    "c0aedfd3cac447510a2db843b561f0c52901b679"
+    "14.17",
+    "6254ab9b4496c3e481bc037ae69d859bbc2bdd7d"
   ]
 }