diff --git a/.dockerignore b/.dockerignore index 7ead48db7c..9fafc2e4ba 100644 --- a/.dockerignore +++ b/.dockerignore @@ -14,6 +14,7 @@ !compute/ !compute_tools/ !control_plane/ +!docker-compose/ext-src !libs/ !pageserver/ !pgxn/ diff --git a/.github/actions/allure-report-generate/action.yml b/.github/actions/allure-report-generate/action.yml index d07e3e32e8..b85ca7874d 100644 --- a/.github/actions/allure-report-generate/action.yml +++ b/.github/actions/allure-report-generate/action.yml @@ -38,9 +38,11 @@ runs: # - name: Set variables shell: bash -euxo pipefail {0} + env: + PR_NUMBER: ${{ github.event.pull_request.number }} + BUCKET: neon-github-public-dev run: | - PR_NUMBER=$(jq --raw-output .pull_request.number "$GITHUB_EVENT_PATH" || true) - if [ "${PR_NUMBER}" != "null" ]; then + if [ -n "${PR_NUMBER}" ]; then BRANCH_OR_PR=pr-${PR_NUMBER} elif [ "${GITHUB_REF_NAME}" = "main" ] || [ "${GITHUB_REF_NAME}" = "release" ] || \ [ "${GITHUB_REF_NAME}" = "release-proxy" ] || [ "${GITHUB_REF_NAME}" = "release-compute" ]; then @@ -59,8 +61,6 @@ runs: echo "LOCK_FILE=${LOCK_FILE}" >> $GITHUB_ENV echo "WORKDIR=${WORKDIR}" >> $GITHUB_ENV echo "BUCKET=${BUCKET}" >> $GITHUB_ENV - env: - BUCKET: neon-github-public-dev # TODO: We can replace with a special docker image with Java and Allure pre-installed - uses: actions/setup-java@v4 @@ -80,8 +80,8 @@ runs: rm -f ${ALLURE_ZIP} fi env: - ALLURE_VERSION: 2.27.0 - ALLURE_ZIP_SHA256: b071858fb2fa542c65d8f152c5c40d26267b2dfb74df1f1608a589ecca38e777 + ALLURE_VERSION: 2.32.2 + ALLURE_ZIP_SHA256: 3f28885e2118f6317c92f667eaddcc6491400af1fb9773c1f3797a5fa5174953 - uses: aws-actions/configure-aws-credentials@v4 if: ${{ !cancelled() }} diff --git a/.github/actions/allure-report-store/action.yml b/.github/actions/allure-report-store/action.yml index 8548a886cf..687bfd49af 100644 --- a/.github/actions/allure-report-store/action.yml +++ b/.github/actions/allure-report-store/action.yml @@ -18,9 +18,11 @@ runs: steps: - name: Set variables shell: bash -euxo pipefail {0} + env: + PR_NUMBER: ${{ github.event.pull_request.number }} + REPORT_DIR: ${{ inputs.report-dir }} run: | - PR_NUMBER=$(jq --raw-output .pull_request.number "$GITHUB_EVENT_PATH" || true) - if [ "${PR_NUMBER}" != "null" ]; then + if [ -n "${PR_NUMBER}" ]; then BRANCH_OR_PR=pr-${PR_NUMBER} elif [ "${GITHUB_REF_NAME}" = "main" ] || [ "${GITHUB_REF_NAME}" = "release" ] || \ [ "${GITHUB_REF_NAME}" = "release-proxy" ] || [ "${GITHUB_REF_NAME}" = "release-compute" ]; then @@ -32,8 +34,6 @@ runs: echo "BRANCH_OR_PR=${BRANCH_OR_PR}" >> $GITHUB_ENV echo "REPORT_DIR=${REPORT_DIR}" >> $GITHUB_ENV - env: - REPORT_DIR: ${{ inputs.report-dir }} - uses: aws-actions/configure-aws-credentials@v4 if: ${{ !cancelled() }} diff --git a/.github/actions/run-python-test-set/action.yml b/.github/actions/run-python-test-set/action.yml index 0eddfe5da6..122fe48b68 100644 --- a/.github/actions/run-python-test-set/action.yml +++ b/.github/actions/run-python-test-set/action.yml @@ -236,5 +236,5 @@ runs: uses: ./.github/actions/allure-report-store with: report-dir: /tmp/test_output/allure/results - unique-key: ${{ inputs.build_type }}-${{ inputs.pg_version }} + unique-key: ${{ inputs.build_type }}-${{ inputs.pg_version }}-${{ runner.arch }} aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }} diff --git a/scripts/generate_image_maps.py b/.github/scripts/generate_image_maps.py similarity index 98% rename from scripts/generate_image_maps.py rename to .github/scripts/generate_image_maps.py index 915eb33673..39ece5b38f 100644 --- 
a/scripts/generate_image_maps.py +++ b/.github/scripts/generate_image_maps.py @@ -27,6 +27,7 @@ components = { registries = { "dev": [ "docker.io/neondatabase", + "ghcr.io/neondatabase", f"{dev_aws}.dkr.ecr.{aws_region}.amazonaws.com", f"{dev_acr}.azurecr.io/neondatabase", ], diff --git a/.github/scripts/previous-releases.jq b/.github/scripts/previous-releases.jq new file mode 100644 index 0000000000..b0b00bce18 --- /dev/null +++ b/.github/scripts/previous-releases.jq @@ -0,0 +1,25 @@ +# Expects response from https://docs.github.com/en/rest/releases/releases?apiVersion=2022-11-28#list-releases as input, +# with tag names `release` for storage, `release-compute` for compute and `release-proxy` for proxy releases. +# Extract only the `tag_name` field from each release object +[ .[].tag_name ] + +# Transform each tag name into a structured object using regex capture +| reduce map( + capture("^(?<full>release(-(?<component>proxy|compute))?-(?<version>\\d+))$") + | { + component: (.component // "storage"), # Default to "storage" if no component is specified + version: (.version | tonumber), # Convert the version number to an integer + full: .full # Store the full tag name for final output + } + )[] as $entry # Loop over the transformed list + +# Accumulate the latest (highest-numbered) version for each component +({}; + .[$entry.component] |= (if . == null or $entry.version > .version then $entry else . end)) + +# Convert the resulting object into an array of formatted strings +| to_entries +| map("\(.key)=\(.value.full)") + +# Output each string separately +| .[] diff --git a/scripts/push_with_image_map.py b/.github/scripts/push_with_image_map.py similarity index 100% rename from scripts/push_with_image_map.py rename to .github/scripts/push_with_image_map.py diff --git a/.github/workflows/_build-and-test-locally.yml b/.github/workflows/_build-and-test-locally.yml index 3740e6dc9c..30fde127b0 100644 --- a/.github/workflows/_build-and-test-locally.yml +++ b/.github/workflows/_build-and-test-locally.yml @@ -337,7 +337,7 @@ jobs: - name: Pytest regression tests continue-on-error: ${{ matrix.lfc_state == 'with-lfc' && inputs.build-type == 'debug' }} uses: ./.github/actions/run-python-test-set - timeout-minutes: ${{ inputs.sanitizers != 'enabled' && 60 || 180 }} + timeout-minutes: ${{ inputs.sanitizers != 'enabled' && 75 || 180 }} with: build_type: ${{ inputs.build-type }} test_selection: regress diff --git a/.github/workflows/_meta.yml b/.github/workflows/_meta.yml new file mode 100644 index 0000000000..ac09a51ae9 --- /dev/null +++ b/.github/workflows/_meta.yml @@ -0,0 +1,103 @@ +name: Generate run metadata +on: + workflow_call: + inputs: + github-event-name: + type: string + required: true + outputs: + build-tag: + description: "Tag for the current workflow run" + value: ${{ jobs.tags.outputs.build-tag }} + previous-storage-release: + description: "Tag of the last storage release" + value: ${{ jobs.tags.outputs.storage }} + previous-proxy-release: + description: "Tag of the last proxy release" + value: ${{ jobs.tags.outputs.proxy }} + previous-compute-release: + description: "Tag of the last compute release" + value: ${{ jobs.tags.outputs.compute }} + run-kind: + description: "The kind of run we're currently in.
Will be one of `pr-main`, `push-main`, `storage-rc`, `storage-release`, `proxy-rc`, `proxy-release`, `compute-rc`, `compute-release` or `merge_queue`" + value: ${{ jobs.tags.outputs.run-kind }} + +permissions: {} + +jobs: + tags: + runs-on: ubuntu-22.04 + outputs: + build-tag: ${{ steps.build-tag.outputs.tag }} + compute: ${{ steps.previous-releases.outputs.compute }} + proxy: ${{ steps.previous-releases.outputs.proxy }} + storage: ${{ steps.previous-releases.outputs.storage }} + run-kind: ${{ steps.run-kind.outputs.run-kind }} + permissions: + contents: read + steps: + # Need `fetch-depth: 0` to count the number of commits in the branch + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Get run kind + id: run-kind + env: + RUN_KIND: >- + ${{ + false + || (inputs.github-event-name == 'push' && github.ref_name == 'main') && 'push-main' + || (inputs.github-event-name == 'push' && github.ref_name == 'release') && 'storage-release' + || (inputs.github-event-name == 'push' && github.ref_name == 'release-compute') && 'compute-release' + || (inputs.github-event-name == 'push' && github.ref_name == 'release-proxy') && 'proxy-release' + || (inputs.github-event-name == 'pull_request' && github.base_ref == 'main') && 'pr-main' + || (inputs.github-event-name == 'pull_request' && github.base_ref == 'release') && 'storage-rc-pr' + || (inputs.github-event-name == 'pull_request' && github.base_ref == 'release-compute') && 'compute-rc-pr' + || (inputs.github-event-name == 'pull_request' && github.base_ref == 'release-proxy') && 'proxy-rc-pr' + || 'unknown' + }} + run: | + echo "run-kind=$RUN_KIND" | tee -a $GITHUB_OUTPUT + + - name: Get build tag + id: build-tag + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + CURRENT_BRANCH: ${{ github.head_ref || github.ref_name }} + CURRENT_SHA: ${{ github.event.pull_request.head.sha || github.sha }} + RUN_KIND: ${{ steps.run-kind.outputs.run-kind }} + run: | + case $RUN_KIND in + push-main) + echo "tag=$(git rev-list --count HEAD)" | tee -a $GITHUB_OUTPUT + ;; + storage-release) + echo "tag=release-$(git rev-list --count HEAD)" | tee -a $GITHUB_OUTPUT + ;; + proxy-release) + echo "tag=release-proxy-$(git rev-list --count HEAD)" | tee -a $GITHUB_OUTPUT + ;; + compute-release) + echo "tag=release-compute-$(git rev-list --count HEAD)" | tee -a $GITHUB_OUTPUT + ;; + pr-main|storage-rc-pr|compute-rc-pr|proxy-rc-pr) + BUILD_AND_TEST_RUN_ID=$(gh run list -b $CURRENT_BRANCH -c $CURRENT_SHA -w 'Build and Test' -L 1 --json databaseId --jq '.[].databaseId') + echo "tag=$BUILD_AND_TEST_RUN_ID" | tee -a $GITHUB_OUTPUT + ;; + *) + echo "Unexpected RUN_KIND ('${RUN_KIND}'), failing to assign build-tag!" + exit 1 + esac + + - name: Get the previous release-tags + id: previous-releases + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + gh api --paginate \ + -H "Accept: application/vnd.github+json" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + "/repos/${GITHUB_REPOSITORY}/releases" \ + | jq -f .github/scripts/previous-releases.jq -r \ + | tee -a "${GITHUB_OUTPUT}" diff --git a/.github/workflows/_push-to-container-registry.yml b/.github/workflows/_push-to-container-registry.yml index c938f62ad5..2dab665f40 100644 --- a/.github/workflows/_push-to-container-registry.yml +++ b/.github/workflows/_push-to-container-registry.yml @@ -11,8 +11,12 @@ on: description: AWS region to log in to. Required when pushing to ECR. required: false type: string - aws-account-ids: - description: Comma separated AWS account IDs to log in to for pushing to ECR. 
Required when pushing to ECR. + aws-account-id: + description: AWS account ID to log in to for pushing to ECR. Required when pushing to ECR. + required: false + type: string + aws-role-to-assume: + description: AWS role to assume for pushing to ECR. Required when pushing to ECR. required: false type: string azure-client-id: @@ -31,16 +35,6 @@ on: description: ACR registry name. Required when pushing to ACR. required: false type: string - secrets: - docker-hub-username: - description: Docker Hub username. Required when pushing to Docker Hub. - required: false - docker-hub-password: - description: Docker Hub password. Required when pushing to Docker Hub. - required: false - aws-role-to-assume: - description: AWS role to assume. Required when pushing to ECR. - required: false permissions: {} @@ -53,10 +47,11 @@ jobs: runs-on: ubuntu-22.04 permissions: id-token: write # Required for aws/azure login + packages: write # required for pushing to GHCR steps: - uses: actions/checkout@v4 with: - sparse-checkout: scripts/push_with_image_map.py + sparse-checkout: .github/scripts/push_with_image_map.py sparse-checkout-cone-mode: false - name: Print image-map @@ -67,14 +62,14 @@ jobs: uses: aws-actions/configure-aws-credentials@v4 with: aws-region: "${{ inputs.aws-region }}" - role-to-assume: "${{ secrets.aws-role-to-assume }}" + role-to-assume: "arn:aws:iam::${{ inputs.aws-account-id }}:role/${{ inputs.aws-role-to-assume }}" role-duration-seconds: 3600 - name: Login to ECR if: contains(inputs.image-map, 'amazonaws.com/') uses: aws-actions/amazon-ecr-login@v2 with: - registries: "${{ inputs.aws-account-ids }}" + registries: "${{ inputs.aws-account-id }}" - name: Configure Azure credentials if: contains(inputs.image-map, 'azurecr.io/') @@ -89,13 +84,21 @@ jobs: run: | az acr login --name=${{ inputs.acr-registry-name }} + - name: Login to GHCR + if: contains(inputs.image-map, 'ghcr.io/') + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Log in to Docker Hub uses: docker/login-action@v3 with: - username: ${{ secrets.docker-hub-username }} - password: ${{ secrets.docker-hub-password }} + username: ${{ secrets.NEON_DOCKERHUB_USERNAME }} + password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} - name: Copy docker images to target registries - run: python scripts/push_with_image_map.py + run: python3 .github/scripts/push_with_image_map.py env: IMAGE_MAP: ${{ inputs.image-map }} diff --git a/.github/workflows/benchmarking.yml b/.github/workflows/benchmarking.yml index b36ac46f35..dd4941bb21 100644 --- a/.github/workflows/benchmarking.yml +++ b/.github/workflows/benchmarking.yml @@ -398,6 +398,9 @@ jobs: runs-on: ${{ matrix.runner }} container: image: ${{ matrix.image }} + credentials: + username: ${{ secrets.NEON_DOCKERHUB_USERNAME }} + password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} options: --init # Increase timeout to 8h, default timeout is 6h diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 8f3392ceea..1fd4f292e8 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -65,38 +65,11 @@ jobs: token: ${{ secrets.GITHUB_TOKEN }} filters: .github/file-filters.yaml - tag: + meta: needs: [ check-permissions ] - runs-on: [ self-hosted, small ] - container: ${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/base:pinned - outputs: - build-tag: ${{steps.build-tag.outputs.tag}} - - steps: - # Need
`fetch-depth: 0` to count the number of commits in the branch - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Get build tag - run: | - echo run:$GITHUB_RUN_ID - echo ref:$GITHUB_REF_NAME - echo rev:$(git rev-list --count HEAD) - if [[ "$GITHUB_REF_NAME" == "main" ]]; then - echo "tag=$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT - elif [[ "$GITHUB_REF_NAME" == "release" ]]; then - echo "tag=release-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT - elif [[ "$GITHUB_REF_NAME" == "release-proxy" ]]; then - echo "tag=release-proxy-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT - elif [[ "$GITHUB_REF_NAME" == "release-compute" ]]; then - echo "tag=release-compute-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT - else - echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release', 'release-proxy', 'release-compute'" - echo "tag=$GITHUB_RUN_ID" >> $GITHUB_OUTPUT - fi - shell: bash - id: build-tag + uses: ./.github/workflows/_meta.yml + with: + github-event-name: ${{ github.event_name }} build-build-tools-image: needs: [ check-permissions ] @@ -199,7 +172,7 @@ jobs: secrets: inherit build-and-test-locally: - needs: [ tag, build-build-tools-image ] + needs: [ meta, build-build-tools-image ] strategy: fail-fast: false matrix: @@ -213,7 +186,7 @@ jobs: with: arch: ${{ matrix.arch }} build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm - build-tag: ${{ needs.tag.outputs.build-tag }} + build-tag: ${{ needs.meta.outputs.build-tag }} build-type: ${{ matrix.build-type }} # Run tests on all Postgres versions in release builds and only on the latest version in debug builds. # Run without LFC on v17 release and debug builds only. For all the other cases LFC is enabled. @@ -497,13 +470,24 @@ jobs: }) trigger-e2e-tests: - if: ${{ !github.event.pull_request.draft || contains( github.event.pull_request.labels.*.name, 'run-e2e-tests-in-draft') || github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute' }} - needs: [ check-permissions, push-neon-image-dev, push-compute-image-dev, tag ] + # Depends on jobs that can get skipped + if: >- + ${{ + ( + !github.event.pull_request.draft + || contains( github.event.pull_request.labels.*.name, 'run-e2e-tests-in-draft') + || contains(fromJSON('["push-main", "storage-release", "proxy-release", "compute-release"]'), needs.meta.outputs.run-kind) + ) && !failure() && !cancelled() + }} + needs: [ check-permissions, push-neon-image-dev, push-compute-image-dev, meta ] uses: ./.github/workflows/trigger-e2e-tests.yml + with: + github-event-name: ${{ github.event_name }} secrets: inherit neon-image-arch: - needs: [ check-permissions, build-build-tools-image, tag ] + needs: [ check-permissions, build-build-tools-image, meta ] + if: ${{ contains(fromJSON('["push-main", "pr-main", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }} strategy: matrix: arch: [ x64, arm64 ] @@ -539,7 +523,7 @@ jobs: build-args: | ADDITIONAL_RUSTFLAGS=${{ matrix.arch == 'arm64' && '-Ctarget-feature=+lse -Ctarget-cpu=neoverse-n1' || '' }} GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }} - BUILD_TAG=${{ needs.tag.outputs.build-tag }} + BUILD_TAG=${{ needs.meta.outputs.build-tag }} TAG=${{ needs.build-build-tools-image.outputs.image-tag }}-bookworm DEBIAN_VERSION=bookworm provenance: false @@ -549,10 +533,11 @@ jobs: cache-from: type=registry,ref=cache.neon.build/neon:cache-bookworm-${{ 
matrix.arch }} cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/neon:cache-{0}-{1},mode=max', 'bookworm', matrix.arch) || '' }} tags: | - neondatabase/neon:${{ needs.tag.outputs.build-tag }}-bookworm-${{ matrix.arch }} + neondatabase/neon:${{ needs.meta.outputs.build-tag }}-bookworm-${{ matrix.arch }} neon-image: - needs: [ neon-image-arch, tag ] + needs: [ neon-image-arch, meta ] + if: ${{ contains(fromJSON('["push-main", "pr-main", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }} runs-on: ubuntu-22.04 permissions: id-token: write # aws-actions/configure-aws-credentials @@ -567,13 +552,14 @@ jobs: - name: Create multi-arch image run: | - docker buildx imagetools create -t neondatabase/neon:${{ needs.tag.outputs.build-tag }} \ - -t neondatabase/neon:${{ needs.tag.outputs.build-tag }}-bookworm \ - neondatabase/neon:${{ needs.tag.outputs.build-tag }}-bookworm-x64 \ - neondatabase/neon:${{ needs.tag.outputs.build-tag }}-bookworm-arm64 + docker buildx imagetools create -t neondatabase/neon:${{ needs.meta.outputs.build-tag }} \ + -t neondatabase/neon:${{ needs.meta.outputs.build-tag }}-bookworm \ + neondatabase/neon:${{ needs.meta.outputs.build-tag }}-bookworm-x64 \ + neondatabase/neon:${{ needs.meta.outputs.build-tag }}-bookworm-arm64 compute-node-image-arch: - needs: [ check-permissions, build-build-tools-image, tag ] + needs: [ check-permissions, build-build-tools-image, meta ] + if: ${{ contains(fromJSON('["push-main", "pr-main", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }} permissions: id-token: write # aws-actions/configure-aws-credentials statuses: write @@ -631,7 +617,7 @@ jobs: build-args: | GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }} PG_VERSION=${{ matrix.version.pg }} - BUILD_TAG=${{ needs.tag.outputs.build-tag }} + BUILD_TAG=${{ needs.meta.outputs.build-tag }} TAG=${{ needs.build-build-tools-image.outputs.image-tag }}-${{ matrix.version.debian }} DEBIAN_VERSION=${{ matrix.version.debian }} provenance: false @@ -641,7 +627,7 @@ jobs: cache-from: type=registry,ref=cache.neon.build/compute-node-${{ matrix.version.pg }}:cache-${{ matrix.version.debian }}-${{ matrix.arch }} cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/compute-node-{0}:cache-{1}-{2},mode=max', matrix.version.pg, matrix.version.debian, matrix.arch) || '' }} tags: | - neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-${{ matrix.arch }} + neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }}-${{ matrix.arch }} - name: Build neon extensions test image if: matrix.version.pg >= 'v16' @@ -651,7 +637,7 @@ jobs: build-args: | GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }} PG_VERSION=${{ matrix.version.pg }} - BUILD_TAG=${{ needs.tag.outputs.build-tag }} + BUILD_TAG=${{ needs.meta.outputs.build-tag }} TAG=${{ needs.build-build-tools-image.outputs.image-tag }}-${{ matrix.version.debian }} DEBIAN_VERSION=${{ matrix.version.debian }} provenance: false @@ -661,10 +647,11 @@ jobs: target: extension-tests cache-from: type=registry,ref=cache.neon.build/compute-node-${{ matrix.version.pg }}:cache-${{ matrix.version.debian }}-${{ matrix.arch }} tags: | - neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{needs.tag.outputs.build-tag}}-${{ matrix.version.debian }}-${{ matrix.arch }} + 
neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{needs.meta.outputs.build-tag}}-${{ matrix.version.debian }}-${{ matrix.arch }} compute-node-image: - needs: [ compute-node-image-arch, tag ] + needs: [ compute-node-image-arch, meta ] + if: ${{ contains(fromJSON('["push-main", "pr-main", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }} permissions: id-token: write # aws-actions/configure-aws-credentials statuses: write @@ -692,21 +679,22 @@ jobs: - name: Create multi-arch compute-node image run: | - docker buildx imagetools create -t neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }} \ - -t neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }} \ - neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-x64 \ - neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-arm64 + docker buildx imagetools create -t neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }} \ + -t neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }} \ + neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }}-x64 \ + neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }}-arm64 - name: Create multi-arch neon-test-extensions image if: matrix.version.pg >= 'v16' run: | - docker buildx imagetools create -t neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }} \ - -t neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }} \ - neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-x64 \ - neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-arm64 + docker buildx imagetools create -t neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }} \ + -t neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }} \ + neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }}-x64 \ + neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }}-arm64 vm-compute-node-image: - needs: [ check-permissions, tag, compute-node-image ] + needs: [ check-permissions, meta, compute-node-image ] + if: ${{ contains(fromJSON('["push-main", "pr-main", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }} runs-on: [ self-hosted, large ] strategy: fail-fast: false @@ -722,14 +710,14 @@ jobs: - pg: v17 debian: bookworm env: - VM_BUILDER_VERSION: v0.37.1 + VM_BUILDER_VERSION: v0.42.2 steps: - uses: actions/checkout@v4 - name: Downloading vm-builder run: | - curl -fL https://github.com/neondatabase/autoscaling/releases/download/$VM_BUILDER_VERSION/vm-builder -o vm-builder + curl -fL https://github.com/neondatabase/autoscaling/releases/download/$VM_BUILDER_VERSION/vm-builder-amd64 -o vm-builder chmod +x vm-builder - uses: neondatabase/dev-actions/set-docker-config-dir@6094485bf440001c94a94a3f9e221e81ff6b6193 @@ -742,22 
+730,25 @@ jobs: # it won't have the proper authentication (written at v0.6.0) - name: Pulling compute-node image run: | - docker pull neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }} + docker pull neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }} - name: Build vm image run: | ./vm-builder \ -size=2G \ -spec=compute/vm-image-spec-${{ matrix.version.debian }}.yaml \ - -src=neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }} \ - -dst=neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }} + -src=neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }} \ + -dst=neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }} \ + -target-arch=linux/amd64 - name: Pushing vm-compute-node image run: | - docker push neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }} + docker push neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }} test-images: - needs: [ check-permissions, tag, neon-image, compute-node-image ] + needs: [ check-permissions, meta, neon-image, compute-node-image ] + # Depends on jobs that can get skipped + if: "!failure() && !cancelled()" strategy: fail-fast: false matrix: @@ -775,17 +766,6 @@ jobs: username: ${{ secrets.NEON_DOCKERHUB_USERNAME }} password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} - - name: Get the last compute release tag - id: get-last-compute-release-tag - env: - GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }} - run: | - tag=$(gh api -q '[.[].tag_name | select(startswith("release-compute"))][0]'\ - -H "Accept: application/vnd.github+json" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - "/repos/${{ github.repository }}/releases") - echo tag=${tag} >> ${GITHUB_OUTPUT} - # `neondatabase/neon` contains multiple binaries, all of them use the same input for the version into the same version formatting library. # Pick pageserver as currently the only binary with extra "version" features printed in the string to verify. # Regular pageserver version string looks like @@ -795,8 +775,9 @@ jobs: # Ensure that we don't have bad versions. 
- name: Verify image versions shell: bash # ensure no set -e for better error messages + if: ${{ contains(fromJSON('["push-main", "pr-main", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }} run: | - pageserver_version=$(docker run --rm neondatabase/neon:${{ needs.tag.outputs.build-tag }} "/bin/sh" "-c" "/usr/local/bin/pageserver --version") + pageserver_version=$(docker run --rm neondatabase/neon:${{ needs.meta.outputs.build-tag }} "/bin/sh" "-c" "/usr/local/bin/pageserver --version") echo "Pageserver version string: $pageserver_version" @@ -813,7 +794,24 @@ jobs: - name: Verify docker-compose example and test extensions timeout-minutes: 20 env: - TAG: ${{needs.tag.outputs.build-tag}} + TAG: >- + ${{ + contains(fromJSON('["compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) + && needs.meta.outputs.previous-storage-release + || needs.meta.outputs.build-tag + }} + COMPUTE_TAG: >- + ${{ + contains(fromJSON('["storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) + && needs.meta.outputs.previous-compute-release + || needs.meta.outputs.build-tag + }} + TEST_EXTENSIONS_TAG: >- + ${{ + contains(fromJSON('["storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) + && 'latest' + || needs.meta.outputs.build-tag + }} TEST_VERSION_ONLY: ${{ matrix.pg_version }} run: ./docker-compose/docker_compose_test.sh @@ -825,10 +823,17 @@ jobs: - name: Test extension upgrade timeout-minutes: 20 - if: ${{ needs.tag.outputs.build-tag == github.run_id }} + if: ${{ contains(fromJSON('["pr-main", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }} env: - NEWTAG: ${{ needs.tag.outputs.build-tag }} - OLDTAG: ${{ steps.get-last-compute-release-tag.outputs.tag }} + TAG: >- + ${{ + false + || needs.meta.outputs.run-kind == 'pr-main' && needs.meta.outputs.build-tag + || needs.meta.outputs.run-kind == 'compute-rc-pr' && needs.meta.outputs.previous-storage-release + }} + TEST_EXTENSIONS_TAG: latest + NEW_COMPUTE_TAG: ${{ needs.meta.outputs.build-tag }} + OLD_COMPUTE_TAG: ${{ needs.meta.outputs.previous-compute-release }} run: ./docker-compose/test_extensions_upgrade.sh - name: Print logs and clean up @@ -838,7 +843,7 @@ jobs: docker compose --profile test-extensions -f ./docker-compose/docker-compose.yml down generate-image-maps: - needs: [ tag ] + needs: [ meta ] runs-on: ubuntu-22.04 outputs: neon-dev: ${{ steps.generate.outputs.neon-dev }} @@ -848,14 +853,14 @@ jobs: steps: - uses: actions/checkout@v4 with: - sparse-checkout: scripts/generate_image_maps.py + sparse-checkout: .github/scripts/generate_image_maps.py sparse-checkout-cone-mode: false - name: Generate Image Maps id: generate - run: python scripts/generate_image_maps.py + run: python3 .github/scripts/generate_image_maps.py env: - BUILD_TAG: "${{ needs.tag.outputs.build-tag }}" + BUILD_TAG: "${{ needs.meta.outputs.build-tag }}" BRANCH: "${{ github.ref_name }}" DEV_ACR: "${{ vars.AZURE_DEV_REGISTRY_NAME }}" PROD_ACR: "${{ vars.AZURE_PROD_REGISTRY_NAME }}" @@ -864,88 +869,93 @@ jobs: AWS_REGION: "${{ vars.AWS_ECR_REGION }}" push-neon-image-dev: - needs: [ generate-image-maps, neon-image ] + needs: [ meta, generate-image-maps, neon-image ] + if: ${{ contains(fromJSON('["push-main", "pr-main", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }} uses: ./.github/workflows/_push-to-container-registry.yml + permissions: + id-token: write # Required for aws/azure login 
+ packages: write # required for pushing to GHCR with: image-map: '${{ needs.generate-image-maps.outputs.neon-dev }}' aws-region: ${{ vars.AWS_ECR_REGION }} - aws-account-ids: "${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}" + aws-account-id: "${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}" + aws-role-to-assume: "gha-oidc-neon-admin" azure-client-id: ${{ vars.AZURE_DEV_CLIENT_ID }} azure-subscription-id: ${{ vars.AZURE_DEV_SUBSCRIPTION_ID }} azure-tenant-id: ${{ vars.AZURE_TENANT_ID }} acr-registry-name: ${{ vars.AZURE_DEV_REGISTRY_NAME }} - secrets: - aws-role-to-assume: "${{ vars.DEV_AWS_OIDC_ROLE_ARN }}" - docker-hub-username: ${{ secrets.NEON_DOCKERHUB_USERNAME }} - docker-hub-password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} + secrets: inherit push-compute-image-dev: - needs: [ generate-image-maps, vm-compute-node-image ] + needs: [ meta, generate-image-maps, vm-compute-node-image ] + if: ${{ contains(fromJSON('["push-main", "pr-main", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }} uses: ./.github/workflows/_push-to-container-registry.yml + permissions: + id-token: write # Required for aws/azure login + packages: write # required for pushing to GHCR with: image-map: '${{ needs.generate-image-maps.outputs.compute-dev }}' aws-region: ${{ vars.AWS_ECR_REGION }} - aws-account-ids: "${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}" + aws-account-id: "${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}" + aws-role-to-assume: "gha-oidc-neon-admin" azure-client-id: ${{ vars.AZURE_DEV_CLIENT_ID }} azure-subscription-id: ${{ vars.AZURE_DEV_SUBSCRIPTION_ID }} azure-tenant-id: ${{ vars.AZURE_TENANT_ID }} acr-registry-name: ${{ vars.AZURE_DEV_REGISTRY_NAME }} - secrets: - aws-role-to-assume: "${{ vars.DEV_AWS_OIDC_ROLE_ARN }}" - docker-hub-username: ${{ secrets.NEON_DOCKERHUB_USERNAME }} - docker-hub-password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} + secrets: inherit push-neon-image-prod: - if: github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute' - needs: [ generate-image-maps, neon-image, test-images ] + needs: [ meta, generate-image-maps, neon-image, test-images ] + if: ${{ contains(fromJSON('["storage-release", "proxy-release"]'), needs.meta.outputs.run-kind) }} uses: ./.github/workflows/_push-to-container-registry.yml + permissions: + id-token: write # Required for aws/azure login + packages: write # required for pushing to GHCR with: image-map: '${{ needs.generate-image-maps.outputs.neon-prod }}' aws-region: ${{ vars.AWS_ECR_REGION }} - aws-account-ids: "${{ vars.NEON_PROD_AWS_ACCOUNT_ID }}" + aws-account-id: "${{ vars.NEON_PROD_AWS_ACCOUNT_ID }}" + aws-role-to-assume: "gha-oidc-neon-admin" azure-client-id: ${{ vars.AZURE_PROD_CLIENT_ID }} azure-subscription-id: ${{ vars.AZURE_PROD_SUBSCRIPTION_ID }} azure-tenant-id: ${{ vars.AZURE_TENANT_ID }} acr-registry-name: ${{ vars.AZURE_PROD_REGISTRY_NAME }} - secrets: - aws-role-to-assume: "${{ secrets.PROD_GHA_OIDC_ROLE }}" - docker-hub-username: ${{ secrets.NEON_DOCKERHUB_USERNAME }} - docker-hub-password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} + secrets: inherit push-compute-image-prod: - if: github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute' - needs: [ generate-image-maps, vm-compute-node-image, test-images ] + needs: [ meta, generate-image-maps, vm-compute-node-image, test-images ] + if: ${{ needs.meta.outputs.run-kind == 'compute-release' }} uses: ./.github/workflows/_push-to-container-registry.yml + permissions: + id-token: write # Required for aws/azure login + 
packages: write # required for pushing to GHCR with: image-map: '${{ needs.generate-image-maps.outputs.compute-prod }}' aws-region: ${{ vars.AWS_ECR_REGION }} - aws-account-ids: "${{ vars.NEON_PROD_AWS_ACCOUNT_ID }}" + aws-account-id: "${{ vars.NEON_PROD_AWS_ACCOUNT_ID }}" + aws-role-to-assume: "gha-oidc-neon-admin" azure-client-id: ${{ vars.AZURE_PROD_CLIENT_ID }} azure-subscription-id: ${{ vars.AZURE_PROD_SUBSCRIPTION_ID }} azure-tenant-id: ${{ vars.AZURE_TENANT_ID }} acr-registry-name: ${{ vars.AZURE_PROD_REGISTRY_NAME }} - secrets: - aws-role-to-assume: "${{ secrets.PROD_GHA_OIDC_ROLE }}" - docker-hub-username: ${{ secrets.NEON_DOCKERHUB_USERNAME }} - docker-hub-password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} + secrets: inherit # This is a bit of a special case so we're not using a generated image map. add-latest-tag-to-neon-extensions-test-image: if: github.ref_name == 'main' - needs: [ tag, compute-node-image ] + needs: [ meta, compute-node-image ] uses: ./.github/workflows/_push-to-container-registry.yml with: image-map: | { - "docker.io/neondatabase/neon-test-extensions-v16:${{ needs.tag.outputs.build-tag }}": ["docker.io/neondatabase/neon-test-extensions-v16:latest"], - "docker.io/neondatabase/neon-test-extensions-v17:${{ needs.tag.outputs.build-tag }}": ["docker.io/neondatabase/neon-test-extensions-v17:latest"] + "docker.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}": ["docker.io/neondatabase/neon-test-extensions-v16:latest"], + "docker.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}": ["docker.io/neondatabase/neon-test-extensions-v17:latest"] } - secrets: - docker-hub-username: ${{ secrets.NEON_DOCKERHUB_USERNAME }} - docker-hub-password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} + secrets: inherit trigger-custom-extensions-build-and-wait: - needs: [ check-permissions, tag ] + needs: [ check-permissions, meta ] + if: ${{ contains(fromJSON('["push-main", "pr-main", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }} runs-on: ubuntu-22.04 permissions: id-token: write # aws-actions/configure-aws-credentials @@ -980,7 +990,7 @@ jobs: \"ci_job_name\": \"build-and-upload-extensions\", \"commit_hash\": \"$COMMIT_SHA\", \"remote_repo\": \"${{ github.repository }}\", - \"compute_image_tag\": \"${{ needs.tag.outputs.build-tag }}\", + \"compute_image_tag\": \"${{ needs.meta.outputs.build-tag }}\", \"remote_branch_name\": \"${{ github.ref_name }}\" } }" @@ -1024,9 +1034,9 @@ jobs: exit 1 deploy: - needs: [ check-permissions, push-neon-image-prod, push-compute-image-prod, tag, build-and-test-locally, trigger-custom-extensions-build-and-wait ] - # `!failure() && !cancelled()` is required because the workflow depends on the job that can be skipped: `push-to-acr-dev` and `push-to-acr-prod` - if: (github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute') && !failure() && !cancelled() + needs: [ check-permissions, push-neon-image-prod, push-compute-image-prod, meta, build-and-test-locally, trigger-custom-extensions-build-and-wait ] + # `!failure() && !cancelled()` is required because the workflow depends on the job that can be skipped: `push-neon-image-prod` and `push-compute-image-prod` + if: ${{ contains(fromJSON('["push-main", "storage-release", "proxy-release", "compute-release"]'), needs.meta.outputs.run-kind) && !failure() && !cancelled() }} permissions: id-token: write # aws-actions/configure-aws-credentials statuses: write @@ -1037,108 
+1047,103 @@ jobs: - uses: actions/checkout@v4 - name: Create git tag and GitHub release - if: github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute' + if: ${{ contains(fromJSON('["storage-release", "proxy-release", "compute-release"]'), needs.meta.outputs.run-kind) }} uses: actions/github-script@v7 + env: + TAG: "${{ needs.meta.outputs.build-tag }}" + BRANCH: "${{ github.ref_name }}" + PREVIOUS_RELEASE: >- + ${{ + false + || needs.meta.outputs.run-kind == 'storage-release' && needs.meta.outputs.previous-storage-release + || needs.meta.outputs.run-kind == 'proxy-release' && needs.meta.outputs.previous-proxy-release + || needs.meta.outputs.run-kind == 'compute-release' && needs.meta.outputs.previous-compute-release + || 'unknown' + }} with: retries: 5 script: | - const tag = "${{ needs.tag.outputs.build-tag }}"; - const branch = "${{ github.ref_name }}"; + const { TAG, BRANCH, PREVIOUS_RELEASE } = process.env try { const existingRef = await github.rest.git.getRef({ owner: context.repo.owner, repo: context.repo.repo, - ref: `tags/${tag}`, + ref: `tags/${TAG}`, }); if (existingRef.data.object.sha !== context.sha) { - throw new Error(`Tag ${tag} already exists but points to a different commit (expected: ${context.sha}, actual: ${existingRef.data.object.sha}).`); + throw new Error(`Tag ${TAG} already exists but points to a different commit (expected: ${context.sha}, actual: ${existingRef.data.object.sha}).`); } - console.log(`Tag ${tag} already exists and points to ${context.sha} as expected.`); + console.log(`Tag ${TAG} already exists and points to ${context.sha} as expected.`); } catch (error) { if (error.status !== 404) { throw error; } - console.log(`Tag ${tag} does not exist. Creating it...`); + console.log(`Tag ${TAG} does not exist. Creating it...`); await github.rest.git.createRef({ owner: context.repo.owner, repo: context.repo.repo, - ref: `refs/tags/${tag}`, + ref: `refs/tags/${TAG}`, sha: context.sha, }); - console.log(`Tag ${tag} created successfully.`); + console.log(`Tag ${TAG} created successfully.`); } try { const existingRelease = await github.rest.repos.getReleaseByTag({ owner: context.repo.owner, repo: context.repo.repo, - tag: tag, + tag: TAG, }); - console.log(`Release for tag ${tag} already exists (ID: ${existingRelease.data.id}).`); + console.log(`Release for tag ${TAG} already exists (ID: ${existingRelease.data.id}).`); } catch (error) { if (error.status !== 404) { throw error; } - console.log(`Release for tag ${tag} does not exist. Creating it...`); + console.log(`Release for tag ${TAG} does not exist. Creating it...`); // Find the PR number using the commit SHA const pullRequests = await github.rest.pulls.list({ owner: context.repo.owner, repo: context.repo.repo, state: 'closed', - base: branch, + base: BRANCH, }); const pr = pullRequests.data.find(pr => pr.merge_commit_sha === context.sha); const prNumber = pr ? pr.number : null; - // Find the previous release on the branch - const releases = await github.rest.repos.listReleases({ - owner: context.repo.owner, - repo: context.repo.repo, - per_page: 100, - }); - - const branchReleases = releases.data - .filter((release) => { - const regex = new RegExp(`^${branch}-\\d+$`); - return regex.test(release.tag_name) && !release.draft && !release.prerelease; - }) - .sort((a, b) => new Date(b.created_at) - new Date(a.created_at)); - - const previousTag = branchReleases.length > 0 ? branchReleases[0].tag_name : null; - const releaseNotes = [ prNumber ? 
`Release PR https://github.com/${context.repo.owner}/${context.repo.repo}/pull/${prNumber}.` : 'Release PR not found.', - previousTag - ? `Diff with the previous release https://github.com/${context.repo.owner}/${context.repo.repo}/compare/${previousTag}...${tag}.` - : `No previous release found on branch ${branch}.`, + `Diff with the previous release https://github.com/${context.repo.owner}/${context.repo.repo}/compare/${PREVIOUS_RELEASE}...${TAG}.` ].join('\n\n'); await github.rest.repos.createRelease({ owner: context.repo.owner, repo: context.repo.repo, - tag_name: tag, + tag_name: TAG, body: releaseNotes, }); - console.log(`Release for tag ${tag} created successfully.`); + console.log(`Release for tag ${TAG} created successfully.`); } - name: Trigger deploy workflow env: GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }} + RUN_KIND: ${{ needs.meta.outputs.run-kind }} run: | - if [[ "$GITHUB_REF_NAME" == "main" ]]; then - gh workflow --repo neondatabase/infra run deploy-dev.yml --ref main -f branch=main -f dockerTag=${{needs.tag.outputs.build-tag}} -f deployPreprodRegion=false - elif [[ "$GITHUB_REF_NAME" == "release" ]]; then + case ${RUN_KIND} in + push-main) + gh workflow --repo neondatabase/infra run deploy-dev.yml --ref main -f branch=main -f dockerTag=${{needs.meta.outputs.build-tag}} -f deployPreprodRegion=false + ;; + storage-release) gh workflow --repo neondatabase/infra run deploy-dev.yml --ref main \ -f deployPgSniRouter=false \ -f deployProxy=false \ @@ -1146,7 +1151,7 @@ jobs: -f deployStorageBroker=true \ -f deployStorageController=true \ -f branch=main \ - -f dockerTag=${{needs.tag.outputs.build-tag}} \ + -f dockerTag=${{needs.meta.outputs.build-tag}} \ -f deployPreprodRegion=true gh workflow --repo neondatabase/infra run deploy-prod.yml --ref main \ @@ -1154,8 +1159,9 @@ jobs: -f deployStorageBroker=true \ -f deployStorageController=true \ -f branch=main \ - -f dockerTag=${{needs.tag.outputs.build-tag}} - elif [[ "$GITHUB_REF_NAME" == "release-proxy" ]]; then + -f dockerTag=${{needs.meta.outputs.build-tag}} + ;; + proxy-release) gh workflow --repo neondatabase/infra run deploy-dev.yml --ref main \ -f deployPgSniRouter=true \ -f deployProxy=true \ @@ -1163,7 +1169,7 @@ jobs: -f deployStorageBroker=false \ -f deployStorageController=false \ -f branch=main \ - -f dockerTag=${{needs.tag.outputs.build-tag}} \ + -f dockerTag=${{needs.meta.outputs.build-tag}} \ -f deployPreprodRegion=true gh workflow --repo neondatabase/infra run deploy-proxy-prod.yml --ref main \ @@ -1173,13 +1179,16 @@ jobs: -f deployProxyScram=true \ -f deployProxyAuthBroker=true \ -f branch=main \ - -f dockerTag=${{needs.tag.outputs.build-tag}} - elif [[ "$GITHUB_REF_NAME" == "release-compute" ]]; then - gh workflow --repo neondatabase/infra run deploy-compute-dev.yml --ref main -f dockerTag=${{needs.tag.outputs.build-tag}} - else - echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main', 'release', 'release-proxy' or 'release-compute'" + -f dockerTag=${{needs.meta.outputs.build-tag}} + ;; + compute-release) + gh workflow --repo neondatabase/infra run deploy-compute-dev.yml --ref main -f dockerTag=${{needs.meta.outputs.build-tag}} + ;; + *) + echo "RUN_KIND (value '${RUN_KIND}') is not set to either 'push-main', 'storage-release', 'proxy-release' or 'compute-release'" exit 1 - fi + ;; + esac notify-storage-release-deploy-failure: needs: [ deploy ] @@ -1204,7 +1213,7 @@ jobs: id-token: write # aws-actions/configure-aws-credentials statuses: write contents: read - # `!failure() && !cancelled()` is 
required because the workflow transitively depends on the job that can be skipped: `push-to-acr-dev` and `push-to-acr-prod` + # `!failure() && !cancelled()` is required because the workflow transitively depends on the job that can be skipped: `push-neon-image-prod` and `push-compute-image-prod` if: github.ref_name == 'release' && !failure() && !cancelled() runs-on: ubuntu-22.04 @@ -1294,7 +1303,8 @@ jobs: pin-build-tools-image: needs: [ build-build-tools-image, test-images, build-and-test-locally ] - if: github.ref_name == 'main' + # `!failure() && !cancelled()` is required because the job (transitively) depends on jobs that can be skipped + if: github.ref_name == 'main' && !failure() && !cancelled() uses: ./.github/workflows/pin-build-tools-image.yml with: from-tag: ${{ needs.build-build-tools-image.outputs.image-tag }} @@ -1313,6 +1323,7 @@ jobs: # Format `needs` differently to make the list more readable. # Usually we do `needs: [...]` needs: + - meta - build-and-test-locally - check-codestyle-python - check-codestyle-rust @@ -1336,7 +1347,7 @@ jobs: || needs.check-codestyle-python.result == 'skipped' || needs.check-codestyle-rust.result == 'skipped' || needs.files-changed.result == 'skipped' - || needs.push-compute-image-dev.result == 'skipped' - || needs.push-neon-image-dev.result == 'skipped' + || (needs.push-compute-image-dev.result == 'skipped' && contains(fromJSON('["push-main", "pr-main", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind)) + || (needs.push-neon-image-dev.result == 'skipped' && contains(fromJSON('["push-main", "pr-main", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind)) || needs.test-images.result == 'skipped' - || needs.trigger-custom-extensions-build-and-wait.result == 'skipped' + || (needs.trigger-custom-extensions-build-and-wait.result == 'skipped' && contains(fromJSON('["push-main", "pr-main", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind)) diff --git a/.github/workflows/cloud-regress.yml b/.github/workflows/cloud-regress.yml index 09d6acd325..606e1c0862 100644 --- a/.github/workflows/cloud-regress.yml +++ b/.github/workflows/cloud-regress.yml @@ -38,6 +38,9 @@ jobs: runs-on: us-east-2 container: image: neondatabase/build-tools:pinned-bookworm + credentials: + username: ${{ secrets.NEON_DOCKERHUB_USERNAME }} + password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} options: --init steps: diff --git a/.github/workflows/neon_extra_builds.yml b/.github/workflows/neon_extra_builds.yml index f077e04d1c..90318747b3 100644 --- a/.github/workflows/neon_extra_builds.yml +++ b/.github/workflows/neon_extra_builds.yml @@ -71,7 +71,7 @@ jobs: uses: ./.github/workflows/build-macos.yml with: pg_versions: ${{ needs.files-changed.outputs.postgres_changes }} - rebuild_rust_code: ${{ needs.files-changed.outputs.rebuild_rust_code }} + rebuild_rust_code: ${{ fromJson(needs.files-changed.outputs.rebuild_rust_code) }} rebuild_everything: ${{ fromJson(needs.files-changed.outputs.rebuild_everything) }} gather-rust-build-stats: diff --git a/.github/workflows/pin-build-tools-image.yml b/.github/workflows/pin-build-tools-image.yml index b305b662ee..d2588ba0bf 100644 --- a/.github/workflows/pin-build-tools-image.yml +++ b/.github/workflows/pin-build-tools-image.yml @@ -65,6 +65,7 @@ jobs: permissions: id-token: write # Required for aws/azure login + packages: write # required for pushing to GHCR uses: ./.github/workflows/_push-to-container-registry.yml with: @@ -72,12 +73,15 @@ jobs: { 
"docker.io/neondatabase/build-tools:${{ inputs.from-tag }}-bullseye": [ "docker.io/neondatabase/build-tools:pinned-bullseye", + "ghcr.io/neondatabase/build-tools:pinned-bullseye", "${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/build-tools:pinned-bullseye", "${{ vars.AZURE_DEV_REGISTRY_NAME }}.azurecr.io/neondatabase/build-tools:pinned-bullseye" ], "docker.io/neondatabase/build-tools:${{ inputs.from-tag }}-bookworm": [ "docker.io/neondatabase/build-tools:pinned-bookworm", "docker.io/neondatabase/build-tools:pinned", + "ghcr.io/neondatabase/build-tools:pinned-bookworm", + "ghcr.io/neondatabase/build-tools:pinned", "${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/build-tools:pinned-bookworm", "${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/build-tools:pinned", "${{ vars.AZURE_DEV_REGISTRY_NAME }}.azurecr.io/neondatabase/build-tools:pinned-bookworm", @@ -85,12 +89,10 @@ jobs: ] } aws-region: ${{ vars.AWS_ECR_REGION }} - aws-account-ids: "${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}" + aws-account-id: "${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}" + aws-role-to-assume: "gha-oidc-neon-admin" azure-client-id: ${{ vars.AZURE_DEV_CLIENT_ID }} azure-subscription-id: ${{ vars.AZURE_DEV_SUBSCRIPTION_ID }} azure-tenant-id: ${{ vars.AZURE_TENANT_ID }} acr-registry-name: ${{ vars.AZURE_DEV_REGISTRY_NAME }} - secrets: - aws-role-to-assume: "${{ vars.DEV_AWS_OIDC_ROLE_ARN }}" - docker-hub-username: ${{ secrets.NEON_DOCKERHUB_USERNAME }} - docker-hub-password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} + secrets: inherit diff --git a/.github/workflows/trigger-e2e-tests.yml b/.github/workflows/trigger-e2e-tests.yml index be6a7a7901..a30da35502 100644 --- a/.github/workflows/trigger-e2e-tests.yml +++ b/.github/workflows/trigger-e2e-tests.yml @@ -5,6 +5,10 @@ on: types: - ready_for_review workflow_call: + inputs: + github-event-name: + type: string + required: true defaults: run: @@ -19,7 +23,7 @@ jobs: if: ${{ !contains(github.event.pull_request.labels.*.name, 'run-no-ci') }} uses: ./.github/workflows/check-permissions.yml with: - github-event-name: ${{ github.event_name }} + github-event-name: ${{ inputs.github-event-name || github.event_name }} cancel-previous-e2e-tests: needs: [ check-permissions ] @@ -35,46 +39,29 @@ jobs: run cancel-previous-in-concurrency-group.yml \ --field concurrency_group="${{ env.E2E_CONCURRENCY_GROUP }}" - tag: - needs: [ check-permissions ] - runs-on: ubuntu-22.04 - outputs: - build-tag: ${{ steps.build-tag.outputs.tag }} - - steps: - # Need `fetch-depth: 0` to count the number of commits in the branch - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Get build tag - env: - GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }} - CURRENT_BRANCH: ${{ github.head_ref || github.ref_name }} - CURRENT_SHA: ${{ github.event.pull_request.head.sha || github.sha }} - run: | - if [[ "$GITHUB_REF_NAME" == "main" ]]; then - echo "tag=$(git rev-list --count HEAD)" | tee -a $GITHUB_OUTPUT - elif [[ "$GITHUB_REF_NAME" == "release" ]]; then - echo "tag=release-$(git rev-list --count HEAD)" | tee -a $GITHUB_OUTPUT - elif [[ "$GITHUB_REF_NAME" == "release-proxy" ]]; then - echo "tag=release-proxy-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT - elif [[ "$GITHUB_REF_NAME" == "release-compute" ]]; then - echo "tag=release-compute-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT - else - echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'" - BUILD_AND_TEST_RUN_ID=$(gh 
run list -b $CURRENT_BRANCH -c $CURRENT_SHA -w 'Build and Test' -L 1 --json databaseId --jq '.[].databaseId') - echo "tag=$BUILD_AND_TEST_RUN_ID" | tee -a $GITHUB_OUTPUT - fi - id: build-tag + meta: + uses: ./.github/workflows/_meta.yml + with: + github-event-name: ${{ inputs.github-event-name || github.event_name }} trigger-e2e-tests: - needs: [ tag ] + needs: [ meta ] runs-on: ubuntu-22.04 env: EVENT_ACTION: ${{ github.event.action }} GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }} - TAG: ${{ needs.tag.outputs.build-tag }} + TAG: >- + ${{ + contains(fromJSON('["compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) + && needs.meta.outputs.previous-storage-release + || needs.meta.outputs.build-tag + }} + COMPUTE_TAG: >- + ${{ + contains(fromJSON('["storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) + && needs.meta.outputs.previous-compute-release + || needs.meta.outputs.build-tag + }} steps: - name: Wait for `push-{neon,compute}-image-dev` job to finish # It's important to have a timeout here, the script in the step can run infinitely @@ -157,6 +144,6 @@ jobs: --raw-field "commit_hash=$COMMIT_SHA" \ --raw-field "remote_repo=${GITHUB_REPOSITORY}" \ --raw-field "storage_image_tag=${TAG}" \ - --raw-field "compute_image_tag=${TAG}" \ + --raw-field "compute_image_tag=${COMPUTE_TAG}" \ --raw-field "concurrency_group=${E2E_CONCURRENCY_GROUP}" \ --raw-field "e2e-platforms=${E2E_PLATFORMS}" diff --git a/Cargo.lock b/Cargo.lock index 12232eaece..7d11f2b7fc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1342,7 +1342,9 @@ dependencies = [ "tokio-util", "tower 0.5.2", "tower-http", + "tower-otel", "tracing", + "tracing-opentelemetry", "tracing-subscriber", "tracing-utils", "url", @@ -1546,6 +1548,17 @@ dependencies = [ "itertools 0.10.5", ] +[[package]] +name = "cron" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5877d3fbf742507b66bc2a1945106bd30dd8504019d596901ddd012a4dd01740" +dependencies = [ + "chrono", + "once_cell", + "winnow", +] + [[package]] name = "crossbeam-channel" version = "0.5.8" @@ -1874,6 +1887,12 @@ dependencies = [ "syn 2.0.90", ] +[[package]] +name = "difflib" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8" + [[package]] name = "digest" version = "0.10.7" @@ -3331,6 +3350,17 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "json-structural-diff" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e878e36a8a44c158505c2c818abdc1350413ad83dcb774a0459f6a7ef2b65cbf" +dependencies = [ + "difflib", + "regex", + "serde_json", +] + [[package]] name = "jsonwebtoken" version = "9.2.0" @@ -4155,7 +4185,6 @@ dependencies = [ "pageserver_client", "pageserver_compaction", "pin-project-lite", - "postgres", "postgres-protocol", "postgres-types", "postgres_backend", @@ -4242,7 +4271,6 @@ dependencies = [ "futures", "http-utils", "pageserver_api", - "postgres", "reqwest", "serde", "thiserror 1.0.69", @@ -4458,18 +4486,18 @@ dependencies = [ [[package]] name = "pin-project" -version = "1.1.0" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c95a7476719eab1e366eaf73d0260af3021184f18177925b07f54b30089ceead" +checksum = "dfe2e71e1471fe07709406bf725f710b02927c9c54b2b5b2ec0e8087d97c327d" dependencies = [ "pin-project-internal", ] [[package]] name = "pin-project-internal" -version = 
"1.1.0" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39407670928234ebc5e6e580247dd567ad73a3578460c5990f9503df207e8f07" +checksum = "f6e859e6e5bd50440ab63c47e3ebabc90f26251f7c73c3d3e837b74a1cc3fa67" dependencies = [ "proc-macro2", "quote", @@ -4657,7 +4685,6 @@ dependencies = [ "anyhow", "itertools 0.10.5", "once_cell", - "postgres", "tokio-postgres", "url", ] @@ -5799,7 +5826,6 @@ dependencies = [ "once_cell", "pageserver_api", "parking_lot 0.12.1", - "postgres", "postgres-protocol", "postgres_backend", "postgres_ffi", @@ -6433,6 +6459,7 @@ dependencies = [ "chrono", "clap", "control_plane", + "cron", "diesel", "diesel-async", "diesel_migrations", @@ -6443,6 +6470,7 @@ dependencies = [ "humantime", "hyper 0.14.30", "itertools 0.10.5", + "json-structural-diff", "lasso", "measured", "metrics", @@ -7268,6 +7296,20 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" +[[package]] +name = "tower-otel" +version = "0.2.0" +source = "git+https://github.com/mattiapenati/tower-otel?rev=56a7321053bcb72443888257b622ba0d43a11fcd#56a7321053bcb72443888257b622ba0d43a11fcd" +dependencies = [ + "http 1.1.0", + "opentelemetry", + "pin-project", + "tower-layer", + "tower-service", + "tracing", + "tracing-opentelemetry", +] + [[package]] name = "tower-service" version = "0.3.3" @@ -7616,6 +7658,7 @@ dependencies = [ "once_cell", "pin-project-lite", "postgres_connection", + "pprof", "pq_proto", "rand 0.8.5", "regex", @@ -8123,9 +8166,9 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "winnow" -version = "0.6.13" +version = "0.6.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59b5e5f6c299a3c7890b876a2a587f3115162487e704907d9b6cd29473052ba1" +checksum = "1e90edd2ac1aa278a5c4599b1d89cf03074b610800f866d4026dc199d7929a28" dependencies = [ "memchr", ] diff --git a/Cargo.toml b/Cargo.toml index 7228623c6b..223ff4249e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -77,6 +77,7 @@ byteorder = "1.4" bytes = "1.9" camino = "1.1.6" cfg-if = "1.0.0" +cron = "0.15" chrono = { version = "0.4", default-features = false, features = ["clock"] } clap = { version = "4.0", features = ["derive", "env"] } clashmap = { version = "1.0", features = ["raw-api"] } @@ -192,6 +193,10 @@ toml_edit = "0.22" tonic = {version = "0.12.3", default-features = false, features = ["channel", "tls", "tls-roots"]} tower = { version = "0.5.2", default-features = false } tower-http = { version = "0.6.2", features = ["request-id", "trace"] } + +# This revision uses opentelemetry 0.27. There's no tag for it. 
+tower-otel = { git = "https://github.com/mattiapenati/tower-otel", rev = "56a7321053bcb72443888257b622ba0d43a11fcd" } + tower-service = "0.3.3" tracing = "0.1" tracing-error = "0.2" @@ -210,6 +215,7 @@ rustls-native-certs = "0.8" x509-parser = "0.16" whoami = "1.5.1" zerocopy = { version = "0.7", features = ["derive"] } +json-structural-diff = { version = "0.2.0" } ## TODO replace this with tracing env_logger = "0.10" diff --git a/build-tools.Dockerfile b/build-tools.Dockerfile index 317eded26e..c103ceaea5 100644 --- a/build-tools.Dockerfile +++ b/build-tools.Dockerfile @@ -292,7 +292,7 @@ WORKDIR /home/nonroot # Rust # Please keep the version of llvm (installed above) in sync with rust llvm (`rustc --version --verbose | grep LLVM`) -ENV RUSTC_VERSION=1.84.1 +ENV RUSTC_VERSION=1.85.0 ENV RUSTUP_HOME="/home/nonroot/.rustup" ENV PATH="/home/nonroot/.cargo/bin:${PATH}" ARG RUSTFILT_VERSION=0.2.1 diff --git a/compute/compute-node.Dockerfile b/compute/compute-node.Dockerfile index 0b3001613d..0cdb44853f 100644 --- a/compute/compute-node.Dockerfile +++ b/compute/compute-node.Dockerfile @@ -395,15 +395,22 @@ RUN case "${PG_VERSION:?}" in \ cd plv8-src && \ if [[ "${PG_VERSION:?}" < "v17" ]]; then patch -p1 < /ext-src/plv8-3.1.10.patch; fi -FROM pg-build AS plv8-build +# Step 1: Build the vendored V8 engine. It doesn't depend on PostgreSQL, so use +# 'build-deps' as the base. This enables caching and avoids unnecessary rebuilds. +# (The V8 engine takes a very long time to build) +FROM build-deps AS plv8-build ARG PG_VERSION +WORKDIR /ext-src/plv8-src RUN apt update && \ apt install --no-install-recommends --no-install-suggests -y \ ninja-build python3-dev libncurses5 binutils clang \ && apt clean && rm -rf /var/lib/apt/lists/* - COPY --from=plv8-src /ext-src/ /ext-src/ -WORKDIR /ext-src/plv8-src +RUN make DOCKER=1 -j $(getconf _NPROCESSORS_ONLN) v8 + +# Step 2: Build the PostgreSQL-dependent parts +COPY --from=pg-build /usr/local/pgsql /usr/local/pgsql +ENV PATH="/usr/local/pgsql/bin:$PATH" RUN \ # generate and copy upgrade scripts make generate_upgrades && \ @@ -1451,9 +1458,11 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) && \ FROM build-deps AS pg_mooncake-src ARG PG_VERSION WORKDIR /ext-src +COPY compute/patches/duckdb_v113.patch . RUN wget https://github.com/Mooncake-Labs/pg_mooncake/releases/download/v0.1.2/pg_mooncake-0.1.2.tar.gz -O pg_mooncake.tar.gz && \ echo "4550473784fcdd2e1e18062bc01eb9c286abd27cdf5e11a4399be6c0a426ba90 pg_mooncake.tar.gz" | sha256sum --check && \ mkdir pg_mooncake-src && cd pg_mooncake-src && tar xzf ../pg_mooncake.tar.gz --strip-components=1 -C . && \ + cd third_party/duckdb && patch -p1 < /ext-src/duckdb_v113.patch && cd ../.. && \ echo "make -f pg_mooncake-src/Makefile.build installcheck TEST_DIR=./test SQL_DIR=./sql SRC_DIR=./src" > neon-test.sh && \ chmod a+x neon-test.sh @@ -1473,6 +1482,7 @@ RUN make release -j $(getconf _NPROCESSORS_ONLN) && \ FROM build-deps AS pg_duckdb-src WORKDIR /ext-src COPY compute/patches/pg_duckdb_v031.patch . +COPY compute/patches/duckdb_v120.patch . # pg_duckdb build requires source dir to be a git repo to get submodules # allow neon_superuser to execute some functions that in pg_duckdb are available to superuser only: # - extension management function duckdb.install_extension() @@ -1480,7 +1490,9 @@ COPY compute/patches/pg_duckdb_v031.patch . 
RUN git clone --depth 1 --branch v0.3.1 https://github.com/duckdb/pg_duckdb.git pg_duckdb-src && \ cd pg_duckdb-src && \ git submodule update --init --recursive && \ - patch -p1 < /ext-src/pg_duckdb_v031.patch + patch -p1 < /ext-src/pg_duckdb_v031.patch && \ + cd third_party/duckdb && \ + patch -p1 < /ext-src/duckdb_v120.patch FROM pg-build AS pg_duckdb-build ARG PG_VERSION @@ -1806,7 +1818,7 @@ RUN make PG_VERSION="${PG_VERSION:?}" -C compute FROM pg-build AS extension-tests ARG PG_VERSION -RUN mkdir /ext-src +COPY docker-compose/ext-src/ /ext-src/ COPY --from=pg-build /postgres /postgres #COPY --from=postgis-src /ext-src/ /ext-src/ @@ -1844,7 +1856,10 @@ COPY --from=pg_semver-src /ext-src/ /ext-src/ COPY --from=pg_ivm-src /ext-src/ /ext-src/ COPY --from=pg_partman-src /ext-src/ /ext-src/ #COPY --from=pg_mooncake-src /ext-src/ /ext-src/ -#COPY --from=pg_repack-src /ext-src/ /ext-src/ +COPY --from=pg_repack-src /ext-src/ /ext-src/ +COPY --from=pg_repack-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY compute/patches/pg_repack.patch /ext-src +RUN cd /ext-src/pg_repack-src && patch -p1 ++ duckdb_pg_duckdb PUBLIC $ + $) + + target_include_directories( +@@ -161,7 +163,7 @@ else() + endif() + + install( +- TARGETS duckdb duckdb_static ++ TARGETS duckdb_pg_duckdb duckdb_static + EXPORT "${DUCKDB_EXPORT_SET}" + LIBRARY DESTINATION "${INSTALL_LIB_DIR}" + ARCHIVE DESTINATION "${INSTALL_LIB_DIR}" diff --git a/compute/patches/pg_duckdb_v031.patch b/compute/patches/pg_duckdb_v031.patch index a7e188d69e..edc7fbf69d 100644 --- a/compute/patches/pg_duckdb_v031.patch +++ b/compute/patches/pg_duckdb_v031.patch @@ -1,3 +1,25 @@ +diff --git a/Makefile b/Makefile +index 3235cc8..6b892bc 100644 +--- a/Makefile ++++ b/Makefile +@@ -32,7 +32,7 @@ else + DUCKDB_BUILD_TYPE = release + endif + +-DUCKDB_LIB = libduckdb$(DLSUFFIX) ++DUCKDB_LIB = libduckdb_pg_duckdb$(DLSUFFIX) + FULL_DUCKDB_LIB = third_party/duckdb/build/$(DUCKDB_BUILD_TYPE)/src/$(DUCKDB_LIB) + + ERROR_ON_WARNING ?= +@@ -54,7 +54,7 @@ override PG_CXXFLAGS += -std=c++17 ${DUCKDB_BUILD_CXX_FLAGS} ${COMPILER_FLAGS} - + # changes to the vendored code in one place. 
+ override PG_CFLAGS += -Wno-declaration-after-statement + +-SHLIB_LINK += -Wl,-rpath,$(PG_LIB)/ -lpq -Lthird_party/duckdb/build/$(DUCKDB_BUILD_TYPE)/src -L$(PG_LIB) -lduckdb -lstdc++ -llz4 ++SHLIB_LINK += -Wl,-rpath,$(PG_LIB)/ -lpq -Lthird_party/duckdb/build/$(DUCKDB_BUILD_TYPE)/src -L$(PG_LIB) -lduckdb_pg_duckdb -lstdc++ -llz4 + + include Makefile.global + diff --git a/sql/pg_duckdb--0.2.0--0.3.0.sql b/sql/pg_duckdb--0.2.0--0.3.0.sql index d777d76..af60106 100644 --- a/sql/pg_duckdb--0.2.0--0.3.0.sql diff --git a/compute/patches/pg_repack.patch b/compute/patches/pg_repack.patch new file mode 100644 index 0000000000..f6b0aa1e13 --- /dev/null +++ b/compute/patches/pg_repack.patch @@ -0,0 +1,72 @@ +diff --git a/regress/Makefile b/regress/Makefile +index bf6edcb..89b4c7f 100644 +--- a/regress/Makefile ++++ b/regress/Makefile +@@ -17,7 +17,7 @@ INTVERSION := $(shell echo $$(($$(echo $(VERSION).0 | sed 's/\([[:digit:]]\{1,\} + # Test suite + # + +-REGRESS := init-extension repack-setup repack-run error-on-invalid-idx no-error-on-invalid-idx after-schema repack-check nosuper tablespace get_order_by trigger ++REGRESS := init-extension repack-setup repack-run error-on-invalid-idx no-error-on-invalid-idx after-schema repack-check nosuper get_order_by trigger + + USE_PGXS = 1 # use pgxs if not in contrib directory + PGXS := $(shell $(PG_CONFIG) --pgxs) +diff --git a/regress/expected/nosuper.out b/regress/expected/nosuper.out +index 8d0a94e..63b68bf 100644 +--- a/regress/expected/nosuper.out ++++ b/regress/expected/nosuper.out +@@ -4,22 +4,22 @@ + SET client_min_messages = error; + DROP ROLE IF EXISTS nosuper; + SET client_min_messages = warning; +-CREATE ROLE nosuper WITH LOGIN; ++CREATE ROLE nosuper WITH LOGIN PASSWORD 'NoSuPeRpAsSwOrD'; + -- => OK + \! pg_repack --dbname=contrib_regression --table=tbl_cluster --no-superuser-check + INFO: repacking table "public.tbl_cluster" + -- => ERROR +-\! pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper ++\! PGPASSWORD=NoSuPeRpAsSwOrD pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper + ERROR: pg_repack failed with error: You must be a superuser to use pg_repack + -- => ERROR +-\! pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper --no-superuser-check ++\! PGPASSWORD=NoSuPeRpAsSwOrD pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper --no-superuser-check + ERROR: pg_repack failed with error: ERROR: permission denied for schema repack + LINE 1: select repack.version(), repack.version_sql() + ^ + GRANT ALL ON ALL TABLES IN SCHEMA repack TO nosuper; + GRANT USAGE ON SCHEMA repack TO nosuper; + -- => ERROR +-\! pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper --no-superuser-check ++\! PGPASSWORD=NoSuPeRpAsSwOrD pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper --no-superuser-check + INFO: repacking table "public.tbl_cluster" + ERROR: query failed: ERROR: current transaction is aborted, commands ignored until end of transaction block + DETAIL: query was: RESET lock_timeout +diff --git a/regress/sql/nosuper.sql b/regress/sql/nosuper.sql +index 072f0fa..dbe60f8 100644 +--- a/regress/sql/nosuper.sql ++++ b/regress/sql/nosuper.sql +@@ -4,19 +4,19 @@ + SET client_min_messages = error; + DROP ROLE IF EXISTS nosuper; + SET client_min_messages = warning; +-CREATE ROLE nosuper WITH LOGIN; ++CREATE ROLE nosuper WITH LOGIN PASSWORD 'NoSuPeRpAsSwOrD'; + -- => OK + \! 
pg_repack --dbname=contrib_regression --table=tbl_cluster --no-superuser-check + -- => ERROR +-\! pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper ++\! PGPASSWORD=NoSuPeRpAsSwOrD pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper + -- => ERROR +-\! pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper --no-superuser-check ++\! PGPASSWORD=NoSuPeRpAsSwOrD pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper --no-superuser-check + + GRANT ALL ON ALL TABLES IN SCHEMA repack TO nosuper; + GRANT USAGE ON SCHEMA repack TO nosuper; + + -- => ERROR +-\! pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper --no-superuser-check ++\! PGPASSWORD=NoSuPeRpAsSwOrD pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper --no-superuser-check + + REVOKE ALL ON ALL TABLES IN SCHEMA repack FROM nosuper; + REVOKE USAGE ON SCHEMA repack FROM nosuper; diff --git a/compute/vm-image-spec-bookworm.yaml b/compute/vm-image-spec-bookworm.yaml index 568f0b0444..ff4c3387d9 100644 --- a/compute/vm-image-spec-bookworm.yaml +++ b/compute/vm-image-spec-bookworm.yaml @@ -44,6 +44,11 @@ shutdownHook: | files: - filename: compute_ctl-sudoers content: | + # Reverse hostname lookup doesn't currently work, and isn't needed anyway when all + # the rules use ALL as the hostname. Avoid the pointless lookups and the "unable to + # resolve host" log messages that they generate. + Defaults !fqdn + # Allow postgres user (which is what compute_ctl runs as) to run /neonvm/bin/resize-swap # and /neonvm/bin/set-disk-quota as root without requiring entering a password (NOPASSWD), # regardless of hostname (ALL) diff --git a/compute/vm-image-spec-bullseye.yaml b/compute/vm-image-spec-bullseye.yaml index 6617c98599..c001040bc9 100644 --- a/compute/vm-image-spec-bullseye.yaml +++ b/compute/vm-image-spec-bullseye.yaml @@ -44,6 +44,11 @@ shutdownHook: | files: - filename: compute_ctl-sudoers content: | + # Reverse hostname lookup doesn't currently work, and isn't needed anyway when all + # the rules use ALL as the hostname. Avoid the pointless lookups and the "unable to + # resolve host" log messages that they generate. 
+ Defaults !fqdn + # Allow postgres user (which is what compute_ctl runs as) to run /neonvm/bin/resize-swap # and /neonvm/bin/set-disk-quota as root without requiring entering a password (NOPASSWD), # regardless of hostname (ALL) diff --git a/compute_tools/Cargo.toml b/compute_tools/Cargo.toml index c276996df5..8f3bcbeef8 100644 --- a/compute_tools/Cargo.toml +++ b/compute_tools/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "compute_tools" version = "0.1.0" -edition.workspace = true +edition = "2024" license.workspace = true [features] @@ -46,7 +46,9 @@ tokio = { workspace = true, features = ["rt", "rt-multi-thread"] } tokio-postgres.workspace = true tokio-util.workspace = true tokio-stream.workspace = true +tower-otel.workspace = true tracing.workspace = true +tracing-opentelemetry.workspace = true tracing-subscriber.workspace = true tracing-utils.workspace = true thiserror.workspace = true diff --git a/compute_tools/src/bin/compute_ctl.rs b/compute_tools/src/bin/compute_ctl.rs index a8803ec793..6dae1a2753 100644 --- a/compute_tools/src/bin/compute_ctl.rs +++ b/compute_tools/src/bin/compute_ctl.rs @@ -40,35 +40,33 @@ use std::path::Path; use std::process::exit; use std::str::FromStr; use std::sync::atomic::Ordering; -use std::sync::{mpsc, Arc, Condvar, Mutex, RwLock}; -use std::time::SystemTime; -use std::{thread, time::Duration}; +use std::sync::{Arc, Condvar, Mutex, RwLock, mpsc}; +use std::thread; +use std::time::Duration; use anyhow::{Context, Result}; use chrono::Utc; use clap::Parser; -use compute_tools::disk_quota::set_disk_quota; -use compute_tools::http::server::Server; -use compute_tools::lsn_lease::launch_lsn_lease_bg_task_for_static; -use signal_hook::consts::{SIGQUIT, SIGTERM}; -use signal_hook::{consts::SIGINT, iterator::Signals}; -use tracing::{error, info, warn}; -use url::Url; - use compute_api::responses::{ComputeCtlConfig, ComputeStatus}; use compute_api::spec::ComputeSpec; - use compute_tools::compute::{ - forward_termination_signal, ComputeNode, ComputeState, ParsedSpec, PG_PID, + ComputeNode, ComputeState, PG_PID, ParsedSpec, forward_termination_signal, }; use compute_tools::configurator::launch_configurator; +use compute_tools::disk_quota::set_disk_quota; use compute_tools::extension_server::get_pg_version_string; +use compute_tools::http::server::Server; use compute_tools::logger::*; +use compute_tools::lsn_lease::launch_lsn_lease_bg_task_for_static; use compute_tools::monitor::launch_monitor; use compute_tools::params::*; use compute_tools::spec::*; use compute_tools::swap::resize_swap; -use rlimit::{setrlimit, Resource}; +use rlimit::{Resource, setrlimit}; +use signal_hook::consts::{SIGINT, SIGQUIT, SIGTERM}; +use signal_hook::iterator::Signals; +use tracing::{error, info, warn}; +use url::Url; use utils::failpoint_support; // this is an arbitrary build tag. Fine as a default / for testing purposes @@ -86,19 +84,6 @@ fn parse_remote_ext_config(arg: &str) -> Result { } } -/// Generate a compute ID if one is not supplied. This exists to keep forward -/// compatibility tests working, but will be removed in a future iteration. -fn generate_compute_id() -> String { - let now = SystemTime::now(); - - format!( - "compute-{}", - now.duration_since(SystemTime::UNIX_EPOCH) - .unwrap() - .as_secs() - ) -} - #[derive(Parser)] #[command(rename_all = "kebab-case")] struct Cli { @@ -112,16 +97,13 @@ struct Cli { /// outside the compute will talk to the compute through this port. Keep /// the previous name for this argument around for a smoother release /// with the control plane. 
- /// - /// TODO: Remove the alias after the control plane release which teaches the - /// control plane about the renamed argument. - #[arg(long, alias = "http-port", default_value_t = 3080)] + #[arg(long, default_value_t = 3080)] pub external_http_port: u16, - /// The port to bind the internal listening HTTP server to. Clients like + /// The port to bind the internal listening HTTP server to. Clients include /// the neon extension (for installing remote extensions) and local_proxy. - #[arg(long)] - pub internal_http_port: Option, + #[arg(long, default_value_t = 3081)] + pub internal_http_port: u16, #[arg(short = 'D', long, value_name = "DATADIR")] pub pgdata: String, @@ -156,7 +138,7 @@ struct Cli { #[arg(short = 'S', long, group = "spec-path")] pub spec_path: Option, - #[arg(short = 'i', long, group = "compute-id", default_value = generate_compute_id())] + #[arg(short = 'i', long, group = "compute-id")] pub compute_id: String, #[arg(short = 'p', long, conflicts_with_all = ["spec", "spec-path"], value_name = "CONTROL_PLANE_API_BASE_URL")] @@ -166,6 +148,8 @@ struct Cli { fn main() -> Result<()> { let cli = Cli::parse(); + let scenario = failpoint_support::init(); + // For historical reasons, the main thread that processes the spec and launches postgres // is synchronous, but we always have this tokio runtime available and we "enter" it so // that you can use tokio::spawn() and tokio::runtime::Handle::current().block_on(...) @@ -177,8 +161,6 @@ fn main() -> Result<()> { let build_tag = runtime.block_on(init())?; - let scenario = failpoint_support::init(); - // enable core dumping for all child processes setrlimit(Resource::CORE, rlimit::INFINITY, rlimit::INFINITY)?; @@ -359,7 +341,7 @@ fn wait_spec( pgbin: cli.pgbin.clone(), pgversion: get_pg_version_string(&cli.pgbin), external_http_port: cli.external_http_port, - internal_http_port: cli.internal_http_port.unwrap_or(cli.external_http_port + 1), + internal_http_port: cli.internal_http_port, live_config_allowed, state: Mutex::new(new_state), state_changed: Condvar::new(), @@ -383,7 +365,7 @@ fn wait_spec( // The internal HTTP server could be launched later, but there isn't much // sense in waiting. - Server::Internal(cli.internal_http_port.unwrap_or(cli.external_http_port + 1)).launch(&compute); + Server::Internal(cli.internal_http_port).launch(&compute); if !spec_set { // No spec provided, hang waiting for it. @@ -424,6 +406,21 @@ fn start_postgres( ) -> Result<(Option, StartPostgresResult)> { // We got all we need, update the state. let mut state = compute.state.lock().unwrap(); + + // Create a tracing span for the startup operation. + // + // We could otherwise just annotate the function with #[instrument], but if + // we're being configured from a /configure HTTP request, we want the + // startup to be considered part of the /configure request. + let _this_entered = { + // Temporarily enter the /configure request's span, so that the new span + // becomes its child. + let _parent_entered = state.startup_span.take().map(|p| p.entered()); + + tracing::info_span!("start_postgres") + } + .entered(); + state.set_status(ComputeStatus::Init, &compute.state_changed); info!( diff --git a/compute_tools/src/bin/fast_import.rs b/compute_tools/src/bin/fast_import.rs index 585f3e4e1d..47558be7a0 100644 --- a/compute_tools/src/bin/fast_import.rs +++ b/compute_tools/src/bin/fast_import.rs @@ -25,13 +25,13 @@ //! docker push localhost:3030/localregistry/compute-node-v14:latest //! 
``` -use anyhow::{bail, Context}; +use anyhow::{Context, bail}; use aws_config::BehaviorVersion; use camino::{Utf8Path, Utf8PathBuf}; use clap::{Parser, Subcommand}; -use compute_tools::extension_server::{get_pg_version, PostgresMajorVersion}; +use compute_tools::extension_server::{PostgresMajorVersion, get_pg_version}; use nix::unistd::Pid; -use tracing::{error, info, info_span, warn, Instrument}; +use tracing::{Instrument, error, info, info_span, warn}; use utils::fs_ext::is_directory_empty; #[path = "fast_import/aws_s3_sync.rs"] @@ -558,7 +558,9 @@ async fn cmd_dumprestore( decode_connstring(kms_client.as_ref().unwrap(), &key_id, dest_ciphertext) .await? } else { - bail!("destination connection string must be provided in spec for dump_restore command"); + bail!( + "destination connection string must be provided in spec for dump_restore command" + ); }; (source, dest) diff --git a/compute_tools/src/bin/fast_import/aws_s3_sync.rs b/compute_tools/src/bin/fast_import/aws_s3_sync.rs index 1be10b36d6..d8d007da71 100644 --- a/compute_tools/src/bin/fast_import/aws_s3_sync.rs +++ b/compute_tools/src/bin/fast_import/aws_s3_sync.rs @@ -1,11 +1,10 @@ use camino::{Utf8Path, Utf8PathBuf}; use tokio::task::JoinSet; +use tracing::{info, warn}; use walkdir::WalkDir; use super::s3_uri::S3Uri; -use tracing::{info, warn}; - const MAX_PARALLEL_UPLOADS: usize = 10; /// Upload all files from 'local' to 'remote' diff --git a/compute_tools/src/bin/fast_import/s3_uri.rs b/compute_tools/src/bin/fast_import/s3_uri.rs index 52bbef420f..cf4dab7c02 100644 --- a/compute_tools/src/bin/fast_import/s3_uri.rs +++ b/compute_tools/src/bin/fast_import/s3_uri.rs @@ -1,6 +1,7 @@ -use anyhow::Result; use std::str::FromStr; +use anyhow::Result; + /// Struct to hold parsed S3 components #[derive(Debug, Clone, PartialEq, Eq)] pub struct S3Uri { diff --git a/compute_tools/src/catalog.rs b/compute_tools/src/catalog.rs index 28b10ce21c..2a7f56e6fc 100644 --- a/compute_tools/src/catalog.rs +++ b/compute_tools/src/catalog.rs @@ -1,18 +1,20 @@ +use std::path::Path; +use std::process::Stdio; +use std::result::Result; +use std::sync::Arc; + +use compute_api::responses::CatalogObjects; use futures::Stream; use postgres::NoTls; -use std::{path::Path, process::Stdio, result::Result, sync::Arc}; -use tokio::{ - io::{AsyncBufReadExt, BufReader}, - process::Command, - spawn, -}; +use tokio::io::{AsyncBufReadExt, BufReader}; +use tokio::process::Command; +use tokio::spawn; use tokio_stream::{self as stream, StreamExt}; use tokio_util::codec::{BytesCodec, FramedRead}; use tracing::warn; use crate::compute::ComputeNode; use crate::pg_helpers::{get_existing_dbs_async, get_existing_roles_async, postgres_conf_for_db}; -use compute_api::responses::CatalogObjects; pub async fn get_dbs_and_roles(compute: &Arc) -> anyhow::Result { let conf = compute.get_tokio_conn_conf(Some("compute_ctl:get_dbs_and_roles")); @@ -55,7 +57,7 @@ pub enum SchemaDumpError { pub async fn get_database_schema( compute: &Arc, dbname: &str, -) -> Result>, SchemaDumpError> { +) -> Result> + use<>, SchemaDumpError> { let pgbin = &compute.pgbin; let basepath = Path::new(pgbin).parent().unwrap(); let pgdump = basepath.join("pg_dump"); diff --git a/compute_tools/src/checker.rs b/compute_tools/src/checker.rs index 62d61a8bc9..e4207876ac 100644 --- a/compute_tools/src/checker.rs +++ b/compute_tools/src/checker.rs @@ -1,4 +1,4 @@ -use anyhow::{anyhow, Ok, Result}; +use anyhow::{Ok, Result, anyhow}; use tokio_postgres::NoTls; use tracing::{error, instrument, warn}; diff --git 
a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index d323ea3dcd..97fa45062b 100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -1,42 +1,37 @@ use std::collections::{HashMap, HashSet}; -use std::env; -use std::fs; use std::iter::once; -use std::os::unix::fs::{symlink, PermissionsExt}; +use std::os::unix::fs::{PermissionsExt, symlink}; use std::path::Path; use std::process::{Command, Stdio}; use std::str::FromStr; -use std::sync::atomic::AtomicU32; -use std::sync::atomic::Ordering; +use std::sync::atomic::{AtomicU32, Ordering}; use std::sync::{Arc, Condvar, Mutex, RwLock}; -use std::time::Duration; -use std::time::Instant; +use std::time::{Duration, Instant}; +use std::{env, fs}; use anyhow::{Context, Result}; use chrono::{DateTime, Utc}; -use compute_api::spec::{Database, PgIdent, Role}; +use compute_api::privilege::Privilege; +use compute_api::responses::{ComputeMetrics, ComputeStatus}; +use compute_api::spec::{ + ComputeFeature, ComputeMode, ComputeSpec, Database, ExtVersion, PgIdent, Role, +}; +use futures::StreamExt; use futures::future::join_all; use futures::stream::FuturesUnordered; -use futures::StreamExt; +use nix::sys::signal::{Signal, kill}; use nix::unistd::Pid; use postgres; -use postgres::error::SqlState; use postgres::NoTls; +use postgres::error::SqlState; +use remote_storage::{DownloadError, RemotePath}; +use tokio::spawn; use tracing::{debug, error, info, instrument, warn}; use utils::id::{TenantId, TimelineId}; use utils::lsn::Lsn; - -use compute_api::privilege::Privilege; -use compute_api::responses::{ComputeMetrics, ComputeStatus}; -use compute_api::spec::{ComputeFeature, ComputeMode, ComputeSpec, ExtVersion}; use utils::measured_stream::MeasuredReader; -use nix::sys::signal::{kill, Signal}; -use remote_storage::{DownloadError, RemotePath}; -use tokio::spawn; - use crate::installed_extensions::get_installed_extensions; -use crate::local_proxy; use crate::pg_helpers::*; use crate::spec::*; use crate::spec_apply::ApplySpecPhase::{ @@ -45,13 +40,12 @@ use crate::spec_apply::ApplySpecPhase::{ HandleNeonExtension, HandleOtherExtensions, RenameAndDeleteDatabases, RenameRoles, RunInEachDatabase, }; -use crate::spec_apply::PerDatabasePhase; use crate::spec_apply::PerDatabasePhase::{ ChangeSchemaPerms, DeleteDBRoleReferences, DropLogicalSubscriptions, HandleAnonExtension, }; -use crate::spec_apply::{apply_operations, MutableApplyContext, DB}; +use crate::spec_apply::{DB, MutableApplyContext, PerDatabasePhase, apply_operations}; use crate::sync_sk::{check_if_synced, ping_safekeeper}; -use crate::{config, extension_server}; +use crate::{config, extension_server, local_proxy}; pub static SYNC_SAFEKEEPERS_PID: AtomicU32 = AtomicU32::new(0); pub static PG_PID: AtomicU32 = AtomicU32::new(0); @@ -116,7 +110,23 @@ pub struct ComputeState { /// compute wasn't used since start. pub last_active: Option>, pub error: Option, + + /// Compute spec. This can be received from the CLI or - more likely - + /// passed by the control plane with a /configure HTTP request. pub pspec: Option, + + /// If the spec is passed by a /configure request, 'startup_span' is the + /// /configure request's tracing span. The main thread enters it when it + /// processes the compute startup, so that the compute startup is considered + /// to be part of the /configure request for tracing purposes. 
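For illustration, a minimal sketch (simplified, hypothetical names rather than the real compute_ctl types) of the span hand-off described in the surrounding startup_span comment: the /configure handler stashes its current span, and the main thread briefly enters it while creating the startup span so the latter is recorded as its child.

    use std::sync::{Arc, Mutex};
    use tracing::{info_span, Span};

    struct Shared {
        startup_span: Option<Span>,
    }

    // HTTP handler side: remember the /configure request's span.
    fn on_configure(shared: &Arc<Mutex<Shared>>) {
        shared.lock().unwrap().startup_span = Some(Span::current());
    }

    // Main thread side: parent the startup work under that span, if present.
    fn do_startup(shared: &Arc<Mutex<Shared>>) {
        let _entered = {
            // Enter the stored parent only while the child span is created.
            let _parent = shared
                .lock()
                .unwrap()
                .startup_span
                .take()
                .map(|s| s.entered());
            info_span!("start_postgres")
        }
        .entered();
        // ... startup work now runs inside the "start_postgres" span ...
    }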
+ /// + /// If the request handling thread/task called startup_compute() directly, + /// it would automatically be a child of the request handling span, and we + /// wouldn't need this. But because we use the main thread to perform the + /// startup, and the /configure task just waits for it to finish, we need to + /// set up the span relationship ourselves. + pub startup_span: Option, + pub metrics: ComputeMetrics, } @@ -128,6 +138,7 @@ impl ComputeState { last_active: None, error: None, pspec: None, + startup_span: None, metrics: ComputeMetrics::default(), } } @@ -546,6 +557,7 @@ impl ComputeNode { // Fast path for sync_safekeepers. If they're already synced we get the lsn // in one roundtrip. If not, we should do a full sync_safekeepers. + #[instrument(skip_all)] pub fn check_safekeepers_synced(&self, compute_state: &ComputeState) -> Result> { let start_time = Utc::now(); @@ -776,8 +788,9 @@ impl ComputeNode { Ok(()) } - /// Start Postgres as a child process and manage DBs/roles. - /// After that this will hang waiting on the postmaster process to exit. + /// Start Postgres as a child process and wait for it to start accepting + /// connections. + /// /// Returns a handle to the child process and a handle to the logs thread. #[instrument(skip_all)] pub fn start_postgres( @@ -1317,7 +1330,7 @@ impl ComputeNode { // Merge-apply spec & changes to PostgreSQL state. self.apply_spec_sql(spec.clone(), conf.clone(), max_concurrent_connections)?; - if let Some(ref local_proxy) = &spec.clone().local_proxy_config { + if let Some(local_proxy) = &spec.clone().local_proxy_config { info!("configuring local_proxy"); local_proxy::configure(local_proxy).context("apply_config local_proxy")?; } @@ -1537,7 +1550,9 @@ impl ComputeNode { &postgresql_conf_path, "neon.disable_logical_replication_subscribers=false", )? { - info!("updated postgresql.conf to set neon.disable_logical_replication_subscribers=false"); + info!( + "updated postgresql.conf to set neon.disable_logical_replication_subscribers=false" + ); } self.pg_reload_conf()?; } @@ -1764,7 +1779,9 @@ LIMIT 100", info!("extension already downloaded, skipping re-download"); return Ok(0); } else if start_time_delta < HANG_TIMEOUT && !first_try { - info!("download {ext_archive_name} already started by another process, hanging untill completion or timeout"); + info!( + "download {ext_archive_name} already started by another process, hanging untill completion or timeout" + ); let mut interval = tokio::time::interval(tokio::time::Duration::from_millis(500)); loop { info!("waiting for download"); diff --git a/compute_tools/src/config.rs b/compute_tools/src/config.rs index e1bdfffa54..e8056ec7eb 100644 --- a/compute_tools/src/config.rs +++ b/compute_tools/src/config.rs @@ -4,11 +4,10 @@ use std::io::prelude::*; use std::path::Path; use anyhow::Result; - -use crate::pg_helpers::escape_conf_value; -use crate::pg_helpers::{GenericOptionExt, PgOptionsSerialize}; use compute_api::spec::{ComputeMode, ComputeSpec, GenericOption}; +use crate::pg_helpers::{GenericOptionExt, PgOptionsSerialize, escape_conf_value}; + /// Check that `line` is inside a text file and put it there if it is not. /// Create file if it doesn't exist. 
pub fn line_in_file(path: &Path, line: &str) -> Result { diff --git a/compute_tools/src/configurator.rs b/compute_tools/src/configurator.rs index d88f26ca20..d97bd37285 100644 --- a/compute_tools/src/configurator.rs +++ b/compute_tools/src/configurator.rs @@ -1,9 +1,8 @@ use std::sync::Arc; use std::thread; -use tracing::{error, info, instrument}; - use compute_api::responses::ComputeStatus; +use tracing::{error, info, instrument}; use crate::compute::ComputeNode; diff --git a/compute_tools/src/disk_quota.rs b/compute_tools/src/disk_quota.rs index e838c5b9fd..1353ab938d 100644 --- a/compute_tools/src/disk_quota.rs +++ b/compute_tools/src/disk_quota.rs @@ -1,9 +1,11 @@ use anyhow::Context; +use tracing::instrument; pub const DISK_QUOTA_BIN: &str = "/neonvm/bin/set-disk-quota"; /// If size_bytes is 0, it disables the quota. Otherwise, it sets filesystem quota to size_bytes. /// `fs_mountpoint` should point to the mountpoint of the filesystem where the quota should be set. +#[instrument] pub fn set_disk_quota(size_bytes: u64, fs_mountpoint: &str) -> anyhow::Result<()> { let size_kb = size_bytes / 1024; // run `/neonvm/bin/set-disk-quota {size_kb} {mountpoint}` diff --git a/compute_tools/src/extension_server.rs b/compute_tools/src/extension_server.rs index 00f46386e7..77e98359ab 100644 --- a/compute_tools/src/extension_server.rs +++ b/compute_tools/src/extension_server.rs @@ -71,15 +71,15 @@ More specifically, here is an example ext_index.json } } */ -use anyhow::Result; -use anyhow::{bail, Context}; +use std::path::Path; +use std::str; + +use anyhow::{Context, Result, bail}; use bytes::Bytes; use compute_api::spec::RemoteExtSpec; use regex::Regex; use remote_storage::*; use reqwest::StatusCode; -use std::path::Path; -use std::str; use tar::Archive; use tracing::info; use tracing::log::warn; @@ -244,7 +244,10 @@ pub fn create_control_files(remote_extensions: &RemoteExtSpec, pgbin: &str) { info!("writing file {:?}{:?}", control_path, control_content); std::fs::write(control_path, control_content).unwrap(); } else { - warn!("control file {:?} exists both locally and remotely. ignoring the remote version.", control_path); + warn!( + "control file {:?} exists both locally and remotely. ignoring the remote version.", + control_path + ); } } } diff --git a/compute_tools/src/http/extract/json.rs b/compute_tools/src/http/extract/json.rs index 104cc25d5f..1d32e4ff37 100644 --- a/compute_tools/src/http/extract/json.rs +++ b/compute_tools/src/http/extract/json.rs @@ -1,6 +1,7 @@ use std::ops::{Deref, DerefMut}; -use axum::extract::{rejection::JsonRejection, FromRequest, Request}; +use axum::extract::rejection::JsonRejection; +use axum::extract::{FromRequest, Request}; use compute_api::responses::GenericAPIError; use http::StatusCode; diff --git a/compute_tools/src/http/extract/path.rs b/compute_tools/src/http/extract/path.rs index 09637a96a4..45970cff3d 100644 --- a/compute_tools/src/http/extract/path.rs +++ b/compute_tools/src/http/extract/path.rs @@ -1,8 +1,10 @@ use std::ops::{Deref, DerefMut}; -use axum::extract::{rejection::PathRejection, FromRequestParts}; +use axum::extract::FromRequestParts; +use axum::extract::rejection::PathRejection; use compute_api::responses::GenericAPIError; -use http::{request::Parts, StatusCode}; +use http::StatusCode; +use http::request::Parts; /// Custom `Path` extractor, so that we can format errors into /// `JsonResponse`. 
diff --git a/compute_tools/src/http/extract/query.rs b/compute_tools/src/http/extract/query.rs index 9dec3642cf..b8079ea770 100644 --- a/compute_tools/src/http/extract/query.rs +++ b/compute_tools/src/http/extract/query.rs @@ -1,8 +1,10 @@ use std::ops::{Deref, DerefMut}; -use axum::extract::{rejection::QueryRejection, FromRequestParts}; +use axum::extract::FromRequestParts; +use axum::extract::rejection::QueryRejection; use compute_api::responses::GenericAPIError; -use http::{request::Parts, StatusCode}; +use http::StatusCode; +use http::request::Parts; /// Custom `Query` extractor, so that we can format errors into /// `JsonResponse`. diff --git a/compute_tools/src/http/mod.rs b/compute_tools/src/http/mod.rs index 93eb6ef5b7..d182278174 100644 --- a/compute_tools/src/http/mod.rs +++ b/compute_tools/src/http/mod.rs @@ -1,6 +1,8 @@ -use axum::{body::Body, response::Response}; +use axum::body::Body; +use axum::response::Response; use compute_api::responses::{ComputeStatus, GenericAPIError}; -use http::{header::CONTENT_TYPE, StatusCode}; +use http::StatusCode; +use http::header::CONTENT_TYPE; use serde::Serialize; use tracing::error; diff --git a/compute_tools/src/http/routes/check_writability.rs b/compute_tools/src/http/routes/check_writability.rs index d7feb055e9..5a12686fa8 100644 --- a/compute_tools/src/http/routes/check_writability.rs +++ b/compute_tools/src/http/routes/check_writability.rs @@ -1,10 +1,13 @@ use std::sync::Arc; -use axum::{extract::State, response::Response}; +use axum::extract::State; +use axum::response::Response; use compute_api::responses::ComputeStatus; use http::StatusCode; -use crate::{checker::check_writability, compute::ComputeNode, http::JsonResponse}; +use crate::checker::check_writability; +use crate::compute::ComputeNode; +use crate::http::JsonResponse; /// Check that the compute is currently running. pub(in crate::http) async fn is_writable(State(compute): State>) -> Response { diff --git a/compute_tools/src/http/routes/configure.rs b/compute_tools/src/http/routes/configure.rs index 2546cbc344..63d428fff4 100644 --- a/compute_tools/src/http/routes/configure.rs +++ b/compute_tools/src/http/routes/configure.rs @@ -1,18 +1,16 @@ use std::sync::Arc; -use axum::{extract::State, response::Response}; -use compute_api::{ - requests::ConfigurationRequest, - responses::{ComputeStatus, ComputeStatusResponse}, -}; +use axum::extract::State; +use axum::response::Response; +use compute_api::requests::ConfigurationRequest; +use compute_api::responses::{ComputeStatus, ComputeStatusResponse}; use http::StatusCode; use tokio::task; use tracing::info; -use crate::{ - compute::{ComputeNode, ParsedSpec}, - http::{extract::Json, JsonResponse}, -}; +use crate::compute::{ComputeNode, ParsedSpec}; +use crate::http::JsonResponse; +use crate::http::extract::Json; // Accept spec in JSON format and request compute configuration. If anything // goes wrong after we set the compute status to `ConfigurationPending` and @@ -47,13 +45,18 @@ pub(in crate::http) async fn configure( return JsonResponse::invalid_status(state.status); } + // Pass the tracing span to the main thread that performs the startup, + // so that the start_compute operation is considered a child of this + // configure request for tracing purposes. + state.startup_span = Some(tracing::Span::current()); + state.pspec = Some(pspec); state.set_status(ComputeStatus::ConfigurationPending, &compute.state_changed); drop(state); } // Spawn a blocking thread to wait for compute to become Running. 
This is - // needed to do not block the main pool of workers and be able to serve + // needed to not block the main pool of workers and to be able to serve // other requests while some particular request is waiting for compute to // finish configuration. let c = compute.clone(); diff --git a/compute_tools/src/http/routes/database_schema.rs b/compute_tools/src/http/routes/database_schema.rs index fd716272dc..1f6ca4b79d 100644 --- a/compute_tools/src/http/routes/database_schema.rs +++ b/compute_tools/src/http/routes/database_schema.rs @@ -1,14 +1,16 @@ use std::sync::Arc; -use axum::{body::Body, extract::State, response::Response}; -use http::{header::CONTENT_TYPE, StatusCode}; +use axum::body::Body; +use axum::extract::State; +use axum::response::Response; +use http::StatusCode; +use http::header::CONTENT_TYPE; use serde::Deserialize; -use crate::{ - catalog::{get_database_schema, SchemaDumpError}, - compute::ComputeNode, - http::{extract::Query, JsonResponse}, -}; +use crate::catalog::{SchemaDumpError, get_database_schema}; +use crate::compute::ComputeNode; +use crate::http::JsonResponse; +use crate::http::extract::Query; #[derive(Debug, Clone, Deserialize)] pub(in crate::http) struct DatabaseSchemaParams { diff --git a/compute_tools/src/http/routes/dbs_and_roles.rs b/compute_tools/src/http/routes/dbs_and_roles.rs index 4843c3fab4..790fe0dfe3 100644 --- a/compute_tools/src/http/routes/dbs_and_roles.rs +++ b/compute_tools/src/http/routes/dbs_and_roles.rs @@ -1,9 +1,12 @@ use std::sync::Arc; -use axum::{extract::State, response::Response}; +use axum::extract::State; +use axum::response::Response; use http::StatusCode; -use crate::{catalog::get_dbs_and_roles, compute::ComputeNode, http::JsonResponse}; +use crate::catalog::get_dbs_and_roles; +use crate::compute::ComputeNode; +use crate::http::JsonResponse; /// Get the databases and roles from the compute. 
pub(in crate::http) async fn get_catalog_objects( diff --git a/compute_tools/src/http/routes/extension_server.rs b/compute_tools/src/http/routes/extension_server.rs index 5cc9b6d277..b0265d1e99 100644 --- a/compute_tools/src/http/routes/extension_server.rs +++ b/compute_tools/src/http/routes/extension_server.rs @@ -1,19 +1,13 @@ use std::sync::Arc; -use axum::{ - extract::State, - response::{IntoResponse, Response}, -}; +use axum::extract::State; +use axum::response::{IntoResponse, Response}; use http::StatusCode; use serde::Deserialize; -use crate::{ - compute::ComputeNode, - http::{ - extract::{Path, Query}, - JsonResponse, - }, -}; +use crate::compute::ComputeNode; +use crate::http::JsonResponse; +use crate::http::extract::{Path, Query}; #[derive(Debug, Clone, Deserialize)] pub(in crate::http) struct ExtensionServerParams { diff --git a/compute_tools/src/http/routes/extensions.rs b/compute_tools/src/http/routes/extensions.rs index 1fc03b9109..910e1fa155 100644 --- a/compute_tools/src/http/routes/extensions.rs +++ b/compute_tools/src/http/routes/extensions.rs @@ -1,16 +1,14 @@ use std::sync::Arc; -use axum::{extract::State, response::Response}; -use compute_api::{ - requests::ExtensionInstallRequest, - responses::{ComputeStatus, ExtensionInstallResponse}, -}; +use axum::extract::State; +use axum::response::Response; +use compute_api::requests::ExtensionInstallRequest; +use compute_api::responses::{ComputeStatus, ExtensionInstallResponse}; use http::StatusCode; -use crate::{ - compute::ComputeNode, - http::{extract::Json, JsonResponse}, -}; +use crate::compute::ComputeNode; +use crate::http::JsonResponse; +use crate::http::extract::Json; /// Install a extension. pub(in crate::http) async fn install_extension( diff --git a/compute_tools/src/http/routes/failpoints.rs b/compute_tools/src/http/routes/failpoints.rs index 836417d784..8f5da99963 100644 --- a/compute_tools/src/http/routes/failpoints.rs +++ b/compute_tools/src/http/routes/failpoints.rs @@ -17,7 +17,8 @@ pub struct FailpointConfig { pub actions: String, } -use crate::http::{extract::Json, JsonResponse}; +use crate::http::JsonResponse; +use crate::http::extract::Json; /// Configure failpoints for testing purposes. pub(in crate::http) async fn configure_failpoints( diff --git a/compute_tools/src/http/routes/grants.rs b/compute_tools/src/http/routes/grants.rs index 3f67f011e5..267dcbb27e 100644 --- a/compute_tools/src/http/routes/grants.rs +++ b/compute_tools/src/http/routes/grants.rs @@ -1,16 +1,14 @@ use std::sync::Arc; -use axum::{extract::State, response::Response}; -use compute_api::{ - requests::SetRoleGrantsRequest, - responses::{ComputeStatus, SetRoleGrantsResponse}, -}; +use axum::extract::State; +use axum::response::Response; +use compute_api::requests::SetRoleGrantsRequest; +use compute_api::responses::{ComputeStatus, SetRoleGrantsResponse}; use http::StatusCode; -use crate::{ - compute::ComputeNode, - http::{extract::Json, JsonResponse}, -}; +use crate::compute::ComputeNode; +use crate::http::JsonResponse; +use crate::http::extract::Json; /// Add grants for a role. 
pub(in crate::http) async fn add_grant( diff --git a/compute_tools/src/http/routes/insights.rs b/compute_tools/src/http/routes/insights.rs index 6b03a461c3..b1ba67161e 100644 --- a/compute_tools/src/http/routes/insights.rs +++ b/compute_tools/src/http/routes/insights.rs @@ -1,10 +1,12 @@ use std::sync::Arc; -use axum::{extract::State, response::Response}; +use axum::extract::State; +use axum::response::Response; use compute_api::responses::ComputeStatus; use http::StatusCode; -use crate::{compute::ComputeNode, http::JsonResponse}; +use crate::compute::ComputeNode; +use crate::http::JsonResponse; /// Collect current Postgres usage insights. pub(in crate::http) async fn get_insights(State(compute): State>) -> Response { diff --git a/compute_tools/src/http/routes/metrics.rs b/compute_tools/src/http/routes/metrics.rs index 13150a7588..da8d8b20a5 100644 --- a/compute_tools/src/http/routes/metrics.rs +++ b/compute_tools/src/http/routes/metrics.rs @@ -1,10 +1,12 @@ -use axum::{body::Body, response::Response}; -use http::header::CONTENT_TYPE; +use axum::body::Body; +use axum::response::Response; use http::StatusCode; +use http::header::CONTENT_TYPE; use metrics::proto::MetricFamily; use metrics::{Encoder, TextEncoder}; -use crate::{http::JsonResponse, metrics::collect}; +use crate::http::JsonResponse; +use crate::metrics::collect; /// Expose Prometheus metrics. pub(in crate::http) async fn get_metrics() -> Response { diff --git a/compute_tools/src/http/routes/metrics_json.rs b/compute_tools/src/http/routes/metrics_json.rs index 0709db5011..bc35ee2645 100644 --- a/compute_tools/src/http/routes/metrics_json.rs +++ b/compute_tools/src/http/routes/metrics_json.rs @@ -1,9 +1,11 @@ use std::sync::Arc; -use axum::{extract::State, response::Response}; +use axum::extract::State; +use axum::response::Response; use http::StatusCode; -use crate::{compute::ComputeNode, http::JsonResponse}; +use crate::compute::ComputeNode; +use crate::http::JsonResponse; /// Get startup metrics. pub(in crate::http) async fn get_metrics(State(compute): State>) -> Response { diff --git a/compute_tools/src/http/routes/status.rs b/compute_tools/src/http/routes/status.rs index d64d53a58f..8ed1299d6b 100644 --- a/compute_tools/src/http/routes/status.rs +++ b/compute_tools/src/http/routes/status.rs @@ -1,9 +1,13 @@ -use std::{ops::Deref, sync::Arc}; +use std::ops::Deref; +use std::sync::Arc; -use axum::{extract::State, http::StatusCode, response::Response}; +use axum::extract::State; +use axum::http::StatusCode; +use axum::response::Response; use compute_api::responses::ComputeStatusResponse; -use crate::{compute::ComputeNode, http::JsonResponse}; +use crate::compute::ComputeNode; +use crate::http::JsonResponse; /// Retrieve the state of the comute. 
pub(in crate::http) async fn get_status(State(compute): State>) -> Response { diff --git a/compute_tools/src/http/routes/terminate.rs b/compute_tools/src/http/routes/terminate.rs index 7acd84f236..2c24d4ad6b 100644 --- a/compute_tools/src/http/routes/terminate.rs +++ b/compute_tools/src/http/routes/terminate.rs @@ -1,18 +1,14 @@ use std::sync::Arc; -use axum::{ - extract::State, - response::{IntoResponse, Response}, -}; +use axum::extract::State; +use axum::response::{IntoResponse, Response}; use compute_api::responses::ComputeStatus; use http::StatusCode; use tokio::task; use tracing::info; -use crate::{ - compute::{forward_termination_signal, ComputeNode}, - http::JsonResponse, -}; +use crate::compute::{ComputeNode, forward_termination_signal}; +use crate::http::JsonResponse; /// Terminate the compute. pub(in crate::http) async fn terminate(State(compute): State>) -> Response { diff --git a/compute_tools/src/http/server.rs b/compute_tools/src/http/server.rs index a523ecd96f..7283401bb5 100644 --- a/compute_tools/src/http/server.rs +++ b/compute_tools/src/http/server.rs @@ -1,23 +1,20 @@ -use std::{ - fmt::Display, - net::{IpAddr, Ipv6Addr, SocketAddr}, - sync::Arc, - time::Duration, -}; +use std::fmt::Display; +use std::net::{IpAddr, Ipv6Addr, SocketAddr}; +use std::sync::Arc; +use std::time::Duration; use anyhow::Result; -use axum::{ - extract::Request, - middleware::{self, Next}, - response::{IntoResponse, Response}, - routing::{get, post}, - Router, -}; +use axum::Router; +use axum::extract::Request; +use axum::middleware::{self, Next}; +use axum::response::{IntoResponse, Response}; +use axum::routing::{get, post}; use http::StatusCode; use tokio::net::TcpListener; use tower::ServiceBuilder; -use tower_http::{request_id::PropagateRequestIdLayer, trace::TraceLayer}; -use tracing::{debug, error, info, Span}; +use tower_http::request_id::PropagateRequestIdLayer; +use tower_http::trace::TraceLayer; +use tracing::{Span, debug, error, info}; use uuid::Uuid; use super::routes::{ @@ -124,6 +121,7 @@ impl From for Router> { ) .layer(PropagateRequestIdLayer::x_request_id()), ) + .layer(tower_otel::trace::HttpLayer::server(tracing::Level::INFO)) } } diff --git a/compute_tools/src/installed_extensions.rs b/compute_tools/src/installed_extensions.rs index 173dbf40b0..6921505466 100644 --- a/compute_tools/src/installed_extensions.rs +++ b/compute_tools/src/installed_extensions.rs @@ -1,7 +1,7 @@ -use compute_api::responses::{InstalledExtension, InstalledExtensions}; use std::collections::HashMap; use anyhow::Result; +use compute_api::responses::{InstalledExtension, InstalledExtensions}; use postgres::{Client, NoTls}; use crate::metrics::INSTALLED_EXTENSIONS; diff --git a/compute_tools/src/lsn_lease.rs b/compute_tools/src/lsn_lease.rs index 3061d387a5..b4ec675ff4 100644 --- a/compute_tools/src/lsn_lease.rs +++ b/compute_tools/src/lsn_lease.rs @@ -1,17 +1,15 @@ -use anyhow::bail; -use anyhow::Result; -use postgres::{NoTls, SimpleQueryMessage}; -use std::time::SystemTime; -use std::{str::FromStr, sync::Arc, thread, time::Duration}; -use utils::id::TenantId; -use utils::id::TimelineId; +use std::str::FromStr; +use std::sync::Arc; +use std::thread; +use std::time::{Duration, SystemTime}; +use anyhow::{Result, bail}; use compute_api::spec::ComputeMode; +use postgres::{NoTls, SimpleQueryMessage}; use tracing::{info, warn}; -use utils::{ - lsn::Lsn, - shard::{ShardCount, ShardNumber, TenantShardId}, -}; +use utils::id::{TenantId, TimelineId}; +use utils::lsn::Lsn; +use utils::shard::{ShardCount, 
ShardNumber, TenantShardId}; use crate::compute::ComputeNode; diff --git a/compute_tools/src/metrics.rs b/compute_tools/src/metrics.rs index 870b294d08..bc96e5074c 100644 --- a/compute_tools/src/metrics.rs +++ b/compute_tools/src/metrics.rs @@ -1,6 +1,6 @@ use metrics::core::Collector; use metrics::proto::MetricFamily; -use metrics::{register_int_counter_vec, register_uint_gauge_vec, IntCounterVec, UIntGaugeVec}; +use metrics::{IntCounterVec, UIntGaugeVec, register_int_counter_vec, register_uint_gauge_vec}; use once_cell::sync::Lazy; pub(crate) static INSTALLED_EXTENSIONS: Lazy = Lazy::new(|| { diff --git a/compute_tools/src/monitor.rs b/compute_tools/src/monitor.rs index 184f380a8d..248505e473 100644 --- a/compute_tools/src/monitor.rs +++ b/compute_tools/src/monitor.rs @@ -1,13 +1,14 @@ use std::sync::Arc; -use std::{thread, time::Duration}; +use std::thread; +use std::time::Duration; use chrono::{DateTime, Utc}; +use compute_api::responses::ComputeStatus; +use compute_api::spec::ComputeFeature; use postgres::{Client, NoTls}; use tracing::{debug, error, info, warn}; use crate::compute::ComputeNode; -use compute_api::responses::ComputeStatus; -use compute_api::spec::ComputeFeature; const MONITOR_CHECK_INTERVAL: Duration = Duration::from_millis(500); diff --git a/compute_tools/src/pg_helpers.rs b/compute_tools/src/pg_helpers.rs index 86fcf99085..5a2e305e1d 100644 --- a/compute_tools/src/pg_helpers.rs +++ b/compute_tools/src/pg_helpers.rs @@ -9,7 +9,8 @@ use std::process::Child; use std::str::FromStr; use std::time::{Duration, Instant}; -use anyhow::{bail, Result}; +use anyhow::{Result, bail}; +use compute_api::spec::{Database, GenericOption, GenericOptions, PgIdent, Role}; use futures::StreamExt; use ini::Ini; use notify::{RecursiveMode, Watcher}; @@ -21,8 +22,6 @@ use tokio_postgres; use tokio_postgres::NoTls; use tracing::{debug, error, info, instrument}; -use compute_api::spec::{Database, GenericOption, GenericOptions, PgIdent, Role}; - const POSTGRES_WAIT_TIMEOUT: Duration = Duration::from_millis(60 * 1000); // milliseconds /// Escape a string for including it in a SQL literal. diff --git a/compute_tools/src/spec.rs b/compute_tools/src/spec.rs index 6f28bd9733..1d19f2738d 100644 --- a/compute_tools/src/spec.rs +++ b/compute_tools/src/spec.rs @@ -1,20 +1,20 @@ -use anyhow::{anyhow, bail, Result}; -use reqwest::StatusCode; use std::fs::File; use std::path::Path; -use tokio_postgres::Client; -use tracing::{error, info, instrument, warn}; - -use crate::config; -use crate::metrics::{CPlaneRequestRPC, CPLANE_REQUESTS_TOTAL, UNKNOWN_HTTP_STATUS}; -use crate::migration::MigrationRunner; -use crate::params::PG_HBA_ALL_MD5; -use crate::pg_helpers::*; +use anyhow::{Result, anyhow, bail}; use compute_api::responses::{ ComputeCtlConfig, ControlPlaneComputeStatus, ControlPlaneSpecResponse, }; use compute_api::spec::ComputeSpec; +use reqwest::StatusCode; +use tokio_postgres::Client; +use tracing::{error, info, instrument, warn}; + +use crate::config; +use crate::metrics::{CPLANE_REQUESTS_TOTAL, CPlaneRequestRPC, UNKNOWN_HTTP_STATUS}; +use crate::migration::MigrationRunner; +use crate::params::PG_HBA_ALL_MD5; +use crate::pg_helpers::*; // Do control plane request and return response if any. In case of error it // returns a bool flag indicating whether it makes sense to retry the request @@ -141,7 +141,6 @@ pub fn get_spec_from_control_plane( /// Check `pg_hba.conf` and update if needed to allow external connections. 
pub fn update_pg_hba(pgdata_path: &Path) -> Result<()> { // XXX: consider making it a part of spec.json - info!("checking pg_hba.conf"); let pghba_path = pgdata_path.join("pg_hba.conf"); if config::line_in_file(&pghba_path, PG_HBA_ALL_MD5)? { @@ -156,12 +155,11 @@ pub fn update_pg_hba(pgdata_path: &Path) -> Result<()> { /// Create a standby.signal file pub fn add_standby_signal(pgdata_path: &Path) -> Result<()> { // XXX: consider making it a part of spec.json - info!("adding standby.signal"); let signalfile = pgdata_path.join("standby.signal"); if !signalfile.exists() { - info!("created standby.signal"); File::create(signalfile)?; + info!("created standby.signal"); } else { info!("reused pre-existing standby.signal"); } @@ -170,7 +168,6 @@ pub fn add_standby_signal(pgdata_path: &Path) -> Result<()> { #[instrument(skip_all)] pub async fn handle_neon_extension_upgrade(client: &mut Client) -> Result<()> { - info!("handle neon extension upgrade"); let query = "ALTER EXTENSION neon UPDATE"; info!("update neon extension version with query: {}", query); client.simple_query(query).await?; diff --git a/compute_tools/src/spec_apply.rs b/compute_tools/src/spec_apply.rs index 5ee9c5fbd8..b4e084fd91 100644 --- a/compute_tools/src/spec_apply.rs +++ b/compute_tools/src/spec_apply.rs @@ -1,18 +1,18 @@ use std::collections::{HashMap, HashSet}; use std::fmt::{Debug, Formatter}; use std::future::Future; -use std::iter::empty; -use std::iter::once; +use std::iter::{empty, once}; use std::sync::Arc; -use crate::compute::construct_superuser_query; -use crate::pg_helpers::{escape_literal, DatabaseExt, Escaping, GenericOptionsSearch, RoleExt}; -use anyhow::{bail, Result}; +use anyhow::Result; use compute_api::spec::{ComputeFeature, ComputeSpec, Database, PgIdent, Role}; use futures::future::join_all; use tokio::sync::RwLock; use tokio_postgres::Client; -use tracing::{debug, info_span, Instrument}; +use tracing::{Instrument, debug, info_span, warn}; + +use crate::compute::construct_superuser_query; +use crate::pg_helpers::{DatabaseExt, Escaping, GenericOptionsSearch, RoleExt, escape_literal}; #[derive(Clone)] pub enum DB { @@ -47,6 +47,11 @@ pub enum PerDatabasePhase { DeleteDBRoleReferences, ChangeSchemaPerms, HandleAnonExtension, + /// This is a shared phase, used for both i) dropping dangling LR subscriptions + /// before dropping the DB, and ii) dropping all subscriptions after creating + /// a fresh branch. + /// N.B. we will skip all DBs that are not present in Postgres, invalid, or + /// have `datallowconn = false` (`restrict_conn`). DropLogicalSubscriptions, } @@ -168,7 +173,7 @@ where /// /// In the future we may generate a single stream of changes and then /// sort/merge/batch execution, but for now this is a nice way to improve -/// batching behaviour of the commands. +/// batching behavior of the commands. async fn get_operations<'a>( spec: &'a ComputeSpec, ctx: &'a RwLock, @@ -451,6 +456,41 @@ async fn get_operations<'a>( )), }))), ApplySpecPhase::RunInEachDatabase { db, subphase } => { + // Do some checks that user DB exists and we can access it. + // + // During the phases like DropLogicalSubscriptions, DeleteDBRoleReferences, + // which happen before dropping the DB, the current run could be a retry, + // so it's a valid case when DB is absent already. The case of + // `pg_database.datallowconn = false`/`restrict_conn` is a bit tricky, as + // in theory user can have some dangling objects there, so we will fail at + // the actual drop later. 
Yet, to fix that in the current code we would need + // to ALTER DATABASE, and then check back, but that even more invasive, so + // that's not what we really want to do here. + // + // For ChangeSchemaPerms, skipping DBs we cannot access is totally fine. + if let DB::UserDB(db) = db { + let databases = &ctx.read().await.dbs; + + let edb = match databases.get(&db.name) { + Some(edb) => edb, + None => { + warn!( + "skipping RunInEachDatabase phase {:?}, database {} doesn't exist in PostgreSQL", + subphase, db.name + ); + return Ok(Box::new(empty())); + } + }; + + if edb.restrict_conn || edb.invalid { + warn!( + "skipping RunInEachDatabase phase {:?}, database {} is (restrict_conn={}, invalid={})", + subphase, db.name, edb.restrict_conn, edb.invalid + ); + return Ok(Box::new(empty())); + } + } + match subphase { PerDatabasePhase::DropLogicalSubscriptions => { match &db { @@ -530,25 +570,12 @@ async fn get_operations<'a>( Ok(Box::new(operations)) } PerDatabasePhase::ChangeSchemaPerms => { - let ctx = ctx.read().await; - let databases = &ctx.dbs; - let db = match &db { // ignore schema permissions on the system database DB::SystemDB => return Ok(Box::new(empty())), DB::UserDB(db) => db, }; - if databases.get(&db.name).is_none() { - bail!("database {} doesn't exist in PostgreSQL", db.name); - } - - let edb = databases.get(&db.name).unwrap(); - - if edb.restrict_conn || edb.invalid { - return Ok(Box::new(empty())); - } - let operations = vec![ Operation { query: format!( @@ -566,6 +593,7 @@ async fn get_operations<'a>( Ok(Box::new(operations)) } + // TODO: remove this completely https://github.com/neondatabase/cloud/issues/22663 PerDatabasePhase::HandleAnonExtension => { // Only install Anon into user databases let db = match &db { diff --git a/compute_tools/src/sql/drop_subscriptions.sql b/compute_tools/src/sql/drop_subscriptions.sql index dfb925e48e..03e8e158fa 100644 --- a/compute_tools/src/sql/drop_subscriptions.sql +++ b/compute_tools/src/sql/drop_subscriptions.sql @@ -2,6 +2,7 @@ DO $$ DECLARE subname TEXT; BEGIN + LOCK TABLE pg_subscription IN ACCESS EXCLUSIVE MODE; FOR subname IN SELECT pg_subscription.subname FROM pg_subscription WHERE subdbid = (SELECT oid FROM pg_database WHERE datname = {datname_str}) LOOP EXECUTE format('ALTER SUBSCRIPTION %I DISABLE;', subname); EXECUTE format('ALTER SUBSCRIPTION %I SET (slot_name = NONE);', subname); diff --git a/compute_tools/src/swap.rs b/compute_tools/src/swap.rs index 024c5b338e..ed27a7cba4 100644 --- a/compute_tools/src/swap.rs +++ b/compute_tools/src/swap.rs @@ -1,10 +1,11 @@ use std::path::Path; -use anyhow::{anyhow, Context}; -use tracing::warn; +use anyhow::{Context, anyhow}; +use tracing::{instrument, warn}; pub const RESIZE_SWAP_BIN: &str = "/neonvm/bin/resize-swap"; +#[instrument] pub fn resize_swap(size_bytes: u64) -> anyhow::Result<()> { // run `/neonvm/bin/resize-swap --once {size_bytes}` // diff --git a/compute_tools/tests/config_test.rs b/compute_tools/tests/config_test.rs index 9ab16b1930..7b2bff23d5 100644 --- a/compute_tools/tests/config_test.rs +++ b/compute_tools/tests/config_test.rs @@ -1,7 +1,7 @@ #[cfg(test)] mod config_tests { - use std::fs::{remove_file, File}; + use std::fs::{File, remove_file}; use std::io::{Read, Write}; use std::path::Path; diff --git a/control_plane/src/bin/neon_local.rs b/control_plane/src/bin/neon_local.rs index 02d793400a..7d908ccae9 100644 --- a/control_plane/src/bin/neon_local.rs +++ b/control_plane/src/bin/neon_local.rs @@ -887,20 +887,6 @@ fn print_timeline( Ok(()) } -/// Returns a map of 
timeline IDs to timeline_id@lsn strings. -/// Connects to the pageserver to query this information. -async fn get_timeline_infos( - env: &local_env::LocalEnv, - tenant_shard_id: &TenantShardId, -) -> Result> { - Ok(get_default_pageserver(env) - .timeline_list(tenant_shard_id) - .await? - .into_iter() - .map(|timeline_info| (timeline_info.timeline_id, timeline_info)) - .collect()) -} - /// Helper function to get tenant id from an optional --tenant_id option or from the config file fn get_tenant_id( tenant_id_arg: Option, @@ -1251,12 +1237,6 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res // TODO(sharding): this command shouldn't have to specify a shard ID: we should ask the storage controller // where shard 0 is attached, and query there. let tenant_shard_id = get_tenant_shard_id(args.tenant_shard_id, env)?; - let timeline_infos = get_timeline_infos(env, &tenant_shard_id) - .await - .unwrap_or_else(|e| { - eprintln!("Failed to load timeline info: {}", e); - HashMap::new() - }); let timeline_name_mappings = env.timeline_name_mappings(); @@ -1285,12 +1265,9 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res lsn.to_string() } _ => { - // -> primary endpoint or hot replica - // Use the LSN at the end of the timeline. - timeline_infos - .get(&endpoint.timeline_id) - .map(|bi| bi.last_record_lsn.to_string()) - .unwrap_or_else(|| "?".to_string()) + // As the LSN here refers to the one that the compute is started with, + // we display nothing as it is a primary/hot standby compute. + "---".to_string() } }; diff --git a/control_plane/src/endpoint.rs b/control_plane/src/endpoint.rs index c3c8229c38..407578abb8 100644 --- a/control_plane/src/endpoint.rs +++ b/control_plane/src/endpoint.rs @@ -46,6 +46,8 @@ use std::process::Command; use std::str::FromStr; use std::sync::Arc; use std::time::Duration; +use std::time::SystemTime; +use std::time::UNIX_EPOCH; use anyhow::{anyhow, bail, Context, Result}; use compute_api::requests::ConfigurationRequest; @@ -59,6 +61,7 @@ use nix::sys::signal::Signal; use pageserver_api::shard::ShardStripeSize; use reqwest::header::CONTENT_TYPE; use serde::{Deserialize, Serialize}; +use tracing::debug; use url::Host; use utils::id::{NodeId, TenantId, TimelineId}; @@ -81,8 +84,10 @@ pub struct EndpointConf { internal_http_port: u16, pg_version: u32, skip_pg_catalog_updates: bool, + reconfigure_concurrency: usize, drop_subscriptions_before_start: bool, features: Vec, + cluster: Option, } // @@ -179,7 +184,9 @@ impl ComputeControlPlane { // we also skip catalog updates in the cloud. 
skip_pg_catalog_updates, drop_subscriptions_before_start, + reconfigure_concurrency: 1, features: vec![], + cluster: None, }); ep.create_endpoint_dir()?; @@ -196,7 +203,9 @@ impl ComputeControlPlane { pg_version, skip_pg_catalog_updates, drop_subscriptions_before_start, + reconfigure_concurrency: 1, features: vec![], + cluster: None, })?, )?; std::fs::write( @@ -261,8 +270,11 @@ pub struct Endpoint { skip_pg_catalog_updates: bool, drop_subscriptions_before_start: bool, + reconfigure_concurrency: usize, // Feature flags features: Vec, + // Cluster settings + cluster: Option, } #[derive(PartialEq, Eq)] @@ -302,6 +314,8 @@ impl Endpoint { let conf: EndpointConf = serde_json::from_slice(&std::fs::read(entry.path().join("endpoint.json"))?)?; + debug!("serialized endpoint conf: {:?}", conf); + Ok(Endpoint { pg_address: SocketAddr::new(IpAddr::from(Ipv4Addr::LOCALHOST), conf.pg_port), external_http_address: SocketAddr::new( @@ -319,8 +333,10 @@ impl Endpoint { tenant_id: conf.tenant_id, pg_version: conf.pg_version, skip_pg_catalog_updates: conf.skip_pg_catalog_updates, + reconfigure_concurrency: conf.reconfigure_concurrency, drop_subscriptions_before_start: conf.drop_subscriptions_before_start, features: conf.features, + cluster: conf.cluster, }) } @@ -607,7 +623,7 @@ impl Endpoint { }; // Create spec file - let spec = ComputeSpec { + let mut spec = ComputeSpec { skip_pg_catalog_updates: self.skip_pg_catalog_updates, format_version: 1.0, operation_uuid: None, @@ -640,7 +656,7 @@ impl Endpoint { Vec::new() }, settings: None, - postgresql_conf: Some(postgresql_conf), + postgresql_conf: Some(postgresql_conf.clone()), }, delta_operations: None, tenant_id: Some(self.tenant_id), @@ -653,9 +669,35 @@ impl Endpoint { pgbouncer_settings: None, shard_stripe_size: Some(shard_stripe_size), local_proxy_config: None, - reconfigure_concurrency: 1, + reconfigure_concurrency: self.reconfigure_concurrency, drop_subscriptions_before_start: self.drop_subscriptions_before_start, }; + + // this strange code is needed to support respec() in tests + if self.cluster.is_some() { + debug!("Cluster is already set in the endpoint spec, using it"); + spec.cluster = self.cluster.clone().unwrap(); + + debug!("spec.cluster {:?}", spec.cluster); + + // fill missing fields again + if create_test_user { + spec.cluster.roles.push(Role { + name: PgIdent::from_str("test").unwrap(), + encrypted_password: None, + options: None, + }); + spec.cluster.databases.push(Database { + name: PgIdent::from_str("neondb").unwrap(), + owner: PgIdent::from_str("test").unwrap(), + options: None, + restrict_conn: false, + invalid: false, + }); + } + spec.cluster.postgresql_conf = Some(postgresql_conf); + } + let spec_path = self.endpoint_path().join("spec.json"); std::fs::write(spec_path, serde_json::to_string_pretty(&spec)?)?; @@ -673,18 +715,14 @@ impl Endpoint { println!("Also at '{}'", conn_str); } let mut cmd = Command::new(self.env.neon_distrib_dir.join("compute_ctl")); - //cmd.args([ - // "--external-http-port", - // &self.external_http_address.port().to_string(), - //]) - //.args([ - // "--internal-http-port", - // &self.internal_http_address.port().to_string(), - //]) cmd.args([ - "--http-port", + "--external-http-port", &self.external_http_address.port().to_string(), ]) + .args([ + "--internal-http-port", + &self.internal_http_address.port().to_string(), + ]) .args(["--pgdata", self.pgdata().to_str().unwrap()]) .args(["--connstr", &conn_str]) .args([ @@ -701,20 +739,16 @@ impl Endpoint { ]) // TODO: It would be nice if we generated compute IDs 
with the same // algorithm as the real control plane. - // - // TODO: Add this back when - // https://github.com/neondatabase/neon/pull/10747 is merged. - // - //.args([ - // "--compute-id", - // &format!( - // "compute-{}", - // SystemTime::now() - // .duration_since(UNIX_EPOCH) - // .unwrap() - // .as_secs() - // ), - //]) + .args([ + "--compute-id", + &format!( + "compute-{}", + SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_secs() + ), + ]) .stdin(std::process::Stdio::null()) .stderr(logfile.try_clone()?) .stdout(logfile); diff --git a/control_plane/src/pageserver.rs b/control_plane/src/pageserver.rs index 28d130d9e0..2bf89b7bfa 100644 --- a/control_plane/src/pageserver.rs +++ b/control_plane/src/pageserver.rs @@ -335,13 +335,21 @@ impl PageServerNode { .map(|x| x.parse::()) .transpose() .context("Failed to parse 'checkpoint_distance' as an integer")?, - checkpoint_timeout: settings.remove("checkpoint_timeout").map(|x| x.to_string()), + checkpoint_timeout: settings + .remove("checkpoint_timeout") + .map(humantime::parse_duration) + .transpose() + .context("Failed to parse 'checkpoint_timeout' as duration")?, compaction_target_size: settings .remove("compaction_target_size") .map(|x| x.parse::()) .transpose() .context("Failed to parse 'compaction_target_size' as an integer")?, - compaction_period: settings.remove("compaction_period").map(|x| x.to_string()), + compaction_period: settings + .remove("compaction_period") + .map(humantime::parse_duration) + .transpose() + .context("Failed to parse 'compaction_period' as duration")?, compaction_threshold: settings .remove("compaction_threshold") .map(|x| x.parse::()) @@ -387,7 +395,10 @@ impl PageServerNode { .map(|x| x.parse::()) .transpose() .context("Failed to parse 'gc_horizon' as an integer")?, - gc_period: settings.remove("gc_period").map(|x| x.to_string()), + gc_period: settings.remove("gc_period") + .map(humantime::parse_duration) + .transpose() + .context("Failed to parse 'gc_period' as duration")?, image_creation_threshold: settings .remove("image_creation_threshold") .map(|x| x.parse::()) @@ -403,13 +414,20 @@ impl PageServerNode { .map(|x| x.parse::()) .transpose() .context("Failed to parse 'image_creation_preempt_threshold' as integer")?, - pitr_interval: settings.remove("pitr_interval").map(|x| x.to_string()), + pitr_interval: settings.remove("pitr_interval") + .map(humantime::parse_duration) + .transpose() + .context("Failed to parse 'pitr_interval' as duration")?, walreceiver_connect_timeout: settings .remove("walreceiver_connect_timeout") - .map(|x| x.to_string()), + .map(humantime::parse_duration) + .transpose() + .context("Failed to parse 'walreceiver_connect_timeout' as duration")?, lagging_wal_timeout: settings .remove("lagging_wal_timeout") - .map(|x| x.to_string()), + .map(humantime::parse_duration) + .transpose() + .context("Failed to parse 'lagging_wal_timeout' as duration")?, max_lsn_wal_lag: settings .remove("max_lsn_wal_lag") .map(|x| x.parse::()) @@ -427,8 +445,14 @@ impl PageServerNode { .context("Failed to parse 'min_resident_size_override' as integer")?, evictions_low_residence_duration_metric_threshold: settings .remove("evictions_low_residence_duration_metric_threshold") - .map(|x| x.to_string()), - heatmap_period: settings.remove("heatmap_period").map(|x| x.to_string()), + .map(humantime::parse_duration) + .transpose() + .context("Failed to parse 'evictions_low_residence_duration_metric_threshold' as duration")?, + heatmap_period: settings + .remove("heatmap_period") + 
.map(humantime::parse_duration) + .transpose() + .context("Failed to parse 'heatmap_period' as duration")?, lazy_slru_download: settings .remove("lazy_slru_download") .map(|x| x.parse::()) @@ -439,10 +463,15 @@ impl PageServerNode { .map(serde_json::from_str) .transpose() .context("parse `timeline_get_throttle` from json")?, - lsn_lease_length: settings.remove("lsn_lease_length").map(|x| x.to_string()), + lsn_lease_length: settings.remove("lsn_lease_length") + .map(humantime::parse_duration) + .transpose() + .context("Failed to parse 'lsn_lease_length' as duration")?, lsn_lease_length_for_ts: settings .remove("lsn_lease_length_for_ts") - .map(|x| x.to_string()), + .map(humantime::parse_duration) + .transpose() + .context("Failed to parse 'lsn_lease_length_for_ts' as duration")?, timeline_offloading: settings .remove("timeline_offloading") .map(|x| x.parse::()) diff --git a/control_plane/storcon_cli/src/main.rs b/control_plane/storcon_cli/src/main.rs index 3c574efc63..40b86e4110 100644 --- a/control_plane/storcon_cli/src/main.rs +++ b/control_plane/storcon_cli/src/main.rs @@ -47,6 +47,9 @@ enum Command { listen_http_addr: String, #[arg(long)] listen_http_port: u16, + #[arg(long)] + listen_https_port: Option, + #[arg(long)] availability_zone_id: String, }, @@ -394,6 +397,7 @@ async fn main() -> anyhow::Result<()> { listen_pg_port, listen_http_addr, listen_http_port, + listen_https_port, availability_zone_id, } => { storcon_client @@ -406,6 +410,7 @@ async fn main() -> anyhow::Result<()> { listen_pg_port, listen_http_addr, listen_http_port, + listen_https_port, availability_zone_id: AvailabilityZone(availability_zone_id), }), ) @@ -954,7 +959,7 @@ async fn main() -> anyhow::Result<()> { threshold: threshold.into(), }, )), - heatmap_period: Some("300s".to_string()), + heatmap_period: Some(Duration::from_secs(300)), ..Default::default() }, }) diff --git a/docker-compose/compute_wrapper/shell/compute.sh b/docker-compose/compute_wrapper/shell/compute.sh index b4f8d3d66a..9dbdcce69f 100755 --- a/docker-compose/compute_wrapper/shell/compute.sh +++ b/docker-compose/compute_wrapper/shell/compute.sh @@ -77,4 +77,5 @@ echo "Start compute node" /usr/local/bin/compute_ctl --pgdata /var/db/postgres/compute \ -C "postgresql://cloud_admin@localhost:55433/postgres" \ -b /usr/local/bin/postgres \ + --compute-id "compute-$RANDOM" \ -S ${SPEC_FILE} diff --git a/docker-compose/docker-compose.yml b/docker-compose/docker-compose.yml index 489d60f38c..95d4ff7b2a 100644 --- a/docker-compose/docker-compose.yml +++ b/docker-compose/docker-compose.yml @@ -186,7 +186,7 @@ services: neon-test-extensions: profiles: ["test-extensions"] - image: ${REPOSITORY:-neondatabase}/neon-test-extensions-v${PG_TEST_VERSION:-16}:${TAG:-latest} + image: ${REPOSITORY:-neondatabase}/neon-test-extensions-v${PG_TEST_VERSION:-16}:${TEST_EXTENSIONS_TAG:-${TAG:-latest}} environment: - PGPASSWORD=cloud_admin entrypoint: diff --git a/docker-compose/docker_compose_test.sh b/docker-compose/docker_compose_test.sh index dd520d4986..0f03d600a3 100755 --- a/docker-compose/docker_compose_test.sh +++ b/docker-compose/docker_compose_test.sh @@ -51,8 +51,6 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do done if [ $pg_version -ge 16 ]; then - docker cp ext-src $TEST_CONTAINER_NAME:/ - docker exec $TEST_CONTAINER_NAME bash -c "apt update && apt install -y libtap-parser-sourcehandler-pgtap-perl" # This is required for the pg_hint_plan test, to prevent flaky log message causing the test to fail # It cannot be moved to Dockerfile now because the 
database directory is created after the start of the container echo Adding dummy config @@ -81,15 +79,8 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do [ $EXT_SUCCESS -eq 0 ] && FAILED=$(tail -1 testout.txt | awk '{for(i=1;i<=NF;i++){print "/ext-src/"$i;}}') [ $CONTRIB_SUCCESS -eq 0 ] && CONTRIB_FAILED=$(tail -1 testout_contrib.txt | awk '{for(i=0;i<=NF;i++){print "/postgres/contrib/"$i;}}') for d in $FAILED $CONTRIB_FAILED; do - dn="$(basename $d)" - rm -rf $dn - mkdir $dn - docker cp $TEST_CONTAINER_NAME:$d/regression.diffs $dn || [ $? -eq 1 ] - docker cp $TEST_CONTAINER_NAME:$d/regression.out $dn || [ $? -eq 1 ] - cat $dn/regression.out $dn/regression.diffs || true - rm -rf $dn + docker exec $TEST_CONTAINER_NAME bash -c 'for file in $(find '"$d"' -name regression.diffs -o -name regression.out); do cat $file; done' || [ $? -eq 1 ] done - rm -rf $FAILED exit 1 fi fi diff --git a/docker-compose/ext-src/pg_repack-src/test-upgrade.sh b/docker-compose/ext-src/pg_repack-src/test-upgrade.sh new file mode 100755 index 0000000000..5021eb4027 --- /dev/null +++ b/docker-compose/ext-src/pg_repack-src/test-upgrade.sh @@ -0,0 +1,5 @@ +#!/bin/sh +set -ex +cd "$(dirname ${0})" +PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress +${PG_REGRESS} --use-existing --inputdir=./regress --bindir='/usr/local/pgsql/bin' --dbname=contrib_regression repack-setup repack-run error-on-invalid-idx no-error-on-invalid-idx after-schema repack-check nosuper get_order_by trigger diff --git a/docker-compose/ext-src/pg_semver-src/test-upgrade-16.patch b/docker-compose/ext-src/pg_semver-src/test-upgrade-v16.patch similarity index 100% rename from docker-compose/ext-src/pg_semver-src/test-upgrade-16.patch rename to docker-compose/ext-src/pg_semver-src/test-upgrade-v16.patch diff --git a/docker-compose/ext-src/pg_semver-src/test-upgrade-17.patch b/docker-compose/ext-src/pg_semver-src/test-upgrade-v17.patch similarity index 100% rename from docker-compose/ext-src/pg_semver-src/test-upgrade-17.patch rename to docker-compose/ext-src/pg_semver-src/test-upgrade-v17.patch diff --git a/docker-compose/test_extensions_upgrade.sh b/docker-compose/test_extensions_upgrade.sh index 4a9024569b..c399109eb9 100755 --- a/docker-compose/test_extensions_upgrade.sh +++ b/docker-compose/test_extensions_upgrade.sh @@ -6,8 +6,8 @@ generate_id() { local -n resvar=$1 printf -v resvar '%08x%08x%08x%08x' $SRANDOM $SRANDOM $SRANDOM $SRANDOM } -if [ -z ${OLDTAG+x} ] || [ -z ${NEWTAG+x} ] || [ -z "${OLDTAG}" ] || [ -z "${NEWTAG}" ]; then - echo OLDTAG and NEWTAG must be defined +if [ -z ${OLD_COMPUTE_TAG+x} ] || [ -z ${NEW_COMPUTE_TAG+x} ] || [ -z "${OLD_COMPUTE_TAG}" ] || [ -z "${NEW_COMPUTE_TAG}" ]; then + echo OLD_COMPUTE_TAG and NEW_COMPUTE_TAG must be defined exit 1 fi export PG_VERSION=${PG_VERSION:-16} @@ -43,10 +43,11 @@ EXTENSIONS='[ {"extname": "semver", "extdir": "pg_semver-src"}, {"extname": "pg_ivm", "extdir": "pg_ivm-src"}, {"extname": "pgjwt", "extdir": "pgjwt-src"}, -{"extname": "pgtap", "extdir": "pgtap-src"} +{"extname": "pgtap", "extdir": "pgtap-src"}, +{"extname": "pg_repack", "extdir": "pg_repack-src"} ]' EXTNAMES=$(echo ${EXTENSIONS} | jq -r '.[].extname' | paste -sd ' ' -) -TAG=${NEWTAG} docker compose --profile test-extensions up --quiet-pull --build -d +COMPUTE_TAG=${NEW_COMPUTE_TAG} docker compose --profile test-extensions up --quiet-pull --build -d wait_for_ready docker compose exec neon-test-extensions psql -c "DROP DATABASE IF EXISTS contrib_regression" docker compose exec neon-test-extensions psql 
-c "CREATE DATABASE contrib_regression" @@ -54,9 +55,8 @@ create_extensions "${EXTNAMES}" query="select json_object_agg(extname,extversion) from pg_extension where extname in ('${EXTNAMES// /\',\'}')" new_vers=$(docker compose exec neon-test-extensions psql -Aqt -d contrib_regression -c "$query") docker compose --profile test-extensions down -TAG=${OLDTAG} docker compose --profile test-extensions up --quiet-pull --build -d --force-recreate +COMPUTE_TAG=${OLD_COMPUTE_TAG} docker compose --profile test-extensions up --quiet-pull --build -d --force-recreate wait_for_ready -docker compose cp ext-src neon-test-extensions:/ docker compose exec neon-test-extensions psql -c "DROP DATABASE IF EXISTS contrib_regression" docker compose exec neon-test-extensions psql -c "CREATE DATABASE contrib_regression" docker compose exec neon-test-extensions psql -c "CREATE DATABASE pgtap_regression" @@ -86,8 +86,8 @@ else ) result=$(curl "${PARAMS[@]}") echo $result | jq . - TENANT_ID=${tenant_id} TIMELINE_ID=${new_timeline_id} TAG=${OLDTAG} docker compose down compute compute_is_ready - COMPUTE_TAG=${NEWTAG} TAG=${OLDTAG} TENANT_ID=${tenant_id} TIMELINE_ID=${new_timeline_id} docker compose up --quiet-pull -d --build compute compute_is_ready + TENANT_ID=${tenant_id} TIMELINE_ID=${new_timeline_id} COMPUTE_TAG=${OLD_COMPUTE_TAG} docker compose down compute compute_is_ready + COMPUTE_TAG=${NEW_COMPUTE_TAG} TENANT_ID=${tenant_id} TIMELINE_ID=${new_timeline_id} docker compose up --quiet-pull -d --build compute compute_is_ready wait_for_ready TID=$(docker compose exec neon-test-extensions psql -Aqt -c "SHOW neon.timeline_id") if [ ${TID} != ${new_timeline_id} ]; then diff --git a/libs/compute_api/Cargo.toml b/libs/compute_api/Cargo.toml index c11a1b6688..0d1618c1b2 100644 --- a/libs/compute_api/Cargo.toml +++ b/libs/compute_api/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "compute_api" version = "0.1.0" -edition.workspace = true +edition = "2024" license.workspace = true [dependencies] diff --git a/libs/compute_api/src/requests.rs b/libs/compute_api/src/requests.rs index 0c256cae2e..3fbdfcf83f 100644 --- a/libs/compute_api/src/requests.rs +++ b/libs/compute_api/src/requests.rs @@ -1,11 +1,10 @@ //! Structs representing the JSON formats used in the compute_ctl's HTTP API. -use crate::{ - privilege::Privilege, - responses::ComputeCtlConfig, - spec::{ComputeSpec, ExtVersion, PgIdent}, -}; use serde::{Deserialize, Serialize}; +use crate::privilege::Privilege; +use crate::responses::ComputeCtlConfig; +use crate::spec::{ComputeSpec, ExtVersion, PgIdent}; + /// Request of the /configure API /// /// We now pass only `spec` in the configuration request, but later we can diff --git a/libs/compute_api/src/responses.rs b/libs/compute_api/src/responses.rs index a6248019d9..35c580bd37 100644 --- a/libs/compute_api/src/responses.rs +++ b/libs/compute_api/src/responses.rs @@ -6,10 +6,8 @@ use chrono::{DateTime, Utc}; use jsonwebtoken::jwk::JwkSet; use serde::{Deserialize, Serialize, Serializer}; -use crate::{ - privilege::Privilege, - spec::{ComputeSpec, Database, ExtVersion, PgIdent, Role}, -}; +use crate::privilege::Privilege; +use crate::spec::{ComputeSpec, Database, ExtVersion, PgIdent, Role}; #[derive(Serialize, Debug, Deserialize)] pub struct GenericAPIError { diff --git a/libs/compute_api/src/spec.rs b/libs/compute_api/src/spec.rs index 767a34bcbc..d02bfd6814 100644 --- a/libs/compute_api/src/spec.rs +++ b/libs/compute_api/src/spec.rs @@ -5,13 +5,12 @@ //! and connect it to the storage nodes. 
use std::collections::HashMap; +use regex::Regex; +use remote_storage::RemotePath; use serde::{Deserialize, Serialize}; use utils::id::{TenantId, TimelineId}; use utils::lsn::Lsn; -use regex::Regex; -use remote_storage::RemotePath; - /// String type alias representing Postgres identifier and /// intended to be used for DB / role names. pub type PgIdent = String; @@ -252,7 +251,7 @@ pub enum ComputeMode { Replica, } -#[derive(Clone, Debug, Default, Deserialize, Serialize)] +#[derive(Clone, Debug, Default, Deserialize, Serialize, PartialEq, Eq)] pub struct Cluster { pub cluster_id: Option, pub name: Option, @@ -283,7 +282,7 @@ pub struct DeltaOp { /// Rust representation of Postgres role info with only those fields /// that matter for us. -#[derive(Clone, Debug, Deserialize, Serialize)] +#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq)] pub struct Role { pub name: PgIdent, pub encrypted_password: Option, @@ -292,7 +291,7 @@ pub struct Role { /// Rust representation of Postgres database info with only those fields /// that matter for us. -#[derive(Clone, Debug, Deserialize, Serialize)] +#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq)] pub struct Database { pub name: PgIdent, pub owner: PgIdent, @@ -308,7 +307,7 @@ pub struct Database { /// Common type representing both SQL statement params with or without value, /// like `LOGIN` or `OWNER username` in the `CREATE/ALTER ROLE`, and config /// options like `wal_level = logical`. -#[derive(Clone, Debug, Deserialize, Serialize)] +#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq)] pub struct GenericOption { pub name: String, pub value: Option, @@ -339,9 +338,10 @@ pub struct JwksSettings { #[cfg(test)] mod tests { - use super::*; use std::fs::File; + use super::*; + #[test] fn allow_installing_remote_extensions() { let rspec: RemoteExtSpec = serde_json::from_value(serde_json::json!({ diff --git a/libs/pageserver_api/Cargo.toml b/libs/pageserver_api/Cargo.toml index 79da05da6c..87dfdfb5ec 100644 --- a/libs/pageserver_api/Cargo.toml +++ b/libs/pageserver_api/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "pageserver_api" version = "0.1.0" -edition.workspace = true +edition = "2024" license.workspace = true [features] diff --git a/libs/pageserver_api/src/config.rs b/libs/pageserver_api/src/config.rs index 0f33bcf45b..039cc1319e 100644 --- a/libs/pageserver_api/src/config.rs +++ b/libs/pageserver_api/src/config.rs @@ -9,19 +9,18 @@ pub const DEFAULT_PG_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_PG_LISTEN pub const DEFAULT_HTTP_LISTEN_PORT: u16 = 9898; pub const DEFAULT_HTTP_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_HTTP_LISTEN_PORT}"); +use std::collections::HashMap; +use std::num::{NonZeroU64, NonZeroUsize}; +use std::str::FromStr; +use std::time::Duration; + use postgres_backend::AuthType; use remote_storage::RemoteStorageConfig; use serde_with::serde_as; -use std::{ - collections::HashMap, - num::{NonZeroU64, NonZeroUsize}, - str::FromStr, - time::Duration, -}; -use utils::{logging::LogFormat, postgres_client::PostgresClientProtocol}; +use utils::logging::LogFormat; +use utils::postgres_client::PostgresClientProtocol; -use crate::models::ImageCompressionAlgorithm; -use crate::models::LsnLease; +use crate::models::{ImageCompressionAlgorithm, LsnLease}; // Certain metadata (e.g. externally-addressable name, AZ) is delivered // as a separate structure. 
This information is not neeed by the pageserver @@ -122,6 +121,8 @@ pub struct ConfigToml { pub page_service_pipelining: PageServicePipeliningConfig, pub get_vectored_concurrent_io: GetVectoredConcurrentIo, pub enable_read_path_debugging: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub validate_wal_contiguity: Option, } #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)] @@ -365,10 +366,10 @@ pub struct TenantConfigToml { } pub mod defaults { - use crate::models::ImageCompressionAlgorithm; - pub use storage_broker::DEFAULT_ENDPOINT as BROKER_DEFAULT_ENDPOINT; + use crate::models::ImageCompressionAlgorithm; + pub const DEFAULT_WAIT_LSN_TIMEOUT: &str = "300 s"; pub const DEFAULT_WAL_REDO_TIMEOUT: &str = "60 s"; @@ -521,6 +522,7 @@ impl Default for ConfigToml { } else { None }, + validate_wal_contiguity: None, } } } @@ -581,7 +583,7 @@ pub mod tenant_conf_defaults { // image layers should be created. pub const DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD: u8 = 2; pub const DEFAULT_GC_COMPACTION_ENABLED: bool = false; - pub const DEFAULT_GC_COMPACTION_INITIAL_THRESHOLD_KB: u64 = 10240000; + pub const DEFAULT_GC_COMPACTION_INITIAL_THRESHOLD_KB: u64 = 5 * 1024 * 1024; // 5GB pub const DEFAULT_GC_COMPACTION_RATIO_PERCENT: u64 = 100; } diff --git a/libs/pageserver_api/src/controller_api.rs b/libs/pageserver_api/src/controller_api.rs index 42f6e47e63..2cfe1a85f9 100644 --- a/libs/pageserver_api/src/controller_api.rs +++ b/libs/pageserver_api/src/controller_api.rs @@ -9,11 +9,8 @@ use std::time::{Duration, Instant}; use serde::{Deserialize, Serialize}; use utils::id::{NodeId, TenantId}; -use crate::models::PageserverUtilization; -use crate::{ - models::{ShardParameters, TenantConfig}, - shard::{ShardStripeSize, TenantShardId}, -}; +use crate::models::{PageserverUtilization, ShardParameters, TenantConfig}; +use crate::shard::{ShardStripeSize, TenantShardId}; #[derive(Serialize, Deserialize, Debug)] #[serde(deny_unknown_fields)] @@ -57,6 +54,7 @@ pub struct NodeRegisterRequest { pub listen_http_addr: String, pub listen_http_port: u16, + pub listen_https_port: Option, pub availability_zone_id: AvailabilityZone, } @@ -105,6 +103,7 @@ pub struct TenantLocateResponseShard { pub listen_http_addr: String, pub listen_http_port: u16, + pub listen_https_port: Option, } #[derive(Serialize, Deserialize)] @@ -148,6 +147,7 @@ pub struct NodeDescribeResponse { pub listen_http_addr: String, pub listen_http_port: u16, + pub listen_https_port: Option, pub listen_pg_addr: String, pub listen_pg_port: u16, @@ -351,7 +351,7 @@ impl FromStr for SkSchedulingPolicy { _ => { return Err(anyhow::anyhow!( "Unknown scheduling policy '{s}', try active,pause,decomissioned" - )) + )); } }) } @@ -454,9 +454,10 @@ pub struct SafekeeperSchedulingPolicyRequest { #[cfg(test)] mod test { - use super::*; use serde_json; + use super::*; + /// Check stability of PlacementPolicy's serialization #[test] fn placement_policy_encoding() -> anyhow::Result<()> { diff --git a/libs/pageserver_api/src/key.rs b/libs/pageserver_api/src/key.rs index b88a2e46a1..8836e7ec87 100644 --- a/libs/pageserver_api/src/key.rs +++ b/libs/pageserver_api/src/key.rs @@ -1,11 +1,12 @@ -use anyhow::{bail, Result}; -use byteorder::{ByteOrder, BE}; +use std::fmt; +use std::ops::Range; + +use anyhow::{Result, bail}; +use byteorder::{BE, ByteOrder}; use bytes::Bytes; use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM}; -use postgres_ffi::Oid; -use postgres_ffi::RepOriginId; +use postgres_ffi::{Oid, RepOriginId}; 
use serde::{Deserialize, Serialize}; -use std::{fmt, ops::Range}; use utils::const_assert; use crate::reltag::{BlockNumber, RelTag, SlruKind}; @@ -954,25 +955,22 @@ impl std::str::FromStr for Key { mod tests { use std::str::FromStr; - use crate::key::is_metadata_key_slice; - use crate::key::Key; - - use rand::Rng; - use rand::SeedableRng; + use rand::{Rng, SeedableRng}; use super::AUX_KEY_PREFIX; + use crate::key::{Key, is_metadata_key_slice}; #[test] fn display_fromstr_bijection() { let mut rng = rand::rngs::StdRng::seed_from_u64(42); let key = Key { - field1: rng.gen(), - field2: rng.gen(), - field3: rng.gen(), - field4: rng.gen(), - field5: rng.gen(), - field6: rng.gen(), + field1: rng.r#gen(), + field2: rng.r#gen(), + field3: rng.r#gen(), + field4: rng.r#gen(), + field5: rng.r#gen(), + field6: rng.r#gen(), }; assert_eq!(key, Key::from_str(&format!("{key}")).unwrap()); diff --git a/libs/pageserver_api/src/keyspace.rs b/libs/pageserver_api/src/keyspace.rs index c55b9e9484..e505f23e49 100644 --- a/libs/pageserver_api/src/keyspace.rs +++ b/libs/pageserver_api/src/keyspace.rs @@ -1,11 +1,10 @@ -use postgres_ffi::BLCKSZ; use std::ops::Range; -use crate::{ - key::Key, - shard::{ShardCount, ShardIdentity}, -}; use itertools::Itertools; +use postgres_ffi::BLCKSZ; + +use crate::key::Key; +use crate::shard::{ShardCount, ShardIdentity}; /// /// Represents a set of Keys, in a compact form. @@ -609,15 +608,13 @@ pub fn singleton_range(key: Key) -> Range { #[cfg(test)] mod tests { + use std::fmt::Write; + use rand::{RngCore, SeedableRng}; - use crate::{ - models::ShardParameters, - shard::{ShardCount, ShardNumber}, - }; - use super::*; - use std::fmt::Write; + use crate::models::ShardParameters; + use crate::shard::{ShardCount, ShardNumber}; // Helper function to create a key range. // diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs index dd7bea2916..ea565e7769 100644 --- a/libs/pageserver_api/src/models.rs +++ b/libs/pageserver_api/src/models.rs @@ -2,38 +2,30 @@ pub mod detach_ancestor; pub mod partitioning; pub mod utilization; -#[cfg(feature = "testing")] -use camino::Utf8PathBuf; -pub use utilization::PageserverUtilization; - use core::ops::Range; -use std::{ - collections::HashMap, - fmt::Display, - io::{BufRead, Read}, - num::{NonZeroU32, NonZeroU64, NonZeroUsize}, - str::FromStr, - time::{Duration, SystemTime}, -}; +use std::collections::HashMap; +use std::fmt::Display; +use std::io::{BufRead, Read}; +use std::num::{NonZeroU32, NonZeroU64, NonZeroUsize}; +use std::str::FromStr; +use std::time::{Duration, SystemTime}; use byteorder::{BigEndian, ReadBytesExt}; +use bytes::{Buf, BufMut, Bytes, BytesMut}; +#[cfg(feature = "testing")] +use camino::Utf8PathBuf; use postgres_ffi::BLCKSZ; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use serde_with::serde_as; -use utils::{ - completion, - id::{NodeId, TenantId, TimelineId}, - lsn::Lsn, - postgres_client::PostgresClientProtocol, - serde_system_time, -}; +pub use utilization::PageserverUtilization; +use utils::id::{NodeId, TenantId, TimelineId}; +use utils::lsn::Lsn; +use utils::postgres_client::PostgresClientProtocol; +use utils::{completion, serde_system_time}; -use crate::{ - key::{CompactKey, Key}, - reltag::RelTag, - shard::{ShardCount, ShardStripeSize, TenantShardId}, -}; -use bytes::{Buf, BufMut, Bytes, BytesMut}; +use crate::key::{CompactKey, Key}; +use crate::reltag::RelTag; +use crate::shard::{ShardCount, ShardStripeSize, TenantShardId}; /// The state of a tenant in this pageserver. 
/// @@ -332,7 +324,8 @@ pub struct ImportPgdataIdempotencyKey(pub String); impl ImportPgdataIdempotencyKey { pub fn random() -> Self { - use rand::{distributions::Alphanumeric, Rng}; + use rand::Rng; + use rand::distributions::Alphanumeric; Self( rand::thread_rng() .sample_iter(&Alphanumeric) @@ -526,9 +519,13 @@ pub struct TenantConfigPatch { #[derive(Serialize, Deserialize, Debug, Default, Clone, Eq, PartialEq)] pub struct TenantConfig { pub checkpoint_distance: Option, - pub checkpoint_timeout: Option, + #[serde(default)] + #[serde(with = "humantime_serde")] + pub checkpoint_timeout: Option, pub compaction_target_size: Option, - pub compaction_period: Option, + #[serde(default)] + #[serde(with = "humantime_serde")] + pub compaction_period: Option, pub compaction_threshold: Option, pub compaction_upper_limit: Option, // defer parsing compaction_algorithm, like eviction_policy @@ -539,22 +536,38 @@ pub struct TenantConfig { pub l0_flush_stall_threshold: Option, pub l0_flush_wait_upload: Option, pub gc_horizon: Option, - pub gc_period: Option, + #[serde(default)] + #[serde(with = "humantime_serde")] + pub gc_period: Option, pub image_creation_threshold: Option, - pub pitr_interval: Option, - pub walreceiver_connect_timeout: Option, - pub lagging_wal_timeout: Option, + #[serde(default)] + #[serde(with = "humantime_serde")] + pub pitr_interval: Option, + #[serde(default)] + #[serde(with = "humantime_serde")] + pub walreceiver_connect_timeout: Option, + #[serde(default)] + #[serde(with = "humantime_serde")] + pub lagging_wal_timeout: Option, pub max_lsn_wal_lag: Option, pub eviction_policy: Option, pub min_resident_size_override: Option, - pub evictions_low_residence_duration_metric_threshold: Option, - pub heatmap_period: Option, + #[serde(default)] + #[serde(with = "humantime_serde")] + pub evictions_low_residence_duration_metric_threshold: Option, + #[serde(default)] + #[serde(with = "humantime_serde")] + pub heatmap_period: Option, pub lazy_slru_download: Option, pub timeline_get_throttle: Option, pub image_layer_creation_check_threshold: Option, pub image_creation_preempt_threshold: Option, - pub lsn_lease_length: Option, - pub lsn_lease_length_for_ts: Option, + #[serde(default)] + #[serde(with = "humantime_serde")] + pub lsn_lease_length: Option, + #[serde(default)] + #[serde(with = "humantime_serde")] + pub lsn_lease_length_for_ts: Option, pub timeline_offloading: Option, pub wal_receiver_protocol_override: Option, pub rel_size_v2_enabled: Option, @@ -564,7 +577,10 @@ pub struct TenantConfig { } impl TenantConfig { - pub fn apply_patch(self, patch: TenantConfigPatch) -> TenantConfig { + pub fn apply_patch( + self, + patch: TenantConfigPatch, + ) -> Result { let Self { mut checkpoint_distance, mut checkpoint_timeout, @@ -604,11 +620,17 @@ impl TenantConfig { } = self; patch.checkpoint_distance.apply(&mut checkpoint_distance); - patch.checkpoint_timeout.apply(&mut checkpoint_timeout); + patch + .checkpoint_timeout + .map(|v| humantime::parse_duration(&v))? + .apply(&mut checkpoint_timeout); patch .compaction_target_size .apply(&mut compaction_target_size); - patch.compaction_period.apply(&mut compaction_period); + patch + .compaction_period + .map(|v| humantime::parse_duration(&v))? 
+ .apply(&mut compaction_period); patch.compaction_threshold.apply(&mut compaction_threshold); patch .compaction_upper_limit @@ -626,15 +648,25 @@ impl TenantConfig { .apply(&mut l0_flush_stall_threshold); patch.l0_flush_wait_upload.apply(&mut l0_flush_wait_upload); patch.gc_horizon.apply(&mut gc_horizon); - patch.gc_period.apply(&mut gc_period); + patch + .gc_period + .map(|v| humantime::parse_duration(&v))? + .apply(&mut gc_period); patch .image_creation_threshold .apply(&mut image_creation_threshold); - patch.pitr_interval.apply(&mut pitr_interval); + patch + .pitr_interval + .map(|v| humantime::parse_duration(&v))? + .apply(&mut pitr_interval); patch .walreceiver_connect_timeout + .map(|v| humantime::parse_duration(&v))? .apply(&mut walreceiver_connect_timeout); - patch.lagging_wal_timeout.apply(&mut lagging_wal_timeout); + patch + .lagging_wal_timeout + .map(|v| humantime::parse_duration(&v))? + .apply(&mut lagging_wal_timeout); patch.max_lsn_wal_lag.apply(&mut max_lsn_wal_lag); patch.eviction_policy.apply(&mut eviction_policy); patch @@ -642,8 +674,12 @@ impl TenantConfig { .apply(&mut min_resident_size_override); patch .evictions_low_residence_duration_metric_threshold + .map(|v| humantime::parse_duration(&v))? .apply(&mut evictions_low_residence_duration_metric_threshold); - patch.heatmap_period.apply(&mut heatmap_period); + patch + .heatmap_period + .map(|v| humantime::parse_duration(&v))? + .apply(&mut heatmap_period); patch.lazy_slru_download.apply(&mut lazy_slru_download); patch .timeline_get_throttle @@ -654,9 +690,13 @@ impl TenantConfig { patch .image_creation_preempt_threshold .apply(&mut image_creation_preempt_threshold); - patch.lsn_lease_length.apply(&mut lsn_lease_length); + patch + .lsn_lease_length + .map(|v| humantime::parse_duration(&v))? + .apply(&mut lsn_lease_length); patch .lsn_lease_length_for_ts + .map(|v| humantime::parse_duration(&v))? .apply(&mut lsn_lease_length_for_ts); patch.timeline_offloading.apply(&mut timeline_offloading); patch @@ -673,7 +713,7 @@ impl TenantConfig { .gc_compaction_ratio_percent .apply(&mut gc_compaction_ratio_percent); - Self { + Ok(Self { checkpoint_distance, checkpoint_timeout, compaction_target_size, @@ -709,7 +749,7 @@ impl TenantConfig { gc_compaction_enabled, gc_compaction_initial_threshold_kb, gc_compaction_ratio_percent, - } + }) } } @@ -2241,9 +2281,10 @@ impl Default for PageTraceEvent { #[cfg(test)] mod tests { - use serde_json::json; use std::str::FromStr; + use serde_json::json; + use super::*; #[test] @@ -2503,7 +2544,7 @@ mod tests { ..base.clone() }; - let patched = base.apply_patch(decoded.config); + let patched = base.apply_patch(decoded.config).unwrap(); assert_eq!(patched, expected); } diff --git a/libs/pageserver_api/src/models/utilization.rs b/libs/pageserver_api/src/models/utilization.rs index 641aa51989..69c240ff3c 100644 --- a/libs/pageserver_api/src/models/utilization.rs +++ b/libs/pageserver_api/src/models/utilization.rs @@ -1,5 +1,7 @@ use std::time::SystemTime; -use utils::{serde_percent::Percent, serde_system_time}; + +use utils::serde_percent::Percent; +use utils::serde_system_time; /// Pageserver current utilization and scoring for how good candidate the pageserver would be for /// the next tenant. 
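The tenant-config hunks above move the duration-valued settings (checkpoint_timeout, compaction_period, gc_period, pitr_interval, heatmap_period, lsn_lease_length, and friends) from plain strings to Option<Duration> serialized via humantime_serde, and the control_plane/storcon_cli sides now run user-supplied strings through humantime::parse_duration instead of forwarding them verbatim; TenantConfig::apply_patch becomes fallible for the same reason. A minimal sketch of the pattern, assuming only the serde, serde_json, humantime and humantime-serde crates (the struct and field are illustrative, not the real TenantConfig):

```rust
use std::time::Duration;

use serde::{Deserialize, Serialize};

// `gc_period` stands in for any of the patched duration fields.
#[derive(Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
struct DurationsSketch {
    #[serde(default)]
    #[serde(with = "humantime_serde")]
    gc_period: Option<Duration>,
}

fn main() {
    // CLI-style parsing, as in control_plane/src/pageserver.rs: the setting string is
    // parsed up front with humantime instead of being passed through as a raw string.
    let parsed = Some("1h")
        .map(humantime::parse_duration)
        .transpose()
        .expect("not a valid duration");
    assert_eq!(parsed, Some(Duration::from_secs(3600)));

    // On the wire the field stays human-readable ("300s", "10 m", ...).
    let cfg: DurationsSketch = serde_json::from_str(r#"{"gc_period": "300s"}"#).unwrap();
    assert_eq!(cfg.gc_period, Some(Duration::from_secs(300)));
}
```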
@@ -131,12 +133,12 @@ impl PageserverUtilization { /// Test helper pub mod test_utilization { - use super::PageserverUtilization; use std::time::SystemTime; - use utils::{ - serde_percent::Percent, - serde_system_time::{self}, - }; + + use utils::serde_percent::Percent; + use utils::serde_system_time::{self}; + + use super::PageserverUtilization; // Parameters of the imaginary node used for test utilization instances const TEST_DISK_SIZE: u64 = 1024 * 1024 * 1024 * 1024; diff --git a/libs/pageserver_api/src/record.rs b/libs/pageserver_api/src/record.rs index bb62b35d36..fda504a26e 100644 --- a/libs/pageserver_api/src/record.rs +++ b/libs/pageserver_api/src/record.rs @@ -1,7 +1,7 @@ //! This module defines the WAL record format used within the pageserver. use bytes::Bytes; -use postgres_ffi::walrecord::{describe_postgres_wal_record, MultiXactMember}; +use postgres_ffi::walrecord::{MultiXactMember, describe_postgres_wal_record}; use postgres_ffi::{MultiXactId, MultiXactOffset, TimestampTz, TransactionId}; use serde::{Deserialize, Serialize}; use utils::bin_ser::DeserializeError; diff --git a/libs/pageserver_api/src/reltag.rs b/libs/pageserver_api/src/reltag.rs index 09d1fae221..473a44dbf9 100644 --- a/libs/pageserver_api/src/reltag.rs +++ b/libs/pageserver_api/src/reltag.rs @@ -1,10 +1,10 @@ -use serde::{Deserialize, Serialize}; use std::cmp::Ordering; use std::fmt; -use postgres_ffi::pg_constants::GLOBALTABLESPACE_OID; -use postgres_ffi::relfile_utils::{forkname_to_number, forknumber_to_name, MAIN_FORKNUM}; use postgres_ffi::Oid; +use postgres_ffi::pg_constants::GLOBALTABLESPACE_OID; +use postgres_ffi::relfile_utils::{MAIN_FORKNUM, forkname_to_number, forknumber_to_name}; +use serde::{Deserialize, Serialize}; /// /// Relation data file segment id throughout the Postgres cluster. diff --git a/libs/pageserver_api/src/shard.rs b/libs/pageserver_api/src/shard.rs index e03df02afb..eca04b1f3d 100644 --- a/libs/pageserver_api/src/shard.rs +++ b/libs/pageserver_api/src/shard.rs @@ -33,12 +33,13 @@ use std::hash::{Hash, Hasher}; -use crate::{key::Key, models::ShardParameters}; +#[doc(inline)] +pub use ::utils::shard::*; use postgres_ffi::relfile_utils::INIT_FORKNUM; use serde::{Deserialize, Serialize}; -#[doc(inline)] -pub use ::utils::shard::*; +use crate::key::Key; +use crate::models::ShardParameters; /// The ShardIdentity contains enough information to map a [`Key`] to a [`ShardNumber`], /// and to check whether that [`ShardNumber`] is the same as the current shard. @@ -337,7 +338,8 @@ pub fn describe( mod tests { use std::str::FromStr; - use utils::{id::TenantId, Hex}; + use utils::Hex; + use utils::id::TenantId; use super::*; diff --git a/libs/pageserver_api/src/upcall_api.rs b/libs/pageserver_api/src/upcall_api.rs index 2e88836bd0..647d01c3c2 100644 --- a/libs/pageserver_api/src/upcall_api.rs +++ b/libs/pageserver_api/src/upcall_api.rs @@ -6,9 +6,9 @@ use serde::{Deserialize, Serialize}; use utils::id::NodeId; -use crate::{ - controller_api::NodeRegisterRequest, models::LocationConfigMode, shard::TenantShardId, -}; +use crate::controller_api::NodeRegisterRequest; +use crate::models::LocationConfigMode; +use crate::shard::TenantShardId; /// Upcall message sent by the pageserver to the configured `control_plane_api` on /// startup. 
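Several otherwise puzzling hunks follow from the edition = "2024" bumps in the various Cargo.toml files: gen is a reserved keyword in Rust 2024, which is why rng.gen() becomes rng.r#gen() in the pageserver_api key.rs tests above, and why the gen fields in the re-attach/validate upcall types just below become r#gen. A small illustrative sketch (the struct is invented; the real types are ReAttachResponseTenant and ValidateRequestTenant), showing that serde derives drop the r# prefix, so the JSON key and therefore the wire format stay unchanged:

```rust
use serde::{Deserialize, Serialize};

#[derive(Serialize, Deserialize)]
struct GenerationSketch {
    // `gen` is a keyword in edition 2024, so the field needs the raw-identifier form.
    r#gen: u32,
}

fn main() {
    let v = GenerationSketch { r#gen: 7 };
    // serde uses the unprefixed name, so existing JSON keeps working.
    assert_eq!(serde_json::to_string(&v).unwrap(), r#"{"gen":7}"#);
}
```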
@@ -30,7 +30,7 @@ fn default_mode() -> LocationConfigMode { pub struct ReAttachResponseTenant { pub id: TenantShardId, /// Mandatory if LocationConfigMode is None or set to an Attached* mode - pub gen: Option, + pub r#gen: Option, /// Default value only for backward compat: this field should be set #[serde(default = "default_mode")] @@ -44,7 +44,7 @@ pub struct ReAttachResponse { #[derive(Serialize, Deserialize)] pub struct ValidateRequestTenant { pub id: TenantShardId, - pub gen: u32, + pub r#gen: u32, } #[derive(Serialize, Deserialize)] diff --git a/libs/pageserver_api/src/value.rs b/libs/pageserver_api/src/value.rs index 1f8ed30a9a..883d903ff3 100644 --- a/libs/pageserver_api/src/value.rs +++ b/libs/pageserver_api/src/value.rs @@ -7,10 +7,11 @@ //! Note that the [`Value`] type is used for the permananent storage format, so any //! changes to it must be backwards compatible. -use crate::record::NeonWalRecord; use bytes::Bytes; use serde::{Deserialize, Serialize}; +use crate::record::NeonWalRecord; + #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] pub enum Value { /// An Image value contains a full copy of the value @@ -83,11 +84,11 @@ impl ValueBytes { #[cfg(test)] mod test { - use super::*; - use bytes::Bytes; use utils::bin_ser::BeSer; + use super::*; + macro_rules! roundtrip { ($orig:expr, $expected:expr) => {{ let orig: Value = $orig; diff --git a/libs/postgres_connection/Cargo.toml b/libs/postgres_connection/Cargo.toml index 19027d13ff..462fb4a533 100644 --- a/libs/postgres_connection/Cargo.toml +++ b/libs/postgres_connection/Cargo.toml @@ -7,7 +7,6 @@ license.workspace = true [dependencies] anyhow.workspace = true itertools.workspace = true -postgres.workspace = true tokio-postgres.workspace = true url.workspace = true diff --git a/libs/postgres_connection/src/lib.rs b/libs/postgres_connection/src/lib.rs index ddf9f7b610..e3d31c6cfc 100644 --- a/libs/postgres_connection/src/lib.rs +++ b/libs/postgres_connection/src/lib.rs @@ -171,10 +171,10 @@ impl PgConnectionConfig { tokio_postgres::Client, tokio_postgres::Connection, ), - postgres::Error, + tokio_postgres::Error, > { self.to_tokio_postgres_config() - .connect(postgres::NoTls) + .connect(tokio_postgres::NoTls) .await } } diff --git a/libs/postgres_ffi/src/lib.rs b/libs/postgres_ffi/src/lib.rs index 0239b56d9c..301bc2f16e 100644 --- a/libs/postgres_ffi/src/lib.rs +++ b/libs/postgres_ffi/src/lib.rs @@ -278,7 +278,7 @@ pub fn generate_pg_control( checkpoint_bytes: &[u8], lsn: Lsn, pg_version: u32, -) -> anyhow::Result<(Bytes, u64)> { +) -> anyhow::Result<(Bytes, u64, bool)> { dispatch_pgversion!( pg_version, pgv::xlog_utils::generate_pg_control(pg_control_bytes, checkpoint_bytes, lsn), diff --git a/libs/postgres_ffi/src/xlog_utils.rs b/libs/postgres_ffi/src/xlog_utils.rs index 852b20eace..14fb1f2a1f 100644 --- a/libs/postgres_ffi/src/xlog_utils.rs +++ b/libs/postgres_ffi/src/xlog_utils.rs @@ -124,23 +124,59 @@ pub fn normalize_lsn(lsn: Lsn, seg_sz: usize) -> Lsn { } } +/// Generate a pg_control file, for a basebackup for starting up Postgres at the given LSN +/// +/// 'pg_control_bytes' and 'checkpoint_bytes' are the contents of those keys persisted in +/// the pageserver. They use the same format as the PostgreSQL control file and the +/// checkpoint record, but see walingest.rs for how exactly they are kept up to date. +/// 'lsn' is the LSN at which we're starting up. 
+/// +/// Returns: +/// - pg_control file contents +/// - system_identifier, extracted from the persisted information +/// - true, if we're starting up from a "clean shutdown", i.e. if there was a shutdown +/// checkpoint at the given LSN pub fn generate_pg_control( pg_control_bytes: &[u8], checkpoint_bytes: &[u8], lsn: Lsn, -) -> anyhow::Result<(Bytes, u64)> { +) -> anyhow::Result<(Bytes, u64, bool)> { let mut pg_control = ControlFileData::decode(pg_control_bytes)?; let mut checkpoint = CheckPoint::decode(checkpoint_bytes)?; // Generate new pg_control needed for bootstrap + // + // NB: In the checkpoint struct that we persist in the pageserver, we have a different + // convention for the 'redo' field than in PostgreSQL: On a shutdown checkpoint, + // 'redo' points the *end* of the checkpoint WAL record. On PostgreSQL, it points to + // the beginning. Furthermore, on an online checkpoint, 'redo' is set to 0. + // + // We didn't always have this convention however, and old persisted records will have + // old REDO values that point to some old LSN. + // + // The upshot is that if 'redo' is equal to the "current" LSN, there was a shutdown + // checkpoint record at that point in WAL, with no new WAL records after it. That case + // can be treated as starting from a clean shutdown. All other cases are treated as + // non-clean shutdown. In Neon, we don't do WAL replay at startup in either case, so + // that distinction doesn't matter very much. As of this writing, it only affects + // whether the persisted pg_stats information can be used or not. + // + // In the Checkpoint struct in the returned pg_control file, the redo pointer is + // always set to the LSN we're starting at, to hint that no WAL replay is required. + // (There's some neon-specific code in Postgres startup to make that work, though. + // Just setting the redo pointer is not sufficient.) + let was_shutdown = Lsn(checkpoint.redo) == lsn; checkpoint.redo = normalize_lsn(lsn, WAL_SEGMENT_SIZE).0; - //save new values in pg_control + // We use DBState_DB_SHUTDOWNED even if it was not a clean shutdown. The + // neon-specific code at postgres startup ignores the state stored in the control + // file, similar to archive recovery in standalone PostgreSQL. Similarly, the + // checkPoint pointer is ignored, so just set it to 0. pg_control.checkPoint = 0; pg_control.checkPointCopy = checkpoint; pg_control.state = DBState_DB_SHUTDOWNED; - Ok((pg_control.encode(), pg_control.system_identifier)) + Ok((pg_control.encode(), pg_control.system_identifier, was_shutdown)) } pub fn get_current_timestamp() -> TimestampTz { diff --git a/libs/proxy/postgres-protocol2/Cargo.toml b/libs/proxy/postgres-protocol2/Cargo.toml index f66a292d5e..7ebb05eec1 100644 --- a/libs/proxy/postgres-protocol2/Cargo.toml +++ b/libs/proxy/postgres-protocol2/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "postgres-protocol2" version = "0.1.0" -edition = "2021" +edition = "2024" license = "MIT/Apache-2.0" [dependencies] diff --git a/libs/proxy/postgres-protocol2/src/authentication/sasl.rs b/libs/proxy/postgres-protocol2/src/authentication/sasl.rs index f2200a40ce..27e05e24ec 100644 --- a/libs/proxy/postgres-protocol2/src/authentication/sasl.rs +++ b/libs/proxy/postgres-protocol2/src/authentication/sasl.rs @@ -1,14 +1,12 @@ //! SASL-based authentication support. 
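The expanded generate_pg_control() documentation above is the substantive part of the xlog_utils.rs change: because the pageserver stores a shutdown checkpoint's redo pointer as the end LSN of that record, redo == basebackup LSN is exactly the "clean shutdown, no later WAL" case, and the function now reports it as a third tuple element. A hedged sketch of a caller consuming the new return value (the signature follows the patch; the surrounding basebackup plumbing is assumed, not shown):

```rust
use anyhow::Result;
use bytes::Bytes;
use utils::lsn::Lsn;

// Sketch of a caller; the inputs are the pg_control and checkpoint values persisted in
// the pageserver keyspace, as described in the doc comment above.
fn pg_control_for_basebackup(
    pg_control_bytes: &[u8],
    checkpoint_bytes: &[u8],
    lsn: Lsn,
    pg_version: u32,
) -> Result<Bytes> {
    let (pg_control, _system_identifier, was_shutdown) =
        postgres_ffi::generate_pg_control(pg_control_bytes, checkpoint_bytes, lsn, pg_version)?;
    if was_shutdown {
        // Clean shutdown checkpoint at `lsn`: per the comment, persisted pg_stats may be
        // reused. Neon skips WAL replay at startup in either case.
    }
    Ok(pg_control)
}
```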
+use std::fmt::Write; +use std::{io, iter, mem, str}; + use hmac::{Hmac, Mac}; use rand::{self, Rng}; use sha2::digest::FixedOutput; use sha2::{Digest, Sha256}; -use std::fmt::Write; -use std::io; -use std::iter; -use std::mem; -use std::str; use tokio::task::yield_now; const NONCE_LENGTH: usize = 24; @@ -493,11 +491,9 @@ mod test { let nonce = "9IZ2O01zb9IgiIZ1WJ/zgpJB"; let client_first = "n,,n=,r=9IZ2O01zb9IgiIZ1WJ/zgpJB"; - let server_first = - "r=9IZ2O01zb9IgiIZ1WJ/zgpJBjx/oIRLs02gGSHcw1KEty3eY,s=fs3IXBy7U7+IvVjZ,i\ + let server_first = "r=9IZ2O01zb9IgiIZ1WJ/zgpJBjx/oIRLs02gGSHcw1KEty3eY,s=fs3IXBy7U7+IvVjZ,i\ =4096"; - let client_final = - "c=biws,r=9IZ2O01zb9IgiIZ1WJ/zgpJBjx/oIRLs02gGSHcw1KEty3eY,p=AmNKosjJzS3\ + let client_final = "c=biws,r=9IZ2O01zb9IgiIZ1WJ/zgpJBjx/oIRLs02gGSHcw1KEty3eY,p=AmNKosjJzS3\ 1NTlQYNs5BTeQjdHdk7lOflDo5re2an8="; let server_final = "v=U+ppxD5XUKtradnv8e2MkeupiA8FU87Sg8CXzXHDAzw="; diff --git a/libs/proxy/postgres-protocol2/src/lib.rs b/libs/proxy/postgres-protocol2/src/lib.rs index 6032440f9a..afbd1e92bd 100644 --- a/libs/proxy/postgres-protocol2/src/lib.rs +++ b/libs/proxy/postgres-protocol2/src/lib.rs @@ -11,9 +11,10 @@ //! set to `UTF8`. It will most likely not behave properly if that is not the case. #![warn(missing_docs, clippy::all)] +use std::io; + use byteorder::{BigEndian, ByteOrder}; use bytes::{BufMut, BytesMut}; -use std::io; pub mod authentication; pub mod escape; diff --git a/libs/proxy/postgres-protocol2/src/message/backend.rs b/libs/proxy/postgres-protocol2/src/message/backend.rs index 097964f9c1..d7eaef9509 100644 --- a/libs/proxy/postgres-protocol2/src/message/backend.rs +++ b/libs/proxy/postgres-protocol2/src/message/backend.rs @@ -1,13 +1,13 @@ #![allow(missing_docs)] +use std::io::{self, Read}; +use std::ops::Range; +use std::{cmp, str}; + use byteorder::{BigEndian, ByteOrder, ReadBytesExt}; use bytes::{Bytes, BytesMut}; use fallible_iterator::FallibleIterator; use memchr::memchr; -use std::cmp; -use std::io::{self, Read}; -use std::ops::Range; -use std::str; use crate::Oid; diff --git a/libs/proxy/postgres-protocol2/src/message/frontend.rs b/libs/proxy/postgres-protocol2/src/message/frontend.rs index 640f35ada3..b447290ea8 100644 --- a/libs/proxy/postgres-protocol2/src/message/frontend.rs +++ b/libs/proxy/postgres-protocol2/src/message/frontend.rs @@ -1,13 +1,13 @@ //! Frontend message serialization. #![allow(missing_docs)] +use std::error::Error; +use std::{io, marker}; + use byteorder::{BigEndian, ByteOrder}; use bytes::{Buf, BufMut, BytesMut}; -use std::error::Error; -use std::io; -use std::marker; -use crate::{write_nullable, FromUsize, IsNull, Oid}; +use crate::{FromUsize, IsNull, Oid, write_nullable}; #[inline] fn write_body(buf: &mut BytesMut, f: F) -> Result<(), E> diff --git a/libs/proxy/postgres-protocol2/src/password/mod.rs b/libs/proxy/postgres-protocol2/src/password/mod.rs index 38eb31dfcf..4cd9bfb060 100644 --- a/libs/proxy/postgres-protocol2/src/password/mod.rs +++ b/libs/proxy/postgres-protocol2/src/password/mod.rs @@ -6,12 +6,13 @@ //! side. This is good because it ensures the cleartext password won't //! end up in logs pg_stat displays, etc. 
-use crate::authentication::sasl; use hmac::{Hmac, Mac}; use rand::RngCore; use sha2::digest::FixedOutput; use sha2::{Digest, Sha256}; +use crate::authentication::sasl; + #[cfg(test)] mod test; diff --git a/libs/proxy/postgres-protocol2/src/types/mod.rs b/libs/proxy/postgres-protocol2/src/types/mod.rs index 78131c05bf..6a9b334bcb 100644 --- a/libs/proxy/postgres-protocol2/src/types/mod.rs +++ b/libs/proxy/postgres-protocol2/src/types/mod.rs @@ -1,11 +1,12 @@ //! Conversions to and from Postgres's binary format for various types. -use byteorder::{BigEndian, ReadBytesExt}; -use bytes::{BufMut, BytesMut}; -use fallible_iterator::FallibleIterator; use std::boxed::Box as StdBox; use std::error::Error; use std::str; +use byteorder::{BigEndian, ReadBytesExt}; +use bytes::{BufMut, BytesMut}; +use fallible_iterator::FallibleIterator; + use crate::Oid; #[cfg(test)] diff --git a/libs/proxy/postgres-types2/Cargo.toml b/libs/proxy/postgres-types2/Cargo.toml index 57efd94cd3..25ad23ba35 100644 --- a/libs/proxy/postgres-types2/Cargo.toml +++ b/libs/proxy/postgres-types2/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "postgres-types2" version = "0.1.0" -edition = "2021" +edition = "2024" license = "MIT/Apache-2.0" [dependencies] diff --git a/libs/proxy/postgres-types2/src/lib.rs b/libs/proxy/postgres-types2/src/lib.rs index d4f3afdfd4..0ccd8c295f 100644 --- a/libs/proxy/postgres-types2/src/lib.rs +++ b/libs/proxy/postgres-types2/src/lib.rs @@ -4,19 +4,18 @@ //! unless you want to define your own `ToSql` or `FromSql` definitions. #![warn(clippy::all, missing_docs)] -use fallible_iterator::FallibleIterator; -use postgres_protocol2::types; use std::any::type_name; use std::error::Error; use std::fmt; use std::sync::Arc; -use crate::type_gen::{Inner, Other}; - +use bytes::BytesMut; +use fallible_iterator::FallibleIterator; #[doc(inline)] pub use postgres_protocol2::Oid; +use postgres_protocol2::types; -use bytes::BytesMut; +use crate::type_gen::{Inner, Other}; /// Generates a simple implementation of `ToSql::accepts` which accepts the /// types passed to it. 
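Most of the remaining churn in the libs/proxy crates is mechanical rather than functional: alongside the edition = "2024" bumps, use statements are regrouped into std, then external crates, then crate-local items (merged into one tree per group), and return Err(...) arms pick up trailing semicolons. This is presumably the formatter doing the work (for example rustfmt's group_imports = "StdExternalCrate" together with the 2024 style edition), so those hunks should not need review for behavior changes. The target shape, using paths that appear elsewhere in the patch:

```rust
// Shape of the regrouped imports only; this fragment is illustrative, not from the patch.
// group 1: std
use std::io;
use std::iter::{empty, once};

// group 2: external crates
use bytes::BytesMut;
use serde::{Deserialize, Serialize};

// group 3: the current crate
use crate::codec::FrontendMessage;
```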
diff --git a/libs/proxy/postgres-types2/src/private.rs b/libs/proxy/postgres-types2/src/private.rs index 774f9a301c..188b982812 100644 --- a/libs/proxy/postgres-types2/src/private.rs +++ b/libs/proxy/postgres-types2/src/private.rs @@ -1,7 +1,9 @@ -use crate::{FromSql, Type}; -pub use bytes::BytesMut; use std::error::Error; +pub use bytes::BytesMut; + +use crate::{FromSql, Type}; + pub fn read_be_i32(buf: &mut &[u8]) -> Result> { if buf.len() < 4 { return Err("invalid buffer size".into()); diff --git a/libs/proxy/tokio-postgres2/Cargo.toml b/libs/proxy/tokio-postgres2/Cargo.toml index 161c6b8309..540876742f 100644 --- a/libs/proxy/tokio-postgres2/Cargo.toml +++ b/libs/proxy/tokio-postgres2/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tokio-postgres2" version = "0.1.0" -edition = "2021" +edition = "2024" license = "MIT/Apache-2.0" [dependencies] diff --git a/libs/proxy/tokio-postgres2/src/cancel_query.rs b/libs/proxy/tokio-postgres2/src/cancel_query.rs index cddbf16336..b65fb571e6 100644 --- a/libs/proxy/tokio-postgres2/src/cancel_query.rs +++ b/libs/proxy/tokio-postgres2/src/cancel_query.rs @@ -1,10 +1,11 @@ +use std::io; + use tokio::net::TcpStream; use crate::client::SocketConfig; use crate::config::{Host, SslMode}; use crate::tls::MakeTlsConnect; -use crate::{cancel_query_raw, connect_socket, Error}; -use std::io; +use crate::{Error, cancel_query_raw, connect_socket}; pub(crate) async fn cancel_query( config: Option, @@ -22,7 +23,7 @@ where return Err(Error::connect(io::Error::new( io::ErrorKind::InvalidInput, "unknown host", - ))) + ))); } }; diff --git a/libs/proxy/tokio-postgres2/src/cancel_query_raw.rs b/libs/proxy/tokio-postgres2/src/cancel_query_raw.rs index 8c08296435..c720214e9b 100644 --- a/libs/proxy/tokio-postgres2/src/cancel_query_raw.rs +++ b/libs/proxy/tokio-postgres2/src/cancel_query_raw.rs @@ -1,10 +1,11 @@ -use crate::config::SslMode; -use crate::tls::TlsConnect; -use crate::{connect_tls, Error}; use bytes::BytesMut; use postgres_protocol2::message::frontend; use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt}; +use crate::config::SslMode; +use crate::tls::TlsConnect; +use crate::{Error, connect_tls}; + pub async fn cancel_query_raw( stream: S, mode: SslMode, diff --git a/libs/proxy/tokio-postgres2/src/cancel_token.rs b/libs/proxy/tokio-postgres2/src/cancel_token.rs index 718f903a92..f6526395ee 100644 --- a/libs/proxy/tokio-postgres2/src/cancel_token.rs +++ b/libs/proxy/tokio-postgres2/src/cancel_token.rs @@ -1,12 +1,12 @@ -use crate::config::SslMode; -use crate::tls::TlsConnect; - -use crate::{cancel_query, client::SocketConfig, tls::MakeTlsConnect}; -use crate::{cancel_query_raw, Error}; use serde::{Deserialize, Serialize}; use tokio::io::{AsyncRead, AsyncWrite}; use tokio::net::TcpStream; +use crate::client::SocketConfig; +use crate::config::SslMode; +use crate::tls::{MakeTlsConnect, TlsConnect}; +use crate::{Error, cancel_query, cancel_query_raw}; + /// The capability to request cancellation of in-progress queries on a /// connection. 
#[derive(Clone, Serialize, Deserialize)] diff --git a/libs/proxy/tokio-postgres2/src/client.rs b/libs/proxy/tokio-postgres2/src/client.rs index 46151ab924..39b1db75da 100644 --- a/libs/proxy/tokio-postgres2/src/client.rs +++ b/libs/proxy/tokio-postgres2/src/client.rs @@ -1,31 +1,28 @@ -use crate::codec::{BackendMessages, FrontendMessage}; - -use crate::config::Host; -use crate::config::SslMode; -use crate::connection::{Request, RequestMessages}; - -use crate::query::RowStream; -use crate::simple_query::SimpleQueryStream; - -use crate::types::{Oid, ToSql, Type}; - -use crate::{ - query, simple_query, slice_iter, CancelToken, Error, ReadyForQueryStatus, Row, - SimpleQueryMessage, Statement, Transaction, TransactionBuilder, -}; -use bytes::BytesMut; -use fallible_iterator::FallibleIterator; -use futures_util::{future, ready, TryStreamExt}; -use parking_lot::Mutex; -use postgres_protocol2::message::{backend::Message, frontend}; -use serde::{Deserialize, Serialize}; use std::collections::HashMap; use std::fmt; use std::sync::Arc; use std::task::{Context, Poll}; +use std::time::Duration; + +use bytes::BytesMut; +use fallible_iterator::FallibleIterator; +use futures_util::{TryStreamExt, future, ready}; +use parking_lot::Mutex; +use postgres_protocol2::message::backend::Message; +use postgres_protocol2::message::frontend; +use serde::{Deserialize, Serialize}; use tokio::sync::mpsc; -use std::time::Duration; +use crate::codec::{BackendMessages, FrontendMessage}; +use crate::config::{Host, SslMode}; +use crate::connection::{Request, RequestMessages}; +use crate::query::RowStream; +use crate::simple_query::SimpleQueryStream; +use crate::types::{Oid, ToSql, Type}; +use crate::{ + CancelToken, Error, ReadyForQueryStatus, Row, SimpleQueryMessage, Statement, Transaction, + TransactionBuilder, query, simple_query, slice_iter, +}; pub struct Responses { receiver: mpsc::Receiver, diff --git a/libs/proxy/tokio-postgres2/src/codec.rs b/libs/proxy/tokio-postgres2/src/codec.rs index 0ec46198ce..f1fd9b47b3 100644 --- a/libs/proxy/tokio-postgres2/src/codec.rs +++ b/libs/proxy/tokio-postgres2/src/codec.rs @@ -1,8 +1,9 @@ +use std::io; + use bytes::{Buf, Bytes, BytesMut}; use fallible_iterator::FallibleIterator; use postgres_protocol2::message::backend; use postgres_protocol2::message::frontend::CopyData; -use std::io; use tokio_util::codec::{Decoder, Encoder}; pub enum FrontendMessage { diff --git a/libs/proxy/tokio-postgres2/src/config.rs b/libs/proxy/tokio-postgres2/src/config.rs index 47cc45ac80..4c25491b67 100644 --- a/libs/proxy/tokio-postgres2/src/config.rs +++ b/libs/proxy/tokio-postgres2/src/config.rs @@ -1,21 +1,19 @@ //! Connection configuration. -use crate::connect::connect; -use crate::connect_raw::connect_raw; -use crate::connect_raw::RawConnection; -use crate::tls::MakeTlsConnect; -use crate::tls::TlsConnect; -use crate::{Client, Connection, Error}; -use postgres_protocol2::message::frontend::StartupMessageParams; -use serde::{Deserialize, Serialize}; -use std::fmt; -use std::str; use std::time::Duration; -use tokio::io::{AsyncRead, AsyncWrite}; +use std::{fmt, str}; pub use postgres_protocol2::authentication::sasl::ScramKeys; +use postgres_protocol2::message::frontend::StartupMessageParams; +use serde::{Deserialize, Serialize}; +use tokio::io::{AsyncRead, AsyncWrite}; use tokio::net::TcpStream; +use crate::connect::connect; +use crate::connect_raw::{RawConnection, connect_raw}; +use crate::tls::{MakeTlsConnect, TlsConnect}; +use crate::{Client, Connection, Error}; + /// TLS configuration. 
#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)] #[non_exhaustive] diff --git a/libs/proxy/tokio-postgres2/src/connect.rs b/libs/proxy/tokio-postgres2/src/connect.rs index e0cb69748d..d2bd0dfbcd 100644 --- a/libs/proxy/tokio-postgres2/src/connect.rs +++ b/libs/proxy/tokio-postgres2/src/connect.rs @@ -1,3 +1,7 @@ +use postgres_protocol2::message::backend::Message; +use tokio::net::TcpStream; +use tokio::sync::mpsc; + use crate::client::SocketConfig; use crate::codec::BackendMessage; use crate::config::Host; @@ -5,9 +9,6 @@ use crate::connect_raw::connect_raw; use crate::connect_socket::connect_socket; use crate::tls::{MakeTlsConnect, TlsConnect}; use crate::{Client, Config, Connection, Error, RawConnection}; -use postgres_protocol2::message::backend::Message; -use tokio::net::TcpStream; -use tokio::sync::mpsc; pub async fn connect( mut tls: T, diff --git a/libs/proxy/tokio-postgres2/src/connect_raw.rs b/libs/proxy/tokio-postgres2/src/connect_raw.rs index 66db85e07d..20dc538cf2 100644 --- a/libs/proxy/tokio-postgres2/src/connect_raw.rs +++ b/libs/proxy/tokio-postgres2/src/connect_raw.rs @@ -1,22 +1,24 @@ +use std::collections::HashMap; +use std::io; +use std::pin::Pin; +use std::task::{Context, Poll}; + +use bytes::BytesMut; +use fallible_iterator::FallibleIterator; +use futures_util::{Sink, SinkExt, Stream, TryStreamExt, ready}; +use postgres_protocol2::authentication::sasl; +use postgres_protocol2::authentication::sasl::ScramSha256; +use postgres_protocol2::message::backend::{AuthenticationSaslBody, Message, NoticeResponseBody}; +use postgres_protocol2::message::frontend; +use tokio::io::{AsyncRead, AsyncWrite}; +use tokio_util::codec::Framed; + +use crate::Error; use crate::codec::{BackendMessage, BackendMessages, FrontendMessage, PostgresCodec}; use crate::config::{self, AuthKeys, Config}; use crate::connect_tls::connect_tls; use crate::maybe_tls_stream::MaybeTlsStream; use crate::tls::{TlsConnect, TlsStream}; -use crate::Error; -use bytes::BytesMut; -use fallible_iterator::FallibleIterator; -use futures_util::{ready, Sink, SinkExt, Stream, TryStreamExt}; -use postgres_protocol2::authentication::sasl; -use postgres_protocol2::authentication::sasl::ScramSha256; -use postgres_protocol2::message::backend::{AuthenticationSaslBody, Message, NoticeResponseBody}; -use postgres_protocol2::message::frontend; -use std::collections::HashMap; -use std::io; -use std::pin::Pin; -use std::task::{Context, Poll}; -use tokio::io::{AsyncRead, AsyncWrite}; -use tokio_util::codec::Framed; pub struct StartupStream { inner: Framed, PostgresCodec>, @@ -158,7 +160,7 @@ where | Some(Message::AuthenticationSspi) => { return Err(Error::authentication( "unsupported authentication method".into(), - )) + )); } Some(Message::ErrorResponse(body)) => return Err(Error::db(body)), Some(_) => return Err(Error::unexpected_message()), diff --git a/libs/proxy/tokio-postgres2/src/connect_socket.rs b/libs/proxy/tokio-postgres2/src/connect_socket.rs index 336a13317f..15411f7ef3 100644 --- a/libs/proxy/tokio-postgres2/src/connect_socket.rs +++ b/libs/proxy/tokio-postgres2/src/connect_socket.rs @@ -1,11 +1,13 @@ -use crate::config::Host; -use crate::Error; use std::future::Future; use std::io; use std::time::Duration; + use tokio::net::{self, TcpStream}; use tokio::time; +use crate::Error; +use crate::config::Host; + pub(crate) async fn connect_socket( host: &Host, port: u16, diff --git a/libs/proxy/tokio-postgres2/src/connect_tls.rs b/libs/proxy/tokio-postgres2/src/connect_tls.rs index 64b0b68abc..4dc929a9e2 
100644 --- a/libs/proxy/tokio-postgres2/src/connect_tls.rs +++ b/libs/proxy/tokio-postgres2/src/connect_tls.rs @@ -1,12 +1,13 @@ -use crate::config::SslMode; -use crate::maybe_tls_stream::MaybeTlsStream; -use crate::tls::private::ForcePrivateApi; -use crate::tls::TlsConnect; -use crate::Error; use bytes::BytesMut; use postgres_protocol2::message::frontend; use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt}; +use crate::Error; +use crate::config::SslMode; +use crate::maybe_tls_stream::MaybeTlsStream; +use crate::tls::TlsConnect; +use crate::tls::private::ForcePrivateApi; + pub async fn connect_tls( mut stream: S, mode: SslMode, @@ -19,7 +20,7 @@ where match mode { SslMode::Disable => return Ok(MaybeTlsStream::Raw(stream)), SslMode::Prefer if !tls.can_connect(ForcePrivateApi) => { - return Ok(MaybeTlsStream::Raw(stream)) + return Ok(MaybeTlsStream::Raw(stream)); } SslMode::Prefer | SslMode::Require => {} } diff --git a/libs/proxy/tokio-postgres2/src/connection.rs b/libs/proxy/tokio-postgres2/src/connection.rs index f478717e0d..60e39b3b44 100644 --- a/libs/proxy/tokio-postgres2/src/connection.rs +++ b/libs/proxy/tokio-postgres2/src/connection.rs @@ -1,22 +1,24 @@ -use crate::codec::{BackendMessage, BackendMessages, FrontendMessage, PostgresCodec}; -use crate::error::DbError; -use crate::maybe_tls_stream::MaybeTlsStream; -use crate::{AsyncMessage, Error, Notification}; -use bytes::BytesMut; -use fallible_iterator::FallibleIterator; -use futures_util::{ready, Sink, Stream}; -use log::{info, trace}; -use postgres_protocol2::message::backend::Message; -use postgres_protocol2::message::frontend; use std::collections::{HashMap, VecDeque}; use std::future::Future; use std::pin::Pin; use std::task::{Context, Poll}; + +use bytes::BytesMut; +use fallible_iterator::FallibleIterator; +use futures_util::{Sink, Stream, ready}; +use log::{info, trace}; +use postgres_protocol2::message::backend::Message; +use postgres_protocol2::message::frontend; use tokio::io::{AsyncRead, AsyncWrite}; use tokio::sync::mpsc; use tokio_util::codec::Framed; use tokio_util::sync::PollSender; +use crate::codec::{BackendMessage, BackendMessages, FrontendMessage, PostgresCodec}; +use crate::error::DbError; +use crate::maybe_tls_stream::MaybeTlsStream; +use crate::{AsyncMessage, Error, Notification}; + pub enum RequestMessages { Single(FrontendMessage), } @@ -139,7 +141,7 @@ where Some(response) => response, None => match messages.next().map_err(Error::parse)? { Some(Message::ErrorResponse(error)) => { - return Poll::Ready(Err(Error::db(error))) + return Poll::Ready(Err(Error::db(error))); } _ => return Poll::Ready(Err(Error::unexpected_message())), }, diff --git a/libs/proxy/tokio-postgres2/src/error/mod.rs b/libs/proxy/tokio-postgres2/src/error/mod.rs index 922c348525..b12e76e5bf 100644 --- a/libs/proxy/tokio-postgres2/src/error/mod.rs +++ b/libs/proxy/tokio-postgres2/src/error/mod.rs @@ -1,10 +1,10 @@ //! Errors. 
+use std::error::{self, Error as _Error}; +use std::{fmt, io}; + use fallible_iterator::FallibleIterator; use postgres_protocol2::message::backend::{ErrorFields, ErrorResponseBody}; -use std::error::{self, Error as _Error}; -use std::fmt; -use std::io; pub use self::sqlstate::*; diff --git a/libs/proxy/tokio-postgres2/src/generic_client.rs b/libs/proxy/tokio-postgres2/src/generic_client.rs index 042b5a675e..31c3d8fa3e 100644 --- a/libs/proxy/tokio-postgres2/src/generic_client.rs +++ b/libs/proxy/tokio-postgres2/src/generic_client.rs @@ -1,9 +1,10 @@ #![allow(async_fn_in_trait)] +use postgres_protocol2::Oid; + use crate::query::RowStream; use crate::types::Type; use crate::{Client, Error, Transaction}; -use postgres_protocol2::Oid; mod private { pub trait Sealed {} diff --git a/libs/proxy/tokio-postgres2/src/lib.rs b/libs/proxy/tokio-postgres2/src/lib.rs index 7426279167..c8ebba5487 100644 --- a/libs/proxy/tokio-postgres2/src/lib.rs +++ b/libs/proxy/tokio-postgres2/src/lib.rs @@ -1,6 +1,8 @@ //! An asynchronous, pipelined, PostgreSQL client. #![warn(clippy::all)] +use postgres_protocol2::message::backend::ReadyForQueryBody; + pub use crate::cancel_token::CancelToken; pub use crate::client::{Client, SocketConfig}; pub use crate::config::Config; @@ -17,7 +19,6 @@ pub use crate::tls::NoTls; pub use crate::transaction::Transaction; pub use crate::transaction_builder::{IsolationLevel, TransactionBuilder}; use crate::types::ToSql; -use postgres_protocol2::message::backend::ReadyForQueryBody; /// After executing a query, the connection will be in one of these states #[derive(Clone, Copy, Debug, PartialEq)] diff --git a/libs/proxy/tokio-postgres2/src/maybe_tls_stream.rs b/libs/proxy/tokio-postgres2/src/maybe_tls_stream.rs index 9a7e248997..4aa838613e 100644 --- a/libs/proxy/tokio-postgres2/src/maybe_tls_stream.rs +++ b/libs/proxy/tokio-postgres2/src/maybe_tls_stream.rs @@ -1,12 +1,14 @@ //! MaybeTlsStream. //! //! Represents a stream that may or may not be encrypted with TLS. -use crate::tls::{ChannelBinding, TlsStream}; use std::io; use std::pin::Pin; use std::task::{Context, Poll}; + use tokio::io::{AsyncRead, AsyncWrite, ReadBuf}; +use crate::tls::{ChannelBinding, TlsStream}; + /// A stream that may or may not be encrypted with TLS. pub enum MaybeTlsStream { /// An unencrypted stream. 
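Most of the churn in the `libs/proxy` and `libs/remote_storage` hunks comes from the `edition = "2024"` bumps in the crate manifests: `use` statements are regrouped into standard-library, external-crate, and own-crate blocks (one `use` per module instead of nested trees), and identifiers that collide with the newly reserved `gen` keyword are escaped as `r#gen` later in the diff. A minimal sketch of the resulting style follows; the imports and the rand 0.8 call are illustrative, not taken from this PR.

```rust
// Group 1: standard library, one `use` statement per module.
use std::collections::HashMap;
use std::time::Duration;

// Group 2: external crates.
use rand::Rng;

// Group 3: the current crate always comes last, e.g.
// use crate::config::SslMode;

// Rust 2024 reserves `gen` as a keyword, so calls to rand 0.8's `Rng::gen`
// must use the raw-identifier form.
fn sample(rng: &mut impl Rng) -> u32 {
    rng.r#gen::<u32>()
}

fn main() {
    let mut rng = rand::thread_rng();
    let counts: HashMap<u32, Duration> = HashMap::from([(sample(&mut rng), Duration::ZERO)]);
    println!("{counts:?}");
}
```

The grouping matches rustfmt's `group_imports = "StdExternalCrate"` style; the rustfmt configuration that presumably enforces it is not part of this diff.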
diff --git a/libs/proxy/tokio-postgres2/src/prepare.rs b/libs/proxy/tokio-postgres2/src/prepare.rs index 58bbb26cbc..b36d2e5f74 100644 --- a/libs/proxy/tokio-postgres2/src/prepare.rs +++ b/libs/proxy/tokio-postgres2/src/prepare.rs @@ -1,18 +1,19 @@ +use std::future::Future; +use std::pin::Pin; +use std::sync::Arc; + +use bytes::Bytes; +use fallible_iterator::FallibleIterator; +use futures_util::{TryStreamExt, pin_mut}; +use log::debug; +use postgres_protocol2::message::backend::Message; +use postgres_protocol2::message::frontend; + use crate::client::InnerClient; use crate::codec::FrontendMessage; use crate::connection::RequestMessages; use crate::types::{Field, Kind, Oid, Type}; -use crate::{query, slice_iter}; -use crate::{Column, Error, Statement}; -use bytes::Bytes; -use fallible_iterator::FallibleIterator; -use futures_util::{pin_mut, TryStreamExt}; -use log::debug; -use postgres_protocol2::message::backend::Message; -use postgres_protocol2::message::frontend; -use std::future::Future; -use std::pin::Pin; -use std::sync::Arc; +use crate::{Column, Error, Statement, query, slice_iter}; pub(crate) const TYPEINFO_QUERY: &str = "\ SELECT t.typname, t.typtype, t.typelem, r.rngsubtype, t.typbasetype, n.nspname, t.typrelid diff --git a/libs/proxy/tokio-postgres2/src/query.rs b/libs/proxy/tokio-postgres2/src/query.rs index e21631c85d..29f05fba79 100644 --- a/libs/proxy/tokio-postgres2/src/query.rs +++ b/libs/proxy/tokio-postgres2/src/query.rs @@ -1,22 +1,24 @@ -use crate::client::{InnerClient, Responses}; -use crate::codec::FrontendMessage; -use crate::connection::RequestMessages; -use crate::types::IsNull; -use crate::{Column, Error, ReadyForQueryStatus, Row, Statement}; -use bytes::{BufMut, Bytes, BytesMut}; -use fallible_iterator::FallibleIterator; -use futures_util::{ready, Stream}; -use log::{debug, log_enabled, Level}; -use pin_project_lite::pin_project; -use postgres_protocol2::message::backend::Message; -use postgres_protocol2::message::frontend; -use postgres_types2::{Format, ToSql, Type}; use std::fmt; use std::marker::PhantomPinned; use std::pin::Pin; use std::sync::Arc; use std::task::{Context, Poll}; +use bytes::{BufMut, Bytes, BytesMut}; +use fallible_iterator::FallibleIterator; +use futures_util::{Stream, ready}; +use log::{Level, debug, log_enabled}; +use pin_project_lite::pin_project; +use postgres_protocol2::message::backend::Message; +use postgres_protocol2::message::frontend; +use postgres_types2::{Format, ToSql, Type}; + +use crate::client::{InnerClient, Responses}; +use crate::codec::FrontendMessage; +use crate::connection::RequestMessages; +use crate::types::IsNull; +use crate::{Column, Error, ReadyForQueryStatus, Row, Statement}; + struct BorrowToSqlParamsDebug<'a>(&'a [&'a (dyn ToSql + Sync)]); impl fmt::Debug for BorrowToSqlParamsDebug<'_> { @@ -257,7 +259,7 @@ impl Stream for RowStream { this.statement.clone(), body, *this.output_format, - )?))) + )?))); } Message::EmptyQueryResponse | Message::PortalSuspended => {} Message::CommandComplete(body) => { diff --git a/libs/proxy/tokio-postgres2/src/row.rs b/libs/proxy/tokio-postgres2/src/row.rs index 10e130707d..5fc955eef4 100644 --- a/libs/proxy/tokio-postgres2/src/row.rs +++ b/libs/proxy/tokio-postgres2/src/row.rs @@ -1,17 +1,18 @@ //! Rows. 
+use std::ops::Range; +use std::sync::Arc; +use std::{fmt, str}; + +use fallible_iterator::FallibleIterator; +use postgres_protocol2::message::backend::DataRowBody; +use postgres_types2::{Format, WrongFormat}; + use crate::row::sealed::{AsName, Sealed}; use crate::simple_query::SimpleColumn; use crate::statement::Column; use crate::types::{FromSql, Type, WrongType}; use crate::{Error, Statement}; -use fallible_iterator::FallibleIterator; -use postgres_protocol2::message::backend::DataRowBody; -use postgres_types2::{Format, WrongFormat}; -use std::fmt; -use std::ops::Range; -use std::str; -use std::sync::Arc; mod sealed { pub trait Sealed {} diff --git a/libs/proxy/tokio-postgres2/src/simple_query.rs b/libs/proxy/tokio-postgres2/src/simple_query.rs index fb2550377b..f13d63983f 100644 --- a/libs/proxy/tokio-postgres2/src/simple_query.rs +++ b/libs/proxy/tokio-postgres2/src/simple_query.rs @@ -1,19 +1,21 @@ -use crate::client::{InnerClient, Responses}; -use crate::codec::FrontendMessage; -use crate::connection::RequestMessages; -use crate::{Error, ReadyForQueryStatus, SimpleQueryMessage, SimpleQueryRow}; -use bytes::Bytes; -use fallible_iterator::FallibleIterator; -use futures_util::{ready, Stream}; -use log::debug; -use pin_project_lite::pin_project; -use postgres_protocol2::message::backend::Message; -use postgres_protocol2::message::frontend; use std::marker::PhantomPinned; use std::pin::Pin; use std::sync::Arc; use std::task::{Context, Poll}; +use bytes::Bytes; +use fallible_iterator::FallibleIterator; +use futures_util::{Stream, ready}; +use log::debug; +use pin_project_lite::pin_project; +use postgres_protocol2::message::backend::Message; +use postgres_protocol2::message::frontend; + +use crate::client::{InnerClient, Responses}; +use crate::codec::FrontendMessage; +use crate::connection::RequestMessages; +use crate::{Error, ReadyForQueryStatus, SimpleQueryMessage, SimpleQueryRow}; + /// Information about a column of a single query row. 
#[derive(Debug)] pub struct SimpleColumn { diff --git a/libs/proxy/tokio-postgres2/src/statement.rs b/libs/proxy/tokio-postgres2/src/statement.rs index 591872fbc5..e4828db712 100644 --- a/libs/proxy/tokio-postgres2/src/statement.rs +++ b/libs/proxy/tokio-postgres2/src/statement.rs @@ -1,15 +1,14 @@ +use std::fmt; +use std::sync::{Arc, Weak}; + +use postgres_protocol2::Oid; +use postgres_protocol2::message::backend::Field; +use postgres_protocol2::message::frontend; + use crate::client::InnerClient; use crate::codec::FrontendMessage; use crate::connection::RequestMessages; use crate::types::Type; -use postgres_protocol2::{ - message::{backend::Field, frontend}, - Oid, -}; -use std::{ - fmt, - sync::{Arc, Weak}, -}; struct StatementInner { client: Weak, diff --git a/libs/proxy/tokio-postgres2/src/tls.rs b/libs/proxy/tokio-postgres2/src/tls.rs index dc8140719f..41b51368ff 100644 --- a/libs/proxy/tokio-postgres2/src/tls.rs +++ b/libs/proxy/tokio-postgres2/src/tls.rs @@ -5,6 +5,7 @@ use std::future::Future; use std::pin::Pin; use std::task::{Context, Poll}; use std::{fmt, io}; + use tokio::io::{AsyncRead, AsyncWrite, ReadBuf}; pub(crate) mod private { diff --git a/libs/proxy/tokio-postgres2/src/transaction.rs b/libs/proxy/tokio-postgres2/src/transaction.rs index 03a57e4947..eecbfc5873 100644 --- a/libs/proxy/tokio-postgres2/src/transaction.rs +++ b/libs/proxy/tokio-postgres2/src/transaction.rs @@ -1,8 +1,9 @@ +use postgres_protocol2::message::frontend; + use crate::codec::FrontendMessage; use crate::connection::RequestMessages; use crate::query::RowStream; use crate::{CancelToken, Client, Error, ReadyForQueryStatus}; -use postgres_protocol2::message::frontend; /// A representation of a PostgreSQL database transaction. /// diff --git a/libs/remote_storage/Cargo.toml b/libs/remote_storage/Cargo.toml index 33fa6e89f5..7bdf340f74 100644 --- a/libs/remote_storage/Cargo.toml +++ b/libs/remote_storage/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "remote_storage" version = "0.1.0" -edition.workspace = true +edition = "2024" license.workspace = true [dependencies] diff --git a/libs/remote_storage/src/azure_blob.rs b/libs/remote_storage/src/azure_blob.rs index 9027a8bf55..dee61a410d 100644 --- a/libs/remote_storage/src/azure_blob.rs +++ b/libs/remote_storage/src/azure_blob.rs @@ -2,33 +2,26 @@ use std::borrow::Cow; use std::collections::HashMap; -use std::env; use std::fmt::Display; -use std::io; use std::num::NonZeroU32; use std::pin::Pin; use std::str::FromStr; use std::sync::Arc; -use std::time::Duration; -use std::time::SystemTime; +use std::time::{Duration, SystemTime}; +use std::{env, io}; -use super::REMOTE_STORAGE_PREFIX_SEPARATOR; -use anyhow::Context; -use anyhow::Result; +use anyhow::{Context, Result}; use azure_core::request_options::{IfMatchCondition, MaxResults, Metadata, Range}; -use azure_core::HttpClient; -use azure_core::TransportOptions; -use azure_core::{Continuable, RetryOptions}; +use azure_core::{Continuable, HttpClient, RetryOptions, TransportOptions}; use azure_storage::StorageCredentials; use azure_storage_blobs::blob::CopyStatus; -use azure_storage_blobs::prelude::ClientBuilder; -use azure_storage_blobs::{blob::operations::GetBlobBuilder, prelude::ContainerClient}; +use azure_storage_blobs::blob::operations::GetBlobBuilder; +use azure_storage_blobs::prelude::{ClientBuilder, ContainerClient}; use bytes::Bytes; +use futures::FutureExt; use futures::future::Either; use futures::stream::Stream; -use futures::FutureExt; -use futures_util::StreamExt; -use futures_util::TryStreamExt; 
+use futures_util::{StreamExt, TryStreamExt}; use http_types::{StatusCode, Url}; use scopeguard::ScopeGuard; use tokio_util::sync::CancellationToken; @@ -36,12 +29,13 @@ use tracing::debug; use utils::backoff; use utils::backoff::exponential_backoff_duration_seconds; -use crate::metrics::{start_measuring_requests, AttemptOutcome, RequestKind}; -use crate::DownloadKind; +use super::REMOTE_STORAGE_PREFIX_SEPARATOR; +use crate::config::AzureConfig; +use crate::error::Cancelled; +use crate::metrics::{AttemptOutcome, RequestKind, start_measuring_requests}; use crate::{ - config::AzureConfig, error::Cancelled, ConcurrencyLimiter, Download, DownloadError, - DownloadOpts, Listing, ListingMode, ListingObject, RemotePath, RemoteStorage, StorageMetadata, - TimeTravelError, TimeoutOrCancel, + ConcurrencyLimiter, Download, DownloadError, DownloadKind, DownloadOpts, Listing, ListingMode, + ListingObject, RemotePath, RemoteStorage, StorageMetadata, TimeTravelError, TimeoutOrCancel, }; pub struct AzureBlobStorage { diff --git a/libs/remote_storage/src/config.rs b/libs/remote_storage/src/config.rs index ff34158c9c..52978be5b4 100644 --- a/libs/remote_storage/src/config.rs +++ b/libs/remote_storage/src/config.rs @@ -1,8 +1,10 @@ -use std::{fmt::Debug, num::NonZeroUsize, str::FromStr, time::Duration}; +use std::fmt::Debug; +use std::num::NonZeroUsize; +use std::str::FromStr; +use std::time::Duration; use aws_sdk_s3::types::StorageClass; use camino::Utf8PathBuf; - use serde::{Deserialize, Serialize}; use crate::{ diff --git a/libs/remote_storage/src/lib.rs b/libs/remote_storage/src/lib.rs index 69b522d63e..6eb5570d9b 100644 --- a/libs/remote_storage/src/lib.rs +++ b/libs/remote_storage/src/lib.rs @@ -18,40 +18,35 @@ mod s3_bucket; mod simulate_failures; mod support; -use std::{ - collections::HashMap, - fmt::Debug, - num::NonZeroU32, - ops::Bound, - pin::{pin, Pin}, - sync::Arc, - time::SystemTime, -}; +use std::collections::HashMap; +use std::fmt::Debug; +use std::num::NonZeroU32; +use std::ops::Bound; +use std::pin::{Pin, pin}; +use std::sync::Arc; +use std::time::SystemTime; use anyhow::Context; -use camino::{Utf8Path, Utf8PathBuf}; - +/// Azure SDK's ETag type is a simple String wrapper: we use this internally instead of repeating it here. +pub use azure_core::Etag; use bytes::Bytes; -use futures::{stream::Stream, StreamExt}; +use camino::{Utf8Path, Utf8PathBuf}; +pub use error::{DownloadError, TimeTravelError, TimeoutOrCancel}; +use futures::StreamExt; +use futures::stream::Stream; use itertools::Itertools as _; +use s3_bucket::RequestKind; use serde::{Deserialize, Serialize}; use tokio::sync::Semaphore; use tokio_util::sync::CancellationToken; use tracing::info; -pub use self::{ - azure_blob::AzureBlobStorage, local_fs::LocalFs, s3_bucket::S3Bucket, - simulate_failures::UnreliableWrapper, -}; -use s3_bucket::RequestKind; - +pub use self::azure_blob::AzureBlobStorage; +pub use self::local_fs::LocalFs; +pub use self::s3_bucket::S3Bucket; +pub use self::simulate_failures::UnreliableWrapper; pub use crate::config::{AzureConfig, RemoteStorageConfig, RemoteStorageKind, S3Config}; -/// Azure SDK's ETag type is a simple String wrapper: we use this internally instead of repeating it here. 
-pub use azure_core::Etag; - -pub use error::{DownloadError, TimeTravelError, TimeoutOrCancel}; - /// Default concurrency limit for S3 operations /// /// Currently, sync happens with AWS S3, that has two limits on requests per second: @@ -640,8 +635,13 @@ impl GenericRemoteStorage { let profile = std::env::var("AWS_PROFILE").unwrap_or_else(|_| "".into()); let access_key_id = std::env::var("AWS_ACCESS_KEY_ID").unwrap_or_else(|_| "".into()); - info!("Using s3 bucket '{}' in region '{}' as a remote storage, prefix in bucket: '{:?}', bucket endpoint: '{:?}', profile: {profile}, access_key_id: {access_key_id}", - s3_config.bucket_name, s3_config.bucket_region, s3_config.prefix_in_bucket, s3_config.endpoint); + info!( + "Using s3 bucket '{}' in region '{}' as a remote storage, prefix in bucket: '{:?}', bucket endpoint: '{:?}', profile: {profile}, access_key_id: {access_key_id}", + s3_config.bucket_name, + s3_config.bucket_region, + s3_config.prefix_in_bucket, + s3_config.endpoint + ); Self::AwsS3(Arc::new(S3Bucket::new(s3_config, timeout).await?)) } RemoteStorageKind::AzureContainer(azure_config) => { @@ -649,8 +649,12 @@ impl GenericRemoteStorage { .storage_account .as_deref() .unwrap_or(""); - info!("Using azure container '{}' in account '{storage_account}' in region '{}' as a remote storage, prefix in container: '{:?}'", - azure_config.container_name, azure_config.container_region, azure_config.prefix_in_container); + info!( + "Using azure container '{}' in account '{storage_account}' in region '{}' as a remote storage, prefix in container: '{:?}'", + azure_config.container_name, + azure_config.container_region, + azure_config.prefix_in_container + ); Self::AzureBlob(Arc::new(AzureBlobStorage::new( azure_config, timeout, diff --git a/libs/remote_storage/src/local_fs.rs b/libs/remote_storage/src/local_fs.rs index a8b00173ba..f03d6ac8ee 100644 --- a/libs/remote_storage/src/local_fs.rs +++ b/libs/remote_storage/src/local_fs.rs @@ -4,31 +4,26 @@ //! This storage used in tests, but can also be used in cases when a certain persistent //! volume is mounted to the local FS. 
-use std::{ - collections::HashSet, - io::ErrorKind, - num::NonZeroU32, - time::{Duration, SystemTime, UNIX_EPOCH}, -}; +use std::collections::HashSet; +use std::io::ErrorKind; +use std::num::NonZeroU32; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; -use anyhow::{bail, ensure, Context}; +use anyhow::{Context, bail, ensure}; use bytes::Bytes; use camino::{Utf8Path, Utf8PathBuf}; use futures::stream::Stream; -use tokio::{ - fs, - io::{self, AsyncReadExt, AsyncSeekExt, AsyncWriteExt}, -}; -use tokio_util::{io::ReaderStream, sync::CancellationToken}; +use tokio::fs; +use tokio::io::{self, AsyncReadExt, AsyncSeekExt, AsyncWriteExt}; +use tokio_util::io::ReaderStream; +use tokio_util::sync::CancellationToken; use utils::crashsafe::path_with_suffix_extension; -use crate::{ - Download, DownloadError, DownloadOpts, Listing, ListingMode, ListingObject, RemotePath, - TimeTravelError, TimeoutOrCancel, REMOTE_STORAGE_PREFIX_SEPARATOR, -}; - use super::{RemoteStorage, StorageMetadata}; -use crate::Etag; +use crate::{ + Download, DownloadError, DownloadOpts, Etag, Listing, ListingMode, ListingObject, + REMOTE_STORAGE_PREFIX_SEPARATOR, RemotePath, TimeTravelError, TimeoutOrCancel, +}; const LOCAL_FS_TEMP_FILE_SUFFIX: &str = "___temp"; @@ -91,7 +86,8 @@ impl LocalFs { #[cfg(test)] async fn list_all(&self) -> anyhow::Result> { - use std::{future::Future, pin::Pin}; + use std::future::Future; + use std::pin::Pin; fn get_all_files<'a, P>( directory_path: P, ) -> Pin>> + Send + Sync + 'a>> @@ -284,7 +280,9 @@ impl LocalFs { })?; if bytes_read < from_size_bytes { - bail!("Provided stream was shorter than expected: {bytes_read} vs {from_size_bytes} bytes"); + bail!( + "Provided stream was shorter than expected: {bytes_read} vs {from_size_bytes} bytes" + ); } // Check if there is any extra data after the given size. 
let mut from = buffer_to_read.into_inner(); @@ -642,10 +640,13 @@ fn mock_etag(meta: &std::fs::Metadata) -> Etag { #[cfg(test)] mod fs_tests { - use super::*; + use std::collections::HashMap; + use std::io::Write; + use std::ops::Bound; use camino_tempfile::tempdir; - use std::{collections::HashMap, io::Write, ops::Bound}; + + use super::*; async fn read_and_check_metadata( storage: &LocalFs, @@ -736,9 +737,14 @@ mod fs_tests { ); let non_existing_path = RemotePath::new(Utf8Path::new("somewhere/else"))?; - match storage.download(&non_existing_path, &DownloadOpts::default(), &cancel).await { + match storage + .download(&non_existing_path, &DownloadOpts::default(), &cancel) + .await + { Err(DownloadError::NotFound) => {} // Should get NotFound for non existing keys - other => panic!("Should get a NotFound error when downloading non-existing storage files, but got: {other:?}"), + other => panic!( + "Should get a NotFound error when downloading non-existing storage files, but got: {other:?}" + ), } Ok(()) } diff --git a/libs/remote_storage/src/metrics.rs b/libs/remote_storage/src/metrics.rs index 48c121fbc8..81e68e9a29 100644 --- a/libs/remote_storage/src/metrics.rs +++ b/libs/remote_storage/src/metrics.rs @@ -1,5 +1,5 @@ use metrics::{ - register_histogram_vec, register_int_counter, register_int_counter_vec, Histogram, IntCounter, + Histogram, IntCounter, register_histogram_vec, register_int_counter, register_int_counter_vec, }; use once_cell::sync::Lazy; @@ -16,8 +16,8 @@ pub(crate) enum RequestKind { Head = 6, } -use scopeguard::ScopeGuard; use RequestKind::*; +use scopeguard::ScopeGuard; impl RequestKind { const fn as_str(&self) -> &'static str { diff --git a/libs/remote_storage/src/s3_bucket.rs b/libs/remote_storage/src/s3_bucket.rs index d3f19f0b11..ba7ce9e1e7 100644 --- a/libs/remote_storage/src/s3_bucket.rs +++ b/libs/remote_storage/src/s3_bucket.rs @@ -4,56 +4,50 @@ //! allowing multiple api users to independently work with the same S3 bucket, if //! their bucket prefixes are both specified and different. 
-use std::{ - borrow::Cow, - collections::HashMap, - num::NonZeroU32, - pin::Pin, - sync::Arc, - task::{Context, Poll}, - time::{Duration, SystemTime}, -}; +use std::borrow::Cow; +use std::collections::HashMap; +use std::num::NonZeroU32; +use std::pin::Pin; +use std::sync::Arc; +use std::task::{Context, Poll}; +use std::time::{Duration, SystemTime}; -use anyhow::{anyhow, Context as _}; -use aws_config::{ - default_provider::credentials::DefaultCredentialsChain, - retry::{RetryConfigBuilder, RetryMode}, - BehaviorVersion, -}; -use aws_sdk_s3::{ - config::{AsyncSleep, IdentityCache, Region, SharedAsyncSleep}, - error::SdkError, - operation::{get_object::GetObjectError, head_object::HeadObjectError}, - types::{Delete, DeleteMarkerEntry, ObjectIdentifier, ObjectVersion, StorageClass}, - Client, -}; +use anyhow::{Context as _, anyhow}; +use aws_config::BehaviorVersion; +use aws_config::default_provider::credentials::DefaultCredentialsChain; +use aws_config::retry::{RetryConfigBuilder, RetryMode}; +use aws_sdk_s3::Client; +use aws_sdk_s3::config::{AsyncSleep, IdentityCache, Region, SharedAsyncSleep}; +use aws_sdk_s3::error::SdkError; +use aws_sdk_s3::operation::get_object::GetObjectError; +use aws_sdk_s3::operation::head_object::HeadObjectError; +use aws_sdk_s3::types::{Delete, DeleteMarkerEntry, ObjectIdentifier, ObjectVersion, StorageClass}; use aws_smithy_async::rt::sleep::TokioSleep; -use http_body_util::StreamBody; -use http_types::StatusCode; - -use aws_smithy_types::{body::SdkBody, DateTime}; -use aws_smithy_types::{byte_stream::ByteStream, date_time::ConversionError}; +use aws_smithy_types::DateTime; +use aws_smithy_types::body::SdkBody; +use aws_smithy_types::byte_stream::ByteStream; +use aws_smithy_types::date_time::ConversionError; use bytes::Bytes; use futures::stream::Stream; use futures_util::StreamExt; +use http_body_util::StreamBody; +use http_types::StatusCode; use hyper::body::Frame; use scopeguard::ScopeGuard; use tokio_util::sync::CancellationToken; use utils::backoff; use super::StorageMetadata; -use crate::{ - config::S3Config, - error::Cancelled, - metrics::{start_counting_cancelled_wait, start_measuring_requests}, - support::PermitCarrying, - ConcurrencyLimiter, Download, DownloadError, DownloadOpts, Listing, ListingMode, ListingObject, - RemotePath, RemoteStorage, TimeTravelError, TimeoutOrCancel, MAX_KEYS_PER_DELETE_S3, - REMOTE_STORAGE_PREFIX_SEPARATOR, -}; - -use crate::metrics::AttemptOutcome; +use crate::config::S3Config; +use crate::error::Cancelled; pub(super) use crate::metrics::RequestKind; +use crate::metrics::{AttemptOutcome, start_counting_cancelled_wait, start_measuring_requests}; +use crate::support::PermitCarrying; +use crate::{ + ConcurrencyLimiter, Download, DownloadError, DownloadOpts, Listing, ListingMode, ListingObject, + MAX_KEYS_PER_DELETE_S3, REMOTE_STORAGE_PREFIX_SEPARATOR, RemotePath, RemoteStorage, + TimeTravelError, TimeoutOrCancel, +}; /// AWS S3 storage. pub struct S3Bucket { @@ -958,8 +952,10 @@ impl RemoteStorage for S3Bucket { version_id, key, .. 
} = &vd; if version_id == "null" { - return Err(TimeTravelError::Other(anyhow!("Received ListVersions response for key={key} with version_id='null', \ - indicating either disabled versioning, or legacy objects with null version id values"))); + return Err(TimeTravelError::Other(anyhow!( + "Received ListVersions response for key={key} with version_id='null', \ + indicating either disabled versioning, or legacy objects with null version id values" + ))); } tracing::trace!( "Parsing version key={key} version_id={version_id} kind={:?}", @@ -1126,9 +1122,10 @@ impl VerOrDelete { #[cfg(test)] mod tests { - use camino::Utf8Path; use std::num::NonZeroUsize; + use camino::Utf8Path; + use crate::{RemotePath, S3Bucket, S3Config}; #[tokio::test] diff --git a/libs/remote_storage/src/simulate_failures.rs b/libs/remote_storage/src/simulate_failures.rs index 63c24beb51..f56be873c4 100644 --- a/libs/remote_storage/src/simulate_failures.rs +++ b/libs/remote_storage/src/simulate_failures.rs @@ -1,14 +1,15 @@ //! This module provides a wrapper around a real RemoteStorage implementation that //! causes the first N attempts at each upload or download operatio to fail. For //! testing purposes. -use bytes::Bytes; -use futures::stream::Stream; -use futures::StreamExt; use std::collections::HashMap; +use std::collections::hash_map::Entry; use std::num::NonZeroU32; -use std::sync::Mutex; +use std::sync::{Arc, Mutex}; use std::time::SystemTime; -use std::{collections::hash_map::Entry, sync::Arc}; + +use bytes::Bytes; +use futures::StreamExt; +use futures::stream::Stream; use tokio_util::sync::CancellationToken; use crate::{ diff --git a/libs/remote_storage/src/support.rs b/libs/remote_storage/src/support.rs index 1ed9ed9305..07da38cf77 100644 --- a/libs/remote_storage/src/support.rs +++ b/libs/remote_storage/src/support.rs @@ -1,9 +1,7 @@ -use std::{ - future::Future, - pin::Pin, - task::{Context, Poll}, - time::Duration, -}; +use std::future::Future; +use std::pin::Pin; +use std::task::{Context, Poll}; +use std::time::Duration; use bytes::Bytes; use futures_util::Stream; @@ -114,9 +112,10 @@ pub(crate) fn cancel_or_timeout( #[cfg(test)] mod tests { + use futures::stream::StreamExt; + use super::*; use crate::DownloadError; - use futures::stream::StreamExt; #[tokio::test(start_paused = true)] async fn cancelled_download_stream() { diff --git a/libs/remote_storage/tests/common/tests.rs b/libs/remote_storage/tests/common/tests.rs index d5da1d48e9..6a78ddc01e 100644 --- a/libs/remote_storage/tests/common/tests.rs +++ b/libs/remote_storage/tests/common/tests.rs @@ -1,19 +1,20 @@ +use std::collections::HashSet; +use std::num::NonZeroU32; +use std::ops::Bound; +use std::sync::Arc; + use anyhow::Context; use camino::Utf8Path; use futures::StreamExt; use remote_storage::{DownloadError, DownloadOpts, ListingMode, ListingObject, RemotePath}; -use std::ops::Bound; -use std::sync::Arc; -use std::{collections::HashSet, num::NonZeroU32}; use test_context::test_context; use tokio_util::sync::CancellationToken; use tracing::debug; -use crate::common::{download_to_vec, upload_stream, wrap_stream}; - use super::{ MaybeEnabledStorage, MaybeEnabledStorageWithSimpleTestBlobs, MaybeEnabledStorageWithTestBlobs, }; +use crate::common::{download_to_vec, upload_stream, wrap_stream}; /// Tests that S3 client can list all prefixes, even if the response come paginated and requires multiple S3 queries. /// Uses real S3 and requires [`ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME`] and related S3 cred env vars specified. 
@@ -62,7 +63,8 @@ async fn pagination_should_work(ctx: &mut MaybeEnabledStorageWithTestBlobs) -> a .into_iter() .collect::>(); assert_eq!( - root_remote_prefixes, HashSet::from([base_prefix.clone()]), + root_remote_prefixes, + HashSet::from([base_prefix.clone()]), "remote storage root prefixes list mismatches with the uploads. Returned prefixes: {root_remote_prefixes:?}" ); @@ -84,7 +86,8 @@ async fn pagination_should_work(ctx: &mut MaybeEnabledStorageWithTestBlobs) -> a .difference(&nested_remote_prefixes) .collect::>(); assert_eq!( - remote_only_prefixes.len() + missing_uploaded_prefixes.len(), 0, + remote_only_prefixes.len() + missing_uploaded_prefixes.len(), + 0, "remote storage nested prefixes list mismatches with the uploads. Remote only prefixes: {remote_only_prefixes:?}, missing uploaded prefixes: {missing_uploaded_prefixes:?}", ); @@ -119,7 +122,8 @@ async fn pagination_should_work(ctx: &mut MaybeEnabledStorageWithTestBlobs) -> a .difference(&nested_remote_prefixes_combined) .collect::>(); assert_eq!( - remote_only_prefixes.len() + missing_uploaded_prefixes.len(), 0, + remote_only_prefixes.len() + missing_uploaded_prefixes.len(), + 0, "remote storage nested prefixes list mismatches with the uploads. Remote only prefixes: {remote_only_prefixes:?}, missing uploaded prefixes: {missing_uploaded_prefixes:?}", ); diff --git a/libs/remote_storage/tests/test_real_azure.rs b/libs/remote_storage/tests/test_real_azure.rs index 15004dbf83..31c9ca3200 100644 --- a/libs/remote_storage/tests/test_real_azure.rs +++ b/libs/remote_storage/tests/test_real_azure.rs @@ -1,9 +1,9 @@ +use std::collections::HashSet; use std::env; use std::num::NonZeroUsize; use std::ops::ControlFlow; use std::sync::Arc; -use std::time::UNIX_EPOCH; -use std::{collections::HashSet, time::Duration}; +use std::time::{Duration, UNIX_EPOCH}; use anyhow::Context; use remote_storage::{ @@ -208,7 +208,7 @@ async fn create_azure_client( .as_millis(); // because nanos can be the same for two threads so can millis, add randomness - let random = rand::thread_rng().gen::(); + let random = rand::thread_rng().r#gen::(); let remote_storage_config = RemoteStorageConfig { storage: RemoteStorageKind::AzureContainer(AzureConfig { diff --git a/libs/remote_storage/tests/test_real_s3.rs b/libs/remote_storage/tests/test_real_s3.rs index e60ec18c93..6996bb27ae 100644 --- a/libs/remote_storage/tests/test_real_s3.rs +++ b/libs/remote_storage/tests/test_real_s3.rs @@ -1,13 +1,12 @@ +use std::collections::HashSet; use std::env; use std::fmt::{Debug, Display}; use std::future::Future; use std::num::NonZeroUsize; use std::ops::ControlFlow; use std::sync::Arc; -use std::time::{Duration, UNIX_EPOCH}; -use std::{collections::HashSet, time::SystemTime}; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; -use crate::common::{download_to_vec, upload_stream}; use anyhow::Context; use camino::Utf8Path; use futures_util::StreamExt; @@ -15,12 +14,13 @@ use remote_storage::{ DownloadError, DownloadOpts, GenericRemoteStorage, ListingMode, RemotePath, RemoteStorageConfig, RemoteStorageKind, S3Config, }; -use test_context::test_context; -use test_context::AsyncTestContext; +use test_context::{AsyncTestContext, test_context}; use tokio::io::AsyncBufReadExt; use tokio_util::sync::CancellationToken; use tracing::info; +use crate::common::{download_to_vec, upload_stream}; + mod common; #[path = "common/tests.rs"] @@ -128,8 +128,10 @@ async fn s3_time_travel_recovery_works(ctx: &mut MaybeEnabledStorage) -> anyhow: let t0_hwt = t0 + half_wt; let t1_hwt = t1 - half_wt; if 
!(t0_hwt..=t1_hwt).contains(&last_modified) { - panic!("last_modified={last_modified:?} is not between t0_hwt={t0_hwt:?} and t1_hwt={t1_hwt:?}. \ - This likely means a large lock discrepancy between S3 and the local clock."); + panic!( + "last_modified={last_modified:?} is not between t0_hwt={t0_hwt:?} and t1_hwt={t1_hwt:?}. \ + This likely means a large lock discrepancy between S3 and the local clock." + ); } } @@ -383,7 +385,7 @@ async fn create_s3_client( .as_millis(); // because nanos can be the same for two threads so can millis, add randomness - let random = rand::thread_rng().gen::(); + let random = rand::thread_rng().r#gen::(); let remote_storage_config = RemoteStorageConfig { storage: RemoteStorageKind::AwsS3(S3Config { diff --git a/libs/safekeeper_api/Cargo.toml b/libs/safekeeper_api/Cargo.toml index 6b72ace019..d9d080e8fe 100644 --- a/libs/safekeeper_api/Cargo.toml +++ b/libs/safekeeper_api/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "safekeeper_api" version = "0.1.0" -edition.workspace = true +edition = "2024" license.workspace = true [dependencies] diff --git a/libs/safekeeper_api/src/membership.rs b/libs/safekeeper_api/src/membership.rs index 8b14a4f290..bb8934744a 100644 --- a/libs/safekeeper_api/src/membership.rs +++ b/libs/safekeeper_api/src/membership.rs @@ -2,7 +2,8 @@ //! rfcs/035-safekeeper-dynamic-membership-change.md //! for details. -use std::{collections::HashSet, fmt::Display}; +use std::collections::HashSet; +use std::fmt::Display; use anyhow; use anyhow::bail; @@ -68,14 +69,12 @@ impl Display for SafekeeperId { #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] #[serde(transparent)] pub struct MemberSet { - pub members: Vec, + pub m: Vec, } impl MemberSet { pub fn empty() -> Self { - MemberSet { - members: Vec::new(), - } + MemberSet { m: Vec::new() } } pub fn new(members: Vec) -> anyhow::Result { @@ -83,21 +82,21 @@ impl MemberSet { if hs.len() != members.len() { bail!("duplicate safekeeper id in the set {:?}", members); } - Ok(MemberSet { members }) + Ok(MemberSet { m: members }) } - pub fn contains(&self, sk: &SafekeeperId) -> bool { - self.members.iter().any(|m| m.id == sk.id) + pub fn contains(&self, sk: NodeId) -> bool { + self.m.iter().any(|m| m.id == sk) } pub fn add(&mut self, sk: SafekeeperId) -> anyhow::Result<()> { - if self.contains(&sk) { + if self.contains(sk.id) { bail!(format!( "sk {} is already member of the set {}", sk.id, self )); } - self.members.push(sk); + self.m.push(sk); Ok(()) } } @@ -105,11 +104,7 @@ impl MemberSet { impl Display for MemberSet { /// Display as a comma separated list of members. fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let sks_str = self - .members - .iter() - .map(|m| m.to_string()) - .collect::>(); + let sks_str = self.m.iter().map(|sk| sk.to_string()).collect::>(); write!(f, "({})", sks_str.join(", ")) } } @@ -135,6 +130,11 @@ impl Configuration { new_members: None, } } + + /// Is `sk_id` member of the configuration? 
+ pub fn contains(&self, sk_id: NodeId) -> bool { + self.members.contains(sk_id) || self.new_members.as_ref().is_some_and(|m| m.contains(sk_id)) + } } impl Display for Configuration { @@ -154,9 +154,10 @@ impl Display for Configuration { #[cfg(test)] mod tests { - use super::{MemberSet, SafekeeperId}; use utils::id::NodeId; + use super::{MemberSet, SafekeeperId}; + #[test] fn test_member_set() { let mut members = MemberSet::empty(); diff --git a/libs/safekeeper_api/src/models.rs b/libs/safekeeper_api/src/models.rs index 41ccdaa428..2f2aeaa429 100644 --- a/libs/safekeeper_api/src/models.rs +++ b/libs/safekeeper_api/src/models.rs @@ -1,18 +1,17 @@ //! Types used in safekeeper http API. Many of them are also reused internally. +use std::net::SocketAddr; + use pageserver_api::shard::ShardIdentity; use postgres_ffi::TimestampTz; use serde::{Deserialize, Serialize}; -use std::net::SocketAddr; use tokio::time::Instant; +use utils::id::{NodeId, TenantId, TenantTimelineId, TimelineId}; +use utils::lsn::Lsn; +use utils::pageserver_feedback::PageserverFeedback; -use utils::{ - id::{NodeId, TenantId, TenantTimelineId, TimelineId}, - lsn::Lsn, - pageserver_feedback::PageserverFeedback, -}; - -use crate::{membership::Configuration, ServerInfo, Term}; +use crate::membership::Configuration; +use crate::{ServerInfo, Term}; #[derive(Debug, Serialize)] pub struct SafekeeperStatus { diff --git a/libs/utils/Cargo.toml b/libs/utils/Cargo.toml index 62e0f4cfba..5020d82adf 100644 --- a/libs/utils/Cargo.toml +++ b/libs/utils/Cargo.toml @@ -27,7 +27,7 @@ humantime.workspace = true fail.workspace = true futures = { workspace = true } jsonwebtoken.workspace = true -nix = {workspace = true, features = [ "ioctl" ] } +nix = { workspace = true, features = ["ioctl"] } once_cell.workspace = true pin-project-lite.workspace = true regex.workspace = true @@ -61,6 +61,7 @@ bytes.workspace = true criterion.workspace = true hex-literal.workspace = true camino-tempfile.workspace = true +pprof.workspace = true serde_assert.workspace = true tokio = { workspace = true, features = ["test-util"] } diff --git a/libs/utils/benches/README.md b/libs/utils/benches/README.md new file mode 100644 index 0000000000..5afbe3cf2b --- /dev/null +++ b/libs/utils/benches/README.md @@ -0,0 +1,26 @@ +## Utils Benchmarks + +To run benchmarks: + +```sh +# All benchmarks. +cargo bench --package utils + +# Specific file. +cargo bench --package utils --bench benchmarks + +# Specific benchmark. +cargo bench --package utils --bench benchmarks log_slow/enabled=true + +# List available benchmarks. +cargo bench --package utils --benches -- --list + +# Generate flamegraph profiles using pprof-rs, profiling for 10 seconds. +# Output in target/criterion/*/profile/flamegraph.svg. +cargo bench --package utils --bench benchmarks log_slow/enabled=true --profile-time 10 +``` + +Additional charts and statistics are available in `target/criterion/report/index.html`. + +Benchmarks are automatically compared against the previous run. To compare against other runs, see +`--baseline` and `--save-baseline`. 
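The `safekeeper_api` membership changes above rename `MemberSet::members` to `m` (the serialized form stays a bare array thanks to `#[serde(transparent)]`), make `contains` take a `NodeId` by value instead of a `&SafekeeperId`, and add `Configuration::contains`, which treats a node as a member if it appears in either the current member set or the prospective one during a joint configuration. A small usage sketch follows; the helper and its call site are hypothetical, not part of this PR.

```rust
use safekeeper_api::membership::Configuration;
use utils::id::NodeId;

/// Hypothetical helper: accept a connection only from safekeepers that belong to
/// the configuration. During a membership change this also admits nodes that are
/// only in `new_members`, because `Configuration::contains` checks both sets.
fn is_known_safekeeper(conf: &Configuration, sk_id: NodeId) -> bool {
    conf.contains(sk_id)
}
```

Taking a bare `NodeId` means call sites no longer need a full `SafekeeperId` (host and port) just to answer a membership question.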
\ No newline at end of file diff --git a/libs/utils/benches/benchmarks.rs b/libs/utils/benches/benchmarks.rs index 44eb36387c..348e27ac47 100644 --- a/libs/utils/benches/benchmarks.rs +++ b/libs/utils/benches/benchmarks.rs @@ -1,5 +1,18 @@ -use criterion::{criterion_group, criterion_main, Criterion}; +use std::time::Duration; + +use criterion::{criterion_group, criterion_main, Bencher, Criterion}; +use pprof::criterion::{Output, PProfProfiler}; use utils::id; +use utils::logging::log_slow; + +// Register benchmarks with Criterion. +criterion_group!( + name = benches; + config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None))); + targets = bench_id_stringify, + bench_log_slow, +); +criterion_main!(benches); pub fn bench_id_stringify(c: &mut Criterion) { // Can only use public methods. @@ -16,5 +29,31 @@ pub fn bench_id_stringify(c: &mut Criterion) { }); } -criterion_group!(benches, bench_id_stringify); -criterion_main!(benches); +pub fn bench_log_slow(c: &mut Criterion) { + for enabled in [false, true] { + c.bench_function(&format!("log_slow/enabled={enabled}"), |b| { + run_bench(b, enabled).unwrap() + }); + } + + // The actual benchmark. + fn run_bench(b: &mut Bencher, enabled: bool) -> anyhow::Result<()> { + const THRESHOLD: Duration = Duration::from_secs(1); + + // Use a multi-threaded runtime to avoid thread parking overhead when yielding. + let runtime = tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build()?; + + // Test both with and without log_slow, since we're essentially measuring Tokio scheduling + // performance too. Use a simple noop future that yields once, to avoid any scheduler fast + // paths for a ready future. + if enabled { + b.iter(|| runtime.block_on(log_slow("ready", THRESHOLD, tokio::task::yield_now()))); + } else { + b.iter(|| runtime.block_on(tokio::task::yield_now())); + } + + Ok(()) + } +} diff --git a/libs/utils/src/logging.rs b/libs/utils/src/logging.rs index 4a6069294d..2c36942f43 100644 --- a/libs/utils/src/logging.rs +++ b/libs/utils/src/logging.rs @@ -1,9 +1,13 @@ +use std::future::Future; use std::str::FromStr; +use std::time::Duration; use anyhow::Context; use metrics::{IntCounter, IntCounterVec}; use once_cell::sync::Lazy; use strum_macros::{EnumString, VariantNames}; +use tokio::time::Instant; +use tracing::info; /// Logs a critical error, similarly to `tracing::error!`. This will: /// @@ -318,6 +322,43 @@ impl std::fmt::Debug for SecretString { } } +/// Logs a periodic message if a future is slow to complete. +/// +/// This is performance-sensitive as it's used on the GetPage read path. +/// +/// TODO: consider upgrading this to a warning, but currently it fires too often. +#[inline] +pub async fn log_slow(name: &str, threshold: Duration, f: impl Future) -> O { + // TODO: we unfortunately have to pin the future on the heap, since GetPage futures are huge and + // won't fit on the stack. + let mut f = Box::pin(f); + + let started = Instant::now(); + let mut attempt = 1; + + loop { + // NB: use timeout_at() instead of timeout() to avoid an extra clock reading in the common + // case where the timeout doesn't fire. + let deadline = started + attempt * threshold; + if let Ok(output) = tokio::time::timeout_at(deadline, &mut f).await { + // NB: we check if we exceeded the threshold even if the timeout never fired, because + // scheduling or execution delays may cause the future to succeed even if it exceeds the + // timeout. 
This costs an extra unconditional clock reading, but seems worth it to avoid + // false negatives. + let elapsed = started.elapsed(); + if elapsed >= threshold { + info!("slow {name} completed after {:.3}s", elapsed.as_secs_f64()); + } + return output; + } + + let elapsed = started.elapsed().as_secs_f64(); + info!("slow {name} still running after {elapsed:.3}s",); + + attempt += 1; + } +} + #[cfg(test)] mod tests { use metrics::{core::Opts, IntCounterVec}; diff --git a/libs/vm_monitor/Cargo.toml b/libs/vm_monitor/Cargo.toml index ba73902d38..a70465921c 100644 --- a/libs/vm_monitor/Cargo.toml +++ b/libs/vm_monitor/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "vm_monitor" version = "0.1.0" -edition.workspace = true +edition = "2024" license.workspace = true [[bin]] diff --git a/libs/vm_monitor/src/cgroup.rs b/libs/vm_monitor/src/cgroup.rs index 1d70cedcf9..dda9b23818 100644 --- a/libs/vm_monitor/src/cgroup.rs +++ b/libs/vm_monitor/src/cgroup.rs @@ -1,12 +1,10 @@ use std::fmt::{self, Debug, Formatter}; use std::time::{Duration, Instant}; -use anyhow::{anyhow, Context}; -use cgroups_rs::{ - hierarchies::{self, is_cgroup2_unified_mode}, - memory::MemController, - Subsystem, -}; +use anyhow::{Context, anyhow}; +use cgroups_rs::Subsystem; +use cgroups_rs::hierarchies::{self, is_cgroup2_unified_mode}; +use cgroups_rs::memory::MemController; use tokio::sync::watch; use tracing::{info, warn}; diff --git a/libs/vm_monitor/src/dispatcher.rs b/libs/vm_monitor/src/dispatcher.rs index c81848cb70..7b7201ab77 100644 --- a/libs/vm_monitor/src/dispatcher.rs +++ b/libs/vm_monitor/src/dispatcher.rs @@ -6,17 +6,15 @@ //! the cgroup (requesting upscale), and the signals that go to the cgroup //! (notifying it of upscale). -use anyhow::{bail, Context}; +use anyhow::{Context, bail}; use axum::extract::ws::{Message, Utf8Bytes, WebSocket}; -use futures::{ - stream::{SplitSink, SplitStream}, - SinkExt, StreamExt, -}; +use futures::stream::{SplitSink, SplitStream}; +use futures::{SinkExt, StreamExt}; use tracing::{debug, info}; use crate::protocol::{ - OutboundMsg, OutboundMsgKind, ProtocolRange, ProtocolResponse, ProtocolVersion, - PROTOCOL_MAX_VERSION, PROTOCOL_MIN_VERSION, + OutboundMsg, OutboundMsgKind, PROTOCOL_MAX_VERSION, PROTOCOL_MIN_VERSION, ProtocolRange, + ProtocolResponse, ProtocolVersion, }; /// The central handler for all communications in the monitor. diff --git a/libs/vm_monitor/src/filecache.rs b/libs/vm_monitor/src/filecache.rs index 4f5bf1c1e3..bc42347e5a 100644 --- a/libs/vm_monitor/src/filecache.rs +++ b/libs/vm_monitor/src/filecache.rs @@ -2,12 +2,14 @@ use std::num::NonZeroU64; -use crate::MiB; -use anyhow::{anyhow, Context}; -use tokio_postgres::{types::ToSql, Client, NoTls, Row}; +use anyhow::{Context, anyhow}; +use tokio_postgres::types::ToSql; +use tokio_postgres::{Client, NoTls, Row}; use tokio_util::sync::CancellationToken; use tracing::{error, info}; +use crate::MiB; + /// Manages Postgres' file cache by keeping a connection open. 
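The `log_slow` helper added to `utils::logging` above wraps an arbitrary future: it re-arms a `timeout_at` at multiples of the threshold, logs while the future is still pending, and logs once more on completion if the total time exceeded the threshold. A usage sketch follows; the wrapped operation, its name, and the one-second threshold are illustrative, not taken from this PR.

```rust
use std::time::Duration;

use utils::logging::log_slow;

// Stand-in for a potentially slow operation (e.g. a GetPage request handler).
async fn handle_request(id: u64) -> u64 {
    id
}

async fn serve(id: u64) -> u64 {
    // Logs "slow get_page still running after N.NNNs" roughly every second while
    // the future is pending, and "slow get_page completed after N.NNNs" at the end
    // if the whole call took at least one second.
    log_slow("get_page", Duration::from_secs(1), handle_request(id)).await
}
```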
#[derive(Debug)] pub struct FileCacheState { diff --git a/libs/vm_monitor/src/lib.rs b/libs/vm_monitor/src/lib.rs index 0cd97d4ca1..7c77aca35d 100644 --- a/libs/vm_monitor/src/lib.rs +++ b/libs/vm_monitor/src/lib.rs @@ -2,24 +2,26 @@ #![deny(clippy::undocumented_unsafe_blocks)] #![cfg(target_os = "linux")] +use std::fmt::Debug; +use std::net::SocketAddr; +use std::time::Duration; + use anyhow::Context; -use axum::{ - extract::{ws::WebSocket, State, WebSocketUpgrade}, - response::Response, -}; -use axum::{routing::get, Router}; +use axum::Router; +use axum::extract::ws::WebSocket; +use axum::extract::{State, WebSocketUpgrade}; +use axum::response::Response; +use axum::routing::get; use clap::Parser; use futures::Future; -use std::net::SocketAddr; -use std::{fmt::Debug, time::Duration}; +use runner::Runner; use sysinfo::{RefreshKind, System, SystemExt}; use tokio::net::TcpListener; -use tokio::{sync::broadcast, task::JoinHandle}; +use tokio::sync::broadcast; +use tokio::task::JoinHandle; use tokio_util::sync::CancellationToken; use tracing::{error, info}; -use runner::Runner; - // Code that interfaces with agent pub mod dispatcher; pub mod protocol; diff --git a/libs/vm_monitor/src/protocol.rs b/libs/vm_monitor/src/protocol.rs index 5f07435503..4fce3cdefc 100644 --- a/libs/vm_monitor/src/protocol.rs +++ b/libs/vm_monitor/src/protocol.rs @@ -35,7 +35,8 @@ use core::fmt; use std::cmp; -use serde::{de::Error, Deserialize, Serialize}; +use serde::de::Error; +use serde::{Deserialize, Serialize}; /// A Message we send to the agent. #[derive(Serialize, Deserialize, Debug, Clone)] diff --git a/libs/vm_monitor/src/runner.rs b/libs/vm_monitor/src/runner.rs index 8839f5803f..6f75ff0abd 100644 --- a/libs/vm_monitor/src/runner.rs +++ b/libs/vm_monitor/src/runner.rs @@ -7,7 +7,7 @@ use std::fmt::Debug; use std::time::{Duration, Instant}; -use anyhow::{bail, Context}; +use anyhow::{Context, bail}; use axum::extract::ws::{Message, WebSocket}; use futures::StreamExt; use tokio::sync::{broadcast, watch}; @@ -18,7 +18,7 @@ use crate::cgroup::{self, CgroupWatcher}; use crate::dispatcher::Dispatcher; use crate::filecache::{FileCacheConfig, FileCacheState}; use crate::protocol::{InboundMsg, InboundMsgKind, OutboundMsg, OutboundMsgKind, Resources}; -use crate::{bytes_to_mebibytes, get_total_system_memory, spawn_with_cancel, Args, MiB}; +use crate::{Args, MiB, bytes_to_mebibytes, get_total_system_memory, spawn_with_cancel}; /// Central struct that interacts with agent, dispatcher, and cgroup to handle /// signals from the agent. @@ -233,7 +233,9 @@ impl Runner { // // TODO: make the duration here configurable. 
if last_time.elapsed() > Duration::from_secs(5) { - bail!("haven't gotten cgroup memory stats recently enough to determine downscaling information"); + bail!( + "haven't gotten cgroup memory stats recently enough to determine downscaling information" + ); } else if last_history.samples_count <= 1 { let status = "haven't received enough cgroup memory stats yet"; info!(status, "discontinuing downscale"); diff --git a/libs/wal_decoder/proto/interpreted_wal.proto b/libs/wal_decoder/proto/interpreted_wal.proto index d68484d30f..7b40201a75 100644 --- a/libs/wal_decoder/proto/interpreted_wal.proto +++ b/libs/wal_decoder/proto/interpreted_wal.proto @@ -5,6 +5,7 @@ package interpreted_wal; message InterpretedWalRecords { repeated InterpretedWalRecord records = 1; optional uint64 next_record_lsn = 2; + optional uint64 raw_wal_start_lsn = 3; } message InterpretedWalRecord { diff --git a/libs/wal_decoder/src/models.rs b/libs/wal_decoder/src/models.rs index 51bf7e44ab..7e1934c6c3 100644 --- a/libs/wal_decoder/src/models.rs +++ b/libs/wal_decoder/src/models.rs @@ -60,7 +60,11 @@ pub struct InterpretedWalRecords { pub records: Vec, // Start LSN of the next record after the batch. // Note that said record may not belong to the current shard. - pub next_record_lsn: Option, + pub next_record_lsn: Lsn, + // Inclusive start LSN of the PG WAL from which the interpreted + // WAL records were extracted. Note that this is not necessarily the + // start LSN of the first interpreted record in the batch. + pub raw_wal_start_lsn: Option, } /// An interpreted Postgres WAL record, ready to be handled by the pageserver diff --git a/libs/wal_decoder/src/wire_format.rs b/libs/wal_decoder/src/wire_format.rs index 944ee5c919..52ed5c70b5 100644 --- a/libs/wal_decoder/src/wire_format.rs +++ b/libs/wal_decoder/src/wire_format.rs @@ -167,7 +167,8 @@ impl TryFrom for proto::InterpretedWalRecords { .collect::, _>>()?; Ok(proto::InterpretedWalRecords { records, - next_record_lsn: value.next_record_lsn.map(|l| l.0), + next_record_lsn: Some(value.next_record_lsn.0), + raw_wal_start_lsn: value.raw_wal_start_lsn.map(|l| l.0), }) } } @@ -254,7 +255,11 @@ impl TryFrom for InterpretedWalRecords { Ok(InterpretedWalRecords { records, - next_record_lsn: value.next_record_lsn.map(Lsn::from), + next_record_lsn: value + .next_record_lsn + .map(Lsn::from) + .expect("Always provided"), + raw_wal_start_lsn: value.raw_wal_start_lsn.map(Lsn::from), }) } } diff --git a/libs/walproposer/src/walproposer.rs b/libs/walproposer/src/walproposer.rs index ba75171db2..60b606c64a 100644 --- a/libs/walproposer/src/walproposer.rs +++ b/libs/walproposer/src/walproposer.rs @@ -215,6 +215,7 @@ impl Wrapper { syncSafekeepers: config.sync_safekeepers, systemId: 0, pgTimeline: 1, + proto_version: 3, callback_data, }; let c_config = Box::into_raw(Box::new(c_config)); @@ -276,6 +277,7 @@ mod tests { use core::panic; use std::{ cell::Cell, + ffi::CString, sync::{atomic::AtomicUsize, mpsc::sync_channel}, }; @@ -496,57 +498,64 @@ mod tests { // Messages definitions are at walproposer.h // xxx: it would be better to extract them from safekeeper crate and // use serialization/deserialization here. 
- let greeting_tag = (b'g' as u64).to_ne_bytes(); - let proto_version = 2_u32.to_ne_bytes(); - let pg_version: [u8; 4] = PG_VERSION_NUM.to_ne_bytes(); - let proposer_id = [0; 16]; - let system_id = 0_u64.to_ne_bytes(); - let tenant_id = ttid.tenant_id.as_arr(); - let timeline_id = ttid.timeline_id.as_arr(); - let pg_tli = 1_u32.to_ne_bytes(); - let wal_seg_size = 16777216_u32.to_ne_bytes(); + let greeting_tag = (b'g').to_be_bytes(); + let tenant_id = CString::new(ttid.tenant_id.to_string()) + .unwrap() + .into_bytes_with_nul(); + let timeline_id = CString::new(ttid.timeline_id.to_string()) + .unwrap() + .into_bytes_with_nul(); + let mconf_gen = 0_u32.to_be_bytes(); + let mconf_members_len = 0_u32.to_be_bytes(); + let mconf_members_new_len = 0_u32.to_be_bytes(); + let pg_version: [u8; 4] = PG_VERSION_NUM.to_be_bytes(); + let system_id = 0_u64.to_be_bytes(); + let wal_seg_size = 16777216_u32.to_be_bytes(); + let proposer_greeting = [ greeting_tag.as_slice(), - proto_version.as_slice(), - pg_version.as_slice(), - proposer_id.as_slice(), - system_id.as_slice(), tenant_id.as_slice(), timeline_id.as_slice(), - pg_tli.as_slice(), + mconf_gen.as_slice(), + mconf_members_len.as_slice(), + mconf_members_new_len.as_slice(), + pg_version.as_slice(), + system_id.as_slice(), wal_seg_size.as_slice(), ] .concat(); - let voting_tag = (b'v' as u64).to_ne_bytes(); - let vote_request_term = 3_u64.to_ne_bytes(); - let proposer_id = [0; 16]; + let voting_tag = (b'v').to_be_bytes(); + let vote_request_term = 3_u64.to_be_bytes(); let vote_request = [ voting_tag.as_slice(), + mconf_gen.as_slice(), vote_request_term.as_slice(), - proposer_id.as_slice(), ] .concat(); - let acceptor_greeting_term = 2_u64.to_ne_bytes(); - let acceptor_greeting_node_id = 1_u64.to_ne_bytes(); + let acceptor_greeting_term = 2_u64.to_be_bytes(); + let acceptor_greeting_node_id = 1_u64.to_be_bytes(); let acceptor_greeting = [ greeting_tag.as_slice(), - acceptor_greeting_term.as_slice(), acceptor_greeting_node_id.as_slice(), + mconf_gen.as_slice(), + mconf_members_len.as_slice(), + mconf_members_new_len.as_slice(), + acceptor_greeting_term.as_slice(), ] .concat(); - let vote_response_term = 3_u64.to_ne_bytes(); - let vote_given = 1_u64.to_ne_bytes(); - let flush_lsn = 0x539_u64.to_ne_bytes(); - let truncate_lsn = 0x539_u64.to_ne_bytes(); - let th_len = 1_u32.to_ne_bytes(); - let th_term = 2_u64.to_ne_bytes(); - let th_lsn = 0x539_u64.to_ne_bytes(); - let timeline_start_lsn = 0x539_u64.to_ne_bytes(); + let vote_response_term = 3_u64.to_be_bytes(); + let vote_given = 1_u8.to_be_bytes(); + let flush_lsn = 0x539_u64.to_be_bytes(); + let truncate_lsn = 0x539_u64.to_be_bytes(); + let th_len = 1_u32.to_be_bytes(); + let th_term = 2_u64.to_be_bytes(); + let th_lsn = 0x539_u64.to_be_bytes(); let vote_response = [ voting_tag.as_slice(), + mconf_gen.as_slice(), vote_response_term.as_slice(), vote_given.as_slice(), flush_lsn.as_slice(), @@ -554,7 +563,6 @@ mod tests { th_len.as_slice(), th_term.as_slice(), th_lsn.as_slice(), - timeline_start_lsn.as_slice(), ] .concat(); diff --git a/pageserver/Cargo.toml b/pageserver/Cargo.toml index 41ac3b69b8..7330856be4 100644 --- a/pageserver/Cargo.toml +++ b/pageserver/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "pageserver" version = "0.1.0" -edition.workspace = true +edition = "2024" license.workspace = true [features] @@ -40,7 +40,6 @@ num_cpus.workspace = true num-traits.workspace = true once_cell.workspace = true pin-project-lite.workspace = true -postgres.workspace = true postgres_backend.workspace = true 
postgres-protocol.workspace = true postgres-types.workspace = true diff --git a/pageserver/benches/bench_ingest.rs b/pageserver/benches/bench_ingest.rs index b67a9cc479..b1103948d6 100644 --- a/pageserver/benches/bench_ingest.rs +++ b/pageserver/benches/bench_ingest.rs @@ -1,22 +1,20 @@ -use std::{env, num::NonZeroUsize}; +use std::env; +use std::num::NonZeroUsize; use bytes::Bytes; use camino::Utf8PathBuf; -use criterion::{criterion_group, criterion_main, Criterion}; -use pageserver::{ - config::PageServerConf, - context::{DownloadBehavior, RequestContext}, - l0_flush::{L0FlushConfig, L0FlushGlobalState}, - page_cache, - task_mgr::TaskKind, - tenant::storage_layer::InMemoryLayer, - virtual_file, -}; -use pageserver_api::{key::Key, shard::TenantShardId, value::Value}; -use utils::{ - bin_ser::BeSer, - id::{TenantId, TimelineId}, -}; +use criterion::{Criterion, criterion_group, criterion_main}; +use pageserver::config::PageServerConf; +use pageserver::context::{DownloadBehavior, RequestContext}; +use pageserver::l0_flush::{L0FlushConfig, L0FlushGlobalState}; +use pageserver::task_mgr::TaskKind; +use pageserver::tenant::storage_layer::InMemoryLayer; +use pageserver::{page_cache, virtual_file}; +use pageserver_api::key::Key; +use pageserver_api::shard::TenantShardId; +use pageserver_api::value::Value; +use utils::bin_ser::BeSer; +use utils::id::{TenantId, TimelineId}; use wal_decoder::serialized_batch::SerializedValueBatch; // A very cheap hash for generating non-sequential keys. diff --git a/pageserver/benches/bench_layer_map.rs b/pageserver/benches/bench_layer_map.rs index 5c5b52db44..e11af49449 100644 --- a/pageserver/benches/bench_layer_map.rs +++ b/pageserver/benches/bench_layer_map.rs @@ -1,23 +1,21 @@ -use criterion::measurement::WallTime; -use pageserver::keyspace::{KeyPartitioning, KeySpace}; -use pageserver::tenant::layer_map::LayerMap; -use pageserver::tenant::storage_layer::LayerName; -use pageserver::tenant::storage_layer::PersistentLayerDesc; -use pageserver_api::key::Key; -use pageserver_api::shard::TenantShardId; -use rand::prelude::{SeedableRng, SliceRandom, StdRng}; use std::cmp::{max, min}; use std::fs::File; use std::io::{BufRead, BufReader}; use std::path::PathBuf; use std::str::FromStr; use std::time::Instant; + +use criterion::measurement::WallTime; +use criterion::{BenchmarkGroup, Criterion, black_box, criterion_group, criterion_main}; +use pageserver::keyspace::{KeyPartitioning, KeySpace}; +use pageserver::tenant::layer_map::LayerMap; +use pageserver::tenant::storage_layer::{LayerName, PersistentLayerDesc}; +use pageserver_api::key::Key; +use pageserver_api::shard::TenantShardId; +use rand::prelude::{SeedableRng, SliceRandom, StdRng}; use utils::id::{TenantId, TimelineId}; - use utils::lsn::Lsn; -use criterion::{black_box, criterion_group, criterion_main, BenchmarkGroup, Criterion}; - fn fixture_path(relative: &str) -> PathBuf { PathBuf::from(env!("CARGO_MANIFEST_DIR")).join(relative) } diff --git a/pageserver/benches/bench_walredo.rs b/pageserver/benches/bench_walredo.rs index d3551b56e1..77b3f90b3e 100644 --- a/pageserver/benches/bench_walredo.rs +++ b/pageserver/benches/bench_walredo.rs @@ -56,20 +56,23 @@ //! medium/128 time: [10.412 ms 10.574 ms 10.718 ms] //! 
``` +use std::future::Future; +use std::sync::Arc; +use std::time::{Duration, Instant}; + use anyhow::Context; use bytes::{Buf, Bytes}; use criterion::{BenchmarkId, Criterion}; use once_cell::sync::Lazy; -use pageserver::{config::PageServerConf, walredo::PostgresRedoManager}; +use pageserver::config::PageServerConf; +use pageserver::walredo::PostgresRedoManager; +use pageserver_api::key::Key; use pageserver_api::record::NeonWalRecord; -use pageserver_api::{key::Key, shard::TenantShardId}; -use std::{ - future::Future, - sync::Arc, - time::{Duration, Instant}, -}; -use tokio::{sync::Barrier, task::JoinSet}; -use utils::{id::TenantId, lsn::Lsn}; +use pageserver_api::shard::TenantShardId; +use tokio::sync::Barrier; +use tokio::task::JoinSet; +use utils::id::TenantId; +use utils::lsn::Lsn; fn bench(c: &mut Criterion) { macro_rules! bench_group { diff --git a/pageserver/benches/upload_queue.rs b/pageserver/benches/upload_queue.rs index ed5daa8ae1..8de06a6c25 100644 --- a/pageserver/benches/upload_queue.rs +++ b/pageserver/benches/upload_queue.rs @@ -1,15 +1,15 @@ //! Upload queue benchmarks. use std::str::FromStr as _; -use std::sync::atomic::AtomicU32; use std::sync::Arc; +use std::sync::atomic::AtomicU32; -use criterion::{criterion_group, criterion_main, Bencher, Criterion}; +use criterion::{Bencher, Criterion, criterion_group, criterion_main}; +use pageserver::tenant::IndexPart; use pageserver::tenant::metadata::TimelineMetadata; use pageserver::tenant::remote_timeline_client::index::LayerFileMetadata; use pageserver::tenant::storage_layer::LayerName; use pageserver::tenant::upload_queue::{Delete, UploadOp, UploadQueue, UploadTask}; -use pageserver::tenant::IndexPart; use pprof::criterion::{Output, PProfProfiler}; use utils::generation::Generation; use utils::shard::{ShardCount, ShardIndex, ShardNumber}; diff --git a/pageserver/client/Cargo.toml b/pageserver/client/Cargo.toml index db77a395e0..970a437a42 100644 --- a/pageserver/client/Cargo.toml +++ b/pageserver/client/Cargo.toml @@ -21,5 +21,4 @@ tokio.workspace = true futures.workspace = true tokio-util.workspace = true anyhow.workspace = true -postgres.workspace = true bytes.workspace = true diff --git a/pageserver/client/src/page_service.rs b/pageserver/client/src/page_service.rs index 27280912b4..47da83b0eb 100644 --- a/pageserver/client/src/page_service.rs +++ b/pageserver/client/src/page_service.rs @@ -34,7 +34,8 @@ pub struct BasebackupRequest { impl Client { pub async fn new(connstring: String) -> anyhow::Result { - let (client, connection) = tokio_postgres::connect(&connstring, postgres::NoTls).await?; + let (client, connection) = + tokio_postgres::connect(&connstring, tokio_postgres::NoTls).await?; let conn_task_cancel = CancellationToken::new(); let conn_task = tokio::spawn({ diff --git a/pageserver/compaction/src/helpers.rs b/pageserver/compaction/src/helpers.rs index 6b739d85a7..7e4e3042b3 100644 --- a/pageserver/compaction/src/helpers.rs +++ b/pageserver/compaction/src/helpers.rs @@ -221,12 +221,12 @@ where // performed implicitly when `top` is dropped). 
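With the `postgres` crate removed from the pageserver dependencies, the plain connector in the `page_service` client now comes from `tokio_postgres` itself. The general shape of that API, shown here as a self-contained sketch (connection string handling and logging are illustrative only):

```rust
use tokio_postgres::NoTls;

async fn connect(connstring: &str) -> anyhow::Result<tokio_postgres::Client> {
    // `connect` returns the client plus a `Connection` future that must be
    // driven for the client to make progress, so it is spawned onto the runtime.
    let (client, connection) = tokio_postgres::connect(connstring, NoTls).await?;
    tokio::spawn(async move {
        if let Err(e) = connection.await {
            eprintln!("postgres connection error: {e}");
        }
    });
    Ok(client)
}
```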
if let Some(mut top) = this.heap.peek_mut() { match top.deref_mut() { - LazyLoadLayer::Unloaded(ref mut l) => { + LazyLoadLayer::Unloaded(l) => { let fut = l.load_keys(this.ctx); this.load_future.set(Some(Box::pin(fut))); continue; } - LazyLoadLayer::Loaded(ref mut entries) => { + LazyLoadLayer::Loaded(entries) => { let result = entries.pop_front().unwrap(); if entries.is_empty() { std::collections::binary_heap::PeekMut::pop(top); diff --git a/pageserver/ctl/src/key.rs b/pageserver/ctl/src/key.rs index af4b5a21ab..c7f0719c41 100644 --- a/pageserver/ctl/src/key.rs +++ b/pageserver/ctl/src/key.rs @@ -345,6 +345,7 @@ impl AuxFileV2 { AuxFileV2::Recognized("pg_logical/replorigin_checkpoint", hash) } (2, 1) => AuxFileV2::Recognized("pg_replslot/", hash), + (3, 1) => AuxFileV2::Recognized("pg_stat/pgstat.stat", hash), (1, 0xff) => AuxFileV2::OtherWithPrefix("pg_logical/", hash), (0xff, 0xff) => AuxFileV2::Other(hash), _ => return None, diff --git a/pageserver/pagebench/src/util/request_stats.rs b/pageserver/pagebench/src/util/request_stats.rs index 4aa6950782..ebe7bc031d 100644 --- a/pageserver/pagebench/src/util/request_stats.rs +++ b/pageserver/pagebench/src/util/request_stats.rs @@ -40,9 +40,7 @@ impl Stats { } } pub(crate) fn add(&mut self, other: &Self) { - let Self { - ref mut latency_histo, - } = self; + let Self { latency_histo } = self; latency_histo.add(&other.latency_histo).unwrap(); } } diff --git a/pageserver/src/assert_u64_eq_usize.rs b/pageserver/src/assert_u64_eq_usize.rs index 66ca7fd057..c4b8d9acba 100644 --- a/pageserver/src/assert_u64_eq_usize.rs +++ b/pageserver/src/assert_u64_eq_usize.rs @@ -2,7 +2,9 @@ pub(crate) const _ASSERT_U64_EQ_USIZE: () = { if std::mem::size_of::() != std::mem::size_of::() { - panic!("the traits defined in this module assume that usize and u64 can be converted to each other without loss of information"); + panic!( + "the traits defined in this module assume that usize and u64 can be converted to each other without loss of information" + ); } }; diff --git a/pageserver/src/aux_file.rs b/pageserver/src/aux_file.rs index 5e527b7d61..b76c0e045f 100644 --- a/pageserver/src/aux_file.rs +++ b/pageserver/src/aux_file.rs @@ -2,7 +2,7 @@ use std::sync::Arc; use ::metrics::IntGauge; use bytes::{Buf, BufMut, Bytes}; -use pageserver_api::key::{Key, AUX_KEY_PREFIX, METADATA_KEY_SIZE}; +use pageserver_api::key::{AUX_KEY_PREFIX, Key, METADATA_KEY_SIZE}; use tracing::warn; // BEGIN Copyright (c) 2017 Servo Contributors @@ -39,6 +39,7 @@ fn aux_hash_to_metadata_key(dir_level1: u8, dir_level2: u8, data: &[u8]) -> Key const AUX_DIR_PG_LOGICAL: u8 = 0x01; const AUX_DIR_PG_REPLSLOT: u8 = 0x02; +const AUX_DIR_PG_STAT: u8 = 0x03; const AUX_DIR_PG_UNKNOWN: u8 = 0xFF; /// Encode the aux file into a fixed-size key. @@ -53,6 +54,7 @@ const AUX_DIR_PG_UNKNOWN: u8 = 0xFF; /// * pg_logical/replorigin_checkpoint -> 0x0103 /// * pg_logical/others -> 0x01FF /// * pg_replslot/ -> 0x0201 +/// * pg_stat/pgstat.stat -> 0x0301 /// * others -> 0xFFFF /// /// If you add new AUX files to this function, please also add a test case to `test_encoding_portable`. 
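The doc-comment table above (including the new `pg_stat/pgstat.stat -> 0x0301` entry) encodes each aux file as a two-byte directory prefix followed by a hash of the remaining path. A rough illustration of just the prefix classification; the real `encode_aux_file_key` in the next hunk builds a full 16-byte metadata key:

```rust
// Directory prefix bytes, mirroring the constants above.
const AUX_DIR_PG_LOGICAL: u8 = 0x01;
const AUX_DIR_PG_REPLSLOT: u8 = 0x02;
const AUX_DIR_PG_STAT: u8 = 0x03;
const AUX_DIR_PG_UNKNOWN: u8 = 0xFF;

/// Map an aux-file path to its (dir_level1, dir_level2) prefix and the part
/// of the path that gets hashed into the remaining key bytes.
fn classify(path: &str) -> (u8, u8, &str) {
    if let Some(fname) = path.strip_prefix("pg_replslot/") {
        (AUX_DIR_PG_REPLSLOT, 0x01, fname)
    } else if let Some(fname) = path.strip_prefix("pg_stat/") {
        (AUX_DIR_PG_STAT, 0x01, fname)
    } else if path.strip_prefix("pg_logical/").is_some() {
        // The real code further distinguishes several well-known pg_logical
        // entries (0x0101..0x01FF); collapsed here for brevity.
        (AUX_DIR_PG_LOGICAL, 0xFF, path)
    } else {
        (AUX_DIR_PG_UNKNOWN, 0xFF, path)
    }
}
```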
@@ -75,6 +77,8 @@ pub fn encode_aux_file_key(path: &str) -> Key { aux_hash_to_metadata_key(AUX_DIR_PG_LOGICAL, 0xFF, fname.as_bytes()) } else if let Some(fname) = path.strip_prefix("pg_replslot/") { aux_hash_to_metadata_key(AUX_DIR_PG_REPLSLOT, 0x01, fname.as_bytes()) + } else if let Some(fname) = path.strip_prefix("pg_stat/") { + aux_hash_to_metadata_key(AUX_DIR_PG_STAT, 0x01, fname.as_bytes()) } else { if cfg!(debug_assertions) { warn!( diff --git a/pageserver/src/basebackup.rs b/pageserver/src/basebackup.rs index e03b1bbe96..ce54bd9c1c 100644 --- a/pageserver/src/basebackup.rs +++ b/pageserver/src/basebackup.rs @@ -10,33 +10,31 @@ //! This module is responsible for creation of such tarball //! from data stored in object storage. //! -use anyhow::{anyhow, Context}; -use bytes::{BufMut, Bytes, BytesMut}; -use fail::fail_point; -use pageserver_api::key::{rel_block_to_key, Key}; -use postgres_ffi::pg_constants; use std::fmt::Write as FmtWrite; use std::time::{Instant, SystemTime}; + +use anyhow::{Context, anyhow}; +use bytes::{BufMut, Bytes, BytesMut}; +use fail::fail_point; +use pageserver_api::key::{Key, rel_block_to_key}; +use pageserver_api::reltag::{RelTag, SlruKind}; +use postgres_ffi::pg_constants::{ + DEFAULTTABLESPACE_OID, GLOBALTABLESPACE_OID, PG_HBA, PGDATA_SPECIAL_FILES, +}; +use postgres_ffi::relfile_utils::{INIT_FORKNUM, MAIN_FORKNUM}; +use postgres_ffi::{ + BLCKSZ, PG_TLI, RELSEG_SIZE, WAL_SEGMENT_SIZE, XLogFileName, dispatch_pgversion, pg_constants, +}; use tokio::io; use tokio::io::AsyncWrite; -use tracing::*; - use tokio_tar::{Builder, EntryType, Header}; +use tracing::*; +use utils::lsn::Lsn; use crate::context::RequestContext; use crate::pgdatadir_mapping::Version; -use crate::tenant::storage_layer::IoConcurrency; use crate::tenant::Timeline; -use pageserver_api::reltag::{RelTag, SlruKind}; - -use postgres_ffi::dispatch_pgversion; -use postgres_ffi::pg_constants::{DEFAULTTABLESPACE_OID, GLOBALTABLESPACE_OID}; -use postgres_ffi::pg_constants::{PGDATA_SPECIAL_FILES, PG_HBA}; -use postgres_ffi::relfile_utils::{INIT_FORKNUM, MAIN_FORKNUM}; -use postgres_ffi::XLogFileName; -use postgres_ffi::PG_TLI; -use postgres_ffi::{BLCKSZ, RELSEG_SIZE, WAL_SEGMENT_SIZE}; -use utils::lsn::Lsn; +use crate::tenant::storage_layer::IoConcurrency; #[derive(Debug, thiserror::Error)] pub enum BasebackupError { @@ -264,6 +262,31 @@ where async fn send_tarball(mut self) -> Result<(), BasebackupError> { // TODO include checksum + // Construct the pg_control file from the persisted checkpoint and pg_control + // information. But we only add this to the tarball at the end, so that if the + // writing is interrupted half-way through, the resulting incomplete tarball will + // be missing the pg_control file, which prevents PostgreSQL from starting up on + // it. With proper error handling, you should never try to start up from an + // incomplete basebackup in the first place, of course, but this is a nice little + // extra safety measure. 
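The comment above captures the ordering that matters in this change: pg_control is generated up front, so any failure surfaces before streaming starts and `was_shutdown` can gate what else goes into the tarball, but it is appended last so a truncated tarball can never pass for a bootable data directory. A schematic, self-contained sketch of that shape (the real code drives a tokio-tar `Builder` and gets pg_control from `postgres_ffi::generate_pg_control`):

```rust
struct Entry {
    path: String,
    content: Vec<u8>,
}

struct Tarball {
    entries: Vec<Entry>,
}

impl Tarball {
    fn add(&mut self, path: &str, content: Vec<u8>) {
        self.entries.push(Entry { path: path.to_string(), content });
    }
}

fn build_basebackup(aux_files: Vec<Entry>, pg_control: Vec<u8>, was_shutdown: bool) -> Tarball {
    let mut tar = Tarball { entries: Vec::new() };
    for f in aux_files {
        // Statistics are only meaningful when starting from a shutdown checkpoint.
        if f.path == "pg_stat/pgstat.stat" && !was_shutdown {
            continue;
        }
        let path = f.path.clone();
        tar.add(&path, f.content);
    }
    // pg_control goes in last: a backup interrupted mid-stream is missing it,
    // and PostgreSQL refuses to start from such a data directory.
    tar.add("global/pg_control", pg_control);
    tar
}
```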
+ let checkpoint_bytes = self + .timeline + .get_checkpoint(self.lsn, self.ctx) + .await + .context("failed to get checkpoint bytes")?; + let pg_control_bytes = self + .timeline + .get_control_file(self.lsn, self.ctx) + .await + .context("failed to get control bytes")?; + let (pg_control_bytes, system_identifier, was_shutdown) = + postgres_ffi::generate_pg_control( + &pg_control_bytes, + &checkpoint_bytes, + self.lsn, + self.timeline.pg_version, + )?; + let lazy_slru_download = self.timeline.get_lazy_slru_download() && !self.full_backup; let pgversion = self.timeline.pg_version; @@ -401,6 +424,10 @@ where // In future we will not generate AUX record for "pg_logical/replorigin_checkpoint" at all, // but now we should handle (skip) it for backward compatibility. continue; + } else if path == "pg_stat/pgstat.stat" && !was_shutdown { + // Drop statistic in case of abnormal termination, i.e. if we're not starting from the exact LSN + // of a shutdown checkpoint. + continue; } let header = new_tar_header(&path, content.len() as u64)?; self.ar @@ -462,8 +489,9 @@ where ))) }); - // Generate pg_control and bootstrap WAL segment. - self.add_pgcontrol_file().await?; + // Last, add the pg_control file and bootstrap WAL segment. + self.add_pgcontrol_file(pg_control_bytes, system_identifier) + .await?; self.ar .finish() .await @@ -671,7 +699,11 @@ where // Add generated pg_control file and bootstrap WAL segment. // Also send zenith.signal file with extra bootstrap data. // - async fn add_pgcontrol_file(&mut self) -> Result<(), BasebackupError> { + async fn add_pgcontrol_file( + &mut self, + pg_control_bytes: Bytes, + system_identifier: u64, + ) -> Result<(), BasebackupError> { // add zenith.signal file let mut zenith_signal = String::new(); if self.prev_record_lsn == Lsn(0) { @@ -694,24 +726,6 @@ where .await .map_err(|e| BasebackupError::Client(e, "add_pgcontrol_file,zenith.signal"))?; - let checkpoint_bytes = self - .timeline - .get_checkpoint(self.lsn, self.ctx) - .await - .context("failed to get checkpoint bytes")?; - let pg_control_bytes = self - .timeline - .get_control_file(self.lsn, self.ctx) - .await - .context("failed get control bytes")?; - - let (pg_control_bytes, system_identifier) = postgres_ffi::generate_pg_control( - &pg_control_bytes, - &checkpoint_bytes, - self.lsn, - self.timeline.pg_version, - )?; - //send pg_control let header = new_tar_header("global/pg_control", pg_control_bytes.len() as u64)?; self.ar diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs index fa098e9364..703629aed5 100644 --- a/pageserver/src/bin/pageserver.rs +++ b/pageserver/src/bin/pageserver.rs @@ -3,49 +3,42 @@ //! Main entry point for the Page Server executable. 
use std::env; -use std::env::{var, VarError}; +use std::env::{VarError, var}; use std::io::Read; use std::str::FromStr; use std::sync::Arc; use std::time::Duration; -use anyhow::{anyhow, Context}; +use anyhow::{Context, anyhow}; use camino::Utf8Path; use clap::{Arg, ArgAction, Command}; - -use metrics::launch_timestamp::{set_launch_timestamp_metric, LaunchTimestamp}; -use pageserver::config::PageserverIdentity; +use metrics::launch_timestamp::{LaunchTimestamp, set_launch_timestamp_metric}; +use metrics::set_build_info_metric; +use nix::sys::socket::{setsockopt, sockopt}; +use pageserver::config::{PageServerConf, PageserverIdentity}; use pageserver::controller_upcall_client::ControllerUpcallClient; +use pageserver::deletion_queue::DeletionQueue; use pageserver::disk_usage_eviction_task::{self, launch_disk_usage_global_eviction_task}; use pageserver::metrics::{STARTUP_DURATION, STARTUP_IS_LOADING}; -use pageserver::task_mgr::{COMPUTE_REQUEST_RUNTIME, WALRECEIVER_RUNTIME}; -use pageserver::tenant::{secondary, TenantSharedResources}; -use pageserver::{CancellableTask, ConsumptionMetricsTasks, HttpEndpointListener}; +use pageserver::task_mgr::{ + BACKGROUND_RUNTIME, COMPUTE_REQUEST_RUNTIME, MGMT_REQUEST_RUNTIME, WALRECEIVER_RUNTIME, +}; +use pageserver::tenant::{TenantSharedResources, mgr, secondary}; +use pageserver::{ + CancellableTask, ConsumptionMetricsTasks, HttpEndpointListener, http, page_cache, page_service, + task_mgr, virtual_file, +}; +use postgres_backend::AuthType; use remote_storage::GenericRemoteStorage; use tokio::signal::unix::SignalKind; use tokio::time::Instant; use tokio_util::sync::CancellationToken; use tracing::*; - -use metrics::set_build_info_metric; -use pageserver::{ - config::PageServerConf, - deletion_queue::DeletionQueue, - http, page_cache, page_service, task_mgr, - task_mgr::{BACKGROUND_RUNTIME, MGMT_REQUEST_RUNTIME}, - tenant::mgr, - virtual_file, -}; -use postgres_backend::AuthType; +use utils::auth::{JwtAuth, SwappableJwtAuth}; use utils::crashsafe::syncfs; -use utils::failpoint_support; use utils::logging::TracingErrorLayerEnablement; -use utils::{ - auth::{JwtAuth, SwappableJwtAuth}, - logging, project_build_tag, project_git_version, - sentry_init::init_sentry, - tcp_listener, -}; +use utils::sentry_init::init_sentry; +use utils::{failpoint_support, logging, project_build_tag, project_git_version, tcp_listener}; project_git_version!(GIT_VERSION); project_build_tag!(BUILD_TAG); @@ -57,7 +50,7 @@ static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; /// This adds roughly 3% overhead for allocations on average, which is acceptable considering /// performance-sensitive code will avoid allocations as far as possible anyway. 
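The attribute change in the hunk below (`#[export_name = ...]` becoming `#[unsafe(export_name = ...)]`) follows from the `edition = "2024"` bump elsewhere in this diff: in Rust 2024, link-affecting attributes such as `export_name`, `no_mangle` and `link_section` must be wrapped in `unsafe(...)`. A tiny standalone illustration (the symbol name is arbitrary):

```rust
// Pre-2024 editions accept the bare form:
//     #[export_name = "my_symbol"]
// Edition 2024 requires the unsafe wrapper, because exporting an arbitrary
// symbol name can collide with and override other definitions at link time.
#[unsafe(export_name = "my_symbol")]
pub static MY_SYMBOL: u32 = 42;

fn main() {
    println!("{}", MY_SYMBOL);
}
```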
#[allow(non_upper_case_globals)] -#[export_name = "malloc_conf"] +#[unsafe(export_name = "malloc_conf")] pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:21\0"; const PID_FILE_NAME: &str = "pageserver.pid"; @@ -85,6 +78,9 @@ fn main() -> anyhow::Result<()> { return Ok(()); } + // Initialize up failpoints support + let scenario = failpoint_support::init(); + let workdir = arg_matches .get_one::("workdir") .map(Utf8Path::new) @@ -134,6 +130,7 @@ fn main() -> anyhow::Result<()> { info!(?conf.virtual_file_io_engine, "starting with virtual_file IO engine"); info!(?conf.virtual_file_io_mode, "starting with virtual_file IO mode"); info!(?conf.wal_receiver_protocol, "starting with WAL receiver protocol"); + info!(?conf.validate_wal_contiguity, "starting with WAL contiguity validation"); info!(?conf.page_service_pipelining, "starting with page service pipelining config"); info!(?conf.get_vectored_concurrent_io, "starting with get_vectored IO concurrency config"); @@ -177,9 +174,6 @@ fn main() -> anyhow::Result<()> { } } - // Initialize up failpoints support - let scenario = failpoint_support::init(); - // Basic initialization of things that don't change after startup tracing::info!("Initializing virtual_file..."); virtual_file::init( @@ -216,7 +210,9 @@ fn initialize_config( Ok(mut f) => { let md = f.metadata().context("stat config file")?; if !md.is_file() { - anyhow::bail!("Pageserver found identity file but it is a dir entry: {identity_file_path}. Aborting start up ..."); + anyhow::bail!( + "Pageserver found identity file but it is a dir entry: {identity_file_path}. Aborting start up ..." + ); } let mut s = String::new(); @@ -224,7 +220,9 @@ fn initialize_config( toml_edit::de::from_str::(&s)? } Err(e) => { - anyhow::bail!("Pageserver could not read identity file: {identity_file_path}: {e}. Aborting start up ..."); + anyhow::bail!( + "Pageserver could not read identity file: {identity_file_path}: {e}. Aborting start up ..." + ); } }; @@ -350,6 +348,13 @@ fn start_pageserver( info!("Starting pageserver pg protocol handler on {pg_addr}"); let pageserver_listener = tcp_listener::bind(pg_addr)?; + // Enable SO_KEEPALIVE on the socket, to detect dead connections faster. + // These are configured via net.ipv4.tcp_keepalive_* sysctls. + // + // TODO: also set this on the walreceiver socket, but tokio-postgres doesn't + // support enabling keepalives while using the default OS sysctls. + setsockopt(&pageserver_listener, sockopt::KeepAlive, &true)?; + // Launch broker client // The storage_broker::connect call needs to happen inside a tokio runtime thread. let broker_client = WALRECEIVER_RUNTIME @@ -400,11 +405,9 @@ fn start_pageserver( Err(VarError::NotPresent) => { info!("No JWT token for authentication with Safekeeper detected"); } - Err(e) => { - return Err(e).with_context(|| { - "Failed to either load to detect non-present NEON_AUTH_TOKEN environment variable" - }) - } + Err(e) => return Err(e).with_context( + || "Failed to either load to detect non-present NEON_AUTH_TOKEN environment variable", + ), }; // Top-level cancellation token for the process @@ -710,7 +713,9 @@ async fn create_remote_storage_client( // wrapper that simulates failures. 
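The `setsockopt` call added above turns on OS-level TCP keepalive for the page-service listener; the probe timing itself still comes from the `net.ipv4.tcp_keepalive_*` sysctls, as the comment notes. Roughly, with the `nix` crate (recent versions accept anything implementing `AsFd` as the socket argument):

```rust
use std::net::TcpListener;

use nix::sys::socket::{setsockopt, sockopt};

fn bind_with_keepalive(addr: &str) -> anyhow::Result<TcpListener> {
    let listener = TcpListener::bind(addr)?;
    // Ask the kernel to send keepalive probes on idle connections so dead
    // peers are detected without waiting for an application-level timeout.
    setsockopt(&listener, sockopt::KeepAlive, &true)?;
    Ok(listener)
}
```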
if conf.test_remote_failures > 0 { if !cfg!(feature = "testing") { - anyhow::bail!("test_remote_failures option is not available because pageserver was compiled without the 'testing' feature"); + anyhow::bail!( + "test_remote_failures option is not available because pageserver was compiled without the 'testing' feature" + ); } info!( "Simulating remote failures for first {} attempts of each op", diff --git a/pageserver/src/bin/test_helper_slow_client_reads.rs b/pageserver/src/bin/test_helper_slow_client_reads.rs index c1ce332b6c..0215dd06fb 100644 --- a/pageserver/src/bin/test_helper_slow_client_reads.rs +++ b/pageserver/src/bin/test_helper_slow_client_reads.rs @@ -1,14 +1,10 @@ -use std::{ - io::{stdin, stdout, Read, Write}, - time::Duration, -}; +use std::io::{Read, Write, stdin, stdout}; +use std::time::Duration; use clap::Parser; use pageserver_api::models::{PagestreamRequest, PagestreamTestRequest}; -use utils::{ - id::{TenantId, TimelineId}, - lsn::Lsn, -}; +use utils::id::{TenantId, TimelineId}; +use utils::lsn::Lsn; #[derive(clap::Parser)] struct Args { diff --git a/pageserver/src/config.rs b/pageserver/src/config.rs index c5368f6806..64d00882b9 100644 --- a/pageserver/src/config.rs +++ b/pageserver/src/config.rs @@ -4,36 +4,29 @@ //! file, or on the command line. //! See also `settings.md` for better description on every parameter. -use anyhow::{bail, ensure, Context}; -use pageserver_api::models::ImageCompressionAlgorithm; -use pageserver_api::{ - config::{DiskUsageEvictionTaskConfig, MaxVectoredReadBytes}, - shard::TenantShardId, -}; -use remote_storage::{RemotePath, RemoteStorageConfig}; use std::env; -use storage_broker::Uri; -use utils::logging::SecretString; -use utils::postgres_client::PostgresClientProtocol; - -use once_cell::sync::OnceCell; -use reqwest::Url; use std::num::NonZeroUsize; use std::sync::Arc; use std::time::Duration; +use anyhow::{Context, bail, ensure}; use camino::{Utf8Path, Utf8PathBuf}; +use once_cell::sync::OnceCell; +use pageserver_api::config::{DiskUsageEvictionTaskConfig, MaxVectoredReadBytes}; +use pageserver_api::models::ImageCompressionAlgorithm; +use pageserver_api::shard::TenantShardId; use postgres_backend::AuthType; -use utils::{ - id::{NodeId, TimelineId}, - logging::LogFormat, -}; +use remote_storage::{RemotePath, RemoteStorageConfig}; +use reqwest::Url; +use storage_broker::Uri; +use utils::id::{NodeId, TimelineId}; +use utils::logging::{LogFormat, SecretString}; +use utils::postgres_client::PostgresClientProtocol; use crate::tenant::storage_layer::inmemory_layer::IndexEntry; use crate::tenant::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME}; -use crate::virtual_file; use crate::virtual_file::io_engine; -use crate::{TENANT_HEATMAP_BASENAME, TENANT_LOCATION_CONFIG_NAME}; +use crate::{TENANT_HEATMAP_BASENAME, TENANT_LOCATION_CONFIG_NAME, virtual_file}; /// Global state of pageserver. /// @@ -197,6 +190,10 @@ pub struct PageServerConf { /// Enable read path debugging. If enabled, read key errors will print a backtrace of the layer /// files read. pub enable_read_path_debugging: bool, + + /// Interpreted protocol feature: if enabled, validate that the logical WAL received from + /// safekeepers does not have gaps. 
+ pub validate_wal_contiguity: bool, } /// Token for authentication to safekeepers @@ -360,6 +357,7 @@ impl PageServerConf { page_service_pipelining, get_vectored_concurrent_io, enable_read_path_debugging, + validate_wal_contiguity, } = config_toml; let mut conf = PageServerConf { @@ -435,7 +433,9 @@ impl PageServerConf { io_engine::FeatureTestResult::PlatformPreferred(v) => v, // make no noise io_engine::FeatureTestResult::Worse { engine, remark } => { // TODO: bubble this up to the caller so we can tracing::warn! it. - eprintln!("auto-detected IO engine is not platform-preferred: engine={engine:?} remark={remark:?}"); + eprintln!( + "auto-detected IO engine is not platform-preferred: engine={engine:?} remark={remark:?}" + ); engine } }, @@ -446,6 +446,7 @@ impl PageServerConf { virtual_file_io_mode: virtual_file_io_mode.unwrap_or(virtual_file::IoMode::preferred()), no_sync: no_sync.unwrap_or(false), enable_read_path_debugging: enable_read_path_debugging.unwrap_or(false), + validate_wal_contiguity: validate_wal_contiguity.unwrap_or(false), }; // ------------------------------------------------------------ diff --git a/pageserver/src/consumption_metrics.rs b/pageserver/src/consumption_metrics.rs index 7e8c00c293..0231190e69 100644 --- a/pageserver/src/consumption_metrics.rs +++ b/pageserver/src/consumption_metrics.rs @@ -1,13 +1,9 @@ //! Periodically collect consumption metrics for all active tenants //! and push them to a HTTP endpoint. -use crate::config::PageServerConf; -use crate::consumption_metrics::metrics::MetricsKey; -use crate::consumption_metrics::upload::KeyGen as _; -use crate::context::{DownloadBehavior, RequestContext}; -use crate::task_mgr::{self, TaskKind, BACKGROUND_RUNTIME}; -use crate::tenant::size::CalculateSyntheticSizeError; -use crate::tenant::tasks::BackgroundLoopKind; -use crate::tenant::{mgr::TenantManager, LogicalSizeCalculationCause, Tenant}; +use std::collections::HashMap; +use std::sync::Arc; +use std::time::{Duration, SystemTime}; + use camino::Utf8PathBuf; use consumption_metrics::EventType; use itertools::Itertools as _; @@ -15,14 +11,21 @@ use pageserver_api::models::TenantState; use remote_storage::{GenericRemoteStorage, RemoteStorageConfig}; use reqwest::Url; use serde::{Deserialize, Serialize}; -use std::collections::HashMap; -use std::sync::Arc; -use std::time::{Duration, SystemTime}; use tokio::time::Instant; use tokio_util::sync::CancellationToken; use tracing::*; use utils::id::NodeId; +use crate::config::PageServerConf; +use crate::consumption_metrics::metrics::MetricsKey; +use crate::consumption_metrics::upload::KeyGen as _; +use crate::context::{DownloadBehavior, RequestContext}; +use crate::task_mgr::{self, BACKGROUND_RUNTIME, TaskKind}; +use crate::tenant::mgr::TenantManager; +use crate::tenant::size::CalculateSyntheticSizeError; +use crate::tenant::tasks::BackgroundLoopKind; +use crate::tenant::{LogicalSizeCalculationCause, Tenant}; + mod disk_cache; mod metrics; mod upload; diff --git a/pageserver/src/consumption_metrics/disk_cache.rs b/pageserver/src/consumption_metrics/disk_cache.rs index 54a505a134..f1dad8793d 100644 --- a/pageserver/src/consumption_metrics/disk_cache.rs +++ b/pageserver/src/consumption_metrics/disk_cache.rs @@ -1,10 +1,10 @@ -use anyhow::Context; -use camino::{Utf8Path, Utf8PathBuf}; use std::sync::Arc; -use crate::consumption_metrics::NewMetricsRefRoot; +use anyhow::Context; +use camino::{Utf8Path, Utf8PathBuf}; use super::{NewMetricsRoot, NewRawMetric, RawMetric}; +use crate::consumption_metrics::NewMetricsRefRoot; 
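Like the other optional knobs around it, the new `validate_wal_contiguity` setting is an `Option` at the TOML layer and is resolved with `unwrap_or(false)` when `PageServerConf` is built, so the check stays off unless explicitly enabled. A stripped-down, hypothetical mirror of that pattern:

```rust
/// What gets parsed from pageserver.toml; an absent field is `None`.
struct ConfigToml {
    validate_wal_contiguity: Option<bool>,
}

/// The fully resolved runtime configuration.
struct PageServerConf {
    validate_wal_contiguity: bool,
}

impl PageServerConf {
    fn parse_and_validate(toml: ConfigToml) -> Self {
        Self {
            // Off by default; only enabled when the config file says so.
            validate_wal_contiguity: toml.validate_wal_contiguity.unwrap_or(false),
        }
    }
}
```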
pub(super) fn read_metrics_from_serde_value( json_value: serde_json::Value, diff --git a/pageserver/src/consumption_metrics/metrics.rs b/pageserver/src/consumption_metrics/metrics.rs index 07fac09f6f..71910011ea 100644 --- a/pageserver/src/consumption_metrics/metrics.rs +++ b/pageserver/src/consumption_metrics/metrics.rs @@ -1,15 +1,16 @@ -use crate::tenant::mgr::TenantManager; -use crate::{context::RequestContext, tenant::timeline::logical_size::CurrentLogicalSize}; +use std::sync::Arc; +use std::time::SystemTime; + use chrono::{DateTime, Utc}; use consumption_metrics::EventType; use futures::stream::StreamExt; -use std::{sync::Arc, time::SystemTime}; -use utils::{ - id::{TenantId, TimelineId}, - lsn::Lsn, -}; +use utils::id::{TenantId, TimelineId}; +use utils::lsn::Lsn; use super::{Cache, NewRawMetric}; +use crate::context::RequestContext; +use crate::tenant::mgr::TenantManager; +use crate::tenant::timeline::logical_size::CurrentLogicalSize; /// Name of the metric, used by `MetricsKey` factory methods and `deserialize_cached_events` /// instead of static str. diff --git a/pageserver/src/consumption_metrics/metrics/tests.rs b/pageserver/src/consumption_metrics/metrics/tests.rs index 3ed7b44123..52b4fb8680 100644 --- a/pageserver/src/consumption_metrics/metrics/tests.rs +++ b/pageserver/src/consumption_metrics/metrics/tests.rs @@ -1,7 +1,7 @@ -use crate::consumption_metrics::RawMetric; +use std::collections::HashMap; use super::*; -use std::collections::HashMap; +use crate::consumption_metrics::RawMetric; #[test] fn startup_collected_timeline_metrics_before_advancing() { diff --git a/pageserver/src/consumption_metrics/upload.rs b/pageserver/src/consumption_metrics/upload.rs index 448bf47525..59e0145a5b 100644 --- a/pageserver/src/consumption_metrics/upload.rs +++ b/pageserver/src/consumption_metrics/upload.rs @@ -2,15 +2,16 @@ use std::error::Error as _; use std::time::SystemTime; use chrono::{DateTime, Utc}; -use consumption_metrics::{Event, EventChunk, IdempotencyKey, CHUNK_SIZE}; +use consumption_metrics::{CHUNK_SIZE, Event, EventChunk, IdempotencyKey}; use remote_storage::{GenericRemoteStorage, RemotePath}; use tokio::io::AsyncWriteExt; use tokio_util::sync::CancellationToken; use tracing::Instrument; - -use super::{metrics::Name, Cache, MetricsKey, NewRawMetric, RawMetric}; use utils::id::{TenantId, TimelineId}; +use super::metrics::Name; +use super::{Cache, MetricsKey, NewRawMetric, RawMetric}; + /// How the metrics from pageserver are identified. 
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone, Copy, PartialEq)] struct Ids { @@ -438,14 +439,13 @@ async fn upload( #[cfg(test)] mod tests { - use crate::consumption_metrics::{ - disk_cache::read_metrics_from_serde_value, NewMetricsRefRoot, - }; - - use super::*; use chrono::{DateTime, Utc}; use once_cell::sync::Lazy; + use super::*; + use crate::consumption_metrics::NewMetricsRefRoot; + use crate::consumption_metrics::disk_cache::read_metrics_from_serde_value; + #[test] fn chunked_serialization() { let examples = metric_samples(); diff --git a/pageserver/src/controller_upcall_client.rs b/pageserver/src/controller_upcall_client.rs index d41bfd9021..8462594607 100644 --- a/pageserver/src/controller_upcall_client.rs +++ b/pageserver/src/controller_upcall_client.rs @@ -1,21 +1,23 @@ use std::collections::HashMap; use futures::Future; -use pageserver_api::{ - controller_api::{AvailabilityZone, NodeRegisterRequest}, - shard::TenantShardId, - upcall_api::{ - ReAttachRequest, ReAttachResponse, ReAttachResponseTenant, ValidateRequest, - ValidateRequestTenant, ValidateResponse, - }, +use pageserver_api::config::NodeMetadata; +use pageserver_api::controller_api::{AvailabilityZone, NodeRegisterRequest}; +use pageserver_api::shard::TenantShardId; +use pageserver_api::upcall_api::{ + ReAttachRequest, ReAttachResponse, ReAttachResponseTenant, ValidateRequest, + ValidateRequestTenant, ValidateResponse, }; -use serde::{de::DeserializeOwned, Serialize}; +use serde::Serialize; +use serde::de::DeserializeOwned; use tokio_util::sync::CancellationToken; use url::Url; -use utils::{backoff, failpoint_support, generation::Generation, id::NodeId}; +use utils::generation::Generation; +use utils::id::NodeId; +use utils::{backoff, failpoint_support}; -use crate::{config::PageServerConf, virtual_file::on_fatal_io_error}; -use pageserver_api::config::NodeMetadata; +use crate::config::PageServerConf; +use crate::virtual_file::on_fatal_io_error; /// The Pageserver's client for using the storage controller upcall API: this is a small API /// for dealing with generations (see docs/rfcs/025-generation-numbers.md). @@ -157,14 +159,18 @@ impl ControlPlaneGenerationsApi for ControllerUpcallClient { match az_id_from_metadata { Some(az_id) => Some(AvailabilityZone(az_id)), None => { - tracing::warn!("metadata.json does not contain an 'availability_zone_id' field"); + tracing::warn!( + "metadata.json does not contain an 'availability_zone_id' field" + ); conf.availability_zone.clone().map(AvailabilityZone) } } }; if az_id.is_none() { - panic!("Availablity zone id could not be inferred from metadata.json or pageserver config"); + panic!( + "Availablity zone id could not be inferred from metadata.json or pageserver config" + ); } Some(NodeRegisterRequest { @@ -173,6 +179,7 @@ impl ControlPlaneGenerationsApi for ControllerUpcallClient { listen_pg_port: m.postgres_port, listen_http_addr: m.http_host, listen_http_port: m.http_port, + listen_https_port: None, // TODO: Support https. 
availability_zone_id: az_id.expect("Checked above"), }) } @@ -235,7 +242,7 @@ impl ControlPlaneGenerationsApi for ControllerUpcallClient { .iter() .map(|(id, generation)| ValidateRequestTenant { id: *id, - gen: (*generation).into().expect( + r#gen: (*generation).into().expect( "Generation should always be valid for a Tenant doing deletions", ), }) diff --git a/pageserver/src/deletion_queue.rs b/pageserver/src/deletion_queue.rs index a2395b0dca..8118f66252 100644 --- a/pageserver/src/deletion_queue.rs +++ b/pageserver/src/deletion_queue.rs @@ -6,38 +6,31 @@ use std::collections::HashMap; use std::sync::Arc; use std::time::Duration; -use crate::controller_upcall_client::ControlPlaneGenerationsApi; -use crate::metrics; -use crate::tenant::remote_timeline_client::remote_timeline_path; -use crate::tenant::remote_timeline_client::LayerFileMetadata; -use crate::virtual_file::MaybeFatalIo; -use crate::virtual_file::VirtualFile; use anyhow::Context; use camino::Utf8PathBuf; +use deleter::DeleterMessage; +use list_writer::ListWriterQueueMessage; use pageserver_api::shard::TenantShardId; use remote_storage::{GenericRemoteStorage, RemotePath}; -use serde::Deserialize; -use serde::Serialize; +use serde::{Deserialize, Serialize}; use thiserror::Error; use tokio_util::sync::CancellationToken; -use tracing::Instrument; -use tracing::{debug, error}; +use tracing::{Instrument, debug, error}; use utils::crashsafe::path_with_suffix_extension; use utils::generation::Generation; use utils::id::TimelineId; -use utils::lsn::AtomicLsn; -use utils::lsn::Lsn; - -use self::deleter::Deleter; -use self::list_writer::DeletionOp; -use self::list_writer::ListWriter; -use self::list_writer::RecoverOp; -use self::validator::Validator; -use deleter::DeleterMessage; -use list_writer::ListWriterQueueMessage; +use utils::lsn::{AtomicLsn, Lsn}; use validator::ValidatorQueueMessage; -use crate::{config::PageServerConf, tenant::storage_layer::LayerName}; +use self::deleter::Deleter; +use self::list_writer::{DeletionOp, ListWriter, RecoverOp}; +use self::validator::Validator; +use crate::config::PageServerConf; +use crate::controller_upcall_client::ControlPlaneGenerationsApi; +use crate::metrics; +use crate::tenant::remote_timeline_client::{LayerFileMetadata, remote_timeline_path}; +use crate::tenant::storage_layer::LayerName; +use crate::virtual_file::{MaybeFatalIo, VirtualFile}; // TODO: configurable for how long to wait before executing deletions @@ -664,21 +657,22 @@ impl DeletionQueue { #[cfg(test)] mod test { + use std::io::ErrorKind; + use std::time::Duration; + use camino::Utf8Path; use hex_literal::hex; - use pageserver_api::{key::Key, shard::ShardIndex, upcall_api::ReAttachResponseTenant}; - use std::{io::ErrorKind, time::Duration}; - use tracing::info; - + use pageserver_api::key::Key; + use pageserver_api::shard::ShardIndex; + use pageserver_api::upcall_api::ReAttachResponseTenant; use remote_storage::{RemoteStorageConfig, RemoteStorageKind}; use tokio::task::JoinHandle; - - use crate::{ - controller_upcall_client::RetryForeverError, - tenant::{harness::TenantHarness, storage_layer::DeltaLayerName}, - }; + use tracing::info; use super::*; + use crate::controller_upcall_client::RetryForeverError; + use crate::tenant::harness::TenantHarness; + use crate::tenant::storage_layer::DeltaLayerName; pub const TIMELINE_ID: TimelineId = TimelineId::from_array(hex!("11223344556677881122334455667788")); @@ -724,26 +718,26 @@ mod test { .expect("Failed to join workers for previous deletion queue"); } - fn set_latest_generation(&self, 
gen: Generation) { + fn set_latest_generation(&self, gen_: Generation) { let tenant_shard_id = self.harness.tenant_shard_id; self.mock_control_plane .latest_generation .lock() .unwrap() - .insert(tenant_shard_id, gen); + .insert(tenant_shard_id, gen_); } /// Returns remote layer file name, suitable for use in assert_remote_files fn write_remote_layer( &self, file_name: LayerName, - gen: Generation, + gen_: Generation, ) -> anyhow::Result { let tenant_shard_id = self.harness.tenant_shard_id; let relative_remote_path = remote_timeline_path(&tenant_shard_id, &TIMELINE_ID); let remote_timeline_path = self.remote_fs_dir.join(relative_remote_path.get_path()); std::fs::create_dir_all(&remote_timeline_path)?; - let remote_layer_file_name = format!("{}{}", file_name, gen.get_suffix()); + let remote_layer_file_name = format!("{}{}", file_name, gen_.get_suffix()); let content: Vec = format!("placeholder contents of {file_name}").into(); @@ -1098,11 +1092,12 @@ mod test { /// or coalescing, and doesn't actually execute any deletions unless you call pump() to kick it. #[cfg(test)] pub(crate) mod mock { + use std::sync::atomic::{AtomicUsize, Ordering}; + use tracing::info; use super::*; use crate::tenant::remote_timeline_client::remote_layer_path; - use std::sync::atomic::{AtomicUsize, Ordering}; pub struct ConsumerState { rx: tokio::sync::mpsc::UnboundedReceiver, diff --git a/pageserver/src/deletion_queue/deleter.rs b/pageserver/src/deletion_queue/deleter.rs index ef1dfbac19..691ba75cc7 100644 --- a/pageserver/src/deletion_queue/deleter.rs +++ b/pageserver/src/deletion_queue/deleter.rs @@ -6,21 +6,16 @@ //! number of full-sized DeleteObjects requests, rather than a larger number of //! smaller requests. -use remote_storage::GenericRemoteStorage; -use remote_storage::RemotePath; -use remote_storage::TimeoutOrCancel; use std::time::Duration; + +use remote_storage::{GenericRemoteStorage, RemotePath, TimeoutOrCancel}; use tokio_util::sync::CancellationToken; -use tracing::info; -use tracing::warn; -use utils::backoff; -use utils::pausable_failpoint; +use tracing::{info, warn}; +use utils::{backoff, pausable_failpoint}; +use super::{DeletionQueueError, FlushOp}; use crate::metrics; -use super::DeletionQueueError; -use super::FlushOp; - const AUTOFLUSH_INTERVAL: Duration = Duration::from_secs(10); pub(super) enum DeleterMessage { diff --git a/pageserver/src/deletion_queue/list_writer.rs b/pageserver/src/deletion_queue/list_writer.rs index ae3b2c9180..a385e35a02 100644 --- a/pageserver/src/deletion_queue/list_writer.rs +++ b/pageserver/src/deletion_queue/list_writer.rs @@ -10,11 +10,6 @@ //! //! DeletionLists are passed onwards to the Validator. 
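The `gen` to `gen_` renames in the deletion-queue tests above (and the `r#gen` field in the controller upcall client earlier) are another edition-2024 consequence: `gen` is a reserved keyword there, so it can only appear as a raw identifier. For illustration:

```rust
// Under edition 2024 this no longer parses, because `gen` is reserved:
//     fn set_latest_generation(gen: u32) {}
// Either rename the binding...
fn set_latest_generation(gen_: u32) -> u32 {
    gen_
}

// ...or keep the spelling with a raw identifier, which matters for struct
// fields whose serialized name must stay `gen`:
struct ValidateRequestTenant {
    r#gen: u32,
}

fn main() {
    let t = ValidateRequestTenant { r#gen: set_latest_generation(7) };
    println!("{}", t.r#gen);
}
```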
-use super::DeletionHeader; -use super::DeletionList; -use super::FlushOp; -use super::ValidatorQueueMessage; - use std::collections::HashMap; use std::fs::create_dir_all; use std::time::Duration; @@ -23,20 +18,17 @@ use pageserver_api::shard::TenantShardId; use regex::Regex; use remote_storage::RemotePath; use tokio_util::sync::CancellationToken; -use tracing::debug; -use tracing::info; -use tracing::warn; +use tracing::{debug, info, warn}; use utils::generation::Generation; use utils::id::TimelineId; +use super::{DeletionHeader, DeletionList, FlushOp, ValidatorQueueMessage}; use crate::config::PageServerConf; use crate::deletion_queue::TEMP_SUFFIX; use crate::metrics; -use crate::tenant::remote_timeline_client::remote_layer_path; -use crate::tenant::remote_timeline_client::LayerFileMetadata; +use crate::tenant::remote_timeline_client::{LayerFileMetadata, remote_layer_path}; use crate::tenant::storage_layer::LayerName; -use crate::virtual_file::on_fatal_io_error; -use crate::virtual_file::MaybeFatalIo; +use crate::virtual_file::{MaybeFatalIo, on_fatal_io_error}; // The number of keys in a DeletionList before we will proactively persist it // (without reaching a flush deadline). This aims to deliver objects of the order diff --git a/pageserver/src/deletion_queue/validator.rs b/pageserver/src/deletion_queue/validator.rs index 1d55581ebd..b0ce2b80b4 100644 --- a/pageserver/src/deletion_queue/validator.rs +++ b/pageserver/src/deletion_queue/validator.rs @@ -20,22 +20,14 @@ use std::time::Duration; use camino::Utf8PathBuf; use tokio_util::sync::CancellationToken; -use tracing::debug; -use tracing::info; -use tracing::warn; - -use crate::config::PageServerConf; -use crate::controller_upcall_client::ControlPlaneGenerationsApi; -use crate::controller_upcall_client::RetryForeverError; -use crate::metrics; -use crate::virtual_file::MaybeFatalIo; +use tracing::{debug, info, warn}; use super::deleter::DeleterMessage; -use super::DeletionHeader; -use super::DeletionList; -use super::DeletionQueueError; -use super::FlushOp; -use super::VisibleLsnUpdates; +use super::{DeletionHeader, DeletionList, DeletionQueueError, FlushOp, VisibleLsnUpdates}; +use crate::config::PageServerConf; +use crate::controller_upcall_client::{ControlPlaneGenerationsApi, RetryForeverError}; +use crate::metrics; +use crate::virtual_file::MaybeFatalIo; // After this length of time, do any validation work that is pending, // even if we haven't accumulated many keys to delete. @@ -190,7 +182,10 @@ where } } else { // If we failed validation, then do not apply any of the projected updates - info!("Dropped remote consistent LSN updates for tenant {tenant_id} in stale generation {:?}", tenant_lsn_state.generation); + info!( + "Dropped remote consistent LSN updates for tenant {tenant_id} in stale generation {:?}", + tenant_lsn_state.generation + ); metrics::DELETION_QUEUE.dropped_lsn_updates.inc(); } } diff --git a/pageserver/src/disk_usage_eviction_task.rs b/pageserver/src/disk_usage_eviction_task.rs index 738a783813..13252037e5 100644 --- a/pageserver/src/disk_usage_eviction_task.rs +++ b/pageserver/src/disk_usage_eviction_task.rs @@ -41,30 +41,31 @@ // - The `#[allow(dead_code)]` above various structs are to suppress warnings about only the Debug impl // reading these fields. We use the Debug impl for semi-structured logging, though. 
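The "stale generation" message reformatted above reflects the validator's gating rule: projected updates are only applied if the tenant is still attached under its latest generation; otherwise they are dropped and a metric is bumped instead. Purely as an illustration of that shape, not the actual types:

```rust
#[derive(Clone, Copy, PartialEq, Debug)]
struct Generation(u32);

struct ProjectedUpdates {
    generation: Generation,
    // ... projected deletions / remote-consistent-LSN advances ...
}

/// Returns true if the updates may be applied, false if they must be dropped.
fn validate(latest: Generation, pending: &ProjectedUpdates) -> bool {
    if pending.generation == latest {
        true
    } else {
        // A newer generation exists elsewhere; acting on this projection
        // could remove data that the current attachment still needs.
        false
    }
}
```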
-use std::{sync::Arc, time::SystemTime}; +use std::sync::Arc; +use std::time::SystemTime; use anyhow::Context; -use pageserver_api::{config::DiskUsageEvictionTaskConfig, shard::TenantShardId}; +use pageserver_api::config::DiskUsageEvictionTaskConfig; +use pageserver_api::shard::TenantShardId; use remote_storage::GenericRemoteStorage; use serde::Serialize; use tokio::time::Instant; use tokio_util::sync::CancellationToken; -use tracing::{debug, error, info, instrument, warn, Instrument}; -use utils::{completion, id::TimelineId}; +use tracing::{Instrument, debug, error, info, instrument, warn}; +use utils::completion; +use utils::id::TimelineId; -use crate::{ - config::PageServerConf, - metrics::disk_usage_based_eviction::METRICS, - task_mgr::{self, BACKGROUND_RUNTIME}, - tenant::{ - mgr::TenantManager, - remote_timeline_client::LayerFileMetadata, - secondary::SecondaryTenant, - storage_layer::{AsLayerDesc, EvictionError, Layer, LayerName, LayerVisibilityHint}, - tasks::sleep_random, - }, - CancellableTask, DiskUsageEvictionTask, +use crate::config::PageServerConf; +use crate::metrics::disk_usage_based_eviction::METRICS; +use crate::task_mgr::{self, BACKGROUND_RUNTIME}; +use crate::tenant::mgr::TenantManager; +use crate::tenant::remote_timeline_client::LayerFileMetadata; +use crate::tenant::secondary::SecondaryTenant; +use crate::tenant::storage_layer::{ + AsLayerDesc, EvictionError, Layer, LayerName, LayerVisibilityHint, }; +use crate::tenant::tasks::sleep_random; +use crate::{CancellableTask, DiskUsageEvictionTask}; /// Selects the sort order for eviction candidates *after* per tenant `min_resident_size` /// partitioning. @@ -1007,10 +1008,14 @@ async fn collect_eviction_candidates( } } - debug_assert!(EvictionPartition::Above < EvictionPartition::Below, - "as explained in the function's doc comment, layers that aren't in the tenant's min_resident_size are evicted first"); - debug_assert!(EvictionPartition::EvictNow < EvictionPartition::Above, - "as explained in the function's doc comment, layers that aren't in the tenant's min_resident_size are evicted first"); + debug_assert!( + EvictionPartition::Above < EvictionPartition::Below, + "as explained in the function's doc comment, layers that aren't in the tenant's min_resident_size are evicted first" + ); + debug_assert!( + EvictionPartition::EvictNow < EvictionPartition::Above, + "as explained in the function's doc comment, layers that aren't in the tenant's min_resident_size are evicted first" + ); eviction_order.sort(&mut candidates); @@ -1157,9 +1162,8 @@ mod filesystem_level_usage { use anyhow::Context; use camino::Utf8Path; - use crate::statvfs::Statvfs; - use super::DiskUsageEvictionTaskConfig; + use crate::statvfs::Statvfs; #[derive(Debug, Clone, Copy)] pub struct Usage<'a> { @@ -1224,10 +1228,12 @@ mod filesystem_level_usage { #[test] fn max_usage_pct_pressure() { - use super::Usage as _; use std::time::Duration; + use utils::serde_percent::Percent; + use super::Usage as _; + let mut usage = Usage { config: &DiskUsageEvictionTaskConfig { max_usage_pct: Percent::new(85).unwrap(), diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 56a84a98a8..dd5a24a41f 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -2,125 +2,83 @@ //! Management HTTP API //! 
use std::cmp::Reverse; -use std::collections::BinaryHeap; -use std::collections::HashMap; +use std::collections::{BinaryHeap, HashMap}; use std::str::FromStr; use std::sync::Arc; use std::time::Duration; -use anyhow::{anyhow, Context, Result}; +use anyhow::{Context, Result, anyhow}; use enumset::EnumSet; use futures::future::join_all; -use futures::StreamExt; -use futures::TryFutureExt; +use futures::{StreamExt, TryFutureExt}; use http_utils::endpoint::{ - profile_cpu_handler, profile_heap_handler, prometheus_metrics_handler, request_span, + self, attach_openapi_ui, auth_middleware, check_permission_with, profile_cpu_handler, + profile_heap_handler, prometheus_metrics_handler, request_span, }; +use http_utils::error::{ApiError, HttpErrorBody}; use http_utils::failpoints::failpoints_handler; -use http_utils::request::must_parse_query_param; -use http_utils::request::{get_request_param, must_get_query_param, parse_query_param}; +use http_utils::json::{json_request, json_request_maybe, json_response}; +use http_utils::request::{ + get_request_param, must_get_query_param, must_parse_query_param, parse_query_param, + parse_request_param, +}; +use http_utils::{RequestExt, RouterBuilder}; use humantime::format_rfc3339; -use hyper::header; -use hyper::StatusCode; -use hyper::{Body, Request, Response, Uri}; +use hyper::{Body, Request, Response, StatusCode, Uri, header}; use metrics::launch_timestamp::LaunchTimestamp; use pageserver_api::models::virtual_file::IoMode; -use pageserver_api::models::DownloadRemoteLayersTaskSpawnRequest; -use pageserver_api::models::IngestAuxFilesRequest; -use pageserver_api::models::ListAuxFilesRequest; -use pageserver_api::models::LocationConfig; -use pageserver_api::models::LocationConfigListResponse; -use pageserver_api::models::LocationConfigMode; -use pageserver_api::models::LsnLease; -use pageserver_api::models::LsnLeaseRequest; -use pageserver_api::models::OffloadedTimelineInfo; -use pageserver_api::models::PageTraceEvent; -use pageserver_api::models::ShardParameters; -use pageserver_api::models::TenantConfigPatchRequest; -use pageserver_api::models::TenantDetails; -use pageserver_api::models::TenantLocationConfigRequest; -use pageserver_api::models::TenantLocationConfigResponse; -use pageserver_api::models::TenantScanRemoteStorageResponse; -use pageserver_api::models::TenantScanRemoteStorageShard; -use pageserver_api::models::TenantShardLocation; -use pageserver_api::models::TenantShardSplitRequest; -use pageserver_api::models::TenantShardSplitResponse; -use pageserver_api::models::TenantSorting; -use pageserver_api::models::TenantState; -use pageserver_api::models::TenantWaitLsnRequest; -use pageserver_api::models::TimelineArchivalConfigRequest; -use pageserver_api::models::TimelineCreateRequestMode; -use pageserver_api::models::TimelineCreateRequestModeImportPgdata; -use pageserver_api::models::TimelinesInfoAndOffloaded; -use pageserver_api::models::TopTenantShardItem; -use pageserver_api::models::TopTenantShardsRequest; -use pageserver_api::models::TopTenantShardsResponse; -use pageserver_api::shard::ShardCount; -use pageserver_api::shard::TenantShardId; -use remote_storage::DownloadError; -use remote_storage::GenericRemoteStorage; -use remote_storage::TimeTravelError; +use pageserver_api::models::{ + DownloadRemoteLayersTaskSpawnRequest, IngestAuxFilesRequest, ListAuxFilesRequest, + LocationConfig, LocationConfigListResponse, LocationConfigMode, LsnLease, LsnLeaseRequest, + OffloadedTimelineInfo, PageTraceEvent, ShardParameters, StatusResponse, + 
TenantConfigPatchRequest, TenantConfigRequest, TenantDetails, TenantInfo, + TenantLocationConfigRequest, TenantLocationConfigResponse, TenantScanRemoteStorageResponse, + TenantScanRemoteStorageShard, TenantShardLocation, TenantShardSplitRequest, + TenantShardSplitResponse, TenantSorting, TenantState, TenantWaitLsnRequest, + TimelineArchivalConfigRequest, TimelineCreateRequest, TimelineCreateRequestMode, + TimelineCreateRequestModeImportPgdata, TimelineGcRequest, TimelineInfo, + TimelinesInfoAndOffloaded, TopTenantShardItem, TopTenantShardsRequest, TopTenantShardsResponse, +}; +use pageserver_api::shard::{ShardCount, TenantShardId}; +use remote_storage::{DownloadError, GenericRemoteStorage, TimeTravelError}; use scopeguard::defer; -use tenant_size_model::{svg::SvgBranchKind, SizeResult, StorageModel}; +use tenant_size_model::svg::SvgBranchKind; +use tenant_size_model::{SizeResult, StorageModel}; use tokio::time::Instant; use tokio_util::io::StreamReader; use tokio_util::sync::CancellationToken; use tracing::*; +use utils::auth::SwappableJwtAuth; +use utils::generation::Generation; +use utils::id::{TenantId, TimelineId}; +use utils::lsn::Lsn; use crate::config::PageServerConf; -use crate::context::RequestContextBuilder; -use crate::context::{DownloadBehavior, RequestContext}; +use crate::context::{DownloadBehavior, RequestContext, RequestContextBuilder}; use crate::deletion_queue::DeletionQueueClient; use crate::pgdatadir_mapping::LsnForTimestamp; use crate::task_mgr::TaskKind; use crate::tenant::config::{LocationConf, TenantConfOpt}; -use crate::tenant::mgr::GetActiveTenantError; use crate::tenant::mgr::{ - GetTenantError, TenantManager, TenantMapError, TenantMapInsertError, TenantSlotError, - TenantSlotUpsertError, TenantStateError, + GetActiveTenantError, GetTenantError, TenantManager, TenantMapError, TenantMapInsertError, + TenantSlot, TenantSlotError, TenantSlotUpsertError, TenantStateError, UpsertLocationError, +}; +use crate::tenant::remote_timeline_client::{ + download_index_part, list_remote_tenant_shards, list_remote_timelines, }; -use crate::tenant::mgr::{TenantSlot, UpsertLocationError}; -use crate::tenant::remote_timeline_client; -use crate::tenant::remote_timeline_client::download_index_part; -use crate::tenant::remote_timeline_client::list_remote_tenant_shards; -use crate::tenant::remote_timeline_client::list_remote_timelines; use crate::tenant::secondary::SecondaryController; use crate::tenant::size::ModelInputs; -use crate::tenant::storage_layer::IoConcurrency; -use crate::tenant::storage_layer::LayerAccessStatsReset; -use crate::tenant::storage_layer::LayerName; -use crate::tenant::timeline::import_pgdata; -use crate::tenant::timeline::offload::offload_timeline; -use crate::tenant::timeline::offload::OffloadError; -use crate::tenant::timeline::CompactFlags; -use crate::tenant::timeline::CompactOptions; -use crate::tenant::timeline::CompactRequest; -use crate::tenant::timeline::CompactionError; -use crate::tenant::timeline::Timeline; -use crate::tenant::timeline::WaitLsnTimeout; -use crate::tenant::timeline::WaitLsnWaiter; -use crate::tenant::GetTimelineError; -use crate::tenant::OffloadedTimeline; -use crate::tenant::{LogicalSizeCalculationCause, PageReconstructError}; -use crate::DEFAULT_PG_VERSION; -use crate::{disk_usage_eviction_task, tenant}; -use http_utils::{ - endpoint::{self, attach_openapi_ui, auth_middleware, check_permission_with}, - error::{ApiError, HttpErrorBody}, - json::{json_request, json_request_maybe, json_response}, - request::parse_request_param, - 
RequestExt, RouterBuilder, +use crate::tenant::storage_layer::{IoConcurrency, LayerAccessStatsReset, LayerName}; +use crate::tenant::timeline::offload::{OffloadError, offload_timeline}; +use crate::tenant::timeline::{ + CompactFlags, CompactOptions, CompactRequest, CompactionError, Timeline, WaitLsnTimeout, + WaitLsnWaiter, import_pgdata, }; -use pageserver_api::models::{ - StatusResponse, TenantConfigRequest, TenantInfo, TimelineCreateRequest, TimelineGcRequest, - TimelineInfo, -}; -use utils::{ - auth::SwappableJwtAuth, - generation::Generation, - id::{TenantId, TimelineId}, - lsn::Lsn, +use crate::tenant::{ + GetTimelineError, LogicalSizeCalculationCause, OffloadedTimeline, PageReconstructError, + remote_timeline_client, }; +use crate::{DEFAULT_PG_VERSION, disk_usage_eviction_task, tenant}; // For APIs that require an Active tenant, how long should we block waiting for that state? // This is not functionally necessary (clients will retry), but avoids generating a lot of @@ -1128,12 +1086,12 @@ async fn tenant_list_handler( ApiError::ResourceUnavailable("Tenant map is initializing or shutting down".into()) })? .iter() - .map(|(id, state, gen)| TenantInfo { + .map(|(id, state, gen_)| TenantInfo { id: *id, state: state.clone(), current_physical_size: None, attachment_status: state.attachment_status(), - generation: (*gen) + generation: (*gen_) .into() .expect("Tenants are always attached with a generation"), gc_blocking: None, @@ -1670,9 +1628,8 @@ async fn block_or_unblock_gc( request: Request, block: bool, ) -> Result, ApiError> { - use crate::tenant::{ - remote_timeline_client::WaitCompletionError, upload_queue::NotInitialized, - }; + use crate::tenant::remote_timeline_client::WaitCompletionError; + use crate::tenant::upload_queue::NotInitialized; let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?; check_permission(&request, Some(tenant_shard_id.tenant_id))?; let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?; @@ -2058,7 +2015,9 @@ async fn tenant_time_travel_remote_storage_handler( ))); } - tracing::info!("Issuing time travel request internally. timestamp={timestamp_raw}, done_if_after={done_if_after_raw}"); + tracing::info!( + "Issuing time travel request internally. 
timestamp={timestamp_raw}, done_if_after={done_if_after_raw}" + ); remote_timeline_client::upload::time_travel_recover_tenant( &state.remote_storage, @@ -2396,7 +2355,8 @@ async fn timeline_checkpoint_handler( CompactionError::ShuttingDown => ApiError::ShuttingDown, CompactionError::Offload(e) => ApiError::InternalServerError(anyhow::anyhow!(e)), CompactionError::CollectKeySpaceError(e) => ApiError::InternalServerError(anyhow::anyhow!(e)), - CompactionError::Other(e) => ApiError::InternalServerError(e) + CompactionError::Other(e) => ApiError::InternalServerError(e), + CompactionError::AlreadyRunning(_) => ApiError::InternalServerError(anyhow::anyhow!(e)), } )?; } @@ -2458,9 +2418,10 @@ async fn timeline_detach_ancestor_handler( request: Request, _cancel: CancellationToken, ) -> Result, ApiError> { - use crate::tenant::timeline::detach_ancestor; use pageserver_api::models::detach_ancestor::AncestorDetached; + use crate::tenant::timeline::detach_ancestor; + let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?; check_permission(&request, Some(tenant_shard_id.tenant_id))?; let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?; @@ -2805,14 +2766,19 @@ async fn tenant_scan_remote_handler( .await { Ok((index_part, index_generation, _index_mtime)) => { - tracing::info!("Found timeline {tenant_shard_id}/{timeline_id} metadata (gen {index_generation:?}, {} layers, {} consistent LSN)", - index_part.layer_metadata.len(), index_part.metadata.disk_consistent_lsn()); + tracing::info!( + "Found timeline {tenant_shard_id}/{timeline_id} metadata (gen {index_generation:?}, {} layers, {} consistent LSN)", + index_part.layer_metadata.len(), + index_part.metadata.disk_consistent_lsn() + ); generation = std::cmp::max(generation, index_generation); } Err(DownloadError::NotFound) => { // This is normal for tenants that were created with multiple shards: they have an unsharded path // containing the timeline's initdb tarball but no index. Otherwise it is a bit strange. - tracing::info!("Timeline path {tenant_shard_id}/{timeline_id} exists in remote storage but has no index, skipping"); + tracing::info!( + "Timeline path {tenant_shard_id}/{timeline_id} exists in remote storage but has no index, skipping" + ); continue; } Err(e) => { @@ -3431,7 +3397,9 @@ async fn read_tar_eof(mut reader: (impl tokio::io::AsyncRead + Unpin)) -> anyhow anyhow::bail!("unexpected non-zero bytes after the tar archive"); } if trailing_bytes % 512 != 0 { - anyhow::bail!("unexpected number of zeros ({trailing_bytes}), not divisible by tar block size (512 bytes), after the tar archive"); + anyhow::bail!( + "unexpected number of zeros ({trailing_bytes}), not divisible by tar block size (512 bytes), after the tar archive" + ); } Ok(()) } diff --git a/pageserver/src/import_datadir.rs b/pageserver/src/import_datadir.rs index a73fa5cec8..6dd005de50 100644 --- a/pageserver/src/import_datadir.rs +++ b/pageserver/src/import_datadir.rs @@ -4,14 +4,22 @@ //! 
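On `read_tar_eof` above: a tar stream ends with at least two 512-byte blocks of zeros, so once the archive proper has been consumed the function expects nothing but zero bytes, in a quantity divisible by the 512-byte block size, which is exactly what the two reformatted `bail!` messages guard. A small synchronous sketch of the same check (the real function is async and reads incrementally):

```rust
use std::io::Read;

fn check_tar_eof(mut reader: impl Read) -> anyhow::Result<()> {
    let mut trailing = Vec::new();
    reader.read_to_end(&mut trailing)?;

    if trailing.iter().any(|&b| b != 0) {
        anyhow::bail!("unexpected non-zero bytes after the tar archive");
    }
    if trailing.len() % 512 != 0 {
        anyhow::bail!(
            "unexpected number of zeros ({}), not divisible by tar block size (512 bytes), after the tar archive",
            trailing.len()
        );
    }
    Ok(())
}
```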
use std::path::{Path, PathBuf}; -use anyhow::{bail, ensure, Context, Result}; +use anyhow::{Context, Result, bail, ensure}; use bytes::Bytes; use camino::Utf8Path; use futures::StreamExt; use pageserver_api::key::rel_block_to_key; +use pageserver_api::reltag::{RelTag, SlruKind}; +use postgres_ffi::relfile_utils::*; +use postgres_ffi::waldecoder::WalStreamDecoder; +use postgres_ffi::{ + BLCKSZ, ControlFileData, DBState_DB_SHUTDOWNED, Oid, WAL_SEGMENT_SIZE, XLogFileName, + pg_constants, +}; use tokio::io::{AsyncRead, AsyncReadExt}; use tokio_tar::Archive; use tracing::*; +use utils::lsn::Lsn; use wal_decoder::models::InterpretedWalRecord; use walkdir::WalkDir; @@ -20,16 +28,6 @@ use crate::metrics::WAL_INGEST; use crate::pgdatadir_mapping::*; use crate::tenant::Timeline; use crate::walingest::WalIngest; -use pageserver_api::reltag::{RelTag, SlruKind}; -use postgres_ffi::pg_constants; -use postgres_ffi::relfile_utils::*; -use postgres_ffi::waldecoder::WalStreamDecoder; -use postgres_ffi::ControlFileData; -use postgres_ffi::DBState_DB_SHUTDOWNED; -use postgres_ffi::Oid; -use postgres_ffi::XLogFileName; -use postgres_ffi::{BLCKSZ, WAL_SEGMENT_SIZE}; -use utils::lsn::Lsn; // Returns checkpoint LSN from controlfile pub fn get_lsn_from_controlfile(path: &Utf8Path) -> Result { diff --git a/pageserver/src/l0_flush.rs b/pageserver/src/l0_flush.rs index 491c9fb96c..6cfecef0cf 100644 --- a/pageserver/src/l0_flush.rs +++ b/pageserver/src/l0_flush.rs @@ -1,4 +1,5 @@ -use std::{num::NonZeroUsize, sync::Arc}; +use std::num::NonZeroUsize; +use std::sync::Arc; #[derive(Debug, PartialEq, Eq, Clone)] pub enum L0FlushConfig { diff --git a/pageserver/src/lib.rs b/pageserver/src/lib.rs index f43cd08cf7..02767055fb 100644 --- a/pageserver/src/lib.rs +++ b/pageserver/src/lib.rs @@ -15,7 +15,8 @@ pub mod l0_flush; extern crate hyper0 as hyper; -use futures::{stream::FuturesUnordered, StreamExt}; +use futures::StreamExt; +use futures::stream::FuturesUnordered; pub use pageserver_api::keyspace; use tokio_util::sync::CancellationToken; mod assert_u64_eq_usize; @@ -35,10 +36,8 @@ pub mod walredo; use camino::Utf8Path; use deletion_queue::DeletionQueue; -use tenant::{ - mgr::{BackgroundPurges, TenantManager}, - secondary, -}; +use tenant::mgr::{BackgroundPurges, TenantManager}; +use tenant::secondary; use tracing::{info, info_span}; /// Current storage format version @@ -350,9 +349,10 @@ async fn timed_after_cancellation( #[cfg(test)] mod timed_tests { - use super::timed; use std::time::Duration; + use super::timed; + #[tokio::test] async fn timed_completes_when_inner_future_completes() { // A future that completes on time should have its result returned diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index e1c26b0684..eb8a9b8e24 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -10,11 +10,11 @@ use std::time::{Duration, Instant}; use enum_map::{Enum as _, EnumMap}; use futures::Future; use metrics::{ + Counter, CounterVec, Gauge, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPair, + IntCounterPairVec, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec, register_counter_vec, register_gauge_vec, register_histogram, register_histogram_vec, register_int_counter, register_int_counter_pair_vec, register_int_counter_vec, register_int_gauge, register_int_gauge_vec, register_uint_gauge, register_uint_gauge_vec, - Counter, CounterVec, Gauge, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPair, - IntCounterPairVec, IntCounterVec, IntGauge, IntGaugeVec, 
UIntGauge, UIntGaugeVec, }; use once_cell::sync::Lazy; use pageserver_api::config::{ @@ -24,9 +24,8 @@ use pageserver_api::config::{ use pageserver_api::models::InMemoryLayerInfo; use pageserver_api::shard::TenantShardId; use pin_project_lite::pin_project; -use postgres_backend::{is_expected_io_error, QueryError}; +use postgres_backend::{QueryError, is_expected_io_error}; use pq_proto::framed::ConnectionError; - use strum::{EnumCount, IntoEnumIterator as _, VariantNames}; use strum_macros::{IntoStaticStr, VariantNames}; use utils::id::TimelineId; @@ -35,12 +34,12 @@ use crate::config::PageServerConf; use crate::context::{PageContentKind, RequestContext}; use crate::pgdatadir_mapping::DatadirModificationStats; use crate::task_mgr::TaskKind; +use crate::tenant::Timeline; use crate::tenant::layer_map::LayerMap; use crate::tenant::mgr::TenantSlot; use crate::tenant::storage_layer::{InMemoryLayer, PersistentLayerDesc}; use crate::tenant::tasks::BackgroundLoopKind; use crate::tenant::throttle::ThrottleResult; -use crate::tenant::Timeline; /// Prometheus histogram buckets (in seconds) for operations in the critical /// path. In other words, operations that directly affect that latency of user @@ -363,7 +362,7 @@ pub(crate) static PAGE_CACHE_SIZE: Lazy = pub(crate) mod page_cache_eviction_metrics { use std::num::NonZeroUsize; - use metrics::{register_int_counter_vec, IntCounter, IntCounterVec}; + use metrics::{IntCounter, IntCounterVec, register_int_counter_vec}; use once_cell::sync::Lazy; #[derive(Clone, Copy)] @@ -722,7 +721,7 @@ pub(crate) static RELSIZE_CACHE_MISSES_OLD: Lazy = Lazy::new(|| { }); pub(crate) mod initial_logical_size { - use metrics::{register_int_counter, register_int_counter_vec, IntCounter, IntCounterVec}; + use metrics::{IntCounter, IntCounterVec, register_int_counter, register_int_counter_vec}; use once_cell::sync::Lazy; pub(crate) struct StartCalculation(IntCounterVec); @@ -1105,12 +1104,17 @@ impl EvictionsWithLowResidenceDuration { // - future "drop panick => abort" // // so just nag: (the error has the labels) - tracing::warn!("failed to remove EvictionsWithLowResidenceDuration, it was already removed? {e:#?}"); + tracing::warn!( + "failed to remove EvictionsWithLowResidenceDuration, it was already removed? {e:#?}" + ); } Ok(()) => { // to help identify cases where we double-remove the same values, let's log all // deletions? - tracing::info!("removed EvictionsWithLowResidenceDuration with {tenant_id}, {timeline_id}, {}, {threshold}", self.data_source); + tracing::info!( + "removed EvictionsWithLowResidenceDuration with {tenant_id}, {timeline_id}, {}, {threshold}", + self.data_source + ); } } } @@ -3574,12 +3578,10 @@ impl>, O, E> Future for MeasuredRemoteOp { } pub mod tokio_epoll_uring { - use std::{ - collections::HashMap, - sync::{Arc, Mutex}, - }; + use std::collections::HashMap; + use std::sync::{Arc, Mutex}; - use metrics::{register_histogram, register_int_counter, Histogram, LocalHistogram, UIntGauge}; + use metrics::{Histogram, LocalHistogram, UIntGauge, register_histogram, register_int_counter}; use once_cell::sync::Lazy; /// Shared storage for tokio-epoll-uring thread local metrics. 
@@ -3588,7 +3590,9 @@ pub mod tokio_epoll_uring { let slots_submission_queue_depth = register_histogram!( "pageserver_tokio_epoll_uring_slots_submission_queue_depth", "The slots waiters queue depth of each tokio_epoll_uring system", - vec![1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0], + vec![ + 1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0 + ], ) .expect("failed to define a metric"); ThreadLocalMetricsStorage { @@ -3765,7 +3769,7 @@ pub mod tokio_epoll_uring { } pub(crate) mod tenant_throttling { - use metrics::{register_int_counter_vec, IntCounter}; + use metrics::{IntCounter, register_int_counter_vec}; use once_cell::sync::Lazy; use utils::shard::TenantShardId; diff --git a/pageserver/src/page_cache.rs b/pageserver/src/page_cache.rs index 45bf02362a..984dd125a9 100644 --- a/pageserver/src/page_cache.rs +++ b/pageserver/src/page_cache.rs @@ -67,23 +67,18 @@ //! mapping is automatically removed and the slot is marked free. //! -use std::{ - collections::{hash_map::Entry, HashMap}, - sync::{ - atomic::{AtomicU64, AtomicU8, AtomicUsize, Ordering}, - Arc, Weak, - }, - time::Duration, -}; +use std::collections::HashMap; +use std::collections::hash_map::Entry; +use std::sync::atomic::{AtomicU8, AtomicU64, AtomicUsize, Ordering}; +use std::sync::{Arc, Weak}; +use std::time::Duration; use anyhow::Context; use once_cell::sync::OnceCell; -use crate::{ - context::RequestContext, - metrics::{page_cache_eviction_metrics, PageCacheSizeMetrics}, - virtual_file::{IoBufferMut, IoPageSlice}, -}; +use crate::context::RequestContext; +use crate::metrics::{PageCacheSizeMetrics, page_cache_eviction_metrics}; +use crate::virtual_file::{IoBufferMut, IoPageSlice}; static PAGE_CACHE: OnceCell = OnceCell::new(); const TEST_PAGE_CACHE_SIZE: usize = 50; @@ -168,11 +163,7 @@ impl Slot { let count_res = self.usage_count .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |val| { - if val == 0 { - None - } else { - Some(val - 1) - } + if val == 0 { None } else { Some(val - 1) } }); match count_res { diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index 0c8da6f2a8..8972515163 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -1,7 +1,15 @@ //! The Page Service listens for client connections and serves their GetPage@LSN //! requests. 
-use anyhow::{bail, Context}; +use std::borrow::Cow; +use std::num::NonZeroUsize; +use std::os::fd::AsRawFd; +use std::str::FromStr; +use std::sync::Arc; +use std::time::{Duration, Instant, SystemTime}; +use std::{io, str}; + +use anyhow::{Context, bail}; use async_compression::tokio::write::GzipEncoder; use bytes::Buf; use futures::FutureExt; @@ -11,69 +19,57 @@ use pageserver_api::config::{ PageServicePipeliningConfig, PageServicePipeliningConfigPipelined, PageServiceProtocolPipelinedExecutionStrategy, }; -use pageserver_api::models::{self, TenantState}; +use pageserver_api::key::rel_block_to_key; use pageserver_api::models::{ - PagestreamBeMessage, PagestreamDbSizeRequest, PagestreamDbSizeResponse, + self, PageTraceEvent, PagestreamBeMessage, PagestreamDbSizeRequest, PagestreamDbSizeResponse, PagestreamErrorResponse, PagestreamExistsRequest, PagestreamExistsResponse, PagestreamFeMessage, PagestreamGetPageRequest, PagestreamGetSlruSegmentRequest, PagestreamGetSlruSegmentResponse, PagestreamNblocksRequest, PagestreamNblocksResponse, - PagestreamProtocolVersion, PagestreamRequest, + PagestreamProtocolVersion, PagestreamRequest, TenantState, }; +use pageserver_api::reltag::SlruKind; use pageserver_api::shard::TenantShardId; use postgres_backend::{ - is_expected_io_error, AuthType, PostgresBackend, PostgresBackendReader, QueryError, + AuthType, PostgresBackend, PostgresBackendReader, QueryError, is_expected_io_error, }; +use postgres_ffi::BLCKSZ; +use postgres_ffi::pg_constants::DEFAULTTABLESPACE_OID; use pq_proto::framed::ConnectionError; -use pq_proto::FeStartupPacket; -use pq_proto::{BeMessage, FeMessage, RowDescriptor}; -use std::borrow::Cow; -use std::io; -use std::num::NonZeroUsize; -use std::str; -use std::str::FromStr; -use std::sync::Arc; -use std::time::SystemTime; -use std::time::{Duration, Instant}; -use tokio::io::{AsyncRead, AsyncWrite}; -use tokio::io::{AsyncWriteExt, BufWriter}; +use pq_proto::{BeMessage, FeMessage, FeStartupPacket, RowDescriptor}; +use strum_macros::IntoStaticStr; +use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt, BufWriter}; use tokio::task::JoinHandle; use tokio_util::sync::CancellationToken; use tracing::*; +use utils::auth::{Claims, Scope, SwappableJwtAuth}; +use utils::failpoint_support; +use utils::id::{TenantId, TimelineId}; +use utils::logging::log_slow; +use utils::lsn::Lsn; +use utils::simple_rcu::RcuReadGuard; use utils::sync::gate::{Gate, GateGuard}; use utils::sync::spsc_fold; -use utils::{ - auth::{Claims, Scope, SwappableJwtAuth}, - id::{TenantId, TimelineId}, - lsn::Lsn, - simple_rcu::RcuReadGuard, -}; use crate::auth::check_permission; use crate::basebackup::BasebackupError; use crate::config::PageServerConf; use crate::context::{DownloadBehavior, RequestContext}; -use crate::metrics::{self, SmgrOpTimer}; -use crate::metrics::{ComputeCommandKind, COMPUTE_COMMANDS_COUNTERS, LIVE_CONNECTIONS}; +use crate::metrics::{ + self, COMPUTE_COMMANDS_COUNTERS, ComputeCommandKind, LIVE_CONNECTIONS, SmgrOpTimer, +}; use crate::pgdatadir_mapping::Version; -use crate::span::debug_assert_current_span_has_tenant_and_timeline_id; -use crate::span::debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id; -use crate::task_mgr::TaskKind; -use crate::task_mgr::{self, COMPUTE_REQUEST_RUNTIME}; -use crate::tenant::mgr::ShardSelector; -use crate::tenant::mgr::TenantManager; -use crate::tenant::mgr::{GetActiveTenantError, GetTenantError, ShardResolveResult}; +use crate::span::{ + debug_assert_current_span_has_tenant_and_timeline_id, + 
debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id, +}; +use crate::task_mgr::{self, COMPUTE_REQUEST_RUNTIME, TaskKind}; +use crate::tenant::mgr::{ + GetActiveTenantError, GetTenantError, ShardResolveResult, ShardSelector, TenantManager, +}; use crate::tenant::storage_layer::IoConcurrency; use crate::tenant::timeline::{self, WaitLsnError}; -use crate::tenant::GetTimelineError; -use crate::tenant::PageReconstructError; -use crate::tenant::Timeline; +use crate::tenant::{GetTimelineError, PageReconstructError, Timeline}; use crate::{basebackup, timed_after_cancellation}; -use pageserver_api::key::rel_block_to_key; -use pageserver_api::models::PageTraceEvent; -use pageserver_api::reltag::SlruKind; -use postgres_ffi::pg_constants::DEFAULTTABLESPACE_OID; -use postgres_ffi::BLCKSZ; -use std::os::fd::AsRawFd; /// How long we may wait for a [`crate::tenant::mgr::TenantSlot::InProgress`]` and/or a [`crate::tenant::Tenant`] which /// is not yet in state [`TenantState::Active`]. @@ -81,6 +77,9 @@ use std::os::fd::AsRawFd; /// NB: this is a different value than [`crate::http::routes::ACTIVE_TENANT_TIMEOUT`]. const ACTIVE_TENANT_TIMEOUT: Duration = Duration::from_millis(30000); +/// Threshold at which to log slow GetPage requests. +const LOG_SLOW_GETPAGE_THRESHOLD: Duration = Duration::from_secs(30); + /////////////////////////////////////////////////////////////////////////////// pub struct Listener { @@ -594,6 +593,7 @@ struct BatchedTestRequest { /// NB: we only hold [`timeline::handle::WeakHandle`] inside this enum, /// so that we don't keep the [`Timeline::gate`] open while the batch /// is being built up inside the [`spsc_fold`] (pagestream pipelining). +#[derive(IntoStaticStr)] enum BatchedFeMessage { Exists { span: Span, @@ -638,6 +638,10 @@ enum BatchedFeMessage { } impl BatchedFeMessage { + fn as_static_str(&self) -> &'static str { + self.into() + } + fn observe_execution_start(&mut self, at: Instant) { match self { BatchedFeMessage::Exists { timer, .. 
} @@ -975,7 +979,7 @@ impl PageServerHandler { Ok(BatchedFeMessage::GetPage { span: _, shard: accum_shard, - pages: ref mut accum_pages, + pages: accum_pages, effective_request_lsn: accum_lsn, }), BatchedFeMessage::GetPage { @@ -1076,133 +1080,13 @@ impl PageServerHandler { batch }; - // invoke handler function - let (mut handler_results, span): ( - Vec>, - _, - ) = match batch { - BatchedFeMessage::Exists { - span, - timer, - shard, - req, - } => { - fail::fail_point!("ps::handle-pagerequest-message::exists"); - ( - vec![self - .handle_get_rel_exists_request(&*shard.upgrade()?, &req, ctx) - .instrument(span.clone()) - .await - .map(|msg| (msg, timer)) - .map_err(|err| BatchedPageStreamError { err, req: req.hdr })], - span, - ) - } - BatchedFeMessage::Nblocks { - span, - timer, - shard, - req, - } => { - fail::fail_point!("ps::handle-pagerequest-message::nblocks"); - ( - vec![self - .handle_get_nblocks_request(&*shard.upgrade()?, &req, ctx) - .instrument(span.clone()) - .await - .map(|msg| (msg, timer)) - .map_err(|err| BatchedPageStreamError { err, req: req.hdr })], - span, - ) - } - BatchedFeMessage::GetPage { - span, - shard, - effective_request_lsn, - pages, - } => { - fail::fail_point!("ps::handle-pagerequest-message::getpage"); - ( - { - let npages = pages.len(); - trace!(npages, "handling getpage request"); - let res = self - .handle_get_page_at_lsn_request_batched( - &*shard.upgrade()?, - effective_request_lsn, - pages, - io_concurrency, - ctx, - ) - .instrument(span.clone()) - .await; - assert_eq!(res.len(), npages); - res - }, - span, - ) - } - BatchedFeMessage::DbSize { - span, - timer, - shard, - req, - } => { - fail::fail_point!("ps::handle-pagerequest-message::dbsize"); - ( - vec![self - .handle_db_size_request(&*shard.upgrade()?, &req, ctx) - .instrument(span.clone()) - .await - .map(|msg| (msg, timer)) - .map_err(|err| BatchedPageStreamError { err, req: req.hdr })], - span, - ) - } - BatchedFeMessage::GetSlruSegment { - span, - timer, - shard, - req, - } => { - fail::fail_point!("ps::handle-pagerequest-message::slrusegment"); - ( - vec![self - .handle_get_slru_segment_request(&*shard.upgrade()?, &req, ctx) - .instrument(span.clone()) - .await - .map(|msg| (msg, timer)) - .map_err(|err| BatchedPageStreamError { err, req: req.hdr })], - span, - ) - } - #[cfg(feature = "testing")] - BatchedFeMessage::Test { - span, - shard, - requests, - } => { - fail::fail_point!("ps::handle-pagerequest-message::test"); - ( - { - let npages = requests.len(); - trace!(npages, "handling getpage request"); - let res = self - .handle_test_request_batch(&*shard.upgrade()?, requests, ctx) - .instrument(span.clone()) - .await; - assert_eq!(res.len(), npages); - res - }, - span, - ) - } - BatchedFeMessage::RespondError { span, error } => { - // We've already decided to respond with an error, so we don't need to - // call the handler. - (vec![Err(error)], span) - } - }; + // Dispatch the batch to the appropriate request handler. + let (mut handler_results, span) = log_slow( + batch.as_static_str(), + LOG_SLOW_GETPAGE_THRESHOLD, + self.pagestream_dispatch_batched_message(batch, io_concurrency, ctx), + ) + .await?; // We purposefully don't count flush time into the smgr operation timer. 
// @@ -1288,6 +1172,8 @@ impl PageServerHandler { &response_msg.serialize(protocol_version), ))?; + failpoint_support::sleep_millis_async!("before-pagestream-msg-flush", cancel); + // what we want to do let socket_fd = pgb_writer.socket_fd; let flush_fut = pgb_writer.flush(); @@ -1320,6 +1206,149 @@ impl PageServerHandler { Ok(()) } + /// Helper which dispatches a batched message to the appropriate handler. + /// Returns a vec of results, along with the extracted trace span. + async fn pagestream_dispatch_batched_message( + &mut self, + batch: BatchedFeMessage, + io_concurrency: IoConcurrency, + ctx: &RequestContext, + ) -> Result< + ( + Vec>, + Span, + ), + QueryError, + > { + Ok(match batch { + BatchedFeMessage::Exists { + span, + timer, + shard, + req, + } => { + fail::fail_point!("ps::handle-pagerequest-message::exists"); + ( + vec![ + self.handle_get_rel_exists_request(&*shard.upgrade()?, &req, ctx) + .instrument(span.clone()) + .await + .map(|msg| (msg, timer)) + .map_err(|err| BatchedPageStreamError { err, req: req.hdr }), + ], + span, + ) + } + BatchedFeMessage::Nblocks { + span, + timer, + shard, + req, + } => { + fail::fail_point!("ps::handle-pagerequest-message::nblocks"); + ( + vec![ + self.handle_get_nblocks_request(&*shard.upgrade()?, &req, ctx) + .instrument(span.clone()) + .await + .map(|msg| (msg, timer)) + .map_err(|err| BatchedPageStreamError { err, req: req.hdr }), + ], + span, + ) + } + BatchedFeMessage::GetPage { + span, + shard, + effective_request_lsn, + pages, + } => { + fail::fail_point!("ps::handle-pagerequest-message::getpage"); + ( + { + let npages = pages.len(); + trace!(npages, "handling getpage request"); + let res = self + .handle_get_page_at_lsn_request_batched( + &*shard.upgrade()?, + effective_request_lsn, + pages, + io_concurrency, + ctx, + ) + .instrument(span.clone()) + .await; + assert_eq!(res.len(), npages); + res + }, + span, + ) + } + BatchedFeMessage::DbSize { + span, + timer, + shard, + req, + } => { + fail::fail_point!("ps::handle-pagerequest-message::dbsize"); + ( + vec![ + self.handle_db_size_request(&*shard.upgrade()?, &req, ctx) + .instrument(span.clone()) + .await + .map(|msg| (msg, timer)) + .map_err(|err| BatchedPageStreamError { err, req: req.hdr }), + ], + span, + ) + } + BatchedFeMessage::GetSlruSegment { + span, + timer, + shard, + req, + } => { + fail::fail_point!("ps::handle-pagerequest-message::slrusegment"); + ( + vec![ + self.handle_get_slru_segment_request(&*shard.upgrade()?, &req, ctx) + .instrument(span.clone()) + .await + .map(|msg| (msg, timer)) + .map_err(|err| BatchedPageStreamError { err, req: req.hdr }), + ], + span, + ) + } + #[cfg(feature = "testing")] + BatchedFeMessage::Test { + span, + shard, + requests, + } => { + fail::fail_point!("ps::handle-pagerequest-message::test"); + ( + { + let npages = requests.len(); + trace!(npages, "handling getpage request"); + let res = self + .handle_test_request_batch(&*shard.upgrade()?, requests, ctx) + .instrument(span.clone()) + .await; + assert_eq!(res.len(), npages); + res + }, + span, + ) + } + BatchedFeMessage::RespondError { span, error } => { + // We've already decided to respond with an error, so we don't need to + // call the handler. + (vec![Err(error)], span) + } + }) + } + /// Pagestream sub-protocol handler. /// /// It is a simple request-response protocol inside a COPYBOTH session. 
@@ -1463,7 +1492,7 @@ impl PageServerHandler { } }; - let err = self + let result = self .pagesteam_handle_batched_message( pgb_writer, msg, @@ -1473,7 +1502,7 @@ impl PageServerHandler { ctx, ) .await; - match err { + match result { Ok(()) => {} Err(e) => break e, } @@ -2080,9 +2109,10 @@ impl PageServerHandler { set_tracing_field_shard_id(&timeline); if timeline.is_archived() == Some(true) { - // TODO after a grace period, turn this log line into a hard error - tracing::warn!("timeline {tenant_id}/{timeline_id} is archived, but got basebackup request for it."); - //return Err(QueryError::NotFound("timeline is archived".into())) + tracing::info!( + "timeline {tenant_id}/{timeline_id} is archived, but got basebackup request for it." + ); + return Err(QueryError::NotFound("timeline is archived".into())); } let latest_gc_cutoff_lsn = timeline.get_applied_gc_cutoff_lsn(); diff --git a/pageserver/src/pgdatadir_mapping.rs b/pageserver/src/pgdatadir_mapping.rs index ae2762bd1e..787b1b895c 100644 --- a/pageserver/src/pgdatadir_mapping.rs +++ b/pageserver/src/pgdatadir_mapping.rs @@ -6,6 +6,36 @@ //! walingest.rs handles a few things like implicit relation creation and extension. //! Clarify that) //! +use std::collections::{BTreeMap, HashMap, HashSet, hash_map}; +use std::ops::{ControlFlow, Range}; + +use anyhow::{Context, ensure}; +use bytes::{Buf, Bytes, BytesMut}; +use enum_map::Enum; +use itertools::Itertools; +use pageserver_api::key::{ + AUX_FILES_KEY, CHECKPOINT_KEY, CONTROLFILE_KEY, CompactKey, DBDIR_KEY, Key, RelDirExists, + TWOPHASEDIR_KEY, dbdir_key_range, rel_block_to_key, rel_dir_to_key, rel_key_range, + rel_size_to_key, rel_tag_sparse_key, rel_tag_sparse_key_range, relmap_file_key, + repl_origin_key, repl_origin_key_range, slru_block_to_key, slru_dir_to_key, + slru_segment_key_range, slru_segment_size_to_key, twophase_file_key, twophase_key_range, +}; +use pageserver_api::keyspace::SparseKeySpace; +use pageserver_api::record::NeonWalRecord; +use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind}; +use pageserver_api::shard::ShardIdentity; +use pageserver_api::value::Value; +use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM}; +use postgres_ffi::{BLCKSZ, Oid, RepOriginId, TimestampTz, TransactionId}; +use serde::{Deserialize, Serialize}; +use strum::IntoEnumIterator; +use tokio_util::sync::CancellationToken; +use tracing::{debug, info, trace, warn}; +use utils::bin_ser::{BeSer, DeserializeError}; +use utils::lsn::Lsn; +use utils::pausable_failpoint; +use wal_decoder::serialized_batch::{SerializedValueBatch, ValueMeta}; + use super::tenant::{PageReconstructError, Timeline}; use crate::aux_file; use crate::context::RequestContext; @@ -19,37 +49,6 @@ use crate::span::{ }; use crate::tenant::storage_layer::IoConcurrency; use crate::tenant::timeline::GetVectoredError; -use anyhow::{ensure, Context}; -use bytes::{Buf, Bytes, BytesMut}; -use enum_map::Enum; -use itertools::Itertools; -use pageserver_api::key::{ - dbdir_key_range, rel_block_to_key, rel_dir_to_key, rel_key_range, rel_size_to_key, - rel_tag_sparse_key_range, relmap_file_key, repl_origin_key, repl_origin_key_range, - slru_block_to_key, slru_dir_to_key, slru_segment_key_range, slru_segment_size_to_key, - twophase_file_key, twophase_key_range, CompactKey, RelDirExists, AUX_FILES_KEY, CHECKPOINT_KEY, - CONTROLFILE_KEY, DBDIR_KEY, TWOPHASEDIR_KEY, -}; -use pageserver_api::key::{rel_tag_sparse_key, Key}; -use pageserver_api::keyspace::SparseKeySpace; -use pageserver_api::record::NeonWalRecord; -use 
pageserver_api::reltag::{BlockNumber, RelTag, SlruKind}; -use pageserver_api::shard::ShardIdentity; -use pageserver_api::value::Value; -use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM}; -use postgres_ffi::BLCKSZ; -use postgres_ffi::{Oid, RepOriginId, TimestampTz, TransactionId}; -use serde::{Deserialize, Serialize}; -use std::collections::{hash_map, BTreeMap, HashMap, HashSet}; -use std::ops::ControlFlow; -use std::ops::Range; -use strum::IntoEnumIterator; -use tokio_util::sync::CancellationToken; -use tracing::{debug, trace, warn}; -use utils::bin_ser::DeserializeError; -use utils::pausable_failpoint; -use utils::{bin_ser::BeSer, lsn::Lsn}; -use wal_decoder::serialized_batch::{SerializedValueBatch, ValueMeta}; /// Max delta records appended to the AUX_FILES_KEY (for aux v1). The write path will write a full image once this threshold is reached. pub const MAX_AUX_FILE_DELTAS: usize = 1024; @@ -327,16 +326,16 @@ impl Timeline { let clone = match &res { Ok(buf) => Ok(buf.clone()), Err(err) => Err(match err { - PageReconstructError::Cancelled => { - PageReconstructError::Cancelled - } + PageReconstructError::Cancelled => PageReconstructError::Cancelled, - x @ PageReconstructError::Other(_) | - x @ PageReconstructError::AncestorLsnTimeout(_) | - x @ PageReconstructError::WalRedo(_) | - x @ PageReconstructError::MissingKey(_) => { - PageReconstructError::Other(anyhow::anyhow!("there was more than one request for this key in the batch, error logged once: {x:?}")) - }, + x @ PageReconstructError::Other(_) + | x @ PageReconstructError::AncestorLsnTimeout(_) + | x @ PageReconstructError::WalRedo(_) + | x @ PageReconstructError::MissingKey(_) => { + PageReconstructError::Other(anyhow::anyhow!( + "there was more than one request for this key in the batch, error logged once: {x:?}" + )) + } }), }; @@ -355,23 +354,23 @@ impl Timeline { // this whole `match` is a lot like `From for PageReconstructError` // but without taking ownership of the GetVectoredError let err = match &err { - GetVectoredError::Cancelled => { - Err(PageReconstructError::Cancelled) - } + GetVectoredError::Cancelled => Err(PageReconstructError::Cancelled), // TODO: restructure get_vectored API to make this error per-key GetVectoredError::MissingKey(err) => { - Err(PageReconstructError::Other(anyhow::anyhow!("whole vectored get request failed because one or more of the requested keys were missing: {err:?}"))) + Err(PageReconstructError::Other(anyhow::anyhow!( + "whole vectored get request failed because one or more of the requested keys were missing: {err:?}" + ))) } // TODO: restructure get_vectored API to make this error per-key GetVectoredError::GetReadyAncestorError(err) => { - Err(PageReconstructError::Other(anyhow::anyhow!("whole vectored get request failed because one or more key required ancestor that wasn't ready: {err:?}"))) + Err(PageReconstructError::Other(anyhow::anyhow!( + "whole vectored get request failed because one or more key required ancestor that wasn't ready: {err:?}" + ))) } // TODO: restructure get_vectored API to make this error per-key - GetVectoredError::Other(err) => { - Err(PageReconstructError::Other( - anyhow::anyhow!("whole vectored get request failed: {err:?}"), - )) - } + GetVectoredError::Other(err) => Err(PageReconstructError::Other( + anyhow::anyhow!("whole vectored get request failed: {err:?}"), + )), // TODO: we can prevent this error class by moving this check into the type system GetVectoredError::InvalidLsn(e) => { Err(anyhow::anyhow!("invalid LSN: {e:?}").into()) @@ 
-379,10 +378,7 @@ impl Timeline { // NB: this should never happen in practice because we limit MAX_GET_VECTORED_KEYS // TODO: we can prevent this error class by moving this check into the type system GetVectoredError::Oversized(err) => { - Err(anyhow::anyhow!( - "batching oversized: {err:?}" - ) - .into()) + Err(anyhow::anyhow!("batching oversized: {err:?}").into()) } }; @@ -715,7 +711,10 @@ impl Timeline { { Ok(res) => res, Err(PageReconstructError::MissingKey(e)) => { - warn!("Missing key while find_lsn_for_timestamp. Either we might have already garbage-collected that data or the key is really missing. Last error: {:#}", e); + warn!( + "Missing key while find_lsn_for_timestamp. Either we might have already garbage-collected that data or the key is really missing. Last error: {:#}", + e + ); // Return that we didn't find any requests smaller than the LSN, and logging the error. return Ok(LsnForTimestamp::Past(min_lsn)); } @@ -2264,6 +2263,13 @@ impl DatadirModification<'_> { self.tline.aux_file_size_estimator.on_add(content.len()); new_files.push((path, content)); } + // Compute may request delete of old version of pgstat AUX file if new one exceeds size limit. + // Compute doesn't know if previous version of this file exists or not, so + // attempt to delete non-existing file can cause this message. + // To avoid false alarms, log it as info rather than warning. + (None, true) if path.starts_with("pg_stat/") => { + info!("removing non-existing pg_stat file: {}", path) + } (None, true) => warn!("removing non-existing aux file: {}", path), } let new_val = aux_file::encode_file_value(&new_files)?; @@ -2457,10 +2463,12 @@ impl DatadirModification<'_> { // modifications before ingesting DB create operations, which are the only kind that reads // data pages during ingest. 
if cfg!(debug_assertions) { - assert!(!self - .pending_data_batch - .as_ref() - .is_some_and(|b| b.updates_key(&key))); + assert!( + !self + .pending_data_batch + .as_ref() + .is_some_and(|b| b.updates_key(&key)) + ); } } @@ -2659,15 +2667,14 @@ static ZERO_PAGE: Bytes = Bytes::from_static(&[0u8; BLCKSZ as usize]); #[cfg(test)] mod tests { use hex_literal::hex; - use pageserver_api::{models::ShardParameters, shard::ShardStripeSize}; - use utils::{ - id::TimelineId, - shard::{ShardCount, ShardNumber}, - }; + use pageserver_api::models::ShardParameters; + use pageserver_api::shard::ShardStripeSize; + use utils::id::TimelineId; + use utils::shard::{ShardCount, ShardNumber}; use super::*; - - use crate::{tenant::harness::TenantHarness, DEFAULT_PG_VERSION}; + use crate::DEFAULT_PG_VERSION; + use crate::tenant::harness::TenantHarness; /// Test a round trip of aux file updates, from DatadirModification to reading back from the Timeline #[tokio::test] diff --git a/pageserver/src/statvfs.rs b/pageserver/src/statvfs.rs index 4e8be58d58..85c2ed8499 100644 --- a/pageserver/src/statvfs.rs +++ b/pageserver/src/statvfs.rs @@ -73,11 +73,10 @@ impl Statvfs { pub mod mock { use camino::Utf8Path; + pub use pageserver_api::config::statvfs::mock::Behavior; use regex::Regex; use tracing::log::info; - pub use pageserver_api::config::statvfs::mock::Behavior; - pub fn get(tenants_dir: &Utf8Path, behavior: &Behavior) -> nix::Result { info!("running mocked statvfs"); @@ -85,7 +84,7 @@ pub mod mock { Behavior::Success { blocksize, total_blocks, - ref name_filter, + name_filter, } => { let used_bytes = walk_dir_disk_usage(tenants_dir, name_filter.as_deref()).unwrap(); @@ -134,7 +133,7 @@ pub mod mock { } Err(e) => { return Err(anyhow::Error::new(e) - .context(format!("get metadata of {:?}", entry.path()))) + .context(format!("get metadata of {:?}", entry.path()))); } }; total += m.len(); diff --git a/pageserver/src/task_mgr.rs b/pageserver/src/task_mgr.rs index cc93a06ccd..0b71b2cf5b 100644 --- a/pageserver/src/task_mgr.rs +++ b/pageserver/src/task_mgr.rs @@ -40,15 +40,12 @@ use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::{Arc, Mutex}; use futures::FutureExt; +use once_cell::sync::Lazy; use pageserver_api::shard::TenantShardId; use tokio::task::JoinHandle; use tokio::task_local; use tokio_util::sync::CancellationToken; - use tracing::{debug, error, info, warn}; - -use once_cell::sync::Lazy; - use utils::env; use utils::id::TimelineId; diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index efb35625f2..9243f131ad 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -12,149 +12,99 @@ //! parent timeline, and the last LSN that has been written to disk. //! 
-use anyhow::{bail, Context}; +use std::collections::hash_map::Entry; +use std::collections::{BTreeMap, HashMap, HashSet}; +use std::fmt::{Debug, Display}; +use std::fs::File; +use std::future::Future; +use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; +use std::sync::{Arc, Mutex, Weak}; +use std::time::{Duration, Instant, SystemTime}; +use std::{fmt, fs}; + +use anyhow::{Context, bail}; use arc_swap::ArcSwap; -use camino::Utf8Path; -use camino::Utf8PathBuf; +use camino::{Utf8Path, Utf8PathBuf}; use chrono::NaiveDateTime; use enumset::EnumSet; -use futures::stream::FuturesUnordered; use futures::StreamExt; +use futures::stream::FuturesUnordered; use itertools::Itertools as _; +use once_cell::sync::Lazy; use pageserver_api::models; -use pageserver_api::models::CompactInfoResponse; -use pageserver_api::models::LsnLease; -use pageserver_api::models::TimelineArchivalState; -use pageserver_api::models::TimelineState; -use pageserver_api::models::TopTenantShardItem; -use pageserver_api::models::WalRedoManagerStatus; -use pageserver_api::shard::ShardIdentity; -use pageserver_api::shard::ShardStripeSize; -use pageserver_api::shard::TenantShardId; -use remote_storage::DownloadError; -use remote_storage::GenericRemoteStorage; -use remote_storage::TimeoutOrCancel; -use remote_timeline_client::manifest::{ - OffloadedTimelineManifest, TenantManifest, LATEST_TENANT_MANIFEST_VERSION, +pub use pageserver_api::models::TenantState; +use pageserver_api::models::{ + CompactInfoResponse, LsnLease, TimelineArchivalState, TimelineState, TopTenantShardItem, + WalRedoManagerStatus, }; -use remote_timeline_client::UploadQueueNotReadyError; -use remote_timeline_client::FAILED_REMOTE_OP_RETRIES; -use remote_timeline_client::FAILED_UPLOAD_WARN_THRESHOLD; -use secondary::heatmap::HeatMapTenant; -use secondary::heatmap::HeatMapTimeline; -use std::collections::BTreeMap; -use std::fmt; -use std::future::Future; -use std::sync::atomic::AtomicBool; -use std::sync::Weak; -use std::time::SystemTime; +use pageserver_api::shard::{ShardIdentity, ShardStripeSize, TenantShardId}; +use remote_storage::{DownloadError, GenericRemoteStorage, TimeoutOrCancel}; +use remote_timeline_client::index::GcCompactionState; +use remote_timeline_client::manifest::{ + LATEST_TENANT_MANIFEST_VERSION, OffloadedTimelineManifest, TenantManifest, +}; +use remote_timeline_client::{ + FAILED_REMOTE_OP_RETRIES, FAILED_UPLOAD_WARN_THRESHOLD, UploadQueueNotReadyError, +}; +use secondary::heatmap::{HeatMapTenant, HeatMapTimeline}; use storage_broker::BrokerClientChannel; -use timeline::compaction::CompactionOutcome; -use timeline::compaction::GcCompactionQueue; -use timeline::import_pgdata; -use timeline::offload::offload_timeline; -use timeline::offload::OffloadError; -use timeline::CompactFlags; -use timeline::CompactOptions; -use timeline::CompactionError; -use timeline::PreviousHeatmap; -use timeline::ShutdownMode; +use timeline::compaction::{CompactionOutcome, GcCompactionQueue}; +use timeline::offload::{OffloadError, offload_timeline}; +use timeline::{ + CompactFlags, CompactOptions, CompactionError, PreviousHeatmap, ShutdownMode, import_pgdata, +}; use tokio::io::BufReader; -use tokio::sync::watch; -use tokio::sync::Notify; +use tokio::sync::{Notify, Semaphore, watch}; use tokio::task::JoinSet; use tokio_util::sync::CancellationToken; use tracing::*; use upload_queue::NotInitialized; -use utils::backoff; use utils::circuit_breaker::CircuitBreaker; -use utils::completion; use utils::crashsafe::path_with_suffix_extension; -use 
utils::failpoint_support; -use utils::fs_ext; -use utils::pausable_failpoint; -use utils::sync::gate::Gate; -use utils::sync::gate::GateGuard; -use utils::timeout::timeout_cancellable; -use utils::timeout::TimeoutCancellableError; +use utils::sync::gate::{Gate, GateGuard}; +use utils::timeout::{TimeoutCancellableError, timeout_cancellable}; use utils::try_rcu::ArcSwapExt; -use utils::zstd::create_zst_tarball; -use utils::zstd::extract_zst_tarball; +use utils::zstd::{create_zst_tarball, extract_zst_tarball}; +use utils::{backoff, completion, failpoint_support, fs_ext, pausable_failpoint}; -use self::config::AttachedLocationConfig; -use self::config::AttachmentMode; -use self::config::LocationConf; -use self::config::TenantConf; +use self::config::{AttachedLocationConfig, AttachmentMode, LocationConf, TenantConf}; use self::metadata::TimelineMetadata; -use self::mgr::GetActiveTenantError; -use self::mgr::GetTenantError; +use self::mgr::{GetActiveTenantError, GetTenantError}; use self::remote_timeline_client::upload::{upload_index_part, upload_tenant_manifest}; use self::remote_timeline_client::{RemoteTimelineClient, WaitCompletionError}; -use self::timeline::uninit::TimelineCreateGuard; -use self::timeline::uninit::TimelineExclusionError; -use self::timeline::uninit::UninitializedTimeline; -use self::timeline::EvictionTaskTenantState; -use self::timeline::GcCutoffs; -use self::timeline::TimelineDeleteProgress; -use self::timeline::TimelineResources; -use self::timeline::WaitLsnError; +use self::timeline::uninit::{TimelineCreateGuard, TimelineExclusionError, UninitializedTimeline}; +use self::timeline::{ + EvictionTaskTenantState, GcCutoffs, TimelineDeleteProgress, TimelineResources, WaitLsnError, +}; use crate::config::PageServerConf; use crate::context::{DownloadBehavior, RequestContext}; -use crate::deletion_queue::DeletionQueueClient; -use crate::deletion_queue::DeletionQueueError; -use crate::import_datadir; +use crate::deletion_queue::{DeletionQueueClient, DeletionQueueError}; use crate::l0_flush::L0FlushGlobalState; -use crate::metrics::CONCURRENT_INITDBS; -use crate::metrics::INITDB_RUN_TIME; -use crate::metrics::INITDB_SEMAPHORE_ACQUISITION_TIME; -use crate::metrics::TENANT; use crate::metrics::{ - remove_tenant_metrics, BROKEN_TENANTS_SET, CIRCUIT_BREAKERS_BROKEN, CIRCUIT_BREAKERS_UNBROKEN, - TENANT_STATE_METRIC, TENANT_SYNTHETIC_SIZE_METRIC, + BROKEN_TENANTS_SET, CIRCUIT_BREAKERS_BROKEN, CIRCUIT_BREAKERS_UNBROKEN, CONCURRENT_INITDBS, + INITDB_RUN_TIME, INITDB_SEMAPHORE_ACQUISITION_TIME, TENANT, TENANT_STATE_METRIC, + TENANT_SYNTHETIC_SIZE_METRIC, remove_tenant_metrics, }; -use crate::task_mgr; use crate::task_mgr::TaskKind; -use crate::tenant::config::LocationMode; -use crate::tenant::config::TenantConfOpt; +use crate::tenant::config::{LocationMode, TenantConfOpt}; use crate::tenant::gc_result::GcResult; pub use crate::tenant::remote_timeline_client::index::IndexPart; -use crate::tenant::remote_timeline_client::remote_initdb_archive_path; -use crate::tenant::remote_timeline_client::MaybeDeletedIndexPart; -use crate::tenant::remote_timeline_client::INITDB_PATH; -use crate::tenant::storage_layer::DeltaLayer; -use crate::tenant::storage_layer::ImageLayer; -use crate::walingest::WalLagCooldown; -use crate::walredo; -use crate::InitializationOrder; -use std::collections::hash_map::Entry; -use std::collections::HashMap; -use std::collections::HashSet; -use std::fmt::Debug; -use std::fmt::Display; -use std::fs; -use std::fs::File; -use std::sync::atomic::{AtomicU64, Ordering}; -use 
std::sync::Arc; -use std::sync::Mutex; -use std::time::{Duration, Instant}; - -use crate::span; +use crate::tenant::remote_timeline_client::{ + INITDB_PATH, MaybeDeletedIndexPart, remote_initdb_archive_path, +}; +use crate::tenant::storage_layer::{DeltaLayer, ImageLayer}; use crate::tenant::timeline::delete::DeleteTimelineFlow; use crate::tenant::timeline::uninit::cleanup_timeline_directory; use crate::virtual_file::VirtualFile; +use crate::walingest::WalLagCooldown; use crate::walredo::PostgresRedoManager; -use crate::TEMP_FILE_SUFFIX; -use once_cell::sync::Lazy; -pub use pageserver_api::models::TenantState; -use tokio::sync::Semaphore; +use crate::{InitializationOrder, TEMP_FILE_SUFFIX, import_datadir, span, task_mgr, walredo}; static INIT_DB_SEMAPHORE: Lazy = Lazy::new(|| Semaphore::new(8)); -use utils::{ - crashsafe, - generation::Generation, - id::TimelineId, - lsn::{Lsn, RecordLsn}, -}; +use utils::crashsafe; +use utils::generation::Generation; +use utils::id::TimelineId; +use utils::lsn::{Lsn, RecordLsn}; pub mod blob_io; pub mod block_io; @@ -183,9 +133,9 @@ mod gc_block; mod gc_result; pub(crate) mod throttle; -pub(crate) use crate::span::debug_assert_current_span_has_tenant_and_timeline_id; pub(crate) use timeline::{LogicalSizeCalculationCause, PageReconstructError, Timeline}; +pub(crate) use crate::span::debug_assert_current_span_has_tenant_and_timeline_id; // re-export for use in walreceiver pub use crate::tenant::timeline::WalReceiverInfo; @@ -250,7 +200,9 @@ impl AttachedTenantConf { Ok(Self::new(location_conf.tenant_conf, *attach_conf)) } LocationMode::Secondary(_) => { - anyhow::bail!("Attempted to construct AttachedTenantConf from a LocationConf in secondary mode") + anyhow::bail!( + "Attempted to construct AttachedTenantConf from a LocationConf in secondary mode" + ) } } } @@ -464,7 +416,9 @@ impl WalredoManagerId { static NEXT: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(1); let id = NEXT.fetch_add(1, std::sync::atomic::Ordering::Relaxed); if id == 0 { - panic!("WalredoManagerId::new() returned 0, indicating wraparound, risking it's no longer unique"); + panic!( + "WalredoManagerId::new() returned 0, indicating wraparound, risking it's no longer unique" + ); } Self(id) } @@ -1168,6 +1122,7 @@ impl Tenant { resources, CreateTimelineCause::Load, idempotency.clone(), + index_part.gc_compaction.clone(), )?; let disk_consistent_lsn = timeline.get_disk_consistent_lsn(); anyhow::ensure!( @@ -1189,12 +1144,47 @@ impl Tenant { format!("Failed to load layermap for timeline {tenant_id}/{timeline_id}") })?; + // When unarchiving, we've mostly likely lost the heatmap generated prior + // to the archival operation. To allow warming this timeline up, generate + // a previous heatmap which contains all visible layers in the layer map. + // This previous heatmap will be used whenever a fresh heatmap is generated + // for the timeline. + if matches!(cause, LoadTimelineCause::Unoffload) { + let mut tline_ending_at = Some((&timeline, timeline.get_last_record_lsn())); + while let Some((tline, end_lsn)) = tline_ending_at { + let unarchival_heatmap = tline.generate_unarchival_heatmap(end_lsn).await; + if !tline.is_previous_heatmap_active() { + tline + .previous_heatmap + .store(Some(Arc::new(unarchival_heatmap))); + } else { + tracing::info!("Previous heatmap still active. Dropping unarchival heatmap.") + } + + match tline.ancestor_timeline() { + Some(ancestor) => { + if ancestor.update_layer_visibility().await.is_err() { + // Ancestor timeline is shutting down. 
+ break; + } + + tline_ending_at = Some((ancestor, tline.get_ancestor_lsn())); + } + None => { + tline_ending_at = None; + } + } + } + } + match import_pgdata { Some(import_pgdata) if !import_pgdata.is_done() => { match cause { LoadTimelineCause::Attach | LoadTimelineCause::Unoffload => (), LoadTimelineCause::ImportPgdata { .. } => { - unreachable!("ImportPgdata should not be reloading timeline import is done and persisted as such in s3") + unreachable!( + "ImportPgdata should not be reloading timeline import is done and persisted as such in s3" + ) } } let mut guard = self.timelines_creating.lock().unwrap(); @@ -1227,8 +1217,8 @@ impl Tenant { // We should never try and load the same timeline twice during startup Entry::Occupied(_) => { unreachable!( - "Timeline {tenant_id}/{timeline_id} already exists in the tenant map" - ); + "Timeline {tenant_id}/{timeline_id} already exists in the tenant map" + ); } Entry::Vacant(v) => { v.insert(Arc::clone(&timeline)); @@ -1622,7 +1612,9 @@ impl Tenant { failpoint_support::sleep_millis_async!("before-attaching-tenant"); let Some(preload) = preload else { - anyhow::bail!("local-only deployment is no longer supported, https://github.com/neondatabase/neon/issues/5624"); + anyhow::bail!( + "local-only deployment is no longer supported, https://github.com/neondatabase/neon/issues/5624" + ); }; let mut offloaded_timeline_ids = HashSet::new(); @@ -2006,7 +1998,7 @@ impl Tenant { remote_storage: GenericRemoteStorage, previous_heatmap: Option, cancel: CancellationToken, - ) -> impl Future { + ) -> impl Future + use<> { let client = self.build_timeline_client(timeline_id, remote_storage); async move { debug_assert_current_span_has_tenant_and_timeline_id(); @@ -2701,7 +2693,9 @@ impl Tenant { timeline } CreateTimelineResult::ImportSpawned(timeline) => { - info!("import task spawned, timeline will become visible and activated once the import is done"); + info!( + "import task spawned, timeline will become visible and activated once the import is done" + ); timeline } }; @@ -2747,7 +2741,7 @@ impl Tenant { { StartCreatingTimelineResult::CreateGuard(guard) => guard, StartCreatingTimelineResult::Idempotent(timeline) => { - return Ok(CreateTimelineResult::Idempotent(timeline)) + return Ok(CreateTimelineResult::Idempotent(timeline)); } }; @@ -2881,7 +2875,9 @@ impl Tenant { let index_part = match index_part { MaybeDeletedIndexPart::Deleted(_) => { // likely concurrent delete call, cplane should prevent this - anyhow::bail!("index part says deleted but we are not done creating yet, this should not happen but") + anyhow::bail!( + "index part says deleted but we are not done creating yet, this should not happen but" + ) } MaybeDeletedIndexPart::IndexPart(p) => p, }; @@ -3092,20 +3088,19 @@ impl Tenant { // If we're done compacting, check the scheduled GC compaction queue for more work. 
if outcome == CompactionOutcome::Done { - let queue = self - .scheduled_compaction_tasks - .lock() - .unwrap() - .get(&timeline.timeline_id) - .cloned(); - if let Some(queue) = queue { - outcome = queue - .iteration(cancel, ctx, &self.gc_block, &timeline) - .instrument( - info_span!("gc_compact_timeline", timeline_id = %timeline.timeline_id), - ) - .await?; - } + let queue = { + let mut guard = self.scheduled_compaction_tasks.lock().unwrap(); + guard + .entry(timeline.timeline_id) + .or_insert_with(|| Arc::new(GcCompactionQueue::new())) + .clone() + }; + outcome = queue + .iteration(cancel, ctx, &self.gc_block, &timeline) + .instrument( + info_span!("gc_compact_timeline", timeline_id = %timeline.timeline_id), + ) + .await?; } // If we're done compacting, offload the timeline if requested. @@ -3146,11 +3141,13 @@ impl Tenant { /// Trips the compaction circuit breaker if appropriate. pub(crate) fn maybe_trip_compaction_breaker(&self, err: &CompactionError) { match err { + err if err.is_cancel() => {} CompactionError::ShuttingDown => (), // Offload failures don't trip the circuit breaker, since they're cheap to retry and // shouldn't block compaction. CompactionError::Offload(_) => {} CompactionError::CollectKeySpaceError(err) => { + // CollectKeySpaceError::Cancelled and PageRead::Cancelled are handled in `err.is_cancel` branch. self.compaction_circuit_breaker .lock() .unwrap() @@ -3162,6 +3159,7 @@ impl Tenant { .unwrap() .fail(&CIRCUIT_BREAKERS_BROKEN, err); } + CompactionError::AlreadyRunning(_) => {} } } @@ -3872,7 +3870,9 @@ where if !later.is_empty() { for (missing_id, orphan_ids) in later { for (orphan_id, _) in orphan_ids { - error!("could not load timeline {orphan_id} because its ancestor timeline {missing_id} could not be loaded"); + error!( + "could not load timeline {orphan_id} because its ancestor timeline {missing_id} could not be loaded" + ); } } bail!("could not load tenant because some timelines are missing ancestors"); @@ -4117,6 +4117,7 @@ impl Tenant { resources: TimelineResources, cause: CreateTimelineCause, create_idempotency: CreateTimelineIdempotency, + gc_compaction_state: Option, ) -> anyhow::Result> { let state = match cause { CreateTimelineCause::Load => { @@ -4148,6 +4149,7 @@ impl Tenant { state, self.attach_wal_lag_cooldown.clone(), create_idempotency, + gc_compaction_state, self.cancel.child_token(), ); @@ -4790,7 +4792,10 @@ impl Tenant { let gc_info = src_timeline.gc_info.read().unwrap(); let planned_cutoff = gc_info.min_cutoff(); if gc_info.lsn_covered_by_lease(start_lsn) { - tracing::info!("skipping comparison of {start_lsn} with gc cutoff {} and planned gc cutoff {planned_cutoff} due to lsn lease", *applied_gc_cutoff_lsn); + tracing::info!( + "skipping comparison of {start_lsn} with gc cutoff {} and planned gc cutoff {planned_cutoff} due to lsn lease", + *applied_gc_cutoff_lsn + ); } else { src_timeline .check_lsn_is_in_scope(start_lsn, &applied_gc_cutoff_lsn) @@ -4936,7 +4941,9 @@ impl Tenant { } // Idempotent <=> CreateTimelineIdempotency is identical (x, y) if x == y => { - info!("timeline already exists and idempotency matches, succeeding request"); + info!( + "timeline already exists and idempotency matches, succeeding request" + ); // fallthrough } (_, _) => { @@ -5018,7 +5025,7 @@ impl Tenant { { StartCreatingTimelineResult::CreateGuard(guard) => guard, StartCreatingTimelineResult::Idempotent(timeline) => { - return Ok(CreateTimelineResult::Idempotent(timeline)) + return Ok(CreateTimelineResult::Idempotent(timeline)); } }; @@ -5213,6 +5220,7 @@ 
impl Tenant { resources, CreateTimelineCause::Load, create_guard.idempotency.clone(), + None, ) .context("Failed to create timeline data structure")?; @@ -5222,7 +5230,9 @@ impl Tenant { .create_timeline_files(&create_guard.timeline_path) .await { - error!("Failed to create initial files for timeline {tenant_shard_id}/{new_timeline_id}, cleaning up: {e:?}"); + error!( + "Failed to create initial files for timeline {tenant_shard_id}/{new_timeline_id}, cleaning up: {e:?}" + ); cleanup_timeline_directory(create_guard); return Err(e); } @@ -5587,20 +5597,19 @@ pub async fn dump_layerfile_from_path( #[cfg(test)] pub(crate) mod harness { use bytes::{Bytes, BytesMut}; + use hex_literal::hex; use once_cell::sync::OnceCell; + use pageserver_api::key::Key; use pageserver_api::models::ShardParameters; + use pageserver_api::record::NeonWalRecord; use pageserver_api::shard::ShardIndex; + use utils::id::TenantId; use utils::logging; + use super::*; use crate::deletion_queue::mock::MockDeletionQueue; use crate::l0_flush::L0FlushConfig; use crate::walredo::apply_neon; - use pageserver_api::key::Key; - use pageserver_api::record::NeonWalRecord; - - use super::*; - use hex_literal::hex; - use utils::id::TenantId; pub const TIMELINE_ID: TimelineId = TimelineId::from_array(hex!("11223344556677881122334455667788")); @@ -5881,34 +5890,34 @@ pub(crate) mod harness { mod tests { use std::collections::{BTreeMap, BTreeSet}; - use super::*; - use crate::keyspace::KeySpaceAccum; - use crate::tenant::harness::*; - use crate::tenant::timeline::CompactFlags; - use crate::DEFAULT_PG_VERSION; use bytes::{Bytes, BytesMut}; use hex_literal::hex; use itertools::Itertools; - use pageserver_api::key::{Key, AUX_KEY_PREFIX, NON_INHERITED_RANGE, RELATION_SIZE_PREFIX}; + #[cfg(feature = "testing")] + use models::CompactLsnRange; + use pageserver_api::key::{AUX_KEY_PREFIX, Key, NON_INHERITED_RANGE, RELATION_SIZE_PREFIX}; use pageserver_api::keyspace::KeySpace; use pageserver_api::models::{CompactionAlgorithm, CompactionAlgorithmSettings}; + #[cfg(feature = "testing")] + use pageserver_api::record::NeonWalRecord; use pageserver_api::value::Value; use pageserver_compaction::helpers::overlaps_with; - use rand::{thread_rng, Rng}; + use rand::{Rng, thread_rng}; use storage_layer::{IoConcurrency, PersistentLayerKey}; use tests::storage_layer::ValuesReconstructState; use tests::timeline::{GetVectoredError, ShutdownMode}; + #[cfg(feature = "testing")] + use timeline::GcInfo; + #[cfg(feature = "testing")] + use timeline::compaction::{KeyHistoryRetention, KeyLogAtLsn}; use timeline::{CompactOptions, DeltaLayerTestDesc}; use utils::id::TenantId; - #[cfg(feature = "testing")] - use models::CompactLsnRange; - #[cfg(feature = "testing")] - use pageserver_api::record::NeonWalRecord; - #[cfg(feature = "testing")] - use timeline::compaction::{KeyHistoryRetention, KeyLogAtLsn}; - #[cfg(feature = "testing")] - use timeline::GcInfo; + use super::*; + use crate::DEFAULT_PG_VERSION; + use crate::keyspace::KeySpaceAccum; + use crate::tenant::harness::*; + use crate::tenant::timeline::CompactFlags; static TEST_KEY: Lazy = Lazy::new(|| Key::from_slice(&hex!("010000000033333333444444445500000001"))); @@ -6158,11 +6167,12 @@ mod tests { panic!("wrong error type") }; assert!(err.to_string().contains("invalid branch start lsn")); - assert!(err - .source() - .unwrap() - .to_string() - .contains("we might've already garbage collected needed data")) + assert!( + err.source() + .unwrap() + .to_string() + .contains("we might've already garbage collected needed 
data") + ) } } @@ -6191,11 +6201,12 @@ mod tests { panic!("wrong error type"); }; assert!(&err.to_string().contains("invalid branch start lsn")); - assert!(&err - .source() - .unwrap() - .to_string() - .contains("is earlier than latest GC cutoff")); + assert!( + &err.source() + .unwrap() + .to_string() + .contains("is earlier than latest GC cutoff") + ); } } @@ -7504,10 +7515,12 @@ mod tests { } } - assert!(!harness - .conf - .timeline_path(&tenant.tenant_shard_id, &TIMELINE_ID) - .exists()); + assert!( + !harness + .conf + .timeline_path(&tenant.tenant_shard_id, &TIMELINE_ID) + .exists() + ); Ok(()) } @@ -7708,7 +7721,10 @@ mod tests { let after_num_l0_delta_files = tline.layers.read().await.layer_map()?.level0_deltas().len(); - assert!(after_num_l0_delta_files < before_num_l0_delta_files, "after_num_l0_delta_files={after_num_l0_delta_files}, before_num_l0_delta_files={before_num_l0_delta_files}"); + assert!( + after_num_l0_delta_files < before_num_l0_delta_files, + "after_num_l0_delta_files={after_num_l0_delta_files}, before_num_l0_delta_files={before_num_l0_delta_files}" + ); assert_eq!( tline.get(test_key, lsn, &ctx).await?, @@ -7875,7 +7891,10 @@ mod tests { let (_, after_delta_file_accessed) = scan_with_statistics(&tline, &keyspace, lsn, &ctx, io_concurrency.clone()) .await?; - assert!(after_delta_file_accessed < before_delta_file_accessed, "after_delta_file_accessed={after_delta_file_accessed}, before_delta_file_accessed={before_delta_file_accessed}"); + assert!( + after_delta_file_accessed < before_delta_file_accessed, + "after_delta_file_accessed={after_delta_file_accessed}, before_delta_file_accessed={before_delta_file_accessed}" + ); // Given that we already produced an image layer, there should be no delta layer needed for the scan, but still setting a low threshold there for unforeseen circumstances. assert!( after_delta_file_accessed <= 2, @@ -7929,10 +7948,12 @@ mod tests { get_vectored_impl_wrapper(&tline, base_key, lsn, &ctx).await?, Some(test_img("data key 1")) ); - assert!(get_vectored_impl_wrapper(&tline, base_key_child, lsn, &ctx) - .await - .unwrap_err() - .is_missing_key_error()); + assert!( + get_vectored_impl_wrapper(&tline, base_key_child, lsn, &ctx) + .await + .unwrap_err() + .is_missing_key_error() + ); assert!( get_vectored_impl_wrapper(&tline, base_key_nonexist, lsn, &ctx) .await diff --git a/pageserver/src/tenant/blob_io.rs b/pageserver/src/tenant/blob_io.rs index 7b55df52a5..b16a88eaa4 100644 --- a/pageserver/src/tenant/blob_io.rs +++ b/pageserver/src/tenant/blob_io.rs @@ -14,6 +14,9 @@ //! len < 128: 0XXXXXXX //! len >= 128: 1CCCXXXX XXXXXXXX XXXXXXXX XXXXXXXX //! 
+use std::cmp::min; +use std::io::{Error, ErrorKind}; + use async_compression::Level; use bytes::{BufMut, BytesMut}; use pageserver_api::models::ImageCompressionAlgorithm; @@ -24,10 +27,8 @@ use tracing::warn; use crate::context::RequestContext; use crate::page_cache::PAGE_SZ; use crate::tenant::block_io::BlockCursor; -use crate::virtual_file::owned_buffers_io::io_buf_ext::{FullSlice, IoBufExt}; use crate::virtual_file::VirtualFile; -use std::cmp::min; -use std::io::{Error, ErrorKind}; +use crate::virtual_file::owned_buffers_io::io_buf_ext::{FullSlice, IoBufExt}; #[derive(Copy, Clone, Debug)] pub struct CompressionInfo { @@ -414,12 +415,15 @@ impl BlobWriter { #[cfg(test)] pub(crate) mod tests { - use super::*; - use crate::{context::DownloadBehavior, task_mgr::TaskKind, tenant::block_io::BlockReaderRef}; use camino::Utf8PathBuf; use camino_tempfile::Utf8TempDir; use rand::{Rng, SeedableRng}; + use super::*; + use crate::context::DownloadBehavior; + use crate::task_mgr::TaskKind; + use crate::tenant::block_io::BlockReaderRef; + async fn round_trip_test(blobs: &[Vec]) -> Result<(), Error> { round_trip_test_compressed::(blobs, false).await } @@ -486,7 +490,7 @@ pub(crate) mod tests { pub(crate) fn random_array(len: usize) -> Vec { let mut rng = rand::thread_rng(); - (0..len).map(|_| rng.gen()).collect::<_>() + (0..len).map(|_| rng.r#gen()).collect::<_>() } #[tokio::test] @@ -544,9 +548,9 @@ pub(crate) mod tests { let mut rng = rand::rngs::StdRng::seed_from_u64(42); let blobs = (0..1024) .map(|_| { - let mut sz: u16 = rng.gen(); + let mut sz: u16 = rng.r#gen(); // Make 50% of the arrays small - if rng.gen() { + if rng.r#gen() { sz &= 63; } random_array(sz.into()) diff --git a/pageserver/src/tenant/block_io.rs b/pageserver/src/tenant/block_io.rs index 990211f80a..66c586daff 100644 --- a/pageserver/src/tenant/block_io.rs +++ b/pageserver/src/tenant/block_io.rs @@ -2,14 +2,16 @@ //! Low-level Block-oriented I/O functions //! +use std::ops::Deref; + +use bytes::Bytes; + use super::storage_layer::delta_layer::{Adapter, DeltaLayerInner}; use crate::context::RequestContext; -use crate::page_cache::{self, FileId, PageReadGuard, PageWriteGuard, ReadBufResult, PAGE_SZ}; +use crate::page_cache::{self, FileId, PAGE_SZ, PageReadGuard, PageWriteGuard, ReadBufResult}; #[cfg(test)] use crate::virtual_file::IoBufferMut; use crate::virtual_file::VirtualFile; -use bytes::Bytes; -use std::ops::Deref; /// This is implemented by anything that can read 8 kB (PAGE_SZ) /// blocks, using the page cache diff --git a/pageserver/src/tenant/checks.rs b/pageserver/src/tenant/checks.rs index f98356242e..d5b979ab2a 100644 --- a/pageserver/src/tenant/checks.rs +++ b/pageserver/src/tenant/checks.rs @@ -63,9 +63,9 @@ pub fn check_valid_layermap(metadata: &[LayerName]) -> Option { && overlaps_with(&layer.key_range, &other_layer.key_range) { let err = format!( - "layer violates the layer map LSN split assumption: layer {} intersects with layer {}", - layer, other_layer - ); + "layer violates the layer map LSN split assumption: layer {} intersects with layer {}", + layer, other_layer + ); return Some(err); } } diff --git a/pageserver/src/tenant/config.rs b/pageserver/src/tenant/config.rs index c6bcfdf2fb..334fb04604 100644 --- a/pageserver/src/tenant/config.rs +++ b/pageserver/src/tenant/config.rs @@ -8,16 +8,17 @@ //! We cannot use global or default config instead, because wrong settings //! may lead to a data loss. //! 
+use std::num::NonZeroU64; +use std::time::Duration; + pub(crate) use pageserver_api::config::TenantConfigToml as TenantConf; -use pageserver_api::models::CompactionAlgorithmSettings; -use pageserver_api::models::EvictionPolicy; -use pageserver_api::models::{self, TenantConfigPatch}; +use pageserver_api::models::{ + self, CompactionAlgorithmSettings, EvictionPolicy, TenantConfigPatch, +}; use pageserver_api::shard::{ShardCount, ShardIdentity, ShardNumber, ShardStripeSize}; use serde::de::IntoDeserializer; use serde::{Deserialize, Serialize}; use serde_json::Value; -use std::num::NonZeroU64; -use std::time::Duration; use utils::generation::Generation; use utils::postgres_client::PostgresClientProtocol; @@ -693,16 +694,15 @@ impl TryFrom<&'_ models::TenantConfig> for TenantConfOpt { /// This is a conversion from our internal tenant config object to the one used /// in external APIs. impl From for models::TenantConfig { + // TODO(vlad): These are now the same, but they have different serialization logic. + // Can we merge them? fn from(value: TenantConfOpt) -> Self { - fn humantime(d: Duration) -> String { - format!("{}s", d.as_secs()) - } Self { checkpoint_distance: value.checkpoint_distance, - checkpoint_timeout: value.checkpoint_timeout.map(humantime), + checkpoint_timeout: value.checkpoint_timeout, compaction_algorithm: value.compaction_algorithm, compaction_target_size: value.compaction_target_size, - compaction_period: value.compaction_period.map(humantime), + compaction_period: value.compaction_period, compaction_threshold: value.compaction_threshold, compaction_upper_limit: value.compaction_upper_limit, compaction_l0_first: value.compaction_l0_first, @@ -711,24 +711,23 @@ impl From for models::TenantConfig { l0_flush_stall_threshold: value.l0_flush_stall_threshold, l0_flush_wait_upload: value.l0_flush_wait_upload, gc_horizon: value.gc_horizon, - gc_period: value.gc_period.map(humantime), + gc_period: value.gc_period, image_creation_threshold: value.image_creation_threshold, - pitr_interval: value.pitr_interval.map(humantime), - walreceiver_connect_timeout: value.walreceiver_connect_timeout.map(humantime), - lagging_wal_timeout: value.lagging_wal_timeout.map(humantime), + pitr_interval: value.pitr_interval, + walreceiver_connect_timeout: value.walreceiver_connect_timeout, + lagging_wal_timeout: value.lagging_wal_timeout, max_lsn_wal_lag: value.max_lsn_wal_lag, eviction_policy: value.eviction_policy, min_resident_size_override: value.min_resident_size_override, evictions_low_residence_duration_metric_threshold: value - .evictions_low_residence_duration_metric_threshold - .map(humantime), - heatmap_period: value.heatmap_period.map(humantime), + .evictions_low_residence_duration_metric_threshold, + heatmap_period: value.heatmap_period, lazy_slru_download: value.lazy_slru_download, timeline_get_throttle: value.timeline_get_throttle, image_layer_creation_check_threshold: value.image_layer_creation_check_threshold, image_creation_preempt_threshold: value.image_creation_preempt_threshold, - lsn_lease_length: value.lsn_lease_length.map(humantime), - lsn_lease_length_for_ts: value.lsn_lease_length_for_ts.map(humantime), + lsn_lease_length: value.lsn_lease_length, + lsn_lease_length_for_ts: value.lsn_lease_length_for_ts, timeline_offloading: value.timeline_offloading, wal_receiver_protocol_override: value.wal_receiver_protocol_override, rel_size_v2_enabled: value.rel_size_v2_enabled, @@ -741,9 +740,10 @@ impl From for models::TenantConfig { #[cfg(test)] mod tests { - use super::*; use 
models::TenantConfig; + use super::*; + #[test] fn de_serializing_pageserver_config_omits_empty_values() { let small_conf = TenantConfOpt { @@ -760,29 +760,10 @@ mod tests { assert_eq!(small_conf, serde_json::from_str(&json_form).unwrap()); } - #[test] - fn test_try_from_models_tenant_config_err() { - let tenant_config = models::TenantConfig { - lagging_wal_timeout: Some("5a".to_string()), - ..TenantConfig::default() - }; - - let tenant_conf_opt = TenantConfOpt::try_from(&tenant_config); - - assert!( - tenant_conf_opt.is_err(), - "Suceeded to convert TenantConfig to TenantConfOpt" - ); - - let expected_error_str = - "lagging_wal_timeout: invalid value: string \"5a\", expected a duration"; - assert_eq!(tenant_conf_opt.unwrap_err().to_string(), expected_error_str); - } - #[test] fn test_try_from_models_tenant_config_success() { let tenant_config = models::TenantConfig { - lagging_wal_timeout: Some("5s".to_string()), + lagging_wal_timeout: Some(Duration::from_secs(5)), ..TenantConfig::default() }; diff --git a/pageserver/src/tenant/disk_btree.rs b/pageserver/src/tenant/disk_btree.rs index bb9df020b5..73c105b34e 100644 --- a/pageserver/src/tenant/disk_btree.rs +++ b/pageserver/src/tenant/disk_btree.rs @@ -18,27 +18,23 @@ //! - An Iterator interface would be more convenient for the callers than the //! 'visit' function //! +use std::cmp::Ordering; +use std::iter::Rev; +use std::ops::{Range, RangeInclusive}; +use std::{io, result}; + use async_stream::try_stream; -use byteorder::{ReadBytesExt, BE}; +use byteorder::{BE, ReadBytesExt}; use bytes::{BufMut, Bytes, BytesMut}; use either::Either; use futures::{Stream, StreamExt}; use hex; -use std::{ - cmp::Ordering, - io, - iter::Rev, - ops::{Range, RangeInclusive}, - result, -}; use thiserror::Error; use tracing::error; -use crate::{ - context::{DownloadBehavior, RequestContext}, - task_mgr::TaskKind, - tenant::block_io::{BlockReader, BlockWriter}, -}; +use crate::context::{DownloadBehavior, RequestContext}; +use crate::task_mgr::TaskKind; +use crate::tenant::block_io::{BlockReader, BlockWriter}; // The maximum size of a value stored in the B-tree. 5 bytes is enough currently. pub const VALUE_SZ: usize = 5; @@ -833,12 +829,14 @@ impl BuildNode { #[cfg(test)] pub(crate) mod tests { - use super::*; - use crate::tenant::block_io::{BlockCursor, BlockLease, BlockReaderRef}; - use rand::Rng; use std::collections::BTreeMap; use std::sync::atomic::{AtomicUsize, Ordering}; + use rand::Rng; + + use super::*; + use crate::tenant::block_io::{BlockCursor, BlockLease, BlockReaderRef}; + #[derive(Clone, Default)] pub(crate) struct TestDisk { blocks: Vec, @@ -1115,7 +1113,7 @@ pub(crate) mod tests { // Test get() operations on random keys, most of which will not exist for _ in 0..100000 { - let key_int = rand::thread_rng().gen::(); + let key_int = rand::thread_rng().r#gen::(); let search_key = u128::to_be_bytes(key_int); assert!(reader.get(&search_key, &ctx).await? == all_data.get(&key_int).cloned()); } diff --git a/pageserver/src/tenant/ephemeral_file.rs b/pageserver/src/tenant/ephemeral_file.rs index ba79672bc7..cb25fa6185 100644 --- a/pageserver/src/tenant/ephemeral_file.rs +++ b/pageserver/src/tenant/ephemeral_file.rs @@ -1,6 +1,17 @@ //! Implementation of append-only file data structure //! used to keep in-memory layers spilled on disk. 
+use std::io; +use std::sync::Arc; +use std::sync::atomic::AtomicU64; + +use camino::Utf8PathBuf; +use num_traits::Num; +use pageserver_api::shard::TenantShardId; +use tokio_epoll_uring::{BoundedBuf, Slice}; +use tracing::error; +use utils::id::TimelineId; + use crate::assert_u64_eq_usize::{U64IsUsize, UsizeIsU64}; use crate::config::PageServerConf; use crate::context::RequestContext; @@ -9,17 +20,7 @@ use crate::tenant::storage_layer::inmemory_layer::vectored_dio_read::File; use crate::virtual_file::owned_buffers_io::io_buf_aligned::IoBufAlignedMut; use crate::virtual_file::owned_buffers_io::slice::SliceMutExt; use crate::virtual_file::owned_buffers_io::write::Buffer; -use crate::virtual_file::{self, owned_buffers_io, IoBufferMut, VirtualFile}; -use camino::Utf8PathBuf; -use num_traits::Num; -use pageserver_api::shard::TenantShardId; -use tokio_epoll_uring::{BoundedBuf, Slice}; -use tracing::error; - -use std::io; -use std::sync::atomic::AtomicU64; -use std::sync::Arc; -use utils::id::TimelineId; +use crate::virtual_file::{self, IoBufferMut, VirtualFile, owned_buffers_io}; pub struct EphemeralFile { _tenant_shard_id: TenantShardId, @@ -319,13 +320,14 @@ pub fn is_ephemeral_file(filename: &str) -> bool { #[cfg(test)] mod tests { + use std::fs; + use std::str::FromStr; + use rand::Rng; use super::*; use crate::context::DownloadBehavior; use crate::task_mgr::TaskKind; - use std::fs; - use std::str::FromStr; fn harness( test_name: &str, diff --git a/pageserver/src/tenant/gc_block.rs b/pageserver/src/tenant/gc_block.rs index af73acb2be..7aa920c953 100644 --- a/pageserver/src/tenant/gc_block.rs +++ b/pageserver/src/tenant/gc_block.rs @@ -1,4 +1,5 @@ -use std::{collections::HashMap, sync::Arc}; +use std::collections::HashMap; +use std::sync::Arc; use utils::id::TimelineId; diff --git a/pageserver/src/tenant/gc_result.rs b/pageserver/src/tenant/gc_result.rs index c805aafeab..7a7d6d19cb 100644 --- a/pageserver/src/tenant/gc_result.rs +++ b/pageserver/src/tenant/gc_result.rs @@ -1,8 +1,9 @@ -use anyhow::Result; -use serde::Serialize; use std::ops::AddAssign; use std::time::Duration; +use anyhow::Result; +use serde::Serialize; + /// /// Result of performing GC /// diff --git a/pageserver/src/tenant/layer_map.rs b/pageserver/src/tenant/layer_map.rs index a69cce932e..59f5a6bd90 100644 --- a/pageserver/src/tenant/layer_map.rs +++ b/pageserver/src/tenant/layer_map.rs @@ -46,24 +46,24 @@ mod historic_layer_coverage; mod layer_coverage; -use crate::context::RequestContext; -use crate::keyspace::KeyPartitioning; -use crate::tenant::storage_layer::InMemoryLayer; -use anyhow::Result; -use pageserver_api::key::Key; -use pageserver_api::keyspace::{KeySpace, KeySpaceAccum}; -use range_set_blaze::{CheckSortedDisjoint, RangeSetBlaze}; use std::collections::{HashMap, VecDeque}; use std::iter::Peekable; use std::ops::Range; use std::sync::Arc; + +use anyhow::Result; +use historic_layer_coverage::BufferedHistoricLayerCoverage; +pub use historic_layer_coverage::LayerKey; +use pageserver_api::key::Key; +use pageserver_api::keyspace::{KeySpace, KeySpaceAccum}; +use range_set_blaze::{CheckSortedDisjoint, RangeSetBlaze}; use tokio::sync::watch; use utils::lsn::Lsn; -use historic_layer_coverage::BufferedHistoricLayerCoverage; -pub use historic_layer_coverage::LayerKey; - use super::storage_layer::{LayerVisibilityHint, PersistentLayerDesc}; +use crate::context::RequestContext; +use crate::keyspace::KeyPartitioning; +use crate::tenant::storage_layer::InMemoryLayer; /// /// LayerMap tracks what layers exist on a timeline. 
@@ -1066,18 +1066,17 @@ impl LayerMap { #[cfg(test)] mod tests { - use crate::tenant::{storage_layer::LayerName, IndexPart}; - use pageserver_api::{ - key::DBDIR_KEY, - keyspace::{KeySpace, KeySpaceRandomAccum}, - }; - use std::{collections::HashMap, path::PathBuf}; - use utils::{ - id::{TenantId, TimelineId}, - shard::TenantShardId, - }; + use std::collections::HashMap; + use std::path::PathBuf; + + use pageserver_api::key::DBDIR_KEY; + use pageserver_api::keyspace::{KeySpace, KeySpaceRandomAccum}; + use utils::id::{TenantId, TimelineId}; + use utils::shard::TenantShardId; use super::*; + use crate::tenant::IndexPart; + use crate::tenant::storage_layer::LayerName; #[derive(Clone)] struct LayerDesc { @@ -1417,9 +1416,11 @@ mod tests { assert!(!shadow.ranges.is_empty()); // At least some layers should be marked covered - assert!(layer_visibilities - .iter() - .any(|i| matches!(i.1, LayerVisibilityHint::Covered))); + assert!( + layer_visibilities + .iter() + .any(|i| matches!(i.1, LayerVisibilityHint::Covered)) + ); let layer_visibilities = layer_visibilities.into_iter().collect::>(); diff --git a/pageserver/src/tenant/layer_map/historic_layer_coverage.rs b/pageserver/src/tenant/layer_map/historic_layer_coverage.rs index 136f68bc36..f8bec48886 100644 --- a/pageserver/src/tenant/layer_map/historic_layer_coverage.rs +++ b/pageserver/src/tenant/layer_map/historic_layer_coverage.rs @@ -3,9 +3,8 @@ use std::ops::Range; use tracing::info; -use crate::tenant::storage_layer::PersistentLayerDesc; - use super::layer_coverage::LayerCoverageTuple; +use crate::tenant::storage_layer::PersistentLayerDesc; /// Layers in this module are identified and indexed by this data. /// diff --git a/pageserver/src/tenant/metadata.rs b/pageserver/src/tenant/metadata.rs index 15c6955260..77f9a3579d 100644 --- a/pageserver/src/tenant/metadata.rs +++ b/pageserver/src/tenant/metadata.rs @@ -19,8 +19,9 @@ use anyhow::ensure; use serde::{Deserialize, Serialize}; -use utils::bin_ser::SerializeError; -use utils::{bin_ser::BeSer, id::TimelineId, lsn::Lsn}; +use utils::bin_ser::{BeSer, SerializeError}; +use utils::id::TimelineId; +use utils::lsn::Lsn; /// Use special format number to enable backward compatibility. const METADATA_FORMAT_VERSION: u16 = 4; @@ -345,9 +346,10 @@ impl TimelineMetadata { } pub(crate) mod modern_serde { - use super::{TimelineMetadata, TimelineMetadataBodyV2, TimelineMetadataHeader}; use serde::{Deserialize, Serialize}; + use super::{TimelineMetadata, TimelineMetadataBodyV2, TimelineMetadataHeader}; + pub(crate) fn deserialize<'de, D>(deserializer: D) -> Result where D: serde::de::Deserializer<'de>, diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs index 22ee560dbf..003f84e640 100644 --- a/pageserver/src/tenant/mgr.rs +++ b/pageserver/src/tenant/mgr.rs @@ -1,34 +1,42 @@ //! This module acts as a switchboard to access different repositories managed by this //! page server. 
-use camino::{Utf8DirEntry, Utf8Path, Utf8PathBuf}; -use futures::StreamExt; -use itertools::Itertools; -use pageserver_api::key::Key; -use pageserver_api::models::LocationConfigMode; -use pageserver_api::shard::{ - ShardCount, ShardIdentity, ShardIndex, ShardNumber, ShardStripeSize, TenantShardId, -}; -use pageserver_api::upcall_api::ReAttachResponseTenant; -use rand::{distributions::Alphanumeric, Rng}; -use remote_storage::TimeoutOrCancel; use std::borrow::Cow; use std::cmp::Ordering; use std::collections::{BTreeMap, HashMap, HashSet}; use std::ops::Deref; use std::sync::Arc; use std::time::Duration; -use sysinfo::SystemExt; -use tokio::fs; use anyhow::Context; +use camino::{Utf8DirEntry, Utf8Path, Utf8PathBuf}; +use futures::StreamExt; +use itertools::Itertools; use once_cell::sync::Lazy; +use pageserver_api::key::Key; +use pageserver_api::models::LocationConfigMode; +use pageserver_api::shard::{ + ShardCount, ShardIdentity, ShardIndex, ShardNumber, ShardStripeSize, TenantShardId, +}; +use pageserver_api::upcall_api::ReAttachResponseTenant; +use rand::Rng; +use rand::distributions::Alphanumeric; +use remote_storage::TimeoutOrCancel; +use sysinfo::SystemExt; +use tokio::fs; use tokio::task::JoinSet; use tokio_util::sync::CancellationToken; use tracing::*; - +use utils::crashsafe::path_with_suffix_extension; +use utils::fs_ext::PathExt; +use utils::generation::Generation; +use utils::id::{TenantId, TimelineId}; use utils::{backoff, completion, crashsafe}; +use super::remote_timeline_client::remote_tenant_path; +use super::secondary::SecondaryTenant; +use super::timeline::detach_ancestor::{self, PreparedTimelineDetach}; +use super::{GlobalShutDown, TenantSharedResources}; use crate::config::PageServerConf; use crate::context::{DownloadBehavior, RequestContext}; use crate::controller_upcall_client::{ @@ -37,7 +45,7 @@ use crate::controller_upcall_client::{ use crate::deletion_queue::DeletionQueueClient; use crate::http::routes::ACTIVE_TENANT_TIMEOUT; use crate::metrics::{TENANT, TENANT_MANAGER as METRICS}; -use crate::task_mgr::{TaskKind, BACKGROUND_RUNTIME}; +use crate::task_mgr::{BACKGROUND_RUNTIME, TaskKind}; use crate::tenant::config::{ AttachedLocationConfig, AttachmentMode, LocationConf, LocationMode, SecondaryLocationConfig, }; @@ -48,16 +56,6 @@ use crate::tenant::{AttachedTenantConf, GcError, LoadConfigError, SpawnMode, Ten use crate::virtual_file::MaybeFatalIo; use crate::{InitializationOrder, TEMP_FILE_SUFFIX}; -use utils::crashsafe::path_with_suffix_extension; -use utils::fs_ext::PathExt; -use utils::generation::Generation; -use utils::id::{TenantId, TimelineId}; - -use super::remote_timeline_client::remote_tenant_path; -use super::secondary::SecondaryTenant; -use super::timeline::detach_ancestor::{self, PreparedTimelineDetach}; -use super::{GlobalShutDown, TenantSharedResources}; - /// For a tenant that appears in TenantsMap, it may either be /// - `Attached`: has a full Tenant object, is elegible to service /// reads and ingest WAL. @@ -140,7 +138,7 @@ impl TenantStartupMode { /// If this returns None, the re-attach struct is in an invalid state and /// should be ignored in the response. 
fn from_reattach_tenant(rart: ReAttachResponseTenant) -> Option { - match (rart.mode, rart.gen) { + match (rart.mode, rart.r#gen) { (LocationConfigMode::Detached, _) => None, (LocationConfigMode::Secondary, _) => Some(Self::Secondary), (LocationConfigMode::AttachedMulti, Some(g)) => { @@ -376,7 +374,7 @@ async fn init_load_generations( TenantStartupMode::Attached((_mode, generation)) => Some(generation), TenantStartupMode::Secondary => None, } - .map(|gen| (*id, *gen)) + .map(|gen_| (*id, *gen_)) }) .collect(); resources.deletion_queue_client.recover(attached_tenants)?; @@ -502,7 +500,9 @@ pub async fn init_tenant_mgr( .total_memory(); let max_ephemeral_layer_bytes = conf.ephemeral_bytes_per_memory_kb as u64 * (system_memory / 1024); - tracing::info!("Initialized ephemeral layer size limit to {max_ephemeral_layer_bytes}, for {system_memory} bytes of memory"); + tracing::info!( + "Initialized ephemeral layer size limit to {max_ephemeral_layer_bytes}, for {system_memory} bytes of memory" + ); inmemory_layer::GLOBAL_RESOURCES.max_dirty_bytes.store( max_ephemeral_layer_bytes, std::sync::atomic::Ordering::Relaxed, @@ -700,10 +700,11 @@ fn tenant_spawn( // to avoid impacting prod runtime performance. assert!(!crate::is_temporary(tenant_path)); debug_assert!(tenant_path.is_dir()); - debug_assert!(conf - .tenant_location_config_path(&tenant_shard_id) - .try_exists() - .unwrap()); + debug_assert!( + conf.tenant_location_config_path(&tenant_shard_id) + .try_exists() + .unwrap() + ); Tenant::spawn( conf, @@ -791,7 +792,9 @@ async fn shutdown_all_tenants0(tenants: &std::sync::RwLock) { (total_in_progress, total_attached) } TenantsMap::ShuttingDown(_) => { - error!("already shutting down, this function isn't supposed to be called more than once"); + error!( + "already shutting down, this function isn't supposed to be called more than once" + ); return; } } @@ -1016,9 +1019,9 @@ impl TenantManager { Ok(Ok(_)) => return Ok(Some(tenant)), Err(_) => { tracing::warn!( - timeout_ms = flush_timeout.as_millis(), - "Timed out waiting for flush to remote storage, proceeding anyway." - ) + timeout_ms = flush_timeout.as_millis(), + "Timed out waiting for flush to remote storage, proceeding anyway." + ) } } } @@ -1194,7 +1197,9 @@ impl TenantManager { } TenantSlot::Attached(tenant) => { let (_guard, progress) = utils::completion::channel(); - info!("Shutting down just-spawned tenant, because tenant manager is shut down"); + info!( + "Shutting down just-spawned tenant, because tenant manager is shut down" + ); match tenant.shutdown(progress, ShutdownMode::Hard).await { Ok(()) => { info!("Finished shutting down just-spawned tenant"); @@ -1784,7 +1789,7 @@ impl TenantManager { _ => { return Err(anyhow::anyhow!(e).context(format!( "Hard linking {relative_layer} into {child_prefix}" - ))) + ))); } } } @@ -2025,8 +2030,8 @@ impl TenantManager { .wait_to_become_active(std::time::Duration::from_secs(9999)) .await .map_err(|e| { - use pageserver_api::models::TenantState; use GetActiveTenantError::{Cancelled, WillNotBecomeActive}; + use pageserver_api::models::TenantState; match e { Cancelled | WillNotBecomeActive(TenantState::Stopping { .. 
}) => { Error::ShuttingDown @@ -2089,7 +2094,7 @@ impl TenantManager { match selector { ShardSelector::Zero if slot.0.shard_number == ShardNumber(0) => { - return ShardResolveResult::Found(tenant.clone()) + return ShardResolveResult::Found(tenant.clone()); } ShardSelector::Page(key) => { // First slot we see for this tenant, calculate the expected shard number @@ -2486,7 +2491,7 @@ impl SlotGuard { TenantsMap::Initializing => { return Err(TenantSlotUpsertError::MapState( TenantMapError::StillInitializing, - )) + )); } TenantsMap::ShuttingDown(_) => { return Err(TenantSlotUpsertError::ShuttingDown(( @@ -2815,21 +2820,22 @@ where } } -use { - crate::tenant::gc_result::GcResult, http_utils::error::ApiError, - pageserver_api::models::TimelineGcRequest, -}; +use http_utils::error::ApiError; +use pageserver_api::models::TimelineGcRequest; + +use crate::tenant::gc_result::GcResult; #[cfg(test)] mod tests { use std::collections::BTreeMap; use std::sync::Arc; + use tracing::Instrument; + use super::super::harness::TenantHarness; + use super::TenantsMap; use crate::tenant::mgr::TenantSlot; - use super::{super::harness::TenantHarness, TenantsMap}; - #[tokio::test(start_paused = true)] async fn shutdown_awaits_in_progress_tenant() { // Test that if an InProgress tenant is in the map during shutdown, the shutdown will gracefully diff --git a/pageserver/src/tenant/remote_timeline_client.rs b/pageserver/src/tenant/remote_timeline_client.rs index 713efbb9a4..4ba5844fea 100644 --- a/pageserver/src/tenant/remote_timeline_client.rs +++ b/pageserver/src/tenant/remote_timeline_client.rs @@ -179,77 +179,64 @@ pub mod index; pub mod manifest; pub(crate) mod upload; -use anyhow::Context; -use camino::Utf8Path; -use chrono::{NaiveDateTime, Utc}; - -pub(crate) use download::download_initdb_tar_zst; -use pageserver_api::models::TimelineArchivalState; -use pageserver_api::shard::{ShardIndex, TenantShardId}; -use regex::Regex; -use scopeguard::ScopeGuard; -use tokio_util::sync::CancellationToken; -use utils::backoff::{ - self, exponential_backoff, DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS, -}; -use utils::pausable_failpoint; -use utils::shard::ShardNumber; - use std::collections::{HashMap, HashSet, VecDeque}; +use std::ops::DerefMut; use std::sync::atomic::{AtomicU32, Ordering}; use std::sync::{Arc, Mutex, OnceLock}; use std::time::Duration; +use anyhow::Context; +use camino::Utf8Path; +use chrono::{NaiveDateTime, Utc}; +pub(crate) use download::{ + download_index_part, download_initdb_tar_zst, download_tenant_manifest, is_temp_download_file, + list_remote_tenant_shards, list_remote_timelines, +}; +use index::GcCompactionState; +pub(crate) use index::LayerFileMetadata; +use pageserver_api::models::TimelineArchivalState; +use pageserver_api::shard::{ShardIndex, TenantShardId}; +use regex::Regex; use remote_storage::{ DownloadError, GenericRemoteStorage, ListingMode, RemotePath, TimeoutOrCancel, }; -use std::ops::DerefMut; -use tracing::{debug, error, info, instrument, warn}; -use tracing::{info_span, Instrument}; -use utils::lsn::Lsn; - -use crate::context::RequestContext; -use crate::deletion_queue::{DeletionQueueClient, DeletionQueueError}; -use crate::metrics::{ - MeasureRemoteOp, RemoteOpFileKind, RemoteOpKind, RemoteTimelineClientMetrics, - RemoteTimelineClientMetricsCallTrackSize, REMOTE_ONDEMAND_DOWNLOADED_BYTES, - REMOTE_ONDEMAND_DOWNLOADED_LAYERS, +use scopeguard::ScopeGuard; +use tokio_util::sync::CancellationToken; +use tracing::{Instrument, debug, error, info, info_span, instrument, warn}; 
+pub(crate) use upload::upload_initdb_dir; +use utils::backoff::{ + self, DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS, exponential_backoff, }; -use crate::task_mgr::shutdown_token; -use crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id; -use crate::tenant::remote_timeline_client::download::download_retry; -use crate::tenant::storage_layer::AsLayerDesc; -use crate::tenant::upload_queue::{Delete, OpType, UploadQueueStoppedDeletable}; -use crate::tenant::TIMELINES_SEGMENT_NAME; -use crate::{ - config::PageServerConf, - task_mgr, - task_mgr::TaskKind, - task_mgr::BACKGROUND_RUNTIME, - tenant::metadata::TimelineMetadata, - tenant::upload_queue::{ - UploadOp, UploadQueue, UploadQueueInitialized, UploadQueueStopped, UploadTask, - }, - TENANT_HEATMAP_BASENAME, -}; - use utils::id::{TenantId, TimelineId}; +use utils::lsn::Lsn; +use utils::pausable_failpoint; +use utils::shard::ShardNumber; use self::index::IndexPart; - use super::config::AttachedLocationConfig; use super::metadata::MetadataUpdate; use super::storage_layer::{Layer, LayerName, ResidentLayer}; use super::timeline::import_pgdata; use super::upload_queue::{NotInitialized, SetDeletedFlagProgress}; use super::{DeleteTimelineError, Generation}; - -pub(crate) use download::{ - download_index_part, download_tenant_manifest, is_temp_download_file, - list_remote_tenant_shards, list_remote_timelines, +use crate::config::PageServerConf; +use crate::context::RequestContext; +use crate::deletion_queue::{DeletionQueueClient, DeletionQueueError}; +use crate::metrics::{ + MeasureRemoteOp, REMOTE_ONDEMAND_DOWNLOADED_BYTES, REMOTE_ONDEMAND_DOWNLOADED_LAYERS, + RemoteOpFileKind, RemoteOpKind, RemoteTimelineClientMetrics, + RemoteTimelineClientMetricsCallTrackSize, }; -pub(crate) use index::LayerFileMetadata; -pub(crate) use upload::upload_initdb_dir; +use crate::task_mgr::{BACKGROUND_RUNTIME, TaskKind, shutdown_token}; +use crate::tenant::metadata::TimelineMetadata; +use crate::tenant::remote_timeline_client::download::download_retry; +use crate::tenant::storage_layer::AsLayerDesc; +use crate::tenant::upload_queue::{ + Delete, OpType, UploadOp, UploadQueue, UploadQueueInitialized, UploadQueueStopped, + UploadQueueStoppedDeletable, UploadTask, +}; +use crate::tenant::{TIMELINES_SEGMENT_NAME, debug_assert_current_span_has_tenant_and_timeline_id}; +use crate::{TENANT_HEATMAP_BASENAME, task_mgr}; // Occasional network issues and such can cause remote operations to fail, and // that's expected. If a download fails, we log it at info-level, and retry. @@ -913,6 +900,18 @@ impl RemoteTimelineClient { Ok(()) } + /// Launch an index-file upload operation in the background, setting the `gc_compaction` field. + pub(crate) fn schedule_index_upload_for_gc_compaction_state_update( + self: &Arc, + gc_compaction_state: GcCompactionState, + ) -> anyhow::Result<()> { + let mut guard = self.upload_queue.lock().unwrap(); + let upload_queue = guard.initialized_mut()?; + upload_queue.dirty.gc_compaction = Some(gc_compaction_state); + self.schedule_index_upload(upload_queue); + Ok(()) + } + /// /// Launch an index-file upload operation in the background, if necessary. 
/// @@ -1078,7 +1077,11 @@ impl RemoteTimelineClient { if !wanted(x) && wanted(y) { // this could be avoided by having external in-memory synchronization, like // timeline detach ancestor - warn!(?reason, op="insert", "unexpected: two racing processes to enable and disable a gc blocking reason"); + warn!( + ?reason, + op = "insert", + "unexpected: two racing processes to enable and disable a gc blocking reason" + ); } // at this point, the metadata must always show that there is a parent @@ -1132,7 +1135,11 @@ impl RemoteTimelineClient { (x, y) if wanted(x) && !wanted(y) => Some(self.schedule_barrier0(upload_queue)), (x, y) => { if !wanted(x) && wanted(y) { - warn!(?reason, op="remove", "unexpected: two racing processes to enable and disable a gc blocking reason (remove)"); + warn!( + ?reason, + op = "remove", + "unexpected: two racing processes to enable and disable a gc blocking reason (remove)" + ); } upload_queue.dirty.gc_blocking = @@ -1274,12 +1281,14 @@ impl RemoteTimelineClient { #[cfg(feature = "testing")] for (name, metadata) in &with_metadata { - let gen = metadata.generation; - if let Some(unexpected) = upload_queue.dangling_files.insert(name.to_owned(), gen) { - if unexpected == gen { + let gen_ = metadata.generation; + if let Some(unexpected) = upload_queue.dangling_files.insert(name.to_owned(), gen_) { + if unexpected == gen_ { tracing::error!("{name} was unlinked twice with same generation"); } else { - tracing::error!("{name} was unlinked twice with different generations {gen:?} and {unexpected:?}"); + tracing::error!( + "{name} was unlinked twice with different generations {gen_:?} and {unexpected:?}" + ); } } } @@ -1341,11 +1350,11 @@ impl RemoteTimelineClient { #[cfg(feature = "testing")] for (name, meta) in &with_metadata { - let gen = meta.generation; + let gen_ = meta.generation; match upload_queue.dangling_files.remove(name) { - Some(same) if same == gen => { /* expected */ } + Some(same) if same == gen_ => { /* expected */ } Some(other) => { - tracing::error!("{name} was unlinked with {other:?} but deleted with {gen:?}"); + tracing::error!("{name} was unlinked with {other:?} but deleted with {gen_:?}"); } None => { tracing::error!("{name} was unlinked but was not dangling"); @@ -1442,7 +1451,9 @@ impl RemoteTimelineClient { // proper stop is yet to be called. On cancel the original or some later task must call // `stop` or `shutdown`. 
let sg = scopeguard::guard((), |_| { - tracing::error!("RemoteTimelineClient::shutdown was cancelled; this should not happen, do not make this into an allowed_error") + tracing::error!( + "RemoteTimelineClient::shutdown was cancelled; this should not happen, do not make this into an allowed_error" + ) }); let fut = { @@ -1458,7 +1469,7 @@ impl RemoteTimelineClient { scopeguard::ScopeGuard::into_inner(sg); return; } - UploadQueue::Initialized(ref mut init) => init, + UploadQueue::Initialized(init) => init, }; // if the queue is already stuck due to a shutdown operation which was cancelled, then @@ -1818,7 +1829,9 @@ impl RemoteTimelineClient { .map(|n| n.starts_with(IndexPart::FILE_NAME)) .unwrap_or(false) }) - .filter_map(|o| parse_remote_index_path(o.key.clone()).map(|gen| (o.key.clone(), gen))) + .filter_map(|o| { + parse_remote_index_path(o.key.clone()).map(|gen_| (o.key.clone(), gen_)) + }) .max_by_key(|i| i.1) .map(|i| i.0.clone()) .unwrap_or( @@ -2010,7 +2023,7 @@ impl RemoteTimelineClient { } let upload_result: anyhow::Result<()> = match &task.op { - UploadOp::UploadLayer(ref layer, ref layer_metadata, mode) => { + UploadOp::UploadLayer(layer, layer_metadata, mode) => { // TODO: check if this mechanism can be removed now that can_bypass() performs // conflict checks during scheduling. if let Some(OpType::FlushDeletion) = mode { @@ -2100,7 +2113,7 @@ impl RemoteTimelineClient { ) .await } - UploadOp::UploadMetadata { ref uploaded } => { + UploadOp::UploadMetadata { uploaded } => { let res = upload::upload_index_part( &self.storage_impl, &self.tenant_shard_id, @@ -2216,11 +2229,11 @@ impl RemoteTimelineClient { let lsn_update = { let mut upload_queue_guard = self.upload_queue.lock().unwrap(); let upload_queue = match upload_queue_guard.deref_mut() { - UploadQueue::Uninitialized => panic!("callers are responsible for ensuring this is only called on an initialized queue"), - UploadQueue::Stopped(_stopped) => { - None - }, - UploadQueue::Initialized(qi) => { Some(qi) } + UploadQueue::Uninitialized => panic!( + "callers are responsible for ensuring this is only called on an initialized queue" + ), + UploadQueue::Stopped(_stopped) => None, + UploadQueue::Initialized(qi) => Some(qi), }; let upload_queue = match upload_queue { @@ -2242,7 +2255,11 @@ impl RemoteTimelineClient { let is_later = last_updater.is_some_and(|task_id| task_id < task.task_id); let monotone = is_later || last_updater.is_none(); - assert!(monotone, "no two index uploads should be completing at the same time, prev={last_updater:?}, task.task_id={}", task.task_id); + assert!( + monotone, + "no two index uploads should be completing at the same time, prev={last_updater:?}, task.task_id={}", + task.task_id + ); // not taking ownership is wasteful upload_queue.clean.0.clone_from(uploaded); @@ -2641,20 +2658,16 @@ pub fn parse_remote_tenant_manifest_path(path: RemotePath) -> Option #[cfg(test)] mod tests { - use super::*; - use crate::{ - context::RequestContext, - tenant::{ - config::AttachmentMode, - harness::{TenantHarness, TIMELINE_ID}, - storage_layer::layer::local_layer_path, - Tenant, Timeline, - }, - DEFAULT_PG_VERSION, - }; - use std::collections::HashSet; + use super::*; + use crate::DEFAULT_PG_VERSION; + use crate::context::RequestContext; + use crate::tenant::config::AttachmentMode; + use crate::tenant::harness::{TIMELINE_ID, TenantHarness}; + use crate::tenant::storage_layer::layer::local_layer_path; + use crate::tenant::{Tenant, Timeline}; + pub(super) fn dummy_contents(name: &str) -> Vec { format!("contents for 
{name}").into() } diff --git a/pageserver/src/tenant/remote_timeline_client/download.rs b/pageserver/src/tenant/remote_timeline_client/download.rs index b4d45dca75..92be2145ce 100644 --- a/pageserver/src/tenant/remote_timeline_client/download.rs +++ b/pageserver/src/tenant/remote_timeline_client/download.rs @@ -8,41 +8,39 @@ use std::future::Future; use std::str::FromStr; use std::time::SystemTime; -use anyhow::{anyhow, Context}; +use anyhow::{Context, anyhow}; use camino::{Utf8Path, Utf8PathBuf}; use pageserver_api::shard::TenantShardId; +use remote_storage::{ + DownloadError, DownloadKind, DownloadOpts, GenericRemoteStorage, ListingMode, RemotePath, +}; use tokio::fs::{self, File, OpenOptions}; use tokio::io::{AsyncSeekExt, AsyncWriteExt}; use tokio_util::io::StreamReader; use tokio_util::sync::CancellationToken; use tracing::warn; -use utils::backoff; +use utils::crashsafe::path_with_suffix_extension; +use utils::id::{TenantId, TimelineId}; +use utils::{backoff, pausable_failpoint}; +use super::index::{IndexPart, LayerFileMetadata}; +use super::manifest::TenantManifest; +use super::{ + FAILED_DOWNLOAD_WARN_THRESHOLD, FAILED_REMOTE_OP_RETRIES, INITDB_PATH, parse_remote_index_path, + parse_remote_tenant_manifest_path, remote_index_path, remote_initdb_archive_path, + remote_initdb_preserved_archive_path, remote_tenant_manifest_path, + remote_tenant_manifest_prefix, remote_tenant_path, +}; +use crate::TEMP_FILE_SUFFIX; use crate::config::PageServerConf; use crate::context::RequestContext; use crate::span::{ debug_assert_current_span_has_tenant_and_timeline_id, debug_assert_current_span_has_tenant_id, }; +use crate::tenant::Generation; use crate::tenant::remote_timeline_client::{remote_layer_path, remote_timelines_path}; use crate::tenant::storage_layer::LayerName; -use crate::tenant::Generation; -use crate::virtual_file::{on_fatal_io_error, MaybeFatalIo, VirtualFile}; -use crate::TEMP_FILE_SUFFIX; -use remote_storage::{ - DownloadError, DownloadKind, DownloadOpts, GenericRemoteStorage, ListingMode, RemotePath, -}; -use utils::crashsafe::path_with_suffix_extension; -use utils::id::{TenantId, TimelineId}; -use utils::pausable_failpoint; - -use super::index::{IndexPart, LayerFileMetadata}; -use super::manifest::TenantManifest; -use super::{ - parse_remote_index_path, parse_remote_tenant_manifest_path, remote_index_path, - remote_initdb_archive_path, remote_initdb_preserved_archive_path, remote_tenant_manifest_path, - remote_tenant_manifest_prefix, remote_tenant_path, FAILED_DOWNLOAD_WARN_THRESHOLD, - FAILED_REMOTE_OP_RETRIES, INITDB_PATH, -}; +use crate::virtual_file::{MaybeFatalIo, VirtualFile, on_fatal_io_error}; /// /// If 'metadata' is given, we will validate that the downloaded file's size matches that @@ -207,9 +205,9 @@ async fn download_object( } #[cfg(target_os = "linux")] crate::virtual_file::io_engine::IoEngine::TokioEpollUring => { - use crate::virtual_file::owned_buffers_io; - use crate::virtual_file::IoBufferMut; use std::sync::Arc; + + use crate::virtual_file::{IoBufferMut, owned_buffers_io}; async { let destination_file = Arc::new( VirtualFile::create(dst_path, ctx) diff --git a/pageserver/src/tenant/remote_timeline_client/index.rs b/pageserver/src/tenant/remote_timeline_client/index.rs index b8b18005fd..ceaed58bbd 100644 --- a/pageserver/src/tenant/remote_timeline_client/index.rs +++ b/pageserver/src/tenant/remote_timeline_client/index.rs @@ -7,16 +7,16 @@ use std::collections::HashMap; use chrono::NaiveDateTime; use pageserver_api::models::AuxFilePolicy; +use 
pageserver_api::shard::ShardIndex; use serde::{Deserialize, Serialize}; +use utils::id::TimelineId; +use utils::lsn::Lsn; use super::is_same_remote_layer_path; +use crate::tenant::Generation; use crate::tenant::metadata::TimelineMetadata; use crate::tenant::storage_layer::LayerName; use crate::tenant::timeline::import_pgdata; -use crate::tenant::Generation; -use pageserver_api::shard::ShardIndex; -use utils::id::TimelineId; -use utils::lsn::Lsn; /// In-memory representation of an `index_part.json` file /// @@ -85,9 +85,36 @@ pub struct IndexPart { #[serde(skip_serializing_if = "Option::is_none", default)] pub(crate) rel_size_migration: Option, - /// The LSN of gc-compaction horizon. Once gc-compaction is finished for all layer files below an LSN, this LSN will be updated. + /// Not used anymore -- kept here for backwards compatibility. Merged into the `gc_compaction` field. #[serde(skip_serializing_if = "Option::is_none", default)] - pub(crate) l2_lsn: Option, + l2_lsn: Option, + + /// State for the garbage-collecting compaction pass. + /// + /// Garbage-collecting compaction (gc-compaction) prunes `Value`s that are outside + /// the PITR window and not needed by child timelines. + /// + /// A commonly used synonym for this compaction pass is + /// "bottommost-compaction" because the affected LSN range + /// is the "bottom" of the (key,lsn) map. + /// + /// Gc-compaction is quite an expensive operation; that's why we use + /// a trigger condition. + /// This field holds the state pertaining to that trigger condition + /// and (in future) to the progress of the gc-compaction, so that it's + /// resumable across restarts & migrations. + /// + /// Note that the underlying algorithm is _also_ called `gc-compaction` + /// in most places & design docs; but in fact it is more flexible than + /// just the specific use case here; it needs a new name. + #[serde(skip_serializing_if = "Option::is_none", default)] + pub(crate) gc_compaction: Option, +} + +#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)] +pub struct GcCompactionState { + /// The upper bound of the last completed garbage-collecting compaction, aka. L2 LSN. + pub(crate) last_completed_lsn: Lsn, } #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] @@ -123,10 +150,11 @@ impl IndexPart { /// - 10: +import_pgdata /// - 11: +rel_size_migration /// - 12: +l2_lsn - const LATEST_VERSION: usize = 12; + /// - 13: +gc_compaction + const LATEST_VERSION: usize = 13; // Versions we may see when reading from a bucket. 
- pub const KNOWN_VERSIONS: &'static [usize] = &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; + pub const KNOWN_VERSIONS: &'static [usize] = &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]; pub const FILE_NAME: &'static str = "index_part.json"; @@ -144,6 +172,7 @@ impl IndexPart { import_pgdata: None, rel_size_migration: None, l2_lsn: None, + gc_compaction: None, } } @@ -406,10 +435,12 @@ impl GcBlocking { #[cfg(test)] mod tests { - use super::*; use std::str::FromStr; + use utils::id::TimelineId; + use super::*; + #[test] fn v1_indexpart_is_parsed() { let example = r#"{ @@ -450,6 +481,7 @@ mod tests { import_pgdata: None, rel_size_migration: None, l2_lsn: None, + gc_compaction: None, }; let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap(); @@ -497,6 +529,7 @@ mod tests { import_pgdata: None, rel_size_migration: None, l2_lsn: None, + gc_compaction: None, }; let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap(); @@ -545,6 +578,7 @@ mod tests { import_pgdata: None, rel_size_migration: None, l2_lsn: None, + gc_compaction: None, }; let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap(); @@ -596,6 +630,7 @@ mod tests { import_pgdata: None, rel_size_migration: None, l2_lsn: None, + gc_compaction: None, }; let empty_layers_parsed = IndexPart::from_json_bytes(empty_layers_json.as_bytes()).unwrap(); @@ -642,6 +677,7 @@ mod tests { import_pgdata: None, rel_size_migration: None, l2_lsn: None, + gc_compaction: None, }; let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap(); @@ -691,6 +727,7 @@ mod tests { import_pgdata: None, rel_size_migration: None, l2_lsn: None, + gc_compaction: None, }; let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap(); @@ -745,6 +782,7 @@ mod tests { import_pgdata: None, rel_size_migration: None, l2_lsn: None, + gc_compaction: None, }; let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap(); @@ -804,6 +842,7 @@ mod tests { import_pgdata: None, rel_size_migration: None, l2_lsn: None, + gc_compaction: None, }; let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap(); @@ -864,6 +903,7 @@ mod tests { import_pgdata: None, rel_size_migration: None, l2_lsn: None, + gc_compaction: None, }; let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap(); @@ -929,6 +969,7 @@ mod tests { import_pgdata: None, rel_size_migration: None, l2_lsn: None, + gc_compaction: None, }; let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap(); @@ -1007,6 +1048,7 @@ mod tests { }))), rel_size_migration: None, l2_lsn: None, + gc_compaction: None, }; let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap(); @@ -1086,6 +1128,7 @@ mod tests { }))), rel_size_migration: Some(RelSizeMigration::Legacy), l2_lsn: None, + gc_compaction: None, }; let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap(); @@ -1093,7 +1136,7 @@ mod tests { } #[test] - fn v12_l2_lsn_is_parsed() { + fn v12_v13_l2_gc_ompaction_is_parsed() { let example = r#"{ "version": 12, "layer_metadata":{ @@ -1124,7 +1167,10 @@ mod tests { } }, "rel_size_migration": "legacy", - "l2_lsn": "0/16960E8" + "l2_lsn": "0/16960E8", + "gc_compaction": { + "last_completed_lsn": "0/16960E8" + } }"#; let expected = IndexPart { @@ -1166,6 +1212,9 @@ mod tests { }))), rel_size_migration: Some(RelSizeMigration::Legacy), l2_lsn: Some("0/16960E8".parse::().unwrap()), + gc_compaction: Some(GcCompactionState { + last_completed_lsn: "0/16960E8".parse::().unwrap(), + }), }; let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap(); diff --git 
a/pageserver/src/tenant/remote_timeline_client/manifest.rs b/pageserver/src/tenant/remote_timeline_client/manifest.rs index 2029847a12..543ccc219d 100644 --- a/pageserver/src/tenant/remote_timeline_client/manifest.rs +++ b/pageserver/src/tenant/remote_timeline_client/manifest.rs @@ -1,6 +1,7 @@ use chrono::NaiveDateTime; use serde::{Deserialize, Serialize}; -use utils::{id::TimelineId, lsn::Lsn}; +use utils::id::TimelineId; +use utils::lsn::Lsn; /// Tenant-shard scoped manifest #[derive(Clone, Serialize, Deserialize, PartialEq, Eq)] diff --git a/pageserver/src/tenant/remote_timeline_client/upload.rs b/pageserver/src/tenant/remote_timeline_client/upload.rs index af4dbbbfb6..7d9f47665a 100644 --- a/pageserver/src/tenant/remote_timeline_client/upload.rs +++ b/pageserver/src/tenant/remote_timeline_client/upload.rs @@ -1,28 +1,28 @@ //! Helper functions to upload files to remote storage with a RemoteStorage -use anyhow::{bail, Context}; +use std::io::{ErrorKind, SeekFrom}; +use std::time::SystemTime; + +use anyhow::{Context, bail}; use bytes::Bytes; use camino::Utf8Path; use fail::fail_point; use pageserver_api::shard::TenantShardId; -use std::io::{ErrorKind, SeekFrom}; -use std::time::SystemTime; +use remote_storage::{GenericRemoteStorage, RemotePath, TimeTravelError}; use tokio::fs::{self, File}; use tokio::io::AsyncSeekExt; use tokio_util::sync::CancellationToken; +use tracing::info; +use utils::id::{TenantId, TimelineId}; use utils::{backoff, pausable_failpoint}; +use super::Generation; use super::index::IndexPart; use super::manifest::TenantManifest; -use super::Generation; use crate::tenant::remote_timeline_client::{ remote_index_path, remote_initdb_archive_path, remote_initdb_preserved_archive_path, remote_tenant_manifest_path, }; -use remote_storage::{GenericRemoteStorage, RemotePath, TimeTravelError}; -use utils::id::{TenantId, TimelineId}; - -use tracing::info; /// Serializes and uploads the given index part data to the remote storage. 
pub(crate) async fn upload_index_part( @@ -134,7 +134,9 @@ pub(super) async fn upload_timeline_layer<'a>( .len(); if metadata_size != fs_size { - bail!("File {local_path:?} has its current FS size {fs_size} diferent from initially determined {metadata_size}"); + bail!( + "File {local_path:?} has its current FS size {fs_size} diferent from initially determined {metadata_size}" + ); } let fs_size = usize::try_from(fs_size) diff --git a/pageserver/src/tenant/secondary.rs b/pageserver/src/tenant/secondary.rs index 4bc208331b..8f8622c796 100644 --- a/pageserver/src/tenant/secondary.rs +++ b/pageserver/src/tenant/secondary.rs @@ -3,40 +3,31 @@ pub mod heatmap; mod heatmap_uploader; mod scheduler; -use std::{sync::Arc, time::SystemTime}; +use std::sync::Arc; +use std::time::SystemTime; -use crate::{ - context::RequestContext, - disk_usage_eviction_task::DiskUsageEvictionInfo, - metrics::SECONDARY_HEATMAP_TOTAL_SIZE, - task_mgr::{self, TaskKind, BACKGROUND_RUNTIME}, -}; - -use self::{ - downloader::{downloader_task, SecondaryDetail}, - heatmap_uploader::heatmap_uploader_task, -}; - -use super::{ - config::{SecondaryLocationConfig, TenantConfOpt}, - mgr::TenantManager, - span::debug_assert_current_span_has_tenant_id, - storage_layer::LayerName, - GetTenantError, -}; - -use crate::metrics::SECONDARY_RESIDENT_PHYSICAL_SIZE; use metrics::UIntGauge; -use pageserver_api::{ - models, - shard::{ShardIdentity, TenantShardId}, -}; +use pageserver_api::models; +use pageserver_api::shard::{ShardIdentity, TenantShardId}; use remote_storage::GenericRemoteStorage; - use tokio::task::JoinHandle; use tokio_util::sync::CancellationToken; use tracing::instrument; -use utils::{completion::Barrier, id::TimelineId, sync::gate::Gate}; +use utils::completion::Barrier; +use utils::id::TimelineId; +use utils::sync::gate::Gate; + +use self::downloader::{SecondaryDetail, downloader_task}; +use self::heatmap_uploader::heatmap_uploader_task; +use super::GetTenantError; +use super::config::{SecondaryLocationConfig, TenantConfOpt}; +use super::mgr::TenantManager; +use super::span::debug_assert_current_span_has_tenant_id; +use super::storage_layer::LayerName; +use crate::context::RequestContext; +use crate::disk_usage_eviction_task::DiskUsageEvictionInfo; +use crate::metrics::{SECONDARY_HEATMAP_TOTAL_SIZE, SECONDARY_RESIDENT_PHYSICAL_SIZE}; +use crate::task_mgr::{self, BACKGROUND_RUNTIME, TaskKind}; enum DownloadCommand { Download(TenantShardId), diff --git a/pageserver/src/tenant/secondary/downloader.rs b/pageserver/src/tenant/secondary/downloader.rs index 2e8c3946bd..a13b9323ac 100644 --- a/pageserver/src/tenant/secondary/downloader.rs +++ b/pageserver/src/tenant/secondary/downloader.rs @@ -1,47 +1,8 @@ -use std::{ - collections::{HashMap, HashSet}, - pin::Pin, - str::FromStr, - sync::Arc, - time::{Duration, Instant, SystemTime}, -}; - -use crate::{ - config::PageServerConf, - context::RequestContext, - disk_usage_eviction_task::{ - finite_f32, DiskUsageEvictionInfo, EvictionCandidate, EvictionLayer, EvictionSecondaryLayer, - }, - metrics::SECONDARY_MODE, - tenant::{ - config::SecondaryLocationConfig, - debug_assert_current_span_has_tenant_and_timeline_id, - ephemeral_file::is_ephemeral_file, - remote_timeline_client::{ - index::LayerFileMetadata, is_temp_download_file, FAILED_DOWNLOAD_WARN_THRESHOLD, - FAILED_REMOTE_OP_RETRIES, - }, - span::debug_assert_current_span_has_tenant_id, - storage_layer::{layer::local_layer_path, LayerName, LayerVisibilityHint}, - tasks::{warn_when_period_overrun, BackgroundLoopKind}, - }, - 
virtual_file::{on_fatal_io_error, MaybeFatalIo, VirtualFile}, - TEMP_FILE_SUFFIX, -}; - -use super::{ - heatmap::HeatMapLayer, - scheduler::{ - self, period_jitter, period_warmup, Completion, JobGenerator, SchedulingResult, - TenantBackgroundJobs, - }, - GetTenantError, SecondaryTenant, SecondaryTenantError, -}; - -use crate::tenant::{ - mgr::TenantManager, - remote_timeline_client::{download::download_layer_file, remote_heatmap_path}, -}; +use std::collections::{HashMap, HashSet}; +use std::pin::Pin; +use std::str::FromStr; +use std::sync::Arc; +use std::time::{Duration, Instant, SystemTime}; use camino::Utf8PathBuf; use chrono::format::{DelayedFormat, StrftimeItems}; @@ -50,18 +11,43 @@ use metrics::UIntGauge; use pageserver_api::models::SecondaryProgress; use pageserver_api::shard::TenantShardId; use remote_storage::{DownloadError, DownloadKind, DownloadOpts, Etag, GenericRemoteStorage}; - use tokio_util::sync::CancellationToken; -use tracing::{info_span, instrument, warn, Instrument}; -use utils::{ - backoff, completion::Barrier, crashsafe::path_with_suffix_extension, failpoint_support, fs_ext, - id::TimelineId, pausable_failpoint, serde_system_time, -}; +use tracing::{Instrument, info_span, instrument, warn}; +use utils::completion::Barrier; +use utils::crashsafe::path_with_suffix_extension; +use utils::id::TimelineId; +use utils::{backoff, failpoint_support, fs_ext, pausable_failpoint, serde_system_time}; -use super::{ - heatmap::{HeatMapTenant, HeatMapTimeline}, - CommandRequest, DownloadCommand, +use super::heatmap::{HeatMapLayer, HeatMapTenant, HeatMapTimeline}; +use super::scheduler::{ + self, Completion, JobGenerator, SchedulingResult, TenantBackgroundJobs, period_jitter, + period_warmup, }; +use super::{ + CommandRequest, DownloadCommand, GetTenantError, SecondaryTenant, SecondaryTenantError, +}; +use crate::TEMP_FILE_SUFFIX; +use crate::config::PageServerConf; +use crate::context::RequestContext; +use crate::disk_usage_eviction_task::{ + DiskUsageEvictionInfo, EvictionCandidate, EvictionLayer, EvictionSecondaryLayer, finite_f32, +}; +use crate::metrics::SECONDARY_MODE; +use crate::tenant::config::SecondaryLocationConfig; +use crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id; +use crate::tenant::ephemeral_file::is_ephemeral_file; +use crate::tenant::mgr::TenantManager; +use crate::tenant::remote_timeline_client::download::download_layer_file; +use crate::tenant::remote_timeline_client::index::LayerFileMetadata; +use crate::tenant::remote_timeline_client::{ + FAILED_DOWNLOAD_WARN_THRESHOLD, FAILED_REMOTE_OP_RETRIES, is_temp_download_file, + remote_heatmap_path, +}; +use crate::tenant::span::debug_assert_current_span_has_tenant_id; +use crate::tenant::storage_layer::layer::local_layer_path; +use crate::tenant::storage_layer::{LayerName, LayerVisibilityHint}; +use crate::tenant::tasks::{BackgroundLoopKind, warn_when_period_overrun}; +use crate::virtual_file::{MaybeFatalIo, VirtualFile, on_fatal_io_error}; /// For each tenant, default period for how long must have passed since the last download_tenant call before /// calling it again. 
This default is replaced with the value of [`HeatMapTenant::upload_period_ms`] after first diff --git a/pageserver/src/tenant/secondary/heatmap.rs b/pageserver/src/tenant/secondary/heatmap.rs index 0fa10ca294..4a938e9095 100644 --- a/pageserver/src/tenant/secondary/heatmap.rs +++ b/pageserver/src/tenant/secondary/heatmap.rs @@ -1,11 +1,13 @@ -use std::{collections::HashMap, time::SystemTime}; - -use crate::tenant::{remote_timeline_client::index::LayerFileMetadata, storage_layer::LayerName}; +use std::collections::HashMap; +use std::time::SystemTime; use serde::{Deserialize, Serialize}; -use serde_with::{serde_as, DisplayFromStr, TimestampSeconds}; +use serde_with::{DisplayFromStr, TimestampSeconds, serde_as}; +use utils::generation::Generation; +use utils::id::TimelineId; -use utils::{generation::Generation, id::TimelineId}; +use crate::tenant::remote_timeline_client::index::LayerFileMetadata; +use crate::tenant::storage_layer::LayerName; #[derive(Serialize, Deserialize)] pub(crate) struct HeatMapTenant { diff --git a/pageserver/src/tenant/secondary/heatmap_uploader.rs b/pageserver/src/tenant/secondary/heatmap_uploader.rs index d72c337369..3375714a66 100644 --- a/pageserver/src/tenant/secondary/heatmap_uploader.rs +++ b/pageserver/src/tenant/secondary/heatmap_uploader.rs @@ -1,42 +1,33 @@ -use std::{ - collections::HashMap, - pin::Pin, - sync::{Arc, Weak}, - time::{Duration, Instant}, -}; - -use crate::{ - metrics::SECONDARY_MODE, - tenant::{ - config::AttachmentMode, - mgr::{GetTenantError, TenantManager}, - remote_timeline_client::remote_heatmap_path, - span::debug_assert_current_span_has_tenant_id, - tasks::{warn_when_period_overrun, BackgroundLoopKind}, - Tenant, - }, - virtual_file::VirtualFile, - TEMP_FILE_SUFFIX, -}; +use std::collections::HashMap; +use std::pin::Pin; +use std::sync::{Arc, Weak}; +use std::time::{Duration, Instant}; use futures::Future; use pageserver_api::shard::TenantShardId; use remote_storage::{GenericRemoteStorage, TimeoutOrCancel}; - -use super::{ - heatmap::HeatMapTenant, - scheduler::{ - self, period_jitter, period_warmup, JobGenerator, RunningJob, SchedulingResult, - TenantBackgroundJobs, - }, - CommandRequest, SecondaryTenantError, UploadCommand, -}; use tokio_util::sync::CancellationToken; -use tracing::{info_span, instrument, Instrument}; -use utils::{ - backoff, completion::Barrier, crashsafe::path_with_suffix_extension, - yielding_loop::yielding_loop, +use tracing::{Instrument, info_span, instrument}; +use utils::backoff; +use utils::completion::Barrier; +use utils::crashsafe::path_with_suffix_extension; +use utils::yielding_loop::yielding_loop; + +use super::heatmap::HeatMapTenant; +use super::scheduler::{ + self, JobGenerator, RunningJob, SchedulingResult, TenantBackgroundJobs, period_jitter, + period_warmup, }; +use super::{CommandRequest, SecondaryTenantError, UploadCommand}; +use crate::TEMP_FILE_SUFFIX; +use crate::metrics::SECONDARY_MODE; +use crate::tenant::Tenant; +use crate::tenant::config::AttachmentMode; +use crate::tenant::mgr::{GetTenantError, TenantManager}; +use crate::tenant::remote_timeline_client::remote_heatmap_path; +use crate::tenant::span::debug_assert_current_span_has_tenant_id; +use crate::tenant::tasks::{BackgroundLoopKind, warn_when_period_overrun}; +use crate::virtual_file::VirtualFile; pub(super) async fn heatmap_uploader_task( tenant_manager: Arc, diff --git a/pageserver/src/tenant/secondary/scheduler.rs b/pageserver/src/tenant/secondary/scheduler.rs index e963c722b9..f948f9114f 100644 --- 
a/pageserver/src/tenant/secondary/scheduler.rs +++ b/pageserver/src/tenant/secondary/scheduler.rs @@ -1,16 +1,15 @@ -use futures::Future; -use rand::Rng; -use std::{ - collections::HashMap, - marker::PhantomData, - pin::Pin, - time::{Duration, Instant}, -}; +use std::collections::HashMap; +use std::marker::PhantomData; +use std::pin::Pin; +use std::time::{Duration, Instant}; +use futures::Future; use pageserver_api::shard::TenantShardId; +use rand::Rng; use tokio::task::JoinSet; use tokio_util::sync::CancellationToken; -use utils::{completion::Barrier, yielding_loop::yielding_loop}; +use utils::completion::Barrier; +use utils::yielding_loop::yielding_loop; use super::{CommandRequest, CommandResponse, SecondaryTenantError}; diff --git a/pageserver/src/tenant/size.rs b/pageserver/src/tenant/size.rs index 1e84a9d9dc..ed6b351c75 100644 --- a/pageserver/src/tenant/size.rs +++ b/pageserver/src/tenant/size.rs @@ -4,21 +4,18 @@ use std::collections::{HashMap, HashSet}; use std::sync::Arc; use tenant_size_model::svg::SvgBranchKind; -use tokio::sync::oneshot::error::RecvError; +use tenant_size_model::{Segment, StorageModel}; use tokio::sync::Semaphore; +use tokio::sync::oneshot::error::RecvError; use tokio_util::sync::CancellationToken; - -use crate::context::RequestContext; -use crate::pgdatadir_mapping::CalculateLogicalSizeError; - -use super::{GcError, LogicalSizeCalculationCause, Tenant}; -use crate::tenant::{MaybeOffloaded, Timeline}; +use tracing::*; use utils::id::TimelineId; use utils::lsn::Lsn; -use tracing::*; - -use tenant_size_model::{Segment, StorageModel}; +use super::{GcError, LogicalSizeCalculationCause, Tenant}; +use crate::context::RequestContext; +use crate::pgdatadir_mapping::CalculateLogicalSizeError; +use crate::tenant::{MaybeOffloaded, Timeline}; /// Inputs to the actual tenant sizing model /// @@ -498,7 +495,9 @@ async fn fill_logical_sizes( } Err(join_error) => { // cannot really do anything, as this panic is likely a bug - error!("task that calls spawn_ondemand_logical_size_calculation panicked: {join_error:#}"); + error!( + "task that calls spawn_ondemand_logical_size_calculation panicked: {join_error:#}" + ); have_any_error = Some(CalculateSyntheticSizeError::Fatal( anyhow::anyhow!(join_error) diff --git a/pageserver/src/tenant/storage_layer.rs b/pageserver/src/tenant/storage_layer.rs index f9f843ef6b..7f313f46a2 100644 --- a/pageserver/src/tenant/storage_layer.rs +++ b/pageserver/src/tenant/storage_layer.rs @@ -10,42 +10,39 @@ mod layer_desc; mod layer_name; pub mod merge_iterator; -use crate::config::PageServerConf; -use crate::context::{AccessStatsBehavior, RequestContext}; -use bytes::Bytes; -use futures::stream::FuturesUnordered; -use futures::StreamExt; -use pageserver_api::key::Key; -use pageserver_api::keyspace::{KeySpace, KeySpaceRandomAccum}; -use pageserver_api::record::NeonWalRecord; -use pageserver_api::value::Value; use std::cmp::Ordering; use std::collections::hash_map::Entry; use std::collections::{BinaryHeap, HashMap}; use std::future::Future; use std::ops::Range; use std::pin::Pin; -use std::sync::atomic::AtomicUsize; use std::sync::Arc; +use std::sync::atomic::AtomicUsize; use std::time::{Duration, SystemTime, UNIX_EPOCH}; -use tracing::{trace, Instrument}; -use utils::sync::gate::GateGuard; - -use utils::lsn::Lsn; pub use batch_split_writer::{BatchLayerWriter, SplitDeltaLayerWriter, SplitImageLayerWriter}; +use bytes::Bytes; pub use delta_layer::{DeltaLayer, DeltaLayerWriter, ValueRef}; +use futures::StreamExt; +use futures::stream::FuturesUnordered; 
pub use image_layer::{ImageLayer, ImageLayerWriter}; pub use inmemory_layer::InMemoryLayer; +pub(crate) use layer::{EvictionError, Layer, ResidentLayer}; pub use layer_desc::{PersistentLayerDesc, PersistentLayerKey}; pub use layer_name::{DeltaLayerName, ImageLayerName, LayerName}; - -pub(crate) use layer::{EvictionError, Layer, ResidentLayer}; +use pageserver_api::key::Key; +use pageserver_api::keyspace::{KeySpace, KeySpaceRandomAccum}; +use pageserver_api::record::NeonWalRecord; +use pageserver_api::value::Value; +use tracing::{Instrument, trace}; +use utils::lsn::Lsn; +use utils::sync::gate::GateGuard; use self::inmemory_layer::InMemoryLayerFileId; - -use super::timeline::{GetVectoredError, ReadPath}; use super::PageReconstructError; +use super::timeline::{GetVectoredError, ReadPath}; +use crate::config::PageServerConf; +use crate::context::{AccessStatsBehavior, RequestContext}; pub fn range_overlaps(a: &Range, b: &Range) -> bool where @@ -510,6 +507,7 @@ impl IoConcurrency { #[cfg(test)] pub(crate) fn spawn_for_test() -> impl std::ops::DerefMut { use std::ops::{Deref, DerefMut}; + use tracing::info; use utils::sync::gate::Gate; diff --git a/pageserver/src/tenant/storage_layer/batch_split_writer.rs b/pageserver/src/tenant/storage_layer/batch_split_writer.rs index 7da51c27df..fd50e4805d 100644 --- a/pageserver/src/tenant/storage_layer/batch_split_writer.rs +++ b/pageserver/src/tenant/storage_layer/batch_split_writer.rs @@ -1,17 +1,22 @@ -use std::{future::Future, ops::Range, sync::Arc}; +use std::future::Future; +use std::ops::Range; +use std::sync::Arc; use bytes::Bytes; -use pageserver_api::key::{Key, KEY_SIZE}; -use utils::{id::TimelineId, lsn::Lsn, shard::TenantShardId}; - -use crate::tenant::storage_layer::Layer; -use crate::{config::PageServerConf, context::RequestContext, tenant::Timeline}; +use pageserver_api::key::{KEY_SIZE, Key}; use pageserver_api::value::Value; +use utils::id::TimelineId; +use utils::lsn::Lsn; +use utils::shard::TenantShardId; use super::layer::S3_UPLOAD_LIMIT; use super::{ DeltaLayerWriter, ImageLayerWriter, PersistentLayerDesc, PersistentLayerKey, ResidentLayer, }; +use crate::config::PageServerConf; +use crate::context::RequestContext; +use crate::tenant::Timeline; +use crate::tenant::storage_layer::Layer; pub(crate) enum BatchWriterResult { Produced(ResidentLayer), @@ -423,15 +428,10 @@ mod tests { use itertools::Itertools; use rand::{RngCore, SeedableRng}; - use crate::{ - tenant::{ - harness::{TenantHarness, TIMELINE_ID}, - storage_layer::AsLayerDesc, - }, - DEFAULT_PG_VERSION, - }; - use super::*; + use crate::DEFAULT_PG_VERSION; + use crate::tenant::harness::{TIMELINE_ID, TenantHarness}; + use crate::tenant::storage_layer::AsLayerDesc; fn get_key(id: u32) -> Key { let mut key = Key::from_hex("000000000033333333444444445500000000").unwrap(); diff --git a/pageserver/src/tenant/storage_layer/delta_layer.rs b/pageserver/src/tenant/storage_layer/delta_layer.rs index 885c50425f..d9afdc2405 100644 --- a/pageserver/src/tenant/storage_layer/delta_layer.rs +++ b/pageserver/src/tenant/storage_layer/delta_layer.rs @@ -27,6 +27,38 @@ //! "values" part. The actual page images and WAL records are stored in the //! "values" part. //! 
+use std::collections::{HashMap, VecDeque}; +use std::fs::File; +use std::io::SeekFrom; +use std::ops::Range; +use std::os::unix::fs::FileExt; +use std::str::FromStr; +use std::sync::Arc; + +use anyhow::{Context, Result, bail, ensure}; +use camino::{Utf8Path, Utf8PathBuf}; +use futures::StreamExt; +use itertools::Itertools; +use pageserver_api::config::MaxVectoredReadBytes; +use pageserver_api::key::{DBDIR_KEY, KEY_SIZE, Key}; +use pageserver_api::keyspace::KeySpace; +use pageserver_api::models::ImageCompressionAlgorithm; +use pageserver_api::shard::TenantShardId; +use pageserver_api::value::Value; +use rand::Rng; +use rand::distributions::Alphanumeric; +use serde::{Deserialize, Serialize}; +use tokio::sync::OnceCell; +use tokio_epoll_uring::IoBuf; +use tracing::*; +use utils::bin_ser::BeSer; +use utils::id::{TenantId, TimelineId}; +use utils::lsn::Lsn; + +use super::{ + AsLayerDesc, LayerName, OnDiskValue, OnDiskValueIo, PersistentLayerDesc, ResidentLayer, + ValuesReconstructState, +}; use crate::config::PageServerConf; use crate::context::{PageContentKind, RequestContext, RequestContextBuilder}; use crate::page_cache::{self, FileId, PAGE_SZ}; @@ -42,44 +74,8 @@ use crate::tenant::vectored_blob_io::{ VectoredReadPlanner, }; use crate::virtual_file::owned_buffers_io::io_buf_ext::{FullSlice, IoBufExt}; -use crate::virtual_file::IoBufferMut; -use crate::virtual_file::{self, MaybeFatalIo, VirtualFile}; -use crate::TEMP_FILE_SUFFIX; -use crate::{DELTA_FILE_MAGIC, STORAGE_FORMAT_VERSION}; -use anyhow::{bail, ensure, Context, Result}; -use camino::{Utf8Path, Utf8PathBuf}; -use futures::StreamExt; -use itertools::Itertools; -use pageserver_api::config::MaxVectoredReadBytes; -use pageserver_api::key::DBDIR_KEY; -use pageserver_api::key::{Key, KEY_SIZE}; -use pageserver_api::keyspace::KeySpace; -use pageserver_api::models::ImageCompressionAlgorithm; -use pageserver_api::shard::TenantShardId; -use pageserver_api::value::Value; -use rand::{distributions::Alphanumeric, Rng}; -use serde::{Deserialize, Serialize}; -use std::collections::{HashMap, VecDeque}; -use std::fs::File; -use std::io::SeekFrom; -use std::ops::Range; -use std::os::unix::fs::FileExt; -use std::str::FromStr; -use std::sync::Arc; -use tokio::sync::OnceCell; -use tokio_epoll_uring::IoBuf; -use tracing::*; - -use utils::{ - bin_ser::BeSer, - id::{TenantId, TimelineId}, - lsn::Lsn, -}; - -use super::{ - AsLayerDesc, LayerName, OnDiskValue, OnDiskValueIo, PersistentLayerDesc, ResidentLayer, - ValuesReconstructState, -}; +use crate::virtual_file::{self, IoBufferMut, MaybeFatalIo, VirtualFile}; +use crate::{DELTA_FILE_MAGIC, STORAGE_FORMAT_VERSION, TEMP_FILE_SUFFIX}; /// /// Header stored in the beginning of the file @@ -967,7 +963,10 @@ impl DeltaLayerInner { .as_slice() .iter() .filter_map(|(_, blob_meta)| { - if blob_meta.key.is_rel_dir_key() || blob_meta.key == DBDIR_KEY { + if blob_meta.key.is_rel_dir_key() + || blob_meta.key == DBDIR_KEY + || blob_meta.key.is_aux_file_key() + { // The size of values for these keys is unbounded and can // grow very large in pathological cases. 
None @@ -1128,10 +1127,11 @@ impl DeltaLayerInner { until: Lsn, ctx: &RequestContext, ) -> anyhow::Result { + use futures::stream::TryStreamExt; + use crate::tenant::vectored_blob_io::{ BlobMeta, ChunkedVectoredReadBuilder, VectoredReadExtended, }; - use futures::stream::TryStreamExt; #[derive(Debug)] enum Item { @@ -1597,23 +1597,21 @@ impl DeltaLayerIterator<'_> { pub(crate) mod test { use std::collections::BTreeMap; + use bytes::Bytes; use itertools::MinMaxResult; - use rand::prelude::{SeedableRng, SliceRandom, StdRng}; + use pageserver_api::value::Value; use rand::RngCore; + use rand::prelude::{SeedableRng, SliceRandom, StdRng}; use super::*; - use crate::tenant::harness::TIMELINE_ID; + use crate::DEFAULT_PG_VERSION; + use crate::context::DownloadBehavior; + use crate::task_mgr::TaskKind; + use crate::tenant::disk_btree::tests::TestDisk; + use crate::tenant::harness::{TIMELINE_ID, TenantHarness}; use crate::tenant::storage_layer::{Layer, ResidentLayer}; use crate::tenant::vectored_blob_io::StreamingVectoredReadPlanner; use crate::tenant::{Tenant, Timeline}; - use crate::{ - context::DownloadBehavior, - task_mgr::TaskKind, - tenant::{disk_btree::tests::TestDisk, harness::TenantHarness}, - DEFAULT_PG_VERSION, - }; - use bytes::Bytes; - use pageserver_api::value::Value; /// Construct an index for a fictional delta layer and and then /// traverse in order to plan vectored reads for a query. Finally, diff --git a/pageserver/src/tenant/storage_layer/filter_iterator.rs b/pageserver/src/tenant/storage_layer/filter_iterator.rs index 8660be1fcc..8d172a1c19 100644 --- a/pageserver/src/tenant/storage_layer/filter_iterator.rs +++ b/pageserver/src/tenant/storage_layer/filter_iterator.rs @@ -1,18 +1,14 @@ -use std::{ops::Range, sync::Arc}; +use std::ops::Range; +use std::sync::Arc; use anyhow::bail; -use pageserver_api::{ - key::Key, - keyspace::{KeySpace, SparseKeySpace}, -}; +use pageserver_api::key::Key; +use pageserver_api::keyspace::{KeySpace, SparseKeySpace}; +use pageserver_api::value::Value; use utils::lsn::Lsn; -use pageserver_api::value::Value; - -use super::{ - merge_iterator::{MergeIterator, MergeIteratorItem}, - PersistentLayerKey, -}; +use super::PersistentLayerKey; +use super::merge_iterator::{MergeIterator, MergeIteratorItem}; /// A filter iterator over merge iterators (and can be easily extended to other types of iterators). /// @@ -98,19 +94,14 @@ impl<'a> FilterIterator<'a> { #[cfg(test)] mod tests { - use super::*; - use itertools::Itertools; use pageserver_api::key::Key; use utils::lsn::Lsn; - use crate::{ - tenant::{ - harness::{TenantHarness, TIMELINE_ID}, - storage_layer::delta_layer::test::produce_delta_layer, - }, - DEFAULT_PG_VERSION, - }; + use super::*; + use crate::DEFAULT_PG_VERSION; + use crate::tenant::harness::{TIMELINE_ID, TenantHarness}; + use crate::tenant::storage_layer::delta_layer::test::produce_delta_layer; async fn assert_filter_iter_equal( filter_iter: &mut FilterIterator<'_>, diff --git a/pageserver/src/tenant/storage_layer/image_layer.rs b/pageserver/src/tenant/storage_layer/image_layer.rs index c49281dc45..0db9e8c845 100644 --- a/pageserver/src/tenant/storage_layer/image_layer.rs +++ b/pageserver/src/tenant/storage_layer/image_layer.rs @@ -25,6 +25,39 @@ //! layer, and offsets to the other parts. The "index" is a B-tree, //! mapping from Key to an offset in the "values" part. The //! actual page images are stored in the "values" part. 
+use std::collections::{HashMap, VecDeque}; +use std::fs::File; +use std::io::SeekFrom; +use std::ops::Range; +use std::os::unix::prelude::FileExt; +use std::str::FromStr; +use std::sync::Arc; + +use anyhow::{Context, Result, bail, ensure}; +use bytes::Bytes; +use camino::{Utf8Path, Utf8PathBuf}; +use hex; +use itertools::Itertools; +use pageserver_api::config::MaxVectoredReadBytes; +use pageserver_api::key::{DBDIR_KEY, KEY_SIZE, Key}; +use pageserver_api::keyspace::KeySpace; +use pageserver_api::shard::{ShardIdentity, TenantShardId}; +use pageserver_api::value::Value; +use rand::Rng; +use rand::distributions::Alphanumeric; +use serde::{Deserialize, Serialize}; +use tokio::sync::OnceCell; +use tokio_stream::StreamExt; +use tracing::*; +use utils::bin_ser::BeSer; +use utils::id::{TenantId, TimelineId}; +use utils::lsn::Lsn; + +use super::layer_name::ImageLayerName; +use super::{ + AsLayerDesc, LayerName, OnDiskValue, OnDiskValueIo, PersistentLayerDesc, ResidentLayer, + ValuesReconstructState, +}; use crate::config::PageServerConf; use crate::context::{PageContentKind, RequestContext, RequestContextBuilder}; use crate::page_cache::{self, FileId, PAGE_SZ}; @@ -39,44 +72,8 @@ use crate::tenant::vectored_blob_io::{ VectoredReadPlanner, }; use crate::virtual_file::owned_buffers_io::io_buf_ext::IoBufExt; -use crate::virtual_file::IoBufferMut; -use crate::virtual_file::{self, MaybeFatalIo, VirtualFile}; +use crate::virtual_file::{self, IoBufferMut, MaybeFatalIo, VirtualFile}; use crate::{IMAGE_FILE_MAGIC, STORAGE_FORMAT_VERSION, TEMP_FILE_SUFFIX}; -use anyhow::{bail, ensure, Context, Result}; -use bytes::Bytes; -use camino::{Utf8Path, Utf8PathBuf}; -use hex; -use itertools::Itertools; -use pageserver_api::config::MaxVectoredReadBytes; -use pageserver_api::key::DBDIR_KEY; -use pageserver_api::key::{Key, KEY_SIZE}; -use pageserver_api::keyspace::KeySpace; -use pageserver_api::shard::{ShardIdentity, TenantShardId}; -use pageserver_api::value::Value; -use rand::{distributions::Alphanumeric, Rng}; -use serde::{Deserialize, Serialize}; -use std::collections::{HashMap, VecDeque}; -use std::fs::File; -use std::io::SeekFrom; -use std::ops::Range; -use std::os::unix::prelude::FileExt; -use std::str::FromStr; -use std::sync::Arc; -use tokio::sync::OnceCell; -use tokio_stream::StreamExt; -use tracing::*; - -use utils::{ - bin_ser::BeSer, - id::{TenantId, TimelineId}, - lsn::Lsn, -}; - -use super::layer_name::ImageLayerName; -use super::{ - AsLayerDesc, LayerName, OnDiskValue, OnDiskValueIo, PersistentLayerDesc, ResidentLayer, - ValuesReconstructState, -}; /// /// Header stored in the beginning of the file @@ -603,7 +600,10 @@ impl ImageLayerInner { .as_slice() .iter() .filter_map(|(_, blob_meta)| { - if blob_meta.key.is_rel_dir_key() || blob_meta.key == DBDIR_KEY { + if blob_meta.key.is_rel_dir_key() + || blob_meta.key == DBDIR_KEY + || blob_meta.key.is_aux_file_key() + { // The size of values for these keys is unbounded and can // grow very large in pathological cases. 
None @@ -1133,34 +1133,26 @@ impl ImageLayerIterator<'_> { #[cfg(test)] mod test { - use std::{sync::Arc, time::Duration}; + use std::sync::Arc; + use std::time::Duration; use bytes::Bytes; use itertools::Itertools; - use pageserver_api::{ - key::Key, - shard::{ShardCount, ShardIdentity, ShardNumber, ShardStripeSize}, - value::Value, - }; - use utils::{ - generation::Generation, - id::{TenantId, TimelineId}, - lsn::Lsn, - }; - - use crate::{ - context::RequestContext, - tenant::{ - config::TenantConf, - harness::{TenantHarness, TIMELINE_ID}, - storage_layer::{Layer, ResidentLayer}, - vectored_blob_io::StreamingVectoredReadPlanner, - Tenant, Timeline, - }, - DEFAULT_PG_VERSION, - }; + use pageserver_api::key::Key; + use pageserver_api::shard::{ShardCount, ShardIdentity, ShardNumber, ShardStripeSize}; + use pageserver_api::value::Value; + use utils::generation::Generation; + use utils::id::{TenantId, TimelineId}; + use utils::lsn::Lsn; use super::{ImageLayerIterator, ImageLayerWriter}; + use crate::DEFAULT_PG_VERSION; + use crate::context::RequestContext; + use crate::tenant::config::TenantConf; + use crate::tenant::harness::{TIMELINE_ID, TenantHarness}; + use crate::tenant::storage_layer::{Layer, ResidentLayer}; + use crate::tenant::vectored_blob_io::StreamingVectoredReadPlanner; + use crate::tenant::{Tenant, Timeline}; #[tokio::test] async fn image_layer_rewrite() { @@ -1170,10 +1162,10 @@ mod test { ..TenantConf::default() }; let tenant_id = TenantId::generate(); - let mut gen = Generation::new(0xdead0001); + let mut gen_ = Generation::new(0xdead0001); let mut get_next_gen = || { - let ret = gen; - gen = gen.next(); + let ret = gen_; + gen_ = gen_.next(); ret }; // The LSN at which we will create an image layer to filter diff --git a/pageserver/src/tenant/storage_layer/inmemory_layer.rs b/pageserver/src/tenant/storage_layer/inmemory_layer.rs index 61a0fdea8c..ffdfe1dc27 100644 --- a/pageserver/src/tenant/storage_layer/inmemory_layer.rs +++ b/pageserver/src/tenant/storage_layer/inmemory_layer.rs @@ -4,38 +4,39 @@ //! held in an ephemeral file, not in memory. The metadata for each page version, i.e. //! its position in the file, is kept in memory, though. //! 
-use crate::assert_u64_eq_usize::{u64_to_usize, U64IsUsize, UsizeIsU64}; +use std::cmp::Ordering; +use std::collections::{BTreeMap, HashMap}; +use std::fmt::Write; +use std::ops::Range; +use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering as AtomicOrdering}; +use std::sync::{Arc, OnceLock}; +use std::time::Instant; + +use anyhow::Result; +use camino::Utf8PathBuf; +use pageserver_api::key::{CompactKey, Key}; +use pageserver_api::keyspace::KeySpace; +use pageserver_api::models::InMemoryLayerInfo; +use pageserver_api::shard::TenantShardId; +use tokio::sync::RwLock; +use tracing::*; +use utils::id::TimelineId; +use utils::lsn::Lsn; +use utils::vec_map::VecMap; +use wal_decoder::serialized_batch::{SerializedValueBatch, SerializedValueMeta, ValueMeta}; + +use super::{DeltaLayerWriter, PersistentLayerDesc, ValuesReconstructState}; +use crate::assert_u64_eq_usize::{U64IsUsize, UsizeIsU64, u64_to_usize}; use crate::config::PageServerConf; use crate::context::{PageContentKind, RequestContext, RequestContextBuilder}; +// avoid binding to Write (conflicts with std::io::Write) +// while being able to use std::fmt::Write's methods +use crate::metrics::TIMELINE_EPHEMERAL_BYTES; use crate::tenant::ephemeral_file::EphemeralFile; use crate::tenant::storage_layer::{OnDiskValue, OnDiskValueIo}; use crate::tenant::timeline::GetVectoredError; use crate::virtual_file::owned_buffers_io::io_buf_ext::IoBufExt; use crate::{l0_flush, page_cache}; -use anyhow::Result; -use camino::Utf8PathBuf; -use pageserver_api::key::CompactKey; -use pageserver_api::key::Key; -use pageserver_api::keyspace::KeySpace; -use pageserver_api::models::InMemoryLayerInfo; -use pageserver_api::shard::TenantShardId; -use std::collections::{BTreeMap, HashMap}; -use std::sync::{Arc, OnceLock}; -use std::time::Instant; -use tracing::*; -use utils::{id::TimelineId, lsn::Lsn, vec_map::VecMap}; -use wal_decoder::serialized_batch::{SerializedValueBatch, SerializedValueMeta, ValueMeta}; -// avoid binding to Write (conflicts with std::io::Write) -// while being able to use std::fmt::Write's methods -use crate::metrics::TIMELINE_EPHEMERAL_BYTES; -use std::cmp::Ordering; -use std::fmt::Write; -use std::ops::Range; -use std::sync::atomic::Ordering as AtomicOrdering; -use std::sync::atomic::{AtomicU64, AtomicUsize}; -use tokio::sync::RwLock; - -use super::{DeltaLayerWriter, PersistentLayerDesc, ValuesReconstructState}; pub(crate) mod vectored_dio_read; @@ -555,7 +556,9 @@ impl InMemoryLayer { gate: &utils::sync::gate::Gate, ctx: &RequestContext, ) -> Result { - trace!("initializing new empty InMemoryLayer for writing on timeline {timeline_id} at {start_lsn}"); + trace!( + "initializing new empty InMemoryLayer for writing on timeline {timeline_id} at {start_lsn}" + ); let file = EphemeralFile::create(conf, tenant_shard_id, timeline_id, gate, ctx).await?; let key = InMemoryLayerFileId(file.page_cache_file_id()); @@ -816,8 +819,7 @@ mod tests { #[test] fn test_index_entry() { const MAX_SUPPORTED_POS: usize = IndexEntry::MAX_SUPPORTED_POS; - use IndexEntryNewArgs as Args; - use IndexEntryUnpacked as Unpacked; + use {IndexEntryNewArgs as Args, IndexEntryUnpacked as Unpacked}; let roundtrip = |args, expect: Unpacked| { let res = IndexEntry::new(args).expect("this tests expects no errors"); diff --git a/pageserver/src/tenant/storage_layer/inmemory_layer/vectored_dio_read.rs b/pageserver/src/tenant/storage_layer/inmemory_layer/vectored_dio_read.rs index 1d86015fab..90455fd0ca 100644 --- a/pageserver/src/tenant/storage_layer/inmemory_layer/vectored_dio_read.rs 
+++ b/pageserver/src/tenant/storage_layer/inmemory_layer/vectored_dio_read.rs @@ -1,16 +1,13 @@ -use std::{ - collections::BTreeMap, - sync::{Arc, RwLock}, -}; +use std::collections::BTreeMap; +use std::sync::{Arc, RwLock}; use itertools::Itertools; use tokio_epoll_uring::{BoundedBuf, IoBufMut, Slice}; -use crate::{ - assert_u64_eq_usize::{U64IsUsize, UsizeIsU64}, - context::RequestContext, - virtual_file::{owned_buffers_io::io_buf_aligned::IoBufAlignedMut, IoBufferMut}, -}; +use crate::assert_u64_eq_usize::{U64IsUsize, UsizeIsU64}; +use crate::context::RequestContext; +use crate::virtual_file::IoBufferMut; +use crate::virtual_file::owned_buffers_io::io_buf_aligned::IoBufAlignedMut; /// The file interface we require. At runtime, this is a [`crate::tenant::ephemeral_file::EphemeralFile`]. pub trait File: Send { @@ -132,7 +129,9 @@ where let req_len = match cur { LogicalReadState::NotStarted(buf) => { if buf.len() != 0 { - panic!("The `LogicalRead`s that are passed in must be freshly created using `LogicalRead::new`"); + panic!( + "The `LogicalRead`s that are passed in must be freshly created using `LogicalRead::new`" + ); } // buf.cap() == 0 is ok @@ -141,7 +140,9 @@ where *state = LogicalReadState::Ongoing(buf); req_len } - x => panic!("must only call with fresh LogicalReads, got another state, leaving Undefined state behind state={x:?}"), + x => panic!( + "must only call with fresh LogicalReads, got another state, leaving Undefined state behind state={x:?}" + ), }; // plan which chunks we need to read from @@ -422,15 +423,15 @@ impl Buffer for Vec { #[cfg(test)] #[allow(clippy::assertions_on_constants)] mod tests { + use std::cell::RefCell; + use std::collections::VecDeque; + use rand::Rng; - use crate::{ - context::DownloadBehavior, task_mgr::TaskKind, - virtual_file::owned_buffers_io::slice::SliceMutExt, - }; - use super::*; - use std::{cell::RefCell, collections::VecDeque}; + use crate::context::DownloadBehavior; + use crate::task_mgr::TaskKind; + use crate::virtual_file::owned_buffers_io::slice::SliceMutExt; struct InMemoryFile { content: Vec, diff --git a/pageserver/src/tenant/storage_layer/layer.rs b/pageserver/src/tenant/storage_layer/layer.rs index 0bf606cf0a..ae06aca63b 100644 --- a/pageserver/src/tenant/storage_layer/layer.rs +++ b/pageserver/src/tenant/storage_layer/layer.rs @@ -1,32 +1,32 @@ +use std::ops::Range; +use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; +use std::sync::{Arc, Weak}; +use std::time::{Duration, SystemTime}; + use anyhow::Context; use camino::{Utf8Path, Utf8PathBuf}; use pageserver_api::keyspace::KeySpace; use pageserver_api::models::HistoricLayerInfo; use pageserver_api::shard::{ShardIdentity, ShardIndex, TenantShardId}; -use std::ops::Range; -use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; -use std::sync::{Arc, Weak}; -use std::time::{Duration, SystemTime}; use tracing::Instrument; +use utils::generation::Generation; use utils::id::TimelineId; use utils::lsn::Lsn; use utils::sync::{gate, heavier_once_cell}; -use crate::config::PageServerConf; -use crate::context::{DownloadBehavior, RequestContext, RequestContextBuilder}; -use crate::span::debug_assert_current_span_has_tenant_and_timeline_id; -use crate::task_mgr::TaskKind; -use crate::tenant::timeline::{CompactionError, GetVectoredError}; -use crate::tenant::{remote_timeline_client::LayerFileMetadata, Timeline}; - use super::delta_layer::{self}; use super::image_layer::{self}; use super::{ AsLayerDesc, ImageLayerWriter, LayerAccessStats, LayerAccessStatsReset, LayerName, 
LayerVisibilityHint, PersistentLayerDesc, ValuesReconstructState, }; - -use utils::generation::Generation; +use crate::config::PageServerConf; +use crate::context::{DownloadBehavior, RequestContext, RequestContextBuilder}; +use crate::span::debug_assert_current_span_has_tenant_and_timeline_id; +use crate::task_mgr::TaskKind; +use crate::tenant::Timeline; +use crate::tenant::remote_timeline_client::LayerFileMetadata; +use crate::tenant::timeline::{CompactionError, GetVectoredError}; #[cfg(test)] mod tests; @@ -1873,8 +1873,8 @@ impl ResidentLayer { self.owner.record_access(ctx); let res = match inner { - Delta(ref d) => delta_layer::DeltaLayerInner::load_keys(d, ctx).await, - Image(ref i) => image_layer::ImageLayerInner::load_keys(i, ctx).await, + Delta(d) => delta_layer::DeltaLayerInner::load_keys(d, ctx).await, + Image(i) => image_layer::ImageLayerInner::load_keys(i, ctx).await, }; res.with_context(|| format!("Layer index is corrupted for {self}")) } @@ -1920,7 +1920,7 @@ impl ResidentLayer { let owner = &self.owner.0; match self.downloaded.get(owner, ctx).await? { - Delta(ref d) => d + Delta(d) => d .copy_prefix(writer, until, ctx) .await .with_context(|| format!("copy_delta_prefix until {until} of {self}")), @@ -1943,7 +1943,7 @@ impl ResidentLayer { ) -> anyhow::Result<&delta_layer::DeltaLayerInner> { use LayerKind::*; match self.downloaded.get(&self.owner.0, ctx).await? { - Delta(ref d) => Ok(d), + Delta(d) => Ok(d), Image(_) => Err(anyhow::anyhow!("image layer")), } } @@ -1955,7 +1955,7 @@ impl ResidentLayer { ) -> anyhow::Result<&image_layer::ImageLayerInner> { use LayerKind::*; match self.downloaded.get(&self.owner.0, ctx).await? { - Image(ref d) => Ok(d), + Image(d) => Ok(d), Delta(_) => Err(anyhow::anyhow!("delta layer")), } } diff --git a/pageserver/src/tenant/storage_layer/layer/tests.rs b/pageserver/src/tenant/storage_layer/layer/tests.rs index d93c378ffc..724150d27f 100644 --- a/pageserver/src/tenant/storage_layer/layer/tests.rs +++ b/pageserver/src/tenant/storage_layer/layer/tests.rs @@ -1,22 +1,16 @@ use std::time::UNIX_EPOCH; -use pageserver_api::key::{Key, CONTROLFILE_KEY}; +use pageserver_api::key::{CONTROLFILE_KEY, Key}; use tokio::task::JoinSet; -use utils::{ - completion::{self, Completion}, - id::TimelineId, -}; +use utils::completion::{self, Completion}; +use utils::id::TimelineId; use super::failpoints::{Failpoint, FailpointKind}; use super::*; -use crate::{ - context::DownloadBehavior, - tenant::{ - harness::test_img, - storage_layer::{IoConcurrency, LayerVisibilityHint}, - }, -}; -use crate::{task_mgr::TaskKind, tenant::harness::TenantHarness}; +use crate::context::DownloadBehavior; +use crate::task_mgr::TaskKind; +use crate::tenant::harness::{TenantHarness, test_img}; +use crate::tenant::storage_layer::{IoConcurrency, LayerVisibilityHint}; /// Used in tests to advance a future to wanted await point, and not futher. 
const ADVANCE: std::time::Duration = std::time::Duration::from_secs(3600); @@ -771,10 +765,12 @@ async fn evict_and_wait_does_not_wait_for_download() { let (arrival, _download_arrived) = utils::completion::channel(); layer.enable_failpoint(Failpoint::WaitBeforeDownloading(Some(arrival), barrier)); - let mut download = std::pin::pin!(layer - .0 - .get_or_maybe_download(true, None) - .instrument(download_span)); + let mut download = std::pin::pin!( + layer + .0 + .get_or_maybe_download(true, None) + .instrument(download_span) + ); assert!( !layer.is_likely_resident(), diff --git a/pageserver/src/tenant/storage_layer/layer_desc.rs b/pageserver/src/tenant/storage_layer/layer_desc.rs index 2097e90764..ed16dcaa0d 100644 --- a/pageserver/src/tenant/storage_layer/layer_desc.rs +++ b/pageserver/src/tenant/storage_layer/layer_desc.rs @@ -1,16 +1,15 @@ use core::fmt::Display; -use pageserver_api::shard::TenantShardId; use std::ops::Range; -use utils::{id::TimelineId, lsn::Lsn}; use pageserver_api::key::Key; - -use super::{DeltaLayerName, ImageLayerName, LayerName}; - +use pageserver_api::shard::TenantShardId; use serde::{Deserialize, Serialize}; - #[cfg(test)] use utils::id::TenantId; +use utils::id::TimelineId; +use utils::lsn::Lsn; + +use super::{DeltaLayerName, ImageLayerName, LayerName}; /// A unique identifier of a persistent layer. /// diff --git a/pageserver/src/tenant/storage_layer/layer_name.rs b/pageserver/src/tenant/storage_layer/layer_name.rs index addf3b85d9..0f7995f87b 100644 --- a/pageserver/src/tenant/storage_layer/layer_name.rs +++ b/pageserver/src/tenant/storage_layer/layer_name.rs @@ -1,12 +1,12 @@ //! //! Helper functions for dealing with filenames of the image and delta layer files. //! -use pageserver_api::key::Key; use std::cmp::Ordering; use std::fmt; use std::ops::Range; use std::str::FromStr; +use pageserver_api::key::Key; use utils::lsn::Lsn; use super::PersistentLayerDesc; @@ -305,7 +305,7 @@ impl FromStr for LayerName { (None, None) => { return Err(format!( "neither delta nor image layer file name: {value:?}" - )) + )); } (Some(delta), None) => Self::Delta(delta), (None, Some(image)) => Self::Image(image), diff --git a/pageserver/src/tenant/storage_layer/merge_iterator.rs b/pageserver/src/tenant/storage_layer/merge_iterator.rs index 19cfcb0867..76cdddd06a 100644 --- a/pageserver/src/tenant/storage_layer/merge_iterator.rs +++ b/pageserver/src/tenant/storage_layer/merge_iterator.rs @@ -1,21 +1,16 @@ -use std::{ - cmp::Ordering, - collections::{binary_heap, BinaryHeap}, - sync::Arc, -}; +use std::cmp::Ordering; +use std::collections::{BinaryHeap, binary_heap}; +use std::sync::Arc; use anyhow::bail; use pageserver_api::key::Key; +use pageserver_api::value::Value; use utils::lsn::Lsn; +use super::delta_layer::{DeltaLayerInner, DeltaLayerIterator}; +use super::image_layer::{ImageLayerInner, ImageLayerIterator}; +use super::{PersistentLayerDesc, PersistentLayerKey}; use crate::context::RequestContext; -use pageserver_api::value::Value; - -use super::{ - delta_layer::{DeltaLayerInner, DeltaLayerIterator}, - image_layer::{ImageLayerInner, ImageLayerIterator}, - PersistentLayerDesc, PersistentLayerKey, -}; #[derive(Clone, Copy)] pub(crate) enum LayerRef<'a> { @@ -349,24 +344,18 @@ impl<'a> MergeIterator<'a> { #[cfg(test)] mod tests { - use super::*; - use itertools::Itertools; use pageserver_api::key::Key; - use utils::lsn::Lsn; - - use crate::{ - tenant::{ - harness::{TenantHarness, TIMELINE_ID}, - storage_layer::delta_layer::test::{produce_delta_layer, sort_delta}, - }, - 
DEFAULT_PG_VERSION, - }; - - #[cfg(feature = "testing")] - use crate::tenant::storage_layer::delta_layer::test::sort_delta_value; #[cfg(feature = "testing")] use pageserver_api::record::NeonWalRecord; + use utils::lsn::Lsn; + + use super::*; + use crate::DEFAULT_PG_VERSION; + use crate::tenant::harness::{TIMELINE_ID, TenantHarness}; + #[cfg(feature = "testing")] + use crate::tenant::storage_layer::delta_layer::test::sort_delta_value; + use crate::tenant::storage_layer::delta_layer::test::{produce_delta_layer, sort_delta}; async fn assert_merge_iter_equal( merge_iter: &mut MergeIterator<'_>, diff --git a/pageserver/src/tenant/tasks.rs b/pageserver/src/tenant/tasks.rs index 5e63f59fd8..c90f81889b 100644 --- a/pageserver/src/tenant/tasks.rs +++ b/pageserver/src/tenant/tasks.rs @@ -8,24 +8,24 @@ use std::sync::Arc; use std::time::{Duration, Instant}; use once_cell::sync::Lazy; +use pageserver_api::config::tenant_conf_defaults::DEFAULT_COMPACTION_PERIOD; use rand::Rng; use scopeguard::defer; use tokio::sync::{Semaphore, SemaphorePermit}; use tokio_util::sync::CancellationToken; use tracing::*; - -use crate::context::{DownloadBehavior, RequestContext}; -use crate::metrics::{self, BackgroundLoopSemaphoreMetricsRecorder, TENANT_TASK_EVENTS}; -use crate::task_mgr::{self, TaskKind, BACKGROUND_RUNTIME, TOKIO_WORKER_THREADS}; -use crate::tenant::throttle::Stats; -use crate::tenant::timeline::compaction::CompactionOutcome; -use crate::tenant::timeline::CompactionError; -use crate::tenant::{Tenant, TenantState}; -use pageserver_api::config::tenant_conf_defaults::DEFAULT_COMPACTION_PERIOD; use utils::backoff::exponential_backoff_duration; use utils::completion::Barrier; use utils::pausable_failpoint; +use crate::context::{DownloadBehavior, RequestContext}; +use crate::metrics::{self, BackgroundLoopSemaphoreMetricsRecorder, TENANT_TASK_EVENTS}; +use crate::task_mgr::{self, BACKGROUND_RUNTIME, TOKIO_WORKER_THREADS, TaskKind}; +use crate::tenant::throttle::Stats; +use crate::tenant::timeline::CompactionError; +use crate::tenant::timeline::compaction::CompactionOutcome; +use crate::tenant::{Tenant, TenantState}; + /// Semaphore limiting concurrent background tasks (across all tenants). /// /// We use 3/4 Tokio threads, to avoid blocking all threads in case we do any CPU-heavy work. 
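// [Editor's sketch, not part of the patch] The comment above describes the sizing
// rule for the background-task semaphore: use 3/4 of the Tokio worker threads so
// CPU-heavy loops cannot occupy every thread. A minimal, self-contained
// illustration of that rule follows; the helper names are assumptions here, while
// the real constant lives in `pageserver/src/tenant/tasks.rs` and is sized from
// `TOKIO_WORKER_THREADS`.
use std::sync::Arc;
use tokio::sync::Semaphore;

fn background_task_semaphore(tokio_worker_threads: usize) -> Arc<Semaphore> {
    // Keep at least one permit even on very small runtimes.
    let permits = std::cmp::max(1, tokio_worker_threads * 3 / 4);
    Arc::new(Semaphore::new(permits))
}

async fn with_background_permit<T, F>(sem: &Semaphore, work: F) -> T
where
    F: std::future::Future<Output = T>,
{
    // Hold the permit for the whole loop iteration, so at most `permits`
    // CPU-heavy background tasks run concurrently across all tenants.
    let _permit = sem.acquire().await.expect("semaphore is never closed");
    work.await
}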
@@ -287,15 +287,16 @@ fn log_compaction_error( sleep_duration: Duration, task_cancelled: bool, ) { - use crate::pgdatadir_mapping::CollectKeySpaceError; - use crate::tenant::upload_queue::NotInitialized; - use crate::tenant::PageReconstructError; use CompactionError::*; + use crate::tenant::PageReconstructError; + use crate::tenant::upload_queue::NotInitialized; + let level = match err { + e if e.is_cancel() => return, ShuttingDown => return, Offload(_) => Level::ERROR, - CollectKeySpaceError(CollectKeySpaceError::Cancelled) => Level::INFO, + AlreadyRunning(_) => Level::ERROR, CollectKeySpaceError(_) => Level::ERROR, _ if task_cancelled => Level::INFO, Other(err) => { diff --git a/pageserver/src/tenant/throttle.rs b/pageserver/src/tenant/throttle.rs index 300d779125..6c37c3771b 100644 --- a/pageserver/src/tenant/throttle.rs +++ b/pageserver/src/tenant/throttle.rs @@ -1,10 +1,6 @@ -use std::{ - sync::{ - atomic::{AtomicU64, Ordering}, - Arc, - }, - time::Instant, -}; +use std::sync::Arc; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::time::Instant; use arc_swap::ArcSwap; use utils::leaky_bucket::{LeakyBucketConfig, RateLimiter}; diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 48c208d5d7..dfa50d498c 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -14,55 +14,6 @@ pub mod span; pub mod uninit; mod walreceiver; -use anyhow::{anyhow, bail, ensure, Context, Result}; -use arc_swap::{ArcSwap, ArcSwapOption}; -use bytes::Bytes; -use camino::Utf8Path; -use chrono::{DateTime, Utc}; -use compaction::CompactionOutcome; -use enumset::EnumSet; -use fail::fail_point; -use futures::FutureExt; -use futures::{stream::FuturesUnordered, StreamExt}; -use handle::ShardTimelineId; -use layer_manager::Shutdown; -use offload::OffloadError; -use once_cell::sync::Lazy; -use pageserver_api::models::PageTraceEvent; -use pageserver_api::{ - key::{ - KEY_SIZE, METADATA_KEY_BEGIN_PREFIX, METADATA_KEY_END_PREFIX, NON_INHERITED_RANGE, - SPARSE_RANGE, - }, - keyspace::{KeySpaceAccum, KeySpaceRandomAccum, SparseKeyPartitioning}, - models::{ - CompactKeyRange, CompactLsnRange, CompactionAlgorithm, CompactionAlgorithmSettings, - DownloadRemoteLayersTaskInfo, DownloadRemoteLayersTaskSpawnRequest, EvictionPolicy, - InMemoryLayerInfo, LayerMapInfo, LsnLease, TimelineState, - }, - reltag::BlockNumber, - shard::{ShardIdentity, ShardNumber, TenantShardId}, -}; -use rand::Rng; -use remote_storage::DownloadError; -use serde_with::serde_as; -use storage_broker::BrokerClientChannel; -use tokio::runtime::Handle; -use tokio::sync::mpsc::Sender; -use tokio::sync::{oneshot, watch, Notify}; -use tokio_util::sync::CancellationToken; -use tracing::*; -use utils::critical; -use utils::rate_limit::RateLimit; -use utils::{ - fs_ext, - guard_arc_swap::GuardArcSwap, - pausable_failpoint, - postgres_client::PostgresClientProtocol, - sync::gate::{Gate, GateGuard}, -}; -use wal_decoder::serialized_batch::{SerializedValueBatch, ValueMeta}; - use std::array; use std::cmp::{max, min}; use std::collections::btree_map::Entry; @@ -72,74 +23,58 @@ use std::sync::atomic::{AtomicBool, AtomicU64, Ordering as AtomicOrdering}; use std::sync::{Arc, Mutex, OnceLock, RwLock, Weak}; use std::time::{Duration, Instant, SystemTime}; -use crate::l0_flush::{self, L0FlushGlobalState}; -use crate::tenant::storage_layer::ImageLayerName; -use crate::{ - aux_file::AuxFileSizeEstimator, - page_service::TenantManagerTypes, - tenant::{ - config::AttachmentMode, - layer_map::{LayerMap, 
SearchResult}, - metadata::TimelineMetadata, - storage_layer::{ - inmemory_layer::IndexEntry, BatchLayerWriter, IoConcurrency, PersistentLayerDesc, - ValueReconstructSituation, - }, - }, - walingest::WalLagCooldown, - walredo, -}; -use crate::{ - context::{DownloadBehavior, RequestContext}, - disk_usage_eviction_task::DiskUsageEvictionInfo, - pgdatadir_mapping::CollectKeySpaceError, -}; -use crate::{ - disk_usage_eviction_task::finite_f32, - tenant::storage_layer::{ - AsLayerDesc, DeltaLayerWriter, EvictionError, ImageLayerWriter, InMemoryLayer, Layer, - LayerAccessStatsReset, LayerName, ResidentLayer, ValueReconstructState, - ValuesReconstructState, - }, -}; -use crate::{ - disk_usage_eviction_task::EvictionCandidate, tenant::storage_layer::delta_layer::DeltaEntry, -}; -use crate::{ - metrics::ScanLatencyOngoingRecording, tenant::timeline::logical_size::CurrentLogicalSize, -}; -use crate::{ - pgdatadir_mapping::DirectoryKind, - virtual_file::{MaybeFatalIo, VirtualFile}, -}; -use crate::{pgdatadir_mapping::LsnForTimestamp, tenant::tasks::BackgroundLoopKind}; -use crate::{pgdatadir_mapping::MAX_AUX_FILE_V2_DELTAS, tenant::storage_layer::PersistentLayerKey}; +use anyhow::{Context, Result, anyhow, bail, ensure}; +use arc_swap::{ArcSwap, ArcSwapOption}; +use bytes::Bytes; +use camino::Utf8Path; +use chrono::{DateTime, Utc}; +use compaction::{CompactionOutcome, GcCompactionCombinedSettings}; +use enumset::EnumSet; +use fail::fail_point; +use futures::stream::FuturesUnordered; +use futures::{FutureExt, StreamExt}; +use handle::ShardTimelineId; +use layer_manager::Shutdown; +use offload::OffloadError; +use once_cell::sync::Lazy; use pageserver_api::config::tenant_conf_defaults::DEFAULT_PITR_INTERVAL; - -use crate::config::PageServerConf; -use crate::keyspace::{KeyPartitioning, KeySpace}; -use crate::metrics::{TimelineMetrics, DELTAS_PER_READ_GLOBAL, LAYERS_PER_READ_GLOBAL}; -use crate::pgdatadir_mapping::{CalculateLogicalSizeError, MetricsUpdate}; -use crate::tenant::config::TenantConfOpt; -use pageserver_api::reltag::RelTag; -use pageserver_api::shard::ShardIndex; - -use postgres_connection::PgConnectionConfig; -use postgres_ffi::{to_pg_timestamp, v14::xlog_utils, WAL_SEGMENT_SIZE}; -use utils::{ - completion, - generation::Generation, - id::TimelineId, - lsn::{AtomicLsn, Lsn, RecordLsn}, - seqwait::SeqWait, - simple_rcu::{Rcu, RcuReadGuard}, +use pageserver_api::key::{ + KEY_SIZE, Key, METADATA_KEY_BEGIN_PREFIX, METADATA_KEY_END_PREFIX, NON_INHERITED_RANGE, + SPARSE_RANGE, }; - -use crate::task_mgr; -use crate::task_mgr::TaskKind; -use crate::tenant::gc_result::GcResult; -use crate::ZERO_PAGE; -use pageserver_api::key::Key; +use pageserver_api::keyspace::{KeySpaceAccum, KeySpaceRandomAccum, SparseKeyPartitioning}; +use pageserver_api::models::{ + CompactKeyRange, CompactLsnRange, CompactionAlgorithm, CompactionAlgorithmSettings, + DownloadRemoteLayersTaskInfo, DownloadRemoteLayersTaskSpawnRequest, EvictionPolicy, + InMemoryLayerInfo, LayerMapInfo, LsnLease, PageTraceEvent, TimelineState, +}; +use pageserver_api::reltag::{BlockNumber, RelTag}; +use pageserver_api::shard::{ShardIdentity, ShardIndex, ShardNumber, TenantShardId}; +#[cfg(test)] +use pageserver_api::value::Value; +use postgres_connection::PgConnectionConfig; +use postgres_ffi::v14::xlog_utils; +use postgres_ffi::{WAL_SEGMENT_SIZE, to_pg_timestamp}; +use rand::Rng; +use remote_storage::DownloadError; +use serde_with::serde_as; +use storage_broker::BrokerClientChannel; +use tokio::runtime::Handle; +use tokio::sync::mpsc::Sender; +use 
tokio::sync::{Notify, oneshot, watch}; +use tokio_util::sync::CancellationToken; +use tracing::*; +use utils::generation::Generation; +use utils::guard_arc_swap::GuardArcSwap; +use utils::id::TimelineId; +use utils::lsn::{AtomicLsn, Lsn, RecordLsn}; +use utils::postgres_client::PostgresClientProtocol; +use utils::rate_limit::RateLimit; +use utils::seqwait::SeqWait; +use utils::simple_rcu::{Rcu, RcuReadGuard}; +use utils::sync::gate::{Gate, GateGuard}; +use utils::{completion, critical, fs_ext, pausable_failpoint}; +use wal_decoder::serialized_batch::{SerializedValueBatch, ValueMeta}; use self::delete::DeleteTimelineFlow; pub(super) use self::eviction_task::EvictionTaskTenantState; @@ -147,23 +82,48 @@ use self::eviction_task::EvictionTaskTimelineState; use self::layer_manager::LayerManager; use self::logical_size::LogicalSize; use self::walreceiver::{WalReceiver, WalReceiverConf}; - +use super::config::TenantConf; +use super::remote_timeline_client::index::{GcCompactionState, IndexPart}; +use super::remote_timeline_client::{RemoteTimelineClient, WaitCompletionError}; +use super::secondary::heatmap::HeatMapLayer; +use super::storage_layer::{LayerFringe, LayerVisibilityHint, ReadableLayer}; +use super::upload_queue::NotInitialized; use super::{ - config::TenantConf, storage_layer::LayerVisibilityHint, upload_queue::NotInitialized, - MaybeOffloaded, + AttachedTenantConf, GcError, HeatMapTimeline, MaybeOffloaded, + debug_assert_current_span_has_tenant_and_timeline_id, }; -use super::{ - debug_assert_current_span_has_tenant_and_timeline_id, AttachedTenantConf, HeatMapTimeline, +use crate::aux_file::AuxFileSizeEstimator; +use crate::config::PageServerConf; +use crate::context::{DownloadBehavior, RequestContext}; +use crate::disk_usage_eviction_task::{DiskUsageEvictionInfo, EvictionCandidate, finite_f32}; +use crate::keyspace::{KeyPartitioning, KeySpace}; +use crate::l0_flush::{self, L0FlushGlobalState}; +use crate::metrics::{ + DELTAS_PER_READ_GLOBAL, LAYERS_PER_READ_GLOBAL, ScanLatencyOngoingRecording, TimelineMetrics, }; -use super::{remote_timeline_client::index::IndexPart, storage_layer::LayerFringe}; -use super::{ - remote_timeline_client::RemoteTimelineClient, remote_timeline_client::WaitCompletionError, - storage_layer::ReadableLayer, +use crate::page_service::TenantManagerTypes; +use crate::pgdatadir_mapping::{ + CalculateLogicalSizeError, CollectKeySpaceError, DirectoryKind, LsnForTimestamp, + MAX_AUX_FILE_V2_DELTAS, MetricsUpdate, }; -use super::{secondary::heatmap::HeatMapLayer, GcError}; - -#[cfg(test)] -use pageserver_api::value::Value; +use crate::task_mgr::TaskKind; +use crate::tenant::config::{AttachmentMode, TenantConfOpt}; +use crate::tenant::gc_result::GcResult; +use crate::tenant::layer_map::{LayerMap, SearchResult}; +use crate::tenant::metadata::TimelineMetadata; +use crate::tenant::storage_layer::delta_layer::DeltaEntry; +use crate::tenant::storage_layer::inmemory_layer::IndexEntry; +use crate::tenant::storage_layer::{ + AsLayerDesc, BatchLayerWriter, DeltaLayerWriter, EvictionError, ImageLayerName, + ImageLayerWriter, InMemoryLayer, IoConcurrency, Layer, LayerAccessStatsReset, LayerName, + PersistentLayerDesc, PersistentLayerKey, ResidentLayer, ValueReconstructSituation, + ValueReconstructState, ValuesReconstructState, +}; +use crate::tenant::tasks::BackgroundLoopKind; +use crate::tenant::timeline::logical_size::CurrentLogicalSize; +use crate::virtual_file::{MaybeFatalIo, VirtualFile}; +use crate::walingest::WalLagCooldown; +use crate::{ZERO_PAGE, task_mgr, walredo}; 
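// [Editor's note, not part of the patch] The import churn in this and the
// surrounding files follows one convention: plain `use` statements grouped as
// std -> external crates -> crate-local, one module path per `use`, sorted within
// each group. This looks like rustfmt's `group_imports = "StdExternalCrate"` plus
// `imports_granularity = "Module"` (an assumption; the rustfmt configuration
// change is not part of this excerpt). The target shape, illustratively:
use std::collections::HashMap;            // 1. standard library
use std::sync::Arc;

use tokio_util::sync::CancellationToken;  // 2. external crates
use tracing::info;

use crate::config::PageServerConf;        // 3. this crate
use crate::context::RequestContext;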
#[derive(Debug, PartialEq, Eq, Clone, Copy)] pub(crate) enum FlushLoopState { @@ -323,6 +283,9 @@ pub struct Timeline { ancestor_timeline: Option>, ancestor_lsn: Lsn, + // The LSN of gc-compaction that was last applied to this timeline. + gc_compaction_state: ArcSwap>, + pub(super) metrics: TimelineMetrics, // `Timeline` doesn't write these metrics itself, but it manages the lifetime. Code @@ -468,7 +431,7 @@ pub struct Timeline { /// If Some, collects GetPage metadata for an ongoing PageTrace. pub(crate) page_trace: ArcSwapOption>, - previous_heatmap: ArcSwapOption, + pub(super) previous_heatmap: ArcSwapOption, /// May host a background Tokio task which downloads all the layers from the current /// heatmap on demand. @@ -1470,13 +1433,22 @@ impl Timeline { | TaskKind::WalReceiverConnectionHandler | TaskKind::WalReceiverConnectionPoller => { let is_myself = match who_is_waiting { - WaitLsnWaiter::Timeline(waiter) => Weak::ptr_eq(&waiter.myself, &self.myself), - WaitLsnWaiter::Tenant | WaitLsnWaiter::PageService | WaitLsnWaiter::HttpEndpoint => unreachable!("tenant or page_service context are not expected to have task kind {:?}", ctx.task_kind()), + WaitLsnWaiter::Timeline(waiter) => { + Weak::ptr_eq(&waiter.myself, &self.myself) + } + WaitLsnWaiter::Tenant + | WaitLsnWaiter::PageService + | WaitLsnWaiter::HttpEndpoint => unreachable!( + "tenant or page_service context are not expected to have task kind {:?}", + ctx.task_kind() + ), }; if is_myself { if let Err(current) = self.last_record_lsn.would_wait_for(lsn) { // walingest is the only one that can advance last_record_lsn; it should make sure to never reach here - panic!("this timeline's walingest task is calling wait_lsn({lsn}) but we only have last_record_lsn={current}; would deadlock"); + panic!( + "this timeline's walingest task is calling wait_lsn({lsn}) but we only have last_record_lsn={current}; would deadlock" + ); } } else { // if another timeline's is waiting for us, there's no deadlock risk because @@ -1505,12 +1477,12 @@ impl Timeline { drop(_timer); let walreceiver_status = self.walreceiver_status(); Err(WaitLsnError::Timeout(format!( - "Timed out while waiting for WAL record at LSN {} to arrive, last_record_lsn {} disk consistent LSN={}, WalReceiver status: {}", - lsn, - self.get_last_record_lsn(), - self.get_disk_consistent_lsn(), - walreceiver_status, - ))) + "Timed out while waiting for WAL record at LSN {} to arrive, last_record_lsn {} disk consistent LSN={}, WalReceiver status: {}", + lsn, + self.get_last_record_lsn(), + self.get_disk_consistent_lsn(), + walreceiver_status, + ))) } } } @@ -1614,10 +1586,18 @@ impl Timeline { if init || validate { let latest_gc_cutoff_lsn = self.get_applied_gc_cutoff_lsn(); if lsn < *latest_gc_cutoff_lsn { - bail!("tried to request an lsn lease for an lsn below the latest gc cutoff. requested at {} gc cutoff {}", lsn, *latest_gc_cutoff_lsn); + bail!( + "tried to request an lsn lease for an lsn below the latest gc cutoff. requested at {} gc cutoff {}", + lsn, + *latest_gc_cutoff_lsn + ); } if lsn < planned_cutoff { - bail!("tried to request an lsn lease for an lsn below the planned gc cutoff. requested at {} planned gc cutoff {}", lsn, planned_cutoff); + bail!( + "tried to request an lsn lease for an lsn below the planned gc cutoff. 
requested at {} planned gc cutoff {}", + lsn, + planned_cutoff + ); } } @@ -1741,7 +1721,9 @@ impl Timeline { // This is not harmful, but it only happens in relatively rare cases where // time-based checkpoints are not happening fast enough to keep the amount of // ephemeral data within configured limits. It's a sign of stress on the system. - tracing::info!("Early-rolling open layer at size {current_size} (limit {size_override}) due to dirty data pressure"); + tracing::info!( + "Early-rolling open layer at size {current_size} (limit {size_override}) due to dirty data pressure" + ); } } @@ -1867,7 +1849,9 @@ impl Timeline { // Last record Lsn could be zero in case the timeline was just created if !last_record_lsn.is_valid() { - warn!("Skipping compaction for potentially just initialized timeline, it has invalid last record lsn: {last_record_lsn}"); + warn!( + "Skipping compaction for potentially just initialized timeline, it has invalid last record lsn: {last_record_lsn}" + ); return Ok(CompactionOutcome::Skipped); } @@ -1880,15 +1864,25 @@ impl Timeline { }; // Signal compaction failure to avoid L0 flush stalls when it's broken. - match result { + match &result { Ok(_) => self.compaction_failed.store(false, AtomicOrdering::Relaxed), - Err(CompactionError::Other(_)) | Err(CompactionError::CollectKeySpaceError(_)) => { + Err(e) if e.is_cancel() => {} + Err(CompactionError::ShuttingDown) => { + // Covered by the `Err(e) if e.is_cancel()` branch. + } + Err(CompactionError::AlreadyRunning(_)) => { + // Covered by the `Err(e) if e.is_cancel()` branch. + } + Err(CompactionError::Other(_)) => { + self.compaction_failed.store(true, AtomicOrdering::Relaxed) + } + Err(CompactionError::CollectKeySpaceError(_)) => { + // Cancelled errors are covered by the `Err(e) if e.is_cancel()` branch. self.compaction_failed.store(true, AtomicOrdering::Relaxed) } // Don't change the current value on offload failure or shutdown. We don't want to // abruptly stall nor resume L0 flushes in these cases. Err(CompactionError::Offload(_)) => {} - Err(CompactionError::ShuttingDown) => {} }; result @@ -2028,7 +2022,9 @@ impl Timeline { // `self.remote_client.shutdown().await` above should have already flushed everything from the queue, but // we also do a final check here to ensure that the queue is empty. 
if !self.remote_client.no_pending_work() { - warn!("still have pending work in remote upload queue, but continuing shutting down anyways"); + warn!( + "still have pending work in remote upload queue, but continuing shutting down anyways" + ); } } } @@ -2037,7 +2033,9 @@ impl Timeline { // drain the upload queue self.remote_client.shutdown().await; if !self.remote_client.no_pending_work() { - warn!("still have pending work in remote upload queue, but continuing shutting down anyways"); + warn!( + "still have pending work in remote upload queue, but continuing shutting down anyways" + ); } } @@ -2531,6 +2529,31 @@ impl Timeline { ) } + fn get_gc_compaction_settings(&self) -> GcCompactionCombinedSettings { + let tenant_conf = &self.tenant_conf.load(); + let gc_compaction_enabled = tenant_conf + .tenant_conf + .gc_compaction_enabled + .unwrap_or(self.conf.default_tenant_conf.gc_compaction_enabled); + let gc_compaction_initial_threshold_kb = tenant_conf + .tenant_conf + .gc_compaction_initial_threshold_kb + .unwrap_or( + self.conf + .default_tenant_conf + .gc_compaction_initial_threshold_kb, + ); + let gc_compaction_ratio_percent = tenant_conf + .tenant_conf + .gc_compaction_ratio_percent + .unwrap_or(self.conf.default_tenant_conf.gc_compaction_ratio_percent); + GcCompactionCombinedSettings { + gc_compaction_enabled, + gc_compaction_initial_threshold_kb, + gc_compaction_ratio_percent, + } + } + fn get_image_creation_preempt_threshold(&self) -> usize { let tenant_conf = self.tenant_conf.load(); tenant_conf @@ -2609,6 +2632,7 @@ impl Timeline { state: TimelineState, attach_wal_lag_cooldown: Arc>, create_idempotency: crate::tenant::CreateTimelineIdempotency, + gc_compaction_state: Option, cancel: CancellationToken, ) -> Arc { let disk_consistent_lsn = metadata.disk_consistent_lsn(); @@ -2667,6 +2691,8 @@ impl Timeline { }), disk_consistent_lsn: AtomicLsn::new(disk_consistent_lsn.0), + gc_compaction_state: ArcSwap::new(Arc::new(gc_compaction_state)), + last_freeze_at: AtomicLsn::new(disk_consistent_lsn.0), last_freeze_ts: RwLock::new(Instant::now()), @@ -2831,6 +2857,20 @@ impl Timeline { ); } + pub(crate) fn update_gc_compaction_state( + &self, + gc_compaction_state: GcCompactionState, + ) -> anyhow::Result<()> { + self.gc_compaction_state + .store(Arc::new(Some(gc_compaction_state.clone()))); + self.remote_client + .schedule_index_upload_for_gc_compaction_state_update(gc_compaction_state) + } + + pub(crate) fn get_gc_compaction_state(&self) -> Option { + self.gc_compaction_state.load_full().as_ref().clone() + } + /// Creates and starts the wal receiver. /// /// This function is expected to be called at most once per Timeline's lifecycle @@ -2874,6 +2914,7 @@ impl Timeline { auth_token: crate::config::SAFEKEEPER_AUTH_TOKEN.get().cloned(), availability_zone: self.conf.availability_zone.clone(), ingest_batch_size: self.conf.ingest_batch_size, + validate_wal_contiguity: self.conf.validate_wal_contiguity, }, broker_client, ctx, @@ -2898,8 +2939,9 @@ impl Timeline { disk_consistent_lsn: Lsn, index_part: IndexPart, ) -> anyhow::Result<()> { - use init::{Decision::*, Discovered, DismissedLayer}; use LayerName::*; + use init::Decision::*; + use init::{Discovered, DismissedLayer}; let mut guard = self.layers.write().await; @@ -3114,11 +3156,15 @@ impl Timeline { } TimelineState::Loading => { // Import does not return an activated timeline. 
- info!("discarding priority boost for logical size calculation because timeline is not yet active"); + info!( + "discarding priority boost for logical size calculation because timeline is not yet active" + ); } TimelineState::Active => { // activation should be setting the once cell - warn!("unexpected: cancel_wait_for_background_loop_concurrency_limit_semaphore not set, priority-boosting of logical size calculation will not work"); + warn!( + "unexpected: cancel_wait_for_background_loop_concurrency_limit_semaphore not set, priority-boosting of logical size calculation will not work" + ); debug_assert!(false); } } @@ -3523,6 +3569,14 @@ impl Timeline { Ok(layer) } + pub(super) fn is_previous_heatmap_active(&self) -> bool { + self.previous_heatmap + .load() + .as_ref() + .map(|prev| matches!(**prev, PreviousHeatmap::Active { .. })) + .unwrap_or(false) + } + /// The timeline heatmap is a hint to secondary locations from the primary location, /// indicating which layers are currently on-disk on the primary. /// @@ -3595,6 +3649,7 @@ impl Timeline { Some(non_resident) => { let mut non_resident = non_resident.peekable(); if non_resident.peek().is_none() { + tracing::info!(timeline_id=%self.timeline_id, "Previous heatmap now obsolete"); self.previous_heatmap .store(Some(PreviousHeatmap::Obsolete.into())); } @@ -3626,6 +3681,36 @@ impl Timeline { Some(HeatMapTimeline::new(self.timeline_id, layers)) } + pub(super) async fn generate_unarchival_heatmap(&self, end_lsn: Lsn) -> PreviousHeatmap { + let guard = self.layers.read().await; + + let now = SystemTime::now(); + let mut heatmap_layers = Vec::default(); + for vl in guard.visible_layers() { + if vl.layer_desc().get_lsn_range().start >= end_lsn { + continue; + } + + let hl = HeatMapLayer { + name: vl.layer_desc().layer_name(), + metadata: vl.metadata(), + access_time: now, + }; + heatmap_layers.push(hl); + } + + tracing::info!( + "Generating unarchival heatmap with {} layers", + heatmap_layers.len() + ); + + let heatmap = HeatMapTimeline::new(self.timeline_id, heatmap_layers); + PreviousHeatmap::Active { + heatmap, + read_at: Instant::now(), + } + } + /// Returns true if the given lsn is or was an ancestor branchpoint. pub(crate) fn is_ancestor_lsn(&self, lsn: Lsn) -> bool { // upon timeline detach, we set the ancestor_lsn to Lsn::INVALID and the store the original @@ -4219,10 +4304,14 @@ impl Timeline { // This path is only taken for tenants with multiple shards: single sharded tenants should // never encounter a gap in the wal. let old_disk_consistent_lsn = self.disk_consistent_lsn.load(); - tracing::debug!("Advancing disk_consistent_lsn across layer gap {old_disk_consistent_lsn}->{frozen_to_lsn}"); + tracing::debug!( + "Advancing disk_consistent_lsn across layer gap {old_disk_consistent_lsn}->{frozen_to_lsn}" + ); if self.set_disk_consistent_lsn(frozen_to_lsn) { if let Err(e) = self.schedule_uploads(frozen_to_lsn, vec![]) { - tracing::warn!("Failed to schedule metadata upload after updating disk_consistent_lsn: {e}"); + tracing::warn!( + "Failed to schedule metadata upload after updating disk_consistent_lsn: {e}" + ); } } } @@ -4447,7 +4536,10 @@ impl Timeline { /// This function must only be used from the layer flush task. 
fn set_disk_consistent_lsn(&self, new_value: Lsn) -> bool { let old_value = self.disk_consistent_lsn.fetch_max(new_value); - assert!(new_value >= old_value, "disk_consistent_lsn must be growing monotonously at runtime; current {old_value}, offered {new_value}"); + assert!( + new_value >= old_value, + "disk_consistent_lsn must be growing monotonously at runtime; current {old_value}, offered {new_value}" + ); self.metrics .disk_consistent_lsn_gauge @@ -4605,10 +4697,7 @@ impl Timeline { )); } - let (dense_ks, sparse_ks) = self - .collect_keyspace(lsn, ctx) - .await - .map_err(CompactionError::CollectKeySpaceError)?; + let (dense_ks, sparse_ks) = self.collect_keyspace(lsn, ctx).await?; let dense_partitioning = dense_ks.partition(&self.shard_identity, partition_size); let sparse_partitioning = SparseKeyPartitioning { parts: vec![sparse_ks], @@ -4742,7 +4831,9 @@ impl Timeline { // any metadata keys, keys, as that would lead to actual data // loss. if img_key.is_rel_fsm_block_key() || img_key.is_rel_vm_block_key() { - warn!("could not reconstruct FSM or VM key {img_key}, filling with zeros: {err:?}"); + warn!( + "could not reconstruct FSM or VM key {img_key}, filling with zeros: {err:?}" + ); ZERO_PAGE.clone() } else { return Err(CreateImageLayersError::from(err)); @@ -4821,7 +4912,8 @@ impl Timeline { let trigger_generation = delta_files_accessed as usize >= MAX_AUX_FILE_V2_DELTAS; info!( - "metadata key compaction: trigger_generation={trigger_generation}, delta_files_accessed={delta_files_accessed}, total_kb_retrieved={total_kb_retrieved}, total_keys_retrieved={total_keys_retrieved}, read_time={}s", elapsed.as_secs_f64() + "metadata key compaction: trigger_generation={trigger_generation}, delta_files_accessed={delta_files_accessed}, total_kb_retrieved={total_kb_retrieved}, total_keys_retrieved={total_keys_retrieved}, read_time={}s", + elapsed.as_secs_f64() ); if !trigger_generation && mode == ImageLayerCreationMode::Try { @@ -5143,7 +5235,8 @@ impl Timeline { if should_yield { tracing::info!( "preempt image layer generation at {lsn} when processing partition {}..{}: too many L0 layers", - partition.start().unwrap(), partition.end().unwrap() + partition.start().unwrap(), + partition.end().unwrap() ); last_partition_processed = Some(partition.clone()); all_generated = false; @@ -5330,9 +5423,40 @@ pub(crate) enum CompactionError { Offload(OffloadError), /// Compaction cannot be done right now; page reconstruction and so on. #[error("Failed to collect keyspace: {0}")] - CollectKeySpaceError(CollectKeySpaceError), + CollectKeySpaceError(#[from] CollectKeySpaceError), #[error(transparent)] Other(anyhow::Error), + #[error("Compaction already running: {0}")] + AlreadyRunning(&'static str), +} + +impl CompactionError { + /// Errors that can be ignored, i.e., cancel and shutdown. + pub fn is_cancel(&self) -> bool { + matches!( + self, + Self::ShuttingDown + | Self::AlreadyRunning(_) + | Self::CollectKeySpaceError(CollectKeySpaceError::Cancelled) + | Self::CollectKeySpaceError(CollectKeySpaceError::PageRead( + PageReconstructError::Cancelled + )) + | Self::Offload(OffloadError::Cancelled) + ) + } + + /// Critical errors that indicate data corruption. 
+ pub fn is_critical(&self) -> bool { + matches!( + self, + Self::CollectKeySpaceError( + CollectKeySpaceError::Decode(_) + | CollectKeySpaceError::PageRead( + PageReconstructError::MissingKey(_) | PageReconstructError::WalRedo(_), + ) + ) + ) + } } impl From for CompactionError { @@ -5344,24 +5468,6 @@ impl From for CompactionError { } } -impl CompactionError { - pub fn is_cancelled(&self) -> bool { - matches!(self, CompactionError::ShuttingDown) - } -} - -impl From for CompactionError { - fn from(err: CollectKeySpaceError) -> Self { - match err { - CollectKeySpaceError::Cancelled - | CollectKeySpaceError::PageRead(PageReconstructError::Cancelled) => { - CompactionError::ShuttingDown - } - e => CompactionError::Other(e.into()), - } - } -} - impl From for CompactionError { fn from(value: super::upload_queue::NotInitialized) -> Self { match value { @@ -5505,7 +5611,9 @@ impl Timeline { // because we have not implemented L0 => L0 compaction. duplicated_layers.insert(l.layer_desc().key()); } else if LayerMap::is_l0(&l.layer_desc().key_range, l.layer_desc().is_delta) { - return Err(CompactionError::Other(anyhow::anyhow!("compaction generates a L0 layer file as output, which will cause infinite compaction."))); + return Err(CompactionError::Other(anyhow::anyhow!( + "compaction generates a L0 layer file as output, which will cause infinite compaction." + ))); } else { insert_layers.push(l.clone()); } @@ -5629,8 +5737,10 @@ impl Timeline { .await { Ok((index_part, index_generation, _index_mtime)) => { - tracing::info!("GC loaded shard zero metadata (gen {index_generation:?}): latest_gc_cutoff_lsn: {}", - index_part.metadata.latest_gc_cutoff_lsn()); + tracing::info!( + "GC loaded shard zero metadata (gen {index_generation:?}): latest_gc_cutoff_lsn: {}", + index_part.metadata.latest_gc_cutoff_lsn() + ); Ok(Some(index_part.metadata.latest_gc_cutoff_lsn())) } Err(DownloadError::NotFound) => { @@ -6039,9 +6149,7 @@ impl Timeline { if let Some((img_lsn, img)) = &data.img { trace!( "found page image for key {} at {}, no WAL redo required, req LSN {}", - key, - img_lsn, - request_lsn, + key, img_lsn, request_lsn, ); Ok(img.clone()) } else { @@ -6070,7 +6178,12 @@ impl Timeline { request_lsn ); } else { - trace!("found {} WAL records that will init the page for {} at {}, performing WAL redo", data.records.len(), key, request_lsn); + trace!( + "found {} WAL records that will init the page for {} at {}, performing WAL redo", + data.records.len(), + key, + request_lsn + ); }; let res = self .walredo_mgr @@ -6614,7 +6727,9 @@ impl TimelineWriter<'_> { if let Some(wait_threshold) = wait_threshold { if l0_count >= wait_threshold { - info!("layer roll waiting for flush due to compaction backpressure at {l0_count} L0 layers"); + debug!( + "layer roll waiting for flush due to compaction backpressure at {l0_count} L0 layers" + ); self.tl.wait_flush_completion(flush_id).await?; } } @@ -6801,17 +6916,15 @@ mod tests { use pageserver_api::key::Key; use pageserver_api::value::Value; use tracing::Instrument; - use utils::{id::TimelineId, lsn::Lsn}; - - use crate::tenant::{ - harness::{test_img, TenantHarness}, - layer_map::LayerMap, - storage_layer::{Layer, LayerName, LayerVisibilityHint}, - timeline::{DeltaLayerTestDesc, EvictionError}, - PreviousHeatmap, Timeline, - }; + use utils::id::TimelineId; + use utils::lsn::Lsn; use super::HeatMapTimeline; + use crate::tenant::harness::{TenantHarness, test_img}; + use crate::tenant::layer_map::LayerMap; + use crate::tenant::storage_layer::{Layer, LayerName, 
LayerVisibilityHint}; + use crate::tenant::timeline::{DeltaLayerTestDesc, EvictionError}; + use crate::tenant::{PreviousHeatmap, Timeline}; fn assert_heatmaps_have_same_layers(lhs: &HeatMapTimeline, rhs: &HeatMapTimeline) { assert_eq!(lhs.layers.len(), rhs.layers.len()); diff --git a/pageserver/src/tenant/timeline/analysis.rs b/pageserver/src/tenant/timeline/analysis.rs index 6009b0b79a..96864ec44b 100644 --- a/pageserver/src/tenant/timeline/analysis.rs +++ b/pageserver/src/tenant/timeline/analysis.rs @@ -1,4 +1,5 @@ -use std::{collections::BTreeSet, ops::Range}; +use std::collections::BTreeSet; +use std::ops::Range; use utils::lsn::Lsn; diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index 4e4f906d78..091bd583d7 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -10,34 +10,42 @@ use std::sync::Arc; use super::layer_manager::LayerManager; use super::{ - CompactFlags, CompactOptions, CreateImageLayersError, DurationRecorder, GetVectoredError, - ImageLayerCreationMode, LastImageLayerCreationStatus, PageReconstructError, RecordedDuration, + CompactFlags, CompactOptions, CompactionError, CreateImageLayersError, DurationRecorder, + GetVectoredError, ImageLayerCreationMode, LastImageLayerCreationStatus, RecordedDuration, Timeline, }; -use anyhow::{anyhow, bail, Context}; +use anyhow::{Context, anyhow, bail}; use bytes::Bytes; use enumset::EnumSet; use fail::fail_point; use itertools::Itertools; -use pageserver_api::key::KEY_SIZE; -use pageserver_api::keyspace::ShardedRange; +use once_cell::sync::Lazy; +use pageserver_api::config::tenant_conf_defaults::DEFAULT_CHECKPOINT_DISTANCE; +use pageserver_api::key::{KEY_SIZE, Key}; +use pageserver_api::keyspace::{KeySpace, ShardedRange}; use pageserver_api::models::CompactInfoResponse; +use pageserver_api::record::NeonWalRecord; use pageserver_api::shard::{ShardCount, ShardIdentity, TenantShardId}; +use pageserver_api::value::Value; +use pageserver_compaction::helpers::{fully_contains, overlaps_with}; +use pageserver_compaction::interface::*; use serde::Serialize; +use tokio::sync::{OwnedSemaphorePermit, Semaphore}; use tokio_util::sync::CancellationToken; -use tracing::{debug, info, info_span, trace, warn, Instrument}; +use tracing::{Instrument, debug, error, info, info_span, trace, warn}; use utils::critical; use utils::id::TimelineId; +use utils::lsn::Lsn; use crate::context::{AccessStatsBehavior, RequestContext, RequestContextBuilder}; use crate::page_cache; -use crate::pgdatadir_mapping::CollectKeySpaceError; use crate::statvfs::Statvfs; use crate::tenant::checks::check_valid_layermap; use crate::tenant::gc_block::GcBlock; use crate::tenant::layer_map::LayerMap; use crate::tenant::remote_timeline_client::WaitCompletionError; +use crate::tenant::remote_timeline_client::index::GcCompactionState; use crate::tenant::storage_layer::batch_split_writer::{ BatchWriterResult, SplitDeltaLayerWriter, SplitImageLayerWriter, }; @@ -46,24 +54,12 @@ use crate::tenant::storage_layer::merge_iterator::MergeIterator; use crate::tenant::storage_layer::{ AsLayerDesc, PersistentLayerDesc, PersistentLayerKey, ValueReconstructState, }; -use crate::tenant::timeline::{drop_rlock, DeltaLayerWriter, ImageLayerWriter}; -use crate::tenant::timeline::{ImageLayerCreationOutcome, IoConcurrency}; -use crate::tenant::timeline::{Layer, ResidentLayer}; -use crate::tenant::{gc_block, DeltaLayer, MaybeOffloaded}; +use crate::tenant::timeline::{ + DeltaLayerWriter, 
ImageLayerCreationOutcome, ImageLayerWriter, IoConcurrency, Layer, + ResidentLayer, drop_rlock, +}; +use crate::tenant::{DeltaLayer, MaybeOffloaded, gc_block}; use crate::virtual_file::{MaybeFatalIo, VirtualFile}; -use pageserver_api::config::tenant_conf_defaults::DEFAULT_CHECKPOINT_DISTANCE; - -use pageserver_api::key::Key; -use pageserver_api::keyspace::KeySpace; -use pageserver_api::record::NeonWalRecord; -use pageserver_api::value::Value; - -use utils::lsn::Lsn; - -use pageserver_compaction::helpers::{fully_contains, overlaps_with}; -use pageserver_compaction::interface::*; - -use super::CompactionError; /// Maximum number of deltas before generating an image layer in bottom-most compaction. const COMPACTION_DELTA_THRESHOLD: usize = 5; @@ -77,13 +73,22 @@ impl std::fmt::Display for GcCompactionJobId { } } +pub struct GcCompactionCombinedSettings { + pub gc_compaction_enabled: bool, + pub gc_compaction_initial_threshold_kb: u64, + pub gc_compaction_ratio_percent: u64, +} + #[derive(Debug, Clone)] pub enum GcCompactionQueueItem { - Manual(CompactOptions), + MetaJob { + /// Compaction options + options: CompactOptions, + /// Whether the compaction is triggered automatically (determines whether we need to update L2 LSN) + auto: bool, + }, SubCompactionJob(CompactOptions), - #[allow(dead_code)] - UpdateL2Lsn(Lsn), - Notify(GcCompactionJobId), + Notify(GcCompactionJobId, Option), } impl GcCompactionQueueItem { @@ -93,7 +98,7 @@ impl GcCompactionQueueItem { running: bool, ) -> Option { match self { - GcCompactionQueueItem::Manual(options) => Some(CompactInfoResponse { + GcCompactionQueueItem::MetaJob { options, .. } => Some(CompactInfoResponse { compact_key_range: options.compact_key_range, compact_lsn_range: options.compact_lsn_range, sub_compaction: options.sub_compaction, @@ -107,17 +112,22 @@ impl GcCompactionQueueItem { running, job_id: id.0, }), - GcCompactionQueueItem::UpdateL2Lsn(_) => None, - GcCompactionQueueItem::Notify(_) => None, + GcCompactionQueueItem::Notify(_, _) => None, } } } +#[derive(Default)] +struct GcCompactionGuardItems { + notify: Option>, + gc_guard: Option, + permit: Option, +} + struct GcCompactionQueueInner { running: Option<(GcCompactionJobId, GcCompactionQueueItem)>, queued: VecDeque<(GcCompactionJobId, GcCompactionQueueItem)>, - notify: HashMap>, - gc_guards: HashMap, + guards: HashMap, last_id: GcCompactionJobId, } @@ -137,14 +147,18 @@ pub struct GcCompactionQueue { consumer_lock: tokio::sync::Mutex<()>, } +static CONCURRENT_GC_COMPACTION_TASKS: Lazy> = Lazy::new(|| { + // Only allow two timelines on one pageserver to run gc compaction at a time. + Arc::new(Semaphore::new(2)) +}); + impl GcCompactionQueue { pub fn new() -> Self { GcCompactionQueue { inner: std::sync::Mutex::new(GcCompactionQueueInner { running: None, queued: VecDeque::new(), - notify: HashMap::new(), - gc_guards: HashMap::new(), + guards: HashMap::new(), last_id: GcCompactionJobId(0), }), consumer_lock: tokio::sync::Mutex::new(()), @@ -154,8 +168,9 @@ impl GcCompactionQueue { pub fn cancel_scheduled(&self) { let mut guard = self.inner.lock().unwrap(); guard.queued.clear(); - guard.notify.clear(); - guard.gc_guards.clear(); + // TODO: if there is a running job, we should keep the gc guard. However, currently, the cancel + // API is only used for testing purposes, so we can drop everything here. + guard.guards.clear(); } /// Schedule a manual compaction job. 
@@ -166,29 +181,159 @@ impl GcCompactionQueue { ) -> GcCompactionJobId { let mut guard = self.inner.lock().unwrap(); let id = guard.next_id(); - guard - .queued - .push_back((id, GcCompactionQueueItem::Manual(options))); - if let Some(notify) = notify { - guard.notify.insert(id, notify); - } + guard.queued.push_back(( + id, + GcCompactionQueueItem::MetaJob { + options, + auto: false, + }, + )); + guard.guards.entry(id).or_default().notify = notify; info!("scheduled compaction job id={}", id); id } + /// Schedule an auto compaction job. + fn schedule_auto_compaction( + &self, + options: CompactOptions, + permit: OwnedSemaphorePermit, + ) -> GcCompactionJobId { + let mut guard = self.inner.lock().unwrap(); + let id = guard.next_id(); + guard.queued.push_back(( + id, + GcCompactionQueueItem::MetaJob { + options, + auto: true, + }, + )); + guard.guards.entry(id).or_default().permit = Some(permit); + id + } + /// Trigger an auto compaction. - #[allow(dead_code)] - pub fn trigger_auto_compaction(&self, _: &Arc) {} + pub async fn trigger_auto_compaction(&self, timeline: &Arc) { + let GcCompactionCombinedSettings { + gc_compaction_enabled, + gc_compaction_initial_threshold_kb, + gc_compaction_ratio_percent, + } = timeline.get_gc_compaction_settings(); + if !gc_compaction_enabled { + return; + } + if self.remaining_jobs_num() > 0 { + // Only schedule auto compaction when the queue is empty + return; + } + if timeline.ancestor_timeline().is_some() { + // Do not trigger auto compaction for child timelines. We haven't tested + // it enough in staging yet. + return; + } + + let Ok(permit) = CONCURRENT_GC_COMPACTION_TASKS.clone().try_acquire_owned() else { + // Only allow one compaction run at a time. TODO: As we do `try_acquire_owned`, we cannot ensure + // the fairness of the lock across timelines. We should listen for both `acquire` and `l0_compaction_trigger` + // to ensure the fairness while avoid starving other tasks. 
+ return; + }; + + let gc_compaction_state = timeline.get_gc_compaction_state(); + let l2_lsn = gc_compaction_state + .map(|x| x.last_completed_lsn) + .unwrap_or(Lsn::INVALID); + + let layers = { + let guard = timeline.layers.read().await; + let layer_map = guard.layer_map().unwrap(); + layer_map.iter_historic_layers().collect_vec() + }; + let mut l2_size: u64 = 0; + let mut l1_size = 0; + let gc_cutoff = *timeline.get_applied_gc_cutoff_lsn(); + for layer in layers { + if layer.lsn_range.start <= l2_lsn { + l2_size += layer.file_size(); + } else if layer.lsn_range.start <= gc_cutoff { + l1_size += layer.file_size(); + } + } + + fn trigger_compaction( + l1_size: u64, + l2_size: u64, + gc_compaction_initial_threshold_kb: u64, + gc_compaction_ratio_percent: u64, + ) -> bool { + const AUTO_TRIGGER_LIMIT: u64 = 150 * 1024 * 1024 * 1024; // 150GB + if l1_size >= AUTO_TRIGGER_LIMIT || l2_size >= AUTO_TRIGGER_LIMIT { + // Do not auto-trigger when physical size >= 150GB + return false; + } + // initial trigger + if l2_size == 0 && l1_size >= gc_compaction_initial_threshold_kb * 1024 { + info!( + "trigger auto-compaction because l1_size={} >= gc_compaction_initial_threshold_kb={}", + l1_size, gc_compaction_initial_threshold_kb + ); + return true; + } + // size ratio trigger + if l2_size == 0 { + return false; + } + if l1_size as f64 / l2_size as f64 >= (gc_compaction_ratio_percent as f64 / 100.0) { + info!( + "trigger auto-compaction because l1_size={} / l2_size={} > gc_compaction_ratio_percent={}", + l1_size, l2_size, gc_compaction_ratio_percent + ); + return true; + } + false + } + + if trigger_compaction( + l1_size, + l2_size, + gc_compaction_initial_threshold_kb, + gc_compaction_ratio_percent, + ) { + self.schedule_auto_compaction( + CompactOptions { + flags: { + let mut flags = EnumSet::new(); + flags |= CompactFlags::EnhancedGcBottomMostCompaction; + flags + }, + sub_compaction: true, + compact_key_range: None, + compact_lsn_range: None, + sub_compaction_max_job_size_mb: None, + }, + permit, + ); + info!( + "scheduled auto gc-compaction: l1_size={}, l2_size={}, l2_lsn={}, gc_cutoff={}", + l1_size, l2_size, l2_lsn, gc_cutoff + ); + } else { + info!( + "did not trigger auto gc-compaction: l1_size={}, l2_size={}, l2_lsn={}, gc_cutoff={}", + l1_size, l2_size, l2_lsn, gc_cutoff + ); + } + } /// Notify the caller the job has finished and unblock GC. fn notify_and_unblock(&self, id: GcCompactionJobId) { info!("compaction job id={} finished", id); let mut guard = self.inner.lock().unwrap(); - if let Some(blocking) = guard.gc_guards.remove(&id) { - drop(blocking) - } - if let Some(tx) = guard.notify.remove(&id) { - let _ = tx.send(()); + if let Some(items) = guard.guards.remove(&id) { + drop(items.gc_guard); + if let Some(tx) = items.notify { + let _ = tx.send(()); + } } } @@ -198,9 +343,12 @@ impl GcCompactionQueue { options: CompactOptions, timeline: &Arc, gc_block: &GcBlock, + auto: bool, ) -> Result<(), CompactionError> { - info!("running scheduled enhanced gc bottom-most compaction with sub-compaction, splitting compaction jobs"); - let jobs: Vec = timeline + info!( + "running scheduled enhanced gc bottom-most compaction with sub-compaction, splitting compaction jobs" + ); + let jobs = timeline .gc_compaction_split_jobs( GcCompactJob::from_compact_options(options.clone()), options.sub_compaction_max_job_size_mb, @@ -223,6 +371,9 @@ impl GcCompactionQueue { let jobs_len = jobs.len(); let mut pending_tasks = Vec::new(); + // gc-compaction might pick more layers or fewer layers to compact. 
The L2 LSN does not need to be accurate. + // And therefore, we simply assume the maximum LSN of all jobs is the expected L2 LSN. + let expected_l2_lsn = jobs.iter().map(|job| job.compact_lsn_range.end).max(); for job in jobs { // Unfortunately we need to convert the `GcCompactJob` back to `CompactionOptions` // until we do further refactors to allow directly call `compact_with_gc`. @@ -240,10 +391,16 @@ impl GcCompactionQueue { }; pending_tasks.push(GcCompactionQueueItem::SubCompactionJob(options)); } - pending_tasks.push(GcCompactionQueueItem::Notify(id)); + + if !auto { + pending_tasks.push(GcCompactionQueueItem::Notify(id, None)); + } else { + pending_tasks.push(GcCompactionQueueItem::Notify(id, expected_l2_lsn)); + } + { let mut guard = self.inner.lock().unwrap(); - guard.gc_guards.insert(id, gc_guard); + guard.guards.entry(id).or_default().gc_guard = Some(gc_guard); let mut tasks = Vec::new(); for task in pending_tasks { let id = guard.next_id(); @@ -254,7 +411,10 @@ impl GcCompactionQueue { guard.queued.push_front(item); } } - info!("scheduled enhanced gc bottom-most compaction with sub-compaction, split into {} jobs", jobs_len); + info!( + "scheduled enhanced gc bottom-most compaction with sub-compaction, split into {} jobs", + jobs_len + ); } Ok(()) } @@ -267,29 +427,48 @@ impl GcCompactionQueue { gc_block: &GcBlock, timeline: &Arc, ) -> Result { - let _one_op_at_a_time_guard = self.consumer_lock.lock().await; - let has_pending_tasks; - let (id, item) = { - let mut guard = self.inner.lock().unwrap(); - let Some((id, item)) = guard.queued.pop_front() else { - return Ok(CompactionOutcome::Done); - }; - guard.running = Some((id, item.clone())); - has_pending_tasks = !guard.queued.is_empty(); - (id, item) + let Ok(_one_op_at_a_time_guard) = self.consumer_lock.try_lock() else { + return Err(CompactionError::AlreadyRunning( + "cannot run gc-compaction because another gc-compaction is running. This should not happen because we only call this function from the gc-compaction queue.", + )); + }; + let has_pending_tasks; + let Some((id, item)) = ({ + let mut guard = self.inner.lock().unwrap(); + if let Some((id, item)) = guard.queued.pop_front() { + guard.running = Some((id, item.clone())); + has_pending_tasks = !guard.queued.is_empty(); + Some((id, item)) + } else { + has_pending_tasks = false; + None + } + }) else { + self.trigger_auto_compaction(timeline).await; + // Always yield after triggering auto-compaction. Gc-compaction is a low-priority task and we + // have not implemented preemption mechanism yet. We always want to yield it to more important + // tasks if there is one. + return Ok(CompactionOutcome::Done); }; - match item { - GcCompactionQueueItem::Manual(options) => { + GcCompactionQueueItem::MetaJob { options, auto } => { if !options .flags .contains(CompactFlags::EnhancedGcBottomMostCompaction) { - warn!("ignoring scheduled compaction task: scheduled task must be gc compaction: {:?}", options); + warn!( + "ignoring scheduled compaction task: scheduled task must be gc compaction: {:?}", + options + ); } else if options.sub_compaction { - self.handle_sub_compaction(id, options, timeline, gc_block) + info!( + "running scheduled enhanced gc bottom-most compaction with sub-compaction, splitting compaction jobs" + ); + self.handle_sub_compaction(id, options, timeline, gc_block, auto) .await?; } else { + // Auto compaction always enables sub-compaction so we don't need to handle update_l2_lsn + // in this branch. 
let gc_guard = match gc_block.start().await { Ok(guard) => guard, Err(e) => { @@ -301,20 +480,37 @@ impl GcCompactionQueue { }; { let mut guard = self.inner.lock().unwrap(); - guard.gc_guards.insert(id, gc_guard); + guard.guards.entry(id).or_default().gc_guard = Some(gc_guard); } let _ = timeline.compact_with_options(cancel, options, ctx).await?; self.notify_and_unblock(id); } } GcCompactionQueueItem::SubCompactionJob(options) => { + // TODO: error handling, clear the queue if any task fails? let _ = timeline.compact_with_options(cancel, options, ctx).await?; } - GcCompactionQueueItem::Notify(id) => { + GcCompactionQueueItem::Notify(id, l2_lsn) => { self.notify_and_unblock(id); - } - GcCompactionQueueItem::UpdateL2Lsn(_) => { - unreachable!() + if let Some(l2_lsn) = l2_lsn { + let current_l2_lsn = timeline + .get_gc_compaction_state() + .map(|x| x.last_completed_lsn) + .unwrap_or(Lsn::INVALID); + if l2_lsn >= current_l2_lsn { + info!("l2_lsn updated to {}", l2_lsn); + timeline + .update_gc_compaction_state(GcCompactionState { + last_completed_lsn: l2_lsn, + }) + .map_err(CompactionError::Other)?; + } else { + warn!( + "l2_lsn updated to {} but it is less than the current l2_lsn {}", + l2_lsn, current_l2_lsn + ); + } + } } } { @@ -339,7 +535,6 @@ impl GcCompactionQueue { (guard.running.clone(), guard.queued.clone()) } - #[allow(dead_code)] pub fn remaining_jobs_num(&self) -> usize { let guard = self.inner.lock().unwrap(); guard.queued.len() + if guard.running.is_some() { 1 } else { 0 } @@ -771,34 +966,29 @@ impl Timeline { self.upload_new_image_layers(image_layers)?; if let LastImageLayerCreationStatus::Incomplete { .. } = outcome { // Yield and do not do any other kind of compaction. - info!("skipping shard ancestor compaction due to pending image layer generation tasks (preempted by L0 compaction)."); + info!( + "skipping shard ancestor compaction due to pending image layer generation tasks (preempted by L0 compaction)." + ); return Ok(CompactionOutcome::YieldForL0); } } - Err(err) => { - // no partitioning? This is normal, if the timeline was just created - // as an empty timeline. Also in unit tests, when we use the timeline - // as a simple key-value store, ignoring the datadir layout. Log the - // error but continue. - // - // Suppress error when it's due to cancellation - if !self.cancel.is_cancelled() && !err.is_cancelled() { - if let CompactionError::CollectKeySpaceError( - CollectKeySpaceError::Decode(_) - | CollectKeySpaceError::PageRead(PageReconstructError::MissingKey(_)), - ) = err - { - critical!("could not compact, repartitioning keyspace failed: {err:?}"); - } else { - tracing::error!( - "could not compact, repartitioning keyspace failed: {err:?}" - ); - } - } + + // Suppress errors when cancelled. + Err(_) if self.cancel.is_cancelled() => {} + Err(err) if err.is_cancel() => {} + + // Alert on critical errors that indicate data corruption. + Err(err) if err.is_critical() => { + critical!("could not compact, repartitioning keyspace failed: {err:?}"); } + + // Log other errors. No partitioning? This is normal, if the timeline was just created + // as an empty timeline. Also in unit tests, when we use the timeline as a simple + // key-value store, ignoring the datadir layout. Log the error but continue. + Err(err) => error!("could not compact, repartitioning keyspace failed: {err:?}"), }; - let partition_count = self.partitioning.read().0 .0.parts.len(); + let partition_count = self.partitioning.read().0.0.parts.len(); // 4. 
Shard ancestor compaction @@ -1007,7 +1197,7 @@ impl Timeline { Ok(()) => (), Err(WaitCompletionError::NotInitialized(ni)) => return Err(CompactionError::from(ni)), Err(WaitCompletionError::UploadQueueShutDownOrStopped) => { - return Err(CompactionError::ShuttingDown) + return Err(CompactionError::ShuttingDown); } } @@ -1022,7 +1212,7 @@ impl Timeline { /// /// The result may be used as an input to eviction and secondary downloads to de-prioritize layers /// that we know won't be needed for reads. - pub(super) async fn update_layer_visibility( + pub(crate) async fn update_layer_visibility( &self, ) -> Result<(), super::layer_manager::Shutdown> { let head_lsn = self.get_last_record_lsn(); @@ -1302,7 +1492,7 @@ impl Timeline { let last_record_lsn = self.get_last_record_lsn(); let min_hole_range = (target_file_size / page_cache::PAGE_SZ as u64) as i128; let min_hole_coverage_size = 3; // TODO: something more flexible? - // min-heap (reserve space for one more element added before eviction) + // min-heap (reserve space for one more element added before eviction) let mut heap: BinaryHeap = BinaryHeap::with_capacity(max_holes + 1); let mut prev: Option = None; @@ -2165,8 +2355,14 @@ impl Timeline { let allocated_space = (available_space as f64 * 0.8) as u64; /* reserve 20% space for other tasks */ if all_layer_size /* space needed for newly-generated file */ + remote_layer_size /* space for downloading layers */ > allocated_space { - return Err(anyhow!("not enough space for compaction: available_space={}, allocated_space={}, all_layer_size={}, remote_layer_size={}, required_space={}", - available_space, allocated_space, all_layer_size, remote_layer_size, all_layer_size + remote_layer_size)); + return Err(anyhow!( + "not enough space for compaction: available_space={}, allocated_space={}, all_layer_size={}, remote_layer_size={}, required_space={}", + available_space, + allocated_space, + all_layer_size, + remote_layer_size, + all_layer_size + remote_layer_size + )); } Ok(()) } @@ -2205,7 +2401,9 @@ impl Timeline { }; if compact_below_lsn == Lsn::INVALID { - tracing::warn!("no layers to compact with gc: gc_cutoff not generated yet, skipping gc bottom-most compaction"); + tracing::warn!( + "no layers to compact with gc: gc_cutoff not generated yet, skipping gc bottom-most compaction" + ); return Ok(vec![]); } @@ -2214,7 +2412,7 @@ impl Timeline { let sub_compaction_max_job_size_mb = sub_compaction_max_job_size_mb.unwrap_or(GC_COMPACT_MAX_SIZE_MB); - let mut compact_jobs = Vec::new(); + let mut compact_jobs = Vec::::new(); // For now, we simply use the key partitioning information; we should do a more fine-grained partitioning // by estimating the amount of files read for a compaction job. We should also partition on LSN. 
let ((dense_ks, sparse_ks), _) = self.partitioning.read().as_ref().clone(); @@ -2301,16 +2499,25 @@ impl Timeline { } else { end }; - info!( - "splitting compaction job: {}..{}, estimated_size={}", - start, end, total_size - ); - compact_jobs.push(GcCompactJob { - dry_run: job.dry_run, - compact_key_range: start..end, - compact_lsn_range: job.compact_lsn_range.start..compact_below_lsn, - }); - current_start = Some(end); + if total_size == 0 && !compact_jobs.is_empty() { + info!( + "splitting compaction job: {}..{}, estimated_size={}, extending the previous job", + start, end, total_size + ); + compact_jobs.last_mut().unwrap().compact_key_range.end = end; + current_start = Some(end); + } else { + info!( + "splitting compaction job: {}..{}, estimated_size={}", + start, end, total_size + ); + compact_jobs.push(GcCompactJob { + dry_run: job.dry_run, + compact_key_range: start..end, + compact_lsn_range: job.compact_lsn_range.start..compact_below_lsn, + }); + current_start = Some(end); + } } } Ok(compact_jobs) @@ -2341,7 +2548,9 @@ impl Timeline { let sub_compaction = options.sub_compaction; let job = GcCompactJob::from_compact_options(options.clone()); if sub_compaction { - info!("running enhanced gc bottom-most compaction with sub-compaction, splitting compaction jobs"); + info!( + "running enhanced gc bottom-most compaction with sub-compaction, splitting compaction jobs" + ); let jobs = self .gc_compaction_split_jobs(job, options.sub_compaction_max_job_size_mb) .await?; @@ -2393,7 +2602,13 @@ impl Timeline { let debug_mode = cfg!(debug_assertions) || cfg!(feature = "testing"); - info!("running enhanced gc bottom-most compaction, dry_run={dry_run}, compact_key_range={}..{}, compact_lsn_range={}..{}", compact_key_range.start, compact_key_range.end, compact_lsn_range.start, compact_lsn_range.end); + info!( + "running enhanced gc bottom-most compaction, dry_run={dry_run}, compact_key_range={}..{}, compact_lsn_range={}..{}", + compact_key_range.start, + compact_key_range.end, + compact_lsn_range.start, + compact_lsn_range.end + ); scopeguard::defer! { info!("done enhanced gc bottom-most compaction"); @@ -2422,7 +2637,9 @@ impl Timeline { let mut gc_cutoff = if compact_lsn_range.end == Lsn::MAX { if real_gc_cutoff == Lsn::INVALID { // If the gc_cutoff is not generated yet, we should not compact anything. - tracing::warn!("no layers to compact with gc: gc_cutoff not generated yet, skipping gc bottom-most compaction"); + tracing::warn!( + "no layers to compact with gc: gc_cutoff not generated yet, skipping gc bottom-most compaction" + ); return Ok(()); } real_gc_cutoff @@ -2430,7 +2647,10 @@ impl Timeline { compact_lsn_range.end }; if gc_cutoff > real_gc_cutoff { - warn!("provided compact_lsn_range.end={} is larger than the real_gc_cutoff={}, using the real gc cutoff", gc_cutoff, real_gc_cutoff); + warn!( + "provided compact_lsn_range.end={} is larger than the real_gc_cutoff={}, using the real gc cutoff", + gc_cutoff, real_gc_cutoff + ); gc_cutoff = real_gc_cutoff; } gc_cutoff @@ -2454,7 +2674,10 @@ impl Timeline { .map(|desc| desc.get_lsn_range().end) .max() else { - info!("no layers to compact with gc: no historic layers below gc_cutoff, gc_cutoff={}", gc_cutoff); + info!( + "no layers to compact with gc: no historic layers below gc_cutoff, gc_cutoff={}", + gc_cutoff + ); return Ok(()); }; // Next, if the user specifies compact_lsn_range.start, we need to filter some layers out. 
All the layers (strictly) below @@ -2472,7 +2695,10 @@ impl Timeline { .map(|desc| desc.get_lsn_range().start) .min() else { - info!("no layers to compact with gc: no historic layers above compact_above_lsn, compact_above_lsn={}", compact_lsn_range.end); + info!( + "no layers to compact with gc: no historic layers above compact_above_lsn, compact_above_lsn={}", + compact_lsn_range.end + ); return Ok(()); }; // Then, pick all the layers that are below the max_layer_lsn. This is to ensure we can pick all single-key @@ -2495,7 +2721,10 @@ impl Timeline { } } if selected_layers.is_empty() { - info!("no layers to compact with gc: no layers within the key range, gc_cutoff={}, key_range={}..{}", gc_cutoff, compact_key_range.start, compact_key_range.end); + info!( + "no layers to compact with gc: no layers within the key range, gc_cutoff={}, key_range={}..{}", + gc_cutoff, compact_key_range.start, compact_key_range.end + ); return Ok(()); } retain_lsns_below_horizon.sort(); @@ -2577,7 +2806,10 @@ impl Timeline { .map(|layer| layer.layer_desc().layer_name()) .collect_vec(); if let Some(err) = check_valid_layermap(&layer_names) { - bail!("gc-compaction layer map check failed because {}, cannot proceed with compaction due to potential data loss", err); + bail!( + "gc-compaction layer map check failed because {}, cannot proceed with compaction due to potential data loss", + err + ); } // The maximum LSN we are processing in this compaction loop let end_lsn = job_desc @@ -2984,7 +3216,10 @@ impl Timeline { // the writer, so potentially, we will need a function like `ImageLayerBatchWriter::get_all_pending_layer_keys` to get all the keys that are // in the writer before finalizing the persistent layers. Now we would leave some dangling layers on the disk if the check fails. 
if let Some(err) = check_valid_layermap(&final_layers) { - bail!("gc-compaction layer map check failed after compaction because {}, compaction result not applied to the layer map due to potential data loss", err); + bail!( + "gc-compaction layer map check failed after compaction because {}, compaction result not applied to the layer map due to potential data loss", + err + ); } // Between the sanity check and this compaction update, there could be new layers being flushed, but it should be fine because we only @@ -3049,7 +3284,8 @@ impl Timeline { if let Some(to) = compact_to_set.get(&layer.layer_desc().key()) { tracing::info!( "skipping delete {} because found same layer key at different generation {}", - layer, to + layer, + to ); } else { compact_from.push(layer.clone()); diff --git a/pageserver/src/tenant/timeline/delete.rs b/pageserver/src/tenant/timeline/delete.rs index 841b2fa1c7..7cdc69e55f 100644 --- a/pageserver/src/tenant/timeline/delete.rs +++ b/pageserver/src/tenant/timeline/delete.rs @@ -1,26 +1,26 @@ -use std::{ - ops::{Deref, DerefMut}, - sync::Arc, -}; +use std::ops::{Deref, DerefMut}; +use std::sync::Arc; use anyhow::Context; -use pageserver_api::{models::TimelineState, shard::TenantShardId}; +use pageserver_api::models::TimelineState; +use pageserver_api::shard::TenantShardId; use remote_storage::DownloadError; use tokio::sync::OwnedMutexGuard; -use tracing::{error, info, info_span, instrument, Instrument}; -use utils::{crashsafe, fs_ext, id::TimelineId, pausable_failpoint}; +use tracing::{Instrument, error, info, info_span, instrument}; +use utils::id::TimelineId; +use utils::{crashsafe, fs_ext, pausable_failpoint}; -use crate::{ - config::PageServerConf, - task_mgr::{self, TaskKind}, - tenant::{ - metadata::TimelineMetadata, - remote_timeline_client::{PersistIndexPartWithDeletedFlagError, RemoteTimelineClient}, - CreateTimelineCause, DeleteTimelineError, MaybeDeletedIndexPart, Tenant, - TenantManifestError, Timeline, TimelineOrOffloaded, - }, - virtual_file::MaybeFatalIo, +use crate::config::PageServerConf; +use crate::task_mgr::{self, TaskKind}; +use crate::tenant::metadata::TimelineMetadata; +use crate::tenant::remote_timeline_client::{ + PersistIndexPartWithDeletedFlagError, RemoteTimelineClient, }; +use crate::tenant::{ + CreateTimelineCause, DeleteTimelineError, MaybeDeletedIndexPart, Tenant, TenantManifestError, + Timeline, TimelineOrOffloaded, +}; +use crate::virtual_file::MaybeFatalIo; /// Mark timeline as deleted in S3 so we won't pick it up next time /// during attach or pageserver restart. @@ -137,6 +137,11 @@ async fn remove_maybe_offloaded_timeline_from_tenant( timelines.remove(&timeline.timeline_id).expect( "timeline that we were deleting was concurrently removed from 'timelines' map", ); + tenant + .scheduled_compaction_tasks + .lock() + .unwrap() + .remove(&timeline.timeline_id); } TimelineOrOffloaded::Offloaded(timeline) => { let offloaded_timeline = timelines_offloaded @@ -300,6 +305,7 @@ impl DeleteTimelineFlow { // Thus we need to skip the validation here. 
CreateTimelineCause::Delete, crate::tenant::CreateTimelineIdempotency::FailWithConflict, // doesn't matter what we put here + None, // doesn't matter what we put here ) .context("create_timeline_struct")?; diff --git a/pageserver/src/tenant/timeline/detach_ancestor.rs b/pageserver/src/tenant/timeline/detach_ancestor.rs index e0084d3eef..71bd196344 100644 --- a/pageserver/src/tenant/timeline/detach_ancestor.rs +++ b/pageserver/src/tenant/timeline/detach_ancestor.rs @@ -1,25 +1,28 @@ -use std::{collections::HashSet, sync::Arc}; +use std::collections::HashSet; +use std::sync::Arc; -use super::{layer_manager::LayerManager, FlushLayerError, Timeline}; -use crate::{ - context::{DownloadBehavior, RequestContext}, - task_mgr::TaskKind, - tenant::{ - remote_timeline_client::index::GcBlockingReason::DetachAncestor, - storage_layer::{ - layer::local_layer_path, AsLayerDesc as _, DeltaLayerWriter, Layer, ResidentLayer, - }, - Tenant, - }, - virtual_file::{MaybeFatalIo, VirtualFile}, -}; use anyhow::Context; use http_utils::error::ApiError; -use pageserver_api::{models::detach_ancestor::AncestorDetached, shard::ShardIdentity}; +use pageserver_api::models::detach_ancestor::AncestorDetached; +use pageserver_api::shard::ShardIdentity; use tokio::sync::Semaphore; use tokio_util::sync::CancellationToken; use tracing::Instrument; -use utils::{completion, generation::Generation, id::TimelineId, lsn::Lsn}; +use utils::completion; +use utils::generation::Generation; +use utils::id::TimelineId; +use utils::lsn::Lsn; +use utils::sync::gate::GateError; + +use super::layer_manager::LayerManager; +use super::{FlushLayerError, Timeline}; +use crate::context::{DownloadBehavior, RequestContext}; +use crate::task_mgr::TaskKind; +use crate::tenant::Tenant; +use crate::tenant::remote_timeline_client::index::GcBlockingReason::DetachAncestor; +use crate::tenant::storage_layer::layer::local_layer_path; +use crate::tenant::storage_layer::{AsLayerDesc as _, DeltaLayerWriter, Layer, ResidentLayer}; +use crate::virtual_file::{MaybeFatalIo, VirtualFile}; #[derive(Debug, thiserror::Error)] pub(crate) enum Error { @@ -64,9 +67,10 @@ impl Error { where F: Fn(anyhow::Error) -> Error, { + use remote_storage::TimeoutOrCancel; + use crate::tenant::remote_timeline_client::WaitCompletionError; use crate::tenant::upload_queue::NotInitialized; - use remote_storage::TimeoutOrCancel; if e.is::() || TimeoutOrCancel::caused_by_cancel(&e) @@ -360,14 +364,25 @@ pub(super) async fn prepare( let mut tasks = tokio::task::JoinSet::new(); let limiter = Arc::new(Semaphore::new(options.copy_concurrency.get())); + let cancel_eval = CancellationToken::new(); for adopted in rest_of_historic { let limiter = limiter.clone(); let timeline = detached.clone(); + let cancel_eval = cancel_eval.clone(); tasks.spawn( async move { - let _permit = limiter.acquire().await; + let _permit = tokio::select! { + permit = limiter.acquire() => { + permit + } + // Wait for the cancellation here instead of letting the entire task be cancelled. + // Cancellations are racy in that they might leave layers on disk. + _ = cancel_eval.cancelled() => { + Err(Error::ShuttingDown)? 
+ } + }; let (owned, did_hardlink) = remote_copy( &adopted, &timeline, @@ -383,7 +398,22 @@ pub(super) async fn prepare( ); } + fn delete_layers(timeline: &Timeline, layers: Vec) -> Result<(), Error> { + // We are deleting layers, so we must hold the gate + let _gate = timeline.gate.enter().map_err(|e| match e { + GateError::GateClosed => Error::ShuttingDown, + })?; + { + layers.into_iter().for_each(|l: Layer| { + l.delete_on_drop(); + std::mem::drop(l); + }); + } + Ok(()) + } + let mut should_fsync = false; + let mut first_err = None; while let Some(res) = tasks.join_next().await { match res { Ok(Ok((owned, did_hardlink))) => { @@ -392,13 +422,24 @@ pub(super) async fn prepare( } new_layers.push(owned); } + + // Don't stop the evaluation on errors, so that we get the full set of hardlinked layers to delete. Ok(Err(failed)) => { - return Err(failed); + cancel_eval.cancel(); + first_err.get_or_insert(failed); + } + Err(je) => { + cancel_eval.cancel(); + first_err.get_or_insert(Error::Prepare(je.into())); } - Err(je) => return Err(Error::Prepare(je.into())), } } + if let Some(failed) = first_err { + delete_layers(detached, new_layers)?; + return Err(failed); + } + // fsync directory again if we hardlinked something if should_fsync { fsync_timeline_dir(detached, ctx).await; @@ -646,6 +687,11 @@ async fn remote_copy( let conf = adoptee.conf; let file_name = adopted.layer_desc().layer_name(); + // We don't want to shut the timeline down during this operation because we do `delete_on_drop` below + let _gate = adoptee.gate.enter().map_err(|e| match e { + GateError::GateClosed => Error::ShuttingDown, + })?; + // depending if Layer::keep_resident, do a hardlink let did_hardlink; let owned = if let Some(adopted_resident) = adopted.keep_resident().await { @@ -657,8 +703,32 @@ async fn remote_copy( &file_name, &metadata.generation, ); - std::fs::hard_link(adopted_path, &adoptee_path) - .map_err(|e| Error::launder(e.into(), Error::Prepare))?; + + match std::fs::hard_link(adopted_path, &adoptee_path) { + Ok(()) => {} + Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => { + // In theory we should not get into this situation as we are doing cleanups of the layer file after errors. + // However, we don't do cleanups for errors past `prepare`, so there is the slight chance to get to this branch. 
+ + // Double check that the file is orphan (probably from an earlier attempt), then delete it + let key = file_name.clone().into(); + if adoptee.layers.read().await.contains_key(&key) { + // We are supposed to filter out such cases before coming to this function + return Err(Error::Prepare(anyhow::anyhow!( + "layer file {file_name} already present and inside layer map" + ))); + } + tracing::info!("Deleting orphan layer file to make way for hard linking"); + // Delete orphan layer file and try again, to ensure this layer has a well understood source + std::fs::remove_file(adopted_path) + .map_err(|e| Error::launder(e.into(), Error::Prepare))?; + std::fs::hard_link(adopted_path, &adoptee_path) + .map_err(|e| Error::launder(e.into(), Error::Prepare))?; + } + Err(e) => { + return Err(Error::launder(e.into(), Error::Prepare)); + } + }; did_hardlink = true; Layer::for_resident(conf, adoptee, adoptee_path, file_name, metadata).drop_eviction_guard() } else { @@ -666,12 +736,21 @@ async fn remote_copy( Layer::for_evicted(conf, adoptee, file_name, metadata) }; - let layer = adoptee + let layer = match adoptee .remote_client .copy_timeline_layer(adopted, &owned, cancel) .await - .map(move |()| owned) - .map_err(|e| Error::launder(e, Error::Prepare))?; + { + Ok(()) => owned, + Err(e) => { + { + // Clean up the layer so that on a retry we don't get errors that the file already exists + owned.delete_on_drop(); + std::mem::drop(owned); + } + return Err(Error::launder(e, Error::Prepare)); + } + }; Ok((layer, did_hardlink)) } @@ -780,7 +859,7 @@ pub(super) async fn detach_and_reparent( // TODO: make sure there are no `?` before tenant_reset from after a questionmark from // here. panic!( - "bug: detach_and_reparent called on a timeline which has not been detached or which has no live ancestor" + "bug: detach_and_reparent called on a timeline which has not been detached or which has no live ancestor" ); } }; diff --git a/pageserver/src/tenant/timeline/eviction_task.rs b/pageserver/src/tenant/timeline/eviction_task.rs index 77c33349e0..187d9f248e 100644 --- a/pageserver/src/tenant/timeline/eviction_task.rs +++ b/pageserver/src/tenant/timeline/eviction_task.rs @@ -13,34 +13,27 @@ //! Items with parentheses are not (yet) touched by this task. //! //! 
See write-up on restart on-demand download spike: -use std::{ - collections::HashMap, - ops::ControlFlow, - sync::Arc, - time::{Duration, SystemTime}, -}; +use std::collections::HashMap; +use std::ops::ControlFlow; +use std::sync::Arc; +use std::time::{Duration, SystemTime}; use pageserver_api::models::{EvictionPolicy, EvictionPolicyLayerAccessThreshold}; use tokio::time::Instant; use tokio_util::sync::CancellationToken; -use tracing::{debug, info, info_span, instrument, warn, Instrument}; - -use crate::{ - context::{DownloadBehavior, RequestContext}, - pgdatadir_mapping::CollectKeySpaceError, - task_mgr::{self, TaskKind, BACKGROUND_RUNTIME}, - tenant::{ - size::CalculateSyntheticSizeError, - storage_layer::LayerVisibilityHint, - tasks::{sleep_random, BackgroundLoopKind, BackgroundLoopSemaphorePermit}, - timeline::EvictionError, - LogicalSizeCalculationCause, Tenant, - }, -}; - -use utils::{completion, sync::gate::GateGuard}; +use tracing::{Instrument, debug, info, info_span, instrument, warn}; +use utils::completion; +use utils::sync::gate::GateGuard; use super::Timeline; +use crate::context::{DownloadBehavior, RequestContext}; +use crate::pgdatadir_mapping::CollectKeySpaceError; +use crate::task_mgr::{self, BACKGROUND_RUNTIME, TaskKind}; +use crate::tenant::size::CalculateSyntheticSizeError; +use crate::tenant::storage_layer::LayerVisibilityHint; +use crate::tenant::tasks::{BackgroundLoopKind, BackgroundLoopSemaphorePermit, sleep_random}; +use crate::tenant::timeline::EvictionError; +use crate::tenant::{LogicalSizeCalculationCause, Tenant}; #[derive(Default)] pub struct EvictionTaskTimelineState { diff --git a/pageserver/src/tenant/timeline/handle.rs b/pageserver/src/tenant/timeline/handle.rs index 5b39daaaf8..67fb89c433 100644 --- a/pageserver/src/tenant/timeline/handle.rs +++ b/pageserver/src/tenant/timeline/handle.rs @@ -202,18 +202,13 @@ //! to the parent shard during a shard split. Eventually, the shard split task will //! shut down the parent => case (1). -use std::collections::hash_map; -use std::collections::HashMap; -use std::sync::Arc; -use std::sync::Mutex; -use std::sync::Weak; +use std::collections::{HashMap, hash_map}; +use std::sync::{Arc, Mutex, Weak}; use pageserver_api::shard::ShardIdentity; -use tracing::instrument; -use tracing::trace; +use tracing::{instrument, trace}; use utils::id::TimelineId; -use utils::shard::ShardIndex; -use utils::shard::ShardNumber; +use utils::shard::{ShardIndex, ShardNumber}; use crate::tenant::mgr::ShardSelector; @@ -631,12 +626,10 @@ impl HandleInner { mod tests { use std::sync::Weak; - use pageserver_api::{ - key::{rel_block_to_key, Key, DBDIR_KEY}, - models::ShardParameters, - reltag::RelTag, - shard::ShardStripeSize, - }; + use pageserver_api::key::{DBDIR_KEY, Key, rel_block_to_key}; + use pageserver_api::models::ShardParameters; + use pageserver_api::reltag::RelTag; + use pageserver_api::shard::ShardStripeSize; use utils::shard::ShardCount; use super::*; diff --git a/pageserver/src/tenant/timeline/heatmap_layers_downloader.rs b/pageserver/src/tenant/timeline/heatmap_layers_downloader.rs index 0ba9753e85..27243ba378 100644 --- a/pageserver/src/tenant/timeline/heatmap_layers_downloader.rs +++ b/pageserver/src/tenant/timeline/heatmap_layers_downloader.rs @@ -3,9 +3,10 @@ //! Provides utilities to spawn and abort a background task where the downloads happen. //! See /v1/tenant/:tenant_shard_id/timeline/:timeline_id/download_heatmap_layers. 
+use std::sync::{Arc, Mutex}; + use futures::StreamExt; use http_utils::error::ApiError; -use std::sync::{Arc, Mutex}; use tokio_util::sync::CancellationToken; use utils::sync::gate::Gate; diff --git a/pageserver/src/tenant/timeline/import_pgdata.rs b/pageserver/src/tenant/timeline/import_pgdata.rs index 6940179ae9..8b94a114d6 100644 --- a/pageserver/src/tenant/timeline/import_pgdata.rs +++ b/pageserver/src/tenant/timeline/import_pgdata.rs @@ -1,14 +1,14 @@ use std::sync::Arc; -use anyhow::{bail, Context}; +use anyhow::{Context, bail}; use remote_storage::RemotePath; use tokio_util::sync::CancellationToken; -use tracing::{info, info_span, Instrument}; +use tracing::{Instrument, info, info_span}; use utils::lsn::Lsn; -use crate::{context::RequestContext, tenant::metadata::TimelineMetadata}; - use super::Timeline; +use crate::context::RequestContext; +use crate::tenant::metadata::TimelineMetadata; mod flow; mod importbucket_client; diff --git a/pageserver/src/tenant/timeline/import_pgdata/flow.rs b/pageserver/src/tenant/timeline/import_pgdata/flow.rs index 4388072606..3ef82b3658 100644 --- a/pageserver/src/tenant/timeline/import_pgdata/flow.rs +++ b/pageserver/src/tenant/timeline/import_pgdata/flow.rs @@ -28,52 +28,38 @@ //! An incomplete set of TODOs from the Hackathon: //! - version-specific CheckPointData (=> pgv abstraction, already exists for regular walingest) +use std::collections::HashSet; +use std::ops::Range; use std::sync::Arc; use anyhow::{bail, ensure}; use bytes::Bytes; - use itertools::Itertools; -use pageserver_api::{ - key::{rel_block_to_key, rel_dir_to_key, rel_size_to_key, relmap_file_key, DBDIR_KEY}, - reltag::RelTag, - shard::ShardIdentity, -}; -use postgres_ffi::{pg_constants, relfile_utils::parse_relfilename, BLCKSZ}; -use tokio::task::JoinSet; -use tracing::{debug, info_span, instrument, Instrument}; - -use crate::{ - assert_u64_eq_usize::UsizeIsU64, - pgdatadir_mapping::{SlruSegmentDirectory, TwoPhaseDirectory}, -}; -use crate::{ - context::{DownloadBehavior, RequestContext}, - pgdatadir_mapping::{DbDirectory, RelDirectory}, - task_mgr::TaskKind, - tenant::storage_layer::{ImageLayerWriter, Layer}, -}; - -use pageserver_api::key::Key; use pageserver_api::key::{ - slru_block_to_key, slru_dir_to_key, slru_segment_size_to_key, CHECKPOINT_KEY, CONTROLFILE_KEY, - TWOPHASEDIR_KEY, + CHECKPOINT_KEY, CONTROLFILE_KEY, DBDIR_KEY, Key, TWOPHASEDIR_KEY, rel_block_to_key, + rel_dir_to_key, rel_size_to_key, relmap_file_key, slru_block_to_key, slru_dir_to_key, + slru_segment_size_to_key, }; -use pageserver_api::keyspace::singleton_range; -use pageserver_api::keyspace::{contiguous_range_len, is_contiguous_range}; -use pageserver_api::reltag::SlruKind; +use pageserver_api::keyspace::{contiguous_range_len, is_contiguous_range, singleton_range}; +use pageserver_api::reltag::{RelTag, SlruKind}; +use pageserver_api::shard::ShardIdentity; +use postgres_ffi::relfile_utils::parse_relfilename; +use postgres_ffi::{BLCKSZ, pg_constants}; +use remote_storage::RemotePath; +use tokio::task::JoinSet; +use tracing::{Instrument, debug, info_span, instrument}; use utils::bin_ser::BeSer; use utils::lsn::Lsn; -use std::collections::HashSet; -use std::ops::Range; - -use super::{ - importbucket_client::{ControlFile, RemoteStorageWrapper}, - Timeline, +use super::Timeline; +use super::importbucket_client::{ControlFile, RemoteStorageWrapper}; +use crate::assert_u64_eq_usize::UsizeIsU64; +use crate::context::{DownloadBehavior, RequestContext}; +use crate::pgdatadir_mapping::{ + DbDirectory, RelDirectory, 
SlruSegmentDirectory, TwoPhaseDirectory, }; - -use remote_storage::RemotePath; +use crate::task_mgr::TaskKind; +use crate::tenant::storage_layer::{ImageLayerWriter, Layer}; pub async fn run( timeline: Arc, diff --git a/pageserver/src/tenant/timeline/import_pgdata/importbucket_client.rs b/pageserver/src/tenant/timeline/import_pgdata/importbucket_client.rs index 68937e535d..a17a10d56b 100644 --- a/pageserver/src/tenant/timeline/import_pgdata/importbucket_client.rs +++ b/pageserver/src/tenant/timeline/import_pgdata/importbucket_client.rs @@ -1,4 +1,5 @@ -use std::{ops::Bound, sync::Arc}; +use std::ops::Bound; +use std::sync::Arc; use anyhow::Context; use bytes::Bytes; @@ -12,9 +13,9 @@ use tokio_util::sync::CancellationToken; use tracing::{debug, info, instrument}; use utils::lsn::Lsn; -use crate::{assert_u64_eq_usize::U64IsUsize, config::PageServerConf}; - use super::{importbucket_format, index_part_format}; +use crate::assert_u64_eq_usize::U64IsUsize; +use crate::config::PageServerConf; pub async fn new( conf: &'static PageServerConf, diff --git a/pageserver/src/tenant/timeline/import_pgdata/index_part_format.rs b/pageserver/src/tenant/timeline/import_pgdata/index_part_format.rs index 310d97a6a9..ea7a41b25f 100644 --- a/pageserver/src/tenant/timeline/import_pgdata/index_part_format.rs +++ b/pageserver/src/tenant/timeline/import_pgdata/index_part_format.rs @@ -1,7 +1,6 @@ -use serde::{Deserialize, Serialize}; - #[cfg(feature = "testing")] use camino::Utf8PathBuf; +use serde::{Deserialize, Serialize}; #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] pub enum Root { diff --git a/pageserver/src/tenant/timeline/import_pgdata/upcall_api.rs b/pageserver/src/tenant/timeline/import_pgdata/upcall_api.rs index c5210f9a30..7c7a4de2fc 100644 --- a/pageserver/src/tenant/timeline/import_pgdata/upcall_api.rs +++ b/pageserver/src/tenant/timeline/import_pgdata/upcall_api.rs @@ -1,13 +1,12 @@ //! FIXME: most of this is copy-paste from mgmt_api.rs ; dedupe into a `reqwest_utils::Client` crate. use pageserver_client::mgmt_api::{Error, ResponseErrorMessageExt}; +use reqwest::Method; use serde::{Deserialize, Serialize}; use tokio_util::sync::CancellationToken; use tracing::error; -use crate::config::PageServerConf; -use reqwest::Method; - use super::importbucket_format::Spec; +use crate::config::PageServerConf; pub struct Client { base_url: String, diff --git a/pageserver/src/tenant/timeline/init.rs b/pageserver/src/tenant/timeline/init.rs index 6634d07a0d..e952df0845 100644 --- a/pageserver/src/tenant/timeline/init.rs +++ b/pageserver/src/tenant/timeline/init.rs @@ -1,22 +1,16 @@ -use crate::{ - is_temporary, - tenant::{ - ephemeral_file::is_ephemeral_file, - remote_timeline_client::{ - self, - index::{IndexPart, LayerFileMetadata}, - }, - storage_layer::LayerName, - }, -}; +use std::collections::{HashMap, hash_map}; +use std::str::FromStr; + use anyhow::Context; use camino::{Utf8Path, Utf8PathBuf}; -use std::{ - collections::{hash_map, HashMap}, - str::FromStr, -}; use utils::lsn::Lsn; +use crate::is_temporary; +use crate::tenant::ephemeral_file::is_ephemeral_file; +use crate::tenant::remote_timeline_client::index::{IndexPart, LayerFileMetadata}; +use crate::tenant::remote_timeline_client::{self}; +use crate::tenant::storage_layer::LayerName; + /// Identified files in the timeline directory. 
pub(super) enum Discovered { /// The only one we care about diff --git a/pageserver/src/tenant/timeline/layer_manager.rs b/pageserver/src/tenant/timeline/layer_manager.rs index cb7783d779..e552ea83de 100644 --- a/pageserver/src/tenant/timeline/layer_manager.rs +++ b/pageserver/src/tenant/timeline/layer_manager.rs @@ -1,27 +1,22 @@ -use anyhow::{bail, ensure, Context}; +use std::collections::HashMap; +use std::sync::Arc; + +use anyhow::{Context, bail, ensure}; use itertools::Itertools; use pageserver_api::shard::TenantShardId; -use std::{collections::HashMap, sync::Arc}; use tracing::trace; -use utils::{ - id::TimelineId, - lsn::{AtomicLsn, Lsn}, -}; - -use crate::{ - config::PageServerConf, - context::RequestContext, - metrics::TimelineMetrics, - tenant::{ - layer_map::{BatchedUpdates, LayerMap}, - storage_layer::{ - AsLayerDesc, InMemoryLayer, Layer, PersistentLayerDesc, PersistentLayerKey, - ResidentLayer, - }, - }, -}; +use utils::id::TimelineId; +use utils::lsn::{AtomicLsn, Lsn}; use super::TimelineWriterState; +use crate::config::PageServerConf; +use crate::context::RequestContext; +use crate::metrics::TimelineMetrics; +use crate::tenant::layer_map::{BatchedUpdates, LayerMap}; +use crate::tenant::storage_layer::{ + AsLayerDesc, InMemoryLayer, Layer, LayerVisibilityHint, PersistentLayerDesc, + PersistentLayerKey, ResidentLayer, +}; /// Provides semantic APIs to manipulate the layer map. pub(crate) enum LayerManager { @@ -118,6 +113,12 @@ impl LayerManager { self.layers().values().filter(|l| l.is_likely_resident()) } + pub(crate) fn visible_layers(&self) -> impl Iterator + '_ { + self.layers() + .values() + .filter(|l| l.visibility() == LayerVisibilityHint::Visible) + } + pub(crate) fn contains(&self, layer: &Layer) -> bool { self.contains_key(&layer.layer_desc().key()) } @@ -208,9 +209,7 @@ impl OpenLayerManager { trace!( "creating in-memory layer at {}/{} for record at {}", - timeline_id, - start_lsn, - lsn + timeline_id, start_lsn, lsn ); let new_layer = diff --git a/pageserver/src/tenant/timeline/logical_size.rs b/pageserver/src/tenant/timeline/logical_size.rs index f4a4eea54a..397037ca9f 100644 --- a/pageserver/src/tenant/timeline/logical_size.rs +++ b/pageserver/src/tenant/timeline/logical_size.rs @@ -1,11 +1,10 @@ -use anyhow::Context; +use std::sync::atomic::{AtomicBool, AtomicI64, Ordering as AtomicOrdering}; +use anyhow::Context; use once_cell::sync::OnceCell; use tokio_util::sync::CancellationToken; use utils::lsn::Lsn; -use std::sync::atomic::{AtomicBool, AtomicI64, Ordering as AtomicOrdering}; - /// Internal structure to hold all data needed for logical size calculation. 
/// /// Calculation consists of two stages: diff --git a/pageserver/src/tenant/timeline/offload.rs b/pageserver/src/tenant/timeline/offload.rs index 93e5a1100d..43ffaa6aab 100644 --- a/pageserver/src/tenant/timeline/offload.rs +++ b/pageserver/src/tenant/timeline/offload.rs @@ -2,11 +2,11 @@ use std::sync::Arc; use pageserver_api::models::{TenantState, TimelineState}; -use super::delete::{delete_local_timeline_directory, DeletionGuard}; use super::Timeline; +use super::delete::{DeletionGuard, delete_local_timeline_directory}; use crate::span::debug_assert_current_span_has_tenant_and_timeline_id; use crate::tenant::remote_timeline_client::ShutdownIfArchivedError; -use crate::tenant::timeline::delete::{make_timeline_delete_guard, TimelineDeleteGuardKind}; +use crate::tenant::timeline::delete::{TimelineDeleteGuardKind, make_timeline_delete_guard}; use crate::tenant::{ DeleteTimelineError, OffloadedTimeline, Tenant, TenantManifestError, TimelineOrOffloaded, }; @@ -143,5 +143,12 @@ fn remove_timeline_from_tenant( .remove(&timeline.timeline_id) .expect("timeline that we were deleting was concurrently removed from 'timelines' map"); + // Clear the compaction queue for this timeline + tenant + .scheduled_compaction_tasks + .lock() + .unwrap() + .remove(&timeline.timeline_id); + Arc::strong_count(&timeline) } diff --git a/pageserver/src/tenant/timeline/uninit.rs b/pageserver/src/tenant/timeline/uninit.rs index 3074463384..f66c0ffa0f 100644 --- a/pageserver/src/tenant/timeline/uninit.rs +++ b/pageserver/src/tenant/timeline/uninit.rs @@ -1,18 +1,21 @@ -use std::{collections::hash_map::Entry, fs, future::Future, sync::Arc}; +use std::collections::hash_map::Entry; +use std::fs; +use std::future::Future; +use std::sync::Arc; use anyhow::Context; use camino::Utf8PathBuf; use tracing::{error, info, info_span}; -use utils::{fs_ext, id::TimelineId, lsn::Lsn, sync::gate::GateGuard}; - -use crate::{ - context::RequestContext, - import_datadir, - span::debug_assert_current_span_has_tenant_and_timeline_id, - tenant::{CreateTimelineError, CreateTimelineIdempotency, Tenant, TimelineOrOffloaded}, -}; +use utils::fs_ext; +use utils::id::TimelineId; +use utils::lsn::Lsn; +use utils::sync::gate::GateGuard; use super::Timeline; +use crate::context::RequestContext; +use crate::import_datadir; +use crate::span::debug_assert_current_span_has_tenant_and_timeline_id; +use crate::tenant::{CreateTimelineError, CreateTimelineIdempotency, Tenant, TimelineOrOffloaded}; /// A timeline with some of its files on disk, being initialized. /// This struct ensures the atomicity of the timeline init: it's either properly created and inserted into pageserver's memory, or @@ -128,7 +131,7 @@ impl<'t> UninitializedTimeline<'t> { // We do not call Self::abort here. Because we don't cleanly shut down our Timeline, [`Self::drop`] should // skip trying to delete the timeline directory too. 
anyhow::bail!( - "Found freshly initialized timeline {tenant_shard_id}/{timeline_id} in the tenant map" + "Found freshly initialized timeline {tenant_shard_id}/{timeline_id} in the tenant map" ) } Entry::Vacant(v) => { diff --git a/pageserver/src/tenant/timeline/walreceiver.rs b/pageserver/src/tenant/timeline/walreceiver.rs index f831f5e48a..4f80073cc3 100644 --- a/pageserver/src/tenant/timeline/walreceiver.rs +++ b/pageserver/src/tenant/timeline/walreceiver.rs @@ -23,17 +23,11 @@ mod connection_manager; mod walreceiver_connection; -use crate::context::{DownloadBehavior, RequestContext}; -use crate::task_mgr::{TaskKind, WALRECEIVER_RUNTIME}; -use crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id; -use crate::tenant::timeline::walreceiver::connection_manager::{ - connection_manager_loop_step, ConnectionManagerState, -}; - use std::future::Future; use std::num::NonZeroU64; use std::sync::Arc; use std::time::Duration; + use storage_broker::BrokerClientChannel; use tokio::sync::watch; use tokio_util::sync::CancellationToken; @@ -41,8 +35,13 @@ use tracing::*; use utils::postgres_client::PostgresClientProtocol; use self::connection_manager::ConnectionManagerStatus; - use super::Timeline; +use crate::context::{DownloadBehavior, RequestContext}; +use crate::task_mgr::{TaskKind, WALRECEIVER_RUNTIME}; +use crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id; +use crate::tenant::timeline::walreceiver::connection_manager::{ + ConnectionManagerState, connection_manager_loop_step, +}; #[derive(Clone)] pub struct WalReceiverConf { @@ -56,6 +55,7 @@ pub struct WalReceiverConf { pub auth_token: Option>, pub availability_zone: Option, pub ingest_batch_size: u64, + pub validate_wal_contiguity: bool, } pub struct WalReceiver { diff --git a/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs b/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs index 65f9d39078..df2663f6bb 100644 --- a/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs +++ b/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs @@ -9,45 +9,42 @@ //! then a (re)connection happens, if necessary. //! Only WAL streaming task expects to be finished, other loops (storage broker, connection management) never exit unless cancelled explicitly via the dedicated channel. 
-use std::{collections::HashMap, num::NonZeroU64, ops::ControlFlow, sync::Arc, time::Duration}; +use std::collections::HashMap; +use std::num::NonZeroU64; +use std::ops::ControlFlow; +use std::sync::Arc; +use std::time::Duration; -use super::{TaskStateUpdate, WalReceiverConf}; +use anyhow::Context; +use chrono::{NaiveDateTime, Utc}; +use pageserver_api::models::TimelineState; +use postgres_connection::PgConnectionConfig; +use storage_broker::proto::{ + FilterTenantTimelineId, MessageType, SafekeeperDiscoveryRequest, SafekeeperDiscoveryResponse, + SubscribeByFilterRequest, TenantTimelineId as ProtoTenantTimelineId, TypeSubscription, + TypedMessage, +}; +use storage_broker::{BrokerClientChannel, Code, Streaming}; +use tokio_util::sync::CancellationToken; +use tracing::*; +use utils::backoff::{ + DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS, exponential_backoff, +}; +use utils::id::{NodeId, TenantTimelineId}; +use utils::lsn::Lsn; +use utils::postgres_client::{ + ConnectionConfigArgs, PostgresClientProtocol, wal_stream_connection_config, +}; + +use super::walreceiver_connection::{WalConnectionStatus, WalReceiverError}; +use super::{TaskEvent, TaskHandle, TaskStateUpdate, WalReceiverConf}; use crate::context::{DownloadBehavior, RequestContext}; use crate::metrics::{ WALRECEIVER_ACTIVE_MANAGERS, WALRECEIVER_BROKER_UPDATES, WALRECEIVER_CANDIDATES_ADDED, WALRECEIVER_CANDIDATES_REMOVED, WALRECEIVER_SWITCHES, }; use crate::task_mgr::TaskKind; -use crate::tenant::{debug_assert_current_span_has_tenant_and_timeline_id, Timeline}; -use anyhow::Context; -use chrono::{NaiveDateTime, Utc}; -use pageserver_api::models::TimelineState; - -use storage_broker::proto::TenantTimelineId as ProtoTenantTimelineId; -use storage_broker::proto::{ - FilterTenantTimelineId, MessageType, SafekeeperDiscoveryRequest, SafekeeperDiscoveryResponse, - SubscribeByFilterRequest, TypeSubscription, TypedMessage, -}; -use storage_broker::{BrokerClientChannel, Code, Streaming}; -use tokio_util::sync::CancellationToken; -use tracing::*; - -use postgres_connection::PgConnectionConfig; -use utils::backoff::{ - exponential_backoff, DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS, -}; -use utils::postgres_client::{ - wal_stream_connection_config, ConnectionConfigArgs, PostgresClientProtocol, -}; -use utils::{ - id::{NodeId, TenantTimelineId}, - lsn::Lsn, -}; - -use super::{ - walreceiver_connection::WalConnectionStatus, walreceiver_connection::WalReceiverError, - TaskEvent, TaskHandle, -}; +use crate::tenant::{Timeline, debug_assert_current_span_has_tenant_and_timeline_id}; pub(crate) struct Cancelled; @@ -349,7 +346,9 @@ async fn subscribe_for_timeline_updates( Err(e) => { // Safekeeper nodes can stop pushing timeline updates to the broker, when no new writes happen and // entire WAL is streamed. Keep this noticeable with logging, but do not warn/error. 
- info!("Attempt #{attempt}, failed to subscribe for timeline {id} updates in broker: {e:#}"); + info!( + "Attempt #{attempt}, failed to subscribe for timeline {id} updates in broker: {e:#}" + ); continue; } } @@ -512,11 +511,11 @@ impl ConnectionManagerState { fn spawn( &self, task: impl FnOnce( - tokio::sync::watch::Sender>, - CancellationToken, - ) -> Fut - + Send - + 'static, + tokio::sync::watch::Sender>, + CancellationToken, + ) -> Fut + + Send + + 'static, ) -> TaskHandle where Fut: std::future::Future> + Send, @@ -537,6 +536,7 @@ impl ConnectionManagerState { let connect_timeout = self.conf.wal_connect_timeout; let ingest_batch_size = self.conf.ingest_batch_size; let protocol = self.conf.protocol; + let validate_wal_contiguity = self.conf.validate_wal_contiguity; let timeline = Arc::clone(&self.timeline); let ctx = ctx.detached_child( TaskKind::WalReceiverConnectionHandler, @@ -558,6 +558,7 @@ impl ConnectionManagerState { ctx, node_id, ingest_batch_size, + validate_wal_contiguity, ) .await; @@ -878,8 +879,7 @@ impl ConnectionManagerState { discovered_new_wal = if candidate_commit_lsn > current_commit_lsn { trace!( "New candidate has commit_lsn {}, higher than current_commit_lsn {}", - candidate_commit_lsn, - current_commit_lsn + candidate_commit_lsn, current_commit_lsn ); Some(NewCommittedWAL { lsn: candidate_commit_lsn, @@ -1046,7 +1046,9 @@ impl ConnectionManagerState { if !node_ids_to_remove.is_empty() { for node_id in node_ids_to_remove { - info!("Safekeeper node {node_id} did not send events for over {lagging_wal_timeout:?}, not retrying the connections"); + info!( + "Safekeeper node {node_id} did not send events for over {lagging_wal_timeout:?}, not retrying the connections" + ); self.wal_connection_retries.remove(&node_id); WALRECEIVER_CANDIDATES_REMOVED.inc(); } @@ -1117,11 +1119,12 @@ impl ReconnectReason { #[cfg(test)] mod tests { - use super::*; - use crate::tenant::harness::{TenantHarness, TIMELINE_ID}; use pageserver_api::config::defaults::DEFAULT_WAL_RECEIVER_PROTOCOL; use url::Host; + use super::*; + use crate::tenant::harness::{TIMELINE_ID, TenantHarness}; + fn dummy_broker_sk_timeline( commit_lsn: u64, safekeeper_connstr: &str, @@ -1563,6 +1566,7 @@ mod tests { auth_token: None, availability_zone: None, ingest_batch_size: 1, + validate_wal_contiguity: false, }, wal_connection: None, wal_stream_candidates: HashMap::new(), diff --git a/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs b/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs index 23db4f88d2..f41a9cfe82 100644 --- a/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs +++ b/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs @@ -1,46 +1,48 @@ //! Actual Postgres connection handler to stream WAL to the server. 
-use std::{ - error::Error, - pin::pin, - str::FromStr, - sync::Arc, - time::{Duration, SystemTime}, -}; +use std::error::Error; +use std::pin::pin; +use std::str::FromStr; +use std::sync::Arc; +use std::time::{Duration, SystemTime}; -use anyhow::{anyhow, Context}; +use anyhow::{Context, anyhow}; use bytes::BytesMut; use chrono::{NaiveDateTime, Utc}; use fail::fail_point; use futures::StreamExt; -use postgres::{error::SqlState, SimpleQueryMessage, SimpleQueryRow}; -use postgres_ffi::WAL_SEGMENT_SIZE; -use postgres_ffi::{v14::xlog_utils::normalize_lsn, waldecoder::WalDecodeError}; -use postgres_protocol::message::backend::ReplicationMessage; -use postgres_types::PgLsn; -use tokio::{select, sync::watch, time}; -use tokio_postgres::{replication::ReplicationStream, Client}; -use tokio_util::sync::CancellationToken; -use tracing::{debug, error, info, trace, warn, Instrument}; -use wal_decoder::{ - models::{FlushUncommittedRecords, InterpretedWalRecord, InterpretedWalRecords}, - wire_format::FromWireFormat, -}; - -use super::TaskStateUpdate; -use crate::{ - context::RequestContext, - metrics::{LIVE_CONNECTIONS, WALRECEIVER_STARTED_CONNECTIONS, WAL_INGEST}, - pgdatadir_mapping::DatadirModification, - task_mgr::{TaskKind, WALRECEIVER_RUNTIME}, - tenant::{debug_assert_current_span_has_tenant_and_timeline_id, Timeline, WalReceiverInfo}, - walingest::WalIngest, -}; use postgres_backend::is_expected_io_error; use postgres_connection::PgConnectionConfig; -use postgres_ffi::waldecoder::WalStreamDecoder; -use utils::{critical, id::NodeId, lsn::Lsn, postgres_client::PostgresClientProtocol}; -use utils::{pageserver_feedback::PageserverFeedback, sync::gate::GateError}; +use postgres_ffi::WAL_SEGMENT_SIZE; +use postgres_ffi::v14::xlog_utils::normalize_lsn; +use postgres_ffi::waldecoder::{WalDecodeError, WalStreamDecoder}; +use postgres_protocol::message::backend::ReplicationMessage; +use postgres_types::PgLsn; +use tokio::sync::watch; +use tokio::{select, time}; +use tokio_postgres::error::SqlState; +use tokio_postgres::replication::ReplicationStream; +use tokio_postgres::{Client, SimpleQueryMessage, SimpleQueryRow}; +use tokio_util::sync::CancellationToken; +use tracing::{Instrument, debug, error, info, trace, warn}; +use utils::critical; +use utils::id::NodeId; +use utils::lsn::Lsn; +use utils::pageserver_feedback::PageserverFeedback; +use utils::postgres_client::PostgresClientProtocol; +use utils::sync::gate::GateError; +use wal_decoder::models::{FlushUncommittedRecords, InterpretedWalRecord, InterpretedWalRecords}; +use wal_decoder::wire_format::FromWireFormat; + +use super::TaskStateUpdate; +use crate::context::RequestContext; +use crate::metrics::{LIVE_CONNECTIONS, WAL_INGEST, WALRECEIVER_STARTED_CONNECTIONS}; +use crate::pgdatadir_mapping::DatadirModification; +use crate::task_mgr::{TaskKind, WALRECEIVER_RUNTIME}; +use crate::tenant::{ + Timeline, WalReceiverInfo, debug_assert_current_span_has_tenant_and_timeline_id, +}; +use crate::walingest::WalIngest; /// Status of the connection. #[derive(Debug, Clone, Copy)] @@ -64,7 +66,7 @@ pub(super) struct WalConnectionStatus { pub(super) enum WalReceiverError { /// An error of a type that does not indicate an issue, e.g. a connection closing - ExpectedSafekeeperError(postgres::Error), + ExpectedSafekeeperError(tokio_postgres::Error), /// An "error" message that carries a SUCCESSFUL_COMPLETION status code. 
Carries /// the message part of the original postgres error SuccessfulCompletion(String), @@ -120,6 +122,7 @@ pub(super) async fn handle_walreceiver_connection( ctx: RequestContext, safekeeper_node: NodeId, ingest_batch_size: u64, + validate_wal_contiguity: bool, ) -> Result<(), WalReceiverError> { debug_assert_current_span_has_tenant_and_timeline_id(); @@ -142,13 +145,15 @@ pub(super) async fn handle_walreceiver_connection( let mut config = wal_source_connconf.to_tokio_postgres_config(); config.application_name(format!("pageserver-{}", timeline.conf.id.0).as_str()); config.replication_mode(tokio_postgres::config::ReplicationMode::Physical); - match time::timeout(connect_timeout, config.connect(postgres::NoTls)).await { + match time::timeout(connect_timeout, config.connect(tokio_postgres::NoTls)).await { Ok(client_and_conn) => client_and_conn?, Err(_elapsed) => { // Timing out to connect to a safekeeper node could happen long time, due to // many reasons that pageserver cannot control. // Do not produce an error, but make it visible, that timeouts happen by logging the `event. - info!("Timed out while waiting {connect_timeout:?} for walreceiver connection to open"); + info!( + "Timed out while waiting {connect_timeout:?} for walreceiver connection to open" + ); return Ok(()); } } @@ -165,7 +170,9 @@ pub(super) async fn handle_walreceiver_connection( node: safekeeper_node, }; if let Err(e) = events_sender.send(TaskStateUpdate::Progress(connection_status)) { - warn!("Wal connection event listener dropped right after connection init, aborting the connection: {e}"); + warn!( + "Wal connection event listener dropped right after connection init, aborting the connection: {e}" + ); return Ok(()); } @@ -226,7 +233,9 @@ pub(super) async fn handle_walreceiver_connection( connection_status.latest_wal_update = Utc::now().naive_utc(); connection_status.commit_lsn = Some(end_of_wal); if let Err(e) = events_sender.send(TaskStateUpdate::Progress(connection_status)) { - warn!("Wal connection event listener dropped after IDENTIFY_SYSTEM, aborting the connection: {e}"); + warn!( + "Wal connection event listener dropped after IDENTIFY_SYSTEM, aborting the connection: {e}" + ); return Ok(()); } @@ -253,7 +262,9 @@ pub(super) async fn handle_walreceiver_connection( // to the safekeepers. startpoint = normalize_lsn(startpoint, WAL_SEGMENT_SIZE); - info!("last_record_lsn {last_rec_lsn} starting replication from {startpoint}, safekeeper is at {end_of_wal}..."); + info!( + "last_record_lsn {last_rec_lsn} starting replication from {startpoint}, safekeeper is at {end_of_wal}..." + ); let query = format!("START_REPLICATION PHYSICAL {startpoint}"); @@ -274,6 +285,7 @@ pub(super) async fn handle_walreceiver_connection( } => Some((format, compression)), }; + let mut expected_wal_start = startpoint; while let Some(replication_message) = { select! { _ = cancellation.cancelled() => { @@ -340,13 +352,49 @@ pub(super) async fn handle_walreceiver_connection( ) })?; + // Guard against WAL gaps. If the start LSN of the PG WAL section + // from which the interpreted records were extracted, doesn't match + // the end of the previous batch (or the starting point for the first batch), + // then kill this WAL receiver connection and start a new one. 
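The comment above condenses to a three-way decision on the batch's starting LSN: starting ahead of the expected position means WAL bytes were skipped (fatal), starting behind is tolerated for sharded ingest as long as the batch still carries at least one record newer than the last one ingested, and starting exactly at the expected position is the normal case. A standalone sketch of that rule, using illustrative names and a plain `u64` stand-in for `Lsn` rather than the identifiers in the hunk that follows:

    use std::cmp::Ordering;

    enum ContiguityCheck {
        Ok,
        /// Bytes in [expected, got) never reached us: tear down the connection.
        Gap { expected: u64, got: u64 },
        /// The batch only repeats records we have already ingested: also fatal.
        StaleBatch,
    }

    fn check_contiguity(
        expected_start: u64,
        batch_start: u64,
        first_record_end: Option<u64>,
        last_ingested: u64,
    ) -> ContiguityCheck {
        match batch_start.cmp(&expected_start) {
            Ordering::Greater => ContiguityCheck::Gap {
                expected: expected_start,
                got: batch_start,
            },
            // Other shards may legitimately read WAL behind us; reject the batch
            // only if it contains nothing we have not seen before.
            Ordering::Less => match first_record_end {
                Some(lsn) if lsn < last_ingested => ContiguityCheck::StaleBatch,
                _ => ContiguityCheck::Ok,
            },
            Ordering::Equal => ContiguityCheck::Ok,
        }
    }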
+ if validate_wal_contiguity { + if let Some(raw_wal_start_lsn) = batch.raw_wal_start_lsn { + match raw_wal_start_lsn.cmp(&expected_wal_start) { + std::cmp::Ordering::Greater => { + let msg = format!( + "Gap in streamed WAL: [{}, {})", + expected_wal_start, raw_wal_start_lsn + ); + critical!("{msg}"); + return Err(WalReceiverError::Other(anyhow!(msg))); + } + std::cmp::Ordering::Less => { + // Other shards are reading WAL behind us. + // This is valid, but check that we received records + // that we haven't seen before. + if let Some(first_rec) = batch.records.first() { + if first_rec.next_record_lsn < last_rec_lsn { + let msg = format!( + "Received record with next_record_lsn multiple times ({} < {})", + first_rec.next_record_lsn, expected_wal_start + ); + critical!("{msg}"); + return Err(WalReceiverError::Other(anyhow!(msg))); + } + } + } + std::cmp::Ordering::Equal => {} + } + } + } + let InterpretedWalRecords { records, next_record_lsn, + raw_wal_start_lsn: _, } = batch; tracing::debug!( - "Received WAL up to {} with next_record_lsn={:?}", + "Received WAL up to {} with next_record_lsn={}", streaming_lsn, next_record_lsn ); @@ -423,12 +471,11 @@ pub(super) async fn handle_walreceiver_connection( // need to advance last record LSN on all shards. If we've not ingested the latest // record, then set the LSN of the modification past it. This way all shards // advance their last record LSN at the same time. - let needs_last_record_lsn_advance = match next_record_lsn { - Some(lsn) if lsn > modification.get_lsn() => { - modification.set_lsn(lsn).unwrap(); - true - } - _ => false, + let needs_last_record_lsn_advance = if next_record_lsn > modification.get_lsn() { + modification.set_lsn(next_record_lsn).unwrap(); + true + } else { + false }; if uncommitted_records > 0 || needs_last_record_lsn_advance { @@ -446,9 +493,8 @@ pub(super) async fn handle_walreceiver_connection( timeline.get_last_record_lsn() ); - if let Some(lsn) = next_record_lsn { - last_rec_lsn = lsn; - } + last_rec_lsn = next_record_lsn; + expected_wal_start = streaming_lsn; Some(streaming_lsn) } @@ -590,7 +636,9 @@ pub(super) async fn handle_walreceiver_connection( let timestamp = keepalive.timestamp(); let reply_requested = keepalive.reply() != 0; - trace!("received PrimaryKeepAlive(wal_end: {wal_end}, timestamp: {timestamp:?} reply: {reply_requested})"); + trace!( + "received PrimaryKeepAlive(wal_end: {wal_end}, timestamp: {timestamp:?} reply: {reply_requested})" + ); if reply_requested { Some(last_rec_lsn) diff --git a/pageserver/src/tenant/upload_queue.rs b/pageserver/src/tenant/upload_queue.rs index d302205ffe..d5dc9666ce 100644 --- a/pageserver/src/tenant/upload_queue.rs +++ b/pageserver/src/tenant/upload_queue.rs @@ -1,21 +1,18 @@ use std::collections::{HashMap, HashSet, VecDeque}; use std::fmt::Debug; -use std::sync::atomic::AtomicU32; use std::sync::Arc; - -use super::remote_timeline_client::is_same_remote_layer_path; -use super::storage_layer::AsLayerDesc as _; -use super::storage_layer::LayerName; -use super::storage_layer::ResidentLayer; -use crate::tenant::metadata::TimelineMetadata; -use crate::tenant::remote_timeline_client::index::IndexPart; -use crate::tenant::remote_timeline_client::index::LayerFileMetadata; -use utils::generation::Generation; -use utils::lsn::{AtomicLsn, Lsn}; +use std::sync::atomic::AtomicU32; use chrono::NaiveDateTime; use once_cell::sync::Lazy; use tracing::info; +use utils::generation::Generation; +use utils::lsn::{AtomicLsn, Lsn}; + +use 
super::remote_timeline_client::is_same_remote_layer_path; +use super::storage_layer::{AsLayerDesc as _, LayerName, ResidentLayer}; +use crate::tenant::metadata::TimelineMetadata; +use crate::tenant::remote_timeline_client::index::{IndexPart, LayerFileMetadata}; /// Kill switch for upload queue reordering in case it causes problems. /// TODO: remove this once we have confidence in it. @@ -225,7 +222,7 @@ impl UploadQueueInitialized { // most one of them can be an index upload (enforced by can_bypass). .scan(&self.clean.0, |next_active_index, op| { let active_index = *next_active_index; - if let UploadOp::UploadMetadata { ref uploaded } = op { + if let UploadOp::UploadMetadata { uploaded } = op { *next_active_index = uploaded; // stash index for next operation after this } Some((op, active_index)) @@ -562,16 +559,18 @@ impl UploadOp { #[cfg(test)] mod tests { - use super::*; - use crate::tenant::harness::{TenantHarness, TIMELINE_ID}; - use crate::tenant::storage_layer::layer::local_layer_path; - use crate::tenant::storage_layer::Layer; - use crate::tenant::Timeline; - use crate::DEFAULT_PG_VERSION; - use itertools::Itertools as _; use std::str::FromStr as _; + + use itertools::Itertools as _; use utils::shard::{ShardCount, ShardIndex, ShardNumber}; + use super::*; + use crate::DEFAULT_PG_VERSION; + use crate::tenant::Timeline; + use crate::tenant::harness::{TIMELINE_ID, TenantHarness}; + use crate::tenant::storage_layer::Layer; + use crate::tenant::storage_layer::layer::local_layer_path; + /// Test helper which asserts that two operations are the same, in lieu of UploadOp PartialEq. #[track_caller] fn assert_same_op(a: &UploadOp, b: &UploadOp) { @@ -690,10 +689,22 @@ mod tests { let tli = make_timeline(); let index = Box::new(queue.clean.0.clone()); // empty, doesn't matter - let layer0 = make_layer(&tli, "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); - let layer1 = make_layer(&tli, "100000000000000000000000000000000000-200000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); - let layer2 = make_layer(&tli, "200000000000000000000000000000000000-300000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); - let layer3 = make_layer(&tli, "300000000000000000000000000000000000-400000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); + let layer0 = make_layer( + &tli, + "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); + let layer1 = make_layer( + &tli, + "100000000000000000000000000000000000-200000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); + let layer2 = make_layer( + &tli, + "200000000000000000000000000000000000-300000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); + let layer3 = make_layer( + &tli, + "300000000000000000000000000000000000-400000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); let (barrier, _) = tokio::sync::watch::channel(()); // Enqueue non-conflicting upload, delete, and index before and after a barrier. @@ -757,10 +768,22 @@ mod tests { let tli = make_timeline(); // Enqueue a bunch of deletes, some with conflicting names. 
- let layer0 = make_layer(&tli, "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); - let layer1 = make_layer(&tli, "100000000000000000000000000000000000-200000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); - let layer2 = make_layer(&tli, "200000000000000000000000000000000000-300000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); - let layer3 = make_layer(&tli, "300000000000000000000000000000000000-400000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); + let layer0 = make_layer( + &tli, + "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); + let layer1 = make_layer( + &tli, + "100000000000000000000000000000000000-200000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); + let layer2 = make_layer( + &tli, + "200000000000000000000000000000000000-300000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); + let layer3 = make_layer( + &tli, + "300000000000000000000000000000000000-400000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); let ops = [ UploadOp::Delete(Delete { @@ -802,9 +825,21 @@ mod tests { let tli = make_timeline(); // Enqueue three versions of the same layer, with different file sizes. - let layer0a = make_layer_with_size(&tli, "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", 1); - let layer0b = make_layer_with_size(&tli, "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", 2); - let layer0c = make_layer_with_size(&tli, "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", 3); + let layer0a = make_layer_with_size( + &tli, + "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + 1, + ); + let layer0b = make_layer_with_size( + &tli, + "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + 2, + ); + let layer0c = make_layer_with_size( + &tli, + "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + 3, + ); let ops = [ UploadOp::UploadLayer(layer0a.clone(), layer0a.metadata(), None), @@ -836,8 +871,14 @@ mod tests { // Enqueue two layer uploads, with a delete of both layers in between them. These should be // scheduled one at a time, since deletes can't bypass uploads and vice versa. - let layer0 = make_layer(&tli, "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); - let layer1 = make_layer(&tli, "100000000000000000000000000000000000-200000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); + let layer0 = make_layer( + &tli, + "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); + let layer1 = make_layer( + &tli, + "100000000000000000000000000000000000-200000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); let ops = [ UploadOp::UploadLayer(layer0.clone(), layer0.metadata(), None), @@ -878,10 +919,22 @@ mod tests { // // Also enqueue non-conflicting uploads and deletes at the end. These can bypass the queue // and run immediately. 
- let layer0 = make_layer(&tli, "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); - let layer1 = make_layer(&tli, "100000000000000000000000000000000000-200000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); - let layer2 = make_layer(&tli, "200000000000000000000000000000000000-300000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); - let layer3 = make_layer(&tli, "300000000000000000000000000000000000-400000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); + let layer0 = make_layer( + &tli, + "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); + let layer1 = make_layer( + &tli, + "100000000000000000000000000000000000-200000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); + let layer2 = make_layer( + &tli, + "200000000000000000000000000000000000-300000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); + let layer3 = make_layer( + &tli, + "300000000000000000000000000000000000-400000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); let ops = [ UploadOp::UploadLayer(layer0.clone(), layer0.metadata(), None), @@ -916,9 +969,18 @@ mod tests { let tli = make_timeline(); // Enqueue three different layer uploads. - let layer0 = make_layer(&tli, "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); - let layer1 = make_layer(&tli, "100000000000000000000000000000000000-200000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); - let layer2 = make_layer(&tli, "200000000000000000000000000000000000-300000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); + let layer0 = make_layer( + &tli, + "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); + let layer1 = make_layer( + &tli, + "100000000000000000000000000000000000-200000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); + let layer2 = make_layer( + &tli, + "200000000000000000000000000000000000-300000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); let ops = [ UploadOp::UploadLayer(layer0.clone(), layer0.metadata(), None), @@ -981,11 +1043,20 @@ mod tests { // Enqueue three uploads of the current empty index. 
let index = Box::new(queue.clean.0.clone()); - let layer0 = make_layer(&tli, "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); + let layer0 = make_layer( + &tli, + "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); let index0 = index_with(&index, &layer0); - let layer1 = make_layer(&tli, "100000000000000000000000000000000000-200000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); + let layer1 = make_layer( + &tli, + "100000000000000000000000000000000000-200000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); let index1 = index_with(&index0, &layer1); - let layer2 = make_layer(&tli, "200000000000000000000000000000000000-300000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); + let layer2 = make_layer( + &tli, + "200000000000000000000000000000000000-300000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); let index2 = index_with(&index1, &layer2); let ops = [ @@ -1045,7 +1116,10 @@ mod tests { let tli = make_timeline(); // Create a layer to upload. - let layer = make_layer(&tli, "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); + let layer = make_layer( + &tli, + "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); let index_upload = index_with(&queue.clean.0, &layer); // Remove the layer reference in a new index, then delete the layer. @@ -1090,7 +1164,10 @@ mod tests { let tli = make_timeline(); // Create a layer to upload. - let layer = make_layer(&tli, "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); + let layer = make_layer( + &tli, + "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); // Upload the layer. Then dereference the layer, and upload/reference it again. let index_upload = index_with(&queue.clean.0, &layer); @@ -1138,10 +1215,22 @@ mod tests { let tli = make_timeline(); let index = Box::new(queue.clean.0.clone()); // empty, doesn't matter - let layer0 = make_layer(&tli, "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); - let layer1 = make_layer(&tli, "100000000000000000000000000000000000-200000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); - let layer2 = make_layer(&tli, "200000000000000000000000000000000000-300000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); - let layer3 = make_layer(&tli, "300000000000000000000000000000000000-400000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); + let layer0 = make_layer( + &tli, + "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); + let layer1 = make_layer( + &tli, + "100000000000000000000000000000000000-200000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); + let layer2 = make_layer( + &tli, + "200000000000000000000000000000000000-300000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); + let layer3 = make_layer( + &tli, + "300000000000000000000000000000000000-400000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); // Enqueue non-conflicting upload, delete, and index before and after a shutdown. 
let ops = [ @@ -1197,10 +1286,22 @@ mod tests { let tli = make_timeline(); // Enqueue a bunch of uploads. - let layer0 = make_layer(&tli, "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); - let layer1 = make_layer(&tli, "100000000000000000000000000000000000-200000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); - let layer2 = make_layer(&tli, "200000000000000000000000000000000000-300000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); - let layer3 = make_layer(&tli, "300000000000000000000000000000000000-400000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); + let layer0 = make_layer( + &tli, + "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); + let layer1 = make_layer( + &tli, + "100000000000000000000000000000000000-200000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); + let layer2 = make_layer( + &tli, + "200000000000000000000000000000000000-300000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); + let layer3 = make_layer( + &tli, + "300000000000000000000000000000000000-400000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); let ops = [ UploadOp::UploadLayer(layer0.clone(), layer0.metadata(), None), diff --git a/pageserver/src/tenant/vectored_blob_io.rs b/pageserver/src/tenant/vectored_blob_io.rs index 47fb4a276b..dcf17a376c 100644 --- a/pageserver/src/tenant/vectored_blob_io.rs +++ b/pageserver/src/tenant/vectored_blob_io.rs @@ -27,8 +27,7 @@ use utils::vec_map::VecMap; use crate::context::RequestContext; use crate::tenant::blob_io::{BYTE_UNCOMPRESSED, BYTE_ZSTD, LEN_COMPRESSION_BIT_MASK}; -use crate::virtual_file::IoBufferMut; -use crate::virtual_file::{self, VirtualFile}; +use crate::virtual_file::{self, IoBufferMut, VirtualFile}; /// Metadata bundled with the start and end offset of a blob. #[derive(Copy, Clone, Debug)] @@ -139,7 +138,10 @@ impl VectoredBlob { bits => { let error = std::io::Error::new( std::io::ErrorKind::InvalidData, - format!("Failed to decompress blob for {}@{}, {}..{}: invalid compression byte {bits:x}", self.meta.key, self.meta.lsn, self.start, self.end), + format!( + "Failed to decompress blob for {}@{}, {}..{}: invalid compression byte {bits:x}", + self.meta.key, self.meta.lsn, self.start, self.end + ), ); Err(error) } @@ -677,13 +679,12 @@ impl StreamingVectoredReadPlanner { mod tests { use anyhow::Error; + use super::super::blob_io::tests::{random_array, write_maybe_compressed}; + use super::*; use crate::context::DownloadBehavior; use crate::page_cache::PAGE_SZ; use crate::task_mgr::TaskKind; - use super::super::blob_io::tests::{random_array, write_maybe_compressed}; - use super::*; - fn validate_read(read: &VectoredRead, offset_range: &[(Key, Lsn, u64, BlobFlag)]) { const ALIGN: u64 = virtual_file::get_io_buffer_alignment() as u64; assert_eq!(read.start % ALIGN, 0); diff --git a/pageserver/src/utilization.rs b/pageserver/src/utilization.rs index 093a944777..29d1a31aaf 100644 --- a/pageserver/src/utilization.rs +++ b/pageserver/src/utilization.rs @@ -3,13 +3,15 @@ //! The metric is exposed via `GET /v1/utilization`. Refer and maintain it's openapi spec as the //! truth. 
-use anyhow::Context; use std::path::Path; + +use anyhow::Context; +use pageserver_api::models::PageserverUtilization; use utils::serde_percent::Percent; -use pageserver_api::models::PageserverUtilization; - -use crate::{config::PageServerConf, metrics::NODE_UTILIZATION_SCORE, tenant::mgr::TenantManager}; +use crate::config::PageServerConf; +use crate::metrics::NODE_UTILIZATION_SCORE; +use crate::tenant::mgr::TenantManager; pub(crate) fn regenerate( conf: &PageServerConf, diff --git a/pageserver/src/virtual_file.rs b/pageserver/src/virtual_file.rs index c966ad813f..b47aecf8a6 100644 --- a/pageserver/src/virtual_file.rs +++ b/pageserver/src/virtual_file.rs @@ -11,11 +11,13 @@ //! This is similar to PostgreSQL's virtual file descriptor facility in //! src/backend/storage/file/fd.c //! -use crate::context::RequestContext; -use crate::metrics::{StorageIoOperation, STORAGE_IO_SIZE, STORAGE_IO_TIME_METRIC}; +use std::fs::File; +use std::io::{Error, ErrorKind, Seek, SeekFrom}; +use std::os::fd::{AsRawFd, FromRawFd, IntoRawFd, OwnedFd, RawFd}; +#[cfg(target_os = "linux")] +use std::os::unix::fs::OpenOptionsExt; +use std::sync::atomic::{AtomicBool, AtomicU8, AtomicUsize, Ordering}; -use crate::page_cache::{PageWriteGuard, PAGE_SZ}; -use crate::tenant::TENANTS_SEGMENT_NAME; use camino::{Utf8Path, Utf8PathBuf}; use once_cell::sync::OnceCell; use owned_buffers_io::aligned_buffer::buffer::AlignedBuffer; @@ -23,31 +25,30 @@ use owned_buffers_io::aligned_buffer::{AlignedBufferMut, AlignedSlice, ConstAlig use owned_buffers_io::io_buf_aligned::{IoBufAligned, IoBufAlignedMut}; use owned_buffers_io::io_buf_ext::FullSlice; use pageserver_api::config::defaults::DEFAULT_IO_BUFFER_ALIGNMENT; +pub use pageserver_api::models::virtual_file as api; use pageserver_api::shard::TenantShardId; -use std::fs::File; -use std::io::{Error, ErrorKind, Seek, SeekFrom}; -#[cfg(target_os = "linux")] -use std::os::unix::fs::OpenOptionsExt; -use tokio_epoll_uring::{BoundedBuf, IoBuf, IoBufMut, Slice}; - -use std::os::fd::{AsRawFd, FromRawFd, IntoRawFd, OwnedFd, RawFd}; -use std::sync::atomic::{AtomicBool, AtomicU8, AtomicUsize, Ordering}; use tokio::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard}; use tokio::time::Instant; +use tokio_epoll_uring::{BoundedBuf, IoBuf, IoBufMut, Slice}; -pub use pageserver_api::models::virtual_file as api; +use crate::context::RequestContext; +use crate::metrics::{STORAGE_IO_SIZE, STORAGE_IO_TIME_METRIC, StorageIoOperation}; +use crate::page_cache::{PAGE_SZ, PageWriteGuard}; +use crate::tenant::TENANTS_SEGMENT_NAME; pub(crate) mod io_engine; -pub use io_engine::feature_test as io_engine_feature_test; -pub use io_engine::io_engine_for_bench; -pub use io_engine::FeatureTestResult as IoEngineFeatureTestResult; +pub use io_engine::{ + FeatureTestResult as IoEngineFeatureTestResult, feature_test as io_engine_feature_test, + io_engine_for_bench, +}; mod metadata; mod open_options; -use self::owned_buffers_io::write::OwnedAsyncWriter; pub(crate) use api::IoMode; pub(crate) use io_engine::IoEngineKind; pub(crate) use metadata::Metadata; pub(crate) use open_options::*; +use self::owned_buffers_io::write::OwnedAsyncWriter; + pub(crate) mod owned_buffers_io { //! Abstractions for IO with owned buffers. //! 
@@ -1078,7 +1079,8 @@ where #[cfg(test)] mod test_read_exact_at_impl { - use std::{collections::VecDeque, sync::Arc}; + use std::collections::VecDeque; + use std::sync::Arc; use tokio_epoll_uring::{BoundedBuf, BoundedBufMut}; @@ -1424,19 +1426,19 @@ static SYNC_MODE: AtomicU8 = AtomicU8::new(SyncMode::Sync as u8); #[cfg(test)] mod tests { - use crate::context::DownloadBehavior; - use crate::task_mgr::TaskKind; - - use super::*; - use owned_buffers_io::io_buf_ext::IoBufExt; - use owned_buffers_io::slice::SliceMutExt; - use rand::seq::SliceRandom; - use rand::thread_rng; - use rand::Rng; use std::io::Write; use std::os::unix::fs::FileExt; use std::sync::Arc; + use owned_buffers_io::io_buf_ext::IoBufExt; + use owned_buffers_io::slice::SliceMutExt; + use rand::seq::SliceRandom; + use rand::{Rng, thread_rng}; + + use super::*; + use crate::context::DownloadBehavior; + use crate::task_mgr::TaskKind; + enum MaybeVirtualFile { VirtualFile(VirtualFile), File(File), diff --git a/pageserver/src/virtual_file/io_engine.rs b/pageserver/src/virtual_file/io_engine.rs index ccde90ee1a..758dd6e377 100644 --- a/pageserver/src/virtual_file/io_engine.rs +++ b/pageserver/src/virtual_file/io_engine.rs @@ -80,7 +80,9 @@ pub(crate) fn get() -> IoEngine { Ok(v) => match v.parse::() { Ok(engine_kind) => engine_kind, Err(e) => { - panic!("invalid VirtualFile io engine for env var {env_var_name}: {e:#}: {v:?}") + panic!( + "invalid VirtualFile io engine for env var {env_var_name}: {e:#}: {v:?}" + ) } }, Err(std::env::VarError::NotPresent) => { @@ -107,15 +109,12 @@ pub(crate) fn get() -> IoEngine { } } -use std::{ - os::unix::prelude::FileExt, - sync::atomic::{AtomicU8, Ordering}, -}; +use std::os::unix::prelude::FileExt; +use std::sync::atomic::{AtomicU8, Ordering}; -use super::{ - owned_buffers_io::{io_buf_ext::FullSlice, slice::SliceMutExt}, - FileGuard, Metadata, -}; +use super::owned_buffers_io::io_buf_ext::FullSlice; +use super::owned_buffers_io::slice::SliceMutExt; +use super::{FileGuard, Metadata}; #[cfg(target_os = "linux")] fn epoll_uring_error_to_std(e: tokio_epoll_uring::Error) -> std::io::Error { diff --git a/pageserver/src/virtual_file/io_engine/tokio_epoll_uring_ext.rs b/pageserver/src/virtual_file/io_engine/tokio_epoll_uring_ext.rs index c67215492f..ad17405b64 100644 --- a/pageserver/src/virtual_file/io_engine/tokio_epoll_uring_ext.rs +++ b/pageserver/src/virtual_file/io_engine/tokio_epoll_uring_ext.rs @@ -5,18 +5,16 @@ //! on older kernels, such as some (but not all) older kernels in the Linux 5.10 series. //! See for more details. 
-use std::sync::atomic::{AtomicU32, AtomicU64, Ordering}; use std::sync::Arc; - -use tokio_util::sync::CancellationToken; -use tracing::{error, info, info_span, warn, Instrument}; -use utils::backoff::{DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS}; +use std::sync::atomic::{AtomicU32, AtomicU64, Ordering}; use tokio_epoll_uring::{System, SystemHandle}; - -use crate::virtual_file::on_fatal_io_error; +use tokio_util::sync::CancellationToken; +use tracing::{Instrument, error, info, info_span, warn}; +use utils::backoff::{DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS}; use crate::metrics::tokio_epoll_uring::{self as metrics, THREAD_LOCAL_METRICS_STORAGE}; +use crate::virtual_file::on_fatal_io_error; #[derive(Clone)] struct ThreadLocalState(Arc); @@ -194,7 +192,7 @@ impl std::ops::Deref for Handle { fn deref(&self) -> &Self::Target { self.0 - .0 + .0 .cell .get() .expect("must be already initialized when using this") diff --git a/pageserver/src/virtual_file/open_options.rs b/pageserver/src/virtual_file/open_options.rs index 7f951270d1..e188b8649b 100644 --- a/pageserver/src/virtual_file/open_options.rs +++ b/pageserver/src/virtual_file/open_options.rs @@ -1,7 +1,9 @@ //! Enum-dispatch to the `OpenOptions` type of the respective [`super::IoEngineKind`]; +use std::os::fd::OwnedFd; +use std::path::Path; + use super::io_engine::IoEngine; -use std::{os::fd::OwnedFd, path::Path}; #[derive(Debug, Clone)] pub enum OpenOptions { diff --git a/pageserver/src/virtual_file/owned_buffers_io/aligned_buffer/buffer.rs b/pageserver/src/virtual_file/owned_buffers_io/aligned_buffer/buffer.rs index a5c26cd746..090d2ece85 100644 --- a/pageserver/src/virtual_file/owned_buffers_io/aligned_buffer/buffer.rs +++ b/pageserver/src/virtual_file/owned_buffers_io/aligned_buffer/buffer.rs @@ -1,9 +1,9 @@ -use std::{ - ops::{Deref, Range, RangeBounds}, - sync::Arc, -}; +use std::ops::{Deref, Range, RangeBounds}; +use std::sync::Arc; -use super::{alignment::Alignment, raw::RawAlignedBuffer, AlignedBufferMut, ConstAlign}; +use super::alignment::Alignment; +use super::raw::RawAlignedBuffer; +use super::{AlignedBufferMut, ConstAlign}; /// An shared, immutable aligned buffer type. #[derive(Clone, Debug)] diff --git a/pageserver/src/virtual_file/owned_buffers_io/aligned_buffer/buffer_mut.rs b/pageserver/src/virtual_file/owned_buffers_io/aligned_buffer/buffer_mut.rs index d2f5e206bb..df5c911e50 100644 --- a/pageserver/src/virtual_file/owned_buffers_io/aligned_buffer/buffer_mut.rs +++ b/pageserver/src/virtual_file/owned_buffers_io/aligned_buffer/buffer_mut.rs @@ -1,13 +1,9 @@ -use std::{ - mem::MaybeUninit, - ops::{Deref, DerefMut}, -}; +use std::mem::MaybeUninit; +use std::ops::{Deref, DerefMut}; -use super::{ - alignment::{Alignment, ConstAlign}, - buffer::AlignedBuffer, - raw::RawAlignedBuffer, -}; +use super::alignment::{Alignment, ConstAlign}; +use super::buffer::AlignedBuffer; +use super::raw::RawAlignedBuffer; /// A mutable aligned buffer type. #[derive(Debug)] @@ -75,7 +71,8 @@ impl AlignedBufferMut { /// Force the length of the buffer to `new_len`. #[inline] unsafe fn set_len(&mut self, new_len: usize) { - self.raw.set_len(new_len) + // SAFETY: the caller is unsafe + unsafe { self.raw.set_len(new_len) } } #[inline] @@ -222,8 +219,10 @@ unsafe impl bytes::BufMut for AlignedBufferMut { panic_advance(cnt, remaining); } - // Addition will not overflow since the sum is at most the capacity. - self.set_len(len + cnt); + // SAFETY: Addition will not overflow since the sum is at most the capacity. 
+ unsafe { + self.set_len(len + cnt); + } } #[inline] @@ -275,7 +274,10 @@ unsafe impl tokio_epoll_uring::IoBufMut for AlignedBufferMut { unsafe fn set_init(&mut self, init_len: usize) { if self.len() < init_len { - self.set_len(init_len); + // SAFETY: caller function is unsafe + unsafe { + self.set_len(init_len); + } } } } diff --git a/pageserver/src/virtual_file/owned_buffers_io/aligned_buffer/raw.rs b/pageserver/src/virtual_file/owned_buffers_io/aligned_buffer/raw.rs index 6c26dec0db..97a6c4049a 100644 --- a/pageserver/src/virtual_file/owned_buffers_io/aligned_buffer/raw.rs +++ b/pageserver/src/virtual_file/owned_buffers_io/aligned_buffer/raw.rs @@ -1,9 +1,7 @@ use core::slice; -use std::{ - alloc::{self, Layout}, - cmp, - mem::ManuallyDrop, -}; +use std::alloc::{self, Layout}; +use std::cmp; +use std::mem::ManuallyDrop; use super::alignment::{Alignment, ConstAlign}; diff --git a/pageserver/src/virtual_file/owned_buffers_io/io_buf_ext.rs b/pageserver/src/virtual_file/owned_buffers_io/io_buf_ext.rs index 525f447b6d..4c671c2652 100644 --- a/pageserver/src/virtual_file/owned_buffers_io/io_buf_ext.rs +++ b/pageserver/src/virtual_file/owned_buffers_io/io_buf_ext.rs @@ -1,11 +1,12 @@ //! See [`FullSlice`]. -use crate::virtual_file::{IoBuffer, IoBufferMut}; -use bytes::{Bytes, BytesMut}; use std::ops::{Deref, Range}; + +use bytes::{Bytes, BytesMut}; use tokio_epoll_uring::{BoundedBuf, IoBuf, Slice}; use super::write::CheapCloneForRead; +use crate::virtual_file::{IoBuffer, IoBufferMut}; /// The true owned equivalent for Rust [`slice`]. Use this for the write path. /// diff --git a/pageserver/src/virtual_file/owned_buffers_io/slice.rs b/pageserver/src/virtual_file/owned_buffers_io/slice.rs index 6100593663..9f4a05dd57 100644 --- a/pageserver/src/virtual_file/owned_buffers_io/slice.rs +++ b/pageserver/src/virtual_file/owned_buffers_io/slice.rs @@ -1,7 +1,4 @@ -use tokio_epoll_uring::BoundedBuf; -use tokio_epoll_uring::BoundedBufMut; -use tokio_epoll_uring::IoBufMut; -use tokio_epoll_uring::Slice; +use tokio_epoll_uring::{BoundedBuf, BoundedBufMut, IoBufMut, Slice}; pub(crate) trait SliceMutExt { /// Get a `&mut[0..self.bytes_total()`] slice, for when you need to do borrow-based IO. @@ -35,10 +32,11 @@ where mod tests { use std::io::Read; - use super::*; use bytes::Buf; use tokio_epoll_uring::Slice; + use super::*; + #[test] fn test_slice_full_zeroed() { let make_fake_file = || bytes::BytesMut::from(&b"12345"[..]).reader(); diff --git a/pageserver/src/virtual_file/owned_buffers_io/write.rs b/pageserver/src/virtual_file/owned_buffers_io/write.rs index 7299d83703..861ca3aa2a 100644 --- a/pageserver/src/virtual_file/owned_buffers_io/write.rs +++ b/pageserver/src/virtual_file/owned_buffers_io/write.rs @@ -1,20 +1,14 @@ mod flush; use std::sync::Arc; +pub(crate) use flush::FlushControl; use flush::FlushHandle; use tokio_epoll_uring::IoBuf; -use crate::{ - context::RequestContext, - virtual_file::{IoBuffer, IoBufferMut}, -}; - -use super::{ - io_buf_aligned::IoBufAligned, - io_buf_ext::{FullSlice, IoBufExt}, -}; - -pub(crate) use flush::FlushControl; +use super::io_buf_aligned::IoBufAligned; +use super::io_buf_ext::{FullSlice, IoBufExt}; +use crate::context::RequestContext; +use crate::virtual_file::{IoBuffer, IoBufferMut}; pub(crate) trait CheapCloneForRead { /// Returns a cheap clone of the buffer. 
diff --git a/pageserver/src/virtual_file/owned_buffers_io/write/flush.rs b/pageserver/src/virtual_file/owned_buffers_io/write/flush.rs index 9ce8b311bb..46309d4011 100644 --- a/pageserver/src/virtual_file/owned_buffers_io/write/flush.rs +++ b/pageserver/src/virtual_file/owned_buffers_io/write/flush.rs @@ -2,12 +2,10 @@ use std::sync::Arc; use utils::sync::duplex; -use crate::{ - context::RequestContext, - virtual_file::owned_buffers_io::{io_buf_aligned::IoBufAligned, io_buf_ext::FullSlice}, -}; - use super::{Buffer, CheapCloneForRead, OwnedAsyncWriter}; +use crate::context::RequestContext; +use crate::virtual_file::owned_buffers_io::io_buf_aligned::IoBufAligned; +use crate::virtual_file::owned_buffers_io::io_buf_ext::FullSlice; /// A handle to the flush task. pub struct FlushHandle { diff --git a/pageserver/src/walingest.rs b/pageserver/src/walingest.rs index 04edb3e3f4..18df065f76 100644 --- a/pageserver/src/walingest.rs +++ b/pageserver/src/walingest.rs @@ -22,39 +22,35 @@ //! bespoken Rust code. use std::collections::HashMap; -use std::sync::Arc; -use std::sync::OnceLock; -use std::time::Duration; -use std::time::Instant; -use std::time::SystemTime; +use std::sync::{Arc, OnceLock}; +use std::time::{Duration, Instant, SystemTime}; -use anyhow::{bail, Result}; +use anyhow::{Result, bail}; use bytes::{Buf, Bytes}; -use tracing::*; - -use crate::context::RequestContext; -use crate::metrics::WAL_INGEST; -use crate::pgdatadir_mapping::{DatadirModification, Version}; -use crate::span::debug_assert_current_span_has_tenant_and_timeline_id; -use crate::tenant::PageReconstructError; -use crate::tenant::Timeline; -use crate::ZERO_PAGE; use pageserver_api::key::rel_block_to_key; use pageserver_api::record::NeonWalRecord; use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind}; use pageserver_api::shard::ShardIdentity; -use postgres_ffi::fsm_logical_to_physical; -use postgres_ffi::pg_constants; use postgres_ffi::relfile_utils::{FSM_FORKNUM, INIT_FORKNUM, MAIN_FORKNUM, VISIBILITYMAP_FORKNUM}; use postgres_ffi::walrecord::*; -use postgres_ffi::TransactionId; -use postgres_ffi::{dispatch_pgversion, enum_pgversion, enum_pgversion_dispatch, TimestampTz}; +use postgres_ffi::{ + TimestampTz, TransactionId, dispatch_pgversion, enum_pgversion, enum_pgversion_dispatch, + fsm_logical_to_physical, pg_constants, +}; +use tracing::*; use utils::bin_ser::SerializeError; use utils::lsn::Lsn; use utils::rate_limit::RateLimit; use utils::{critical, failpoint_support}; use wal_decoder::models::*; +use crate::ZERO_PAGE; +use crate::context::RequestContext; +use crate::metrics::WAL_INGEST; +use crate::pgdatadir_mapping::{DatadirModification, Version}; +use crate::span::debug_assert_current_span_has_tenant_and_timeline_id; +use crate::tenant::{PageReconstructError, Timeline}; + enum_pgversion! {CheckPoint, pgv::CheckPoint} impl CheckPoint { @@ -302,7 +298,9 @@ impl WalIngest { if xid > next_xid { // Wraparound occurred, must be from a prev epoch. if epoch == 0 { - bail!("apparent XID wraparound with prepared transaction XID {xid}, nextXid is {next_full_xid}"); + bail!( + "apparent XID wraparound with prepared transaction XID {xid}, nextXid is {next_full_xid}" + ); } epoch -= 1; } @@ -796,9 +794,7 @@ impl WalIngest { // Remove twophase file. 
see RemoveTwoPhaseFile() in postgres code trace!( "Drop twophaseFile for xid {} parsed_xact.xid {} here at {}", - xl_xid, - parsed.xid, - lsn, + xl_xid, parsed.xid, lsn, ); let xid: u64 = if modification.tline.pg_version >= 17 { @@ -1130,16 +1126,14 @@ impl WalIngest { let xlog_checkpoint = pgv::CheckPoint::decode(&checkpoint_bytes)?; trace!( "xlog_checkpoint.oldestXid={}, checkpoint.oldestXid={}", - xlog_checkpoint.oldestXid, - cp.oldestXid + xlog_checkpoint.oldestXid, cp.oldestXid ); if (cp.oldestXid.wrapping_sub(xlog_checkpoint.oldestXid) as i32) < 0 { cp.oldestXid = xlog_checkpoint.oldestXid; } trace!( "xlog_checkpoint.oldestActiveXid={}, checkpoint.oldestActiveXid={}", - xlog_checkpoint.oldestActiveXid, - cp.oldestActiveXid + xlog_checkpoint.oldestActiveXid, cp.oldestActiveXid ); // A shutdown checkpoint has `oldestActiveXid == InvalidTransactionid`, @@ -1180,6 +1174,50 @@ impl WalIngest { } else { cp.oldestActiveXid = xlog_checkpoint.oldestActiveXid; } + // NB: We abuse the Checkpoint.redo field: + // + // - In PostgreSQL, the Checkpoint struct doesn't store the information + // of whether this is an online checkpoint or a shutdown checkpoint. It's + // stored in the XLOG info field of the WAL record, shutdown checkpoints + // use record type XLOG_CHECKPOINT_SHUTDOWN and online checkpoints use + // XLOG_CHECKPOINT_ONLINE. We don't store the original WAL record headers + // in the pageserver, however. + // + // - In PostgreSQL, the Checkpoint.redo field stores the *start* of the + // checkpoint record, if it's a shutdown checkpoint. But when we are + // starting from a shutdown checkpoint, the basebackup LSN is the *end* + // of the shutdown checkpoint WAL record. That makes it difficult to + // correctly detect whether we're starting from a shutdown record or + // not. + // + // To address both of those issues, we store 0 in the redo field if it's + // an online checkpoint record, and the record's *end* LSN if it's a + // shutdown checkpoint. We don't need the original redo pointer in neon, + // because we don't perform WAL replay at startup anyway, so we can get + // away with abusing the redo field like this. + // + // XXX: Ideally, we would persist the extra information in a more + // explicit format, rather than repurpose the fields of the Postgres + // struct like this. However, we already have persisted data like this, + // so we need to maintain backwards compatibility. + // + // NB: We didn't originally have this convention, so there are still old + // persisted records that didn't do this. Before, we didn't update the + // persisted redo field at all. That means that old records have a bogus + // redo pointer that points to some old value, from the checkpoint record + // that was originally imported from the data directory. If it was a + // project created in Neon, that means it points to the first checkpoint + // after initdb. That's OK for our purposes: all such old checkpoints are + // treated as old online checkpoints when the basebackup is created. + cp.redo = if info == pg_constants::XLOG_CHECKPOINT_SHUTDOWN { + // Store the *end* LSN of the checkpoint record. Or to be precise, + // the start LSN of the *next* record, i.e. if the record ends + // exactly at page boundary, the redo LSN points to just after the + // page header on the next page. + lsn.into() + } else { + Lsn::INVALID.into() + }; // Write a new checkpoint key-value pair on every checkpoint record, even // if nothing really changed. 
Not strictly required, but it seems nice to @@ -1324,8 +1362,9 @@ impl WalIngest { // with zero pages. Logging is rate limited per pg version to // avoid skewing. if gap_blocks_filled > 0 { - use once_cell::sync::Lazy; use std::sync::Mutex; + + use once_cell::sync::Lazy; use utils::rate_limit::RateLimit; struct RateLimitPerPgVersion { @@ -1431,10 +1470,7 @@ impl WalIngest { if new_nblocks > old_nblocks { trace!( "extending SLRU {:?} seg {} from {} to {} blocks", - kind, - segno, - old_nblocks, - new_nblocks + kind, segno, old_nblocks, new_nblocks ); modification.put_slru_extend(kind, segno, new_nblocks)?; @@ -1473,13 +1509,13 @@ async fn get_relsize( #[allow(clippy::bool_assert_comparison)] #[cfg(test)] mod tests { - use super::*; - use crate::tenant::harness::*; - use crate::tenant::remote_timeline_client::{remote_initdb_archive_path, INITDB_PATH}; - use crate::tenant::storage_layer::IoConcurrency; use postgres_ffi::RELSEG_SIZE; + use super::*; use crate::DEFAULT_PG_VERSION; + use crate::tenant::harness::*; + use crate::tenant::remote_timeline_client::{INITDB_PATH, remote_initdb_archive_path}; + use crate::tenant::storage_layer::IoConcurrency; /// Arbitrary relation tag, for testing. const TESTREL_A: RelTag = RelTag { @@ -1562,10 +1598,12 @@ mod tests { .await?, false ); - assert!(tline - .get_rel_size(TESTREL_A, Version::Lsn(Lsn(0x10)), &ctx) - .await - .is_err()); + assert!( + tline + .get_rel_size(TESTREL_A, Version::Lsn(Lsn(0x10)), &ctx) + .await + .is_err() + ); assert_eq!( tline .get_rel_exists(TESTREL_A, Version::Lsn(Lsn(0x20)), &ctx) @@ -1953,10 +1991,12 @@ mod tests { .await?, false ); - assert!(tline - .get_rel_size(TESTREL_A, Version::Lsn(Lsn(0x10)), &ctx) - .await - .is_err()); + assert!( + tline + .get_rel_size(TESTREL_A, Version::Lsn(Lsn(0x10)), &ctx) + .await + .is_err() + ); assert_eq!( tline @@ -2186,9 +2226,10 @@ mod tests { /// without waiting for unrelated steps. #[tokio::test] async fn test_ingest_real_wal() { - use crate::tenant::harness::*; - use postgres_ffi::waldecoder::WalStreamDecoder; use postgres_ffi::WAL_SEGMENT_SIZE; + use postgres_ffi::waldecoder::WalStreamDecoder; + + use crate::tenant::harness::*; // Define test data path and constants. // diff --git a/pageserver/src/walredo.rs b/pageserver/src/walredo.rs index 027a6eb7d7..22d8d83811 100644 --- a/pageserver/src/walredo.rs +++ b/pageserver/src/walredo.rs @@ -24,26 +24,27 @@ mod process; /// Code to apply [`NeonWalRecord`]s. pub(crate) mod apply_neon; -use crate::config::PageServerConf; -use crate::metrics::{ - WAL_REDO_BYTES_HISTOGRAM, WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM, - WAL_REDO_RECORDS_HISTOGRAM, WAL_REDO_TIME, -}; +use std::future::Future; +use std::sync::Arc; +use std::time::{Duration, Instant}; + use anyhow::Context; use bytes::{Bytes, BytesMut}; use pageserver_api::key::Key; use pageserver_api::models::{WalRedoManagerProcessStatus, WalRedoManagerStatus}; use pageserver_api::record::NeonWalRecord; use pageserver_api::shard::TenantShardId; -use std::future::Future; -use std::sync::Arc; -use std::time::Duration; -use std::time::Instant; use tracing::*; use utils::lsn::Lsn; use utils::sync::gate::GateError; use utils::sync::heavier_once_cell; +use crate::config::PageServerConf; +use crate::metrics::{ + WAL_REDO_BYTES_HISTOGRAM, WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM, + WAL_REDO_RECORDS_HISTOGRAM, WAL_REDO_TIME, +}; + /// The real implementation that uses a Postgres process to /// perform WAL replay. 
/// @@ -547,15 +548,18 @@ impl PostgresRedoManager { #[cfg(test)] mod tests { - use super::PostgresRedoManager; - use crate::config::PageServerConf; + use std::str::FromStr; + use bytes::Bytes; use pageserver_api::key::Key; use pageserver_api::record::NeonWalRecord; use pageserver_api::shard::TenantShardId; - use std::str::FromStr; use tracing::Instrument; - use utils::{id::TenantId, lsn::Lsn}; + use utils::id::TenantId; + use utils::lsn::Lsn; + + use super::PostgresRedoManager; + use crate::config::PageServerConf; #[tokio::test] async fn test_ping() { diff --git a/pageserver/src/walredo/apply_neon.rs b/pageserver/src/walredo/apply_neon.rs index d62e325310..61ae1eb970 100644 --- a/pageserver/src/walredo/apply_neon.rs +++ b/pageserver/src/walredo/apply_neon.rs @@ -4,13 +4,12 @@ use bytes::BytesMut; use pageserver_api::key::Key; use pageserver_api::record::NeonWalRecord; use pageserver_api::reltag::SlruKind; -use postgres_ffi::pg_constants; use postgres_ffi::relfile_utils::VISIBILITYMAP_FORKNUM; use postgres_ffi::v14::nonrelfile_utils::{ mx_offset_to_flags_bitshift, mx_offset_to_flags_offset, mx_offset_to_member_offset, transaction_id_set_status, }; -use postgres_ffi::BLCKSZ; +use postgres_ffi::{BLCKSZ, pg_constants}; use tracing::*; use utils::lsn::Lsn; diff --git a/pageserver/src/walredo/process.rs b/pageserver/src/walredo/process.rs index bf30b92ea5..5a9fc63e63 100644 --- a/pageserver/src/walredo/process.rs +++ b/pageserver/src/walredo/process.rs @@ -2,28 +2,28 @@ mod no_leak_child; /// The IPC protocol that pageserver and walredo process speak over their shared pipe. mod protocol; -use self::no_leak_child::NoLeakChild; -use crate::{ - config::PageServerConf, - metrics::{WalRedoKillCause, WAL_REDO_PROCESS_COUNTERS, WAL_REDO_RECORD_COUNTER}, - page_cache::PAGE_SZ, - span::debug_assert_current_span_has_tenant_id, -}; +use std::collections::VecDeque; +use std::process::{Command, Stdio}; +#[cfg(feature = "testing")] +use std::sync::atomic::AtomicUsize; +use std::time::Duration; + use anyhow::Context; use bytes::Bytes; use pageserver_api::record::NeonWalRecord; -use pageserver_api::{reltag::RelTag, shard::TenantShardId}; +use pageserver_api::reltag::RelTag; +use pageserver_api::shard::TenantShardId; use postgres_ffi::BLCKSZ; -#[cfg(feature = "testing")] -use std::sync::atomic::AtomicUsize; -use std::{ - collections::VecDeque, - process::{Command, Stdio}, - time::Duration, -}; use tokio::io::{AsyncReadExt, AsyncWriteExt}; -use tracing::{debug, error, instrument, Instrument}; -use utils::{lsn::Lsn, poison::Poison}; +use tracing::{Instrument, debug, error, instrument}; +use utils::lsn::Lsn; +use utils::poison::Poison; + +use self::no_leak_child::NoLeakChild; +use crate::config::PageServerConf; +use crate::metrics::{WAL_REDO_PROCESS_COUNTERS, WAL_REDO_RECORD_COUNTER, WalRedoKillCause}; +use crate::page_cache::PAGE_SZ; +use crate::span::debug_assert_current_span_has_tenant_id; pub struct WalRedoProcess { #[allow(dead_code)] diff --git a/pageserver/src/walredo/process/no_leak_child.rs b/pageserver/src/walredo/process/no_leak_child.rs index 1a0d7039df..9939fc4b36 100644 --- a/pageserver/src/walredo/process/no_leak_child.rs +++ b/pageserver/src/walredo/process/no_leak_child.rs @@ -1,19 +1,11 @@ -use tracing::instrument; -use tracing::{error, info}; - -use crate::metrics::WalRedoKillCause; -use crate::metrics::WAL_REDO_PROCESS_COUNTERS; - use std::io; -use std::process::Command; - -use std::ops::DerefMut; - -use std::ops::Deref; - -use std::process::Child; +use std::ops::{Deref, DerefMut}; +use 
std::process::{Child, Command}; use pageserver_api::shard::TenantShardId; +tracing::{error, info, instrument}; + +use crate::metrics::{WAL_REDO_PROCESS_COUNTERS, WalRedoKillCause}; /// Wrapper type around `std::process::Child` which guarantees that the child /// will be killed and waited-for by this process before being dropped. diff --git a/pgxn/neon/file_cache.c b/pgxn/neon/file_cache.c index a61dc9f4c6..f6a577abfc 100644 --- a/pgxn/neon/file_cache.c +++ b/pgxn/neon/file_cache.c @@ -22,6 +22,7 @@ #include "neon_pgversioncompat.h" #include "access/parallel.h" +#include "access/xlog.h" #include "funcapi.h" #include "miscadmin.h" #include "pagestore_client.h" @@ -40,12 +41,16 @@ #include "utils/dynahash.h" #include "utils/guc.h" +#if PG_VERSION_NUM >= 150000 +#include "access/xlogrecovery.h" +#endif + #include "hll.h" #include "bitmap.h" #include "neon.h" #include "neon_perf_counters.h" -#define CriticalAssert(cond) do if (!(cond)) elog(PANIC, "Assertion %s failed at %s:%d: ", #cond, __FILE__, __LINE__); while (0) +#define CriticalAssert(cond) do if (!(cond)) elog(PANIC, "LFC: assertion %s failed at %s:%d: ", #cond, __FILE__, __LINE__); while (0) /* * Local file cache is used to temporarily store relation pages in the local file system. @@ -93,7 +98,23 @@ #define MB ((uint64)1024*1024) #define SIZE_MB_TO_CHUNKS(size) ((uint32)((size) * MB / BLCKSZ / BLOCKS_PER_CHUNK)) -#define CHUNK_BITMAP_SIZE ((BLOCKS_PER_CHUNK + 31) / 32) + +/* + * Blocks are read from or written to the LFC file outside the LFC critical section. + * To synchronize access to such a block, the writer sets the block's state to PENDING. + * If some other backend (reader or writer) sees the PENDING state, it changes it to REQUESTED and starts + * waiting on a condition variable until the state changes. + * When the writer completes its operation, it checks whether the state is REQUESTED and, if so, broadcasts the condition variable, + * waking up all backends waiting for access to this block.
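+ *
+ * An illustrative walk-through of this hand-off (not tied to any particular
+ * function): backend A wants to fill a block that is UNAVAILABLE, marks it
+ * PENDING and performs the file I/O outside the critical section. Backend B
+ * looks up the same block meanwhile, finds it PENDING, flips it to REQUESTED
+ * and sleeps on one of the shared condition variables (with a timeout). When
+ * A finishes its I/O it sees REQUESTED, marks the block AVAILABLE and
+ * broadcasts the condition variable, so B wakes up, re-checks the state and
+ * proceeds.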
+ */ +typedef enum FileCacheBlockState +{ + UNAVAILABLE, /* block is not present in cache */ + AVAILABLE, /* block can be used */ + PENDING, /* block is loaded */ + REQUESTED /* some other backend is waiting for block to be loaded */ +} FileCacheBlockState; + typedef struct FileCacheEntry { @@ -101,10 +122,16 @@ typedef struct FileCacheEntry uint32 hash; uint32 offset; uint32 access_count; - uint32 bitmap[CHUNK_BITMAP_SIZE]; + uint32 state[(BLOCKS_PER_CHUNK + 31) / 32 * 2]; /* two bits per block */ dlist_node list_node; /* LRU/holes list node */ } FileCacheEntry; +#define GET_STATE(entry, i) (((entry)->state[(i) / 16] >> ((i) % 16 * 2)) & 3) +#define SET_STATE(entry, i, new_state) (entry)->state[(i) / 16] = ((entry)->state[(i) / 16] & ~(3 << ((i) % 16 * 2))) | ((new_state) << ((i) % 16 * 2)) + +#define N_COND_VARS 64 +#define CV_WAIT_TIMEOUT 10 + typedef struct FileCacheControl { uint64 generation; /* generation is needed to handle correct hash @@ -118,18 +145,24 @@ typedef struct FileCacheControl uint64 writes; /* number of writes issued */ uint64 time_read; /* time spent reading (us) */ uint64 time_write; /* time spent writing (us) */ + uint64 resizes; /* number of LFC resizes */ + uint64 evicted_pages; /* number of evicted pages */ dlist_head lru; /* double linked list for LRU replacement * algorithm */ dlist_head holes; /* double linked list of punched holes */ HyperLogLogState wss_estimation; /* estimation of working set size */ + ConditionVariable cv[N_COND_VARS]; /* turnstile of condition variables */ } FileCacheControl; +bool lfc_store_prefetch_result; + static HTAB *lfc_hash; -static int lfc_desc = 0; +static int lfc_desc = -1; static LWLockId lfc_lock; static int lfc_max_size; static int lfc_size_limit; static char *lfc_path; +static uint64 lfc_generation; static FileCacheControl *lfc_ctl; static shmem_startup_hook_type prev_shmem_startup_hook; #if PG_VERSION_NUM>=150000 @@ -138,6 +171,20 @@ static shmem_request_hook_type prev_shmem_request_hook; #define LFC_ENABLED() (lfc_ctl->limit != 0) +/* + * Close LFC file if opened. + * All backends should close their LFC files once LFC is disabled. + */ +static void +lfc_close_file(void) +{ + if (lfc_desc >= 0) + { + close(lfc_desc); + lfc_desc = -1; + } +} + /* * Local file cache is optional and Neon can work without it. * In case of any any errors with this cache, we should disable it but to not throw error. 
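The FileCacheEntry above replaces the old one-bit-per-block bitmap with two state bits per block, sixteen block states per uint32 word. A standalone sketch of the same packing (illustration only; BLOCKS_PER_CHUNK is assumed to be 128 here, and the state names mirror FileCacheBlockState):

/* Standalone model of the two-bit-per-block state array used by the new
 * FileCacheEntry layout. Compile with: cc -o state_demo state_demo.c */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define BLOCKS_PER_CHUNK 128    /* assumed value, for the sketch only */

enum { UNAVAILABLE, AVAILABLE, PENDING, REQUESTED };

static uint32_t state[(BLOCKS_PER_CHUNK + 31) / 32 * 2];   /* two bits per block */

#define GET_STATE(i)            ((state[(i) / 16] >> ((i) % 16 * 2)) & 3)
#define SET_STATE(i, new_state) (state[(i) / 16] = (state[(i) / 16] & ~(3u << ((i) % 16 * 2))) \
                                                   | ((uint32_t)(new_state) << ((i) % 16 * 2)))

int main(void)
{
    /* All blocks start out UNAVAILABLE (the array is zero-initialized). */
    for (int i = 0; i < BLOCKS_PER_CHUNK; i++)
        assert(GET_STATE(i) == UNAVAILABLE);

    /* Mark one block as being written, then publish it. */
    SET_STATE(42, PENDING);
    assert(GET_STATE(42) == PENDING);
    SET_STATE(42, AVAILABLE);

    /* Neighbouring blocks are not disturbed by the update. */
    assert(GET_STATE(41) == UNAVAILABLE && GET_STATE(43) == UNAVAILABLE);

    printf("block 42 state = %u\n", (unsigned) GET_STATE(42));
    return 0;
}

Two bits per block are enough because a block is only ever in one of the four FileCacheBlockState values.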
@@ -145,20 +192,16 @@ static shmem_request_hook_type prev_shmem_request_hook; * All cache content should be invalidated to avoid reading of stale or corrupted data */ static void -lfc_disable(char const *op) +lfc_switch_off(void) { int fd; - elog(WARNING, "Failed to %s local file cache at %s: %m, disabling local file cache", op, lfc_path); - - /* Invalidate hash */ - LWLockAcquire(lfc_lock, LW_EXCLUSIVE); - if (LFC_ENABLED()) { HASH_SEQ_STATUS status; FileCacheEntry *entry; + /* Invalidate hash */ hash_seq_init(&status, lfc_hash); while ((entry = hash_seq_search(&status)) != NULL) { @@ -171,41 +214,33 @@ lfc_disable(char const *op) dlist_init(&lfc_ctl->lru); dlist_init(&lfc_ctl->holes); - if (lfc_desc > 0) - { - int rc; + /* + * We need to use unlink to to avoid races in LFC write, because it is not + * protected by lock + */ + unlink(lfc_path); - /* - * If the reason of error is ENOSPC, then truncation of file may - * help to reclaim some space - */ - pgstat_report_wait_start(WAIT_EVENT_NEON_LFC_TRUNCATE); - rc = ftruncate(lfc_desc, 0); - pgstat_report_wait_end(); + fd = BasicOpenFile(lfc_path, O_RDWR | O_CREAT | O_TRUNC); + if (fd < 0) + elog(WARNING, "LFC: failed to recreate local file cache %s: %m", lfc_path); + else + close(fd); - if (rc < 0) - elog(WARNING, "Failed to truncate local file cache %s: %m", lfc_path); - } + /* Wakeup waiting backends */ + for (int i = 0; i < N_COND_VARS; i++) + ConditionVariableBroadcast(&lfc_ctl->cv[i]); } + lfc_close_file(); +} - /* - * We need to use unlink to to avoid races in LFC write, because it is not - * protectedby - */ - unlink(lfc_path); - - fd = BasicOpenFile(lfc_path, O_RDWR | O_CREAT | O_TRUNC); - if (fd < 0) - elog(WARNING, "Failed to recreate local file cache %s: %m", lfc_path); - else - close(fd); +static void +lfc_disable(char const *op) +{ + elog(WARNING, "LFC: failed to %s local file cache at %s: %m, disabling local file cache", op, lfc_path); + LWLockAcquire(lfc_lock, LW_EXCLUSIVE); + lfc_switch_off(); LWLockRelease(lfc_lock); - - if (lfc_desc > 0) - close(lfc_desc); - - lfc_desc = -1; } /* @@ -217,13 +252,20 @@ lfc_maybe_disabled(void) return !lfc_ctl || !LFC_ENABLED(); } +/* + * Open LFC file if not opened yet or generation is changed. + * Should be called under LFC lock. 
+ */ static bool lfc_ensure_opened(void) { - bool enabled = !lfc_maybe_disabled(); - + if (lfc_generation != lfc_ctl->generation) + { + lfc_close_file(); + lfc_generation = lfc_ctl->generation; + } /* Open cache file if not done yet */ - if (lfc_desc <= 0 && enabled) + if (lfc_desc < 0) { lfc_desc = BasicOpenFile(lfc_path, O_RDWR); @@ -233,7 +275,7 @@ lfc_ensure_opened(void) return false; } } - return enabled; + return true; } static void @@ -267,14 +309,7 @@ lfc_shmem_startup(void) n_chunks + 1, n_chunks + 1, &info, HASH_ELEM | HASH_BLOBS); - lfc_ctl->generation = 0; - lfc_ctl->size = 0; - lfc_ctl->used = 0; - lfc_ctl->hits = 0; - lfc_ctl->misses = 0; - lfc_ctl->writes = 0; - lfc_ctl->time_read = 0; - lfc_ctl->time_write = 0; + memset(lfc_ctl, 0, sizeof(FileCacheControl)); dlist_init(&lfc_ctl->lru); dlist_init(&lfc_ctl->holes); @@ -285,7 +320,7 @@ lfc_shmem_startup(void) fd = BasicOpenFile(lfc_path, O_RDWR | O_CREAT | O_TRUNC); if (fd < 0) { - elog(WARNING, "Failed to create local file cache %s: %m", lfc_path); + elog(WARNING, "LFC: failed to create local file cache %s: %m", lfc_path); lfc_ctl->limit = 0; } else @@ -293,6 +328,11 @@ lfc_shmem_startup(void) close(fd); lfc_ctl->limit = SIZE_MB_TO_CHUNKS(lfc_size_limit); } + + /* Initialize turnstile of condition variables */ + for (int i = 0; i < N_COND_VARS; i++) + ConditionVariableInit(&lfc_ctl->cv[i]); + } LWLockRelease(AddinShmemInitLock); } @@ -327,7 +367,7 @@ lfc_check_limit_hook(int *newval, void **extra, GucSource source) { if (*newval > lfc_max_size) { - elog(ERROR, "neon.file_cache_size_limit can not be larger than neon.max_file_cache_size"); + elog(ERROR, "LFC: neon.file_cache_size_limit can not be larger than neon.max_file_cache_size"); return false; } return true; @@ -338,14 +378,31 @@ lfc_change_limit_hook(int newval, void *extra) { uint32 new_size = SIZE_MB_TO_CHUNKS(newval); - if (!is_normal_backend()) - return; - - if (!lfc_ensure_opened()) + if (!lfc_ctl || !is_normal_backend()) return; LWLockAcquire(lfc_lock, LW_EXCLUSIVE); + /* Open LFC file only if LFC was enabled or we are going to reenable it */ + if (newval == 0 && !LFC_ENABLED()) + { + LWLockRelease(lfc_lock); + /* File should be reopened if LFC is reenabled */ + lfc_close_file(); + return; + } + + if (!lfc_ensure_opened()) + { + LWLockRelease(lfc_lock); + return; + } + + if (lfc_ctl->limit != new_size) + { + lfc_ctl->resizes += 1; + } + while (new_size < lfc_ctl->used && !dlist_is_empty(&lfc_ctl->lru)) { /* @@ -367,7 +424,9 @@ lfc_change_limit_hook(int newval, void *extra) /* We remove the old entry, and re-enter a hole to the hash table */ for (int i = 0; i < BLOCKS_PER_CHUNK; i++) { - lfc_ctl->used_pages -= (victim->bitmap[i >> 5] >> (i & 31)) & 1; + bool is_page_cached = GET_STATE(victim, i) == AVAILABLE; + lfc_ctl->used_pages -= is_page_cached; + lfc_ctl->evicted_pages += is_page_cached; } hash_search_with_hash_value(lfc_hash, &victim->key, victim->hash, HASH_REMOVE, NULL); @@ -383,10 +442,11 @@ lfc_change_limit_hook(int newval, void *extra) lfc_ctl->used -= 1; } - lfc_ctl->limit = new_size; - if (new_size == 0) { - lfc_ctl->generation += 1; - } + if (new_size == 0) + lfc_switch_off(); + else + lfc_ctl->limit = new_size; + neon_log(DEBUG1, "set local file cache limit to %d", new_size); LWLockRelease(lfc_lock); @@ -403,6 +463,17 @@ lfc_init(void) neon_log(ERROR, "Neon module should be loaded via shared_preload_libraries"); + DefineCustomBoolVariable("neon.store_prefetch_result_in_lfc", + "Immediately store received prefetch result in LFC", + NULL, + 
&lfc_store_prefetch_result, + false, + PGC_SUSET, + 0, + NULL, + NULL, + NULL); + DefineCustomIntVariable("neon.max_file_cache_size", "Maximal size of Neon local file cache", NULL, @@ -480,7 +551,7 @@ lfc_cache_contains(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno) if (LFC_ENABLED()) { entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, NULL); - found = entry != NULL && (entry->bitmap[chunk_offs >> 5] & ((uint32)1 << (chunk_offs & 31))) != 0; + found = entry != NULL && GET_STATE(entry, chunk_offs) != UNAVAILABLE; } LWLockRelease(lfc_lock); return found; @@ -529,8 +600,7 @@ lfc_cache_containsv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, { for (; chunk_offs < BLOCKS_PER_CHUNK && i < nblocks; chunk_offs++, i++) { - if ((entry->bitmap[chunk_offs >> 5] & - ((uint32)1 << (chunk_offs & 31))) != 0) + if (GET_STATE(entry, chunk_offs) != UNAVAILABLE) { BITMAP_SET(bitmap, i); found++; @@ -541,7 +611,6 @@ lfc_cache_containsv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, { i += this_chunk; } - /* * Break out of the iteration before doing expensive stuff for * a next iteration @@ -577,87 +646,6 @@ lfc_cache_containsv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, return found; } -/* - * Evict a page (if present) from the local file cache - */ -void -lfc_evict(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno) -{ - BufferTag tag; - FileCacheEntry *entry; - bool found; - int chunk_offs = blkno & (BLOCKS_PER_CHUNK - 1); - uint32 hash; - - if (lfc_maybe_disabled()) /* fast exit if file cache is disabled */ - return; - - CopyNRelFileInfoToBufTag(tag, rinfo); - tag.forkNum = forkNum; - tag.blockNum = (blkno & ~(BLOCKS_PER_CHUNK - 1)); - - CriticalAssert(BufTagGetRelNumber(&tag) != InvalidRelFileNumber); - hash = get_hash_value(lfc_hash, &tag); - - LWLockAcquire(lfc_lock, LW_EXCLUSIVE); - - if (!LFC_ENABLED()) - { - LWLockRelease(lfc_lock); - return; - } - - entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, &found); - - if (!found) - { - /* nothing to do */ - LWLockRelease(lfc_lock); - return; - } - - /* remove the page from the cache */ - entry->bitmap[chunk_offs >> 5] &= ~((uint32)1 << (chunk_offs & (32 - 1))); - - if (entry->access_count == 0) - { - /* - * If the chunk has no live entries, we can position the chunk to be - * recycled first. - */ - if (entry->bitmap[chunk_offs >> 5] == 0) - { - bool has_remaining_pages = false; - - for (int i = 0; i < CHUNK_BITMAP_SIZE; i++) - { - if (entry->bitmap[i] != 0) - { - has_remaining_pages = true; - break; - } - } - - /* - * Put the entry at the position that is first to be reclaimed when we - * have no cached pages remaining in the chunk - */ - if (!has_remaining_pages) - { - dlist_delete(&entry->list_node); - dlist_push_head(&lfc_ctl->lru, &entry->list_node); - } - } - } - - /* - * Done: apart from empty chunks, we don't move chunks in the LRU when - * they're empty because eviction isn't usage. - */ - - LWLockRelease(lfc_lock); -} - /* * Try to read pages from local cache. 
* Returns the number of pages read from the local cache, and sets bits in @@ -685,17 +673,14 @@ lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, int buf_offset = 0; if (lfc_maybe_disabled()) /* fast exit if file cache is disabled */ - return 0; - - if (!lfc_ensure_opened()) - return 0; + return -1; CopyNRelFileInfoToBufTag(tag, rinfo); tag.forkNum = forkNum; CriticalAssert(BufTagGetRelNumber(&tag) != InvalidRelFileNumber); - /* + /* * For every chunk that has blocks we're interested in, we * 1. get the chunk header * 2. Check if the chunk actually has the blocks we're interested in @@ -712,22 +697,35 @@ lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, int iteration_hits = 0; int iteration_misses = 0; uint64 io_time_us = 0; + int n_blocks_to_read = 0; + ConditionVariable* cv; + Assert(blocks_in_chunk > 0); for (int i = 0; i < blocks_in_chunk; i++) { + n_blocks_to_read += (BITMAP_ISSET(mask, buf_offset + i) != 0); iov[i].iov_base = buffers[buf_offset + i]; iov[i].iov_len = BLCKSZ; + BITMAP_CLR(mask, buf_offset + i); + } + if (n_blocks_to_read == 0) + { + buf_offset += blocks_in_chunk; + nblocks -= blocks_in_chunk; + blkno += blocks_in_chunk; + continue; } tag.blockNum = blkno - chunk_offs; hash = get_hash_value(lfc_hash, &tag); + cv = &lfc_ctl->cv[hash % N_COND_VARS]; LWLockAcquire(lfc_lock, LW_EXCLUSIVE); /* We can return the blocks we've read before LFC got disabled; * assuming we read any. */ - if (!LFC_ENABLED()) + if (!LFC_ENABLED() || !lfc_ensure_opened()) { LWLockRelease(lfc_lock); return blocks_read; @@ -763,15 +761,32 @@ lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, generation = lfc_ctl->generation; entry_offset = entry->offset; - LWLockRelease(lfc_lock); - for (int i = 0; i < blocks_in_chunk; i++) { - /* - * If the page is valid, we consider it "read". - * All other pages will be fetched separately by the next cache - */ - if (entry->bitmap[(chunk_offs + i) / 32] & ((uint32)1 << ((chunk_offs + i) % 32))) + FileCacheBlockState state = UNAVAILABLE; + bool sleeping = false; + while (lfc_ctl->generation == generation) + { + state = GET_STATE(entry, chunk_offs + i); + if (state == PENDING) { + SET_STATE(entry, chunk_offs + i, REQUESTED); + } else if (state != REQUESTED) { + break; + } + if (!sleeping) + { + ConditionVariablePrepareToSleep(cv); + sleeping = true; + } + LWLockRelease(lfc_lock); + ConditionVariableTimedSleep(cv, CV_WAIT_TIMEOUT, WAIT_EVENT_NEON_LFC_CV_WAIT); + LWLockAcquire(lfc_lock, LW_EXCLUSIVE); + } + if (sleeping) + { + ConditionVariableCancelSleep(); + } + if (state == AVAILABLE) { BITMAP_SET(mask, buf_offset + i); iteration_hits++; @@ -779,6 +794,7 @@ lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, else iteration_misses++; } + LWLockRelease(lfc_lock); Assert(iteration_hits + iteration_misses > 0); @@ -820,6 +836,7 @@ lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, else { /* generation mismatch, assume error condition */ + lfc_close_file(); LWLockRelease(lfc_lock); return -1; } @@ -835,6 +852,249 @@ lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, return blocks_read; } +/* + * Initialize new LFC hash entry, perform eviction if needed. + * Returns false if there are no unpinned entries and chunk can not be added. 
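The reader side of lfc_readv_select() above parks on one of the N_COND_VARS condition variables whenever it finds a block in PENDING or REQUESTED state. Below is a standalone sketch of that handshake using a plain pthread mutex and condition variable instead of the LWLock/ConditionVariable machinery; the thread split, names, and timings are illustrative, and the generation checks and the CV_WAIT_TIMEOUT timed sleep of the real code are omitted. (lfc_init_new_entry itself continues below.)

/* Compile with: cc -pthread -o handshake_demo handshake_demo.c */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

enum block_state { UNAVAILABLE, AVAILABLE, PENDING, REQUESTED };

static pthread_mutex_t lock  = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cv    = PTHREAD_COND_INITIALIZER;
static enum block_state state = UNAVAILABLE;
static char             page[8192];

static void *writer(void *arg)
{
    (void) arg;

    /* Claim the block: state is PENDING while the write is in flight. */
    pthread_mutex_lock(&lock);
    state = PENDING;
    pthread_mutex_unlock(&lock);

    usleep(100 * 1000);          /* simulate the pwrite() done outside the lock */
    page[0] = 'x';

    /* Publish the block and wake anybody who asked for it. */
    pthread_mutex_lock(&lock);
    if (state == REQUESTED)
        pthread_cond_broadcast(&cv);
    state = AVAILABLE;
    pthread_mutex_unlock(&lock);
    return NULL;
}

static void *reader(void *arg)
{
    (void) arg;

    pthread_mutex_lock(&lock);
    while (state == PENDING || state == REQUESTED)
    {
        /* Tell the writer we are waiting, then sleep on the CV. */
        state = REQUESTED;
        pthread_cond_wait(&cv, &lock);
    }
    if (state == AVAILABLE)
        printf("reader: got page, first byte '%c'\n", page[0]);
    else
        printf("reader: block not cached, fall back to the pageserver\n");
    pthread_mutex_unlock(&lock);
    return NULL;
}

int main(void)
{
    pthread_t w, r;

    pthread_create(&w, NULL, writer, NULL);
    usleep(10 * 1000);           /* let the writer set PENDING first */
    pthread_create(&r, NULL, reader, NULL);

    pthread_join(w, NULL);
    pthread_join(r, NULL);
    return 0;
}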
+ */ +static bool +lfc_init_new_entry(FileCacheEntry* entry, uint32 hash) +{ + /*----------- + * If the chunk wasn't already in the LFC then we have these + * options, in order of preference: + * + * Unless there is no space available, we can: + * 1. Use an entry from the `holes` list, and + * 2. Create a new entry. + * We can always, regardless of space in the LFC: + * 3. evict an entry from LRU, and + * 4. ignore the write operation (the least favorite option) + */ + if (lfc_ctl->used < lfc_ctl->limit) + { + if (!dlist_is_empty(&lfc_ctl->holes)) + { + /* We can reuse a hole that was left behind when the LFC was shrunk previously */ + FileCacheEntry *hole = dlist_container(FileCacheEntry, list_node, + dlist_pop_head_node(&lfc_ctl->holes)); + uint32 offset = hole->offset; + bool hole_found; + + hash_search_with_hash_value(lfc_hash, &hole->key, + hole->hash, HASH_REMOVE, &hole_found); + CriticalAssert(hole_found); + + lfc_ctl->used += 1; + entry->offset = offset; /* reuse the hole */ + } + else + { + lfc_ctl->used += 1; + entry->offset = lfc_ctl->size++;/* allocate new chunk at end + * of file */ + } + } + /* + * We've already used up all allocated LFC entries. + * + * If we can clear an entry from the LRU, do that. + * If we can't (e.g. because all other slots are being accessed) + * then we will remove this entry from the hash and continue + * on to the next chunk, as we may not exceed the limit. + */ + else if (!dlist_is_empty(&lfc_ctl->lru)) + { + /* Cache overflow: evict least recently used chunk */ + FileCacheEntry *victim = dlist_container(FileCacheEntry, list_node, + dlist_pop_head_node(&lfc_ctl->lru)); + + for (int i = 0; i < BLOCKS_PER_CHUNK; i++) + { + bool is_page_cached = GET_STATE(victim, i) == AVAILABLE; + lfc_ctl->used_pages -= is_page_cached; + lfc_ctl->evicted_pages += is_page_cached; + } + + CriticalAssert(victim->access_count == 0); + entry->offset = victim->offset; /* grab victim's chunk */ + hash_search_with_hash_value(lfc_hash, &victim->key, + victim->hash, HASH_REMOVE, NULL); + neon_log(DEBUG2, "Swap file cache page"); + } + else + { + /* Can't add this chunk - we don't have the space for it */ + hash_search_with_hash_value(lfc_hash, &entry->key, hash, + HASH_REMOVE, NULL); + + return false; + } + + entry->access_count = 1; + entry->hash = hash; + + for (int i = 0; i < BLOCKS_PER_CHUNK; i++) + SET_STATE(entry, i, UNAVAILABLE); + + return true; +} + +/* + * Store received prefetch result in LFC cache. + * Unlike lfc_read/lfc_write this call is is not protected by shared buffer lock. + * So we should be ready that other backends will try to concurrently read or write this block. + * We do not store prefetched block if it already exists in LFC or it's not_modified_since LSN is smaller + * than current last written LSN (LwLSN). + * + * We can enforce correctness of storing page in LFC by the following steps: + * 1. Check under LFC lock that page in not present in LFC. + * 2. Check under LFC lock that LwLSN is not changed since prefetch request time (not_modified_since). + * 3. Change page state to "Pending" under LFC lock to prevent all other backends to read or write this + * pages until this write is completed. + * 4. Assume that some other backend creates new image of the page without reading it + * (because reads will be blocked because of 2). This version of the page is stored in shared buffer. + * Any attempt to throw away this page from shared buffer will be blocked, because Postgres first + * needs to save dirty page and write will be blocked because of 2. 
+ * So any backend trying to access this page, will take it from shared buffer without accessing + * SMGR and LFC. + * 5. After write completion we once again obtain LFC lock and wake-up all waiting backends. + * If there is some backend waiting to write new image of the page (4) then now it will be able to + * do it,overwriting old (prefetched) page image. As far as this write will be completed before + * shared buffer can be reassigned, not other backend can see old page image. +*/ +bool +lfc_prefetch(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno, + const void* buffer, XLogRecPtr lsn) +{ + BufferTag tag; + FileCacheEntry *entry; + ssize_t rc; + bool found; + uint32 hash; + uint64 generation; + uint32 entry_offset; + instr_time io_start, io_end; + ConditionVariable* cv; + FileCacheBlockState state; + XLogRecPtr lwlsn; + + int chunk_offs = blkno & (BLOCKS_PER_CHUNK - 1); + + if (lfc_maybe_disabled()) /* fast exit if file cache is disabled */ + return false; + + CopyNRelFileInfoToBufTag(tag, rinfo); + tag.forkNum = forknum; + + CriticalAssert(BufTagGetRelNumber(&tag) != InvalidRelFileNumber); + + tag.blockNum = blkno & ~(BLOCKS_PER_CHUNK - 1); + hash = get_hash_value(lfc_hash, &tag); + cv = &lfc_ctl->cv[hash % N_COND_VARS]; + + LWLockAcquire(lfc_lock, LW_EXCLUSIVE); + + if (!LFC_ENABLED() || !lfc_ensure_opened()) + { + LWLockRelease(lfc_lock); + return false; + } + lwlsn = GetLastWrittenLSN(rinfo, forknum, blkno); + if (lwlsn > lsn) + { + elog(DEBUG1, "Skip LFC write for %d because LwLSN=%X/%X is greater than not_nodified_since LSN %X/%X", + blkno, LSN_FORMAT_ARGS(lwlsn), LSN_FORMAT_ARGS(lsn)); + LWLockRelease(lfc_lock); + return false; + } + + entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_ENTER, &found); + + if (found) + { + state = GET_STATE(entry, chunk_offs); + if (state != UNAVAILABLE) { + /* Do not rewrite existed LFC entry */ + LWLockRelease(lfc_lock); + return false; + } + /* + * Unlink entry from LRU list to pin it for the duration of IO + * operation + */ + if (entry->access_count++ == 0) + dlist_delete(&entry->list_node); + } + else + { + if (!lfc_init_new_entry(entry, hash)) + { + /* + * We can't process this chunk due to lack of space in LFC, + * so skip to the next one + */ + LWLockRelease(lfc_lock); + return false; + } + } + + generation = lfc_ctl->generation; + entry_offset = entry->offset; + + SET_STATE(entry, chunk_offs, PENDING); + + LWLockRelease(lfc_lock); + + pgstat_report_wait_start(WAIT_EVENT_NEON_LFC_WRITE); + INSTR_TIME_SET_CURRENT(io_start); + rc = pwrite(lfc_desc, buffer, BLCKSZ, + ((off_t) entry_offset * BLOCKS_PER_CHUNK + chunk_offs) * BLCKSZ); + INSTR_TIME_SET_CURRENT(io_end); + pgstat_report_wait_end(); + + if (rc != BLCKSZ) + { + lfc_disable("write"); + } + else + { + LWLockAcquire(lfc_lock, LW_EXCLUSIVE); + + if (lfc_ctl->generation == generation) + { + uint64 time_spent_us; + CriticalAssert(LFC_ENABLED()); + /* Place entry to the head of LRU list */ + CriticalAssert(entry->access_count > 0); + + lfc_ctl->writes += 1; + INSTR_TIME_SUBTRACT(io_start, io_end); + time_spent_us = INSTR_TIME_GET_MICROSEC(io_start); + lfc_ctl->time_write += time_spent_us; + inc_page_cache_write_wait(time_spent_us); + + if (--entry->access_count == 0) + dlist_push_tail(&lfc_ctl->lru, &entry->list_node); + + state = GET_STATE(entry, chunk_offs); + if (state == REQUESTED) { + ConditionVariableBroadcast(cv); + } + if (state != AVAILABLE) + { + lfc_ctl->used_pages += 1; + SET_STATE(entry, chunk_offs, AVAILABLE); + } + } + else + { + lfc_close_file(); + } + 
LWLockRelease(lfc_lock); + } + return true; +} + /* * Put page in local file cache. * If cache is full then evict some other page. @@ -855,15 +1115,21 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, if (lfc_maybe_disabled()) /* fast exit if file cache is disabled */ return; - if (!lfc_ensure_opened()) - return; - CopyNRelFileInfoToBufTag(tag, rinfo); tag.forkNum = forkNum; CriticalAssert(BufTagGetRelNumber(&tag) != InvalidRelFileNumber); - /* + LWLockAcquire(lfc_lock, LW_EXCLUSIVE); + + if (!LFC_ENABLED() || !lfc_ensure_opened()) + { + LWLockRelease(lfc_lock); + return; + } + generation = lfc_ctl->generation; + + /* * For every chunk that has blocks we're interested in, we * 1. get the chunk header * 2. Check if the chunk actually has the blocks we're interested in @@ -878,6 +1144,8 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, int chunk_offs = blkno & (BLOCKS_PER_CHUNK - 1); int blocks_in_chunk = Min(nblocks, BLOCKS_PER_CHUNK - (blkno % BLOCKS_PER_CHUNK)); instr_time io_start, io_end; + ConditionVariable* cv; + Assert(blocks_in_chunk > 0); for (int i = 0; i < blocks_in_chunk; i++) @@ -888,14 +1156,7 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, tag.blockNum = blkno & ~(BLOCKS_PER_CHUNK - 1); hash = get_hash_value(lfc_hash, &tag); - - LWLockAcquire(lfc_lock, LW_EXCLUSIVE); - - if (!LFC_ENABLED()) - { - LWLockRelease(lfc_lock); - return; - } + cv = &lfc_ctl->cv[hash % N_COND_VARS]; entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_ENTER, &found); @@ -908,92 +1169,50 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, if (entry->access_count++ == 0) dlist_delete(&entry->list_node); } - /*----------- - * If the chunk wasn't already in the LFC then we have these - * options, in order of preference: - * - * Unless there is no space available, we can: - * 1. Use an entry from the `holes` list, and - * 2. Create a new entry. - * We can always, regardless of space in the LFC: - * 3. evict an entry from LRU, and - * 4. ignore the write operation (the least favorite option) - */ - else if (lfc_ctl->used < lfc_ctl->limit) - { - if (!dlist_is_empty(&lfc_ctl->holes)) - { - /* We can reuse a hole that was left behind when the LFC was shrunk previously */ - FileCacheEntry *hole = dlist_container(FileCacheEntry, list_node, - dlist_pop_head_node(&lfc_ctl->holes)); - uint32 offset = hole->offset; - bool hole_found; - - hash_search_with_hash_value(lfc_hash, &hole->key, - hole->hash, HASH_REMOVE, &hole_found); - CriticalAssert(hole_found); - - lfc_ctl->used += 1; - entry->offset = offset; /* reuse the hole */ - } - else - { - lfc_ctl->used += 1; - entry->offset = lfc_ctl->size++;/* allocate new chunk at end - * of file */ - } - } - /* - * We've already used up all allocated LFC entries. - * - * If we can clear an entry from the LRU, do that. - * If we can't (e.g. because all other slots are being accessed) - * then we will remove this entry from the hash and continue - * on to the next chunk, as we may not exceed the limit. 
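lfc_prefetch() above refuses to store a received prefetch result if the page was modified after the prefetch request was issued, which it detects by comparing the last-written LSN against the request's not_modified_since LSN. A tiny standalone sketch of that gate follows; the helper name and the literal LSN values are invented for the example.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t XLogRecPtr;

/* Hypothetical stand-in for the check done under the LFC lock: the page image
 * may only be cached if nothing was written to it after the prefetch request
 * was issued (i.e. the last-written LSN does not exceed not_modified_since). */
static bool
prefetched_page_still_current(XLogRecPtr last_written_lsn, XLogRecPtr not_modified_since)
{
    return last_written_lsn <= not_modified_since;
}

int main(void)
{
    printf("%d\n", prefetched_page_still_current(0x16B3A80, 0x16B3C00)); /* 1: ok to cache   */
    printf("%d\n", prefetched_page_still_current(0x16B3D00, 0x16B3C00)); /* 0: stale, skip it */
    return 0;
}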
- */ - else if (!dlist_is_empty(&lfc_ctl->lru)) - { - /* Cache overflow: evict least recently used chunk */ - FileCacheEntry *victim = dlist_container(FileCacheEntry, list_node, - dlist_pop_head_node(&lfc_ctl->lru)); - - for (int i = 0; i < BLOCKS_PER_CHUNK; i++) - { - lfc_ctl->used_pages -= (victim->bitmap[i >> 5] >> (i & 31)) & 1; - } - - CriticalAssert(victim->access_count == 0); - entry->offset = victim->offset; /* grab victim's chunk */ - hash_search_with_hash_value(lfc_hash, &victim->key, - victim->hash, HASH_REMOVE, NULL); - neon_log(DEBUG2, "Swap file cache page"); - } else { - /* Can't add this chunk - we don't have the space for it */ - hash_search_with_hash_value(lfc_hash, &entry->key, hash, - HASH_REMOVE, NULL); - - /* - * We can't process this chunk due to lack of space in LFC, - * so skip to the next one - */ - LWLockRelease(lfc_lock); - blkno += blocks_in_chunk; - buf_offset += blocks_in_chunk; - nblocks -= blocks_in_chunk; - continue; + if (!lfc_init_new_entry(entry, hash)) + { + /* + * We can't process this chunk due to lack of space in LFC, + * so skip to the next one + */ + blkno += blocks_in_chunk; + buf_offset += blocks_in_chunk; + nblocks -= blocks_in_chunk; + continue; + } } - if (!found) - { - entry->access_count = 1; - entry->hash = hash; - memset(entry->bitmap, 0, sizeof entry->bitmap); - } - - generation = lfc_ctl->generation; entry_offset = entry->offset; + + for (int i = 0; i < blocks_in_chunk; i++) + { + FileCacheBlockState state = UNAVAILABLE; + bool sleeping = false; + while (lfc_ctl->generation == generation) + { + state = GET_STATE(entry, chunk_offs + i); + if (state == PENDING) { + SET_STATE(entry, chunk_offs + i, REQUESTED); + } else if (state != REQUESTED) { + SET_STATE(entry, chunk_offs + i, PENDING); + break; + } + if (!sleeping) + { + ConditionVariablePrepareToSleep(cv); + sleeping = true; + } + LWLockRelease(lfc_lock); + ConditionVariableTimedSleep(cv, CV_WAIT_TIMEOUT, WAIT_EVENT_NEON_LFC_CV_WAIT); + LWLockAcquire(lfc_lock, LW_EXCLUSIVE); + } + if (sleeping) + { + ConditionVariableCancelSleep(); + } + } LWLockRelease(lfc_lock); pgstat_report_wait_start(WAIT_EVENT_NEON_LFC_WRITE); @@ -1006,6 +1225,7 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, if (rc != BLCKSZ * blocks_in_chunk) { lfc_disable("write"); + return; } else { @@ -1029,18 +1249,30 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, for (int i = 0; i < blocks_in_chunk; i++) { - lfc_ctl->used_pages += 1 - ((entry->bitmap[(chunk_offs + i) >> 5] >> ((chunk_offs + i) & 31)) & 1); - entry->bitmap[(chunk_offs + i) >> 5] |= - ((uint32)1 << ((chunk_offs + i) & 31)); + FileCacheBlockState state = GET_STATE(entry, chunk_offs + i); + if (state == REQUESTED) + { + ConditionVariableBroadcast(cv); + } + if (state != AVAILABLE) + { + lfc_ctl->used_pages += 1; + SET_STATE(entry, chunk_offs + i, AVAILABLE); + } } } - - LWLockRelease(lfc_lock); + else + { + /* stop iteration if LFC was disabled */ + lfc_close_file(); + break; + } } blkno += blocks_in_chunk; buf_offset += blocks_in_chunk; nblocks -= blocks_in_chunk; } + LWLockRelease(lfc_lock); } typedef struct @@ -1127,6 +1359,16 @@ neon_get_lfc_stats(PG_FUNCTION_ARGS) if (lfc_ctl) value = lfc_ctl->used_pages; break; + case 6: + key = "file_cache_evicted_pages"; + if (lfc_ctl) + value = lfc_ctl->evicted_pages; + break; + case 7: + key = "file_cache_limit"; + if (lfc_ctl) + value = lfc_ctl->limit; + break; default: SRF_RETURN_DONE(funcctx); } @@ -1250,8 +1492,8 @@ local_cache_pages(PG_FUNCTION_ARGS) 
hash_seq_init(&status, lfc_hash); while ((entry = hash_seq_search(&status)) != NULL) { - for (int i = 0; i < CHUNK_BITMAP_SIZE; i++) - n_pages += pg_popcount32(entry->bitmap[i]); + for (int i = 0; i < BLOCKS_PER_CHUNK; i++) + n_pages += GET_STATE(entry, i) == AVAILABLE; } } } @@ -1279,7 +1521,7 @@ local_cache_pages(PG_FUNCTION_ARGS) { for (int i = 0; i < BLOCKS_PER_CHUNK; i++) { - if (entry->bitmap[i >> 5] & ((uint32)1 << (i & 31))) + if (GET_STATE(entry, i) == AVAILABLE) { fctx->record[n].pageoffs = entry->offset * BLOCKS_PER_CHUNK + i; fctx->record[n].relfilenode = NInfoGetRelNumber(BufTagGetNRelFileInfo(entry->key)); diff --git a/pgxn/neon/libpagestore.c b/pgxn/neon/libpagestore.c index fc1aecd340..f71f11ff93 100644 --- a/pgxn/neon/libpagestore.c +++ b/pgxn/neon/libpagestore.c @@ -14,6 +14,8 @@ */ #include "postgres.h" +#include + #include "access/xlog.h" #include "common/hashfn.h" #include "fmgr.h" @@ -61,6 +63,9 @@ int neon_protocol_version = 2; static int max_reconnect_attempts = 60; static int stripe_size; +static int pageserver_response_log_timeout = 10000; +static int pageserver_response_disconnect_timeout = 120000; /* 2 minutes */ + typedef struct { char connstring[MAX_SHARDS][MAX_PAGESERVER_CONNSTRING_SIZE]; @@ -129,6 +134,11 @@ typedef struct uint64 nrequests_sent; uint64 nresponses_received; + /* State for the receive timeout mechanism in call_PQgetCopyData() */ + instr_time receive_start_time; /* when we started waiting */ + instr_time receive_last_log_time; /* when we last printed a log message for the wait */ + bool receive_logged; /* has the wait been logged */ + /*--- * WaitEventSet containing: * - WL_SOCKET_READABLE on 'conn' @@ -661,6 +671,9 @@ pageserver_connect(shardno_t shard_no, int elevel) shard->state = PS_Connected; shard->nrequests_sent = 0; shard->nresponses_received = 0; + INSTR_TIME_SET_ZERO(shard->receive_start_time); + INSTR_TIME_SET_ZERO(shard->receive_last_log_time); + shard->receive_logged = false; } /* FALLTHROUGH */ case PS_Connected: @@ -680,6 +693,33 @@ pageserver_connect(shardno_t shard_no, int elevel) Assert(false); } +static void +get_socket_stats(int socketfd, int *sndbuf, int *recvbuf) +{ + *sndbuf = -1; + *recvbuf = -1; + +#ifdef __linux__ + /* + * get kernel's send and recv queue size via ioctl + * https://elixir.bootlin.com/linux/v6.1.128/source/include/uapi/linux/sockios.h#L25-L27 + */ + if (socketfd != -1) + { + int ioctl_err; + + ioctl_err = ioctl(socketfd, SIOCOUTQ, sndbuf); + if (ioctl_err!= 0) { + *sndbuf = -errno; + } + ioctl_err = ioctl(socketfd, FIONREAD, recvbuf); + if (ioctl_err != 0) { + *recvbuf = -errno; + } + } +#endif +} + /* * A wrapper around PQgetCopyData that checks for interrupts while sleeping. */ @@ -690,46 +730,54 @@ call_PQgetCopyData(shardno_t shard_no, char **buffer) PageServer *shard = &page_servers[shard_no]; PGconn *pageserver_conn = shard->conn; instr_time now, - start_ts, since_start, - last_log_ts, since_last_log; - bool logged = false; - - /* - * As a debugging aid, if we don't get a response for a long time, print a - * log message. - * - * 10 s is a very generous threshold, normally we expect a response in a - * few milliseconds. We have metrics to track latencies in normal ranges, - * but in the cases that take exceptionally long, it's useful to log the - * exact timestamps. 
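get_socket_stats() above reports the kernel's unsent and unread byte counts for the pageserver socket. Purely as an illustration, the same two Linux ioctls can be exercised on a throwaway socket with the same negative-errno convention on failure (Linux-only):

#include <errno.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <unistd.h>
#include <linux/sockios.h>   /* SIOCOUTQ */

int main(void)
{
    int fd = socket(AF_INET, SOCK_STREAM, 0);
    int sndbuf = -1, recvbuf = -1;

    if (fd < 0)
    {
        perror("socket");
        return 1;
    }
    /* Bytes queued in the kernel but not yet sent to the peer. */
    if (ioctl(fd, SIOCOUTQ, &sndbuf) != 0)
        sndbuf = -errno;
    /* Bytes received by the kernel but not yet read by this process. */
    if (ioctl(fd, FIONREAD, &recvbuf) != 0)
        recvbuf = -errno;

    printf("sndbuf=%d recvbuf=%d\n", sndbuf, recvbuf);
    close(fd);
    return 0;
}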
- */ -#define LOG_INTERVAL_MS INT64CONST(10 * 1000) - - INSTR_TIME_SET_CURRENT(now); - start_ts = last_log_ts = now; - INSTR_TIME_SET_ZERO(since_last_log); retry: ret = PQgetCopyData(pageserver_conn, buffer, 1 /* async */ ); if (ret == 0) { - WaitEvent event; + WaitEvent occurred_event; + int noccurred; + double log_timeout, + disconnect_timeout; long timeout; - timeout = Max(0, LOG_INTERVAL_MS - INSTR_TIME_GET_MILLISEC(since_last_log)); + /* + * Calculate time elapsed since the start, and since the last progress + * log message. On first call, remember the start time. + */ + INSTR_TIME_SET_CURRENT(now); + if (INSTR_TIME_IS_ZERO(shard->receive_start_time)) + { + shard->receive_start_time = now; + INSTR_TIME_SET_ZERO(since_start); + shard->receive_last_log_time = now; + INSTR_TIME_SET_ZERO(since_last_log); + shard->receive_logged = false; + } + else + { + since_start = now; + INSTR_TIME_SUBTRACT(since_start, shard->receive_start_time); + since_last_log = now; + INSTR_TIME_SUBTRACT(since_last_log, shard->receive_last_log_time); + } - /* Sleep until there's something to do */ - (void) WaitEventSetWait(shard->wes_read, timeout, &event, 1, - WAIT_EVENT_NEON_PS_READ); + /* Sleep until the log or disconnect timeout is reached. */ + log_timeout = Max(0, (double) pageserver_response_log_timeout - INSTR_TIME_GET_MILLISEC(since_last_log)); + disconnect_timeout = Max(0, (double) pageserver_response_disconnect_timeout - INSTR_TIME_GET_MILLISEC(since_start)); + timeout = (long) ceil(Min(log_timeout, disconnect_timeout)); + + noccurred = WaitEventSetWait(shard->wes_read, timeout, &occurred_event, 1, + WAIT_EVENT_NEON_PS_READ); ResetLatch(MyLatch); CHECK_FOR_INTERRUPTS(); /* Data available in socket? */ - if (event.events & WL_SOCKET_READABLE) + if (noccurred > 0 && (occurred_event.events & WL_SOCKET_READABLE) != 0) { if (!PQconsumeInput(pageserver_conn)) { @@ -739,49 +787,61 @@ retry: pfree(msg); return -1; } + goto retry; + } + + /* Timeout was reached, or we were interrupted for some other reason */ + INSTR_TIME_SET_CURRENT(now); + since_last_log = now; + INSTR_TIME_SUBTRACT(since_last_log, shard->receive_last_log_time); + since_start = now; + INSTR_TIME_SUBTRACT(since_start, shard->receive_start_time); + + /* + * As a debugging aid, if we don't get a response to a pageserver request + * for a long time, print a log message. + * + * The default neon.pageserver_response_log_timeout value, 10 s, is + * very generous. Normally we expect a response in a few + * milliseconds. We have metrics to track latencies in normal ranges, + * but in the cases that take exceptionally long, it's useful to log + * the exact timestamps. + */ + if (INSTR_TIME_GET_MILLISEC(since_last_log) >= pageserver_response_log_timeout) + { + int sndbuf; + int recvbuf; + + get_socket_stats(PQsocket(pageserver_conn), &sndbuf, &recvbuf); + + neon_shard_log(shard_no, LOG, + "no response received from pageserver for %0.3f s, still waiting (sent " UINT64_FORMAT " requests, received " UINT64_FORMAT " responses) (socket sndbuf=%d recvbuf=%d)", + INSTR_TIME_GET_DOUBLE(since_start), + shard->nrequests_sent, shard->nresponses_received, sndbuf, recvbuf); + shard->receive_last_log_time = now; + shard->receive_logged = true; } /* - * Print a message to the log if a long time has passed with no - * response. + * If an even longer time has passed without receiving a response from + * the pageserver, disconnect. That triggers a reconnection attempt + * in the caller. 
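The reworked wait loop above sleeps for whichever deadline comes first, the next progress log line or the disconnect cutoff. A standalone sketch of that arithmetic with the defaults of neon.pageserver_response_log_timeout and neon.pageserver_response_disconnect_timeout hard-coded (illustration only; link with -lm for ceil()):

#include <math.h>
#include <stdio.h>

#define Max(a, b) ((a) > (b) ? (a) : (b))
#define Min(a, b) ((a) < (b) ? (a) : (b))

/* GUC defaults, in milliseconds. */
static const double log_timeout_ms        = 10000.0;
static const double disconnect_timeout_ms = 120000.0;

/* How long the next WaitEventSetWait() call should sleep, given how long we
 * have waited overall and since the last progress log line. */
static long
next_wait_ms(double since_start_ms, double since_last_log_ms)
{
    double log_left        = Max(0, log_timeout_ms - since_last_log_ms);
    double disconnect_left = Max(0, disconnect_timeout_ms - since_start_ms);

    return (long) ceil(Min(log_left, disconnect_left));
}

int main(void)
{
    printf("%ld\n", next_wait_ms(0.0, 0.0));          /* 10000: wait until the first log line */
    printf("%ld\n", next_wait_ms(116000.0, 5000.0));  /* 4000: disconnect deadline is closer  */
    printf("%ld\n", next_wait_ms(119500.5, 9500.5));  /* 500: fractional wait rounded up      */
    return 0;
}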
+ * + * If this happens, the pageserver is likely dead and isn't coming + * back, or there's some kind of a network glitch and the connection + * is permanently gone. Without this, if the pageserver or the network + * connection is dead, it could take a very long time (15 minutes or + * more) until the TCP keepalive timeout notices that. Even if we + * would in fact get a response if we just waited a little longer, + * there's a good chance that we'll get the response sooner by + * reconnecting. */ - INSTR_TIME_SET_CURRENT(now); - since_last_log = now; - INSTR_TIME_SUBTRACT(since_last_log, last_log_ts); - if (INSTR_TIME_GET_MILLISEC(since_last_log) >= LOG_INTERVAL_MS) + if (INSTR_TIME_GET_MILLISEC(since_start) >= pageserver_response_disconnect_timeout) { - int sndbuf = -1; - int recvbuf = -1; -#ifdef __linux__ - int socketfd; -#endif - - since_start = now; - INSTR_TIME_SUBTRACT(since_start, start_ts); - -#ifdef __linux__ - /* - * get kernel's send and recv queue size via ioctl - * https://elixir.bootlin.com/linux/v6.1.128/source/include/uapi/linux/sockios.h#L25-L27 - */ - socketfd = PQsocket(pageserver_conn); - if (socketfd != -1) { - int ioctl_err; - ioctl_err = ioctl(socketfd, SIOCOUTQ, &sndbuf); - if (ioctl_err!= 0) { - sndbuf = -errno; - } - ioctl_err = ioctl(socketfd, FIONREAD, &recvbuf); - if (ioctl_err != 0) { - recvbuf = -errno; - } - } -#endif - neon_shard_log(shard_no, LOG, "no response received from pageserver for %0.3f s, still waiting (sent " UINT64_FORMAT " requests, received " UINT64_FORMAT " responses) (socket sndbuf=%d recvbuf=%d)", - INSTR_TIME_GET_DOUBLE(since_start), - shard->nrequests_sent, shard->nresponses_received, sndbuf, recvbuf); - last_log_ts = now; - logged = true; + neon_shard_log(shard_no, LOG, "no response from pageserver for %0.3f s, disconnecting", + INSTR_TIME_GET_DOUBLE(since_start)); + pageserver_disconnect(shard_no); + return -1; } goto retry; @@ -791,14 +851,18 @@ retry: * If we logged earlier that the response is taking a long time, log * another message when the response is finally received. 
*/ - if (logged) + if (shard->receive_logged) { INSTR_TIME_SET_CURRENT(now); since_start = now; - INSTR_TIME_SUBTRACT(since_start, start_ts); - neon_shard_log(shard_no, LOG, "received response from pageserver after %0.3f s", + INSTR_TIME_SUBTRACT(since_start, shard->receive_start_time); + neon_shard_log(shard_no, LOG, + "received response from pageserver after %0.3f s", INSTR_TIME_GET_DOUBLE(since_start)); } + INSTR_TIME_SET_ZERO(shard->receive_start_time); + INSTR_TIME_SET_ZERO(shard->receive_last_log_time); + shard->receive_logged = false; return ret; } @@ -972,9 +1036,17 @@ pageserver_receive(shardno_t shard_no) pfree(msg); } } + else if (rc == -1 && shard->state == PS_Disconnected) + { + /* If the state is 'Disconnected', the disconnection message was already logged */ + resp = NULL; + } else if (rc == -1) { - neon_shard_log(shard_no, LOG, "pageserver_receive disconnect: psql end of copy data: %s", pchomp(PQerrorMessage(pageserver_conn))); + char *msg = pchomp(PQerrorMessage(pageserver_conn)); + + neon_shard_log(shard_no, LOG, "pageserver_receive disconnect: psql end of copy data: %s", msg); + pfree(msg); pageserver_disconnect(shard_no); resp = NULL; } @@ -1260,6 +1332,26 @@ pg_init_libpagestore(void) 0, /* no flags required */ NULL, NULL, NULL); + DefineCustomIntVariable("neon.pageserver_response_log_timeout", + "pageserver response log timeout", + "If the pageserver doesn't respond to a request within this timeout, " + "a message is printed to the log.", + &pageserver_response_log_timeout, + 10000, 100, INT_MAX, + PGC_SUSET, + GUC_UNIT_MS, + NULL, NULL, NULL); + + DefineCustomIntVariable("neon.pageserver_response_disconnect_timeout", + "pageserver response disconnect timeout", + "If the pageserver doesn't respond to a request within this timeout, " + "disconnect and reconnect.", + &pageserver_response_disconnect_timeout, + 120000, 100, INT_MAX, + PGC_SUSET, + GUC_UNIT_MS, + NULL, NULL, NULL); + relsize_hash_init(); if (page_server != NULL) diff --git a/pgxn/neon/neon.c b/pgxn/neon/neon.c index ce2938cfd5..768d7ae9e8 100644 --- a/pgxn/neon/neon.c +++ b/pgxn/neon/neon.c @@ -12,6 +12,7 @@ #include "fmgr.h" #include "miscadmin.h" +#include "pgstat.h" #include "access/subtrans.h" #include "access/twophase.h" #include "access/xlog.h" @@ -56,6 +57,7 @@ uint32 WAIT_EVENT_NEON_LFC_MAINTENANCE; uint32 WAIT_EVENT_NEON_LFC_READ; uint32 WAIT_EVENT_NEON_LFC_TRUNCATE; uint32 WAIT_EVENT_NEON_LFC_WRITE; +uint32 WAIT_EVENT_NEON_LFC_CV_WAIT; uint32 WAIT_EVENT_NEON_PS_STARTING; uint32 WAIT_EVENT_NEON_PS_CONFIGURING; uint32 WAIT_EVENT_NEON_PS_SEND; @@ -409,6 +411,16 @@ ReportSearchPath(void) } } +#if PG_VERSION_NUM < 150000 +/* + * PG14 uses a separate backend for the stats collector, which has no access to shared memory. + * Since the AUX mechanism requires access to shared memory, persisting the pgstat.stat file + * is not supported in PG14. Hence there is no definition of the neon_pgstat_file_size_limit + * variable, so we have to declare it here.
+ */ +static int neon_pgstat_file_size_limit; +#endif + void _PG_init(void) { @@ -466,6 +478,15 @@ _PG_init(void) 0, NULL, NULL, NULL); + DefineCustomIntVariable("neon.pgstat_file_size_limit", + "Maximal size of pgstat.stat file saved in Neon storage", + "Zero value disables persisting pgstat.stat file", + &neon_pgstat_file_size_limit, + 0, 0, 1000000, /* disabled by default */ + PGC_SIGHUP, + GUC_UNIT_KB, + NULL, NULL, NULL); + /* * Important: This must happen after other parts of the extension are * loaded, otherwise any settings to GUCs that were set before the @@ -538,6 +559,7 @@ neon_shmem_startup_hook(void) WAIT_EVENT_NEON_LFC_READ = WaitEventExtensionNew("Neon/FileCache_Read"); WAIT_EVENT_NEON_LFC_TRUNCATE = WaitEventExtensionNew("Neon/FileCache_Truncate"); WAIT_EVENT_NEON_LFC_WRITE = WaitEventExtensionNew("Neon/FileCache_Write"); + WAIT_EVENT_NEON_LFC_CV_WAIT = WaitEventExtensionNew("Neon/FileCache_CvWait"); WAIT_EVENT_NEON_PS_STARTING = WaitEventExtensionNew("Neon/PS_Starting"); WAIT_EVENT_NEON_PS_CONFIGURING = WaitEventExtensionNew("Neon/PS_Configuring"); WAIT_EVENT_NEON_PS_SEND = WaitEventExtensionNew("Neon/PS_SendIO"); diff --git a/pgxn/neon/neon.h b/pgxn/neon/neon.h index 79aa88b8d3..912e09c3d3 100644 --- a/pgxn/neon/neon.h +++ b/pgxn/neon/neon.h @@ -28,6 +28,7 @@ extern uint32 WAIT_EVENT_NEON_LFC_MAINTENANCE; extern uint32 WAIT_EVENT_NEON_LFC_READ; extern uint32 WAIT_EVENT_NEON_LFC_TRUNCATE; extern uint32 WAIT_EVENT_NEON_LFC_WRITE; +extern uint32 WAIT_EVENT_NEON_LFC_CV_WAIT; extern uint32 WAIT_EVENT_NEON_PS_STARTING; extern uint32 WAIT_EVENT_NEON_PS_CONFIGURING; extern uint32 WAIT_EVENT_NEON_PS_SEND; @@ -38,6 +39,7 @@ extern uint32 WAIT_EVENT_NEON_WAL_DL; #define WAIT_EVENT_NEON_LFC_READ WAIT_EVENT_BUFFILE_READ #define WAIT_EVENT_NEON_LFC_TRUNCATE WAIT_EVENT_BUFFILE_TRUNCATE #define WAIT_EVENT_NEON_LFC_WRITE WAIT_EVENT_BUFFILE_WRITE +#define WAIT_EVENT_NEON_LFC_CV_WAIT WAIT_EVENT_BUFFILE_READ #define WAIT_EVENT_NEON_PS_STARTING PG_WAIT_EXTENSION #define WAIT_EVENT_NEON_PS_CONFIGURING PG_WAIT_EXTENSION #define WAIT_EVENT_NEON_PS_SEND PG_WAIT_EXTENSION diff --git a/pgxn/neon/neon_utils.c b/pgxn/neon/neon_utils.c index 1fb4ed9522..1fad44bd58 100644 --- a/pgxn/neon/neon_utils.c +++ b/pgxn/neon/neon_utils.c @@ -51,6 +51,26 @@ HexDecodeString(uint8 *result, char *input, int nbytes) return true; } +/* -------------------------------- + * pq_getmsgint16 - get a binary 2-byte int from a message buffer + * -------------------------------- + */ +uint16 +pq_getmsgint16(StringInfo msg) +{ + return pq_getmsgint(msg, 2); +} + +/* -------------------------------- + * pq_getmsgint32 - get a binary 4-byte int from a message buffer + * -------------------------------- + */ +uint32 +pq_getmsgint32(StringInfo msg) +{ + return pq_getmsgint(msg, 4); +} + /* -------------------------------- * pq_getmsgint32_le - get a binary 4-byte int from a message buffer in native (LE) order * -------------------------------- diff --git a/pgxn/neon/neon_utils.h b/pgxn/neon/neon_utils.h index 89683714f1..7480ac28cc 100644 --- a/pgxn/neon/neon_utils.h +++ b/pgxn/neon/neon_utils.h @@ -8,6 +8,8 @@ #endif bool HexDecodeString(uint8 *result, char *input, int nbytes); +uint16 pq_getmsgint16(StringInfo msg); +uint32 pq_getmsgint32(StringInfo msg); uint32 pq_getmsgint32_le(StringInfo msg); uint64 pq_getmsgint64_le(StringInfo msg); void pq_sendint32_le(StringInfo buf, uint32 i); diff --git a/pgxn/neon/pagestore_client.h b/pgxn/neon/pagestore_client.h index 7b748d7252..9faab1e4f0 100644 --- a/pgxn/neon/pagestore_client.h 
+++ b/pgxn/neon/pagestore_client.h @@ -233,6 +233,7 @@ extern char *neon_timeline; extern char *neon_tenant; extern int32 max_cluster_size; extern int neon_protocol_version; +extern bool lfc_store_prefetch_result; extern shardno_t get_shard_number(BufferTag* tag); @@ -301,14 +302,16 @@ extern bool lfc_cache_contains(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno); extern int lfc_cache_containsv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, int nblocks, bits8 *bitmap); -extern void lfc_evict(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno); extern void lfc_init(void); +extern bool lfc_prefetch(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno, + const void* buffer, XLogRecPtr lsn); + static inline bool lfc_read(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, void *buffer) { - bits8 rv = 0; + bits8 rv = 1; return lfc_readv_select(rinfo, forkNum, blkno, &buffer, 1, &rv) == 1; } diff --git a/pgxn/neon/pagestore_smgr.c b/pgxn/neon/pagestore_smgr.c index f1087a8ccb..091ad555e0 100644 --- a/pgxn/neon/pagestore_smgr.c +++ b/pgxn/neon/pagestore_smgr.c @@ -162,7 +162,7 @@ static uint32 local_request_counter; * UNUSED ------> REQUESTED --> RECEIVED * ^ : | | * | : v | - * | : TAG_UNUSED | + * | : TAG_REMAINS | * | : | | * +----------------+------------+ * : @@ -181,7 +181,7 @@ typedef enum PrefetchStatus /* must fit in uint8; bits 0x1 are used */ typedef enum { PRFSF_NONE = 0x0, - PRFSF_SEQ = 0x1, + PRFSF_LFC = 0x1 /* received prefetch result is stored in LFC */ } PrefetchRequestFlags; typedef struct PrefetchRequest @@ -305,7 +305,7 @@ GetLastWrittenLSNv(NRelFileInfo relfilenode, ForkNumber forknum, static void neon_get_request_lsns(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno, neon_request_lsns *output, - BlockNumber nblocks, const bits8 *mask); + BlockNumber nblocks); static bool neon_prefetch_response_usable(neon_request_lsns *request_lsns, PrefetchRequest *slot); @@ -363,6 +363,7 @@ compact_prefetch_buffers(void) target_slot->buftag = source_slot->buftag; target_slot->shard_no = source_slot->shard_no; target_slot->status = source_slot->status; + target_slot->flags = source_slot->flags; target_slot->response = source_slot->response; target_slot->reqid = source_slot->reqid; target_slot->request_lsns = source_slot->request_lsns; @@ -452,6 +453,18 @@ prefetch_pump_state(void) /* update slot state */ slot->status = PRFS_RECEIVED; slot->response = response; + + if (response->tag == T_NeonGetPageResponse && !(slot->flags & PRFSF_LFC) && lfc_store_prefetch_result) + { + /* + * Store prefetched result in LFC (please read comments to lfc_prefetch + * explaining why it can be done without holding shared buffer lock + */ + if (lfc_prefetch(BufTagGetNRelFileInfo(slot->buftag), slot->buftag.forkNum, slot->buftag.blockNum, ((NeonGetPageResponse*)response)->page, slot->request_lsns.not_modified_since)) + { + slot->flags |= PRFSF_LFC; + } + } } } @@ -474,8 +487,7 @@ readahead_buffer_resize(int newsize, void *extra) */ if (MyPState->n_requests_inflight > newsize) { - Assert(MyPState->ring_unused >= MyPState->n_requests_inflight - newsize); - prefetch_wait_for(MyPState->ring_unused - (MyPState->n_requests_inflight - newsize)); + prefetch_wait_for(MyPState->ring_unused - newsize - 1); Assert(MyPState->n_requests_inflight <= newsize); } @@ -714,6 +726,18 @@ prefetch_read(PrefetchRequest *slot) /* update slot state */ slot->status = PRFS_RECEIVED; slot->response = response; + + if (response->tag == T_NeonGetPageResponse && !(slot->flags & PRFSF_LFC) && 
lfc_store_prefetch_result) + { + /* + * Store prefetched result in LFC (please read comments to lfc_prefetch + * explaining why it can be done without holding shared buffer lock + */ + if (lfc_prefetch(BufTagGetNRelFileInfo(buftag), buftag.forkNum, buftag.blockNum, ((NeonGetPageResponse*)response)->page, slot->request_lsns.not_modified_since)) + { + slot->flags |= PRFSF_LFC; + } + } return true; } else @@ -865,7 +889,7 @@ prefetch_do_request(PrefetchRequest *slot, neon_request_lsns *force_request_lsns else neon_get_request_lsns(BufTagGetNRelFileInfo(slot->buftag), slot->buftag.forkNum, slot->buftag.blockNum, - &slot->request_lsns, 1, NULL); + &slot->request_lsns, 1); request.hdr.lsn = slot->request_lsns.request_lsn; request.hdr.not_modified_since = slot->request_lsns.not_modified_since; @@ -891,6 +915,74 @@ prefetch_do_request(PrefetchRequest *slot, neon_request_lsns *force_request_lsns Assert(!found); } +/* + * Lookup of already received prefetch requests. Only already received responses matching required LSNs are accepted. + * Present pages are marked in "mask" bitmap and total number of such pages is returned. + */ +static int +prefetch_lookupv(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blocknum, neon_request_lsns *lsns, + BlockNumber nblocks, void **buffers, bits8 *mask) +{ + int hits = 0; + PrefetchRequest hashkey; + + /* + * Use an intermediate PrefetchRequest struct as the hash key to ensure + * correct alignment and that the padding bytes are cleared. + */ + memset(&hashkey.buftag, 0, sizeof(BufferTag)); + CopyNRelFileInfoToBufTag(hashkey.buftag, rinfo); + hashkey.buftag.forkNum = forknum; + + for (int i = 0; i < nblocks; i++) + { + PrfHashEntry *entry; + + hashkey.buftag.blockNum = blocknum + i; + entry = prfh_lookup(MyPState->prf_hash, &hashkey); + + if (entry != NULL) + { + PrefetchRequest *slot = entry->slot; + uint64 ring_index = slot->my_ring_index; + Assert(slot == GetPrfSlot(ring_index)); + + Assert(slot->status != PRFS_UNUSED); + Assert(MyPState->ring_last <= ring_index && + ring_index < MyPState->ring_unused); + Assert(BufferTagsEqual(&slot->buftag, &hashkey.buftag)); + + if (slot->status != PRFS_RECEIVED) + continue; + + /* + * If the caller specified a request LSN to use, only accept + * prefetch responses that satisfy that request. 
+ */ + if (!neon_prefetch_response_usable(&lsns[i], slot)) + continue; + + memcpy(buffers[i], ((NeonGetPageResponse*)slot->response)->page, BLCKSZ); + prefetch_set_unused(ring_index); + BITMAP_SET(mask, i); + + hits += 1; + inc_getpage_wait(0); + } + } + pgBufferUsage.prefetch.hits += hits; + return hits; +} + +#if PG_MAJORVERSION_NUM < 17 +static bool +prefetch_lookup(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkn, neon_request_lsns *lsns, void *buffer) +{ + bits8 present = 0; + return prefetch_lookupv(rinfo, forkNum, blkn, lsns, 1, &buffer, &present) != 0; +} +#endif + /* * prefetch_register_bufferv() - register and prefetch buffers * @@ -1014,8 +1106,6 @@ Retry: /* The buffered request is good enough, return that index */ if (is_prefetch) pgBufferUsage.prefetch.duplicates++; - else - pgBufferUsage.prefetch.hits++; continue; } } @@ -1117,6 +1207,7 @@ Retry: slot->buftag = hashkey.buftag; slot->shard_no = get_shard_number(&tag); slot->my_ring_index = ring_index; + slot->flags = 0; min_ring_index = Min(min_ring_index, ring_index); @@ -2057,8 +2148,7 @@ GetLastWrittenLSNv(NRelFileInfo relfilenode, ForkNumber forknum, */ static void neon_get_request_lsns(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno, - neon_request_lsns *output, BlockNumber nblocks, - const bits8 *mask) + neon_request_lsns *output, BlockNumber nblocks) { XLogRecPtr last_written_lsns[PG_IOV_MAX]; @@ -2146,9 +2236,6 @@ neon_get_request_lsns(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno, neon_request_lsns *result = &output[i]; XLogRecPtr last_written_lsn = last_written_lsns[i]; - if (PointerIsValid(mask) && !BITMAP_ISSET(mask, i)) - continue; - if (last_written_lsn > replay_lsn) { /* GetCurrentReplayRecPtr was introduced in v15 */ @@ -2191,8 +2278,6 @@ neon_get_request_lsns(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno, neon_request_lsns *result = &output[i]; XLogRecPtr last_written_lsn = last_written_lsns[i]; - if (PointerIsValid(mask) && !BITMAP_ISSET(mask, i)) - continue; /* * Use the latest LSN that was evicted from the buffer cache as the * 'not_modified_since' hint. 
Any pages modified by later WAL records @@ -2414,7 +2499,7 @@ neon_exists(SMgrRelation reln, ForkNumber forkNum) } neon_get_request_lsns(InfoFromSMgrRel(reln), forkNum, - REL_METADATA_PSEUDO_BLOCKNO, &request_lsns, 1, NULL); + REL_METADATA_PSEUDO_BLOCKNO, &request_lsns, 1); { NeonExistsRequest request = { .hdr.tag = T_NeonExistsRequest, @@ -2833,8 +2918,7 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, while (nblocks > 0) { int iterblocks = Min(nblocks, PG_IOV_MAX); - bits8 lfc_present[PG_IOV_MAX / 8]; - memset(lfc_present, 0, sizeof(lfc_present)); + bits8 lfc_present[PG_IOV_MAX / 8] = {0}; if (lfc_cache_containsv(InfoFromSMgrRel(reln), forknum, blocknum, iterblocks, lfc_present) == iterblocks) @@ -2845,12 +2929,13 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, } tag.blockNum = blocknum; - + for (int i = 0; i < PG_IOV_MAX / 8; i++) lfc_present[i] = ~(lfc_present[i]); ring_index = prefetch_register_bufferv(tag, NULL, iterblocks, lfc_present, true); + nblocks -= iterblocks; blocknum += iterblocks; @@ -3106,7 +3191,8 @@ Retry: } } memcpy(buffer, getpage_resp->page, BLCKSZ); - lfc_write(rinfo, forkNum, blockno, buffer); + if (!lfc_store_prefetch_result) + lfc_write(rinfo, forkNum, blockno, buffer); break; } case T_NeonErrorResponse: @@ -3191,6 +3277,17 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer neon_log(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence); } + /* Try to read PS results if they are available */ + prefetch_pump_state(); + + neon_get_request_lsns(InfoFromSMgrRel(reln), forkNum, blkno, &request_lsns, 1); + + if (prefetch_lookup(InfoFromSMgrRel(reln), forkNum, blkno, &request_lsns, buffer)) + { + /* Prefetch hit */ + return; + } + /* Try to read from local file cache */ if (lfc_read(InfoFromSMgrRel(reln), forkNum, blkno, buffer)) { @@ -3198,9 +3295,11 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer return; } - neon_get_request_lsns(InfoFromSMgrRel(reln), forkNum, blkno, &request_lsns, 1, NULL); neon_read_at_lsn(InfoFromSMgrRel(reln), forkNum, blkno, request_lsns, buffer); + /* + * Try to receive prefetch results once again just to make sure we don't leave the smgr code while the OS might still have buffered bytes. 
+ */ prefetch_pump_state(); #ifdef DEBUG_COMPARE_LOCAL @@ -3281,11 +3380,14 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer #if PG_MAJORVERSION_NUM >= 17 static void neon_readv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, - void **buffers, BlockNumber nblocks) + void **buffers, BlockNumber nblocks) { + bits8 prefetch_hits[PG_IOV_MAX / 8] = {0}; + bits8 lfc_hits[PG_IOV_MAX / 8]; bits8 read[PG_IOV_MAX / 8]; neon_request_lsns request_lsns[PG_IOV_MAX]; int lfc_result; + int prefetch_result; switch (reln->smgr_relpersistence) { @@ -3308,38 +3410,52 @@ neon_readv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, neon_log(ERROR, "Read request too large: %d is larger than max %d", nblocks, PG_IOV_MAX); - memset(read, 0, sizeof(read)); + /* Try to read PS results if they are available */ + prefetch_pump_state(); + + neon_get_request_lsns(InfoFromSMgrRel(reln), forknum, blocknum, + request_lsns, nblocks); + + + prefetch_result = prefetch_lookupv(InfoFromSMgrRel(reln), forknum, blocknum, request_lsns, nblocks, buffers, prefetch_hits); + + if (prefetch_result == nblocks) + return; + + /* invert the result: exclude prefetched blocks */ + for (int i = 0; i < PG_IOV_MAX / 8; i++) + lfc_hits[i] = ~prefetch_hits[i]; /* Try to read from local file cache */ lfc_result = lfc_readv_select(InfoFromSMgrRel(reln), forknum, blocknum, buffers, - nblocks, read); + nblocks, lfc_hits); if (lfc_result > 0) MyNeonCounters->file_cache_hits_total += lfc_result; /* Read all blocks from LFC, so we're done */ - if (lfc_result == nblocks) + if (prefetch_result + lfc_result == nblocks) return; - if (lfc_result == -1) + if (lfc_result <= 0) { /* can't use the LFC result, so read all blocks from PS */ for (int i = 0; i < PG_IOV_MAX / 8; i++) - read[i] = 0xFF; + read[i] = ~prefetch_hits[i]; } else { /* invert the result: exclude blocks read from lfc */ for (int i = 0; i < PG_IOV_MAX / 8; i++) - read[i] = ~(read[i]); + read[i] = ~(prefetch_hits[i] | lfc_hits[i]); } - neon_get_request_lsns(InfoFromSMgrRel(reln), forknum, blocknum, - request_lsns, nblocks, read); - neon_read_at_lsnv(InfoFromSMgrRel(reln), forknum, blocknum, request_lsns, buffers, nblocks, read); + /* + * Try to receive prefetch results once again just to make sure we don't leave the smgr code while the OS might still have buffered bytes. + */ prefetch_pump_state(); #ifdef DEBUG_COMPARE_LOCAL @@ -3611,7 +3727,7 @@ neon_nblocks(SMgrRelation reln, ForkNumber forknum) } neon_get_request_lsns(InfoFromSMgrRel(reln), forknum, - REL_METADATA_PSEUDO_BLOCKNO, &request_lsns, 1, NULL); + REL_METADATA_PSEUDO_BLOCKNO, &request_lsns, 1); { NeonNblocksRequest request = { @@ -3696,7 +3812,7 @@ neon_dbsize(Oid dbNode) NRelFileInfo dummy_node = {0}; neon_get_request_lsns(dummy_node, MAIN_FORKNUM, - REL_METADATA_PSEUDO_BLOCKNO, &request_lsns, 1, NULL); + REL_METADATA_PSEUDO_BLOCKNO, &request_lsns, 1); { NeonDbSizeRequest request = { @@ -4431,7 +4547,12 @@ neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id) if (no_redo_needed) { SetLastWrittenLSNForBlock(end_recptr, rinfo, forknum, blkno); - lfc_evict(rinfo, forknum, blkno); + /* + * Redo changes if page exists in LFC. + * We should perform this check after assigning LwLSN to prevent + * prefetching of some older version of the page by some other backend. 
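neon_readv() above splits each batch of blocks across three sources, taking already-received prefetch responses first, then the local file cache, then the pageserver, and it tracks the split with bits8 masks. A standalone model of that mask arithmetic; PG_IOV_MAX is assumed to be 32 here and the BITMAP_* macros are re-declared to mirror the Postgres ones:

#include <stdint.h>
#include <stdio.h>

typedef uint8_t bits8;

#define PG_IOV_MAX 32            /* assumed batch limit, for the sketch only */
#define BITMAP_ISSET(m, i) (((m)[(i) / 8] >> ((i) % 8)) & 1)
#define BITMAP_SET(m, i)   ((m)[(i) / 8] |= (bits8) (1 << ((i) % 8)))

int main(void)
{
    bits8 prefetch_hits[PG_IOV_MAX / 8] = {0};
    bits8 lfc_hits[PG_IOV_MAX / 8] = {0};
    bits8 read_mask[PG_IOV_MAX / 8];
    int   nblocks = 8;

    /* Blocks 1 and 5 were satisfied from already-received prefetch responses. */
    BITMAP_SET(prefetch_hits, 1);
    BITMAP_SET(prefetch_hits, 5);

    /* Of the remaining blocks, the local file cache happened to hold 0 and 2.
     * In the real code lfc_readv_select() is handed ~prefetch_hits as its
     * selector and leaves only the bits it actually satisfied set. */
    BITMAP_SET(lfc_hits, 0);
    BITMAP_SET(lfc_hits, 2);

    /* Whatever neither source provided must be requested from the pageserver. */
    for (int i = 0; i < PG_IOV_MAX / 8; i++)
        read_mask[i] = (bits8) ~(prefetch_hits[i] | lfc_hits[i]);

    for (int i = 0; i < nblocks; i++)
        printf("block %d: %s\n", i,
               BITMAP_ISSET(prefetch_hits, i) ? "prefetch ring" :
               BITMAP_ISSET(lfc_hits, i)      ? "local file cache" :
               BITMAP_ISSET(read_mask, i)     ? "pageserver" : "unexpected");
    return 0;
}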
+ */ + no_redo_needed = !lfc_cache_contains(rinfo, forknum, blkno); } LWLockRelease(partitionLock); diff --git a/pgxn/neon/walproposer.c b/pgxn/neon/walproposer.c index 7472fd6afc..356895aa82 100644 --- a/pgxn/neon/walproposer.c +++ b/pgxn/neon/walproposer.c @@ -70,6 +70,7 @@ static bool SendAppendRequests(Safekeeper *sk); static bool RecvAppendResponses(Safekeeper *sk); static XLogRecPtr CalculateMinFlushLsn(WalProposer *wp); static XLogRecPtr GetAcknowledgedByQuorumWALPosition(WalProposer *wp); +static void PAMessageSerialize(WalProposer *wp, ProposerAcceptorMessage *msg, StringInfo buf, int proto_version); static void HandleSafekeeperResponse(WalProposer *wp, Safekeeper *sk); static bool AsyncRead(Safekeeper *sk, char **buf, int *buf_size); static bool AsyncReadMessage(Safekeeper *sk, AcceptorProposerMessage *anymsg); @@ -81,6 +82,8 @@ static char *FormatSafekeeperState(Safekeeper *sk); static void AssertEventsOkForState(uint32 events, Safekeeper *sk); static char *FormatEvents(WalProposer *wp, uint32 events); static void UpdateDonorShmem(WalProposer *wp); +static char *MembershipConfigurationToString(MembershipConfiguration *mconf); +static void MembershipConfigurationFree(MembershipConfiguration *mconf); WalProposer * WalProposerCreate(WalProposerConfig *config, walproposer_api api) @@ -137,25 +140,21 @@ WalProposerCreate(WalProposerConfig *config, walproposer_api api) } wp->quorum = wp->n_safekeepers / 2 + 1; + if (wp->config->proto_version != 2 && wp->config->proto_version != 3) + wp_log(FATAL, "unsupported safekeeper protocol version %d", wp->config->proto_version); + wp_log(LOG, "using safekeeper protocol version %d", wp->config->proto_version); + /* Fill the greeting package */ - wp->greetRequest.tag = 'g'; - wp->greetRequest.protocolVersion = SK_PROTOCOL_VERSION; - wp->greetRequest.pgVersion = PG_VERSION_NUM; - wp->api.strong_random(wp, &wp->greetRequest.proposerId, sizeof(wp->greetRequest.proposerId)); - wp->greetRequest.systemId = wp->config->systemId; - if (!wp->config->neon_timeline) - wp_log(FATAL, "neon.timeline_id is not provided"); - if (*wp->config->neon_timeline != '\0' && - !HexDecodeString(wp->greetRequest.timeline_id, wp->config->neon_timeline, 16)) - wp_log(FATAL, "could not parse neon.timeline_id, %s", wp->config->neon_timeline); + wp->greetRequest.pam.tag = 'g'; if (!wp->config->neon_tenant) wp_log(FATAL, "neon.tenant_id is not provided"); - if (*wp->config->neon_tenant != '\0' && - !HexDecodeString(wp->greetRequest.tenant_id, wp->config->neon_tenant, 16)) - wp_log(FATAL, "could not parse neon.tenant_id, %s", wp->config->neon_tenant); - - wp->greetRequest.timeline = wp->config->pgTimeline; - wp->greetRequest.walSegSize = wp->config->wal_segment_size; + wp->greetRequest.tenant_id = wp->config->neon_tenant; + if (!wp->config->neon_timeline) + wp_log(FATAL, "neon.timeline_id is not provided"); + wp->greetRequest.timeline_id = wp->config->neon_timeline; + wp->greetRequest.pg_version = PG_VERSION_NUM; + wp->greetRequest.system_id = wp->config->systemId; + wp->greetRequest.wal_seg_size = wp->config->wal_segment_size; wp->api.init_event_set(wp); @@ -165,12 +164,14 @@ WalProposerCreate(WalProposerConfig *config, walproposer_api api) void WalProposerFree(WalProposer *wp) { + MembershipConfigurationFree(&wp->mconf); for (int i = 0; i < wp->n_safekeepers; i++) { Safekeeper *sk = &wp->safekeeper[i]; Assert(sk->outbuf.data != NULL); pfree(sk->outbuf.data); + MembershipConfigurationFree(&sk->greetResponse.mconf); if (sk->voteResponse.termHistory.entries) 
pfree(sk->voteResponse.termHistory.entries); sk->voteResponse.termHistory.entries = NULL; @@ -308,6 +309,7 @@ ShutdownConnection(Safekeeper *sk) sk->state = SS_OFFLINE; sk->streamingAt = InvalidXLogRecPtr; + MembershipConfigurationFree(&sk->greetResponse.mconf); if (sk->voteResponse.termHistory.entries) pfree(sk->voteResponse.termHistory.entries); sk->voteResponse.termHistory.entries = NULL; @@ -598,11 +600,14 @@ static void SendStartWALPush(Safekeeper *sk) { WalProposer *wp = sk->wp; +#define CMD_LEN 512 + char cmd[CMD_LEN]; - if (!wp->api.conn_send_query(sk, "START_WAL_PUSH")) + snprintf(cmd, CMD_LEN, "START_WAL_PUSH (proto_version '%d')", wp->config->proto_version); + if (!wp->api.conn_send_query(sk, cmd)) { - wp_log(WARNING, "failed to send 'START_WAL_PUSH' query to safekeeper %s:%s: %s", - sk->host, sk->port, wp->api.conn_error_message(sk)); + wp_log(WARNING, "failed to send '%s' query to safekeeper %s:%s: %s", + cmd, sk->host, sk->port, wp->api.conn_error_message(sk)); ShutdownConnection(sk); return; } @@ -658,23 +663,33 @@ RecvStartWALPushResult(Safekeeper *sk) /* * Start handshake: first of all send information about the - * safekeeper. After sending, we wait on SS_HANDSHAKE_RECV for + * walproposer. After sending, we wait on SS_HANDSHAKE_RECV for * a response to finish the handshake. */ static void SendProposerGreeting(Safekeeper *sk) { + WalProposer *wp = sk->wp; + char *mconf_toml = MembershipConfigurationToString(&wp->greetRequest.mconf); + + wp_log(LOG, "sending ProposerGreeting to safekeeper %s:%s with mconf = %s", sk->host, sk->port, mconf_toml); + pfree(mconf_toml); + + PAMessageSerialize(wp, (ProposerAcceptorMessage *) &wp->greetRequest, + &sk->outbuf, wp->config->proto_version); + /* * On failure, logging & resetting the connection is handled. We just need * to handle the control flow. */ - BlockingWrite(sk, &sk->wp->greetRequest, sizeof(sk->wp->greetRequest), SS_HANDSHAKE_RECV); + BlockingWrite(sk, sk->outbuf.data, sk->outbuf.len, SS_HANDSHAKE_RECV); } static void RecvAcceptorGreeting(Safekeeper *sk) { WalProposer *wp = sk->wp; + char *mconf_toml; /* * If our reading doesn't immediately succeed, any necessary error @@ -685,7 +700,10 @@ RecvAcceptorGreeting(Safekeeper *sk) if (!AsyncReadMessage(sk, (AcceptorProposerMessage *) &sk->greetResponse)) return; - wp_log(LOG, "received AcceptorGreeting from safekeeper %s:%s, term=" INT64_FORMAT, sk->host, sk->port, sk->greetResponse.term); + mconf_toml = MembershipConfigurationToString(&sk->greetResponse.mconf); + wp_log(LOG, "received AcceptorGreeting from safekeeper %s:%s, node_id = %lu, mconf = %s, term=" UINT64_FORMAT, + sk->host, sk->port, sk->greetResponse.nodeId, mconf_toml, sk->greetResponse.term); + pfree(mconf_toml); /* Protocol is all good, move to voting. 
*/ sk->state = SS_VOTING; @@ -707,12 +725,9 @@ RecvAcceptorGreeting(Safekeeper *sk) wp->propTerm++; wp_log(LOG, "proposer connected to quorum (%d) safekeepers, propTerm=" INT64_FORMAT, wp->quorum, wp->propTerm); - wp->voteRequest = (VoteRequest) - { - .tag = 'v', - .term = wp->propTerm - }; - memcpy(wp->voteRequest.proposerId.data, wp->greetRequest.proposerId.data, UUID_LEN); + wp->voteRequest.pam.tag = 'v'; + wp->voteRequest.generation = wp->mconf.generation; + wp->voteRequest.term = wp->propTerm; } } else if (sk->greetResponse.term > wp->propTerm) @@ -759,12 +774,14 @@ SendVoteRequest(Safekeeper *sk) { WalProposer *wp = sk->wp; - /* We have quorum for voting, send our vote request */ - wp_log(LOG, "requesting vote from %s:%s for term " UINT64_FORMAT, sk->host, sk->port, wp->voteRequest.term); - /* On failure, logging & resetting is handled */ - if (!BlockingWrite(sk, &wp->voteRequest, sizeof(wp->voteRequest), SS_WAIT_VERDICT)) - return; + PAMessageSerialize(wp, (ProposerAcceptorMessage *) &wp->voteRequest, + &sk->outbuf, wp->config->proto_version); + /* We have quorum for voting, send our vote request */ + wp_log(LOG, "requesting vote from %s:%s for generation %u term " UINT64_FORMAT, sk->host, sk->port, + wp->voteRequest.generation, wp->voteRequest.term); + /* On failure, logging & resetting is handled */ + BlockingWrite(sk, sk->outbuf.data, sk->outbuf.len, SS_WAIT_VERDICT); /* If successful, wait for read-ready with SS_WAIT_VERDICT */ } @@ -778,11 +795,12 @@ RecvVoteResponse(Safekeeper *sk) return; wp_log(LOG, - "got VoteResponse from acceptor %s:%s, voteGiven=" UINT64_FORMAT ", epoch=" UINT64_FORMAT ", flushLsn=%X/%X, truncateLsn=%X/%X, timelineStartLsn=%X/%X", - sk->host, sk->port, sk->voteResponse.voteGiven, GetHighestTerm(&sk->voteResponse.termHistory), + "got VoteResponse from acceptor %s:%s, generation=%u, term=%lu, voteGiven=%u, last_log_term=" UINT64_FORMAT ", flushLsn=%X/%X, truncateLsn=%X/%X", + sk->host, sk->port, sk->voteResponse.generation, sk->voteResponse.term, + sk->voteResponse.voteGiven, + GetHighestTerm(&sk->voteResponse.termHistory), LSN_FORMAT_ARGS(sk->voteResponse.flushLsn), - LSN_FORMAT_ARGS(sk->voteResponse.truncateLsn), - LSN_FORMAT_ARGS(sk->voteResponse.timelineStartLsn)); + LSN_FORMAT_ARGS(sk->voteResponse.truncateLsn)); /* * In case of acceptor rejecting our vote, bail out, but only if either it @@ -847,9 +865,9 @@ HandleElectedProposer(WalProposer *wp) * otherwise we must be sync-safekeepers and we have nothing to do then. * * Proceeding is not only pointless but harmful, because we'd give - * safekeepers term history starting with 0/0. These hacks will go away once - * we disable implicit timeline creation on safekeepers and create it with - * non zero LSN from the start. + * safekeepers term history starting with 0/0. These hacks will go away + * once we disable implicit timeline creation on safekeepers and create it + * with non zero LSN from the start. 
*/ if (wp->propEpochStartLsn == InvalidXLogRecPtr) { @@ -942,7 +960,6 @@ DetermineEpochStartLsn(WalProposer *wp) wp->propEpochStartLsn = InvalidXLogRecPtr; wp->donorEpoch = 0; wp->truncateLsn = InvalidXLogRecPtr; - wp->timelineStartLsn = InvalidXLogRecPtr; for (int i = 0; i < wp->n_safekeepers; i++) { @@ -959,20 +976,6 @@ DetermineEpochStartLsn(WalProposer *wp) wp->donor = i; } wp->truncateLsn = Max(wp->safekeeper[i].voteResponse.truncateLsn, wp->truncateLsn); - - if (wp->safekeeper[i].voteResponse.timelineStartLsn != InvalidXLogRecPtr) - { - /* timelineStartLsn should be the same everywhere or unknown */ - if (wp->timelineStartLsn != InvalidXLogRecPtr && - wp->timelineStartLsn != wp->safekeeper[i].voteResponse.timelineStartLsn) - { - wp_log(WARNING, - "inconsistent timelineStartLsn: current %X/%X, received %X/%X", - LSN_FORMAT_ARGS(wp->timelineStartLsn), - LSN_FORMAT_ARGS(wp->safekeeper[i].voteResponse.timelineStartLsn)); - } - wp->timelineStartLsn = wp->safekeeper[i].voteResponse.timelineStartLsn; - } } } @@ -995,22 +998,11 @@ DetermineEpochStartLsn(WalProposer *wp) if (wp->propEpochStartLsn == InvalidXLogRecPtr && !wp->config->syncSafekeepers) { wp->propEpochStartLsn = wp->truncateLsn = wp->api.get_redo_start_lsn(wp); - if (wp->timelineStartLsn == InvalidXLogRecPtr) - { - wp->timelineStartLsn = wp->api.get_redo_start_lsn(wp); - } wp_log(LOG, "bumped epochStartLsn to the first record %X/%X", LSN_FORMAT_ARGS(wp->propEpochStartLsn)); } pg_atomic_write_u64(&wp->api.get_shmem_state(wp)->propEpochStartLsn, wp->propEpochStartLsn); - /* - * Safekeepers are setting truncateLsn after timelineStartLsn is known, so - * it should never be zero at this point, if we know timelineStartLsn. - * - * timelineStartLsn can be zero only on the first syncSafekeepers run. - */ - Assert((wp->truncateLsn != InvalidXLogRecPtr) || - (wp->config->syncSafekeepers && wp->truncateLsn == wp->timelineStartLsn)); + Assert(wp->truncateLsn != InvalidXLogRecPtr || wp->config->syncSafekeepers); /* * We will be generating WAL since propEpochStartLsn, so we should set @@ -1053,10 +1045,11 @@ DetermineEpochStartLsn(WalProposer *wp) if (SkipXLogPageHeader(wp, wp->propEpochStartLsn) != wp->api.get_redo_start_lsn(wp)) { /* - * However, allow to proceed if last_log_term on the node which gave - * the highest vote (i.e. point where we are going to start writing) - * actually had been won by me; plain restart of walproposer not - * intervened by concurrent compute which wrote WAL is ok. + * However, allow to proceed if last_log_term on the node which + * gave the highest vote (i.e. point where we are going to start + * writing) actually had been won by me; plain restart of + * walproposer not intervened by concurrent compute which wrote + * WAL is ok. * * This avoids compute crash after manual term_bump. 
*/ @@ -1126,14 +1119,8 @@ SendProposerElected(Safekeeper *sk) { /* safekeeper is empty or no common point, start from the beginning */ sk->startStreamingAt = wp->propTermHistory.entries[0].lsn; - wp_log(LOG, "no common point with sk %s:%s, streaming since first term at %X/%X, timelineStartLsn=%X/%X, termHistory.n_entries=%u", - sk->host, sk->port, LSN_FORMAT_ARGS(sk->startStreamingAt), LSN_FORMAT_ARGS(wp->timelineStartLsn), wp->propTermHistory.n_entries); - - /* - * wp->timelineStartLsn == InvalidXLogRecPtr can be only when timeline - * is created manually (test_s3_wal_replay) - */ - Assert(sk->startStreamingAt == wp->timelineStartLsn || wp->timelineStartLsn == InvalidXLogRecPtr); + wp_log(LOG, "no common point with sk %s:%s, streaming since first term at %X/%X, termHistory.n_entries=%u", + sk->host, sk->port, LSN_FORMAT_ARGS(sk->startStreamingAt), wp->propTermHistory.n_entries); } else { @@ -1158,29 +1145,19 @@ SendProposerElected(Safekeeper *sk) Assert(sk->startStreamingAt <= wp->availableLsn); - msg.tag = 'e'; + msg.apm.tag = 'e'; + msg.generation = wp->mconf.generation; msg.term = wp->propTerm; msg.startStreamingAt = sk->startStreamingAt; msg.termHistory = &wp->propTermHistory; - msg.timelineStartLsn = wp->timelineStartLsn; lastCommonTerm = idx >= 0 ? wp->propTermHistory.entries[idx].term : 0; wp_log(LOG, - "sending elected msg to node " UINT64_FORMAT " term=" UINT64_FORMAT ", startStreamingAt=%X/%X (lastCommonTerm=" UINT64_FORMAT "), termHistory.n_entries=%u to %s:%s, timelineStartLsn=%X/%X", - sk->greetResponse.nodeId, msg.term, LSN_FORMAT_ARGS(msg.startStreamingAt), lastCommonTerm, msg.termHistory->n_entries, sk->host, sk->port, LSN_FORMAT_ARGS(msg.timelineStartLsn)); - - resetStringInfo(&sk->outbuf); - pq_sendint64_le(&sk->outbuf, msg.tag); - pq_sendint64_le(&sk->outbuf, msg.term); - pq_sendint64_le(&sk->outbuf, msg.startStreamingAt); - pq_sendint32_le(&sk->outbuf, msg.termHistory->n_entries); - for (int i = 0; i < msg.termHistory->n_entries; i++) - { - pq_sendint64_le(&sk->outbuf, msg.termHistory->entries[i].term); - pq_sendint64_le(&sk->outbuf, msg.termHistory->entries[i].lsn); - } - pq_sendint64_le(&sk->outbuf, msg.timelineStartLsn); + "sending elected msg to node " UINT64_FORMAT " generation=%u term=" UINT64_FORMAT ", startStreamingAt=%X/%X (lastCommonTerm=" UINT64_FORMAT "), termHistory.n_entries=%u to %s:%s", + sk->greetResponse.nodeId, msg.generation, msg.term, LSN_FORMAT_ARGS(msg.startStreamingAt), + lastCommonTerm, msg.termHistory->n_entries, sk->host, sk->port); + PAMessageSerialize(wp, (ProposerAcceptorMessage *) &msg, &sk->outbuf, wp->config->proto_version); if (!AsyncWrite(sk, sk->outbuf.data, sk->outbuf.len, SS_SEND_ELECTED_FLUSH)) return; @@ -1246,14 +1223,13 @@ static void PrepareAppendRequest(WalProposer *wp, AppendRequestHeader *req, XLogRecPtr beginLsn, XLogRecPtr endLsn) { Assert(endLsn >= beginLsn); - req->tag = 'a'; + req->apm.tag = 'a'; + req->generation = wp->mconf.generation; req->term = wp->propTerm; - req->epochStartLsn = wp->propEpochStartLsn; req->beginLsn = beginLsn; req->endLsn = endLsn; req->commitLsn = wp->commitLsn; req->truncateLsn = wp->truncateLsn; - req->proposerId = wp->greetRequest.proposerId; } /* @@ -1354,7 +1330,8 @@ SendAppendRequests(Safekeeper *sk) resetStringInfo(&sk->outbuf); /* write AppendRequest header */ - appendBinaryStringInfo(&sk->outbuf, (char *) req, sizeof(AppendRequestHeader)); + PAMessageSerialize(wp, (ProposerAcceptorMessage *) req, &sk->outbuf, wp->config->proto_version); + /* prepare for reading WAL into the outbuf */ 
enlargeStringInfo(&sk->outbuf, req->endLsn - req->beginLsn); sk->active_state = SS_ACTIVE_READ_WAL; } @@ -1367,14 +1344,17 @@ SendAppendRequests(Safekeeper *sk) req = &sk->appendRequest; req_len = req->endLsn - req->beginLsn; - /* We send zero sized AppenRequests as heartbeats; don't wal_read for these. */ + /* + * We send zero sized AppenRequests as heartbeats; don't wal_read + * for these. + */ if (req_len > 0) { switch (wp->api.wal_read(sk, - &sk->outbuf.data[sk->outbuf.len], - req->beginLsn, - req_len, - &errmsg)) + &sk->outbuf.data[sk->outbuf.len], + req->beginLsn, + req_len, + &errmsg)) { case NEON_WALREAD_SUCCESS: break; @@ -1382,7 +1362,7 @@ SendAppendRequests(Safekeeper *sk) return true; case NEON_WALREAD_ERROR: wp_log(WARNING, "WAL reading for node %s:%s failed: %s", - sk->host, sk->port, errmsg); + sk->host, sk->port, errmsg); ShutdownConnection(sk); return false; default: @@ -1470,11 +1450,11 @@ RecvAppendResponses(Safekeeper *sk) * Term has changed to higher one, probably another compute is * running. If this is the case we could PANIC as well because * likely it inserted some data and our basebackup is unsuitable - * anymore. However, we also bump term manually (term_bump endpoint) - * on safekeepers for migration purposes, in this case we do want - * compute to stay alive. So restart walproposer with FATAL instead - * of panicking; if basebackup is spoiled next election will notice - * this. + * anymore. However, we also bump term manually (term_bump + * endpoint) on safekeepers for migration purposes, in this case + * we do want compute to stay alive. So restart walproposer with + * FATAL instead of panicking; if basebackup is spoiled next + * election will notice this. */ wp_log(FATAL, "WAL acceptor %s:%s with term " INT64_FORMAT " rejected our request, our term " INT64_FORMAT ", meaning another compute is running at the same time, and it conflicts with us", sk->host, sk->port, @@ -1509,7 +1489,7 @@ ParsePageserverFeedbackMessage(WalProposer *wp, StringInfo reply_message, Pagese for (i = 0; i < nkeys; i++) { - const char *key = pq_getmsgstring(reply_message); + const char *key = pq_getmsgrawstring(reply_message); unsigned int value_len = pq_getmsgint(reply_message, sizeof(int32)); if (strcmp(key, "current_timeline_size") == 0) @@ -1750,6 +1730,213 @@ HandleSafekeeperResponse(WalProposer *wp, Safekeeper *fromsk) } } +/* Serialize MembershipConfiguration into buf. */ +static void +MembershipConfigurationSerialize(MembershipConfiguration *mconf, StringInfo buf) +{ + uint32 i; + + pq_sendint32(buf, mconf->generation); + + pq_sendint32(buf, mconf->members.len); + for (i = 0; i < mconf->members.len; i++) + { + pq_sendint64(buf, mconf->members.m[i].node_id); + pq_send_ascii_string(buf, mconf->members.m[i].host); + pq_sendint16(buf, mconf->members.m[i].port); + } + + /* + * There is no special mark for absent new_members; zero members in + * invalid, so zero len means absent. 
+ */ + pq_sendint32(buf, mconf->new_members.len); + for (i = 0; i < mconf->new_members.len; i++) + { + pq_sendint64(buf, mconf->new_members.m[i].node_id); + pq_send_ascii_string(buf, mconf->new_members.m[i].host); + pq_sendint16(buf, mconf->new_members.m[i].port); + } +} + +/* Serialize proposer -> acceptor message into buf using specified version */ +static void +PAMessageSerialize(WalProposer *wp, ProposerAcceptorMessage *msg, StringInfo buf, int proto_version) +{ + /* both version are supported currently until we fully migrate to 3 */ + Assert(proto_version == 3 || proto_version == 2); + + resetStringInfo(buf); + + if (proto_version == 3) + { + /* + * v2 sends structs for some messages as is, so commonly send tag only + * for v3 + */ + pq_sendint8(buf, msg->tag); + + switch (msg->tag) + { + case 'g': + { + ProposerGreeting *m = (ProposerGreeting *) msg; + + pq_send_ascii_string(buf, m->tenant_id); + pq_send_ascii_string(buf, m->timeline_id); + MembershipConfigurationSerialize(&m->mconf, buf); + pq_sendint32(buf, m->pg_version); + pq_sendint64(buf, m->system_id); + pq_sendint32(buf, m->wal_seg_size); + break; + } + case 'v': + { + VoteRequest *m = (VoteRequest *) msg; + + pq_sendint32(buf, m->generation); + pq_sendint64(buf, m->term); + break; + + } + case 'e': + { + ProposerElected *m = (ProposerElected *) msg; + + pq_sendint32(buf, m->generation); + pq_sendint64(buf, m->term); + pq_sendint64(buf, m->startStreamingAt); + pq_sendint32(buf, m->termHistory->n_entries); + for (uint32 i = 0; i < m->termHistory->n_entries; i++) + { + pq_sendint64(buf, m->termHistory->entries[i].term); + pq_sendint64(buf, m->termHistory->entries[i].lsn); + } + break; + } + case 'a': + { + /* + * Note: this serializes only AppendRequestHeader, caller + * is expected to append WAL data later. + */ + AppendRequestHeader *m = (AppendRequestHeader *) msg; + + pq_sendint32(buf, m->generation); + pq_sendint64(buf, m->term); + pq_sendint64(buf, m->beginLsn); + pq_sendint64(buf, m->endLsn); + pq_sendint64(buf, m->commitLsn); + pq_sendint64(buf, m->truncateLsn); + break; + } + default: + wp_log(FATAL, "unexpected message type %c to serialize", msg->tag); + } + return; + } + + if (proto_version == 2) + { + switch (msg->tag) + { + case 'g': + { + /* v2 sent struct as is */ + ProposerGreeting *m = (ProposerGreeting *) msg; + ProposerGreetingV2 greetRequestV2; + + /* Fill also v2 struct. 
*/ + greetRequestV2.tag = 'g'; + greetRequestV2.protocolVersion = proto_version; + greetRequestV2.pgVersion = m->pg_version; + + /* + * v3 removed this field because it's easier to pass as + * libq or START_WAL_PUSH options + */ + memset(&greetRequestV2.proposerId, 0, sizeof(greetRequestV2.proposerId)); + greetRequestV2.systemId = wp->config->systemId; + if (*m->timeline_id != '\0' && + !HexDecodeString(greetRequestV2.timeline_id, m->timeline_id, 16)) + wp_log(FATAL, "could not parse neon.timeline_id, %s", m->timeline_id); + if (*m->tenant_id != '\0' && + !HexDecodeString(greetRequestV2.tenant_id, m->tenant_id, 16)) + wp_log(FATAL, "could not parse neon.tenant_id, %s", m->tenant_id); + + greetRequestV2.timeline = wp->config->pgTimeline; + greetRequestV2.walSegSize = wp->config->wal_segment_size; + + pq_sendbytes(buf, (char *) &greetRequestV2, sizeof(greetRequestV2)); + break; + } + case 'v': + { + /* v2 sent struct as is */ + VoteRequest *m = (VoteRequest *) msg; + VoteRequestV2 voteRequestV2; + + voteRequestV2.tag = m->pam.tag; + voteRequestV2.term = m->term; + /* removed field */ + memset(&voteRequestV2.proposerId, 0, sizeof(voteRequestV2.proposerId)); + pq_sendbytes(buf, (char *) &voteRequestV2, sizeof(voteRequestV2)); + break; + } + case 'e': + { + ProposerElected *m = (ProposerElected *) msg; + + pq_sendint64_le(buf, m->apm.tag); + pq_sendint64_le(buf, m->term); + pq_sendint64_le(buf, m->startStreamingAt); + pq_sendint32_le(buf, m->termHistory->n_entries); + for (int i = 0; i < m->termHistory->n_entries; i++) + { + pq_sendint64_le(buf, m->termHistory->entries[i].term); + pq_sendint64_le(buf, m->termHistory->entries[i].lsn); + } + /* + * Removed timeline_start_lsn. Still send it as a valid + * value until safekeepers taking it from term history are + * deployed. + */ + pq_sendint64_le(buf, m->termHistory->entries[0].lsn); + break; + } + case 'a': + + /* + * Note: this serializes only AppendRequestHeader, caller is + * expected to append WAL data later. + */ + { + /* v2 sent struct as is */ + AppendRequestHeader *m = (AppendRequestHeader *) msg; + AppendRequestHeaderV2 appendRequestHeaderV2; + + appendRequestHeaderV2.tag = m->apm.tag; + appendRequestHeaderV2.term = m->term; + appendRequestHeaderV2.epochStartLsn = 0; /* removed field */ + appendRequestHeaderV2.beginLsn = m->beginLsn; + appendRequestHeaderV2.endLsn = m->endLsn; + appendRequestHeaderV2.commitLsn = m->commitLsn; + appendRequestHeaderV2.truncateLsn = m->truncateLsn; + /* removed field */ + memset(&appendRequestHeaderV2.proposerId, 0, sizeof(appendRequestHeaderV2.proposerId)); + + pq_sendbytes(buf, (char *) &appendRequestHeaderV2, sizeof(appendRequestHeaderV2)); + break; + } + + default: + wp_log(FATAL, "unexpected message type %c to serialize", msg->tag); + } + return; + } + wp_log(FATAL, "unexpected proto_version %d", proto_version); +} + /* * Try to read CopyData message from i'th safekeeper, resetting connection on * failure. @@ -1779,6 +1966,37 @@ AsyncRead(Safekeeper *sk, char **buf, int *buf_size) return false; } +/* Deserialize membership configuration from buf to mconf. 
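As a concrete reference for the v3 framing produced by PAMessageSerialize() above, here is a minimal standalone sketch of the 13-byte VoteRequest payload: a one-byte tag 'v' followed by the generation and term in network byte order. Plain C helpers stand in for pq_sendint32/pq_sendint64, and the generation and term values are made up for illustration.

#include <stdint.h>
#include <stdio.h>

static size_t
put_be32(uint8_t *p, uint32_t v)
{
    for (int i = 0; i < 4; i++)
        p[i] = (uint8_t) (v >> (8 * (3 - i)));
    return 4;
}

static size_t
put_be64(uint8_t *p, uint64_t v)
{
    for (int i = 0; i < 8; i++)
        p[i] = (uint8_t) (v >> (8 * (7 - i)));
    return 8;
}

int
main(void)
{
    uint8_t     buf[13];
    size_t      off = 0;
    uint32_t    generation = 1;      /* example values only */
    uint64_t    term = 42;

    buf[off++] = 'v';                /* message tag */
    off += put_be32(buf + off, generation);
    off += put_be64(buf + off, term);

    printf("payload is %zu bytes:", off);    /* 13 */
    for (size_t i = 0; i < off; i++)
        printf(" %02x", buf[i]);
    printf("\n");
    return 0;
}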
*/ +static void +MembershipConfigurationDeserialize(MembershipConfiguration *mconf, StringInfo buf) +{ + uint32 i; + + mconf->generation = pq_getmsgint32(buf); + mconf->members.len = pq_getmsgint32(buf); + mconf->members.m = palloc0(sizeof(SafekeeperId) * mconf->members.len); + for (i = 0; i < mconf->members.len; i++) + { + const char *buf_host; + + mconf->members.m[i].node_id = pq_getmsgint64(buf); + buf_host = pq_getmsgrawstring(buf); + strlcpy(mconf->members.m[i].host, buf_host, sizeof(mconf->members.m[i].host)); + mconf->members.m[i].port = pq_getmsgint16(buf); + } + mconf->new_members.len = pq_getmsgint32(buf); + mconf->new_members.m = palloc0(sizeof(SafekeeperId) * mconf->new_members.len); + for (i = 0; i < mconf->new_members.len; i++) + { + const char *buf_host; + + mconf->new_members.m[i].node_id = pq_getmsgint64(buf); + buf_host = pq_getmsgrawstring(buf); + strlcpy(mconf->new_members.m[i].host, buf_host, sizeof(mconf->new_members.m[i].host)); + mconf->new_members.m[i].port = pq_getmsgint16(buf); + } +} + /* * Read next message with known type into provided struct, by reading a CopyData * block from the safekeeper's postgres connection, returning whether the read @@ -1787,6 +2005,8 @@ AsyncRead(Safekeeper *sk, char **buf, int *buf_size) * If the read needs more polling, we return 'false' and keep the state * unmodified, waiting until it becomes read-ready to try again. If it fully * failed, a warning is emitted and the connection is reset. + * + * Note: it pallocs if needed, i.e. for AcceptorGreeting and VoteResponse fields. */ static bool AsyncReadMessage(Safekeeper *sk, AcceptorProposerMessage *anymsg) @@ -1795,82 +2015,154 @@ AsyncReadMessage(Safekeeper *sk, AcceptorProposerMessage *anymsg) char *buf; int buf_size; - uint64 tag; + uint8 tag; StringInfoData s; if (!(AsyncRead(sk, &buf, &buf_size))) return false; + sk->latestMsgReceivedAt = wp->api.get_current_timestamp(wp); /* parse it */ s.data = buf; s.len = buf_size; + s.maxlen = buf_size; s.cursor = 0; - tag = pq_getmsgint64_le(&s); - if (tag != anymsg->tag) + if (wp->config->proto_version == 3) { - wp_log(WARNING, "unexpected message tag %c from node %s:%s in state %s", (char) tag, sk->host, - sk->port, FormatSafekeeperState(sk)); - ResetConnection(sk); - return false; - } - sk->latestMsgReceivedAt = wp->api.get_current_timestamp(wp); - switch (tag) - { - case 'g': - { - AcceptorGreeting *msg = (AcceptorGreeting *) anymsg; - - msg->term = pq_getmsgint64_le(&s); - msg->nodeId = pq_getmsgint64_le(&s); - pq_getmsgend(&s); - return true; - } - - case 'v': - { - VoteResponse *msg = (VoteResponse *) anymsg; - - msg->term = pq_getmsgint64_le(&s); - msg->voteGiven = pq_getmsgint64_le(&s); - msg->flushLsn = pq_getmsgint64_le(&s); - msg->truncateLsn = pq_getmsgint64_le(&s); - msg->termHistory.n_entries = pq_getmsgint32_le(&s); - msg->termHistory.entries = palloc(sizeof(TermSwitchEntry) * msg->termHistory.n_entries); - for (int i = 0; i < msg->termHistory.n_entries; i++) + tag = pq_getmsgbyte(&s); + if (tag != anymsg->tag) + { + wp_log(WARNING, "unexpected message tag %c from node %s:%s in state %s", (char) tag, sk->host, + sk->port, FormatSafekeeperState(sk)); + ResetConnection(sk); + return false; + } + switch (tag) + { + case 'g': { - msg->termHistory.entries[i].term = pq_getmsgint64_le(&s); - msg->termHistory.entries[i].lsn = pq_getmsgint64_le(&s); + AcceptorGreeting *msg = (AcceptorGreeting *) anymsg; + + msg->nodeId = pq_getmsgint64(&s); + MembershipConfigurationDeserialize(&msg->mconf, &s); + msg->term = pq_getmsgint64(&s); + 
pq_getmsgend(&s); + return true; } - msg->timelineStartLsn = pq_getmsgint64_le(&s); - pq_getmsgend(&s); - return true; - } + case 'v': + { + VoteResponse *msg = (VoteResponse *) anymsg; - case 'a': - { - AppendResponse *msg = (AppendResponse *) anymsg; + msg->generation = pq_getmsgint32(&s); + msg->term = pq_getmsgint64(&s); + msg->voteGiven = pq_getmsgbyte(&s); + msg->flushLsn = pq_getmsgint64(&s); + msg->truncateLsn = pq_getmsgint64(&s); + msg->termHistory.n_entries = pq_getmsgint32(&s); + msg->termHistory.entries = palloc(sizeof(TermSwitchEntry) * msg->termHistory.n_entries); + for (uint32 i = 0; i < msg->termHistory.n_entries; i++) + { + msg->termHistory.entries[i].term = pq_getmsgint64(&s); + msg->termHistory.entries[i].lsn = pq_getmsgint64(&s); + } + pq_getmsgend(&s); + return true; + } + case 'a': + { + AppendResponse *msg = (AppendResponse *) anymsg; - msg->term = pq_getmsgint64_le(&s); - msg->flushLsn = pq_getmsgint64_le(&s); - msg->commitLsn = pq_getmsgint64_le(&s); - msg->hs.ts = pq_getmsgint64_le(&s); - msg->hs.xmin.value = pq_getmsgint64_le(&s); - msg->hs.catalog_xmin.value = pq_getmsgint64_le(&s); - if (s.len > s.cursor) - ParsePageserverFeedbackMessage(wp, &s, &msg->ps_feedback); - else - msg->ps_feedback.present = false; - pq_getmsgend(&s); - return true; - } - - default: - { - Assert(false); - return false; - } + msg->generation = pq_getmsgint32(&s); + msg->term = pq_getmsgint64(&s); + msg->flushLsn = pq_getmsgint64(&s); + msg->commitLsn = pq_getmsgint64(&s); + msg->hs.ts = pq_getmsgint64(&s); + msg->hs.xmin.value = pq_getmsgint64(&s); + msg->hs.catalog_xmin.value = pq_getmsgint64(&s); + if (s.len > s.cursor) + ParsePageserverFeedbackMessage(wp, &s, &msg->ps_feedback); + else + msg->ps_feedback.present = false; + pq_getmsgend(&s); + return true; + } + default: + { + wp_log(FATAL, "unexpected message tag %c to read", (char) tag); + return false; + } + } } + else if (wp->config->proto_version == 2) + { + tag = pq_getmsgint64_le(&s); + if (tag != anymsg->tag) + { + wp_log(WARNING, "unexpected message tag %c from node %s:%s in state %s", (char) tag, sk->host, + sk->port, FormatSafekeeperState(sk)); + ResetConnection(sk); + return false; + } + switch (tag) + { + case 'g': + { + AcceptorGreeting *msg = (AcceptorGreeting *) anymsg; + + msg->term = pq_getmsgint64_le(&s); + msg->nodeId = pq_getmsgint64_le(&s); + pq_getmsgend(&s); + return true; + } + + case 'v': + { + VoteResponse *msg = (VoteResponse *) anymsg; + + msg->term = pq_getmsgint64_le(&s); + msg->voteGiven = pq_getmsgint64_le(&s); + msg->flushLsn = pq_getmsgint64_le(&s); + msg->truncateLsn = pq_getmsgint64_le(&s); + msg->termHistory.n_entries = pq_getmsgint32_le(&s); + msg->termHistory.entries = palloc(sizeof(TermSwitchEntry) * msg->termHistory.n_entries); + for (int i = 0; i < msg->termHistory.n_entries; i++) + { + msg->termHistory.entries[i].term = pq_getmsgint64_le(&s); + msg->termHistory.entries[i].lsn = pq_getmsgint64_le(&s); + } + pq_getmsgint64_le(&s); /* timelineStartLsn */ + pq_getmsgend(&s); + return true; + } + + case 'a': + { + AppendResponse *msg = (AppendResponse *) anymsg; + + msg->term = pq_getmsgint64_le(&s); + msg->flushLsn = pq_getmsgint64_le(&s); + msg->commitLsn = pq_getmsgint64_le(&s); + msg->hs.ts = pq_getmsgint64_le(&s); + msg->hs.xmin.value = pq_getmsgint64_le(&s); + msg->hs.catalog_xmin.value = pq_getmsgint64_le(&s); + if (s.len > s.cursor) + ParsePageserverFeedbackMessage(wp, &s, &msg->ps_feedback); + else + msg->ps_feedback.present = false; + pq_getmsgend(&s); + return true; + } + + default: + 
{ + wp_log(FATAL, "unexpected message tag %c to read", (char) tag); + return false; + } + } + } + wp_log(FATAL, "unsupported proto_version %d", wp->config->proto_version); + return false; /* keep the compiler quiet */ } /* @@ -2246,3 +2538,45 @@ FormatEvents(WalProposer *wp, uint32 events) return (char *) &return_str; } + +/* Dump mconf as toml for observability / debugging. Result is palloc'ed. */ +static char * +MembershipConfigurationToString(MembershipConfiguration *mconf) +{ + StringInfoData s; + uint32 i; + + initStringInfo(&s); + appendStringInfo(&s, "{gen = %u", mconf->generation); + appendStringInfoString(&s, ", members = ["); + for (i = 0; i < mconf->members.len; i++) + { + if (i > 0) + appendStringInfoString(&s, ", "); + appendStringInfo(&s, "{node_id = %lu", mconf->members.m[i].node_id); + appendStringInfo(&s, ", host = %s", mconf->members.m[i].host); + appendStringInfo(&s, ", port = %u }", mconf->members.m[i].port); + } + appendStringInfo(&s, "], new_members = ["); + for (i = 0; i < mconf->new_members.len; i++) + { + if (i > 0) + appendStringInfoString(&s, ", "); + appendStringInfo(&s, "{node_id = %lu", mconf->new_members.m[i].node_id); + appendStringInfo(&s, ", host = %s", mconf->new_members.m[i].host); + appendStringInfo(&s, ", port = %u }", mconf->new_members.m[i].port); + } + appendStringInfoString(&s, "]}"); + return s.data; +} + +static void +MembershipConfigurationFree(MembershipConfiguration *mconf) +{ + if (mconf->members.m) + pfree(mconf->members.m); + mconf->members.m = NULL; + if (mconf->new_members.m) + pfree(mconf->new_members.m); + mconf->new_members.m = NULL; +} diff --git a/pgxn/neon/walproposer.h b/pgxn/neon/walproposer.h index d8c44f8182..eee55f924f 100644 --- a/pgxn/neon/walproposer.h +++ b/pgxn/neon/walproposer.h @@ -12,9 +12,6 @@ #include "neon_walreader.h" #include "pagestore_client.h" -#define SK_MAGIC 0xCafeCeefu -#define SK_PROTOCOL_VERSION 2 - #define MAX_SAFEKEEPERS 32 #define MAX_SEND_SIZE (XLOG_BLCKSZ * 16) /* max size of a single* WAL * message */ @@ -143,12 +140,71 @@ typedef uint64 term_t; /* neon storage node id */ typedef uint64 NNodeId; +/* + * Number uniquely identifying safekeeper membership configuration. + * This and following structs pair ones in membership.rs. + */ +typedef uint32 Generation; + +typedef struct SafekeeperId +{ + NNodeId node_id; + char host[MAXCONNINFO]; + uint16 port; +} SafekeeperId; + +/* Set of safekeepers. */ +typedef struct MemberSet +{ + uint32 len; /* number of members */ + SafekeeperId *m; /* ids themselves */ +} MemberSet; + +/* Timeline safekeeper membership configuration. */ +typedef struct MembershipConfiguration +{ + Generation generation; + MemberSet members; + /* Has 0 n_members in non joint conf. */ + MemberSet new_members; +} MembershipConfiguration; + /* * Proposer <-> Acceptor messaging. */ +typedef struct ProposerAcceptorMessage +{ + uint8 tag; +} ProposerAcceptorMessage; + /* Initial Proposer -> Acceptor message */ typedef struct ProposerGreeting +{ + ProposerAcceptorMessage pam; /* message tag */ + + /* + * tenant/timeline ids as C strings with standard hex notation for ease of + * printing. In principle they are not strictly needed as ttid is also + * passed as libpq options. + */ + char *tenant_id; + char *timeline_id; + /* Full conf is carried to allow safekeeper switch */ + MembershipConfiguration mconf; + + /* + * pg_version and wal_seg_size are used for timeline creation until we + * fully migrate to doing externally. systemId is only used as a sanity + * cross check. 
+ */ + uint32 pg_version; /* in PG_VERSION_NUM format */ + uint64 system_id; /* Postgres system identifier. */ + uint32 wal_seg_size; +} ProposerGreeting; + +/* protocol v2 variant, kept while wp supports it */ +typedef struct ProposerGreetingV2 { uint64 tag; /* message tag */ uint32 protocolVersion; /* proposer-safekeeper protocol version */ @@ -159,32 +215,42 @@ typedef struct ProposerGreeting uint8 tenant_id[16]; TimeLineID timeline; uint32 walSegSize; -} ProposerGreeting; +} ProposerGreetingV2; typedef struct AcceptorProposerMessage { - uint64 tag; + uint8 tag; } AcceptorProposerMessage; /* - * Acceptor -> Proposer initial response: the highest term acceptor voted for. + * Acceptor -> Proposer initial response: the highest term acceptor voted for, + * its node id and configuration. */ typedef struct AcceptorGreeting { AcceptorProposerMessage apm; - term_t term; NNodeId nodeId; + MembershipConfiguration mconf; + term_t term; } AcceptorGreeting; /* * Proposer -> Acceptor vote request. */ typedef struct VoteRequest +{ + ProposerAcceptorMessage pam; /* message tag */ + Generation generation; /* membership conf generation */ + term_t term; +} VoteRequest; + +/* protocol v2 variant, kept while wp supports it */ +typedef struct VoteRequestV2 { uint64 tag; term_t term; pg_uuid_t proposerId; /* for monitoring/debugging */ -} VoteRequest; +} VoteRequestV2; /* Element of term switching chain. */ typedef struct TermSwitchEntry @@ -203,8 +269,15 @@ typedef struct TermHistory typedef struct VoteResponse { AcceptorProposerMessage apm; + + /* + * Membership conf generation. It's not strictly required because on + * mismatch safekeeper is expected to ERROR the connection, but let's + * sanity check it. + */ + Generation generation; term_t term; - uint64 voteGiven; + uint8 voteGiven; /* * Safekeeper flush_lsn (end of WAL) + history of term switches allow @@ -214,7 +287,6 @@ typedef struct VoteResponse XLogRecPtr truncateLsn; /* minimal LSN which may be needed for* * recovery of some safekeeper */ TermHistory termHistory; - XLogRecPtr timelineStartLsn; /* timeline globally starts at this LSN */ } VoteResponse; /* @@ -223,20 +295,37 @@ typedef struct VoteResponse */ typedef struct ProposerElected { - uint64 tag; + AcceptorProposerMessage apm; + Generation generation; /* membership conf generation */ term_t term; /* proposer will send since this point */ XLogRecPtr startStreamingAt; /* history of term switches up to this proposer */ TermHistory *termHistory; - /* timeline globally starts at this LSN */ - XLogRecPtr timelineStartLsn; } ProposerElected; /* * Header of request with WAL message sent from proposer to safekeeper. 
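To make the new membership types concrete, here is a hedged fragment (not part of the patch) that fills a one-safekeeper, generation-1 configuration, assuming the walproposer.h definitions above plus PostgreSQL's palloc0 and strlcpy. The trailing comment shows the string the static MembershipConfigurationToString() helper in walproposer.c renders for such a value; host name and port are hypothetical.

MembershipConfiguration mconf = {0};
SafekeeperId *ids = palloc0(sizeof(SafekeeperId) * 1);

ids[0].node_id = 1;
strlcpy(ids[0].host, "sk-1", sizeof(ids[0].host));
ids[0].port = 5454;

mconf.generation = 1;
mconf.members.len = 1;
mconf.members.m = ids;
/* non-joint configuration: new_members stays empty, i.e. len == 0 */

/*
 * MembershipConfigurationToString(&mconf) would produce:
 * {gen = 1, members = [{node_id = 1, host = sk-1, port = 5454 }], new_members = []}
 */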
*/ typedef struct AppendRequestHeader +{ + AcceptorProposerMessage apm; + Generation generation; /* membership conf generation */ + term_t term; /* term of the proposer */ + XLogRecPtr beginLsn; /* start position of message in WAL */ + XLogRecPtr endLsn; /* end position of message in WAL */ + XLogRecPtr commitLsn; /* LSN committed by quorum of safekeepers */ + + /* + * minimal LSN which may be needed for recovery of some safekeeper (end + * lsn + 1 of last chunk streamed to everyone) + */ + XLogRecPtr truncateLsn; + /* in the AppendRequest message, WAL data follows */ +} AppendRequestHeader; + +/* protocol v2 variant, kept while wp supports it */ +typedef struct AppendRequestHeaderV2 { uint64 tag; term_t term; /* term of the proposer */ @@ -256,7 +345,8 @@ typedef struct AppendRequestHeader */ XLogRecPtr truncateLsn; pg_uuid_t proposerId; /* for monitoring/debugging */ -} AppendRequestHeader; + /* in the AppendRequest message, WAL data follows */ +} AppendRequestHeaderV2; /* * Hot standby feedback received from replica @@ -309,6 +399,13 @@ typedef struct AppendResponse { AcceptorProposerMessage apm; + /* + * Membership conf generation. It's not strictly required because on + * mismatch safekeeper is expected to ERROR the connection, but let's + * sanity check it. + */ + Generation generation; + /* * Current term of the safekeeper; if it is higher than proposer's, the * compute is out of date. @@ -644,6 +741,8 @@ typedef struct WalProposerConfig /* Will be passed to safekeepers in greet request. */ TimeLineID pgTimeline; + int proto_version; + #ifdef WALPROPOSER_LIB void *callback_data; #endif @@ -656,11 +755,14 @@ typedef struct WalProposerConfig typedef struct WalProposer { WalProposerConfig *config; - int n_safekeepers; + /* Current walproposer membership configuration */ + MembershipConfiguration mconf; /* (n_safekeepers / 2) + 1 */ int quorum; + /* Number of occupied slots in safekeepers[] */ + int n_safekeepers; Safekeeper safekeeper[MAX_SAFEKEEPERS]; /* WAL has been generated up to this point */ @@ -670,6 +772,7 @@ typedef struct WalProposer XLogRecPtr commitLsn; ProposerGreeting greetRequest; + ProposerGreetingV2 greetRequestV2; /* Vote request for safekeeper */ VoteRequest voteRequest; diff --git a/pgxn/neon/walproposer_compat.c b/pgxn/neon/walproposer_compat.c index 35d984c52e..a986160224 100644 --- a/pgxn/neon/walproposer_compat.c +++ b/pgxn/neon/walproposer_compat.c @@ -117,14 +117,13 @@ pq_getmsgbytes(StringInfo msg, int datalen) } /* -------------------------------- - * pq_getmsgstring - get a null-terminated text string (with conversion) + * pq_getmsgrawstring - get a null-terminated text string - NO conversion * - * May return a pointer directly into the message buffer, or a pointer - * to a palloc'd conversion result. + * Returns a pointer directly into the message buffer. 
* -------------------------------- */ const char * -pq_getmsgstring(StringInfo msg) +pq_getmsgrawstring(StringInfo msg) { char *str; int slen; @@ -155,6 +154,45 @@ pq_getmsgend(StringInfo msg) ExceptionalCondition("invalid msg format", __FILE__, __LINE__); } +/* -------------------------------- + * pq_sendbytes - append raw data to a StringInfo buffer + * -------------------------------- + */ +void +pq_sendbytes(StringInfo buf, const void *data, int datalen) +{ + /* use variant that maintains a trailing null-byte, out of caution */ + appendBinaryStringInfo(buf, data, datalen); +} + +/* -------------------------------- + * pq_send_ascii_string - append a null-terminated text string (without conversion) + * + * This function intentionally bypasses encoding conversion, instead just + * silently replacing any non-7-bit-ASCII characters with question marks. + * It is used only when we are having trouble sending an error message to + * the client with normal localization and encoding conversion. The caller + * should already have taken measures to ensure the string is just ASCII; + * the extra work here is just to make certain we don't send a badly encoded + * string to the client (which might or might not be robust about that). + * + * NB: passed text string must be null-terminated, and so is the data + * sent to the frontend. + * -------------------------------- + */ +void +pq_send_ascii_string(StringInfo buf, const char *str) +{ + while (*str) + { + char ch = *str++; + + if (IS_HIGHBIT_SET(ch)) + ch = '?'; + appendStringInfoCharMacro(buf, ch); + } + appendStringInfoChar(buf, '\0'); +} /* * Produce a C-string representation of a TimestampTz. diff --git a/pgxn/neon/walproposer_pg.c b/pgxn/neon/walproposer_pg.c index 86444084ff..b21184de57 100644 --- a/pgxn/neon/walproposer_pg.c +++ b/pgxn/neon/walproposer_pg.c @@ -59,9 +59,11 @@ #define WAL_PROPOSER_SLOT_NAME "wal_proposer_slot" +/* GUCs */ char *wal_acceptors_list = ""; int wal_acceptor_reconnect_timeout = 1000; int wal_acceptor_connection_timeout = 10000; +int safekeeper_proto_version = 2; /* Set to true in the walproposer bgw. 
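For illustration of the helper above: pq_send_ascii_string() copies the bytes as-is except that any byte with the high bit set becomes '?', and it always appends a terminating NUL. A short usage fragment follows (illustrative only, assuming the helper is in scope and the source literals are UTF-8 encoded).

StringInfoData buf;

initStringInfo(&buf);
pq_send_ascii_string(&buf, "sk-1");     /* appends 's' 'k' '-' '1' '\0', so buf.len == 5 */
pq_send_ascii_string(&buf, "café");     /* the two UTF-8 bytes of 'é' become '?': 'c' 'a' 'f' '?' '?' '\0' */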
*/ static bool am_walproposer; @@ -126,6 +128,7 @@ init_walprop_config(bool syncSafekeepers) else walprop_config.systemId = 0; walprop_config.pgTimeline = walprop_pg_get_timeline_id(); + walprop_config.proto_version = safekeeper_proto_version; } /* @@ -219,25 +222,37 @@ nwp_register_gucs(void) PGC_SIGHUP, GUC_UNIT_MS, NULL, NULL, NULL); + + DefineCustomIntVariable( + "neon.safekeeper_proto_version", + "Version of compute <-> safekeeper protocol.", + "Used while migrating from 2 to 3.", + &safekeeper_proto_version, + 2, 0, INT_MAX, + PGC_POSTMASTER, + 0, + NULL, NULL, NULL); } static int split_safekeepers_list(char *safekeepers_list, char *safekeepers[]) { - int n_safekeepers = 0; - char *curr_sk = safekeepers_list; + int n_safekeepers = 0; + char *curr_sk = safekeepers_list; for (char *coma = safekeepers_list; coma != NULL && *coma != '\0'; curr_sk = coma) { - if (++n_safekeepers >= MAX_SAFEKEEPERS) { + if (++n_safekeepers >= MAX_SAFEKEEPERS) + { wpg_log(FATAL, "too many safekeepers"); } coma = strchr(coma, ','); - safekeepers[n_safekeepers-1] = curr_sk; + safekeepers[n_safekeepers - 1] = curr_sk; - if (coma != NULL) { + if (coma != NULL) + { *coma++ = '\0'; } } @@ -252,10 +267,10 @@ split_safekeepers_list(char *safekeepers_list, char *safekeepers[]) static bool safekeepers_cmp(char *old, char *new) { - char *safekeepers_old[MAX_SAFEKEEPERS]; - char *safekeepers_new[MAX_SAFEKEEPERS]; - int len_old = 0; - int len_new = 0; + char *safekeepers_old[MAX_SAFEKEEPERS]; + char *safekeepers_new[MAX_SAFEKEEPERS]; + int len_old = 0; + int len_new = 0; len_old = split_safekeepers_list(old, safekeepers_old); len_new = split_safekeepers_list(new, safekeepers_new); @@ -292,7 +307,8 @@ assign_neon_safekeepers(const char *newval, void *extra) if (!am_walproposer) return; - if (!newval) { + if (!newval) + { /* should never happen */ wpg_log(FATAL, "neon.safekeepers is empty"); } @@ -301,11 +317,11 @@ assign_neon_safekeepers(const char *newval, void *extra) newval_copy = pstrdup(newval); oldval = pstrdup(wal_acceptors_list); - /* + /* * TODO: restarting through FATAL is stupid and introduces 1s delay before - * next bgw start. We should refactor walproposer to allow graceful exit and - * thus remove this delay. - * XXX: If you change anything here, sync with test_safekeepers_reconfigure_reorder. + * next bgw start. We should refactor walproposer to allow graceful exit + * and thus remove this delay. XXX: If you change anything here, sync with + * test_safekeepers_reconfigure_reorder. */ if (!safekeepers_cmp(oldval, newval_copy)) { @@ -454,7 +470,8 @@ backpressure_throttling_impl(void) memcpy(new_status, old_status, len); snprintf(new_status + len, 64, "backpressure throttling: lag %lu", lag); set_ps_display(new_status); - new_status[len] = '\0'; /* truncate off " backpressure ..." to later reset the ps */ + new_status[len] = '\0'; /* truncate off " backpressure ..." 
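For clarity on the list handling above: split_safekeepers_list() tokenizes the comma-separated neon.safekeepers value in place (commas are overwritten with NULs), which is why assign_neon_safekeepers() hands it pstrdup'ed copies. A hedged usage fragment with hypothetical host names, assuming the static functions in this file:

char    list[] = "sk-1:5454,sk-2:5454,sk-3:5454";   /* mutable scratch copy */
char   *safekeepers[MAX_SAFEKEEPERS];
int     n;

n = split_safekeepers_list(list, safekeepers);
/*
 * n == 3, and the list has been split in place, so:
 * safekeepers[0] == "sk-1:5454"
 * safekeepers[1] == "sk-2:5454"
 * safekeepers[2] == "sk-3:5454"
 */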
to later + * reset the ps */ elog(DEBUG2, "backpressure throttling: lag %lu", lag); start = GetCurrentTimestamp(); @@ -621,7 +638,7 @@ walprop_pg_start_streaming(WalProposer *wp, XLogRecPtr startpos) wpg_log(LOG, "WAL proposer starts streaming at %X/%X", LSN_FORMAT_ARGS(startpos)); cmd.slotname = WAL_PROPOSER_SLOT_NAME; - cmd.timeline = wp->greetRequest.timeline; + cmd.timeline = wp->config->pgTimeline; cmd.startpoint = startpos; StartProposerReplication(wp, &cmd); } @@ -1963,10 +1980,11 @@ walprop_pg_process_safekeeper_feedback(WalProposer *wp, Safekeeper *sk) FullTransactionId xmin = hsFeedback.xmin; FullTransactionId catalog_xmin = hsFeedback.catalog_xmin; FullTransactionId next_xid = ReadNextFullTransactionId(); + /* - * Page server is updating nextXid in checkpoint each 1024 transactions, - * so feedback xmin can be actually larger then nextXid and - * function TransactionIdInRecentPast return false in this case, + * Page server is updating nextXid in checkpoint each 1024 + * transactions, so feedback xmin can be actually larger then nextXid + * and function TransactionIdInRecentPast return false in this case, * preventing update of slot's xmin. */ if (FullTransactionIdPrecedes(next_xid, xmin)) diff --git a/poetry.lock b/poetry.lock index d66c3aae7a..ba3b0535e4 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.0.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand. [[package]] name = "aiohappyeyeballs" @@ -122,7 +122,7 @@ multidict = ">=4.5,<7.0" yarl = ">=1.12.0,<2.0" [package.extras] -speedups = ["Brotli", "aiodns (>=3.2.0)", "brotlicffi"] +speedups = ["Brotli ; platform_python_implementation == \"CPython\"", "aiodns (>=3.2.0) ; sys_platform == \"linux\" or sys_platform == \"darwin\"", "brotlicffi ; platform_python_implementation != \"CPython\""] [[package]] name = "aiopg" @@ -160,30 +160,30 @@ frozenlist = ">=1.1.0" [[package]] name = "allure-pytest" -version = "2.13.2" +version = "2.13.5" description = "Allure pytest integration" optional = false python-versions = "*" groups = ["main"] files = [ - {file = "allure-pytest-2.13.2.tar.gz", hash = "sha256:22243159e8ec81ce2b5254b4013802198821b1b42f118f69d4a289396607c7b3"}, - {file = "allure_pytest-2.13.2-py3-none-any.whl", hash = "sha256:17de9dbee7f61c8e66a5b5e818b00e419dbcea44cb55c24319401ba813220690"}, + {file = "allure-pytest-2.13.5.tar.gz", hash = "sha256:0ef8e1790c44a988db6b83c4d4f5e91451e2c4c8ea10601dfa88528d23afcf6e"}, + {file = "allure_pytest-2.13.5-py3-none-any.whl", hash = "sha256:94130bac32964b78058e62cf4b815ad97a5ac82a065e6dd2d43abac2be7640fc"}, ] [package.dependencies] -allure-python-commons = "2.13.2" +allure-python-commons = "2.13.5" pytest = ">=4.5.0" [[package]] name = "allure-python-commons" -version = "2.13.2" -description = "Common module for integrate allure with python-based frameworks" +version = "2.13.5" +description = "('Contains the API for end users as well as helper functions and classes to build Allure adapters for Python test frameworks',)" optional = false python-versions = ">=3.6" groups = ["main"] files = [ - {file = "allure-python-commons-2.13.2.tar.gz", hash = "sha256:8a03681330231b1deadd86b97ff68841c6591320114ae638570f1ed60d7a2033"}, - {file = "allure_python_commons-2.13.2-py3-none-any.whl", hash = "sha256:2bb3646ec3fbf5b36d178a5e735002bc130ae9f9ba80f080af97d368ba375051"}, + {file = "allure-python-commons-2.13.5.tar.gz", hash = 
"sha256:a232e7955811f988e49a4c1dd6c16cce7e9b81d0ea0422b1e5654d3254e2caf3"}, + {file = "allure_python_commons-2.13.5-py3-none-any.whl", hash = "sha256:8b0e837b6e32d810adec563f49e1d04127a5b6770e0232065b7cb09b9953980d"}, ] [package.dependencies] @@ -232,7 +232,7 @@ sniffio = ">=1.1" [package.extras] doc = ["Sphinx (>=7)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"] -test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17)"] +test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17) ; platform_python_implementation == \"CPython\" and platform_system != \"Windows\""] trio = ["trio (>=0.23)"] [[package]] @@ -308,8 +308,8 @@ files = [ [package.extras] docs = ["Sphinx (>=8.1.3,<8.2.0)", "sphinx-rtd-theme (>=1.2.2)"] -gssauth = ["gssapi", "sspilib"] -test = ["distro (>=1.9.0,<1.10.0)", "flake8 (>=6.1,<7.0)", "flake8-pyi (>=24.1.0,<24.2.0)", "gssapi", "k5test", "mypy (>=1.8.0,<1.9.0)", "sspilib", "uvloop (>=0.15.3)"] +gssauth = ["gssapi ; platform_system != \"Windows\"", "sspilib ; platform_system == \"Windows\""] +test = ["distro (>=1.9.0,<1.10.0)", "flake8 (>=6.1,<7.0)", "flake8-pyi (>=24.1.0,<24.2.0)", "gssapi ; platform_system == \"Linux\"", "k5test ; platform_system == \"Linux\"", "mypy (>=1.8.0,<1.9.0)", "sspilib ; platform_system == \"Windows\"", "uvloop (>=0.15.3) ; platform_system != \"Windows\" and python_version < \"3.14.0\""] [[package]] name = "attrs" @@ -324,10 +324,10 @@ files = [ ] [package.extras] -dev = ["cloudpickle", "coverage[toml] (>=5.0.2)", "furo", "hypothesis", "mypy", "pre-commit", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "six", "sphinx", "sphinx-notfound-page", "zope.interface"] +dev = ["cloudpickle ; platform_python_implementation == \"CPython\"", "coverage[toml] (>=5.0.2)", "furo", "hypothesis", "mypy", "pre-commit", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "six", "sphinx", "sphinx-notfound-page", "zope.interface"] docs = ["furo", "sphinx", "sphinx-notfound-page", "zope.interface"] -tests = ["cloudpickle", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "six", "zope.interface"] -tests-no-zope = ["cloudpickle", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "six"] +tests = ["cloudpickle ; platform_python_implementation == \"CPython\"", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "six", "zope.interface"] +tests-no-zope = ["cloudpickle ; platform_python_implementation == \"CPython\"", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "six"] [[package]] name = "aws-sam-translator" @@ -1074,10 +1074,10 @@ files = [ cffi = {version = ">=1.12", markers = "platform_python_implementation != \"PyPy\""} [package.extras] -docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=3.0.0)"] +docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=3.0.0) ; python_version >= \"3.8\""] docstest = ["pyenchant (>=3)", "readme-renderer (>=30.0)", "sphinxcontrib-spelling (>=7.3.1)"] -nox = ["nox (>=2024.4.15)", "nox[uv] (>=2024.3.2)"] -pep8test = ["check-sdist", "click (>=8.0.1)", "mypy (>=1.4)", "ruff (>=0.3.6)"] +nox = ["nox (>=2024.4.15)", "nox[uv] (>=2024.3.2) ; python_version 
>= \"3.8\""] +pep8test = ["check-sdist ; python_version >= \"3.8\"", "click (>=8.0.1)", "mypy (>=1.4)", "ruff (>=0.3.6)"] sdist = ["build (>=1.0.0)"] ssh = ["bcrypt (>=3.1.5)"] test = ["certifi (>=2024)", "cryptography-vectors (==44.0.1)", "pretend (>=0.7)", "pytest (>=7.4.0)", "pytest-benchmark (>=4.0)", "pytest-cov (>=2.10.1)", "pytest-xdist (>=3.5.0)"] @@ -1359,7 +1359,7 @@ idna = "*" sniffio = "*" [package.extras] -brotli = ["brotli", "brotlicffi"] +brotli = ["brotli ; platform_python_implementation == \"CPython\"", "brotlicffi ; platform_python_implementation != \"CPython\""] cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] http2 = ["h2 (>=3,<5)"] socks = ["socksio (==1.*)"] @@ -1545,8 +1545,8 @@ files = [ [package.extras] docs = ["jaraco.packaging (>=3.2)", "rst.linker (>=1.9)", "sphinx"] -testing = ["ecdsa", "enum34", "feedparser", "jsonlib", "numpy", "pandas", "pymongo", "pytest (>=3.5,!=3.7.3)", "pytest-black-multipy", "pytest-checkdocs (>=1.2.3)", "pytest-cov", "pytest-flake8 (<1.1.0)", "pytest-flake8 (>=1.1.1)", "scikit-learn", "sqlalchemy"] -testing-libs = ["simplejson", "ujson", "yajl"] +testing = ["ecdsa", "enum34 ; python_version == \"2.7\"", "feedparser", "jsonlib ; python_version == \"2.7\"", "numpy", "pandas", "pymongo", "pytest (>=3.5,!=3.7.3)", "pytest-black-multipy", "pytest-checkdocs (>=1.2.3)", "pytest-cov", "pytest-flake8 (<1.1.0) ; python_version <= \"3.6\"", "pytest-flake8 (>=1.1.1) ; python_version >= \"3.7\"", "scikit-learn", "sqlalchemy"] +testing-libs = ["simplejson", "ujson", "yajl ; python_version == \"2.7\""] [[package]] name = "jsonpointer" @@ -1867,7 +1867,7 @@ files = [ [package.extras] develop = ["codecov", "pycodestyle", "pytest (>=4.6)", "pytest-cov", "wheel"] docs = ["sphinx"] -gmpy = ["gmpy2 (>=2.1.0a4)"] +gmpy = ["gmpy2 (>=2.1.0a4) ; platform_python_implementation != \"PyPy\""] tests = ["pytest (>=4.6)"] [[package]] @@ -2330,7 +2330,7 @@ files = [ ] [package.extras] -test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"] +test = ["enum34 ; python_version <= \"3.4\"", "ipaddress ; python_version < \"3.0\"", "mock ; python_version < \"3.0\"", "pywin32 ; sys_platform == \"win32\"", "wmi ; sys_platform == \"win32\""] [[package]] name = "psycopg2-binary" @@ -2456,7 +2456,7 @@ typing-extensions = ">=4.12.2" [package.extras] email = ["email-validator (>=2.0.0)"] -timezone = ["tzdata"] +timezone = ["tzdata ; python_version >= \"3.9\" and platform_system == \"Windows\""] [[package]] name = "pydantic-core" @@ -3068,7 +3068,7 @@ requests = ">=2.30.0,<3.0" urllib3 = ">=1.25.10,<3.0" [package.extras] -tests = ["coverage (>=6.0.0)", "flake8", "mypy", "pytest (>=7.0.0)", "pytest-asyncio", "pytest-cov", "pytest-httpserver", "tomli", "tomli-w", "types-PyYAML", "types-requests"] +tests = ["coverage (>=6.0.0)", "flake8", "mypy", "pytest (>=7.0.0)", "pytest-asyncio", "pytest-cov", "pytest-httpserver", "tomli ; python_version < \"3.11\"", "tomli-w", "types-PyYAML", "types-requests"] [[package]] name = "rfc3339-validator" @@ -3161,7 +3161,7 @@ files = [ [package.extras] docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"] -testing = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path 
(>=3.2.0)", "mypy (==1.9)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.1)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf", "pytest-ruff (>=0.2.1)", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] +testing = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21) ; python_version >= \"3.9\" and sys_platform != \"cygwin\"", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "mypy (==1.9)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.1)", "pytest-checkdocs (>=2.4)", "pytest-cov ; platform_python_implementation != \"PyPy\"", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf ; sys_platform != \"cygwin\"", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\"", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] [[package]] name = "six" @@ -3407,8 +3407,8 @@ files = [ ] [package.extras] -brotli = ["brotli (==1.0.9)", "brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] -secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] +brotli = ["brotli (==1.0.9) ; os_name != \"nt\" and python_version < \"3\" and platform_python_implementation == \"CPython\"", "brotli (>=1.0.9) ; python_version >= \"3\" and platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; (os_name != \"nt\" or python_version >= \"3\") and platform_python_implementation != \"CPython\"", "brotlipy (>=0.6.0) ; os_name == \"nt\" and python_version < \"3\""] +secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress ; python_version == \"2.7\"", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] [[package]] @@ -3820,4 +3820,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.1" python-versions = "^3.11" -content-hash = "00ddc42c32e235b6171845fc066dcab078282ed832cd464d5e8a0afa959dd04a" +content-hash = "9711c5479c867fa614ce3d352f1bbc63dba1cb2376d347f96fbeda6f512ee308" diff --git a/pre-commit.py b/pre-commit.py index c9567e0c50..09139459d5 100755 --- a/pre-commit.py +++ b/pre-commit.py @@ -29,12 +29,12 @@ def colorify( return f"{color.value}{s}{NC}" -def rustfmt(fix_inplace: bool = False, no_color: bool = False) -> str: - cmd = "rustfmt --edition=2021" +def cargo_fmt(fix_inplace: bool = False, no_color: bool = False) -> str: + cmd = "cargo fmt" if not fix_inplace: cmd += " --check" if no_color: - cmd += " --color=never" + cmd += " -- --color=never" return cmd @@ -61,14 +61,23 @@ def get_commit_files() -> list[str]: return files.decode().splitlines() -def check(name: str, suffix: str, cmd: str, changed_files: list[str], no_color: bool = False): +def check( + name: str, + suffix: str, + cmd: str, + changed_files: list[str], + no_color: bool = False, + append_files_to_cmd: bool = True, +): print(f"Checking: {name} ", end="") applicable_files = list(filter(lambda fname: fname.strip().endswith(suffix), changed_files)) if not applicable_files: print(colorify("[NOT APPLICABLE]", Color.CYAN, no_color)) return - cmd = f'{cmd} {" ".join(applicable_files)}' + if append_files_to_cmd: + cmd = f"{cmd} {' '.join(applicable_files)}" + res = subprocess.run(cmd.split(), capture_output=True) if res.returncode 
!= 0: print(colorify("[FAILED]", Color.RED, no_color)) @@ -100,15 +109,13 @@ if __name__ == "__main__": args = parser.parse_args() files = get_commit_files() - # we use rustfmt here because cargo fmt does not accept list of files - # it internally gathers project files and feeds them to rustfmt - # so because we want to check only files included in the commit we use rustfmt directly check( - name="rustfmt", + name="cargo fmt", suffix=".rs", - cmd=rustfmt(fix_inplace=args.fix_inplace, no_color=args.no_color), + cmd=cargo_fmt(fix_inplace=args.fix_inplace, no_color=args.no_color), changed_files=files, no_color=args.no_color, + append_files_to_cmd=False, ) check( name="ruff check", diff --git a/proxy/Cargo.toml b/proxy/Cargo.toml index 6a381bf094..5964b76ecf 100644 --- a/proxy/Cargo.toml +++ b/proxy/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "proxy" version = "0.1.0" -edition.workspace = true +edition = "2024" license.workspace = true [features] diff --git a/proxy/src/auth/backend/console_redirect.rs b/proxy/src/auth/backend/console_redirect.rs index 7503b4eac9..dd48384c03 100644 --- a/proxy/src/auth/backend/console_redirect.rs +++ b/proxy/src/auth/backend/console_redirect.rs @@ -8,16 +8,16 @@ use tokio::io::{AsyncRead, AsyncWrite}; use tracing::{info, info_span}; use super::ComputeCredentialKeys; -use crate::auth::backend::ComputeUserInfo; use crate::auth::IpPattern; +use crate::auth::backend::ComputeUserInfo; use crate::cache::Cached; use crate::config::AuthenticationConfig; use crate::context::RequestContext; use crate::control_plane::client::cplane_proxy_v1; use crate::control_plane::{self, CachedNodeInfo, NodeInfo}; use crate::error::{ReportableError, UserFacingError}; -use crate::proxy::connect_compute::ComputeConnectBackend; use crate::proxy::NeonOptions; +use crate::proxy::connect_compute::ComputeConnectBackend; use crate::stream::PqStream; use crate::types::RoleName; use crate::{auth, compute, waiters}; diff --git a/proxy/src/auth/backend/jwt.rs b/proxy/src/auth/backend/jwt.rs index 5d032c0deb..942f1e13d1 100644 --- a/proxy/src/auth/backend/jwt.rs +++ b/proxy/src/auth/backend/jwt.rs @@ -6,9 +6,9 @@ use std::time::{Duration, SystemTime}; use arc_swap::ArcSwapOption; use clashmap::ClashMap; use jose_jwk::crypto::KeyInfo; -use reqwest::{redirect, Client}; -use reqwest_retry::policies::ExponentialBackoff; +use reqwest::{Client, redirect}; use reqwest_retry::RetryTransientMiddleware; +use reqwest_retry::policies::ExponentialBackoff; use serde::de::Visitor; use serde::{Deserialize, Deserializer}; use serde_json::value::RawValue; @@ -498,8 +498,8 @@ fn verify_rsa_signature( alg: &jose_jwa::Algorithm, ) -> Result<(), JwtError> { use jose_jwa::{Algorithm, Signing}; - use rsa::pkcs1v15::{Signature, VerifyingKey}; use rsa::RsaPublicKey; + use rsa::pkcs1v15::{Signature, VerifyingKey}; let key = RsaPublicKey::try_from(key).map_err(JwtError::InvalidRsaKey)?; diff --git a/proxy/src/auth/backend/local.rs b/proxy/src/auth/backend/local.rs index d10f0e82b2..9c3a3772cd 100644 --- a/proxy/src/auth/backend/local.rs +++ b/proxy/src/auth/backend/local.rs @@ -8,8 +8,8 @@ use crate::auth::backend::jwt::FetchAuthRulesError; use crate::compute::ConnCfg; use crate::compute_ctl::ComputeCtlApi; use crate::context::RequestContext; -use crate::control_plane::messages::{ColdStartInfo, EndpointJwksResponse, MetricsAuxInfo}; use crate::control_plane::NodeInfo; +use crate::control_plane::messages::{ColdStartInfo, EndpointJwksResponse, MetricsAuxInfo}; use crate::http; use crate::intern::{BranchIdTag, EndpointIdTag, 
InternId, ProjectIdTag}; use crate::types::EndpointId; diff --git a/proxy/src/auth/backend/mod.rs b/proxy/src/auth/backend/mod.rs index dc595844c5..83feed5094 100644 --- a/proxy/src/auth/backend/mod.rs +++ b/proxy/src/auth/backend/mod.rs @@ -18,7 +18,7 @@ use tracing::{debug, info, warn}; use crate::auth::credentials::check_peer_addr_is_in_list; use crate::auth::{ - self, validate_password_and_exchange, AuthError, ComputeUserInfoMaybeEndpoint, IpPattern, + self, AuthError, ComputeUserInfoMaybeEndpoint, IpPattern, validate_password_and_exchange, }; use crate::cache::Cached; use crate::config::AuthenticationConfig; @@ -32,8 +32,8 @@ use crate::control_plane::{ use crate::intern::EndpointIdInt; use crate::metrics::Metrics; use crate::protocol2::ConnectionInfoExtra; -use crate::proxy::connect_compute::ComputeConnectBackend; use crate::proxy::NeonOptions; +use crate::proxy::connect_compute::ComputeConnectBackend; use crate::rate_limiter::{BucketRateLimiter, EndpointRateLimiter}; use crate::stream::Stream; use crate::types::{EndpointCacheKey, EndpointId, RoleName}; @@ -308,10 +308,7 @@ async fn auth_quirks( let incoming_vpc_endpoint_id = match ctx.extra() { None => return Err(AuthError::MissingEndpointName), - Some(ConnectionInfoExtra::Aws { vpce_id }) => { - // Convert the vcpe_id to a string - String::from_utf8(vpce_id.to_vec()).unwrap_or_default() - } + Some(ConnectionInfoExtra::Aws { vpce_id }) => vpce_id.to_string(), Some(ConnectionInfoExtra::Azure { link_id }) => link_id.to_string(), }; let allowed_vpc_endpoint_ids = api.get_allowed_vpc_endpoint_ids(ctx, &info).await?; @@ -451,7 +448,7 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint> { Ok((Backend::ControlPlane(api, credentials), ip_allowlist)) } Self::Local(_) => { - return Err(auth::AuthError::bad_auth_method("invalid for local proxy")) + return Err(auth::AuthError::bad_auth_method("invalid for local proxy")); } }; @@ -545,7 +542,7 @@ mod tests { use tokio::io::{AsyncRead, AsyncReadExt, AsyncWriteExt}; use super::jwt::JwkCache; - use super::{auth_quirks, AuthRateLimiter}; + use super::{AuthRateLimiter, auth_quirks}; use crate::auth::backend::MaskedIp; use crate::auth::{ComputeUserInfoMaybeEndpoint, IpPattern}; use crate::config::AuthenticationConfig; @@ -556,8 +553,8 @@ mod tests { }; use crate::proxy::NeonOptions; use crate::rate_limiter::{EndpointRateLimiter, RateBucketInfo}; - use crate::scram::threadpool::ThreadPool; use crate::scram::ServerSecret; + use crate::scram::threadpool::ThreadPool; use crate::stream::{PqStream, Stream}; struct Auth { diff --git a/proxy/src/auth/credentials.rs b/proxy/src/auth/credentials.rs index eff49a402a..c1b7718e4f 100644 --- a/proxy/src/auth/credentials.rs +++ b/proxy/src/auth/credentials.rs @@ -197,7 +197,10 @@ impl<'de> serde::de::Deserialize<'de> for IpPattern { type Value = IpPattern; fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(formatter, "comma separated list with ip address, ip address range, or ip address subnet mask") + write!( + formatter, + "comma separated list with ip address, ip address range, or ip address subnet mask" + ) } fn visit_str(self, v: &str) -> Result @@ -252,8 +255,8 @@ fn project_name_valid(name: &str) -> bool { #[cfg(test)] #[expect(clippy::unwrap_used)] mod tests { - use serde_json::json; use ComputeUserInfoParseError::*; + use serde_json::json; use super::*; diff --git a/proxy/src/auth/mod.rs b/proxy/src/auth/mod.rs index 6082695a6b..5670f8e43d 100644 --- a/proxy/src/auth/mod.rs +++ b/proxy/src/auth/mod.rs @@ -5,13 +5,13 
@@ pub use backend::Backend; mod credentials; pub(crate) use credentials::{ - check_peer_addr_is_in_list, endpoint_sni, ComputeUserInfoMaybeEndpoint, - ComputeUserInfoParseError, IpPattern, + ComputeUserInfoMaybeEndpoint, ComputeUserInfoParseError, IpPattern, check_peer_addr_is_in_list, + endpoint_sni, }; mod password_hack; -pub(crate) use password_hack::parse_endpoint_param; use password_hack::PasswordHackPayload; +pub(crate) use password_hack::parse_endpoint_param; mod flow; use std::io; diff --git a/proxy/src/binary/local_proxy.rs b/proxy/src/binary/local_proxy.rs index 4ab11f828c..dedd225cba 100644 --- a/proxy/src/binary/local_proxy.rs +++ b/proxy/src/binary/local_proxy.rs @@ -4,7 +4,7 @@ use std::str::FromStr; use std::sync::Arc; use std::time::Duration; -use anyhow::{bail, ensure, Context}; +use anyhow::{Context, bail, ensure}; use camino::{Utf8Path, Utf8PathBuf}; use clap::Parser; use compute_api::spec::LocalProxySpec; @@ -19,7 +19,7 @@ use utils::sentry_init::init_sentry; use utils::{pid_file, project_build_tag, project_git_version}; use crate::auth::backend::jwt::JwkCache; -use crate::auth::backend::local::{LocalBackend, JWKS_ROLE_MAP}; +use crate::auth::backend::local::{JWKS_ROLE_MAP, LocalBackend}; use crate::auth::{self}; use crate::cancellation::CancellationHandler; use crate::config::{ diff --git a/proxy/src/binary/pg_sni_router.rs b/proxy/src/binary/pg_sni_router.rs index 94e771a61c..1aa290399c 100644 --- a/proxy/src/binary/pg_sni_router.rs +++ b/proxy/src/binary/pg_sni_router.rs @@ -5,24 +5,24 @@ /// the outside. Similar to an ingress controller for HTTPS. use std::{net::SocketAddr, sync::Arc}; -use anyhow::{anyhow, bail, ensure, Context}; +use anyhow::{Context, anyhow, bail, ensure}; use clap::Arg; -use futures::future::Either; use futures::TryFutureExt; +use futures::future::Either; use itertools::Itertools; use rustls::crypto::ring; use rustls::pki_types::PrivateKeyDer; use tokio::io::{AsyncRead, AsyncWrite}; use tokio::net::TcpListener; use tokio_util::sync::CancellationToken; -use tracing::{error, info, Instrument}; +use tracing::{Instrument, error, info}; use utils::project_git_version; use utils::sentry_init::init_sentry; use crate::context::RequestContext; use crate::metrics::{Metrics, ThreadPoolMetrics}; use crate::protocol2::ConnectionInfo; -use crate::proxy::{copy_bidirectional_client_compute, run_until_cancelled, ErrorSource}; +use crate::proxy::{ErrorSource, copy_bidirectional_client_compute, run_until_cancelled}; use crate::stream::{PqStream, Stream}; use crate::tls::TlsServerEndPoint; diff --git a/proxy/src/binary/proxy.rs b/proxy/src/binary/proxy.rs index b72799df54..eec0bf8f99 100644 --- a/proxy/src/binary/proxy.rs +++ b/proxy/src/binary/proxy.rs @@ -9,16 +9,16 @@ use remote_storage::RemoteStorageConfig; use tokio::net::TcpListener; use tokio::task::JoinSet; use tokio_util::sync::CancellationToken; -use tracing::{info, warn, Instrument}; +use tracing::{Instrument, info, warn}; use utils::sentry_init::init_sentry; use utils::{project_build_tag, project_git_version}; use crate::auth::backend::jwt::JwkCache; use crate::auth::backend::{AuthRateLimiter, ConsoleRedirectBackend, MaybeOwned}; -use crate::cancellation::{handle_cancel_messages, CancellationHandler}; +use crate::cancellation::{CancellationHandler, handle_cancel_messages}; use crate::config::{ - self, remote_storage_from_toml, AuthenticationConfig, CacheOptions, ComputeConfig, HttpConfig, - ProjectInfoCacheOptions, ProxyConfig, ProxyProtocolV2, + self, AuthenticationConfig, CacheOptions, ComputeConfig, 
HttpConfig, ProjectInfoCacheOptions, + ProxyConfig, ProxyProtocolV2, remote_storage_from_toml, }; use crate::context::parquet::ParquetUploadArgs; use crate::http::health_server::AppMetrics; @@ -30,8 +30,8 @@ use crate::redis::connection_with_credentials_provider::ConnectionWithCredential use crate::redis::kv_ops::RedisKVClient; use crate::redis::{elasticache, notifications}; use crate::scram::threadpool::ThreadPool; -use crate::serverless::cancel_set::CancelSet; use crate::serverless::GlobalConnPoolOptions; +use crate::serverless::cancel_set::CancelSet; use crate::tls::client_config::compute_client_config_with_root_certs; use crate::{auth, control_plane, http, serverless, usage_metrics}; @@ -331,7 +331,9 @@ pub async fn run() -> anyhow::Result<()> { ), ), (None, None) => { - warn!("irsa auth requires redis-host and redis-port to be set, continuing without regional_redis_client"); + warn!( + "irsa auth requires redis-host and redis-port to be set, continuing without regional_redis_client" + ); None } _ => { diff --git a/proxy/src/cache/project_info.rs b/proxy/src/cache/project_info.rs index 7651eb71a2..e153e9f61f 100644 --- a/proxy/src/cache/project_info.rs +++ b/proxy/src/cache/project_info.rs @@ -1,12 +1,12 @@ use std::collections::HashSet; use std::convert::Infallible; -use std::sync::atomic::AtomicU64; use std::sync::Arc; +use std::sync::atomic::AtomicU64; use std::time::Duration; use async_trait::async_trait; use clashmap::ClashMap; -use rand::{thread_rng, Rng}; +use rand::{Rng, thread_rng}; use smol_str::SmolStr; use tokio::sync::Mutex; use tokio::time::Instant; diff --git a/proxy/src/cache/timed_lru.rs b/proxy/src/cache/timed_lru.rs index 06eaeb9a30..7cfe5100ea 100644 --- a/proxy/src/cache/timed_lru.rs +++ b/proxy/src/cache/timed_lru.rs @@ -11,11 +11,11 @@ use std::time::{Duration, Instant}; // This severely hinders its usage both in terms of creating wrappers and supported key types. // // On the other hand, `hashlink` has good download stats and appears to be maintained. -use hashlink::{linked_hash_map::RawEntryMut, LruCache}; +use hashlink::{LruCache, linked_hash_map::RawEntryMut}; use tracing::debug; use super::common::Cached; -use super::{timed_lru, Cache}; +use super::{Cache, timed_lru}; /// An implementation of timed LRU cache with fixed capacity. 
/// Key properties: diff --git a/proxy/src/cancellation.rs b/proxy/src/cancellation.rs index 1f9c8a48b7..8263e5aa2a 100644 --- a/proxy/src/cancellation.rs +++ b/proxy/src/cancellation.rs @@ -3,8 +3,8 @@ use std::net::{IpAddr, SocketAddr}; use std::sync::Arc; use ipnet::{IpNet, Ipv4Net, Ipv6Net}; -use postgres_client::tls::MakeTlsConnect; use postgres_client::CancelToken; +use postgres_client::tls::MakeTlsConnect; use pq_proto::CancelKeyData; use serde::{Deserialize, Serialize}; use thiserror::Error; @@ -13,7 +13,7 @@ use tokio::sync::{mpsc, oneshot}; use tracing::{debug, info}; use crate::auth::backend::ComputeUserInfo; -use crate::auth::{check_peer_addr_is_in_list, AuthError}; +use crate::auth::{AuthError, check_peer_addr_is_in_list}; use crate::config::ComputeConfig; use crate::context::RequestContext; use crate::control_plane::ControlPlaneApi; @@ -358,10 +358,7 @@ impl CancellationHandler { let incoming_vpc_endpoint_id = match ctx.extra() { None => return Err(CancelError::AuthError(AuthError::MissingVPCEndpointId)), - Some(ConnectionInfoExtra::Aws { vpce_id }) => { - // Convert the vcpe_id to a string - String::from_utf8(vpce_id.to_vec()).unwrap_or_default() - } + Some(ConnectionInfoExtra::Aws { vpce_id }) => vpce_id.to_string(), Some(ConnectionInfoExtra::Azure { link_id }) => link_id.to_string(), }; diff --git a/proxy/src/config.rs b/proxy/src/config.rs index 460e0cff54..1bcd22e98f 100644 --- a/proxy/src/config.rs +++ b/proxy/src/config.rs @@ -2,18 +2,18 @@ use std::str::FromStr; use std::sync::Arc; use std::time::Duration; -use anyhow::{bail, ensure, Context, Ok}; +use anyhow::{Context, Ok, bail, ensure}; use clap::ValueEnum; use remote_storage::RemoteStorageConfig; -use crate::auth::backend::jwt::JwkCache; use crate::auth::backend::AuthRateLimiter; +use crate::auth::backend::jwt::JwkCache; use crate::control_plane::locks::ApiLocks; use crate::rate_limiter::{RateBucketInfo, RateLimitAlgorithm, RateLimiterConfig}; use crate::scram::threadpool::ThreadPool; -use crate::serverless::cancel_set::CancelSet; use crate::serverless::GlobalConnPoolOptions; -pub use crate::tls::server_config::{configure_tls, TlsConfig}; +use crate::serverless::cancel_set::CancelSet; +pub use crate::tls::server_config::{TlsConfig, configure_tls}; use crate::types::Host; pub struct ProxyConfig { @@ -97,8 +97,7 @@ pub struct EndpointCacheConfig { impl EndpointCacheConfig { /// Default options for [`crate::control_plane::NodeInfoCache`]. /// Notice that by default the limiter is empty, which means that cache is disabled. - pub const CACHE_DEFAULT_OPTIONS: &'static str = - "initial_batch_size=1000,default_batch_size=10,xread_timeout=5m,stream_name=controlPlane,disable_cache=true,limiter_info=1000@1s,retry_interval=1s"; + pub const CACHE_DEFAULT_OPTIONS: &'static str = "initial_batch_size=1000,default_batch_size=10,xread_timeout=5m,stream_name=controlPlane,disable_cache=true,limiter_info=1000@1s,retry_interval=1s"; /// Parse cache options passed via cmdline. /// Example: [`Self::CACHE_DEFAULT_OPTIONS`]. 
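
For readers unfamiliar with option strings like `CACHE_DEFAULT_OPTIONS` above, here is a minimal, hypothetical sketch of parsing such a comma-separated `key=value` list. It is only an illustration; the proxy's actual option parsing lives in its config module and is not shown in this patch.

    use std::collections::HashMap;

    /// Split a string like "initial_batch_size=1000,disable_cache=true" into a key/value map.
    /// Illustrative only; not the proxy's parser.
    fn parse_options(s: &str) -> Result<HashMap<&str, &str>, String> {
        s.split(',')
            .filter(|kv| !kv.is_empty())
            .map(|kv| {
                kv.split_once('=')
                    .ok_or_else(|| format!("missing '=' in option: {kv}"))
            })
            .collect()
    }

    fn main() {
        let opts = parse_options("initial_batch_size=1000,default_batch_size=10,disable_cache=true")
            .expect("options should parse");
        assert_eq!(opts.get("default_batch_size"), Some(&"10"));
    }
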
diff --git a/proxy/src/console_redirect_proxy.rs b/proxy/src/console_redirect_proxy.rs index 1044f5f8e2..4662860b3f 100644 --- a/proxy/src/console_redirect_proxy.rs +++ b/proxy/src/console_redirect_proxy.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use futures::{FutureExt, TryFutureExt}; use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt}; use tokio_util::sync::CancellationToken; -use tracing::{debug, error, info, Instrument}; +use tracing::{Instrument, debug, error, info}; use crate::auth::backend::ConsoleRedirectBackend; use crate::cancellation::CancellationHandler; @@ -11,12 +11,12 @@ use crate::config::{ProxyConfig, ProxyProtocolV2}; use crate::context::RequestContext; use crate::error::ReportableError; use crate::metrics::{Metrics, NumClientConnectionsGuard}; -use crate::protocol2::{read_proxy_protocol, ConnectHeader, ConnectionInfo}; -use crate::proxy::connect_compute::{connect_to_compute, TcpMechanism}; -use crate::proxy::handshake::{handshake, HandshakeData}; +use crate::protocol2::{ConnectHeader, ConnectionInfo, read_proxy_protocol}; +use crate::proxy::connect_compute::{TcpMechanism, connect_to_compute}; +use crate::proxy::handshake::{HandshakeData, handshake}; use crate::proxy::passthrough::ProxyPassthrough; use crate::proxy::{ - prepare_client_connection, run_until_cancelled, ClientRequestError, ErrorSource, + ClientRequestError, ErrorSource, prepare_client_connection, run_until_cancelled, }; pub async fn task_main( @@ -64,22 +64,34 @@ pub async fn task_main( debug!("healthcheck received"); return; } - Ok((_socket, ConnectHeader::Missing)) if config.proxy_protocol_v2 == ProxyProtocolV2::Required => { + Ok((_socket, ConnectHeader::Missing)) + if config.proxy_protocol_v2 == ProxyProtocolV2::Required => + { error!("missing required proxy protocol header"); return; } - Ok((_socket, ConnectHeader::Proxy(_))) if config.proxy_protocol_v2 == ProxyProtocolV2::Rejected => { + Ok((_socket, ConnectHeader::Proxy(_))) + if config.proxy_protocol_v2 == ProxyProtocolV2::Rejected => + { error!("proxy protocol header not supported"); return; } Ok((socket, ConnectHeader::Proxy(info))) => (socket, info), - Ok((socket, ConnectHeader::Missing)) => (socket, ConnectionInfo{ addr: peer_addr, extra: None }), + Ok((socket, ConnectHeader::Missing)) => ( + socket, + ConnectionInfo { + addr: peer_addr, + extra: None, + }, + ), }; match socket.inner.set_nodelay(true) { Ok(()) => {} Err(e) => { - error!("per-client task finished with an error: failed to set socket option: {e:#}"); + error!( + "per-client task finished with an error: failed to set socket option: {e:#}" + ); return; } } @@ -118,10 +130,16 @@ pub async fn task_main( match p.proxy_pass(&config.connect_to_compute).await { Ok(()) => {} Err(ErrorSource::Client(e)) => { - error!(?session_id, "per-client task finished with an IO error from the client: {e:#}"); + error!( + ?session_id, + "per-client task finished with an IO error from the client: {e:#}" + ); } Err(ErrorSource::Compute(e)) => { - error!(?session_id, "per-client task finished with an IO error from the compute: {e:#}"); + error!( + ?session_id, + "per-client task finished with an IO error from the compute: {e:#}" + ); } } } @@ -241,6 +259,7 @@ pub(crate) async fn handle_client( Ok(Some(ProxyPassthrough { client: stream, aux: node.aux.clone(), + private_link_id: None, compute: node, session_id: ctx.session_id(), cancel: session, diff --git a/proxy/src/context/mod.rs b/proxy/src/context/mod.rs index 3236b2e1bf..f87f4e9ef8 100644 --- a/proxy/src/context/mod.rs +++ b/proxy/src/context/mod.rs @@ 
-8,7 +8,7 @@ use pq_proto::StartupMessageParams; use smol_str::SmolStr; use tokio::sync::mpsc; use tracing::field::display; -use tracing::{debug, error, info_span, Span}; +use tracing::{Span, debug, error, info_span}; use try_lock::TryLock; use uuid::Uuid; @@ -55,6 +55,7 @@ struct RequestContextInner { dbname: Option, user: Option, application: Option, + user_agent: Option, error_kind: Option, pub(crate) auth_method: Option, jwt_issuer: Option, @@ -100,6 +101,7 @@ impl Clone for RequestContext { dbname: inner.dbname.clone(), user: inner.user.clone(), application: inner.application.clone(), + user_agent: inner.user_agent.clone(), error_kind: inner.error_kind, auth_method: inner.auth_method.clone(), jwt_issuer: inner.jwt_issuer.clone(), @@ -149,6 +151,7 @@ impl RequestContext { dbname: None, user: None, application: None, + user_agent: None, error_kind: None, auth_method: None, jwt_issuer: None, @@ -245,6 +248,13 @@ impl RequestContext { .set_user(user); } + pub(crate) fn set_user_agent(&self, user_agent: Option) { + self.0 + .try_lock() + .expect("should not deadlock") + .set_user_agent(user_agent); + } + pub(crate) fn set_auth_method(&self, auth_method: AuthMethod) { let mut this = self.0.try_lock().expect("should not deadlock"); this.auth_method = Some(auth_method); @@ -384,6 +394,10 @@ impl RequestContextInner { } } + fn set_user_agent(&mut self, user_agent: Option) { + self.user_agent = user_agent; + } + fn set_dbname(&mut self, dbname: DbName) { self.dbname = Some(dbname); } diff --git a/proxy/src/context/parquet.rs b/proxy/src/context/parquet.rs index 0537ae6a62..bfab5f34f9 100644 --- a/proxy/src/context/parquet.rs +++ b/proxy/src/context/parquet.rs @@ -8,7 +8,7 @@ use chrono::{Datelike, Timelike}; use futures::{Stream, StreamExt}; use parquet::basic::Compression; use parquet::file::metadata::RowGroupMetaDataPtr; -use parquet::file::properties::{WriterProperties, WriterPropertiesPtr, DEFAULT_PAGE_SIZE}; +use parquet::file::properties::{DEFAULT_PAGE_SIZE, WriterProperties, WriterPropertiesPtr}; use parquet::file::writer::SerializedFileWriter; use parquet::record::RecordWriter; use pq_proto::StartupMessageParams; @@ -17,10 +17,10 @@ use serde::ser::SerializeMap; use tokio::sync::mpsc; use tokio::time; use tokio_util::sync::CancellationToken; -use tracing::{debug, info, Span}; +use tracing::{Span, debug, info}; use utils::backoff; -use super::{RequestContextInner, LOG_CHAN}; +use super::{LOG_CHAN, RequestContextInner}; use crate::config::remote_storage_from_toml; use crate::context::LOG_CHAN_DISCONNECT; use crate::ext::TaskExt; @@ -82,6 +82,7 @@ pub(crate) struct RequestData { peer_addr: String, username: Option, application_name: Option, + user_agent: Option, endpoint_id: Option, database: Option, project: Option, @@ -128,6 +129,7 @@ impl From<&RequestContextInner> for RequestData { timestamp: value.first_packet.naive_utc(), username: value.user.as_deref().map(String::from), application_name: value.application.as_deref().map(String::from), + user_agent: value.user_agent.as_deref().map(String::from), endpoint_id: value.endpoint_id.as_deref().map(String::from), database: value.dbname.as_deref().map(String::from), project: value.project.as_deref().map(String::from), @@ -425,20 +427,20 @@ mod tests { use futures::{Stream, StreamExt}; use itertools::Itertools; use parquet::basic::{Compression, ZstdLevel}; - use parquet::file::properties::{WriterProperties, DEFAULT_PAGE_SIZE}; + use parquet::file::properties::{DEFAULT_PAGE_SIZE, WriterProperties}; use parquet::file::reader::FileReader; use 
parquet::file::serialized_reader::SerializedFileReader; use rand::rngs::StdRng; use rand::{Rng, SeedableRng}; use remote_storage::{ - GenericRemoteStorage, RemoteStorageConfig, RemoteStorageKind, S3Config, DEFAULT_MAX_KEYS_PER_LIST_RESPONSE, DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT, + GenericRemoteStorage, RemoteStorageConfig, RemoteStorageKind, S3Config, }; use tokio::sync::mpsc; use tokio::time; use walkdir::WalkDir; - use super::{worker_inner, ParquetConfig, ParquetUploadArgs, RequestData}; + use super::{ParquetConfig, ParquetUploadArgs, RequestData, worker_inner}; #[derive(Parser)] struct ProxyCliArgs { @@ -514,26 +516,27 @@ mod tests { fn generate_request_data(rng: &mut impl Rng) -> RequestData { RequestData { - session_id: uuid::Builder::from_random_bytes(rng.gen()).into_uuid(), - peer_addr: Ipv4Addr::from(rng.gen::<[u8; 4]>()).to_string(), + session_id: uuid::Builder::from_random_bytes(rng.r#gen()).into_uuid(), + peer_addr: Ipv4Addr::from(rng.r#gen::<[u8; 4]>()).to_string(), timestamp: chrono::DateTime::from_timestamp_millis( rng.gen_range(1703862754..1803862754), ) .unwrap() .naive_utc(), application_name: Some("test".to_owned()), - username: Some(hex::encode(rng.gen::<[u8; 4]>())), - endpoint_id: Some(hex::encode(rng.gen::<[u8; 16]>())), - database: Some(hex::encode(rng.gen::<[u8; 16]>())), - project: Some(hex::encode(rng.gen::<[u8; 16]>())), - branch: Some(hex::encode(rng.gen::<[u8; 16]>())), + user_agent: Some("test-user-agent".to_owned()), + username: Some(hex::encode(rng.r#gen::<[u8; 4]>())), + endpoint_id: Some(hex::encode(rng.r#gen::<[u8; 16]>())), + database: Some(hex::encode(rng.r#gen::<[u8; 16]>())), + project: Some(hex::encode(rng.r#gen::<[u8; 16]>())), + branch: Some(hex::encode(rng.r#gen::<[u8; 16]>())), pg_options: None, auth_method: None, jwt_issuer: None, protocol: ["tcp", "ws", "http"][rng.gen_range(0..3)], region: "us-east-1", error: None, - success: rng.gen(), + success: rng.r#gen(), cold_start_info: "no", duration_us: rng.gen_range(0..30_000_000), disconnect_timestamp: None, @@ -610,15 +613,15 @@ mod tests { assert_eq!( file_stats, [ - (1313105, 3, 6000), - (1313094, 3, 6000), - (1313153, 3, 6000), - (1313110, 3, 6000), - (1313246, 3, 6000), - (1313083, 3, 6000), - (1312877, 3, 6000), - (1313112, 3, 6000), - (438020, 1, 2000) + (1313953, 3, 6000), + (1313942, 3, 6000), + (1314001, 3, 6000), + (1313958, 3, 6000), + (1314094, 3, 6000), + (1313931, 3, 6000), + (1313725, 3, 6000), + (1313960, 3, 6000), + (438318, 1, 2000) ] ); @@ -650,11 +653,11 @@ mod tests { assert_eq!( file_stats, [ - (1204324, 5, 10000), - (1204048, 5, 10000), - (1204349, 5, 10000), - (1204334, 5, 10000), - (1204588, 5, 10000) + (1205810, 5, 10000), + (1205534, 5, 10000), + (1205835, 5, 10000), + (1205820, 5, 10000), + (1206074, 5, 10000) ] ); @@ -679,15 +682,15 @@ mod tests { assert_eq!( file_stats, [ - (1313105, 3, 6000), - (1313094, 3, 6000), - (1313153, 3, 6000), - (1313110, 3, 6000), - (1313246, 3, 6000), - (1313083, 3, 6000), - (1312877, 3, 6000), - (1313112, 3, 6000), - (438020, 1, 2000) + (1313953, 3, 6000), + (1313942, 3, 6000), + (1314001, 3, 6000), + (1313958, 3, 6000), + (1314094, 3, 6000), + (1313931, 3, 6000), + (1313725, 3, 6000), + (1313960, 3, 6000), + (438318, 1, 2000) ] ); @@ -724,7 +727,7 @@ mod tests { // files are smaller than the size threshold, but they took too long to fill so were flushed early assert_eq!( file_stats, - [(658014, 2, 3001), (657728, 2, 3000), (657524, 2, 2999)] + [(658584, 2, 3001), (658298, 2, 3000), (658094, 2, 2999)] ); tmpdir.close().unwrap(); 
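
The `rng.gen()` → `rng.r#gen()` changes in the tests above follow from the switch to `edition = "2024"` in `proxy/Cargo.toml`: `gen` is a reserved keyword in the Rust 2024 edition, so calls to rand's `Rng::gen` must go through raw-identifier syntax. A small self-contained illustration (std only, no rand dependency):

    // Under edition 2024, `gen` is reserved, so a method with that name must be
    // declared and called through a raw identifier. `r#gen` also compiles on
    // older editions, which is why the mechanical rename is safe.
    struct Dice;

    impl Dice {
        fn r#gen(&self) -> u8 {
            4 // placeholder value
        }
    }

    fn main() {
        let d = Dice;
        // `d.gen()` is rejected by the 2024 edition; `d.r#gen()` works everywhere.
        println!("{}", d.r#gen());
    }
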
diff --git a/proxy/src/control_plane/client/cplane_proxy_v1.rs b/proxy/src/control_plane/client/cplane_proxy_v1.rs index ef6621fc59..977fcf4727 100644 --- a/proxy/src/control_plane/client/cplane_proxy_v1.rs +++ b/proxy/src/control_plane/client/cplane_proxy_v1.rs @@ -3,16 +3,16 @@ use std::sync::Arc; use std::time::Duration; -use ::http::header::AUTHORIZATION; use ::http::HeaderName; +use ::http::header::AUTHORIZATION; use futures::TryFutureExt; use postgres_client::config::SslMode; use tokio::time::Instant; -use tracing::{debug, info, info_span, warn, Instrument}; +use tracing::{Instrument, debug, info, info_span, warn}; use super::super::messages::{ControlPlaneErrorMessage, GetEndpointAccessControl, WakeCompute}; -use crate::auth::backend::jwt::AuthRule; use crate::auth::backend::ComputeUserInfo; +use crate::auth::backend::jwt::AuthRule; use crate::cache::Cached; use crate::context::RequestContext; use crate::control_plane::caches::ApiCaches; diff --git a/proxy/src/control_plane/client/mock.rs b/proxy/src/control_plane/client/mock.rs index 1e6cde8fb0..7da5464aa5 100644 --- a/proxy/src/control_plane/client/mock.rs +++ b/proxy/src/control_plane/client/mock.rs @@ -6,11 +6,11 @@ use std::sync::Arc; use futures::TryFutureExt; use thiserror::Error; use tokio_postgres::Client; -use tracing::{error, info, info_span, warn, Instrument}; +use tracing::{Instrument, error, info, info_span, warn}; -use crate::auth::backend::jwt::AuthRule; -use crate::auth::backend::ComputeUserInfo; use crate::auth::IpPattern; +use crate::auth::backend::ComputeUserInfo; +use crate::auth::backend::jwt::AuthRule; use crate::cache::Cached; use crate::context::RequestContext; use crate::control_plane::client::{ diff --git a/proxy/src/control_plane/client/mod.rs b/proxy/src/control_plane/client/mod.rs index c28ff4789d..746595de38 100644 --- a/proxy/src/control_plane/client/mod.rs +++ b/proxy/src/control_plane/client/mod.rs @@ -10,15 +10,15 @@ use clashmap::ClashMap; use tokio::time::Instant; use tracing::{debug, info}; -use crate::auth::backend::jwt::{AuthRule, FetchAuthRules, FetchAuthRulesError}; use crate::auth::backend::ComputeUserInfo; +use crate::auth::backend::jwt::{AuthRule, FetchAuthRules, FetchAuthRulesError}; use crate::cache::endpoints::EndpointsCache; use crate::cache::project_info::ProjectInfoCacheImpl; use crate::config::{CacheOptions, EndpointCacheConfig, ProjectInfoCacheOptions}; use crate::context::RequestContext; use crate::control_plane::{ - errors, CachedAccessBlockerFlags, CachedAllowedIps, CachedAllowedVpcEndpointIds, - CachedNodeInfo, CachedRoleSecret, ControlPlaneApi, NodeInfoCache, + CachedAccessBlockerFlags, CachedAllowedIps, CachedAllowedVpcEndpointIds, CachedNodeInfo, + CachedRoleSecret, ControlPlaneApi, NodeInfoCache, errors, }; use crate::error::ReportableError; use crate::metrics::ApiLockMetrics; diff --git a/proxy/src/control_plane/errors.rs b/proxy/src/control_plane/errors.rs index d6f565e34a..bc30cffd27 100644 --- a/proxy/src/control_plane/errors.rs +++ b/proxy/src/control_plane/errors.rs @@ -2,7 +2,7 @@ use thiserror::Error; use crate::control_plane::client::ApiLockError; use crate::control_plane::messages::{self, ControlPlaneErrorMessage, Reason}; -use crate::error::{io_error, ErrorKind, ReportableError, UserFacingError}; +use crate::error::{ErrorKind, ReportableError, UserFacingError, io_error}; use crate::proxy::retry::CouldRetry; /// A go-to error message which doesn't leak any detail. 
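
Most of the import-only hunks in this patch look like the output of rustfmt's 2024 style edition (picked up along with the edition bump), which sorts `use` paths with an ASCII-aware ordering in which uppercase names such as `Arc` or `Instrument` sort before lowercase module segments. A minimal example of the effect, using std paths only:

    // Pre-2024 style editions sort imports case-insensitively:
    //     use std::sync::atomic::{AtomicUsize, Ordering};
    //     use std::sync::Arc;
    // The 2024 style edition sorts uppercase before lowercase:
    use std::sync::Arc;
    use std::sync::atomic::{AtomicUsize, Ordering};

    fn main() {
        let counter = Arc::new(AtomicUsize::new(0));
        counter.fetch_add(1, Ordering::Relaxed);
        println!("{}", counter.load(Ordering::Relaxed));
    }
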
diff --git a/proxy/src/control_plane/mgmt.rs b/proxy/src/control_plane/mgmt.rs index 2f7359240d..df31abcc8c 100644 --- a/proxy/src/control_plane/mgmt.rs +++ b/proxy/src/control_plane/mgmt.rs @@ -6,7 +6,7 @@ use postgres_backend::{AuthType, PostgresBackend, PostgresBackendTCP, QueryError use pq_proto::{BeMessage, SINGLE_COL_ROWDESC}; use tokio::net::{TcpListener, TcpStream}; use tokio_util::sync::CancellationToken; -use tracing::{error, info, info_span, Instrument}; +use tracing::{Instrument, error, info, info_span}; use crate::control_plane::messages::{DatabaseInfo, KickSession}; use crate::waiters::{self, Waiter, Waiters}; diff --git a/proxy/src/control_plane/mod.rs b/proxy/src/control_plane/mod.rs index 89ec4f9b33..d592223be1 100644 --- a/proxy/src/control_plane/mod.rs +++ b/proxy/src/control_plane/mod.rs @@ -11,9 +11,9 @@ pub(crate) mod errors; use std::sync::Arc; +use crate::auth::IpPattern; use crate::auth::backend::jwt::AuthRule; use crate::auth::backend::{ComputeCredentialKeys, ComputeUserInfo}; -use crate::auth::IpPattern; use crate::cache::project_info::ProjectInfoCacheImpl; use crate::cache::{Cached, TimedLru}; use crate::config::ComputeConfig; diff --git a/proxy/src/http/health_server.rs b/proxy/src/http/health_server.rs index 141f319567..5278fe2a3e 100644 --- a/proxy/src/http/health_server.rs +++ b/proxy/src/http/health_server.rs @@ -9,8 +9,8 @@ use http_utils::json::json_response; use http_utils::{RouterBuilder, RouterService}; use hyper0::header::CONTENT_TYPE; use hyper0::{Body, Request, Response, StatusCode}; -use measured::text::BufferedTextEncoder; use measured::MetricGroup; +use measured::text::BufferedTextEncoder; use metrics::NeonMetrics; use tracing::{info, info_span}; diff --git a/proxy/src/http/mod.rs b/proxy/src/http/mod.rs index ed88c77256..96f600d836 100644 --- a/proxy/src/http/mod.rs +++ b/proxy/src/http/mod.rs @@ -13,8 +13,8 @@ use hyper::body::Body; pub(crate) use reqwest::{Request, Response}; use reqwest_middleware::RequestBuilder; pub(crate) use reqwest_middleware::{ClientWithMiddleware, Error}; -pub(crate) use reqwest_retry::policies::ExponentialBackoff; pub(crate) use reqwest_retry::RetryTransientMiddleware; +pub(crate) use reqwest_retry::policies::ExponentialBackoff; use thiserror::Error; use crate::metrics::{ConsoleRequest, Metrics}; diff --git a/proxy/src/logging.rs b/proxy/src/logging.rs index fbd4811b54..3c34918d84 100644 --- a/proxy/src/logging.rs +++ b/proxy/src/logging.rs @@ -8,7 +8,7 @@ use opentelemetry::trace::TraceContextExt; use scopeguard::defer; use serde::ser::{SerializeMap, Serializer}; use tracing::subscriber::Interest; -use tracing::{callsite, span, Event, Metadata, Span, Subscriber}; +use tracing::{Event, Metadata, Span, Subscriber, callsite, span}; use tracing_opentelemetry::OpenTelemetrySpanExt; use tracing_subscriber::filter::{EnvFilter, LevelFilter}; use tracing_subscriber::fmt::format::{Format, Full}; diff --git a/proxy/src/metrics.rs b/proxy/src/metrics.rs index f3447e063e..db1f096de1 100644 --- a/proxy/src/metrics.rs +++ b/proxy/src/metrics.rs @@ -543,11 +543,7 @@ impl Drop for LatencyTimer { impl From for Bool { fn from(value: bool) -> Self { - if value { - Bool::True - } else { - Bool::False - } + if value { Bool::True } else { Bool::False } } } diff --git a/proxy/src/protocol2.rs b/proxy/src/protocol2.rs index 74a15d9bf4..41180fa6c1 100644 --- a/proxy/src/protocol2.rs +++ b/proxy/src/protocol2.rs @@ -9,6 +9,7 @@ use std::task::{Context, Poll}; use bytes::{Buf, Bytes, BytesMut}; use pin_project_lite::pin_project; +use 
smol_str::SmolStr; use strum_macros::FromRepr; use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, ReadBuf}; use zerocopy::{FromBytes, FromZeroes}; @@ -99,7 +100,7 @@ impl fmt::Display for ConnectionInfo { #[derive(PartialEq, Eq, Clone, Debug)] pub enum ConnectionInfoExtra { - Aws { vpce_id: Bytes }, + Aws { vpce_id: SmolStr }, Azure { link_id: u32 }, } @@ -193,7 +194,7 @@ fn process_proxy_payload( return Err(io::Error::new( io::ErrorKind::Other, "invalid proxy protocol address family/transport protocol.", - )) + )); } }; @@ -207,9 +208,14 @@ fn process_proxy_payload( } let subtype = tlv.value.get_u8(); match Pp2AwsType::from_repr(subtype) { - Some(Pp2AwsType::VpceId) => { - extra = Some(ConnectionInfoExtra::Aws { vpce_id: tlv.value }); - } + Some(Pp2AwsType::VpceId) => match std::str::from_utf8(&tlv.value) { + Ok(s) => { + extra = Some(ConnectionInfoExtra::Aws { vpce_id: s.into() }); + } + Err(e) => { + tracing::warn!("invalid aws vpce id: {e}"); + } + }, None => { tracing::warn!("unknown aws tlv: subtype={subtype}"); } @@ -401,7 +407,7 @@ mod tests { use tokio::io::AsyncReadExt; use crate::protocol2::{ - read_proxy_protocol, ConnectHeader, LOCAL_V2, PROXY_V2, TCP_OVER_IPV4, UDP_OVER_IPV6, + ConnectHeader, LOCAL_V2, PROXY_V2, TCP_OVER_IPV4, UDP_OVER_IPV6, read_proxy_protocol, }; #[tokio::test] diff --git a/proxy/src/proxy/connect_compute.rs b/proxy/src/proxy/connect_compute.rs index 26fb1754bf..b8b39fa121 100644 --- a/proxy/src/proxy/connect_compute.rs +++ b/proxy/src/proxy/connect_compute.rs @@ -5,7 +5,7 @@ use tracing::{debug, info, warn}; use super::retry::ShouldRetryWakeCompute; use crate::auth::backend::{ComputeCredentialKeys, ComputeUserInfo}; -use crate::compute::{self, PostgresConnection, COULD_NOT_CONNECT}; +use crate::compute::{self, COULD_NOT_CONNECT, PostgresConnection}; use crate::config::{ComputeConfig, RetryConfig}; use crate::context::RequestContext; use crate::control_plane::errors::WakeComputeError; @@ -15,7 +15,7 @@ use crate::error::ReportableError; use crate::metrics::{ ConnectOutcome, ConnectionFailureKind, Metrics, RetriesMetricGroup, RetryType, }; -use crate::proxy::retry::{retry_after, should_retry, CouldRetry}; +use crate::proxy::retry::{CouldRetry, retry_after, should_retry}; use crate::proxy::wake_compute::wake_compute; use crate::types::Host; diff --git a/proxy/src/proxy/copy_bidirectional.rs b/proxy/src/proxy/copy_bidirectional.rs index 861f1766e8..6f8b972348 100644 --- a/proxy/src/proxy/copy_bidirectional.rs +++ b/proxy/src/proxy/copy_bidirectional.rs @@ -1,7 +1,7 @@ use std::future::poll_fn; use std::io; use std::pin::Pin; -use std::task::{ready, Context, Poll}; +use std::task::{Context, Poll, ready}; use tokio::io::{AsyncRead, AsyncWrite, ReadBuf}; use tracing::info; diff --git a/proxy/src/proxy/mod.rs b/proxy/src/proxy/mod.rs index 2a406fcb34..0c6d352600 100644 --- a/proxy/src/proxy/mod.rs +++ b/proxy/src/proxy/mod.rs @@ -9,28 +9,28 @@ pub(crate) mod retry; pub(crate) mod wake_compute; use std::sync::Arc; -pub use copy_bidirectional::{copy_bidirectional_client_compute, ErrorSource}; +pub use copy_bidirectional::{ErrorSource, copy_bidirectional_client_compute}; use futures::{FutureExt, TryFutureExt}; use itertools::Itertools; use once_cell::sync::OnceCell; use pq_proto::{BeMessage as Be, CancelKeyData, StartupMessageParams}; use regex::Regex; use serde::{Deserialize, Serialize}; -use smol_str::{format_smolstr, SmolStr}; +use smol_str::{SmolStr, ToSmolStr, format_smolstr}; use thiserror::Error; use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt}; use 
tokio_util::sync::CancellationToken; -use tracing::{debug, error, info, warn, Instrument}; +use tracing::{Instrument, debug, error, info, warn}; -use self::connect_compute::{connect_to_compute, TcpMechanism}; +use self::connect_compute::{TcpMechanism, connect_to_compute}; use self::passthrough::ProxyPassthrough; use crate::cancellation::{self, CancellationHandler}; use crate::config::{ProxyConfig, ProxyProtocolV2, TlsConfig}; use crate::context::RequestContext; use crate::error::ReportableError; use crate::metrics::{Metrics, NumClientConnectionsGuard}; -use crate::protocol2::{read_proxy_protocol, ConnectHeader, ConnectionInfo}; -use crate::proxy::handshake::{handshake, HandshakeData}; +use crate::protocol2::{ConnectHeader, ConnectionInfo, ConnectionInfoExtra, read_proxy_protocol}; +use crate::proxy::handshake::{HandshakeData, handshake}; use crate::rate_limiter::EndpointRateLimiter; use crate::stream::{PqStream, Stream}; use crate::types::EndpointCacheKey; @@ -100,22 +100,34 @@ pub async fn task_main( debug!("healthcheck received"); return; } - Ok((_socket, ConnectHeader::Missing)) if config.proxy_protocol_v2 == ProxyProtocolV2::Required => { + Ok((_socket, ConnectHeader::Missing)) + if config.proxy_protocol_v2 == ProxyProtocolV2::Required => + { warn!("missing required proxy protocol header"); return; } - Ok((_socket, ConnectHeader::Proxy(_))) if config.proxy_protocol_v2 == ProxyProtocolV2::Rejected => { + Ok((_socket, ConnectHeader::Proxy(_))) + if config.proxy_protocol_v2 == ProxyProtocolV2::Rejected => + { warn!("proxy protocol header not supported"); return; } Ok((socket, ConnectHeader::Proxy(info))) => (socket, info), - Ok((socket, ConnectHeader::Missing)) => (socket, ConnectionInfo { addr: peer_addr, extra: None }), + Ok((socket, ConnectHeader::Missing)) => ( + socket, + ConnectionInfo { + addr: peer_addr, + extra: None, + }, + ), }; match socket.inner.set_nodelay(true) { Ok(()) => {} Err(e) => { - error!("per-client task finished with an error: failed to set socket option: {e:#}"); + error!( + "per-client task finished with an error: failed to set socket option: {e:#}" + ); return; } } @@ -156,10 +168,16 @@ pub async fn task_main( match p.proxy_pass(&config.connect_to_compute).await { Ok(()) => {} Err(ErrorSource::Client(e)) => { - warn!(?session_id, "per-client task finished with an IO error from the client: {e:#}"); + warn!( + ?session_id, + "per-client task finished with an IO error from the client: {e:#}" + ); } Err(ErrorSource::Compute(e)) => { - error!(?session_id, "per-client task finished with an IO error from the compute: {e:#}"); + error!( + ?session_id, + "per-client task finished with an IO error from the compute: {e:#}" + ); } } } @@ -374,9 +392,16 @@ pub(crate) async fn handle_client( let (stream, read_buf) = stream.into_inner(); node.stream.write_all(&read_buf).await?; + let private_link_id = match ctx.extra() { + Some(ConnectionInfoExtra::Aws { vpce_id }) => Some(vpce_id.clone()), + Some(ConnectionInfoExtra::Azure { link_id }) => Some(link_id.to_smolstr()), + None => None, + }; + Ok(Some(ProxyPassthrough { client: stream, aux: node.aux.clone(), + private_link_id, compute: node, session_id: ctx.session_id(), cancel: session, diff --git a/proxy/src/proxy/passthrough.rs b/proxy/src/proxy/passthrough.rs index 08871380d6..23b9897155 100644 --- a/proxy/src/proxy/passthrough.rs +++ b/proxy/src/proxy/passthrough.rs @@ -1,3 +1,4 @@ +use smol_str::SmolStr; use tokio::io::{AsyncRead, AsyncWrite}; use tracing::debug; use utils::measured_stream::MeasuredStream; @@ -9,7 +10,7 @@ 
use crate::config::ComputeConfig; use crate::control_plane::messages::MetricsAuxInfo; use crate::metrics::{Direction, Metrics, NumClientConnectionsGuard, NumConnectionRequestsGuard}; use crate::stream::Stream; -use crate::usage_metrics::{Ids, MetricCounterRecorder, USAGE_METRICS}; +use crate::usage_metrics::{Ids, MetricCounterRecorder, TrafficDirection, USAGE_METRICS}; /// Forward bytes in both directions (client <-> compute). #[tracing::instrument(skip_all)] @@ -17,10 +18,14 @@ pub(crate) async fn proxy_pass( client: impl AsyncRead + AsyncWrite + Unpin, compute: impl AsyncRead + AsyncWrite + Unpin, aux: MetricsAuxInfo, + private_link_id: Option, ) -> Result<(), ErrorSource> { - let usage = USAGE_METRICS.register(Ids { + // we will report ingress at a later date + let usage_tx = USAGE_METRICS.register(Ids { endpoint_id: aux.endpoint_id, branch_id: aux.branch_id, + direction: TrafficDirection::Egress, + private_link_id, }); let metrics = &Metrics::get().proxy.io_bytes; @@ -31,7 +36,7 @@ pub(crate) async fn proxy_pass( |cnt| { // Number of bytes we sent to the client (outbound). metrics.get_metric(m_sent).inc_by(cnt as u64); - usage.record_egress(cnt as u64); + usage_tx.record_egress(cnt as u64); }, ); @@ -61,6 +66,7 @@ pub(crate) struct ProxyPassthrough { pub(crate) compute: PostgresConnection, pub(crate) aux: MetricsAuxInfo, pub(crate) session_id: uuid::Uuid, + pub(crate) private_link_id: Option, pub(crate) cancel: cancellation::Session, pub(crate) _req: NumConnectionRequestsGuard<'static>, @@ -72,7 +78,13 @@ impl ProxyPassthrough { self, compute_config: &ComputeConfig, ) -> Result<(), ErrorSource> { - let res = proxy_pass(self.client, self.compute.stream, self.aux).await; + let res = proxy_pass( + self.client, + self.compute.stream, + self.aux, + self.private_link_id, + ) + .await; if let Err(err) = self .compute .cancel_closure diff --git a/proxy/src/proxy/tests/mod.rs b/proxy/src/proxy/tests/mod.rs index d8c00a9b41..171f539b1e 100644 --- a/proxy/src/proxy/tests/mod.rs +++ b/proxy/src/proxy/tests/mod.rs @@ -5,12 +5,12 @@ mod mitm; use std::time::Duration; -use anyhow::{bail, Context}; +use anyhow::{Context, bail}; use async_trait::async_trait; use http::StatusCode; use postgres_client::config::SslMode; use postgres_client::tls::{MakeTlsConnect, NoTls}; -use retry::{retry_after, ShouldRetryWakeCompute}; +use retry::{ShouldRetryWakeCompute, retry_after}; use rstest::rstest; use rustls::crypto::ring; use rustls::pki_types; @@ -334,8 +334,8 @@ async fn scram_auth_mock() -> anyhow::Result<()> { generate_tls_config("generic-project-name.localhost", "localhost")?; let proxy = tokio::spawn(dummy_proxy(client, Some(server_config), Scram::mock())); - use rand::distributions::Alphanumeric; use rand::Rng; + use rand::distributions::Alphanumeric; let password: String = rand::thread_rng() .sample_iter(&Alphanumeric) .take(rand::random::() as usize) diff --git a/proxy/src/proxy/wake_compute.rs b/proxy/src/proxy/wake_compute.rs index 4e9206feff..9d8915e24a 100644 --- a/proxy/src/proxy/wake_compute.rs +++ b/proxy/src/proxy/wake_compute.rs @@ -3,8 +3,8 @@ use tracing::{error, info}; use super::connect_compute::ComputeConnectBackend; use crate::config::RetryConfig; use crate::context::RequestContext; -use crate::control_plane::errors::{ControlPlaneError, WakeComputeError}; use crate::control_plane::CachedNodeInfo; +use crate::control_plane::errors::{ControlPlaneError, WakeComputeError}; use crate::error::ReportableError; use crate::metrics::{ ConnectOutcome, ConnectionFailuresBreakdownGroup, Metrics, 
RetriesMetricGroup, RetryType, diff --git a/proxy/src/rate_limiter/leaky_bucket.rs b/proxy/src/rate_limiter/leaky_bucket.rs index 9645eaf725..b3853d48e4 100644 --- a/proxy/src/rate_limiter/leaky_bucket.rs +++ b/proxy/src/rate_limiter/leaky_bucket.rs @@ -3,7 +3,7 @@ use std::sync::atomic::{AtomicUsize, Ordering}; use ahash::RandomState; use clashmap::ClashMap; -use rand::{thread_rng, Rng}; +use rand::{Rng, thread_rng}; use tokio::time::Instant; use tracing::info; use utils::leaky_bucket::LeakyBucketState; diff --git a/proxy/src/rate_limiter/limit_algorithm.rs b/proxy/src/rate_limiter/limit_algorithm.rs index b74a9ab17e..f8eeb89f05 100644 --- a/proxy/src/rate_limiter/limit_algorithm.rs +++ b/proxy/src/rate_limiter/limit_algorithm.rs @@ -5,8 +5,8 @@ use std::time::Duration; use parking_lot::Mutex; use tokio::sync::Notify; -use tokio::time::error::Elapsed; use tokio::time::Instant; +use tokio::time::error::Elapsed; use self::aimd::Aimd; diff --git a/proxy/src/rate_limiter/limiter.rs b/proxy/src/rate_limiter/limiter.rs index ef6c39f230..71e2a92da6 100644 --- a/proxy/src/rate_limiter/limiter.rs +++ b/proxy/src/rate_limiter/limiter.rs @@ -1,8 +1,8 @@ use std::borrow::Cow; use std::collections::hash_map::RandomState; use std::hash::{BuildHasher, Hash}; -use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Mutex; +use std::sync::atomic::{AtomicUsize, Ordering}; use anyhow::bail; use clashmap::ClashMap; diff --git a/proxy/src/redis/elasticache.rs b/proxy/src/redis/elasticache.rs index bf6dde9332..58e3c889a7 100644 --- a/proxy/src/redis/elasticache.rs +++ b/proxy/src/redis/elasticache.rs @@ -1,6 +1,7 @@ use std::sync::Arc; use std::time::{Duration, SystemTime}; +use aws_config::Region; use aws_config::environment::EnvironmentVariableCredentialsProvider; use aws_config::imds::credentials::ImdsCredentialsProvider; use aws_config::meta::credentials::CredentialsProviderChain; @@ -8,7 +9,6 @@ use aws_config::meta::region::RegionProviderChain; use aws_config::profile::ProfileFileCredentialsProvider; use aws_config::provider_config::ProviderConfig; use aws_config::web_identity_token::WebIdentityTokenCredentialsProvider; -use aws_config::Region; use aws_sdk_iam::config::ProvideCredentials; use aws_sigv4::http_request::{ self, SignableBody, SignableRequest, SignatureLocation, SigningSettings, diff --git a/proxy/src/redis/keys.rs b/proxy/src/redis/keys.rs index dcb9a59f87..7527bca6d0 100644 --- a/proxy/src/redis/keys.rs +++ b/proxy/src/redis/keys.rs @@ -1,7 +1,7 @@ use std::io::ErrorKind; use anyhow::Ok; -use pq_proto::{id_to_cancel_key, CancelKeyData}; +use pq_proto::{CancelKeyData, id_to_cancel_key}; use serde::{Deserialize, Serialize}; pub mod keyspace { diff --git a/proxy/src/sasl/stream.rs b/proxy/src/sasl/stream.rs index ac77556566..46e6a439e5 100644 --- a/proxy/src/sasl/stream.rs +++ b/proxy/src/sasl/stream.rs @@ -5,8 +5,8 @@ use std::io; use tokio::io::{AsyncRead, AsyncWrite}; use tracing::info; -use super::messages::ServerMessage; use super::Mechanism; +use super::messages::ServerMessage; use crate::stream::PqStream; /// Abstracts away all peculiarities of the libpq's protocol. 
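
The rate-limiter files touched above (`leaky_bucket.rs`, `limit_algorithm.rs`, `limiter.rs`) only change import order here, but for orientation the leaky-bucket idea they build on can be sketched in a few lines. This is a generic illustration, not the proxy's or `utils::leaky_bucket`'s actual implementation:

    use std::time::{Duration, Instant};

    /// A toy leaky bucket: `level` drains at `rate` tokens per second and each
    /// admitted request adds one token; requests are shed once `capacity` is hit.
    struct LeakyBucket {
        capacity: f64,
        rate: f64,
        level: f64,
        last: Instant,
    }

    impl LeakyBucket {
        fn new(capacity: f64, rate: f64) -> Self {
            Self { capacity, rate, level: 0.0, last: Instant::now() }
        }

        fn try_acquire(&mut self, now: Instant) -> bool {
            // Drain the bucket for the time that has passed since the last check.
            let elapsed = now.duration_since(self.last).as_secs_f64();
            self.level = (self.level - elapsed * self.rate).max(0.0);
            self.last = now;
            // Admit the request only if it keeps the bucket under capacity.
            if self.level + 1.0 <= self.capacity {
                self.level += 1.0;
                true
            } else {
                false
            }
        }
    }

    fn main() {
        let mut bucket = LeakyBucket::new(5.0, 100.0);
        let start = Instant::now();
        // The first five requests of a burst are admitted, the sixth is shed.
        let admitted = (0..6).filter(|_| bucket.try_acquire(start)).count();
        println!("admitted {admitted} of 6");
        // After enough time has passed the bucket drains and admits again.
        assert!(bucket.try_acquire(start + Duration::from_millis(100)));
    }
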
diff --git a/proxy/src/scram/countmin.rs b/proxy/src/scram/countmin.rs index 87ab6e0d5f..9d56c465ec 100644 --- a/proxy/src/scram/countmin.rs +++ b/proxy/src/scram/countmin.rs @@ -90,7 +90,7 @@ mod tests { // number of insert operations let m = rng.gen_range(1..100); - let id = uuid::Builder::from_random_bytes(rng.gen()).into_uuid(); + let id = uuid::Builder::from_random_bytes(rng.r#gen()).into_uuid(); ids.push((id, n, m)); // N = sum(actual) diff --git a/proxy/src/scram/exchange.rs b/proxy/src/scram/exchange.rs index 77853db3db..abd5aeae5b 100644 --- a/proxy/src/scram/exchange.rs +++ b/proxy/src/scram/exchange.rs @@ -5,6 +5,7 @@ use std::convert::Infallible; use hmac::{Hmac, Mac}; use sha2::Sha256; +use super::ScramKey; use super::messages::{ ClientFinalMessage, ClientFirstMessage, OwnedServerFirstMessage, SCRAM_RAW_NONCE_LEN, }; @@ -12,7 +13,6 @@ use super::pbkdf2::Pbkdf2; use super::secret::ServerSecret; use super::signature::SignatureBuilder; use super::threadpool::ThreadPool; -use super::ScramKey; use crate::intern::EndpointIdInt; use crate::sasl::{self, ChannelBinding, Error as SaslError}; @@ -208,8 +208,8 @@ impl sasl::Mechanism for Exchange<'_> { type Output = super::ScramKey; fn exchange(mut self, input: &str) -> sasl::Result> { - use sasl::Step; use ExchangeState; + use sasl::Step; match &self.state { ExchangeState::Initial(init) => { match init.transition(self.secret, &self.tls_server_end_point, input)? { diff --git a/proxy/src/scram/messages.rs b/proxy/src/scram/messages.rs index 0e54e7ded9..7b0b861ce9 100644 --- a/proxy/src/scram/messages.rs +++ b/proxy/src/scram/messages.rs @@ -4,7 +4,7 @@ use std::fmt; use std::ops::Range; use super::base64_decode_array; -use super::key::{ScramKey, SCRAM_KEY_LEN}; +use super::key::{SCRAM_KEY_LEN, ScramKey}; use super::signature::SignatureBuilder; use crate::sasl::ChannelBinding; diff --git a/proxy/src/scram/mod.rs b/proxy/src/scram/mod.rs index cfa571cbe1..24f991d4d9 100644 --- a/proxy/src/scram/mod.rs +++ b/proxy/src/scram/mod.rs @@ -15,7 +15,7 @@ mod secret; mod signature; pub mod threadpool; -pub(crate) use exchange::{exchange, Exchange}; +pub(crate) use exchange::{Exchange, exchange}; use hmac::{Hmac, Mac}; pub(crate) use key::ScramKey; pub(crate) use secret::ServerSecret; diff --git a/proxy/src/scram/signature.rs b/proxy/src/scram/signature.rs index d3255cf2ca..a5b1c3e9f4 100644 --- a/proxy/src/scram/signature.rs +++ b/proxy/src/scram/signature.rs @@ -1,6 +1,6 @@ //! Tools for client/server signature management. -use super::key::{ScramKey, SCRAM_KEY_LEN}; +use super::key::{SCRAM_KEY_LEN, ScramKey}; /// A collection of message parts needed to derive the client's signature. 
#[derive(Debug)] diff --git a/proxy/src/serverless/backend.rs b/proxy/src/serverless/backend.rs index f35c375ba2..72029102e0 100644 --- a/proxy/src/serverless/backend.rs +++ b/proxy/src/serverless/backend.rs @@ -7,27 +7,27 @@ use ed25519_dalek::SigningKey; use hyper_util::rt::{TokioExecutor, TokioIo, TokioTimer}; use jose_jwk::jose_b64; use rand::rngs::OsRng; -use tokio::net::{lookup_host, TcpStream}; +use tokio::net::{TcpStream, lookup_host}; use tracing::field::display; use tracing::{debug, info}; use super::conn_pool::poll_client; use super::conn_pool_lib::{Client, ConnInfo, EndpointConnPool, GlobalConnPool}; -use super::http_conn_pool::{self, poll_http2_client, HttpConnPool, Send}; -use super::local_conn_pool::{self, LocalConnPool, EXT_NAME, EXT_SCHEMA, EXT_VERSION}; +use super::http_conn_pool::{self, HttpConnPool, Send, poll_http2_client}; +use super::local_conn_pool::{self, EXT_NAME, EXT_SCHEMA, EXT_VERSION, LocalConnPool}; use crate::auth::backend::local::StaticAuthRules; use crate::auth::backend::{ComputeCredentials, ComputeUserInfo}; -use crate::auth::{self, check_peer_addr_is_in_list, AuthError}; +use crate::auth::{self, AuthError, check_peer_addr_is_in_list}; use crate::compute; use crate::compute_ctl::{ ComputeCtlError, ExtensionInstallRequest, Privilege, SetRoleGrantsRequest, }; use crate::config::{ComputeConfig, ProxyConfig}; use crate::context::RequestContext; +use crate::control_plane::CachedNodeInfo; use crate::control_plane::client::ApiLockError; use crate::control_plane::errors::{GetAuthInfoError, WakeComputeError}; use crate::control_plane::locks::ApiLocks; -use crate::control_plane::CachedNodeInfo; use crate::error::{ErrorKind, ReportableError, UserFacingError}; use crate::intern::EndpointIdInt; use crate::protocol2::ConnectionInfoExtra; @@ -75,10 +75,7 @@ impl PoolingBackend { let extra = ctx.extra(); let incoming_endpoint_id = match extra { None => String::new(), - Some(ConnectionInfoExtra::Aws { vpce_id }) => { - // Convert the vcpe_id to a string - String::from_utf8(vpce_id.to_vec()).unwrap_or_default() - } + Some(ConnectionInfoExtra::Aws { vpce_id }) => vpce_id.to_string(), Some(ConnectionInfoExtra::Azure { link_id }) => link_id.to_string(), }; diff --git a/proxy/src/serverless/cancel_set.rs b/proxy/src/serverless/cancel_set.rs index 6db986f1f7..ba8945afc5 100644 --- a/proxy/src/serverless/cancel_set.rs +++ b/proxy/src/serverless/cancel_set.rs @@ -6,7 +6,7 @@ use std::time::Duration; use indexmap::IndexMap; use parking_lot::Mutex; -use rand::{thread_rng, Rng}; +use rand::{Rng, thread_rng}; use rustc_hash::FxHasher; use tokio::time::Instant; use tokio_util::sync::CancellationToken; @@ -40,7 +40,7 @@ impl CancelSet { pub(crate) fn take(&self) -> Option { for _ in 0..4 { - if let Some(token) = self.take_raw(thread_rng().gen()) { + if let Some(token) = self.take_raw(thread_rng().r#gen()) { return Some(token); } tracing::trace!("failed to get cancel token"); @@ -68,7 +68,7 @@ impl CancelShard { fn take(&mut self, rng: usize) -> Option { NonZeroUsize::new(self.tokens.len()).and_then(|len| { // 10 second grace period so we don't cancel new connections - if self.tokens.get_index(rng % len)?.1 .0.elapsed() < Duration::from_secs(10) { + if self.tokens.get_index(rng % len)?.1.0.elapsed() < Duration::from_secs(10) { return None; } diff --git a/proxy/src/serverless/conn_pool.rs b/proxy/src/serverless/conn_pool.rs index 447103edce..6a9089fc2a 100644 --- a/proxy/src/serverless/conn_pool.rs +++ b/proxy/src/serverless/conn_pool.rs @@ -1,17 +1,17 @@ use std::fmt; use std::pin::pin; 
use std::sync::{Arc, Weak}; -use std::task::{ready, Poll}; +use std::task::{Poll, ready}; -use futures::future::poll_fn; use futures::Future; -use postgres_client::tls::NoTlsStream; +use futures::future::poll_fn; use postgres_client::AsyncMessage; +use postgres_client::tls::NoTlsStream; use smallvec::SmallVec; use tokio::net::TcpStream; use tokio::time::Instant; use tokio_util::sync::CancellationToken; -use tracing::{error, info, info_span, warn, Instrument}; +use tracing::{Instrument, error, info, info_span, warn}; #[cfg(test)] use { super::conn_pool_lib::GlobalConnPoolOptions, diff --git a/proxy/src/serverless/conn_pool_lib.rs b/proxy/src/serverless/conn_pool_lib.rs index a300198de4..933204994b 100644 --- a/proxy/src/serverless/conn_pool_lib.rs +++ b/proxy/src/serverless/conn_pool_lib.rs @@ -9,7 +9,8 @@ use clashmap::ClashMap; use parking_lot::RwLock; use postgres_client::ReadyForQueryStatus; use rand::Rng; -use tracing::{debug, info, Span}; +use smol_str::ToSmolStr; +use tracing::{Span, debug, info}; use super::backend::HttpConnError; use super::conn_pool::ClientDataRemote; @@ -19,8 +20,9 @@ use crate::auth::backend::ComputeUserInfo; use crate::context::RequestContext; use crate::control_plane::messages::{ColdStartInfo, MetricsAuxInfo}; use crate::metrics::{HttpEndpointPoolsGuard, Metrics}; +use crate::protocol2::ConnectionInfoExtra; use crate::types::{DbName, EndpointCacheKey, RoleName}; -use crate::usage_metrics::{Ids, MetricCounter, USAGE_METRICS}; +use crate::usage_metrics::{Ids, MetricCounter, TrafficDirection, USAGE_METRICS}; #[derive(Debug, Clone)] pub(crate) struct ConnInfo { @@ -473,7 +475,9 @@ where .http_pool_opened_connections .get_metric() .dec_by(clients_removed as i64); - info!("pool: performed global pool gc. removed {clients_removed} clients, total number of clients in pool is {size}"); + info!( + "pool: performed global pool gc. 
removed {clients_removed} clients, total number of clients in pool is {size}" + ); } let removed = current_len - new_len; @@ -635,15 +639,28 @@ impl Client { (&mut inner.inner, Discard { conn_info, pool }) } - pub(crate) fn metrics(&self) -> Arc { + pub(crate) fn metrics( + &self, + direction: TrafficDirection, + ctx: &RequestContext, + ) -> Arc { let aux = &self .inner .as_ref() .expect("client inner should not be removed") .aux; + + let private_link_id = match ctx.extra() { + None => None, + Some(ConnectionInfoExtra::Aws { vpce_id }) => Some(vpce_id.clone()), + Some(ConnectionInfoExtra::Azure { link_id }) => Some(link_id.to_smolstr()), + }; + USAGE_METRICS.register(Ids { endpoint_id: aux.endpoint_id, branch_id: aux.branch_id, + direction, + private_link_id, }) } } @@ -700,7 +717,9 @@ impl Discard<'_, C> { pub(crate) fn discard(&mut self) { let conn_info = &self.conn_info; if std::mem::take(self.pool).strong_count() > 0 { - info!("pool: throwing away connection '{conn_info}' because connection is potentially in a broken state"); + info!( + "pool: throwing away connection '{conn_info}' because connection is potentially in a broken state" + ); } } } diff --git a/proxy/src/serverless/http_conn_pool.rs b/proxy/src/serverless/http_conn_pool.rs index fde38d0de3..338a79b4b3 100644 --- a/proxy/src/serverless/http_conn_pool.rs +++ b/proxy/src/serverless/http_conn_pool.rs @@ -5,8 +5,9 @@ use std::sync::{Arc, Weak}; use hyper::client::conn::http2; use hyper_util::rt::{TokioExecutor, TokioIo}; use parking_lot::RwLock; +use smol_str::ToSmolStr; use tokio::net::TcpStream; -use tracing::{debug, error, info, info_span, Instrument}; +use tracing::{Instrument, debug, error, info, info_span}; use super::backend::HttpConnError; use super::conn_pool_lib::{ @@ -16,8 +17,9 @@ use super::conn_pool_lib::{ use crate::context::RequestContext; use crate::control_plane::messages::{ColdStartInfo, MetricsAuxInfo}; use crate::metrics::{HttpEndpointPoolsGuard, Metrics}; +use crate::protocol2::ConnectionInfoExtra; use crate::types::EndpointCacheKey; -use crate::usage_metrics::{Ids, MetricCounter, USAGE_METRICS}; +use crate::usage_metrics::{Ids, MetricCounter, TrafficDirection, USAGE_METRICS}; pub(crate) type Send = http2::SendRequest; pub(crate) type Connect = @@ -264,11 +266,24 @@ impl Client { Self { inner } } - pub(crate) fn metrics(&self) -> Arc { + pub(crate) fn metrics( + &self, + direction: TrafficDirection, + ctx: &RequestContext, + ) -> Arc { let aux = &self.inner.aux; + + let private_link_id = match ctx.extra() { + None => None, + Some(ConnectionInfoExtra::Aws { vpce_id }) => Some(vpce_id.clone()), + Some(ConnectionInfoExtra::Azure { link_id }) => Some(link_id.to_smolstr()), + }; + USAGE_METRICS.register(Ids { endpoint_id: aux.endpoint_id, branch_id: aux.branch_id, + direction, + private_link_id, }) } } diff --git a/proxy/src/serverless/json.rs b/proxy/src/serverless/json.rs index ab012bd020..fbd12ad9cb 100644 --- a/proxy/src/serverless/json.rs +++ b/proxy/src/serverless/json.rs @@ -1,5 +1,5 @@ -use postgres_client::types::{Kind, Type}; use postgres_client::Row; +use postgres_client::types::{Kind, Type}; use serde_json::{Map, Value}; // diff --git a/proxy/src/serverless/local_conn_pool.rs b/proxy/src/serverless/local_conn_pool.rs index 137a2d6377..8426a0810e 100644 --- a/proxy/src/serverless/local_conn_pool.rs +++ b/proxy/src/serverless/local_conn_pool.rs @@ -11,24 +11,24 @@ use std::collections::HashMap; use std::pin::pin; -use std::sync::atomic::AtomicUsize; use std::sync::Arc; -use std::task::{ready, Poll}; 
+use std::sync::atomic::AtomicUsize; +use std::task::{Poll, ready}; use std::time::Duration; use ed25519_dalek::{Signature, Signer, SigningKey}; -use futures::future::poll_fn; use futures::Future; +use futures::future::poll_fn; use indexmap::IndexMap; use jose_jwk::jose_b64::base64ct::{Base64UrlUnpadded, Encoding}; use parking_lot::RwLock; -use postgres_client::tls::NoTlsStream; use postgres_client::AsyncMessage; +use postgres_client::tls::NoTlsStream; use serde_json::value::RawValue; use tokio::net::TcpStream; use tokio::time::Instant; use tokio_util::sync::CancellationToken; -use tracing::{debug, error, info, info_span, warn, Instrument}; +use tracing::{Instrument, debug, error, info, info_span, warn}; use super::backend::HttpConnError; use super::conn_pool_lib::{ @@ -389,6 +389,9 @@ mod tests { // }); // println!("{}", serde_json::to_string(&jwk).unwrap()); - assert_eq!(jwt, "eyJhbGciOiJFZERTQSJ9.eyJmb28iOiJiYXIiLCJqdGkiOjIsIm5lc3RlZCI6eyJqdGkiOiJ0cmlja3kgbmVzdGluZyJ9fQ.Cvyc2By33KI0f0obystwdy8PN111L3Sc9_Mr2CU3XshtSqSdxuRxNEZGbb_RvyJf2IzheC_s7aBZ-jLeQ9N0Bg"); + assert_eq!( + jwt, + "eyJhbGciOiJFZERTQSJ9.eyJmb28iOiJiYXIiLCJqdGkiOjIsIm5lc3RlZCI6eyJqdGkiOiJ0cmlja3kgbmVzdGluZyJ9fQ.Cvyc2By33KI0f0obystwdy8PN111L3Sc9_Mr2CU3XshtSqSdxuRxNEZGbb_RvyJf2IzheC_s7aBZ-jLeQ9N0Bg" + ); } } diff --git a/proxy/src/serverless/mod.rs b/proxy/src/serverless/mod.rs index 8289500159..acd6a05718 100644 --- a/proxy/src/serverless/mod.rs +++ b/proxy/src/serverless/mod.rs @@ -15,7 +15,7 @@ mod sql_over_http; mod websocket; use std::net::{IpAddr, SocketAddr}; -use std::pin::{pin, Pin}; +use std::pin::{Pin, pin}; use std::sync::Arc; use anyhow::Context; @@ -23,8 +23,8 @@ use async_trait::async_trait; use atomic_take::AtomicTake; use bytes::Bytes; pub use conn_pool_lib::GlobalConnPoolOptions; -use futures::future::{select, Either}; use futures::TryFutureExt; +use futures::future::{Either, select}; use http::{Method, Response, StatusCode}; use http_body_util::combinators::BoxBody; use http_body_util::{BodyExt, Empty}; @@ -32,23 +32,23 @@ use http_utils::error::ApiError; use hyper::body::Incoming; use hyper_util::rt::TokioExecutor; use hyper_util::server::conn::auto::Builder; -use rand::rngs::StdRng; use rand::SeedableRng; -use sql_over_http::{uuid_to_header_value, NEON_REQUEST_ID}; +use rand::rngs::StdRng; +use sql_over_http::{NEON_REQUEST_ID, uuid_to_header_value}; use tokio::io::{AsyncRead, AsyncWrite}; use tokio::net::{TcpListener, TcpStream}; use tokio::time::timeout; use tokio_rustls::TlsAcceptor; use tokio_util::sync::CancellationToken; use tokio_util::task::TaskTracker; -use tracing::{info, warn, Instrument}; +use tracing::{Instrument, info, warn}; use crate::cancellation::CancellationHandler; use crate::config::{ProxyConfig, ProxyProtocolV2}; use crate::context::RequestContext; use crate::ext::TaskExt; use crate::metrics::Metrics; -use crate::protocol2::{read_proxy_protocol, ChainRW, ConnectHeader, ConnectionInfo}; +use crate::protocol2::{ChainRW, ConnectHeader, ConnectionInfo, read_proxy_protocol}; use crate::proxy::run_until_cancelled; use crate::rate_limiter::EndpointRateLimiter; use crate::serverless::backend::PoolingBackend; @@ -438,6 +438,14 @@ async fn request_handler( &config.region, ); + ctx.set_user_agent( + request + .headers() + .get(hyper::header::USER_AGENT) + .and_then(|h| h.to_str().ok()) + .map(Into::into), + ); + let span = ctx.span(); info!(parent: &span, "performing websocket upgrade"); diff --git a/proxy/src/serverless/sql_over_http.rs b/proxy/src/serverless/sql_over_http.rs index 
5982fe225d..93dd531f70 100644 --- a/proxy/src/serverless/sql_over_http.rs +++ b/proxy/src/serverless/sql_over_http.rs @@ -2,23 +2,23 @@ use std::pin::pin; use std::sync::Arc; use bytes::Bytes; -use futures::future::{select, try_join, Either}; +use futures::future::{Either, select, try_join}; use futures::{StreamExt, TryFutureExt}; -use http::header::AUTHORIZATION; use http::Method; +use http::header::AUTHORIZATION; use http_body_util::combinators::BoxBody; use http_body_util::{BodyExt, Full}; use http_utils::error::ApiError; use hyper::body::Incoming; use hyper::http::{HeaderName, HeaderValue}; -use hyper::{header, HeaderMap, Request, Response, StatusCode}; +use hyper::{HeaderMap, Request, Response, StatusCode, header}; use indexmap::IndexMap; use postgres_client::error::{DbError, ErrorPosition, SqlState}; use postgres_client::{GenericClient, IsolationLevel, NoTls, ReadyForQueryStatus, Transaction}; use pq_proto::StartupMessageParamsBuilder; use serde::Serialize; -use serde_json::value::RawValue; use serde_json::Value; +use serde_json::value::RawValue; use tokio::time::{self, Instant}; use tokio_util::sync::CancellationToken; use tracing::{debug, error, info}; @@ -31,18 +31,18 @@ use super::conn_pool::{AuthData, ConnInfoWithAuth}; use super::conn_pool_lib::{self, ConnInfo}; use super::error::HttpCodeError; use super::http_util::json_response; -use super::json::{json_to_pg_text, pg_text_row_to_json, JsonConversionError}; +use super::json::{JsonConversionError, json_to_pg_text, pg_text_row_to_json}; use crate::auth::backend::{ComputeCredentialKeys, ComputeUserInfo}; -use crate::auth::{endpoint_sni, ComputeUserInfoParseError}; +use crate::auth::{ComputeUserInfoParseError, endpoint_sni}; use crate::config::{AuthenticationConfig, HttpConfig, ProxyConfig, TlsConfig}; use crate::context::RequestContext; use crate::error::{ErrorKind, ReportableError, UserFacingError}; -use crate::http::{read_body_with_limit, ReadBodyError}; +use crate::http::{ReadBodyError, read_body_with_limit}; use crate::metrics::{HttpDirection, Metrics}; -use crate::proxy::{run_until_cancelled, NeonOptions}; +use crate::proxy::{NeonOptions, run_until_cancelled}; use crate::serverless::backend::HttpConnError; use crate::types::{DbName, RoleName}; -use crate::usage_metrics::{MetricCounter, MetricCounterRecorder}; +use crate::usage_metrics::{MetricCounter, MetricCounterRecorder, TrafficDirection}; #[derive(serde::Deserialize)] #[serde(rename_all = "camelCase")] @@ -209,7 +209,7 @@ fn get_conn_info( } } Some(url::Host::Ipv4(_) | url::Host::Ipv6(_)) | None => { - return Err(ConnInfoError::MissingHostname) + return Err(ConnInfoError::MissingHostname); } }; ctx.set_endpoint_id(endpoint.clone()); @@ -228,6 +228,13 @@ fn get_conn_info( } } + ctx.set_user_agent( + headers + .get(hyper::header::USER_AGENT) + .and_then(|h| h.to_str().ok()) + .map(Into::into), + ); + let user_info = ComputeUserInfo { endpoint, user: username, @@ -745,7 +752,7 @@ async fn handle_db_inner( } }; - let metrics = client.metrics(); + let metrics = client.metrics(TrafficDirection::Egress, ctx); let len = json_output.len(); let response = response @@ -818,7 +825,7 @@ async fn handle_auth_broker_inner( .expect("all headers and params received via hyper should be valid for request"); // todo: map body to count egress - let _metrics = client.metrics(); + let _metrics = client.metrics(TrafficDirection::Egress, ctx); Ok(client .inner @@ -1021,7 +1028,7 @@ async fn query_to_json( data: QueryData, current_size: &mut usize, parsed_headers: HttpHeaders, -) -> 
Result<(ReadyForQueryStatus, impl Serialize), SqlOverHttpError> { +) -> Result<(ReadyForQueryStatus, impl Serialize + use), SqlOverHttpError> { let query_start = Instant::now(); let query_params = data.params; @@ -1118,10 +1125,10 @@ enum Discard<'a> { } impl Client { - fn metrics(&self) -> Arc { + fn metrics(&self, direction: TrafficDirection, ctx: &RequestContext) -> Arc { match self { - Client::Remote(client) => client.metrics(), - Client::Local(local_client) => local_client.metrics(), + Client::Remote(client) => client.metrics(direction, ctx), + Client::Local(local_client) => local_client.metrics(direction, ctx), } } diff --git a/proxy/src/serverless/websocket.rs b/proxy/src/serverless/websocket.rs index 585a7d63b2..c4baeeb5cc 100644 --- a/proxy/src/serverless/websocket.rs +++ b/proxy/src/serverless/websocket.rs @@ -1,6 +1,6 @@ use std::pin::Pin; use std::sync::Arc; -use std::task::{ready, Context, Poll}; +use std::task::{Context, Poll, ready}; use anyhow::Context as _; use bytes::{Buf, BufMut, Bytes, BytesMut}; @@ -15,9 +15,9 @@ use tracing::warn; use crate::cancellation::CancellationHandler; use crate::config::ProxyConfig; use crate::context::RequestContext; -use crate::error::{io_error, ReportableError}; +use crate::error::{ReportableError, io_error}; use crate::metrics::Metrics; -use crate::proxy::{handle_client, ClientMode, ErrorSource}; +use crate::proxy::{ClientMode, ErrorSource, handle_client}; use crate::rate_limiter::EndpointRateLimiter; pin_project! { @@ -184,11 +184,11 @@ mod tests { use framed_websockets::WebSocketServer; use futures::{SinkExt, StreamExt}; - use tokio::io::{duplex, AsyncReadExt, AsyncWriteExt}; + use tokio::io::{AsyncReadExt, AsyncWriteExt, duplex}; use tokio::task::JoinSet; - use tokio_tungstenite::tungstenite::protocol::Role; - use tokio_tungstenite::tungstenite::Message; use tokio_tungstenite::WebSocketStream; + use tokio_tungstenite::tungstenite::Message; + use tokio_tungstenite::tungstenite::protocol::Role; use super::WebSocketRw; diff --git a/proxy/src/signals.rs b/proxy/src/signals.rs index 0b675683c0..32b2344a1c 100644 --- a/proxy/src/signals.rs +++ b/proxy/src/signals.rs @@ -12,7 +12,7 @@ pub async fn handle( where F: FnMut(), { - use tokio::signal::unix::{signal, SignalKind}; + use tokio::signal::unix::{SignalKind, signal}; let mut hangup = signal(SignalKind::hangup())?; let mut interrupt = signal(SignalKind::interrupt())?; diff --git a/proxy/src/tls/postgres_rustls.rs b/proxy/src/tls/postgres_rustls.rs index 0ad279b635..f09e916a1d 100644 --- a/proxy/src/tls/postgres_rustls.rs +++ b/proxy/src/tls/postgres_rustls.rs @@ -2,8 +2,8 @@ use std::convert::TryFrom; use std::sync::Arc; use postgres_client::tls::MakeTlsConnect; -use rustls::pki_types::ServerName; use rustls::ClientConfig; +use rustls::pki_types::ServerName; use tokio::io::{AsyncRead, AsyncWrite}; mod private { @@ -15,8 +15,8 @@ mod private { use postgres_client::tls::{ChannelBinding, TlsConnect}; use rustls::pki_types::ServerName; use tokio::io::{AsyncRead, AsyncWrite, ReadBuf}; - use tokio_rustls::client::TlsStream; use tokio_rustls::TlsConnector; + use tokio_rustls::client::TlsStream; use crate::tls::TlsServerEndPoint; diff --git a/proxy/src/tls/server_config.rs b/proxy/src/tls/server_config.rs index 2cc1657eea..903c0b712b 100644 --- a/proxy/src/tls/server_config.rs +++ b/proxy/src/tls/server_config.rs @@ -1,12 +1,12 @@ use std::collections::{HashMap, HashSet}; use std::sync::Arc; -use anyhow::{bail, Context}; +use anyhow::{Context, bail}; use itertools::Itertools; use 
rustls::crypto::ring::{self, sign}; use rustls::pki_types::{CertificateDer, PrivateKeyDer}; -use super::{TlsServerEndPoint, PG_ALPN_PROTOCOL}; +use super::{PG_ALPN_PROTOCOL, TlsServerEndPoint}; pub struct TlsConfig { pub config: Arc, diff --git a/proxy/src/usage_metrics.rs b/proxy/src/usage_metrics.rs index d369e3742f..004d268fa1 100644 --- a/proxy/src/usage_metrics.rs +++ b/proxy/src/usage_metrics.rs @@ -2,20 +2,21 @@ //! and push them to a HTTP endpoint. use std::borrow::Cow; use std::convert::Infallible; -use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering}; use std::sync::Arc; +use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering}; use std::time::Duration; -use anyhow::{bail, Context}; +use anyhow::{Context, bail}; use async_compression::tokio::write::GzipEncoder; use bytes::Bytes; use chrono::{DateTime, Datelike, Timelike, Utc}; -use clashmap::mapref::entry::Entry; use clashmap::ClashMap; -use consumption_metrics::{idempotency_key, Event, EventChunk, EventType, CHUNK_SIZE}; +use clashmap::mapref::entry::Entry; +use consumption_metrics::{CHUNK_SIZE, Event, EventChunk, EventType, idempotency_key}; use once_cell::sync::Lazy; use remote_storage::{GenericRemoteStorage, RemotePath, TimeoutOrCancel}; use serde::{Deserialize, Serialize}; +use smol_str::SmolStr; use tokio::io::AsyncWriteExt; use tokio_util::sync::CancellationToken; use tracing::{error, info, instrument, trace, warn}; @@ -43,6 +44,33 @@ const HTTP_REPORTING_RETRY_DURATION: Duration = Duration::from_secs(60); pub(crate) struct Ids { pub(crate) endpoint_id: EndpointIdInt, pub(crate) branch_id: BranchIdInt, + pub(crate) direction: TrafficDirection, + #[serde(with = "none_as_empty_string")] + pub(crate) private_link_id: Option, +} + +mod none_as_empty_string { + use serde::Deserialize; + use smol_str::SmolStr; + + #[allow(clippy::ref_option)] + pub fn serialize(t: &Option, s: S) -> Result { + s.serialize_str(t.as_deref().unwrap_or("")) + } + + pub fn deserialize<'de, D: serde::Deserializer<'de>>( + d: D, + ) -> Result, D::Error> { + let s = SmolStr::deserialize(d)?; + if s.is_empty() { Ok(None) } else { Ok(Some(s)) } + } +} + +#[derive(Eq, Hash, PartialEq, Serialize, Deserialize, Debug, Clone)] +#[serde(rename_all = "lowercase")] +pub(crate) enum TrafficDirection { + Ingress, + Egress, } pub(crate) trait MetricCounterRecorder { @@ -505,6 +533,8 @@ mod tests { let counter = metrics.register(Ids { endpoint_id: (&EndpointId::from("e1")).into(), branch_id: (&BranchId::from("b1")).into(), + direction: TrafficDirection::Egress, + private_link_id: None, }); // the counter should be observed despite 0 egress diff --git a/pyproject.toml b/pyproject.toml index 92a660c233..c6e5073bcd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ prometheus-client = "^0.14.1" pytest-timeout = "^2.3.1" Werkzeug = "^3.0.6" pytest-order = "^1.1.0" -allure-pytest = "^2.13.2" +allure-pytest = "^2.13.5" pytest-asyncio = "^0.21.0" toml = "^0.10.2" psutil = "^5.9.4" diff --git a/rust-toolchain.toml b/rust-toolchain.toml index 38a7f202ba..591d60ea79 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,5 +1,5 @@ [toolchain] -channel = "1.84.1" +channel = "1.85.0" profile = "default" # The default profile includes rustc, rust-std, cargo, rust-docs, rustfmt and clippy. 
# https://rust-lang.github.io/rustup/concepts/profiles.html diff --git a/safekeeper/Cargo.toml b/safekeeper/Cargo.toml index d12ebc1030..bb937ad56a 100644 --- a/safekeeper/Cargo.toml +++ b/safekeeper/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "safekeeper" version = "0.1.0" -edition.workspace = true +edition = "2024" license.workspace = true [features] @@ -31,7 +31,6 @@ futures.workspace = true once_cell.workspace = true parking_lot.workspace = true pageserver_api.workspace = true -postgres.workspace = true postgres-protocol.workspace = true pprof.workspace = true rand.workspace = true diff --git a/safekeeper/benches/receive_wal.rs b/safekeeper/benches/receive_wal.rs index 19c6662e74..122630d953 100644 --- a/safekeeper/benches/receive_wal.rs +++ b/safekeeper/benches/receive_wal.rs @@ -4,7 +4,7 @@ use std::io::Write as _; use bytes::BytesMut; use camino_tempfile::tempfile; -use criterion::{criterion_group, criterion_main, BatchSize, Bencher, Criterion}; +use criterion::{BatchSize, Bencher, Criterion, criterion_group, criterion_main}; use itertools::Itertools as _; use postgres_ffi::v17::wal_generator::{LogicalMessageGenerator, WalGenerator}; use pprof::criterion::{Output, PProfProfiler}; @@ -13,6 +13,7 @@ use safekeeper::safekeeper::{ AcceptorProposerMessage, AppendRequest, AppendRequestHeader, ProposerAcceptorMessage, }; use safekeeper::test_utils::Env; +use safekeeper_api::membership::SafekeeperGeneration as Generation; use tokio::io::AsyncWriteExt as _; use utils::id::{NodeId, TenantTimelineId}; use utils::lsn::Lsn; @@ -26,7 +27,7 @@ const GB: usize = 1024 * MB; static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; #[allow(non_upper_case_globals)] -#[export_name = "malloc_conf"] +#[unsafe(export_name = "malloc_conf")] pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:21\0"; // Register benchmarks with Criterion. 
@@ -88,13 +89,12 @@ fn bench_process_msg(c: &mut Criterion) { let (lsn, record) = walgen.next().expect("endless WAL"); ProposerAcceptorMessage::AppendRequest(AppendRequest { h: AppendRequestHeader { + generation: Generation::new(0), term: 1, - term_start_lsn: Lsn(0), begin_lsn: lsn, end_lsn: lsn + record.len() as u64, commit_lsn: if commit { lsn } else { Lsn(0) }, // commit previous record truncate_lsn: Lsn(0), - proposer_uuid: [0; 16], }, wal_data: record, }) @@ -160,13 +160,12 @@ fn bench_wal_acceptor(c: &mut Criterion) { .take(n) .map(|(lsn, record)| AppendRequest { h: AppendRequestHeader { + generation: Generation::new(0), term: 1, - term_start_lsn: Lsn(0), begin_lsn: lsn, end_lsn: lsn + record.len() as u64, commit_lsn: Lsn(0), truncate_lsn: Lsn(0), - proposer_uuid: [0; 16], }, wal_data: record, }) @@ -262,13 +261,12 @@ fn bench_wal_acceptor_throughput(c: &mut Criterion) { runtime.block_on(async { let reqgen = walgen.take(count).map(|(lsn, record)| AppendRequest { h: AppendRequestHeader { + generation: Generation::new(0), term: 1, - term_start_lsn: Lsn(0), begin_lsn: lsn, end_lsn: lsn + record.len() as u64, commit_lsn: if commit { lsn } else { Lsn(0) }, // commit previous record truncate_lsn: Lsn(0), - proposer_uuid: [0; 16], }, wal_data: record, }); diff --git a/safekeeper/src/bin/safekeeper.rs b/safekeeper/src/bin/safekeeper.rs index 6cc53e0d23..10fc4a4b59 100644 --- a/safekeeper/src/bin/safekeeper.rs +++ b/safekeeper/src/bin/safekeeper.rs @@ -1,52 +1,41 @@ // // Main entry point for the safekeeper executable // -use anyhow::{bail, Context, Result}; -use camino::{Utf8Path, Utf8PathBuf}; -use clap::{ArgAction, Parser}; -use futures::future::BoxFuture; -use futures::stream::FuturesUnordered; -use futures::{FutureExt, StreamExt}; -use remote_storage::RemoteStorageConfig; -use sd_notify::NotifyState; -use tokio::runtime::Handle; -use tokio::signal::unix::{signal, SignalKind}; -use tokio::task::JoinError; -use utils::logging::SecretString; - -use std::env::{var, VarError}; +use std::env::{VarError, var}; use std::fs::{self, File}; use std::io::{ErrorKind, Write}; use std::str::FromStr; use std::sync::Arc; use std::time::{Duration, Instant}; -use storage_broker::Uri; - -use tracing::*; -use utils::pid_file; +use anyhow::{Context, Result, bail}; +use camino::{Utf8Path, Utf8PathBuf}; +use clap::{ArgAction, Parser}; +use futures::future::BoxFuture; +use futures::stream::FuturesUnordered; +use futures::{FutureExt, StreamExt}; use metrics::set_build_info_metric; +use remote_storage::RemoteStorageConfig; use safekeeper::defaults::{ DEFAULT_CONTROL_FILE_SAVE_INTERVAL, DEFAULT_EVICTION_MIN_RESIDENT, DEFAULT_HEARTBEAT_TIMEOUT, DEFAULT_HTTP_LISTEN_ADDR, DEFAULT_MAX_OFFLOADER_LAG_BYTES, DEFAULT_PARTIAL_BACKUP_CONCURRENCY, DEFAULT_PARTIAL_BACKUP_TIMEOUT, DEFAULT_PG_LISTEN_ADDR, }; -use safekeeper::http; -use safekeeper::wal_service; -use safekeeper::GlobalTimelines; -use safekeeper::SafeKeeperConf; -use safekeeper::{broker, WAL_SERVICE_RUNTIME}; -use safekeeper::{control_file, BROKER_RUNTIME}; -use safekeeper::{wal_backup, HTTP_RUNTIME}; -use storage_broker::DEFAULT_ENDPOINT; -use utils::auth::{JwtAuth, Scope, SwappableJwtAuth}; -use utils::{ - id::NodeId, - logging::{self, LogFormat}, - project_build_tag, project_git_version, - sentry_init::init_sentry, - tcp_listener, +use safekeeper::{ + BROKER_RUNTIME, GlobalTimelines, HTTP_RUNTIME, SafeKeeperConf, WAL_SERVICE_RUNTIME, broker, + control_file, http, wal_backup, wal_service, }; +use sd_notify::NotifyState; +use storage_broker::{DEFAULT_ENDPOINT, 
Uri}; +use tokio::runtime::Handle; +use tokio::signal::unix::{SignalKind, signal}; +use tokio::task::JoinError; +use tracing::*; +use utils::auth::{JwtAuth, Scope, SwappableJwtAuth}; +use utils::id::NodeId; +use utils::logging::{self, LogFormat, SecretString}; +use utils::sentry_init::init_sentry; +use utils::{pid_file, project_build_tag, project_git_version, tcp_listener}; #[global_allocator] static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; @@ -55,7 +44,7 @@ static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; /// This adds roughly 3% overhead for allocations on average, which is acceptable considering /// performance-sensitive code will avoid allocations as far as possible anyway. #[allow(non_upper_case_globals)] -#[export_name = "malloc_conf"] +#[unsafe(export_name = "malloc_conf")] pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:21\0"; const PID_FILE_NAME: &str = "safekeeper.pid"; diff --git a/safekeeper/src/broker.rs b/safekeeper/src/broker.rs index 4b091e2c29..de6e275124 100644 --- a/safekeeper/src/broker.rs +++ b/safekeeper/src/broker.rs @@ -1,39 +1,25 @@ //! Communication with the broker, providing safekeeper peers and pageserver coordination. -use anyhow::anyhow; -use anyhow::bail; -use anyhow::Context; - -use anyhow::Error; -use anyhow::Result; - -use storage_broker::parse_proto_ttid; - -use storage_broker::proto::subscribe_safekeeper_info_request::SubscriptionKey as ProtoSubscriptionKey; -use storage_broker::proto::FilterTenantTimelineId; -use storage_broker::proto::MessageType; -use storage_broker::proto::SafekeeperDiscoveryResponse; -use storage_broker::proto::SubscribeByFilterRequest; -use storage_broker::proto::SubscribeSafekeeperInfoRequest; -use storage_broker::proto::TypeSubscription; -use storage_broker::proto::TypedMessage; -use storage_broker::Request; - -use std::sync::atomic::AtomicU64; use std::sync::Arc; -use std::time::Duration; -use std::time::Instant; -use std::time::UNIX_EPOCH; +use std::sync::atomic::AtomicU64; +use std::time::{Duration, Instant, UNIX_EPOCH}; + +use anyhow::{Context, Error, Result, anyhow, bail}; +use storage_broker::proto::subscribe_safekeeper_info_request::SubscriptionKey as ProtoSubscriptionKey; +use storage_broker::proto::{ + FilterTenantTimelineId, MessageType, SafekeeperDiscoveryResponse, SubscribeByFilterRequest, + SubscribeSafekeeperInfoRequest, TypeSubscription, TypedMessage, +}; +use storage_broker::{Request, parse_proto_ttid}; use tokio::task::JoinHandle; use tokio::time::sleep; use tracing::*; -use crate::metrics::BROKER_ITERATION_TIMELINES; -use crate::metrics::BROKER_PULLED_UPDATES; -use crate::metrics::BROKER_PUSHED_UPDATES; -use crate::metrics::BROKER_PUSH_ALL_UPDATES_SECONDS; -use crate::GlobalTimelines; -use crate::SafeKeeperConf; +use crate::metrics::{ + BROKER_ITERATION_TIMELINES, BROKER_PULLED_UPDATES, BROKER_PUSH_ALL_UPDATES_SECONDS, + BROKER_PUSHED_UPDATES, +}; +use crate::{GlobalTimelines, SafeKeeperConf}; const RETRY_INTERVAL_MSEC: u64 = 1000; const PUSH_INTERVAL_MSEC: u64 = 1000; diff --git a/safekeeper/src/control_file.rs b/safekeeper/src/control_file.rs index 35aebfd8ad..1bf3e4cac1 100644 --- a/safekeeper/src/control_file.rs +++ b/safekeeper/src/control_file.rs @@ -1,24 +1,23 @@ //! Control file serialization, deserialization and persistence. 
-use anyhow::{bail, ensure, Context, Result}; -use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; -use camino::{Utf8Path, Utf8PathBuf}; -use safekeeper_api::membership::INVALID_GENERATION; -use tokio::fs::File; -use tokio::io::AsyncWriteExt; -use utils::crashsafe::durable_rename; - use std::future::Future; use std::io::Read; use std::ops::Deref; use std::path::Path; use std::time::Instant; -use crate::control_file_upgrade::downgrade_v10_to_v9; -use crate::control_file_upgrade::upgrade_control_file; +use anyhow::{Context, Result, bail, ensure}; +use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; +use camino::{Utf8Path, Utf8PathBuf}; +use safekeeper_api::membership::INVALID_GENERATION; +use tokio::fs::File; +use tokio::io::AsyncWriteExt; +use utils::bin_ser::LeSer; +use utils::crashsafe::durable_rename; + +use crate::control_file_upgrade::{downgrade_v10_to_v9, upgrade_control_file}; use crate::metrics::PERSIST_CONTROL_FILE_SECONDS; use crate::state::{EvictionState, TimelinePersistentState}; -use utils::bin_ser::LeSer; pub const SK_MAGIC: u32 = 0xcafeceefu32; pub const SK_FORMAT_VERSION: u32 = 10; @@ -234,11 +233,12 @@ impl Storage for FileStorage { #[cfg(test)] mod test { - use super::*; use safekeeper_api::membership::{Configuration, MemberSet, SafekeeperGeneration}; use tokio::fs; use utils::lsn::Lsn; + use super::*; + const NO_SYNC: bool = true; #[tokio::test] diff --git a/safekeeper/src/control_file_upgrade.rs b/safekeeper/src/control_file_upgrade.rs index 904e79f976..1ad9e62f9b 100644 --- a/safekeeper/src/control_file_upgrade.rs +++ b/safekeeper/src/control_file_upgrade.rs @@ -1,24 +1,19 @@ //! Code to deal with safekeeper control file upgrades use std::vec; -use crate::{ - safekeeper::{AcceptorState, PgUuid, TermHistory, TermLsn}, - state::{EvictionState, TimelinePersistentState}, - wal_backup_partial, -}; -use anyhow::{bail, Result}; +use anyhow::{Result, bail}; use pq_proto::SystemId; -use safekeeper_api::{ - membership::{Configuration, INVALID_GENERATION}, - ServerInfo, Term, -}; +use safekeeper_api::membership::{Configuration, INVALID_GENERATION}; +use safekeeper_api::{ServerInfo, Term}; use serde::{Deserialize, Serialize}; use tracing::*; -use utils::{ - bin_ser::LeSer, - id::{NodeId, TenantId, TimelineId}, - lsn::Lsn, -}; +use utils::bin_ser::LeSer; +use utils::id::{NodeId, TenantId, TimelineId}; +use utils::lsn::Lsn; + +use crate::safekeeper::{AcceptorState, PgUuid, TermHistory, TermLsn}; +use crate::state::{EvictionState, TimelinePersistentState}; +use crate::wal_backup_partial; /// Persistent consensus state of the acceptor. 
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] @@ -552,11 +547,11 @@ pub fn downgrade_v10_to_v9(state: &TimelinePersistentState) -> TimelinePersisten mod tests { use std::str::FromStr; - use utils::{id::NodeId, Hex}; - - use crate::control_file_upgrade::PersistedPeerInfo; + use utils::Hex; + use utils::id::NodeId; use super::*; + use crate::control_file_upgrade::PersistedPeerInfo; #[test] fn roundtrip_v1() { diff --git a/safekeeper/src/copy_timeline.rs b/safekeeper/src/copy_timeline.rs index 10a761e1f5..11daff22cb 100644 --- a/safekeeper/src/copy_timeline.rs +++ b/safekeeper/src/copy_timeline.rs @@ -1,24 +1,22 @@ -use anyhow::{bail, Result}; +use std::sync::Arc; + +use anyhow::{Result, bail}; use camino::Utf8PathBuf; use postgres_ffi::{MAX_SEND_SIZE, WAL_SEGMENT_SIZE}; use safekeeper_api::membership::Configuration; -use std::sync::Arc; -use tokio::{ - fs::OpenOptions, - io::{AsyncSeekExt, AsyncWriteExt}, -}; +use tokio::fs::OpenOptions; +use tokio::io::{AsyncSeekExt, AsyncWriteExt}; use tracing::{info, warn}; -use utils::{id::TenantTimelineId, lsn::Lsn}; +use utils::id::TenantTimelineId; +use utils::lsn::Lsn; -use crate::{ - control_file::FileStorage, - state::TimelinePersistentState, - timeline::{TimelineError, WalResidentTimeline}, - timelines_global_map::{create_temp_timeline_dir, validate_temp_timeline}, - wal_backup::copy_s3_segments, - wal_storage::{wal_file_paths, WalReader}, - GlobalTimelines, -}; +use crate::GlobalTimelines; +use crate::control_file::FileStorage; +use crate::state::TimelinePersistentState; +use crate::timeline::{TimelineError, WalResidentTimeline}; +use crate::timelines_global_map::{create_temp_timeline_dir, validate_temp_timeline}; +use crate::wal_backup::copy_s3_segments; +use crate::wal_storage::{WalReader, wal_file_paths}; // we don't want to have more than 10 segments on disk after copy, because they take space const MAX_BACKUP_LAG: u64 = 10 * WAL_SEGMENT_SIZE as u64; diff --git a/safekeeper/src/debug_dump.rs b/safekeeper/src/debug_dump.rs index 19362a0992..68a38e1498 100644 --- a/safekeeper/src/debug_dump.rs +++ b/safekeeper/src/debug_dump.rs @@ -2,37 +2,25 @@ use std::fs; use std::fs::DirEntry; -use std::io::BufReader; -use std::io::Read; +use std::io::{BufReader, Read}; use std::path::PathBuf; use std::sync::Arc; -use anyhow::bail; -use anyhow::Result; -use camino::Utf8Path; -use camino::Utf8PathBuf; +use anyhow::{Result, bail}; +use camino::{Utf8Path, Utf8PathBuf}; use chrono::{DateTime, Utc}; -use postgres_ffi::XLogSegNo; -use postgres_ffi::MAX_SEND_SIZE; -use safekeeper_api::models::WalSenderState; -use serde::Deserialize; -use serde::Serialize; - use postgres_ffi::v14::xlog_utils::{IsPartialXLogFileName, IsXLogFileName}; +use postgres_ffi::{MAX_SEND_SIZE, XLogSegNo}; +use safekeeper_api::models::WalSenderState; +use serde::{Deserialize, Serialize}; use sha2::{Digest, Sha256}; -use utils::id::NodeId; -use utils::id::TenantTimelineId; -use utils::id::{TenantId, TimelineId}; +use utils::id::{NodeId, TenantId, TenantTimelineId, TimelineId}; use utils::lsn::Lsn; use crate::safekeeper::TermHistory; -use crate::state::TimelineMemState; -use crate::state::TimelinePersistentState; -use crate::timeline::get_timeline_dir; -use crate::timeline::WalResidentTimeline; -use crate::timeline_manager; -use crate::GlobalTimelines; -use crate::SafeKeeperConf; +use crate::state::{TimelineMemState, TimelinePersistentState}; +use crate::timeline::{WalResidentTimeline, get_timeline_dir}; +use crate::{GlobalTimelines, SafeKeeperConf, timeline_manager}; /// Various 
filters that influence the resulting JSON output. #[derive(Debug, Serialize, Deserialize, Clone)] diff --git a/safekeeper/src/handler.rs b/safekeeper/src/handler.rs index e77eeb4130..5ca3d1b7c2 100644 --- a/safekeeper/src/handler.rs +++ b/safekeeper/src/handler.rs @@ -1,35 +1,31 @@ //! Part of Safekeeper pretending to be Postgres, i.e. handling Postgres //! protocol commands. +use std::future::Future; +use std::str::{self, FromStr}; +use std::sync::Arc; + use anyhow::Context; use pageserver_api::models::ShardParameters; use pageserver_api::shard::{ShardIdentity, ShardStripeSize}; -use safekeeper_api::models::ConnectionId; +use postgres_backend::{PostgresBackend, QueryError}; +use postgres_ffi::PG_TLI; +use pq_proto::{BeMessage, FeStartupPacket, INT4_OID, RowDescriptor, TEXT_OID}; +use regex::Regex; use safekeeper_api::Term; -use std::future::Future; -use std::str::{self, FromStr}; -use std::sync::Arc; +use safekeeper_api::models::ConnectionId; use tokio::io::{AsyncRead, AsyncWrite}; -use tracing::{debug, info, info_span, Instrument}; +use tracing::{Instrument, debug, info, info_span}; +use utils::auth::{Claims, JwtAuth, Scope}; +use utils::id::{TenantId, TenantTimelineId, TimelineId}; +use utils::lsn::Lsn; use utils::postgres_client::PostgresClientProtocol; use utils::shard::{ShardCount, ShardNumber}; use crate::auth::check_permission; -use crate::json_ctrl::{handle_json_ctrl, AppendLogicalMessage}; - -use crate::metrics::{TrafficMetrics, PG_QUERIES_GAUGE}; +use crate::metrics::{PG_QUERIES_GAUGE, TrafficMetrics}; use crate::timeline::TimelineError; use crate::{GlobalTimelines, SafeKeeperConf}; -use postgres_backend::PostgresBackend; -use postgres_backend::QueryError; -use postgres_ffi::PG_TLI; -use pq_proto::{BeMessage, FeStartupPacket, RowDescriptor, INT4_OID, TEXT_OID}; -use regex::Regex; -use utils::auth::{Claims, JwtAuth, Scope}; -use utils::{ - id::{TenantId, TenantTimelineId, TimelineId}, - lsn::Lsn, -}; /// Safekeeper handler of postgres commands pub struct SafekeeperPostgresHandler { @@ -65,9 +61,6 @@ enum SafekeeperPostgresCommand { }, IdentifySystem, TimelineStatus, - JSONCtrl { - cmd: AppendLogicalMessage, - }, } fn parse_cmd(cmd: &str) -> anyhow::Result { @@ -137,11 +130,6 @@ fn parse_cmd(cmd: &str) -> anyhow::Result { Ok(SafekeeperPostgresCommand::IdentifySystem) } else if cmd.starts_with("TIMELINE_STATUS") { Ok(SafekeeperPostgresCommand::TimelineStatus) - } else if cmd.starts_with("JSON_CTRL") { - let cmd = cmd.strip_prefix("JSON_CTRL").context("invalid prefix")?; - Ok(SafekeeperPostgresCommand::JSONCtrl { - cmd: serde_json::from_str(cmd)?, - }) } else { anyhow::bail!("unsupported command {cmd}"); } @@ -153,7 +141,6 @@ fn cmd_to_string(cmd: &SafekeeperPostgresCommand) -> &str { SafekeeperPostgresCommand::StartReplication { .. } => "START_REPLICATION", SafekeeperPostgresCommand::TimelineStatus => "TIMELINE_STATUS", SafekeeperPostgresCommand::IdentifySystem => "IDENTIFY_SYSTEM", - SafekeeperPostgresCommand::JSONCtrl { .. 
} => "JSON_CTRL", } } @@ -362,9 +349,6 @@ impl postgres_backend::Handler } SafekeeperPostgresCommand::IdentifySystem => self.handle_identify_system(pgb).await, SafekeeperPostgresCommand::TimelineStatus => self.handle_timeline_status(pgb).await, - SafekeeperPostgresCommand::JSONCtrl { ref cmd } => { - handle_json_ctrl(self, pgb, cmd).await - } } }) } diff --git a/safekeeper/src/http/mod.rs b/safekeeper/src/http/mod.rs index 6e160b7a5e..f162985ef7 100644 --- a/safekeeper/src/http/mod.rs +++ b/safekeeper/src/http/mod.rs @@ -1,9 +1,9 @@ pub mod routes; -pub use routes::make_router; - -pub use safekeeper_api::models; use std::sync::Arc; +pub use routes::make_router; +pub use safekeeper_api::models; + use crate::{GlobalTimelines, SafeKeeperConf}; pub async fn task_main( diff --git a/safekeeper/src/http/routes.rs b/safekeeper/src/http/routes.rs index cd2ac5f44c..4f47331c85 100644 --- a/safekeeper/src/http/routes.rs +++ b/safekeeper/src/http/routes.rs @@ -1,51 +1,41 @@ -use http_utils::failpoints::failpoints_handler; -use hyper::{Body, Request, Response, StatusCode}; -use safekeeper_api::models; -use safekeeper_api::models::AcceptorStateStatus; -use safekeeper_api::models::PullTimelineRequest; -use safekeeper_api::models::SafekeeperStatus; -use safekeeper_api::models::TermSwitchApiEntry; -use safekeeper_api::models::TimelineStatus; -use safekeeper_api::ServerInfo; use std::collections::HashMap; use std::fmt; use std::io::Write as _; use std::str::FromStr; use std::sync::Arc; -use storage_broker::proto::SafekeeperTimelineInfo; -use storage_broker::proto::TenantTimelineId as ProtoTenantTimelineId; + +use http_utils::endpoint::{ + self, ChannelWriter, auth_middleware, check_permission_with, profile_cpu_handler, + profile_heap_handler, prometheus_metrics_handler, request_span, +}; +use http_utils::error::ApiError; +use http_utils::failpoints::failpoints_handler; +use http_utils::json::{json_request, json_response}; +use http_utils::request::{ensure_no_body, parse_query_param, parse_request_param}; +use http_utils::{RequestExt, RouterBuilder}; +use hyper::{Body, Request, Response, StatusCode}; +use postgres_ffi::WAL_SEGMENT_SIZE; +use safekeeper_api::models::{ + AcceptorStateStatus, PullTimelineRequest, SafekeeperStatus, SkTimelineInfo, TermSwitchApiEntry, + TimelineCopyRequest, TimelineCreateRequest, TimelineStatus, TimelineTermBumpRequest, +}; +use safekeeper_api::{ServerInfo, membership, models}; +use storage_broker::proto::{SafekeeperTimelineInfo, TenantTimelineId as ProtoTenantTimelineId}; use tokio::sync::mpsc; use tokio::task; use tokio_stream::wrappers::ReceiverStream; use tokio_util::sync::CancellationToken; -use tracing::{info_span, Instrument}; - -use http_utils::endpoint::{ - profile_cpu_handler, profile_heap_handler, prometheus_metrics_handler, request_span, -}; -use http_utils::{ - endpoint::{self, auth_middleware, check_permission_with, ChannelWriter}, - error::ApiError, - json::{json_request, json_response}, - request::{ensure_no_body, parse_query_param, parse_request_param}, - RequestExt, RouterBuilder, -}; - -use postgres_ffi::WAL_SEGMENT_SIZE; -use safekeeper_api::models::{SkTimelineInfo, TimelineCopyRequest}; -use safekeeper_api::models::{TimelineCreateRequest, TimelineTermBumpRequest}; -use utils::{ - auth::SwappableJwtAuth, - id::{TenantId, TenantTimelineId, TimelineId}, - lsn::Lsn, -}; +use tracing::{Instrument, info_span}; +use utils::auth::SwappableJwtAuth; +use utils::id::{TenantId, TenantTimelineId, TimelineId}; +use utils::lsn::Lsn; use 
crate::debug_dump::TimelineDigestRequest; use crate::safekeeper::TermLsn; -use crate::timelines_global_map::TimelineDeleteForceResult; -use crate::GlobalTimelines; -use crate::SafeKeeperConf; -use crate::{copy_timeline, debug_dump, patch_control_file, pull_timeline}; +use crate::timelines_global_map::{DeleteOrExclude, TimelineDeleteResult}; +use crate::{ + GlobalTimelines, SafeKeeperConf, copy_timeline, debug_dump, patch_control_file, pull_timeline, +}; /// Healthcheck handler. async fn status_handler(request: Request) -> Result, ApiError> { @@ -83,10 +73,13 @@ async fn tenant_delete_handler(mut request: Request) -> Result) -> Result>(), + .collect::>(), ) } @@ -218,12 +211,15 @@ async fn timeline_delete_handler(mut request: Request) -> Result) -> Result for ApiError { + fn from(de: DeleteOrExcludeError) -> ApiError { + match de { + DeleteOrExcludeError::Conflict { + requested: _, + current: _, + } => ApiError::Conflict(de.to_string()), + DeleteOrExcludeError::Other(e) => ApiError::InternalServerError(e), + } + } +} + +/// Remove timeline locally after this node has been excluded from the +/// membership configuration. The body is the same as in the membership endpoint +/// -- conf where node is excluded -- and in principle single ep could be used +/// for both actions, but since this is a data deletion op let's keep them +/// separate. +async fn timeline_exclude_handler(mut request: Request) -> Result, ApiError> { + let ttid = TenantTimelineId::new( + parse_request_param(&request, "tenant_id")?, + parse_request_param(&request, "timeline_id")?, + ); + check_permission(&request, Some(ttid.tenant_id))?; + + let global_timelines = get_global_timelines(&request); + let data: models::TimelineMembershipSwitchRequest = json_request(&mut request).await?; + let my_id = get_conf(&request).my_id; + // If request doesn't exclude us, membership switch endpoint should be used + // instead. + if data.mconf.contains(my_id) { + return Err(ApiError::Forbidden(format!( + "refused to switch into {}, node {} is member of it", + data.mconf, my_id + ))); + } + let action = DeleteOrExclude::Exclude(data.mconf); + + let resp = global_timelines + .delete_or_exclude(&ttid, action) + .await + .map_err(ApiError::from)?; + json_response(StatusCode::OK, resp) +} + /// Consider switching timeline membership configuration to the provided one. async fn timeline_membership_handler( mut request: Request, @@ -291,12 +345,29 @@ async fn timeline_membership_handler( let tli = global_timelines.get(ttid).map_err(ApiError::from)?; let data: models::TimelineMembershipSwitchRequest = json_request(&mut request).await?; + let my_id = get_conf(&request).my_id; + // If request excludes us, exclude endpoint should be used instead. + if !data.mconf.contains(my_id) { + return Err(ApiError::Forbidden(format!( + "refused to switch into {}, node {} is not a member of it", + data.mconf, my_id + ))); + } + let req_gen = data.mconf.generation; let response = tli .membership_switch(data.mconf) .await .map_err(ApiError::InternalServerError)?; - json_response(StatusCode::OK, response) + // Return 409 if request was ignored. 
+ if req_gen == response.current_conf.generation { + json_response(StatusCode::OK, response) + } else { + Err(ApiError::Conflict(format!( + "request to switch into {} ignored, current generation {}", + req_gen, response.current_conf.generation + ))) + } } async fn timeline_copy_handler(mut request: Request) -> Result, ApiError> { @@ -647,11 +718,14 @@ pub fn make_router( .post("/v1/pull_timeline", |r| { request_span(r, timeline_pull_handler) }) + .put("/v1/tenant/:tenant_id/timeline/:timeline_id/exclude", |r| { + request_span(r, timeline_exclude_handler) + }) .get( "/v1/tenant/:tenant_id/timeline/:timeline_id/snapshot/:destination_id", |r| request_span(r, timeline_snapshot_handler), ) - .post( + .put( "/v1/tenant/:tenant_id/timeline/:timeline_id/membership", |r| request_span(r, timeline_membership_handler), ) diff --git a/safekeeper/src/json_ctrl.rs b/safekeeper/src/json_ctrl.rs deleted file mode 100644 index 19e17c4a75..0000000000 --- a/safekeeper/src/json_ctrl.rs +++ /dev/null @@ -1,196 +0,0 @@ -//! -//! This module implements JSON_CTRL protocol, which allows exchange -//! JSON messages over psql for testing purposes. -//! -//! Currently supports AppendLogicalMessage, which is used for WAL -//! modifications in tests. -//! - -use anyhow::Context; -use postgres_backend::QueryError; -use safekeeper_api::membership::Configuration; -use safekeeper_api::{ServerInfo, Term}; -use serde::{Deserialize, Serialize}; -use tokio::io::{AsyncRead, AsyncWrite}; -use tracing::*; - -use crate::handler::SafekeeperPostgresHandler; -use crate::safekeeper::{AcceptorProposerMessage, AppendResponse}; -use crate::safekeeper::{ - AppendRequest, AppendRequestHeader, ProposerAcceptorMessage, ProposerElected, -}; -use crate::safekeeper::{TermHistory, TermLsn}; -use crate::state::TimelinePersistentState; -use crate::timeline::WalResidentTimeline; -use postgres_backend::PostgresBackend; -use postgres_ffi::encode_logical_message; -use postgres_ffi::WAL_SEGMENT_SIZE; -use pq_proto::{BeMessage, RowDescriptor, TEXT_OID}; -use utils::lsn::Lsn; - -#[derive(Serialize, Deserialize, Debug)] -pub struct AppendLogicalMessage { - // prefix and message to build LogicalMessage - pub lm_prefix: String, - pub lm_message: String, - - // if true, commit_lsn will match flush_lsn after append - pub set_commit_lsn: bool, - - // if true, ProposerElected will be sent before append - pub send_proposer_elected: bool, - - // fields from AppendRequestHeader - pub term: Term, - #[serde(with = "utils::lsn::serde_as_u64")] - pub epoch_start_lsn: Lsn, - #[serde(with = "utils::lsn::serde_as_u64")] - pub begin_lsn: Lsn, - #[serde(with = "utils::lsn::serde_as_u64")] - pub truncate_lsn: Lsn, - pub pg_version: u32, -} - -#[derive(Debug, Serialize)] -struct AppendResult { - // safekeeper state after append - state: TimelinePersistentState, - // info about new record in the WAL - inserted_wal: InsertedWAL, -} - -/// Handles command to craft logical message WAL record with given -/// content, and then append it with specified term and lsn. This -/// function is used to test safekeepers in different scenarios. 
-pub async fn handle_json_ctrl( - spg: &SafekeeperPostgresHandler, - pgb: &mut PostgresBackend, - append_request: &AppendLogicalMessage, -) -> Result<(), QueryError> { - info!("JSON_CTRL request: {append_request:?}"); - - // need to init safekeeper state before AppendRequest - let tli = prepare_safekeeper(spg, append_request.pg_version).await?; - - // if send_proposer_elected is true, we need to update local history - if append_request.send_proposer_elected { - send_proposer_elected(&tli, append_request.term, append_request.epoch_start_lsn).await?; - } - - let inserted_wal = append_logical_message(&tli, append_request).await?; - let response = AppendResult { - state: tli.get_state().await.1, - inserted_wal, - }; - let response_data = serde_json::to_vec(&response) - .with_context(|| format!("Response {response:?} is not a json array"))?; - - pgb.write_message_noflush(&BeMessage::RowDescription(&[RowDescriptor { - name: b"json", - typoid: TEXT_OID, - typlen: -1, - ..Default::default() - }]))? - .write_message_noflush(&BeMessage::DataRow(&[Some(&response_data)]))? - .write_message_noflush(&BeMessage::CommandComplete(b"JSON_CTRL"))?; - Ok(()) -} - -/// Prepare safekeeper to process append requests without crashes, -/// by sending ProposerGreeting with default server.wal_seg_size. -async fn prepare_safekeeper( - spg: &SafekeeperPostgresHandler, - pg_version: u32, -) -> anyhow::Result { - let tli = spg - .global_timelines - .create( - spg.ttid, - Configuration::empty(), - ServerInfo { - pg_version, - wal_seg_size: WAL_SEGMENT_SIZE as u32, - system_id: 0, - }, - Lsn::INVALID, - Lsn::INVALID, - ) - .await?; - - tli.wal_residence_guard().await -} - -async fn send_proposer_elected( - tli: &WalResidentTimeline, - term: Term, - lsn: Lsn, -) -> anyhow::Result<()> { - // add new term to existing history - let history = tli.get_state().await.1.acceptor_state.term_history; - let history = history.up_to(lsn.checked_sub(1u64).unwrap()); - let mut history_entries = history.0; - history_entries.push(TermLsn { term, lsn }); - let history = TermHistory(history_entries); - - let proposer_elected_request = ProposerAcceptorMessage::Elected(ProposerElected { - term, - start_streaming_at: lsn, - term_history: history, - timeline_start_lsn: lsn, - }); - - tli.process_msg(&proposer_elected_request).await?; - Ok(()) -} - -#[derive(Debug, Serialize)] -pub struct InsertedWAL { - begin_lsn: Lsn, - pub end_lsn: Lsn, - append_response: AppendResponse, -} - -/// Extend local WAL with new LogicalMessage record. To do that, -/// create AppendRequest with new WAL and pass it to safekeeper. 
-pub async fn append_logical_message( - tli: &WalResidentTimeline, - msg: &AppendLogicalMessage, -) -> anyhow::Result { - let wal_data = encode_logical_message(&msg.lm_prefix, &msg.lm_message); - let sk_state = tli.get_state().await.1; - - let begin_lsn = msg.begin_lsn; - let end_lsn = begin_lsn + wal_data.len() as u64; - - let commit_lsn = if msg.set_commit_lsn { - end_lsn - } else { - sk_state.commit_lsn - }; - - let append_request = ProposerAcceptorMessage::AppendRequest(AppendRequest { - h: AppendRequestHeader { - term: msg.term, - term_start_lsn: begin_lsn, - begin_lsn, - end_lsn, - commit_lsn, - truncate_lsn: msg.truncate_lsn, - proposer_uuid: [0u8; 16], - }, - wal_data, - }); - - let response = tli.process_msg(&append_request).await?; - - let append_response = match response { - Some(AcceptorProposerMessage::AppendResponse(resp)) => resp, - _ => anyhow::bail!("not AppendResponse"), - }; - - Ok(InsertedWAL { - begin_lsn, - end_lsn, - append_response, - }) -} diff --git a/safekeeper/src/lib.rs b/safekeeper/src/lib.rs index e0090c638a..de3b783508 100644 --- a/safekeeper/src/lib.rs +++ b/safekeeper/src/lib.rs @@ -2,15 +2,16 @@ extern crate hyper0 as hyper; +use std::time::Duration; + use camino::Utf8PathBuf; use once_cell::sync::Lazy; use remote_storage::RemoteStorageConfig; -use tokio::runtime::Runtime; - -use std::time::Duration; use storage_broker::Uri; - -use utils::{auth::SwappableJwtAuth, id::NodeId, logging::SecretString}; +use tokio::runtime::Runtime; +use utils::auth::SwappableJwtAuth; +use utils::id::NodeId; +use utils::logging::SecretString; mod auth; pub mod broker; @@ -20,7 +21,6 @@ pub mod copy_timeline; pub mod debug_dump; pub mod handler; pub mod http; -pub mod json_ctrl; pub mod metrics; pub mod patch_control_file; pub mod pull_timeline; @@ -48,6 +48,7 @@ pub mod test_utils; mod timelines_global_map; use std::sync::Arc; + pub use timelines_global_map::GlobalTimelines; use utils::auth::JwtAuth; diff --git a/safekeeper/src/metrics.rs b/safekeeper/src/metrics.rs index 3ea9e3d674..cb21a5f6d2 100644 --- a/safekeeper/src/metrics.rs +++ b/safekeeper/src/metrics.rs @@ -1,30 +1,28 @@ //! Global safekeeper mertics and per-timeline safekeeper metrics. 
-use std::{ - sync::{Arc, RwLock}, - time::{Instant, SystemTime}, -}; +use std::sync::{Arc, RwLock}; +use std::time::{Instant, SystemTime}; use anyhow::Result; use futures::Future; +use metrics::core::{AtomicU64, Collector, Desc, GenericCounter, GenericGaugeVec, Opts}; +use metrics::proto::MetricFamily; use metrics::{ - core::{AtomicU64, Collector, Desc, GenericCounter, GenericGaugeVec, Opts}, - pow2_buckets, - proto::MetricFamily, + DISK_FSYNC_SECONDS_BUCKETS, Gauge, GaugeVec, Histogram, HistogramVec, IntCounter, + IntCounterPair, IntCounterPairVec, IntCounterVec, IntGauge, IntGaugeVec, pow2_buckets, register_histogram, register_histogram_vec, register_int_counter, register_int_counter_pair, register_int_counter_pair_vec, register_int_counter_vec, register_int_gauge, - register_int_gauge_vec, Gauge, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPair, - IntCounterPairVec, IntCounterVec, IntGauge, IntGaugeVec, DISK_FSYNC_SECONDS_BUCKETS, + register_int_gauge_vec, }; use once_cell::sync::Lazy; use postgres_ffi::XLogSegNo; -use utils::{id::TenantTimelineId, lsn::Lsn, pageserver_feedback::PageserverFeedback}; +use utils::id::TenantTimelineId; +use utils::lsn::Lsn; +use utils::pageserver_feedback::PageserverFeedback; -use crate::{ - receive_wal::MSG_QUEUE_SIZE, - state::{TimelineMemState, TimelinePersistentState}, - GlobalTimelines, -}; +use crate::GlobalTimelines; +use crate::receive_wal::MSG_QUEUE_SIZE; +use crate::state::{TimelineMemState, TimelinePersistentState}; // Global metrics across all timelines. pub static WRITE_WAL_BYTES: Lazy = Lazy::new(|| { diff --git a/safekeeper/src/patch_control_file.rs b/safekeeper/src/patch_control_file.rs index 2136d1b5f7..efdbd9b3d7 100644 --- a/safekeeper/src/patch_control_file.rs +++ b/safekeeper/src/patch_control_file.rs @@ -4,7 +4,8 @@ use serde::{Deserialize, Serialize}; use serde_json::Value; use tracing::info; -use crate::{state::TimelinePersistentState, timeline::Timeline}; +use crate::state::TimelinePersistentState; +use crate::timeline::Timeline; #[derive(Deserialize, Debug, Clone)] pub struct Request { diff --git a/safekeeper/src/pull_timeline.rs b/safekeeper/src/pull_timeline.rs index 4827b73074..fc58b8509a 100644 --- a/safekeeper/src/pull_timeline.rs +++ b/safekeeper/src/pull_timeline.rs @@ -1,46 +1,38 @@ -use anyhow::{anyhow, bail, Context, Result}; +use std::cmp::min; +use std::io::{self, ErrorKind}; +use std::sync::Arc; + +use anyhow::{Context, Result, anyhow, bail}; use bytes::Bytes; use camino::Utf8PathBuf; use chrono::{DateTime, Utc}; use futures::{SinkExt, StreamExt, TryStreamExt}; -use postgres_ffi::{XLogFileName, XLogSegNo, PG_TLI}; -use safekeeper_api::{ - models::{PullTimelineRequest, PullTimelineResponse, TimelineStatus}, - Term, -}; +use postgres_ffi::{PG_TLI, XLogFileName, XLogSegNo}; +use safekeeper_api::Term; +use safekeeper_api::models::{PullTimelineRequest, PullTimelineResponse, TimelineStatus}; use safekeeper_client::mgmt_api; use safekeeper_client::mgmt_api::Client; use serde::Deserialize; -use std::{ - cmp::min, - io::{self, ErrorKind}, - sync::Arc, -}; -use tokio::{fs::OpenOptions, io::AsyncWrite, sync::mpsc, task}; +use tokio::fs::OpenOptions; +use tokio::io::AsyncWrite; +use tokio::sync::mpsc; +use tokio::task; use tokio_tar::{Archive, Builder, Header}; -use tokio_util::{ - io::{CopyToBytes, SinkWriter}, - sync::PollSender, -}; +use tokio_util::io::{CopyToBytes, SinkWriter}; +use tokio_util::sync::PollSender; use tracing::{error, info, instrument}; +use utils::crashsafe::fsync_async_opt; +use 
utils::id::{NodeId, TenantTimelineId}; +use utils::logging::SecretString; +use utils::lsn::Lsn; +use utils::pausable_failpoint; -use crate::{ - control_file::CONTROL_FILE_NAME, - debug_dump, - state::{EvictionState, TimelinePersistentState}, - timeline::{Timeline, WalResidentTimeline}, - timelines_global_map::{create_temp_timeline_dir, validate_temp_timeline}, - wal_backup, - wal_storage::open_wal_file, - GlobalTimelines, -}; -use utils::{ - crashsafe::fsync_async_opt, - id::{NodeId, TenantTimelineId}, - logging::SecretString, - lsn::Lsn, - pausable_failpoint, -}; +use crate::control_file::CONTROL_FILE_NAME; +use crate::state::{EvictionState, TimelinePersistentState}; +use crate::timeline::{Timeline, WalResidentTimeline}; +use crate::timelines_global_map::{create_temp_timeline_dir, validate_temp_timeline}; +use crate::wal_storage::open_wal_file; +use crate::{GlobalTimelines, debug_dump, wal_backup}; /// Stream tar archive of timeline to tx. #[instrument(name = "snapshot", skip_all, fields(ttid = %tli.ttid))] @@ -374,8 +366,13 @@ impl WalResidentTimeline { // change, but as long as older history is strictly part of new that's // fine), but there is no need to do it. if bctx.term != term || bctx.last_log_term != last_log_term { - bail!("term(s) changed during snapshot: were term={}, last_log_term={}, now term={}, last_log_term={}", - bctx.term, bctx.last_log_term, term, last_log_term); + bail!( + "term(s) changed during snapshot: were term={}, last_log_term={}, now term={}, last_log_term={}", + bctx.term, + bctx.last_log_term, + term, + last_log_term + ); } Ok(()) } diff --git a/safekeeper/src/receive_wal.rs b/safekeeper/src/receive_wal.rs index cb42f6f414..7967acde3f 100644 --- a/safekeeper/src/receive_wal.rs +++ b/safekeeper/src/receive_wal.rs @@ -2,35 +2,21 @@ //! Gets messages from the network, passes them down to consensus module and //! sends replies back. 
-use crate::handler::SafekeeperPostgresHandler; -use crate::metrics::{ - WAL_RECEIVERS, WAL_RECEIVER_QUEUE_DEPTH, WAL_RECEIVER_QUEUE_DEPTH_TOTAL, - WAL_RECEIVER_QUEUE_SIZE_TOTAL, -}; -use crate::safekeeper::AcceptorProposerMessage; -use crate::safekeeper::ProposerAcceptorMessage; -use crate::timeline::WalResidentTimeline; -use crate::GlobalTimelines; -use anyhow::{anyhow, Context}; -use bytes::BytesMut; -use parking_lot::MappedMutexGuard; -use parking_lot::Mutex; -use parking_lot::MutexGuard; -use postgres_backend::CopyStreamHandlerEnd; -use postgres_backend::PostgresBackend; -use postgres_backend::PostgresBackendReader; -use postgres_backend::QueryError; -use pq_proto::BeMessage; -use safekeeper_api::membership::Configuration; -use safekeeper_api::models::{ConnectionId, WalReceiverState, WalReceiverStatus}; -use safekeeper_api::ServerInfo; use std::future; use std::net::SocketAddr; use std::sync::Arc; -use tokio::io::AsyncRead; -use tokio::io::AsyncWrite; + +use anyhow::{Context, anyhow}; +use bytes::BytesMut; +use parking_lot::{MappedMutexGuard, Mutex, MutexGuard}; +use postgres_backend::{CopyStreamHandlerEnd, PostgresBackend, PostgresBackendReader, QueryError}; +use pq_proto::BeMessage; +use safekeeper_api::ServerInfo; +use safekeeper_api::membership::Configuration; +use safekeeper_api::models::{ConnectionId, WalReceiverState, WalReceiverStatus}; +use tokio::io::{AsyncRead, AsyncWrite}; use tokio::sync::mpsc::error::SendTimeoutError; -use tokio::sync::mpsc::{channel, Receiver, Sender}; +use tokio::sync::mpsc::{Receiver, Sender, channel}; use tokio::task; use tokio::task::JoinHandle; use tokio::time::{Duration, Instant, MissedTickBehavior}; @@ -39,6 +25,15 @@ use utils::id::TenantTimelineId; use utils::lsn::Lsn; use utils::pageserver_feedback::PageserverFeedback; +use crate::GlobalTimelines; +use crate::handler::SafekeeperPostgresHandler; +use crate::metrics::{ + WAL_RECEIVER_QUEUE_DEPTH, WAL_RECEIVER_QUEUE_DEPTH_TOTAL, WAL_RECEIVER_QUEUE_SIZE_TOTAL, + WAL_RECEIVERS, +}; +use crate::safekeeper::{AcceptorProposerMessage, ProposerAcceptorMessage}; +use crate::timeline::WalResidentTimeline; + const DEFAULT_FEEDBACK_CAPACITY: usize = 8; /// Registry of WalReceivers (compute connections). Timeline holds it (wrapped @@ -281,7 +276,7 @@ impl SafekeeperPostgresHandler { tokio::select! 
{ // todo: add read|write .context to these errors r = network_reader.run(msg_tx, msg_rx, reply_tx, timeline, next_msg) => r, - r = network_write(pgb, reply_rx, pageserver_feedback_rx) => r, + r = network_write(pgb, reply_rx, pageserver_feedback_rx, proto_version) => r, _ = timeline_cancel.cancelled() => { return Err(CopyStreamHandlerEnd::Cancelled); } @@ -342,8 +337,8 @@ impl NetworkReader<'_, IO> { let tli = match next_msg { ProposerAcceptorMessage::Greeting(ref greeting) => { info!( - "start handshake with walproposer {} sysid {} timeline {}", - self.peer_addr, greeting.system_id, greeting.tli, + "start handshake with walproposer {} sysid {}", + self.peer_addr, greeting.system_id, ); let server_info = ServerInfo { pg_version: greeting.pg_version, @@ -371,7 +366,7 @@ impl NetworkReader<'_, IO> { _ => { return Err(CopyStreamHandlerEnd::Other(anyhow::anyhow!( "unexpected message {next_msg:?} instead of greeting" - ))) + ))); } }; Ok((tli, next_msg)) @@ -459,6 +454,7 @@ async fn network_write( pgb_writer: &mut PostgresBackend, mut reply_rx: Receiver, mut pageserver_feedback_rx: tokio::sync::broadcast::Receiver, + proto_version: u32, ) -> Result<(), CopyStreamHandlerEnd> { let mut buf = BytesMut::with_capacity(128); @@ -496,7 +492,7 @@ async fn network_write( }; buf.clear(); - msg.serialize(&mut buf)?; + msg.serialize(&mut buf, proto_version)?; pgb_writer.write_message(&BeMessage::CopyData(&buf)).await?; } } diff --git a/safekeeper/src/recovery.rs b/safekeeper/src/recovery.rs index 61647c16b0..c2760792b8 100644 --- a/safekeeper/src/recovery.rs +++ b/safekeeper/src/recovery.rs @@ -1,39 +1,36 @@ //! This module implements pulling WAL from peer safekeepers if compute can't //! provide it, i.e. safekeeper lags too much. +use std::fmt; +use std::pin::pin; use std::time::SystemTime; -use std::{fmt, pin::pin}; -use anyhow::{bail, Context}; +use anyhow::{Context, bail}; use futures::StreamExt; use postgres_protocol::message::backend::ReplicationMessage; -use safekeeper_api::models::{PeerInfo, TimelineStatus}; use safekeeper_api::Term; -use tokio::sync::mpsc::{channel, Receiver, Sender}; -use tokio::time::timeout; -use tokio::{ - select, - time::sleep, - time::{self, Duration}, -}; +use safekeeper_api::membership::INVALID_GENERATION; +use safekeeper_api::models::{PeerInfo, TimelineStatus}; +use tokio::select; +use tokio::sync::mpsc::{Receiver, Sender, channel}; +use tokio::time::{self, Duration, sleep, timeout}; use tokio_postgres::replication::ReplicationStream; use tokio_postgres::types::PgLsn; use tracing::*; -use utils::postgres_client::{ConnectionConfigArgs, PostgresClientProtocol}; -use utils::{id::NodeId, lsn::Lsn, postgres_client::wal_stream_connection_config}; - -use crate::receive_wal::{WalAcceptor, REPLY_QUEUE_SIZE}; -use crate::safekeeper::{AppendRequest, AppendRequestHeader}; -use crate::timeline::WalResidentTimeline; -use crate::{ - receive_wal::MSG_QUEUE_SIZE, - safekeeper::{ - AcceptorProposerMessage, ProposerAcceptorMessage, ProposerElected, TermHistory, TermLsn, - VoteRequest, - }, - SafeKeeperConf, +use utils::id::NodeId; +use utils::lsn::Lsn; +use utils::postgres_client::{ + ConnectionConfigArgs, PostgresClientProtocol, wal_stream_connection_config, }; +use crate::SafeKeeperConf; +use crate::receive_wal::{MSG_QUEUE_SIZE, REPLY_QUEUE_SIZE, WalAcceptor}; +use crate::safekeeper::{ + AcceptorProposerMessage, AppendRequest, AppendRequestHeader, ProposerAcceptorMessage, + ProposerElected, TermHistory, TermLsn, VoteRequest, +}; +use crate::timeline::WalResidentTimeline; + /// Entrypoint 
for per timeline task which always runs, checking whether /// recovery for this safekeeper is needed and starting it if so. #[instrument(name = "recovery", skip_all, fields(ttid = %tli.ttid))] @@ -267,7 +264,10 @@ async fn recover( ); // Now understand our term history. - let vote_request = ProposerAcceptorMessage::VoteRequest(VoteRequest { term: donor.term }); + let vote_request = ProposerAcceptorMessage::VoteRequest(VoteRequest { + generation: INVALID_GENERATION, + term: donor.term, + }); let vote_response = match tli .process_msg(&vote_request) .await @@ -302,10 +302,10 @@ async fn recover( // truncate WAL locally let pe = ProposerAcceptorMessage::Elected(ProposerElected { + generation: INVALID_GENERATION, term: donor.term, start_streaming_at: last_common_point.lsn, term_history: donor_th, - timeline_start_lsn: Lsn::INVALID, }); // Successful ProposerElected handling always returns None. If term changed, // we'll find out that during the streaming. Note: it is expected to get @@ -343,12 +343,17 @@ async fn recovery_stream( cfg.replication_mode(tokio_postgres::config::ReplicationMode::Physical); let connect_timeout = Duration::from_millis(10000); - let (client, connection) = match time::timeout(connect_timeout, cfg.connect(postgres::NoTls)) - .await + let (client, connection) = match time::timeout( + connect_timeout, + cfg.connect(tokio_postgres::NoTls), + ) + .await { Ok(client_and_conn) => client_and_conn?, Err(_elapsed) => { - bail!("timed out while waiting {connect_timeout:?} for connection to peer safekeeper to open"); + bail!( + "timed out while waiting {connect_timeout:?} for connection to peer safekeeper to open" + ); } }; trace!("connected to {:?}", donor); @@ -434,13 +439,12 @@ async fn network_io( match msg { ReplicationMessage::XLogData(xlog_data) => { let ar_hdr = AppendRequestHeader { + generation: INVALID_GENERATION, term: donor.term, - term_start_lsn: Lsn::INVALID, // unused begin_lsn: Lsn(xlog_data.wal_start()), end_lsn: Lsn(xlog_data.wal_start()) + xlog_data.data().len() as u64, commit_lsn: Lsn::INVALID, // do not attempt to advance, peer communication anyway does it truncate_lsn: Lsn::INVALID, // do not attempt to advance - proposer_uuid: [0; 16], }; let ar = AppendRequest { h: ar_hdr, diff --git a/safekeeper/src/safekeeper.rs b/safekeeper/src/safekeeper.rs index f816f8459a..0edac04b97 100644 --- a/safekeeper/src/safekeeper.rs +++ b/safekeeper/src/safekeeper.rs @@ -1,35 +1,34 @@ //! Acceptor part of proposer-acceptor consensus algorithm. 
-use anyhow::{bail, Context, Result}; -use byteorder::{LittleEndian, ReadBytesExt}; -use bytes::{Buf, BufMut, Bytes, BytesMut}; - -use postgres_ffi::{TimeLineID, MAX_SEND_SIZE}; -use safekeeper_api::models::HotStandbyFeedback; -use safekeeper_api::Term; -use serde::{Deserialize, Serialize}; -use std::cmp::max; -use std::cmp::min; +use std::cmp::{max, min}; use std::fmt; use std::io::Read; -use storage_broker::proto::SafekeeperTimelineInfo; +use std::str::FromStr; -use tracing::*; - -use crate::control_file; -use crate::metrics::MISC_OPERATION_SECONDS; - -use crate::state::TimelineState; -use crate::wal_storage; +use anyhow::{Context, Result, bail}; +use byteorder::{LittleEndian, ReadBytesExt}; +use bytes::{Buf, BufMut, Bytes, BytesMut}; +use postgres_ffi::{MAX_SEND_SIZE, TimeLineID}; use pq_proto::SystemId; -use utils::pageserver_feedback::PageserverFeedback; -use utils::{ - bin_ser::LeSer, - id::{NodeId, TenantId, TimelineId}, - lsn::Lsn, +use safekeeper_api::membership::{ + INVALID_GENERATION, MemberSet, SafekeeperGeneration as Generation, SafekeeperId, }; +use safekeeper_api::models::HotStandbyFeedback; +use safekeeper_api::{Term, membership}; +use serde::{Deserialize, Serialize}; +use storage_broker::proto::SafekeeperTimelineInfo; +use tracing::*; +use utils::bin_ser::LeSer; +use utils::id::{NodeId, TenantId, TimelineId}; +use utils::lsn::Lsn; +use utils::pageserver_feedback::PageserverFeedback; -pub const SK_PROTOCOL_VERSION: u32 = 2; +use crate::metrics::MISC_OPERATION_SECONDS; +use crate::state::TimelineState; +use crate::{control_file, wal_storage}; + +pub const SK_PROTO_VERSION_2: u32 = 2; +pub const SK_PROTO_VERSION_3: u32 = 3; pub const UNKNOWN_SERVER_VERSION: u32 = 0; #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)] @@ -56,8 +55,28 @@ impl TermHistory { TermHistory(Vec::new()) } - // Parse TermHistory as n_entries followed by TermLsn pairs + // Parse TermHistory as n_entries followed by TermLsn pairs in network order. pub fn from_bytes(bytes: &mut Bytes) -> Result { + let n_entries = bytes + .get_u32_f() + .with_context(|| "TermHistory misses len")?; + let mut res = Vec::with_capacity(n_entries as usize); + for i in 0..n_entries { + let term = bytes + .get_u64_f() + .with_context(|| format!("TermHistory pos {} misses term", i))?; + let lsn = bytes + .get_u64_f() + .with_context(|| format!("TermHistory pos {} misses lsn", i))? + .into(); + res.push(TermLsn { term, lsn }) + } + Ok(TermHistory(res)) + } + + // Parse TermHistory as n_entries followed by TermLsn pairs in LE order. + // TODO remove once v2 protocol is fully dropped. + pub fn from_bytes_le(bytes: &mut Bytes) -> Result { if bytes.remaining() < 4 { bail!("TermHistory misses len"); } @@ -197,6 +216,18 @@ impl AcceptorState { /// Initial Proposer -> Acceptor message #[derive(Debug, Deserialize)] pub struct ProposerGreeting { + pub tenant_id: TenantId, + pub timeline_id: TimelineId, + pub mconf: membership::Configuration, + /// Postgres server version + pub pg_version: u32, + pub system_id: SystemId, + pub wal_seg_size: u32, +} + +/// V2 of the message; exists as a struct because we (de)serialized it as is. +#[derive(Debug, Deserialize)] +pub struct ProposerGreetingV2 { /// proposer-acceptor protocol version pub protocol_version: u32, /// Postgres server version @@ -213,27 +244,35 @@ pub struct ProposerGreeting { /// (acceptor voted for). 
#[derive(Debug, Serialize)] pub struct AcceptorGreeting { - term: u64, node_id: NodeId, + mconf: membership::Configuration, + term: u64, } /// Vote request sent from proposer to safekeepers -#[derive(Debug, Deserialize)] +#[derive(Debug)] pub struct VoteRequest { + pub generation: Generation, + pub term: Term, +} + +/// V2 of the message; exists as a struct because we (de)serialized it as is. +#[derive(Debug, Deserialize)] +pub struct VoteRequestV2 { pub term: Term, } /// Vote itself, sent from safekeeper to proposer #[derive(Debug, Serialize)] pub struct VoteResponse { + generation: Generation, // membership conf generation pub term: Term, // safekeeper's current term; if it is higher than proposer's, the compute is out of date. - vote_given: u64, // fixme u64 due to padding + vote_given: bool, // Safekeeper flush_lsn (end of WAL) + history of term switches allow // proposer to choose the most advanced one. pub flush_lsn: Lsn, truncate_lsn: Lsn, pub term_history: TermHistory, - timeline_start_lsn: Lsn, } /* @@ -242,10 +281,10 @@ pub struct VoteResponse { */ #[derive(Debug)] pub struct ProposerElected { + pub generation: Generation, // membership conf generation pub term: Term, pub start_streaming_at: Lsn, pub term_history: TermHistory, - pub timeline_start_lsn: Lsn, } /// Request with WAL message sent from proposer to safekeeper. Along the way it @@ -257,6 +296,22 @@ pub struct AppendRequest { } #[derive(Debug, Clone, Deserialize)] pub struct AppendRequestHeader { + pub generation: Generation, // membership conf generation + // safekeeper's current term; if it is higher than proposer's, the compute is out of date. + pub term: Term, + /// start position of message in WAL + pub begin_lsn: Lsn, + /// end position of message in WAL + pub end_lsn: Lsn, + /// LSN committed by quorum of safekeepers + pub commit_lsn: Lsn, + /// minimal LSN which may be needed by proposer to perform recovery of some safekeeper + pub truncate_lsn: Lsn, +} + +/// V2 of the message; exists as a struct because we (de)serialized it as is. +#[derive(Debug, Clone, Deserialize)] +pub struct AppendRequestHeaderV2 { // safekeeper's current term; if it is higher than proposer's, the compute is out of date. pub term: Term, // TODO: remove this field from the protocol, it in unused -- LSN of term @@ -277,6 +332,9 @@ pub struct AppendRequestHeader { /// Report safekeeper state to proposer #[derive(Debug, Serialize, Clone)] pub struct AppendResponse { + // Membership conf generation. Not strictly required because on mismatch + // connection is reset, but let's sanity check it. + generation: Generation, // Current term of the safekeeper; if it is higher than proposer's, the // compute is out of date. pub term: Term, @@ -293,8 +351,9 @@ pub struct AppendResponse { } impl AppendResponse { - fn term_only(term: Term) -> AppendResponse { + fn term_only(generation: Generation, term: Term) -> AppendResponse { AppendResponse { + generation, term, flush_lsn: Lsn(0), commit_lsn: Lsn(0), @@ -315,72 +374,322 @@ pub enum ProposerAcceptorMessage { FlushWAL, } -impl ProposerAcceptorMessage { - /// Parse proposer message. - pub fn parse(msg_bytes: Bytes, proto_version: u32) -> Result { - if proto_version != SK_PROTOCOL_VERSION { - bail!( - "incompatible protocol version {}, expected {}", - proto_version, - SK_PROTOCOL_VERSION - ); +/// Augment Bytes with fallible get_uN where N is number of bytes methods. +/// All reads are in network (big endian) order. 
+trait BytesF { + fn get_u8_f(&mut self) -> Result; + fn get_u16_f(&mut self) -> Result; + fn get_u32_f(&mut self) -> Result; + fn get_u64_f(&mut self) -> Result; +} + +impl BytesF for Bytes { + fn get_u8_f(&mut self) -> Result { + if self.is_empty() { + bail!("no bytes left, expected 1"); } - // xxx using Reader is inefficient but easy to work with bincode - let mut stream = msg_bytes.reader(); - // u64 is here to avoid padding; it will be removed once we stop packing C structs into the wire as is - let tag = stream.read_u64::()? as u8 as char; - match tag { - 'g' => { - let msg = ProposerGreeting::des_from(&mut stream)?; - Ok(ProposerAcceptorMessage::Greeting(msg)) - } - 'v' => { - let msg = VoteRequest::des_from(&mut stream)?; - Ok(ProposerAcceptorMessage::VoteRequest(msg)) - } - 'e' => { - let mut msg_bytes = stream.into_inner(); - if msg_bytes.remaining() < 16 { - bail!("ProposerElected message is not complete"); - } - let term = msg_bytes.get_u64_le(); - let start_streaming_at = msg_bytes.get_u64_le().into(); - let term_history = TermHistory::from_bytes(&mut msg_bytes)?; - if msg_bytes.remaining() < 8 { - bail!("ProposerElected message is not complete"); - } - let timeline_start_lsn = msg_bytes.get_u64_le().into(); - let msg = ProposerElected { - term, - start_streaming_at, - timeline_start_lsn, - term_history, + Ok(self.get_u8()) + } + fn get_u16_f(&mut self) -> Result { + if self.remaining() < 2 { + bail!("no bytes left, expected 2"); + } + Ok(self.get_u16()) + } + fn get_u32_f(&mut self) -> Result { + if self.remaining() < 4 { + bail!("only {} bytes left, expected 4", self.remaining()); + } + Ok(self.get_u32()) + } + fn get_u64_f(&mut self) -> Result { + if self.remaining() < 8 { + bail!("only {} bytes left, expected 8", self.remaining()); + } + Ok(self.get_u64()) + } +} + +impl ProposerAcceptorMessage { + /// Read cstring from Bytes. + fn get_cstr(buf: &mut Bytes) -> Result { + let pos = buf + .iter() + .position(|x| *x == 0) + .ok_or_else(|| anyhow::anyhow!("missing cstring terminator"))?; + let result = buf.split_to(pos); + buf.advance(1); // drop the null terminator + match std::str::from_utf8(&result) { + Ok(s) => Ok(s.to_string()), + Err(e) => bail!("invalid utf8 in cstring: {}", e), + } + } + + /// Read membership::Configuration from Bytes. + fn get_mconf(buf: &mut Bytes) -> Result { + let generation = Generation::new(buf.get_u32_f().with_context(|| "reading generation")?); + let members_len = buf.get_u32_f().with_context(|| "reading members_len")?; + // Main member set must have at least someone in valid configuration. + // Empty conf is allowed until we fully migrate. + if generation != INVALID_GENERATION && members_len == 0 { + bail!("empty members_len"); + } + let mut members = MemberSet::empty(); + for i in 0..members_len { + let id = buf + .get_u64_f() + .with_context(|| format!("reading member {} node_id", i))?; + let host = Self::get_cstr(buf).with_context(|| format!("reading member {} host", i))?; + let pg_port = buf + .get_u16_f() + .with_context(|| format!("reading member {} port", i))?; + let sk = SafekeeperId { + id: NodeId(id), + host, + pg_port, + }; + members.add(sk)?; + } + let new_members_len = buf.get_u32_f().with_context(|| "reading new_members_len")?; + // Non joint conf. 
+ if new_members_len == 0 { + Ok(membership::Configuration { + generation, + members, + new_members: None, + }) + } else { + let mut new_members = MemberSet::empty(); + for i in 0..new_members_len { + let id = buf + .get_u64_f() + .with_context(|| format!("reading new member {} node_id", i))?; + let host = Self::get_cstr(buf) + .with_context(|| format!("reading new member {} host", i))?; + let pg_port = buf + .get_u16_f() + .with_context(|| format!("reading new member {} port", i))?; + let sk = SafekeeperId { + id: NodeId(id), + host, + pg_port, }; - Ok(ProposerAcceptorMessage::Elected(msg)) + new_members.add(sk)?; } - 'a' => { - // read header followed by wal data - let hdr = AppendRequestHeader::des_from(&mut stream)?; - let rec_size = hdr - .end_lsn - .checked_sub(hdr.begin_lsn) - .context("begin_lsn > end_lsn in AppendRequest")? - .0 as usize; - if rec_size > MAX_SEND_SIZE { - bail!( - "AppendRequest is longer than MAX_SEND_SIZE ({})", - MAX_SEND_SIZE - ); + Ok(membership::Configuration { + generation, + members, + new_members: Some(new_members), + }) + } + } + + /// Parse proposer message. + pub fn parse(mut msg_bytes: Bytes, proto_version: u32) -> Result { + if proto_version == SK_PROTO_VERSION_3 { + if msg_bytes.is_empty() { + bail!("ProposerAcceptorMessage is not complete: missing tag"); + } + let tag = msg_bytes.get_u8_f().with_context(|| { + "ProposerAcceptorMessage is not complete: missing tag".to_string() + })? as char; + match tag { + 'g' => { + let tenant_id_str = + Self::get_cstr(&mut msg_bytes).with_context(|| "reading tenant_id")?; + let tenant_id = TenantId::from_str(&tenant_id_str)?; + let timeline_id_str = + Self::get_cstr(&mut msg_bytes).with_context(|| "reading timeline_id")?; + let timeline_id = TimelineId::from_str(&timeline_id_str)?; + let mconf = Self::get_mconf(&mut msg_bytes)?; + let pg_version = msg_bytes + .get_u32_f() + .with_context(|| "reading pg_version")?; + let system_id = msg_bytes.get_u64_f().with_context(|| "reading system_id")?; + let wal_seg_size = msg_bytes + .get_u32_f() + .with_context(|| "reading wal_seg_size")?; + let g = ProposerGreeting { + tenant_id, + timeline_id, + mconf, + pg_version, + system_id, + wal_seg_size, + }; + Ok(ProposerAcceptorMessage::Greeting(g)) } + 'v' => { + let generation = Generation::new( + msg_bytes + .get_u32_f() + .with_context(|| "reading generation")?, + ); + let term = msg_bytes.get_u64_f().with_context(|| "reading term")?; + let v = VoteRequest { generation, term }; + Ok(ProposerAcceptorMessage::VoteRequest(v)) + } + 'e' => { + let generation = Generation::new( + msg_bytes + .get_u32_f() + .with_context(|| "reading generation")?, + ); + let term = msg_bytes.get_u64_f().with_context(|| "reading term")?; + let start_streaming_at: Lsn = msg_bytes + .get_u64_f() + .with_context(|| "reading start_streaming_at")? + .into(); + let term_history = TermHistory::from_bytes(&mut msg_bytes)?; + let msg = ProposerElected { + generation, + term, + start_streaming_at, + term_history, + }; + Ok(ProposerAcceptorMessage::Elected(msg)) + } + 'a' => { + let generation = Generation::new( + msg_bytes + .get_u32_f() + .with_context(|| "reading generation")?, + ); + let term = msg_bytes.get_u64_f().with_context(|| "reading term")?; + let begin_lsn: Lsn = msg_bytes + .get_u64_f() + .with_context(|| "reading begin_lsn")? + .into(); + let end_lsn: Lsn = msg_bytes + .get_u64_f() + .with_context(|| "reading end_lsn")? + .into(); + let commit_lsn: Lsn = msg_bytes + .get_u64_f() + .with_context(|| "reading commit_lsn")? 
+ .into(); + let truncate_lsn: Lsn = msg_bytes + .get_u64_f() + .with_context(|| "reading truncate_lsn")? + .into(); + let hdr = AppendRequestHeader { + generation, + term, + begin_lsn, + end_lsn, + commit_lsn, + truncate_lsn, + }; + let rec_size = hdr + .end_lsn + .checked_sub(hdr.begin_lsn) + .context("begin_lsn > end_lsn in AppendRequest")? + .0 as usize; + if rec_size > MAX_SEND_SIZE { + bail!( + "AppendRequest is longer than MAX_SEND_SIZE ({})", + MAX_SEND_SIZE + ); + } + if msg_bytes.remaining() < rec_size { + bail!( + "reading WAL: only {} bytes left, wanted {}", + msg_bytes.remaining(), + rec_size + ); + } + let wal_data = msg_bytes.copy_to_bytes(rec_size); + let msg = AppendRequest { h: hdr, wal_data }; - let mut wal_data_vec: Vec = vec![0; rec_size]; - stream.read_exact(&mut wal_data_vec)?; - let wal_data = Bytes::from(wal_data_vec); - let msg = AppendRequest { h: hdr, wal_data }; - - Ok(ProposerAcceptorMessage::AppendRequest(msg)) + Ok(ProposerAcceptorMessage::AppendRequest(msg)) + } + _ => bail!("unknown proposer-acceptor message tag: {}", tag), } - _ => bail!("unknown proposer-acceptor message tag: {}", tag), + } else if proto_version == SK_PROTO_VERSION_2 { + // xxx using Reader is inefficient but easy to work with bincode + let mut stream = msg_bytes.reader(); + // u64 is here to avoid padding; it will be removed once we stop packing C structs into the wire as is + let tag = stream.read_u64::()? as u8 as char; + match tag { + 'g' => { + let msgv2 = ProposerGreetingV2::des_from(&mut stream)?; + let g = ProposerGreeting { + tenant_id: msgv2.tenant_id, + timeline_id: msgv2.timeline_id, + mconf: membership::Configuration { + generation: INVALID_GENERATION, + members: MemberSet::empty(), + new_members: None, + }, + pg_version: msgv2.pg_version, + system_id: msgv2.system_id, + wal_seg_size: msgv2.wal_seg_size, + }; + Ok(ProposerAcceptorMessage::Greeting(g)) + } + 'v' => { + let msg = VoteRequestV2::des_from(&mut stream)?; + let v = VoteRequest { + generation: INVALID_GENERATION, + term: msg.term, + }; + Ok(ProposerAcceptorMessage::VoteRequest(v)) + } + 'e' => { + let mut msg_bytes = stream.into_inner(); + if msg_bytes.remaining() < 16 { + bail!("ProposerElected message is not complete"); + } + let term = msg_bytes.get_u64_le(); + let start_streaming_at = msg_bytes.get_u64_le().into(); + let term_history = TermHistory::from_bytes_le(&mut msg_bytes)?; + if msg_bytes.remaining() < 8 { + bail!("ProposerElected message is not complete"); + } + let _timeline_start_lsn = msg_bytes.get_u64_le(); + let msg = ProposerElected { + generation: INVALID_GENERATION, + term, + start_streaming_at, + term_history, + }; + Ok(ProposerAcceptorMessage::Elected(msg)) + } + 'a' => { + // read header followed by wal data + let hdrv2 = AppendRequestHeaderV2::des_from(&mut stream)?; + let hdr = AppendRequestHeader { + generation: INVALID_GENERATION, + term: hdrv2.term, + begin_lsn: hdrv2.begin_lsn, + end_lsn: hdrv2.end_lsn, + commit_lsn: hdrv2.commit_lsn, + truncate_lsn: hdrv2.truncate_lsn, + }; + let rec_size = hdr + .end_lsn + .checked_sub(hdr.begin_lsn) + .context("begin_lsn > end_lsn in AppendRequest")? 
+ .0 as usize; + if rec_size > MAX_SEND_SIZE { + bail!( + "AppendRequest is longer than MAX_SEND_SIZE ({})", + MAX_SEND_SIZE + ); + } + + let mut wal_data_vec: Vec = vec![0; rec_size]; + stream.read_exact(&mut wal_data_vec)?; + let wal_data = Bytes::from(wal_data_vec); + + let msg = AppendRequest { h: hdr, wal_data }; + + Ok(ProposerAcceptorMessage::AppendRequest(msg)) + } + _ => bail!("unknown proposer-acceptor message tag: {}", tag), + } + } else { + bail!("unsupported protocol version {}", proto_version); } } @@ -394,36 +703,21 @@ impl ProposerAcceptorMessage { // We explicitly list all fields, to draw attention here when new fields are added. let mut size = BASE_SIZE; size += match self { - Self::Greeting(ProposerGreeting { - protocol_version: _, - pg_version: _, - proposer_id: _, - system_id: _, - timeline_id: _, - tenant_id: _, - tli: _, - wal_seg_size: _, - }) => 0, + Self::Greeting(_) => 0, - Self::VoteRequest(VoteRequest { term: _ }) => 0, + Self::VoteRequest(_) => 0, - Self::Elected(ProposerElected { - term: _, - start_streaming_at: _, - term_history: _, - timeline_start_lsn: _, - }) => 0, + Self::Elected(_) => 0, Self::AppendRequest(AppendRequest { h: AppendRequestHeader { + generation: _, term: _, - term_start_lsn: _, begin_lsn: _, end_lsn: _, commit_lsn: _, truncate_lsn: _, - proposer_uuid: _, }, wal_data, }) => wal_data.len(), @@ -431,13 +725,12 @@ impl ProposerAcceptorMessage { Self::NoFlushAppendRequest(AppendRequest { h: AppendRequestHeader { + generation: _, term: _, - term_start_lsn: _, begin_lsn: _, end_lsn: _, commit_lsn: _, truncate_lsn: _, - proposer_uuid: _, }, wal_data, }) => wal_data.len(), @@ -458,45 +751,118 @@ pub enum AcceptorProposerMessage { } impl AcceptorProposerMessage { - /// Serialize acceptor -> proposer message. - pub fn serialize(&self, buf: &mut BytesMut) -> Result<()> { - match self { - AcceptorProposerMessage::Greeting(msg) => { - buf.put_u64_le('g' as u64); - buf.put_u64_le(msg.term); - buf.put_u64_le(msg.node_id.0); - } - AcceptorProposerMessage::VoteResponse(msg) => { - buf.put_u64_le('v' as u64); - buf.put_u64_le(msg.term); - buf.put_u64_le(msg.vote_given); - buf.put_u64_le(msg.flush_lsn.into()); - buf.put_u64_le(msg.truncate_lsn.into()); - buf.put_u32_le(msg.term_history.0.len() as u32); - for e in &msg.term_history.0 { - buf.put_u64_le(e.term); - buf.put_u64_le(e.lsn.into()); - } - buf.put_u64_le(msg.timeline_start_lsn.into()); - } - AcceptorProposerMessage::AppendResponse(msg) => { - buf.put_u64_le('a' as u64); - buf.put_u64_le(msg.term); - buf.put_u64_le(msg.flush_lsn.into()); - buf.put_u64_le(msg.commit_lsn.into()); - buf.put_i64_le(msg.hs_feedback.ts); - buf.put_u64_le(msg.hs_feedback.xmin); - buf.put_u64_le(msg.hs_feedback.catalog_xmin); + fn put_cstr(buf: &mut BytesMut, s: &str) { + buf.put_slice(s.as_bytes()); + buf.put_u8(0); // null terminator + } - // AsyncReadMessage in walproposer.c will not try to decode pageserver_feedback - // if it is not present. - if let Some(ref msg) = msg.pageserver_feedback { - msg.serialize(buf); - } - } + /// Serialize membership::Configuration into buf. 
+ fn serialize_mconf(buf: &mut BytesMut, mconf: &membership::Configuration) { + buf.put_u32(mconf.generation.into_inner()); + buf.put_u32(mconf.members.m.len() as u32); + for sk in &mconf.members.m { + buf.put_u64(sk.id.0); + Self::put_cstr(buf, &sk.host); + buf.put_u16(sk.pg_port); } + if let Some(ref new_members) = mconf.new_members { + buf.put_u32(new_members.m.len() as u32); + for sk in &new_members.m { + buf.put_u64(sk.id.0); + Self::put_cstr(buf, &sk.host); + buf.put_u16(sk.pg_port); + } + } else { + buf.put_u32(0); + } + } - Ok(()) + /// Serialize acceptor -> proposer message. + pub fn serialize(&self, buf: &mut BytesMut, proto_version: u32) -> Result<()> { + if proto_version == SK_PROTO_VERSION_3 { + match self { + AcceptorProposerMessage::Greeting(msg) => { + buf.put_u8(b'g'); + buf.put_u64(msg.node_id.0); + Self::serialize_mconf(buf, &msg.mconf); + buf.put_u64(msg.term) + } + AcceptorProposerMessage::VoteResponse(msg) => { + buf.put_u8(b'v'); + buf.put_u32(msg.generation.into_inner()); + buf.put_u64(msg.term); + buf.put_u8(msg.vote_given as u8); + buf.put_u64(msg.flush_lsn.into()); + buf.put_u64(msg.truncate_lsn.into()); + buf.put_u32(msg.term_history.0.len() as u32); + for e in &msg.term_history.0 { + buf.put_u64(e.term); + buf.put_u64(e.lsn.into()); + } + } + AcceptorProposerMessage::AppendResponse(msg) => { + buf.put_u8(b'a'); + buf.put_u32(msg.generation.into_inner()); + buf.put_u64(msg.term); + buf.put_u64(msg.flush_lsn.into()); + buf.put_u64(msg.commit_lsn.into()); + buf.put_i64(msg.hs_feedback.ts); + buf.put_u64(msg.hs_feedback.xmin); + buf.put_u64(msg.hs_feedback.catalog_xmin); + + // AsyncReadMessage in walproposer.c will not try to decode pageserver_feedback + // if it is not present. + if let Some(ref msg) = msg.pageserver_feedback { + msg.serialize(buf); + } + } + } + Ok(()) + // TODO remove 3 after converting all msgs + } else if proto_version == SK_PROTO_VERSION_2 { + match self { + AcceptorProposerMessage::Greeting(msg) => { + buf.put_u64_le('g' as u64); + // v2 didn't have mconf and fields were reordered + buf.put_u64_le(msg.term); + buf.put_u64_le(msg.node_id.0); + } + AcceptorProposerMessage::VoteResponse(msg) => { + // v2 didn't have generation, had u64 vote_given and timeline_start_lsn + buf.put_u64_le('v' as u64); + buf.put_u64_le(msg.term); + buf.put_u64_le(msg.vote_given as u64); + buf.put_u64_le(msg.flush_lsn.into()); + buf.put_u64_le(msg.truncate_lsn.into()); + buf.put_u32_le(msg.term_history.0.len() as u32); + for e in &msg.term_history.0 { + buf.put_u64_le(e.term); + buf.put_u64_le(e.lsn.into()); + } + // removed timeline_start_lsn + buf.put_u64_le(0); + } + AcceptorProposerMessage::AppendResponse(msg) => { + // v2 didn't have generation + buf.put_u64_le('a' as u64); + buf.put_u64_le(msg.term); + buf.put_u64_le(msg.flush_lsn.into()); + buf.put_u64_le(msg.commit_lsn.into()); + buf.put_i64_le(msg.hs_feedback.ts); + buf.put_u64_le(msg.hs_feedback.xmin); + buf.put_u64_le(msg.hs_feedback.catalog_xmin); + + // AsyncReadMessage in walproposer.c will not try to decode pageserver_feedback + // if it is not present. 
+ if let Some(ref msg) = msg.pageserver_feedback { + msg.serialize(buf); + } + } + } + Ok(()) + } else { + bail!("unsupported protocol version {}", proto_version); + } } } @@ -593,14 +959,6 @@ where &mut self, msg: &ProposerGreeting, ) -> Result> { - // Check protocol compatibility - if msg.protocol_version != SK_PROTOCOL_VERSION { - bail!( - "incompatible protocol version {}, expected {}", - msg.protocol_version, - SK_PROTOCOL_VERSION - ); - } /* Postgres major version mismatch is treated as fatal error * because safekeepers parse WAL headers and the format * may change between versions. @@ -655,15 +1013,16 @@ where self.state.finish_change(&state).await?; } - info!( - "processed greeting from walproposer {}, sending term {:?}", - msg.proposer_id.map(|b| format!("{:X}", b)).join(""), - self.state.acceptor_state.term - ); - Ok(Some(AcceptorProposerMessage::Greeting(AcceptorGreeting { - term: self.state.acceptor_state.term, + let apg = AcceptorGreeting { node_id: self.node_id, - }))) + mconf: self.state.mconf.clone(), + term: self.state.acceptor_state.term, + }; + info!( + "processed greeting {:?} from walproposer, sending {:?}", + msg, apg + ); + Ok(Some(AcceptorProposerMessage::Greeting(apg))) } /// Give vote for the given term, if we haven't done that previously. @@ -684,12 +1043,12 @@ where self.wal_store.flush_wal().await?; // initialize with refusal let mut resp = VoteResponse { + generation: self.state.mconf.generation, term: self.state.acceptor_state.term, - vote_given: false as u64, + vote_given: false, flush_lsn: self.flush_lsn(), truncate_lsn: self.state.inmem.peer_horizon_lsn, term_history: self.get_term_history(), - timeline_start_lsn: self.state.timeline_start_lsn, }; if self.state.acceptor_state.term < msg.term { let mut state = self.state.start_change(); @@ -698,15 +1057,16 @@ where self.state.finish_change(&state).await?; resp.term = self.state.acceptor_state.term; - resp.vote_given = true as u64; + resp.vote_given = true; } - info!("processed VoteRequest for term {}: {:?}", msg.term, &resp); + info!("processed {:?}: sending {:?}", msg, &resp); Ok(Some(AcceptorProposerMessage::VoteResponse(resp))) } /// Form AppendResponse from current state. fn append_response(&self) -> AppendResponse { let ar = AppendResponse { + generation: self.state.mconf.generation, term: self.state.acceptor_state.term, flush_lsn: self.flush_lsn(), commit_lsn: self.state.commit_lsn, @@ -769,9 +1129,14 @@ where // and walproposer recalculates the streaming point. OTOH repeating // error indicates a serious bug. 
if last_common_point.lsn != msg.start_streaming_at { - bail!("refusing ProposerElected with unexpected truncation point: lcp={:?} start_streaming_at={}, term={}, sk_th={:?} flush_lsn={}, wp_th={:?}", - last_common_point, msg.start_streaming_at, - self.state.acceptor_state.term, sk_th, self.flush_lsn(), msg.term_history, + bail!( + "refusing ProposerElected with unexpected truncation point: lcp={:?} start_streaming_at={}, term={}, sk_th={:?} flush_lsn={}, wp_th={:?}", + last_common_point, + msg.start_streaming_at, + self.state.acceptor_state.term, + sk_th, + self.flush_lsn(), + msg.term_history, ); } @@ -779,8 +1144,12 @@ where assert!( msg.start_streaming_at >= self.state.inmem.commit_lsn, "attempt to truncate committed data: start_streaming_at={}, commit_lsn={}, term={}, sk_th={:?} flush_lsn={}, wp_th={:?}", - msg.start_streaming_at, self.state.inmem.commit_lsn, - self.state.acceptor_state.term, sk_th, self.flush_lsn(), msg.term_history, + msg.start_streaming_at, + self.state.inmem.commit_lsn, + self.state.acceptor_state.term, + sk_th, + self.flush_lsn(), + msg.term_history, ); // Before first WAL write initialize its segment. It makes first segment @@ -805,18 +1174,22 @@ where // Here we learn initial LSN for the first time, set fields // interested in that. - if state.timeline_start_lsn == Lsn(0) { - // Remember point where WAL begins globally. - state.timeline_start_lsn = msg.timeline_start_lsn; - info!( - "setting timeline_start_lsn to {:?}", - state.timeline_start_lsn - ); + if let Some(start_lsn) = msg.term_history.0.first() { + if state.timeline_start_lsn == Lsn(0) { + // Remember point where WAL begins globally. In the future it + // will be initialized immediately on timeline creation. + state.timeline_start_lsn = start_lsn.lsn; + info!( + "setting timeline_start_lsn to {:?}", + state.timeline_start_lsn + ); + } } + if state.peer_horizon_lsn == Lsn(0) { // Update peer_horizon_lsn as soon as we know where timeline starts. // It means that peer_horizon_lsn cannot be zero after we know timeline_start_lsn. - state.peer_horizon_lsn = msg.timeline_start_lsn; + state.peer_horizon_lsn = state.timeline_start_lsn; } if state.local_start_lsn == Lsn(0) { state.local_start_lsn = msg.start_streaming_at; @@ -896,7 +1269,10 @@ where // If our term is higher, immediately refuse the message. if self.state.acceptor_state.term > msg.h.term { - let resp = AppendResponse::term_only(self.state.acceptor_state.term); + let resp = AppendResponse::term_only( + self.state.mconf.generation, + self.state.acceptor_state.term, + ); return Ok(Some(AcceptorProposerMessage::AppendResponse(resp))); } @@ -924,10 +1300,8 @@ where ); } - // Now we know that we are in the same term as the proposer, - // processing the message. - - self.state.inmem.proposer_uuid = msg.h.proposer_uuid; + // Now we know that we are in the same term as the proposer, process the + // message.
// do the job if !msg.wal_data.is_empty() { @@ -1000,21 +1374,19 @@ where #[cfg(test)] mod tests { - use futures::future::BoxFuture; + use std::ops::Deref; + use std::str::FromStr; + use std::time::{Instant, UNIX_EPOCH}; - use postgres_ffi::{XLogSegNo, WAL_SEGMENT_SIZE}; - use safekeeper_api::{ - membership::{Configuration, MemberSet, SafekeeperGeneration, SafekeeperId}, - ServerInfo, + use futures::future::BoxFuture; + use postgres_ffi::{WAL_SEGMENT_SIZE, XLogSegNo}; + use safekeeper_api::ServerInfo; + use safekeeper_api::membership::{ + Configuration, MemberSet, SafekeeperGeneration, SafekeeperId, }; use super::*; use crate::state::{EvictionState, TimelinePersistentState}; - use std::{ - ops::Deref, - str::FromStr, - time::{Instant, UNIX_EPOCH}, - }; // fake storage for tests struct InMemoryState { @@ -1097,10 +1469,13 @@ mod tests { let mut sk = SafeKeeper::new(TimelineState::new(storage), wal_store, NodeId(0)).unwrap(); // check voting for 1 is ok - let vote_request = ProposerAcceptorMessage::VoteRequest(VoteRequest { term: 1 }); + let vote_request = ProposerAcceptorMessage::VoteRequest(VoteRequest { + generation: Generation::new(0), + term: 1, + }); let mut vote_resp = sk.process_msg(&vote_request).await; match vote_resp.unwrap() { - Some(AcceptorProposerMessage::VoteResponse(resp)) => assert!(resp.vote_given != 0), + Some(AcceptorProposerMessage::VoteResponse(resp)) => assert!(resp.vote_given), r => panic!("unexpected response: {:?}", r), } @@ -1115,7 +1490,7 @@ mod tests { // and ensure voting second time for 1 is not ok vote_resp = sk.process_msg(&vote_request).await; match vote_resp.unwrap() { - Some(AcceptorProposerMessage::VoteResponse(resp)) => assert!(resp.vote_given == 0), + Some(AcceptorProposerMessage::VoteResponse(resp)) => assert!(!resp.vote_given), r => panic!("unexpected response: {:?}", r), } } @@ -1130,13 +1505,12 @@ mod tests { let mut sk = SafeKeeper::new(TimelineState::new(storage), wal_store, NodeId(0)).unwrap(); let mut ar_hdr = AppendRequestHeader { + generation: Generation::new(0), term: 2, - term_start_lsn: Lsn(3), begin_lsn: Lsn(1), end_lsn: Lsn(2), commit_lsn: Lsn(0), truncate_lsn: Lsn(0), - proposer_uuid: [0; 16], }; let mut append_request = AppendRequest { h: ar_hdr.clone(), @@ -1144,6 +1518,7 @@ mod tests { }; let pem = ProposerElected { + generation: Generation::new(0), term: 2, start_streaming_at: Lsn(1), term_history: TermHistory(vec![ @@ -1156,7 +1531,6 @@ mod tests { lsn: Lsn(3), }, ]), - timeline_start_lsn: Lsn(1), }; sk.process_msg(&ProposerAcceptorMessage::Elected(pem)) .await @@ -1191,26 +1565,25 @@ mod tests { let mut sk = SafeKeeper::new(TimelineState::new(storage), wal_store, NodeId(0)).unwrap(); let pem = ProposerElected { + generation: Generation::new(0), term: 1, start_streaming_at: Lsn(1), term_history: TermHistory(vec![TermLsn { term: 1, lsn: Lsn(1), }]), - timeline_start_lsn: Lsn(1), }; sk.process_msg(&ProposerAcceptorMessage::Elected(pem)) .await .unwrap(); let ar_hdr = AppendRequestHeader { + generation: Generation::new(0), term: 1, - term_start_lsn: Lsn(3), begin_lsn: Lsn(1), end_lsn: Lsn(2), commit_lsn: Lsn(0), truncate_lsn: Lsn(0), - proposer_uuid: [0; 16], }; let append_request = AppendRequest { h: ar_hdr.clone(), diff --git a/safekeeper/src/send_interpreted_wal.rs b/safekeeper/src/send_interpreted_wal.rs index 5916675c3f..e196f91d3c 100644 --- a/safekeeper/src/send_interpreted_wal.rs +++ b/safekeeper/src/send_interpreted_wal.rs @@ -3,23 +3,22 @@ use std::fmt::Display; use std::sync::Arc; use std::time::Duration; -use 
anyhow::{anyhow, Context}; -use futures::future::Either; +use anyhow::{Context, anyhow}; use futures::StreamExt; +use futures::future::Either; use pageserver_api::shard::ShardIdentity; use postgres_backend::{CopyStreamHandlerEnd, PostgresBackend}; -use postgres_ffi::waldecoder::WalDecodeError; -use postgres_ffi::{get_current_timestamp, waldecoder::WalStreamDecoder}; +use postgres_ffi::get_current_timestamp; +use postgres_ffi::waldecoder::{WalDecodeError, WalStreamDecoder}; use pq_proto::{BeMessage, InterpretedWalRecordsBody, WalSndKeepAlive}; use tokio::io::{AsyncRead, AsyncWrite}; use tokio::sync::mpsc::error::SendError; use tokio::task::JoinHandle; use tokio::time::MissedTickBehavior; -use tracing::{error, info, info_span, Instrument}; +use tracing::{Instrument, error, info, info_span}; use utils::critical; use utils::lsn::Lsn; -use utils::postgres_client::Compression; -use utils::postgres_client::InterpretedFormat; +use utils::postgres_client::{Compression, InterpretedFormat}; use wal_decoder::models::{InterpretedWalRecord, InterpretedWalRecords}; use wal_decoder::wire_format::ToWireFormat; @@ -100,7 +99,12 @@ struct ShardSenderState { /// State of [`InterpretedWalReader`] visible outside of the task running it. #[derive(Debug)] pub(crate) enum InterpretedWalReaderState { - Running { current_position: Lsn }, + Running { + current_position: Lsn, + /// Tracks the start of the PG WAL LSN from which the current batch of + /// interpreted records originated. + current_batch_wal_start: Option, + }, Done, } @@ -122,14 +126,21 @@ pub enum InterpretedWalReaderError { } enum CurrentPositionUpdate { - Reset(Lsn), + Reset { from: Lsn, to: Lsn }, NotReset(Lsn), } impl CurrentPositionUpdate { fn current_position(&self) -> Lsn { match self { - CurrentPositionUpdate::Reset(lsn) => *lsn, + CurrentPositionUpdate::Reset { from: _, to } => *to, + CurrentPositionUpdate::NotReset(lsn) => *lsn, + } + } + + fn previous_position(&self) -> Lsn { + match self { + CurrentPositionUpdate::Reset { from, to: _ } => *from, CurrentPositionUpdate::NotReset(lsn) => *lsn, } } @@ -145,16 +156,33 @@ impl InterpretedWalReaderState { } } + #[cfg(test)] + fn current_batch_wal_start(&self) -> Option { + match self { + InterpretedWalReaderState::Running { + current_batch_wal_start, + .. + } => *current_batch_wal_start, + InterpretedWalReaderState::Done => None, + } + } + // Reset the current position of the WAL reader if the requested starting position // of the new shard is smaller than the current value. fn maybe_reset(&mut self, new_shard_start_pos: Lsn) -> CurrentPositionUpdate { match self { InterpretedWalReaderState::Running { - current_position, .. + current_position, + current_batch_wal_start, } => { if new_shard_start_pos < *current_position { + let from = *current_position; *current_position = new_shard_start_pos; - CurrentPositionUpdate::Reset(*current_position) + *current_batch_wal_start = None; + CurrentPositionUpdate::Reset { + from, + to: *current_position, + } } else { CurrentPositionUpdate::NotReset(*current_position) } @@ -164,6 +192,47 @@ impl InterpretedWalReaderState { } } } + + fn update_current_batch_wal_start(&mut self, lsn: Lsn) { + match self { + InterpretedWalReaderState::Running { + current_batch_wal_start, + .. 
+ } => { + if current_batch_wal_start.is_none() { + *current_batch_wal_start = Some(lsn); + } + } + InterpretedWalReaderState::Done => { + panic!("update_current_batch_wal_start called on finished reader") + } + } + } + + fn take_current_batch_wal_start(&mut self) -> Lsn { + match self { + InterpretedWalReaderState::Running { + current_batch_wal_start, + .. + } => current_batch_wal_start.take().unwrap(), + InterpretedWalReaderState::Done => { + panic!("take_current_batch_wal_start called on finished reader") + } + } + } + + fn update_current_position(&mut self, lsn: Lsn) { + match self { + InterpretedWalReaderState::Running { + current_position, .. + } => { + *current_position = lsn; + } + InterpretedWalReaderState::Done => { + panic!("update_current_position called on finished reader") + } + } + } } pub(crate) struct AttachShardNotification { @@ -184,6 +253,7 @@ impl InterpretedWalReader { ) -> InterpretedWalReaderHandle { let state = Arc::new(std::sync::RwLock::new(InterpretedWalReaderState::Running { current_position: start_pos, + current_batch_wal_start: None, })); let (shard_notification_tx, shard_notification_rx) = tokio::sync::mpsc::unbounded_channel(); @@ -237,9 +307,13 @@ impl InterpretedWalReader { tx: tokio::sync::mpsc::Sender, shard: ShardIdentity, pg_version: u32, + shard_notification_rx: Option< + tokio::sync::mpsc::UnboundedReceiver, + >, ) -> InterpretedWalReader { let state = Arc::new(std::sync::RwLock::new(InterpretedWalReaderState::Running { current_position: start_pos, + current_batch_wal_start: None, })); InterpretedWalReader { @@ -252,7 +326,7 @@ impl InterpretedWalReader { next_record_lsn: start_pos, }], )]), - shard_notification_rx: None, + shard_notification_rx, state: state.clone(), pg_version, } @@ -302,7 +376,7 @@ impl InterpretedWalReader { let wal = wal_or_reset.map(|wor| wor.get_wal().expect("reset handled in select branch below")); let WalBytes { wal, - wal_start_lsn: _, + wal_start_lsn, wal_end_lsn, available_wal_end_lsn, } = match wal { @@ -315,6 +389,8 @@ impl InterpretedWalReader { } }; + self.state.write().unwrap().update_current_batch_wal_start(wal_start_lsn); + wal_decoder.feed_bytes(&wal); // Deserialize and interpret WAL records from this batch of WAL. @@ -363,35 +439,49 @@ impl InterpretedWalReader { let max_next_record_lsn = match max_next_record_lsn { Some(lsn) => lsn, - None => { continue; } + None => { + continue; + } }; // Update the current position such that new receivers can decide // whether to attach to us or spawn a new WAL reader. - match &mut *self.state.write().unwrap() { - InterpretedWalReaderState::Running { current_position, .. } => { - *current_position = max_next_record_lsn; - }, - InterpretedWalReaderState::Done => { - unreachable!() - } - } + let batch_wal_start_lsn = { + let mut guard = self.state.write().unwrap(); + guard.update_current_position(max_next_record_lsn); + guard.take_current_batch_wal_start() + }; // Send interpreted records downstream. Anything that has already been seen // by a shard is filtered out. 
let mut shard_senders_to_remove = Vec::new(); for (shard, states) in &mut self.shard_senders { for state in states { - if max_next_record_lsn <= state.next_record_lsn { - continue; - } - let shard_sender_id = ShardSenderId::new(*shard, state.sender_id); - let records = records_by_sender.remove(&shard_sender_id).unwrap_or_default(); - let batch = InterpretedWalRecords { - records, - next_record_lsn: Some(max_next_record_lsn), + let batch = if max_next_record_lsn > state.next_record_lsn { + // This batch contains at least one record that this shard has not + // seen yet. + let records = records_by_sender.remove(&shard_sender_id).unwrap_or_default(); + + InterpretedWalRecords { + records, + next_record_lsn: max_next_record_lsn, + raw_wal_start_lsn: Some(batch_wal_start_lsn), + } + } else if wal_end_lsn > state.next_record_lsn { + // All the records in this batch were seen by the shard + // However, the batch maps to a chunk of WAL that the + // shard has not yet seen. Notify it of the start LSN + // of the PG WAL chunk such that it doesn't look like a gap. + InterpretedWalRecords { + records: Vec::default(), + next_record_lsn: state.next_record_lsn, + raw_wal_start_lsn: Some(batch_wal_start_lsn), + } + } else { + // The shard has seen this chunk of WAL before. Skip it. + continue; }; let res = state.tx.send(Batch { @@ -403,7 +493,7 @@ impl InterpretedWalReader { if res.is_err() { shard_senders_to_remove.push(shard_sender_id); } else { - state.next_record_lsn = max_next_record_lsn; + state.next_record_lsn = std::cmp::max(state.next_record_lsn, max_next_record_lsn); } } } @@ -451,7 +541,7 @@ impl InterpretedWalReader { // anything outside the select statement. let position_reset = self.state.write().unwrap().maybe_reset(start_pos); match position_reset { - CurrentPositionUpdate::Reset(to) => { + CurrentPositionUpdate::Reset { from: _, to } => { self.wal_stream.reset(to).await; wal_decoder = WalStreamDecoder::new(to, self.pg_version); }, @@ -459,14 +549,22 @@ impl InterpretedWalReader { }; tracing::info!( - "Added shard sender {} with start_pos={} current_pos={}", - ShardSenderId::new(shard_id, new_sender_id), start_pos, position_reset.current_position() + "Added shard sender {} with start_pos={} previous_pos={} current_pos={}", + ShardSenderId::new(shard_id, new_sender_id), + start_pos, + position_reset.previous_position(), + position_reset.current_position(), ); } } } } } + + #[cfg(test)] + fn state(&self) -> Arc> { + self.state.clone() + } } impl InterpretedWalReaderHandle { @@ -592,22 +690,20 @@ impl InterpretedWalSender<'_, IO> { } #[cfg(test)] mod tests { - use std::{collections::HashMap, str::FromStr, time::Duration}; + use std::collections::HashMap; + use std::str::FromStr; + use std::time::Duration; use pageserver_api::shard::{ShardIdentity, ShardStripeSize}; use postgres_ffi::MAX_SEND_SIZE; use tokio::sync::mpsc::error::TryRecvError; - use utils::{ - id::{NodeId, TenantTimelineId}, - lsn::Lsn, - shard::{ShardCount, ShardNumber}, - }; + use utils::id::{NodeId, TenantTimelineId}; + use utils::lsn::Lsn; + use utils::shard::{ShardCount, ShardNumber}; - use crate::{ - send_interpreted_wal::{Batch, InterpretedWalReader}, - test_utils::Env, - wal_reader_stream::StreamingWalReader, - }; + use crate::send_interpreted_wal::{AttachShardNotification, Batch, InterpretedWalReader}; + use crate::test_utils::Env; + use crate::wal_reader_stream::StreamingWalReader; #[tokio::test] async fn test_interpreted_wal_reader_fanout() { @@ -709,9 +805,11 @@ mod tests { // This test uses logical messages. 
Those only go to shard 0. Check that the // filtering worked and shard 1 did not get any. - assert!(shard_1_interpreted_records - .iter() - .all(|recs| recs.records.is_empty())); + assert!( + shard_1_interpreted_records + .iter() + .all(|recs| recs.records.is_empty()) + ); // Shard 0 should not receive anything more since the reader is // going through wal that it has already processed. @@ -884,4 +982,128 @@ mod tests { assert_eq!(sender.received_next_record_lsns, expected); } } + + #[tokio::test] + async fn test_batch_start_tracking_on_reset() { + // When the WAL stream is reset to an older LSN, + // the current batch start LSN should be invalidated. + // This test constructs such a scenario: + // 1. Shard 0 is reading somewhere ahead + // 2. Reader reads some WAL, but does not decode a full record (partial read) + // 3. Shard 1 attaches to the reader and resets it to an older LSN + // 4. Shard 1 should get the correct batch WAL start LSN + let _ = env_logger::builder().is_test(true).try_init(); + + const SIZE: usize = 64 * 1024; + const MSG_COUNT: usize = 10; + const PG_VERSION: u32 = 17; + const SHARD_COUNT: u8 = 2; + const WAL_READER_BATCH_SIZE: usize = 8192; + + let start_lsn = Lsn::from_str("0/149FD18").unwrap(); + let env = Env::new(true).unwrap(); + let mut next_record_lsns = Vec::default(); + let tli = env + .make_timeline(NodeId(1), TenantTimelineId::generate(), start_lsn) + .await + .unwrap(); + + let resident_tli = tli.wal_residence_guard().await.unwrap(); + let end_watch = + Env::write_wal(tli, start_lsn, SIZE, MSG_COUNT, Some(&mut next_record_lsns)) + .await + .unwrap(); + + assert!(next_record_lsns.len() > 3); + let shard_0_start_lsn = next_record_lsns[3]; + + let end_pos = end_watch.get(); + + let streaming_wal_reader = StreamingWalReader::new( + resident_tli, + None, + shard_0_start_lsn, + end_pos, + end_watch, + WAL_READER_BATCH_SIZE, + ); + + let shard_0 = ShardIdentity::new( + ShardNumber(0), + ShardCount(SHARD_COUNT), + ShardStripeSize::default(), + ) + .unwrap(); + + let shard_1 = ShardIdentity::new( + ShardNumber(1), + ShardCount(SHARD_COUNT), + ShardStripeSize::default(), + ) + .unwrap(); + + let mut shards = HashMap::new(); + + for shard_number in 0..SHARD_COUNT { + let shard_id = ShardIdentity::new( + ShardNumber(shard_number), + ShardCount(SHARD_COUNT), + ShardStripeSize::default(), + ) + .unwrap(); + let (tx, rx) = tokio::sync::mpsc::channel::(MSG_COUNT * 2); + shards.insert(shard_id, (Some(tx), Some(rx))); + } + + let shard_0_tx = shards.get_mut(&shard_0).unwrap().0.take().unwrap(); + + let (shard_notification_tx, shard_notification_rx) = tokio::sync::mpsc::unbounded_channel(); + + let reader = InterpretedWalReader::new( + streaming_wal_reader, + shard_0_start_lsn, + shard_0_tx, + shard_0, + PG_VERSION, + Some(shard_notification_rx), + ); + + let reader_state = reader.state(); + let mut reader_fut = std::pin::pin!(reader.run(shard_0_start_lsn, &None)); + loop { + let poll = futures::poll!(reader_fut.as_mut()); + assert!(poll.is_pending()); + + let guard = reader_state.read().unwrap(); + if guard.current_batch_wal_start().is_some() { + break; + } + } + + shard_notification_tx + .send(AttachShardNotification { + shard_id: shard_1, + sender: shards.get_mut(&shard_1).unwrap().0.take().unwrap(), + start_pos: start_lsn, + }) + .unwrap(); + + let mut shard_1_rx = shards.get_mut(&shard_1).unwrap().1.take().unwrap(); + loop { + let poll = futures::poll!(reader_fut.as_mut()); + assert!(poll.is_pending()); + + let try_recv_res = shard_1_rx.try_recv(); + match try_recv_res 
{ + Ok(batch) => { + assert_eq!(batch.records.raw_wal_start_lsn.unwrap(), start_lsn); + break; + } + Err(tokio::sync::mpsc::error::TryRecvError::Empty) => {} + Err(tokio::sync::mpsc::error::TryRecvError::Disconnected) => { + unreachable!(); + } + } + } + } } diff --git a/safekeeper/src/send_wal.rs b/safekeeper/src/send_wal.rs index 4a4a74a0fd..33e3d0485c 100644 --- a/safekeeper/src/send_wal.rs +++ b/safekeeper/src/send_wal.rs @@ -1,6 +1,34 @@ //! This module implements the streaming side of replication protocol, starting //! with the "START_REPLICATION" message, and registry of walsenders. +use std::cmp::{max, min}; +use std::net::SocketAddr; +use std::sync::Arc; +use std::time::Duration; + +use anyhow::{Context as AnyhowContext, bail}; +use bytes::Bytes; +use futures::FutureExt; +use itertools::Itertools; +use parking_lot::Mutex; +use postgres_backend::{CopyStreamHandlerEnd, PostgresBackend, PostgresBackendReader, QueryError}; +use postgres_ffi::{MAX_SEND_SIZE, TimestampTz, get_current_timestamp}; +use pq_proto::{BeMessage, WalSndKeepAlive, XLogDataBody}; +use safekeeper_api::Term; +use safekeeper_api::models::{ + HotStandbyFeedback, INVALID_FULL_TRANSACTION_ID, ReplicationFeedback, StandbyFeedback, + StandbyReply, +}; +use tokio::io::{AsyncRead, AsyncWrite}; +use tokio::sync::watch::Receiver; +use tokio::time::timeout; +use tracing::*; +use utils::bin_ser::BeSer; +use utils::failpoint_support; +use utils::lsn::Lsn; +use utils::pageserver_feedback::PageserverFeedback; +use utils::postgres_client::PostgresClientProtocol; + use crate::handler::SafekeeperPostgresHandler; use crate::metrics::{RECEIVED_PS_FEEDBACKS, WAL_READERS}; use crate::receive_wal::WalReceivers; @@ -11,34 +39,6 @@ use crate::send_interpreted_wal::{ use crate::timeline::WalResidentTimeline; use crate::wal_reader_stream::StreamingWalReader; use crate::wal_storage::WalReader; -use anyhow::{bail, Context as AnyhowContext}; -use bytes::Bytes; -use futures::FutureExt; -use parking_lot::Mutex; -use postgres_backend::PostgresBackend; -use postgres_backend::{CopyStreamHandlerEnd, PostgresBackendReader, QueryError}; -use postgres_ffi::get_current_timestamp; -use postgres_ffi::{TimestampTz, MAX_SEND_SIZE}; -use pq_proto::{BeMessage, WalSndKeepAlive, XLogDataBody}; -use safekeeper_api::models::{ - HotStandbyFeedback, ReplicationFeedback, StandbyFeedback, StandbyReply, - INVALID_FULL_TRANSACTION_ID, -}; -use safekeeper_api::Term; -use tokio::io::{AsyncRead, AsyncWrite}; -use utils::failpoint_support; -use utils::pageserver_feedback::PageserverFeedback; -use utils::postgres_client::PostgresClientProtocol; - -use itertools::Itertools; -use std::cmp::{max, min}; -use std::net::SocketAddr; -use std::sync::Arc; -use std::time::Duration; -use tokio::sync::watch::Receiver; -use tokio::time::timeout; -use tracing::*; -use utils::{bin_ser::BeSer, lsn::Lsn}; // See: https://www.postgresql.org/docs/13/protocol-replication.html const HOT_STANDBY_FEEDBACK_TAG_BYTE: u8 = b'h'; @@ -624,8 +624,9 @@ impl SafekeeperPostgresHandler { MAX_SEND_SIZE, ); - let reader = - InterpretedWalReader::new(wal_reader, start_pos, tx, shard, pg_version); + let reader = InterpretedWalReader::new( + wal_reader, start_pos, tx, shard, pg_version, None, + ); let sender = InterpretedWalSender { format, @@ -905,9 +906,9 @@ impl WalSender<'_, IO> { // pageserver to identify WalReceiverError::SuccessfulCompletion, // do not change this string without updating pageserver. 
return Err(CopyStreamHandlerEnd::ServerInitiated(format!( - "ending streaming to {:?} at {}, receiver is caughtup and there is no computes", - self.appname, self.start_pos, - ))); + "ending streaming to {:?} at {}, receiver is caughtup and there is no computes", + self.appname, self.start_pos, + ))); } } } diff --git a/safekeeper/src/state.rs b/safekeeper/src/state.rs index 4d566b12a0..e437e6d2cd 100644 --- a/safekeeper/src/state.rs +++ b/safekeeper/src/state.rs @@ -1,28 +1,24 @@ //! Defines per timeline data stored persistently (SafeKeeperPersistentState) //! and its wrapper with in memory layer (SafekeeperState). -use std::{cmp::max, ops::Deref, time::SystemTime}; +use std::cmp::max; +use std::ops::Deref; +use std::time::SystemTime; -use anyhow::{bail, Result}; +use anyhow::{Result, bail}; use postgres_ffi::WAL_SEGMENT_SIZE; -use safekeeper_api::{ - membership::Configuration, - models::{TimelineMembershipSwitchResponse, TimelineTermBumpResponse}, - ServerInfo, Term, INITIAL_TERM, -}; +use safekeeper_api::membership::Configuration; +use safekeeper_api::models::{TimelineMembershipSwitchResponse, TimelineTermBumpResponse}; +use safekeeper_api::{INITIAL_TERM, ServerInfo, Term}; use serde::{Deserialize, Serialize}; use tracing::info; -use utils::{ - id::{TenantId, TenantTimelineId, TimelineId}, - lsn::Lsn, -}; +use utils::id::{TenantId, TenantTimelineId, TimelineId}; +use utils::lsn::Lsn; -use crate::{ - control_file, - safekeeper::{AcceptorState, PgUuid, TermHistory, TermLsn, UNKNOWN_SERVER_VERSION}, - timeline::TimelineError, - wal_backup_partial::{self}, -}; +use crate::control_file; +use crate::safekeeper::{AcceptorState, PgUuid, TermHistory, TermLsn, UNKNOWN_SERVER_VERSION}; +use crate::timeline::TimelineError; +use crate::wal_backup_partial::{self}; /// Persistent information stored on safekeeper node about timeline. /// On disk data is prefixed by magic and format version and followed by checksum. diff --git a/safekeeper/src/test_utils.rs b/safekeeper/src/test_utils.rs index 79ceddd366..e6f74185c1 100644 --- a/safekeeper/src/test_utils.rs +++ b/safekeeper/src/test_utils.rs @@ -1,5 +1,12 @@ use std::sync::Arc; +use camino_tempfile::Utf8TempDir; +use postgres_ffi::v17::wal_generator::{LogicalMessageGenerator, WalGenerator}; +use safekeeper_api::membership::SafekeeperGeneration as Generation; +use tokio::fs::create_dir_all; +use utils::id::{NodeId, TenantTimelineId}; +use utils::lsn::Lsn; + use crate::rate_limit::RateLimiter; use crate::receive_wal::WalAcceptor; use crate::safekeeper::{ @@ -8,15 +15,10 @@ use crate::safekeeper::{ }; use crate::send_wal::EndWatch; use crate::state::{TimelinePersistentState, TimelineState}; -use crate::timeline::{get_timeline_dir, SharedState, StateSK, Timeline}; +use crate::timeline::{SharedState, StateSK, Timeline, get_timeline_dir}; use crate::timelines_set::TimelinesSet; use crate::wal_backup::remote_timeline_path; -use crate::{control_file, receive_wal, wal_storage, SafeKeeperConf}; -use camino_tempfile::Utf8TempDir; -use postgres_ffi::v17::wal_generator::{LogicalMessageGenerator, WalGenerator}; -use tokio::fs::create_dir_all; -use utils::id::{NodeId, TenantTimelineId}; -use utils::lsn::Lsn; +use crate::{SafeKeeperConf, control_file, receive_wal, wal_storage}; /// A Safekeeper testing or benchmarking environment. Uses a tempdir for storage, removed on drop. pub struct Env { @@ -73,10 +75,10 @@ impl Env { // Emulate an initial election. 
safekeeper .process_msg(&ProposerAcceptorMessage::Elected(ProposerElected { + generation: Generation::new(0), term: 1, start_streaming_at: start_lsn, term_history: TermHistory(vec![(1, start_lsn).into()]), - timeline_start_lsn: start_lsn, })) .await?; @@ -146,13 +148,12 @@ impl Env { let req = AppendRequest { h: AppendRequestHeader { + generation: Generation::new(0), term: 1, - term_start_lsn: start_lsn, begin_lsn: lsn, end_lsn: lsn + record.len() as u64, commit_lsn: lsn, truncate_lsn: Lsn(0), - proposer_uuid: [0; 16], }, wal_data: record, }; diff --git a/safekeeper/src/timeline.rs b/safekeeper/src/timeline.rs index 4341f13824..930f66a207 100644 --- a/safekeeper/src/timeline.rs +++ b/safekeeper/src/timeline.rs @@ -1,37 +1,32 @@ //! This module implements Timeline lifecycle management and has all necessary code //! to glue together SafeKeeper and all other background services. -use anyhow::{anyhow, bail, Result}; +use std::cmp::max; +use std::ops::{Deref, DerefMut}; +use std::sync::Arc; +use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; +use std::time::Duration; + +use anyhow::{Result, anyhow, bail}; use camino::{Utf8Path, Utf8PathBuf}; +use http_utils::error::ApiError; use remote_storage::RemotePath; +use safekeeper_api::Term; use safekeeper_api::membership::Configuration; use safekeeper_api::models::{ PeerInfo, TimelineMembershipSwitchResponse, TimelineTermBumpResponse, }; -use safekeeper_api::Term; +use storage_broker::proto::{SafekeeperTimelineInfo, TenantTimelineId as ProtoTenantTimelineId}; use tokio::fs::{self}; +use tokio::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard, watch}; +use tokio::time::Instant; use tokio_util::sync::CancellationToken; -use utils::id::TenantId; +use tracing::*; +use utils::id::{NodeId, TenantId, TenantTimelineId}; +use utils::lsn::Lsn; use utils::sync::gate::Gate; -use http_utils::error::ApiError; -use std::cmp::max; -use std::ops::{Deref, DerefMut}; -use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; -use std::sync::Arc; -use std::time::Duration; -use tokio::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard}; -use tokio::{sync::watch, time::Instant}; -use tracing::*; -use utils::{ - id::{NodeId, TenantTimelineId}, - lsn::Lsn, -}; - -use storage_broker::proto::SafekeeperTimelineInfo; -use storage_broker::proto::TenantTimelineId as ProtoTenantTimelineId; - -use crate::control_file; +use crate::metrics::{FullTimelineInfo, MISC_OPERATION_SECONDS, WalStorageMetrics}; use crate::rate_limit::RateLimiter; use crate::receive_wal::WalReceivers; use crate::safekeeper::{AcceptorProposerMessage, ProposerAcceptorMessage, SafeKeeper, TermLsn}; @@ -42,11 +37,8 @@ use crate::timeline_manager::{AtomicStatus, ManagerCtl}; use crate::timelines_set::TimelinesSet; use crate::wal_backup::{self, remote_timeline_path}; use crate::wal_backup_partial::PartialRemoteSegment; - -use crate::metrics::{FullTimelineInfo, WalStorageMetrics, MISC_OPERATION_SECONDS}; use crate::wal_storage::{Storage as wal_storage_iface, WalReader}; -use crate::SafeKeeperConf; -use crate::{debug_dump, timeline_manager, wal_storage}; +use crate::{SafeKeeperConf, control_file, debug_dump, timeline_manager, wal_storage}; fn peer_info_from_sk_info(sk_info: &SafekeeperTimelineInfo, ts: Instant) -> PeerInfo { PeerInfo { @@ -168,7 +160,7 @@ impl StateSK { pub fn state(&self) -> &TimelineState { match self { StateSK::Loaded(sk) => &sk.state, - StateSK::Offloaded(ref s) => s, + StateSK::Offloaded(s) => s, StateSK::Empty => unreachable!(), } } @@ -176,7 +168,7 @@ impl StateSK { pub fn state_mut(&mut 
self) -> &mut TimelineState { match self { StateSK::Loaded(sk) => &mut sk.state, - StateSK::Offloaded(ref mut s) => s, + StateSK::Offloaded(s) => s, StateSK::Empty => unreachable!(), } } @@ -566,11 +558,18 @@ impl Timeline { }); } - /// Background timeline activities (which hold Timeline::gate) will no - /// longer run once this function completes. - pub async fn shutdown(&self) { + /// Cancel the timeline, requesting background activity to stop. Closing + /// the `self.gate` waits for that. + pub async fn cancel(&self) { info!("timeline {} shutting down", self.ttid); self.cancel.cancel(); + } + + /// Background timeline activities (which hold Timeline::gate) will no + /// longer run once this function completes. `Self::cancel` must have been + /// already called. + pub async fn close(&self) { + assert!(self.cancel.is_cancelled()); // Wait for any concurrent tasks to stop using this timeline, to avoid e.g. attempts // to read deleted files. @@ -582,13 +581,13 @@ impl Timeline { /// Also deletes WAL in s3. Might fail if e.g. s3 is unavailable, but /// deletion API endpoint is retriable. /// - /// Timeline must be in shut-down state (i.e. call [`Self::shutdown`] first) + /// Timeline must be in shut-down state (i.e. call [`Self::close`] first) pub async fn delete( &self, shared_state: &mut WriteGuardSharedState<'_>, only_local: bool, ) -> Result { - // Assert that [`Self::shutdown`] was already called + // Assert that [`Self::close`] was already called assert!(self.cancel.is_cancelled()); assert!(self.gate.close_complete()); @@ -1114,7 +1113,7 @@ impl ManagerTimeline { } /// Deletes directory and it's contents. Returns false if directory does not exist. -async fn delete_dir(path: &Utf8PathBuf) -> Result { +pub async fn delete_dir(path: &Utf8PathBuf) -> Result { match fs::remove_dir_all(path).await { Ok(_) => Ok(true), Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(false), diff --git a/safekeeper/src/timeline_eviction.rs b/safekeeper/src/timeline_eviction.rs index 303421c837..06ccb32d03 100644 --- a/safekeeper/src/timeline_eviction.rs +++ b/safekeeper/src/timeline_eviction.rs @@ -7,23 +7,19 @@ use anyhow::Context; use camino::Utf8PathBuf; use remote_storage::RemotePath; -use tokio::{ - fs::File, - io::{AsyncRead, AsyncWriteExt}, -}; +use tokio::fs::File; +use tokio::io::{AsyncRead, AsyncWriteExt}; use tracing::{debug, info, instrument, warn}; use utils::crashsafe::durable_rename; -use crate::{ - metrics::{ - EvictionEvent, EVICTION_EVENTS_COMPLETED, EVICTION_EVENTS_STARTED, NUM_EVICTED_TIMELINES, - }, - rate_limit::rand_duration, - timeline_manager::{Manager, StateSnapshot}, - wal_backup, - wal_backup_partial::{self, PartialRemoteSegment}, - wal_storage::wal_file_paths, +use crate::metrics::{ + EVICTION_EVENTS_COMPLETED, EVICTION_EVENTS_STARTED, EvictionEvent, NUM_EVICTED_TIMELINES, }; +use crate::rate_limit::rand_duration; +use crate::timeline_manager::{Manager, StateSnapshot}; +use crate::wal_backup; +use crate::wal_backup_partial::{self, PartialRemoteSegment}; +use crate::wal_storage::wal_file_paths; impl Manager { /// Returns true if the timeline is ready for eviction. diff --git a/safekeeper/src/timeline_manager.rs b/safekeeper/src/timeline_manager.rs index a33994dcab..71e99a4de7 100644 --- a/safekeeper/src/timeline_manager.rs +++ b/safekeeper/src/timeline_manager.rs @@ -7,41 +7,36 @@ //! Be aware that you need to be extra careful with manager code, because it is not respawned on panic. //! 
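The timeline.rs hunk above splits the old `shutdown()` into `cancel()` (flip the cancellation token) and `close()` (wait for `Timeline::gate` to drain), and `delete()` now asserts that both have already happened. A rough sketch of the ordering a caller is expected to follow, using the crate-internal API as it appears in the diff; the wrapper function itself is illustrative only:

```rust
// Illustrative only: `Timeline`, `write_shared_state` and `delete` are the crate
// APIs shown in the diff; this wrapper is a hypothetical caller, not PR code.
async fn shut_down_and_delete(timeline: &Timeline, only_local: bool) -> anyhow::Result<()> {
    timeline.cancel().await; // request background tasks to stop (sets the CancellationToken)
    timeline.close().await;  // closes the gate; asserts cancel() has already been called
    let mut shared_state = timeline.write_shared_state().await;
    // delete() re-asserts that the token is cancelled and the gate is fully closed.
    timeline.delete(&mut shared_state, only_local).await?;
    Ok(())
}
```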
Also, if it will stuck in some branch, it will prevent any further progress in the timeline. -use std::{ - sync::{atomic::AtomicUsize, Arc}, - time::Duration, -}; +use std::sync::Arc; +use std::sync::atomic::AtomicUsize; +use std::time::Duration; use futures::channel::oneshot; use postgres_ffi::XLogSegNo; -use safekeeper_api::{models::PeerInfo, Term}; +use safekeeper_api::Term; +use safekeeper_api::models::PeerInfo; use serde::{Deserialize, Serialize}; -use tokio::{ - task::{JoinError, JoinHandle}, - time::Instant, -}; +use tokio::task::{JoinError, JoinHandle}; +use tokio::time::Instant; use tokio_util::sync::CancellationToken; -use tracing::{debug, info, info_span, instrument, warn, Instrument}; +use tracing::{Instrument, debug, info, info_span, instrument, warn}; use utils::lsn::Lsn; -use crate::{ - control_file::{FileStorage, Storage}, - metrics::{ - MANAGER_ACTIVE_CHANGES, MANAGER_ITERATIONS_TOTAL, MISC_OPERATION_SECONDS, - NUM_EVICTED_TIMELINES, - }, - rate_limit::{rand_duration, RateLimiter}, - recovery::recovery_main, - remove_wal::calc_horizon_lsn, - send_wal::WalSenders, - state::TimelineState, - timeline::{ManagerTimeline, ReadGuardSharedState, StateSK, WalResidentTimeline}, - timeline_guard::{AccessService, GuardId, ResidenceGuard}, - timelines_set::{TimelineSetGuard, TimelinesSet}, - wal_backup::{self, WalBackupTaskHandle}, - wal_backup_partial::{self, PartialBackup, PartialRemoteSegment}, - SafeKeeperConf, +use crate::SafeKeeperConf; +use crate::control_file::{FileStorage, Storage}; +use crate::metrics::{ + MANAGER_ACTIVE_CHANGES, MANAGER_ITERATIONS_TOTAL, MISC_OPERATION_SECONDS, NUM_EVICTED_TIMELINES, }; +use crate::rate_limit::{RateLimiter, rand_duration}; +use crate::recovery::recovery_main; +use crate::remove_wal::calc_horizon_lsn; +use crate::send_wal::WalSenders; +use crate::state::TimelineState; +use crate::timeline::{ManagerTimeline, ReadGuardSharedState, StateSK, WalResidentTimeline}; +use crate::timeline_guard::{AccessService, GuardId, ResidenceGuard}; +use crate::timelines_set::{TimelineSetGuard, TimelinesSet}; +use crate::wal_backup::{self, WalBackupTaskHandle}; +use crate::wal_backup_partial::{self, PartialBackup, PartialRemoteSegment}; pub(crate) struct StateSnapshot { // inmem values diff --git a/safekeeper/src/timelines_global_map.rs b/safekeeper/src/timelines_global_map.rs index 1ff6a72bce..858dfce807 100644 --- a/safekeeper/src/timelines_global_map.rs +++ b/safekeeper/src/timelines_global_map.rs @@ -2,31 +2,33 @@ //! All timelines should always be present in this map, this is done by loading them //! all from the disk on startup and keeping them in memory. 
-use crate::defaults::DEFAULT_EVICTION_CONCURRENCY; -use crate::rate_limit::RateLimiter; -use crate::state::TimelinePersistentState; -use crate::timeline::{get_tenant_dir, get_timeline_dir, Timeline, TimelineError}; -use crate::timelines_set::TimelinesSet; -use crate::wal_storage::Storage; -use crate::{control_file, wal_storage, SafeKeeperConf}; -use anyhow::{bail, Context, Result}; +use std::collections::HashMap; +use std::str::FromStr; +use std::sync::{Arc, Mutex}; +use std::time::{Duration, Instant}; + +use anyhow::{Context, Result, bail}; use camino::Utf8PathBuf; use camino_tempfile::Utf8TempDir; use safekeeper_api::membership::Configuration; use safekeeper_api::models::SafekeeperUtilization; -use safekeeper_api::ServerInfo; +use safekeeper_api::{ServerInfo, membership}; use serde::Serialize; -use std::collections::HashMap; -use std::str::FromStr; -use std::sync::atomic::Ordering; -use std::sync::{Arc, Mutex}; -use std::time::{Duration, Instant}; use tokio::fs; use tracing::*; use utils::crashsafe::{durable_rename, fsync_async_opt}; use utils::id::{TenantId, TenantTimelineId, TimelineId}; use utils::lsn::Lsn; +use crate::defaults::DEFAULT_EVICTION_CONCURRENCY; +use crate::http::routes::DeleteOrExcludeError; +use crate::rate_limit::RateLimiter; +use crate::state::TimelinePersistentState; +use crate::timeline::{Timeline, TimelineError, delete_dir, get_tenant_dir, get_timeline_dir}; +use crate::timelines_set::TimelinesSet; +use crate::wal_storage::Storage; +use crate::{SafeKeeperConf, control_file, wal_storage}; + // Timeline entry in the global map: either a ready timeline, or mark that it is // being created. #[derive(Clone)] @@ -446,23 +448,20 @@ impl GlobalTimelines { .collect() } - /// Cancels timeline, then deletes the corresponding data directory. - /// If only_local, doesn't remove WAL segments in remote storage. - pub(crate) async fn delete( + /// Delete timeline, only locally on this node or globally (also cleaning + /// remote storage WAL), depending on `action` value. + pub(crate) async fn delete_or_exclude( &self, ttid: &TenantTimelineId, - only_local: bool, - ) -> Result { + action: DeleteOrExclude, + ) -> Result { let tli_res = { let state = self.state.lock().unwrap(); if state.tombstones.contains_key(ttid) { // Presence of a tombstone guarantees that a previous deletion has completed and there is no work to do. info!("Timeline {ttid} was already deleted"); - return Ok(TimelineDeleteForceResult { - dir_existed: false, - was_active: false, - }); + return Ok(TimelineDeleteResult { dir_existed: false }); } state.get(ttid) @@ -470,32 +469,47 @@ impl GlobalTimelines { let result = match tli_res { Ok(timeline) => { - let was_active = timeline.broker_active.load(Ordering::Relaxed); + info!("deleting timeline {}, action={:?}", ttid, action); - info!("deleting timeline {}, only_local={}", ttid, only_local); - timeline.shutdown().await; + // If node is getting excluded, check the generation first. + // Then, while holding the lock cancel the timeline; it will be + // unusable after this point, and if node is added back first + // deletion must be completed and node seeded anew. + // + // We would like to avoid holding the lock while waiting for the + // gate to finish as this is deadlock prone, so for actual + // deletion will take it second time. 
+ if let DeleteOrExclude::Exclude(ref mconf) = action { + let shared_state = timeline.read_shared_state().await; + if shared_state.sk.state().mconf.generation > mconf.generation { + return Err(DeleteOrExcludeError::Conflict { + requested: mconf.clone(), + current: shared_state.sk.state().mconf.clone(), + }); + } + timeline.cancel().await; + } else { + timeline.cancel().await; + } + + timeline.close().await; info!("timeline {ttid} shut down for deletion"); // Take a lock and finish the deletion holding this mutex. let mut shared_state = timeline.write_shared_state().await; + let only_local = !matches!(action, DeleteOrExclude::Delete); let dir_existed = timeline.delete(&mut shared_state, only_local).await?; - Ok(TimelineDeleteForceResult { - dir_existed, - was_active, // TODO: we probably should remove this field - }) + Ok(TimelineDeleteResult { dir_existed }) } Err(_) => { // Timeline is not memory, but it may still exist on disk in broken state. let dir_path = get_timeline_dir(self.state.lock().unwrap().conf.as_ref(), ttid); - let dir_existed = delete_dir(dir_path)?; + let dir_existed = delete_dir(&dir_path).await?; - Ok(TimelineDeleteForceResult { - dir_existed, - was_active: false, - }) + Ok(TimelineDeleteResult { dir_existed }) } }; @@ -513,11 +527,11 @@ impl GlobalTimelines { /// retry tenant deletion again later. /// /// If only_local, doesn't remove WAL segments in remote storage. - pub async fn delete_force_all_for_tenant( + pub async fn delete_all_for_tenant( &self, tenant_id: &TenantId, - only_local: bool, - ) -> Result> { + action: DeleteOrExclude, + ) -> Result> { info!("deleting all timelines for tenant {}", tenant_id); let to_delete = self.get_all_for_tenant(*tenant_id); @@ -525,7 +539,7 @@ impl GlobalTimelines { let mut deleted = HashMap::new(); for tli in &to_delete { - match self.delete(&tli.ttid, only_local).await { + match self.delete_or_exclude(&tli.ttid, action.clone()).await { Ok(result) => { deleted.insert(tli.ttid, result); } @@ -539,17 +553,15 @@ impl GlobalTimelines { // If there was an error, return it. if let Some(e) = err { - return Err(e); + return Err(anyhow::Error::from(e)); } // There may be broken timelines on disk, so delete the whole tenant dir as well. // Note that we could concurrently create new timelines while we were deleting them, // so the directory may be not empty. In this case timelines will have bad state // and timeline background jobs can panic. - delete_dir(get_tenant_dir( - self.state.lock().unwrap().conf.as_ref(), - tenant_id, - ))?; + let tenant_dir = get_tenant_dir(self.state.lock().unwrap().conf.as_ref(), tenant_id); + delete_dir(&tenant_dir).await?; Ok(deleted) } @@ -568,18 +580,20 @@ impl GlobalTimelines { } #[derive(Clone, Copy, Serialize)] -pub struct TimelineDeleteForceResult { +pub struct TimelineDeleteResult { pub dir_existed: bool, - pub was_active: bool, } -/// Deletes directory and it's contents. Returns false if directory does not exist. -fn delete_dir(path: Utf8PathBuf) -> Result { - match std::fs::remove_dir_all(path) { - Ok(_) => Ok(true), - Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(false), - Err(e) => Err(e.into()), - } +/// Action for delete_or_exclude. +#[derive(Clone, Debug)] +pub enum DeleteOrExclude { + /// Delete timeline globally. + Delete, + /// Legacy mode until we fully migrate to generations: like exclude deletes + /// timeline only locally, but ignores generation number. + DeleteLocal, + /// This node is getting excluded, delete timeline locally. 
+ Exclude(membership::Configuration), } /// Create temp directory for a new timeline. It needs to be located on the same diff --git a/safekeeper/src/timelines_set.rs b/safekeeper/src/timelines_set.rs index 096e348295..1d1abc530f 100644 --- a/safekeeper/src/timelines_set.rs +++ b/safekeeper/src/timelines_set.rs @@ -1,4 +1,5 @@ -use std::{collections::HashMap, sync::Arc}; +use std::collections::HashMap; +use std::sync::Arc; use utils::id::TenantTimelineId; diff --git a/safekeeper/src/wal_backup.rs b/safekeeper/src/wal_backup.rs index 2f6b91cf47..6176e64698 100644 --- a/safekeeper/src/wal_backup.rs +++ b/safekeeper/src/wal_backup.rs @@ -1,34 +1,29 @@ -use anyhow::{Context, Result}; - -use camino::{Utf8Path, Utf8PathBuf}; -use futures::stream::FuturesOrdered; -use futures::StreamExt; -use safekeeper_api::models::PeerInfo; -use tokio::task::JoinHandle; -use tokio_util::sync::CancellationToken; -use utils::backoff; -use utils::id::NodeId; - use std::cmp::min; use std::collections::HashSet; use std::num::NonZeroU32; use std::pin::Pin; use std::time::Duration; +use anyhow::{Context, Result}; +use camino::{Utf8Path, Utf8PathBuf}; +use futures::StreamExt; +use futures::stream::FuturesOrdered; use postgres_ffi::v14::xlog_utils::XLogSegNoOffsetToRecPtr; -use postgres_ffi::XLogFileName; -use postgres_ffi::{XLogSegNo, PG_TLI}; +use postgres_ffi::{PG_TLI, XLogFileName, XLogSegNo}; use remote_storage::{ DownloadOpts, GenericRemoteStorage, ListingMode, RemotePath, StorageMetadata, }; +use safekeeper_api::models::PeerInfo; use tokio::fs::File; - use tokio::select; use tokio::sync::mpsc::{self, Receiver, Sender}; -use tokio::sync::{watch, OnceCell}; +use tokio::sync::{OnceCell, watch}; +use tokio::task::JoinHandle; +use tokio_util::sync::CancellationToken; use tracing::*; - -use utils::{id::TenantTimelineId, lsn::Lsn}; +use utils::backoff; +use utils::id::{NodeId, TenantTimelineId}; +use utils::lsn::Lsn; use crate::metrics::{BACKED_UP_SEGMENTS, BACKUP_ERRORS, WAL_BACKUP_TASKS}; use crate::timeline::WalResidentTimeline; diff --git a/safekeeper/src/wal_backup_partial.rs b/safekeeper/src/wal_backup_partial.rs index 5ecb23e8e0..049852a048 100644 --- a/safekeeper/src/wal_backup_partial.rs +++ b/safekeeper/src/wal_backup_partial.rs @@ -20,23 +20,23 @@ //! This way control file stores information about all potentially existing //! remote partial segments and can clean them up after uploading a newer version. 
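In the timelines_global_map.rs hunk above, `delete` becomes `delete_or_exclude`, parameterised by the new `DeleteOrExclude` action. A small sketch of what each variant means in practice; only the enum and the `only_local` computation come from the diff, the helper itself is hypothetical:

```rust
// Hypothetical helper summarising the semantics of DeleteOrExclude as implemented
// in the diff: only a global Delete removes WAL from remote storage, and only
// Exclude carries a membership Configuration used for the generation check.
fn describe(action: &DeleteOrExclude) -> (&'static str, bool) {
    let only_local = !matches!(action, DeleteOrExclude::Delete);
    let what = match action {
        DeleteOrExclude::Delete => "delete timeline globally (local dir + remote WAL)",
        DeleteOrExclude::DeleteLocal => "legacy local delete, no generation check",
        DeleteOrExclude::Exclude(_mconf) => {
            "node excluded: delete locally, refused if the current generation is newer"
        }
    };
    (what, only_local)
}
```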
use camino::Utf8PathBuf; -use postgres_ffi::{XLogFileName, XLogSegNo, PG_TLI}; +use postgres_ffi::{PG_TLI, XLogFileName, XLogSegNo}; use remote_storage::RemotePath; use safekeeper_api::Term; use serde::{Deserialize, Serialize}; - use tokio_util::sync::CancellationToken; use tracing::{debug, error, info, instrument, warn}; -use utils::{id::NodeId, lsn::Lsn}; +use utils::id::NodeId; +use utils::lsn::Lsn; -use crate::{ - metrics::{MISC_OPERATION_SECONDS, PARTIAL_BACKUP_UPLOADED_BYTES, PARTIAL_BACKUP_UPLOADS}, - rate_limit::{rand_duration, RateLimiter}, - timeline::WalResidentTimeline, - timeline_manager::StateSnapshot, - wal_backup::{self}, - SafeKeeperConf, +use crate::SafeKeeperConf; +use crate::metrics::{ + MISC_OPERATION_SECONDS, PARTIAL_BACKUP_UPLOADED_BYTES, PARTIAL_BACKUP_UPLOADS, }; +use crate::rate_limit::{RateLimiter, rand_duration}; +use crate::timeline::WalResidentTimeline; +use crate::timeline_manager::StateSnapshot; +use crate::wal_backup::{self}; #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub enum UploadStatus { diff --git a/safekeeper/src/wal_reader_stream.rs b/safekeeper/src/wal_reader_stream.rs index a0dd571a34..cc9d4e6e3b 100644 --- a/safekeeper/src/wal_reader_stream.rs +++ b/safekeeper/src/wal_reader_stream.rs @@ -1,14 +1,15 @@ -use std::{ - pin::Pin, - task::{Context, Poll}, -}; +use std::pin::Pin; +use std::task::{Context, Poll}; use bytes::Bytes; -use futures::{stream::BoxStream, Stream, StreamExt}; +use futures::stream::BoxStream; +use futures::{Stream, StreamExt}; +use safekeeper_api::Term; use utils::lsn::Lsn; -use crate::{send_wal::EndWatch, timeline::WalResidentTimeline, wal_storage::WalReader}; -use safekeeper_api::Term; +use crate::send_wal::EndWatch; +use crate::timeline::WalResidentTimeline; +use crate::wal_storage::WalReader; #[derive(PartialEq, Eq, Debug)] pub(crate) struct WalBytes { @@ -224,12 +225,11 @@ mod tests { use futures::StreamExt; use postgres_ffi::MAX_SEND_SIZE; - use utils::{ - id::{NodeId, TenantTimelineId}, - lsn::Lsn, - }; + use utils::id::{NodeId, TenantTimelineId}; + use utils::lsn::Lsn; - use crate::{test_utils::Env, wal_reader_stream::StreamingWalReader}; + use crate::test_utils::Env; + use crate::wal_reader_stream::StreamingWalReader; #[tokio::test] async fn test_streaming_wal_reader_reset() { diff --git a/safekeeper/src/wal_service.rs b/safekeeper/src/wal_service.rs index e5ccbb3230..045fa88cb0 100644 --- a/safekeeper/src/wal_service.rs +++ b/safekeeper/src/wal_service.rs @@ -2,23 +2,23 @@ //! WAL service listens for client connections and //! receive WAL from wal_proposer and send it to WAL receivers //! 
-use anyhow::{Context, Result}; -use postgres_backend::QueryError; -use safekeeper_api::models::ConnectionId; +use std::os::fd::AsRawFd; use std::sync::Arc; use std::time::Duration; + +use anyhow::{Context, Result}; +use postgres_backend::{AuthType, PostgresBackend, QueryError}; +use safekeeper_api::models::ConnectionId; use tokio::net::TcpStream; use tokio_io_timeout::TimeoutReader; use tokio_util::sync::CancellationToken; use tracing::*; -use utils::{auth::Scope, measured_stream::MeasuredStream}; - -use std::os::fd::AsRawFd; +use utils::auth::Scope; +use utils::measured_stream::MeasuredStream; +use crate::handler::SafekeeperPostgresHandler; use crate::metrics::TrafficMetrics; -use crate::SafeKeeperConf; -use crate::{handler::SafekeeperPostgresHandler, GlobalTimelines}; -use postgres_backend::{AuthType, PostgresBackend}; +use crate::{GlobalTimelines, SafeKeeperConf}; /// Accept incoming TCP connections and spawn them into a background thread. /// diff --git a/safekeeper/src/wal_storage.rs b/safekeeper/src/wal_storage.rs index e338d70731..ed197a3f83 100644 --- a/safekeeper/src/wal_storage.rs +++ b/safekeeper/src/wal_storage.rs @@ -7,32 +7,32 @@ //! //! Note that last file has `.partial` suffix, that's different from postgres. -use anyhow::{bail, Context, Result}; -use bytes::Bytes; -use camino::{Utf8Path, Utf8PathBuf}; -use futures::future::BoxFuture; -use postgres_ffi::v14::xlog_utils::{IsPartialXLogFileName, IsXLogFileName, XLogFromFileName}; -use postgres_ffi::{dispatch_pgversion, XLogSegNo, PG_TLI}; -use remote_storage::RemotePath; use std::cmp::{max, min}; use std::future::Future; use std::io::{self, SeekFrom}; use std::pin::Pin; -use tokio::fs::{self, remove_file, File, OpenOptions}; -use tokio::io::{AsyncRead, AsyncWriteExt}; -use tokio::io::{AsyncReadExt, AsyncSeekExt}; + +use anyhow::{Context, Result, bail}; +use bytes::Bytes; +use camino::{Utf8Path, Utf8PathBuf}; +use futures::future::BoxFuture; +use postgres_ffi::v14::xlog_utils::{IsPartialXLogFileName, IsXLogFileName, XLogFromFileName}; +use postgres_ffi::waldecoder::WalStreamDecoder; +use postgres_ffi::{PG_TLI, XLogFileName, XLogSegNo, dispatch_pgversion}; +use pq_proto::SystemId; +use remote_storage::RemotePath; +use tokio::fs::{self, File, OpenOptions, remove_file}; +use tokio::io::{AsyncRead, AsyncReadExt, AsyncSeekExt, AsyncWriteExt}; use tracing::*; use utils::crashsafe::durable_rename; +use utils::id::TenantTimelineId; +use utils::lsn::Lsn; use crate::metrics::{ - time_io_closure, WalStorageMetrics, REMOVED_WAL_SEGMENTS, WAL_STORAGE_OPERATION_SECONDS, + REMOVED_WAL_SEGMENTS, WAL_STORAGE_OPERATION_SECONDS, WalStorageMetrics, time_io_closure, }; use crate::state::TimelinePersistentState; use crate::wal_backup::{read_object, remote_timeline_path}; -use postgres_ffi::waldecoder::WalStreamDecoder; -use postgres_ffi::XLogFileName; -use pq_proto::SystemId; -use utils::{id::TenantTimelineId, lsn::Lsn}; pub trait Storage { // Last written LSN. 
@@ -200,7 +200,12 @@ impl PhysicalStorage { ttid.timeline_id, flush_lsn, state.commit_lsn, state.peer_horizon_lsn, ); if flush_lsn < state.commit_lsn { - bail!("timeline {} potential data loss: flush_lsn {} by find_end_of_wal is less than commit_lsn {} from control file", ttid.timeline_id, flush_lsn, state.commit_lsn); + bail!( + "timeline {} potential data loss: flush_lsn {} by find_end_of_wal is less than commit_lsn {} from control file", + ttid.timeline_id, + flush_lsn, + state.commit_lsn + ); } if flush_lsn < state.peer_horizon_lsn { warn!( diff --git a/safekeeper/tests/misc_test.rs b/safekeeper/tests/misc_test.rs index 8e5b17a143..8e54d2bb86 100644 --- a/safekeeper/tests/misc_test.rs +++ b/safekeeper/tests/misc_test.rs @@ -3,9 +3,9 @@ use std::sync::Arc; use tracing::{info, warn}; use utils::lsn::Lsn; -use crate::walproposer_sim::{ - log::{init_logger, init_tracing_logger}, - simulation::{generate_network_opts, generate_schedule, Schedule, TestAction, TestConfig}, +use crate::walproposer_sim::log::{init_logger, init_tracing_logger}; +use crate::walproposer_sim::simulation::{ + Schedule, TestAction, TestConfig, generate_network_opts, generate_schedule, }; pub mod walproposer_sim; diff --git a/safekeeper/tests/random_test.rs b/safekeeper/tests/random_test.rs index 1a932ef699..e29b58836a 100644 --- a/safekeeper/tests/random_test.rs +++ b/safekeeper/tests/random_test.rs @@ -1,11 +1,9 @@ use rand::Rng; use tracing::{info, warn}; -use crate::walproposer_sim::{ - log::{init_logger, init_tracing_logger}, - simulation::{generate_network_opts, generate_schedule, TestConfig}, - simulation_logs::validate_events, -}; +use crate::walproposer_sim::log::{init_logger, init_tracing_logger}; +use crate::walproposer_sim::simulation::{TestConfig, generate_network_opts, generate_schedule}; +use crate::walproposer_sim::simulation_logs::validate_events; pub mod walproposer_sim; @@ -18,7 +16,7 @@ fn test_random_schedules() -> anyhow::Result<()> { let mut config = TestConfig::new(Some(clock)); for _ in 0..500 { - let seed: u64 = rand::thread_rng().gen(); + let seed: u64 = rand::thread_rng().r#gen(); config.network = generate_network_opts(seed); let test = config.start(seed); diff --git a/safekeeper/tests/simple_test.rs b/safekeeper/tests/simple_test.rs index 0be9d0deef..f7b266e39c 100644 --- a/safekeeper/tests/simple_test.rs +++ b/safekeeper/tests/simple_test.rs @@ -1,7 +1,8 @@ use tracing::info; use utils::lsn::Lsn; -use crate::walproposer_sim::{log::init_logger, simulation::TestConfig}; +use crate::walproposer_sim::log::init_logger; +use crate::walproposer_sim::simulation::TestConfig; pub mod walproposer_sim; diff --git a/safekeeper/tests/walproposer_sim/log.rs b/safekeeper/tests/walproposer_sim/log.rs index 870f30de4f..e2ba3282ca 100644 --- a/safekeeper/tests/walproposer_sim/log.rs +++ b/safekeeper/tests/walproposer_sim/log.rs @@ -1,9 +1,11 @@ -use std::{fmt, sync::Arc}; +use std::fmt; +use std::sync::Arc; use desim::time::Timing; use once_cell::sync::OnceCell; use parking_lot::Mutex; -use tracing_subscriber::fmt::{format::Writer, time::FormatTime}; +use tracing_subscriber::fmt::format::Writer; +use tracing_subscriber::fmt::time::FormatTime; /// SimClock can be plugged into tracing logger to print simulation time. #[derive(Clone)] diff --git a/safekeeper/tests/walproposer_sim/safekeeper.rs b/safekeeper/tests/walproposer_sim/safekeeper.rs index 0023a4d22a..6ce1a9940e 100644 --- a/safekeeper/tests/walproposer_sim/safekeeper.rs +++ b/safekeeper/tests/walproposer_sim/safekeeper.rs @@ -2,33 +2,30 @@ //! 
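Several of these crates are being moved to `edition = "2024"` (see the storage_broker and storage_controller Cargo.toml hunks later in the diff), which explains the mechanical changes around them: `gen` is a reserved keyword in the 2024 edition, so rand's `Rng::gen` has to be called through the raw identifier `r#gen` as in the random_test.rs hunk above, and attributes such as `export_name` must be wrapped in `unsafe(...)` further down; the wholesale import regrouping in most hunks is presumably the accompanying 2024 formatting style. A minimal standalone illustration of the raw-identifier call (example code, not from the PR):

```rust
// Example only: under edition 2024, `gen` is reserved, so the rand trait method
// must be invoked via the raw identifier.
use rand::Rng;

fn main() {
    let seed: u64 = rand::thread_rng().r#gen();
    println!("seed = {seed}");
}
```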
Gets messages from the network, passes them down to consensus module and //! sends replies back. -use std::{collections::HashMap, sync::Arc, time::Duration}; +use std::collections::HashMap; +use std::sync::Arc; +use std::time::Duration; -use anyhow::{bail, Result}; +use anyhow::{Result, bail}; use bytes::{Bytes, BytesMut}; use camino::Utf8PathBuf; -use desim::{ - executor::{self, PollSome}, - network::TCP, - node_os::NodeOs, - proto::{AnyMessage, NetEvent, NodeEvent}, -}; +use desim::executor::{self, PollSome}; +use desim::network::TCP; +use desim::node_os::NodeOs; +use desim::proto::{AnyMessage, NetEvent, NodeEvent}; use http::Uri; -use safekeeper::{ - safekeeper::{ - ProposerAcceptorMessage, SafeKeeper, SK_PROTOCOL_VERSION, UNKNOWN_SERVER_VERSION, - }, - state::{TimelinePersistentState, TimelineState}, - timeline::TimelineError, - wal_storage::Storage, - SafeKeeperConf, +use safekeeper::SafeKeeperConf; +use safekeeper::safekeeper::{ + ProposerAcceptorMessage, SK_PROTO_VERSION_3, SafeKeeper, UNKNOWN_SERVER_VERSION, }; -use safekeeper_api::{membership::Configuration, ServerInfo}; +use safekeeper::state::{TimelinePersistentState, TimelineState}; +use safekeeper::timeline::TimelineError; +use safekeeper::wal_storage::Storage; +use safekeeper_api::ServerInfo; +use safekeeper_api::membership::Configuration; use tracing::{debug, info_span, warn}; -use utils::{ - id::{NodeId, TenantId, TenantTimelineId, TimelineId}, - lsn::Lsn, -}; +use utils::id::{NodeId, TenantId, TenantTimelineId, TimelineId}; +use utils::lsn::Lsn; use super::safekeeper_disk::{DiskStateStorage, DiskWALStorage, SafekeeperDisk, TimelineDisk}; @@ -287,7 +284,7 @@ impl ConnState { bail!("finished processing START_REPLICATION") } - let msg = ProposerAcceptorMessage::parse(copy_data, SK_PROTOCOL_VERSION)?; + let msg = ProposerAcceptorMessage::parse(copy_data, SK_PROTO_VERSION_3)?; debug!("got msg: {:?}", msg); self.process(msg, global) } else { @@ -403,7 +400,7 @@ impl ConnState { // TODO: if this is AppendResponse, fill in proper hot standby feedback and disk consistent lsn let mut buf = BytesMut::with_capacity(128); - reply.serialize(&mut buf)?; + reply.serialize(&mut buf, SK_PROTO_VERSION_3)?; self.tcp.send(AnyMessage::Bytes(buf.into())); } diff --git a/safekeeper/tests/walproposer_sim/safekeeper_disk.rs b/safekeeper/tests/walproposer_sim/safekeeper_disk.rs index b854754ecf..94a849b5f0 100644 --- a/safekeeper/tests/walproposer_sim/safekeeper_disk.rs +++ b/safekeeper/tests/walproposer_sim/safekeeper_disk.rs @@ -1,22 +1,23 @@ use std::collections::HashMap; +use std::ops::Deref; use std::sync::Arc; - -use parking_lot::Mutex; -use safekeeper::state::TimelinePersistentState; -use utils::id::TenantTimelineId; - -use super::block_storage::BlockStorage; - -use std::{ops::Deref, time::Instant}; +use std::time::Instant; use anyhow::Result; use bytes::{Buf, BytesMut}; use futures::future::BoxFuture; -use postgres_ffi::{waldecoder::WalStreamDecoder, XLogSegNo}; -use safekeeper::{control_file, metrics::WalStorageMetrics, wal_storage}; +use parking_lot::Mutex; +use postgres_ffi::XLogSegNo; +use postgres_ffi::waldecoder::WalStreamDecoder; +use safekeeper::metrics::WalStorageMetrics; +use safekeeper::state::TimelinePersistentState; +use safekeeper::{control_file, wal_storage}; use tracing::{debug, info}; +use utils::id::TenantTimelineId; use utils::lsn::Lsn; +use super::block_storage::BlockStorage; + /// All safekeeper state that is usually saved to disk. 
pub struct SafekeeperDisk { pub timelines: Mutex>>, diff --git a/safekeeper/tests/walproposer_sim/simulation.rs b/safekeeper/tests/walproposer_sim/simulation.rs index fabf450eef..f314143952 100644 --- a/safekeeper/tests/walproposer_sim/simulation.rs +++ b/safekeeper/tests/walproposer_sim/simulation.rs @@ -1,23 +1,24 @@ -use std::{cell::Cell, str::FromStr, sync::Arc}; +use std::cell::Cell; +use std::str::FromStr; +use std::sync::Arc; -use crate::walproposer_sim::{safekeeper::run_server, walproposer_api::SimulationApi}; -use desim::{ - executor::{self, ExternalHandle}, - node_os::NodeOs, - options::{Delay, NetworkOptions}, - proto::{AnyMessage, NodeEvent}, - world::Node, - world::World, -}; +use desim::executor::{self, ExternalHandle}; +use desim::node_os::NodeOs; +use desim::options::{Delay, NetworkOptions}; +use desim::proto::{AnyMessage, NodeEvent}; +use desim::world::{Node, World}; use rand::{Rng, SeedableRng}; use tracing::{debug, info_span, warn}; -use utils::{id::TenantTimelineId, lsn::Lsn}; +use utils::id::TenantTimelineId; +use utils::lsn::Lsn; use walproposer::walproposer::{Config, Wrapper}; -use super::{ - log::SimClock, safekeeper_disk::SafekeeperDisk, walproposer_api, - walproposer_disk::DiskWalProposer, -}; +use super::log::SimClock; +use super::safekeeper_disk::SafekeeperDisk; +use super::walproposer_api; +use super::walproposer_disk::DiskWalProposer; +use crate::walproposer_sim::safekeeper::run_server; +use crate::walproposer_sim::walproposer_api::SimulationApi; /// Simulated safekeeper node. pub struct SafekeeperNode { diff --git a/safekeeper/tests/walproposer_sim/walproposer_api.rs b/safekeeper/tests/walproposer_sim/walproposer_api.rs index 5578c94cf6..6451589e80 100644 --- a/safekeeper/tests/walproposer_sim/walproposer_api.rs +++ b/safekeeper/tests/walproposer_sim/walproposer_api.rs @@ -1,26 +1,20 @@ -use std::{ - cell::{RefCell, RefMut, UnsafeCell}, - ffi::CStr, - sync::Arc, -}; +use std::cell::{RefCell, RefMut, UnsafeCell}; +use std::ffi::CStr; +use std::sync::Arc; use bytes::Bytes; -use desim::{ - executor::{self, PollSome}, - network::TCP, - node_os::NodeOs, - proto::{AnyMessage, NetEvent, NodeEvent}, - world::NodeId, -}; +use desim::executor::{self, PollSome}; +use desim::network::TCP; +use desim::node_os::NodeOs; +use desim::proto::{AnyMessage, NetEvent, NodeEvent}; +use desim::world::NodeId; use tracing::debug; use utils::lsn::Lsn; -use walproposer::{ - api_bindings::Level, - bindings::{ - NeonWALReadResult, SafekeeperStateDesiredEvents, WL_SOCKET_READABLE, WL_SOCKET_WRITEABLE, - }, - walproposer::{ApiImpl, Config}, +use walproposer::api_bindings::Level; +use walproposer::bindings::{ + NeonWALReadResult, SafekeeperStateDesiredEvents, WL_SOCKET_READABLE, WL_SOCKET_WRITEABLE, }; +use walproposer::walproposer::{ApiImpl, Config}; use super::walproposer_disk::DiskWalProposer; @@ -578,7 +572,9 @@ impl ApiImpl for SimulationApi { let disk_lsn = disk.lock().flush_rec_ptr().0; debug!("start_streaming at {} (disk_lsn={})", startpos, disk_lsn); if startpos < disk_lsn { - debug!("startpos < disk_lsn, it means we wrote some transaction even before streaming started"); + debug!( + "startpos < disk_lsn, it means we wrote some transaction even before streaming started" + ); } assert!(startpos <= disk_lsn); let mut broadcasted = Lsn(startpos); diff --git a/safekeeper/tests/walproposer_sim/walproposer_disk.rs b/safekeeper/tests/walproposer_sim/walproposer_disk.rs index 7dc7f48548..fe3eee8a5a 100644 --- a/safekeeper/tests/walproposer_sim/walproposer_disk.rs +++ 
b/safekeeper/tests/walproposer_sim/walproposer_disk.rs @@ -1,4 +1,5 @@ -use std::{ffi::CStr, sync::Arc}; +use std::ffi::CStr; +use std::sync::Arc; use parking_lot::{Mutex, MutexGuard}; use postgres_ffi::v16::wal_generator::{LogicalMessageGenerator, WalGenerator}; diff --git a/storage_broker/Cargo.toml b/storage_broker/Cargo.toml index 17d4aed63b..e4db9a317d 100644 --- a/storage_broker/Cargo.toml +++ b/storage_broker/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "storage_broker" version = "0.1.0" -edition.workspace = true +edition = "2024" license.workspace = true [features] diff --git a/storage_broker/benches/rps.rs b/storage_broker/benches/rps.rs index 1a6fb7fedf..86f2dd9a6c 100644 --- a/storage_broker/benches/rps.rs +++ b/storage_broker/benches/rps.rs @@ -1,18 +1,14 @@ -use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::Arc; +use std::sync::atomic::{AtomicU64, Ordering}; use std::time::{Duration, Instant}; use clap::Parser; - -use storage_broker::proto::SafekeeperTimelineInfo; use storage_broker::proto::{ - FilterTenantTimelineId, MessageType, SubscribeByFilterRequest, + FilterTenantTimelineId, MessageType, SafekeeperTimelineInfo, SubscribeByFilterRequest, TenantTimelineId as ProtoTenantTimelineId, TypeSubscription, TypedMessage, }; - use storage_broker::{BrokerClientChannel, DEFAULT_ENDPOINT}; use tokio::time; - use tonic::Request; const ABOUT: &str = r#" diff --git a/storage_broker/src/bin/storage_broker.rs b/storage_broker/src/bin/storage_broker.rs index 9d4c22484c..cc33ec20ff 100644 --- a/storage_broker/src/bin/storage_broker.rs +++ b/storage_broker/src/bin/storage_broker.rs @@ -10,7 +10,14 @@ //! //! Only safekeeper message is supported, but it is not hard to add something //! else with generics. -use clap::{command, Parser}; +use std::collections::HashMap; +use std::convert::Infallible; +use std::net::SocketAddr; +use std::pin::Pin; +use std::sync::Arc; +use std::time::Duration; + +use clap::{Parser, command}; use futures_core::Stream; use futures_util::StreamExt; use http_body_util::Full; @@ -19,27 +26,10 @@ use hyper::header::CONTENT_TYPE; use hyper::service::service_fn; use hyper::{Method, StatusCode}; use hyper_util::rt::{TokioExecutor, TokioIo, TokioTimer}; -use parking_lot::RwLock; -use std::collections::HashMap; -use std::convert::Infallible; -use std::net::SocketAddr; -use std::pin::Pin; -use std::sync::Arc; -use std::time::Duration; -use tokio::net::TcpListener; -use tokio::sync::broadcast; -use tokio::sync::broadcast::error::RecvError; -use tokio::time; -use tonic::body::{self, empty_body, BoxBody}; -use tonic::codegen::Service; -use tonic::Code; -use tonic::{Request, Response, Status}; -use tracing::*; -use utils::signals::ShutdownSignals; - use metrics::{Encoder, TextEncoder}; +use parking_lot::RwLock; use storage_broker::metrics::{ - BROADCASTED_MESSAGES_TOTAL, BROADCAST_DROPPED_MESSAGES_TOTAL, NUM_PUBS, NUM_SUBS_ALL, + BROADCAST_DROPPED_MESSAGES_TOTAL, BROADCASTED_MESSAGES_TOTAL, NUM_PUBS, NUM_SUBS_ALL, NUM_SUBS_TIMELINE, PROCESSED_MESSAGES_TOTAL, PUBLISHED_ONEOFF_MESSAGES_TOTAL, }; use storage_broker::proto::broker_service_server::{BrokerService, BrokerServiceServer}; @@ -48,10 +38,19 @@ use storage_broker::proto::{ FilterTenantTimelineId, MessageType, SafekeeperDiscoveryRequest, SafekeeperDiscoveryResponse, SafekeeperTimelineInfo, SubscribeByFilterRequest, SubscribeSafekeeperInfoRequest, TypedMessage, }; -use storage_broker::{parse_proto_ttid, DEFAULT_KEEPALIVE_INTERVAL, DEFAULT_LISTEN_ADDR}; +use storage_broker::{DEFAULT_KEEPALIVE_INTERVAL, 
DEFAULT_LISTEN_ADDR, parse_proto_ttid}; +use tokio::net::TcpListener; +use tokio::sync::broadcast; +use tokio::sync::broadcast::error::RecvError; +use tokio::time; +use tonic::body::{self, BoxBody, empty_body}; +use tonic::codegen::Service; +use tonic::{Code, Request, Response, Status}; +use tracing::*; use utils::id::TenantTimelineId; use utils::logging::{self, LogFormat}; use utils::sentry_init::init_sentry; +use utils::signals::ShutdownSignals; use utils::{project_build_tag, project_git_version}; project_git_version!(GIT_VERSION); @@ -743,11 +742,12 @@ async fn main() -> Result<(), Box> { #[cfg(test)] mod tests { - use super::*; use storage_broker::proto::TenantTimelineId as ProtoTenantTimelineId; use tokio::sync::broadcast::error::TryRecvError; use utils::id::{TenantId, TimelineId}; + use super::*; + fn msg(timeline_id: Vec) -> Message { Message::SafekeeperTimelineInfo(SafekeeperTimelineInfo { safekeeper_id: 1, diff --git a/storage_broker/src/lib.rs b/storage_broker/src/lib.rs index 3ac40f6e14..55d411f607 100644 --- a/storage_broker/src/lib.rs +++ b/storage_broker/src/lib.rs @@ -1,12 +1,11 @@ use std::time::Duration; -use tonic::codegen::StdError; -use tonic::transport::{ClientTlsConfig, Endpoint}; -use tonic::{transport::Channel, Status}; -use utils::id::{TenantId, TenantTimelineId, TimelineId}; -use proto::{ - broker_service_client::BrokerServiceClient, TenantTimelineId as ProtoTenantTimelineId, -}; +use proto::TenantTimelineId as ProtoTenantTimelineId; +use proto::broker_service_client::BrokerServiceClient; +use tonic::Status; +use tonic::codegen::StdError; +use tonic::transport::{Channel, ClientTlsConfig, Endpoint}; +use utils::id::{TenantId, TenantTimelineId, TimelineId}; // Code generated by protobuf. pub mod proto { @@ -20,11 +19,8 @@ pub mod proto { pub mod metrics; // Re-exports to avoid direct tonic dependency in user crates. -pub use tonic::Code; -pub use tonic::Request; -pub use tonic::Streaming; - pub use hyper::Uri; +pub use tonic::{Code, Request, Streaming}; pub const DEFAULT_LISTEN_ADDR: &str = "127.0.0.1:50051"; pub const DEFAULT_ENDPOINT: &str = const_format::formatcp!("http://{DEFAULT_LISTEN_ADDR}"); diff --git a/storage_broker/src/metrics.rs b/storage_broker/src/metrics.rs index 1fd3dd5ad6..ecfb594eba 100644 --- a/storage_broker/src/metrics.rs +++ b/storage_broker/src/metrics.rs @@ -1,6 +1,6 @@ //! Broker metrics. 
-use metrics::{register_int_counter, register_int_gauge, IntCounter, IntGauge}; +use metrics::{IntCounter, IntGauge, register_int_counter, register_int_gauge}; use once_cell::sync::Lazy; pub static NUM_PUBS: Lazy = Lazy::new(|| { diff --git a/storage_controller/Cargo.toml b/storage_controller/Cargo.toml index 73dc1a5c10..b63ba154da 100644 --- a/storage_controller/Cargo.toml +++ b/storage_controller/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "storage_controller" version = "0.1.0" -edition.workspace = true +edition = "2024" license.workspace = true [[bin]] @@ -18,12 +18,14 @@ anyhow.workspace = true bytes.workspace = true chrono.workspace = true clap.workspace = true +cron.workspace = true fail.workspace = true futures.workspace = true hex.workspace = true hyper0.workspace = true humantime.workspace = true itertools.workspace = true +json-structural-diff.workspace = true lasso.workspace = true once_cell.workspace = true pageserver_api.workspace = true diff --git a/storage_controller/migrations/2025-02-11-144848_pageserver_use_https/down.sql b/storage_controller/migrations/2025-02-11-144848_pageserver_use_https/down.sql new file mode 100644 index 0000000000..0f051d3ac3 --- /dev/null +++ b/storage_controller/migrations/2025-02-11-144848_pageserver_use_https/down.sql @@ -0,0 +1 @@ +ALTER TABLE nodes DROP listen_https_port; diff --git a/storage_controller/migrations/2025-02-11-144848_pageserver_use_https/up.sql b/storage_controller/migrations/2025-02-11-144848_pageserver_use_https/up.sql new file mode 100644 index 0000000000..172237d477 --- /dev/null +++ b/storage_controller/migrations/2025-02-11-144848_pageserver_use_https/up.sql @@ -0,0 +1 @@ +ALTER TABLE nodes ADD listen_https_port INTEGER; diff --git a/storage_controller/src/background_node_operations.rs b/storage_controller/src/background_node_operations.rs index 226d4942e7..a630316f46 100644 --- a/storage_controller/src/background_node_operations.rs +++ b/storage_controller/src/background_node_operations.rs @@ -1,4 +1,5 @@ -use std::{borrow::Cow, fmt::Debug, fmt::Display}; +use std::borrow::Cow; +use std::fmt::{Debug, Display}; use tokio_util::sync::CancellationToken; use utils::id::NodeId; diff --git a/storage_controller/src/compute_hook.rs b/storage_controller/src/compute_hook.rs index 5bc3c81f02..b602af362d 100644 --- a/storage_controller/src/compute_hook.rs +++ b/storage_controller/src/compute_hook.rs @@ -1,7 +1,8 @@ use std::borrow::Cow; +use std::collections::HashMap; use std::error::Error as _; use std::sync::Arc; -use std::{collections::HashMap, time::Duration}; +use std::time::Duration; use control_plane::endpoint::{ComputeControlPlane, EndpointStatus}; use control_plane::local_env::LocalEnv; @@ -12,11 +13,9 @@ use pageserver_api::shard::{ShardCount, ShardNumber, ShardStripeSize, TenantShar use postgres_connection::parse_host_port; use serde::{Deserialize, Serialize}; use tokio_util::sync::CancellationToken; -use tracing::{info_span, Instrument}; -use utils::{ - backoff::{self}, - id::{NodeId, TenantId}, -}; +use tracing::{Instrument, info_span}; +use utils::backoff::{self}; +use utils::id::{NodeId, TenantId}; use crate::service::Config; diff --git a/storage_controller/src/drain_utils.rs b/storage_controller/src/drain_utils.rs index 8b7be88078..bd4b8ba38f 100644 --- a/storage_controller/src/drain_utils.rs +++ b/storage_controller/src/drain_utils.rs @@ -1,15 +1,14 @@ -use std::{ - collections::{BTreeMap, HashMap}, - sync::Arc, -}; +use std::collections::{BTreeMap, HashMap}; +use std::sync::Arc; use 
pageserver_api::controller_api::{NodeSchedulingPolicy, ShardSchedulingPolicy}; -use utils::{id::NodeId, shard::TenantShardId}; +use utils::id::NodeId; +use utils::shard::TenantShardId; -use crate::{ - background_node_operations::OperationError, node::Node, scheduler::Scheduler, - tenant_shard::TenantShard, -}; +use crate::background_node_operations::OperationError; +use crate::node::Node; +use crate::scheduler::Scheduler; +use crate::tenant_shard::TenantShard; pub(crate) struct TenantShardIterator { tenants_accessor: F, @@ -188,10 +187,8 @@ impl TenantShardDrain { mod tests { use std::sync::Arc; - use utils::{ - id::TenantId, - shard::{ShardCount, ShardNumber, TenantShardId}, - }; + use utils::id::TenantId; + use utils::shard::{ShardCount, ShardNumber, TenantShardId}; use super::TenantShardIterator; diff --git a/storage_controller/src/heartbeater.rs b/storage_controller/src/heartbeater.rs index 52b6110667..56a331becd 100644 --- a/storage_controller/src/heartbeater.rs +++ b/storage_controller/src/heartbeater.rs @@ -1,24 +1,22 @@ -use futures::{stream::FuturesUnordered, StreamExt}; +use std::collections::HashMap; +use std::fmt::Debug; +use std::future::Future; +use std::sync::Arc; +use std::time::{Duration, Instant}; + +use futures::StreamExt; +use futures::stream::FuturesUnordered; +use pageserver_api::controller_api::{NodeAvailability, SkSchedulingPolicy}; +use pageserver_api::models::PageserverUtilization; use safekeeper_api::models::SafekeeperUtilization; use safekeeper_client::mgmt_api; -use std::{ - collections::HashMap, - fmt::Debug, - future::Future, - sync::Arc, - time::{Duration, Instant}, -}; -use tokio_util::sync::CancellationToken; - -use pageserver_api::{ - controller_api::{NodeAvailability, SkSchedulingPolicy}, - models::PageserverUtilization, -}; - use thiserror::Error; -use utils::{id::NodeId, logging::SecretString}; +use tokio_util::sync::CancellationToken; +use utils::id::NodeId; +use utils::logging::SecretString; -use crate::{node::Node, safekeeper::Safekeeper}; +use crate::node::Node; +use crate::safekeeper::Safekeeper; struct HeartbeaterTask { receiver: tokio::sync::mpsc::UnboundedReceiver>, @@ -223,21 +221,21 @@ impl HeartBeat for HeartbeaterTask Some((*node_id, status)) } }); + } - loop { - let maybe_status = tokio::select! { - next = heartbeat_futs.next() => { - match next { - Some(result) => result, - None => { break; } - } - }, - _ = self.cancel.cancelled() => { return Err(HeartbeaterError::Cancel); } - }; + loop { + let maybe_status = tokio::select! { + next = heartbeat_futs.next() => { + match next { + Some(result) => result, + None => { break; } + } + }, + _ = self.cancel.cancelled() => { return Err(HeartbeaterError::Cancel); } + }; - if let Some((node_id, status)) = maybe_status { - new_state.insert(node_id, status); - } + if let Some((node_id, status)) = maybe_status { + new_state.insert(node_id, status); } } @@ -363,21 +361,21 @@ impl HeartBeat for HeartbeaterTask { - match next { - Some(result) => result, - None => { break; } - } - }, - _ = self.cancel.cancelled() => { return Err(HeartbeaterError::Cancel); } - }; + loop { + let maybe_status = tokio::select! 
{ + next = heartbeat_futs.next() => { + match next { + Some(result) => result, + None => { break; } + } + }, + _ = self.cancel.cancelled() => { return Err(HeartbeaterError::Cancel); } + }; - if let Some((node_id, status)) = maybe_status { - new_state.insert(node_id, status); - } + if let Some((node_id, status)) = maybe_status { + new_state.insert(node_id, status); } } diff --git a/storage_controller/src/http.rs b/storage_controller/src/http.rs index 1cc61a12e8..5b5ae80eaf 100644 --- a/storage_controller/src/http.rs +++ b/storage_controller/src/http.rs @@ -1,32 +1,27 @@ -use crate::http; -use crate::metrics::{ - HttpRequestLatencyLabelGroup, HttpRequestStatusLabelGroup, PageserverRequestLabelGroup, - METRICS_REGISTRY, -}; -use crate::persistence::SafekeeperUpsert; -use crate::reconciler::ReconcileError; -use crate::service::{LeadershipStatus, Service, RECONCILE_TIMEOUT, STARTUP_RECONCILE_TIMEOUT}; +use std::str::FromStr; +use std::sync::Arc; +use std::time::{Duration, Instant}; + use anyhow::Context; +use control_plane::storage_controller::{AttachHookRequest, InspectRequest}; use futures::Future; -use http_utils::{ - endpoint::{ - self, auth_middleware, check_permission_with, profile_cpu_handler, profile_heap_handler, - request_span, - }, - error::ApiError, - failpoints::failpoints_handler, - json::{json_request, json_response}, - request::{must_get_query_param, parse_query_param, parse_request_param}, - RequestExt, RouterBuilder, +use http_utils::endpoint::{ + self, auth_middleware, check_permission_with, profile_cpu_handler, profile_heap_handler, + request_span, }; +use http_utils::error::ApiError; +use http_utils::failpoints::failpoints_handler; +use http_utils::json::{json_request, json_response}; +use http_utils::request::{must_get_query_param, parse_query_param, parse_request_param}; +use http_utils::{RequestExt, RouterBuilder}; use hyper::header::CONTENT_TYPE; -use hyper::{Body, Request, Response}; -use hyper::{StatusCode, Uri}; +use hyper::{Body, Request, Response, StatusCode, Uri}; use metrics::{BuildInfo, NeonMetrics}; use pageserver_api::controller_api::{ MetadataHealthListOutdatedRequest, MetadataHealthListOutdatedResponse, MetadataHealthListUnhealthyResponse, MetadataHealthUpdateRequest, MetadataHealthUpdateResponse, - SafekeeperSchedulingPolicyRequest, ShardsPreferredAzsRequest, TenantCreateRequest, + NodeAvailability, NodeConfigureRequest, NodeRegisterRequest, SafekeeperSchedulingPolicyRequest, + ShardsPreferredAzsRequest, TenantCreateRequest, TenantPolicyRequest, TenantShardMigrateRequest, }; use pageserver_api::models::{ TenantConfigPatchRequest, TenantConfigRequest, TenantLocationConfigRequest, @@ -34,23 +29,21 @@ use pageserver_api::models::{ TimelineCreateRequest, }; use pageserver_api::shard::TenantShardId; -use pageserver_client::{mgmt_api, BlockUnblock}; -use std::str::FromStr; -use std::sync::Arc; -use std::time::{Duration, Instant}; +use pageserver_api::upcall_api::{ReAttachRequest, ValidateRequest}; +use pageserver_client::{BlockUnblock, mgmt_api}; +use routerify::Middleware; use tokio_util::sync::CancellationToken; use utils::auth::{Scope, SwappableJwtAuth}; use utils::id::{NodeId, TenantId, TimelineId}; -use pageserver_api::controller_api::{ - NodeAvailability, NodeConfigureRequest, NodeRegisterRequest, TenantPolicyRequest, - TenantShardMigrateRequest, +use crate::http; +use crate::metrics::{ + HttpRequestLatencyLabelGroup, HttpRequestStatusLabelGroup, METRICS_REGISTRY, + PageserverRequestLabelGroup, }; -use pageserver_api::upcall_api::{ReAttachRequest, 
ValidateRequest}; - -use control_plane::storage_controller::{AttachHookRequest, InspectRequest}; - -use routerify::Middleware; +use crate::persistence::SafekeeperUpsert; +use crate::reconciler::ReconcileError; +use crate::service::{LeadershipStatus, RECONCILE_TIMEOUT, STARTUP_RECONCILE_TIMEOUT, Service}; /// State available to HTTP request handlers pub struct HttpState { @@ -598,7 +591,10 @@ async fn handle_tenant_timeline_passthrough( let _timer = latency.start_timer(labels.clone()); - let client = mgmt_api::Client::new(node.base_url(), service.get_config().jwt_token.as_deref()); + let client = mgmt_api::Client::new( + node.base_url(), + service.get_config().pageserver_jwt_token.as_deref(), + ); let resp = client.get_raw(path).await.map_err(|e| // We return 503 here because if we can't successfully send a request to the pageserver, // either we aren't available or the pageserver is unavailable. @@ -1354,10 +1350,7 @@ async fn handle_safekeeper_scheduling_policy( .set_safekeeper_scheduling_policy(id, body.scheduling_policy) .await?; - Ok(Response::builder() - .status(StatusCode::NO_CONTENT) - .body(Body::empty()) - .unwrap()) + json_response(StatusCode::OK, ()) } /// Common wrapper for request handlers that call into Service and will operate on tenants: they must only @@ -1455,8 +1448,8 @@ pub fn prologue_leadership_status_check_middleware< }) } -fn prologue_metrics_middleware( -) -> Middleware { +fn prologue_metrics_middleware() +-> Middleware { Middleware::pre(move |req| async move { let meta = RequestMeta { method: req.method().clone(), @@ -1469,8 +1462,8 @@ fn prologue_metrics_middleware }) } -fn epilogue_metrics_middleware( -) -> Middleware { +fn epilogue_metrics_middleware() +-> Middleware { Middleware::post_with_info(move |resp, req_info| async move { let request_name = match req_info.context::() { Some(name) => name, @@ -1621,8 +1614,8 @@ async fn maybe_forward(req: Request) -> ForwardOutcome { Err(err) => { return ForwardOutcome::Forwarded(Err(ApiError::InternalServerError( anyhow::anyhow!( - "Failed to parse leader uri for forwarding while in stepped down state: {err}" - ), + "Failed to parse leader uri for forwarding while in stepped down state: {err}" + ), ))); } }; @@ -2155,8 +2148,23 @@ mod test { #[test] fn test_path_without_ids() { - assert_eq!(path_without_ids("/v1/tenant/1a2b3344556677881122334455667788/timeline/AA223344556677881122334455667788"), "/v1/tenant//timeline/"); - assert_eq!(path_without_ids("/v1/tenant/1a2b3344556677881122334455667788-0108/timeline/AA223344556677881122334455667788"), "/v1/tenant//timeline/"); - assert_eq!(path_without_ids("/v1/tenant/1a2b3344556677881122334455667788-0108/timeline/AA223344556677881122334455667788?parameter=foo"), "/v1/tenant//timeline/"); + assert_eq!( + path_without_ids( + "/v1/tenant/1a2b3344556677881122334455667788/timeline/AA223344556677881122334455667788" + ), + "/v1/tenant//timeline/" + ); + assert_eq!( + path_without_ids( + "/v1/tenant/1a2b3344556677881122334455667788-0108/timeline/AA223344556677881122334455667788" + ), + "/v1/tenant//timeline/" + ); + assert_eq!( + path_without_ids( + "/v1/tenant/1a2b3344556677881122334455667788-0108/timeline/AA223344556677881122334455667788?parameter=foo" + ), + "/v1/tenant//timeline/" + ); } } diff --git a/storage_controller/src/id_lock_map.rs b/storage_controller/src/id_lock_map.rs index 2d8b674f86..6b0c16f0be 100644 --- a/storage_controller/src/id_lock_map.rs +++ b/storage_controller/src/id_lock_map.rs @@ -1,8 +1,7 @@ +use std::collections::HashMap; use std::fmt::Display; -use 
std::time::Instant; -use std::{collections::HashMap, sync::Arc}; - -use std::time::Duration; +use std::sync::Arc; +use std::time::{Duration, Instant}; use crate::service::RECONCILE_TIMEOUT; diff --git a/storage_controller/src/leadership.rs b/storage_controller/src/leadership.rs index 5fae8991ec..5e1d6f3ec9 100644 --- a/storage_controller/src/leadership.rs +++ b/storage_controller/src/leadership.rs @@ -3,11 +3,9 @@ use std::sync::Arc; use hyper::Uri; use tokio_util::sync::CancellationToken; -use crate::{ - peer_client::{GlobalObservedState, PeerClient}, - persistence::{ControllerPersistence, DatabaseError, DatabaseResult, Persistence}, - service::Config, -}; +use crate::peer_client::{GlobalObservedState, PeerClient}; +use crate::persistence::{ControllerPersistence, DatabaseError, DatabaseResult, Persistence}; +use crate::service::Config; /// Helper for storage controller leadership acquisition pub(crate) struct Leadership { @@ -91,7 +89,9 @@ impl Leadership { // Special case: if this is a brand new storage controller, migrations will not // have run at this point yet, and, hence, the controllers table does not exist. // Detect this case via the error string (diesel doesn't type it) and allow it. - tracing::info!("Detected first storage controller start-up. Allowing missing controllers table ..."); + tracing::info!( + "Detected first storage controller start-up. Allowing missing controllers table ..." + ); return Ok(None); } } diff --git a/storage_controller/src/main.rs b/storage_controller/src/main.rs index 9a9958f7a6..04dd3bb3f6 100644 --- a/storage_controller/src/main.rs +++ b/storage_controller/src/main.rs @@ -1,26 +1,26 @@ -use anyhow::{anyhow, Context}; -use clap::Parser; -use hyper0::Uri; -use metrics::launch_timestamp::LaunchTimestamp; -use metrics::BuildInfo; use std::path::PathBuf; use std::sync::Arc; use std::time::Duration; + +use anyhow::{Context, anyhow}; +use clap::Parser; +use hyper0::Uri; +use metrics::BuildInfo; +use metrics::launch_timestamp::LaunchTimestamp; use storage_controller::http::make_router; use storage_controller::metrics::preinitialize_metrics; use storage_controller::persistence::Persistence; use storage_controller::service::chaos_injector::ChaosInjector; use storage_controller::service::{ - Config, Service, HEARTBEAT_INTERVAL_DEFAULT, LONG_RECONCILE_THRESHOLD_DEFAULT, + Config, HEARTBEAT_INTERVAL_DEFAULT, LONG_RECONCILE_THRESHOLD_DEFAULT, MAX_OFFLINE_INTERVAL_DEFAULT, MAX_WARMING_UP_INTERVAL_DEFAULT, - PRIORITY_RECONCILER_CONCURRENCY_DEFAULT, RECONCILER_CONCURRENCY_DEFAULT, + PRIORITY_RECONCILER_CONCURRENCY_DEFAULT, RECONCILER_CONCURRENCY_DEFAULT, Service, }; use tokio::signal::unix::SignalKind; use tokio_util::sync::CancellationToken; use tracing::Instrument; use utils::auth::{JwtAuth, SwappableJwtAuth}; use utils::logging::{self, LogFormat}; - use utils::sentry_init::init_sentry; use utils::{project_build_tag, project_git_version, tcp_listener}; @@ -34,7 +34,7 @@ static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; /// This adds roughly 3% overhead for allocations on average, which is acceptable considering /// performance-sensitive code will avoid allocations as far as possible anyway. 
#[allow(non_upper_case_globals)] -#[export_name = "malloc_conf"] +#[unsafe(export_name = "malloc_conf")] pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:21\0"; #[derive(Parser)] @@ -53,6 +53,10 @@ struct Cli { #[arg(long)] jwt_token: Option, + /// Token for authenticating this service with the safekeepers it controls + #[arg(long)] + safekeeper_jwt_token: Option, + /// Token for authenticating this service with the control plane, when calling /// the compute notification endpoint #[arg(long)] @@ -111,10 +115,14 @@ struct Cli { #[arg(long)] neon_local_repo_dir: Option, - /// Chaos testing + /// Chaos testing: exercise tenant migrations #[arg(long)] chaos_interval: Option, + /// Chaos testing: exercise an immediate exit + #[arg(long)] + chaos_exit_crontab: Option, + // Maximum acceptable lag for the secondary location while draining // a pageserver #[arg(long)] @@ -126,6 +134,10 @@ struct Cli { #[arg(long)] long_reconcile_threshold: Option, + + // Flag to use https for requests to pageserver API. + #[arg(long, default_value = "false")] + use_https_pageserver_api: bool, } enum StrictMode { @@ -149,7 +161,8 @@ impl Default for StrictMode { struct Secrets { database_url: String, public_key: Option, - jwt_token: Option, + pageserver_jwt_token: Option, + safekeeper_jwt_token: Option, control_plane_jwt_token: Option, peer_jwt_token: Option, } @@ -157,6 +170,7 @@ struct Secrets { impl Secrets { const DATABASE_URL_ENV: &'static str = "DATABASE_URL"; const PAGESERVER_JWT_TOKEN_ENV: &'static str = "PAGESERVER_JWT_TOKEN"; + const SAFEKEEPER_JWT_TOKEN_ENV: &'static str = "SAFEKEEPER_JWT_TOKEN"; const CONTROL_PLANE_JWT_TOKEN_ENV: &'static str = "CONTROL_PLANE_JWT_TOKEN"; const PEER_JWT_TOKEN_ENV: &'static str = "PEER_JWT_TOKEN"; const PUBLIC_KEY_ENV: &'static str = "PUBLIC_KEY"; @@ -180,7 +194,14 @@ impl Secrets { let this = Self { database_url, public_key, - jwt_token: Self::load_secret(&args.jwt_token, Self::PAGESERVER_JWT_TOKEN_ENV), + pageserver_jwt_token: Self::load_secret( + &args.jwt_token, + Self::PAGESERVER_JWT_TOKEN_ENV, + ), + safekeeper_jwt_token: Self::load_secret( + &args.safekeeper_jwt_token, + Self::SAFEKEEPER_JWT_TOKEN_ENV, + ), control_plane_jwt_token: Self::load_secret( &args.control_plane_jwt_token, Self::CONTROL_PLANE_JWT_TOKEN_ENV, @@ -260,18 +281,24 @@ async fn async_main() -> anyhow::Result<()> { let secrets = Secrets::load(&args).await?; + // TODO: once we've rolled out the safekeeper JWT token everywhere, put it into the validation code below + tracing::info!( + "safekeeper_jwt_token set: {:?}", + secrets.safekeeper_jwt_token.is_some() + ); + // Validate required secrets and arguments are provided in strict mode match strict_mode { StrictMode::Strict if (secrets.public_key.is_none() - || secrets.jwt_token.is_none() + || secrets.pageserver_jwt_token.is_none() || secrets.control_plane_jwt_token.is_none()) => { // Production systems should always have secrets configured: if public_key was not set // then we would implicitly disable auth. anyhow::bail!( - "Insecure config! One or more secrets is not set. This is only permitted in `--dev` mode" - ); + "Insecure config! One or more secrets is not set. 
This is only permitted in `--dev` mode" + ); } StrictMode::Strict if args.compute_hook_url.is_none() => { // Production systems should always have a compute hook set, to prevent falling @@ -289,7 +316,8 @@ async fn async_main() -> anyhow::Result<()> { } let config = Config { - jwt_token: secrets.jwt_token, + pageserver_jwt_token: secrets.pageserver_jwt_token, + safekeeper_jwt_token: secrets.safekeeper_jwt_token, control_plane_jwt_token: secrets.control_plane_jwt_token, peer_jwt_token: secrets.peer_jwt_token, compute_hook_url: args.compute_hook_url, @@ -321,6 +349,7 @@ async fn async_main() -> anyhow::Result<()> { address_for_peers: args.address_for_peers, start_as_candidate: args.start_as_candidate, http_service_port: args.listen.port() as i32, + use_https_pageserver_api: args.use_https_pageserver_api, }; // Validate that we can connect to the database @@ -357,10 +386,12 @@ async fn async_main() -> anyhow::Result<()> { let service = service.clone(); let cancel = CancellationToken::new(); let cancel_bg = cancel.clone(); + let chaos_exit_crontab = args.chaos_exit_crontab; ( tokio::task::spawn( async move { - let mut chaos_injector = ChaosInjector::new(service, interval.into()); + let mut chaos_injector = + ChaosInjector::new(service, interval.into(), chaos_exit_crontab); chaos_injector.run(cancel_bg).await } .instrument(tracing::info_span!("chaos_injector")), diff --git a/storage_controller/src/metrics.rs b/storage_controller/src/metrics.rs index 6d67e0d130..f490edb68f 100644 --- a/storage_controller/src/metrics.rs +++ b/storage_controller/src/metrics.rs @@ -7,17 +7,18 @@ //! //! The rest of the code defines label group types and deals with converting outer types to labels. //! +use std::sync::Mutex; + use bytes::Bytes; -use measured::{label::LabelValue, metric::histogram, FixedCardinalityLabel, MetricGroup}; +use measured::label::LabelValue; +use measured::metric::histogram; +use measured::{FixedCardinalityLabel, MetricGroup}; use metrics::NeonMetrics; use once_cell::sync::Lazy; -use std::sync::Mutex; use strum::IntoEnumIterator; -use crate::{ - persistence::{DatabaseError, DatabaseOperation}, - service::LeadershipStatus, -}; +use crate::persistence::{DatabaseError, DatabaseOperation}; +use crate::service::LeadershipStatus; pub(crate) static METRICS_REGISTRY: Lazy = Lazy::new(StorageControllerMetrics::default); diff --git a/storage_controller/src/node.rs b/storage_controller/src/node.rs index f5c2d329e0..bc7fe8802a 100644 --- a/storage_controller/src/node.rs +++ b/storage_controller/src/node.rs @@ -1,21 +1,22 @@ -use std::{str::FromStr, time::Duration}; +use std::str::FromStr; +use std::time::Duration; -use pageserver_api::{ - controller_api::{ - AvailabilityZone, NodeAvailability, NodeDescribeResponse, NodeRegisterRequest, - NodeSchedulingPolicy, TenantLocateResponseShard, - }, - shard::TenantShardId, +use anyhow::anyhow; +use pageserver_api::controller_api::{ + AvailabilityZone, NodeAvailability, NodeDescribeResponse, NodeRegisterRequest, + NodeSchedulingPolicy, TenantLocateResponseShard, }; +use pageserver_api::shard::TenantShardId; use pageserver_client::mgmt_api; use reqwest::StatusCode; use serde::Serialize; use tokio_util::sync::CancellationToken; -use utils::{backoff, id::NodeId}; +use utils::backoff; +use utils::id::NodeId; -use crate::{ - pageserver_client::PageserverClient, persistence::NodePersistence, scheduler::MaySchedule, -}; +use crate::pageserver_client::PageserverClient; +use crate::persistence::NodePersistence; +use crate::scheduler::MaySchedule; /// Represents the 
in-memory description of a Node. /// @@ -32,12 +33,16 @@ pub(crate) struct Node { listen_http_addr: String, listen_http_port: u16, + listen_https_port: Option, listen_pg_addr: String, listen_pg_port: u16, availability_zone_id: AvailabilityZone, + // Flag from storcon's config to use https for pageserver admin API. + // Invariant: if |true|, listen_https_port should contain a value. + use_https: bool, // This cancellation token means "stop any RPCs in flight to this node, and don't start // any more". It is not related to process shutdown. #[serde(skip)] @@ -56,7 +61,16 @@ pub(crate) enum AvailabilityTransition { impl Node { pub(crate) fn base_url(&self) -> String { - format!("http://{}:{}", self.listen_http_addr, self.listen_http_port) + if self.use_https { + format!( + "https://{}:{}", + self.listen_http_addr, + self.listen_https_port + .expect("https port should be specified if use_https is on") + ) + } else { + format!("http://{}:{}", self.listen_http_addr, self.listen_http_port) + } } pub(crate) fn get_id(&self) -> NodeId { @@ -82,11 +96,20 @@ impl Node { self.id == register_req.node_id && self.listen_http_addr == register_req.listen_http_addr && self.listen_http_port == register_req.listen_http_port + // Note: listen_https_port may change. See [`Self::need_update`] for mode details. + // && self.listen_https_port == register_req.listen_https_port && self.listen_pg_addr == register_req.listen_pg_addr && self.listen_pg_port == register_req.listen_pg_port && self.availability_zone_id == register_req.availability_zone_id } + // Do we need to update an existing record in DB on this registration request? + pub(crate) fn need_update(&self, register_req: &NodeRegisterRequest) -> bool { + // listen_https_port is checked here because it may change during migration to https. + // After migration, this check may be moved to registration_match. + self.listen_https_port != register_req.listen_https_port + } + /// For a shard located on this node, populate a response object /// with this node's address information. 
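The new use_https flag and listen_https_port field change how the controller builds pageserver management API URLs. Below is a minimal, standalone sketch of the invariant, not taken from the patch: Endpoint is a stand-in for Node, and the host and ports are invented. When use_https is set, an https port must be present (validated at construction in the real code) and takes precedence over the http port.

// Stand-in for Node: only the fields involved in URL selection.
struct Endpoint {
    host: String,
    http_port: u16,
    https_port: Option<u16>,
    use_https: bool,
}

impl Endpoint {
    // Mirrors the base_url() logic above: https wins when enabled, and the
    // https port is expected to exist because construction validated it.
    fn base_url(&self) -> String {
        if self.use_https {
            let port = self
                .https_port
                .expect("https port should be specified if use_https is on");
            format!("https://{}:{}", self.host, port)
        } else {
            format!("http://{}:{}", self.host, self.http_port)
        }
    }
}

fn main() {
    let node = Endpoint {
        host: "pageserver-1.local".to_string(),
        http_port: 9898,
        https_port: Some(9899),
        use_https: true,
    };
    assert_eq!(node.base_url(), "https://pageserver-1.local:9899");
}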
pub(crate) fn shard_location(&self, shard_id: TenantShardId) -> TenantLocateResponseShard { @@ -95,6 +118,7 @@ impl Node { node_id: self.id, listen_http_addr: self.listen_http_addr.clone(), listen_http_port: self.listen_http_port, + listen_https_port: self.listen_https_port, listen_pg_addr: self.listen_pg_addr.clone(), listen_pg_port: self.listen_pg_port, } @@ -175,25 +199,34 @@ impl Node { } } + #[allow(clippy::too_many_arguments)] pub(crate) fn new( id: NodeId, listen_http_addr: String, listen_http_port: u16, + listen_https_port: Option, listen_pg_addr: String, listen_pg_port: u16, availability_zone_id: AvailabilityZone, - ) -> Self { - Self { + use_https: bool, + ) -> anyhow::Result { + if use_https && listen_https_port.is_none() { + return Err(anyhow!("https is enabled, but node has no https port")); + } + + Ok(Self { id, listen_http_addr, listen_http_port, + listen_https_port, listen_pg_addr, listen_pg_port, scheduling: NodeSchedulingPolicy::Active, availability: NodeAvailability::Offline, availability_zone_id, + use_https, cancel: CancellationToken::new(), - } + }) } pub(crate) fn to_persistent(&self) -> NodePersistence { @@ -202,14 +235,19 @@ impl Node { scheduling_policy: self.scheduling.into(), listen_http_addr: self.listen_http_addr.clone(), listen_http_port: self.listen_http_port as i32, + listen_https_port: self.listen_https_port.map(|x| x as i32), listen_pg_addr: self.listen_pg_addr.clone(), listen_pg_port: self.listen_pg_port as i32, availability_zone_id: self.availability_zone_id.0.clone(), } } - pub(crate) fn from_persistent(np: NodePersistence) -> Self { - Self { + pub(crate) fn from_persistent(np: NodePersistence, use_https: bool) -> anyhow::Result { + if use_https && np.listen_https_port.is_none() { + return Err(anyhow!("https is enabled, but node has no https port")); + } + + Ok(Self { id: NodeId(np.node_id as u64), // At startup we consider a node offline until proven otherwise. 
availability: NodeAvailability::Offline, @@ -217,11 +255,13 @@ impl Node { .expect("Bad scheduling policy in DB"), listen_http_addr: np.listen_http_addr, listen_http_port: np.listen_http_port as u16, + listen_https_port: np.listen_https_port.map(|x| x as u16), listen_pg_addr: np.listen_pg_addr, listen_pg_port: np.listen_pg_port as u16, availability_zone_id: AvailabilityZone(np.availability_zone_id), + use_https, cancel: CancellationToken::new(), - } + }) } /// Wrapper for issuing requests to pageserver management API: takes care of generic @@ -285,8 +325,9 @@ impl Node { warn_threshold, max_retries, &format!( - "Call to node {} ({}:{}) management API", - self.id, self.listen_http_addr, self.listen_http_port + "Call to node {} ({}) management API", + self.id, + self.base_url(), ), cancel, ) @@ -302,6 +343,7 @@ impl Node { availability_zone_id: self.availability_zone_id.0.clone(), listen_http_addr: self.listen_http_addr.clone(), listen_http_port: self.listen_http_port, + listen_https_port: self.listen_https_port, listen_pg_addr: self.listen_pg_addr.clone(), listen_pg_port: self.listen_pg_port, } diff --git a/storage_controller/src/pageserver_client.rs b/storage_controller/src/pageserver_client.rs index 645cbdfce1..e9c54414a3 100644 --- a/storage_controller/src/pageserver_client.rs +++ b/storage_controller/src/pageserver_client.rs @@ -1,17 +1,13 @@ -use pageserver_api::{ - models::{ - detach_ancestor::AncestorDetached, LocationConfig, LocationConfigListResponse, - PageserverUtilization, SecondaryProgress, TenantScanRemoteStorageResponse, - TenantShardSplitRequest, TenantShardSplitResponse, TenantWaitLsnRequest, - TimelineArchivalConfigRequest, TimelineCreateRequest, TimelineInfo, TopTenantShardsRequest, - TopTenantShardsResponse, - }, - shard::TenantShardId, -}; -use pageserver_client::{ - mgmt_api::{Client, Result}, - BlockUnblock, +use pageserver_api::models::detach_ancestor::AncestorDetached; +use pageserver_api::models::{ + LocationConfig, LocationConfigListResponse, PageserverUtilization, SecondaryProgress, + TenantScanRemoteStorageResponse, TenantShardSplitRequest, TenantShardSplitResponse, + TenantWaitLsnRequest, TimelineArchivalConfigRequest, TimelineCreateRequest, TimelineInfo, + TopTenantShardsRequest, TopTenantShardsResponse, }; +use pageserver_api::shard::TenantShardId; +use pageserver_client::BlockUnblock; +use pageserver_client::mgmt_api::{Client, Result}; use reqwest::StatusCode; use utils::id::{NodeId, TenantId, TimelineId}; diff --git a/storage_controller/src/peer_client.rs b/storage_controller/src/peer_client.rs index 1a15bae365..f3f275dee0 100644 --- a/storage_controller/src/peer_client.rs +++ b/storage_controller/src/peer_client.rs @@ -1,16 +1,17 @@ -use crate::tenant_shard::ObservedState; -use pageserver_api::shard::TenantShardId; -use serde::{Deserialize, Serialize}; use std::collections::HashMap; use std::error::Error as _; use std::time::Duration; -use tokio_util::sync::CancellationToken; use http_utils::error::HttpErrorBody; use hyper::Uri; +use pageserver_api::shard::TenantShardId; use reqwest::{StatusCode, Url}; +use serde::{Deserialize, Serialize}; +use tokio_util::sync::CancellationToken; use utils::backoff; +use crate::tenant_shard::ObservedState; + #[derive(Debug, Clone)] pub(crate) struct PeerClient { uri: Uri, diff --git a/storage_controller/src/persistence.rs b/storage_controller/src/persistence.rs index 67b60eadf3..2e80b48859 100644 --- a/storage_controller/src/persistence.rs +++ b/storage_controller/src/persistence.rs @@ -2,45 +2,40 @@ pub(crate) mod 
split_state; use std::collections::HashMap; use std::str::FromStr; use std::sync::Arc; -use std::time::Duration; -use std::time::Instant; +use std::time::{Duration, Instant}; -use self::split_state::SplitState; +use diesel::deserialize::{FromSql, FromSqlRow}; +use diesel::pg::Pg; use diesel::prelude::*; use diesel_async::async_connection_wrapper::AsyncConnectionWrapper; use diesel_async::pooled_connection::bb8::Pool; -use diesel_async::pooled_connection::AsyncDieselConnectionManager; -use diesel_async::pooled_connection::ManagerConfig; -use diesel_async::AsyncPgConnection; -use diesel_async::RunQueryDsl; -use futures::future::BoxFuture; +use diesel_async::pooled_connection::{AsyncDieselConnectionManager, ManagerConfig}; +use diesel_async::{AsyncPgConnection, RunQueryDsl}; +use diesel_migrations::{EmbeddedMigrations, embed_migrations}; use futures::FutureExt; +use futures::future::BoxFuture; use itertools::Itertools; -use pageserver_api::controller_api::AvailabilityZone; -use pageserver_api::controller_api::MetadataHealthRecord; -use pageserver_api::controller_api::SafekeeperDescribeResponse; -use pageserver_api::controller_api::ShardSchedulingPolicy; -use pageserver_api::controller_api::SkSchedulingPolicy; -use pageserver_api::controller_api::{NodeSchedulingPolicy, PlacementPolicy}; +use pageserver_api::controller_api::{ + AvailabilityZone, MetadataHealthRecord, NodeSchedulingPolicy, PlacementPolicy, + SafekeeperDescribeResponse, ShardSchedulingPolicy, SkSchedulingPolicy, +}; use pageserver_api::models::TenantConfig; -use pageserver_api::shard::ShardConfigError; -use pageserver_api::shard::ShardIdentity; -use pageserver_api::shard::ShardStripeSize; -use pageserver_api::shard::{ShardCount, ShardNumber, TenantShardId}; -use rustls::client::danger::{ServerCertVerified, ServerCertVerifier}; +use pageserver_api::shard::{ + ShardConfigError, ShardCount, ShardIdentity, ShardNumber, ShardStripeSize, TenantShardId, +}; use rustls::client::WebPkiServerVerifier; +use rustls::client::danger::{ServerCertVerified, ServerCertVerifier}; use rustls::crypto::ring; use scoped_futures::ScopedBoxFuture; use serde::{Deserialize, Serialize}; use utils::generation::Generation; use utils::id::{NodeId, TenantId}; +use self::split_state::SplitState; use crate::metrics::{ DatabaseQueryErrorLabelGroup, DatabaseQueryLatencyLabelGroup, METRICS_REGISTRY, }; use crate::node::Node; - -use diesel_migrations::{embed_migrations, EmbeddedMigrations}; const MIGRATIONS: EmbeddedMigrations = embed_migrations!("./migrations"); /// ## What do we store? 
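The embed_migrations! constant above compiles the ./migrations directory into the binary. As a hedged sketch, not taken from this patch: embedded migrations of this kind are typically applied through diesel's MigrationHarness. The storage controller drives its pooled connection through the async wrapper imported above; the sketch below uses a plain synchronous connection purely for brevity.

use diesel::Connection;
use diesel::pg::PgConnection;
use diesel_migrations::{EmbeddedMigrations, MigrationHarness, embed_migrations};

const MIGRATIONS: EmbeddedMigrations = embed_migrations!("./migrations");

// Apply any migrations that have not run yet; already-applied ones are skipped.
fn run_pending(database_url: &str) -> anyhow::Result<()> {
    let mut conn = PgConnection::establish(database_url)?;
    conn.run_pending_migrations(MIGRATIONS)
        .map_err(|e| anyhow::anyhow!("failed to run migrations: {e}"))?;
    Ok(())
}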
@@ -375,18 +370,23 @@ impl Persistence { Ok(nodes) } - pub(crate) async fn update_node( + pub(crate) async fn update_node( &self, input_node_id: NodeId, - input_scheduling: NodeSchedulingPolicy, - ) -> DatabaseResult<()> { + values: V, + ) -> DatabaseResult<()> + where + V: diesel::AsChangeset + Clone + Send + Sync, + V::Changeset: diesel::query_builder::QueryFragment + Send, // valid Postgres SQL + { use crate::schema::nodes::dsl::*; let updated = self .with_measured_conn(DatabaseOperation::UpdateNode, move |conn| { + let values = values.clone(); Box::pin(async move { let updated = diesel::update(nodes) .filter(node_id.eq(input_node_id.0 as i64)) - .set((scheduling_policy.eq(String::from(input_scheduling)),)) + .set(values) .execute(conn) .await?; Ok(updated) @@ -403,6 +403,32 @@ impl Persistence { } } + pub(crate) async fn update_node_scheduling_policy( + &self, + input_node_id: NodeId, + input_scheduling: NodeSchedulingPolicy, + ) -> DatabaseResult<()> { + use crate::schema::nodes::dsl::*; + self.update_node( + input_node_id, + scheduling_policy.eq(String::from(input_scheduling)), + ) + .await + } + + pub(crate) async fn update_node_on_registration( + &self, + input_node_id: NodeId, + input_https_port: Option, + ) -> DatabaseResult<()> { + use crate::schema::nodes::dsl::*; + self.update_node( + input_node_id, + listen_https_port.eq(input_https_port.map(|x| x as i32)), + ) + .await + } + /// At startup, load the high level state for shards, such as their config + policy. This will /// be enriched at runtime with state discovered on pageservers. /// @@ -448,8 +474,7 @@ impl Persistence { &self, shards: Vec, ) -> DatabaseResult<()> { - use crate::schema::metadata_health; - use crate::schema::tenant_shards; + use crate::schema::{metadata_health, tenant_shards}; let now = chrono::Utc::now(); @@ -523,8 +548,7 @@ impl Persistence { &self, input_node_id: NodeId, ) -> DatabaseResult> { - use crate::schema::nodes::dsl::scheduling_policy; - use crate::schema::nodes::dsl::*; + use crate::schema::nodes::dsl::{scheduling_policy, *}; use crate::schema::tenant_shards::dsl::*; let updated = self .with_measured_conn(DatabaseOperation::ReAttach, move |conn| { @@ -1452,6 +1476,7 @@ pub(crate) struct NodePersistence { pub(crate) listen_pg_addr: String, pub(crate) listen_pg_port: i32, pub(crate) availability_zone_id: String, + pub(crate) listen_https_port: Option, } /// Tenant metadata health status that are stored durably. @@ -1533,7 +1558,33 @@ pub(crate) struct SafekeeperPersistence { pub(crate) port: i32, pub(crate) http_port: i32, pub(crate) availability_zone_id: String, - pub(crate) scheduling_policy: String, + pub(crate) scheduling_policy: SkSchedulingPolicyFromSql, +} + +/// Wrapper struct around [`SkSchedulingPolicy`] because both it and [`FromSql`] are from foreign crates, +/// and we don't want to make [`safekeeper_api`] depend on [`diesel`]. 
+#[derive(Serialize, Deserialize, FromSqlRow, Eq, PartialEq, Debug, Copy, Clone)] +pub(crate) struct SkSchedulingPolicyFromSql(pub(crate) SkSchedulingPolicy); + +impl From for SkSchedulingPolicyFromSql { + fn from(value: SkSchedulingPolicy) -> Self { + SkSchedulingPolicyFromSql(value) + } +} + +impl FromSql for SkSchedulingPolicyFromSql { + fn from_sql( + bytes: ::RawValue<'_>, + ) -> diesel::deserialize::Result { + let bytes = bytes.as_bytes(); + match core::str::from_utf8(bytes) { + Ok(s) => match SkSchedulingPolicy::from_str(s) { + Ok(policy) => Ok(SkSchedulingPolicyFromSql(policy)), + Err(e) => Err(format!("can't parse: {e}").into()), + }, + Err(e) => Err(format!("invalid UTF-8 for scheduling policy: {e}").into()), + } + } } impl SafekeeperPersistence { @@ -1549,14 +1600,10 @@ impl SafekeeperPersistence { port: upsert.port, http_port: upsert.http_port, availability_zone_id: upsert.availability_zone_id, - scheduling_policy: String::from(scheduling_policy), + scheduling_policy: SkSchedulingPolicyFromSql(scheduling_policy), } } pub(crate) fn as_describe_response(&self) -> Result { - let scheduling_policy = - SkSchedulingPolicy::from_str(&self.scheduling_policy).map_err(|e| { - DatabaseError::Logical(format!("can't construct SkSchedulingPolicy: {e:?}")) - })?; Ok(SafekeeperDescribeResponse { id: NodeId(self.id as u64), region_id: self.region_id.clone(), @@ -1565,7 +1612,7 @@ impl SafekeeperPersistence { port: self.port, http_port: self.http_port, availability_zone_id: self.availability_zone_id.clone(), - scheduling_policy, + scheduling_policy: self.scheduling_policy.0, }) } } diff --git a/storage_controller/src/persistence/split_state.rs b/storage_controller/src/persistence/split_state.rs index bce1a75843..f83191038a 100644 --- a/storage_controller/src/persistence/split_state.rs +++ b/storage_controller/src/persistence/split_state.rs @@ -1,8 +1,8 @@ +use diesel::deserialize::{FromSql, FromSqlRow}; +use diesel::expression::AsExpression; use diesel::pg::{Pg, PgValue}; -use diesel::{ - deserialize::FromSql, deserialize::FromSqlRow, expression::AsExpression, serialize::ToSql, - sql_types::Int2, -}; +use diesel::serialize::ToSql; +use diesel::sql_types::Int2; use serde::{Deserialize, Serialize}; #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, FromSqlRow, AsExpression)] diff --git a/storage_controller/src/reconciler.rs b/storage_controller/src/reconciler.rs index 48f0804926..a327f6f50f 100644 --- a/storage_controller/src/reconciler.rs +++ b/storage_controller/src/reconciler.rs @@ -1,6 +1,9 @@ -use crate::pageserver_client::PageserverClient; -use crate::persistence::Persistence; -use crate::{compute_hook, service}; +use std::borrow::Cow; +use std::collections::HashMap; +use std::sync::Arc; +use std::time::{Duration, Instant}; + +use json_structural_diff::JsonDiff; use pageserver_api::controller_api::{AvailabilityZone, MigrationConfig, PlacementPolicy}; use pageserver_api::models::{ LocationConfig, LocationConfigMode, LocationConfigSecondary, TenantConfig, TenantWaitLsnRequest, @@ -8,10 +11,6 @@ use pageserver_api::models::{ use pageserver_api::shard::{ShardIdentity, TenantShardId}; use pageserver_client::mgmt_api; use reqwest::StatusCode; -use std::borrow::Cow; -use std::collections::HashMap; -use std::sync::Arc; -use std::time::{Duration, Instant}; use tokio_util::sync::CancellationToken; use utils::backoff::exponential_backoff; use utils::generation::Generation; @@ -22,9 +21,12 @@ use utils::sync::gate::GateGuard; use crate::compute_hook::{ComputeHook, NotifyError}; use 
crate::node::Node; +use crate::pageserver_client::PageserverClient; +use crate::persistence::Persistence; use crate::tenant_shard::{IntentState, ObservedState, ObservedStateDelta, ObservedStateLocation}; +use crate::{compute_hook, service}; -const DEFAULT_HEATMAP_PERIOD: &str = "60s"; +const DEFAULT_HEATMAP_PERIOD: Duration = Duration::from_secs(60); /// Object with the lifetime of the background reconcile task that is created /// for tenants which have a difference between their intent and observed states. @@ -296,7 +298,7 @@ impl Reconciler { .location_config(tenant_shard_id, config.clone(), flush_ms, lazy) .await }, - &self.service_config.jwt_token, + &self.service_config.pageserver_jwt_token, 1, 3, timeout, @@ -417,7 +419,7 @@ impl Reconciler { let client = PageserverClient::new( node.get_id(), node.base_url(), - self.service_config.jwt_token.as_deref(), + self.service_config.pageserver_jwt_token.as_deref(), ); client @@ -440,7 +442,7 @@ impl Reconciler { let client = PageserverClient::new( node.get_id(), node.base_url(), - self.service_config.jwt_token.as_deref(), + self.service_config.pageserver_jwt_token.as_deref(), ); let timelines = client.timeline_list(&tenant_shard_id).await?; @@ -478,7 +480,7 @@ impl Reconciler { ) .await }, - &self.service_config.jwt_token, + &self.service_config.pageserver_jwt_token, 1, 3, request_download_timeout * 2, @@ -510,7 +512,8 @@ impl Reconciler { } else if status == StatusCode::ACCEPTED { let total_runtime = started_at.elapsed(); if total_runtime > total_download_timeout { - tracing::warn!("Timed out after {}ms downloading layers to {node}. Progress so far: {}/{} layers, {}/{} bytes", + tracing::warn!( + "Timed out after {}ms downloading layers to {node}. Progress so far: {}/{} layers, {}/{} bytes", total_runtime.as_millis(), progress.layers_downloaded, progress.layers_total, @@ -771,7 +774,7 @@ impl Reconciler { let observed_conf = match attached_node .with_client_retries( |client| async move { client.get_location_config(tenant_shard_id).await }, - &self.service_config.jwt_token, + &self.service_config.pageserver_jwt_token, 1, 1, Duration::from_secs(5), @@ -880,7 +883,27 @@ impl Reconciler { self.generation = Some(generation); wanted_conf.generation = generation.into(); } - tracing::info!(node_id=%node.get_id(), "Observed configuration requires update."); + + let diff = match observed { + Some(ObservedStateLocation { + conf: Some(observed), + }) => { + let diff = JsonDiff::diff( + &serde_json::to_value(observed.clone()).unwrap(), + &serde_json::to_value(wanted_conf.clone()).unwrap(), + false, + ); + + if let Some(json_diff) = diff.diff { + serde_json::to_string(&json_diff).unwrap_or("diff err".to_string()) + } else { + "unknown".to_string() + } + } + _ => "full".to_string(), + }; + + tracing::info!(node_id=%node.get_id(), "Observed configuration requires update: {diff}"); // Because `node` comes from a ref to &self, clone it before calling into a &mut self // function: this could be avoided by refactoring the state mutated by location_config into @@ -1099,7 +1122,7 @@ impl Reconciler { match origin .with_client_retries( |client| async move { client.get_location_config(tenant_shard_id).await }, - &self.service_config.jwt_token, + &self.service_config.pageserver_jwt_token, 1, 3, Duration::from_secs(5), @@ -1180,7 +1203,7 @@ fn ha_aware_config(config: &TenantConfig, has_secondaries: bool) -> TenantConfig let mut config = config.clone(); if has_secondaries { if config.heatmap_period.is_none() { - config.heatmap_period = 
Some(DEFAULT_HEATMAP_PERIOD.to_string()); + config.heatmap_period = Some(DEFAULT_HEATMAP_PERIOD); } } else { config.heatmap_period = None; diff --git a/storage_controller/src/safekeeper.rs b/storage_controller/src/safekeeper.rs index 53cd8a908b..9c7e6e0894 100644 --- a/storage_controller/src/safekeeper.rs +++ b/storage_controller/src/safekeeper.rs @@ -1,16 +1,16 @@ -use std::{str::FromStr, time::Duration}; +use std::time::Duration; use pageserver_api::controller_api::{SafekeeperDescribeResponse, SkSchedulingPolicy}; use reqwest::StatusCode; use safekeeper_client::mgmt_api; use tokio_util::sync::CancellationToken; -use utils::{backoff, id::NodeId, logging::SecretString}; +use utils::backoff; +use utils::id::NodeId; +use utils::logging::SecretString; -use crate::{ - heartbeater::SafekeeperState, - persistence::{DatabaseError, SafekeeperPersistence}, - safekeeper_client::SafekeeperClient, -}; +use crate::heartbeater::SafekeeperState; +use crate::persistence::{DatabaseError, SafekeeperPersistence}; +use crate::safekeeper_client::SafekeeperClient; #[derive(Clone)] pub struct Safekeeper { @@ -25,7 +25,7 @@ pub struct Safekeeper { impl Safekeeper { pub(crate) fn from_persistence(skp: SafekeeperPersistence, cancel: CancellationToken) -> Self { - let scheduling_policy = SkSchedulingPolicy::from_str(&skp.scheduling_policy).unwrap(); + let scheduling_policy = skp.scheduling_policy.0; Self { cancel, listen_http_addr: skp.host.clone(), @@ -54,7 +54,7 @@ impl Safekeeper { } pub(crate) fn set_scheduling_policy(&mut self, scheduling_policy: SkSchedulingPolicy) { self.scheduling_policy = scheduling_policy; - self.skp.scheduling_policy = String::from(scheduling_policy); + self.skp.scheduling_policy = scheduling_policy.into(); } /// Perform an operation (which is given a [`SafekeeperClient`]) with retries pub(crate) async fn with_client_retries( diff --git a/storage_controller/src/safekeeper_client.rs b/storage_controller/src/safekeeper_client.rs index f234ab3429..fb5be092a0 100644 --- a/storage_controller/src/safekeeper_client.rs +++ b/storage_controller/src/safekeeper_client.rs @@ -1,13 +1,12 @@ -use crate::metrics::PageserverRequestLabelGroup; use safekeeper_api::models::{ PullTimelineRequest, PullTimelineResponse, SafekeeperUtilization, TimelineCreateRequest, TimelineStatus, }; use safekeeper_client::mgmt_api::{Client, Result}; -use utils::{ - id::{NodeId, TenantId, TimelineId}, - logging::SecretString, -}; +use utils::id::{NodeId, TenantId, TimelineId}; +use utils::logging::SecretString; + +use crate::metrics::PageserverRequestLabelGroup; /// Thin wrapper around [`safekeeper_client::mgmt_api::Client`]. It allows the storage /// controller to collect metrics in a non-intrusive manner. 
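The reconciler change above replaces the bare "Observed configuration requires update" log line with a structural JSON diff of the observed versus wanted location config, via the json_structural_diff crate. A standalone sketch mirroring that call shape follows; the JSON values are invented, while the real code serializes LocationConfig structs.

use json_structural_diff::JsonDiff;
use serde_json::json;

fn main() {
    let observed = json!({ "mode": "AttachedSingle", "generation": 4, "tenant_conf": {} });
    let wanted = json!({ "mode": "AttachedSingle", "generation": 5, "tenant_conf": {} });

    // Same shape as the reconciler: diff the two serde_json values and fall
    // back to a placeholder string when no structural diff is produced.
    let diff = JsonDiff::diff(&observed, &wanted, false);
    let rendered = match diff.diff {
        Some(json_diff) => serde_json::to_string(&json_diff).unwrap_or("diff err".to_string()),
        None => "unknown".to_string(),
    };
    println!("Observed configuration requires update: {rendered}");
}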
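Related to the DEFAULT_HEATMAP_PERIOD change above, heatmap_period is now carried as a typed Duration rather than a "60s" string. A standalone sketch of the ha_aware_config behaviour with the new type, where Config stands in for the relevant TenantConfig field:

use std::time::Duration;

const DEFAULT_HEATMAP_PERIOD: Duration = Duration::from_secs(60);

#[derive(Clone, Debug, PartialEq)]
struct Config {
    heatmap_period: Option<Duration>,
}

// With secondaries present, fill in a default heatmap period if none is set;
// without secondaries, uploading heatmaps is pointless, so clear it.
fn ha_aware_config(config: &Config, has_secondaries: bool) -> Config {
    let mut config = config.clone();
    if has_secondaries {
        if config.heatmap_period.is_none() {
            config.heatmap_period = Some(DEFAULT_HEATMAP_PERIOD);
        }
    } else {
        config.heatmap_period = None;
    }
    config
}

fn main() {
    let base = Config { heatmap_period: None };
    assert_eq!(
        ha_aware_config(&base, true).heatmap_period,
        Some(Duration::from_secs(60))
    );
    assert_eq!(ha_aware_config(&base, false).heatmap_period, None);
}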
diff --git a/storage_controller/src/scheduler.rs b/storage_controller/src/scheduler.rs index 106a7b2699..817cf04fe1 100644 --- a/storage_controller/src/scheduler.rs +++ b/storage_controller/src/scheduler.rs @@ -1,11 +1,17 @@ -use crate::{metrics::NodeLabelGroup, node::Node, tenant_shard::TenantShard}; +use std::collections::HashMap; +use std::fmt::Debug; + use http_utils::error::ApiError; use itertools::Itertools; -use pageserver_api::{controller_api::AvailabilityZone, models::PageserverUtilization}; +use pageserver_api::controller_api::AvailabilityZone; +use pageserver_api::models::PageserverUtilization; use serde::Serialize; -use std::{collections::HashMap, fmt::Debug}; use utils::id::NodeId; +use crate::metrics::NodeLabelGroup; +use crate::node::Node; +use crate::tenant_shard::TenantShard; + /// Scenarios in which we cannot find a suitable location for a tenant shard #[derive(thiserror::Error, Debug)] pub enum ScheduleError { @@ -775,10 +781,10 @@ impl Scheduler { if !matches!(context.mode, ScheduleMode::Speculative) { tracing::info!( - "scheduler selected node {node_id} (elegible nodes {:?}, hard exclude: {hard_exclude:?}, soft exclude: {context:?}, preferred_az: {:?})", - scores.iter().map(|i| i.node_id().0).collect::>(), - preferred_az, - ); + "scheduler selected node {node_id} (elegible nodes {:?}, hard exclude: {hard_exclude:?}, soft exclude: {context:?}, preferred_az: {:?})", + scores.iter().map(|i| i.node_id().0).collect::>(), + preferred_az, + ); } // Note that we do not update shard count here to reflect the scheduling: that @@ -906,14 +912,14 @@ impl Scheduler { #[cfg(test)] pub(crate) mod test_utils { - use crate::node::Node; - use pageserver_api::{ - controller_api::{AvailabilityZone, NodeAvailability}, - models::utilization::test_utilization, - }; use std::collections::HashMap; + + use pageserver_api::controller_api::{AvailabilityZone, NodeAvailability}; + use pageserver_api::models::utilization::test_utilization; use utils::id::NodeId; + use crate::node::Node; + /// Test helper: synthesize the requested number of nodes, all in active state. /// /// Node IDs start at one. @@ -930,13 +936,16 @@ pub(crate) mod test_utils { NodeId(i), format!("httphost-{i}"), 80 + i as u16, + None, format!("pghost-{i}"), 5432 + i as u16, az_iter .next() .cloned() .unwrap_or(AvailabilityZone("test-az".to_string())), - ); + false, + ) + .unwrap(); node.set_availability(NodeAvailability::Active(test_utilization::simple(0, 0))); assert!(node.is_available()); node @@ -948,17 +957,13 @@ pub(crate) mod test_utils { #[cfg(test)] mod tests { - use pageserver_api::{ - controller_api::NodeAvailability, models::utilization::test_utilization, - shard::ShardIdentity, - }; - use utils::{ - id::TenantId, - shard::{ShardCount, ShardNumber, TenantShardId}, - }; + use pageserver_api::controller_api::NodeAvailability; + use pageserver_api::models::utilization::test_utilization; + use pageserver_api::shard::ShardIdentity; + use utils::id::TenantId; + use utils::shard::{ShardCount, ShardNumber, TenantShardId}; use super::*; - use crate::tenant_shard::IntentState; #[test] fn scheduler_basic() -> anyhow::Result<()> { diff --git a/storage_controller/src/schema.rs b/storage_controller/src/schema.rs index 14c30c296d..361253bd19 100644 --- a/storage_controller/src/schema.rs +++ b/storage_controller/src/schema.rs @@ -26,6 +26,7 @@ diesel::table! 
{ listen_pg_addr -> Varchar, listen_pg_port -> Int4, availability_zone_id -> Varchar, + listen_https_port -> Nullable, } } diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index fc6d2f3d29..8671e340bd 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -1,112 +1,95 @@ pub mod chaos_injector; mod context_iterator; -use hyper::Uri; -use safekeeper_api::models::SafekeeperUtilization; -use std::{ - borrow::Cow, - cmp::Ordering, - collections::{BTreeMap, HashMap, HashSet}, - error::Error, - ops::Deref, - path::PathBuf, - str::FromStr, - sync::Arc, - time::{Duration, Instant}, -}; +use std::borrow::Cow; +use std::cmp::Ordering; +use std::collections::{BTreeMap, HashMap, HashSet}; +use std::error::Error; +use std::ops::Deref; +use std::path::PathBuf; +use std::str::FromStr; +use std::sync::Arc; +use std::time::{Duration, Instant}; -use crate::{ - background_node_operations::{ - Drain, Fill, Operation, OperationError, OperationHandler, MAX_RECONCILES_PER_OPERATION, - }, - compute_hook::{self, NotifyError}, - drain_utils::{self, TenantShardDrain, TenantShardIterator}, - heartbeater::SafekeeperState, - id_lock_map::{trace_exclusive_lock, trace_shared_lock, IdLockMap, TracingExclusiveGuard}, - leadership::Leadership, - metrics, - peer_client::GlobalObservedState, - persistence::{ - AbortShardSplitStatus, ControllerPersistence, DatabaseResult, MetadataHealthPersistence, - ShardGenerationState, TenantFilter, - }, - reconciler::{ - ReconcileError, ReconcileUnits, ReconcilerConfig, ReconcilerConfigBuilder, - ReconcilerPriority, - }, - safekeeper::Safekeeper, - scheduler::{MaySchedule, ScheduleContext, ScheduleError, ScheduleMode}, - tenant_shard::{ - MigrateAttachment, ObservedStateDelta, ReconcileNeeded, ReconcilerStatus, - ScheduleOptimization, ScheduleOptimizationAction, - }, -}; use anyhow::Context; +use context_iterator::TenantShardContextIterator; use control_plane::storage_controller::{ AttachHookRequest, AttachHookResponse, InspectRequest, InspectResponse, }; use diesel::result::DatabaseErrorKind; -use futures::{stream::FuturesUnordered, StreamExt}; -use itertools::Itertools; -use pageserver_api::{ - controller_api::{ - AvailabilityZone, MetadataHealthRecord, MetadataHealthUpdateRequest, NodeAvailability, - NodeRegisterRequest, NodeSchedulingPolicy, NodeShard, NodeShardResponse, PlacementPolicy, - SafekeeperDescribeResponse, ShardSchedulingPolicy, ShardsPreferredAzsRequest, - ShardsPreferredAzsResponse, SkSchedulingPolicy, TenantCreateRequest, TenantCreateResponse, - TenantCreateResponseShard, TenantDescribeResponse, TenantDescribeResponseShard, - TenantLocateResponse, TenantPolicyRequest, TenantShardMigrateRequest, - TenantShardMigrateResponse, - }, - models::{ - SecondaryProgress, TenantConfigPatchRequest, TenantConfigRequest, - TimelineArchivalConfigRequest, TopTenantShardsRequest, - }, -}; -use reqwest::StatusCode; -use tracing::{instrument, Instrument}; - -use crate::pageserver_client::PageserverClient; +use futures::StreamExt; +use futures::stream::FuturesUnordered; use http_utils::error::ApiError; -use pageserver_api::{ - models::{ - self, LocationConfig, LocationConfigListResponse, LocationConfigMode, - PageserverUtilization, ShardParameters, TenantConfig, TenantLocationConfigRequest, - TenantLocationConfigResponse, TenantShardLocation, TenantShardSplitRequest, - TenantShardSplitResponse, TenantTimeTravelRequest, TimelineCreateRequest, TimelineInfo, - }, - shard::{ShardCount, ShardIdentity, ShardNumber, 
ShardStripeSize, TenantShardId}, - upcall_api::{ - ReAttachRequest, ReAttachResponse, ReAttachResponseTenant, ValidateRequest, - ValidateResponse, ValidateResponseTenant, - }, +use hyper::Uri; +use itertools::Itertools; +use pageserver_api::controller_api::{ + AvailabilityZone, MetadataHealthRecord, MetadataHealthUpdateRequest, NodeAvailability, + NodeRegisterRequest, NodeSchedulingPolicy, NodeShard, NodeShardResponse, PlacementPolicy, + SafekeeperDescribeResponse, ShardSchedulingPolicy, ShardsPreferredAzsRequest, + ShardsPreferredAzsResponse, SkSchedulingPolicy, TenantCreateRequest, TenantCreateResponse, + TenantCreateResponseShard, TenantDescribeResponse, TenantDescribeResponseShard, + TenantLocateResponse, TenantPolicyRequest, TenantShardMigrateRequest, + TenantShardMigrateResponse, }; -use pageserver_client::{mgmt_api, BlockUnblock}; -use tokio::sync::{mpsc::error::TrySendError, TryAcquireError}; +use pageserver_api::models::{ + self, LocationConfig, LocationConfigListResponse, LocationConfigMode, PageserverUtilization, + SecondaryProgress, ShardParameters, TenantConfig, TenantConfigPatchRequest, + TenantConfigRequest, TenantLocationConfigRequest, TenantLocationConfigResponse, + TenantShardLocation, TenantShardSplitRequest, TenantShardSplitResponse, + TenantTimeTravelRequest, TimelineArchivalConfigRequest, TimelineCreateRequest, TimelineInfo, + TopTenantShardsRequest, +}; +use pageserver_api::shard::{ + ShardCount, ShardIdentity, ShardNumber, ShardStripeSize, TenantShardId, +}; +use pageserver_api::upcall_api::{ + ReAttachRequest, ReAttachResponse, ReAttachResponseTenant, ValidateRequest, ValidateResponse, + ValidateResponseTenant, +}; +use pageserver_client::{BlockUnblock, mgmt_api}; +use reqwest::StatusCode; +use safekeeper_api::models::SafekeeperUtilization; +use tokio::sync::TryAcquireError; +use tokio::sync::mpsc::error::TrySendError; use tokio_util::sync::CancellationToken; -use utils::{ - completion::Barrier, - failpoint_support, - generation::Generation, - id::{NodeId, TenantId, TimelineId}, - pausable_failpoint, - sync::gate::Gate, -}; +use tracing::{Instrument, instrument}; +use utils::completion::Barrier; +use utils::generation::Generation; +use utils::id::{NodeId, TenantId, TimelineId}; +use utils::sync::gate::Gate; +use utils::{failpoint_support, pausable_failpoint}; -use crate::{ - compute_hook::ComputeHook, - heartbeater::{Heartbeater, PageserverState}, - node::{AvailabilityTransition, Node}, - persistence::{split_state::SplitState, DatabaseError, Persistence, TenantShardPersistence}, - reconciler::attached_location_conf, - scheduler::Scheduler, - tenant_shard::{ - IntentState, ObservedState, ObservedStateLocation, ReconcileResult, ReconcileWaitError, - ReconcilerWaiter, TenantShard, - }, +use crate::background_node_operations::{ + Drain, Fill, MAX_RECONCILES_PER_OPERATION, Operation, OperationError, OperationHandler, +}; +use crate::compute_hook::{self, ComputeHook, NotifyError}; +use crate::drain_utils::{self, TenantShardDrain, TenantShardIterator}; +use crate::heartbeater::{Heartbeater, PageserverState, SafekeeperState}; +use crate::id_lock_map::{ + IdLockMap, TracingExclusiveGuard, trace_exclusive_lock, trace_shared_lock, +}; +use crate::leadership::Leadership; +use crate::metrics; +use crate::node::{AvailabilityTransition, Node}; +use crate::pageserver_client::PageserverClient; +use crate::peer_client::GlobalObservedState; +use crate::persistence::split_state::SplitState; +use crate::persistence::{ + AbortShardSplitStatus, ControllerPersistence, DatabaseError, 
DatabaseResult, + MetadataHealthPersistence, Persistence, ShardGenerationState, TenantFilter, + TenantShardPersistence, +}; +use crate::reconciler::{ + ReconcileError, ReconcileUnits, ReconcilerConfig, ReconcilerConfigBuilder, ReconcilerPriority, + attached_location_conf, +}; +use crate::safekeeper::Safekeeper; +use crate::scheduler::{MaySchedule, ScheduleContext, ScheduleError, ScheduleMode, Scheduler}; +use crate::tenant_shard::{ + IntentState, MigrateAttachment, ObservedState, ObservedStateDelta, ObservedStateLocation, + ReconcileNeeded, ReconcileResult, ReconcileWaitError, ReconcilerStatus, ReconcilerWaiter, + ScheduleOptimization, ScheduleOptimizationAction, TenantShard, }; - -use context_iterator::TenantShardContextIterator; const WAITER_FILL_DRAIN_POLL_TIMEOUT: Duration = Duration::from_millis(500); @@ -348,7 +331,12 @@ pub struct Config { // All pageservers managed by one instance of this service must have // the same public key. This JWT token will be used to authenticate // this service to the pageservers it manages. - pub jwt_token: Option, + pub pageserver_jwt_token: Option, + + // All safekeepers managed by one instance of this service must have + // the same public key. This JWT token will be used to authenticate + // this service to the safekeepers it manages. + pub safekeeper_jwt_token: Option, // This JWT token will be used to authenticate this service to the control plane. pub control_plane_jwt_token: Option, @@ -399,6 +387,8 @@ pub struct Config { pub http_service_port: i32, pub long_reconcile_threshold: Duration, + + pub use_https_pageserver_api: bool, } impl From for ApiError { @@ -780,7 +770,9 @@ impl Service { }); } - tracing::info!("Startup complete, spawned {reconcile_tasks} reconciliation tasks ({shard_count} shards total)"); + tracing::info!( + "Startup complete, spawned {reconcile_tasks} reconciliation tasks ({shard_count} shards total)" + ); } async fn initial_heartbeat_round<'a>( @@ -880,7 +872,7 @@ impl Service { let response = node .with_client_retries( |client| async move { client.list_location_config().await }, - &self.config.jwt_token, + &self.config.pageserver_jwt_token, 1, 5, timeout, @@ -981,7 +973,7 @@ impl Service { let client = PageserverClient::new( node.get_id(), node.base_url(), - self.config.jwt_token.as_deref(), + self.config.pageserver_jwt_token.as_deref(), ); match client .location_config( @@ -1175,7 +1167,9 @@ impl Service { let mut safekeepers = (*locked.safekeepers).clone(); for (id, state) in deltas.0 { let Some(sk) = safekeepers.get_mut(&id) else { - tracing::info!("Couldn't update safekeeper safekeeper state for id {id} from heartbeat={state:?}"); + tracing::info!( + "Couldn't update safekeeper safekeeper state for id {id} from heartbeat={state:?}" + ); continue; }; sk.set_availability(state); @@ -1401,8 +1395,8 @@ impl Service { .list_nodes() .await? 
.into_iter() - .map(Node::from_persistent) - .collect::>(); + .map(|x| Node::from_persistent(x, config.use_https_pageserver_api)) + .collect::>>()?; let nodes: HashMap = nodes.into_iter().map(|n| (n.get_id(), n)).collect(); tracing::info!("Loaded {} nodes from database.", nodes.len()); metrics::METRICS_REGISTRY @@ -1501,10 +1495,13 @@ impl Service { NodeId(node_id as u64), "".to_string(), 123, + None, "".to_string(), 123, AvailabilityZone("test_az".to_string()), - ); + false, + ) + .unwrap(); scheduler.node_upsert(&node); } @@ -1527,7 +1524,9 @@ impl Service { // If a node was removed before being completely drained, it is legal for it to leave behind a `generation_pageserver` referring // to a non-existent node, because node deletion doesn't block on completing the reconciliations that will issue new generations // on different pageservers. - tracing::warn!("Tenant shard {tenant_shard_id} references non-existent node {generation_pageserver} in database, will be rescheduled"); + tracing::warn!( + "Tenant shard {tenant_shard_id} references non-existent node {generation_pageserver} in database, will be rescheduled" + ); } } let new_tenant = TenantShard::from_persistent(tsp, intent)?; @@ -1548,14 +1547,14 @@ impl Service { let reconcilers_cancel = cancel.child_token(); let heartbeater_ps = Heartbeater::new( - config.jwt_token.clone(), + config.pageserver_jwt_token.clone(), config.max_offline_interval, config.max_warming_up_interval, cancel.clone(), ); let heartbeater_sk = Heartbeater::new( - config.jwt_token.clone(), + config.safekeeper_jwt_token.clone(), config.max_offline_interval, config.max_warming_up_interval, cancel.clone(), @@ -1857,7 +1856,7 @@ impl Service { } Ok(AttachHookResponse { - gen: attach_req + r#gen: attach_req .node_id .map(|_| tenant_shard.generation.expect("Test hook, not used on tenants that are mid-onboarding with a NULL generation").into().unwrap()), }) @@ -1902,7 +1901,7 @@ impl Service { let configs = match node .with_client_retries( |client| async move { client.list_location_config().await }, - &self.config.jwt_token, + &self.config.pageserver_jwt_token, 1, 5, SHORT_RECONCILE_TIMEOUT, @@ -1960,7 +1959,7 @@ impl Service { .location_config(tenant_shard_id, config, None, false) .await }, - &self.config.jwt_token, + &self.config.pageserver_jwt_token, 1, 5, SHORT_RECONCILE_TIMEOUT, @@ -2029,7 +2028,7 @@ impl Service { let new_gen = *new_gen; response.tenants.push(ReAttachResponseTenant { id: *tenant_shard_id, - gen: Some(new_gen.into().unwrap()), + r#gen: Some(new_gen.into().unwrap()), // A tenant is only put into multi or stale modes in the middle of a [`Reconciler::live_migrate`] // execution. 
If a pageserver is restarted during that process, then the reconcile pass will // fail, and start from scratch, so it doesn't make sense for us to try and preserve @@ -2066,7 +2065,7 @@ impl Service { response.tenants.push(ReAttachResponseTenant { id: *tenant_shard_id, - gen: None, + r#gen: None, mode: LocationConfigMode::Secondary, }); @@ -2128,15 +2127,19 @@ impl Service { let locked = self.inner.read().unwrap(); for req_tenant in validate_req.tenants { if let Some(tenant_shard) = locked.tenants.get(&req_tenant.id) { - let valid = tenant_shard.generation == Some(Generation::new(req_tenant.gen)); + let valid = tenant_shard.generation == Some(Generation::new(req_tenant.r#gen)); tracing::info!( "handle_validate: {}(gen {}): valid={valid} (latest {:?})", req_tenant.id, - req_tenant.gen, + req_tenant.r#gen, tenant_shard.generation ); - in_memory_result.push((req_tenant.id, Generation::new(req_tenant.gen), valid)); + in_memory_result.push(( + req_tenant.id, + Generation::new(req_tenant.r#gen), + valid, + )); } else { // This is legal: for example during a shard split the pageserver may still // have deletions in its queue from the old pre-split shard, or after deletion @@ -2155,13 +2158,11 @@ impl Service { // in case of controller split-brain, where some other controller process might have incremented the generation. let db_generations = self .persistence - .shard_generations(in_memory_result.iter().filter_map(|i| { - if i.2 { - Some(&i.0) - } else { - None - } - })) + .shard_generations( + in_memory_result + .iter() + .filter_map(|i| if i.2 { Some(&i.0) } else { None }), + ) .await?; let db_generations = db_generations.into_iter().collect::>(); @@ -2313,7 +2314,9 @@ impl Service { // Unique key violation: this is probably a retry. Because the shard count is part of the unique key, // if we see a unique key violation it means that the creation request's shard count matches the previous // creation's shard count. - tracing::info!("Tenant shards already present in database, proceeding with idempotent creation..."); + tracing::info!( + "Tenant shards already present in database, proceeding with idempotent creation..." + ); } // Any other database error is unexpected and a bug. 
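The gen-to-r#gen renames above are consistent with the edition-2024 style changes seen elsewhere in this patch: gen is a reserved keyword on the Rust 2024 edition, so struct fields named gen must be written as raw identifiers. serde's derive strips the r# prefix, so the wire format keeps the plain "gen" key. The struct below is illustrative only, not the real upcall API type.

use serde::Serialize;

#[derive(Serialize)]
struct ReAttachTenant {
    id: String,
    r#gen: Option<u32>,
}

fn main() {
    let t = ReAttachTenant {
        id: "tenant-0".to_string(),
        r#gen: Some(7),
    };
    // The raw-identifier prefix does not leak into the serialized name.
    assert_eq!(
        serde_json::to_string(&t).unwrap(),
        r#"{"id":"tenant-0","gen":7}"#
    );
}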
Err(e) => return Err(ApiError::InternalServerError(anyhow::anyhow!(e))), @@ -2916,7 +2919,9 @@ impl Service { first }; - let updated_config = base.apply_patch(patch); + let updated_config = base + .apply_patch(patch) + .map_err(|err| ApiError::BadRequest(anyhow::anyhow!(err)))?; self.set_tenant_config_and_reconcile(tenant_id, updated_config) .await } @@ -2992,7 +2997,7 @@ impl Service { None => { return Err(ApiError::NotFound( anyhow::anyhow!("Tenant not found").into(), - )) + )); } } }; @@ -3059,7 +3064,9 @@ impl Service { }) .find(|(_, _, mode)| *mode != LocationConfigMode::Detached); if let Some((node_id, _observed_location, mode)) = maybe_attached { - return Err(ApiError::InternalServerError(anyhow::anyhow!("We observed attached={mode:?} tenant in node_id={node_id} shard with tenant_shard_id={shard_id}"))); + return Err(ApiError::InternalServerError(anyhow::anyhow!( + "We observed attached={mode:?} tenant in node_id={node_id} shard with tenant_shard_id={shard_id}" + ))); } } let scheduler = &mut locked.scheduler; @@ -3095,7 +3102,7 @@ impl Service { let client = PageserverClient::new( node.get_id(), node.base_url(), - self.config.jwt_token.as_deref(), + self.config.pageserver_jwt_token.as_deref(), ); tracing::info!("Doing time travel recovery for shard {tenant_shard_id}",); @@ -3156,7 +3163,7 @@ impl Service { let client = PageserverClient::new( node.get_id(), node.base_url(), - self.config.jwt_token.as_deref(), + self.config.pageserver_jwt_token.as_deref(), ); futs.push(async move { let result = client @@ -3279,7 +3286,7 @@ impl Service { .tenant_delete(TenantShardId::unsharded(tenant_id)) .await }, - &self.config.jwt_token, + &self.config.pageserver_jwt_token, 1, 3, RECONCILE_TIMEOUT, @@ -3498,7 +3505,7 @@ impl Service { let timeline_info = create_one( shard_zero_tid, shard_zero_locations, - self.config.jwt_token.clone(), + self.config.pageserver_jwt_token.clone(), create_req.clone(), ) .await?; @@ -3514,7 +3521,7 @@ impl Service { // Create timeline on remaining shards with number >0 if !targets.0.is_empty() { // If we had multiple shards, issue requests for the remainder now. - let jwt = &self.config.jwt_token; + let jwt = &self.config.pageserver_jwt_token; self.tenant_for_shards( targets .0 @@ -3597,7 +3604,7 @@ impl Service { tenant_shard_id, timeline_id, node, - self.config.jwt_token.clone(), + self.config.pageserver_jwt_token.clone(), req.clone(), )) }) @@ -3678,7 +3685,7 @@ impl Service { tenant_shard_id, timeline_id, node, - self.config.jwt_token.clone(), + self.config.pageserver_jwt_token.clone(), )) }) .await?; @@ -3752,7 +3759,7 @@ impl Service { tenant_shard_id, timeline_id, node, - self.config.jwt_token.clone(), + self.config.pageserver_jwt_token.clone(), dir, )) }) @@ -3867,7 +3874,7 @@ impl Service { futs.push(async move { node.with_client_retries( |client| op(tenant_shard_id, client), - &self.config.jwt_token, + &self.config.pageserver_jwt_token, warn_threshold, max_retries, timeout, @@ -3932,7 +3939,9 @@ impl Service { // This can only happen if there is a split brain controller modifying the database. This should // never happen when testing, and if it happens in production we can only log the issue. debug_assert!(false); - tracing::error!("Shard {shard_id} not found in generation state! Is another rogue controller running?"); + tracing::error!( + "Shard {shard_id} not found in generation state! Is another rogue controller running?" 
+ ); continue; }; let (generation, generation_pageserver) = generation; @@ -3941,13 +3950,17 @@ impl Service { // This is legitimate only in a very narrow window where the shard was only just configured into // Attached mode after being created in Secondary or Detached mode, and it has had its generation // set but not yet had a Reconciler run (reconciler is the only thing that sets generation_pageserver). - tracing::warn!("Shard {shard_id} generation is set ({generation:?}) but generation_pageserver is None, reconciler not run yet?"); + tracing::warn!( + "Shard {shard_id} generation is set ({generation:?}) but generation_pageserver is None, reconciler not run yet?" + ); } } else { // This should never happen: a shard with no generation is only permitted when it was created in some state // other than PlacementPolicy::Attached (and generation is always written to DB before setting Attached in memory) debug_assert!(false); - tracing::error!("Shard {shard_id} generation is None, but it is in PlacementPolicy::Attached mode!"); + tracing::error!( + "Shard {shard_id} generation is None, but it is in PlacementPolicy::Attached mode!" + ); continue; } } @@ -4116,7 +4129,7 @@ impl Service { tenant_shard_id, timeline_id, node, - self.config.jwt_token.clone(), + self.config.pageserver_jwt_token.clone(), )) }) .await?; @@ -4138,7 +4151,7 @@ impl Service { shard_zero_tid, timeline_id, shard_zero_locations.latest.node, - self.config.jwt_token.clone(), + self.config.pageserver_jwt_token.clone(), ) .await?; Ok(shard_zero_status) @@ -4480,13 +4493,17 @@ impl Service { // if the original attachment location is offline. if let Some(node_id) = shard.intent.get_attached() { if !nodes.get(node_id).unwrap().is_available() { - tracing::info!("Demoting attached intent for {tenant_shard_id} on unavailable node {node_id}"); + tracing::info!( + "Demoting attached intent for {tenant_shard_id} on unavailable node {node_id}" + ); shard.intent.demote_attached(scheduler, *node_id); } } for node_id in shard.intent.get_secondary().clone() { if !nodes.get(&node_id).unwrap().is_available() { - tracing::info!("Dropping secondary intent for {tenant_shard_id} on unavailable node {node_id}"); + tracing::info!( + "Dropping secondary intent for {tenant_shard_id} on unavailable node {node_id}" + ); shard.intent.remove_secondary(scheduler, node_id); } } @@ -4514,7 +4531,9 @@ impl Service { // rely on the reconciliation that happens when a node transitions to Active to clean up. Since we have // removed child shards from our in-memory state and database, the reconciliation will implicitly remove // them from the node. - tracing::warn!("Node {node} unavailable, can't clean up during split abort. It will be cleaned up when it is reactivated."); + tracing::warn!( + "Node {node} unavailable, can't clean up during split abort. It will be cleaned up when it is reactivated." + ); continue; } @@ -4537,7 +4556,7 @@ impl Service { client.location_config(child_id, config, None, false).await }, - &self.config.jwt_token, + &self.config.pageserver_jwt_token, 1, 10, Duration::from_secs(5), @@ -4959,7 +4978,10 @@ impl Service { // applies the new stripe size to the children. 
let mut shard_ident = shard_ident.unwrap(); if shard_ident.count.count() > 1 && shard_ident.stripe_size != new_stripe_size { - return Err(ApiError::BadRequest(anyhow::anyhow!("Attempted to change stripe size ({:?}->{new_stripe_size:?}) on a tenant with multiple shards", shard_ident.stripe_size))); + return Err(ApiError::BadRequest(anyhow::anyhow!( + "Attempted to change stripe size ({:?}->{new_stripe_size:?}) on a tenant with multiple shards", + shard_ident.stripe_size + ))); } shard_ident.stripe_size = new_stripe_size; @@ -5137,7 +5159,7 @@ impl Service { let client = PageserverClient::new( node.get_id(), node.base_url(), - self.config.jwt_token.as_deref(), + self.config.pageserver_jwt_token.as_deref(), ); let response = client .tenant_shard_split( @@ -5214,8 +5236,11 @@ impl Service { ) .await { - tracing::warn!("Failed to update compute of {}->{} during split, proceeding anyway to complete split ({e})", - child_id, child_ps); + tracing::warn!( + "Failed to update compute of {}->{} during split, proceeding anyway to complete split ({e})", + child_id, + child_ps + ); failed_notifications.push(child_id); } } @@ -5271,9 +5296,13 @@ impl Service { match shard.policy { PlacementPolicy::Attached(n) => { // If our new attached node was a secondary, it no longer should be. - shard.intent.remove_secondary(scheduler, migrate_req.node_id); + shard + .intent + .remove_secondary(scheduler, migrate_req.node_id); - shard.intent.set_attached(scheduler, Some(migrate_req.node_id)); + shard + .intent + .set_attached(scheduler, Some(migrate_req.node_id)); // If we were already attached to something, demote that to a secondary if let Some(old_attached) = old_attached { @@ -5294,7 +5323,7 @@ impl Service { PlacementPolicy::Detached => { return Err(ApiError::BadRequest(anyhow::anyhow!( "Cannot migrate a tenant that is PlacementPolicy::Detached: configure it to an attached policy first" - ))) + ))); } } @@ -5355,7 +5384,9 @@ impl Service { shard.intent ); } else if shard.intent.get_attached() == &Some(migrate_req.node_id) { - tracing::info!("Migrating secondary to {node}: already attached where we were asked to create a secondary"); + tracing::info!( + "Migrating secondary to {node}: already attached where we were asked to create a secondary" + ); } else { let old_secondaries = shard.intent.get_secondary().clone(); for secondary in old_secondaries { @@ -5463,7 +5494,7 @@ impl Service { let client = PageserverClient::new( node.get_id(), node.base_url(), - self.config.jwt_token.as_deref(), + self.config.pageserver_jwt_token.as_deref(), ); let scan_result = client @@ -5868,7 +5899,7 @@ impl Service { return Err(ApiError::InternalServerError(anyhow::anyhow!( "{} attached as primary+secondary on the same node", tid - ))) + ))); } (true, false) => Some(false), (false, true) => Some(true), @@ -5907,8 +5938,10 @@ impl Service { ) .await; + #[derive(PartialEq)] enum RegistrationStatus { - Matched, + UpToDate, + NeedUpdate, Mismatched, New, } @@ -5917,7 +5950,11 @@ impl Service { let locked = self.inner.read().unwrap(); if let Some(node) = locked.nodes.get(®ister_req.node_id) { if node.registration_match(®ister_req) { - RegistrationStatus::Matched + if node.need_update(®ister_req) { + RegistrationStatus::NeedUpdate + } else { + RegistrationStatus::UpToDate + } } else { RegistrationStatus::Mismatched } @@ -5927,9 +5964,9 @@ impl Service { }; match registration_status { - RegistrationStatus::Matched => { + RegistrationStatus::UpToDate => { tracing::info!( - "Node {} re-registered with matching address", + "Node {} 
re-registered with matching address and is up to date", register_req.node_id ); @@ -5947,7 +5984,7 @@ impl Service { "Node is already registered with different address".to_string(), )); } - RegistrationStatus::New => { + RegistrationStatus::New | RegistrationStatus::NeedUpdate => { // fallthrough } } @@ -5976,6 +6013,16 @@ impl Service { )); } + if self.config.use_https_pageserver_api && register_req.listen_https_port.is_none() { + return Err(ApiError::PreconditionFailed( + format!( + "Node {} has no https port, but use_https is enabled", + register_req.node_id + ) + .into(), + )); + } + // Ordering: we must persist the new node _before_ adding it to in-memory state. // This ensures that before we use it for anything or expose it via any external // API, it is guaranteed to be available after a restart. @@ -5983,13 +6030,29 @@ impl Service { register_req.node_id, register_req.listen_http_addr, register_req.listen_http_port, + register_req.listen_https_port, register_req.listen_pg_addr, register_req.listen_pg_port, register_req.availability_zone_id.clone(), + self.config.use_https_pageserver_api, ); + let new_node = match new_node { + Ok(new_node) => new_node, + Err(error) => return Err(ApiError::InternalServerError(error)), + }; - // TODO: idempotency if the node already exists in the database - self.persistence.insert_node(&new_node).await?; + match registration_status { + RegistrationStatus::New => self.persistence.insert_node(&new_node).await?, + RegistrationStatus::NeedUpdate => { + self.persistence + .update_node_on_registration( + register_req.node_id, + register_req.listen_https_port, + ) + .await? + } + _ => unreachable!("Other statuses have been processed earlier"), + } let mut locked = self.inner.write().unwrap(); let mut new_nodes = (*locked.nodes).clone(); @@ -6004,12 +6067,24 @@ impl Service { .storage_controller_pageserver_nodes .set(locked.nodes.len() as i64); - tracing::info!( - "Registered pageserver {} ({}), now have {} pageservers", - register_req.node_id, - register_req.availability_zone_id, - locked.nodes.len() - ); + match registration_status { + RegistrationStatus::New => { + tracing::info!( + "Registered pageserver {} ({}), now have {} pageservers", + register_req.node_id, + register_req.availability_zone_id, + locked.nodes.len() + ); + } + RegistrationStatus::NeedUpdate => { + tracing::info!( + "Re-registered and updated node {} ({})", + register_req.node_id, + register_req.availability_zone_id, + ); + } + _ => unreachable!("Other statuses have been processed earlier"), + } Ok(()) } @@ -6027,7 +6102,9 @@ impl Service { if let Some(scheduling) = scheduling { // Scheduling is a persistent part of Node: we must write updates to the database before // applying them in memory - self.persistence.update_node(node_id, scheduling).await?; + self.persistence + .update_node_scheduling_policy(node_id, scheduling) + .await?; } // If we're activating a node, then before setting it active we must reconcile any shard locations @@ -6598,11 +6675,12 @@ impl Service { ) -> Option { let reconcile_needed = shard.get_reconcile_needed(nodes); - match reconcile_needed { + let reconcile_reason = match reconcile_needed { ReconcileNeeded::No => return None, ReconcileNeeded::WaitExisting(waiter) => return Some(waiter), - ReconcileNeeded::Yes => { + ReconcileNeeded::Yes(reason) => { // Fall through to try and acquire units for spawning reconciler + reason } }; @@ -6641,6 +6719,7 @@ impl Service { }; shard.spawn_reconciler( + reconcile_reason, &self.result_tx, nodes, &self.compute_hook, @@ 
-6765,7 +6844,7 @@ impl Service { // with the frequency of background calls, this acts as an implicit rate limit that runs a small // trickle of optimizations in the background, rather than executing a large number in parallel // when a change occurs. - const MAX_OPTIMIZATIONS_EXEC_PER_PASS: usize = 2; + const MAX_OPTIMIZATIONS_EXEC_PER_PASS: usize = 16; // Synchronous prepare: scan shards for possible scheduling optimizations let candidate_work = self.optimize_all_plan(); @@ -6816,7 +6895,7 @@ impl Service { // How many candidate optimizations we will generate, before evaluating them for readniess: setting // this higher than the execution limit gives us a chance to execute some work even if the first // few optimizations we find are not ready. - const MAX_OPTIMIZATIONS_PLAN_PER_PASS: usize = 8; + const MAX_OPTIMIZATIONS_PLAN_PER_PASS: usize = 64; let mut work = Vec::new(); let mut locked = self.inner.write().unwrap(); @@ -6863,12 +6942,16 @@ impl Service { // Check that maybe_optimizable doesn't disagree with the actual optimization functions. // Only do this in testing builds because it is not a correctness-critical check, so we shouldn't // panic in prod if we hit this, or spend cycles on it in prod. - assert!(shard - .optimize_attachment(scheduler, &schedule_context) - .is_none()); - assert!(shard - .optimize_secondary(scheduler, &schedule_context) - .is_none()); + assert!( + shard + .optimize_attachment(scheduler, &schedule_context) + .is_none() + ); + assert!( + shard + .optimize_secondary(scheduler, &schedule_context) + .is_none() + ); } continue; } @@ -6924,7 +7007,9 @@ impl Service { } Some(node) => { if !node.is_available() { - tracing::info!("Skipping optimization migration of {tenant_shard_id} to {new_attached_node_id} because node unavailable"); + tracing::info!( + "Skipping optimization migration of {tenant_shard_id} to {new_attached_node_id} because node unavailable" + ); } else { // Accumulate optimizations that require fetching secondary status, so that we can execute these // remote API requests concurrently. 
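A note on the node registration change above: re-registration is no longer a simple matched/mismatched decision. A node that matches its stored record but advertises something new (here, an https listen port) is classified as needing an in-place update, and only that field is persisted via the new `update_node_on_registration` path. The sketch below is illustrative only; `NodeRecord`, `classify`, and the field set are hypothetical names, not the storage controller's actual types.

```rust
// Illustrative sketch only: `NodeRecord` and `classify` are hypothetical names,
// not the storage controller's real types. The point is the three-way outcome:
// an otherwise-matching node that newly advertises an https port is updated in
// place rather than rejected or re-inserted.
#[derive(Debug, PartialEq)]
enum RegistrationStatus {
    UpToDate,
    NeedUpdate,
    Mismatched,
    New,
}

struct NodeRecord {
    listen_http_port: u16,
    listen_https_port: Option<u16>,
}

fn classify(existing: Option<&NodeRecord>, req: &NodeRecord) -> RegistrationStatus {
    match existing {
        None => RegistrationStatus::New,
        // Core identity (addresses/ports we already track) must match exactly.
        Some(node) if node.listen_http_port != req.listen_http_port => {
            RegistrationStatus::Mismatched
        }
        // Same node, but the request carries information we have not persisted yet.
        Some(node) if node.listen_https_port != req.listen_https_port => {
            RegistrationStatus::NeedUpdate
        }
        Some(_) => RegistrationStatus::UpToDate,
    }
}

fn main() {
    let stored = NodeRecord { listen_http_port: 9898, listen_https_port: None };
    let req = NodeRecord { listen_http_port: 9898, listen_https_port: Some(9899) };
    assert_eq!(classify(Some(&stored), &req), RegistrationStatus::NeedUpdate);
    assert_eq!(classify(None, &req), RegistrationStatus::New);
}
```

The same hunk also adds a precondition: when `use_https_pageserver_api` is enabled, a registration request without an https port is rejected up front with a 412-style error.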
@@ -6970,7 +7055,9 @@ impl Service { { match secondary_status { Err(e) => { - tracing::info!("Skipping migration of {tenant_shard_id} to {node}, error querying secondary: {e}"); + tracing::info!( + "Skipping migration of {tenant_shard_id} to {node}, error querying secondary: {e}" + ); } Ok(progress) => { // We require secondary locations to have less than 10GiB of downloads pending before we will use @@ -6983,7 +7070,9 @@ impl Service { || progress.bytes_total - progress.bytes_downloaded > DOWNLOAD_FRESHNESS_THRESHOLD { - tracing::info!("Skipping migration of {tenant_shard_id} to {node} because secondary isn't ready: {progress:?}"); + tracing::info!( + "Skipping migration of {tenant_shard_id} to {node} because secondary isn't ready: {progress:?}" + ); #[cfg(feature = "testing")] if progress.heatmap_mtime.is_none() { @@ -7043,7 +7132,7 @@ impl Service { match attached_node .with_client_retries( |client| async move { client.tenant_heatmap_upload(tenant_shard_id).await }, - &self.config.jwt_token, + &self.config.pageserver_jwt_token, 3, 10, SHORT_RECONCILE_TIMEOUT, @@ -7079,7 +7168,7 @@ impl Service { ) .await }, - &self.config.jwt_token, + &self.config.pageserver_jwt_token, 3, 10, SHORT_RECONCILE_TIMEOUT, @@ -7089,14 +7178,18 @@ impl Service { { Some(Err(e)) => { tracing::info!( - "Failed to download heatmap from {secondary_node} for {tenant_shard_id}: {e}" - ); + "Failed to download heatmap from {secondary_node} for {tenant_shard_id}: {e}" + ); } None => { - tracing::info!("Cancelled while downloading heatmap from {secondary_node} for {tenant_shard_id}"); + tracing::info!( + "Cancelled while downloading heatmap from {secondary_node} for {tenant_shard_id}" + ); } Some(Ok(progress)) => { - tracing::info!("Successfully downloaded heatmap from {secondary_node} for {tenant_shard_id}: {progress:?}"); + tracing::info!( + "Successfully downloaded heatmap from {secondary_node} for {tenant_shard_id}: {progress:?}" + ); } } } @@ -7134,7 +7227,7 @@ impl Service { let request = request_ref.clone(); client.top_tenant_shards(request.clone()).await }, - &self.config.jwt_token, + &self.config.pageserver_jwt_token, 3, 3, Duration::from_secs(5), @@ -7181,7 +7274,9 @@ impl Service { // We spawn a task to run this, so it's exactly like some external API client requesting it. We don't // want to block the background reconcile loop on this. 
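On the secondary-warmth check above: the controller only migrates an attachment onto a secondary whose heatmap exists and whose pending downloads are below a freshness threshold (the comment in this diff puts it at 10 GiB). A minimal sketch of that predicate follows; the field and constant names mirror what is visible in the diff but should be treated as assumptions rather than the exact pageserver API.

```rust
// Sketch of the "is this secondary warm enough to receive the attachment?" check.
use std::time::SystemTime;

struct SecondaryProgress {
    heatmap_mtime: Option<SystemTime>,
    bytes_total: u64,
    bytes_downloaded: u64,
}

const DOWNLOAD_FRESHNESS_THRESHOLD: u64 = 10 * 1024 * 1024 * 1024; // 10 GiB pending

fn secondary_ready(p: &SecondaryProgress) -> bool {
    // No heatmap yet means the secondary has never warmed up; too many pending
    // bytes means cutting over now would cause a long cold start on the new node.
    p.heatmap_mtime.is_some()
        && p.bytes_total.saturating_sub(p.bytes_downloaded) <= DOWNLOAD_FRESHNESS_THRESHOLD
}

fn main() {
    let cold = SecondaryProgress { heatmap_mtime: None, bytes_total: 1 << 30, bytes_downloaded: 0 };
    let warm = SecondaryProgress {
        heatmap_mtime: Some(SystemTime::now()),
        bytes_total: 1 << 30,
        bytes_downloaded: 1 << 30,
    };
    assert!(!secondary_ready(&cold));
    assert!(secondary_ready(&warm));
}
```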
- tracing::info!("Auto-splitting tenant for size threshold {split_threshold}: current size {split_candidate:?}"); + tracing::info!( + "Auto-splitting tenant for size threshold {split_threshold}: current size {split_candidate:?}" + ); let this = self.clone(); tokio::spawn( @@ -7307,7 +7402,7 @@ impl Service { match node .with_client_retries( |client| async move { client.tenant_secondary_status(tenant_shard_id).await }, - &self.config.jwt_token, + &self.config.pageserver_jwt_token, 1, 3, Duration::from_millis(250), diff --git a/storage_controller/src/service/chaos_injector.rs b/storage_controller/src/service/chaos_injector.rs index aa0ee0df5a..2ff68d7037 100644 --- a/storage_controller/src/service/chaos_injector.rs +++ b/storage_controller/src/service/chaos_injector.rs @@ -1,8 +1,6 @@ -use std::{ - collections::{BTreeMap, HashMap}, - sync::Arc, - time::Duration, -}; +use std::collections::{BTreeMap, HashMap}; +use std::sync::Arc; +use std::time::Duration; use pageserver_api::controller_api::ShardSchedulingPolicy; use rand::seq::SliceRandom; @@ -16,29 +14,80 @@ use super::{Node, Scheduler, Service, TenantShard}; pub struct ChaosInjector { service: Arc, interval: Duration, + chaos_exit_crontab: Option, +} + +fn cron_to_next_duration(cron: &cron::Schedule) -> anyhow::Result { + use chrono::Utc; + let next = cron.upcoming(Utc).next().unwrap(); + let duration = (next - Utc::now()).to_std()?; + Ok(tokio::time::sleep(duration)) +} + +async fn maybe_sleep(sleep: Option) -> Option<()> { + if let Some(sleep) = sleep { + sleep.await; + Some(()) + } else { + None + } } impl ChaosInjector { - pub fn new(service: Arc, interval: Duration) -> Self { - Self { service, interval } + pub fn new( + service: Arc, + interval: Duration, + chaos_exit_crontab: Option, + ) -> Self { + Self { + service, + interval, + chaos_exit_crontab, + } } pub async fn run(&mut self, cancel: CancellationToken) { let mut interval = tokio::time::interval(self.interval); - - loop { - tokio::select! { - _ = interval.tick() => {} - _ = cancel.cancelled() => { - tracing::info!("Shutting down"); - return; + let cron_interval = { + if let Some(ref chaos_exit_crontab) = self.chaos_exit_crontab { + match cron_to_next_duration(chaos_exit_crontab) { + Ok(interval_exit) => Some(interval_exit), + Err(e) => { + tracing::error!("Error processing the cron schedule: {e}"); + None + } } + } else { + None } - - self.inject_chaos().await; - - tracing::info!("Chaos iteration..."); + }; + enum ChaosEvent { + ShuffleTenant, + ForceKill, } + let chaos_type = tokio::select! 
{ + _ = interval.tick() => { + ChaosEvent::ShuffleTenant + } + Some(_) = maybe_sleep(cron_interval) => { + ChaosEvent::ForceKill + } + _ = cancel.cancelled() => { + tracing::info!("Shutting down"); + return; + } + }; + + match chaos_type { + ChaosEvent::ShuffleTenant => { + self.inject_chaos().await; + } + ChaosEvent::ForceKill => { + self.force_kill().await; + } + } + + tracing::info!("Chaos iteration..."); } /// If a shard has a secondary and attached location, then re-assign the secondary to be @@ -95,6 +144,11 @@ impl ChaosInjector { ); } + async fn force_kill(&mut self) { + tracing::warn!("Injecting chaos: force kill"); + std::process::exit(1); + } + async fn inject_chaos(&mut self) { // Pick some shards to interfere with let batch_size = 128; @@ -120,12 +174,19 @@ impl ChaosInjector { let mut victims = Vec::with_capacity(batch_size); if out_of_home_az.len() >= batch_size { - tracing::info!("Injecting chaos: found {batch_size} shards to migrate back to home AZ (total {} out of home AZ)", out_of_home_az.len()); + tracing::info!( + "Injecting chaos: found {batch_size} shards to migrate back to home AZ (total {} out of home AZ)", + out_of_home_az.len() + ); out_of_home_az.shuffle(&mut thread_rng()); victims.extend(out_of_home_az.into_iter().take(batch_size)); } else { - tracing::info!("Injecting chaos: found {} shards to migrate back to home AZ, picking {} random shards to migrate", out_of_home_az.len(), std::cmp::min(batch_size - out_of_home_az.len(), in_home_az.len())); + tracing::info!( + "Injecting chaos: found {} shards to migrate back to home AZ, picking {} random shards to migrate", + out_of_home_az.len(), + std::cmp::min(batch_size - out_of_home_az.len(), in_home_az.len()) + ); victims.extend(out_of_home_az); in_home_az.shuffle(&mut thread_rng()); diff --git a/storage_controller/src/service/context_iterator.rs b/storage_controller/src/service/context_iterator.rs index dd6913e988..c4784e5e36 100644 --- a/storage_controller/src/service/context_iterator.rs +++ b/storage_controller/src/service/context_iterator.rs @@ -54,17 +54,16 @@ impl<'a> Iterator for TenantShardContextIterator<'a> { #[cfg(test)] mod tests { - use std::{collections::BTreeMap, str::FromStr}; + use std::collections::BTreeMap; + use std::str::FromStr; use pageserver_api::controller_api::PlacementPolicy; use utils::shard::{ShardCount, ShardNumber}; - use crate::{ - scheduler::test_utils::make_test_nodes, service::Scheduler, - tenant_shard::tests::make_test_tenant_with_id, - }; - use super::*; + use crate::scheduler::test_utils::make_test_nodes; + use crate::service::Scheduler; + use crate::tenant_shard::tests::make_test_tenant_with_id; #[test] fn test_context_iterator() { diff --git a/storage_controller/src/tenant_shard.rs b/storage_controller/src/tenant_shard.rs index 219c0dffe7..34fd244023 100644 --- a/storage_controller/src/tenant_shard.rs +++ b/storage_controller/src/tenant_shard.rs @@ -1,50 +1,39 @@ -use std::{ - collections::{HashMap, HashSet}, - sync::Arc, - time::Duration, -}; +use std::collections::{HashMap, HashSet}; +use std::sync::Arc; +use std::time::Duration; -use crate::{ - metrics::{ - self, ReconcileCompleteLabelGroup, ReconcileLongRunningLabelGroup, ReconcileOutcome, - }, - persistence::TenantShardPersistence, - reconciler::{ReconcileUnits, ReconcilerConfig}, - scheduler::{ - AffinityScore, AttachedShardTag, NodeSchedulingScore, NodeSecondarySchedulingScore, - RefCountUpdate, ScheduleContext, SecondaryShardTag, ShardTag, - }, - service::ReconcileResultRequest, -}; use futures::future::{self, Either}; 
use itertools::Itertools; use pageserver_api::controller_api::{AvailabilityZone, PlacementPolicy, ShardSchedulingPolicy}; -use pageserver_api::{ - models::{LocationConfig, LocationConfigMode, TenantConfig}, - shard::{ShardIdentity, TenantShardId}, -}; +use pageserver_api::models::{LocationConfig, LocationConfigMode, TenantConfig}; +use pageserver_api::shard::{ShardIdentity, TenantShardId}; use serde::{Deserialize, Serialize}; use tokio::task::JoinHandle; use tokio_util::sync::CancellationToken; -use tracing::{instrument, Instrument}; -use utils::{ - generation::Generation, - id::NodeId, - seqwait::{SeqWait, SeqWaitError}, - shard::ShardCount, - sync::gate::GateGuard, -}; +use tracing::{Instrument, instrument}; +use utils::generation::Generation; +use utils::id::NodeId; +use utils::seqwait::{SeqWait, SeqWaitError}; +use utils::shard::ShardCount; +use utils::sync::gate::GateGuard; -use crate::{ - compute_hook::ComputeHook, - node::Node, - persistence::{split_state::SplitState, Persistence}, - reconciler::{ - attached_location_conf, secondary_location_conf, ReconcileError, Reconciler, TargetState, - }, - scheduler::{ScheduleError, Scheduler}, - service, Sequence, +use crate::compute_hook::ComputeHook; +use crate::metrics::{ + self, ReconcileCompleteLabelGroup, ReconcileLongRunningLabelGroup, ReconcileOutcome, }; +use crate::node::Node; +use crate::persistence::split_state::SplitState; +use crate::persistence::{Persistence, TenantShardPersistence}; +use crate::reconciler::{ + ReconcileError, ReconcileUnits, Reconciler, ReconcilerConfig, TargetState, + attached_location_conf, secondary_location_conf, +}; +use crate::scheduler::{ + AffinityScore, AttachedShardTag, NodeSchedulingScore, NodeSecondarySchedulingScore, + RefCountUpdate, ScheduleContext, ScheduleError, Scheduler, SecondaryShardTag, ShardTag, +}; +use crate::service::ReconcileResultRequest; +use crate::{Sequence, service}; /// Serialization helper fn read_last_error(v: &std::sync::Mutex>, serializer: S) -> Result @@ -481,7 +470,14 @@ pub(crate) enum ReconcileNeeded { /// spawned: wait for the existing reconciler rather than spawning a new one. WaitExisting(ReconcilerWaiter), /// shard needs reconciliation: call into [`TenantShard::spawn_reconciler`] - Yes, + Yes(ReconcileReason), +} + +#[derive(Debug)] +pub(crate) enum ReconcileReason { + ActiveNodesDirty, + UnknownLocation, + PendingComputeNotification, } /// Pending modification to the observed state of a tenant shard. @@ -828,7 +824,9 @@ impl TenantShard { let current_score = current_score.for_optimization(); if candidate_score < current_score { - tracing::info!("Found a lower scoring location! {candidate} is better than {current} ({candidate_score:?} is better than {current_score:?})"); + tracing::info!( + "Found a lower scoring location! {candidate} is better than {current} ({candidate_score:?} is better than {current_score:?})" + ); Some(true) } else { // The candidate node is no better than our current location, so don't migrate @@ -998,7 +996,7 @@ impl TenantShard { // most cases, even if some nodes are offline or have scheduling=pause set. 
debug_assert!(self.intent.attached.is_some()); // We should not make it here unless attached -- this - // logic presumes we are in a mode where we want secondaries to be in non-home AZ + // logic presumes we are in a mode where we want secondaries to be in non-home AZ if let Some(retain_secondary) = self.intent.get_secondary().iter().find(|n| { let in_home_az = scheduler.get_node_az(n) == self.intent.preferred_az_id; let is_available = secondary_scores @@ -1022,7 +1020,8 @@ impl TenantShard { } // Fall through: we didn't identify one to remove. This ought to be rare. - tracing::warn!("Keeping extra secondaries: can't determine which of {:?} to remove (some nodes offline?)", + tracing::warn!( + "Keeping extra secondaries: can't determine which of {:?} to remove (some nodes offline?)", self.intent.get_secondary() ); } else { @@ -1341,12 +1340,18 @@ impl TenantShard { let active_nodes_dirty = self.dirty(pageservers); - // Even if there is no pageserver work to be done, if we have a pending notification to computes, - // wake up a reconciler to send it. - let do_reconcile = - active_nodes_dirty || dirty_observed || self.pending_compute_notification; + let reconcile_needed = match ( + active_nodes_dirty, + dirty_observed, + self.pending_compute_notification, + ) { + (true, _, _) => ReconcileNeeded::Yes(ReconcileReason::ActiveNodesDirty), + (_, true, _) => ReconcileNeeded::Yes(ReconcileReason::UnknownLocation), + (_, _, true) => ReconcileNeeded::Yes(ReconcileReason::PendingComputeNotification), + _ => ReconcileNeeded::No, + }; - if !do_reconcile { + if matches!(reconcile_needed, ReconcileNeeded::No) { tracing::debug!("Not dirty, no reconciliation needed."); return ReconcileNeeded::No; } @@ -1389,7 +1394,7 @@ impl TenantShard { } } - ReconcileNeeded::Yes + reconcile_needed } /// Ensure the sequence number is set to a value where waiting for this value will make us wait @@ -1479,6 +1484,7 @@ impl TenantShard { #[instrument(skip_all, fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug()))] pub(crate) fn spawn_reconciler( &mut self, + reason: ReconcileReason, result_tx: &tokio::sync::mpsc::UnboundedSender, pageservers: &Arc>, compute_hook: &Arc, @@ -1538,7 +1544,7 @@ impl TenantShard { let reconcile_seq = self.sequence; let long_reconcile_threshold = service_config.long_reconcile_threshold; - tracing::info!(seq=%reconcile_seq, "Spawning Reconciler for sequence {}", self.sequence); + tracing::info!(seq=%reconcile_seq, "Spawning Reconciler ({reason:?})"); let must_notify = self.pending_compute_notification; let reconciler_span = tracing::info_span!(parent: None, "reconciler", seq=%reconcile_seq, tenant_id=%reconciler.tenant_shard_id.tenant_id, @@ -1784,8 +1790,8 @@ impl TenantShard { let conf = observed.conf.as_ref()?; match (conf.generation, conf.mode) { - (Some(gen), AttachedMulti | AttachedSingle | AttachedStale) => { - Some((*node_id, gen)) + (Some(gen_), AttachedMulti | AttachedSingle | AttachedStale) => { + Some((*node_id, gen_)) } _ => None, } @@ -1793,7 +1799,7 @@ impl TenantShard { .sorted_by(|(_lhs_node_id, lhs_gen), (_rhs_node_id, rhs_gen)| { lhs_gen.cmp(rhs_gen).reverse() }) - .map(|(node_id, gen)| (node_id, Generation::new(gen))) + .map(|(node_id, gen_)| (node_id, Generation::new(gen_))) .collect() } @@ -1825,7 +1831,10 @@ impl TenantShard { (Some(crnt), Some(new)) if crnt_gen > new_gen => { tracing::warn!( "Skipping observed state update {}: {:?} and using None due to stale generation ({} > {})", - node_id, loc, crnt, new + node_id, + loc, + crnt, 
+ new ); self.observed @@ -1882,18 +1891,17 @@ impl Drop for TenantShard { #[cfg(test)] pub(crate) mod tests { - use std::{cell::RefCell, rc::Rc}; + use std::cell::RefCell; + use std::rc::Rc; - use pageserver_api::{ - controller_api::NodeAvailability, - shard::{ShardCount, ShardNumber}, - }; - use rand::{rngs::StdRng, SeedableRng}; + use pageserver_api::controller_api::NodeAvailability; + use pageserver_api::shard::{ShardCount, ShardNumber}; + use rand::SeedableRng; + use rand::rngs::StdRng; use utils::id::TenantId; - use crate::scheduler::test_utils::make_test_nodes; - use super::*; + use crate::scheduler::test_utils::make_test_nodes; fn make_test_tenant_shard(policy: PlacementPolicy) -> TenantShard { let tenant_id = TenantId::generate(); @@ -2071,16 +2079,20 @@ pub(crate) mod tests { // In pause mode, schedule() shouldn't do anything tenant_shard.scheduling_policy = ShardSchedulingPolicy::Pause; - assert!(tenant_shard - .schedule(&mut scheduler, &mut ScheduleContext::default()) - .is_ok()); + assert!( + tenant_shard + .schedule(&mut scheduler, &mut ScheduleContext::default()) + .is_ok() + ); assert!(tenant_shard.intent.all_pageservers().is_empty()); // In active mode, schedule() works tenant_shard.scheduling_policy = ShardSchedulingPolicy::Active; - assert!(tenant_shard - .schedule(&mut scheduler, &mut ScheduleContext::default()) - .is_ok()); + assert!( + tenant_shard + .schedule(&mut scheduler, &mut ScheduleContext::default()) + .is_ok() + ); assert!(!tenant_shard.intent.all_pageservers().is_empty()); tenant_shard.intent.clear(&mut scheduler); @@ -2607,9 +2619,11 @@ pub(crate) mod tests { ); let mut schedule_context = ScheduleContext::default(); for shard in &mut shards { - assert!(shard - .schedule(&mut scheduler, &mut schedule_context) - .is_ok()); + assert!( + shard + .schedule(&mut scheduler, &mut schedule_context) + .is_ok() + ); } // Initial: attached locations land in the tenant's home AZ. 
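The `ChaosInjector` change earlier in this diff adds an optional crontab-driven exit alongside the regular shuffle interval: it computes a `tokio::time::Sleep` lasting until the next cron occurrence and force-kills the process when that timer fires. Below is a self-contained sketch of the cron-to-sleep pattern, assuming the `cron`, `chrono`, `tokio`, and `anyhow` crates are available; the schedule string is only an example (the `cron` crate expects a leading seconds field).

```rust
// Minimal sketch: parse a crontab, find the next occurrence, sleep until then.
use std::str::FromStr;

use chrono::Utc;
use cron::Schedule;

fn sleep_until_next(schedule: &Schedule) -> anyhow::Result<tokio::time::Sleep> {
    let next = schedule
        .upcoming(Utc)
        .next()
        .ok_or_else(|| anyhow::anyhow!("cron schedule has no upcoming occurrence"))?;
    let duration = (next - Utc::now()).to_std()?;
    Ok(tokio::time::sleep(duration))
}

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // Example: every day at 03:00:00 UTC.
    let schedule = Schedule::from_str("0 0 3 * * *")?;
    sleep_until_next(&schedule)?.await;
    println!("chaos exit point reached");
    Ok(())
}
```

Keeping the sleep as an `Option` (as the patch does with `maybe_sleep`) lets the same `tokio::select!` arm be disabled entirely when no crontab is configured.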
diff --git a/storage_scrubber/Cargo.toml b/storage_scrubber/Cargo.toml index 609f3bf009..7f6544b894 100644 --- a/storage_scrubber/Cargo.toml +++ b/storage_scrubber/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "storage_scrubber" version = "0.1.0" -edition.workspace = true +edition = "2024" license.workspace = true [dependencies] diff --git a/storage_scrubber/src/checks.rs b/storage_scrubber/src/checks.rs index b42709868b..f0ba632fd4 100644 --- a/storage_scrubber/src/checks.rs +++ b/storage_scrubber/src/checks.rs @@ -1,12 +1,19 @@ use std::collections::{HashMap, HashSet}; use std::time::SystemTime; +use futures_util::StreamExt; use itertools::Itertools; +use pageserver::tenant::IndexPart; use pageserver::tenant::checks::check_valid_layermap; use pageserver::tenant::layer_map::LayerMap; use pageserver::tenant::remote_timeline_client::index::LayerFileMetadata; use pageserver::tenant::remote_timeline_client::manifest::TenantManifest; +use pageserver::tenant::remote_timeline_client::{ + parse_remote_index_path, parse_remote_tenant_manifest_path, remote_layer_path, +}; +use pageserver::tenant::storage_layer::LayerName; use pageserver_api::shard::ShardIndex; +use remote_storage::{GenericRemoteStorage, ListingObject, RemotePath}; use tokio_util::sync::CancellationToken; use tracing::{info, warn}; use utils::generation::Generation; @@ -15,14 +22,7 @@ use utils::shard::TenantShardId; use crate::cloud_admin_api::BranchData; use crate::metadata_stream::stream_listing; -use crate::{download_object_with_retries, RootTarget, TenantShardTimelineId}; -use futures_util::StreamExt; -use pageserver::tenant::remote_timeline_client::{ - parse_remote_index_path, parse_remote_tenant_manifest_path, remote_layer_path, -}; -use pageserver::tenant::storage_layer::LayerName; -use pageserver::tenant::IndexPart; -use remote_storage::{GenericRemoteStorage, ListingObject, RemotePath}; +use crate::{RootTarget, TenantShardTimelineId, download_object_with_retries}; pub(crate) struct TimelineAnalysis { /// Anomalies detected @@ -329,11 +329,11 @@ pub(crate) enum BlobDataParseResult { pub(crate) fn parse_layer_object_name(name: &str) -> Result<(LayerName, Generation), String> { match name.rsplit_once('-') { // FIXME: this is gross, just use a regex? 
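// Editorial note on the `gen` -> `gen_` renames that follow (an inference from the
// `edition = "2024"` bump in this crate's Cargo.toml above, not a statement made by
// the patch itself): `gen` is a reserved keyword in the Rust 2024 edition, so
// bindings that used to be called `gen` have to be renamed, hence `gen_`.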
- Some((layer_filename, gen)) if gen.len() == 8 => { + Some((layer_filename, gen_)) if gen_.len() == 8 => { let layer = layer_filename.parse::()?; - let gen = - Generation::parse_suffix(gen).ok_or("Malformed generation suffix".to_string())?; - Ok((layer, gen)) + let gen_ = + Generation::parse_suffix(gen_).ok_or("Malformed generation suffix".to_string())?; + Ok((layer, gen_)) } _ => Ok((name.parse::()?, Generation::none())), } @@ -423,9 +423,9 @@ async fn list_timeline_blobs_impl( tracing::info!("initdb archive preserved {key}"); } Some(maybe_layer_name) => match parse_layer_object_name(maybe_layer_name) { - Ok((new_layer, gen)) => { - tracing::debug!("Parsed layer key: {new_layer} {gen:?}"); - s3_layers.insert((new_layer, gen)); + Ok((new_layer, gen_)) => { + tracing::debug!("Parsed layer key: {new_layer} {gen_:?}"); + s3_layers.insert((new_layer, gen_)); } Err(e) => { tracing::info!("Error parsing {maybe_layer_name} as layer name: {e}"); @@ -465,7 +465,7 @@ async fn list_timeline_blobs_impl( .max_by_key(|i| i.1) .map(|(k, g)| (k.clone(), g)) { - Some((key, gen)) => (Some::(key.to_owned()), gen), + Some((key, gen_)) => (Some::(key.to_owned()), gen_), None => { // Legacy/missing case: one or zero index parts, which did not have a generation (index_part_keys.pop(), Generation::none()) @@ -521,7 +521,7 @@ async fn list_timeline_blobs_impl( }, unused_index_keys: index_part_keys, unknown_keys, - })) + })); } Err(index_parse_error) => errors.push(format!( "index_part.json body parsing error: {index_parse_error}" @@ -631,7 +631,7 @@ pub(crate) async fn list_tenant_manifests( .map(|(g, obj)| (*g, obj.clone())) .unwrap(); - manifests.retain(|(gen, _obj)| gen != &latest_generation); + manifests.retain(|(gen_, _obj)| gen_ != &latest_generation); let manifest_bytes = match download_object_with_retries(remote_client, &latest_listing_object.key).await { diff --git a/storage_scrubber/src/cloud_admin_api.rs b/storage_scrubber/src/cloud_admin_api.rs index b1dfe3a53f..5cf286c662 100644 --- a/storage_scrubber/src/cloud_admin_api.rs +++ b/storage_scrubber/src/cloud_admin_api.rs @@ -3,11 +3,9 @@ use std::error::Error as _; use chrono::{DateTime, Utc}; use futures::Future; use hex::FromHex; - -use reqwest::{header, Client, StatusCode, Url}; +use reqwest::{Client, StatusCode, Url, header}; use serde::Deserialize; use tokio::sync::Semaphore; - use tokio_util::sync::CancellationToken; use utils::backoff; use utils::id::{TenantId, TimelineId}; diff --git a/storage_scrubber/src/find_large_objects.rs b/storage_scrubber/src/find_large_objects.rs index 95d3af1453..efb05fb55e 100644 --- a/storage_scrubber/src/find_large_objects.rs +++ b/storage_scrubber/src/find_large_objects.rs @@ -5,10 +5,9 @@ use pageserver::tenant::storage_layer::LayerName; use remote_storage::ListingMode; use serde::{Deserialize, Serialize}; -use crate::{ - checks::parse_layer_object_name, init_remote, metadata_stream::stream_tenants, - stream_objects_with_retries, BucketConfig, NodeKind, -}; +use crate::checks::parse_layer_object_name; +use crate::metadata_stream::stream_tenants; +use crate::{BucketConfig, NodeKind, init_remote, stream_objects_with_retries}; #[derive(Serialize, Deserialize, Clone, Copy, PartialEq, Eq)] enum LargeObjectKind { diff --git a/storage_scrubber/src/garbage.rs b/storage_scrubber/src/garbage.rs index a4e5107e3d..e4f69a1669 100644 --- a/storage_scrubber/src/garbage.rs +++ b/storage_scrubber/src/garbage.rs @@ -3,11 +3,9 @@ //! Garbage means S3 objects which are either not referenced by any metadata, //! 
or are referenced by a control plane tenant/timeline in a deleted state. -use std::{ - collections::{HashMap, HashSet}, - sync::Arc, - time::Duration, -}; +use std::collections::{HashMap, HashSet}; +use std::sync::Arc; +use std::time::Duration; use anyhow::Context; use futures_util::TryStreamExt; @@ -16,13 +14,14 @@ use remote_storage::{GenericRemoteStorage, ListingMode, ListingObject, RemotePat use serde::{Deserialize, Serialize}; use tokio_stream::StreamExt; use tokio_util::sync::CancellationToken; -use utils::{backoff, id::TenantId}; +use utils::backoff; +use utils::id::TenantId; +use crate::cloud_admin_api::{CloudAdminApiClient, MaybeDeleted, ProjectData}; +use crate::metadata_stream::{stream_tenant_timelines, stream_tenants_maybe_prefix}; use crate::{ - cloud_admin_api::{CloudAdminApiClient, MaybeDeleted, ProjectData}, + BucketConfig, ConsoleConfig, MAX_RETRIES, NodeKind, TenantShardTimelineId, TraversingDepth, init_remote, list_objects_with_retries, - metadata_stream::{stream_tenant_timelines, stream_tenants_maybe_prefix}, - BucketConfig, ConsoleConfig, NodeKind, TenantShardTimelineId, TraversingDepth, MAX_RETRIES, }; #[derive(Serialize, Deserialize, Debug)] @@ -259,14 +258,21 @@ async fn find_garbage_inner( .await?; if let Some(object) = tenant_objects.keys.first() { if object.key.get_path().as_str().ends_with("heatmap-v1.json") { - tracing::info!("Tenant {tenant_shard_id}: is missing in console and is only a heatmap (known historic deletion bug)"); + tracing::info!( + "Tenant {tenant_shard_id}: is missing in console and is only a heatmap (known historic deletion bug)" + ); garbage.append_buggy(GarbageEntity::Tenant(tenant_shard_id)); continue; } else { - tracing::info!("Tenant {tenant_shard_id} is missing in console and contains one object: {}", object.key); + tracing::info!( + "Tenant {tenant_shard_id} is missing in console and contains one object: {}", + object.key + ); } } else { - tracing::info!("Tenant {tenant_shard_id} is missing in console appears to have been deleted while we ran"); + tracing::info!( + "Tenant {tenant_shard_id} is missing in console appears to have been deleted while we ran" + ); } } else { // A console-unknown tenant with timelines: check if these timelines only contain initdb.tar.zst, from the initial @@ -295,9 +301,13 @@ async fn find_garbage_inner( } if any_non_initdb { - tracing::info!("Tenant {tenant_shard_id}: is missing in console and contains timelines, one or more of which are more than just initdb"); + tracing::info!( + "Tenant {tenant_shard_id}: is missing in console and contains timelines, one or more of which are more than just initdb" + ); } else { - tracing::info!("Tenant {tenant_shard_id}: is missing in console and contains only timelines that only contain initdb"); + tracing::info!( + "Tenant {tenant_shard_id}: is missing in console and contains only timelines that only contain initdb" + ); garbage.append_buggy(GarbageEntity::Tenant(tenant_shard_id)); continue; } @@ -546,7 +556,9 @@ pub async fn purge_garbage( .any(|g| matches!(g.entity, GarbageEntity::Timeline(_))) && garbage_list.active_timeline_count == 0 { - anyhow::bail!("Refusing to purge a garbage list containing garbage timelines that reports 0 active timelines"); + anyhow::bail!( + "Refusing to purge a garbage list containing garbage timelines that reports 0 active timelines" + ); } let filtered_items = garbage_list diff --git a/storage_scrubber/src/lib.rs b/storage_scrubber/src/lib.rs index 224235098c..34e43fcc0b 100644 --- a/storage_scrubber/src/lib.rs +++ 
b/storage_scrubber/src/lib.rs @@ -17,15 +17,14 @@ use std::time::{Duration, SystemTime}; use anyhow::Context; use aws_config::retry::{RetryConfigBuilder, RetryMode}; +use aws_sdk_s3::Client; use aws_sdk_s3::config::Region; use aws_sdk_s3::error::DisplayErrorContext; -use aws_sdk_s3::Client; - use camino::{Utf8Path, Utf8PathBuf}; use clap::ValueEnum; use futures::{Stream, StreamExt}; -use pageserver::tenant::remote_timeline_client::{remote_tenant_path, remote_timeline_path}; use pageserver::tenant::TENANTS_SEGMENT_NAME; +use pageserver::tenant::remote_timeline_client::{remote_tenant_path, remote_timeline_path}; use pageserver_api::shard::TenantShardId; use remote_storage::{ DownloadOpts, GenericRemoteStorage, Listing, ListingMode, RemotePath, RemoteStorageConfig, @@ -38,7 +37,8 @@ use tokio::io::AsyncReadExt; use tokio_util::sync::CancellationToken; use tracing::{error, warn}; use tracing_appender::non_blocking::WorkerGuard; -use tracing_subscriber::{fmt, prelude::*, EnvFilter}; +use tracing_subscriber::prelude::*; +use tracing_subscriber::{EnvFilter, fmt}; use utils::fs_ext; use utils::id::{TenantId, TenantTimelineId, TimelineId}; @@ -411,10 +411,10 @@ async fn init_remote( let default_prefix = default_prefix_in_bucket(node_kind).to_string(); match &mut storage_config.0.storage { - RemoteStorageKind::AwsS3(ref mut config) => { + RemoteStorageKind::AwsS3(config) => { config.prefix_in_bucket.get_or_insert(default_prefix); } - RemoteStorageKind::AzureContainer(ref mut config) => { + RemoteStorageKind::AzureContainer(config) => { config.prefix_in_container.get_or_insert(default_prefix); } RemoteStorageKind::LocalFs { .. } => (), diff --git a/storage_scrubber/src/main.rs b/storage_scrubber/src/main.rs index fa6ee90b66..fb2ab02565 100644 --- a/storage_scrubber/src/main.rs +++ b/storage_scrubber/src/main.rs @@ -1,24 +1,20 @@ -use anyhow::{anyhow, bail, Context}; +use anyhow::{Context, anyhow, bail}; use camino::Utf8PathBuf; +use clap::{Parser, Subcommand}; use pageserver_api::controller_api::{MetadataHealthUpdateRequest, MetadataHealthUpdateResponse}; use pageserver_api::shard::TenantShardId; use reqwest::{Method, Url}; use storage_controller_client::control_api; -use storage_scrubber::garbage::{find_garbage, purge_garbage, PurgeMode}; -use storage_scrubber::pageserver_physical_gc::GcMode; +use storage_scrubber::garbage::{PurgeMode, find_garbage, purge_garbage}; +use storage_scrubber::pageserver_physical_gc::{GcMode, pageserver_physical_gc}; use storage_scrubber::scan_pageserver_metadata::scan_pageserver_metadata; -use storage_scrubber::scan_safekeeper_metadata::DatabaseOrList; +use storage_scrubber::scan_safekeeper_metadata::{DatabaseOrList, scan_safekeeper_metadata}; use storage_scrubber::tenant_snapshot::SnapshotDownloader; -use storage_scrubber::{find_large_objects, ControllerClientConfig}; use storage_scrubber::{ - init_logging, pageserver_physical_gc::pageserver_physical_gc, - scan_safekeeper_metadata::scan_safekeeper_metadata, BucketConfig, ConsoleConfig, NodeKind, - TraversingDepth, + BucketConfig, ConsoleConfig, ControllerClientConfig, NodeKind, TraversingDepth, + find_large_objects, init_logging, }; - -use clap::{Parser, Subcommand}; use utils::id::TenantId; - use utils::{project_build_tag, project_git_version}; project_git_version!(GIT_VERSION); @@ -173,15 +169,23 @@ async fn main() -> anyhow::Result<()> { if let NodeKind::Safekeeper = node_kind { let db_or_list = match (timeline_lsns, dump_db_connstr) { (Some(timeline_lsns), _) => { - let timeline_lsns = 
serde_json::from_str(&timeline_lsns).context("parsing timeline_lsns")?; + let timeline_lsns = serde_json::from_str(&timeline_lsns) + .context("parsing timeline_lsns")?; DatabaseOrList::List(timeline_lsns) } (None, Some(dump_db_connstr)) => { - let dump_db_table = dump_db_table.ok_or_else(|| anyhow::anyhow!("dump_db_table not specified"))?; + let dump_db_table = dump_db_table + .ok_or_else(|| anyhow::anyhow!("dump_db_table not specified"))?; let tenant_ids = tenant_ids.iter().map(|tshid| tshid.tenant_id).collect(); - DatabaseOrList::Database { tenant_ids, connstr: dump_db_connstr, table: dump_db_table } + DatabaseOrList::Database { + tenant_ids, + connstr: dump_db_connstr, + table: dump_db_table, + } } - (None, None) => anyhow::bail!("neither `timeline_lsns` specified, nor `dump_db_connstr` and `dump_db_table`"), + (None, None) => anyhow::bail!( + "neither `timeline_lsns` specified, nor `dump_db_connstr` and `dump_db_table`" + ), }; let summary = scan_safekeeper_metadata(bucket_config.clone(), db_or_list).await?; if json { @@ -371,7 +375,9 @@ pub async fn scan_pageserver_metadata_cmd( exit_code: bool, ) -> anyhow::Result<()> { if controller_client.is_none() && post_to_storcon { - return Err(anyhow!("Posting pageserver scan health status to storage controller requires `--controller-api` and `--controller-jwt` to run")); + return Err(anyhow!( + "Posting pageserver scan health status to storage controller requires `--controller-api` and `--controller-jwt` to run" + )); } match scan_pageserver_metadata(bucket_config.clone(), tenant_shard_ids, verbose).await { Err(e) => { diff --git a/storage_scrubber/src/metadata_stream.rs b/storage_scrubber/src/metadata_stream.rs index 47447d681c..af2407856d 100644 --- a/storage_scrubber/src/metadata_stream.rs +++ b/storage_scrubber/src/metadata_stream.rs @@ -1,17 +1,17 @@ use std::str::FromStr; -use anyhow::{anyhow, Context}; +use anyhow::{Context, anyhow}; use async_stream::{stream, try_stream}; use futures::StreamExt; +use pageserver_api::shard::TenantShardId; use remote_storage::{GenericRemoteStorage, ListingMode, ListingObject, RemotePath}; use tokio_stream::Stream; +use utils::id::{TenantId, TimelineId}; use crate::{ - list_objects_with_retries, stream_objects_with_retries, RootTarget, S3Target, - TenantShardTimelineId, + RootTarget, S3Target, TenantShardTimelineId, list_objects_with_retries, + stream_objects_with_retries, }; -use pageserver_api::shard::TenantShardId; -use utils::id::{TenantId, TimelineId}; /// Given a remote storage and a target, output a stream of TenantIds discovered via listing prefixes pub fn stream_tenants<'a>( diff --git a/storage_scrubber/src/pageserver_physical_gc.rs b/storage_scrubber/src/pageserver_physical_gc.rs index 063c6bcfb9..c956b1abbc 100644 --- a/storage_scrubber/src/pageserver_physical_gc.rs +++ b/storage_scrubber/src/pageserver_physical_gc.rs @@ -2,22 +2,16 @@ use std::collections::{BTreeMap, BTreeSet, HashMap}; use std::sync::Arc; use std::time::Duration; -use crate::checks::{ - list_tenant_manifests, list_timeline_blobs, BlobDataParseResult, ListTenantManifestResult, - RemoteTenantManifestInfo, -}; -use crate::metadata_stream::{stream_tenant_timelines, stream_tenants}; -use crate::{init_remote, BucketConfig, NodeKind, RootTarget, TenantShardTimelineId, MAX_RETRIES}; use async_stream::try_stream; use futures::future::Either; use futures_util::{StreamExt, TryStreamExt}; +use pageserver::tenant::IndexPart; use pageserver::tenant::remote_timeline_client::index::LayerFileMetadata; use 
pageserver::tenant::remote_timeline_client::manifest::OffloadedTimelineManifest; use pageserver::tenant::remote_timeline_client::{ parse_remote_index_path, parse_remote_tenant_manifest_path, remote_layer_path, }; use pageserver::tenant::storage_layer::LayerName; -use pageserver::tenant::IndexPart; use pageserver_api::controller_api::TenantDescribeResponse; use pageserver_api::shard::{ShardIndex, TenantShardId}; use remote_storage::{GenericRemoteStorage, ListingObject, RemotePath}; @@ -25,11 +19,18 @@ use reqwest::Method; use serde::Serialize; use storage_controller_client::control_api; use tokio_util::sync::CancellationToken; -use tracing::{info_span, Instrument}; +use tracing::{Instrument, info_span}; use utils::backoff; use utils::generation::Generation; use utils::id::{TenantId, TenantTimelineId}; +use crate::checks::{ + BlobDataParseResult, ListTenantManifestResult, RemoteTenantManifestInfo, list_tenant_manifests, + list_timeline_blobs, +}; +use crate::metadata_stream::{stream_tenant_timelines, stream_tenants}; +use crate::{BucketConfig, MAX_RETRIES, NodeKind, RootTarget, TenantShardTimelineId, init_remote}; + #[derive(Serialize, Default)] pub struct GcSummary { indices_deleted: usize, diff --git a/storage_scrubber/src/scan_pageserver_metadata.rs b/storage_scrubber/src/scan_pageserver_metadata.rs index a31fb5b242..ba75f25984 100644 --- a/storage_scrubber/src/scan_pageserver_metadata.rs +++ b/storage_scrubber/src/scan_pageserver_metadata.rs @@ -1,21 +1,22 @@ use std::collections::{HashMap, HashSet}; -use crate::checks::{ - branch_cleanup_and_check_errors, list_timeline_blobs, BlobDataParseResult, - RemoteTimelineBlobData, TenantObjectListing, TimelineAnalysis, -}; -use crate::metadata_stream::{stream_tenant_timelines, stream_tenants}; -use crate::{init_remote, BucketConfig, NodeKind, RootTarget, TenantShardTimelineId}; use futures_util::{StreamExt, TryStreamExt}; use pageserver::tenant::remote_timeline_client::remote_layer_path; use pageserver_api::controller_api::MetadataHealthUpdateRequest; use pageserver_api::shard::TenantShardId; use remote_storage::GenericRemoteStorage; use serde::Serialize; -use tracing::{info_span, Instrument}; +use tracing::{Instrument, info_span}; use utils::id::TenantId; use utils::shard::ShardCount; +use crate::checks::{ + BlobDataParseResult, RemoteTimelineBlobData, TenantObjectListing, TimelineAnalysis, + branch_cleanup_and_check_errors, list_timeline_blobs, +}; +use crate::metadata_stream::{stream_tenant_timelines, stream_tenants}; +use crate::{BucketConfig, NodeKind, RootTarget, TenantShardTimelineId, init_remote}; + #[derive(Serialize, Default)] pub struct MetadataSummary { tenant_count: usize, diff --git a/storage_scrubber/src/scan_safekeeper_metadata.rs b/storage_scrubber/src/scan_safekeeper_metadata.rs index 0a4d4266a0..f10d758097 100644 --- a/storage_scrubber/src/scan_safekeeper_metadata.rs +++ b/storage_scrubber/src/scan_safekeeper_metadata.rs @@ -1,23 +1,24 @@ -use std::{collections::HashSet, str::FromStr, sync::Arc}; +use std::collections::HashSet; +use std::str::FromStr; +use std::sync::Arc; -use anyhow::{bail, Context}; +use anyhow::{Context, bail}; use futures::stream::{StreamExt, TryStreamExt}; use once_cell::sync::OnceCell; use pageserver_api::shard::TenantShardId; -use postgres_ffi::{XLogFileName, PG_TLI}; +use postgres_ffi::{PG_TLI, XLogFileName}; use remote_storage::GenericRemoteStorage; use rustls::crypto::ring; use serde::Serialize; use tokio_postgres::types::PgLsn; use tracing::{debug, error, info}; -use utils::{ - id::{TenantId, 
TenantTimelineId, TimelineId}, - lsn::Lsn, -}; +use utils::id::{TenantId, TenantTimelineId, TimelineId}; +use utils::lsn::Lsn; +use crate::cloud_admin_api::CloudAdminApiClient; +use crate::metadata_stream::stream_listing; use crate::{ - cloud_admin_api::CloudAdminApiClient, init_remote, metadata_stream::stream_listing, - BucketConfig, ConsoleConfig, NodeKind, RootTarget, TenantShardTimelineId, + BucketConfig, ConsoleConfig, NodeKind, RootTarget, TenantShardTimelineId, init_remote, }; /// Generally we should ask safekeepers, but so far we use everywhere default 16MB. diff --git a/storage_scrubber/src/tenant_snapshot.rs b/storage_scrubber/src/tenant_snapshot.rs index 60e79fb859..e17409c20e 100644 --- a/storage_scrubber/src/tenant_snapshot.rs +++ b/storage_scrubber/src/tenant_snapshot.rs @@ -1,25 +1,26 @@ use std::collections::HashMap; use std::sync::Arc; -use crate::checks::{list_timeline_blobs, BlobDataParseResult, RemoteTimelineBlobData}; -use crate::metadata_stream::{stream_tenant_shards, stream_tenant_timelines}; -use crate::{ - download_object_to_file_s3, init_remote, init_remote_s3, BucketConfig, NodeKind, RootTarget, - TenantShardTimelineId, -}; use anyhow::Context; use async_stream::stream; use aws_sdk_s3::Client; use camino::Utf8PathBuf; use futures::{StreamExt, TryStreamExt}; +use pageserver::tenant::IndexPart; use pageserver::tenant::remote_timeline_client::index::LayerFileMetadata; use pageserver::tenant::storage_layer::LayerName; -use pageserver::tenant::IndexPart; use pageserver_api::shard::TenantShardId; use remote_storage::{GenericRemoteStorage, S3Config}; use utils::generation::Generation; use utils::id::TenantId; +use crate::checks::{BlobDataParseResult, RemoteTimelineBlobData, list_timeline_blobs}; +use crate::metadata_stream::{stream_tenant_shards, stream_tenant_timelines}; +use crate::{ + BucketConfig, NodeKind, RootTarget, TenantShardTimelineId, download_object_to_file_s3, + init_remote, init_remote_s3, +}; + pub struct SnapshotDownloader { s3_client: Arc, s3_root: RootTarget, diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 58c5dbfd29..5159ad4e3b 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -1167,15 +1167,15 @@ class NeonEnv: "max_batch_size": 32, } - # Concurrent IO (https://github.com/neondatabase/neon/issues/9378): - # enable concurrent IO by default in tests and benchmarks. - # Compat tests are exempt because old versions fail to parse the new config. 
- get_vectored_concurrent_io = self.pageserver_get_vectored_concurrent_io if config.test_may_use_compatibility_snapshot_binaries: log.info( - "Forcing use of binary-built-in default to avoid forward-compatibility related test failures" + "Skipping WAL contiguity validation to avoid forward-compatibility related test failures" ) - get_vectored_concurrent_io = None + else: + # Look for gaps in WAL received from safekeepeers + ps_cfg["validate_wal_contiguity"] = True + + get_vectored_concurrent_io = self.pageserver_get_vectored_concurrent_io if get_vectored_concurrent_io is not None: ps_cfg["get_vectored_concurrent_io"] = { "mode": self.pageserver_get_vectored_concurrent_io, @@ -1630,6 +1630,7 @@ def neon_env_builder( class PageserverPort: pg: int http: int + https: int | None = None class LogUtils: @@ -1886,6 +1887,7 @@ class NeonStorageController(MetricsGetter, LogUtils): "node_id": int(node.id), "listen_http_addr": "localhost", "listen_http_port": node.service_port.http, + "listen_https_port": node.service_port.https, "listen_pg_addr": "localhost", "listen_pg_port": node.service_port.pg, "availability_zone_id": node.az_id, @@ -4521,33 +4523,6 @@ class Safekeeper(LogUtils): for na in not_allowed: assert not self.log_contains(na) - def append_logical_message( - self, tenant_id: TenantId, timeline_id: TimelineId, request: dict[str, Any] - ) -> dict[str, Any]: - """ - Send JSON_CTRL query to append LogicalMessage to WAL and modify - safekeeper state. It will construct LogicalMessage from provided - prefix and message, and then will write it to WAL. - """ - - # "replication=0" hacks psycopg not to send additional queries - # on startup, see https://github.com/psycopg/psycopg2/pull/482 - token = self.env.auth_keys.generate_tenant_token(tenant_id) - connstr = f"host=localhost port={self.port.pg} password={token} replication=0 options='-c timeline_id={timeline_id} tenant_id={tenant_id}'" - - with closing(psycopg2.connect(connstr)) as conn: - # server doesn't support transactions - conn.autocommit = True - with conn.cursor() as cur: - request_json = json.dumps(request) - log.info(f"JSON_CTRL request on port {self.port.pg}: {request_json}") - cur.execute("JSON_CTRL " + request_json) - all = cur.fetchall() - log.info(f"JSON_CTRL response: {all[0][0]}") - res = json.loads(all[0][0]) - assert isinstance(res, dict) - return res - def http_client( self, auth_token: str | None = None, gen_sk_wide_token: bool = True ) -> SafekeeperHttpClient: diff --git a/test_runner/fixtures/safekeeper/http.py b/test_runner/fixtures/safekeeper/http.py index 493ce7334e..7038d87aba 100644 --- a/test_runner/fixtures/safekeeper/http.py +++ b/test_runner/fixtures/safekeeper/http.py @@ -273,10 +273,22 @@ class SafekeeperHttpClient(requests.Session, MetricsGetter): assert isinstance(res_json, dict) return res_json + def timeline_exclude( + self, tenant_id: TenantId, timeline_id: TimelineId, to: Configuration + ) -> dict[str, Any]: + res = self.put( + f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/exclude", + data=to.to_json(), + ) + res.raise_for_status() + res_json = res.json() + assert isinstance(res_json, dict) + return res_json + def membership_switch( self, tenant_id: TenantId, timeline_id: TimelineId, to: Configuration ) -> TimelineMembershipSwitchResponse: - res = self.post( + res = self.put( f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/membership", data=to.to_json(), ) diff --git a/test_runner/performance/test_perf_many_relations.py 
b/test_runner/performance/test_perf_many_relations.py index 0ee0efe8b9..2570c55f6c 100644 --- a/test_runner/performance/test_perf_many_relations.py +++ b/test_runner/performance/test_perf_many_relations.py @@ -2,8 +2,10 @@ import os from pathlib import Path import pytest +from fixtures.benchmark_fixture import NeonBenchmarker from fixtures.compare_fixtures import RemoteCompare from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnvBuilder def get_num_relations(default: int = 1000) -> list[int]: @@ -64,3 +66,52 @@ def test_perf_many_relations(remote_compare: RemoteCompare, num_relations: int): env.pg_bin.run_capture( ["psql", env.pg.connstr(options="-cstatement_timeout=1000s "), "-c", sql] ) + + +def test_perf_simple_many_relations_reldir_v2( + neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker +): + """ + Test creating many relations in a single database. + """ + env = neon_env_builder.init_start(initial_tenant_conf={"rel_size_v2_enabled": "true"}) + ep = env.endpoints.create_start( + "main", + config_lines=[ + "shared_buffers=1000MB", + "max_locks_per_transaction=16384", + ], + ) + + n = 100000 + step = 5000 + # Create many relations + log.info(f"Creating {n} relations...") + begin = 0 + with zenbenchmark.record_duration("create_first_relation"): + ep.safe_psql("CREATE TABLE IF NOT EXISTS table_begin (id SERIAL PRIMARY KEY, data TEXT)") + with zenbenchmark.record_duration("create_many_relations"): + while True: + end = begin + step + ep.safe_psql_many( + [ + "BEGIN", + f"""DO $$ + DECLARE + i INT; + table_name TEXT; + BEGIN + FOR i IN {begin}..{end} LOOP + table_name := 'table_' || i; + EXECUTE 'CREATE TABLE IF NOT EXISTS ' || table_name || ' (id SERIAL PRIMARY KEY, data TEXT)'; + END LOOP; + END $$; + """, + "COMMIT", + ] + ) + begin = end + if begin >= n: + break + with zenbenchmark.record_duration("create_last_relation"): + ep.safe_psql(f"CREATE TABLE IF NOT EXISTS table_{begin} (id SERIAL PRIMARY KEY, data TEXT)") diff --git a/test_runner/regress/test_bad_connection.py b/test_runner/regress/test_bad_connection.py index c0c9537421..bfc5cb174e 100644 --- a/test_runner/regress/test_bad_connection.py +++ b/test_runner/regress/test_bad_connection.py @@ -7,6 +7,7 @@ import psycopg2.errors import pytest from fixtures.log_helper import log from fixtures.neon_fixtures import NeonEnvBuilder +from fixtures.utils import USE_LFC @pytest.mark.timeout(600) @@ -80,3 +81,193 @@ def test_compute_pageserver_connection_stress(neon_env_builder: NeonEnvBuilder): # do a graceful shutdown which would had caught the allowed_errors before # https://github.com/neondatabase/neon/pull/8632 env.pageserver.stop() + + +def test_compute_pageserver_hung_connections(neon_env_builder: NeonEnvBuilder): + """ + Test timeouts in waiting for response to pageserver request + """ + env = neon_env_builder.init_start() + env.pageserver.allowed_errors.append(".*slow GetPage.*") + pageserver_http = env.pageserver.http_client() + endpoint = env.endpoints.create_start( + "main", + tenant_id=env.initial_tenant, + config_lines=["autovacuum = off"], + ) + pg_conn = endpoint.connect() + cur = pg_conn.cursor() + + # Create table, and insert some rows. Make it big enough that it doesn't fit in + # shared_buffers, otherwise the SELECT after restart will just return answer + # from shared_buffers without hitting the page server, which defeats the point + # of this test. 
+ cur.execute("CREATE TABLE foo (t text)") + cur.execute( + """ + INSERT INTO foo + SELECT 'long string to consume some space' || g + FROM generate_series(1, 100000) g + """ + ) + + # Verify that the table is larger than shared_buffers + cur.execute( + """ + select setting::int * pg_size_bytes(unit) as shared_buffers, pg_relation_size('foo') as tbl_size + from pg_settings where name = 'shared_buffers' + """ + ) + row = cur.fetchone() + assert row is not None + log.debug(f"shared_buffers is {row[0]}, table size {row[1]}") + assert int(row[0]) < int(row[1]) + + # Print the backend PID so that it can be compared with the logs easily + cur.execute("SELECT pg_backend_pid()") + row = cur.fetchone() + assert row is not None + log.info(f"running test workload in backend PID {row[0]}") + + def run_workload(duration: float): + end_time = time.time() + duration + times_executed = 0 + while time.time() < end_time: + if random.random() < 0.5: + cur.execute("INSERT INTO foo VALUES ('stas'), ('heikki')") + else: + cur.execute("SELECT t FROM foo ORDER BY RANDOM() LIMIT 10") + cur.fetchall() + times_executed += 1 + log.info(f"Workload executed {times_executed} times") + assert times_executed > 0 + + ## Test short connection hiccups + ## + ## This is to exercise the logging timeout. + log.info("running workload with log timeout") + cur.execute("SET neon.pageserver_response_log_timeout = '500ms'") + pageserver_http.configure_failpoints(("before-pagestream-msg-flush", "10%3*return(3000)")) + run_workload(20) + + # check that the message was logged + assert endpoint.log_contains("no response received from pageserver for .* s, still waiting") + assert endpoint.log_contains("received response from pageserver after .* s") + + ## Test connections that are hung for longer + ## + ## This exercises the disconnect timeout. We'll disconnect and + ## reconnect after 500 ms. + log.info("running workload with disconnect timeout") + cur.execute("SET neon.pageserver_response_log_timeout = '250ms'") + cur.execute("SET neon.pageserver_response_disconnect_timeout = '500ms'") + pageserver_http.configure_failpoints(("before-pagestream-msg-flush", "10%3*return(3000)")) + run_workload(15) + + assert endpoint.log_contains("no response from pageserver for .* s, disconnecting") + + # do a graceful shutdown which would had caught the allowed_errors before + # https://github.com/neondatabase/neon/pull/8632 + env.pageserver.stop() + + +def test_compute_pageserver_statement_timeout(neon_env_builder: NeonEnvBuilder): + """ + Test statement_timeout while waiting for response to pageserver request + """ + env = neon_env_builder.init_start() + env.pageserver.allowed_errors.append(".*slow GetPage.*") + pageserver_http = env.pageserver.http_client() + + # Make sure the shared_buffers and LFC are tiny, to ensure the queries + # hit the storage. Disable autovacuum to make the test more deterministic. + config_lines = [ + "shared_buffers='512kB'", + "autovacuum = off", + ] + if USE_LFC: + config_lines = ["neon.max_file_cache_size = 1MB", "neon.file_cache_size_limit = 1MB"] + endpoint = env.endpoints.create_start( + "main", + tenant_id=env.initial_tenant, + config_lines=config_lines, + ) + pg_conn = endpoint.connect() + cur = pg_conn.cursor() + + # Disable parallel query. Parallel workers open their own pageserver connections, + # which messes up the test logic. + cur.execute("SET max_parallel_workers_per_gather=0") + cur.execute("SET effective_io_concurrency=0") + + # Create table, and insert some rows. 
Make it big enough that it doesn't fit in + # shared_buffers, otherwise the SELECT after restart will just return answer + # from shared_buffers without hitting the page server, which defeats the point + # of this test. + cur.execute("CREATE TABLE foo (t text)") + cur.execute( + """ + INSERT INTO foo + SELECT 'long string to consume some space' || g + FROM generate_series(1, 100000) g + """ + ) + + # Verify that the table is larger than shared_buffers + cur.execute( + """ + select setting::int * pg_size_bytes(unit) as shared_buffers, pg_relation_size('foo') as tbl_size + from pg_settings where name = 'shared_buffers' + """ + ) + row = cur.fetchone() + assert row is not None + log.debug(f"shared_buffers is {row[0]}, table size {row[1]}") + assert int(row[0]) < int(row[1]) + + ## Run a query until the compute->pageserver connection hits the failpoint and + ## get stuck. This tests that the statement_timeout is obeyed while waiting on a + ## GetPage request. + log.info("running workload with statement_timeout") + cur.execute("SET neon.pageserver_response_log_timeout = '2000ms'") + cur.execute("SET neon.pageserver_response_disconnect_timeout = '30000ms'") + cur.execute("SET statement_timeout='10s'") + pageserver_http.configure_failpoints(("before-pagestream-msg-flush", "10%return(60000)")) + + start_time = time.time() + with pytest.raises(psycopg2.errors.QueryCanceled): + cur.execute("SELECT count(*) FROM foo") + cur.fetchall() + log.info("Statement timeout reached") + end_time = time.time() + # Verify that the statement_timeout canceled the query before + # neon.pageserver_response_disconnect_timeout expired + assert end_time - start_time < 40 + times_canceled = 1 + + # Should not have disconnected yet + assert not endpoint.log_contains("no response from pageserver for .* s, disconnecting") + + # Clear the failpoint. This doesn't affect the connection that already hit it. It + # will keep waiting. But subsequent connections will work normally. + pageserver_http.configure_failpoints(("before-pagestream-msg-flush", "off")) + + # If we keep retrying, we should eventually succeed. (This tests that the + # neon.pageserver_response_disconnect_timeout is not reset on query + # cancellation.) 
+ while times_canceled < 10: + try: + cur.execute("SELECT count(*) FROM foo") + cur.fetchall() + log.info("Statement succeeded") + break + except psycopg2.errors.QueryCanceled: + log.info("Statement timed out, retrying") + times_canceled += 1 + assert times_canceled > 1 and times_canceled < 10 + + assert endpoint.log_contains("no response from pageserver for .* s, disconnecting") + + # do a graceful shutdown which would had caught the allowed_errors before + # https://github.com/neondatabase/neon/pull/8632 + env.pageserver.stop() diff --git a/test_runner/regress/test_broken_timeline.py b/test_runner/regress/test_broken_timeline.py index 124e62999a..d49686b57c 100644 --- a/test_runner/regress/test_broken_timeline.py +++ b/test_runner/regress/test_broken_timeline.py @@ -29,6 +29,8 @@ def test_local_corruption(neon_env_builder: NeonEnvBuilder): ".*failed to load metadata.*", ".*load failed.*load local timeline.*", ".*: layer load failed, assuming permanent failure:.*", + ".*failed to get checkpoint bytes.*", + ".*failed to get control bytes.*", ] ) @@ -75,7 +77,7 @@ def test_local_corruption(neon_env_builder: NeonEnvBuilder): # (We don't check layer file contents on startup, when loading the timeline) # # This will change when we implement checksums for layers - with pytest.raises(Exception, match="get_values_reconstruct_data for layer ") as err: + with pytest.raises(Exception, match="failed to get checkpoint bytes") as err: pg1.start() log.info( f"As expected, compute startup failed for timeline {tenant1}/{timeline1} with corrupt layers: {err}" diff --git a/test_runner/regress/test_compaction.py b/test_runner/regress/test_compaction.py index c091cd0869..0df88e14c2 100644 --- a/test_runner/regress/test_compaction.py +++ b/test_runner/regress/test_compaction.py @@ -319,8 +319,12 @@ def test_pageserver_gc_compaction_idempotent( }, ) wait_until(compaction_finished, timeout=60) + workload.validate(env.pageserver.id) + # Ensure all data are uploaded so that the duplicated layer gets into index_part.json + ps_http.timeline_checkpoint(tenant_id, timeline_id, wait_until_flushed=True) if compaction_mode == "after_restart": env.pageserver.restart(True) + workload.validate(env.pageserver.id) ps_http.timeline_gc( tenant_id, timeline_id, None ) # Force refresh gc info to have gc_cutoff generated @@ -335,6 +339,7 @@ def test_pageserver_gc_compaction_idempotent( "sub_compaction_max_job_size_mb": 16, }, ) + workload.validate(env.pageserver.id) wait_until(compaction_finished, timeout=60) # ensure gc_compaction is scheduled and it's actually running (instead of skipping due to no layers picked) @@ -466,6 +471,59 @@ def test_pageserver_gc_compaction_interrupt(neon_env_builder: NeonEnvBuilder): ps_http.timeline_gc(tenant_id, timeline_id, None) +@skip_in_debug_build("only run with release build") +def test_pageserver_gc_compaction_trigger(neon_env_builder: NeonEnvBuilder): + SMOKE_CONF = { + # Run both gc and gc-compaction. 
+ "gc_period": "5s", + "compaction_period": "5s", + # No PiTR interval and small GC horizon + "pitr_interval": "0s", + "gc_horizon": f"{1024 * 16}", + "lsn_lease_length": "0s", + "gc_compaction_enabled": "true", + "gc_compaction_initial_threshold_kb": "16", + "gc_compaction_ratio_percent": "50", + # Do not generate image layers with create_image_layers + "image_layer_creation_check_threshold": "100", + } + + env = neon_env_builder.init_start(initial_tenant_conf=SMOKE_CONF) + tenant_id = env.initial_tenant + timeline_id = env.initial_timeline + + row_count = 10000 + churn_rounds = 20 + + ps_http = env.pageserver.http_client() + + workload = Workload(env, tenant_id, timeline_id) + workload.init(env.pageserver.id) + + log.info("Writing initial data ...") + workload.write_rows(row_count, env.pageserver.id) + + ps_http.timeline_gc( + tenant_id, timeline_id, None + ) # Force refresh gc info to have gc_cutoff generated + + def compaction_finished(): + queue_depth = len(ps_http.timeline_compact_info(tenant_id, timeline_id)) + assert queue_depth == 0 + + for i in range(1, churn_rounds + 1): + log.info(f"Running churn round {i}/{churn_rounds} ...") + workload.churn_rows(row_count, env.pageserver.id, upload=True) + wait_until(compaction_finished, timeout=60) + workload.validate(env.pageserver.id) + + # ensure gc_compaction is scheduled and it's actually running (instead of skipping due to no layers picked) + env.pageserver.assert_log_contains("gc_compact_timeline.*picked .* layers for compaction") + + log.info("Validating at workload end ...") + workload.validate(env.pageserver.id) + + # Stripe sizes in number of pages. TINY_STRIPES = 16 LARGE_STRIPES = 32768 diff --git a/test_runner/regress/test_compute_metrics.py b/test_runner/regress/test_compute_metrics.py index 99d41e410a..b360162dc1 100644 --- a/test_runner/regress/test_compute_metrics.py +++ b/test_runner/regress/test_compute_metrics.py @@ -501,19 +501,31 @@ def test_compute_installed_extensions_metric(neon_simple_env: NeonEnv): """ Test that the compute_installed_extensions properly reports accurate results. Important to note that currently this metric is only gathered on - compute start. + compute start. We install the neon extension into a database other than + postgres because compute_ctl will run `ALTER EXTENSION neon UPDATE` during + Postgres startup in the postgres database, creating a race condition. """ + DB_NAME = "test" + env = neon_simple_env endpoint = env.endpoints.create_start("main") + endpoint.safe_psql(f"CREATE DATABASE {DB_NAME}") + + # The metric is only gathered on compute start, so restart to check that + # plpgsql is now in 3 databases, instead of its regular 2, template1 and + # postgres. + endpoint.stop() + endpoint.start() client = endpoint.http_client() def __has_plpgsql(samples: list[Sample]) -> bool: """ - Check that plpgsql is installed in the template1 and postgres databases + Check that plpgsql is installed in the template1, postgres, and test + databases """ - return len(samples) == 1 and samples[0].value == 2 + return len(samples) == 1 and samples[0].value == 3 wait_until( collect_metric( @@ -525,8 +537,8 @@ def test_compute_installed_extensions_metric(neon_simple_env: NeonEnv): name="compute_installed_extensions", ) - # Install the neon extension, so we can check for it on the restart - endpoint.safe_psql("CREATE EXTENSION neon VERSION '1.0'") + # Install the neon extension, so we can check for it on the restart. 
+    endpoint.safe_psql("CREATE EXTENSION neon VERSION '1.0'", dbname=DB_NAME)

     # The metric is only gathered on compute start, so restart to check if the
     # neon extension will now be there.
diff --git a/test_runner/regress/test_lfc_prefetch.py b/test_runner/regress/test_lfc_prefetch.py
new file mode 100644
index 0000000000..dd422d996e
--- /dev/null
+++ b/test_runner/regress/test_lfc_prefetch.py
@@ -0,0 +1,101 @@
+from __future__ import annotations
+
+import time
+
+import pytest
+from fixtures.log_helper import log
+from fixtures.neon_fixtures import NeonEnv
+from fixtures.utils import USE_LFC
+
+
+@pytest.mark.timeout(600)
+@pytest.mark.skipif(not USE_LFC, reason="LFC is disabled, skipping")
+def test_lfc_prefetch(neon_simple_env: NeonEnv):
+    """
+    Test that storing prefetch results in the Local File Cache avoids redundant prefetch requests
+    """
+    env = neon_simple_env
+    endpoint = env.endpoints.create_start(
+        "main",
+        config_lines=[
+            "neon.max_file_cache_size=1GB",
+            "neon.file_cache_size_limit=1GB",
+            "effective_io_concurrency=100",
+            "shared_buffers=1MB",
+            "enable_bitmapscan=off",
+            "enable_seqscan=off",
+            "autovacuum=off",
+        ],
+    )
+    conn = endpoint.connect()
+    cur = conn.cursor()
+    cur.execute("create extension neon")
+    cur.execute("create table t(pk integer, sk integer, filler text default repeat('x',200))")
+    cur.execute("set statement_timeout=0")
+    cur.execute("select setseed(0.5)")
+    cur.execute("insert into t values (generate_series(1,1000000),random()*1000000)")
+    cur.execute("create index on t(sk)")
+    cur.execute("vacuum t")
+
+    # reset LFC
+    cur.execute("alter system set neon.file_cache_size_limit=0")
+    cur.execute("select pg_reload_conf()")
+    time.sleep(1)
+    cur.execute("alter system set neon.file_cache_size_limit='1GB'")
+    cur.execute("select pg_reload_conf()")
+
+    cur.execute(
+        "explain (analyze,prefetch,format json) select sum(pk) from (select pk from t where sk between 100000 and 200000 limit 100) s1"
+    )
+    prefetch_expired = cur.fetchall()[0][0][0]["Plan"]["Prefetch Expired Requests"]
+    log.info(f"Unused prefetches: {prefetch_expired}")
+
+    cur.execute(
+        "explain (analyze,prefetch,format json) select sum(pk) from (select pk from t where sk between 200000 and 300000 limit 100) s2"
+    )
+    prefetch_expired = cur.fetchall()[0][0][0]["Plan"]["Prefetch Expired Requests"]
+    log.info(f"Unused prefetches: {prefetch_expired}")
+
+    cur.execute(
+        "explain (analyze,prefetch,format json) select sum(pk) from (select pk from t where sk between 300000 and 400000 limit 100) s3"
+    )
+    prefetch_expired = cur.fetchall()[0][0][0]["Plan"]["Prefetch Expired Requests"]
+    log.info(f"Unused prefetches: {prefetch_expired}")
+
+    cur.execute(
+        "explain (analyze,prefetch,format json) select sum(pk) from (select pk from t where sk between 100000 and 200000 limit 100) s4"
+    )
+    prefetch_expired = cur.fetchall()[0][0][0]["Plan"]["Prefetch Expired Requests"]
+    log.info(f"Unused prefetches: {prefetch_expired}")
+
+    # If prefetch results are not stored in the LFC, we keep sending unused prefetch requests to the pageserver
+    assert prefetch_expired > 0
+
+    cur.execute("set neon.store_prefetch_result_in_lfc=on")
+
+    cur.execute(
+        "explain (analyze,prefetch,format json) select sum(pk) from (select pk from t where sk between 500000 and 600000 limit 100) s5"
+    )
+    prefetch_expired = cur.fetchall()[0][0][0]["Plan"]["Prefetch Expired Requests"]
+    log.info(f"Unused prefetches: {prefetch_expired}")
+
+    cur.execute(
+        "explain (analyze,prefetch,format json) select sum(pk) from (select pk from t where sk between 600000 and 700000 limit 100) s6"
+    )
+    prefetch_expired = cur.fetchall()[0][0][0]["Plan"]["Prefetch Expired Requests"]
+    log.info(f"Unused prefetches: {prefetch_expired}")
+
+    cur.execute(
+        "explain (analyze,prefetch,format json) select sum(pk) from (select pk from t where sk between 700000 and 800000 limit 100) s7"
+    )
+    prefetch_expired = cur.fetchall()[0][0][0]["Plan"]["Prefetch Expired Requests"]
+    log.info(f"Unused prefetches: {prefetch_expired}")
+
+    cur.execute(
+        "explain (analyze,prefetch,format json) select sum(pk) from (select pk from t where sk between 500000 and 600000 limit 100) s8"
+    )
+    prefetch_expired = cur.fetchall()[0][0][0]["Plan"]["Prefetch Expired Requests"]
+    log.info(f"Unused prefetches: {prefetch_expired}")
+
+    # No redundant prefetch requests if prefetch results are stored in the LFC
+    assert prefetch_expired == 0
diff --git a/test_runner/regress/test_normal_work.py b/test_runner/regress/test_normal_work.py
index ae2d171058..c8458b963e 100644
--- a/test_runner/regress/test_normal_work.py
+++ b/test_runner/regress/test_normal_work.py
@@ -6,9 +6,14 @@ from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder
 from fixtures.pageserver.http import PageserverHttpClient


-def check_tenant(env: NeonEnv, pageserver_http: PageserverHttpClient):
+def check_tenant(
+    env: NeonEnv, pageserver_http: PageserverHttpClient, safekeeper_proto_version: int
+):
     tenant_id, timeline_id = env.create_tenant()
-    endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
+    config_lines = [
+        f"neon.safekeeper_proto_version = {safekeeper_proto_version}",
+    ]
+    endpoint = env.endpoints.create_start("main", tenant_id=tenant_id, config_lines=config_lines)
     # we rely upon autocommit after each statement
     res_1 = endpoint.safe_psql_many(
         queries=[
@@ -33,7 +38,14 @@ def check_tenant(env: NeonEnv, pageserver_http: PageserverHttpClient):


 @pytest.mark.parametrize("num_timelines,num_safekeepers", [(3, 1)])
-def test_normal_work(neon_env_builder: NeonEnvBuilder, num_timelines: int, num_safekeepers: int):
+# Test both proto versions until we fully migrate.
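+# The proto version is plumbed to each endpoint via the neon.safekeeper_proto_version GUC (see check_tenant).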
+@pytest.mark.parametrize("safekeeper_proto_version", [2, 3]) +def test_normal_work( + neon_env_builder: NeonEnvBuilder, + num_timelines: int, + num_safekeepers: int, + safekeeper_proto_version: int, +): """ Basic test: * create new tenant with a timeline @@ -52,4 +64,4 @@ def test_normal_work(neon_env_builder: NeonEnvBuilder, num_timelines: int, num_s pageserver_http = env.pageserver.http_client() for _ in range(num_timelines): - check_tenant(env, pageserver_http) + check_tenant(env, pageserver_http, safekeeper_proto_version) diff --git a/test_runner/regress/test_pageserver_secondary.py b/test_runner/regress/test_pageserver_secondary.py index 602d493ae6..a9b897b741 100644 --- a/test_runner/regress/test_pageserver_secondary.py +++ b/test_runner/regress/test_pageserver_secondary.py @@ -8,9 +8,10 @@ from pathlib import Path from typing import TYPE_CHECKING import pytest -from fixtures.common_types import TenantId, TenantShardId, TimelineId +from fixtures.common_types import TenantId, TenantShardId, TimelineArchivalState, TimelineId from fixtures.log_helper import log from fixtures.neon_fixtures import ( + DEFAULT_BRANCH_NAME, NeonEnvBuilder, NeonPageserver, StorageControllerMigrationConfig, @@ -927,8 +928,12 @@ def test_migration_to_cold_secondary(neon_env_builder: NeonEnvBuilder): workload.write_rows(128, upload=True) workload.write_rows(128, upload=True) workload.write_rows(128, upload=True) + + child_timeline_id = env.create_branch( + "foo", tenant_id, ancestor_branch_name=DEFAULT_BRANCH_NAME + ) + workload.write_rows(128, upload=True) - workload.stop() # Expect lots of layers assert len(ps_attached.list_layers(tenant_id, timeline_id)) > 10 @@ -937,9 +942,19 @@ def test_migration_to_cold_secondary(neon_env_builder: NeonEnvBuilder): for ps in env.pageservers: ps.http_client().configure_failpoints([("secondary-layer-download-sleep", "return(1000)")]) + def timeline_heatmap(tlid): + assert env.pageserver_remote_storage is not None + + heatmap = env.pageserver_remote_storage.heatmap_content(tenant_id) + for htl in heatmap["timelines"]: + if htl["timeline_id"] == str(tlid): + return htl + + raise RuntimeError(f"No heatmap for timeline: {tlid}") + # Upload a heatmap, so that secondaries have something to download ps_attached.http_client().tenant_heatmap_upload(tenant_id) - heatmap_before_migration = env.pageserver_remote_storage.heatmap_content(tenant_id) + heatmap_before_migration = timeline_heatmap(timeline_id) # This has no chance to succeed: we have lots of layers and each one takes at least 1000ms. # However, it pulls the heatmap, which will be important later. 
@@ -971,17 +986,12 @@ def test_migration_to_cold_secondary(neon_env_builder: NeonEnvBuilder): assert env.storage_controller.locate(tenant_id)[0]["node_id"] == ps_secondary.id ps_secondary.http_client().tenant_heatmap_upload(tenant_id) - heatmap_after_migration = env.pageserver_remote_storage.heatmap_content(tenant_id) + heatmap_after_migration = timeline_heatmap(timeline_id) - assert len(heatmap_before_migration["timelines"][0]["layers"]) > 0 + assert len(heatmap_before_migration["layers"]) > 0 - # The new layer map should contain all the layers in the pre-migration one - # and a new in memory layer - after_migration_heatmap_layers_count = len(heatmap_after_migration["timelines"][0]["layers"]) - assert ( - len(heatmap_before_migration["timelines"][0]["layers"]) + 1 - == after_migration_heatmap_layers_count - ) + after_migration_heatmap_layers_count = len(heatmap_after_migration["layers"]) + assert len(heatmap_before_migration["layers"]) <= after_migration_heatmap_layers_count log.info(f"Heatmap size after cold migration is {after_migration_heatmap_layers_count}") @@ -989,10 +999,71 @@ def test_migration_to_cold_secondary(neon_env_builder: NeonEnvBuilder): TenantShardId(tenant_id, shard_number=0, shard_count=0), timeline_id ) - def all_layers_downloaded(): + # Now simulate the case where a child timeline is archived, parent layers + # are evicted and the child is unarchived. When the child is unarchived, + # itself and the parent update their heatmaps to contain layers needed by the + # child. One can warm up the timeline hierarchy since the heatmaps are ready. + + def all_layers_downloaded(expected_layer_count: int): local_layers_count = len(ps_secondary.list_layers(tenant_id, timeline_id)) log.info(f"{local_layers_count=} {after_migration_heatmap_layers_count=}") - assert local_layers_count == after_migration_heatmap_layers_count + assert local_layers_count >= expected_layer_count - wait_until(all_layers_downloaded) + wait_until(lambda: all_layers_downloaded(after_migration_heatmap_layers_count)) + ps_secondary.http_client().tenant_heatmap_upload(tenant_id) + + before = ( + ps_secondary.http_client() + .get_metrics() + .query_one("pageserver_remote_ondemand_downloaded_layers_total") + .value + ) + workload.validate() + after = ( + ps_secondary.http_client() + .get_metrics() + .query_one("pageserver_remote_ondemand_downloaded_layers_total") + .value + ) + + workload.stop() + assert before == after + + def check_archival_state(state: TimelineArchivalState, tline): + timelines = ( + timeline["timeline_id"] + for timeline in ps_secondary.http_client().timeline_list(tenant_id=tenant_id) + ) + + if state == TimelineArchivalState.ARCHIVED: + assert str(tline) not in timelines + elif state == TimelineArchivalState.UNARCHIVED: + assert str(tline) in timelines + + ps_secondary.http_client().timeline_archival_config( + tenant_id, child_timeline_id, TimelineArchivalState.ARCHIVED + ) + ps_secondary.http_client().timeline_offload(tenant_id, child_timeline_id) + wait_until(lambda: check_archival_state(TimelineArchivalState.ARCHIVED, child_timeline_id)) + + ps_secondary.http_client().evict_all_layers(tenant_id, timeline_id) + ps_secondary.http_client().tenant_heatmap_upload(tenant_id) + assert len(timeline_heatmap(timeline_id)["layers"]) == 0 + + ps_secondary.http_client().timeline_archival_config( + tenant_id, child_timeline_id, TimelineArchivalState.UNARCHIVED + ) + wait_until(lambda: check_archival_state(TimelineArchivalState.UNARCHIVED, child_timeline_id)) + + 
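+    # Re-upload the heatmap: after unarchiving, the parent and child heatmaps
+    # should again include the layers the child needs, so the hierarchy can be warmed up.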
ps_secondary.http_client().tenant_heatmap_upload(tenant_id) + log.info(f"Parent timeline heatmap size: {len(timeline_heatmap(timeline_id)['layers'])}") + log.info(f"Child timeline heatmap size: {len(timeline_heatmap(child_timeline_id)['layers'])}") + + expected_locally = len(timeline_heatmap(timeline_id)["layers"]) + assert expected_locally > 0 + + env.storage_controller.download_heatmap_layers( + TenantShardId(tenant_id, shard_number=0, shard_count=0), timeline_id + ) + wait_until(lambda: all_layers_downloaded(expected_locally)) diff --git a/test_runner/regress/test_pg_regress.py b/test_runner/regress/test_pg_regress.py index 411888efbc..6a76ad5ca8 100644 --- a/test_runner/regress/test_pg_regress.py +++ b/test_runner/regress/test_pg_regress.py @@ -332,8 +332,10 @@ def test_sql_regress( @skip_in_debug_build("only run with release build") +@pytest.mark.parametrize("reldir_type", ["v1", "v2"]) def test_tx_abort_with_many_relations( neon_env_builder: NeonEnvBuilder, + reldir_type: str, ): """ This is not a pg_regress test as such, but perhaps it should be -- this test exercises postgres @@ -342,7 +344,11 @@ def test_tx_abort_with_many_relations( Reproducer for https://github.com/neondatabase/neon/issues/9505 """ - env = neon_env_builder.init_start() + env = neon_env_builder.init_start( + initial_tenant_conf={ + "rel_size_v2_enabled": "true" if reldir_type == "v2" else "false", + } + ) ep = env.endpoints.create_start( "main", tenant_id=env.initial_tenant, @@ -354,48 +360,65 @@ def test_tx_abort_with_many_relations( # How many relations: this number is tuned to be long enough to take tens of seconds # if the rollback code path is buggy, tripping the test's timeout. - n = 4000 + if reldir_type == "v1": + n = 4000 + step = 4000 + else: + n = 20000 + step = 5000 def create(): # Create many relations log.info(f"Creating {n} relations...") - ep.safe_psql_many( - [ - "BEGIN", - f"""DO $$ - DECLARE - i INT; - table_name TEXT; - BEGIN - FOR i IN 1..{n} LOOP - table_name := 'table_' || i; - EXECUTE 'CREATE TABLE IF NOT EXISTS ' || table_name || ' (id SERIAL PRIMARY KEY, data TEXT)'; - END LOOP; - END $$; - """, - "COMMIT", - ] - ) + begin = 0 + while True: + end = begin + step + ep.safe_psql_many( + [ + "BEGIN", + f"""DO $$ + DECLARE + i INT; + table_name TEXT; + BEGIN + FOR i IN {begin}..{end} LOOP + table_name := 'table_' || i; + EXECUTE 'CREATE TABLE IF NOT EXISTS ' || table_name || ' (id SERIAL PRIMARY KEY, data TEXT)'; + END LOOP; + END $$; + """, + "COMMIT", + ] + ) + begin = end + if begin >= n: + break def truncate(): # Truncate relations, then roll back the transaction containing the truncations log.info(f"Truncating {n} relations...") - ep.safe_psql_many( - [ - "BEGIN", - f"""DO $$ - DECLARE - i INT; - table_name TEXT; - BEGIN - FOR i IN 1..{n} LOOP - table_name := 'table_' || i; - EXECUTE 'TRUNCATE ' || table_name ; - END LOOP; - END $$; - """, - ] - ) + begin = 0 + while True: + end = begin + step + ep.safe_psql_many( + [ + "BEGIN", + f"""DO $$ + DECLARE + i INT; + table_name TEXT; + BEGIN + FOR i IN {begin}..{end} LOOP + table_name := 'table_' || i; + EXECUTE 'TRUNCATE ' || table_name ; + END LOOP; + END $$; + """, + ] + ) + begin = end + if begin >= n: + break def rollback_and_wait(): log.info(f"Rolling back after truncating {n} relations...") diff --git a/test_runner/regress/test_pgstat.py b/test_runner/regress/test_pgstat.py new file mode 100644 index 0000000000..bf9b982e14 --- /dev/null +++ b/test_runner/regress/test_pgstat.py @@ -0,0 +1,83 @@ +import pytest +from fixtures.neon_fixtures 
import NeonEnv
+from fixtures.pg_version import PgVersion
+
+
+#
+# Test that pgstat statistics are preserved across endpoint restarts
+#
+def test_pgstat(neon_simple_env: NeonEnv):
+    env = neon_simple_env
+    if env.pg_version == PgVersion.V14:
+        pytest.skip("PG14 doesn't support pgstat statistics persistence")
+
+    n = 10000
+    endpoint = env.endpoints.create_start(
+        "main", config_lines=["neon.pgstat_file_size_limit=100kB", "autovacuum=off"]
+    )
+
+    con = endpoint.connect()
+    cur = con.cursor()
+
+    cur.execute("create table t(x integer)")
+    cur.execute(f"insert into t values (generate_series(1,{n}))")
+    cur.execute("vacuum analyze t")
+    cur.execute("select sum(x) from t")
+    cur.execute("update t set x=x+1")
+
+    cur.execute("select pg_stat_force_next_flush()")
+
+    cur.execute(
+        "select seq_scan,seq_tup_read,n_tup_ins,n_tup_upd,n_live_tup,n_dead_tup, vacuum_count,analyze_count from pg_stat_user_tables"
+    )
+    rec = cur.fetchall()[0]
+    assert rec == (2, n * 2, n, n, n * 2, n, 1, 1)
+
+    endpoint.stop()
+    endpoint.start()
+
+    con = endpoint.connect()
+    cur = con.cursor()
+
+    cur.execute(
+        "select seq_scan,seq_tup_read,n_tup_ins,n_tup_upd,n_live_tup,n_dead_tup, vacuum_count,analyze_count from pg_stat_user_tables"
+    )
+    rec = cur.fetchall()[0]
+    assert rec == (2, n * 2, n, n, n * 2, n, 1, 1)
+
+    cur.execute("update t set x=x+1")
+
+    # stop without checkpoint
+    endpoint.stop(mode="immediate")
+    endpoint.start()
+
+    con = endpoint.connect()
+    cur = con.cursor()
+
+    cur.execute(
+        "select seq_scan,seq_tup_read,n_tup_ins,n_tup_upd,n_live_tup,n_dead_tup, vacuum_count,analyze_count from pg_stat_user_tables"
+    )
+    rec = cur.fetchall()[0]
+    # pgstat information should be discarded in case of abnormal termination
+    assert rec == (0, 0, 0, 0, 0, 0, 0, 0)
+
+    cur.execute("select sum(x) from t")
+
+    # create more relations to increase the size of the statistics
+    for i in range(1, 1000):
+        cur.execute(f"create table t{i}(pk integer primary key)")
+
+    cur.execute("select pg_stat_force_next_flush()")
+
+    endpoint.stop()
+    endpoint.start()
+
+    con = endpoint.connect()
+    cur = con.cursor()
+
+    cur.execute(
+        "select seq_scan,seq_tup_read,n_tup_ins,n_tup_upd,n_live_tup,n_dead_tup, vacuum_count,analyze_count from pg_stat_user_tables"
+    )
+    rec = cur.fetchall()[0]
+    # pgstat information is not restored because its size exceeds the 100kB threshold
+    assert rec == (0, 0, 0, 0, 0, 0, 0, 0)
diff --git a/test_runner/regress/test_storage_controller.py b/test_runner/regress/test_storage_controller.py
index 1d95312140..d5acc257b2 100644
--- a/test_runner/regress/test_storage_controller.py
+++ b/test_runner/regress/test_storage_controller.py
@@ -182,6 +182,13 @@ def test_storage_controller_smoke(neon_env_builder: NeonEnvBuilder, combination)
         time.sleep(1)
     assert get_node_shard_counts(env, tenant_ids)[env.pageservers[0].id] == 0

+    # Exercise live migration of a tenant back to the original pageserver
+    migrate_tenant = env.pageservers[1].http_client().tenant_list_locations()["tenant_shards"][0][0]
+    env.storage_controller.tenant_shard_migrate(
+        TenantShardId.parse(migrate_tenant), env.pageservers[0].id
+    )
+    assert get_node_shard_counts(env, tenant_ids)[env.pageservers[0].id] == 1
+
     # Restarting a pageserver should not detach any tenants (i.e.
/re-attach works) before_restart = env.pageservers[1].http_client().tenant_list_locations() env.pageservers[1].stop() @@ -2139,8 +2146,9 @@ def test_tenant_import(neon_env_builder: NeonEnvBuilder, shard_count, remote_sto workload.validate() +@pytest.mark.parametrize(**fixtures.utils.allpairs_versions()) @pytest.mark.parametrize("num_azs", [1, 2]) -def test_graceful_cluster_restart(neon_env_builder: NeonEnvBuilder, num_azs: int): +def test_graceful_cluster_restart(neon_env_builder: NeonEnvBuilder, num_azs: int, combination): """ Graceful reststart of storage controller clusters use the drain and fill hooks in order to migrate attachments away from pageservers before @@ -3764,3 +3772,96 @@ def test_storage_controller_node_flap_detach_race( assert len(locs) == 1, f"{shard} has {len(locs)} attached locations" wait_until(validate_locations, timeout=10) + + +def test_update_node_on_registration(neon_env_builder: NeonEnvBuilder): + """ + Check that storage controller handles node_register requests with updated fields correctly. + 1. Run storage controller and register 1 pageserver without https port. + 2. Register the same pageserver with https port. Check that port has been updated. + 3. Restart the storage controller. Check that https port is persistent. + 4. Register the same pageserver without https port again (rollback). Check that port has been removed. + """ + neon_env_builder.num_pageservers = 1 + env = neon_env_builder.init_configs() + + env.storage_controller.start() + env.storage_controller.wait_until_ready() + + pageserver = env.pageservers[0] + + # Step 1. Register pageserver without https port. + env.storage_controller.node_register(pageserver) + env.storage_controller.consistency_check() + + nodes = env.storage_controller.node_list() + assert len(nodes) == 1 + assert nodes[0]["listen_https_port"] is None + + # Step 2. Register pageserver with https port. + pageserver.service_port.https = 1234 + env.storage_controller.node_register(pageserver) + env.storage_controller.consistency_check() + + nodes = env.storage_controller.node_list() + assert len(nodes) == 1 + assert nodes[0]["listen_https_port"] == 1234 + + # Step 3. Restart storage controller. + env.storage_controller.stop() + env.storage_controller.start() + env.storage_controller.wait_until_ready() + env.storage_controller.consistency_check() + + nodes = env.storage_controller.node_list() + assert len(nodes) == 1 + assert nodes[0]["listen_https_port"] == 1234 + + # Step 4. Register pageserver with no https port again. + pageserver.service_port.https = None + env.storage_controller.node_register(pageserver) + env.storage_controller.consistency_check() + + nodes = env.storage_controller.node_list() + assert len(nodes) == 1 + assert nodes[0]["listen_https_port"] is None + + +def test_storage_controller_location_conf_equivalence(neon_env_builder: NeonEnvBuilder): + """ + Validate that a storage controller restart with no shards in a transient state + performs zero reconciliations at start-up. Implicitly, this means that the location + configs returned by the pageserver are identical to the persisted state in the + storage controller database. 
+ """ + neon_env_builder.num_pageservers = 1 + neon_env_builder.storage_controller_config = { + "start_as_candidate": False, + } + + env = neon_env_builder.init_configs() + env.start() + + tenant_id = TenantId.generate() + env.storage_controller.tenant_create( + tenant_id, shard_count=2, tenant_config={"pitr_interval": "1h2m3s"} + ) + + env.storage_controller.reconcile_until_idle() + + reconciles_before_restart = env.storage_controller.get_metric_value( + "storage_controller_reconcile_complete_total", filter={"status": "ok"} + ) + + assert reconciles_before_restart != 0 + + env.storage_controller.stop() + env.storage_controller.start() + + env.storage_controller.reconcile_until_idle() + + reconciles_after_restart = env.storage_controller.get_metric_value( + "storage_controller_reconcile_complete_total", filter={"status": "ok"} + ) + + assert reconciles_after_restart == 0 diff --git a/test_runner/regress/test_subscriber_branching.py b/test_runner/regress/test_subscriber_branching.py index 849d4f024d..6175643389 100644 --- a/test_runner/regress/test_subscriber_branching.py +++ b/test_runner/regress/test_subscriber_branching.py @@ -1,9 +1,10 @@ from __future__ import annotations +import threading import time from fixtures.log_helper import log -from fixtures.neon_fixtures import NeonEnv +from fixtures.neon_fixtures import NeonEnv, logical_replication_sync from fixtures.utils import query_scalar, wait_until @@ -239,3 +240,173 @@ def test_subscriber_branching(neon_simple_env: NeonEnv): res = scur_postgres.fetchall() assert len(res) == 1 assert str(sub_child_2_timeline_id) == res[0][0] + + +def test_multiple_subscription_branching(neon_simple_env: NeonEnv): + """ + Test that compute_ctl can handle concurrent deletion of subscriptions in a multiple databases + """ + env = neon_simple_env + + NUMBER_OF_DBS = 5 + + # Create and start endpoint so that neon_local put all the generated + # stuff into the spec.json file. + endpoint = env.endpoints.create_start( + "main", + config_lines=[ + "max_replication_slots = 10", + "max_logical_replication_workers=10", + "max_worker_processes=10", + ], + ) + + TEST_DB_NAMES = [ + { + "name": "neondb", + "owner": "cloud_admin", + }, + { + "name": "publisher_db", + "owner": "cloud_admin", + }, + ] + + for i in range(NUMBER_OF_DBS): + TEST_DB_NAMES.append( + { + "name": f"db{i}", + "owner": "cloud_admin", + } + ) + + # Update the spec.json file to create the databases + # and reconfigure the endpoint to apply the changes. 
+ endpoint.respec_deep( + **{ + "skip_pg_catalog_updates": False, + "cluster": { + "databases": TEST_DB_NAMES, + }, + } + ) + endpoint.reconfigure() + + connstr = endpoint.connstr(dbname="publisher_db").replace("'", "''") + + # create table, replication and subscription for each of the databases + with endpoint.cursor(dbname="publisher_db") as publisher_cursor: + for i in range(NUMBER_OF_DBS): + publisher_cursor.execute(f"CREATE TABLE t{i}(a int)") + publisher_cursor.execute(f"CREATE PUBLICATION mypub{i} FOR TABLE t{i}") + publisher_cursor.execute( + f"select pg_catalog.pg_create_logical_replication_slot('mysub{i}', 'pgoutput');" + ) + publisher_cursor.execute(f"INSERT INTO t{i} VALUES ({i})") + + with endpoint.cursor(dbname=f"db{i}") as cursor: + cursor.execute(f"CREATE TABLE t{i}(a int)") + cursor.execute( + f"CREATE SUBSCRIPTION mysub{i} CONNECTION '{connstr}' PUBLICATION mypub{i} WITH (create_slot = false) " + ) + + # wait for the subscription to be active + for i in range(NUMBER_OF_DBS): + logical_replication_sync( + endpoint, + endpoint, + f"mysub{i}", + sub_dbname=f"db{i}", + pub_dbname="publisher_db", + ) + + # Check that replication is working + for i in range(NUMBER_OF_DBS): + with endpoint.cursor(dbname=f"db{i}") as cursor: + cursor.execute(f"SELECT * FROM t{i}") + rows = cursor.fetchall() + assert len(rows) == 1 + assert rows[0][0] == i + + last_insert_lsn = query_scalar(cursor, "select pg_current_wal_insert_lsn();") + + def start_publisher_workload(table_num: int, duration: int): + start = time.time() + with endpoint.cursor(dbname="publisher_db") as cur: + while time.time() - start < duration: + cur.execute(f"INSERT INTO t{i} SELECT FROM generate_series(1,1000)") + + LOAD_DURATION = 5 + threads = [ + threading.Thread(target=start_publisher_workload, args=(i, LOAD_DURATION)) + for i in range(NUMBER_OF_DBS) + ] + + for thread in threads: + thread.start() + + sub_child_1_timeline_id = env.create_branch( + "subscriber_child_1", + ancestor_branch_name="main", + ancestor_start_lsn=last_insert_lsn, + ) + + sub_child_1 = env.endpoints.create("subscriber_child_1") + + sub_child_1.respec( + skip_pg_catalog_updates=False, + reconfigure_concurrency=5, + drop_subscriptions_before_start=True, + cluster={ + "databases": TEST_DB_NAMES, + "roles": [], + }, + ) + + sub_child_1.start() + + # ensure that subscription deletion happened on this timeline + with sub_child_1.cursor() as scur_postgres: + scur_postgres.execute("SELECT timeline_id from neon.drop_subscriptions_done") + res = scur_postgres.fetchall() + log.info(f"res = {res}") + assert len(res) == 1 + assert str(sub_child_1_timeline_id) == res[0][0] + + # ensure that there are no subscriptions in the databases + for i in range(NUMBER_OF_DBS): + with sub_child_1.cursor(dbname=f"db{i}") as cursor: + cursor.execute("SELECT * FROM pg_catalog.pg_subscription") + res = cursor.fetchall() + assert len(res) == 0 + + # ensure that there are no unexpected rows in the tables + cursor.execute(f"SELECT * FROM t{i}") + rows = cursor.fetchall() + assert len(rows) == 1 + assert rows[0][0] == i + + for thread in threads: + thread.join() + + # ensure that logical replication is still working in main endpoint + # wait for it to catch up + for i in range(NUMBER_OF_DBS): + logical_replication_sync( + endpoint, + endpoint, + f"mysub{i}", + sub_dbname=f"db{i}", + pub_dbname="publisher_db", + ) + + # verify that the data is the same in publisher and subscriber tables + with endpoint.cursor(dbname="publisher_db") as publisher_cursor: + for i in 
range(NUMBER_OF_DBS): + with endpoint.cursor(dbname=f"db{i}") as cursor: + publisher_cursor.execute(f"SELECT count(*) FROM t{i}") + cursor.execute(f"SELECT count(*) FROM t{i}") + pub_res = publisher_cursor.fetchone() + sub_res = cursor.fetchone() + log.info(f"for table t{i}: pub_res = {pub_res}, sub_res = {sub_res}") + assert pub_res == sub_res diff --git a/test_runner/regress/test_timeline_archive.py b/test_runner/regress/test_timeline_archive.py index 2706ddf2f0..c17840d31c 100644 --- a/test_runner/regress/test_timeline_archive.py +++ b/test_runner/regress/test_timeline_archive.py @@ -823,6 +823,8 @@ def test_timeline_retain_lsn( [ ".*initial size calculation failed: PageRead.MissingKey.could not find data for key.*", ".*page_service_conn_main.*could not find data for key.*", + ".*failed to get checkpoint bytes.*", + ".*failed to get control bytes.*", ] ) if offload_child is None or "no-restart" not in offload_child: diff --git a/test_runner/regress/test_wal_acceptor.py b/test_runner/regress/test_wal_acceptor.py index c5045fe4a4..0a05189bfb 100644 --- a/test_runner/regress/test_wal_acceptor.py +++ b/test_runner/regress/test_wal_acceptor.py @@ -811,60 +811,6 @@ class ProposerPostgres(PgProtocol): self.pg_bin.run(args) -# insert wal in all safekeepers and run sync on proposer -def test_sync_safekeepers( - neon_env_builder: NeonEnvBuilder, - pg_bin: PgBin, - port_distributor: PortDistributor, -): - # We don't really need the full environment for this test, just the - # safekeepers would be enough. - neon_env_builder.num_safekeepers = 3 - env = neon_env_builder.init_start() - - tenant_id = TenantId.generate() - timeline_id = TimelineId.generate() - - # write config for proposer - pgdata_dir = os.path.join(env.repo_dir, "proposer_pgdata") - pg = ProposerPostgres( - pgdata_dir, pg_bin, tenant_id, timeline_id, "127.0.0.1", port_distributor.get_port() - ) - pg.create_dir_config(env.get_safekeeper_connstrs()) - - # valid lsn, which is not in the segment start, nor in zero segment - epoch_start_lsn = Lsn("0/16B9188") - begin_lsn = epoch_start_lsn - - # append and commit WAL - lsn_after_append = [] - for i in range(3): - res = env.safekeepers[i].append_logical_message( - tenant_id, - timeline_id, - { - "lm_prefix": "prefix", - "lm_message": "message", - "set_commit_lsn": True, - "send_proposer_elected": True, - "term": 2, - "begin_lsn": int(begin_lsn), - "epoch_start_lsn": int(epoch_start_lsn), - "truncate_lsn": int(epoch_start_lsn), - "pg_version": int(env.pg_version) * 10000, - }, - ) - lsn = Lsn(res["inserted_wal"]["end_lsn"]) - lsn_after_append.append(lsn) - log.info(f"safekeeper[{i}] lsn after append: {lsn}") - - # run sync safekeepers - lsn_after_sync = pg.sync_safekeepers() - log.info(f"lsn after sync = {lsn_after_sync}") - - assert all(lsn_after_sync == lsn for lsn in lsn_after_append) - - @pytest.mark.parametrize("auth_enabled", [False, True]) def test_timeline_status(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): neon_env_builder.auth_enabled = auth_enabled @@ -1740,7 +1686,7 @@ def test_replace_safekeeper(neon_env_builder: NeonEnvBuilder): @pytest.mark.parametrize("auth_enabled", [False, True]) -def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): +def test_delete(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): neon_env_builder.auth_enabled = auth_enabled env = neon_env_builder.init_start() @@ -2269,13 +2215,21 @@ def test_membership_api(neon_env_builder: NeonEnvBuilder): neon_env_builder.num_safekeepers = 1 env = neon_env_builder.init_start() + # 
These are expected after timeline deletion on safekeepers. + env.pageserver.allowed_errors.extend( + [ + ".*Timeline .* was not found in global map.*", + ".*Timeline .* was cancelled and cannot be used anymore.*", + ] + ) + tenant_id = env.initial_tenant timeline_id = env.initial_timeline sk = env.safekeepers[0] http_cli = sk.http_client() - sk_id_1 = SafekeeperId(env.safekeepers[0].id, "localhost", sk.port.pg_tenant_only) + sk_id_1 = SafekeeperId(sk.id, "localhost", sk.port.pg_tenant_only) sk_id_2 = SafekeeperId(11, "localhost", 5434) # just a mock # Request to switch before timeline creation should fail. @@ -2303,19 +2257,28 @@ def test_membership_api(neon_env_builder: NeonEnvBuilder): log.info(f"conf after restart: {after_restart}") assert after_restart.generation == 4 - # Switch into disjoint conf. - non_joint = Configuration(generation=5, members=[sk_id_2], new_members=None) + # Switch into non joint conf of which sk is not a member, must fail. + non_joint_not_member = Configuration(generation=5, members=[sk_id_2], new_members=None) + with pytest.raises(requests.exceptions.HTTPError): + resp = http_cli.membership_switch(tenant_id, timeline_id, non_joint_not_member) + + # Switch into good non joint conf. + non_joint = Configuration(generation=6, members=[sk_id_1], new_members=None) resp = http_cli.membership_switch(tenant_id, timeline_id, non_joint) log.info(f"non joint switch resp: {resp}") assert resp.previous_conf.generation == 4 - assert resp.current_conf.generation == 5 + assert resp.current_conf.generation == 6 - # Switch request to lower conf should be ignored. - lower_conf = Configuration(generation=3, members=[], new_members=None) - resp = http_cli.membership_switch(tenant_id, timeline_id, lower_conf) - log.info(f"lower switch resp: {resp}") - assert resp.previous_conf.generation == 5 - assert resp.current_conf.generation == 5 + # Switch request to lower conf should be rejected. + lower_conf = Configuration(generation=3, members=[sk_id_1], new_members=None) + with pytest.raises(requests.exceptions.HTTPError): + http_cli.membership_switch(tenant_id, timeline_id, lower_conf) + + # Now, exclude sk from the membership, timeline should be deleted. 
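+    # Once excluded, the safekeeper deletes its copy of the timeline, so subsequent
+    # timeline_status calls against it are expected to fail.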
+ excluded_conf = Configuration(generation=7, members=[sk_id_2], new_members=None) + http_cli.timeline_exclude(tenant_id, timeline_id, excluded_conf) + with pytest.raises(requests.exceptions.HTTPError): + http_cli.timeline_status(tenant_id, timeline_id) # In this test we check for excessive START_REPLICATION and START_WAL_PUSH queries diff --git a/test_runner/regress/test_wal_acceptor_async.py b/test_runner/regress/test_wal_acceptor_async.py index 936c774657..56539a0a08 100644 --- a/test_runner/regress/test_wal_acceptor_async.py +++ b/test_runner/regress/test_wal_acceptor_async.py @@ -539,13 +539,16 @@ def test_recovery_uncommitted(neon_env_builder: NeonEnvBuilder): asyncio.run(run_recovery_uncommitted(env)) -async def run_wal_truncation(env: NeonEnv): +async def run_wal_truncation(env: NeonEnv, safekeeper_proto_version: int): tenant_id = env.initial_tenant timeline_id = env.initial_timeline (sk1, sk2, sk3) = env.safekeepers - ep = env.endpoints.create_start("main") + config_lines = [ + f"neon.safekeeper_proto_version = {safekeeper_proto_version}", + ] + ep = env.endpoints.create_start("main", config_lines=config_lines) ep.safe_psql("create table t (key int, value text)") ep.safe_psql("insert into t select generate_series(1, 100), 'payload'") @@ -572,6 +575,7 @@ async def run_wal_truncation(env: NeonEnv): sk2.start() ep = env.endpoints.create_start( "main", + config_lines=config_lines, ) ep.safe_psql("insert into t select generate_series(1, 200), 'payload'") @@ -590,11 +594,13 @@ async def run_wal_truncation(env: NeonEnv): # Simple deterministic test creating tail of WAL on safekeeper which is # truncated when majority without this sk elects walproposer starting earlier. -def test_wal_truncation(neon_env_builder: NeonEnvBuilder): +# Test both proto versions until we fully migrate. +@pytest.mark.parametrize("safekeeper_proto_version", [2, 3]) +def test_wal_truncation(neon_env_builder: NeonEnvBuilder, safekeeper_proto_version: int): neon_env_builder.num_safekeepers = 3 env = neon_env_builder.init_start() - asyncio.run(run_wal_truncation(env)) + asyncio.run(run_wal_truncation(env, safekeeper_proto_version)) async def run_segment_init_failure(env: NeonEnv): diff --git a/vendor/postgres-v15 b/vendor/postgres-v15 index 81e2eef061..9b118b1cff 160000 --- a/vendor/postgres-v15 +++ b/vendor/postgres-v15 @@ -1 +1 @@ -Subproject commit 81e2eef0616c65c2233c75b06f25766ae4c080c4 +Subproject commit 9b118b1cffa6e4ca0d63389b57b54d11e207e9a8 diff --git a/vendor/postgres-v16 b/vendor/postgres-v16 index 9422247c58..799e7a08dd 160000 --- a/vendor/postgres-v16 +++ b/vendor/postgres-v16 @@ -1 +1 @@ -Subproject commit 9422247c582e7c1a08a4855d04af0874f8df2f34 +Subproject commit 799e7a08dd171aa06a7395dd326f4243aaeb9f93 diff --git a/vendor/postgres-v17 b/vendor/postgres-v17 index a8fea8b4be..517b8dc244 160000 --- a/vendor/postgres-v17 +++ b/vendor/postgres-v17 @@ -1 +1 @@ -Subproject commit a8fea8b4be43039f0782347c88a9b9b25f50c9d8 +Subproject commit 517b8dc244abf3e56f0089849e464af76f70b94e diff --git a/vendor/revisions.json b/vendor/revisions.json index 72d97d7f6a..8dde46a01e 100644 --- a/vendor/revisions.json +++ b/vendor/revisions.json @@ -1,15 +1,15 @@ { "v17": [ "17.4", - "a8fea8b4be43039f0782347c88a9b9b25f50c9d8" + "517b8dc244abf3e56f0089849e464af76f70b94e" ], "v16": [ "16.8", - "9422247c582e7c1a08a4855d04af0874f8df2f34" + "799e7a08dd171aa06a7395dd326f4243aaeb9f93" ], "v15": [ "15.12", - "81e2eef0616c65c2233c75b06f25766ae4c080c4" + "9b118b1cffa6e4ca0d63389b57b54d11e207e9a8" ], "v14": [ "14.17",