Mirror of https://github.com/neondatabase/neon.git (synced 2026-05-15 20:20:38 +00:00)

Compare commits: erik/delet...heikki/use (112 commits)
| Author | SHA1 | Date |
| --- | --- | --- |
|  | a9edb99749 |  |
|  | df264380b9 |  |
|  | 4bbe75de8c |  |
|  | c0c3ed94a9 |  |
|  | b1d8771d5f |  |
|  | 3e82addd64 |  |
|  | 5e3c234edc |  |
|  | ff3819efc7 |  |
|  | f927ae6e15 |  |
|  | 61d385caea |  |
|  | c214c32d3f |  |
|  | 9b42d1ce1a |  |
|  | 0b9b391ea0 |  |
|  | 3f376e44ba |  |
|  | 5b81a774fc |  |
|  | bd335fa751 |  |
|  | 34996416d6 |  |
|  | d571553d8a |  |
|  | f7474d3f41 |  |
|  | e808e9432a |  |
|  | 7c7180a79d |  |
|  | 07bee60037 |  |
|  | f7edcf12e3 |  |
|  | 1d9346f8b7 |  |
|  | a6d8640d6f |  |
|  | bb7e244a42 |  |
|  | 787b98f8f2 |  |
|  | f148d71d9b |  |
|  | aad817d806 |  |
|  | 0b3db74c44 |  |
|  | 9ba2a87e69 |  |
|  | 1f9511dbd9 |  |
|  | aab5482fd5 |  |
|  | 3720cf1c5a |  |
|  | 0453eaf65c |  |
|  | 2d96134a4e |  |
|  | e52e93797f |  |
|  | aa115a774c |  |
|  | 2f0d6571a9 |  |
|  | 7199919f04 |  |
|  | a4e3989c8d |  |
|  | 9d074db18d |  |
|  | 538ea03f73 |  |
|  | cb8060545d |  |
|  | 9151d3a318 |  |
|  | 381115b68e |  |
|  | 1a69a8cba7 |  |
|  | ed98f6d57e |  |
|  | f9a063e2e9 |  |
|  | f36ec5c84b |  |
|  | 274cb13293 |  |
|  | 290f007b8e |  |
|  | 29e4ca351e |  |
|  | caece02da7 |  |
|  | d36baae758 |  |
|  | f81259967d |  |
|  | 719ec378cd |  |
|  | 27241f039c |  |
|  | 811506aaa2 |  |
|  | 2884917bd4 |  |
|  | b34598516f |  |
|  | 84bbe87d60 |  |
|  | b10890b81c |  |
|  | 3204efc860 |  |
|  | da79cc5eee |  |
|  | 39d42d846a |  |
|  | 0330b61729 |  |
|  | 8a2d95b4b5 |  |
|  | 8c6d133d31 |  |
|  | 81f08d304a |  |
|  | d566d604cf |  |
|  | f739773edd |  |
|  | 2dae0612dd |  |
|  | 2ec8dff6f7 |  |
|  | ae091c6913 |  |
|  | a32e8871ac |  |
|  | 9177312ba6 |  |
|  | b992a1a62a |  |
|  | 3d7a32f619 |  |
|  | fac5db3c8d |  |
|  | a82a6631fd |  |
|  | da7496e1ee |  |
|  | 646e011c4d |  |
|  | 878c1c7110 |  |
|  | 996f0a3753 |  |
|  | 8bdb1828c8 |  |
|  | 3e8bf2159d |  |
|  | 5008324460 |  |
|  | 487f3202fe |  |
|  | 6a741fd1c2 |  |
|  | 7ac7755dad |  |
|  | 98e18e9a54 |  |
|  | 0cf9157adc |  |
|  | b6f972ed83 |  |
|  | a4d0a34591 |  |
|  | ae463f366b |  |
|  | 8c2f85b209 |  |
|  | e37ba8642d |  |
|  | 8fea43a5ba |  |
|  | 536bdb3209 |  |
|  | b8095f84a0 |  |
|  | 356cca23a5 |  |
|  | 7b966a2b71 |  |
|  | e38694742c |  |
|  | 922f3ee17d |  |
|  | 61d2474632 |  |
|  | b77dd66bc4 |  |
|  | 49775d28e4 |  |
|  | f45f9209b9 |  |
|  | 20fe4b8ec3 |  |
|  | f62047ae97 |  |
|  | ec354884ea |  |
.github/actionlint.yml (vendored, 4 changed lines)

@@ -28,3 +28,7 @@ config-variables:
- DEV_AWS_OIDC_ROLE_MANAGE_BENCHMARK_EC2_VMS_ARN
- SLACK_ON_CALL_STORAGE_STAGING_STREAM
- SLACK_CICD_CHANNEL_ID
- SLACK_STORAGE_CHANNEL_ID
- NEON_DEV_AWS_ACCOUNT_ID
- NEON_PROD_AWS_ACCOUNT_ID
- AWS_ECR_REGION

.github/actions/neon-project-create/action.yml (vendored, 22 changed lines)

@@ -19,7 +19,11 @@ inputs:
default: '[1, 1]'
# settings below only needed if you want the project to be sharded from the beginning
shard_split_project:
description: 'by default new projects are not shard-split, specify true to shard-split'
description: 'by default new projects are not shard-split initiailly, but only when shard-split threshold is reached, specify true to explicitly shard-split initially'
required: false
default: 'false'
disable_sharding:
description: 'by default new projects use storage controller default policy to shard-split when shard-split threshold is reached, specify true to explicitly disable sharding'
required: false
default: 'false'
admin_api_key:
@@ -107,6 +111,21 @@ runs:
-H "Accept: application/json" -H "Content-Type: application/json" -H "Authorization: Bearer ${ADMIN_API_KEY}" \
-d "{\"new_shard_count\": $SHARD_COUNT, \"new_stripe_size\": $STRIPE_SIZE}"
fi
if [ "${DISABLE_SHARDING}" = "true" ]; then
# determine tenant ID
TENANT_ID=`${PSQL} ${dsn} -t -A -c "SHOW neon.tenant_id"`

echo "Explicitly disabling shard-splitting for project ${project_id} with tenant_id ${TENANT_ID}"

echo "Sending PUT request to https://${API_HOST}/regions/${REGION_ID}/api/v1/admin/storage/proxy/control/v1/tenant/${TENANT_ID}/policy"
echo "with body {\"scheduling\": \"Essential\"}"

# we need an ADMIN API KEY to invoke storage controller API for shard splitting (bash -u above checks that the variable is set)
curl -X PUT \
"https://${API_HOST}/regions/${REGION_ID}/api/v1/admin/storage/proxy/control/v1/tenant/${TENANT_ID}/policy" \
-H "Accept: application/json" -H "Content-Type: application/json" -H "Authorization: Bearer ${ADMIN_API_KEY}" \
-d "{\"scheduling\": \"Essential\"}"
fi

env:
API_HOST: ${{ inputs.api_host }}
@@ -116,6 +135,7 @@ runs:
MIN_CU: ${{ fromJSON(inputs.compute_units)[0] }}
MAX_CU: ${{ fromJSON(inputs.compute_units)[1] }}
SHARD_SPLIT_PROJECT: ${{ inputs.shard_split_project }}
DISABLE_SHARDING: ${{ inputs.disable_sharding }}
ADMIN_API_KEY: ${{ inputs.admin_api_key }}
SHARD_COUNT: ${{ inputs.shard_count }}
STRIPE_SIZE: ${{ inputs.stripe_size }}

@@ -348,6 +348,10 @@ jobs:
rerun_failed: true
pg_version: ${{ matrix.pg_version }}
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
# `--session-timeout` is equal to (timeout-minutes - 10 minutes) * 60 seconds.
# Attempt to stop tests gracefully to generate test reports
# until they are forcibly stopped by the stricter `timeout-minutes` limit.
extra_params: --session-timeout=${{ inputs.sanitizers != 'enabled' && 3000 || 10200 }}
env:
TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
CHECK_ONDISK_DATA_COMPATIBILITY: nonempty

.github/workflows/_push-to-acr.yml (vendored, 56 lines, deleted)

@@ -1,56 +0,0 @@
name: Push images to ACR
on:
workflow_call:
inputs:
client_id:
description: Client ID of Azure managed identity or Entra app
required: true
type: string
image_tag:
description: Tag for the container image
required: true
type: string
images:
description: Images to push
required: true
type: string
registry_name:
description: Name of the container registry
required: true
type: string
subscription_id:
description: Azure subscription ID
required: true
type: string
tenant_id:
description: Azure tenant ID
required: true
type: string

jobs:
push-to-acr:
runs-on: ubuntu-22.04
permissions:
contents: read # This is required for actions/checkout
id-token: write # This is required for Azure Login to work.

steps:
- name: Azure login
uses: azure/login@6c251865b4e6290e7b78be643ea2d005bc51f69a # @v2.1.1
with:
client-id: ${{ inputs.client_id }}
subscription-id: ${{ inputs.subscription_id }}
tenant-id: ${{ inputs.tenant_id }}

- name: Login to ACR
run: |
az acr login --name=${{ inputs.registry_name }}

- name: Copy docker images to ACR ${{ inputs.registry_name }}
run: |
images='${{ inputs.images }}'
for image in ${images}; do
docker buildx imagetools create \
-t ${{ inputs.registry_name }}.azurecr.io/neondatabase/${image}:${{ inputs.image_tag }} \
neondatabase/${image}:${{ inputs.image_tag }}
done

.github/workflows/_push-to-container-registry.yml (vendored, new file, 101 lines)

@@ -0,0 +1,101 @@
name: Push images to Container Registry
on:
workflow_call:
inputs:
# Example: {"docker.io/neondatabase/neon:13196061314":["${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/neon:13196061314","neoneastus2.azurecr.io/neondatabase/neon:13196061314"]}
image-map:
description: JSON map of images, mapping from a source image to an array of target images that should be pushed.
required: true
type: string
aws-region:
description: AWS region to log in to. Required when pushing to ECR.
required: false
type: string
aws-account-ids:
description: Comma separated AWS account IDs to log in to for pushing to ECR. Required when pushing to ECR.
required: false
type: string
azure-client-id:
description: Client ID of Azure managed identity or Entra app. Required when pushing to ACR.
required: false
type: string
azure-subscription-id:
description: Azure subscription ID. Required when pushing to ACR.
required: false
type: string
azure-tenant-id:
description: Azure tenant ID. Required when pushing to ACR.
required: false
type: string
acr-registry-name:
description: ACR registry name. Required when pushing to ACR.
required: false
type: string
secrets:
docker-hub-username:
description: Docker Hub username. Required when pushing to Docker Hub.
required: false
docker-hub-password:
description: Docker Hub password. Required when pushing to Docker Hub.
required: false
aws-role-to-assume:
description: AWS role to assume. Required when pushing to ECR.
required: false

permissions: {}

defaults:
run:
shell: bash -euo pipefail {0}

jobs:
push-to-container-registry:
runs-on: ubuntu-22.04
permissions:
id-token: write # Required for aws/azure login
steps:
- uses: actions/checkout@v4
with:
sparse-checkout: scripts/push_with_image_map.py
sparse-checkout-cone-mode: false

- name: Print image-map
run: echo '${{ inputs.image-map }}' | jq

- name: Configure AWS credentials
if: contains(inputs.image-map, 'amazonaws.com/')
uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: "${{ inputs.aws-region }}"
role-to-assume: "${{ secrets.aws-role-to-assume }}"
role-duration-seconds: 3600

- name: Login to ECR
if: contains(inputs.image-map, 'amazonaws.com/')
uses: aws-actions/amazon-ecr-login@v2
with:
registries: "${{ inputs.aws-account-ids }}"

- name: Configure Azure credentials
if: contains(inputs.image-map, 'azurecr.io/')
uses: azure/login@6c251865b4e6290e7b78be643ea2d005bc51f69a # @v2.1.1
with:
client-id: ${{ inputs.azure-client-id }}
subscription-id: ${{ inputs.azure-subscription-id }}
tenant-id: ${{ inputs.azure-tenant-id }}

- name: Login to ACR
if: contains(inputs.image-map, 'azurecr.io/')
run: |
az acr login --name=${{ inputs.acr-registry-name }}

- name: Log in to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.docker-hub-username }}
password: ${{ secrets.docker-hub-password }}

- name: Copy docker images to target registries
run: python scripts/push_with_image_map.py
env:
IMAGE_MAP: ${{ inputs.image-map }}

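The copy step above delegates to scripts/push_with_image_map.py, which is not part of this compare. As a rough sketch only (the real script may be organized differently), a script matching the behaviour described by the inputs could read the IMAGE_MAP JSON and copy each source manifest to its targets with docker buildx imagetools create:

```python
#!/usr/bin/env python3
"""Hypothetical sketch: push images according to an IMAGE_MAP JSON document."""
import json
import os
import subprocess


def main() -> None:
    # IMAGE_MAP is the JSON map passed by the workflow, e.g.
    # {"docker.io/neondatabase/neon:123": ["<account>.dkr.ecr.<region>.amazonaws.com/neon:123", ...]}
    image_map: dict[str, list[str]] = json.loads(os.environ["IMAGE_MAP"])

    for source, targets in image_map.items():
        for target in targets:
            # `imagetools create` copies a (multi-arch) manifest list to a new
            # tag/registry without pulling the image contents locally.
            cmd = ["docker", "buildx", "imagetools", "create", "-t", target, source]
            print("Running:", " ".join(cmd))
            subprocess.run(cmd, check=True)


if __name__ == "__main__":
    main()
```

Using imagetools create keeps multi-arch manifest lists intact, which matches how the workflows below combine the -x64 and -arm64 images.
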
.github/workflows/build_and_test.yml (vendored, 292 changed lines)

@@ -68,7 +68,7 @@ jobs:
tag:
needs: [ check-permissions ]
runs-on: [ self-hosted, small ]
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned
container: ${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/base:pinned
outputs:
build-tag: ${{steps.build-tag.outputs.tag}}

@@ -263,8 +263,9 @@ jobs:
echo "json=$(jq --compact-output '.' /tmp/benchmark_durations.json)" >> $GITHUB_OUTPUT

benchmarks:
if: github.ref_name == 'main' || contains(github.event.pull_request.labels.*.name, 'run-benchmarks')
needs: [ check-permissions, build-and-test-locally, build-build-tools-image, get-benchmarks-durations ]
# `!failure() && !cancelled()` is required because the workflow depends on the job that can be skipped: `deploy` in PRs
if: github.ref_name == 'main' || (contains(github.event.pull_request.labels.*.name, 'run-benchmarks') && !failure() && !cancelled())
needs: [ check-permissions, build-build-tools-image, get-benchmarks-durations, deploy ]
permissions:
id-token: write # aws-actions/configure-aws-credentials
statuses: write
@@ -497,7 +498,7 @@ jobs:

trigger-e2e-tests:
if: ${{ !github.event.pull_request.draft || contains( github.event.pull_request.labels.*.name, 'run-e2e-tests-in-draft') || github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute' }}
needs: [ check-permissions, promote-images-dev, tag ]
needs: [ check-permissions, push-neon-image-dev, push-compute-image-dev, tag ]
uses: ./.github/workflows/trigger-e2e-tests.yml
secrets: inherit

@@ -571,21 +572,6 @@ jobs:
neondatabase/neon:${{ needs.tag.outputs.build-tag }}-bookworm-x64 \
neondatabase/neon:${{ needs.tag.outputs.build-tag }}-bookworm-arm64

- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: eu-central-1
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
role-duration-seconds: 3600

- name: Login to Amazon Dev ECR
uses: aws-actions/amazon-ecr-login@v2

- name: Push multi-arch image to ECR
run: |
docker buildx imagetools create -t 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{ needs.tag.outputs.build-tag }} \
neondatabase/neon:${{ needs.tag.outputs.build-tag }}

compute-node-image-arch:
needs: [ check-permissions, build-build-tools-image, tag ]
permissions:
@@ -632,16 +618,6 @@ jobs:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}

- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: eu-central-1
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
role-duration-seconds: 3600

- name: Login to Amazon Dev ECR
uses: aws-actions/amazon-ecr-login@v2

- uses: docker/login-action@v3
with:
registry: cache.neon.build
@@ -729,21 +705,6 @@ jobs:
neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-x64 \
neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-arm64

- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: eu-central-1
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
role-duration-seconds: 3600

- name: Login to Amazon Dev ECR
uses: aws-actions/amazon-ecr-login@v2

- name: Push multi-arch compute-node-${{ matrix.version.pg }} image to ECR
run: |
docker buildx imagetools create -t 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }} \
neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}

vm-compute-node-image:
needs: [ check-permissions, tag, compute-node-image ]
runs-on: [ self-hosted, large ]
@@ -876,133 +837,112 @@ jobs:
docker compose --profile test-extensions -f ./docker-compose/docker-compose.yml logs || true
docker compose --profile test-extensions -f ./docker-compose/docker-compose.yml down

promote-images-dev:
needs: [ check-permissions, tag, vm-compute-node-image, neon-image ]
generate-image-maps:
needs: [ tag ]
runs-on: ubuntu-22.04

permissions:
id-token: write # aws-actions/configure-aws-credentials
statuses: write
contents: read

env:
VERSIONS: v14 v15 v16 v17

outputs:
neon-dev: ${{ steps.generate.outputs.neon-dev }}
neon-prod: ${{ steps.generate.outputs.neon-prod }}
compute-dev: ${{ steps.generate.outputs.compute-dev }}
compute-prod: ${{ steps.generate.outputs.compute-prod }}
steps:
- uses: docker/login-action@v3
- uses: actions/checkout@v4
with:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
sparse-checkout: scripts/generate_image_maps.py
sparse-checkout-cone-mode: false

- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: eu-central-1
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
role-duration-seconds: 3600
- name: Generate Image Maps
id: generate
run: python scripts/generate_image_maps.py
env:
BUILD_TAG: "${{ needs.tag.outputs.build-tag }}"
BRANCH: "${{ github.ref_name }}"
DEV_ACR: "${{ vars.AZURE_DEV_REGISTRY_NAME }}"
PROD_ACR: "${{ vars.AZURE_PROD_REGISTRY_NAME }}"
DEV_AWS: "${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}"
PROD_AWS: "${{ vars.NEON_PROD_AWS_ACCOUNT_ID }}"
AWS_REGION: "${{ vars.AWS_ECR_REGION }}"

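scripts/generate_image_maps.py itself is not included in this compare, so the following is only an illustrative sketch of how the four outputs (neon-dev, neon-prod, compute-dev, compute-prod) could be assembled from the environment variables above. The handling of BRANCH (for example, deciding whether to also emit latest tags) is omitted, and the real script may differ:

```python
#!/usr/bin/env python3
"""Hypothetical sketch: build neon-dev/neon-prod/compute-dev/compute-prod image maps."""
import json
import os

build_tag = os.environ["BUILD_TAG"]
dev_acr, prod_acr = os.environ["DEV_ACR"], os.environ["PROD_ACR"]
dev_aws, prod_aws = os.environ["DEV_AWS"], os.environ["PROD_AWS"]
region = os.environ["AWS_REGION"]

# The "neon" map carries the storage image; the "compute" map carries the
# compute-node and vm-compute-node images for all supported Postgres versions.
components = {
    "neon": ["neon"],
    "compute": [f"{kind}compute-node-{v}"
                for kind in ("", "vm-")
                for v in ("v14", "v15", "v16", "v17")],
}


def targets(image: str, aws_account: str, acr: str) -> list[str]:
    """Target tags for one source image in a given environment."""
    return [
        f"{aws_account}.dkr.ecr.{region}.amazonaws.com/{image}:{build_tag}",
        f"{acr}.azurecr.io/neondatabase/{image}:{build_tag}",
    ]


outputs = {}
for name, images in components.items():
    outputs[f"{name}-dev"] = {
        f"docker.io/neondatabase/{img}:{build_tag}": targets(img, dev_aws, dev_acr)
        for img in images
    }
    outputs[f"{name}-prod"] = {
        f"docker.io/neondatabase/{img}:{build_tag}": targets(img, prod_aws, prod_acr)
        for img in images
    }

# Expose each map as a job output, one compact JSON document per key.
with open(os.environ["GITHUB_OUTPUT"], "a") as gh_out:
    for key, image_map in outputs.items():
        gh_out.write(f"{key}={json.dumps(image_map)}\n")
```
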
- name: Login to Amazon Dev ECR
uses: aws-actions/amazon-ecr-login@v2

- name: Copy vm-compute-node images to ECR
run: |
for version in ${VERSIONS}; do
docker buildx imagetools create -t 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-${version}:${{ needs.tag.outputs.build-tag }} \
neondatabase/vm-compute-node-${version}:${{ needs.tag.outputs.build-tag }}
done

promote-images-prod:
needs: [ check-permissions, tag, test-images, promote-images-dev ]
runs-on: ubuntu-22.04
if: github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute'

permissions:
id-token: write # aws-actions/configure-aws-credentials
statuses: write
contents: read

env:
VERSIONS: v14 v15 v16 v17

steps:
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: eu-central-1
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
role-duration-seconds: 3600

- name: Login to Amazon Dev ECR
uses: aws-actions/amazon-ecr-login@v2

- uses: docker/login-action@v3
with:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}

- name: Add latest tag to images
if: github.ref_name == 'main'
run: |
for repo in neondatabase 369495373322.dkr.ecr.eu-central-1.amazonaws.com; do
docker buildx imagetools create -t $repo/neon:latest \
$repo/neon:${{ needs.tag.outputs.build-tag }}

for version in ${VERSIONS}; do
docker buildx imagetools create -t $repo/compute-node-${version}:latest \
$repo/compute-node-${version}:${{ needs.tag.outputs.build-tag }}

docker buildx imagetools create -t $repo/vm-compute-node-${version}:latest \
$repo/vm-compute-node-${version}:${{ needs.tag.outputs.build-tag }}
done
done
docker buildx imagetools create -t neondatabase/neon-test-extensions-v16:latest \
neondatabase/neon-test-extensions-v16:${{ needs.tag.outputs.build-tag }}

- name: Configure AWS-prod credentials
if: github.ref_name == 'release'|| github.ref_name == 'release-proxy' || github.ref_name == 'release-compute'
uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: eu-central-1
mask-aws-account-id: true
role-to-assume: ${{ secrets.PROD_GHA_OIDC_ROLE }}

- name: Login to prod ECR
uses: docker/login-action@v3
if: github.ref_name == 'release'|| github.ref_name == 'release-proxy' || github.ref_name == 'release-compute'
with:
registry: 093970136003.dkr.ecr.eu-central-1.amazonaws.com

- name: Copy all images to prod ECR
if: github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute'
run: |
for image in neon {vm-,}compute-node-{v14,v15,v16,v17}; do
docker buildx imagetools create -t 093970136003.dkr.ecr.eu-central-1.amazonaws.com/${image}:${{ needs.tag.outputs.build-tag }} \
369495373322.dkr.ecr.eu-central-1.amazonaws.com/${image}:${{ needs.tag.outputs.build-tag }}
done

push-to-acr-dev:
if: github.ref_name == 'main'
needs: [ tag, promote-images-dev ]
uses: ./.github/workflows/_push-to-acr.yml
push-neon-image-dev:
needs: [ generate-image-maps, neon-image ]
uses: ./.github/workflows/_push-to-container-registry.yml
with:
client_id: ${{ vars.AZURE_DEV_CLIENT_ID }}
image_tag: ${{ needs.tag.outputs.build-tag }}
images: neon vm-compute-node-v14 vm-compute-node-v15 vm-compute-node-v16 vm-compute-node-v17 compute-node-v14 compute-node-v15 compute-node-v16 compute-node-v17
registry_name: ${{ vars.AZURE_DEV_REGISTRY_NAME }}
subscription_id: ${{ vars.AZURE_DEV_SUBSCRIPTION_ID }}
tenant_id: ${{ vars.AZURE_TENANT_ID }}
image-map: '${{ needs.generate-image-maps.outputs.neon-dev }}'
aws-region: ${{ vars.AWS_ECR_REGION }}
aws-account-ids: "${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}"
azure-client-id: ${{ vars.AZURE_DEV_CLIENT_ID }}
azure-subscription-id: ${{ vars.AZURE_DEV_SUBSCRIPTION_ID }}
azure-tenant-id: ${{ vars.AZURE_TENANT_ID }}
acr-registry-name: ${{ vars.AZURE_DEV_REGISTRY_NAME }}
secrets:
aws-role-to-assume: "${{ vars.DEV_AWS_OIDC_ROLE_ARN }}"
docker-hub-username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
docker-hub-password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}

push-to-acr-prod:
push-compute-image-dev:
needs: [ generate-image-maps, vm-compute-node-image ]
uses: ./.github/workflows/_push-to-container-registry.yml
with:
image-map: '${{ needs.generate-image-maps.outputs.compute-dev }}'
aws-region: ${{ vars.AWS_ECR_REGION }}
aws-account-ids: "${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}"
azure-client-id: ${{ vars.AZURE_DEV_CLIENT_ID }}
azure-subscription-id: ${{ vars.AZURE_DEV_SUBSCRIPTION_ID }}
azure-tenant-id: ${{ vars.AZURE_TENANT_ID }}
acr-registry-name: ${{ vars.AZURE_DEV_REGISTRY_NAME }}
secrets:
aws-role-to-assume: "${{ vars.DEV_AWS_OIDC_ROLE_ARN }}"
docker-hub-username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
docker-hub-password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}

push-neon-image-prod:
if: github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute'
needs: [ tag, promote-images-prod ]
uses: ./.github/workflows/_push-to-acr.yml
needs: [ generate-image-maps, neon-image, test-images ]
uses: ./.github/workflows/_push-to-container-registry.yml
with:
client_id: ${{ vars.AZURE_PROD_CLIENT_ID }}
image_tag: ${{ needs.tag.outputs.build-tag }}
images: neon vm-compute-node-v14 vm-compute-node-v15 vm-compute-node-v16 vm-compute-node-v17 compute-node-v14 compute-node-v15 compute-node-v16 compute-node-v17
registry_name: ${{ vars.AZURE_PROD_REGISTRY_NAME }}
subscription_id: ${{ vars.AZURE_PROD_SUBSCRIPTION_ID }}
tenant_id: ${{ vars.AZURE_TENANT_ID }}
image-map: '${{ needs.generate-image-maps.outputs.neon-prod }}'
aws-region: ${{ vars.AWS_ECR_REGION }}
aws-account-ids: "${{ vars.NEON_PROD_AWS_ACCOUNT_ID }}"
azure-client-id: ${{ vars.AZURE_PROD_CLIENT_ID }}
azure-subscription-id: ${{ vars.AZURE_PROD_SUBSCRIPTION_ID }}
azure-tenant-id: ${{ vars.AZURE_TENANT_ID }}
acr-registry-name: ${{ vars.AZURE_PROD_REGISTRY_NAME }}
secrets:
aws-role-to-assume: "${{ secrets.PROD_GHA_OIDC_ROLE }}"
docker-hub-username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
docker-hub-password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}

push-compute-image-prod:
if: github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute'
needs: [ generate-image-maps, vm-compute-node-image, test-images ]
uses: ./.github/workflows/_push-to-container-registry.yml
with:
image-map: '${{ needs.generate-image-maps.outputs.compute-prod }}'
aws-region: ${{ vars.AWS_ECR_REGION }}
aws-account-ids: "${{ vars.NEON_PROD_AWS_ACCOUNT_ID }}"
azure-client-id: ${{ vars.AZURE_PROD_CLIENT_ID }}
azure-subscription-id: ${{ vars.AZURE_PROD_SUBSCRIPTION_ID }}
azure-tenant-id: ${{ vars.AZURE_TENANT_ID }}
acr-registry-name: ${{ vars.AZURE_PROD_REGISTRY_NAME }}
secrets:
aws-role-to-assume: "${{ secrets.PROD_GHA_OIDC_ROLE }}"
docker-hub-username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
docker-hub-password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}

# This is a bit of a special case so we're not using a generated image map.
add-latest-tag-to-neon-extensions-test-image:
if: github.ref_name == 'main'
needs: [ tag, compute-node-image ]
uses: ./.github/workflows/_push-to-container-registry.yml
with:
image-map: |
{
"docker.io/neondatabase/neon-test-extensions-v16:${{ needs.tag.outputs.build-tag }}": ["docker.io/neondatabase/neon-test-extensions-v16:latest"],
"docker.io/neondatabase/neon-test-extensions-v17:${{ needs.tag.outputs.build-tag }}": ["docker.io/neondatabase/neon-test-extensions-v17:latest"]
}
secrets:
docker-hub-username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
docker-hub-password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}

trigger-custom-extensions-build-and-wait:
needs: [ check-permissions, tag ]
@@ -1084,7 +1024,7 @@ jobs:
exit 1

deploy:
needs: [ check-permissions, promote-images-prod, tag, build-and-test-locally, trigger-custom-extensions-build-and-wait, push-to-acr-dev, push-to-acr-prod ]
needs: [ check-permissions, push-neon-image-prod, push-compute-image-prod, tag, build-and-test-locally, trigger-custom-extensions-build-and-wait ]
# `!failure() && !cancelled()` is required because the workflow depends on the job that can be skipped: `push-to-acr-dev` and `push-to-acr-prod`
if: (github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute') && !failure() && !cancelled()
permissions:
@@ -1092,7 +1032,7 @@ jobs:
statuses: write
contents: write
runs-on: [ self-hosted, small ]
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
container: ${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/ansible:latest
steps:
- uses: actions/checkout@v4

@@ -1241,6 +1181,22 @@ jobs:
exit 1
fi

notify-storage-release-deploy-failure:
needs: [ deploy ]
# We want this to run even if (transitive) dependencies are skipped, because deploy should really be successful on release branch workflow runs.
if: github.ref_name == 'release' && needs.deploy.result != 'success' && always()
runs-on: ubuntu-22.04
steps:
- name: Post release-deploy failure to team-storage slack channel
uses: slackapi/slack-github-action@v2
with:
method: chat.postMessage
token: ${{ secrets.SLACK_BOT_TOKEN }}
payload: |
channel: ${{ vars.SLACK_STORAGE_CHANNEL_ID }}
text: |
🔴 @oncall-storage: deploy job on release branch had unexpected status "${{ needs.deploy.result }}" <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>.

# The job runs on `release` branch and copies compatibility data and Neon artifact from the last *release PR* to the latest directory
promote-compatibility-data:
needs: [ deploy ]
@@ -1337,7 +1293,7 @@ jobs:
done

pin-build-tools-image:
needs: [ build-build-tools-image, promote-images-prod, build-and-test-locally ]
needs: [ build-build-tools-image, test-images, build-and-test-locally ]
if: github.ref_name == 'main'
uses: ./.github/workflows/pin-build-tools-image.yml
with:
@@ -1362,7 +1318,8 @@ jobs:
- check-codestyle-rust
- check-dependencies-rust
- files-changed
- promote-images-dev
- push-compute-image-dev
- push-neon-image-dev
- test-images
- trigger-custom-extensions-build-and-wait
runs-on: ubuntu-22.04
@@ -1379,6 +1336,7 @@ jobs:
|| needs.check-codestyle-python.result == 'skipped'
|| needs.check-codestyle-rust.result == 'skipped'
|| needs.files-changed.result == 'skipped'
|| needs.promote-images-dev.result == 'skipped'
|| needs.push-compute-image-dev.result == 'skipped'
|| needs.push-neon-image-dev.result == 'skipped'
|| needs.test-images.result == 'skipped'
|| needs.trigger-custom-extensions-build-and-wait.result == 'skipped'

@@ -27,7 +27,7 @@ env:
jobs:
tag:
runs-on: [ self-hosted, small ]
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned
container: ${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/base:pinned
outputs:
build-tag: ${{steps.build-tag.outputs.tag}}

.github/workflows/force-test-extensions-upgrade.yml (vendored, new file, 76 lines)

@@ -0,0 +1,76 @@
name: Force Test Upgrading of Extension
on:
schedule:
# * is a special character in YAML so you have to quote this string
# ┌───────────── minute (0 - 59)
# │ ┌───────────── hour (0 - 23)
# │ │ ┌───────────── day of the month (1 - 31)
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
- cron: '45 2 * * *' # run once a day, timezone is utc
workflow_dispatch: # adds ability to run this manually

defaults:
run:
shell: bash -euxo pipefail {0}

concurrency:
# Allow only one workflow
group: ${{ github.workflow }}
cancel-in-progress: true

permissions:
id-token: write # aws-actions/configure-aws-credentials
statuses: write
contents: read

jobs:
regress:
strategy:
fail-fast: false
matrix:
pg-version: [16, 17]

runs-on: small

steps:
- uses: actions/checkout@v4
with:
submodules: false

- name: Get the last compute release tag
id: get-last-compute-release-tag
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
tag=$(gh api -q '[.[].tag_name | select(startswith("release-compute"))][0]'\
-H "Accept: application/vnd.github+json" \
-H "X-GitHub-Api-Version: 2022-11-28" \
"/repos/${GITHUB_REPOSITORY}/releases")
echo tag=${tag} >> ${GITHUB_OUTPUT}

- name: Test extension upgrade
timeout-minutes: 20
env:
NEWTAG: latest
OLDTAG: ${{ steps.get-last-compute-release-tag.outputs.tag }}
PG_VERSION: ${{ matrix.pg-version }}
FORCE_ALL_UPGRADE_TESTS: true
run: ./docker-compose/test_extensions_upgrade.sh

- name: Print logs and clean up
if: always()
run: |
docker compose --profile test-extensions -f ./docker-compose/docker-compose.yml logs || true
docker compose --profile test-extensions -f ./docker-compose/docker-compose.yml down

- name: Post to the Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: ${{ vars.SLACK_ON_CALL_QA_STAGING_STREAM }}
slack-message: |
Test upgrading of extensions: ${{ job.status }}
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

.github/workflows/ingest_benchmark.yml (vendored, 10 changed lines)

@@ -32,18 +32,27 @@ jobs:
- target_project: new_empty_project_stripe_size_2048
stripe_size: 2048 # 16 MiB
postgres_version: 16
disable_sharding: false
- target_project: new_empty_project_stripe_size_32768
stripe_size: 32768 # 256 MiB # note that this is different from null because using null will shard_split the project only if it reaches the threshold
# while here it is sharded from the beginning with a shard size of 256 MiB
disable_sharding: false
postgres_version: 16
- target_project: new_empty_project
stripe_size: null # run with neon defaults which will shard split only when reaching the threshold
disable_sharding: false
postgres_version: 16
- target_project: new_empty_project
stripe_size: null # run with neon defaults which will shard split only when reaching the threshold
disable_sharding: false
postgres_version: 17
- target_project: large_existing_project
stripe_size: null # cannot re-shared or choose different stripe size for existing, already sharded project
disable_sharding: false
postgres_version: 16
- target_project: new_empty_project_unsharded
stripe_size: null # run with neon defaults which will shard split only when reaching the threshold
disable_sharding: true
postgres_version: 16
max-parallel: 1 # we want to run each stripe size sequentially to be able to compare the results
permissions:
@@ -96,6 +105,7 @@ jobs:
admin_api_key: ${{ secrets.NEON_STAGING_ADMIN_API_KEY }}
shard_count: 8
stripe_size: ${{ matrix.stripe_size }}
disable_sharding: ${{ matrix.disable_sharding }}

- name: Initialize Neon project
if: ${{ startsWith(matrix.target_project, 'new_empty_project') }}

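For reference, the stripe_size values in this matrix are consistent with the inline size comments if stripe_size is counted in 8 KiB pages (an assumption, not stated in the diff itself):

2048 pages × 8 KiB = 16 MiB
32768 pages × 8 KiB = 256 MiB
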
.github/workflows/pin-build-tools-image.yml (vendored, 92 changed lines)

@@ -33,10 +33,6 @@ concurrency:
# No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job.
permissions: {}

env:
FROM_TAG: ${{ inputs.from-tag }}
TO_TAG: pinned

jobs:
check-manifests:
runs-on: ubuntu-22.04
@@ -46,11 +42,14 @@ jobs:
steps:
- name: Check if we really need to pin the image
id: check-manifests
env:
FROM_TAG: ${{ inputs.from-tag }}
TO_TAG: pinned
run: |
docker manifest inspect neondatabase/build-tools:${FROM_TAG} > ${FROM_TAG}.json
docker manifest inspect neondatabase/build-tools:${TO_TAG} > ${TO_TAG}.json
docker manifest inspect "docker.io/neondatabase/build-tools:${FROM_TAG}" > "${FROM_TAG}.json"
docker manifest inspect "docker.io/neondatabase/build-tools:${TO_TAG}" > "${TO_TAG}.json"

if diff ${FROM_TAG}.json ${TO_TAG}.json; then
if diff "${FROM_TAG}.json" "${TO_TAG}.json"; then
skip=true
else
skip=false
@@ -64,55 +63,34 @@ jobs:
# use format(..) to catch both inputs.force = true AND inputs.force = 'true'
if: needs.check-manifests.outputs.skip == 'false' || format('{0}', inputs.force) == 'true'

runs-on: ubuntu-22.04

permissions:
id-token: write # for `azure/login` and aws auth
id-token: write # Required for aws/azure login

steps:
- uses: docker/login-action@v3
with:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}

- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: eu-central-1
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
role-duration-seconds: 3600

- name: Login to Amazon Dev ECR
uses: aws-actions/amazon-ecr-login@v2

- name: Azure login
uses: azure/login@6c251865b4e6290e7b78be643ea2d005bc51f69a # @v2.1.1
with:
client-id: ${{ secrets.AZURE_DEV_CLIENT_ID }}
tenant-id: ${{ secrets.AZURE_TENANT_ID }}
subscription-id: ${{ secrets.AZURE_DEV_SUBSCRIPTION_ID }}

- name: Login to ACR
run: |
az acr login --name=neoneastus2

- name: Tag build-tools with `${{ env.TO_TAG }}` in Docker Hub, ECR, and ACR
env:
DEFAULT_DEBIAN_VERSION: bookworm
run: |
for debian_version in bullseye bookworm; do
tags=()

tags+=("-t" "neondatabase/build-tools:${TO_TAG}-${debian_version}")
tags+=("-t" "369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:${TO_TAG}-${debian_version}")
tags+=("-t" "neoneastus2.azurecr.io/neondatabase/build-tools:${TO_TAG}-${debian_version}")

if [ "${debian_version}" == "${DEFAULT_DEBIAN_VERSION}" ]; then
tags+=("-t" "neondatabase/build-tools:${TO_TAG}")
tags+=("-t" "369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:${TO_TAG}")
tags+=("-t" "neoneastus2.azurecr.io/neondatabase/build-tools:${TO_TAG}")
fi

docker buildx imagetools create "${tags[@]}" \
neondatabase/build-tools:${FROM_TAG}-${debian_version}
done
uses: ./.github/workflows/_push-to-container-registry.yml
with:
image-map: |
{
"docker.io/neondatabase/build-tools:${{ inputs.from-tag }}-bullseye": [
"docker.io/neondatabase/build-tools:pinned-bullseye",
"${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/build-tools:pinned-bullseye",
"${{ vars.AZURE_DEV_REGISTRY_NAME }}.azurecr.io/neondatabase/build-tools:pinned-bullseye"
],
"docker.io/neondatabase/build-tools:${{ inputs.from-tag }}-bookworm": [
"docker.io/neondatabase/build-tools:pinned-bookworm",
"docker.io/neondatabase/build-tools:pinned",
"${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/build-tools:pinned-bookworm",
"${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/build-tools:pinned",
"${{ vars.AZURE_DEV_REGISTRY_NAME }}.azurecr.io/neondatabase/build-tools:pinned-bookworm",
"${{ vars.AZURE_DEV_REGISTRY_NAME }}.azurecr.io/neondatabase/build-tools:pinned"
]
}
aws-region: ${{ vars.AWS_ECR_REGION }}
aws-account-ids: "${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}"
azure-client-id: ${{ vars.AZURE_DEV_CLIENT_ID }}
azure-subscription-id: ${{ vars.AZURE_DEV_SUBSCRIPTION_ID }}
azure-tenant-id: ${{ vars.AZURE_TENANT_ID }}
acr-registry-name: ${{ vars.AZURE_DEV_REGISTRY_NAME }}
secrets:
aws-role-to-assume: "${{ vars.DEV_AWS_OIDC_ROLE_ARN }}"
docker-hub-username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
docker-hub-password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}

.github/workflows/regenerate-pg-setting.yml (vendored, new file, 41 lines)

@@ -0,0 +1,41 @@
name: Regenerate Postgres Settings

on:
pull_request:
types:
- opened
- synchronize
- reopened
paths:
- pgxn/neon/**.c
- vendor/postgres-v*
- vendor/revisions.json

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref }}
cancel-in-progress: true

permissions:
pull-requests: write

jobs:
regenerate-pg-settings:
runs-on: ubuntu-22.04

steps:
- name: Add comment
uses: thollander/actions-comment-pull-request@v3
with:
comment-tag: ${{ github.job }}
pr-number: ${{ github.event.number }}
message: |
If this PR added a GUC in the Postgres fork or `neon` extension,
please regenerate the Postgres settings in the `cloud` repo:

```
make NEON_WORKDIR=path/to/neon/checkout \
-C goapp/internal/shareddomain/postgres generate
```

If you're an external contributor, a Neon employee will assist in
making sure this step is done.

.github/workflows/trigger-e2e-tests.yml (vendored, 38 changed lines)

@@ -15,7 +15,14 @@ env:
E2E_CONCURRENCY_GROUP: ${{ github.repository }}-e2e-tests-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}

jobs:
check-permissions:
if: ${{ !contains(github.event.pull_request.labels.*.name, 'run-no-ci') }}
uses: ./.github/workflows/check-permissions.yml
with:
github-event-name: ${{ github.event_name }}

cancel-previous-e2e-tests:
needs: [ check-permissions ]
if: github.event_name == 'pull_request'
runs-on: ubuntu-22.04

@@ -29,6 +36,7 @@ jobs:
--field concurrency_group="${{ env.E2E_CONCURRENCY_GROUP }}"

tag:
needs: [ check-permissions ]
runs-on: ubuntu-22.04
outputs:
build-tag: ${{ steps.build-tag.outputs.tag }}
@@ -68,7 +76,7 @@ jobs:
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
TAG: ${{ needs.tag.outputs.build-tag }}
steps:
- name: Wait for `promote-images-dev` job to finish
- name: Wait for `push-{neon,compute}-image-dev` job to finish
# It's important to have a timeout here, the script in the step can run infinitely
timeout-minutes: 60
run: |
@@ -79,20 +87,20 @@ jobs:
# For PRs we use the run id as the tag
BUILD_AND_TEST_RUN_ID=${TAG}
while true; do
conclusion=$(gh run --repo ${GITHUB_REPOSITORY} view ${BUILD_AND_TEST_RUN_ID} --json jobs --jq '.jobs[] | select(.name == "promote-images-dev") | .conclusion')
case "$conclusion" in
success)
break
;;
failure | cancelled | skipped)
echo "The 'promote-images-dev' job didn't succeed: '${conclusion}'. Exiting..."
exit 1
;;
*)
echo "The 'promote-images-dev' hasn't succeed yet. Waiting..."
sleep 60
;;
esac
gh run --repo ${GITHUB_REPOSITORY} view ${BUILD_AND_TEST_RUN_ID} --json jobs --jq '[.jobs[] | select((.name | startswith("push-neon-image-dev")) or (.name | startswith("push-compute-image-dev"))) | {"name": .name, "conclusion": .conclusion, "url": .url}]' > jobs.json
if [ $(jq '[.[] | select(.conclusion == "success")] | length' jobs.json) -eq 2 ]; then
break
fi
jq -c '.[]' jobs.json | while read -r job; do
case $(echo $job | jq .conclusion) in
failure | cancelled | skipped)
echo "The '$(echo $job | jq .name)' job didn't succeed: '$(echo $job | jq .conclusion)'. See log in '$(echo $job | jq .url)' Exiting..."
exit 1
;;
esac
done
echo "The 'push-{neon,compute}-image-dev' jobs haven't succeeded yet. Waiting..."
sleep 60
done

- name: Set e2e-platforms

Cargo.lock (generated, 54 changed lines)

@@ -786,7 +786,7 @@ dependencies = [
[[package]]
name = "azure_core"
version = "0.21.0"
source = "git+https://github.com/neondatabase/azure-sdk-for-rust.git?branch=neon#985db729824be324ed11527e45de722250028d9e"
source = "git+https://github.com/neondatabase/azure-sdk-for-rust.git?branch=neon#f64bd57262ced51afce5d8909c06dcb11a6dd85a"
dependencies = [
"async-trait",
"base64 0.22.1",
@@ -815,7 +815,7 @@ dependencies = [
[[package]]
name = "azure_identity"
version = "0.21.0"
source = "git+https://github.com/neondatabase/azure-sdk-for-rust.git?branch=neon#985db729824be324ed11527e45de722250028d9e"
source = "git+https://github.com/neondatabase/azure-sdk-for-rust.git?branch=neon#f64bd57262ced51afce5d8909c06dcb11a6dd85a"
dependencies = [
"async-lock",
"async-trait",
@@ -834,7 +834,7 @@ dependencies = [
[[package]]
name = "azure_storage"
version = "0.21.0"
source = "git+https://github.com/neondatabase/azure-sdk-for-rust.git?branch=neon#985db729824be324ed11527e45de722250028d9e"
source = "git+https://github.com/neondatabase/azure-sdk-for-rust.git?branch=neon#f64bd57262ced51afce5d8909c06dcb11a6dd85a"
dependencies = [
"RustyXML",
"async-lock",
@@ -852,7 +852,7 @@ dependencies = [
[[package]]
name = "azure_storage_blobs"
version = "0.21.0"
source = "git+https://github.com/neondatabase/azure-sdk-for-rust.git?branch=neon#985db729824be324ed11527e45de722250028d9e"
source = "git+https://github.com/neondatabase/azure-sdk-for-rust.git?branch=neon#f64bd57262ced51afce5d8909c06dcb11a6dd85a"
dependencies = [
"RustyXML",
"azure_core",
@@ -872,7 +872,7 @@ dependencies = [
[[package]]
name = "azure_svc_blobstorage"
version = "0.21.0"
source = "git+https://github.com/neondatabase/azure-sdk-for-rust.git?branch=neon#985db729824be324ed11527e45de722250028d9e"
source = "git+https://github.com/neondatabase/azure-sdk-for-rust.git?branch=neon#f64bd57262ced51afce5d8909c06dcb11a6dd85a"
dependencies = [
"azure_core",
"bytes",
@@ -1029,12 +1029,6 @@ dependencies = [
"generic-array",
]

[[package]]
name = "boxcar"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2721c3c5a6f0e7f7e607125d963fedeb765f545f67adc9d71ed934693881eb42"

[[package]]
name = "bstr"
version = "1.5.0"
@@ -1293,6 +1287,7 @@ version = "0.1.0"
dependencies = [
"anyhow",
"chrono",
"jsonwebtoken",
"regex",
"remote_storage",
"serde",
@@ -1308,6 +1303,7 @@ dependencies = [
"aws-config",
"aws-sdk-kms",
"aws-sdk-s3",
"aws-smithy-types",
"axum",
"base64 0.13.1",
"bytes",
@@ -1320,6 +1316,8 @@ dependencies = [
"flate2",
"futures",
"http 1.1.0",
"hyper 1.4.1",
"hyper-util",
"metrics",
"nix 0.27.1",
"notify",
@@ -1329,7 +1327,6 @@ dependencies = [
"opentelemetry_sdk",
"postgres",
"postgres_initdb",
"prometheus",
"regex",
"remote_storage",
"reqwest",
@@ -1348,13 +1345,13 @@ dependencies = [
"tower 0.5.2",
"tower-http",
"tracing",
"tracing-opentelemetry",
"tracing-subscriber",
"tracing-utils",
"url",
"utils",
"uuid",
"vm_monitor",
"walkdir",
"workspace_hack",
"zstd",
]
@@ -1879,6 +1876,12 @@ dependencies = [
"syn 2.0.90",
]

[[package]]
name = "difflib"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8"

[[package]]
name = "digest"
version = "0.10.7"
@@ -3336,6 +3339,17 @@ dependencies = [
"wasm-bindgen",
]

[[package]]
name = "json-structural-diff"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e878e36a8a44c158505c2c818abdc1350413ad83dcb774a0459f6a7ef2b65cbf"
dependencies = [
"difflib",
"regex",
"serde_json",
]

[[package]]
name = "jsonwebtoken"
version = "9.2.0"
@@ -4927,7 +4941,6 @@ dependencies = [
"aws-sdk-iam",
"aws-sigv4",
"base64 0.13.1",
"boxcar",
"bstr",
"bytes",
"camino",
@@ -4979,7 +4992,6 @@ dependencies = [
"postgres-protocol2",
"postgres_backend",
"pq_proto",
"prometheus",
"rand 0.8.5",
"rand_distr",
"rcgen",
@@ -5004,7 +5016,6 @@ dependencies = [
"smallvec",
"smol_str",
"socket2",
"strum",
"strum_macros",
"subtle",
"thiserror 1.0.69",
@@ -5019,7 +5030,6 @@ dependencies = [
"tracing",
"tracing-log",
"tracing-opentelemetry",
"tracing-serde",
"tracing-subscriber",
"tracing-utils",
"try-lock",
@@ -6452,6 +6462,7 @@ dependencies = [
"humantime",
"hyper 0.14.30",
"itertools 0.10.5",
"json-structural-diff",
"lasso",
"measured",
"metrics",
@@ -6460,10 +6471,13 @@ dependencies = [
"pageserver_client",
"postgres_connection",
"rand 0.8.5",
"regex",
"reqwest",
"routerify",
"rustls 0.23.18",
"rustls-native-certs 0.8.0",
"safekeeper_api",
"safekeeper_client",
"scoped-futures",
"scopeguard",
"serde",
@@ -6471,6 +6485,7 @@ dependencies = [
"strum",
"strum_macros",
"thiserror 1.0.69",
"tikv-jemallocator",
"tokio",
"tokio-postgres",
"tokio-postgres-rustls",
@@ -7024,14 +7039,11 @@ dependencies = [
name = "tokio-postgres2"
version = "0.1.0"
dependencies = [
"async-trait",
"byteorder",
"bytes",
"fallible-iterator",
"futures-util",
"log",
"parking_lot 0.12.1",
"percent-encoding",
"phf",
"pin-project-lite",
"postgres-protocol2",
@@ -7618,13 +7630,13 @@ dependencies = [
"hex",
"hex-literal",
"humantime",
"inferno 0.12.0",
"jsonwebtoken",
"metrics",
"nix 0.27.1",
"once_cell",
"pin-project-lite",
"postgres_connection",
"pprof",
"pq_proto",
"rand 0.8.5",
"regex",

@@ -210,6 +210,7 @@ rustls-native-certs = "0.8"
x509-parser = "0.16"
whoami = "1.5.1"
zerocopy = { version = "0.7", features = ["derive"] }
json-structural-diff = { version = "0.2.0" }

## TODO replace this with tracing
env_logger = "0.10"

Dockerfile (41 changed lines)

@@ -10,6 +10,28 @@ ARG STABLE_PG_VERSION=16
ARG DEBIAN_VERSION=bookworm
ARG DEBIAN_FLAVOR=${DEBIAN_VERSION}-slim

# Here are the INDEX DIGESTS for the images we use.
# You can get them following next steps for now:
# 1. Get an authentication token from DockerHub:
# TOKEN=$(curl -s "https://auth.docker.io/token?service=registry.docker.io&scope=repository:library/debian:pull" | jq -r .token)
# 2. Using that token, query index for the given tag:
# curl -s -H "Authorization: Bearer $TOKEN" \
# -H "Accept: application/vnd.docker.distribution.manifest.list.v2+json" \
# "https://registry.hub.docker.com/v2/library/debian/manifests/bullseye-slim" \
# -I | grep -i docker-content-digest
# 3. As a next step, TODO(fedordikarev): create script and schedule workflow to run these checks
# and updates on regular bases and in automated way.
ARG BOOKWORM_SLIM_SHA=sha256:40b107342c492725bc7aacbe93a49945445191ae364184a6d24fedb28172f6f7
ARG BULLSEYE_SLIM_SHA=sha256:e831d9a884d63734fe3dd9c491ed9a5a3d4c6a6d32c5b14f2067357c49b0b7e1

# Here we use ${var/search/replace} syntax, to check
# if base image is one of the images, we pin image index for.
# If var will match one the known images, we will replace it with the known sha.
# If no match, than value will be unaffected, and will process with no-pinned image.
ARG BASE_IMAGE_SHA=debian:${DEBIAN_FLAVOR}
ARG BASE_IMAGE_SHA=${BASE_IMAGE_SHA/debian:bookworm-slim/debian@$BOOKWORM_SLIM_SHA}
ARG BASE_IMAGE_SHA=${BASE_IMAGE_SHA/debian:bullseye-slim/debian@$BULLSEYE_SLIM_SHA}

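The comment above leaves fetching and refreshing these digests as a TODO. A small helper in the spirit of that TODO could look roughly like the sketch below; it only uses the two endpoints already documented in the comment, and the script itself (its name and any automation around it) is hypothetical:

```python
#!/usr/bin/env python3
"""Hypothetical helper: print the current index digest for a Docker Hub tag."""
import json
import sys
import urllib.request


def index_digest(repository: str, tag: str) -> str:
    # Step 1: anonymous pull token for the repository.
    token_url = ("https://auth.docker.io/token"
                 f"?service=registry.docker.io&scope=repository:{repository}:pull")
    with urllib.request.urlopen(token_url) as resp:
        token = json.load(resp)["token"]

    # Step 2: HEAD request for the manifest list; the index digest comes back
    # in the Docker-Content-Digest response header.
    req = urllib.request.Request(
        f"https://registry.hub.docker.com/v2/{repository}/manifests/{tag}",
        headers={
            "Authorization": f"Bearer {token}",
            "Accept": "application/vnd.docker.distribution.manifest.list.v2+json",
        },
        method="HEAD",
    )
    with urllib.request.urlopen(req) as resp:
        return resp.headers["Docker-Content-Digest"]


if __name__ == "__main__":
    tag = sys.argv[1] if len(sys.argv) > 1 else "bookworm-slim"
    print(index_digest("library/debian", tag))
```

The printed value is what the BOOKWORM_SLIM_SHA / BULLSEYE_SLIM_SHA build args above pin.
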
# Build Postgres
FROM $REPOSITORY/$IMAGE:$TAG AS pg-build
WORKDIR /home/nonroot
@@ -28,6 +50,14 @@ RUN set -e \
&& rm -rf pg_install/build \
&& tar -C pg_install -czf /home/nonroot/postgres_install.tar.gz .

# Prepare cargo-chef recipe
FROM $REPOSITORY/$IMAGE:$TAG AS plan
WORKDIR /home/nonroot

COPY --chown=nonroot . .

RUN cargo chef prepare --recipe-path recipe.json

# Build neon binaries
FROM $REPOSITORY/$IMAGE:$TAG AS build
WORKDIR /home/nonroot
@@ -41,9 +71,15 @@ COPY --from=pg-build /home/nonroot/pg_install/v16/include/postgresql/server pg_i
COPY --from=pg-build /home/nonroot/pg_install/v17/include/postgresql/server pg_install/v17/include/postgresql/server
COPY --from=pg-build /home/nonroot/pg_install/v16/lib pg_install/v16/lib
COPY --from=pg-build /home/nonroot/pg_install/v17/lib pg_install/v17/lib
COPY --from=plan /home/nonroot/recipe.json recipe.json

ARG ADDITIONAL_RUSTFLAGS=""

RUN set -e \
&& RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment -Cforce-frame-pointers=yes ${ADDITIONAL_RUSTFLAGS}" cargo chef cook --locked --release --recipe-path recipe.json

COPY --chown=nonroot . .

ARG ADDITIONAL_RUSTFLAGS
RUN set -e \
&& RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment -Cforce-frame-pointers=yes ${ADDITIONAL_RUSTFLAGS}" cargo build \
--bin pg_sni_router \
@@ -59,7 +95,7 @@ RUN set -e \

# Build final image
#
FROM debian:${DEBIAN_FLAVOR}
FROM $BASE_IMAGE_SHA
ARG DEFAULT_PG_VERSION
WORKDIR /data

@@ -112,4 +148,3 @@ EXPOSE 6400
EXPOSE 9898

CMD ["/usr/local/bin/pageserver", "-D", "/data/.neon"]

@@ -1,6 +1,29 @@
|
||||
ARG DEBIAN_VERSION=bookworm
|
||||
ARG DEBIAN_FLAVOR=${DEBIAN_VERSION}-slim
|
||||
|
||||
FROM debian:bookworm-slim AS pgcopydb_builder
|
||||
# Here are the INDEX DIGESTS for the images we use.
|
||||
# You can get them following next steps for now:
|
||||
# 1. Get an authentication token from DockerHub:
|
||||
# TOKEN=$(curl -s "https://auth.docker.io/token?service=registry.docker.io&scope=repository:library/debian:pull" | jq -r .token)
|
||||
# 2. Using that token, query index for the given tag:
|
||||
# curl -s -H "Authorization: Bearer $TOKEN" \
|
||||
# -H "Accept: application/vnd.docker.distribution.manifest.list.v2+json" \
|
||||
# "https://registry.hub.docker.com/v2/library/debian/manifests/bullseye-slim" \
|
||||
# -I | grep -i docker-content-digest
|
||||
# 3. As a next step, TODO(fedordikarev): create script and schedule workflow to run these checks
|
||||
# and updates on regular bases and in automated way.
|
||||
ARG BOOKWORM_SLIM_SHA=sha256:40b107342c492725bc7aacbe93a49945445191ae364184a6d24fedb28172f6f7
|
||||
ARG BULLSEYE_SLIM_SHA=sha256:e831d9a884d63734fe3dd9c491ed9a5a3d4c6a6d32c5b14f2067357c49b0b7e1
|
||||
|
||||
# Here we use ${var/search/replace} syntax to check
# whether the base image is one of the images we pin an image index for.
# If the variable matches one of the known images, we replace it with the known sha.
# If there is no match, the value is unaffected and we proceed with the unpinned image.
ARG BASE_IMAGE_SHA=debian:${DEBIAN_FLAVOR}
ARG BASE_IMAGE_SHA=${BASE_IMAGE_SHA/debian:bookworm-slim/debian@$BOOKWORM_SLIM_SHA}
ARG BASE_IMAGE_SHA=${BASE_IMAGE_SHA/debian:bullseye-slim/debian@$BULLSEYE_SLIM_SHA}
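The substitution can be tried directly at a shell prompt; a minimal sketch of the pinning trick (the digest value below is a placeholder, not one of the real digests above):

base="debian:bookworm-slim"
echo "${base/debian:bookworm-slim/debian@sha256:<digest>}"    # match    -> debian@sha256:<digest>
other="debian:bookworm"
echo "${other/debian:bookworm-slim/debian@sha256:<digest>}"   # no match -> debian:bookworm (unchanged)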
|
||||
|
||||
FROM $BASE_IMAGE_SHA AS pgcopydb_builder
|
||||
ARG DEBIAN_VERSION
|
||||
|
||||
# Use strict mode for bash to catch errors early
|
||||
@@ -9,7 +32,7 @@ SHELL ["/bin/bash", "-euo", "pipefail", "-c"]
|
||||
# By default, /bin/sh used in debian images treats '\n' as eol,
# but we use bash as SHELL, and the built-in echo in bash requires the '-e' flag for that.
|
||||
RUN echo 'Acquire::Retries "5";' > /etc/apt/apt.conf.d/80-retries && \
|
||||
echo -e "retry_connrefused = on\ntimeout=15\ntries=5\n" > /root/.wgetrc && \
|
||||
echo -e "retry_connrefused=on\ntimeout=15\ntries=5\nretry-on-host-error=on\n" > /root/.wgetrc && \
|
||||
echo -e "--retry-connrefused\n--connect-timeout 15\n--retry 5\n--max-time 300\n" > /root/.curlrc
|
||||
|
||||
COPY build_tools/patches/pgcopydbv017.patch /pgcopydbv017.patch
|
||||
@@ -58,7 +81,7 @@ RUN if [ "${DEBIAN_VERSION}" = "bookworm" ]; then \
|
||||
mkdir -p /pgcopydb/lib && touch /pgcopydb/lib/libpq.so.5; \
|
||||
fi
|
||||
|
||||
FROM debian:${DEBIAN_VERSION}-slim AS build_tools
|
||||
FROM $BASE_IMAGE_SHA AS build_tools
|
||||
ARG DEBIAN_VERSION
|
||||
|
||||
# Add nonroot user
|
||||
@@ -75,7 +98,7 @@ COPY --from=pgcopydb_builder /usr/lib/postgresql/16/bin/pgcopydb /pgcopydb/bin/p
|
||||
COPY --from=pgcopydb_builder /pgcopydb/lib/libpq.so.5 /pgcopydb/lib/libpq.so.5
|
||||
|
||||
RUN echo 'Acquire::Retries "5";' > /etc/apt/apt.conf.d/80-retries && \
|
||||
echo -e "retry_connrefused = on\ntimeout=15\ntries=5\n" > /root/.wgetrc && \
|
||||
echo -e "retry_connrefused=on\ntimeout=15\ntries=5\nretry-on-host-error=on\n" > /root/.wgetrc && \
|
||||
echo -e "--retry-connrefused\n--connect-timeout 15\n--retry 5\n--max-time 300\n" > /root/.curlrc
|
||||
|
||||
# System deps
|
||||
@@ -138,7 +161,8 @@ RUN curl -fsSL \
|
||||
--output sql_exporter.tar.gz \
|
||||
&& mkdir /tmp/sql_exporter \
|
||||
&& tar xzvf sql_exporter.tar.gz -C /tmp/sql_exporter --strip-components=1 \
|
||||
&& mv /tmp/sql_exporter/sql_exporter /usr/local/bin/sql_exporter
|
||||
&& mv /tmp/sql_exporter/sql_exporter /usr/local/bin/sql_exporter \
|
||||
&& rm sql_exporter.tar.gz
|
||||
|
||||
# protobuf-compiler (protoc)
|
||||
ENV PROTOC_VERSION=25.1
|
||||
@@ -268,7 +292,7 @@ WORKDIR /home/nonroot
|
||||
|
||||
# Rust
|
||||
# Please keep the version of llvm (installed above) in sync with rust llvm (`rustc --version --verbose | grep LLVM`)
|
||||
ENV RUSTC_VERSION=1.84.1
|
||||
ENV RUSTC_VERSION=1.85.0
|
||||
ENV RUSTUP_HOME="/home/nonroot/.rustup"
|
||||
ENV PATH="/home/nonroot/.cargo/bin:${PATH}"
|
||||
ARG RUSTFILT_VERSION=0.2.1
|
||||
@@ -276,6 +300,7 @@ ARG CARGO_HAKARI_VERSION=0.9.33
|
||||
ARG CARGO_DENY_VERSION=0.16.2
|
||||
ARG CARGO_HACK_VERSION=0.6.33
|
||||
ARG CARGO_NEXTEST_VERSION=0.9.85
|
||||
ARG CARGO_CHEF_VERSION=0.1.71
|
||||
ARG CARGO_DIESEL_CLI_VERSION=2.2.6
|
||||
RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && whoami && \
|
||||
chmod +x rustup-init && \
|
||||
@@ -290,6 +315,7 @@ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux
|
||||
cargo install cargo-deny --locked --version ${CARGO_DENY_VERSION} && \
|
||||
cargo install cargo-hack --version ${CARGO_HACK_VERSION} && \
|
||||
cargo install cargo-nextest --version ${CARGO_NEXTEST_VERSION} && \
|
||||
cargo install cargo-chef --locked --version ${CARGO_CHEF_VERSION} && \
|
||||
cargo install diesel_cli --version ${CARGO_DIESEL_CLI_VERSION} \
|
||||
--features postgres-bundled --no-default-features && \
|
||||
rm -rf /home/nonroot/.cargo/registry && \
|
||||
|
||||
@@ -83,7 +83,28 @@ ARG TAG=pinned
|
||||
ARG BUILD_TAG
|
||||
ARG DEBIAN_VERSION=bookworm
|
||||
ARG DEBIAN_FLAVOR=${DEBIAN_VERSION}-slim
|
||||
ARG ALPINE_CURL_VERSION=8.11.1
|
||||
|
||||
# Here are the INDEX DIGESTS for the images we use.
# You can get them by following these steps for now:
# 1. Get an authentication token from DockerHub:
#    TOKEN=$(curl -s "https://auth.docker.io/token?service=registry.docker.io&scope=repository:library/debian:pull" | jq -r .token)
# 2. Using that token, query the index for the given tag:
#    curl -s -H "Authorization: Bearer $TOKEN" \
#      -H "Accept: application/vnd.docker.distribution.manifest.list.v2+json" \
#      "https://registry.hub.docker.com/v2/library/debian/manifests/bullseye-slim" \
#      -I | grep -i docker-content-digest
# 3. As a next step, TODO(fedordikarev): create a script and schedule a workflow to run these checks
#    and updates on a regular basis in an automated way.
ARG BOOKWORM_SLIM_SHA=sha256:40b107342c492725bc7aacbe93a49945445191ae364184a6d24fedb28172f6f7
ARG BULLSEYE_SLIM_SHA=sha256:e831d9a884d63734fe3dd9c491ed9a5a3d4c6a6d32c5b14f2067357c49b0b7e1
|
||||
|
||||
# Here we use ${var/search/replace} syntax to check
# whether the base image is one of the images we pin an image index for.
# If the variable matches one of the known images, we replace it with the known sha.
# If there is no match, the value is unaffected and we proceed with the unpinned image.
ARG BASE_IMAGE_SHA=debian:${DEBIAN_FLAVOR}
ARG BASE_IMAGE_SHA=${BASE_IMAGE_SHA/debian:bookworm-slim/debian@$BOOKWORM_SLIM_SHA}
ARG BASE_IMAGE_SHA=${BASE_IMAGE_SHA/debian:bullseye-slim/debian@$BULLSEYE_SLIM_SHA}
|
||||
|
||||
# By default, build all PostgreSQL extensions. For quick local testing when you don't
|
||||
# care about the extensions, pass EXTENSIONS=none or EXTENSIONS=minimal
|
||||
@@ -94,7 +115,7 @@ ARG EXTENSIONS=all
|
||||
# Layer "build-deps"
|
||||
#
|
||||
#########################################################################################
|
||||
FROM debian:$DEBIAN_FLAVOR AS build-deps
|
||||
FROM $BASE_IMAGE_SHA AS build-deps
|
||||
ARG DEBIAN_VERSION
|
||||
|
||||
# Use strict mode for bash to catch errors early
|
||||
@@ -103,7 +124,7 @@ SHELL ["/bin/bash", "-euo", "pipefail", "-c"]
|
||||
# By default, /bin/sh used in debian images treats '\n' as eol,
# but we use bash as SHELL, and the built-in echo in bash requires the '-e' flag for that.
|
||||
RUN echo 'Acquire::Retries "5";' > /etc/apt/apt.conf.d/80-retries && \
|
||||
echo -e "retry_connrefused = on\ntimeout=15\ntries=5\n" > /root/.wgetrc && \
|
||||
echo -e "retry_connrefused = on\ntimeout=15\ntries=5\nretry-on-host-error=on\n" > /root/.wgetrc && \
|
||||
echo -e "--retry-connrefused\n--connect-timeout 15\n--retry 5\n--max-time 300\n" > /root/.curlrc
|
||||
|
||||
RUN case $DEBIAN_VERSION in \
|
||||
@@ -127,7 +148,7 @@ RUN case $DEBIAN_VERSION in \
|
||||
apt install --no-install-recommends --no-install-suggests -y \
|
||||
ninja-build git autoconf automake libtool build-essential bison flex libreadline-dev \
|
||||
zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget ca-certificates pkg-config libssl-dev \
|
||||
libicu-dev libxslt1-dev liblz4-dev libzstd-dev zstd \
|
||||
libicu-dev libxslt1-dev liblz4-dev libzstd-dev zstd curl unzip g++ \
|
||||
$VERSION_INSTALLS \
|
||||
&& apt clean && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
@@ -139,11 +160,11 @@ RUN case $DEBIAN_VERSION in \
|
||||
#########################################################################################
|
||||
FROM build-deps AS pg-build
|
||||
ARG PG_VERSION
|
||||
COPY vendor/postgres-${PG_VERSION} postgres
|
||||
COPY vendor/postgres-${PG_VERSION:?} postgres
|
||||
RUN cd postgres && \
|
||||
export CONFIGURE_CMD="./configure CFLAGS='-O2 -g3' --enable-debug --with-openssl --with-uuid=ossp \
|
||||
--with-icu --with-libxml --with-libxslt --with-lz4" && \
|
||||
if [ "${PG_VERSION}" != "v14" ]; then \
|
||||
if [ "${PG_VERSION:?}" != "v14" ]; then \
|
||||
# zstd is available only from PG15
|
||||
export CONFIGURE_CMD="${CONFIGURE_CMD} --with-zstd"; \
|
||||
fi && \
|
||||
@@ -237,7 +258,7 @@ RUN case "${DEBIAN_VERSION}" in \
|
||||
|
||||
# Postgis 3.5.0 supports v17
|
||||
WORKDIR /ext-src
|
||||
RUN case "${PG_VERSION}" in \
|
||||
RUN case "${PG_VERSION:?}" in \
|
||||
"v17") \
|
||||
export POSTGIS_VERSION=3.5.0 \
|
||||
export POSTGIS_CHECKSUM=ca698a22cc2b2b3467ac4e063b43a28413f3004ddd505bdccdd74c56a647f510 \
|
||||
@@ -312,7 +333,7 @@ FROM build-deps AS pgrouting-src
|
||||
ARG DEBIAN_VERSION
|
||||
ARG PG_VERSION
|
||||
WORKDIR /ext-src
|
||||
RUN case "${PG_VERSION}" in \
|
||||
RUN case "${PG_VERSION:?}" in \
|
||||
"v17") \
|
||||
export PGROUTING_VERSION=3.6.2 \
|
||||
export PGROUTING_CHECKSUM=f4a1ed79d6f714e52548eca3bb8e5593c6745f1bde92eb5fb858efd8984dffa2 \
|
||||
@@ -358,7 +379,7 @@ COPY compute/patches/plv8-3.1.10.patch .
|
||||
#
|
||||
# Use new version only for v17
|
||||
# because since v3.2, plv8 doesn't include plcoffee and plls extensions
|
||||
RUN case "${PG_VERSION}" in \
|
||||
RUN case "${PG_VERSION:?}" in \
|
||||
"v17") \
|
||||
export PLV8_TAG=v3.2.3 \
|
||||
;; \
|
||||
@@ -372,17 +393,24 @@ RUN case "${PG_VERSION}" in \
|
||||
git clone --recurse-submodules --depth 1 --branch ${PLV8_TAG} https://github.com/plv8/plv8.git plv8-src && \
|
||||
tar -czf plv8.tar.gz --exclude .git plv8-src && \
|
||||
cd plv8-src && \
|
||||
if [[ "${PG_VERSION}" < "v17" ]]; then patch -p1 < /ext-src/plv8-3.1.10.patch; fi
|
||||
if [[ "${PG_VERSION:?}" < "v17" ]]; then patch -p1 < /ext-src/plv8-3.1.10.patch; fi
|
||||
|
||||
FROM pg-build AS plv8-build
|
||||
# Step 1: Build the vendored V8 engine. It doesn't depend on PostgreSQL, so use
|
||||
# 'build-deps' as the base. This enables caching and avoids unnecessary rebuilds.
|
||||
# (The V8 engine takes a very long time to build)
|
||||
FROM build-deps AS plv8-build
|
||||
ARG PG_VERSION
|
||||
WORKDIR /ext-src/plv8-src
|
||||
RUN apt update && \
|
||||
apt install --no-install-recommends --no-install-suggests -y \
|
||||
ninja-build python3-dev libncurses5 binutils clang \
|
||||
&& apt clean && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY --from=plv8-src /ext-src/ /ext-src/
|
||||
WORKDIR /ext-src/plv8-src
|
||||
RUN make DOCKER=1 -j $(getconf _NPROCESSORS_ONLN) v8
|
||||
|
||||
# Step 2: Build the PostgreSQL-dependent parts
|
||||
COPY --from=pg-build /usr/local/pgsql /usr/local/pgsql
|
||||
ENV PATH="/usr/local/pgsql/bin:$PATH"
|
||||
RUN \
|
||||
# generate and copy upgrade scripts
|
||||
make generate_upgrades && \
|
||||
@@ -392,7 +420,7 @@ RUN \
|
||||
find /usr/local/pgsql/ -name "plv8-*.so" | xargs strip && \
|
||||
# don't break computes with installed old version of plv8
|
||||
cd /usr/local/pgsql/lib/ && \
|
||||
case "${PG_VERSION}" in \
|
||||
case "${PG_VERSION:?}" in \
|
||||
"v17") \
|
||||
ln -s plv8-3.2.3.so plv8-3.1.8.so && \
|
||||
ln -s plv8-3.2.3.so plv8-3.1.5.so && \
|
||||
@@ -729,7 +757,7 @@ FROM build-deps AS timescaledb-src
|
||||
ARG PG_VERSION
|
||||
|
||||
WORKDIR /ext-src
|
||||
RUN case "${PG_VERSION}" in \
|
||||
RUN case "${PG_VERSION:?}" in \
|
||||
"v14" | "v15") \
|
||||
export TIMESCALEDB_VERSION=2.10.1 \
|
||||
export TIMESCALEDB_CHECKSUM=6fca72a6ed0f6d32d2b3523951ede73dc5f9b0077b38450a029a5f411fdb8c73 \
|
||||
@@ -767,7 +795,7 @@ ARG PG_VERSION
|
||||
|
||||
# version-specific, has separate releases for each version
|
||||
WORKDIR /ext-src
|
||||
RUN case "${PG_VERSION}" in \
|
||||
RUN case "${PG_VERSION:?}" in \
|
||||
"v14") \
|
||||
export PG_HINT_PLAN_VERSION=14_1_4_1 \
|
||||
export PG_HINT_PLAN_CHECKSUM=c3501becf70ead27f70626bce80ea401ceac6a77e2083ee5f3ff1f1444ec1ad1 \
|
||||
@@ -843,7 +871,7 @@ ARG PG_VERSION
|
||||
# https://github.com/rdkit/rdkit/releases/tag/Release_2024_09_1
|
||||
|
||||
WORKDIR /ext-src
|
||||
RUN case "${PG_VERSION}" in \
|
||||
RUN case "${PG_VERSION:?}" in \
|
||||
"v17") \
|
||||
export RDKIT_VERSION=Release_2024_09_1 \
|
||||
export RDKIT_CHECKSUM=034c00d6e9de323506834da03400761ed8c3721095114369d06805409747a60f \
|
||||
@@ -970,7 +998,7 @@ ARG PG_VERSION
|
||||
#
|
||||
# last release v0.40.0 - Jul 22, 2024
|
||||
WORKDIR /ext-src
|
||||
RUN case "${PG_VERSION}" in \
|
||||
RUN case "${PG_VERSION:?}" in \
|
||||
"v17") \
|
||||
export SEMVER_VERSION=0.40.0 \
|
||||
export SEMVER_CHECKSUM=3e50bcc29a0e2e481e7b6d2bc937cadc5f5869f55d983b5a1aafeb49f5425cfc \
|
||||
@@ -1006,7 +1034,7 @@ ARG PG_VERSION
|
||||
# This is our extension, support stopped in favor of pgvector
|
||||
# TODO: deprecate it
|
||||
WORKDIR /ext-src
|
||||
RUN case "${PG_VERSION}" in \
|
||||
RUN case "${PG_VERSION:?}" in \
|
||||
"v14" | "v15") \
|
||||
export PG_EMBEDDING_VERSION=0.3.5 \
|
||||
export PG_EMBEDDING_CHECKSUM=0e95b27b8b6196e2cf0a0c9ec143fe2219b82e54c5bb4ee064e76398cbe69ae9 \
|
||||
@@ -1039,7 +1067,7 @@ ARG PG_VERSION
|
||||
# This is an experimental extension, never got to real production.
|
||||
# !Do not remove! It can be present in shared_preload_libraries and compute will fail to start if library is not found.
|
||||
WORKDIR /ext-src
|
||||
RUN case "${PG_VERSION}" in "v17") \
|
||||
RUN case "${PG_VERSION:?}" in "v17") \
|
||||
echo "postgresql_anonymizer does not yet support PG17" && exit 0;; \
|
||||
esac && \
|
||||
wget https://github.com/neondatabase/postgresql_anonymizer/archive/refs/tags/neon_1.1.1.tar.gz -O pg_anon.tar.gz && \
|
||||
@@ -1091,7 +1119,7 @@ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux
|
||||
FROM pg-build-nonroot-with-cargo AS rust-extensions-build
|
||||
ARG PG_VERSION
|
||||
|
||||
RUN case "${PG_VERSION}" in \
|
||||
RUN case "${PG_VERSION:?}" in \
|
||||
'v17') \
|
||||
echo 'v17 is not supported yet by pgrx. Quit' && exit 0;; \
|
||||
esac && \
|
||||
@@ -1270,7 +1298,7 @@ FROM build-deps AS pgx_ulid-src
|
||||
ARG PG_VERSION
|
||||
|
||||
WORKDIR /ext-src
|
||||
RUN case "${PG_VERSION}" in \
|
||||
RUN case "${PG_VERSION:?}" in \
|
||||
"v14" | "v15" | "v16") \
|
||||
;; \
|
||||
*) \
|
||||
@@ -1302,7 +1330,7 @@ FROM build-deps AS pgx_ulid-pgrx12-src
|
||||
ARG PG_VERSION
|
||||
|
||||
WORKDIR /ext-src
|
||||
RUN case "${PG_VERSION}" in \
|
||||
RUN case "${PG_VERSION:?}" in \
|
||||
"v17") \
|
||||
;; \
|
||||
*) \
|
||||
@@ -1430,8 +1458,8 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
FROM build-deps AS pg_mooncake-src
|
||||
ARG PG_VERSION
|
||||
WORKDIR /ext-src
|
||||
RUN wget https://github.com/Mooncake-Labs/pg_mooncake/releases/download/v0.1.1/pg_mooncake-0.1.1.tar.gz -O pg_mooncake.tar.gz && \
|
||||
echo "a2d16eff7948dde64f072609ca5d2962d6b4d07cb89d45952add473529c55f55 pg_mooncake.tar.gz" | sha256sum --check && \
|
||||
RUN wget https://github.com/Mooncake-Labs/pg_mooncake/releases/download/v0.1.2/pg_mooncake-0.1.2.tar.gz -O pg_mooncake.tar.gz && \
|
||||
echo "4550473784fcdd2e1e18062bc01eb9c286abd27cdf5e11a4399be6c0a426ba90 pg_mooncake.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_mooncake-src && cd pg_mooncake-src && tar xzf ../pg_mooncake.tar.gz --strip-components=1 -C . && \
|
||||
echo "make -f pg_mooncake-src/Makefile.build installcheck TEST_DIR=./test SQL_DIR=./sql SRC_DIR=./src" > neon-test.sh && \
|
||||
chmod a+x neon-test.sh
|
||||
@@ -1443,6 +1471,31 @@ RUN make release -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make install -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_mooncake.control
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "pg-duckdb-pg-build"
|
||||
# compile pg_duckdb extension
|
||||
#
|
||||
#########################################################################################
|
||||
FROM build-deps AS pg_duckdb-src
|
||||
WORKDIR /ext-src
|
||||
COPY compute/patches/pg_duckdb_v031.patch .
|
||||
# The pg_duckdb build requires the source dir to be a git repo in order to fetch submodules.
# Allow neon_superuser to execute some functions that in pg_duckdb are available to superuser only:
# - the extension management function duckdb.install_extension()
# - access to the duckdb.extensions table and its sequence
|
||||
RUN git clone --depth 1 --branch v0.3.1 https://github.com/duckdb/pg_duckdb.git pg_duckdb-src && \
|
||||
cd pg_duckdb-src && \
|
||||
git submodule update --init --recursive && \
|
||||
patch -p1 < /ext-src/pg_duckdb_v031.patch
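For reference, the effect of those grants can be checked from a compute once the patched extension is installed; an illustrative session (the connection details and the 'httpfs' extension name are assumptions, not taken from this diff):

psql -U neon_superuser -d postgres -c "SELECT duckdb.install_extension('httpfs');"
psql -U neon_superuser -d postgres -c "SELECT * FROM duckdb.extensions;"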
|
||||
|
||||
FROM pg-build AS pg_duckdb-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg_duckdb-src /ext-src/ /ext-src/
|
||||
WORKDIR /ext-src/pg_duckdb-src
|
||||
RUN make install -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_duckdb.control
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "pg_repack"
|
||||
@@ -1463,6 +1516,73 @@ WORKDIR /ext-src/pg_repack-src
|
||||
RUN make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install
|
||||
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "pgaudit"
|
||||
# compile pgaudit extension
|
||||
#
|
||||
#########################################################################################
|
||||
|
||||
FROM build-deps AS pgaudit-src
|
||||
ARG PG_VERSION
|
||||
WORKDIR /ext-src
|
||||
RUN case "${PG_VERSION}" in \
|
||||
"v14") \
|
||||
export PGAUDIT_VERSION=1.6.2 \
|
||||
export PGAUDIT_CHECKSUM=1f350d70a0cbf488c0f2b485e3a5c9b11f78ad9e3cbb95ef6904afa1eb3187eb \
|
||||
;; \
|
||||
"v15") \
|
||||
export PGAUDIT_VERSION=1.7.0 \
|
||||
export PGAUDIT_CHECKSUM=8f4a73e451c88c567e516e6cba7dc1e23bc91686bb6f1f77f8f3126d428a8bd8 \
|
||||
;; \
|
||||
"v16") \
|
||||
export PGAUDIT_VERSION=16.0 \
|
||||
export PGAUDIT_CHECKSUM=d53ef985f2d0b15ba25c512c4ce967dce07b94fd4422c95bd04c4c1a055fe738 \
|
||||
;; \
|
||||
"v17") \
|
||||
export PGAUDIT_VERSION=17.0 \
|
||||
export PGAUDIT_CHECKSUM=7d0d08d030275d525f36cd48b38c6455f1023da863385badff0cec44965bfd8c \
|
||||
;; \
|
||||
*) \
|
||||
echo "pgaudit is not supported on this PostgreSQL version" && exit 1;; \
|
||||
esac && \
|
||||
wget https://github.com/pgaudit/pgaudit/archive/refs/tags/${PGAUDIT_VERSION}.tar.gz -O pgaudit.tar.gz && \
|
||||
echo "${PGAUDIT_CHECKSUM} pgaudit.tar.gz" | sha256sum --check && \
|
||||
mkdir pgaudit-src && cd pgaudit-src && tar xzf ../pgaudit.tar.gz --strip-components=1 -C .
|
||||
|
||||
FROM pg-build AS pgaudit-build
|
||||
COPY --from=pgaudit-src /ext-src/ /ext-src/
|
||||
WORKDIR /ext-src/pgaudit-src
|
||||
RUN make install USE_PGXS=1 -j $(getconf _NPROCESSORS_ONLN)
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "pgauditlogtofile"
|
||||
# compile pgauditlogtofile extension
|
||||
#
|
||||
#########################################################################################
|
||||
|
||||
FROM build-deps AS pgauditlogtofile-src
|
||||
ARG PG_VERSION
|
||||
WORKDIR /ext-src
|
||||
RUN case "${PG_VERSION}" in \
|
||||
"v14" | "v15" | "v16" | "v17") \
|
||||
export PGAUDITLOGTOFILE_VERSION=v1.6.4 \
|
||||
export PGAUDITLOGTOFILE_CHECKSUM=ef801eb09c26aaa935c0dabd92c81eb9ebe338930daa9674d420a280c6bc2d70 \
|
||||
;; \
|
||||
*) \
|
||||
echo "pgauditlogtofile is not supported on this PostgreSQL version" && exit 1;; \
|
||||
esac && \
|
||||
wget https://github.com/fmbiete/pgauditlogtofile/archive/refs/tags/${PGAUDITLOGTOFILE_VERSION}.tar.gz -O pgauditlogtofile.tar.gz && \
|
||||
echo "${PGAUDITLOGTOFILE_CHECKSUM} pgauditlogtofile.tar.gz" | sha256sum --check && \
|
||||
mkdir pgauditlogtofile-src && cd pgauditlogtofile-src && tar xzf ../pgauditlogtofile.tar.gz --strip-components=1 -C .
|
||||
|
||||
FROM pg-build AS pgauditlogtofile-build
|
||||
COPY --from=pgauditlogtofile-src /ext-src/ /ext-src/
|
||||
WORKDIR /ext-src/pgauditlogtofile-src
|
||||
RUN make install USE_PGXS=1 -j $(getconf _NPROCESSORS_ONLN)
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "neon-ext-build"
|
||||
@@ -1556,7 +1676,14 @@ COPY --from=pg_anon-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg_ivm-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg_partman-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg_mooncake-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
# Disabled temporarily, because it clashed with pg_mooncake. pg_mooncake
|
||||
# also depends on libduckdb, but a different version.
|
||||
#COPY --from=pg_duckdb-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
COPY --from=pg_repack-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pgaudit-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pgauditlogtofile-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
@@ -1594,7 +1721,7 @@ RUN --mount=type=cache,uid=1000,target=/home/nonroot/.cargo/registry \
|
||||
#
|
||||
#########################################################################################
|
||||
|
||||
FROM debian:$DEBIAN_FLAVOR AS pgbouncer
|
||||
FROM $BASE_IMAGE_SHA AS pgbouncer
|
||||
RUN set -e \
|
||||
&& echo 'Acquire::Retries "5";' > /etc/apt/apt.conf.d/80-retries \
|
||||
&& apt update \
|
||||
@@ -1624,13 +1751,12 @@ RUN set -e \
|
||||
# Layer "exporters"
|
||||
#
|
||||
#########################################################################################
|
||||
FROM alpine/curl:${ALPINE_CURL_VERSION} AS exporters
|
||||
FROM build-deps AS exporters
|
||||
ARG TARGETARCH
|
||||
# Keep sql_exporter version same as in build-tools.Dockerfile and
|
||||
# test_runner/regress/test_compute_metrics.py
|
||||
# See comment at the top of the file regarding `echo`, `-e` and `\n`
|
||||
RUN echo -e "--retry-connrefused\n--connect-timeout 15\n--retry 5\n--max-time 300\n" > /root/.curlrc; \
|
||||
if [ "$TARGETARCH" = "amd64" ]; then\
|
||||
RUN if [ "$TARGETARCH" = "amd64" ]; then\
|
||||
postgres_exporter_sha256='027e75dda7af621237ff8f5ac66b78a40b0093595f06768612b92b1374bd3105';\
|
||||
pgbouncer_exporter_sha256='c9f7cf8dcff44f0472057e9bf52613d93f3ffbc381ad7547a959daa63c5e84ac';\
|
||||
sql_exporter_sha256='38e439732bbf6e28ca4a94d7bc3686d3fa1abdb0050773d5617a9efdb9e64d08';\
|
||||
@@ -1649,29 +1775,6 @@ RUN echo -e "--retry-connrefused\n--connect-timeout 15\n--retry 5\n--max-time 30
|
||||
&& echo "${pgbouncer_exporter_sha256} pgbouncer_exporter" | sha256sum -c -\
|
||||
&& echo "${sql_exporter_sha256} sql_exporter" | sha256sum -c -
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "awscli"
|
||||
#
|
||||
#########################################################################################
|
||||
FROM alpine/curl:${ALPINE_CURL_VERSION} AS awscli
|
||||
ARG TARGETARCH
|
||||
RUN set -ex; \
|
||||
if [ "${TARGETARCH}" = "amd64" ]; then \
|
||||
TARGETARCH_ALT="x86_64"; \
|
||||
CHECKSUM="c9a9df3770a3ff9259cb469b6179e02829687a464e0824d5c32d378820b53a00"; \
|
||||
elif [ "${TARGETARCH}" = "arm64" ]; then \
|
||||
TARGETARCH_ALT="aarch64"; \
|
||||
CHECKSUM="8181730be7891582b38b028112e81b4899ca817e8c616aad807c9e9d1289223a"; \
|
||||
else \
|
||||
echo "Unsupported architecture: ${TARGETARCH}"; exit 1; \
|
||||
fi; \
|
||||
curl --retry 5 -L "https://awscli.amazonaws.com/awscli-exe-linux-${TARGETARCH_ALT}-2.17.5.zip" -o /tmp/awscliv2.zip; \
|
||||
echo "${CHECKSUM} /tmp/awscliv2.zip" | sha256sum -c -; \
|
||||
unzip /tmp/awscliv2.zip -d /tmp/awscliv2; \
|
||||
/tmp/awscliv2/aws/install; \
|
||||
rm -rf /tmp/awscliv2.zip /tmp/awscliv2
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Clean up postgres folder before inclusion
|
||||
@@ -1704,7 +1807,7 @@ USER nonroot
|
||||
|
||||
COPY --chown=nonroot compute compute
|
||||
|
||||
RUN make PG_VERSION="${PG_VERSION}" -C compute
|
||||
RUN make PG_VERSION="${PG_VERSION:?}" -C compute
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
@@ -1730,15 +1833,15 @@ COPY --from=pg_graphql-src /ext-src/ /ext-src/
|
||||
COPY --from=hypopg-src /ext-src/ /ext-src/
|
||||
COPY --from=pg_hashids-src /ext-src/ /ext-src/
|
||||
COPY --from=rum-src /ext-src/ /ext-src/
|
||||
#COPY --from=pgtap-src /ext-src/ /ext-src/
|
||||
COPY --from=pgtap-src /ext-src/ /ext-src/
|
||||
COPY --from=ip4r-src /ext-src/ /ext-src/
|
||||
COPY --from=prefix-src /ext-src/ /ext-src/
|
||||
COPY --from=hll-src /ext-src/ /ext-src/
|
||||
COPY --from=plpgsql_check-src /ext-src/ /ext-src/
|
||||
#COPY --from=timescaledb-src /ext-src/ /ext-src/
|
||||
COPY --from=pg_hint_plan-src /ext-src/ /ext-src/
|
||||
COPY compute/patches/pg_hint_plan_${PG_VERSION}.patch /ext-src
|
||||
RUN cd /ext-src/pg_hint_plan-src && patch -p1 < /ext-src/pg_hint_plan_${PG_VERSION}.patch
|
||||
COPY compute/patches/pg_hint_plan_${PG_VERSION:?}.patch /ext-src
|
||||
RUN cd /ext-src/pg_hint_plan-src && patch -p1 < /ext-src/pg_hint_plan_${PG_VERSION:?}.patch
|
||||
COPY --from=pg_cron-src /ext-src/ /ext-src/
|
||||
#COPY --from=pgx_ulid-src /ext-src/ /ext-src/
|
||||
#COPY --from=pgx_ulid-pgrx12-src /ext-src/ /ext-src/
|
||||
@@ -1752,14 +1855,20 @@ COPY --from=pg_semver-src /ext-src/ /ext-src/
|
||||
COPY --from=pg_ivm-src /ext-src/ /ext-src/
|
||||
COPY --from=pg_partman-src /ext-src/ /ext-src/
|
||||
#COPY --from=pg_mooncake-src /ext-src/ /ext-src/
|
||||
#COPY --from=pg_repack-src /ext-src/ /ext-src/
|
||||
COPY --from=pg_repack-src /ext-src/ /ext-src/
|
||||
COPY --from=pg_repack-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY compute/patches/pg_repack.patch /ext-src
|
||||
RUN cd /ext-src/pg_repack-src && patch -p1 </ext-src/pg_repack.patch && rm -f /ext-src/pg_repack.patch
|
||||
|
||||
COPY --chmod=755 docker-compose/run-tests.sh /run-tests.sh
|
||||
RUN apt-get update && apt-get install -y libtap-parser-sourcehandler-pgtap-perl\
|
||||
&& apt clean && rm -rf /ext-src/*.tar.gz /var/lib/apt/lists/*
|
||||
ENV PATH=/usr/local/pgsql/bin:$PATH
|
||||
ENV PGHOST=compute
|
||||
ENV PGPORT=55433
|
||||
ENV PGUSER=cloud_admin
|
||||
ENV PGDATABASE=postgres
|
||||
ENV PG_VERSION=${PG_VERSION:?}
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
@@ -1767,7 +1876,7 @@ ENV PGDATABASE=postgres
|
||||
# Put it all together into the final image
|
||||
#
|
||||
#########################################################################################
|
||||
FROM debian:$DEBIAN_FLAVOR
|
||||
FROM $BASE_IMAGE_SHA
|
||||
ARG DEBIAN_VERSION
|
||||
|
||||
# Use strict mode for bash to catch errors early
|
||||
@@ -1841,9 +1950,6 @@ RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
|
||||
mkdir /usr/local/download_extensions && \
|
||||
chown -R postgres:postgres /usr/local/download_extensions
|
||||
|
||||
# aws cli is used by fast_import
|
||||
COPY --from=awscli /usr/local/aws-cli /usr/local/aws-cli
|
||||
|
||||
# pgbouncer and its config
|
||||
COPY --from=pgbouncer /usr/local/pgbouncer/bin/pgbouncer /usr/local/bin/pgbouncer
|
||||
COPY --chmod=0666 --chown=postgres compute/etc/pgbouncer.ini /etc/pgbouncer.ini
|
||||
|
||||
11
compute/patches/pg_duckdb_v031.patch
Normal file
@@ -0,0 +1,11 @@
|
||||
diff --git a/sql/pg_duckdb--0.2.0--0.3.0.sql b/sql/pg_duckdb--0.2.0--0.3.0.sql
|
||||
index d777d76..af60106 100644
|
||||
--- a/sql/pg_duckdb--0.2.0--0.3.0.sql
|
||||
+++ b/sql/pg_duckdb--0.2.0--0.3.0.sql
|
||||
@@ -1056,3 +1056,6 @@ GRANT ALL ON FUNCTION duckdb.cache(TEXT, TEXT) TO PUBLIC;
|
||||
GRANT ALL ON FUNCTION duckdb.cache_info() TO PUBLIC;
|
||||
GRANT ALL ON FUNCTION duckdb.cache_delete(TEXT) TO PUBLIC;
|
||||
GRANT ALL ON PROCEDURE duckdb.recycle_ddb() TO PUBLIC;
|
||||
+GRANT ALL ON FUNCTION duckdb.install_extension(TEXT) TO neon_superuser;
|
||||
+GRANT ALL ON TABLE duckdb.extensions TO neon_superuser;
|
||||
+GRANT ALL ON SEQUENCE duckdb.extensions_table_seq TO neon_superuser;
|
||||
72
compute/patches/pg_repack.patch
Normal file
@@ -0,0 +1,72 @@
|
||||
diff --git a/regress/Makefile b/regress/Makefile
|
||||
index bf6edcb..89b4c7f 100644
|
||||
--- a/regress/Makefile
|
||||
+++ b/regress/Makefile
|
||||
@@ -17,7 +17,7 @@ INTVERSION := $(shell echo $$(($$(echo $(VERSION).0 | sed 's/\([[:digit:]]\{1,\}
|
||||
# Test suite
|
||||
#
|
||||
|
||||
-REGRESS := init-extension repack-setup repack-run error-on-invalid-idx no-error-on-invalid-idx after-schema repack-check nosuper tablespace get_order_by trigger
|
||||
+REGRESS := init-extension repack-setup repack-run error-on-invalid-idx no-error-on-invalid-idx after-schema repack-check nosuper get_order_by trigger
|
||||
|
||||
USE_PGXS = 1 # use pgxs if not in contrib directory
|
||||
PGXS := $(shell $(PG_CONFIG) --pgxs)
|
||||
diff --git a/regress/expected/nosuper.out b/regress/expected/nosuper.out
|
||||
index 8d0a94e..63b68bf 100644
|
||||
--- a/regress/expected/nosuper.out
|
||||
+++ b/regress/expected/nosuper.out
|
||||
@@ -4,22 +4,22 @@
|
||||
SET client_min_messages = error;
|
||||
DROP ROLE IF EXISTS nosuper;
|
||||
SET client_min_messages = warning;
|
||||
-CREATE ROLE nosuper WITH LOGIN;
|
||||
+CREATE ROLE nosuper WITH LOGIN PASSWORD 'NoSuPeRpAsSwOrD';
|
||||
-- => OK
|
||||
\! pg_repack --dbname=contrib_regression --table=tbl_cluster --no-superuser-check
|
||||
INFO: repacking table "public.tbl_cluster"
|
||||
-- => ERROR
|
||||
-\! pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper
|
||||
+\! PGPASSWORD=NoSuPeRpAsSwOrD pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper
|
||||
ERROR: pg_repack failed with error: You must be a superuser to use pg_repack
|
||||
-- => ERROR
|
||||
-\! pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper --no-superuser-check
|
||||
+\! PGPASSWORD=NoSuPeRpAsSwOrD pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper --no-superuser-check
|
||||
ERROR: pg_repack failed with error: ERROR: permission denied for schema repack
|
||||
LINE 1: select repack.version(), repack.version_sql()
|
||||
^
|
||||
GRANT ALL ON ALL TABLES IN SCHEMA repack TO nosuper;
|
||||
GRANT USAGE ON SCHEMA repack TO nosuper;
|
||||
-- => ERROR
|
||||
-\! pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper --no-superuser-check
|
||||
+\! PGPASSWORD=NoSuPeRpAsSwOrD pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper --no-superuser-check
|
||||
INFO: repacking table "public.tbl_cluster"
|
||||
ERROR: query failed: ERROR: current transaction is aborted, commands ignored until end of transaction block
|
||||
DETAIL: query was: RESET lock_timeout
|
||||
diff --git a/regress/sql/nosuper.sql b/regress/sql/nosuper.sql
|
||||
index 072f0fa..dbe60f8 100644
|
||||
--- a/regress/sql/nosuper.sql
|
||||
+++ b/regress/sql/nosuper.sql
|
||||
@@ -4,19 +4,19 @@
|
||||
SET client_min_messages = error;
|
||||
DROP ROLE IF EXISTS nosuper;
|
||||
SET client_min_messages = warning;
|
||||
-CREATE ROLE nosuper WITH LOGIN;
|
||||
+CREATE ROLE nosuper WITH LOGIN PASSWORD 'NoSuPeRpAsSwOrD';
|
||||
-- => OK
|
||||
\! pg_repack --dbname=contrib_regression --table=tbl_cluster --no-superuser-check
|
||||
-- => ERROR
|
||||
-\! pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper
|
||||
+\! PGPASSWORD=NoSuPeRpAsSwOrD pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper
|
||||
-- => ERROR
|
||||
-\! pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper --no-superuser-check
|
||||
+\! PGPASSWORD=NoSuPeRpAsSwOrD pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper --no-superuser-check
|
||||
|
||||
GRANT ALL ON ALL TABLES IN SCHEMA repack TO nosuper;
|
||||
GRANT USAGE ON SCHEMA repack TO nosuper;
|
||||
|
||||
-- => ERROR
|
||||
-\! pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper --no-superuser-check
|
||||
+\! PGPASSWORD=NoSuPeRpAsSwOrD pg_repack --dbname=contrib_regression --table=tbl_cluster --username=nosuper --no-superuser-check
|
||||
|
||||
REVOKE ALL ON ALL TABLES IN SCHEMA repack FROM nosuper;
|
||||
REVOKE USAGE ON SCHEMA repack FROM nosuper;
|
||||
@@ -5,16 +5,6 @@ commands:
|
||||
user: root
|
||||
sysvInitAction: sysinit
|
||||
shell: 'cgconfigparser -l /etc/cgconfig.conf -s 1664'
|
||||
# restrict permissions on /neonvm/bin/resize-swap, because we grant access to compute_ctl for
|
||||
# running it as root.
|
||||
- name: chmod-resize-swap
|
||||
user: root
|
||||
sysvInitAction: sysinit
|
||||
shell: 'chmod 711 /neonvm/bin/resize-swap'
|
||||
- name: chmod-set-disk-quota
|
||||
user: root
|
||||
sysvInitAction: sysinit
|
||||
shell: 'chmod 711 /neonvm/bin/set-disk-quota'
|
||||
- name: pgbouncer
|
||||
user: postgres
|
||||
sysvInitAction: respawn
|
||||
@@ -44,12 +34,9 @@ shutdownHook: |
|
||||
files:
|
||||
- filename: compute_ctl-sudoers
|
||||
content: |
|
||||
# Allow postgres user (which is what compute_ctl runs as) to run /neonvm/bin/resize-swap
|
||||
# and /neonvm/bin/set-disk-quota as root without requiring entering a password (NOPASSWD),
|
||||
# regardless of hostname (ALL)
|
||||
#
|
||||
# Also allow it to shut down the VM. The fast_import job does that when it's finished.
|
||||
postgres ALL=(root) NOPASSWD: /neonvm/bin/resize-swap, /neonvm/bin/set-disk-quota, /neonvm/bin/poweroff
|
||||
# Allow postgres user to shut down the VM. The fast_import job does that
|
||||
# when it's finished.
|
||||
postgres ALL=(root) NOPASSWD: /neonvm/bin/poweroff
|
||||
- filename: cgconfig.conf
|
||||
content: |
|
||||
# Configuration for cgroups in VM compute nodes
|
||||
@@ -74,8 +61,8 @@ build: |
|
||||
# At time of migration to bookworm (2024-10-09), debian has a version of libcgroup/cgroup-tools 2.0.2,
|
||||
# and it _probably_ can be used as-is. However, we'll build it ourselves to minimise the changeset
|
||||
# for debian version migration.
|
||||
#
|
||||
FROM debian:bookworm-slim as libcgroup-builder
|
||||
ARG BOOKWORM_SLIM_SHA=sha256:40b107342c492725bc7aacbe93a49945445191ae364184a6d24fedb28172f6f7
|
||||
FROM debian@$BOOKWORM_SLIM_SHA as libcgroup-builder
|
||||
ENV LIBCGROUP_VERSION=v2.0.3
|
||||
|
||||
RUN set -exu \
|
||||
@@ -111,10 +98,7 @@ merge: |
|
||||
&& echo 'root - nofile 1048576' >>/etc/security/limits.conf \
|
||||
)
|
||||
|
||||
# Allow postgres user (compute_ctl) to run swap resizer.
|
||||
# Need to install sudo in order to allow this.
|
||||
#
|
||||
# Also, remove the 'read' permission from group/other on /neonvm/bin/resize-swap, just to be safe.
|
||||
# Install sudo so that the fast_import can do "sudo poweroff"
|
||||
RUN set -e \
|
||||
&& apt update \
|
||||
&& apt install --no-install-recommends -y \
|
||||
|
||||
@@ -5,16 +5,6 @@ commands:
|
||||
user: root
|
||||
sysvInitAction: sysinit
|
||||
shell: 'cgconfigparser -l /etc/cgconfig.conf -s 1664'
|
||||
# restrict permissions on /neonvm/bin/resize-swap, because we grant access to compute_ctl for
|
||||
# running it as root.
|
||||
- name: chmod-resize-swap
|
||||
user: root
|
||||
sysvInitAction: sysinit
|
||||
shell: 'chmod 711 /neonvm/bin/resize-swap'
|
||||
- name: chmod-set-disk-quota
|
||||
user: root
|
||||
sysvInitAction: sysinit
|
||||
shell: 'chmod 711 /neonvm/bin/set-disk-quota'
|
||||
- name: pgbouncer
|
||||
user: postgres
|
||||
sysvInitAction: respawn
|
||||
@@ -44,10 +34,9 @@ shutdownHook: |
|
||||
files:
|
||||
- filename: compute_ctl-sudoers
|
||||
content: |
|
||||
# Allow postgres user (which is what compute_ctl runs as) to run /neonvm/bin/resize-swap
|
||||
# and /neonvm/bin/set-disk-quota as root without requiring entering a password (NOPASSWD),
|
||||
# regardless of hostname (ALL)
|
||||
postgres ALL=(root) NOPASSWD: /neonvm/bin/resize-swap, /neonvm/bin/set-disk-quota
|
||||
# Allow postgres user to shut down the VM. The fast_import job does that
|
||||
# when it's finished.
|
||||
postgres ALL=(root) NOPASSWD: /neonvm/bin/poweroff
|
||||
- filename: cgconfig.conf
|
||||
content: |
|
||||
# Configuration for cgroups in VM compute nodes
|
||||
@@ -68,7 +57,8 @@ build: |
|
||||
# At time of writing (2023-03-14), debian bullseye has a version of cgroup-tools (technically
|
||||
# libcgroup) that doesn't support cgroup v2 (version 0.41-11). Unfortunately, the vm-monitor
|
||||
# requires cgroup v2, so we'll build cgroup-tools ourselves.
|
||||
FROM debian:bullseye-slim as libcgroup-builder
|
||||
ARG BULLSEYE_SLIM_SHA=sha256:e831d9a884d63734fe3dd9c491ed9a5a3d4c6a6d32c5b14f2067357c49b0b7e1
|
||||
FROM debian@$BULLSEYE_SLIM_SHA as libcgroup-builder
|
||||
ENV LIBCGROUP_VERSION=v2.0.3
|
||||
|
||||
RUN set -exu \
|
||||
@@ -104,10 +94,7 @@ merge: |
|
||||
&& echo 'root - nofile 1048576' >>/etc/security/limits.conf \
|
||||
)
|
||||
|
||||
# Allow postgres user (compute_ctl) to run swap resizer.
|
||||
# Need to install sudo in order to allow this.
|
||||
#
|
||||
# Also, remove the 'read' permission from group/other on /neonvm/bin/resize-swap, just to be safe.
|
||||
# Install sudo so that the fast_import can do "sudo poweroff"
|
||||
RUN set -e \
|
||||
&& apt update \
|
||||
&& apt install --no-install-recommends -y \
|
||||
|
||||
@@ -14,6 +14,7 @@ base64.workspace = true
|
||||
aws-config.workspace = true
|
||||
aws-sdk-s3.workspace = true
|
||||
aws-sdk-kms.workspace = true
|
||||
aws-smithy-types.workspace = true
|
||||
anyhow.workspace = true
|
||||
axum = { workspace = true, features = [] }
|
||||
camino.workspace = true
|
||||
@@ -24,6 +25,8 @@ fail.workspace = true
|
||||
flate2.workspace = true
|
||||
futures.workspace = true
|
||||
http.workspace = true
|
||||
hyper-util.workspace = true
|
||||
hyper.workspace = true
|
||||
metrics.workspace = true
|
||||
nix.workspace = true
|
||||
notify.workspace = true
|
||||
@@ -46,13 +49,12 @@ tokio-postgres.workspace = true
|
||||
tokio-util.workspace = true
|
||||
tokio-stream.workspace = true
|
||||
tracing.workspace = true
|
||||
tracing-opentelemetry.workspace = true
|
||||
tracing-subscriber.workspace = true
|
||||
tracing-utils.workspace = true
|
||||
thiserror.workspace = true
|
||||
url.workspace = true
|
||||
uuid.workspace = true
|
||||
prometheus.workspace = true
|
||||
walkdir.workspace = true
|
||||
|
||||
postgres_initdb.workspace = true
|
||||
compute_api.workspace = true
|
||||
|
||||
@@ -41,21 +41,20 @@ use std::process::exit;
|
||||
use std::str::FromStr;
|
||||
use std::sync::atomic::Ordering;
|
||||
use std::sync::{mpsc, Arc, Condvar, Mutex, RwLock};
|
||||
use std::time::SystemTime;
|
||||
use std::{thread, time::Duration};
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use chrono::Utc;
|
||||
use clap::Parser;
|
||||
use compute_tools::disk_quota::set_disk_quota;
|
||||
use compute_tools::http::server::Server;
|
||||
use compute_tools::lsn_lease::launch_lsn_lease_bg_task_for_static;
|
||||
use compute_tools::neonvmd_client::{resize_swap, set_disk_quota};
|
||||
use signal_hook::consts::{SIGQUIT, SIGTERM};
|
||||
use signal_hook::{consts::SIGINT, iterator::Signals};
|
||||
use tracing::{error, info, warn};
|
||||
use url::Url;
|
||||
|
||||
use compute_api::responses::ComputeStatus;
|
||||
use compute_api::responses::{ComputeCtlConfig, ComputeStatus};
|
||||
use compute_api::spec::ComputeSpec;
|
||||
|
||||
use compute_tools::compute::{
|
||||
@@ -67,7 +66,6 @@ use compute_tools::logger::*;
|
||||
use compute_tools::monitor::launch_monitor;
|
||||
use compute_tools::params::*;
|
||||
use compute_tools::spec::*;
|
||||
use compute_tools::swap::resize_swap;
|
||||
use rlimit::{setrlimit, Resource};
|
||||
use utils::failpoint_support;
|
||||
|
||||
@@ -86,19 +84,6 @@ fn parse_remote_ext_config(arg: &str) -> Result<String> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate a compute ID if one is not supplied. This exists to keep forward
|
||||
/// compatibility tests working, but will be removed in a future iteration.
|
||||
fn generate_compute_id() -> String {
|
||||
let now = SystemTime::now();
|
||||
|
||||
format!(
|
||||
"compute-{}",
|
||||
now.duration_since(SystemTime::UNIX_EPOCH)
|
||||
.unwrap()
|
||||
.as_secs()
|
||||
)
|
||||
}
|
||||
|
||||
#[derive(Parser)]
|
||||
#[command(rename_all = "kebab-case")]
|
||||
struct Cli {
|
||||
@@ -112,16 +97,13 @@ struct Cli {
|
||||
/// outside the compute will talk to the compute through this port. Keep
|
||||
/// the previous name for this argument around for a smoother release
|
||||
/// with the control plane.
|
||||
///
|
||||
/// TODO: Remove the alias after the control plane release which teaches the
|
||||
/// control plane about the renamed argument.
|
||||
#[arg(long, alias = "http-port", default_value_t = 3080)]
|
||||
#[arg(long, default_value_t = 3080)]
|
||||
pub external_http_port: u16,
|
||||
|
||||
/// The port to bind the internal listening HTTP server to. Clients like
|
||||
/// The port to bind the internal listening HTTP server to. Clients include
|
||||
/// the neon extension (for installing remote extensions) and local_proxy.
|
||||
#[arg(long)]
|
||||
pub internal_http_port: Option<u16>,
|
||||
#[arg(long, default_value_t = 3081)]
|
||||
pub internal_http_port: u16,
|
||||
|
||||
#[arg(short = 'D', long, value_name = "DATADIR")]
|
||||
pub pgdata: String,
|
||||
@@ -147,6 +129,7 @@ struct Cli {
|
||||
#[arg(long, action = clap::ArgAction::SetTrue)]
|
||||
pub resize_swap_on_bind: bool,
|
||||
|
||||
/// This is no longer used for anything. It's kept for now just for backwards-compatibility.
|
||||
#[arg(long)]
|
||||
pub set_disk_quota_for_fs: Option<String>,
|
||||
|
||||
@@ -156,7 +139,7 @@ struct Cli {
|
||||
#[arg(short = 'S', long, group = "spec-path")]
|
||||
pub spec_path: Option<OsString>,
|
||||
|
||||
#[arg(short = 'i', long, group = "compute-id", default_value = generate_compute_id())]
|
||||
#[arg(short = 'i', long, group = "compute-id")]
|
||||
pub compute_id: String,
|
||||
|
||||
#[arg(short = 'p', long, conflicts_with_all = ["spec", "spec-path"], value_name = "CONTROL_PLANE_API_BASE_URL")]
|
||||
@@ -281,6 +264,7 @@ fn try_spec_from_cli(cli: &Cli) -> Result<CliSpecParams> {
|
||||
info!("got spec from cli argument {}", spec_json);
|
||||
return Ok(CliSpecParams {
|
||||
spec: Some(serde_json::from_str(spec_json)?),
|
||||
compute_ctl_config: ComputeCtlConfig::default(),
|
||||
live_config_allowed: false,
|
||||
});
|
||||
}
|
||||
@@ -290,6 +274,7 @@ fn try_spec_from_cli(cli: &Cli) -> Result<CliSpecParams> {
|
||||
let file = File::open(Path::new(spec_path))?;
|
||||
return Ok(CliSpecParams {
|
||||
spec: Some(serde_json::from_reader(file)?),
|
||||
compute_ctl_config: ComputeCtlConfig::default(),
|
||||
live_config_allowed: true,
|
||||
});
|
||||
}
|
||||
@@ -299,8 +284,9 @@ fn try_spec_from_cli(cli: &Cli) -> Result<CliSpecParams> {
|
||||
};
|
||||
|
||||
match get_spec_from_control_plane(cli.control_plane_uri.as_ref().unwrap(), &cli.compute_id) {
|
||||
Ok(spec) => Ok(CliSpecParams {
|
||||
spec,
|
||||
Ok(resp) => Ok(CliSpecParams {
|
||||
spec: resp.0,
|
||||
compute_ctl_config: resp.1,
|
||||
live_config_allowed: true,
|
||||
}),
|
||||
Err(e) => {
|
||||
@@ -317,6 +303,8 @@ fn try_spec_from_cli(cli: &Cli) -> Result<CliSpecParams> {
|
||||
struct CliSpecParams {
|
||||
/// If a spec was provided via CLI or file, the [`ComputeSpec`]
|
||||
spec: Option<ComputeSpec>,
|
||||
#[allow(dead_code)]
|
||||
compute_ctl_config: ComputeCtlConfig,
|
||||
live_config_allowed: bool,
|
||||
}
|
||||
|
||||
@@ -326,6 +314,7 @@ fn wait_spec(
|
||||
CliSpecParams {
|
||||
spec,
|
||||
live_config_allowed,
|
||||
compute_ctl_config: _,
|
||||
}: CliSpecParams,
|
||||
) -> Result<Arc<ComputeNode>> {
|
||||
let mut new_state = ComputeState::new();
|
||||
@@ -353,7 +342,7 @@ fn wait_spec(
|
||||
pgbin: cli.pgbin.clone(),
|
||||
pgversion: get_pg_version_string(&cli.pgbin),
|
||||
external_http_port: cli.external_http_port,
|
||||
internal_http_port: cli.internal_http_port.unwrap_or(cli.external_http_port + 1),
|
||||
internal_http_port: cli.internal_http_port,
|
||||
live_config_allowed,
|
||||
state: Mutex::new(new_state),
|
||||
state_changed: Condvar::new(),
|
||||
@@ -377,7 +366,7 @@ fn wait_spec(
|
||||
|
||||
// The internal HTTP server could be launched later, but there isn't much
|
||||
// sense in waiting.
|
||||
Server::Internal(cli.internal_http_port.unwrap_or(cli.external_http_port + 1)).launch(&compute);
|
||||
Server::Internal(cli.internal_http_port).launch(&compute);
|
||||
|
||||
if !spec_set {
|
||||
// No spec provided, hang waiting for it.
|
||||
@@ -468,10 +457,8 @@ fn start_postgres(
|
||||
}
|
||||
|
||||
// Set disk quota if the compute spec says so
|
||||
if let (Some(disk_quota_bytes), Some(disk_quota_fs_mountpoint)) =
|
||||
(disk_quota_bytes, cli.set_disk_quota_for_fs.as_ref())
|
||||
{
|
||||
match set_disk_quota(disk_quota_bytes, disk_quota_fs_mountpoint) {
|
||||
if let Some(disk_quota_bytes) = disk_quota_bytes {
|
||||
match set_disk_quota(disk_quota_bytes) {
|
||||
Ok(()) => {
|
||||
let size_mib = disk_quota_bytes as f32 / (1 << 20) as f32; // just for more coherent display.
|
||||
info!(%disk_quota_bytes, %size_mib, "set disk quota");
|
||||
|
||||
@@ -25,10 +25,10 @@
|
||||
//! docker push localhost:3030/localregistry/compute-node-v14:latest
|
||||
//! ```
|
||||
|
||||
use anyhow::Context;
|
||||
use anyhow::{bail, Context};
|
||||
use aws_config::BehaviorVersion;
|
||||
use camino::{Utf8Path, Utf8PathBuf};
|
||||
use clap::Parser;
|
||||
use clap::{Parser, Subcommand};
|
||||
use compute_tools::extension_server::{get_pg_version, PostgresMajorVersion};
|
||||
use nix::unistd::Pid;
|
||||
use tracing::{error, info, info_span, warn, Instrument};
|
||||
@@ -44,32 +44,59 @@ mod s3_uri;
|
||||
const PG_WAIT_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(600);
|
||||
const PG_WAIT_RETRY_INTERVAL: std::time::Duration = std::time::Duration::from_millis(300);
|
||||
|
||||
#[derive(Subcommand, Debug)]
|
||||
enum Command {
|
||||
/// Runs local postgres (neon binary), restores into it,
|
||||
/// uploads pgdata to s3 to be consumed by pageservers
|
||||
Pgdata {
|
||||
/// Raw connection string to the source database. Used only in tests,
|
||||
/// real scenario uses encrypted connection string in spec.json from s3.
|
||||
#[clap(long)]
|
||||
source_connection_string: Option<String>,
|
||||
/// If specified, will not shut down the local postgres after the import. Used in local testing
|
||||
#[clap(short, long)]
|
||||
interactive: bool,
|
||||
/// Port to run postgres on. Default is 5432.
|
||||
#[clap(long, default_value_t = 5432)]
|
||||
pg_port: u16, // port to run postgres on, 5432 is default
|
||||
|
||||
/// Number of CPUs in the system. This is used to configure # of
|
||||
/// parallel worker processes, for index creation.
|
||||
#[clap(long, env = "NEON_IMPORTER_NUM_CPUS")]
|
||||
num_cpus: Option<usize>,
|
||||
|
||||
/// Amount of RAM in the system. This is used to configure shared_buffers
|
||||
/// and maintenance_work_mem.
|
||||
#[clap(long, env = "NEON_IMPORTER_MEMORY_MB")]
|
||||
memory_mb: Option<usize>,
|
||||
},
|
||||
|
||||
/// Runs pg_dump-pg_restore from source to destination without running local postgres.
|
||||
DumpRestore {
|
||||
/// Raw connection string to the source database. Used only in tests,
|
||||
/// real scenario uses encrypted connection string in spec.json from s3.
|
||||
#[clap(long)]
|
||||
source_connection_string: Option<String>,
|
||||
/// Raw connection string to the destination database. Used only in tests,
|
||||
/// real scenario uses encrypted connection string in spec.json from s3.
|
||||
#[clap(long)]
|
||||
destination_connection_string: Option<String>,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(clap::Parser)]
|
||||
struct Args {
|
||||
#[clap(long)]
|
||||
#[clap(long, env = "NEON_IMPORTER_WORKDIR")]
|
||||
working_directory: Utf8PathBuf,
|
||||
#[clap(long, env = "NEON_IMPORTER_S3_PREFIX")]
|
||||
s3_prefix: Option<s3_uri::S3Uri>,
|
||||
#[clap(long)]
|
||||
source_connection_string: Option<String>,
|
||||
#[clap(short, long)]
|
||||
interactive: bool,
|
||||
#[clap(long)]
|
||||
#[clap(long, env = "NEON_IMPORTER_PG_BIN_DIR")]
|
||||
pg_bin_dir: Utf8PathBuf,
|
||||
#[clap(long)]
|
||||
#[clap(long, env = "NEON_IMPORTER_PG_LIB_DIR")]
|
||||
pg_lib_dir: Utf8PathBuf,
|
||||
#[clap(long)]
|
||||
pg_port: Option<u16>, // port to run postgres on, 5432 is default
|
||||
|
||||
/// Number of CPUs in the system. This is used to configure # of
|
||||
/// parallel worker processes, for index creation.
|
||||
#[clap(long, env = "NEON_IMPORTER_NUM_CPUS")]
|
||||
num_cpus: Option<usize>,
|
||||
|
||||
/// Amount of RAM in the system. This is used to configure shared_buffers
|
||||
/// and maintenance_work_mem.
|
||||
#[clap(long, env = "NEON_IMPORTER_MEMORY_MB")]
|
||||
memory_mb: Option<usize>,
|
||||
#[clap(subcommand)]
|
||||
command: Command,
|
||||
}
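Putting the arguments above together, invocations might look like the following. This is only a sketch: the binary name, paths, and connection strings are placeholders, and the flag spellings assume clap's kebab-case renaming of the fields shown above.

# Import into a local postgres and upload the resulting pgdata (test-only connection string):
fast_import --working-directory /tmp/import-wd \
    --pg-bin-dir /usr/local/pgsql/bin --pg-lib-dir /usr/local/pgsql/lib \
    pgdata --source-connection-string 'postgres://user:pass@source-host:5432/db' --pg-port 5432

# Plain pg_dump/pg_restore from source to destination, without running a local postgres:
fast_import --working-directory /tmp/import-wd \
    --pg-bin-dir /usr/local/pgsql/bin --pg-lib-dir /usr/local/pgsql/lib \
    dump-restore \
    --source-connection-string 'postgres://user:pass@source-host:5432/db' \
    --destination-connection-string 'postgres://user:pass@dest-host:5432/db'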
|
||||
|
||||
#[serde_with::serde_as]
|
||||
@@ -78,6 +105,8 @@ struct Spec {
|
||||
encryption_secret: EncryptionSecret,
|
||||
#[serde_as(as = "serde_with::base64::Base64")]
|
||||
source_connstring_ciphertext_base64: Vec<u8>,
|
||||
#[serde_as(as = "Option<serde_with::base64::Base64>")]
|
||||
destination_connstring_ciphertext_base64: Option<Vec<u8>>,
|
||||
}
|
||||
|
||||
#[derive(serde::Deserialize)]
|
||||
@@ -93,192 +122,150 @@ const DEFAULT_LOCALE: &str = if cfg!(target_os = "macos") {
|
||||
"C.UTF-8"
|
||||
};
|
||||
|
||||
#[tokio::main]
|
||||
pub(crate) async fn main() -> anyhow::Result<()> {
|
||||
utils::logging::init(
|
||||
utils::logging::LogFormat::Plain,
|
||||
utils::logging::TracingErrorLayerEnablement::EnableWithRustLogFilter,
|
||||
utils::logging::Output::Stdout,
|
||||
)?;
|
||||
|
||||
info!("starting");
|
||||
|
||||
let args = Args::parse();
|
||||
|
||||
// Validate arguments
|
||||
if args.s3_prefix.is_none() && args.source_connection_string.is_none() {
|
||||
anyhow::bail!("either s3_prefix or source_connection_string must be specified");
|
||||
}
|
||||
if args.s3_prefix.is_some() && args.source_connection_string.is_some() {
|
||||
anyhow::bail!("only one of s3_prefix or source_connection_string can be specified");
|
||||
}
|
||||
|
||||
let working_directory = args.working_directory;
|
||||
let pg_bin_dir = args.pg_bin_dir;
|
||||
let pg_lib_dir = args.pg_lib_dir;
|
||||
let pg_port = args.pg_port.unwrap_or_else(|| {
|
||||
info!("pg_port not specified, using default 5432");
|
||||
5432
|
||||
});
|
||||
|
||||
// Initialize AWS clients only if s3_prefix is specified
|
||||
let (aws_config, kms_client) = if args.s3_prefix.is_some() {
|
||||
let config = aws_config::load_defaults(BehaviorVersion::v2024_03_28()).await;
|
||||
let kms = aws_sdk_kms::Client::new(&config);
|
||||
(Some(config), Some(kms))
|
||||
} else {
|
||||
(None, None)
|
||||
};
|
||||
|
||||
// Get source connection string either from S3 spec or direct argument
|
||||
let source_connection_string = if let Some(s3_prefix) = &args.s3_prefix {
|
||||
let spec: Spec = {
|
||||
let spec_key = s3_prefix.append("/spec.json");
|
||||
let s3_client = aws_sdk_s3::Client::new(aws_config.as_ref().unwrap());
|
||||
let object = s3_client
|
||||
.get_object()
|
||||
.bucket(&spec_key.bucket)
|
||||
.key(spec_key.key)
|
||||
.send()
|
||||
.await
|
||||
.context("get spec from s3")?
|
||||
.body
|
||||
.collect()
|
||||
.await
|
||||
.context("download spec body")?;
|
||||
serde_json::from_slice(&object.into_bytes()).context("parse spec as json")?
|
||||
};
|
||||
|
||||
match spec.encryption_secret {
|
||||
EncryptionSecret::KMS { key_id } => {
|
||||
let mut output = kms_client
|
||||
.unwrap()
|
||||
.decrypt()
|
||||
.key_id(key_id)
|
||||
.ciphertext_blob(aws_sdk_s3::primitives::Blob::new(
|
||||
spec.source_connstring_ciphertext_base64,
|
||||
))
|
||||
.send()
|
||||
.await
|
||||
.context("decrypt source connection string")?;
|
||||
let plaintext = output
|
||||
.plaintext
|
||||
.take()
|
||||
.context("get plaintext source connection string")?;
|
||||
String::from_utf8(plaintext.into_inner())
|
||||
.context("parse source connection string as utf8")?
|
||||
}
|
||||
}
|
||||
} else {
|
||||
args.source_connection_string.unwrap()
|
||||
};
|
||||
|
||||
match tokio::fs::create_dir(&working_directory).await {
|
||||
Ok(()) => {}
|
||||
Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => {
|
||||
if !is_directory_empty(&working_directory)
|
||||
.await
|
||||
.context("check if working directory is empty")?
|
||||
{
|
||||
anyhow::bail!("working directory is not empty");
|
||||
} else {
|
||||
// ok
|
||||
}
|
||||
}
|
||||
Err(e) => return Err(anyhow::Error::new(e).context("create working directory")),
|
||||
}
|
||||
|
||||
let pgdata_dir = working_directory.join("pgdata");
|
||||
tokio::fs::create_dir(&pgdata_dir)
|
||||
async fn decode_connstring(
|
||||
kms_client: &aws_sdk_kms::Client,
|
||||
key_id: &String,
|
||||
connstring_ciphertext_base64: Vec<u8>,
|
||||
) -> Result<String, anyhow::Error> {
|
||||
let mut output = kms_client
|
||||
.decrypt()
|
||||
.key_id(key_id)
|
||||
.ciphertext_blob(aws_sdk_s3::primitives::Blob::new(
|
||||
connstring_ciphertext_base64,
|
||||
))
|
||||
.send()
|
||||
.await
|
||||
.context("create pgdata directory")?;
|
||||
.context("decrypt connection string")?;
|
||||
|
||||
let pgbin = pg_bin_dir.join("postgres");
|
||||
let pg_version = match get_pg_version(pgbin.as_ref()) {
|
||||
PostgresMajorVersion::V14 => 14,
|
||||
PostgresMajorVersion::V15 => 15,
|
||||
PostgresMajorVersion::V16 => 16,
|
||||
PostgresMajorVersion::V17 => 17,
|
||||
};
|
||||
let superuser = "cloud_admin"; // XXX: this shouldn't be hard-coded
|
||||
postgres_initdb::do_run_initdb(postgres_initdb::RunInitdbArgs {
|
||||
superuser,
|
||||
locale: DEFAULT_LOCALE, // XXX: this shouldn't be hard-coded,
|
||||
pg_version,
|
||||
initdb_bin: pg_bin_dir.join("initdb").as_ref(),
|
||||
library_search_path: &pg_lib_dir, // TODO: is this right? Prob works in compute image, not sure about neon_local.
|
||||
pgdata: &pgdata_dir,
|
||||
})
|
||||
.await
|
||||
.context("initdb")?;
|
||||
let plaintext = output
|
||||
.plaintext
|
||||
.take()
|
||||
.context("get plaintext connection string")?;
|
||||
|
||||
// If the caller didn't specify CPU / RAM to use for sizing, default to
|
||||
// number of CPUs in the system, and pretty arbitrarily, 256 MB of RAM.
|
||||
let nproc = args.num_cpus.unwrap_or_else(num_cpus::get);
|
||||
let memory_mb = args.memory_mb.unwrap_or(256);
|
||||
String::from_utf8(plaintext.into_inner()).context("parse connection string as utf8")
|
||||
}
|
||||
|
||||
// Somewhat arbitrarily, use 10 % of memory for shared buffer cache, 70% for
|
||||
// maintenance_work_mem (i.e. for sorting during index creation), and leave the rest
|
||||
// available for misc other stuff that PostgreSQL uses memory for.
|
||||
let shared_buffers_mb = ((memory_mb as f32) * 0.10) as usize;
|
||||
let maintenance_work_mem_mb = ((memory_mb as f32) * 0.70) as usize;
|
||||
struct PostgresProcess {
|
||||
pgdata_dir: Utf8PathBuf,
|
||||
pg_bin_dir: Utf8PathBuf,
|
||||
pgbin: Utf8PathBuf,
|
||||
pg_lib_dir: Utf8PathBuf,
|
||||
postgres_proc: Option<tokio::process::Child>,
|
||||
}
|
||||
|
||||
//
|
||||
// Launch postgres process
|
||||
//
|
||||
let mut postgres_proc = tokio::process::Command::new(pgbin)
|
||||
.arg("-D")
|
||||
.arg(&pgdata_dir)
|
||||
.args(["-p", &format!("{pg_port}")])
|
||||
.args(["-c", "wal_level=minimal"])
|
||||
.args(["-c", &format!("shared_buffers={shared_buffers_mb}MB")])
|
||||
.args(["-c", "max_wal_senders=0"])
|
||||
.args(["-c", "fsync=off"])
|
||||
.args(["-c", "full_page_writes=off"])
|
||||
.args(["-c", "synchronous_commit=off"])
|
||||
.args([
|
||||
"-c",
|
||||
&format!("maintenance_work_mem={maintenance_work_mem_mb}MB"),
|
||||
])
|
||||
.args(["-c", &format!("max_parallel_maintenance_workers={nproc}")])
|
||||
.args(["-c", &format!("max_parallel_workers={nproc}")])
|
||||
.args(["-c", &format!("max_parallel_workers_per_gather={nproc}")])
|
||||
.args(["-c", &format!("max_worker_processes={nproc}")])
|
||||
.args([
|
||||
"-c",
|
||||
&format!(
|
||||
"effective_io_concurrency={}",
|
||||
if cfg!(target_os = "macos") { 0 } else { 100 }
|
||||
),
|
||||
])
|
||||
.env_clear()
|
||||
.env("LD_LIBRARY_PATH", &pg_lib_dir)
|
||||
.env(
|
||||
"ASAN_OPTIONS",
|
||||
std::env::var("ASAN_OPTIONS").unwrap_or_default(),
|
||||
impl PostgresProcess {
|
||||
fn new(pgdata_dir: Utf8PathBuf, pg_bin_dir: Utf8PathBuf, pg_lib_dir: Utf8PathBuf) -> Self {
|
||||
Self {
|
||||
pgdata_dir,
|
||||
pgbin: pg_bin_dir.join("postgres"),
|
||||
pg_bin_dir,
|
||||
pg_lib_dir,
|
||||
postgres_proc: None,
|
||||
}
|
||||
}
|
||||
|
||||
async fn prepare(&self, initdb_user: &str) -> Result<(), anyhow::Error> {
|
||||
tokio::fs::create_dir(&self.pgdata_dir)
|
||||
.await
|
||||
.context("create pgdata directory")?;
|
||||
|
||||
let pg_version = match get_pg_version(self.pgbin.as_ref()) {
|
||||
PostgresMajorVersion::V14 => 14,
|
||||
PostgresMajorVersion::V15 => 15,
|
||||
PostgresMajorVersion::V16 => 16,
|
||||
PostgresMajorVersion::V17 => 17,
|
||||
};
|
||||
postgres_initdb::do_run_initdb(postgres_initdb::RunInitdbArgs {
|
||||
superuser: initdb_user,
|
||||
locale: DEFAULT_LOCALE, // XXX: this shouldn't be hard-coded,
|
||||
pg_version,
|
||||
initdb_bin: self.pg_bin_dir.join("initdb").as_ref(),
|
||||
library_search_path: &self.pg_lib_dir, // TODO: is this right? Prob works in compute image, not sure about neon_local.
|
||||
pgdata: &self.pgdata_dir,
|
||||
})
|
||||
.await
|
||||
.context("initdb")
|
||||
}
|
||||
|
||||
async fn start(
|
||||
&mut self,
|
||||
initdb_user: &str,
|
||||
port: u16,
|
||||
nproc: usize,
|
||||
memory_mb: usize,
|
||||
) -> Result<&tokio::process::Child, anyhow::Error> {
|
||||
self.prepare(initdb_user).await?;
|
||||
|
||||
// Somewhat arbitrarily, use 10 % of memory for shared buffer cache, 70% for
|
||||
// maintenance_work_mem (i.e. for sorting during index creation), and leave the rest
|
||||
// available for misc other stuff that PostgreSQL uses memory for.
|
||||
let shared_buffers_mb = ((memory_mb as f32) * 0.10) as usize;
|
||||
let maintenance_work_mem_mb = ((memory_mb as f32) * 0.70) as usize;
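        // Worked example: with the 256 MB default, that is 25 MB of shared_buffers
        // and 179 MB of maintenance_work_mem, with the rest left for everything else.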
|
||||
|
||||
//
|
||||
// Launch postgres process
|
||||
//
|
||||
let mut proc = tokio::process::Command::new(&self.pgbin)
|
||||
.arg("-D")
|
||||
.arg(&self.pgdata_dir)
|
||||
.args(["-p", &format!("{port}")])
|
||||
.args(["-c", "wal_level=minimal"])
|
||||
.args(["-c", &format!("shared_buffers={shared_buffers_mb}MB")])
|
||||
.args(["-c", "max_wal_senders=0"])
|
||||
.args(["-c", "fsync=off"])
|
||||
.args(["-c", "full_page_writes=off"])
|
||||
.args(["-c", "synchronous_commit=off"])
|
||||
.args([
|
||||
"-c",
|
||||
&format!("maintenance_work_mem={maintenance_work_mem_mb}MB"),
|
||||
])
|
||||
.args(["-c", &format!("max_parallel_maintenance_workers={nproc}")])
|
||||
.args(["-c", &format!("max_parallel_workers={nproc}")])
|
||||
.args(["-c", &format!("max_parallel_workers_per_gather={nproc}")])
|
||||
.args(["-c", &format!("max_worker_processes={nproc}")])
|
||||
.args(["-c", "effective_io_concurrency=100"])
|
||||
.env_clear()
|
||||
.env("LD_LIBRARY_PATH", &self.pg_lib_dir)
|
||||
.env(
|
||||
"ASAN_OPTIONS",
|
||||
std::env::var("ASAN_OPTIONS").unwrap_or_default(),
|
||||
)
|
||||
.env(
|
||||
"UBSAN_OPTIONS",
|
||||
std::env::var("UBSAN_OPTIONS").unwrap_or_default(),
|
||||
)
|
||||
.stdout(std::process::Stdio::piped())
|
||||
.stderr(std::process::Stdio::piped())
|
||||
.spawn()
|
||||
.context("spawn postgres")?;
|
||||
|
||||
info!("spawned postgres, waiting for it to become ready");
|
||||
tokio::spawn(
|
||||
child_stdio_to_log::relay_process_output(proc.stdout.take(), proc.stderr.take())
|
||||
.instrument(info_span!("postgres")),
|
||||
);
|
||||
|
||||
self.postgres_proc = Some(proc);
|
||||
Ok(self.postgres_proc.as_ref().unwrap())
|
||||
}
|
||||
|
||||
async fn shutdown(&mut self) -> Result<(), anyhow::Error> {
|
||||
let proc: &mut tokio::process::Child = self.postgres_proc.as_mut().unwrap();
|
||||
info!("shutdown postgres");
|
||||
nix::sys::signal::kill(
|
||||
Pid::from_raw(i32::try_from(proc.id().unwrap()).expect("convert child pid to i32")),
|
||||
nix::sys::signal::SIGTERM,
|
||||
)
|
||||
|
||||
.context("signal postgres to shut down")?;
|
||||
proc.wait()
|
||||
.await
|
||||
.context("wait for postgres to shut down")
|
||||
.map(|_| ())
|
||||
}
|
||||
}
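// Illustrative lifecycle of the helper above; the paths, port and sizes are
// made up, and cmd_pgdata below is the real call site.
#[allow(dead_code)]
async fn postgres_process_lifecycle_sketch() -> Result<(), anyhow::Error> {
    let mut proc = PostgresProcess::new(
        Utf8PathBuf::from("/tmp/import/pgdata"),
        Utf8PathBuf::from("/usr/local/pgsql/bin"),
        Utf8PathBuf::from("/usr/local/pgsql/lib"),
    );
    // start() first runs initdb via prepare(), then launches postgres with the
    // bulk-load-friendly settings shown above.
    proc.start("cloud_admin", 5432, num_cpus::get(), 256).await?;
    // ... dump/restore work would happen here ...
    proc.shutdown().await?;
    Ok(())
}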
|
||||
|
||||
async fn wait_until_ready(connstring: String, create_dbname: String) {
|
||||
// Create neondb database in the running postgres
|
||||
|
||||
|
||||
let start_time = std::time::Instant::now();
|
||||
|
||||
loop {
|
||||
@@ -289,7 +276,12 @@ pub(crate) async fn main() -> anyhow::Result<()> {
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
|
||||
match tokio_postgres::connect(
|
||||
&connstring.replace("dbname=neondb", "dbname=postgres"),
|
||||
tokio_postgres::NoTls,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok((client, connection)) => {
|
||||
// Spawn the connection handling task to maintain the connection
|
||||
tokio::spawn(async move {
|
||||
@@ -298,9 +290,12 @@ pub(crate) async fn main() -> anyhow::Result<()> {
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
match client
|
||||
.simple_query(format!("CREATE DATABASE {create_dbname};").as_str())
|
||||
.await
|
||||
{
|
||||
Ok(_) => {
|
||||
info!("created neondb database");
|
||||
info!("created {} database", create_dbname);
|
||||
break;
|
||||
}
|
||||
Err(e) => {
|
||||
@@ -324,10 +319,16 @@ pub(crate) async fn main() -> anyhow::Result<()> {
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
async fn run_dump_restore(
|
||||
workdir: Utf8PathBuf,
|
||||
pg_bin_dir: Utf8PathBuf,
|
||||
pg_lib_dir: Utf8PathBuf,
|
||||
source_connstring: String,
|
||||
destination_connstring: String,
|
||||
) -> Result<(), anyhow::Error> {
|
||||
let dumpdir = workdir.join("dumpdir");
|
||||
|
||||
let common_args = [
|
||||
// schema mapping (prob suffices to specify them on one side)
|
||||
@@ -356,10 +357,18 @@ pub(crate) async fn main() -> anyhow::Result<()> {
|
||||
.arg("--no-sync")
|
||||
// POSITIONAL args
|
||||
// source db (db name included in connection string)
|
||||
|
||||
.arg(&source_connstring)
|
||||
// how we run it
|
||||
.env_clear()
|
||||
.env("LD_LIBRARY_PATH", &pg_lib_dir)
|
||||
.env(
|
||||
"ASAN_OPTIONS",
|
||||
std::env::var("ASAN_OPTIONS").unwrap_or_default(),
|
||||
)
|
||||
.env(
|
||||
"UBSAN_OPTIONS",
|
||||
std::env::var("UBSAN_OPTIONS").unwrap_or_default(),
|
||||
)
|
||||
.kill_on_drop(true)
|
||||
.stdout(std::process::Stdio::piped())
|
||||
.stderr(std::process::Stdio::piped())
|
||||
@@ -376,24 +385,31 @@ pub(crate) async fn main() -> anyhow::Result<()> {
|
||||
let st = pg_dump.wait().await.context("wait for pg_dump")?;
|
||||
info!(status=?st, "pg_dump exited");
|
||||
if !st.success() {
|
||||
warn!(status=%st, "pg_dump failed, restore will likely fail as well");
|
||||
error!(status=%st, "pg_dump failed, restore will likely fail as well");
|
||||
bail!("pg_dump failed");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// TODO: maybe do it in a streaming way, plenty of internal research done on this already
|
||||
// TODO: do the unlogged table trick
|
||||
|
||||
info!("restore from working directory into vanilla postgres");
|
||||
{
|
||||
let mut pg_restore = tokio::process::Command::new(pg_bin_dir.join("pg_restore"))
|
||||
.args(&common_args)
|
||||
.arg("-d")
|
||||
|
||||
.arg(&destination_connstring)
|
||||
// POSITIONAL args
|
||||
.arg(&dumpdir)
|
||||
// how we run it
|
||||
.env_clear()
|
||||
.env("LD_LIBRARY_PATH", &pg_lib_dir)
|
||||
.env(
|
||||
"ASAN_OPTIONS",
|
||||
std::env::var("ASAN_OPTIONS").unwrap_or_default(),
|
||||
)
|
||||
.env(
|
||||
"UBSAN_OPTIONS",
|
||||
std::env::var("UBSAN_OPTIONS").unwrap_or_default(),
|
||||
)
|
||||
.kill_on_drop(true)
|
||||
.stdout(std::process::Stdio::piped())
|
||||
.stderr(std::process::Stdio::piped())
|
||||
@@ -411,48 +427,259 @@ pub(crate) async fn main() -> anyhow::Result<()> {
|
||||
let st = pg_restore.wait().await.context("wait for pg_restore")?;
|
||||
info!(status=?st, "pg_restore exited");
|
||||
if !st.success() {
|
||||
warn!(status=%st, "pg_restore failed, restore will likely fail as well");
|
||||
}
|
||||
}
|
||||
|
||||
// If interactive mode, wait for Ctrl+C
|
||||
if args.interactive {
|
||||
info!("Running in interactive mode. Press Ctrl+C to shut down.");
|
||||
tokio::signal::ctrl_c().await.context("wait for ctrl-c")?;
|
||||
}
|
||||
|
||||
info!("shutdown postgres");
|
||||
{
|
||||
nix::sys::signal::kill(
|
||||
Pid::from_raw(
|
||||
i32::try_from(postgres_proc.id().unwrap()).expect("convert child pid to i32"),
|
||||
),
|
||||
nix::sys::signal::SIGTERM,
|
||||
)
|
||||
.context("signal postgres to shut down")?;
|
||||
postgres_proc
|
||||
.wait()
|
||||
.await
|
||||
.context("wait for postgres to shut down")?;
|
||||
}
|
||||
|
||||
// Only sync if s3_prefix was specified
|
||||
if let Some(s3_prefix) = args.s3_prefix {
|
||||
info!("upload pgdata");
|
||||
aws_s3_sync::sync(Utf8Path::new(&pgdata_dir), &s3_prefix.append("/pgdata/"))
|
||||
.await
|
||||
.context("sync dump directory to destination")?;
|
||||
|
||||
info!("write status");
|
||||
{
|
||||
let status_dir = working_directory.join("status");
|
||||
std::fs::create_dir(&status_dir).context("create status directory")?;
|
||||
let status_file = status_dir.join("pgdata");
|
||||
std::fs::write(&status_file, serde_json::json!({"done": true}).to_string())
|
||||
.context("write status file")?;
|
||||
aws_s3_sync::sync(&status_dir, &s3_prefix.append("/status/"))
|
||||
.await
|
||||
.context("sync status directory to destination")?;
|
||||
error!(status=%st, "pg_restore failed, restore will likely fail as well");
|
||||
bail!("pg_restore failed");
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn cmd_pgdata(
|
||||
s3_client: Option<aws_sdk_s3::Client>,
|
||||
kms_client: Option<aws_sdk_kms::Client>,
|
||||
maybe_s3_prefix: Option<s3_uri::S3Uri>,
|
||||
maybe_spec: Option<Spec>,
|
||||
source_connection_string: Option<String>,
|
||||
interactive: bool,
|
||||
pg_port: u16,
|
||||
workdir: Utf8PathBuf,
|
||||
pg_bin_dir: Utf8PathBuf,
|
||||
pg_lib_dir: Utf8PathBuf,
|
||||
num_cpus: Option<usize>,
|
||||
memory_mb: Option<usize>,
|
||||
) -> Result<(), anyhow::Error> {
|
||||
if maybe_spec.is_none() && source_connection_string.is_none() {
|
||||
bail!("spec must be provided for pgdata command");
|
||||
}
|
||||
if maybe_spec.is_some() && source_connection_string.is_some() {
|
||||
bail!("only one of spec or source_connection_string can be provided");
|
||||
}
|
||||
|
||||
let source_connection_string = if let Some(spec) = maybe_spec {
|
||||
match spec.encryption_secret {
|
||||
EncryptionSecret::KMS { key_id } => {
|
||||
decode_connstring(
|
||||
kms_client.as_ref().unwrap(),
|
||||
&key_id,
|
||||
spec.source_connstring_ciphertext_base64,
|
||||
)
|
||||
.await?
|
||||
}
|
||||
}
|
||||
} else {
|
||||
source_connection_string.unwrap()
|
||||
};
|
||||
|
||||
let superuser = "cloud_admin";
|
||||
let destination_connstring = format!(
|
||||
"host=localhost port={} user={} dbname=neondb",
|
||||
pg_port, superuser
|
||||
);
|
||||
|
||||
let pgdata_dir = workdir.join("pgdata");
|
||||
let mut proc = PostgresProcess::new(pgdata_dir.clone(), pg_bin_dir.clone(), pg_lib_dir.clone());
|
||||
let nproc = num_cpus.unwrap_or_else(num_cpus::get);
|
||||
let memory_mb = memory_mb.unwrap_or(256);
|
||||
proc.start(superuser, pg_port, nproc, memory_mb).await?;
|
||||
wait_until_ready(destination_connstring.clone(), "neondb".to_string()).await;
|
||||
|
||||
run_dump_restore(
|
||||
workdir.clone(),
|
||||
pg_bin_dir,
|
||||
pg_lib_dir,
|
||||
source_connection_string,
|
||||
destination_connstring,
|
||||
)
|
||||
.await?;
|
||||
|
||||
// If interactive mode, wait for Ctrl+C
|
||||
if interactive {
|
||||
info!("Running in interactive mode. Press Ctrl+C to shut down.");
|
||||
tokio::signal::ctrl_c().await.context("wait for ctrl-c")?;
|
||||
}
|
||||
|
||||
proc.shutdown().await?;
|
||||
|
||||
// Only sync if s3_prefix was specified
|
||||
if let Some(s3_prefix) = maybe_s3_prefix {
|
||||
info!("upload pgdata");
|
||||
aws_s3_sync::upload_dir_recursive(
|
||||
s3_client.as_ref().unwrap(),
|
||||
Utf8Path::new(&pgdata_dir),
|
||||
&s3_prefix.append("/pgdata/"),
|
||||
)
|
||||
.await
|
||||
.context("sync dump directory to destination")?;
|
||||
|
||||
info!("write status");
|
||||
{
|
||||
let status_dir = workdir.join("status");
|
||||
std::fs::create_dir(&status_dir).context("create status directory")?;
|
||||
let status_file = status_dir.join("pgdata");
|
||||
std::fs::write(&status_file, serde_json::json!({"done": true}).to_string())
|
||||
.context("write status file")?;
|
||||
aws_s3_sync::upload_dir_recursive(
|
||||
s3_client.as_ref().unwrap(),
|
||||
&status_dir,
|
||||
&s3_prefix.append("/status/"),
|
||||
)
|
||||
.await
|
||||
.context("sync status directory to destination")?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn cmd_dumprestore(
|
||||
kms_client: Option<aws_sdk_kms::Client>,
|
||||
maybe_spec: Option<Spec>,
|
||||
source_connection_string: Option<String>,
|
||||
destination_connection_string: Option<String>,
|
||||
workdir: Utf8PathBuf,
|
||||
pg_bin_dir: Utf8PathBuf,
|
||||
pg_lib_dir: Utf8PathBuf,
|
||||
) -> Result<(), anyhow::Error> {
|
||||
let (source_connstring, destination_connstring) = if let Some(spec) = maybe_spec {
|
||||
match spec.encryption_secret {
|
||||
EncryptionSecret::KMS { key_id } => {
|
||||
let source = decode_connstring(
|
||||
kms_client.as_ref().unwrap(),
|
||||
&key_id,
|
||||
spec.source_connstring_ciphertext_base64,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let dest = if let Some(dest_ciphertext) =
|
||||
spec.destination_connstring_ciphertext_base64
|
||||
{
|
||||
decode_connstring(kms_client.as_ref().unwrap(), &key_id, dest_ciphertext)
|
||||
.await?
|
||||
} else {
|
||||
bail!("destination connection string must be provided in spec for dump_restore command");
|
||||
};
|
||||
|
||||
(source, dest)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
(
|
||||
source_connection_string.unwrap(),
|
||||
if let Some(val) = destination_connection_string {
|
||||
val
|
||||
} else {
|
||||
bail!("destination connection string must be provided for dump_restore command");
|
||||
},
|
||||
)
|
||||
};
|
||||
|
||||
run_dump_restore(
|
||||
workdir,
|
||||
pg_bin_dir,
|
||||
pg_lib_dir,
|
||||
source_connstring,
|
||||
destination_connstring,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
pub(crate) async fn main() -> anyhow::Result<()> {
|
||||
utils::logging::init(
|
||||
utils::logging::LogFormat::Json,
|
||||
utils::logging::TracingErrorLayerEnablement::EnableWithRustLogFilter,
|
||||
utils::logging::Output::Stdout,
|
||||
)?;
|
||||
|
||||
info!("starting");
|
||||
|
||||
let args = Args::parse();
|
||||
|
||||
// Initialize AWS clients only if s3_prefix is specified
|
||||
let (s3_client, kms_client) = if args.s3_prefix.is_some() {
|
||||
let config = aws_config::load_defaults(BehaviorVersion::v2024_03_28()).await;
|
||||
let s3_client = aws_sdk_s3::Client::new(&config);
|
||||
let kms = aws_sdk_kms::Client::new(&config);
|
||||
(Some(s3_client), Some(kms))
|
||||
} else {
|
||||
(None, None)
|
||||
};
|
||||
|
||||
let spec: Option<Spec> = if let Some(s3_prefix) = &args.s3_prefix {
|
||||
let spec_key = s3_prefix.append("/spec.json");
|
||||
let object = s3_client
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.get_object()
|
||||
.bucket(&spec_key.bucket)
|
||||
.key(spec_key.key)
|
||||
.send()
|
||||
.await
|
||||
.context("get spec from s3")?
|
||||
.body
|
||||
.collect()
|
||||
.await
|
||||
.context("download spec body")?;
|
||||
serde_json::from_slice(&object.into_bytes()).context("parse spec as json")?
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
match tokio::fs::create_dir(&args.working_directory).await {
|
||||
Ok(()) => {}
|
||||
Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => {
|
||||
if !is_directory_empty(&args.working_directory)
|
||||
.await
|
||||
.context("check if working directory is empty")?
|
||||
{
|
||||
bail!("working directory is not empty");
|
||||
} else {
|
||||
// ok
|
||||
}
|
||||
}
|
||||
Err(e) => return Err(anyhow::Error::new(e).context("create working directory")),
|
||||
}
|
||||
|
||||
match args.command {
|
||||
Command::Pgdata {
|
||||
source_connection_string,
|
||||
interactive,
|
||||
pg_port,
|
||||
num_cpus,
|
||||
memory_mb,
|
||||
} => {
|
||||
cmd_pgdata(
|
||||
s3_client,
|
||||
kms_client,
|
||||
args.s3_prefix,
|
||||
spec,
|
||||
source_connection_string,
|
||||
interactive,
|
||||
pg_port,
|
||||
args.working_directory,
|
||||
args.pg_bin_dir,
|
||||
args.pg_lib_dir,
|
||||
num_cpus,
|
||||
memory_mb,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
Command::DumpRestore {
|
||||
source_connection_string,
|
||||
destination_connection_string,
|
||||
} => {
|
||||
cmd_dumprestore(
|
||||
kms_client,
|
||||
spec,
|
||||
source_connection_string,
|
||||
destination_connection_string,
|
||||
args.working_directory,
|
||||
args.pg_bin_dir,
|
||||
args.pg_lib_dir,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,24 +1,102 @@
|
||||
use anyhow::Context;
|
||||
use camino::Utf8Path;
|
||||
use camino::{Utf8Path, Utf8PathBuf};
|
||||
use tokio::task::JoinSet;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
use super::s3_uri::S3Uri;
|
||||
|
||||
pub(crate) async fn sync(local: &Utf8Path, remote: &S3Uri) -> anyhow::Result<()> {
|
||||
let mut builder = tokio::process::Command::new("aws");
|
||||
builder
|
||||
.arg("s3")
|
||||
.arg("sync")
|
||||
.arg(local.as_str())
|
||||
.arg(remote.to_string());
|
||||
let st = builder
|
||||
.spawn()
|
||||
.context("spawn aws s3 sync")?
|
||||
.wait()
|
||||
.await
|
||||
.context("wait for aws s3 sync")?;
|
||||
if st.success() {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(anyhow::anyhow!("aws s3 sync failed"))
|
||||
use tracing::{info, warn};
|
||||
|
||||
const MAX_PARALLEL_UPLOADS: usize = 10;
|
||||
|
||||
/// Upload all files from 'local' to 'remote'
|
||||
pub(crate) async fn upload_dir_recursive(
|
||||
s3_client: &aws_sdk_s3::Client,
|
||||
local: &Utf8Path,
|
||||
remote: &S3Uri,
|
||||
) -> anyhow::Result<()> {
|
||||
// Recursively scan directory
|
||||
let mut dirwalker = WalkDir::new(local)
|
||||
.into_iter()
|
||||
.map(|entry| {
|
||||
let entry = entry?;
|
||||
let file_type = entry.file_type();
|
||||
let path = <&Utf8Path>::try_from(entry.path())?.to_path_buf();
|
||||
Ok((file_type, path))
|
||||
})
|
||||
.filter_map(|e: anyhow::Result<(std::fs::FileType, Utf8PathBuf)>| {
|
||||
match e {
|
||||
Ok((file_type, path)) if file_type.is_file() => Some(Ok(path)),
|
||||
Ok((file_type, _path)) if file_type.is_dir() => {
|
||||
// The WalkDir iterator will recurse into directories, but we don't want
|
||||
// to do anything with directories as such. There's no concept of uploading
|
||||
// an empty directory to S3.
|
||||
None
|
||||
}
|
||||
Ok((file_type, path)) if file_type.is_symlink() => {
|
||||
// huh, didn't expect a symlink. Can't upload that to S3. Warn and skip.
|
||||
warn!("cannot upload symlink ({})", path);
|
||||
None
|
||||
}
|
||||
Ok((_file_type, path)) => {
|
||||
// should not happen
|
||||
warn!("directory entry has unexpected type ({})", path);
|
||||
None
|
||||
}
|
||||
Err(e) => Some(Err(e)),
|
||||
}
|
||||
});
|
||||
|
||||
// Spawn upload tasks for each file, keeping MAX_PARALLEL_UPLOADS active in
|
||||
// parallel.
|
||||
let mut joinset = JoinSet::new();
|
||||
loop {
|
||||
// Could we upload more?
|
||||
while joinset.len() < MAX_PARALLEL_UPLOADS {
|
||||
if let Some(full_local_path) = dirwalker.next() {
|
||||
let full_local_path = full_local_path?;
|
||||
let relative_local_path = full_local_path
|
||||
.strip_prefix(local)
|
||||
.expect("all paths start from the walkdir root");
|
||||
let remote_path = remote.append(relative_local_path.as_str());
|
||||
info!(
|
||||
"starting upload of {} to {}",
|
||||
&full_local_path, &remote_path
|
||||
);
|
||||
let upload_task = upload_file(s3_client.clone(), full_local_path, remote_path);
|
||||
joinset.spawn(upload_task);
|
||||
} else {
|
||||
info!("draining upload tasks");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for an upload to complete
|
||||
if let Some(res) = joinset.join_next().await {
|
||||
let _ = res?;
|
||||
} else {
|
||||
// all done!
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) async fn upload_file(
|
||||
s3_client: aws_sdk_s3::Client,
|
||||
local_path: Utf8PathBuf,
|
||||
remote: S3Uri,
|
||||
) -> anyhow::Result<()> {
|
||||
use aws_smithy_types::byte_stream::ByteStream;
|
||||
let stream = ByteStream::from_path(&local_path).await?;
|
||||
|
||||
let _result = s3_client
|
||||
.put_object()
|
||||
.bucket(remote.bucket)
|
||||
.key(&remote.key)
|
||||
.body(stream)
|
||||
.send()
|
||||
.await?;
|
||||
info!("upload of {} to {} finished", &local_path, &remote.key);
|
||||
|
||||
Ok(())
|
||||
}
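// The upload loop above is a small bounded-concurrency pattern: keep spawning
// until MAX_PARALLEL_UPLOADS tasks are in flight, then wait for one to finish
// before spawning more. A stripped-down sketch of the same pattern, with the
// S3 details replaced by a placeholder sleep:
#[allow(dead_code)]
async fn bounded_concurrency_sketch(delays_ms: Vec<u64>) -> anyhow::Result<()> {
    const MAX_IN_FLIGHT: usize = 10;
    let mut jobs = delays_ms.into_iter();
    let mut joinset = JoinSet::new();
    loop {
        // Top up the set of in-flight tasks.
        while joinset.len() < MAX_IN_FLIGHT {
            match jobs.next() {
                Some(delay) => {
                    joinset.spawn(async move {
                        tokio::time::sleep(std::time::Duration::from_millis(delay)).await;
                    });
                }
                None => break,
            }
        }
        // Wait for one task to complete; stop when nothing is left.
        match joinset.join_next().await {
            Some(res) => res?,
            None => break,
        }
    }
    Ok(())
}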
|
||||
|
||||
@@ -1400,26 +1400,27 @@ impl ComputeNode {
|
||||
let postgresql_conf_path = pgdata_path.join("postgresql.conf");
|
||||
config::write_postgres_conf(&postgresql_conf_path, &spec, self.internal_http_port)?;
|
||||
|
||||
let max_concurrent_connections = spec.reconfigure_concurrency;
|
||||
if !spec.skip_pg_catalog_updates {
|
||||
let max_concurrent_connections = spec.reconfigure_concurrency;
|
||||
// Temporarily reset max_cluster_size in config
|
||||
// to avoid the possibility of hitting the limit, while we are reconfiguring:
|
||||
// creating new extensions, roles, etc.
|
||||
config::with_compute_ctl_tmp_override(pgdata_path, "neon.max_cluster_size=-1", || {
|
||||
self.pg_reload_conf()?;
|
||||
|
||||
// Temporarily reset max_cluster_size in config
|
||||
// to avoid the possibility of hitting the limit, while we are reconfiguring:
|
||||
// creating new extensions, roles, etc.
|
||||
config::with_compute_ctl_tmp_override(pgdata_path, "neon.max_cluster_size=-1", || {
|
||||
self.pg_reload_conf()?;
|
||||
if spec.mode == ComputeMode::Primary {
|
||||
let mut conf = tokio_postgres::Config::from_str(self.connstr.as_str()).unwrap();
|
||||
conf.application_name("apply_config");
|
||||
let conf = Arc::new(conf);
|
||||
|
||||
if spec.mode == ComputeMode::Primary {
|
||||
let mut conf = tokio_postgres::Config::from_str(self.connstr.as_str()).unwrap();
|
||||
conf.application_name("apply_config");
|
||||
let conf = Arc::new(conf);
|
||||
let spec = Arc::new(spec.clone());
|
||||
|
||||
let spec = Arc::new(spec.clone());
|
||||
self.apply_spec_sql(spec, conf, max_concurrent_connections)?;
|
||||
}
|
||||
|
||||
self.apply_spec_sql(spec, conf, max_concurrent_connections)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
})?;
|
||||
Ok(())
|
||||
})?;
|
||||
}
|
||||
|
||||
self.pg_reload_conf()?;
|
||||
|
||||
|
||||
@@ -1,25 +0,0 @@
|
||||
use anyhow::Context;
|
||||
|
||||
pub const DISK_QUOTA_BIN: &str = "/neonvm/bin/set-disk-quota";
|
||||
|
||||
/// If size_bytes is 0, it disables the quota. Otherwise, it sets filesystem quota to size_bytes.
|
||||
/// `fs_mountpoint` should point to the mountpoint of the filesystem where the quota should be set.
|
||||
pub fn set_disk_quota(size_bytes: u64, fs_mountpoint: &str) -> anyhow::Result<()> {
|
||||
let size_kb = size_bytes / 1024;
|
||||
// run `/neonvm/bin/set-disk-quota {size_kb} {mountpoint}`
|
||||
let child_result = std::process::Command::new("/usr/bin/sudo")
|
||||
.arg(DISK_QUOTA_BIN)
|
||||
.arg(size_kb.to_string())
|
||||
.arg(fs_mountpoint)
|
||||
.spawn();
|
||||
|
||||
child_result
|
||||
.context("spawn() failed")
|
||||
.and_then(|mut child| child.wait().context("wait() failed"))
|
||||
.and_then(|status| match status.success() {
|
||||
true => Ok(()),
|
||||
false => Err(anyhow::anyhow!("process exited with {status}")),
|
||||
})
|
||||
// wrap any prior error with the overall context that we couldn't run the command
|
||||
.with_context(|| format!("could not run `/usr/bin/sudo {DISK_QUOTA_BIN}`"))
|
||||
}
|
||||
@@ -11,7 +11,6 @@ pub mod http;
|
||||
pub mod logger;
|
||||
pub mod catalog;
|
||||
pub mod compute;
|
||||
pub mod disk_quota;
|
||||
pub mod extension_server;
|
||||
pub mod installed_extensions;
|
||||
pub mod local_proxy;
|
||||
@@ -19,9 +18,9 @@ pub mod lsn_lease;
|
||||
pub mod metrics;
|
||||
mod migration;
|
||||
pub mod monitor;
|
||||
pub mod neonvmd_client;
|
||||
pub mod params;
|
||||
pub mod pg_helpers;
|
||||
pub mod spec;
|
||||
mod spec_apply;
|
||||
pub mod swap;
|
||||
pub mod sync_sk;
|
||||
|
||||
compute_tools/src/neonvmd_client.rs (new file, 102 lines)
@@ -0,0 +1,102 @@
|
||||
use anyhow::Context;
|
||||
use hyper::client::conn;
|
||||
use hyper::client::conn::http1::SendRequest;
|
||||
use hyper::{Request, StatusCode};
|
||||
use hyper_util::rt::TokioIo;
|
||||
use tracing::warn;
|
||||
|
||||
const NEONVM_DAEMON_CONTROL_SOCKET_PATH: &str = "/run/neonvm-daemon-socket";
|
||||
|
||||
/// Open a connection to neonvm-daemon's control socket, prepare to send
|
||||
/// requests to it with hyper.
|
||||
async fn connect_neonvm_daemon<B>() -> anyhow::Result<SendRequest<B>>
|
||||
where
|
||||
B: hyper::body::Body + 'static + Send,
|
||||
B::Data: Send,
|
||||
B::Error: Into<Box<dyn std::error::Error + Send + Sync>>,
|
||||
{
|
||||
let mut attempts = 0;
|
||||
let stream = loop {
|
||||
match tokio::net::UnixStream::connect(NEONVM_DAEMON_CONTROL_SOCKET_PATH).await {
|
||||
Ok(stream) => break stream,
|
||||
Err(err) if err.kind() == std::io::ErrorKind::NotFound && attempts < 50 => {
|
||||
// Retry
|
||||
warn!("neonvm-daemon control socket not found, retrying...");
|
||||
attempts += 1;
|
||||
tokio::time::sleep(std::time::Duration::from_millis(100)).await;
|
||||
}
|
||||
Err(err) => Err(err).context("opening neonvm-daemon control socket")?,
|
||||
}
|
||||
};
|
||||
let io = TokioIo::new(stream);
|
||||
let (request_sender, connection) = conn::http1::handshake(io).await.unwrap();
|
||||
|
||||
// spawn a task to poll the connection and drive the HTTP state
|
||||
tokio::spawn(async move {
|
||||
if let Err(e) = connection.await {
|
||||
eprintln!("Error in connection: {}", e);
|
||||
}
|
||||
});
|
||||
|
||||
Ok(request_sender)
|
||||
}
|
||||
|
||||
pub fn resize_swap(size_bytes: u64) -> anyhow::Result<()> {
|
||||
let rt = tokio::runtime::Handle::current();
|
||||
rt.block_on(resize_swap_async(size_bytes))
|
||||
}
|
||||
|
||||
pub async fn resize_swap_async(size_bytes: u64) -> anyhow::Result<()> {
|
||||
let mut neonvmd = connect_neonvm_daemon().await?;
|
||||
|
||||
// Passing 'once' causes neonvm-daemon to reject any future resize requests
|
||||
let request = Request::builder()
|
||||
.method("POST")
|
||||
.uri("/resize-swap-once")
|
||||
.header("Host", "localhost") // hyper requires Host, even though the server won't care
|
||||
.body(format!("{}", size_bytes))
|
||||
.unwrap();
|
||||
|
||||
let resp = neonvmd.send_request(request).await?;
|
||||
let status = resp.status();
|
||||
match status {
|
||||
StatusCode::OK => Ok(()),
|
||||
StatusCode::CONFLICT => {
|
||||
// 409 Conflict means that the swap was already resized. That happens if the
|
||||
// compute_ctl restarts within the VM. That's considered OK.
|
||||
warn!("Swap was already resized");
|
||||
Ok(())
|
||||
}
|
||||
_ => Err(anyhow::anyhow!(
|
||||
"error resizing swap: {}",
|
||||
status.to_string()
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn set_disk_quota(size_bytes: u64) -> anyhow::Result<()> {
|
||||
let rt = tokio::runtime::Handle::current();
|
||||
rt.block_on(set_disk_quota_async(size_bytes))
|
||||
}
|
||||
|
||||
/// If size_bytes is 0, it disables the quota. Otherwise, it sets filesystem quota to size_bytes.
|
||||
pub async fn set_disk_quota_async(size_bytes: u64) -> anyhow::Result<()> {
|
||||
let mut neonvmd = connect_neonvm_daemon().await?;
|
||||
|
||||
let request = Request::builder()
|
||||
.method("POST")
|
||||
.uri("/set-disk-quota")
|
||||
.header("Host", "localhost") // hyper requires Host, even though the server won't care
|
||||
.body(format!("{}", size_bytes))
|
||||
.unwrap();
|
||||
|
||||
let resp = neonvmd.send_request(request).await?;
|
||||
let status = resp.status();
|
||||
match status {
|
||||
StatusCode::OK => Ok(()),
|
||||
_ => Err(anyhow::anyhow!(
|
||||
"error setting disk quota: {}",
|
||||
status.to_string()
|
||||
)),
|
||||
}
|
||||
}
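// Hedged usage sketch (the sizes are made up): whatever component applies new
// scaling limits would be expected to call the async variants from inside the
// compute_ctl runtime, or the blocking wrappers from synchronous code.
#[allow(dead_code)]
async fn apply_limits_sketch() -> anyhow::Result<()> {
    resize_swap_async(1024 * 1024 * 1024).await?; // 1 GiB of swap
    set_disk_quota_async(20 * 1024 * 1024 * 1024).await?; // 20 GiB filesystem quota
    Ok(())
}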
|
||||
@@ -11,7 +11,9 @@ use crate::migration::MigrationRunner;
|
||||
use crate::params::PG_HBA_ALL_MD5;
|
||||
use crate::pg_helpers::*;
|
||||
|
||||
use compute_api::responses::{ControlPlaneComputeStatus, ControlPlaneSpecResponse};
|
||||
use compute_api::responses::{
|
||||
ComputeCtlConfig, ControlPlaneComputeStatus, ControlPlaneSpecResponse,
|
||||
};
|
||||
use compute_api::spec::ComputeSpec;
|
||||
|
||||
// Do control plane request and return response if any. In case of error it
|
||||
@@ -73,14 +75,13 @@ fn do_control_plane_request(
|
||||
pub fn get_spec_from_control_plane(
|
||||
base_uri: &str,
|
||||
compute_id: &str,
|
||||
) -> Result<Option<ComputeSpec>> {
|
||||
) -> Result<(Option<ComputeSpec>, ComputeCtlConfig)> {
|
||||
let cp_uri = format!("{base_uri}/compute/api/v2/computes/{compute_id}/spec");
|
||||
let jwt: String = match std::env::var("NEON_CONTROL_PLANE_TOKEN") {
|
||||
Ok(v) => v,
|
||||
Err(_) => "".to_string(),
|
||||
};
|
||||
let mut attempt = 1;
|
||||
let mut spec: Result<Option<ComputeSpec>> = Ok(None);
|
||||
|
||||
info!("getting spec from control plane: {}", cp_uri);
|
||||
|
||||
@@ -90,7 +91,7 @@ pub fn get_spec_from_control_plane(
|
||||
// - no spec for compute yet (Empty state) -> return Ok(None)
|
||||
// - got spec -> return Ok(Some(spec))
|
||||
while attempt < 4 {
|
||||
spec = match do_control_plane_request(&cp_uri, &jwt) {
|
||||
let result = match do_control_plane_request(&cp_uri, &jwt) {
|
||||
Ok(spec_resp) => {
|
||||
CPLANE_REQUESTS_TOTAL
|
||||
.with_label_values(&[
|
||||
@@ -99,10 +100,10 @@ pub fn get_spec_from_control_plane(
|
||||
])
|
||||
.inc();
|
||||
match spec_resp.status {
|
||||
ControlPlaneComputeStatus::Empty => Ok(None),
|
||||
ControlPlaneComputeStatus::Empty => Ok((None, spec_resp.compute_ctl_config)),
|
||||
ControlPlaneComputeStatus::Attached => {
|
||||
if let Some(spec) = spec_resp.spec {
|
||||
Ok(Some(spec))
|
||||
Ok((Some(spec), spec_resp.compute_ctl_config))
|
||||
} else {
|
||||
bail!("compute is attached, but spec is empty")
|
||||
}
|
||||
@@ -121,10 +122,10 @@ pub fn get_spec_from_control_plane(
|
||||
}
|
||||
};
|
||||
|
||||
if let Err(e) = &spec {
|
||||
if let Err(e) = &result {
|
||||
error!("attempt {} to get spec failed with: {}", attempt, e);
|
||||
} else {
|
||||
return spec;
|
||||
return result;
|
||||
}
|
||||
|
||||
attempt += 1;
|
||||
@@ -132,7 +133,9 @@ pub fn get_spec_from_control_plane(
|
||||
}
|
||||
|
||||
// All attempts failed, return error.
|
||||
spec
|
||||
Err(anyhow::anyhow!(
|
||||
"Exhausted all attempts to retrieve the spec from the control plane"
|
||||
))
|
||||
}
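// Hedged sketch of a caller under the new signature (the URI and compute id
// are hypothetical): the compute_ctl config now comes back alongside the spec,
// even while the compute is still in the Empty state.
#[allow(dead_code)]
fn get_spec_example() -> Result<()> {
    let (spec, _compute_ctl_config) =
        get_spec_from_control_plane("http://localhost:9095", "compute-example")?;
    info!("spec present: {}", spec.is_some());
    Ok(())
}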
|
||||
|
||||
/// Check `pg_hba.conf` and update if needed to allow external connections.
|
||||
|
||||
@@ -7,12 +7,12 @@ use std::sync::Arc;
|
||||
|
||||
use crate::compute::construct_superuser_query;
|
||||
use crate::pg_helpers::{escape_literal, DatabaseExt, Escaping, GenericOptionsSearch, RoleExt};
|
||||
use anyhow::{bail, Result};
|
||||
use anyhow::Result;
|
||||
use compute_api::spec::{ComputeFeature, ComputeSpec, Database, PgIdent, Role};
|
||||
use futures::future::join_all;
|
||||
use tokio::sync::RwLock;
|
||||
use tokio_postgres::Client;
|
||||
use tracing::{debug, info_span, Instrument};
|
||||
use tracing::{debug, info_span, warn, Instrument};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub enum DB {
|
||||
@@ -47,6 +47,11 @@ pub enum PerDatabasePhase {
|
||||
DeleteDBRoleReferences,
|
||||
ChangeSchemaPerms,
|
||||
HandleAnonExtension,
|
||||
/// This is a shared phase, used for both i) dropping dangling LR subscriptions
|
||||
/// before dropping the DB, and ii) dropping all subscriptions after creating
|
||||
/// a fresh branch.
|
||||
/// N.B. we will skip all DBs that are not present in Postgres, invalid, or
|
||||
/// have `datallowconn = false` (`restrict_conn`).
|
||||
DropLogicalSubscriptions,
|
||||
}
|
||||
|
||||
@@ -168,7 +173,7 @@ where
|
||||
///
|
||||
/// In the future we may generate a single stream of changes and then
|
||||
/// sort/merge/batch execution, but for now this is a nice way to improve
|
||||
/// batching behaviour of the commands.
|
||||
/// batching behavior of the commands.
|
||||
async fn get_operations<'a>(
|
||||
spec: &'a ComputeSpec,
|
||||
ctx: &'a RwLock<MutableApplyContext>,
|
||||
@@ -451,6 +456,38 @@ async fn get_operations<'a>(
|
||||
)),
|
||||
}))),
|
||||
ApplySpecPhase::RunInEachDatabase { db, subphase } => {
|
||||
// Do some checks that user DB exists and we can access it.
|
||||
//
|
||||
// During the phases like DropLogicalSubscriptions, DeleteDBRoleReferences,
|
||||
// which happen before dropping the DB, the current run could be a retry,
|
||||
// so it's a valid case when DB is absent already. The case of
|
||||
// `pg_database.datallowconn = false`/`restrict_conn` is a bit tricky, as
|
||||
// in theory user can have some dangling objects there, so we will fail at
|
||||
// the actual drop later. Yet, to fix that in the current code we would need
|
||||
// to ALTER DATABASE, and then check back, but that's even more invasive, so
|
||||
// that's not what we really want to do here.
|
||||
//
|
||||
// For ChangeSchemaPerms, skipping DBs we cannot access is totally fine.
|
||||
if let DB::UserDB(db) = db {
|
||||
let databases = &ctx.read().await.dbs;
|
||||
|
||||
let edb = match databases.get(&db.name) {
|
||||
Some(edb) => edb,
|
||||
None => {
|
||||
warn!("skipping RunInEachDatabase phase {:?}, database {} doesn't exist in PostgreSQL", subphase, db.name);
|
||||
return Ok(Box::new(empty()));
|
||||
}
|
||||
};
|
||||
|
||||
if edb.restrict_conn || edb.invalid {
|
||||
warn!(
|
||||
"skipping RunInEachDatabase phase {:?}, database {} is (restrict_conn={}, invalid={})",
|
||||
subphase, db.name, edb.restrict_conn, edb.invalid
|
||||
);
|
||||
return Ok(Box::new(empty()));
|
||||
}
|
||||
}
|
||||
|
||||
match subphase {
|
||||
PerDatabasePhase::DropLogicalSubscriptions => {
|
||||
match &db {
|
||||
@@ -530,25 +567,12 @@ async fn get_operations<'a>(
|
||||
Ok(Box::new(operations))
|
||||
}
|
||||
PerDatabasePhase::ChangeSchemaPerms => {
|
||||
let ctx = ctx.read().await;
|
||||
let databases = &ctx.dbs;
|
||||
|
||||
let db = match &db {
|
||||
// ignore schema permissions on the system database
|
||||
DB::SystemDB => return Ok(Box::new(empty())),
|
||||
DB::UserDB(db) => db,
|
||||
};
|
||||
|
||||
if databases.get(&db.name).is_none() {
|
||||
bail!("database {} doesn't exist in PostgreSQL", db.name);
|
||||
}
|
||||
|
||||
let edb = databases.get(&db.name).unwrap();
|
||||
|
||||
if edb.restrict_conn || edb.invalid {
|
||||
return Ok(Box::new(empty()));
|
||||
}
|
||||
|
||||
let operations = vec![
|
||||
Operation {
|
||||
query: format!(
|
||||
@@ -566,6 +590,7 @@ async fn get_operations<'a>(
|
||||
|
||||
Ok(Box::new(operations))
|
||||
}
|
||||
// TODO: remove this completely https://github.com/neondatabase/cloud/issues/22663
|
||||
PerDatabasePhase::HandleAnonExtension => {
|
||||
// Only install Anon into user databases
|
||||
let db = match &db {
|
||||
|
||||
@@ -2,6 +2,7 @@ DO $$
|
||||
DECLARE
|
||||
subname TEXT;
|
||||
BEGIN
|
||||
LOCK TABLE pg_subscription IN ACCESS EXCLUSIVE MODE;
|
||||
FOR subname IN SELECT pg_subscription.subname FROM pg_subscription WHERE subdbid = (SELECT oid FROM pg_database WHERE datname = {datname_str}) LOOP
|
||||
EXECUTE format('ALTER SUBSCRIPTION %I DISABLE;', subname);
|
||||
EXECUTE format('ALTER SUBSCRIPTION %I SET (slot_name = NONE);', subname);
|
||||
|
||||
@@ -1,45 +0,0 @@
|
||||
use std::path::Path;
|
||||
|
||||
use anyhow::{anyhow, Context};
|
||||
use tracing::warn;
|
||||
|
||||
pub const RESIZE_SWAP_BIN: &str = "/neonvm/bin/resize-swap";
|
||||
|
||||
pub fn resize_swap(size_bytes: u64) -> anyhow::Result<()> {
|
||||
// run `/neonvm/bin/resize-swap --once {size_bytes}`
|
||||
//
|
||||
// Passing '--once' causes resize-swap to delete itself after successful completion, which
|
||||
// means that if compute_ctl restarts later, we won't end up calling 'swapoff' while
|
||||
// postgres is running.
|
||||
//
|
||||
// NOTE: resize-swap is not very clever. If present, --once MUST be the first arg.
|
||||
let child_result = std::process::Command::new("/usr/bin/sudo")
|
||||
.arg(RESIZE_SWAP_BIN)
|
||||
.arg("--once")
|
||||
.arg(size_bytes.to_string())
|
||||
.spawn();
|
||||
|
||||
child_result
|
||||
.context("spawn() failed")
|
||||
.and_then(|mut child| child.wait().context("wait() failed"))
|
||||
.and_then(|status| match status.success() {
|
||||
true => Ok(()),
|
||||
false => {
|
||||
// The command failed. Maybe it was because the resize-swap file doesn't exist?
|
||||
// The --once flag causes it to delete itself on success so we don't disable swap
|
||||
// while postgres is running; maybe this is fine.
|
||||
match Path::new(RESIZE_SWAP_BIN).try_exists() {
|
||||
Err(_) | Ok(true) => Err(anyhow!("process exited with {status}")),
|
||||
// The path doesn't exist; we're actually ok
|
||||
Ok(false) => {
|
||||
warn!("ignoring \"not found\" error from resize-swap to avoid swapoff while compute is running");
|
||||
Ok(())
|
||||
},
|
||||
}
|
||||
}
|
||||
})
|
||||
// wrap any prior error with the overall context that we couldn't run the command
|
||||
.with_context(|| {
|
||||
format!("could not run `/usr/bin/sudo {RESIZE_SWAP_BIN} --once {size_bytes}`")
|
||||
})
|
||||
}
|
||||
@@ -46,8 +46,12 @@ use std::process::Command;
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use std::time::SystemTime;
|
||||
use std::time::UNIX_EPOCH;
|
||||
|
||||
use anyhow::{anyhow, bail, Context, Result};
|
||||
use compute_api::requests::ConfigurationRequest;
|
||||
use compute_api::responses::ComputeCtlConfig;
|
||||
use compute_api::spec::Database;
|
||||
use compute_api::spec::PgIdent;
|
||||
use compute_api::spec::RemoteExtSpec;
|
||||
@@ -57,6 +61,7 @@ use nix::sys::signal::Signal;
|
||||
use pageserver_api::shard::ShardStripeSize;
|
||||
use reqwest::header::CONTENT_TYPE;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tracing::debug;
|
||||
use url::Host;
|
||||
use utils::id::{NodeId, TenantId, TimelineId};
|
||||
|
||||
@@ -79,8 +84,10 @@ pub struct EndpointConf {
|
||||
internal_http_port: u16,
|
||||
pg_version: u32,
|
||||
skip_pg_catalog_updates: bool,
|
||||
reconfigure_concurrency: usize,
|
||||
drop_subscriptions_before_start: bool,
|
||||
features: Vec<ComputeFeature>,
|
||||
cluster: Option<Cluster>,
|
||||
}
|
||||
|
||||
//
|
||||
@@ -177,7 +184,9 @@ impl ComputeControlPlane {
|
||||
// we also skip catalog updates in the cloud.
|
||||
skip_pg_catalog_updates,
|
||||
drop_subscriptions_before_start,
|
||||
reconfigure_concurrency: 1,
|
||||
features: vec![],
|
||||
cluster: None,
|
||||
});
|
||||
|
||||
ep.create_endpoint_dir()?;
|
||||
@@ -194,7 +203,9 @@ impl ComputeControlPlane {
|
||||
pg_version,
|
||||
skip_pg_catalog_updates,
|
||||
drop_subscriptions_before_start,
|
||||
reconfigure_concurrency: 1,
|
||||
features: vec![],
|
||||
cluster: None,
|
||||
})?,
|
||||
)?;
|
||||
std::fs::write(
|
||||
@@ -259,8 +270,11 @@ pub struct Endpoint {
|
||||
skip_pg_catalog_updates: bool,
|
||||
|
||||
drop_subscriptions_before_start: bool,
|
||||
reconfigure_concurrency: usize,
|
||||
// Feature flags
|
||||
features: Vec<ComputeFeature>,
|
||||
// Cluster settings
|
||||
cluster: Option<Cluster>,
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Eq)]
|
||||
@@ -300,6 +314,8 @@ impl Endpoint {
|
||||
let conf: EndpointConf =
|
||||
serde_json::from_slice(&std::fs::read(entry.path().join("endpoint.json"))?)?;
|
||||
|
||||
debug!("serialized endpoint conf: {:?}", conf);
|
||||
|
||||
Ok(Endpoint {
|
||||
pg_address: SocketAddr::new(IpAddr::from(Ipv4Addr::LOCALHOST), conf.pg_port),
|
||||
external_http_address: SocketAddr::new(
|
||||
@@ -317,8 +333,10 @@ impl Endpoint {
|
||||
tenant_id: conf.tenant_id,
|
||||
pg_version: conf.pg_version,
|
||||
skip_pg_catalog_updates: conf.skip_pg_catalog_updates,
|
||||
reconfigure_concurrency: conf.reconfigure_concurrency,
|
||||
drop_subscriptions_before_start: conf.drop_subscriptions_before_start,
|
||||
features: conf.features,
|
||||
cluster: conf.cluster,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -605,7 +623,7 @@ impl Endpoint {
|
||||
};
|
||||
|
||||
// Create spec file
|
||||
let spec = ComputeSpec {
|
||||
let mut spec = ComputeSpec {
|
||||
skip_pg_catalog_updates: self.skip_pg_catalog_updates,
|
||||
format_version: 1.0,
|
||||
operation_uuid: None,
|
||||
@@ -638,7 +656,7 @@ impl Endpoint {
|
||||
Vec::new()
|
||||
},
|
||||
settings: None,
|
||||
postgresql_conf: Some(postgresql_conf),
|
||||
postgresql_conf: Some(postgresql_conf.clone()),
|
||||
},
|
||||
delta_operations: None,
|
||||
tenant_id: Some(self.tenant_id),
|
||||
@@ -651,9 +669,35 @@ impl Endpoint {
|
||||
pgbouncer_settings: None,
|
||||
shard_stripe_size: Some(shard_stripe_size),
|
||||
local_proxy_config: None,
|
||||
reconfigure_concurrency: 1,
|
||||
reconfigure_concurrency: self.reconfigure_concurrency,
|
||||
drop_subscriptions_before_start: self.drop_subscriptions_before_start,
|
||||
};
|
||||
|
||||
// this strange code is needed to support respec() in tests
|
||||
if self.cluster.is_some() {
|
||||
debug!("Cluster is already set in the endpoint spec, using it");
|
||||
spec.cluster = self.cluster.clone().unwrap();
|
||||
|
||||
debug!("spec.cluster {:?}", spec.cluster);
|
||||
|
||||
// fill missing fields again
|
||||
if create_test_user {
|
||||
spec.cluster.roles.push(Role {
|
||||
name: PgIdent::from_str("test").unwrap(),
|
||||
encrypted_password: None,
|
||||
options: None,
|
||||
});
|
||||
spec.cluster.databases.push(Database {
|
||||
name: PgIdent::from_str("neondb").unwrap(),
|
||||
owner: PgIdent::from_str("test").unwrap(),
|
||||
options: None,
|
||||
restrict_conn: false,
|
||||
invalid: false,
|
||||
});
|
||||
}
|
||||
spec.cluster.postgresql_conf = Some(postgresql_conf);
|
||||
}
|
||||
|
||||
let spec_path = self.endpoint_path().join("spec.json");
|
||||
std::fs::write(spec_path, serde_json::to_string_pretty(&spec)?)?;
|
||||
|
||||
@@ -671,18 +715,14 @@ impl Endpoint {
|
||||
println!("Also at '{}'", conn_str);
|
||||
}
|
||||
let mut cmd = Command::new(self.env.neon_distrib_dir.join("compute_ctl"));
|
||||
//cmd.args([
|
||||
// "--external-http-port",
|
||||
// &self.external_http_address.port().to_string(),
|
||||
//])
|
||||
//.args([
|
||||
// "--internal-http-port",
|
||||
// &self.internal_http_address.port().to_string(),
|
||||
//])
|
||||
cmd.args([
|
||||
"--http-port",
|
||||
"--external-http-port",
|
||||
&self.external_http_address.port().to_string(),
|
||||
])
|
||||
.args([
|
||||
"--internal-http-port",
|
||||
&self.internal_http_address.port().to_string(),
|
||||
])
|
||||
.args(["--pgdata", self.pgdata().to_str().unwrap()])
|
||||
.args(["--connstr", &conn_str])
|
||||
.args([
|
||||
@@ -699,20 +739,16 @@ impl Endpoint {
|
||||
])
|
||||
// TODO: It would be nice if we generated compute IDs with the same
|
||||
// algorithm as the real control plane.
|
||||
//
|
||||
// TODO: Add this back when
|
||||
// https://github.com/neondatabase/neon/pull/10747 is merged.
|
||||
//
|
||||
//.args([
|
||||
// "--compute-id",
|
||||
// &format!(
|
||||
// "compute-{}",
|
||||
// SystemTime::now()
|
||||
// .duration_since(UNIX_EPOCH)
|
||||
// .unwrap()
|
||||
// .as_secs()
|
||||
// ),
|
||||
//])
|
||||
.args([
|
||||
"--compute-id",
|
||||
&format!(
|
||||
"compute-{}",
|
||||
SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.unwrap()
|
||||
.as_secs()
|
||||
),
|
||||
])
|
||||
.stdin(std::process::Stdio::null())
|
||||
.stderr(logfile.try_clone()?)
|
||||
.stdout(logfile);
|
||||
@@ -880,10 +916,13 @@ impl Endpoint {
|
||||
self.external_http_address.port()
|
||||
))
|
||||
.header(CONTENT_TYPE.as_str(), "application/json")
|
||||
.body(format!(
|
||||
"{{\"spec\":{}}}",
|
||||
serde_json::to_string_pretty(&spec)?
|
||||
))
|
||||
.body(
|
||||
serde_json::to_string(&ConfigurationRequest {
|
||||
spec,
|
||||
compute_ctl_config: ComputeCtlConfig::default(),
|
||||
})
|
||||
.unwrap(),
|
||||
)
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
|
||||
@@ -335,13 +335,21 @@ impl PageServerNode {
|
||||
.map(|x| x.parse::<u64>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'checkpoint_distance' as an integer")?,
|
||||
checkpoint_timeout: settings.remove("checkpoint_timeout").map(|x| x.to_string()),
|
||||
checkpoint_timeout: settings
|
||||
.remove("checkpoint_timeout")
|
||||
.map(humantime::parse_duration)
|
||||
.transpose()
|
||||
.context("Failed to parse 'checkpoint_timeout' as duration")?,
|
||||
compaction_target_size: settings
|
||||
.remove("compaction_target_size")
|
||||
.map(|x| x.parse::<u64>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'compaction_target_size' as an integer")?,
|
||||
compaction_period: settings.remove("compaction_period").map(|x| x.to_string()),
|
||||
compaction_period: settings
|
||||
.remove("compaction_period")
|
||||
.map(humantime::parse_duration)
|
||||
.transpose()
|
||||
.context("Failed to parse 'compaction_period' as duration")?,
|
||||
compaction_threshold: settings
|
||||
.remove("compaction_threshold")
|
||||
.map(|x| x.parse::<usize>())
|
||||
@@ -362,6 +370,11 @@ impl PageServerNode {
|
||||
.map(|x| x.parse::<bool>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'compaction_l0_first' as a bool")?,
|
||||
compaction_l0_semaphore: settings
|
||||
.remove("compaction_l0_semaphore")
|
||||
.map(|x| x.parse::<bool>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'compaction_l0_semaphore' as a bool")?,
|
||||
l0_flush_delay_threshold: settings
|
||||
.remove("l0_flush_delay_threshold")
|
||||
.map(|x| x.parse::<usize>())
|
||||
@@ -382,7 +395,10 @@ impl PageServerNode {
|
||||
.map(|x| x.parse::<u64>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'gc_horizon' as an integer")?,
|
||||
gc_period: settings.remove("gc_period").map(|x| x.to_string()),
|
||||
gc_period: settings.remove("gc_period")
|
||||
.map(humantime::parse_duration)
|
||||
.transpose()
|
||||
.context("Failed to parse 'gc_period' as duration")?,
|
||||
image_creation_threshold: settings
|
||||
.remove("image_creation_threshold")
|
||||
.map(|x| x.parse::<usize>())
|
||||
@@ -398,13 +414,20 @@ impl PageServerNode {
|
||||
.map(|x| x.parse::<usize>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'image_creation_preempt_threshold' as integer")?,
|
||||
pitr_interval: settings.remove("pitr_interval").map(|x| x.to_string()),
|
||||
pitr_interval: settings.remove("pitr_interval")
|
||||
.map(humantime::parse_duration)
|
||||
.transpose()
|
||||
.context("Failed to parse 'pitr_interval' as duration")?,
|
||||
walreceiver_connect_timeout: settings
|
||||
.remove("walreceiver_connect_timeout")
|
||||
.map(|x| x.to_string()),
|
||||
.map(humantime::parse_duration)
|
||||
.transpose()
|
||||
.context("Failed to parse 'walreceiver_connect_timeout' as duration")?,
|
||||
lagging_wal_timeout: settings
|
||||
.remove("lagging_wal_timeout")
|
||||
.map(|x| x.to_string()),
|
||||
.map(humantime::parse_duration)
|
||||
.transpose()
|
||||
.context("Failed to parse 'lagging_wal_timeout' as duration")?,
|
||||
max_lsn_wal_lag: settings
|
||||
.remove("max_lsn_wal_lag")
|
||||
.map(|x| x.parse::<NonZeroU64>())
|
||||
@@ -422,8 +445,14 @@ impl PageServerNode {
|
||||
.context("Failed to parse 'min_resident_size_override' as integer")?,
|
||||
evictions_low_residence_duration_metric_threshold: settings
|
||||
.remove("evictions_low_residence_duration_metric_threshold")
|
||||
.map(|x| x.to_string()),
|
||||
heatmap_period: settings.remove("heatmap_period").map(|x| x.to_string()),
|
||||
.map(humantime::parse_duration)
|
||||
.transpose()
|
||||
.context("Failed to parse 'evictions_low_residence_duration_metric_threshold' as duration")?,
|
||||
heatmap_period: settings
|
||||
.remove("heatmap_period")
|
||||
.map(humantime::parse_duration)
|
||||
.transpose()
|
||||
.context("Failed to parse 'heatmap_period' as duration")?,
|
||||
lazy_slru_download: settings
|
||||
.remove("lazy_slru_download")
|
||||
.map(|x| x.parse::<bool>())
|
||||
@@ -434,10 +463,15 @@ impl PageServerNode {
|
||||
.map(serde_json::from_str)
|
||||
.transpose()
|
||||
.context("parse `timeline_get_throttle` from json")?,
|
||||
lsn_lease_length: settings.remove("lsn_lease_length").map(|x| x.to_string()),
|
||||
lsn_lease_length: settings.remove("lsn_lease_length")
|
||||
.map(humantime::parse_duration)
|
||||
.transpose()
|
||||
.context("Failed to parse 'lsn_lease_length' as duration")?,
|
||||
lsn_lease_length_for_ts: settings
|
||||
.remove("lsn_lease_length_for_ts")
|
||||
.map(|x| x.to_string()),
|
||||
.map(humantime::parse_duration)
|
||||
.transpose()
|
||||
.context("Failed to parse 'lsn_lease_length_for_ts' as duration")?,
|
||||
timeline_offloading: settings
|
||||
.remove("timeline_offloading")
|
||||
.map(|x| x.parse::<bool>())
|
||||
|
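For reference, a hedged sketch of the strings humantime accepts for these duration settings (the values are examples, not defaults):

    // humantime::parse_duration maps human-readable strings onto std::time::Duration.
    use std::time::Duration;
    assert_eq!(humantime::parse_duration("300s").unwrap(), Duration::from_secs(300));
    assert_eq!(humantime::parse_duration("10m").unwrap(), Duration::from_secs(600));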
||||
@@ -838,7 +838,10 @@ impl StorageController {
|
||||
self.dispatch(
|
||||
Method::PUT,
|
||||
format!("control/v1/tenant/{tenant_shard_id}/migrate"),
|
||||
Some(TenantShardMigrateRequest { node_id }),
|
||||
Some(TenantShardMigrateRequest {
|
||||
node_id,
|
||||
migration_config: None,
|
||||
}),
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
@@ -22,7 +22,7 @@ use pageserver_api::{
|
||||
};
|
||||
use pageserver_client::mgmt_api::{self};
|
||||
use reqwest::{Method, StatusCode, Url};
|
||||
use utils::id::{NodeId, TenantId};
|
||||
use utils::id::{NodeId, TenantId, TimelineId};
|
||||
|
||||
use pageserver_api::controller_api::{
|
||||
NodeConfigureRequest, NodeRegisterRequest, NodeSchedulingPolicy, PlacementPolicy,
|
||||
@@ -47,6 +47,9 @@ enum Command {
|
||||
listen_http_addr: String,
|
||||
#[arg(long)]
|
||||
listen_http_port: u16,
|
||||
#[arg(long)]
|
||||
listen_https_port: Option<u16>,
|
||||
|
||||
#[arg(long)]
|
||||
availability_zone_id: String,
|
||||
},
|
||||
@@ -239,6 +242,19 @@ enum Command {
|
||||
#[arg(long)]
|
||||
scheduling_policy: SkSchedulingPolicyArg,
|
||||
},
|
||||
/// Downloads any missing heatmap layers for all shard for a given timeline
|
||||
DownloadHeatmapLayers {
|
||||
/// Tenant ID or tenant shard ID. When an unsharded tenant ID is specified,
|
||||
/// the operation is performed on all shards. When a sharded tenant ID is
|
||||
/// specified, the operation is only performed on the specified shard.
|
||||
#[arg(long)]
|
||||
tenant_shard_id: TenantShardId,
|
||||
#[arg(long)]
|
||||
timeline_id: TimelineId,
|
||||
/// Optional: Maximum download concurrency (default is 16)
|
||||
#[arg(long)]
|
||||
concurrency: Option<usize>,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Parser)]
|
||||
@@ -381,6 +397,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
listen_pg_port,
|
||||
listen_http_addr,
|
||||
listen_http_port,
|
||||
listen_https_port,
|
||||
availability_zone_id,
|
||||
} => {
|
||||
storcon_client
|
||||
@@ -393,6 +410,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
listen_pg_port,
|
||||
listen_http_addr,
|
||||
listen_http_port,
|
||||
listen_https_port,
|
||||
availability_zone_id: AvailabilityZone(availability_zone_id),
|
||||
}),
|
||||
)
|
||||
@@ -609,7 +627,10 @@ async fn main() -> anyhow::Result<()> {
|
||||
tenant_shard_id,
|
||||
node,
|
||||
} => {
|
||||
let req = TenantShardMigrateRequest { node_id: node };
|
||||
let req = TenantShardMigrateRequest {
|
||||
node_id: node,
|
||||
migration_config: None,
|
||||
};
|
||||
|
||||
storcon_client
|
||||
.dispatch::<TenantShardMigrateRequest, TenantShardMigrateResponse>(
|
||||
@@ -623,7 +644,10 @@ async fn main() -> anyhow::Result<()> {
|
||||
tenant_shard_id,
|
||||
node,
|
||||
} => {
|
||||
let req = TenantShardMigrateRequest { node_id: node };
|
||||
let req = TenantShardMigrateRequest {
|
||||
node_id: node,
|
||||
migration_config: None,
|
||||
};
|
||||
|
||||
storcon_client
|
||||
.dispatch::<TenantShardMigrateRequest, TenantShardMigrateResponse>(
|
||||
@@ -935,7 +959,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
threshold: threshold.into(),
|
||||
},
|
||||
)),
|
||||
heatmap_period: Some("300s".to_string()),
|
||||
heatmap_period: Some(Duration::from_secs(300)),
|
||||
..Default::default()
|
||||
},
|
||||
})
|
||||
@@ -1082,7 +1106,10 @@ async fn main() -> anyhow::Result<()> {
|
||||
.dispatch::<TenantShardMigrateRequest, TenantShardMigrateResponse>(
|
||||
Method::PUT,
|
||||
format!("control/v1/tenant/{}/migrate", mv.tenant_shard_id),
|
||||
Some(TenantShardMigrateRequest { node_id: mv.to }),
|
||||
Some(TenantShardMigrateRequest {
|
||||
node_id: mv.to,
|
||||
migration_config: None,
|
||||
}),
|
||||
)
|
||||
.await
|
||||
.map_err(|e| (mv.tenant_shard_id, mv.from, mv.to, e))
|
||||
@@ -1238,6 +1265,24 @@ async fn main() -> anyhow::Result<()> {
|
||||
String::from(scheduling_policy)
|
||||
);
|
||||
}
|
||||
Command::DownloadHeatmapLayers {
|
||||
tenant_shard_id,
|
||||
timeline_id,
|
||||
concurrency,
|
||||
} => {
|
||||
let mut path = format!(
|
||||
"/v1/tenant/{}/timeline/{}/download_heatmap_layers",
|
||||
tenant_shard_id, timeline_id,
|
||||
);
|
||||
|
||||
if let Some(c) = concurrency {
|
||||
path = format!("{path}?concurrency={c}");
|
||||
}
|
||||
|
||||
storcon_client
|
||||
.dispatch::<(), ()>(Method::POST, path, None)
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
||||
@@ -77,4 +77,5 @@ echo "Start compute node"
|
||||
/usr/local/bin/compute_ctl --pgdata /var/db/postgres/compute \
|
||||
-C "postgresql://cloud_admin@localhost:55433/postgres" \
|
||||
-b /usr/local/bin/postgres \
|
||||
--compute-id "compute-$RANDOM" \
|
||||
-S ${SPEC_FILE}
|
||||
|
||||
@@ -71,7 +71,7 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
|
||||
cat ../compute/patches/contrib_pg${pg_version}.patch | docker exec -i $TEST_CONTAINER_NAME bash -c "(cd /postgres && patch -p1)"
|
||||
# We are running tests now
|
||||
rm -f testout.txt testout_contrib.txt
|
||||
docker exec -e USE_PGXS=1 -e SKIP=timescaledb-src,rdkit-src,postgis-src,pgx_ulid-src,pgtap-src,pg_tiktoken-src,pg_jsonschema-src,kq_imcx-src,wal2json_2_5-src \
|
||||
docker exec -e USE_PGXS=1 -e SKIP=timescaledb-src,rdkit-src,postgis-src,pgx_ulid-src,pg_tiktoken-src,pg_jsonschema-src,kq_imcx-src,wal2json_2_5-src \
|
||||
$TEST_CONTAINER_NAME /run-tests.sh /ext-src | tee testout.txt && EXT_SUCCESS=1 || EXT_SUCCESS=0
|
||||
docker exec -e SKIP=start-scripts,postgres_fdw,ltree_plpython,jsonb_plpython,jsonb_plperl,hstore_plpython,hstore_plperl,dblink,bool_plperl \
|
||||
$TEST_CONTAINER_NAME /run-tests.sh /postgres/contrib | tee testout_contrib.txt && CONTRIB_SUCCESS=1 || CONTRIB_SUCCESS=0
|
||||
@@ -81,15 +81,8 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
|
||||
[ $EXT_SUCCESS -eq 0 ] && FAILED=$(tail -1 testout.txt | awk '{for(i=1;i<=NF;i++){print "/ext-src/"$i;}}')
|
||||
[ $CONTRIB_SUCCESS -eq 0 ] && CONTRIB_FAILED=$(tail -1 testout_contrib.txt | awk '{for(i=0;i<=NF;i++){print "/postgres/contrib/"$i;}}')
|
||||
for d in $FAILED $CONTRIB_FAILED; do
|
||||
dn="$(basename $d)"
|
||||
rm -rf $dn
|
||||
mkdir $dn
|
||||
docker cp $TEST_CONTAINER_NAME:$d/regression.diffs $dn || [ $? -eq 1 ]
|
||||
docker cp $TEST_CONTAINER_NAME:$d/regression.out $dn || [ $? -eq 1 ]
|
||||
cat $dn/regression.out $dn/regression.diffs || true
|
||||
rm -rf $dn
|
||||
docker exec $TEST_CONTAINER_NAME bash -c 'for file in $(find '"$d"' -name regression.diffs -o -name regression.out); do cat $file; done' || [ $? -eq 1 ]
|
||||
done
|
||||
rm -rf $FAILED
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
docker-compose/ext-src/pg_repack-src/test-upgrade.sh (new executable file, 5 lines)
@@ -0,0 +1,5 @@
#!/bin/sh
set -ex
cd "$(dirname ${0})"
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
${PG_REGRESS} --use-existing --inputdir=./regress --bindir='/usr/local/pgsql/bin' --dbname=contrib_regression repack-setup repack-run error-on-invalid-idx no-error-on-invalid-idx after-schema repack-check nosuper get_order_by trigger
docker-compose/ext-src/pg_semver-src/test-upgrade-17.patch (new file, 24 lines)
@@ -0,0 +1,24 @@
|
||||
diff --git a/test/sql/base.sql b/test/sql/base.sql
|
||||
index 53adb30..2eed91b 100644
|
||||
--- a/test/sql/base.sql
|
||||
+++ b/test/sql/base.sql
|
||||
@@ -2,7 +2,6 @@
|
||||
BEGIN;
|
||||
|
||||
\i test/pgtap-core.sql
|
||||
-CREATE EXTENSION semver;
|
||||
|
||||
SELECT plan(334);
|
||||
--SELECT * FROM no_plan();
|
||||
diff --git a/test/sql/corpus.sql b/test/sql/corpus.sql
|
||||
index c0fe98e..39cdd2e 100644
|
||||
--- a/test/sql/corpus.sql
|
||||
+++ b/test/sql/corpus.sql
|
||||
@@ -4,7 +4,6 @@ BEGIN;
|
||||
-- Test the SemVer corpus from https://regex101.com/r/Ly7O1x/3/.
|
||||
|
||||
\i test/pgtap-core.sql
|
||||
-CREATE EXTENSION semver;
|
||||
|
||||
SELECT plan(76);
|
||||
--SELECT * FROM no_plan();
|
||||
@@ -1,6 +1,7 @@
|
||||
#!/bin/sh
|
||||
set -ex
|
||||
cd "$(dirname ${0})"
|
||||
patch -p1 <test-upgrade.patch
|
||||
patch -p1 <test-upgrade-${PG_VERSION}.patch
|
||||
psql -d contrib_regression -c "DROP EXTENSION IF EXISTS pgtap"
|
||||
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
|
||||
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --inputdir=test --dbname=contrib_regression base corpus
|
||||
docker-compose/ext-src/pgtap-src/test-upgrade.patch (new file, 28 lines)
@@ -0,0 +1,28 @@
|
||||
diff --git a/Makefile b/Makefile
|
||||
index f255fe6..0a0fa65 100644
|
||||
--- a/Makefile
|
||||
+++ b/Makefile
|
||||
@@ -346,7 +346,7 @@ test: test-serial test-parallel
|
||||
TB_DIR = test/build
|
||||
GENERATED_SCHEDULE_DEPS = $(TB_DIR)/all_tests $(TB_DIR)/exclude_tests
|
||||
REGRESS = --schedule $(TB_DIR)/run.sch # Set this again just to be safe
|
||||
-REGRESS_OPTS = --inputdir=test --max-connections=$(PARALLEL_CONN) --schedule $(SETUP_SCH) $(REGRESS_CONF)
|
||||
+REGRESS_OPTS = --use-existing --dbname=pgtap_regression --inputdir=test --max-connections=$(PARALLEL_CONN) --schedule $(SETUP_SCH) $(REGRESS_CONF)
|
||||
SETUP_SCH = test/schedule/main.sch # schedule to use for test setup; this can be forcibly changed by some targets!
|
||||
IGNORE_TESTS = $(notdir $(EXCLUDE_TEST_FILES:.sql=))
|
||||
PARALLEL_TESTS = $(filter-out $(IGNORE_TESTS),$(filter-out $(SERIAL_TESTS),$(ALL_TESTS)))
|
||||
diff --git a/test/schedule/create.sql b/test/schedule/create.sql
|
||||
index ba355ed..7e250f5 100644
|
||||
--- a/test/schedule/create.sql
|
||||
+++ b/test/schedule/create.sql
|
||||
@@ -1,3 +1,2 @@
|
||||
\unset ECHO
|
||||
\i test/psql.sql
|
||||
-CREATE EXTENSION pgtap;
|
||||
diff --git a/test/schedule/main.sch b/test/schedule/main.sch
|
||||
index a8a5fbc..0463fc4 100644
|
||||
--- a/test/schedule/main.sch
|
||||
+++ b/test/schedule/main.sch
|
||||
@@ -1,2 +1 @@
|
||||
-test: build
|
||||
test: create
|
||||
docker-compose/ext-src/pgtap-src/test-upgrade.sh (new executable file, 5 lines)
@@ -0,0 +1,5 @@
#!/bin/sh
set -ex
cd "$(dirname ${0})"
patch -p1 <test-upgrade.patch
make installcheck
@@ -2,4 +2,5 @@
|
||||
set -ex
|
||||
cd "$(dirname ${0})"
|
||||
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
|
||||
${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --use-existing --dbname=contrib_regression plv8 plv8-errors scalar_args inline json startup_pre startup varparam json_conv jsonb_conv window guc es6 arraybuffer composites currentresource startup_perms bytea find_function_perms memory_limits reset show array_spread regression dialect bigint procedure
|
||||
REGRESS="$(make -n installcheck | awk '{print substr($0,index($0,"init-extension")+15);}')"
|
||||
${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --use-existing --dbname=contrib_regression ${REGRESS}
|
||||
@@ -11,6 +11,7 @@ if [ -z ${OLDTAG+x} ] || [ -z ${NEWTAG+x} ] || [ -z "${OLDTAG}" ] || [ -z "${NEW
|
||||
exit 1
|
||||
fi
|
||||
export PG_VERSION=${PG_VERSION:-16}
|
||||
export PG_TEST_VERSION=${PG_VERSION}
|
||||
function wait_for_ready {
|
||||
TIME=0
|
||||
while ! docker compose logs compute_is_ready | grep -q "accepting connections" && [ ${TIME} -le 300 ] ; do
|
||||
@@ -41,7 +42,9 @@ EXTENSIONS='[
|
||||
{"extname": "roaringbitmap", "extdir": "pg_roaringbitmap-src"},
|
||||
{"extname": "semver", "extdir": "pg_semver-src"},
|
||||
{"extname": "pg_ivm", "extdir": "pg_ivm-src"},
|
||||
{"extname": "pgjwt", "extdir": "pgjwt-src"}
|
||||
{"extname": "pgjwt", "extdir": "pgjwt-src"},
|
||||
{"extname": "pgtap", "extdir": "pgtap-src"},
|
||||
{"extname": "pg_repack", "extdir": "pg_repack-src"}
|
||||
]'
|
||||
EXTNAMES=$(echo ${EXTENSIONS} | jq -r '.[].extname' | paste -sd ' ' -)
|
||||
TAG=${NEWTAG} docker compose --profile test-extensions up --quiet-pull --build -d
|
||||
@@ -57,9 +60,15 @@ wait_for_ready
|
||||
docker compose cp ext-src neon-test-extensions:/
|
||||
docker compose exec neon-test-extensions psql -c "DROP DATABASE IF EXISTS contrib_regression"
|
||||
docker compose exec neon-test-extensions psql -c "CREATE DATABASE contrib_regression"
|
||||
docker compose exec neon-test-extensions psql -c "CREATE DATABASE pgtap_regression"
|
||||
docker compose exec neon-test-extensions psql -d pgtap_regression -c "CREATE EXTENSION pgtap"
|
||||
create_extensions "${EXTNAMES}"
|
||||
query="select pge.extname from pg_extension pge join (select key as extname, value as extversion from json_each_text('${new_vers}')) x on pge.extname=x.extname and pge.extversion <> x.extversion"
|
||||
exts=$(docker compose exec neon-test-extensions psql -Aqt -d contrib_regression -c "$query")
|
||||
if [ "${FORCE_ALL_UPGRADE_TESTS:-false}" = true ]; then
|
||||
exts="${EXTNAMES}"
|
||||
else
|
||||
query="select pge.extname from pg_extension pge join (select key as extname, value as extversion from json_each_text('${new_vers}')) x on pge.extname=x.extname and pge.extversion <> x.extversion"
|
||||
exts=$(docker compose exec neon-test-extensions psql -Aqt -d contrib_regression -c "$query")
|
||||
fi
|
||||
if [ -z "${exts}" ]; then
|
||||
echo "No extensions were upgraded"
|
||||
else
|
||||
@@ -87,7 +96,10 @@ else
|
||||
exit 1
|
||||
fi
|
||||
docker compose exec neon-test-extensions psql -d contrib_regression -c "\dx ${ext}"
|
||||
docker compose exec neon-test-extensions sh -c /ext-src/${EXTDIR}/test-upgrade.sh
|
||||
if ! docker compose exec neon-test-extensions sh -c /ext-src/${EXTDIR}/test-upgrade.sh; then
|
||||
docker compose exec neon-test-extensions cat /ext-src/${EXTDIR}/regression.diffs
|
||||
exit 1
|
||||
fi
|
||||
docker compose exec neon-test-extensions psql -d contrib_regression -c "alter extension ${ext} update"
|
||||
docker compose exec neon-test-extensions psql -d contrib_regression -c "\dx ${ext}"
|
||||
done
|
||||
|
||||
@@ -7,6 +7,7 @@ license.workspace = true
|
||||
[dependencies]
|
||||
anyhow.workspace = true
|
||||
chrono.workspace = true
|
||||
jsonwebtoken.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
regex.workspace = true
|
||||
|
||||
@@ -1,18 +1,20 @@
|
||||
//! Structs representing the JSON formats used in the compute_ctl's HTTP API.
|
||||
use crate::{
|
||||
privilege::Privilege,
|
||||
responses::ComputeCtlConfig,
|
||||
spec::{ComputeSpec, ExtVersion, PgIdent},
|
||||
};
|
||||
use serde::Deserialize;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Request of the /configure API
|
||||
///
|
||||
/// We now pass only `spec` in the configuration request, but later we can
|
||||
/// extend it with something like `restart: bool` or something else. So put
|
||||
/// `spec` into a struct initially to be more flexible in the future.
|
||||
#[derive(Deserialize, Debug)]
|
||||
#[derive(Debug, Deserialize, Serialize)]
|
||||
pub struct ConfigurationRequest {
|
||||
pub spec: ComputeSpec,
|
||||
pub compute_ctl_config: ComputeCtlConfig,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug)]
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
use std::fmt::Display;
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use jsonwebtoken::jwk::JwkSet;
|
||||
use serde::{Deserialize, Serialize, Serializer};
|
||||
|
||||
use crate::{
|
||||
@@ -135,13 +136,27 @@ pub struct CatalogObjects {
|
||||
pub databases: Vec<Database>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Serialize)]
|
||||
pub struct ComputeCtlConfig {
|
||||
pub jwks: JwkSet,
|
||||
}
|
||||
|
||||
impl Default for ComputeCtlConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
jwks: JwkSet {
|
||||
keys: Vec::default(),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Response of the `/computes/{compute_id}/spec` control-plane API.
|
||||
/// This is not actually a compute API response, so consider moving
|
||||
/// to a different place.
|
||||
#[derive(Deserialize, Debug)]
|
||||
pub struct ControlPlaneSpecResponse {
|
||||
pub spec: Option<ComputeSpec>,
|
||||
pub status: ControlPlaneComputeStatus,
|
||||
pub compute_ctl_config: ComputeCtlConfig,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Clone, Copy, Debug, PartialEq, Eq)]
|
||||
|
||||
@@ -252,7 +252,7 @@ pub enum ComputeMode {
|
||||
Replica,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
|
||||
#[derive(Clone, Debug, Default, Deserialize, Serialize, PartialEq, Eq)]
|
||||
pub struct Cluster {
|
||||
pub cluster_id: Option<String>,
|
||||
pub name: Option<String>,
|
||||
@@ -283,7 +283,7 @@ pub struct DeltaOp {
|
||||
|
||||
/// Rust representation of Postgres role info with only those fields
|
||||
/// that matter for us.
|
||||
#[derive(Clone, Debug, Deserialize, Serialize)]
|
||||
#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq)]
|
||||
pub struct Role {
|
||||
pub name: PgIdent,
|
||||
pub encrypted_password: Option<String>,
|
||||
@@ -292,7 +292,7 @@ pub struct Role {
|
||||
|
||||
/// Rust representation of Postgres database info with only those fields
|
||||
/// that matter for us.
|
||||
#[derive(Clone, Debug, Deserialize, Serialize)]
|
||||
#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq)]
|
||||
pub struct Database {
|
||||
pub name: PgIdent,
|
||||
pub owner: PgIdent,
|
||||
@@ -308,7 +308,7 @@ pub struct Database {
|
||||
/// Common type representing both SQL statement params with or without value,
|
||||
/// like `LOGIN` or `OWNER username` in the `CREATE/ALTER ROLE`, and config
|
||||
/// options like `wal_level = logical`.
|
||||
#[derive(Clone, Debug, Deserialize, Serialize)]
|
||||
#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq)]
|
||||
pub struct GenericOption {
|
||||
pub name: String,
|
||||
pub value: Option<String>,
|
||||
|
||||
@@ -2,7 +2,6 @@ use anyhow::bail;
|
||||
use flate2::write::{GzDecoder, GzEncoder};
|
||||
use flate2::Compression;
|
||||
use itertools::Itertools as _;
|
||||
use once_cell::sync::Lazy;
|
||||
use pprof::protos::{Function, Line, Location, Message as _, Profile};
|
||||
use regex::Regex;
|
||||
|
||||
@@ -58,38 +57,30 @@ pub fn symbolize(mut profile: Profile) -> anyhow::Result<Profile> {
|
||||
|
||||
// Resolve the line and function for each location.
|
||||
backtrace::resolve(loc.address as *mut c_void, |symbol| {
|
||||
let Some(symname) = symbol.name() else {
|
||||
let Some(symbol_name) = symbol.name() else {
|
||||
return;
|
||||
};
|
||||
let mut name = symname.to_string();
|
||||
|
||||
// Strip the Rust monomorphization suffix from the symbol name.
|
||||
static SUFFIX_REGEX: Lazy<Regex> =
|
||||
Lazy::new(|| Regex::new("::h[0-9a-f]{16}$").expect("invalid regex"));
|
||||
if let Some(m) = SUFFIX_REGEX.find(&name) {
|
||||
name.truncate(m.start());
|
||||
}
|
||||
|
||||
let function_id = match functions.get(&name) {
|
||||
Some(function) => function.id,
|
||||
None => {
|
||||
let id = functions.len() as u64 + 1;
|
||||
let system_name = String::from_utf8_lossy(symname.as_bytes());
|
||||
let function_name = format!("{symbol_name:#}");
|
||||
let functions_len = functions.len();
|
||||
let function_id = functions
|
||||
.entry(function_name)
|
||||
.or_insert_with_key(|function_name| {
|
||||
let function_id = functions_len as u64 + 1;
|
||||
let system_name = String::from_utf8_lossy(symbol_name.as_bytes());
|
||||
let filename = symbol
|
||||
.filename()
|
||||
.map(|path| path.to_string_lossy())
|
||||
.unwrap_or(Cow::Borrowed(""));
|
||||
let function = Function {
|
||||
id,
|
||||
name: string_id(&name),
|
||||
Function {
|
||||
id: function_id,
|
||||
name: string_id(function_name),
|
||||
system_name: string_id(&system_name),
|
||||
filename: string_id(&filename),
|
||||
..Default::default()
|
||||
};
|
||||
functions.insert(name, function);
|
||||
id
|
||||
}
|
||||
};
|
||||
}
|
||||
})
|
||||
.id;
|
||||
loc.line.push(Line {
|
||||
function_id,
|
||||
line: symbol.lineno().unwrap_or(0) as i64,
|
||||
|
||||
@@ -94,7 +94,6 @@ pub struct ConfigToml {
|
||||
pub ondemand_download_behavior_treat_error_as_warn: bool,
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub background_task_maximum_delay: Duration,
|
||||
pub use_compaction_semaphore: bool,
|
||||
pub control_plane_api: Option<reqwest::Url>,
|
||||
pub control_plane_api_token: Option<String>,
|
||||
pub control_plane_emergency_mode: bool,
|
||||
@@ -123,6 +122,8 @@ pub struct ConfigToml {
|
||||
pub page_service_pipelining: PageServicePipeliningConfig,
|
||||
pub get_vectored_concurrent_io: GetVectoredConcurrentIo,
|
||||
pub enable_read_path_debugging: Option<bool>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub validate_wal_contiguity: Option<bool>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
@@ -266,6 +267,9 @@ pub struct TenantConfigToml {
|
||||
pub compaction_algorithm: crate::models::CompactionAlgorithmSettings,
|
||||
/// If true, compact down L0 across all tenant timelines before doing regular compaction.
|
||||
pub compaction_l0_first: bool,
|
||||
/// If true, use a separate semaphore (i.e. concurrency limit) for the L0 compaction pass. Only
|
||||
/// has an effect if `compaction_l0_first` is `true`.
|
||||
pub compaction_l0_semaphore: bool,
|
||||
/// Level0 delta layer threshold at which to delay layer flushes for compaction backpressure,
|
||||
/// such that they take 2x as long, and start waiting for layer flushes during ephemeral layer
|
||||
/// rolls. This helps compaction keep up with WAL ingestion, and avoids read amplification
|
||||
@@ -349,7 +353,7 @@ pub struct TenantConfigToml {
|
||||
|
||||
/// Enable rel_size_v2 for this tenant. Once enabled, the tenant will persist this information into
|
||||
/// `index_part.json`, and it cannot be reversed.
|
||||
pub rel_size_v2_enabled: Option<bool>,
|
||||
pub rel_size_v2_enabled: bool,
|
||||
|
||||
// gc-compaction related configs
|
||||
/// Enable automatic gc-compaction trigger on this tenant.
|
||||
@@ -474,7 +478,6 @@ impl Default for ConfigToml {
|
||||
DEFAULT_BACKGROUND_TASK_MAXIMUM_DELAY,
|
||||
)
|
||||
.unwrap()),
|
||||
use_compaction_semaphore: false,
|
||||
|
||||
control_plane_api: (None),
|
||||
control_plane_api_token: (None),
|
||||
@@ -520,6 +523,7 @@ impl Default for ConfigToml {
|
||||
} else {
|
||||
None
|
||||
},
|
||||
validate_wal_contiguity: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -543,11 +547,13 @@ pub mod tenant_conf_defaults {
|
||||
pub const DEFAULT_COMPACTION_PERIOD: &str = "20 s";
|
||||
pub const DEFAULT_COMPACTION_THRESHOLD: usize = 10;
|
||||
|
||||
// This value needs to be tuned to avoid OOM. We have 3/4 of the total CPU threads to do background works, that's 16*3/4=9 on
|
||||
// most of our pageservers. Compaction ~50 layers requires about 2GB memory (could be reduced later by optimizing L0 hole
|
||||
// calculation to avoid loading all keys into the memory). So with this config, we can get a maximum peak compaction usage of 18GB.
|
||||
pub const DEFAULT_COMPACTION_UPPER_LIMIT: usize = 50;
|
||||
// This value needs to be tuned to avoid OOM. We have 3/4*CPUs threads for L0 compaction, that's
|
||||
// 3/4*16=9 on most of our pageservers. Compacting 20 layers requires about 1 GB memory (could
|
||||
// be reduced later by optimizing L0 hole calculation to avoid loading all keys into memory). So
|
||||
// with this config, we can get a maximum peak compaction usage of 9 GB.
|
||||
pub const DEFAULT_COMPACTION_UPPER_LIMIT: usize = 20;
|
||||
pub const DEFAULT_COMPACTION_L0_FIRST: bool = false;
|
||||
pub const DEFAULT_COMPACTION_L0_SEMAPHORE: bool = true;
|
||||
|
||||
pub const DEFAULT_COMPACTION_ALGORITHM: crate::models::CompactionAlgorithm =
|
||||
crate::models::CompactionAlgorithm::Legacy;
|
||||
@@ -598,6 +604,7 @@ impl Default for TenantConfigToml {
|
||||
kind: DEFAULT_COMPACTION_ALGORITHM,
|
||||
},
|
||||
compaction_l0_first: DEFAULT_COMPACTION_L0_FIRST,
|
||||
compaction_l0_semaphore: DEFAULT_COMPACTION_L0_SEMAPHORE,
|
||||
l0_flush_delay_threshold: None,
|
||||
l0_flush_stall_threshold: None,
|
||||
l0_flush_wait_upload: DEFAULT_L0_FLUSH_WAIT_UPLOAD,
|
||||
@@ -630,7 +637,7 @@ impl Default for TenantConfigToml {
|
||||
lsn_lease_length_for_ts: LsnLease::DEFAULT_LENGTH_FOR_TS,
|
||||
timeline_offloading: true,
|
||||
wal_receiver_protocol_override: None,
|
||||
rel_size_v2_enabled: None,
|
||||
rel_size_v2_enabled: false,
|
||||
gc_compaction_enabled: DEFAULT_GC_COMPACTION_ENABLED,
|
||||
gc_compaction_initial_threshold_kb: DEFAULT_GC_COMPACTION_INITIAL_THRESHOLD_KB,
|
||||
gc_compaction_ratio_percent: DEFAULT_GC_COMPACTION_RATIO_PERCENT,
|
||||
|
||||
@@ -57,6 +57,7 @@ pub struct NodeRegisterRequest {
|
||||
|
||||
pub listen_http_addr: String,
|
||||
pub listen_http_port: u16,
|
||||
pub listen_https_port: Option<u16>,
|
||||
|
||||
pub availability_zone_id: AvailabilityZone,
|
||||
}
|
||||
@@ -105,6 +106,7 @@ pub struct TenantLocateResponseShard {
|
||||
|
||||
pub listen_http_addr: String,
|
||||
pub listen_http_port: u16,
|
||||
pub listen_https_port: Option<u16>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
@@ -148,6 +150,7 @@ pub struct NodeDescribeResponse {
|
||||
|
||||
pub listen_http_addr: String,
|
||||
pub listen_http_port: u16,
|
||||
pub listen_https_port: Option<u16>,
|
||||
|
||||
pub listen_pg_addr: String,
|
||||
pub listen_pg_port: u16,
|
||||
@@ -182,6 +185,18 @@ pub struct TenantDescribeResponseShard {
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub struct TenantShardMigrateRequest {
|
||||
pub node_id: NodeId,
|
||||
#[serde(default)]
|
||||
pub migration_config: Option<MigrationConfig>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub struct MigrationConfig {
|
||||
#[serde(default)]
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub secondary_warmup_timeout: Option<Duration>,
|
||||
#[serde(default)]
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub secondary_download_request_timeout: Option<Duration>,
|
||||
}
|
||||
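For illustration, a caller might build the new request like this. This is a minimal sketch, not code from the patch; the node id and timeout values are made up, and it assumes `NodeId` wraps a plain u64 as in `utils::id`.

use std::time::Duration;
use utils::id::NodeId;

// Sketch only: construct a migrate request with an explicit migration config.
// Both timeouts are optional; passing `migration_config: None` keeps the old behaviour.
let req = TenantShardMigrateRequest {
    node_id: NodeId(42),
    migration_config: Some(MigrationConfig {
        secondary_warmup_timeout: Some(Duration::from_secs(300)),
        secondary_download_request_timeout: Some(Duration::from_secs(30)),
    }),
};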
|
||||
#[derive(Serialize, Clone, Debug)]
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
use anyhow::{bail, Result};
|
||||
use byteorder::{ByteOrder, BE};
|
||||
use bytes::Bytes;
|
||||
use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
|
||||
use postgres_ffi::Oid;
|
||||
use postgres_ffi::RepOriginId;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::{fmt, ops::Range};
|
||||
use utils::const_assert;
|
||||
|
||||
use crate::reltag::{BlockNumber, RelTag, SlruKind};
|
||||
|
||||
@@ -49,6 +51,64 @@ pub const AUX_KEY_PREFIX: u8 = 0x62;
|
||||
/// The key prefix of ReplOrigin keys.
|
||||
pub const REPL_ORIGIN_KEY_PREFIX: u8 = 0x63;
|
||||
|
||||
/// The key prefix of db directory keys.
|
||||
pub const DB_DIR_KEY_PREFIX: u8 = 0x64;
|
||||
|
||||
/// The key prefix of rel directory keys.
|
||||
pub const REL_DIR_KEY_PREFIX: u8 = 0x65;
|
||||
|
||||
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
|
||||
pub enum RelDirExists {
|
||||
Exists,
|
||||
Removed,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct DecodeError;
|
||||
|
||||
impl fmt::Display for DecodeError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "invalid marker")
|
||||
}
|
||||
}
|
||||
|
||||
impl std::error::Error for DecodeError {}
|
||||
|
||||
impl RelDirExists {
|
||||
/// The value of the rel directory keys that indicates the existence of a relation.
|
||||
const REL_EXISTS_MARKER: Bytes = Bytes::from_static(b"r");
|
||||
|
||||
pub fn encode(&self) -> Bytes {
|
||||
match self {
|
||||
Self::Exists => Self::REL_EXISTS_MARKER.clone(),
|
||||
Self::Removed => SPARSE_TOMBSTONE_MARKER.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn decode_option(data: Option<impl AsRef<[u8]>>) -> Result<Self, DecodeError> {
|
||||
match data {
|
||||
Some(marker) if marker.as_ref() == Self::REL_EXISTS_MARKER => Ok(Self::Exists),
|
||||
// Any other marker is invalid
|
||||
Some(_) => Err(DecodeError),
|
||||
None => Ok(Self::Removed),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn decode(data: impl AsRef<[u8]>) -> Result<Self, DecodeError> {
|
||||
let data = data.as_ref();
|
||||
if data == Self::REL_EXISTS_MARKER {
|
||||
Ok(Self::Exists)
|
||||
} else if data == SPARSE_TOMBSTONE_MARKER {
|
||||
Ok(Self::Removed)
|
||||
} else {
|
||||
Err(DecodeError)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A tombstone in the sparse keyspace, which is an empty buffer.
|
||||
pub const SPARSE_TOMBSTONE_MARKER: Bytes = Bytes::from_static(b"");
|
||||
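A small sketch of the intended round-trip between the markers above; it is not part of the patch, it just restates the encode/decode contract from the hunk.

// An existing rel dir entry encodes to the "r" marker and decodes back to Exists;
// a missing value decodes to Removed, and so does the empty sparse tombstone.
assert_eq!(
    RelDirExists::decode_option(Some(RelDirExists::Exists.encode())).unwrap(),
    RelDirExists::Exists
);
assert_eq!(RelDirExists::decode_option(None::<&[u8]>).unwrap(), RelDirExists::Removed);
assert_eq!(RelDirExists::decode(SPARSE_TOMBSTONE_MARKER).unwrap(), RelDirExists::Removed);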
|
||||
/// Check if the key falls in the range of metadata keys.
|
||||
pub const fn is_metadata_key_slice(key: &[u8]) -> bool {
|
||||
key[0] >= METADATA_KEY_BEGIN_PREFIX && key[0] < METADATA_KEY_END_PREFIX
|
||||
@@ -110,6 +170,24 @@ impl Key {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn rel_dir_sparse_key_range() -> Range<Self> {
|
||||
Key {
|
||||
field1: REL_DIR_KEY_PREFIX,
|
||||
field2: 0,
|
||||
field3: 0,
|
||||
field4: 0,
|
||||
field5: 0,
|
||||
field6: 0,
|
||||
}..Key {
|
||||
field1: REL_DIR_KEY_PREFIX + 1,
|
||||
field2: 0,
|
||||
field3: 0,
|
||||
field4: 0,
|
||||
field5: 0,
|
||||
field6: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// This function checks more extensively what keys we can take on the write path.
|
||||
/// If a key beginning with 00 does not have a global/default tablespace OID, it
|
||||
/// will be rejected on the write path.
|
||||
@@ -440,6 +518,36 @@ pub fn rel_dir_to_key(spcnode: Oid, dbnode: Oid) -> Key {
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn rel_tag_sparse_key(spcnode: Oid, dbnode: Oid, relnode: Oid, forknum: u8) -> Key {
|
||||
Key {
|
||||
field1: REL_DIR_KEY_PREFIX,
|
||||
field2: spcnode,
|
||||
field3: dbnode,
|
||||
field4: relnode,
|
||||
field5: forknum,
|
||||
field6: 1,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn rel_tag_sparse_key_range(spcnode: Oid, dbnode: Oid) -> Range<Key> {
|
||||
Key {
|
||||
field1: REL_DIR_KEY_PREFIX,
|
||||
field2: spcnode,
|
||||
field3: dbnode,
|
||||
field4: 0,
|
||||
field5: 0,
|
||||
field6: 0,
|
||||
}..Key {
|
||||
field1: REL_DIR_KEY_PREFIX,
|
||||
field2: spcnode,
|
||||
field3: dbnode,
|
||||
field4: u32::MAX,
|
||||
field5: u8::MAX,
|
||||
field6: u32::MAX,
|
||||
} // it's fine to exclude the last key b/c we only use field6 == 1
|
||||
}
|
||||
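For illustration, a per-relation sparse key always falls inside the per-database range built above. The OID values are made up, and the sketch assumes `Key` is ordered (as its use in `Range<Key>` implies).

// spcnode=1663 (pg_default), dbnode=16384, relnode=16385, main fork.
let key = rel_tag_sparse_key(1663, 16384, 16385, 0);
let range = rel_tag_sparse_key_range(1663, 16384);
assert!(range.contains(&key));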
|
||||
#[inline(always)]
|
||||
pub fn rel_block_to_key(rel: RelTag, blknum: BlockNumber) -> Key {
|
||||
Key {
|
||||
@@ -734,9 +842,9 @@ impl Key {
|
||||
self.field1 == RELATION_SIZE_PREFIX
|
||||
}
|
||||
|
||||
pub fn sparse_non_inherited_keyspace() -> Range<Key> {
|
||||
pub const fn sparse_non_inherited_keyspace() -> Range<Key> {
|
||||
// The two keys are adjacent; if we have non-adjacent keys in the future, we should return a keyspace
|
||||
debug_assert_eq!(AUX_KEY_PREFIX + 1, REPL_ORIGIN_KEY_PREFIX);
|
||||
const_assert!(AUX_KEY_PREFIX + 1 == REPL_ORIGIN_KEY_PREFIX);
|
||||
Key {
|
||||
field1: AUX_KEY_PREFIX,
|
||||
field2: 0,
|
||||
|
||||
@@ -466,6 +466,8 @@ pub struct TenantConfigPatch {
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub compaction_l0_first: FieldPatch<bool>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub compaction_l0_semaphore: FieldPatch<bool>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub l0_flush_delay_threshold: FieldPatch<usize>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub l0_flush_stall_threshold: FieldPatch<usize>,
|
||||
@@ -524,34 +526,55 @@ pub struct TenantConfigPatch {
|
||||
#[derive(Serialize, Deserialize, Debug, Default, Clone, Eq, PartialEq)]
|
||||
pub struct TenantConfig {
|
||||
pub checkpoint_distance: Option<u64>,
|
||||
pub checkpoint_timeout: Option<String>,
|
||||
#[serde(default)]
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub checkpoint_timeout: Option<Duration>,
|
||||
pub compaction_target_size: Option<u64>,
|
||||
pub compaction_period: Option<String>,
|
||||
#[serde(default)]
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub compaction_period: Option<Duration>,
|
||||
pub compaction_threshold: Option<usize>,
|
||||
pub compaction_upper_limit: Option<usize>,
|
||||
// defer parsing compaction_algorithm, like eviction_policy
|
||||
pub compaction_algorithm: Option<CompactionAlgorithmSettings>,
|
||||
pub compaction_l0_first: Option<bool>,
|
||||
pub compaction_l0_semaphore: Option<bool>,
|
||||
pub l0_flush_delay_threshold: Option<usize>,
|
||||
pub l0_flush_stall_threshold: Option<usize>,
|
||||
pub l0_flush_wait_upload: Option<bool>,
|
||||
pub gc_horizon: Option<u64>,
|
||||
pub gc_period: Option<String>,
|
||||
#[serde(default)]
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub gc_period: Option<Duration>,
|
||||
pub image_creation_threshold: Option<usize>,
|
||||
pub pitr_interval: Option<String>,
|
||||
pub walreceiver_connect_timeout: Option<String>,
|
||||
pub lagging_wal_timeout: Option<String>,
|
||||
#[serde(default)]
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub pitr_interval: Option<Duration>,
|
||||
#[serde(default)]
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub walreceiver_connect_timeout: Option<Duration>,
|
||||
#[serde(default)]
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub lagging_wal_timeout: Option<Duration>,
|
||||
pub max_lsn_wal_lag: Option<NonZeroU64>,
|
||||
pub eviction_policy: Option<EvictionPolicy>,
|
||||
pub min_resident_size_override: Option<u64>,
|
||||
pub evictions_low_residence_duration_metric_threshold: Option<String>,
|
||||
pub heatmap_period: Option<String>,
|
||||
#[serde(default)]
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub evictions_low_residence_duration_metric_threshold: Option<Duration>,
|
||||
#[serde(default)]
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub heatmap_period: Option<Duration>,
|
||||
pub lazy_slru_download: Option<bool>,
|
||||
pub timeline_get_throttle: Option<ThrottleConfig>,
|
||||
pub image_layer_creation_check_threshold: Option<u8>,
|
||||
pub image_creation_preempt_threshold: Option<usize>,
|
||||
pub lsn_lease_length: Option<String>,
|
||||
pub lsn_lease_length_for_ts: Option<String>,
|
||||
#[serde(default)]
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub lsn_lease_length: Option<Duration>,
|
||||
#[serde(default)]
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub lsn_lease_length_for_ts: Option<Duration>,
|
||||
pub timeline_offloading: Option<bool>,
|
||||
pub wal_receiver_protocol_override: Option<PostgresClientProtocol>,
|
||||
pub rel_size_v2_enabled: Option<bool>,
|
||||
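Since these fields now use `humantime_serde`, they accept human-readable duration strings on the wire while being `std::time::Duration` in memory. A minimal standalone sketch of that round-trip (not part of the patch; only the `pitr_interval` name is taken from the struct above):

use std::time::Duration;

#[derive(serde::Deserialize)]
struct DurationsExample {
    #[serde(default, with = "humantime_serde")]
    pitr_interval: Option<Duration>,
}

fn duration_round_trip_demo() {
    // humantime parses "7 days" into 604800 seconds.
    let parsed: DurationsExample =
        serde_json::from_str(r#"{ "pitr_interval": "7 days" }"#).unwrap();
    assert_eq!(parsed.pitr_interval, Some(Duration::from_secs(7 * 24 * 3600)));
}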
@@ -561,7 +584,10 @@ pub struct TenantConfig {
|
||||
}
|
||||
|
||||
impl TenantConfig {
|
||||
pub fn apply_patch(self, patch: TenantConfigPatch) -> TenantConfig {
|
||||
pub fn apply_patch(
|
||||
self,
|
||||
patch: TenantConfigPatch,
|
||||
) -> Result<TenantConfig, humantime::DurationError> {
|
||||
let Self {
|
||||
mut checkpoint_distance,
|
||||
mut checkpoint_timeout,
|
||||
@@ -571,6 +597,7 @@ impl TenantConfig {
|
||||
mut compaction_upper_limit,
|
||||
mut compaction_algorithm,
|
||||
mut compaction_l0_first,
|
||||
mut compaction_l0_semaphore,
|
||||
mut l0_flush_delay_threshold,
|
||||
mut l0_flush_stall_threshold,
|
||||
mut l0_flush_wait_upload,
|
||||
@@ -600,17 +627,26 @@ impl TenantConfig {
|
||||
} = self;
|
||||
|
||||
patch.checkpoint_distance.apply(&mut checkpoint_distance);
|
||||
patch.checkpoint_timeout.apply(&mut checkpoint_timeout);
|
||||
patch
|
||||
.checkpoint_timeout
|
||||
.map(|v| humantime::parse_duration(&v))?
|
||||
.apply(&mut checkpoint_timeout);
|
||||
patch
|
||||
.compaction_target_size
|
||||
.apply(&mut compaction_target_size);
|
||||
patch.compaction_period.apply(&mut compaction_period);
|
||||
patch
|
||||
.compaction_period
|
||||
.map(|v| humantime::parse_duration(&v))?
|
||||
.apply(&mut compaction_period);
|
||||
patch.compaction_threshold.apply(&mut compaction_threshold);
|
||||
patch
|
||||
.compaction_upper_limit
|
||||
.apply(&mut compaction_upper_limit);
|
||||
patch.compaction_algorithm.apply(&mut compaction_algorithm);
|
||||
patch.compaction_l0_first.apply(&mut compaction_l0_first);
|
||||
patch
|
||||
.compaction_l0_semaphore
|
||||
.apply(&mut compaction_l0_semaphore);
|
||||
patch
|
||||
.l0_flush_delay_threshold
|
||||
.apply(&mut l0_flush_delay_threshold);
|
||||
@@ -619,15 +655,25 @@ impl TenantConfig {
|
||||
.apply(&mut l0_flush_stall_threshold);
|
||||
patch.l0_flush_wait_upload.apply(&mut l0_flush_wait_upload);
|
||||
patch.gc_horizon.apply(&mut gc_horizon);
|
||||
patch.gc_period.apply(&mut gc_period);
|
||||
patch
|
||||
.gc_period
|
||||
.map(|v| humantime::parse_duration(&v))?
|
||||
.apply(&mut gc_period);
|
||||
patch
|
||||
.image_creation_threshold
|
||||
.apply(&mut image_creation_threshold);
|
||||
patch.pitr_interval.apply(&mut pitr_interval);
|
||||
patch
|
||||
.pitr_interval
|
||||
.map(|v| humantime::parse_duration(&v))?
|
||||
.apply(&mut pitr_interval);
|
||||
patch
|
||||
.walreceiver_connect_timeout
|
||||
.map(|v| humantime::parse_duration(&v))?
|
||||
.apply(&mut walreceiver_connect_timeout);
|
||||
patch.lagging_wal_timeout.apply(&mut lagging_wal_timeout);
|
||||
patch
|
||||
.lagging_wal_timeout
|
||||
.map(|v| humantime::parse_duration(&v))?
|
||||
.apply(&mut lagging_wal_timeout);
|
||||
patch.max_lsn_wal_lag.apply(&mut max_lsn_wal_lag);
|
||||
patch.eviction_policy.apply(&mut eviction_policy);
|
||||
patch
|
||||
@@ -635,8 +681,12 @@ impl TenantConfig {
|
||||
.apply(&mut min_resident_size_override);
|
||||
patch
|
||||
.evictions_low_residence_duration_metric_threshold
|
||||
.map(|v| humantime::parse_duration(&v))?
|
||||
.apply(&mut evictions_low_residence_duration_metric_threshold);
|
||||
patch.heatmap_period.apply(&mut heatmap_period);
|
||||
patch
|
||||
.heatmap_period
|
||||
.map(|v| humantime::parse_duration(&v))?
|
||||
.apply(&mut heatmap_period);
|
||||
patch.lazy_slru_download.apply(&mut lazy_slru_download);
|
||||
patch
|
||||
.timeline_get_throttle
|
||||
@@ -647,9 +697,13 @@ impl TenantConfig {
|
||||
patch
|
||||
.image_creation_preempt_threshold
|
||||
.apply(&mut image_creation_preempt_threshold);
|
||||
patch.lsn_lease_length.apply(&mut lsn_lease_length);
|
||||
patch
|
||||
.lsn_lease_length
|
||||
.map(|v| humantime::parse_duration(&v))?
|
||||
.apply(&mut lsn_lease_length);
|
||||
patch
|
||||
.lsn_lease_length_for_ts
|
||||
.map(|v| humantime::parse_duration(&v))?
|
||||
.apply(&mut lsn_lease_length_for_ts);
|
||||
patch.timeline_offloading.apply(&mut timeline_offloading);
|
||||
patch
|
||||
@@ -666,7 +720,7 @@ impl TenantConfig {
|
||||
.gc_compaction_ratio_percent
|
||||
.apply(&mut gc_compaction_ratio_percent);
|
||||
|
||||
Self {
|
||||
Ok(Self {
|
||||
checkpoint_distance,
|
||||
checkpoint_timeout,
|
||||
compaction_target_size,
|
||||
@@ -675,6 +729,7 @@ impl TenantConfig {
|
||||
compaction_upper_limit,
|
||||
compaction_algorithm,
|
||||
compaction_l0_first,
|
||||
compaction_l0_semaphore,
|
||||
l0_flush_delay_threshold,
|
||||
l0_flush_stall_threshold,
|
||||
l0_flush_wait_upload,
|
||||
@@ -701,7 +756,7 @@ impl TenantConfig {
|
||||
gc_compaction_enabled,
|
||||
gc_compaction_initial_threshold_kb,
|
||||
gc_compaction_ratio_percent,
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
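Because `apply_patch` now parses the humantime strings carried in the patch, it returns a `Result` and callers have to propagate the parse error. A hedged sketch of the new call shape; the surrounding variable names are illustrative only.

// Sketch only: an invalid duration string in the patch now surfaces as an error
// instead of being stored verbatim.
let updated: TenantConfig = existing_config.apply_patch(request_patch)?;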
@@ -1072,8 +1127,7 @@ pub struct TenantInfo {
|
||||
|
||||
/// Opaque explanation if gc is being blocked.
|
||||
///
|
||||
/// Only looked up for the individual tenant detail, not the listing. This is purely for
|
||||
/// debugging, not included in openapi.
|
||||
/// Only looked up for the individual tenant detail, not the listing.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub gc_blocking: Option<String>,
|
||||
}
|
||||
@@ -1128,7 +1182,26 @@ pub struct TimelineInfo {
|
||||
pub ancestor_lsn: Option<Lsn>,
|
||||
pub last_record_lsn: Lsn,
|
||||
pub prev_record_lsn: Option<Lsn>,
|
||||
|
||||
/// Legacy field for compat with control plane. Synonym of `min_readable_lsn`.
|
||||
/// TODO: remove once control plane no longer reads it.
|
||||
pub latest_gc_cutoff_lsn: Lsn,
|
||||
|
||||
/// The LSN up to which GC has advanced: older data may still exist but it is not available for clients.
|
||||
/// This LSN is not suitable for deciding where to create branches etc: use [`TimelineInfo::min_readable_lsn`] instead,
|
||||
/// as it is easier to reason about.
|
||||
#[serde(default)]
|
||||
pub applied_gc_cutoff_lsn: Lsn,
|
||||
|
||||
/// The upper bound of data which is either already GC'ed, or eligible to be GC'ed at any time based on PITR interval.
|
||||
/// This LSN represents the "end of history" for this timeline, and callers should use it to figure out the oldest
|
||||
/// LSN at which it is legal to create a branch or ephemeral endpoint.
|
||||
///
|
||||
/// Note that holders of valid LSN leases may be able to create branches and read pages earlier
|
||||
/// than this LSN, but new leases may not be taken out earlier than this LSN.
|
||||
#[serde(default)]
|
||||
pub min_readable_lsn: Lsn,
|
||||
|
||||
pub disk_consistent_lsn: Lsn,
|
||||
|
||||
/// The LSN that we have successfully uploaded to remote storage
|
||||
@@ -2477,7 +2550,7 @@ mod tests {
|
||||
..base.clone()
|
||||
};
|
||||
|
||||
let patched = base.apply_patch(decoded.config);
|
||||
let patched = base.apply_patch(decoded.config).unwrap();
|
||||
|
||||
assert_eq!(patched, expected);
|
||||
}
|
||||
|
||||
@@ -9,6 +9,8 @@ use bytes::Bytes;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::io::ErrorKind;
|
||||
use std::net::SocketAddr;
|
||||
use std::os::fd::AsRawFd;
|
||||
use std::os::fd::RawFd;
|
||||
use std::pin::Pin;
|
||||
use std::sync::Arc;
|
||||
use std::task::{ready, Poll};
|
||||
@@ -268,6 +270,7 @@ impl<IO: AsyncRead + AsyncWrite + Unpin> MaybeWriteOnly<IO> {
|
||||
}
|
||||
|
||||
pub struct PostgresBackend<IO> {
|
||||
pub socket_fd: RawFd,
|
||||
framed: MaybeWriteOnly<IO>,
|
||||
|
||||
pub state: ProtoState,
|
||||
@@ -293,9 +296,11 @@ impl PostgresBackend<tokio::net::TcpStream> {
|
||||
tls_config: Option<Arc<rustls::ServerConfig>>,
|
||||
) -> io::Result<Self> {
|
||||
let peer_addr = socket.peer_addr()?;
|
||||
let socket_fd = socket.as_raw_fd();
|
||||
let stream = MaybeTlsStream::Unencrypted(socket);
|
||||
|
||||
Ok(Self {
|
||||
socket_fd,
|
||||
framed: MaybeWriteOnly::Full(Framed::new(stream)),
|
||||
state: ProtoState::Initialization,
|
||||
auth_type,
|
||||
@@ -307,6 +312,7 @@ impl PostgresBackend<tokio::net::TcpStream> {
|
||||
|
||||
impl<IO: AsyncRead + AsyncWrite + Unpin> PostgresBackend<IO> {
|
||||
pub fn new_from_io(
|
||||
socket_fd: RawFd,
|
||||
socket: IO,
|
||||
peer_addr: SocketAddr,
|
||||
auth_type: AuthType,
|
||||
@@ -315,6 +321,7 @@ impl<IO: AsyncRead + AsyncWrite + Unpin> PostgresBackend<IO> {
|
||||
let stream = MaybeTlsStream::Unencrypted(socket);
|
||||
|
||||
Ok(Self {
|
||||
socket_fd,
|
||||
framed: MaybeWriteOnly::Full(Framed::new(stream)),
|
||||
state: ProtoState::Initialization,
|
||||
auth_type,
|
||||
|
||||
@@ -278,7 +278,7 @@ pub fn generate_pg_control(
|
||||
checkpoint_bytes: &[u8],
|
||||
lsn: Lsn,
|
||||
pg_version: u32,
|
||||
) -> anyhow::Result<(Bytes, u64)> {
|
||||
) -> anyhow::Result<(Bytes, u64, bool)> {
|
||||
dispatch_pgversion!(
|
||||
pg_version,
|
||||
pgv::xlog_utils::generate_pg_control(pg_control_bytes, checkpoint_bytes, lsn),
|
||||
|
||||
@@ -124,23 +124,59 @@ pub fn normalize_lsn(lsn: Lsn, seg_sz: usize) -> Lsn {
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate a pg_control file, for a basebackup for starting up Postgres at the given LSN
|
||||
///
|
||||
/// 'pg_control_bytes' and 'checkpoint_bytes' are the contents of those keys persisted in
|
||||
/// the pageserver. They use the same format as the PostgreSQL control file and the
|
||||
/// checkpoint record, but see walingest.rs for how exactly they are kept up to date.
|
||||
/// 'lsn' is the LSN at which we're starting up.
|
||||
///
|
||||
/// Returns:
|
||||
/// - pg_control file contents
|
||||
/// - system_identifier, extracted from the persisted information
|
||||
/// - true, if we're starting up from a "clean shutdown", i.e. if there was a shutdown
|
||||
/// checkpoint at the given LSN
|
||||
pub fn generate_pg_control(
|
||||
pg_control_bytes: &[u8],
|
||||
checkpoint_bytes: &[u8],
|
||||
lsn: Lsn,
|
||||
) -> anyhow::Result<(Bytes, u64)> {
|
||||
) -> anyhow::Result<(Bytes, u64, bool)> {
|
||||
let mut pg_control = ControlFileData::decode(pg_control_bytes)?;
|
||||
let mut checkpoint = CheckPoint::decode(checkpoint_bytes)?;
|
||||
|
||||
// Generate new pg_control needed for bootstrap
|
||||
//
|
||||
// NB: In the checkpoint struct that we persist in the pageserver, we have a different
|
||||
// convention for the 'redo' field than in PostgreSQL: On a shutdown checkpoint,
|
||||
// 'redo' points the *end* of the checkpoint WAL record. On PostgreSQL, it points to
|
||||
// the beginning. Furthermore, on an online checkpoint, 'redo' is set to 0.
|
||||
//
|
||||
// We didn't always have this convention however, and old persisted records will have
|
||||
// old REDO values that point to some old LSN.
|
||||
//
|
||||
// The upshot is that if 'redo' is equal to the "current" LSN, there was a shutdown
|
||||
// checkpoint record at that point in WAL, with no new WAL records after it. That case
|
||||
// can be treated as starting from a clean shutdown. All other cases are treated as
|
||||
// non-clean shutdown. In Neon, we don't do WAL replay at startup in either case, so
|
||||
// that distinction doesn't matter very much. As of this writing, it only affects
|
||||
// whether the persisted pg_stats information can be used or not.
|
||||
//
|
||||
// In the Checkpoint struct in the returned pg_control file, the redo pointer is
|
||||
// always set to the LSN we're starting at, to hint that no WAL replay is required.
|
||||
// (There's some neon-specific code in Postgres startup to make that work, though.
|
||||
// Just setting the redo pointer is not sufficient.)
|
||||
let was_shutdown = Lsn(checkpoint.redo) == lsn;
|
||||
checkpoint.redo = normalize_lsn(lsn, WAL_SEGMENT_SIZE).0;
|
||||
|
||||
//save new values in pg_control
|
||||
// We use DBState_DB_SHUTDOWNED even if it was not a clean shutdown. The
|
||||
// neon-specific code at postgres startup ignores the state stored in the control
|
||||
// file, similar to archive recovery in standalone PostgreSQL. Similarly, the
|
||||
// checkPoint pointer is ignored, so just set it to 0.
|
||||
pg_control.checkPoint = 0;
|
||||
pg_control.checkPointCopy = checkpoint;
|
||||
pg_control.state = DBState_DB_SHUTDOWNED;
|
||||
|
||||
Ok((pg_control.encode(), pg_control.system_identifier))
|
||||
Ok((pg_control.encode(), pg_control.system_identifier, was_shutdown))
|
||||
}
|
||||
|
||||
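A hedged sketch of how a caller might consume the extended return value; the byte-slice and LSN variables are placeholders, not code from the patch.

// The third element reports whether the basebackup starts from a clean shutdown
// checkpoint, which (per the comment above) decides whether persisted pg_stats
// can be trusted.
let (pg_control, system_identifier, was_shutdown) =
    generate_pg_control(&pg_control_bytes, &checkpoint_bytes, startup_lsn)?;
if was_shutdown {
    // safe to reuse the persisted statistics file
}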
pub fn get_current_timestamp() -> TimestampTz {
|
||||
|
||||
@@ -5,18 +5,15 @@ edition = "2021"
|
||||
license = "MIT/Apache-2.0"
|
||||
|
||||
[dependencies]
|
||||
async-trait.workspace = true
|
||||
bytes.workspace = true
|
||||
byteorder.workspace = true
|
||||
fallible-iterator.workspace = true
|
||||
futures-util = { workspace = true, features = ["sink"] }
|
||||
log = "0.4"
|
||||
parking_lot.workspace = true
|
||||
percent-encoding = "2.0"
|
||||
pin-project-lite.workspace = true
|
||||
phf = "0.11"
|
||||
postgres-protocol2 = { path = "../postgres-protocol2" }
|
||||
postgres-types2 = { path = "../postgres-types2" }
|
||||
tokio = { workspace = true, features = ["io-util", "time", "net"] }
|
||||
tokio-util = { workspace = true, features = ["codec"] }
|
||||
serde = { workspace = true, features = ["derive"] }
|
||||
serde = { workspace = true, features = ["derive"] }
|
||||
|
||||
@@ -10,8 +10,8 @@ use crate::simple_query::SimpleQueryStream;
|
||||
use crate::types::{Oid, ToSql, Type};
|
||||
|
||||
use crate::{
|
||||
prepare, query, simple_query, slice_iter, CancelToken, Error, ReadyForQueryStatus, Row,
|
||||
SimpleQueryMessage, Statement, ToStatement, Transaction, TransactionBuilder,
|
||||
query, simple_query, slice_iter, CancelToken, Error, ReadyForQueryStatus, Row,
|
||||
SimpleQueryMessage, Statement, Transaction, TransactionBuilder,
|
||||
};
|
||||
use bytes::BytesMut;
|
||||
use fallible_iterator::FallibleIterator;
|
||||
@@ -54,18 +54,18 @@ impl Responses {
|
||||
}
|
||||
|
||||
/// A cache of type info and prepared statements for fetching type info
|
||||
/// (corresponding to the queries in the [prepare] module).
|
||||
/// (corresponding to the queries in the [crate::prepare] module).
|
||||
#[derive(Default)]
|
||||
struct CachedTypeInfo {
|
||||
/// A statement for basic information for a type from its
|
||||
/// OID. Corresponds to [TYPEINFO_QUERY](prepare::TYPEINFO_QUERY) (or its
|
||||
/// OID. Corresponds to [TYPEINFO_QUERY](crate::prepare::TYPEINFO_QUERY) (or its
|
||||
/// fallback).
|
||||
typeinfo: Option<Statement>,
|
||||
/// A statement for getting information for a composite type from its OID.
|
||||
/// Corresponds to [TYPEINFO_QUERY](prepare::TYPEINFO_COMPOSITE_QUERY).
|
||||
/// Corresponds to [TYPEINFO_QUERY](crate::prepare::TYPEINFO_COMPOSITE_QUERY).
|
||||
typeinfo_composite: Option<Statement>,
|
||||
/// A statement for getting information for a composite type from its OID.
|
||||
/// Corresponds to [TYPEINFO_QUERY](prepare::TYPEINFO_COMPOSITE_QUERY) (or
|
||||
/// Corresponds to [TYPEINFO_QUERY](crate::prepare::TYPEINFO_COMPOSITE_QUERY) (or
|
||||
/// its fallback).
|
||||
typeinfo_enum: Option<Statement>,
|
||||
|
||||
@@ -190,26 +190,6 @@ impl Client {
|
||||
&self.inner
|
||||
}
|
||||
|
||||
/// Creates a new prepared statement.
|
||||
///
|
||||
/// Prepared statements can be executed repeatedly, and may contain query parameters (indicated by `$1`, `$2`, etc),
|
||||
/// which are set when executed. Prepared statements can only be used with the connection that created them.
|
||||
pub async fn prepare(&self, query: &str) -> Result<Statement, Error> {
|
||||
self.prepare_typed(query, &[]).await
|
||||
}
|
||||
|
||||
/// Like `prepare`, but allows the types of query parameters to be explicitly specified.
|
||||
///
|
||||
/// The list of types may be smaller than the number of parameters - the types of the remaining parameters will be
|
||||
/// inferred. For example, `client.prepare_typed(query, &[])` is equivalent to `client.prepare(query)`.
|
||||
pub async fn prepare_typed(
|
||||
&self,
|
||||
query: &str,
|
||||
parameter_types: &[Type],
|
||||
) -> Result<Statement, Error> {
|
||||
prepare::prepare(&self.inner, query, parameter_types).await
|
||||
}
|
||||
|
||||
/// Executes a statement, returning a vector of the resulting rows.
|
||||
///
|
||||
/// A statement may contain parameters, specified by `$n`, where `n` is the index of the parameter of the list
|
||||
@@ -222,14 +202,11 @@ impl Client {
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics if the number of parameters provided does not match the number expected.
|
||||
pub async fn query<T>(
|
||||
pub async fn query(
|
||||
&self,
|
||||
statement: &T,
|
||||
statement: Statement,
|
||||
params: &[&(dyn ToSql + Sync)],
|
||||
) -> Result<Vec<Row>, Error>
|
||||
where
|
||||
T: ?Sized + ToStatement,
|
||||
{
|
||||
) -> Result<Vec<Row>, Error> {
|
||||
self.query_raw(statement, slice_iter(params))
|
||||
.await?
|
||||
.try_collect()
|
||||
@@ -250,13 +227,15 @@ impl Client {
|
||||
/// Panics if the number of parameters provided does not match the number expected.
|
||||
///
|
||||
/// [`query`]: #method.query
|
||||
pub async fn query_raw<'a, T, I>(&self, statement: &T, params: I) -> Result<RowStream, Error>
|
||||
pub async fn query_raw<'a, I>(
|
||||
&self,
|
||||
statement: Statement,
|
||||
params: I,
|
||||
) -> Result<RowStream, Error>
|
||||
where
|
||||
T: ?Sized + ToStatement,
|
||||
I: IntoIterator<Item = &'a (dyn ToSql + Sync)>,
|
||||
I::IntoIter: ExactSizeIterator,
|
||||
{
|
||||
let statement = statement.__convert().into_statement(self).await?;
|
||||
query::query(&self.inner, statement, params).await
|
||||
}
|
||||
|
||||
@@ -271,55 +250,6 @@ impl Client {
|
||||
query::query_txt(&self.inner, statement, params).await
|
||||
}
|
||||
|
||||
/// Executes a statement, returning the number of rows modified.
|
||||
///
|
||||
/// A statement may contain parameters, specified by `$n`, where `n` is the index of the parameter of the list
|
||||
/// provided, 1-indexed.
|
||||
///
|
||||
/// The `statement` argument can either be a `Statement`, or a raw query string. If the same statement will be
|
||||
/// repeatedly executed (perhaps with different query parameters), consider preparing the statement up front
|
||||
/// with the `prepare` method.
|
||||
///
|
||||
/// If the statement does not modify any rows (e.g. `SELECT`), 0 is returned.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics if the number of parameters provided does not match the number expected.
|
||||
pub async fn execute<T>(
|
||||
&self,
|
||||
statement: &T,
|
||||
params: &[&(dyn ToSql + Sync)],
|
||||
) -> Result<u64, Error>
|
||||
where
|
||||
T: ?Sized + ToStatement,
|
||||
{
|
||||
self.execute_raw(statement, slice_iter(params)).await
|
||||
}
|
||||
|
||||
/// The maximally flexible version of [`execute`].
|
||||
///
|
||||
/// A statement may contain parameters, specified by `$n`, where `n` is the index of the parameter of the list
|
||||
/// provided, 1-indexed.
|
||||
///
|
||||
/// The `statement` argument can either be a `Statement`, or a raw query string. If the same statement will be
|
||||
/// repeatedly executed (perhaps with different query parameters), consider preparing the statement up front
|
||||
/// with the `prepare` method.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics if the number of parameters provided does not match the number expected.
|
||||
///
|
||||
/// [`execute`]: #method.execute
|
||||
pub async fn execute_raw<'a, T, I>(&self, statement: &T, params: I) -> Result<u64, Error>
|
||||
where
|
||||
T: ?Sized + ToStatement,
|
||||
I: IntoIterator<Item = &'a (dyn ToSql + Sync)>,
|
||||
I::IntoIter: ExactSizeIterator,
|
||||
{
|
||||
let statement = statement.__convert().into_statement(self).await?;
|
||||
query::execute(self.inner(), statement, params).await
|
||||
}
|
||||
|
||||
/// Executes a sequence of SQL statements using the simple query protocol, returning the resulting rows.
|
||||
///
|
||||
/// Statements should be separated by semicolons. If an error occurs, execution of the sequence will stop at that
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
#![allow(async_fn_in_trait)]
|
||||
|
||||
use crate::query::RowStream;
|
||||
use crate::types::Type;
|
||||
use crate::{Client, Error, Transaction};
|
||||
use async_trait::async_trait;
|
||||
use postgres_protocol2::Oid;
|
||||
|
||||
mod private {
|
||||
@@ -11,7 +12,6 @@ mod private {
|
||||
/// A trait allowing abstraction over connections and transactions.
|
||||
///
|
||||
/// This trait is "sealed", and cannot be implemented outside of this crate.
|
||||
#[async_trait]
|
||||
pub trait GenericClient: private::Sealed {
|
||||
/// Like `Client::query_raw_txt`.
|
||||
async fn query_raw_txt<S, I>(&self, statement: &str, params: I) -> Result<RowStream, Error>
|
||||
@@ -26,7 +26,6 @@ pub trait GenericClient: private::Sealed {
|
||||
|
||||
impl private::Sealed for Client {}
|
||||
|
||||
#[async_trait]
|
||||
impl GenericClient for Client {
|
||||
async fn query_raw_txt<S, I>(&self, statement: &str, params: I) -> Result<RowStream, Error>
|
||||
where
|
||||
@@ -39,14 +38,12 @@ impl GenericClient for Client {
|
||||
|
||||
/// Query for type information
|
||||
async fn get_type(&self, oid: Oid) -> Result<Type, Error> {
|
||||
self.get_type(oid).await
|
||||
crate::prepare::get_type(self.inner(), oid).await
|
||||
}
|
||||
}
|
||||
|
||||
impl private::Sealed for Transaction<'_> {}
|
||||
|
||||
#[async_trait]
|
||||
#[allow(clippy::needless_lifetimes)]
|
||||
impl GenericClient for Transaction<'_> {
|
||||
async fn query_raw_txt<S, I>(&self, statement: &str, params: I) -> Result<RowStream, Error>
|
||||
where
|
||||
|
||||
@@ -14,7 +14,6 @@ pub use crate::row::{Row, SimpleQueryRow};
|
||||
pub use crate::simple_query::SimpleQueryStream;
|
||||
pub use crate::statement::{Column, Statement};
|
||||
pub use crate::tls::NoTls;
|
||||
pub use crate::to_statement::ToStatement;
|
||||
pub use crate::transaction::Transaction;
|
||||
pub use crate::transaction_builder::{IsolationLevel, TransactionBuilder};
|
||||
use crate::types::ToSql;
|
||||
@@ -65,7 +64,6 @@ pub mod row;
|
||||
mod simple_query;
|
||||
mod statement;
|
||||
pub mod tls;
|
||||
mod to_statement;
|
||||
mod transaction;
|
||||
mod transaction_builder;
|
||||
pub mod types;
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
use crate::client::InnerClient;
|
||||
use crate::codec::FrontendMessage;
|
||||
use crate::connection::RequestMessages;
|
||||
use crate::error::SqlState;
|
||||
use crate::types::{Field, Kind, Oid, Type};
|
||||
use crate::{query, slice_iter};
|
||||
use crate::{Column, Error, Statement};
|
||||
@@ -13,7 +12,6 @@ use postgres_protocol2::message::backend::Message;
|
||||
use postgres_protocol2::message::frontend;
|
||||
use std::future::Future;
|
||||
use std::pin::Pin;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::sync::Arc;
|
||||
|
||||
pub(crate) const TYPEINFO_QUERY: &str = "\
|
||||
@@ -24,14 +22,6 @@ INNER JOIN pg_catalog.pg_namespace n ON t.typnamespace = n.oid
|
||||
WHERE t.oid = $1
|
||||
";
|
||||
|
||||
// Range types weren't added until Postgres 9.2, so pg_range may not exist
|
||||
const TYPEINFO_FALLBACK_QUERY: &str = "\
|
||||
SELECT t.typname, t.typtype, t.typelem, NULL::OID, t.typbasetype, n.nspname, t.typrelid
|
||||
FROM pg_catalog.pg_type t
|
||||
INNER JOIN pg_catalog.pg_namespace n ON t.typnamespace = n.oid
|
||||
WHERE t.oid = $1
|
||||
";
|
||||
|
||||
const TYPEINFO_ENUM_QUERY: &str = "\
|
||||
SELECT enumlabel
|
||||
FROM pg_catalog.pg_enum
|
||||
@@ -39,14 +29,6 @@ WHERE enumtypid = $1
|
||||
ORDER BY enumsortorder
|
||||
";
|
||||
|
||||
// Postgres 9.0 didn't have enumsortorder
|
||||
const TYPEINFO_ENUM_FALLBACK_QUERY: &str = "\
|
||||
SELECT enumlabel
|
||||
FROM pg_catalog.pg_enum
|
||||
WHERE enumtypid = $1
|
||||
ORDER BY oid
|
||||
";
|
||||
|
||||
pub(crate) const TYPEINFO_COMPOSITE_QUERY: &str = "\
|
||||
SELECT attname, atttypid
|
||||
FROM pg_catalog.pg_attribute
|
||||
@@ -56,15 +38,13 @@ AND attnum > 0
|
||||
ORDER BY attnum
|
||||
";
|
||||
|
||||
static NEXT_ID: AtomicUsize = AtomicUsize::new(0);
|
||||
|
||||
pub async fn prepare(
|
||||
client: &Arc<InnerClient>,
|
||||
name: &'static str,
|
||||
query: &str,
|
||||
types: &[Type],
|
||||
) -> Result<Statement, Error> {
|
||||
let name = format!("s{}", NEXT_ID.fetch_add(1, Ordering::SeqCst));
|
||||
let buf = encode(client, &name, query, types)?;
|
||||
let buf = encode(client, name, query, types)?;
|
||||
let mut responses = client.send(RequestMessages::Single(FrontendMessage::Raw(buf)))?;
|
||||
|
||||
match responses.next().await? {
|
||||
@@ -105,10 +85,11 @@ pub async fn prepare(
|
||||
|
||||
fn prepare_rec<'a>(
|
||||
client: &'a Arc<InnerClient>,
|
||||
name: &'static str,
|
||||
query: &'a str,
|
||||
types: &'a [Type],
|
||||
) -> Pin<Box<dyn Future<Output = Result<Statement, Error>> + 'a + Send>> {
|
||||
Box::pin(prepare(client, query, types))
|
||||
Box::pin(prepare(client, name, query, types))
|
||||
}
|
||||
|
||||
fn encode(client: &InnerClient, name: &str, query: &str, types: &[Type]) -> Result<Bytes, Error> {
|
||||
@@ -192,13 +173,8 @@ async fn typeinfo_statement(client: &Arc<InnerClient>) -> Result<Statement, Erro
|
||||
return Ok(stmt);
|
||||
}
|
||||
|
||||
let stmt = match prepare_rec(client, TYPEINFO_QUERY, &[]).await {
|
||||
Ok(stmt) => stmt,
|
||||
Err(ref e) if e.code() == Some(&SqlState::UNDEFINED_TABLE) => {
|
||||
prepare_rec(client, TYPEINFO_FALLBACK_QUERY, &[]).await?
|
||||
}
|
||||
Err(e) => return Err(e),
|
||||
};
|
||||
let typeinfo = "neon_proxy_typeinfo";
|
||||
let stmt = prepare_rec(client, typeinfo, TYPEINFO_QUERY, &[]).await?;
|
||||
|
||||
client.set_typeinfo(&stmt);
|
||||
Ok(stmt)
|
||||
@@ -219,13 +195,8 @@ async fn typeinfo_enum_statement(client: &Arc<InnerClient>) -> Result<Statement,
|
||||
return Ok(stmt);
|
||||
}
|
||||
|
||||
let stmt = match prepare_rec(client, TYPEINFO_ENUM_QUERY, &[]).await {
|
||||
Ok(stmt) => stmt,
|
||||
Err(ref e) if e.code() == Some(&SqlState::UNDEFINED_COLUMN) => {
|
||||
prepare_rec(client, TYPEINFO_ENUM_FALLBACK_QUERY, &[]).await?
|
||||
}
|
||||
Err(e) => return Err(e),
|
||||
};
|
||||
let typeinfo = "neon_proxy_typeinfo_enum";
|
||||
let stmt = prepare_rec(client, typeinfo, TYPEINFO_ENUM_QUERY, &[]).await?;
|
||||
|
||||
client.set_typeinfo_enum(&stmt);
|
||||
Ok(stmt)
|
||||
@@ -255,7 +226,8 @@ async fn typeinfo_composite_statement(client: &Arc<InnerClient>) -> Result<State
|
||||
return Ok(stmt);
|
||||
}
|
||||
|
||||
let stmt = prepare_rec(client, TYPEINFO_COMPOSITE_QUERY, &[]).await?;
|
||||
let typeinfo = "neon_proxy_typeinfo_composite";
|
||||
let stmt = prepare_rec(client, typeinfo, TYPEINFO_COMPOSITE_QUERY, &[]).await?;
|
||||
|
||||
client.set_typeinfo_composite(&stmt);
|
||||
Ok(stmt)
|
||||
|
||||
@@ -157,49 +157,6 @@ where
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn execute<'a, I>(
|
||||
client: &InnerClient,
|
||||
statement: Statement,
|
||||
params: I,
|
||||
) -> Result<u64, Error>
|
||||
where
|
||||
I: IntoIterator<Item = &'a (dyn ToSql + Sync)>,
|
||||
I::IntoIter: ExactSizeIterator,
|
||||
{
|
||||
let buf = if log_enabled!(Level::Debug) {
|
||||
let params = params.into_iter().collect::<Vec<_>>();
|
||||
debug!(
|
||||
"executing statement {} with parameters: {:?}",
|
||||
statement.name(),
|
||||
BorrowToSqlParamsDebug(params.as_slice()),
|
||||
);
|
||||
encode(client, &statement, params)?
|
||||
} else {
|
||||
encode(client, &statement, params)?
|
||||
};
|
||||
let mut responses = start(client, buf).await?;
|
||||
|
||||
let mut rows = 0;
|
||||
loop {
|
||||
match responses.next().await? {
|
||||
Message::DataRow(_) => {}
|
||||
Message::CommandComplete(body) => {
|
||||
rows = body
|
||||
.tag()
|
||||
.map_err(Error::parse)?
|
||||
.rsplit(' ')
|
||||
.next()
|
||||
.unwrap()
|
||||
.parse()
|
||||
.unwrap_or(0);
|
||||
}
|
||||
Message::EmptyQueryResponse => rows = 0,
|
||||
Message::ReadyForQuery(_) => return Ok(rows),
|
||||
_ => return Err(Error::unexpected_message()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn start(client: &InnerClient, buf: Bytes) -> Result<Responses, Error> {
|
||||
let mut responses = client.send(RequestMessages::Single(FrontendMessage::Raw(buf)))?;
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@ use std::{
|
||||
|
||||
struct StatementInner {
|
||||
client: Weak<InnerClient>,
|
||||
name: String,
|
||||
name: &'static str,
|
||||
params: Vec<Type>,
|
||||
columns: Vec<Column>,
|
||||
}
|
||||
@@ -22,7 +22,7 @@ impl Drop for StatementInner {
|
||||
fn drop(&mut self) {
|
||||
if let Some(client) = self.client.upgrade() {
|
||||
let buf = client.with_buf(|buf| {
|
||||
frontend::close(b'S', &self.name, buf).unwrap();
|
||||
frontend::close(b'S', self.name, buf).unwrap();
|
||||
frontend::sync(buf);
|
||||
buf.split().freeze()
|
||||
});
|
||||
@@ -40,7 +40,7 @@ pub struct Statement(Arc<StatementInner>);
|
||||
impl Statement {
|
||||
pub(crate) fn new(
|
||||
inner: &Arc<InnerClient>,
|
||||
name: String,
|
||||
name: &'static str,
|
||||
params: Vec<Type>,
|
||||
columns: Vec<Column>,
|
||||
) -> Statement {
|
||||
@@ -55,14 +55,14 @@ impl Statement {
|
||||
pub(crate) fn new_anonymous(params: Vec<Type>, columns: Vec<Column>) -> Statement {
|
||||
Statement(Arc::new(StatementInner {
|
||||
client: Weak::new(),
|
||||
name: String::new(),
|
||||
name: "<anonymous>",
|
||||
params,
|
||||
columns,
|
||||
}))
|
||||
}
|
||||
|
||||
pub(crate) fn name(&self) -> &str {
|
||||
&self.0.name
|
||||
self.0.name
|
||||
}
|
||||
|
||||
/// Returns the expected types of the statement's parameters.
|
||||
|
||||
@@ -1,57 +0,0 @@
|
||||
use crate::to_statement::private::{Sealed, ToStatementType};
|
||||
use crate::Statement;
|
||||
|
||||
mod private {
|
||||
use crate::{Client, Error, Statement};
|
||||
|
||||
pub trait Sealed {}
|
||||
|
||||
pub enum ToStatementType<'a> {
|
||||
Statement(&'a Statement),
|
||||
Query(&'a str),
|
||||
}
|
||||
|
||||
impl ToStatementType<'_> {
|
||||
pub async fn into_statement(self, client: &Client) -> Result<Statement, Error> {
|
||||
match self {
|
||||
ToStatementType::Statement(s) => Ok(s.clone()),
|
||||
ToStatementType::Query(s) => client.prepare(s).await,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A trait abstracting over prepared and unprepared statements.
|
||||
///
|
||||
/// Many methods are generic over this bound, so that they support both a raw query string as well as a statement which
|
||||
/// was prepared previously.
|
||||
///
|
||||
/// This trait is "sealed" and cannot be implemented by anything outside this crate.
|
||||
pub trait ToStatement: Sealed {
|
||||
#[doc(hidden)]
|
||||
fn __convert(&self) -> ToStatementType<'_>;
|
||||
}
|
||||
|
||||
impl ToStatement for Statement {
|
||||
fn __convert(&self) -> ToStatementType<'_> {
|
||||
ToStatementType::Statement(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl Sealed for Statement {}
|
||||
|
||||
impl ToStatement for str {
|
||||
fn __convert(&self) -> ToStatementType<'_> {
|
||||
ToStatementType::Query(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl Sealed for str {}
|
||||
|
||||
impl ToStatement for String {
|
||||
fn __convert(&self) -> ToStatementType<'_> {
|
||||
ToStatementType::Query(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl Sealed for String {}
|
||||
@@ -9,13 +9,43 @@ use anyhow::bail;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use utils::id::NodeId;

/// Number uniquely identifying safekeeper configuration.
/// Note: it is a part of sk control file.
pub type Generation = u32;
/// 1 is the first valid generation, 0 is used as
/// a placeholder before we fully migrate to generations.
pub const INVALID_GENERATION: Generation = 0;
pub const INITIAL_GENERATION: Generation = 1;
pub const INVALID_GENERATION: SafekeeperGeneration = SafekeeperGeneration::new(0);
pub const INITIAL_GENERATION: SafekeeperGeneration = SafekeeperGeneration::new(1);

/// Number uniquely identifying safekeeper configuration.
/// Note: it is a part of sk control file.
///
/// Like tenant generations, but for safekeepers.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub struct SafekeeperGeneration(u32);

impl SafekeeperGeneration {
    pub const fn new(v: u32) -> Self {
        Self(v)
    }

    #[track_caller]
    pub fn previous(&self) -> Option<Self> {
        Some(Self(self.0.checked_sub(1)?))
    }

    #[track_caller]
    pub fn next(&self) -> Self {
        Self(self.0 + 1)
    }

    pub fn into_inner(self) -> u32 {
        self.0
    }
}

impl Display for SafekeeperGeneration {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.0)
    }
}

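As a rough illustration (not part of the diff), the sketch below exercises the constructors and helpers introduced above. It assumes it lives in the same module as `SafekeeperGeneration` and the generation constants; the concrete values are made up.

```rust
// Illustrative sketch only, assuming the SafekeeperGeneration API shown above.
fn bump(current: SafekeeperGeneration) -> SafekeeperGeneration {
    // 0 (INVALID_GENERATION) is only a pre-migration placeholder; real
    // configurations start at INITIAL_GENERATION (1) and move forward.
    if current == INVALID_GENERATION {
        INITIAL_GENERATION
    } else {
        current.next()
    }
}

#[test]
fn generation_helpers() {
    let g = SafekeeperGeneration::new(41);
    assert_eq!(g.next().into_inner(), 42);
    assert_eq!(g.next().previous(), Some(g));
    assert_eq!(bump(INVALID_GENERATION), INITIAL_GENERATION);
    assert_eq!(format!("{INITIAL_GENERATION}"), "1");
}
```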
/// Membership is defined by ids so e.g. walproposer uses them to figure out
|
||||
/// quorums, but we also carry host and port to give wp idea where to connect.
|
||||
@@ -89,7 +119,7 @@ impl Display for MemberSet {
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
pub struct Configuration {
|
||||
/// Unique id.
|
||||
pub generation: Generation,
|
||||
pub generation: SafekeeperGeneration,
|
||||
/// Current members of the configuration.
|
||||
pub members: MemberSet,
|
||||
/// Some means it is a joint conf.
|
||||
|
||||
@@ -282,3 +282,18 @@ pub struct TimelineTermBumpResponse {
|
||||
pub struct SafekeeperUtilization {
|
||||
pub timeline_count: u64,
|
||||
}
|
||||
|
||||
/// pull_timeline request body.
|
||||
#[derive(Debug, Deserialize, Serialize)]
|
||||
pub struct PullTimelineRequest {
|
||||
pub tenant_id: TenantId,
|
||||
pub timeline_id: TimelineId,
|
||||
pub http_hosts: Vec<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct PullTimelineResponse {
|
||||
// Donor safekeeper host
|
||||
pub safekeeper_host: String,
|
||||
// TODO: add more fields?
|
||||
}
|
||||
|
||||
@@ -24,11 +24,10 @@ diatomic-waker.workspace = true
|
||||
git-version.workspace = true
|
||||
hex = { workspace = true, features = ["serde"] }
|
||||
humantime.workspace = true
|
||||
inferno.workspace = true
|
||||
fail.workspace = true
|
||||
futures = { workspace = true }
|
||||
jsonwebtoken.workspace = true
|
||||
nix.workspace = true
|
||||
nix = { workspace = true, features = ["ioctl"] }
|
||||
once_cell.workspace = true
|
||||
pin-project-lite.workspace = true
|
||||
regex.workspace = true
|
||||
@@ -62,6 +61,7 @@ bytes.workspace = true
|
||||
criterion.workspace = true
|
||||
hex-literal.workspace = true
|
||||
camino-tempfile.workspace = true
|
||||
pprof.workspace = true
|
||||
serde_assert.workspace = true
|
||||
tokio = { workspace = true, features = ["test-util"] }
|
||||
|
||||
|
||||
libs/utils/benches/README.md (new file, 26 lines)
@@ -0,0 +1,26 @@
## Utils Benchmarks

To run benchmarks:

```sh
# All benchmarks.
cargo bench --package utils

# Specific file.
cargo bench --package utils --bench benchmarks

# Specific benchmark.
cargo bench --package utils --bench benchmarks warn_slow/enabled=true

# List available benchmarks.
cargo bench --package utils --benches -- --list

# Generate flamegraph profiles using pprof-rs, profiling for 10 seconds.
# Output in target/criterion/*/profile/flamegraph.svg.
cargo bench --package utils --bench benchmarks warn_slow/enabled=true --profile-time 10
```

Additional charts and statistics are available in `target/criterion/report/index.html`.

Benchmarks are automatically compared against the previous run. To compare against other runs, see
`--baseline` and `--save-baseline`.
@@ -1,5 +1,18 @@
|
||||
use criterion::{criterion_group, criterion_main, Criterion};
|
||||
use std::time::Duration;
|
||||
|
||||
use criterion::{criterion_group, criterion_main, Bencher, Criterion};
|
||||
use pprof::criterion::{Output, PProfProfiler};
|
||||
use utils::id;
|
||||
use utils::logging::warn_slow;
|
||||
|
||||
// Register benchmarks with Criterion.
|
||||
criterion_group!(
|
||||
name = benches;
|
||||
config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
|
||||
targets = bench_id_stringify,
|
||||
bench_warn_slow,
|
||||
);
|
||||
criterion_main!(benches);
|
||||
|
||||
pub fn bench_id_stringify(c: &mut Criterion) {
|
||||
// Can only use public methods.
|
||||
@@ -16,5 +29,31 @@ pub fn bench_id_stringify(c: &mut Criterion) {
|
||||
});
|
||||
}
|
||||
|
||||
criterion_group!(benches, bench_id_stringify);
|
||||
criterion_main!(benches);
|
||||
pub fn bench_warn_slow(c: &mut Criterion) {
|
||||
for enabled in [false, true] {
|
||||
c.bench_function(&format!("warn_slow/enabled={enabled}"), |b| {
|
||||
run_bench(b, enabled).unwrap()
|
||||
});
|
||||
}
|
||||
|
||||
// The actual benchmark.
|
||||
fn run_bench(b: &mut Bencher, enabled: bool) -> anyhow::Result<()> {
|
||||
const THRESHOLD: Duration = Duration::from_secs(1);
|
||||
|
||||
// Use a multi-threaded runtime to avoid thread parking overhead when yielding.
|
||||
let runtime = tokio::runtime::Builder::new_multi_thread()
|
||||
.enable_all()
|
||||
.build()?;
|
||||
|
||||
// Test both with and without warn_slow, since we're essentially measuring Tokio scheduling
|
||||
// performance too. Use a simple noop future that yields once, to avoid any scheduler fast
|
||||
// paths for a ready future.
|
||||
if enabled {
|
||||
b.iter(|| runtime.block_on(warn_slow("ready", THRESHOLD, tokio::task::yield_now())));
|
||||
} else {
|
||||
b.iter(|| runtime.block_on(tokio::task::yield_now()));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -286,6 +286,11 @@ mod tests {
|
||||
const SHORT2_ENC_LE: &[u8] = &[8, 0, 0, 3, 7];
|
||||
const SHORT2_ENC_LE_TRAILING: &[u8] = &[8, 0, 0, 3, 7, 0xff, 0xff, 0xff];
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
|
||||
struct NewTypeStruct(u32);
|
||||
const NT1: NewTypeStruct = NewTypeStruct(414243);
|
||||
const NT1_INNER: u32 = 414243;
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct LongMsg {
|
||||
pub tag: u8,
|
||||
@@ -408,4 +413,42 @@ mod tests {
|
||||
let msg2 = LongMsg::des(&encoded).unwrap();
|
||||
assert_eq!(msg, msg2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
/// Ensure that newtype wrappers around u32 don't change the serialization format
|
||||
fn be_nt() {
|
||||
use super::BeSer;
|
||||
|
||||
assert_eq!(NT1.serialized_size().unwrap(), 4);
|
||||
|
||||
let msg = NT1;
|
||||
|
||||
let encoded = msg.ser().unwrap();
|
||||
let expected = hex_literal::hex!("0006 5223");
|
||||
assert_eq!(encoded, expected);
|
||||
|
||||
assert_eq!(encoded, NT1_INNER.ser().unwrap());
|
||||
|
||||
let msg2 = NewTypeStruct::des(&encoded).unwrap();
|
||||
assert_eq!(msg, msg2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
/// Ensure that newtype wrappers around u32 don't change the serialization format
|
||||
fn le_nt() {
|
||||
use super::LeSer;
|
||||
|
||||
assert_eq!(NT1.serialized_size().unwrap(), 4);
|
||||
|
||||
let msg = NT1;
|
||||
|
||||
let encoded = msg.ser().unwrap();
|
||||
let expected = hex_literal::hex!("2352 0600");
|
||||
assert_eq!(encoded, expected);
|
||||
|
||||
assert_eq!(encoded, NT1_INNER.ser().unwrap());
|
||||
|
||||
let msg2 = NewTypeStruct::des(&encoded).unwrap();
|
||||
assert_eq!(msg, msg2);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -93,6 +93,9 @@ pub mod try_rcu;
|
||||
|
||||
pub mod guard_arc_swap;
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
pub mod linux_socket_ioctl;
|
||||
|
||||
// Re-export used in macro. Avoids adding git-version as dep in target crates.
|
||||
#[doc(hidden)]
|
||||
pub use git_version;
|
||||
|
||||
libs/utils/src/linux_socket_ioctl.rs (new file, 35 lines)
@@ -0,0 +1,35 @@
//! Linux-specific socket ioctls.
//!
//! <https://elixir.bootlin.com/linux/v6.1.128/source/include/uapi/linux/sockios.h#L25-L27>

use std::{
    io,
    mem::MaybeUninit,
    os::{fd::RawFd, raw::c_int},
};

use nix::libc::{FIONREAD, TIOCOUTQ};

unsafe fn do_ioctl(socket_fd: RawFd, cmd: nix::libc::Ioctl) -> io::Result<c_int> {
    let mut inq: MaybeUninit<c_int> = MaybeUninit::uninit();
    let err = nix::libc::ioctl(socket_fd, cmd, inq.as_mut_ptr());
    if err == 0 {
        Ok(inq.assume_init())
    } else {
        Err(io::Error::last_os_error())
    }
}

/// # Safety
///
/// Caller must ensure that `socket_fd` is a valid TCP socket file descriptor.
pub unsafe fn inq(socket_fd: RawFd) -> io::Result<c_int> {
    do_ioctl(socket_fd, FIONREAD)
}

/// # Safety
///
/// Caller must ensure that `socket_fd` is a valid TCP socket file descriptor.
pub unsafe fn outq(socket_fd: RawFd) -> io::Result<c_int> {
    do_ioctl(socket_fd, TIOCOUTQ)
}
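For context (not part of the change), here is a minimal sketch of calling these helpers on Linux. It assumes the module is exposed as `utils::linux_socket_ioctl` per the `lib.rs` hunk below; the loopback connection exists only to obtain a valid TCP file descriptor.

```rust
// Minimal usage sketch (Linux only); illustrative, not part of the diff.
use std::net::{TcpListener, TcpStream};
use std::os::fd::AsRawFd;

use utils::linux_socket_ioctl::{inq, outq};

fn main() -> std::io::Result<()> {
    // Set up a loopback TCP connection purely to get a real socket fd.
    let listener = TcpListener::bind("127.0.0.1:0")?;
    let stream = TcpStream::connect(listener.local_addr()?)?;

    // SAFETY: `stream` is a live TCP socket, so its raw fd satisfies the
    // safety contract of inq()/outq().
    let unread = unsafe { inq(stream.as_raw_fd())? };
    let unsent = unsafe { outq(stream.as_raw_fd())? };
    println!("recv queue: {unread} bytes, send queue: {unsent} bytes");
    Ok(())
}
```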
@@ -1,9 +1,13 @@
|
||||
use std::future::Future;
|
||||
use std::str::FromStr;
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::Context;
|
||||
use metrics::{IntCounter, IntCounterVec};
|
||||
use once_cell::sync::Lazy;
|
||||
use strum_macros::{EnumString, VariantNames};
|
||||
use tokio::time::Instant;
|
||||
use tracing::warn;
|
||||
|
||||
/// Logs a critical error, similarly to `tracing::error!`. This will:
|
||||
///
|
||||
@@ -318,6 +322,41 @@ impl std::fmt::Debug for SecretString {
|
||||
}
|
||||
}
|
||||

/// Logs a periodic warning if a future is slow to complete.
///
/// This is performance-sensitive as it's used on the GetPage read path.
#[inline]
pub async fn warn_slow<O>(name: &str, threshold: Duration, f: impl Future<Output = O>) -> O {
    // TODO: we unfortunately have to pin the future on the heap, since GetPage futures are huge and
    // won't fit on the stack.
    let mut f = Box::pin(f);

    let started = Instant::now();
    let mut attempt = 1;

    loop {
        // NB: use timeout_at() instead of timeout() to avoid an extra clock reading in the common
        // case where the timeout doesn't fire.
        let deadline = started + attempt * threshold;
        if let Ok(output) = tokio::time::timeout_at(deadline, &mut f).await {
            // NB: we check if we exceeded the threshold even if the timeout never fired, because
            // scheduling or execution delays may cause the future to succeed even if it exceeds the
            // timeout. This costs an extra unconditional clock reading, but seems worth it to avoid
            // false negatives.
            let elapsed = started.elapsed();
            if elapsed >= threshold {
                warn!("slow {name} completed after {:.3}s", elapsed.as_secs_f64());
            }
            return output;
        }

        let elapsed = started.elapsed().as_secs_f64();
        warn!("slow {name} still running after {elapsed:.3}s",);

        attempt += 1;
    }
}

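As a usage sketch (not part of the change), this is how an awaited operation might be wrapped with `warn_slow`; the operation, the `"get_page"` name, and the one-second threshold are illustrative.

```rust
// Illustrative sketch of wrapping a future with warn_slow.
use std::time::Duration;

use utils::logging::warn_slow;

async fn fetch_page() -> Vec<u8> {
    // Stand-in for a potentially slow GetPage-style request.
    tokio::time::sleep(Duration::from_millis(10)).await;
    vec![0u8; 8192]
}

#[tokio::main]
async fn main() {
    // Warns every second while the future is still running, and once more on
    // completion if the total time exceeded the threshold.
    let page = warn_slow("get_page", Duration::from_secs(1), fetch_page()).await;
    assert_eq!(page.len(), 8192);
}
```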
#[cfg(test)]
|
||||
mod tests {
|
||||
use metrics::{core::Opts, IntCounterVec};
|
||||
|
||||
@@ -117,6 +117,10 @@ impl TenantShardId {
|
||||
)
|
||||
}
|
||||
|
||||
pub fn range(&self) -> RangeInclusive<Self> {
|
||||
RangeInclusive::new(*self, *self)
|
||||
}
|
||||
|
||||
pub fn shard_slug(&self) -> impl std::fmt::Display + '_ {
|
||||
ShardSlug(self)
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
name = "vm_monitor"
|
||||
version = "0.1.0"
|
||||
edition.workspace = true
|
||||
edition = "2024"
|
||||
license.workspace = true
|
||||
|
||||
[[bin]]
|
||||
|
||||
@@ -1,12 +1,10 @@
|
||||
use std::fmt::{self, Debug, Formatter};
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use anyhow::{anyhow, Context};
|
||||
use cgroups_rs::{
|
||||
hierarchies::{self, is_cgroup2_unified_mode},
|
||||
memory::MemController,
|
||||
Subsystem,
|
||||
};
|
||||
use anyhow::{Context, anyhow};
|
||||
use cgroups_rs::Subsystem;
|
||||
use cgroups_rs::hierarchies::{self, is_cgroup2_unified_mode};
|
||||
use cgroups_rs::memory::MemController;
|
||||
use tokio::sync::watch;
|
||||
use tracing::{info, warn};
|
||||
|
||||
|
||||
@@ -6,17 +6,15 @@
|
||||
//! the cgroup (requesting upscale), and the signals that go to the cgroup
|
||||
//! (notifying it of upscale).
|
||||
|
||||
use anyhow::{bail, Context};
|
||||
use anyhow::{Context, bail};
|
||||
use axum::extract::ws::{Message, Utf8Bytes, WebSocket};
|
||||
use futures::{
|
||||
stream::{SplitSink, SplitStream},
|
||||
SinkExt, StreamExt,
|
||||
};
|
||||
use futures::stream::{SplitSink, SplitStream};
|
||||
use futures::{SinkExt, StreamExt};
|
||||
use tracing::{debug, info};
|
||||
|
||||
use crate::protocol::{
|
||||
OutboundMsg, OutboundMsgKind, ProtocolRange, ProtocolResponse, ProtocolVersion,
|
||||
PROTOCOL_MAX_VERSION, PROTOCOL_MIN_VERSION,
|
||||
OutboundMsg, OutboundMsgKind, PROTOCOL_MAX_VERSION, PROTOCOL_MIN_VERSION, ProtocolRange,
|
||||
ProtocolResponse, ProtocolVersion,
|
||||
};
|
||||
|
||||
/// The central handler for all communications in the monitor.
|
||||
|
||||
@@ -2,12 +2,14 @@
|
||||
|
||||
use std::num::NonZeroU64;
|
||||
|
||||
use crate::MiB;
|
||||
use anyhow::{anyhow, Context};
|
||||
use tokio_postgres::{types::ToSql, Client, NoTls, Row};
|
||||
use anyhow::{Context, anyhow};
|
||||
use tokio_postgres::types::ToSql;
|
||||
use tokio_postgres::{Client, NoTls, Row};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::{error, info};
|
||||
|
||||
use crate::MiB;
|
||||
|
||||
/// Manages Postgres' file cache by keeping a connection open.
|
||||
#[derive(Debug)]
|
||||
pub struct FileCacheState {
|
||||
|
||||
@@ -2,24 +2,26 @@
|
||||
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||
#![cfg(target_os = "linux")]
|
||||
|
||||
use std::fmt::Debug;
|
||||
use std::net::SocketAddr;
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::Context;
|
||||
use axum::{
|
||||
extract::{ws::WebSocket, State, WebSocketUpgrade},
|
||||
response::Response,
|
||||
};
|
||||
use axum::{routing::get, Router};
|
||||
use axum::Router;
|
||||
use axum::extract::ws::WebSocket;
|
||||
use axum::extract::{State, WebSocketUpgrade};
|
||||
use axum::response::Response;
|
||||
use axum::routing::get;
|
||||
use clap::Parser;
|
||||
use futures::Future;
|
||||
use std::net::SocketAddr;
|
||||
use std::{fmt::Debug, time::Duration};
|
||||
use runner::Runner;
|
||||
use sysinfo::{RefreshKind, System, SystemExt};
|
||||
use tokio::net::TcpListener;
|
||||
use tokio::{sync::broadcast, task::JoinHandle};
|
||||
use tokio::sync::broadcast;
|
||||
use tokio::task::JoinHandle;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::{error, info};
|
||||
|
||||
use runner::Runner;
|
||||
|
||||
// Code that interfaces with agent
|
||||
pub mod dispatcher;
|
||||
pub mod protocol;
|
||||
|
||||
@@ -35,7 +35,8 @@
|
||||
use core::fmt;
|
||||
use std::cmp;
|
||||
|
||||
use serde::{de::Error, Deserialize, Serialize};
|
||||
use serde::de::Error;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// A Message we send to the agent.
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
use std::fmt::Debug;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use anyhow::{bail, Context};
|
||||
use anyhow::{Context, bail};
|
||||
use axum::extract::ws::{Message, WebSocket};
|
||||
use futures::StreamExt;
|
||||
use tokio::sync::{broadcast, watch};
|
||||
@@ -18,7 +18,7 @@ use crate::cgroup::{self, CgroupWatcher};
|
||||
use crate::dispatcher::Dispatcher;
|
||||
use crate::filecache::{FileCacheConfig, FileCacheState};
|
||||
use crate::protocol::{InboundMsg, InboundMsgKind, OutboundMsg, OutboundMsgKind, Resources};
|
||||
use crate::{bytes_to_mebibytes, get_total_system_memory, spawn_with_cancel, Args, MiB};
|
||||
use crate::{Args, MiB, bytes_to_mebibytes, get_total_system_memory, spawn_with_cancel};
|
||||
|
||||
/// Central struct that interacts with agent, dispatcher, and cgroup to handle
|
||||
/// signals from the agent.
|
||||
@@ -233,7 +233,9 @@ impl Runner {
|
||||
//
|
||||
// TODO: make the duration here configurable.
|
||||
if last_time.elapsed() > Duration::from_secs(5) {
|
||||
bail!("haven't gotten cgroup memory stats recently enough to determine downscaling information");
|
||||
bail!(
|
||||
"haven't gotten cgroup memory stats recently enough to determine downscaling information"
|
||||
);
|
||||
} else if last_history.samples_count <= 1 {
|
||||
let status = "haven't received enough cgroup memory stats yet";
|
||||
info!(status, "discontinuing downscale");
|
||||
|
||||
@@ -5,6 +5,7 @@ package interpreted_wal;
|
||||
message InterpretedWalRecords {
|
||||
repeated InterpretedWalRecord records = 1;
|
||||
optional uint64 next_record_lsn = 2;
|
||||
optional uint64 raw_wal_start_lsn = 3;
|
||||
}
|
||||
|
||||
message InterpretedWalRecord {
|
||||
|
||||
@@ -60,7 +60,11 @@ pub struct InterpretedWalRecords {
|
||||
pub records: Vec<InterpretedWalRecord>,
|
||||
// Start LSN of the next record after the batch.
|
||||
// Note that said record may not belong to the current shard.
|
||||
pub next_record_lsn: Option<Lsn>,
|
||||
pub next_record_lsn: Lsn,
|
||||
// Inclusive start LSN of the PG WAL from which the interpreted
|
||||
// WAL records were extracted. Note that this is not necessarily the
|
||||
// start LSN of the first interpreted record in the batch.
|
||||
pub raw_wal_start_lsn: Option<Lsn>,
|
||||
}
|
||||
|
||||
/// An interpreted Postgres WAL record, ready to be handled by the pageserver
|
||||
|
||||
@@ -167,7 +167,8 @@ impl TryFrom<InterpretedWalRecords> for proto::InterpretedWalRecords {
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
Ok(proto::InterpretedWalRecords {
|
||||
records,
|
||||
next_record_lsn: value.next_record_lsn.map(|l| l.0),
|
||||
next_record_lsn: Some(value.next_record_lsn.0),
|
||||
raw_wal_start_lsn: value.raw_wal_start_lsn.map(|l| l.0),
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -254,7 +255,11 @@ impl TryFrom<proto::InterpretedWalRecords> for InterpretedWalRecords {
|
||||
|
||||
Ok(InterpretedWalRecords {
|
||||
records,
|
||||
next_record_lsn: value.next_record_lsn.map(Lsn::from),
|
||||
next_record_lsn: value
|
||||
.next_record_lsn
|
||||
.map(Lsn::from)
|
||||
.expect("Always provided"),
|
||||
raw_wal_start_lsn: value.raw_wal_start_lsn.map(Lsn::from),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -477,6 +477,26 @@ impl Client {
|
||||
self.request(Method::POST, &uri, ()).await.map(|_| ())
|
||||
}

    pub async fn timeline_download_heatmap_layers(
        &self,
        tenant_shard_id: TenantShardId,
        timeline_id: TimelineId,
        concurrency: Option<usize>,
    ) -> Result<()> {
        let mut path = reqwest::Url::parse(&format!(
            "{}/v1/tenant/{}/timeline/{}/download_heatmap_layers",
            self.mgmt_api_endpoint, tenant_shard_id, timeline_id
        ))
        .expect("Cannot build URL");

        if let Some(concurrency) = concurrency {
            path.query_pairs_mut()
                .append_pair("concurrency", &format!("{}", concurrency));
        }

        self.request(Method::POST, path, ()).await.map(|_| ())
    }

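A hypothetical caller of the new management-API helper (not part of the diff): it assumes an already-constructed pageserver mgmt `Client` and the same `TenantShardId`, `TimelineId`, and `Result` types that are in scope in this module; the concurrency value of 16 is arbitrary.

```rust
// Illustrative sketch: trigger a heatmap layer download for one timeline.
async fn warm_timeline(
    client: &Client,
    tenant_shard_id: TenantShardId,
    timeline_id: TimelineId,
) -> Result<()> {
    // The server clamps the requested concurrency to its remote storage
    // concurrency limit, so 16 is only an upper bound from the caller's side.
    client
        .timeline_download_heatmap_layers(tenant_shard_id, timeline_id, Some(16))
        .await
}
```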
pub async fn tenant_reset(&self, tenant_shard_id: TenantShardId) -> Result<()> {
|
||||
let uri = format!(
|
||||
"{}/v1/tenant/{}/reset",
|
||||
|
||||
@@ -345,6 +345,7 @@ impl AuxFileV2 {
|
||||
AuxFileV2::Recognized("pg_logical/replorigin_checkpoint", hash)
|
||||
}
|
||||
(2, 1) => AuxFileV2::Recognized("pg_replslot/", hash),
|
||||
(3, 1) => AuxFileV2::Recognized("pg_stat/pgstat.stat", hash),
|
||||
(1, 0xff) => AuxFileV2::OtherWithPrefix("pg_logical/", hash),
|
||||
(0xff, 0xff) => AuxFileV2::Other(hash),
|
||||
_ => return None,
|
||||
|
||||
@@ -39,6 +39,7 @@ fn aux_hash_to_metadata_key(dir_level1: u8, dir_level2: u8, data: &[u8]) -> Key
|
||||
|
||||
const AUX_DIR_PG_LOGICAL: u8 = 0x01;
|
||||
const AUX_DIR_PG_REPLSLOT: u8 = 0x02;
|
||||
const AUX_DIR_PG_STAT: u8 = 0x03;
|
||||
const AUX_DIR_PG_UNKNOWN: u8 = 0xFF;
|
||||
|
||||
/// Encode the aux file into a fixed-size key.
|
||||
@@ -53,6 +54,7 @@ const AUX_DIR_PG_UNKNOWN: u8 = 0xFF;
|
||||
/// * pg_logical/replorigin_checkpoint -> 0x0103
|
||||
/// * pg_logical/others -> 0x01FF
|
||||
/// * pg_replslot/ -> 0x0201
|
||||
/// * pg_stat/pgstat.stat -> 0x0301
|
||||
/// * others -> 0xFFFF
|
||||
///
|
||||
/// If you add new AUX files to this function, please also add a test case to `test_encoding_portable`.
|
||||
@@ -75,6 +77,8 @@ pub fn encode_aux_file_key(path: &str) -> Key {
|
||||
aux_hash_to_metadata_key(AUX_DIR_PG_LOGICAL, 0xFF, fname.as_bytes())
|
||||
} else if let Some(fname) = path.strip_prefix("pg_replslot/") {
|
||||
aux_hash_to_metadata_key(AUX_DIR_PG_REPLSLOT, 0x01, fname.as_bytes())
|
||||
} else if let Some(fname) = path.strip_prefix("pg_stat/") {
|
||||
aux_hash_to_metadata_key(AUX_DIR_PG_STAT, 0x01, fname.as_bytes())
|
||||
} else {
|
||||
if cfg!(debug_assertions) {
|
||||
warn!(
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
use anyhow::{anyhow, Context};
|
||||
use bytes::{BufMut, Bytes, BytesMut};
|
||||
use fail::fail_point;
|
||||
use pageserver_api::key::Key;
|
||||
use pageserver_api::key::{rel_block_to_key, Key};
|
||||
use postgres_ffi::pg_constants;
|
||||
use std::fmt::Write as FmtWrite;
|
||||
use std::time::{Instant, SystemTime};
|
||||
@@ -42,8 +42,8 @@ use utils::lsn::Lsn;
|
||||
pub enum BasebackupError {
|
||||
#[error("basebackup pageserver error {0:#}")]
|
||||
Server(#[from] anyhow::Error),
|
||||
#[error("basebackup client error {0:#}")]
|
||||
Client(#[source] io::Error),
|
||||
#[error("basebackup client error {0:#} when {1}")]
|
||||
Client(#[source] io::Error, &'static str),
|
||||
}
|
||||
|
||||
/// Create basebackup with non-rel data in it.
|
||||
@@ -234,7 +234,7 @@ where
|
||||
self.ar
|
||||
.append(&header, self.buf.as_slice())
|
||||
.await
|
||||
.map_err(BasebackupError::Client)?;
|
||||
.map_err(|e| BasebackupError::Client(e, "flush"))?;
|
||||
|
||||
self.total_blocks += nblocks;
|
||||
debug!("Added to basebackup slru {} relsize {}", segname, nblocks);
|
||||
@@ -264,6 +264,31 @@ where
|
||||
async fn send_tarball(mut self) -> Result<(), BasebackupError> {
|
||||
// TODO include checksum
|
||||
|
||||
// Construct the pg_control file from the persisted checkpoint and pg_control
|
||||
// information. But we only add this to the tarball at the end, so that if the
|
||||
// writing is interrupted half-way through, the resulting incomplete tarball will
|
||||
// be missing the pg_control file, which prevents PostgreSQL from starting up on
|
||||
// it. With proper error handling, you should never try to start up from an
|
||||
// incomplete basebackup in the first place, of course, but this is a nice little
|
||||
// extra safety measure.
|
||||
let checkpoint_bytes = self
|
||||
.timeline
|
||||
.get_checkpoint(self.lsn, self.ctx)
|
||||
.await
|
||||
.context("failed to get checkpoint bytes")?;
|
||||
let pg_control_bytes = self
|
||||
.timeline
|
||||
.get_control_file(self.lsn, self.ctx)
|
||||
.await
|
||||
.context("failed to get control bytes")?;
|
||||
let (pg_control_bytes, system_identifier, was_shutdown) =
|
||||
postgres_ffi::generate_pg_control(
|
||||
&pg_control_bytes,
|
||||
&checkpoint_bytes,
|
||||
self.lsn,
|
||||
self.timeline.pg_version,
|
||||
)?;
|
||||
|
||||
let lazy_slru_download = self.timeline.get_lazy_slru_download() && !self.full_backup;
|
||||
|
||||
let pgversion = self.timeline.pg_version;
|
||||
@@ -273,9 +298,9 @@ where
|
||||
for dir in subdirs.iter() {
|
||||
let header = new_tar_header_dir(dir)?;
|
||||
self.ar
|
||||
.append(&header, &mut io::empty())
|
||||
.append(&header, io::empty())
|
||||
.await
|
||||
.context("could not add directory to basebackup tarball")?;
|
||||
.map_err(|e| BasebackupError::Client(e, "send_tarball"))?;
|
||||
}
|
||||
|
||||
// Send config files.
|
||||
@@ -286,13 +311,13 @@ where
|
||||
self.ar
|
||||
.append(&header, data)
|
||||
.await
|
||||
.context("could not add config file to basebackup tarball")?;
|
||||
.map_err(|e| BasebackupError::Client(e, "send_tarball,pg_hba.conf"))?;
|
||||
} else {
|
||||
let header = new_tar_header(filepath, 0)?;
|
||||
self.ar
|
||||
.append(&header, &mut io::empty())
|
||||
.append(&header, io::empty())
|
||||
.await
|
||||
.context("could not add config file to basebackup tarball")?;
|
||||
.map_err(|e| BasebackupError::Client(e, "send_tarball,add_config_file"))?;
|
||||
}
|
||||
}
|
||||
if !lazy_slru_download {
|
||||
@@ -401,12 +426,16 @@ where
|
||||
// In future we will not generate AUX record for "pg_logical/replorigin_checkpoint" at all,
|
||||
// but now we should handle (skip) it for backward compatibility.
|
||||
continue;
|
||||
} else if path == "pg_stat/pgstat.stat" && !was_shutdown {
|
||||
// Drop statistic in case of abnormal termination, i.e. if we're not starting from the exact LSN
|
||||
// of a shutdown checkpoint.
|
||||
continue;
|
||||
}
|
||||
let header = new_tar_header(&path, content.len() as u64)?;
|
||||
self.ar
|
||||
.append(&header, &*content)
|
||||
.await
|
||||
.context("could not add aux file to basebackup tarball")?;
|
||||
.map_err(|e| BasebackupError::Client(e, "send_tarball,add_aux_file"))?;
|
||||
}
|
||||
|
||||
if min_restart_lsn != Lsn::MAX {
|
||||
@@ -419,7 +448,7 @@ where
|
||||
self.ar
|
||||
.append(&header, &data[..])
|
||||
.await
|
||||
.context("could not add restart.lsn file to basebackup tarball")?;
|
||||
.map_err(|e| BasebackupError::Client(e, "send_tarball,restart.lsn"))?;
|
||||
}
|
||||
for xid in self
|
||||
.timeline
|
||||
@@ -451,9 +480,9 @@ where
|
||||
let crc32 = crc32c::crc32c(&content);
|
||||
content.extend_from_slice(&crc32.to_le_bytes());
|
||||
let header = new_tar_header("pg_logical/replorigin_checkpoint", content.len() as u64)?;
|
||||
self.ar.append(&header, &*content).await.context(
|
||||
"could not add pg_logical/replorigin_checkpoint file to basebackup tarball",
|
||||
)?;
|
||||
self.ar.append(&header, &*content).await.map_err(|e| {
|
||||
BasebackupError::Client(e, "send_tarball,pg_logical/replorigin_checkpoint")
|
||||
})?;
|
||||
}
|
||||
|
||||
fail_point!("basebackup-before-control-file", |_| {
|
||||
@@ -462,9 +491,13 @@ where
|
||||
)))
|
||||
});
|
||||
|
||||
// Generate pg_control and bootstrap WAL segment.
|
||||
self.add_pgcontrol_file().await?;
|
||||
self.ar.finish().await.map_err(BasebackupError::Client)?;
|
||||
// Last, add the pg_control file and bootstrap WAL segment.
|
||||
self.add_pgcontrol_file(pg_control_bytes, system_identifier)
|
||||
.await?;
|
||||
self.ar
|
||||
.finish()
|
||||
.await
|
||||
.map_err(|e| BasebackupError::Client(e, "send_tarball,finish"))?;
|
||||
debug!("all tarred up!");
|
||||
Ok(())
|
||||
}
|
||||
@@ -482,9 +515,9 @@ where
|
||||
let file_name = dst.to_segfile_name(0);
|
||||
let header = new_tar_header(&file_name, 0)?;
|
||||
self.ar
|
||||
.append(&header, &mut io::empty())
|
||||
.append(&header, io::empty())
|
||||
.await
|
||||
.map_err(BasebackupError::Client)?;
|
||||
.map_err(|e| BasebackupError::Client(e, "add_rel,empty"))?;
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
@@ -498,13 +531,9 @@ where
|
||||
for blknum in startblk..endblk {
|
||||
let img = self
|
||||
.timeline
|
||||
.get_rel_page_at_lsn(
|
||||
src,
|
||||
blknum,
|
||||
Version::Lsn(self.lsn),
|
||||
self.ctx,
|
||||
self.io_concurrency.clone(),
|
||||
)
|
||||
// TODO: investigate using get_vectored for the entire startblk..endblk range.
|
||||
// But this code path is not on the critical path for most basebackups (?).
|
||||
.get(rel_block_to_key(src, blknum), self.lsn, self.ctx)
|
||||
.await
|
||||
.map_err(|e| BasebackupError::Server(e.into()))?;
|
||||
segment_data.extend_from_slice(&img[..]);
|
||||
@@ -515,7 +544,7 @@ where
|
||||
self.ar
|
||||
.append(&header, segment_data.as_slice())
|
||||
.await
|
||||
.map_err(BasebackupError::Client)?;
|
||||
.map_err(|e| BasebackupError::Client(e, "add_rel,segment"))?;
|
||||
|
||||
seg += 1;
|
||||
startblk = endblk;
|
||||
@@ -566,7 +595,7 @@ where
|
||||
self.ar
|
||||
.append(&header, pg_version_str.as_bytes())
|
||||
.await
|
||||
.map_err(BasebackupError::Client)?;
|
||||
.map_err(|e| BasebackupError::Client(e, "add_dbdir,PG_VERSION"))?;
|
||||
|
||||
info!("timeline.pg_version {}", self.timeline.pg_version);
|
||||
|
||||
@@ -576,7 +605,7 @@ where
|
||||
self.ar
|
||||
.append(&header, &img[..])
|
||||
.await
|
||||
.map_err(BasebackupError::Client)?;
|
||||
.map_err(|e| BasebackupError::Client(e, "add_dbdir,global/pg_filenode.map"))?;
|
||||
} else {
|
||||
warn!("global/pg_filenode.map is missing");
|
||||
}
|
||||
@@ -612,9 +641,9 @@ where
|
||||
let path = format!("base/{}", dbnode);
|
||||
let header = new_tar_header_dir(&path)?;
|
||||
self.ar
|
||||
.append(&header, &mut io::empty())
|
||||
.append(&header, io::empty())
|
||||
.await
|
||||
.map_err(BasebackupError::Client)?;
|
||||
.map_err(|e| BasebackupError::Client(e, "add_dbdir,base"))?;
|
||||
|
||||
if let Some(img) = relmap_img {
|
||||
let dst_path = format!("base/{}/PG_VERSION", dbnode);
|
||||
@@ -627,14 +656,14 @@ where
|
||||
self.ar
|
||||
.append(&header, pg_version_str.as_bytes())
|
||||
.await
|
||||
.map_err(BasebackupError::Client)?;
|
||||
.map_err(|e| BasebackupError::Client(e, "add_dbdir,base/PG_VERSION"))?;
|
||||
|
||||
let relmap_path = format!("base/{}/pg_filenode.map", dbnode);
|
||||
let header = new_tar_header(&relmap_path, img.len() as u64)?;
|
||||
self.ar
|
||||
.append(&header, &img[..])
|
||||
.await
|
||||
.map_err(BasebackupError::Client)?;
|
||||
.map_err(|e| BasebackupError::Client(e, "add_dbdir,base/pg_filenode.map"))?;
|
||||
}
|
||||
};
|
||||
Ok(())
|
||||
@@ -663,7 +692,7 @@ where
|
||||
self.ar
|
||||
.append(&header, &buf[..])
|
||||
.await
|
||||
.map_err(BasebackupError::Client)?;
|
||||
.map_err(|e| BasebackupError::Client(e, "add_twophase_file"))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -672,7 +701,11 @@ where
|
||||
// Add generated pg_control file and bootstrap WAL segment.
|
||||
// Also send zenith.signal file with extra bootstrap data.
|
||||
//
|
||||
async fn add_pgcontrol_file(&mut self) -> Result<(), BasebackupError> {
|
||||
async fn add_pgcontrol_file(
|
||||
&mut self,
|
||||
pg_control_bytes: Bytes,
|
||||
system_identifier: u64,
|
||||
) -> Result<(), BasebackupError> {
|
||||
// add zenith.signal file
|
||||
let mut zenith_signal = String::new();
|
||||
if self.prev_record_lsn == Lsn(0) {
|
||||
@@ -693,32 +726,14 @@ where
|
||||
zenith_signal.as_bytes(),
|
||||
)
|
||||
.await
|
||||
.map_err(BasebackupError::Client)?;
|
||||
|
||||
let checkpoint_bytes = self
|
||||
.timeline
|
||||
.get_checkpoint(self.lsn, self.ctx)
|
||||
.await
|
||||
.context("failed to get checkpoint bytes")?;
|
||||
let pg_control_bytes = self
|
||||
.timeline
|
||||
.get_control_file(self.lsn, self.ctx)
|
||||
.await
|
||||
.context("failed get control bytes")?;
|
||||
|
||||
let (pg_control_bytes, system_identifier) = postgres_ffi::generate_pg_control(
|
||||
&pg_control_bytes,
|
||||
&checkpoint_bytes,
|
||||
self.lsn,
|
||||
self.timeline.pg_version,
|
||||
)?;
|
||||
.map_err(|e| BasebackupError::Client(e, "add_pgcontrol_file,zenith.signal"))?;
|
||||
|
||||
//send pg_control
|
||||
let header = new_tar_header("global/pg_control", pg_control_bytes.len() as u64)?;
|
||||
self.ar
|
||||
.append(&header, &pg_control_bytes[..])
|
||||
.await
|
||||
.map_err(BasebackupError::Client)?;
|
||||
.map_err(|e| BasebackupError::Client(e, "add_pgcontrol_file,pg_control"))?;
|
||||
|
||||
//send wal segment
|
||||
let segno = self.lsn.segment_number(WAL_SEGMENT_SIZE);
|
||||
@@ -742,7 +757,7 @@ where
|
||||
self.ar
|
||||
.append(&header, &wal_seg[..])
|
||||
.await
|
||||
.map_err(BasebackupError::Client)?;
|
||||
.map_err(|e| BasebackupError::Client(e, "add_pgcontrol_file,wal_segment"))?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -134,6 +134,7 @@ fn main() -> anyhow::Result<()> {
|
||||
info!(?conf.virtual_file_io_engine, "starting with virtual_file IO engine");
|
||||
info!(?conf.virtual_file_io_mode, "starting with virtual_file IO mode");
|
||||
info!(?conf.wal_receiver_protocol, "starting with WAL receiver protocol");
|
||||
info!(?conf.validate_wal_contiguity, "starting with WAL contiguity validation");
|
||||
info!(?conf.page_service_pipelining, "starting with page service pipelining config");
|
||||
info!(?conf.get_vectored_concurrent_io, "starting with get_vectored IO concurrency config");
|
||||
|
||||
|
||||
@@ -140,10 +140,6 @@ pub struct PageServerConf {
|
||||
/// not terrible.
|
||||
pub background_task_maximum_delay: Duration,
|
||||
|
||||
/// If true, use a separate semaphore for compaction tasks instead of the common background task
|
||||
/// semaphore. Defaults to false.
|
||||
pub use_compaction_semaphore: bool,
|
||||
|
||||
pub control_plane_api: Option<Url>,
|
||||
|
||||
/// JWT token for use with the control plane API.
|
||||
@@ -201,6 +197,10 @@ pub struct PageServerConf {
|
||||
/// Enable read path debugging. If enabled, read key errors will print a backtrace of the layer
|
||||
/// files read.
|
||||
pub enable_read_path_debugging: bool,
|
||||
|
||||
/// Interpreted protocol feature: if enabled, validate that the logical WAL received from
|
||||
/// safekeepers does not have gaps.
|
||||
pub validate_wal_contiguity: bool,
|
||||
}
|
||||
|
||||
/// Token for authentication to safekeepers
|
||||
@@ -340,7 +340,6 @@ impl PageServerConf {
|
||||
test_remote_failures,
|
||||
ondemand_download_behavior_treat_error_as_warn,
|
||||
background_task_maximum_delay,
|
||||
use_compaction_semaphore,
|
||||
control_plane_api,
|
||||
control_plane_api_token,
|
||||
control_plane_emergency_mode,
|
||||
@@ -365,6 +364,7 @@ impl PageServerConf {
|
||||
page_service_pipelining,
|
||||
get_vectored_concurrent_io,
|
||||
enable_read_path_debugging,
|
||||
validate_wal_contiguity,
|
||||
} = config_toml;
|
||||
|
||||
let mut conf = PageServerConf {
|
||||
@@ -395,7 +395,6 @@ impl PageServerConf {
|
||||
test_remote_failures,
|
||||
ondemand_download_behavior_treat_error_as_warn,
|
||||
background_task_maximum_delay,
|
||||
use_compaction_semaphore,
|
||||
control_plane_api,
|
||||
control_plane_emergency_mode,
|
||||
heatmap_upload_concurrency,
|
||||
@@ -452,6 +451,7 @@ impl PageServerConf {
|
||||
virtual_file_io_mode: virtual_file_io_mode.unwrap_or(virtual_file::IoMode::preferred()),
|
||||
no_sync: no_sync.unwrap_or(false),
|
||||
enable_read_path_debugging: enable_read_path_debugging.unwrap_or(false),
|
||||
validate_wal_contiguity: validate_wal_contiguity.unwrap_or(false),
|
||||
};
|
||||
|
||||
// ------------------------------------------------------------
|
||||
|
||||
@@ -98,6 +98,7 @@ pub struct RequestContext {
|
||||
download_behavior: DownloadBehavior,
|
||||
access_stats_behavior: AccessStatsBehavior,
|
||||
page_content_kind: PageContentKind,
|
||||
read_path_debug: bool,
|
||||
}
|
||||
|
||||
/// The kind of access to the page cache.
|
||||
@@ -155,6 +156,7 @@ impl RequestContextBuilder {
|
||||
download_behavior: DownloadBehavior::Download,
|
||||
access_stats_behavior: AccessStatsBehavior::Update,
|
||||
page_content_kind: PageContentKind::Unknown,
|
||||
read_path_debug: false,
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -168,6 +170,7 @@ impl RequestContextBuilder {
|
||||
download_behavior: original.download_behavior,
|
||||
access_stats_behavior: original.access_stats_behavior,
|
||||
page_content_kind: original.page_content_kind,
|
||||
read_path_debug: original.read_path_debug,
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -191,6 +194,11 @@ impl RequestContextBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
pub(crate) fn read_path_debug(mut self, b: bool) -> Self {
|
||||
self.inner.read_path_debug = b;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn build(self) -> RequestContext {
|
||||
self.inner
|
||||
}
|
||||
@@ -291,4 +299,8 @@ impl RequestContext {
|
||||
pub(crate) fn page_content_kind(&self) -> PageContentKind {
|
||||
self.page_content_kind
|
||||
}
|
||||
|
||||
pub(crate) fn read_path_debug(&self) -> bool {
|
||||
self.read_path_debug
|
||||
}
|
||||
}
|
||||
|
||||
@@ -173,6 +173,7 @@ impl ControlPlaneGenerationsApi for ControllerUpcallClient {
|
||||
listen_pg_port: m.postgres_port,
|
||||
listen_http_addr: m.http_host,
|
||||
listen_http_port: m.http_port,
|
||||
listen_https_port: None, // TODO: Support https.
|
||||
availability_zone_id: az_id.expect("Checked above"),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -150,30 +150,10 @@ impl FlushOp {
|
||||
}
|
||||
}
|
||||
|
||||
/// A DeletionNotify can be used to wait for a deletion to have been executed.
|
||||
#[derive(Debug)]
|
||||
pub struct DeletionNotify {
|
||||
/// Receives the `DeletionListSeq` from `ListWriter` when scheduled in a `DeletionList`.
|
||||
seq_rx: tokio::sync::oneshot::Receiver<DeletionListSeq>,
|
||||
/// Watches the last executed `DeletionListSeq`.
|
||||
executed_rx: tokio::sync::watch::Receiver<DeletionListSeq>,
|
||||
}
|
||||
|
||||
impl DeletionNotify {
|
||||
/// Waits for the deletion to have been executed.
|
||||
pub async fn notify(mut self) {
|
||||
let Ok(wait_seq) = self.seq_rx.await else {
|
||||
return; // TODO return error
|
||||
};
|
||||
self.executed_rx.wait_for(|&seq| seq >= wait_seq).await.ok();
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct DeletionQueueClient {
|
||||
tx: tokio::sync::mpsc::UnboundedSender<ListWriterQueueMessage>,
|
||||
executor_tx: tokio::sync::mpsc::Sender<DeleterMessage>,
|
||||
executed_rx: tokio::sync::watch::Receiver<DeletionListSeq>,
|
||||
|
||||
lsn_table: Arc<std::sync::RwLock<VisibleLsnUpdates>>,
|
||||
}
|
||||
@@ -196,9 +176,6 @@ impl TenantDeletionList {
|
||||
}
|
||||
}
|
||||
|
||||
/// Deletion list sequence number. Monotonically increasing, even across restarts.
|
||||
type DeletionListSeq = u64;
|
||||
|
||||
/// Files ending with this suffix will be ignored and erased
|
||||
/// during recovery as startup.
|
||||
const TEMP_SUFFIX: &str = "tmp";
|
||||
@@ -208,9 +185,8 @@ struct DeletionList {
|
||||
/// Serialization version, for future use
|
||||
version: u8,
|
||||
|
||||
/// Used for constructing a unique key for each deletion list we write out, and to notify
|
||||
/// callers when a deletion has been executed (and will not be retried later).
|
||||
sequence: DeletionListSeq,
|
||||
/// Used for constructing a unique key for each deletion list we write out.
|
||||
sequence: u64,
|
||||
|
||||
/// To avoid repeating tenant/timeline IDs in every key, we store keys in
|
||||
/// nested HashMaps by TenantTimelineID. Each Tenant only appears once
|
||||
@@ -238,13 +214,13 @@ struct DeletionHeader {
|
||||
|
||||
/// The highest sequence number (inclusive) that has been validated. All deletion
|
||||
/// lists on disk with a sequence <= this value are safe to execute.
|
||||
validated_sequence: DeletionListSeq,
|
||||
validated_sequence: u64,
|
||||
}
|
||||
|
||||
impl DeletionHeader {
|
||||
const VERSION_LATEST: u8 = 1;
|
||||
|
||||
fn new(validated_sequence: DeletionListSeq) -> Self {
|
||||
fn new(validated_sequence: u64) -> Self {
|
||||
Self {
|
||||
version: Self::VERSION_LATEST,
|
||||
validated_sequence,
|
||||
@@ -266,7 +242,7 @@ impl DeletionHeader {
|
||||
|
||||
impl DeletionList {
|
||||
const VERSION_LATEST: u8 = 1;
|
||||
fn new(sequence: DeletionListSeq) -> Self {
|
||||
fn new(sequence: u64) -> Self {
|
||||
Self {
|
||||
version: Self::VERSION_LATEST,
|
||||
sequence,
|
||||
@@ -484,8 +460,6 @@ impl DeletionQueueClient {
|
||||
/// layers until you're sure they can be deleted safely (i.e. remote metadata no longer
|
||||
/// references them).
|
||||
///
|
||||
/// The returned `DeletionNotify` can be used to wait for the deletion to execute.
|
||||
///
|
||||
/// The `current_generation` is the generation of this pageserver's current attachment. The
|
||||
/// generations in `layers` are the generations in which those layers were written.
|
||||
pub(crate) fn push_layers(
|
||||
@@ -494,14 +468,12 @@ impl DeletionQueueClient {
|
||||
timeline_id: TimelineId,
|
||||
current_generation: Generation,
|
||||
layers: Vec<(LayerName, LayerFileMetadata)>,
|
||||
) -> Result<DeletionNotify, DeletionQueueError> {
|
||||
) -> Result<(), DeletionQueueError> {
|
||||
// None generations are not valid for attached tenants: they must always be attached in
|
||||
// a known generation. None generations are still permitted for layers in the index because
|
||||
// they may be historical.
|
||||
assert!(!current_generation.is_none());
|
||||
|
||||
let (seq_tx, seq_rx) = tokio::sync::oneshot::channel();
|
||||
|
||||
metrics::DELETION_QUEUE
|
||||
.keys_submitted
|
||||
.inc_by(layers.len() as u64);
|
||||
@@ -513,14 +485,8 @@ impl DeletionQueueClient {
|
||||
layers,
|
||||
generation: current_generation,
|
||||
objects: Vec::new(),
|
||||
seq_tx,
|
||||
}),
|
||||
)?;
|
||||
|
||||
Ok(DeletionNotify {
|
||||
seq_rx,
|
||||
executed_rx: self.executed_rx.clone(),
|
||||
})
|
||||
)
|
||||
}
|
||||
|
||||
/// This is cancel-safe. If you drop the future the flush may still happen in the background.
|
||||
@@ -644,10 +610,6 @@ impl DeletionQueue {
|
||||
// happen in the backend (persistent), not in this queue.
|
||||
let (executor_tx, executor_rx) = tokio::sync::mpsc::channel(16);
|
||||
|
||||
// Notifies clients about executed deletions.
|
||||
// TODO: recover the last sequence number on startup.
|
||||
let (executed_tx, executed_rx) = tokio::sync::watch::channel(0);
|
||||
|
||||
let lsn_table = Arc::new(std::sync::RwLock::new(VisibleLsnUpdates::new()));
|
||||
|
||||
// The deletion queue has an independent cancellation token to
|
||||
@@ -660,7 +622,6 @@ impl DeletionQueue {
|
||||
client: DeletionQueueClient {
|
||||
tx,
|
||||
executor_tx: executor_tx.clone(),
|
||||
executed_rx,
|
||||
lsn_table: lsn_table.clone(),
|
||||
},
|
||||
cancel: cancel.clone(),
|
||||
@@ -671,7 +632,6 @@ impl DeletionQueue {
|
||||
conf,
|
||||
backend_rx,
|
||||
executor_tx,
|
||||
executed_tx,
|
||||
controller_upcall_client,
|
||||
lsn_table.clone(),
|
||||
cancel.clone(),
|
||||
@@ -1268,7 +1228,6 @@ pub(crate) mod mock {
|
||||
DeletionQueueClient {
|
||||
tx: self.tx.clone(),
|
||||
executor_tx: self.executor_tx.clone(),
|
||||
executed_rx: todo!(),
|
||||
lsn_table: self.lsn_table.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,7 +12,6 @@
|
||||
|
||||
use super::DeletionHeader;
|
||||
use super::DeletionList;
|
||||
use super::DeletionListSeq;
|
||||
use super::FlushOp;
|
||||
use super::ValidatorQueueMessage;
|
||||
|
||||
@@ -66,9 +65,6 @@ pub(super) struct DeletionOp {
|
||||
/// The _current_ generation of the Tenant shard attachment in which we are enqueuing
|
||||
/// this deletion.
|
||||
pub(super) generation: Generation,
|
||||
|
||||
/// Return channel for the `DeletionList::sequence` this deletion is included in.
|
||||
pub(super) seq_tx: tokio::sync::oneshot::Sender<DeletionListSeq>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -179,7 +175,7 @@ impl ListWriter {
|
||||
///
|
||||
/// It is not an error for the header to not exist: we return None, and
|
||||
/// the caller should act as if validated_sequence is 0
|
||||
async fn load_validated_sequence(&self) -> Result<Option<DeletionListSeq>, anyhow::Error> {
|
||||
async fn load_validated_sequence(&self) -> Result<Option<u64>, anyhow::Error> {
|
||||
let header_path = self.conf.deletion_header_path();
|
||||
match tokio::fs::read(&header_path).await {
|
||||
Ok(header_bytes) => {
|
||||
@@ -232,7 +228,7 @@ impl ListWriter {
|
||||
|
||||
let temp_extension = format!(".{TEMP_SUFFIX}");
|
||||
let header_path = self.conf.deletion_header_path();
|
||||
let mut seqs: Vec<DeletionListSeq> = Vec::new();
|
||||
let mut seqs: Vec<u64> = Vec::new();
|
||||
while let Some(dentry) = dir.next_entry().await.fatal_err("read deletion dentry") {
|
||||
let file_name = dentry.file_name();
|
||||
let dentry_str = file_name.to_string_lossy();
|
||||
@@ -437,9 +433,6 @@ impl ListWriter {
|
||||
metrics::DELETION_QUEUE.unexpected_errors.inc();
|
||||
}
|
||||
}
|
||||
|
||||
// Notify the client about the sequence number of this deletion.
|
||||
op.seq_tx.send(self.pending.sequence).ok();
|
||||
}
|
||||
ListWriterQueueMessage::Flush(op) => {
|
||||
if self.pending.is_empty() {
|
||||
|
||||
@@ -33,7 +33,6 @@ use crate::virtual_file::MaybeFatalIo;
|
||||
use super::deleter::DeleterMessage;
|
||||
use super::DeletionHeader;
|
||||
use super::DeletionList;
|
||||
use super::DeletionListSeq;
|
||||
use super::DeletionQueueError;
|
||||
use super::FlushOp;
|
||||
use super::VisibleLsnUpdates;
|
||||
@@ -61,9 +60,6 @@ where
|
||||
rx: tokio::sync::mpsc::Receiver<ValidatorQueueMessage>,
|
||||
tx: tokio::sync::mpsc::Sender<DeleterMessage>,
|
||||
|
||||
/// Notifies clients about the last executed DeletionList sequence number.
|
||||
executed_tx: tokio::sync::watch::Sender<DeletionListSeq>,
|
||||
|
||||
// Client for calling into control plane API for validation of deletes
|
||||
controller_upcall_client: Option<C>,
|
||||
|
||||
@@ -98,7 +94,6 @@ where
|
||||
conf: &'static PageServerConf,
|
||||
rx: tokio::sync::mpsc::Receiver<ValidatorQueueMessage>,
|
||||
tx: tokio::sync::mpsc::Sender<DeleterMessage>,
|
||||
executed_tx: tokio::sync::watch::Sender<DeletionListSeq>,
|
||||
controller_upcall_client: Option<C>,
|
||||
lsn_table: Arc<std::sync::RwLock<VisibleLsnUpdates>>,
|
||||
cancel: CancellationToken,
|
||||
@@ -107,7 +102,6 @@ where
|
||||
conf,
|
||||
rx,
|
||||
tx,
|
||||
executed_tx,
|
||||
controller_upcall_client,
|
||||
lsn_table,
|
||||
pending_lists: Vec::new(),
|
||||
@@ -167,7 +161,7 @@ where
|
||||
tenant_generations.keys().map(|k| (*k, true)).collect()
|
||||
};
|
||||
|
||||
let mut validated_sequence: Option<DeletionListSeq> = None;
|
||||
let mut validated_sequence: Option<u64> = None;
|
||||
|
||||
// Apply the validation results to the pending LSN updates
|
||||
for (tenant_id, tenant_lsn_state) in pending_lsn_updates.tenants {
|
||||
@@ -301,7 +295,6 @@ where
|
||||
async fn cleanup_lists(&mut self, list_paths: Vec<Utf8PathBuf>) {
|
||||
for list_path in list_paths {
|
||||
debug!("Removing deletion list {list_path}");
|
||||
// TODO: this needs to fsync the removal.
|
||||
tokio::fs::remove_file(&list_path)
|
||||
.await
|
||||
.fatal_err("remove deletion list");
|
||||
@@ -331,7 +324,6 @@ where
|
||||
}
|
||||
|
||||
// Drain `validated_lists` into the executor
|
||||
let executed_seq = self.validated_lists.iter().map(|l| l.sequence).max();
|
||||
let mut executing_lists = Vec::new();
|
||||
for list in self.validated_lists.drain(..) {
|
||||
let list_path = self.conf.deletion_list_path(list.sequence);
|
||||
@@ -348,14 +340,6 @@ where
|
||||
// Erase the deletion lists whose keys have all be deleted from remote storage
|
||||
self.cleanup_lists(executing_lists).await;
|
||||
|
||||
// Notify any waiters that the deletions have been executed.
|
||||
//
|
||||
// TODO: this will wait for all pending lists to be deleted. Consider making it more
|
||||
// responsive by processing lists one by one.
|
||||
if let Some(executed_seq) = executed_seq {
|
||||
self.executed_tx.send_replace(executed_seq);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
@@ -824,6 +824,38 @@ paths:
|
||||
schema:
|
||||
$ref: "#/components/schemas/TenantConfigResponse"
|
||||
|
||||
/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/download_heatmap_layers:
|
||||
parameters:
|
||||
- name: tenant_shard_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: timeline_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: concurrency
|
||||
description: Maximum number of concurrent downloads (capped at remote storage concurrency)
|
||||
in: query
|
||||
required: false
|
||||
schema:
|
||||
type: integer
|
||||
post:
|
||||
description: |
|
||||
Download all layers in the specified timeline's heatmap. The `tenant_shard_id` parameter
|
||||
may be used to target all shards of a tenant when the unsharded form is used, or a specific
|
||||
tenant shard with the sharded form.
|
||||
responses:
|
||||
"200":
|
||||
description: Success
|
||||
delete:
|
||||
description: Stop any on-going background downloads of heatmap layers for the specified timeline.
|
||||
responses:
|
||||
"200":
|
||||
description: Success
|
||||
|
||||
/v1/utilization:
|
||||
get:
|
||||
description: |
|
||||
@@ -882,6 +914,8 @@ components:
|
||||
properties:
|
||||
reason:
|
||||
type: string
|
||||
gc_blocking:
|
||||
type: string
|
||||
|
||||
TenantCreateRequest:
|
||||
allOf:
|
||||
@@ -1080,9 +1114,15 @@ components:
|
||||
type: integer
|
||||
state:
|
||||
type: string
|
||||
min_readable_lsn:
|
||||
type: string
|
||||
format: hex
|
||||
latest_gc_cutoff_lsn:
|
||||
type: string
|
||||
format: hex
|
||||
applied_gc_cutoff_lsn:
|
||||
type: string
|
||||
format: hex
|
||||
|
||||
SyntheticSizeResponse:
|
||||
type: object
|
||||
|
||||
@@ -68,6 +68,7 @@ use tokio_util::sync::CancellationToken;
|
||||
use tracing::*;
|
||||
|
||||
use crate::config::PageServerConf;
|
||||
use crate::context::RequestContextBuilder;
|
||||
use crate::context::{DownloadBehavior, RequestContext};
|
||||
use crate::deletion_queue::DeletionQueueClient;
|
||||
use crate::pgdatadir_mapping::LsnForTimestamp;
|
||||
@@ -482,6 +483,11 @@ async fn build_timeline_info_common(
|
||||
|
||||
let (pitr_history_size, within_ancestor_pitr) = timeline.get_pitr_history_stats();
|
||||
|
||||
let min_readable_lsn = std::cmp::max(
|
||||
timeline.get_gc_cutoff_lsn(),
|
||||
*timeline.get_applied_gc_cutoff_lsn(),
|
||||
);
|
||||
|
||||
let info = TimelineInfo {
|
||||
tenant_id: timeline.tenant_shard_id,
|
||||
timeline_id: timeline.timeline_id,
|
||||
@@ -493,7 +499,12 @@ async fn build_timeline_info_common(
|
||||
initdb_lsn,
|
||||
last_record_lsn,
|
||||
prev_record_lsn: Some(timeline.get_prev_record_lsn()),
|
||||
latest_gc_cutoff_lsn: *timeline.get_latest_gc_cutoff_lsn(),
|
||||
// Externally, expose the lowest LSN that can be used to create a branch as the "GC cutoff", although internally
|
||||
// we distinguish between the "planned" GC cutoff (PITR point) and the "latest" GC cutoff (where we
|
||||
// actually trimmed data to), which can pass each other when PITR is changed.
|
||||
latest_gc_cutoff_lsn: min_readable_lsn,
|
||||
min_readable_lsn,
|
||||
applied_gc_cutoff_lsn: *timeline.get_applied_gc_cutoff_lsn(),
|
||||
current_logical_size: current_logical_size.size_dont_care_about_accuracy(),
|
||||
current_logical_size_is_accurate: match current_logical_size.accuracy() {
|
||||
tenant::timeline::logical_size::Accuracy::Approximate => false,
|
||||
@@ -1453,6 +1464,59 @@ async fn timeline_layer_scan_disposable_keys(
|
||||
)
|
||||
}
|
||||
|
||||
async fn timeline_download_heatmap_layers_handler(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
// Only used in the case where remote storage is not configured.
|
||||
const DEFAULT_MAX_CONCURRENCY: usize = 100;
|
||||
// A conservative default.
|
||||
const DEFAULT_CONCURRENCY: usize = 16;
|
||||
|
||||
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
|
||||
let desired_concurrency =
|
||||
parse_query_param(&request, "concurrency")?.unwrap_or(DEFAULT_CONCURRENCY);
|
||||
|
||||
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
|
||||
|
||||
let state = get_state(&request);
|
||||
let timeline =
|
||||
active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id)
|
||||
.await?;
|
||||
|
||||
let max_concurrency = get_config(&request)
|
||||
.remote_storage_config
|
||||
.as_ref()
|
||||
.map(|c| c.concurrency_limit())
|
||||
.unwrap_or(DEFAULT_MAX_CONCURRENCY);
|
||||
let concurrency = std::cmp::min(max_concurrency, desired_concurrency);
|
||||
|
||||
timeline.start_heatmap_layers_download(concurrency).await?;
|
||||
|
||||
json_response(StatusCode::ACCEPTED, ())
|
||||
}
|
||||
|
||||
async fn timeline_shutdown_download_heatmap_layers_handler(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
|
||||
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
|
||||
|
||||
let state = get_state(&request);
|
||||
let timeline =
|
||||
active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id)
|
||||
.await?;
|
||||
|
||||
timeline.stop_and_drain_heatmap_layers_download().await;
|
||||
|
||||
json_response(StatusCode::OK, ())
|
||||
}
|
||||
|
||||
async fn layer_download_handler(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
@@ -2151,6 +2215,7 @@ async fn timeline_compact_handler(
|
||||
let state = get_state(&request);
|
||||
|
||||
let mut flags = EnumSet::empty();
|
||||
flags |= CompactFlags::NoYield; // run compaction to completion
|
||||
|
||||
if Some(true) == parse_query_param::<_, bool>(&request, "force_l0_compaction")? {
|
||||
flags |= CompactFlags::ForceL0Compaction;
|
||||
@@ -2330,6 +2395,7 @@ async fn timeline_checkpoint_handler(
            match e {
                CompactionError::ShuttingDown => ApiError::ShuttingDown,
                CompactionError::Offload(e) => ApiError::InternalServerError(anyhow::anyhow!(e)),
                CompactionError::CollectKeySpaceError(e) => ApiError::InternalServerError(anyhow::anyhow!(e)),
                CompactionError::Other(e) => ApiError::InternalServerError(e)
            }
        )?;
@@ -2507,14 +2573,30 @@ async fn deletion_queue_flush(
    }
}

/// Try if `GetPage@Lsn` is successful, useful for manual debugging.
async fn getpage_at_lsn_handler(
    request: Request<Body>,
    cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
    getpage_at_lsn_handler_inner(false, request, cancel).await
}

async fn touchpage_at_lsn_handler(
    request: Request<Body>,
    cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
    getpage_at_lsn_handler_inner(true, request, cancel).await
}

/// Try if `GetPage@Lsn` is successful, useful for manual debugging.
async fn getpage_at_lsn_handler_inner(
    touch: bool,
    request: Request<Body>,
    _cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
    let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
    // Require pageserver admin permission for this API instead of only tenant-level token.
    check_permission(&request, None)?;
    let state = get_state(&request);

    struct Key(pageserver_api::key::Key);
@@ -2529,22 +2611,29 @@ async fn getpage_at_lsn_handler(

    let key: Key = parse_query_param(&request, "key")?
        .ok_or_else(|| ApiError::BadRequest(anyhow!("missing 'key' query parameter")))?;
    let lsn: Lsn = parse_query_param(&request, "lsn")?
        .ok_or_else(|| ApiError::BadRequest(anyhow!("missing 'lsn' query parameter")))?;
    let lsn: Option<Lsn> = parse_query_param(&request, "lsn")?;

    async {
        let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
        // Enable read path debugging
        let ctx = RequestContextBuilder::extend(&ctx).read_path_debug(true).build();
        let timeline = active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id).await?;

        // Use last_record_lsn if no lsn is provided
        let lsn = lsn.unwrap_or_else(|| timeline.get_last_record_lsn());
        let page = timeline.get(key.0, lsn, &ctx).await?;

        Result::<_, ApiError>::Ok(
            Response::builder()
                .status(StatusCode::OK)
                .header(header::CONTENT_TYPE, "application/octet-stream")
                .body(hyper::Body::from(page))
                .unwrap(),
        )
        if touch {
            json_response(StatusCode::OK, ())
        } else {
            Result::<_, ApiError>::Ok(
                Response::builder()
                    .status(StatusCode::OK)
                    .header(header::CONTENT_TYPE, "application/octet-stream")
                    .body(hyper::Body::from(page))
                    .unwrap(),
            )
        }
    }
    .instrument(info_span!("timeline_get", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))
    .await
@@ -3615,6 +3704,14 @@ pub fn make_router(
            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/layer",
            |r| api_handler(r, layer_map_info_handler),
        )
        .post(
            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/download_heatmap_layers",
            |r| api_handler(r, timeline_download_heatmap_layers_handler),
        )
        .delete(
            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/download_heatmap_layers",
            |r| api_handler(r, timeline_shutdown_download_heatmap_layers_handler),
        )
        .get(
            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/layer/:layer_file_name",
            |r| api_handler(r, layer_download_handler),
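
The two routes above expose the heatmap layer download as POST (start) and DELETE (stop and drain). A hedged sketch of how a client might drive them, assuming the reqwest crate and a pageserver management API on localhost:9898; the base URL, IDs, and the omission of auth headers are assumptions, only the paths and the `concurrency` query parameter come from this diff:

    // Illustrative client for the routes registered above.
    use reqwest::Client;

    async fn warm_timeline(
        client: &Client,
        base: &str,            // e.g. "http://localhost:9898" (assumed)
        tenant_shard_id: &str, // placeholder IDs
        timeline_id: &str,
    ) -> reqwest::Result<()> {
        let url = format!(
            "{base}/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/download_heatmap_layers"
        );

        // Start the download; the handler replies 202 Accepted and runs in the background.
        client
            .post(url.as_str())
            .query(&[("concurrency", "16")])
            .send()
            .await?
            .error_for_status()?;

        // Later, stop and drain any in-flight heatmap layer downloads.
        client.delete(url.as_str()).send().await?.error_for_status()?;
        Ok(())
    }
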
@@ -3671,6 +3768,10 @@ pub fn make_router(
            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/getpage",
            |r| testing_api_handler("getpage@lsn", r, getpage_at_lsn_handler),
        )
        .get(
            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/touchpage",
            |r| api_handler(r, touchpage_at_lsn_handler),
        )
        .get(
            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/keyspace",
            |r| api_handler(r, timeline_collect_keyspace),
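
Unlike `getpage`, which stays behind `testing_api_handler`, `touchpage` is registered as a regular admin API: the shared inner handler still fetches the page (exercising the read path with read-path debugging enabled), but in touch mode it returns a trivial JSON response rather than the raw page bytes, and the `lsn` query parameter now defaults to the timeline's last_record_lsn. A hedged calling sketch, again assuming reqwest and a local pageserver; the key value is a placeholder, since the format accepted by `pageserver_api::key::Key` is not shown in this diff, and auth headers are omitted:

    use reqwest::Client;

    // Touch a single page; `lsn` is optional and defaults to last_record_lsn on the server.
    async fn touch_page(
        client: &Client,
        base: &str,            // e.g. "http://localhost:9898" (assumed)
        tenant_shard_id: &str,
        timeline_id: &str,
        key: &str,             // placeholder; must match the server-side Key parser
        lsn: Option<&str>,
    ) -> reqwest::Result<()> {
        let url = format!("{base}/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/touchpage");
        let mut req = client.get(url).query(&[("key", key)]);
        if let Some(lsn) = lsn {
            req = req.query(&[("lsn", lsn)]);
        }
        req.send().await?.error_for_status()?;
        Ok(())
    }
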
Some files were not shown because too many files have changed in this diff.