fix

fix pin
asyncreadready
2026-01-18 19:02:56 +00:00 · 2024-08-21 18:44:57 +01:00 · 2024-08-21 16:29:52 +01:00 · 2024-08-21 16:16:49 +01:00 · 2024-08-21 15:28:25 +01:00 · 2024-08-21 14:42:41 +01:00
361 changed files with 7358 additions and 13563 deletions
--- a/.config/hakari.toml
+++ b/.config/hakari.toml
@@ -23,30 +23,10 @@ platforms = [
 ]

 [final-excludes]
-workspace-members = [
-    # vm_monitor benefits from the same Cargo.lock as the rest of our artifacts, but
-    # it is built primarly in separate repo neondatabase/autoscaling and thus is excluded
-    # from depending on workspace-hack because most of the dependencies are not used.
-    "vm_monitor",
-    # All of these exist in libs and are not usually built independently.
-    # Putting workspace hack there adds a bottleneck for cargo builds.
-    "compute_api",
-    "consumption_metrics",
-    "desim",
-    "metrics",
-    "pageserver_api",
-    "postgres_backend",
-    "postgres_connection",
-    "postgres_ffi",
-    "pq_proto",
-    "remote_storage",
-    "safekeeper_api",
-    "tenant_size_model",
-    "tracing-utils",
-    "utils",
-    "wal_craft",
-    "walproposer",
-]
+# vm_monitor benefits from the same Cargo.lock as the rest of our artifacts, but
+# it is built primarily in separate repo neondatabase/autoscaling and thus is excluded
+# from depending on workspace-hack because most of the dependencies are not used.
+workspace-members = ["vm_monitor"]

 # Write out exact versions rather than a semver range. (Defaults to false.)
 # exact-versions = true
--- a/.devcontainer/Dockerfile.devcontainer
+++ b/.devcontainer/Dockerfile.devcontainer
@@ -1 +0,0 @@
-FROM neondatabase/build-tools:pinned
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -1,23 +0,0 @@
-// https://containers.dev/implementors/json_reference/
-{
-  "name": "Neon",
-  "build": {
-    "context": "..",
-    "dockerfile": "Dockerfile.devcontainer"
-  },
-
-  "postCreateCommand": {
-    "build neon": "BUILD_TYPE=debug CARGO_BUILD_FLAGS='--features=testing' mold -run make -s -j`nproc`",
-    "install python deps": "./scripts/pysync"
-  },
-
-  "customizations": {
-    "vscode": {
-      "extensions": [
-        "charliermarsh.ruff",
-        "github.vscode-github-actions",
-        "rust-lang.rust-analyzer"
-      ]
-    }
-  }
-}
--- a/.github/ISSUE_TEMPLATE/config.yml
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -1,6 +0,0 @@
-
-blank_issues_enabled: true
-contact_links:
-  - name: Feature request
-    url: https://console.neon.tech/app/projects?modal=feedback
-    about: For feature requests in the Neon product, please submit via the feedback form on `https://console.neon.tech`
--- a/.github/actionlint.yml
+++ b/.github/actionlint.yml
@@ -7,13 +7,6 @@ self-hosted-runner:
    - small-arm64
    - us-east-2
 config-variables:
-  - AZURE_DEV_CLIENT_ID
-  - AZURE_DEV_REGISTRY_NAME
-  - AZURE_DEV_SUBSCRIPTION_ID
-  - AZURE_PROD_CLIENT_ID
-  - AZURE_PROD_REGISTRY_NAME
-  - AZURE_PROD_SUBSCRIPTION_ID
-  - AZURE_TENANT_ID
  - BENCHMARK_PROJECT_ID_PUB
  - BENCHMARK_PROJECT_ID_SUB
  - REMOTE_STORAGE_AZURE_CONTAINER
--- a/.github/actions/run-python-test-set/action.yml
+++ b/.github/actions/run-python-test-set/action.yml
@@ -71,7 +71,7 @@ runs:
      if: inputs.build_type != 'remote'
      uses: ./.github/actions/download
      with:
-        name: compatibility-snapshot-${{ runner.arch }}-${{ inputs.build_type }}-pg${{ inputs.pg_version }}
+        name: compatibility-snapshot-${{ inputs.build_type }}-pg${{ inputs.pg_version }}
        path: /tmp/compatibility_snapshot_pg${{ inputs.pg_version }}
        prefix: latest
        # The lack of compatibility snapshot (for example, for the new Postgres version)
@@ -169,8 +169,10 @@ runs:
          EXTRA_PARAMS="--durations-path $TEST_OUTPUT/benchmark_durations.json $EXTRA_PARAMS"
        fi

-        if [[ $BUILD_TYPE == "debug" && $RUNNER_ARCH == 'X64' ]]; then
+        if [[ "${{ inputs.build_type }}" == "debug" ]]; then
          cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
+        elif [[ "${{ inputs.build_type }}" == "release" ]]; then
+          cov_prefix=()
        else
          cov_prefix=()
        fi
@@ -211,13 +213,13 @@ runs:
        fi

    - name: Upload compatibility snapshot
-      # Note, that we use `github.base_ref` which is a target branch for a PR
-      if: github.event_name == 'pull_request' && github.base_ref == 'release'
+      if: github.ref_name == 'release'
      uses: ./.github/actions/upload
      with:
-        name: compatibility-snapshot-${{ runner.arch }}-${{ inputs.build_type }}-pg${{ inputs.pg_version }}
+        name: compatibility-snapshot-${{ inputs.build_type }}-pg${{ inputs.pg_version }}-${{ github.run_id }}
        # Directory is created by test_compatibility.py::test_create_snapshot, keep the path in sync with the test
        path: /tmp/test_output/compatibility_snapshot_pg${{ inputs.pg_version }}/
+        prefix: latest

    - name: Upload test results
      if: ${{ !cancelled() }}
--- a/.github/workflows/_build-and-test-locally.yml
+++ b/.github/workflows/_build-and-test-locally.yml
@@ -94,16 +94,11 @@ jobs:
      # We run tests with addtional features, that are turned off by default (e.g. in release builds), see
      # corresponding Cargo.toml files for their descriptions.
      - name: Set env variables
-        env:
-          ARCH: ${{ inputs.arch }}
        run: |
          CARGO_FEATURES="--features testing"
-          if [[ $BUILD_TYPE == "debug" && $ARCH == 'x64' ]]; then
+          if [[ $BUILD_TYPE == "debug" ]]; then
            cov_prefix="scripts/coverage --profraw-prefix=$GITHUB_JOB --dir=/tmp/coverage run"
            CARGO_FLAGS="--locked"
-          elif [[ $BUILD_TYPE == "debug" ]]; then
-            cov_prefix=""
-            CARGO_FLAGS="--locked"
          elif [[ $BUILD_TYPE == "release" ]]; then
            cov_prefix=""
            CARGO_FLAGS="--locked --release"
@@ -163,8 +158,6 @@ jobs:
      # Do install *before* running rust tests because they might recompile the
      # binaries with different features/flags.
      - name: Install rust binaries
-        env:
-          ARCH: ${{ inputs.arch }}
        run: |
          # Install target binaries
          mkdir -p /tmp/neon/bin/
@@ -179,7 +172,7 @@ jobs:
          done

          # Install test executables and write list of all binaries (for code coverage)
-          if [[ $BUILD_TYPE == "debug" && $ARCH == 'x64' ]]; then
+          if [[ $BUILD_TYPE == "debug" ]]; then
            # Keep bloated coverage data files away from the rest of the artifact
            mkdir -p /tmp/coverage/

@@ -216,14 +209,8 @@ jobs:
          #nextest does not yet support running doctests
          ${cov_prefix} cargo test --doc $CARGO_FLAGS $CARGO_FEATURES

-          # run all non-pageserver tests
-          ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E '!package(pageserver)'
-
-          # run pageserver tests with different settings
          for io_engine in std-fs tokio-epoll-uring ; do
-            for io_buffer_alignment in 0 1 512 ; do
-              NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine NEON_PAGESERVER_UNIT_TEST_IO_BUFFER_ALIGNMENT=$io_buffer_alignment ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES  -E 'package(pageserver)'
-            done
+            NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES
          done

          # Run separate tests for real S3
@@ -256,8 +243,8 @@ jobs:
        uses: ./.github/actions/save-coverage-data

  regress-tests:
-    # Don't run regression tests on debug arm64 builds
-    if: inputs.build-type != 'debug' || inputs.arch != 'arm64'
+    # Run test on x64 only
+    if: inputs.arch == 'x64'
    needs: [ build-neon ]
    runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', inputs.arch == 'arm64' && 'large-arm64' || 'large')) }}
    container:
--- a/.github/workflows/_push-to-acr.yml
+++ b/.github/workflows/_push-to-acr.yml
@@ -1,56 +0,0 @@
-name: Push images to ACR
-on:
-  workflow_call:
-    inputs:
-      client_id:
-        description: Client ID of Azure managed identity or Entra app
-        required: true
-        type: string
-      image_tag:
-        description: Tag for the container image
-        required: true
-        type: string
-      images:
-        description: Images to push
-        required: true
-        type: string
-      registry_name:
-        description: Name of the container registry
-        required: true
-        type: string
-      subscription_id:
-        description: Azure subscription ID
-        required: true
-        type: string
-      tenant_id:
-        description: Azure tenant ID
-        required: true
-        type: string
-
-jobs:
-  push-to-acr:
-    runs-on: ubuntu-22.04
-    permissions:
-      contents: read  # This is required for actions/checkout
-      id-token: write # This is required for Azure Login to work.
-
-    steps:
-      - name: Azure login
-        uses: azure/login@6c251865b4e6290e7b78be643ea2d005bc51f69a  # @v2.1.1
-        with:
-          client-id: ${{ inputs.client_id }}
-          subscription-id: ${{ inputs.subscription_id }}
-          tenant-id: ${{ inputs.tenant_id }}
-
-      - name: Login to ACR
-        run: |
-          az acr login --name=${{ inputs.registry_name }}
-
-      - name: Copy docker images to ACR ${{ inputs.registry_name }}
-        run: |
-          images='${{ inputs.images }}'
-          for image in ${images}; do
-            docker buildx imagetools create \
-              -t ${{ inputs.registry_name }}.azurecr.io/neondatabase/${image}:${{ inputs.image_tag }} \
-                                        neondatabase/${image}:${{ inputs.image_tag }}
-          done
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -198,7 +198,7 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
-        arch: [ x64, arm64 ]
+        arch: [ x64 ]
        # Do not build or run tests in debug for release branches
        build-type: ${{ fromJson((startsWith(github.ref_name, 'release') && github.event_name == 'push') && '["release"]' || '["debug", "release"]') }}
        include:
@@ -286,7 +286,6 @@ jobs:
          PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
          TEST_RESULT_CONNSTR: "${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}"
          PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
-          SYNC_AFTER_EACH_TEST: true
      # XXX: no coverage data handling here, since benchmarks are run on release builds,
      # while coverage is currently collected for the debug ones

@@ -794,6 +793,9 @@ jobs:
          docker compose -f ./docker-compose/docker-compose.yml down

  promote-images:
+    permissions:
+      contents: read  # This is required for actions/checkout
+      id-token: write # This is required for Azure Login to work.
    needs: [ check-permissions, tag, test-images, vm-compute-node-image ]
    runs-on: ubuntu-22.04

@@ -820,6 +822,28 @@ jobs:
                                               neondatabase/vm-compute-node-${version}:${{ needs.tag.outputs.build-tag }}
          done

+      - name: Azure login
+        if: github.ref_name == 'main'
+        uses: azure/login@6c251865b4e6290e7b78be643ea2d005bc51f69a  # @v2.1.1
+        with:
+          client-id: ${{ secrets.AZURE_DEV_CLIENT_ID }}
+          tenant-id: ${{ secrets.AZURE_TENANT_ID }}
+          subscription-id: ${{ secrets.AZURE_DEV_SUBSCRIPTION_ID }}
+
+      - name: Login to ACR
+        if: github.ref_name == 'main'
+        run: |
+          az acr login --name=neoneastus2
+
+      - name: Copy docker images to ACR-dev
+        if: github.ref_name == 'main'
+        run: |
+          for image in neon compute-tools {vm-,}compute-node-{v14,v15,v16}; do
+            docker buildx imagetools create \
+              -t neoneastus2.azurecr.io/neondatabase/${image}:${{ needs.tag.outputs.build-tag }} \
+                                        neondatabase/${image}:${{ needs.tag.outputs.build-tag }}
+          done
+
      - name: Add latest tag to images
        if: github.ref_name == 'main'
        run: |
@@ -857,30 +881,6 @@ jobs:
                                               369495373322.dkr.ecr.eu-central-1.amazonaws.com/${image}:${{ needs.tag.outputs.build-tag }}
          done

-  push-to-acr-dev:
-    if: github.ref_name == 'main'
-    needs: [ tag, promote-images ]
-    uses: ./.github/workflows/_push-to-acr.yml
-    with:
-      client_id: ${{ vars.AZURE_DEV_CLIENT_ID }}
-      image_tag: ${{ needs.tag.outputs.build-tag }}
-      images: neon compute-tools vm-compute-node-v14 vm-compute-node-v15 vm-compute-node-v16 compute-node-v14 compute-node-v15 compute-node-v16
-      registry_name: ${{ vars.AZURE_DEV_REGISTRY_NAME }}
-      subscription_id: ${{ vars.AZURE_DEV_SUBSCRIPTION_ID }}
-      tenant_id: ${{ vars.AZURE_TENANT_ID }}
-
-  push-to-acr-prod:
-    if: github.ref_name == 'release'|| github.ref_name == 'release-proxy'
-    needs: [ tag, promote-images ]
-    uses: ./.github/workflows/_push-to-acr.yml
-    with:
-      client_id: ${{ vars.AZURE_PROD_CLIENT_ID }}
-      image_tag: ${{ needs.tag.outputs.build-tag }}
-      images: neon compute-tools vm-compute-node-v14 vm-compute-node-v15 vm-compute-node-v16 compute-node-v14 compute-node-v15 compute-node-v16
-      registry_name: ${{ vars.AZURE_PROD_REGISTRY_NAME }}
-      subscription_id: ${{ vars.AZURE_PROD_SUBSCRIPTION_ID }}
-      tenant_id: ${{ vars.AZURE_TENANT_ID }}
-
  trigger-custom-extensions-build-and-wait:
    needs: [ check-permissions, tag ]
    runs-on: ubuntu-22.04
@@ -956,8 +956,8 @@ jobs:
          exit 1

  deploy:
-    needs: [ check-permissions, promote-images, tag, build-and-test-locally, trigger-custom-extensions-build-and-wait, push-to-acr-dev, push-to-acr-prod ]
-    if: (github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy') && !failure() && !cancelled()
+    needs: [ check-permissions, promote-images, tag, build-and-test-locally, trigger-custom-extensions-build-and-wait ]
+    if: github.ref_name == 'main' || github.ref_name == 'release'|| github.ref_name == 'release-proxy'

    runs-on: [ self-hosted, small ]
    container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
@@ -1055,88 +1055,43 @@ jobs:
              generate_release_notes: true,
            })

-  # The job runs on `release` branch and copies compatibility data and Neon artifact from the last *release PR* to the latest directory
  promote-compatibility-data:
-    needs: [ deploy ]
+    needs: [ check-permissions, promote-images, tag, build-and-test-locally ]
    if: github.ref_name == 'release'

-    runs-on: ubuntu-22.04
+    runs-on: [ self-hosted, small ]
+    container:
+      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned
+      options: --init
    steps:
-      - name: Fetch GITHUB_RUN_ID and COMMIT_SHA for the last merged release PR
-        id: fetch-last-release-pr-info
-        env:
-          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: |
-          branch_name_and_pr_number=$(gh pr list \
-            --repo "${GITHUB_REPOSITORY}" \
-            --base release \
-            --state merged \
-            --limit 10 \
-            --json mergeCommit,headRefName,number \
-            --jq ".[] | select(.mergeCommit.oid==\"${GITHUB_SHA}\") | { branch_name: .headRefName, pr_number: .number }")
-          branch_name=$(echo "${branch_name_and_pr_number}" | jq -r '.branch_name')
-          pr_number=$(echo "${branch_name_and_pr_number}" | jq -r '.pr_number')
-
-          run_id=$(gh run list \
-            --repo "${GITHUB_REPOSITORY}" \
-            --workflow build_and_test.yml \
-            --branch "${branch_name}" \
-            --json databaseId \
-            --limit 1 \
-            --jq '.[].databaseId')
-
-          last_commit_sha=$(gh pr view "${pr_number}" \
-            --repo "${GITHUB_REPOSITORY}" \
-            --json commits \
-            --jq '.commits[-1].oid')
-
-          echo "run-id=${run_id}" | tee -a ${GITHUB_OUTPUT}
-          echo "commit-sha=${last_commit_sha}" | tee -a ${GITHUB_OUTPUT}
-
-      - name: Promote compatibility snapshot and Neon artifact
+      - name: Promote compatibility snapshot for the release
        env:
          BUCKET: neon-github-public-dev
-          AWS_REGION: eu-central-1
-          COMMIT_SHA: ${{ steps.fetch-last-release-pr-info.outputs.commit-sha }}
-          RUN_ID: ${{ steps.fetch-last-release-pr-info.outputs.run-id }}
+          PREFIX: artifacts/latest
+          COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
        run: |
-          old_prefix="artifacts/${COMMIT_SHA}/${RUN_ID}"
-          new_prefix="artifacts/latest"
-
-          files_to_promote=()
-          files_on_s3=$(aws s3api list-objects-v2 --bucket ${BUCKET} --prefix ${old_prefix} | jq -r '.Contents[]?.Key' || true)
-
-          for arch in X64 ARM64; do
+          # Update compatibility snapshot for the release
+          for pg_version in v14 v15 v16; do
            for build_type in debug release; do
-              neon_artifact_filename="neon-Linux-${arch}-${build_type}-artifact.tar.zst"
-              s3_key=$(echo "${files_on_s3}" | grep ${neon_artifact_filename} | sort --version-sort | tail -1 || true)
-              if [ -z "${s3_key}" ]; then
-                echo >&2 "Neither s3://${BUCKET}/${old_prefix}/${neon_artifact_filename} nor its version from previous attempts exist"
-                exit 1
-              fi
+              OLD_FILENAME=compatibility-snapshot-${build_type}-pg${pg_version}-${GITHUB_RUN_ID}.tar.zst
+              NEW_FILENAME=compatibility-snapshot-${build_type}-pg${pg_version}.tar.zst

-              files_to_promote+=("s3://${BUCKET}/${s3_key}")
-
-              for pg_version in v14 v15 v16; do
-                # We run less tests for debug builds, so we don't need to promote them
-                if [ "${build_type}" == "debug" ] && { [ "${arch}" == "ARM64" ] || [ "${pg_version}" != "v16" ] ; }; then
-                  continue
-                fi
-
-                compatibility_data_filename="compatibility-snapshot-${arch}-${build_type}-pg${pg_version}.tar.zst"
-                s3_key=$(echo "${files_on_s3}" | grep ${compatibility_data_filename} | sort --version-sort | tail -1 || true)
-                if [ -z "${s3_key}" ]; then
-                  echo >&2 "Neither s3://${BUCKET}/${old_prefix}/${compatibility_data_filename} nor its version from previous attempts exist"
-                  exit 1
-                fi
-
-                files_to_promote+=("s3://${BUCKET}/${s3_key}")
-              done
+              time aws s3 mv --only-show-errors s3://${BUCKET}/${PREFIX}/${OLD_FILENAME} s3://${BUCKET}/${PREFIX}/${NEW_FILENAME}
            done
          done

-          for f in "${files_to_promote[@]}"; do
-            time aws s3 cp --only-show-errors ${f} s3://${BUCKET}/${new_prefix}/
+          # Update Neon artifact for the release (reuse already uploaded artifact)
+          for build_type in debug release; do
+            OLD_PREFIX=artifacts/${COMMIT_SHA}/${GITHUB_RUN_ID}
+            FILENAME=neon-${{ runner.os }}-${{ runner.arch }}-${build_type}-artifact.tar.zst
+
+            S3_KEY=$(aws s3api list-objects-v2 --bucket ${BUCKET} --prefix ${OLD_PREFIX} | jq -r '.Contents[]?.Key' | grep ${FILENAME} | sort --version-sort | tail -1 || true)
+            if [ -z "${S3_KEY}" ]; then
+              echo >&2 "Neither s3://${BUCKET}/${OLD_PREFIX}/${FILENAME} nor its version from previous attempts exist"
+              exit 1
+            fi
+
+            time aws s3 cp --only-show-errors s3://${BUCKET}/${S3_KEY} s3://${BUCKET}/${PREFIX}/${FILENAME}
          done

  pin-build-tools-image:
--- a/.github/workflows/label-for-external-users.yml
+++ b/.github/workflows/label-for-external-users.yml
@@ -7,11 +7,6 @@ on:
  pull_request_target:
    types:
      - opened
-  workflow_dispatch:
-    inputs:
-      github-actor:
-        description: 'GitHub username. If empty, the username of the current user will be used'
-        required: false

 # No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job.
 permissions: {}
@@ -31,31 +26,12 @@ jobs:
      id: check-user
      env:
        GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
-        ACTOR: ${{ inputs.github-actor || github.actor }}
      run: |
-        expected_error="User does not exist or is not a member of the organization"
-        output_file=output.txt
-
-        for i in $(seq 1 10); do
-          if gh api "/orgs/${GITHUB_REPOSITORY_OWNER}/members/${ACTOR}" \
-              -H "Accept: application/vnd.github+json" \
-              -H "X-GitHub-Api-Version: 2022-11-28" > ${output_file}; then
-
-            is_member=true
-            break
-          elif grep -q "${expected_error}" ${output_file}; then
-            is_member=false
-            break
-          elif [ $i -eq 10 ]; then
-            title="Failed to get memmbership status for ${ACTOR}"
-            message="The latest GitHub API error message: '$(cat ${output_file})'"
-            echo "::error file=.github/workflows/label-for-external-users.yml,title=${title}::${message}"
-
-            exit 1
-          fi
-
-          sleep 1
-        done
+        if gh api -H "Accept: application/vnd.github+json" -H "X-GitHub-Api-Version: 2022-11-28" "/orgs/${GITHUB_REPOSITORY_OWNER}/members/${GITHUB_ACTOR}"; then
+          is_member=true
+        else
+          is_member=false
+        fi

        echo "is-member=${is_member}" | tee -a ${GITHUB_OUTPUT}

--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -64,8 +64,7 @@ aws-types = "1.2.0"
 axum = { version = "0.6.20", features = ["ws"] }
 base64 = "0.13.0"
 bincode = "1.3"
-bindgen = "0.70"
-bit_field = "0.10.2"
+bindgen = "0.65"
 bstr = "1.0"
 byteorder = "1.4"
 bytes = "1.0"
@@ -73,7 +72,7 @@ camino = "1.1.6"
 cfg-if = "1.0.0"
 chrono = { version = "0.4", default-features = false, features = ["clock"] }
 clap = { version = "4.0", features = ["derive"] }
-comfy-table = "7.1"
+comfy-table = "6.1"
 const_format = "0.2"
 crc32c = "0.6"
 crossbeam-deque = "0.8.5"
@@ -103,18 +102,18 @@ humantime-serde = "1.1.1"
 hyper = "0.14"
 tokio-tungstenite = "0.20.0"
 indexmap = "2"
-indoc = "2"
 inotify = "0.10.2"
 ipnet = "2.9.0"
 itertools = "0.10"
 jsonwebtoken = "9"
 lasso = "0.7"
+leaky-bucket = "1.0.1"
 libc = "0.2"
 md5 = "0.7.0"
 measured = { version = "0.0.22", features=["lasso"] }
 measured-process = { version = "0.0.22" }
 memoffset = "0.8"
-nix = { version = "0.27", features = ["dir", "fs", "process", "socket", "signal", "poll"] }
+nix = { version = "0.27", features = ["fs", "process", "socket", "signal", "poll"] }
 notify = "6.0.0"
 num_cpus = "1.15"
 num-traits = "0.2.15"
@@ -123,8 +122,8 @@ opentelemetry = "0.20.0"
 opentelemetry-otlp = { version = "0.13.0", default-features=false, features = ["http-proto", "trace", "http", "reqwest-client"] }
 opentelemetry-semantic-conventions = "0.12.0"
 parking_lot = "0.12"
-parquet = { version = "53", default-features = false, features = ["zstd"] }
-parquet_derive = "53"
+parquet = { version = "51.0.0", default-features = false, features = ["zstd"] }
+parquet_derive = "51.0.0"
 pbkdf2 = { version = "0.12.1", features = ["simple", "std"] }
 pin-project-lite = "0.2"
 procfs = "0.16"
@@ -140,13 +139,12 @@ reqwest-retry = "0.5"
 routerify = "3"
 rpds = "0.13"
 rustc-hash = "1.1.0"
-rustls = "0.22"
+rustls = "0.23"
 rustls-pemfile = "2"
 rustls-split = "0.3"
 scopeguard = "1.1"
 sysinfo = "0.29.2"
 sd-notify = "0.4.1"
-send-future = "0.1.0"
 sentry = { version = "0.32", default-features = false, features = ["backtrace", "contexts", "panic", "rustls", "reqwest" ] }
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1"
@@ -158,10 +156,11 @@ signal-hook = "0.3"
 smallvec = "1.11"
 smol_str = { version = "0.2.0", features = ["serde"] }
 socket2 = "0.5"
-strum = "0.26"
-strum_macros = "0.26"
+strum = "0.24"
+strum_macros = "0.24"
 "subtle"  = "2.5.0"
-svg_fmt = "0.4.3"
+# Our PR https://github.com/nical/rust_debug/pull/4 has been merged but no new version released yet
+svg_fmt = { git = "https://github.com/nical/rust_debug", rev = "28a7d96eecff2f28e75b1ea09f2d499a60d0e3b4" }
 sync_wrapper = "0.1.2"
 tar = "0.4"
 task-local-extensions = "0.1.4"
@@ -172,13 +171,13 @@ tikv-jemalloc-ctl = "0.5"
 tokio = { version = "1.17", features = ["macros"] }
 tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" }
 tokio-io-timeout = "1.2.0"
-tokio-postgres-rustls = "0.11.0"
-tokio-rustls = "0.25"
+tokio-postgres-rustls = "0.12.0"
+tokio-rustls = "0.26"
 tokio-stream = "0.1"
 tokio-tar = "0.3"
 tokio-util = { version = "0.7.10", features = ["io", "rt"] }
-toml = "0.8"
-toml_edit = "0.22"
+toml = "0.7"
+toml_edit = "0.19"
 tonic = {version = "0.9", features = ["tls", "tls-roots"]}
 tower-service = "0.3.2"
 tracing = "0.1"
@@ -201,21 +200,10 @@ env_logger = "0.10"
 log = "0.4"

 ## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed
-
-# We want to use the 'neon' branch for these, but there's currently one
-# incompatible change on the branch. See:
-#
-# - PR #8076 which contained changes that depended on the new changes in
-#   the rust-postgres crate, and
-# - PR #8654 which reverted those changes and made the code in proxy incompatible
-#   with the tip of the 'neon' branch again.
-#
-# When those proxy changes are re-applied (see PR #8747), we can switch using
-# the tip of the 'neon' branch again.
-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "20031d7a9ee1addeae6e0968e3899ae6bf01cee2" }
-postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "20031d7a9ee1addeae6e0968e3899ae6bf01cee2" }
-postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "20031d7a9ee1addeae6e0968e3899ae6bf01cee2" }
-tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "20031d7a9ee1addeae6e0968e3899ae6bf01cee2" }
+postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
+postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
+postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
+tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }

 ## Local libraries
 compute_api = { version = "0.1", path = "./libs/compute_api/" }
@@ -244,7 +232,7 @@ workspace_hack = { version = "0.1", path = "./workspace_hack/" }

 ## Build dependencies
 criterion = "0.5.1"
-rcgen = "0.12"
+rcgen = "0.13"
 rstest = "0.18"
 camino-tempfile = "1.0.2"
 tonic-build = "0.9"
@@ -252,7 +240,11 @@ tonic-build = "0.9"
 [patch.crates-io]

 # Needed to get `tokio-postgres-rustls` to depend on our fork.
-tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "20031d7a9ee1addeae6e0968e3899ae6bf01cee2" }
+tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
+
+# bug fixes for UUID
+parquet = { git = "https://github.com/apache/arrow-rs", branch = "master" }
+parquet_derive = { git = "https://github.com/apache/arrow-rs", branch = "master" }

 ################# Binary contents sections

--- a/1
+++ b/1
@@ -87,7 +87,6 @@ RUN mkdir -p /data/.neon/ && \
       "pg_distrib_dir='/usr/local/'\n" \
       "listen_pg_addr='0.0.0.0:6400'\n" \
       "listen_http_addr='0.0.0.0:9898'\n" \
-       "availability_zone='local'\n" \
  > /data/.neon/pageserver.toml && \
  chown -R neon:neon /data/.neon

--- a/Dockerfile.build-tools
+++ b/Dockerfile.build-tools
@@ -192,7 +192,7 @@ WORKDIR /home/nonroot

 # Rust
 # Please keep the version of llvm (installed above) in sync with rust llvm (`rustc --version --verbose | grep LLVM`)
-ENV RUSTC_VERSION=1.81.0
+ENV RUSTC_VERSION=1.80.1
 ENV RUSTUP_HOME="/home/nonroot/.rustup"
 ENV PATH="/home/nonroot/.cargo/bin:${PATH}"
 ARG RUSTFILT_VERSION=0.2.1
@@ -207,7 +207,7 @@ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux
    export PATH="$HOME/.cargo/bin:$PATH" && \
    . "$HOME/.cargo/env" && \
    cargo --version && rustup --version && \
-    rustup component add llvm-tools rustfmt clippy && \
+    rustup component add llvm-tools-preview rustfmt clippy && \
    cargo install rustfilt            --version ${RUSTFILT_VERSION} && \
    cargo install cargo-hakari        --version ${CARGO_HAKARI_VERSION} && \
    cargo install cargo-deny --locked --version ${CARGO_DENY_VERSION} && \
--- a/Dockerfile.compute-node
+++ b/Dockerfile.compute-node
@@ -942,7 +942,7 @@ COPY --from=hll-pg-build /hll.tar.gz /ext-src
 COPY --from=plpgsql-check-pg-build /plpgsql_check.tar.gz /ext-src
 #COPY --from=timescaledb-pg-build /timescaledb.tar.gz /ext-src
 COPY --from=pg-hint-plan-pg-build /pg_hint_plan.tar.gz /ext-src
-COPY patches/pg_hint_plan.patch /ext-src
+COPY patches/pg_hintplan.patch /ext-src
 COPY --from=pg-cron-pg-build /pg_cron.tar.gz /ext-src
 COPY patches/pg_cron.patch /ext-src
 #COPY --from=pg-pgx-ulid-build /home/nonroot/pgx_ulid.tar.gz /ext-src
@@ -964,7 +964,7 @@ RUN cd /ext-src/pgvector-src && patch -p1 <../pgvector.patch
 RUN cd /ext-src/rum-src && patch -p1 <../rum.patch
 # cmake is required for the h3 test
 RUN apt-get update && apt-get install -y cmake
-RUN cd /ext-src/pg_hint_plan-src && patch -p1 < /ext-src/pg_hint_plan.patch
+RUN patch -p1 < /ext-src/pg_hintplan.patch
 COPY --chmod=755 docker-compose/run-tests.sh /run-tests.sh
 RUN patch -p1 </ext-src/pg_anon.patch
 RUN patch -p1 </ext-src/pg_cron.patch
--- a/README.md
+++ b/README.md
@@ -64,12 +64,6 @@ brew install protobuf openssl flex bison icu4c pkg-config
 echo 'export PATH="$(brew --prefix openssl)/bin:$PATH"' >> ~/.zshrc
 ```

-If you get errors about missing `m4` you may have to install it manually:
-```
-brew install m4
-brew link --force m4
-```
-
 2. [Install Rust](https://www.rust-lang.org/tools/install)
 ```
 # recommended approach from https://www.rust-lang.org/tools/install
@@ -132,7 +126,7 @@ make -j`sysctl -n hw.logicalcpu` -s
 To run the `psql` client, install the `postgresql-client` package or modify `PATH` and `LD_LIBRARY_PATH` to include `pg_install/bin` and `pg_install/lib`, respectively.

 To run the integration tests or Python scripts (not required to use the code), install
-Python (3.9 or higher), and install the python3 packages using `./scripts/pysync` (requires [poetry>=1.8](https://python-poetry.org/)) in the project directory.
+Python (3.9 or higher), and install the python3 packages using `./scripts/pysync` (requires [poetry>=1.3](https://python-poetry.org/)) in the project directory.


 #### Running neon database
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -44,7 +44,6 @@ use std::{thread, time::Duration};
 use anyhow::{Context, Result};
 use chrono::Utc;
 use clap::Arg;
-use compute_tools::lsn_lease::launch_lsn_lease_bg_task_for_static;
 use signal_hook::consts::{SIGQUIT, SIGTERM};
 use signal_hook::{consts::SIGINT, iterator::Signals};
 use tracing::{error, info, warn};
@@ -367,8 +366,6 @@ fn wait_spec(
        state.start_time = now;
    }

-    launch_lsn_lease_bg_task_for_static(&compute);
-
    Ok(WaitSpecResult {
        compute,
        http_port,
--- a/compute_tools/src/lib.rs
+++ b/compute_tools/src/lib.rs
@@ -11,7 +11,6 @@ pub mod logger;
 pub mod catalog;
 pub mod compute;
 pub mod extension_server;
-pub mod lsn_lease;
 mod migration;
 pub mod monitor;
 pub mod params;
--- a/compute_tools/src/lsn_lease.rs
+++ b/compute_tools/src/lsn_lease.rs
@@ -1,186 +0,0 @@
-use anyhow::bail;
-use anyhow::Result;
-use postgres::{NoTls, SimpleQueryMessage};
-use std::time::SystemTime;
-use std::{str::FromStr, sync::Arc, thread, time::Duration};
-use utils::id::TenantId;
-use utils::id::TimelineId;
-
-use compute_api::spec::ComputeMode;
-use tracing::{info, warn};
-use utils::{
-    lsn::Lsn,
-    shard::{ShardCount, ShardNumber, TenantShardId},
-};
-
-use crate::compute::ComputeNode;
-
-/// Spawns a background thread to periodically renew LSN leases for static compute.
-/// Do nothing if the compute is not in static mode.
-pub fn launch_lsn_lease_bg_task_for_static(compute: &Arc<ComputeNode>) {
-    let (tenant_id, timeline_id, lsn) = {
-        let state = compute.state.lock().unwrap();
-        let spec = state.pspec.as_ref().expect("Spec must be set");
-        match spec.spec.mode {
-            ComputeMode::Static(lsn) => (spec.tenant_id, spec.timeline_id, lsn),
-            _ => return,
-        }
-    };
-    let compute = compute.clone();
-
-    let span = tracing::info_span!("lsn_lease_bg_task", %tenant_id, %timeline_id, %lsn);
-    thread::spawn(move || {
-        let _entered = span.entered();
-        if let Err(e) = lsn_lease_bg_task(compute, tenant_id, timeline_id, lsn) {
-            // TODO: might need stronger error feedback than logging an warning.
-            warn!("Exited with error: {e}");
-        }
-    });
-}
-
-/// Renews lsn lease periodically so static compute are not affected by GC.
-fn lsn_lease_bg_task(
-    compute: Arc<ComputeNode>,
-    tenant_id: TenantId,
-    timeline_id: TimelineId,
-    lsn: Lsn,
-) -> Result<()> {
-    loop {
-        let valid_until = acquire_lsn_lease_with_retry(&compute, tenant_id, timeline_id, lsn)?;
-        let valid_duration = valid_until
-            .duration_since(SystemTime::now())
-            .unwrap_or(Duration::ZERO);
-
-        // Sleep for 60 seconds less than the valid duration but no more than half of the valid duration.
-        let sleep_duration = valid_duration
-            .saturating_sub(Duration::from_secs(60))
-            .max(valid_duration / 2);
-
-        info!(
-            "Succeeded, sleeping for {} seconds",
-            sleep_duration.as_secs()
-        );
-        thread::sleep(sleep_duration);
-    }
-}
-
-/// Acquires lsn lease in a retry loop. Returns the expiration time if a lease is granted.
-/// Returns an error if a lease is explicitly not granted. Otherwise, we keep sending requests.
-fn acquire_lsn_lease_with_retry(
-    compute: &Arc<ComputeNode>,
-    tenant_id: TenantId,
-    timeline_id: TimelineId,
-    lsn: Lsn,
-) -> Result<SystemTime> {
-    let mut attempts = 0usize;
-    let mut retry_period_ms: f64 = 500.0;
-    const MAX_RETRY_PERIOD_MS: f64 = 60.0 * 1000.0;
-
-    loop {
-        // Note: List of pageservers is dynamic, need to re-read configs before each attempt.
-        let configs = {
-            let state = compute.state.lock().unwrap();
-
-            let spec = state.pspec.as_ref().expect("spec must be set");
-
-            let conn_strings = spec.pageserver_connstr.split(',');
-
-            conn_strings
-                .map(|connstr| {
-                    let mut config = postgres::Config::from_str(connstr).expect("Invalid connstr");
-                    if let Some(storage_auth_token) = &spec.storage_auth_token {
-                        info!("Got storage auth token from spec file");
-                        config.password(storage_auth_token.clone());
-                    } else {
-                        info!("Storage auth token not set");
-                    }
-                    config
-                })
-                .collect::<Vec<_>>()
-        };
-
-        let result = try_acquire_lsn_lease(tenant_id, timeline_id, lsn, &configs);
-        match result {
-            Ok(Some(res)) => {
-                return Ok(res);
-            }
-            Ok(None) => {
-                bail!("Permanent error: lease could not be obtained, LSN is behind the GC cutoff");
-            }
-            Err(e) => {
-                warn!("Failed to acquire lsn lease: {e} (attempt {attempts}");
-
-                thread::sleep(Duration::from_millis(retry_period_ms as u64));
-                retry_period_ms *= 1.5;
-                retry_period_ms = retry_period_ms.min(MAX_RETRY_PERIOD_MS);
-            }
-        }
-        attempts += 1;
-    }
-}
-
-/// Tries to acquire an LSN lease through PS page_service API.
-fn try_acquire_lsn_lease(
-    tenant_id: TenantId,
-    timeline_id: TimelineId,
-    lsn: Lsn,
-    configs: &[postgres::Config],
-) -> Result<Option<SystemTime>> {
-    fn get_valid_until(
-        config: &postgres::Config,
-        tenant_shard_id: TenantShardId,
-        timeline_id: TimelineId,
-        lsn: Lsn,
-    ) -> Result<Option<SystemTime>> {
-        let mut client = config.connect(NoTls)?;
-        let cmd = format!("lease lsn {} {} {} ", tenant_shard_id, timeline_id, lsn);
-        let res = client.simple_query(&cmd)?;
-        let msg = match res.first() {
-            Some(msg) => msg,
-            None => bail!("empty response"),
-        };
-        let row = match msg {
-            SimpleQueryMessage::Row(row) => row,
-            _ => bail!("error parsing lsn lease response"),
-        };
-
-        // Note: this will be None if a lease is explicitly not granted.
-        let valid_until_str = row.get("valid_until");
-
-        let valid_until = valid_until_str.map(|s| {
-            SystemTime::UNIX_EPOCH
-                .checked_add(Duration::from_millis(u128::from_str(s).unwrap() as u64))
-                .expect("Time larger than max SystemTime could handle")
-        });
-        Ok(valid_until)
-    }
-
-    let shard_count = configs.len();
-
-    let valid_until = if shard_count > 1 {
-        configs
-            .iter()
-            .enumerate()
-            .map(|(shard_number, config)| {
-                let tenant_shard_id = TenantShardId {
-                    tenant_id,
-                    shard_count: ShardCount::new(shard_count as u8),
-                    shard_number: ShardNumber(shard_number as u8),
-                };
-                get_valid_until(config, tenant_shard_id, timeline_id, lsn)
-            })
-            .collect::<Result<Vec<Option<SystemTime>>>>()?
-            .into_iter()
-            .min()
-            .unwrap()
-    } else {
-        get_valid_until(
-            &configs[0],
-            TenantShardId::unsharded(tenant_id),
-            timeline_id,
-            lsn,
-        )?
-    };
-
-    Ok(valid_until)
-}
--- a/compute_tools/src/pg_helpers.rs
+++ b/compute_tools/src/pg_helpers.rs
@@ -22,10 +22,9 @@ use compute_api::spec::{Database, GenericOption, GenericOptions, PgIdent, Role};

 const POSTGRES_WAIT_TIMEOUT: Duration = Duration::from_millis(60 * 1000); // milliseconds

-/// Escape a string for including it in a SQL literal.
-///
-/// Wrapping the result with `E'{}'` or `'{}'` is not required,
-/// as it returns a ready-to-use SQL string literal, e.g. `'db'''` or `E'db\\'`.
+/// Escape a string for including it in a SQL literal. Wrapping the result
+/// with `E'{}'` or `'{}'` is not required, as it returns a ready-to-use
+/// SQL string literal, e.g. `'db'''` or `E'db\\'`.
 /// See <https://github.com/postgres/postgres/blob/da98d005cdbcd45af563d0c4ac86d0e9772cd15f/src/backend/utils/adt/quote.c#L47>
 /// for the original implementation.
 pub fn escape_literal(s: &str) -> String {
--- a/control_plane/Cargo.toml
+++ b/control_plane/Cargo.toml
@@ -6,6 +6,7 @@ license.workspace = true

 [dependencies]
 anyhow.workspace = true
+async-trait.workspace = true
 camino.workspace = true
 clap.workspace = true
 comfy-table.workspace = true
--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -640,8 +640,6 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local
        }
        Some(("branch", branch_match)) => {
            let tenant_id = get_tenant_id(branch_match, env)?;
-            let new_timeline_id =
-                parse_timeline_id(branch_match)?.unwrap_or(TimelineId::generate());
            let new_branch_name = branch_match
                .get_one::<String>("branch-name")
                .ok_or_else(|| anyhow!("No branch name provided"))?;
@@ -660,6 +658,7 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local
                .map(|lsn_str| Lsn::from_str(lsn_str))
                .transpose()
                .context("Failed to parse ancestor start Lsn from the request")?;
+            let new_timeline_id = TimelineId::generate();
            let storage_controller = StorageController::from_env(env);
            let create_req = TimelineCreateRequest {
                new_timeline_id,
@@ -1571,6 +1570,7 @@ fn cli() -> Command {
                        .value_parser(value_parser!(PathBuf))
                        .value_name("config")
                )
+                .arg(pg_version_arg.clone())
                .arg(force_arg)
        )
        .subcommand(
@@ -1583,7 +1583,6 @@ fn cli() -> Command {
            .subcommand(Command::new("branch")
                .about("Create a new timeline, using another timeline as a base, copying its data")
                .arg(tenant_id_arg.clone())
-                .arg(timeline_id_arg.clone())
                .arg(branch_name_arg.clone())
                .arg(Arg::new("ancestor-branch-name").long("ancestor-branch-name")
                    .help("Use last Lsn of another timeline (and its data) as base when creating the new timeline. The timeline gets resolved by its branch name.").required(false))
--- a/control_plane/src/local_env.rs
+++ b/control_plane/src/local_env.rs
@@ -165,9 +165,6 @@ pub struct NeonStorageControllerConf {
    pub split_threshold: Option<u64>,

    pub max_secondary_lag_bytes: Option<u64>,
-
-    #[serde(with = "humantime_serde")]
-    pub heartbeat_interval: Duration,
 }

 impl NeonStorageControllerConf {
@@ -175,9 +172,6 @@ impl NeonStorageControllerConf {
    const DEFAULT_MAX_OFFLINE_INTERVAL: std::time::Duration = std::time::Duration::from_secs(10);

    const DEFAULT_MAX_WARMING_UP_INTERVAL: std::time::Duration = std::time::Duration::from_secs(30);
-
-    // Very tight heartbeat interval to speed up tests
-    const DEFAULT_HEARTBEAT_INTERVAL: std::time::Duration = std::time::Duration::from_millis(100);
 }

 impl Default for NeonStorageControllerConf {
@@ -189,7 +183,6 @@ impl Default for NeonStorageControllerConf {
            database_url: None,
            split_threshold: None,
            max_secondary_lag_bytes: None,
-            heartbeat_interval: Self::DEFAULT_HEARTBEAT_INTERVAL,
        }
    }
 }
--- a/control_plane/src/pageserver.rs
+++ b/control_plane/src/pageserver.rs
@@ -75,14 +75,14 @@ impl PageServerNode {
        }
    }

-    fn pageserver_make_identity_toml(&self, node_id: NodeId) -> toml_edit::DocumentMut {
-        toml_edit::DocumentMut::from_str(&format!("id={node_id}")).unwrap()
+    fn pageserver_make_identity_toml(&self, node_id: NodeId) -> toml_edit::Document {
+        toml_edit::Document::from_str(&format!("id={node_id}")).unwrap()
    }

    fn pageserver_init_make_toml(
        &self,
        conf: NeonLocalInitPageserverConf,
-    ) -> anyhow::Result<toml_edit::DocumentMut> {
+    ) -> anyhow::Result<toml_edit::Document> {
        assert_eq!(&PageServerConf::from(&conf), &self.conf, "during neon_local init, we derive the runtime state of ps conf (self.conf) from the --config flag fully");

        // TODO(christian): instead of what we do here, create a pageserver_api::config::ConfigToml (PR #7656)
@@ -137,9 +137,9 @@ impl PageServerNode {

        // Turn `overrides` into a toml document.
        // TODO: above code is legacy code, it should be refactored to use toml_edit directly.
-        let mut config_toml = toml_edit::DocumentMut::new();
+        let mut config_toml = toml_edit::Document::new();
        for fragment_str in overrides {
-            let fragment = toml_edit::DocumentMut::from_str(&fragment_str)
+            let fragment = toml_edit::Document::from_str(&fragment_str)
                .expect("all fragments in `overrides` are valid toml documents, this function controls that");
            for (key, item) in fragment.iter() {
                config_toml.insert(key, item.clone());
@@ -181,23 +181,6 @@ impl PageServerNode {
        );
        io::stdout().flush()?;

-        // If the config file we got as a CLI argument includes the `availability_zone`
-        // config, then use that to populate the `metadata.json` file for the pageserver.
-        // In production the deployment orchestrator does this for us.
-        let az_id = conf
-            .other
-            .get("availability_zone")
-            .map(|toml| {
-                let az_str = toml.to_string();
-                // Trim the (") chars from the toml representation
-                if az_str.starts_with('"') && az_str.ends_with('"') {
-                    az_str[1..az_str.len() - 1].to_string()
-                } else {
-                    az_str
-                }
-            })
-            .unwrap_or("local".to_string());
-
        let config = self
            .pageserver_init_make_toml(conf)
            .context("make pageserver toml")?;
@@ -233,7 +216,6 @@ impl PageServerNode {
        let (_http_host, http_port) =
            parse_host_port(&self.conf.listen_http_addr).expect("Unable to parse listen_http_addr");
        let http_port = http_port.unwrap_or(9898);
-
        // Intentionally hand-craft JSON: this acts as an implicit format compat test
        // in case the pageserver-side structure is edited, and reflects the real life
        // situation: the metadata is written by some other script.
@@ -244,10 +226,7 @@ impl PageServerNode {
                postgres_port: self.pg_connection_config.port(),
                http_host: "localhost".to_string(),
                http_port,
-                other: HashMap::from([(
-                    "availability_zone_id".to_string(),
-                    serde_json::json!(az_id),
-                )]),
+                other: HashMap::new(),
            })
            .unwrap(),
        )
--- a/control_plane/src/safekeeper.rs
+++ b/control_plane/src/safekeeper.rs
@@ -5,7 +5,6 @@
 //! ```text
 //!   .neon/safekeepers/<safekeeper id>
 //! ```
-use std::future::Future;
 use std::io::Write;
 use std::path::PathBuf;
 use std::time::Duration;
@@ -35,10 +34,12 @@ pub enum SafekeeperHttpError {

 type Result<T> = result::Result<T, SafekeeperHttpError>;

-pub(crate) trait ResponseErrorMessageExt: Sized {
-    fn error_from_body(self) -> impl Future<Output = Result<Self>> + Send;
+#[async_trait::async_trait]
+pub trait ResponseErrorMessageExt: Sized {
+    async fn error_from_body(self) -> Result<Self>;
 }

+#[async_trait::async_trait]
 impl ResponseErrorMessageExt for reqwest::Response {
    async fn error_from_body(self) -> Result<Self> {
        let status = self.status();
--- a/control_plane/src/storage_controller.rs
+++ b/control_plane/src/storage_controller.rs
@@ -437,8 +437,6 @@ impl StorageController {
            &humantime::Duration::from(self.config.max_offline).to_string(),
            "--max-warming-up-interval",
            &humantime::Duration::from(self.config.max_warming_up).to_string(),
-            "--heartbeat-interval",
-            &humantime::Duration::from(self.config.heartbeat_interval).to_string(),
            "--address-for-peers",
            &address_for_peers.to_string(),
        ]
--- a/control_plane/storcon_cli/src/main.rs
+++ b/control_plane/storcon_cli/src/main.rs
@@ -4,8 +4,8 @@ use std::{str::FromStr, time::Duration};
 use clap::{Parser, Subcommand};
 use pageserver_api::{
    controller_api::{
-        NodeAvailabilityWrapper, NodeDescribeResponse, NodeShardResponse, ShardSchedulingPolicy,
-        TenantCreateRequest, TenantDescribeResponse, TenantPolicyRequest,
+        NodeAvailabilityWrapper, NodeDescribeResponse, ShardSchedulingPolicy, TenantCreateRequest,
+        TenantDescribeResponse, TenantPolicyRequest,
    },
    models::{
        EvictionPolicy, EvictionPolicyLayerAccessThreshold, LocationConfigSecondary,
@@ -41,8 +41,6 @@ enum Command {
        listen_http_addr: String,
        #[arg(long)]
        listen_http_port: u16,
-        #[arg(long)]
-        availability_zone_id: String,
    },

    /// Modify a node's configuration in the storage controller
@@ -80,10 +78,7 @@ enum Command {
    /// List nodes known to the storage controller
    Nodes {},
    /// List tenants known to the storage controller
-    Tenants {
-        /// If this field is set, it will list the tenants on a specific node
-        node_id: Option<NodeId>,
-    },
+    Tenants {},
    /// Create a new tenant in the storage controller, and by extension on pageservers.
    TenantCreate {
        #[arg(long)]
@@ -152,9 +147,9 @@ enum Command {
        #[arg(long)]
        threshold: humantime::Duration,
    },
-    // Migrate away from a set of specified pageservers by moving the primary attachments to pageservers
+    // Drain a set of specified pageservers by moving the primary attachments to pageservers
    // outside of the specified set.
-    BulkMigrate {
+    Drain {
        // Set of pageserver node ids to drain.
        #[arg(long)]
        nodes: Vec<NodeId>,
@@ -168,34 +163,6 @@ enum Command {
        #[arg(long)]
        dry_run: Option<bool>,
    },
-    /// Start draining the specified pageserver.
-    /// The drain is complete when the schedulling policy returns to active.
-    StartDrain {
-        #[arg(long)]
-        node_id: NodeId,
-    },
-    /// Cancel draining the specified pageserver and wait for `timeout`
-    /// for the operation to be canceled. May be retried.
-    CancelDrain {
-        #[arg(long)]
-        node_id: NodeId,
-        #[arg(long)]
-        timeout: humantime::Duration,
-    },
-    /// Start filling the specified pageserver.
-    /// The drain is complete when the schedulling policy returns to active.
-    StartFill {
-        #[arg(long)]
-        node_id: NodeId,
-    },
-    /// Cancel filling the specified pageserver and wait for `timeout`
-    /// for the operation to be canceled. May be retried.
-    CancelFill {
-        #[arg(long)]
-        node_id: NodeId,
-        #[arg(long)]
-        timeout: humantime::Duration,
-    },
 }

 #[derive(Parser)]
@@ -282,34 +249,6 @@ impl FromStr for NodeAvailabilityArg {
    }
 }

-async fn wait_for_scheduling_policy<F>(
-    client: Client,
-    node_id: NodeId,
-    timeout: Duration,
-    f: F,
-) -> anyhow::Result<NodeSchedulingPolicy>
-where
-    F: Fn(NodeSchedulingPolicy) -> bool,
-{
-    let waiter = tokio::time::timeout(timeout, async move {
-        loop {
-            let node = client
-                .dispatch::<(), NodeDescribeResponse>(
-                    Method::GET,
-                    format!("control/v1/node/{node_id}"),
-                    None,
-                )
-                .await?;
-
-            if f(node.scheduling) {
-                return Ok::<NodeSchedulingPolicy, mgmt_api::Error>(node.scheduling);
-            }
-        }
-    });
-
-    Ok(waiter.await??)
-}
-
 #[tokio::main]
 async fn main() -> anyhow::Result<()> {
    let cli = Cli::parse();
@@ -327,7 +266,6 @@ async fn main() -> anyhow::Result<()> {
            listen_pg_port,
            listen_http_addr,
            listen_http_port,
-            availability_zone_id,
        } => {
            storcon_client
                .dispatch::<_, ()>(
@@ -339,7 +277,6 @@ async fn main() -> anyhow::Result<()> {
                        listen_pg_port,
                        listen_http_addr,
                        listen_http_port,
-                        availability_zone_id,
                    }),
                )
                .await?;
@@ -406,41 +343,7 @@ async fn main() -> anyhow::Result<()> {
                )
                .await?;
        }
-        Command::Tenants {
-            node_id: Some(node_id),
-        } => {
-            let describe_response = storcon_client
-                .dispatch::<(), NodeShardResponse>(
-                    Method::GET,
-                    format!("control/v1/node/{node_id}/shards"),
-                    None,
-                )
-                .await?;
-            let shards = describe_response.shards;
-            let mut table = comfy_table::Table::new();
-            table.set_header([
-                "Shard",
-                "Intended Primary/Secondary",
-                "Observed Primary/Secondary",
-            ]);
-            for shard in shards {
-                table.add_row([
-                    format!("{}", shard.tenant_shard_id),
-                    match shard.is_intended_secondary {
-                        None => "".to_string(),
-                        Some(true) => "Secondary".to_string(),
-                        Some(false) => "Primary".to_string(),
-                    },
-                    match shard.is_observed_secondary {
-                        None => "".to_string(),
-                        Some(true) => "Secondary".to_string(),
-                        Some(false) => "Primary".to_string(),
-                    },
-                ]);
-            }
-            println!("{table}");
-        }
-        Command::Tenants { node_id: None } => {
+        Command::Tenants {} => {
            let mut resp = storcon_client
                .dispatch::<(), Vec<TenantDescribeResponse>>(
                    Method::GET,
@@ -725,7 +628,7 @@ async fn main() -> anyhow::Result<()> {
                })
                .await?;
        }
-        Command::BulkMigrate {
+        Command::Drain {
            nodes,
            concurrency,
            max_shards,
@@ -754,7 +657,7 @@ async fn main() -> anyhow::Result<()> {
            }

            if nodes.len() != node_to_drain_descs.len() {
-                anyhow::bail!("Bulk migration requested away from node which doesn't exist.")
+                anyhow::bail!("Drain requested for node which doesn't exist.")
            }

            node_to_fill_descs.retain(|desc| {
@@ -766,7 +669,7 @@ async fn main() -> anyhow::Result<()> {
            });

            if node_to_fill_descs.is_empty() {
-                anyhow::bail!("There are no nodes to migrate to")
+                anyhow::bail!("There are no nodes to drain to")
            }

            // Set the node scheduling policy to draining for the nodes which
@@ -787,7 +690,7 @@ async fn main() -> anyhow::Result<()> {
                    .await?;
            }

-            // Perform the migration: move each tenant shard scheduled on a node to
+            // Perform the drain: move each tenant shard scheduled on a node to
            // be drained to a node which is being filled. A simple round robin
            // strategy is used to pick the new node.
            let tenants = storcon_client
@@ -800,13 +703,13 @@ async fn main() -> anyhow::Result<()> {

            let mut selected_node_idx = 0;

-            struct MigrationMove {
+            struct DrainMove {
                tenant_shard_id: TenantShardId,
                from: NodeId,
                to: NodeId,
            }

-            let mut moves: Vec<MigrationMove> = Vec::new();
+            let mut moves: Vec<DrainMove> = Vec::new();

            let shards = tenants
                .into_iter()
@@ -836,7 +739,7 @@ async fn main() -> anyhow::Result<()> {
                    continue;
                }

-                moves.push(MigrationMove {
+                moves.push(DrainMove {
                    tenant_shard_id: shard.tenant_shard_id,
                    from: shard
                        .node_attached
@@ -913,67 +816,6 @@ async fn main() -> anyhow::Result<()> {
                failure
            );
        }
-        Command::StartDrain { node_id } => {
-            storcon_client
-                .dispatch::<(), ()>(
-                    Method::PUT,
-                    format!("control/v1/node/{node_id}/drain"),
-                    None,
-                )
-                .await?;
-            println!("Drain started for {node_id}");
-        }
-        Command::CancelDrain { node_id, timeout } => {
-            storcon_client
-                .dispatch::<(), ()>(
-                    Method::DELETE,
-                    format!("control/v1/node/{node_id}/drain"),
-                    None,
-                )
-                .await?;
-
-            println!("Waiting for node {node_id} to quiesce on scheduling policy ...");
-
-            let final_policy =
-                wait_for_scheduling_policy(storcon_client, node_id, *timeout, |sched| {
-                    use NodeSchedulingPolicy::*;
-                    matches!(sched, Active | PauseForRestart)
-                })
-                .await?;
-
-            println!(
-                "Drain was cancelled for node {node_id}. Schedulling policy is now {final_policy:?}"
-            );
-        }
-        Command::StartFill { node_id } => {
-            storcon_client
-                .dispatch::<(), ()>(Method::PUT, format!("control/v1/node/{node_id}/fill"), None)
-                .await?;
-
-            println!("Fill started for {node_id}");
-        }
-        Command::CancelFill { node_id, timeout } => {
-            storcon_client
-                .dispatch::<(), ()>(
-                    Method::DELETE,
-                    format!("control/v1/node/{node_id}/fill"),
-                    None,
-                )
-                .await?;
-
-            println!("Waiting for node {node_id} to quiesce on scheduling policy ...");
-
-            let final_policy =
-                wait_for_scheduling_policy(storcon_client, node_id, *timeout, |sched| {
-                    use NodeSchedulingPolicy::*;
-                    matches!(sched, Active)
-                })
-                .await?;
-
-            println!(
-                "Fill was cancelled for node {node_id}. Schedulling policy is now {final_policy:?}"
-            );
-        }
    }

    Ok(())
--- a/docker-compose/run-tests.sh
+++ b/docker-compose/run-tests.sh
@@ -3,7 +3,7 @@ set -x

 cd /ext-src || exit 2
 FAILED=
-LIST=$( (echo -e "${SKIP//","/"\n"}"; ls -d -- *-src) | sort | uniq -u)
+LIST=$( (echo "${SKIP//","/"\n"}"; ls -d -- *-src) | sort | uniq -u)
 for d in ${LIST}
 do
       [ -d "${d}" ] || continue
--- a/docs/rfcs/037-storage-controller-restarts.md
+++ b/docs/rfcs/037-storage-controller-restarts.md
@@ -1,259 +0,0 @@
-# Rolling Storage Controller Restarts
-
-## Summary
-
-This RFC describes the issues around the current storage controller restart procedure
-and describes an implementation which reduces downtime to a few milliseconds on the happy path.
-
-## Motivation
-
-Storage controller upgrades (restarts, more generally) can cause multi-second availability gaps.
-While the storage controller does not sit on the main data path, it's generally not acceptable
-to block management requests for extended periods of time (e.g. https://github.com/neondatabase/neon/issues/8034).
-
-### Current Implementation
-
-The storage controller runs in a Kubernetes Deployment configured for one replica and strategy set to [Recreate](https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#recreate-deployment).
-In non Kubernetes terms, during an upgrade, the currently running storage controller is stopped and, only after,
-a new instance is created.
-
-At start-up, the storage controller calls into all the pageservers it manages (retrieved from DB) to learn the
-latest locations of all tenant shards present on them. This is usually fast, but can push into tens of seconds
-under unfavourable circumstances: pageservers are heavily loaded or unavailable.
-
-## Prior Art
-
-There's probably as many ways of handling restarts gracefully as there are distributed systems. Some examples include:
-* Active/Standby architectures: Two or more instances of the same service run, but traffic is only routed to one of them.
-For fail-over, traffic is routed to one of the standbys (which becomes active).
-* Consensus Algorithms (Raft, Paxos and friends): The part of consensus we care about here is leader election: peers communicate to each other
-and use a voting scheme that ensures the existence of a single leader (e.g. Raft epochs).
-
-## Requirements
-
-* Reduce storage controller unavailability during upgrades to milliseconds
-* Minimize the interval in which it's possible for more than one storage controller
-to issue reconciles.
-* Have one uniform implementation for restarts and upgrades
-* Fit in with the current Kubernetes deployment scheme
-
-## Non Goals
-
-* Implement our own consensus algorithm from scratch
-* Completely eliminate storage controller downtime. Instead we aim to reduce it to the point where it looks
-like a transient error to the control plane
-
-## Impacted Components
-
-* storage controller
-* deployment orchestration (i.e. Ansible)
-* helm charts
-
-## Terminology
-
-* Observed State: in-memory mapping between tenant shards and their current pageserver locations - currently built up
-at start-up by querying pageservers
-* Deployment: Kubernetes [primitive](https://kubernetes.io/docs/concepts/workloads/controllers/deployment/) that models
-a set of replicas
-
-## Implementation
-
-### High Level Flow
-
-At a very high level the proposed idea is to start a new storage controller instance while
-the previous one is still running and cut-over to it when it becomes ready. The new instance,
-should coordinate with the existing one and transition responsibility gracefully. While the controller
-has built in safety against split-brain situations (via generation numbers), we'd like to avoid such
-scenarios since they can lead to availability issues for tenants that underwent changes while two controllers
-were operating at the same time and require operator intervention to remedy.
-
-### Kubernetes Deployment Configuration
-
-On the Kubernetes configuration side, the proposal is to update the storage controller `Deployment`
-to use `spec.strategy.type = RollingUpdate`, `spec.strategy.rollingUpdate.maxSurge=1` and `spec.strategy.rollingUpdate.maxUnavailable=0`.
-Under the hood, Kubernetes creates a new replica set and adds one pod to it (`maxSurge=1`). The old replica set does not
-scale down until the new replica set has one replica in the ready state (`maxUnavailable=0`).
-
-The various possible failure scenarios are investigated in the [Handling Failures](#handling-failures) section.
-
-### Storage Controller Start-Up
-
-This section describes the primitives required on the storage controller side and the flow of the happy path.
-
-#### Database Table For Leader Synchronization
-
-A new table should be added to the storage controller database for leader synchronization during startup.
-This table will always contain at most one row. The proposed name for the table is `leader` and the schema
-contains two elements:
-* `hostname`: represents the hostname for the current storage controller leader - should be addressable
-from other pods in the deployment
-* `start_timestamp`: holds the start timestamp for the current storage controller leader (UTC timezone) - only required
-for failure case handling: see [Previous Leader Crashes Before New Leader Readiness](#previous-leader-crashes-before-new-leader-readiness)
-
-Storage controllers will read the leader row at start-up and then update it to mark themselves as the leader
-at the end of the start-up sequence. We want compare-and-exchange semantics for the update: avoid the
-situation where two concurrent updates succeed and overwrite each other. The default Postgres isolation
-level is `READ COMMITTED`, which isn't strict enough here. This update transaction should use at least `REPEATABLE
-READ` isolation level in order to [prevent lost updates](https://www.interdb.jp/pg/pgsql05/08.html). Currently,
-the storage controller uses the stricter `SERIALIZABLE` isolation level for all transactions. This more than suits
-our needs here.
-
-```
-START TRANSACTION ISOLATION LEVEL REPEATABLE READ
-UPDATE leader SET hostname=<new_hostname>, start_timestamp=<new_start_ts>
-WHERE hostname=<old_hostname> AND start_timestamp=<old_start_ts>;
-```
-
-If the transaction fails or if no rows have been updated, then the compare-and-exchange is regarded as a failure.
-
-#### Step Down API
-
-A new HTTP endpoint should be added to the storage controller: `POST /control/v1/step_down`. Upon receiving this
-request the leader cancels any pending reconciles and goes into a mode where it replies with 503 to all other APIs
-and does not issue any location configurations to its pageservers. The successful HTTP response will return a serialized
-snapshot of the observed state.
-
-If other step down requests come in after the initial one, the request is handled and the observed state is returned (required
-for failure scenario handling - see [Handling Failures](#handling-failures)).
-
-#### Graceful Restart Happy Path
-
-At start-up, the first thing the storage controller does is retrieve the sole row from the new
-`leader` table. If such an entry exists, send a `/step_down` PUT API call to the current leader.
-This should be retried a few times with a short backoff (see [1]). The aspiring leader loads the
-observed state into memory and the start-up sequence proceeds as usual, but *without* querying the
-pageservers in order to build up the observed state.
-
-Before doing any reconciliations or persistence change, update the `leader` database table as described in the [Database Table For Leader Synchronization](#database-table-for-leader-synchronization)
-section. If this step fails, the storage controller process exits.
-
-Note that no row will exist in the `leader` table for the first graceful restart. In that case, force update the `leader` table
-(without the WHERE clause) and proceed with the pre-existing start-up procedure (i.e. build observed state by querying pageservers).
-
-Summary of proposed new start-up sequence:
-1. Call `/step_down`
-2. Perform any pending database migrations
-3. Load state from database
-4. Load observed state returned in step (1) into memory
-5. Do initial heartbeat round (may be moved after 5)
-7. Mark self as leader by updating the database
-8. Reschedule and reconcile everything
-
-Some things to note from the steps above:
-* The storage controller makes no changes to the cluster state before step (5) (i.e. no location config
-calls to the pageserver and no compute notifications)
-* Ask the current leader to step down before loading state from database so we don't get a lost update
-if the transactions overlap.
-* Before loading the observed state at step (3), cross-validate against the database. If validation fails,
-fall back to asking the pageservers about their current locations.
-* Database migrations should only run **after** the previous instance steps down (or the step down times out).
-
-
-[1] The API call might fail because there's no storage controller running (i.e. [restart](#storage-controller-crash-or-restart)),
-so we don't want to extend the unavailability period by much. We still want to retry since that's not the common case.
-
-### Handling Failures
-
-#### Storage Controller Crash Or Restart
-
-The storage controller may crash or be restarted outside of roll-outs. When a new pod is created, its call to
-`/step_down` will fail since the previous leader is no longer reachable. In this case perform the pre-existing
-start-up procedure and update the leader table (with the WHERE clause). If the update fails, the storage controller
-exits and consistency is maintained.
-
-#### Previous Leader Crashes Before New Leader Readiness
-
-When the previous leader (P1) crashes before the new leader (P2) passes the readiness check, Kubernetes will
-reconcile the old replica set and create a new pod for it (P1'). The `/step_down` API call will fail for P1'
-(see [2]).
-
-Now we have two cases to consider:
-* P2 updates the `leader` table first: The database update from P1' will fail and P1' will exit, or be terminated
-by Kubernetes depending on timings.
-* P1' updates the `leader` table first: The `hostname` field of the `leader` row stays the same, but the `start_timestamp` field changes.
-The database update from P2 will fail (since `start_timestamp` does not match). P2 will exit and Kubernetes will
-create a new replacement pod for it (P2'). Now the entire dance starts again, but with P1' as the leader and P2' as the incumbent.
-
-[2] P1 and P1' may (more likely than not) be the same pod and have the same hostname. The implementation
-should avoid this self reference and fail the API call at the client if the persisted hostname matches
-the current one.
-
-#### Previous Leader Crashes After New Leader Readiness
-
-The deployment's replica sets already satisfy the deployment's replica count requirements and the
-Kubernetes deployment rollout will just clean up the dead pod.
-
-#### New Leader Crashes Before Passing Readiness Check
-
-The deployment controller scales up the new replica sets by creating a new pod. The entire procedure is repeated
-with the new pod.
-
-#### Network Partition Between New Pod and Previous Leader
-
-This feels very unlikely, but should be considered in any case. P2 (the new aspiring leader) fails the `/step_down`
-API call into P1 (the current leader). P2 proceeds with the pre-existing startup procedure and updates the `leader` table.
-Kubernetes will terminate P1, but there may be a brief period where both storage controllers can drive reconciles.
-
-### Dealing With Split Brain Scenarios
-
-As we've seen in the previous section, we can end up with two storage controllers running at the same time. The split brain
-duration is not bounded since the Kubernetes controller might become partitioned from the pods (unlikely though). While these
-scenarios are not fatal, they can cause tenant unavailability, so we'd like to reduce the chances of this happening.
-The rest of this section sketches some safety measures. It's likely overkill to implement all of them however.
-
-### Ensure Leadership Before Producing Side Effects
-
-The storage controller has two types of side effects: location config requests into pageservers and compute notifications into the control plane.
-Before issuing either, the storage controller could check that it is indeed still the leader by querying the database. Side effects might still be
-applied if they race with the database update, but the situation will eventually be detected. The storage controller process should terminate in these cases.
-
-### Leadership Lease
-
-Up until now, the leadership defined by this RFC is static. In order to bound the length of the split brain scenario, we could require the leadership
-to be renewed periodically. Two new columns would be added to the `leader` table:
-1. `last_renewed` - timestamp indicating when the lease was last renewed
-2. `lease_duration` - duration indicating the amount of time after which the lease expires
-
-The leader periodically attempts to renew the lease by checking that it is in fact still the legitimate leader and updating `last_renewed` in the
-same transaction. If the update fails, the process exits. New storage controller instances wishing to become leaders must wait for the current lease
-to expire before acquiring leadership if they have not successfully received a response to the `/step_down` request.
-
-### Notify Pageserver Of Storage Controller Term
-
-Each time that leadership changes, we can bump a `term` integer column in the `leader` table. This term uniquely identifies a leader.
-Location config requests and re-attach responses can include this term. On the pageserver side, keep the latest term in memory and refuse
-anything which contains a stale term (i.e. smaller than the current one).
-
-### Observability
-
-* The storage controller should expose a metric which describes its state (`Active | WarmingUp | SteppedDown`).
-Per region alerts should be added on this metric which triggers when:
-  + no storage controller has been in the `Active` state for an extended period of time
-  + more than one storage controller is in the `Active` state
-
-* An alert that periodically verifies that the `leader` table is in sync with the metric above would be very useful.
-We'd have to expose the storage controller read only database to Grafana (perhaps it is already done).
-
-## Alternatives
-
-### Kubernetes Leases
-
-Kubernetes has a [lease primitive](https://kubernetes.io/docs/concepts/architecture/leases/) which can be used to implement leader election.
-Only one instance may hold a lease at any given time. This lease needs to be periodically renewed and has an expiration period.
-
-In our case, it would work something like this:
-* `/step_down` deletes the lease or stops it from renewing
-* lease acquisition becomes part of the start-up procedure
-
-The kubert crate implements a [lightweight lease API](https://docs.rs/kubert/latest/kubert/lease/struct.LeaseManager.html), but it's still
-not exactly trivial to implement.
-
-This approach has the benefit of baked in observability (`kubectl describe lease`), but:
-* We offload the responsibility to Kubernetes which makes it harder to debug when things go wrong.
-* More code surface than the simple "row in database" approach. Also, most of this code would be in
-a dependency not subject to code review, etc.
-* Hard to test. Our testing infra does not run the storage controller in Kubernetes and changing it to do
-so is not simple and complicates the test set-up.
-
-To my mind, the "row in database" approach is straightforward enough that we don't have to offload this
-to something external.
--- a/docs/updating-postgres.md
+++ b/docs/updating-postgres.md
@@ -21,21 +21,30 @@ _Example: 15.4 is the new minor version to upgrade to from 15.3._
 1. Create a new branch based on the stable branch you are updating.

    ```shell
-    git checkout -b my-branch-15 REL_15_STABLE_neon
+    git checkout -b my-branch REL_15_STABLE_neon
    ```

-1. Find the upstream release tags you're looking for. They are of the form `REL_X_Y`.
+1. Tag the last commit on the stable branch you are updating.

-1. Merge the upstream tag into the branch you created on the tag and resolve any conflicts.
+    ```shell
+    git tag REL_15_3_neon
+    ```
+
+1. Push the new tag to the Neon Postgres repository.
+
+    ```shell
+    git push origin REL_15_3_neon
+    ```
+
+1. Find the release tags you're looking for. They are of the form `REL_X_Y`.
+
+1. Rebase the branch you created on the tag and resolve any conflicts.

    ```shell
    git fetch upstream REL_15_4
-    git merge REL_15_4
+    git rebase REL_15_4
    ```

-    In the commit message of the merge commit, mention if there were
-    any non-trivial conflicts or other issues.
-
 1. Run the Postgres test suite to make sure our commits have not affected
 Postgres in a negative way.

@@ -48,7 +57,7 @@ Postgres in a negative way.
 1. Push your branch to the Neon Postgres repository.

    ```shell
-    git push origin my-branch-15
+    git push origin my-branch
    ```

 1. Clone the Neon repository if you have not done so already.
@@ -65,7 +74,7 @@ branch.
 1. Update the Git submodule.

    ```shell
-    git submodule set-branch --branch my-branch-15 vendor/postgres-v15
+    git submodule set-branch --branch my-branch vendor/postgres-v15
    git submodule update --remote vendor/postgres-v15
    ```

@@ -80,12 +89,14 @@ minor Postgres release.

 1. Create a pull request, and wait for CI to go green.

-1. Push the Postgres branches with the merge commits into the Neon Postgres repository.
+1. Force push the rebased Postgres branches into the Neon Postgres repository.

    ```shell
-    git push origin my-branch-15:REL_15_STABLE_neon
+    git push --force origin my-branch:REL_15_STABLE_neon
    ```

+    It may require disabling various branch protections.
+
 1. Update your Neon PR to point at the branches.

    ```shell
--- a/libs/compute_api/Cargo.toml
+++ b/libs/compute_api/Cargo.toml
@@ -14,3 +14,5 @@ regex.workspace = true

 utils = { path = "../utils" }
 remote_storage = { version = "0.1", path = "../remote_storage/" }
+
+workspace_hack.workspace = true
--- a/libs/consumption_metrics/Cargo.toml
+++ b/libs/consumption_metrics/Cargo.toml
@@ -6,8 +6,10 @@ license = "Apache-2.0"

 [dependencies]
 anyhow.workspace = true
-chrono = { workspace = true, features = ["serde"] }
+chrono.workspace = true
 rand.workspace = true
 serde.workspace = true
 serde_with.workspace = true
 utils.workspace = true
+
+workspace_hack.workspace = true
--- a/libs/desim/Cargo.toml
+++ b/libs/desim/Cargo.toml
@@ -14,3 +14,5 @@ parking_lot.workspace = true
 hex.workspace = true
 scopeguard.workspace = true
 smallvec = { workspace = true, features = ["write"] }
+
+workspace_hack.workspace = true
--- a/libs/metrics/Cargo.toml
+++ b/libs/metrics/Cargo.toml
@@ -12,6 +12,8 @@ chrono.workspace = true
 twox-hash.workspace = true
 measured.workspace = true

+workspace_hack.workspace = true
+
 [target.'cfg(target_os = "linux")'.dependencies]
 procfs.workspace = true
 measured-process.workspace = true
--- a/libs/metrics/src/lib.rs
+++ b/libs/metrics/src/lib.rs
@@ -68,7 +68,6 @@ macro_rules! register_uint_gauge {
 static INTERNAL_REGISTRY: Lazy<Registry> = Lazy::new(Registry::new);

 /// Register a collector in the internal registry. MUST be called before the first call to `gather()`.
-///
 /// Otherwise, we can have a deadlock in the `gather()` call, trying to register a new collector
 /// while holding the lock.
 pub fn register_internal(c: Box<dyn Collector>) -> prometheus::Result<()> {
--- a/libs/pageserver_api/Cargo.toml
+++ b/libs/pageserver_api/Cargo.toml
@@ -4,10 +4,6 @@ version = "0.1.0"
 edition.workspace = true
 license.workspace = true

-[features]
-# See pageserver/Cargo.toml
-testing = ["dep:nix"]
-
 [dependencies]
 serde.workspace = true
 serde_with.workspace = true
@@ -25,14 +21,10 @@ hex.workspace = true
 humantime.workspace = true
 thiserror.workspace = true
 humantime-serde.workspace = true
-chrono = { workspace = true, features = ["serde"] }
+chrono.workspace = true
 itertools.workspace = true
-storage_broker.workspace = true
-camino = {workspace = true, features = ["serde1"]}
-remote_storage.workspace = true
-postgres_backend.workspace = true
-nix = {workspace = true, optional = true}
-reqwest.workspace = true
+
+workspace_hack.workspace = true

 [dev-dependencies]
 bincode.workspace = true
--- a/libs/pageserver_api/src/config.rs
+++ b/libs/pageserver_api/src/config.rs
@@ -1,28 +1,15 @@
-use camino::Utf8PathBuf;
+use std::collections::HashMap;
+
+use const_format::formatcp;

 #[cfg(test)]
 mod tests;

-use const_format::formatcp;
 pub const DEFAULT_PG_LISTEN_PORT: u16 = 64000;
 pub const DEFAULT_PG_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_PG_LISTEN_PORT}");
 pub const DEFAULT_HTTP_LISTEN_PORT: u16 = 9898;
 pub const DEFAULT_HTTP_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_HTTP_LISTEN_PORT}");

-use postgres_backend::AuthType;
-use remote_storage::RemoteStorageConfig;
-use serde_with::serde_as;
-use std::{
-    collections::HashMap,
-    num::{NonZeroU64, NonZeroUsize},
-    str::FromStr,
-    time::Duration,
-};
-use utils::logging::LogFormat;
-
-use crate::models::ImageCompressionAlgorithm;
-use crate::models::LsnLease;
-
 // Certain metadata (e.g. externally-addressable name, AZ) is delivered
 // as a separate structure.  This information is not needed by the pageserver
 // itself, it is only used for registering the pageserver with the control
@@ -42,476 +29,3 @@ pub struct NodeMetadata {
    #[serde(flatten)]
    pub other: HashMap<String, serde_json::Value>,
 }
-
-/// `pageserver.toml`
-///
-/// We use serde derive with `#[serde(default)]` to generate a deserializer
-/// that fills in the default values for each config field.
-///
-/// If there cannot be a static default value because we need to make runtime
-/// checks to determine the default, make it an `Option` (which defaults to None).
-/// The runtime check should be done in the consuming crate, i.e., `pageserver`.
-#[serde_as]
-#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)]
-#[serde(default, deny_unknown_fields)]
-pub struct ConfigToml {
-    // types mapped 1:1 into the runtime PageServerConfig type
-    pub listen_pg_addr: String,
-    pub listen_http_addr: String,
-    pub availability_zone: Option<String>,
-    #[serde(with = "humantime_serde")]
-    pub wait_lsn_timeout: Duration,
-    #[serde(with = "humantime_serde")]
-    pub wal_redo_timeout: Duration,
-    pub superuser: String,
-    pub page_cache_size: usize,
-    pub max_file_descriptors: usize,
-    pub pg_distrib_dir: Option<Utf8PathBuf>,
-    #[serde_as(as = "serde_with::DisplayFromStr")]
-    pub http_auth_type: AuthType,
-    #[serde_as(as = "serde_with::DisplayFromStr")]
-    pub pg_auth_type: AuthType,
-    pub auth_validation_public_key_path: Option<Utf8PathBuf>,
-    pub remote_storage: Option<RemoteStorageConfig>,
-    pub tenant_config: TenantConfigToml,
-    #[serde_as(as = "serde_with::DisplayFromStr")]
-    pub broker_endpoint: storage_broker::Uri,
-    #[serde(with = "humantime_serde")]
-    pub broker_keepalive_interval: Duration,
-    #[serde_as(as = "serde_with::DisplayFromStr")]
-    pub log_format: LogFormat,
-    pub concurrent_tenant_warmup: NonZeroUsize,
-    pub concurrent_tenant_size_logical_size_queries: NonZeroUsize,
-    #[serde(with = "humantime_serde")]
-    pub metric_collection_interval: Duration,
-    pub metric_collection_endpoint: Option<reqwest::Url>,
-    pub metric_collection_bucket: Option<RemoteStorageConfig>,
-    #[serde(with = "humantime_serde")]
-    pub synthetic_size_calculation_interval: Duration,
-    pub disk_usage_based_eviction: Option<DiskUsageEvictionTaskConfig>,
-    pub test_remote_failures: u64,
-    pub ondemand_download_behavior_treat_error_as_warn: bool,
-    #[serde(with = "humantime_serde")]
-    pub background_task_maximum_delay: Duration,
-    pub control_plane_api: Option<reqwest::Url>,
-    pub control_plane_api_token: Option<String>,
-    pub control_plane_emergency_mode: bool,
-    pub heatmap_upload_concurrency: usize,
-    pub secondary_download_concurrency: usize,
-    pub virtual_file_io_engine: Option<crate::models::virtual_file::IoEngineKind>,
-    pub ingest_batch_size: u64,
-    pub max_vectored_read_bytes: MaxVectoredReadBytes,
-    pub image_compression: ImageCompressionAlgorithm,
-    pub ephemeral_bytes_per_memory_kb: usize,
-    pub l0_flush: Option<crate::models::L0FlushConfig>,
-    #[serde(skip_serializing)]
-    // TODO(https://github.com/neondatabase/neon/issues/8184): remove after this field is removed from all pageserver.toml's
-    pub compact_level0_phase1_value_access: serde::de::IgnoredAny,
-    pub virtual_file_direct_io: crate::models::virtual_file::DirectIoMode,
-    pub io_buffer_alignment: usize,
-}
-
-#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
-#[serde(deny_unknown_fields)]
-pub struct DiskUsageEvictionTaskConfig {
-    pub max_usage_pct: utils::serde_percent::Percent,
-    pub min_avail_bytes: u64,
-    #[serde(with = "humantime_serde")]
-    pub period: Duration,
-    #[cfg(feature = "testing")]
-    pub mock_statvfs: Option<statvfs::mock::Behavior>,
-    /// Select sorting for evicted layers
-    #[serde(default)]
-    pub eviction_order: EvictionOrder,
-}
-
-pub mod statvfs {
-    pub mod mock {
-        #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
-        #[serde(tag = "type")]
-        pub enum Behavior {
-            Success {
-                blocksize: u64,
-                total_blocks: u64,
-                name_filter: Option<utils::serde_regex::Regex>,
-            },
-            #[cfg(feature = "testing")]
-            Failure { mocked_error: MockedError },
-        }
-
-        #[cfg(feature = "testing")]
-        #[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
-        #[allow(clippy::upper_case_acronyms)]
-        pub enum MockedError {
-            EIO,
-        }
-
-        #[cfg(feature = "testing")]
-        impl From<MockedError> for nix::Error {
-            fn from(e: MockedError) -> Self {
-                match e {
-                    MockedError::EIO => nix::Error::EIO,
-                }
-            }
-        }
-    }
-}
-
-#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
-#[serde(tag = "type", content = "args")]
-pub enum EvictionOrder {
-    RelativeAccessed {
-        highest_layer_count_loses_first: bool,
-    },
-}
-
-impl Default for EvictionOrder {
-    fn default() -> Self {
-        Self::RelativeAccessed {
-            highest_layer_count_loses_first: true,
-        }
-    }
-}
-
-#[derive(
-    Eq,
-    PartialEq,
-    Debug,
-    Copy,
-    Clone,
-    strum_macros::EnumString,
-    strum_macros::Display,
-    serde_with::DeserializeFromStr,
-    serde_with::SerializeDisplay,
-)]
-#[strum(serialize_all = "kebab-case")]
-pub enum GetVectoredImpl {
-    Sequential,
-    Vectored,
-}
-
-#[derive(
-    Eq,
-    PartialEq,
-    Debug,
-    Copy,
-    Clone,
-    strum_macros::EnumString,
-    strum_macros::Display,
-    serde_with::DeserializeFromStr,
-    serde_with::SerializeDisplay,
-)]
-#[strum(serialize_all = "kebab-case")]
-pub enum GetImpl {
-    Legacy,
-    Vectored,
-}
-
-#[derive(Copy, Clone, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
-#[serde(transparent)]
-pub struct MaxVectoredReadBytes(pub NonZeroUsize);
-
-/// A tenant's calcuated configuration, which is the result of merging a
-/// tenant's TenantConfOpt with the global TenantConf from PageServerConf.
-///
-/// For storing and transmitting individual tenant's configuration, see
-/// TenantConfOpt.
-#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
-#[serde(deny_unknown_fields, default)]
-pub struct TenantConfigToml {
-    // Flush out an inmemory layer, if it's holding WAL older than this
-    // This puts a backstop on how much WAL needs to be re-digested if the
-    // page server crashes.
-    // This parameter actually determines L0 layer file size.
-    pub checkpoint_distance: u64,
-    // Inmemory layer is also flushed at least once in checkpoint_timeout to
-    // eventually upload WAL after activity is stopped.
-    #[serde(with = "humantime_serde")]
-    pub checkpoint_timeout: Duration,
-    // Target file size, when creating image and delta layers.
-    // This parameter determines L1 layer file size.
-    pub compaction_target_size: u64,
-    // How often to check if there's compaction work to be done.
-    // Duration::ZERO means automatic compaction is disabled.
-    #[serde(with = "humantime_serde")]
-    pub compaction_period: Duration,
-    // Level0 delta layer threshold for compaction.
-    pub compaction_threshold: usize,
-    pub compaction_algorithm: crate::models::CompactionAlgorithmSettings,
-    // Determines how much history is retained, to allow
-    // branching and read replicas at an older point in time.
-    // The unit is #of bytes of WAL.
-    // Page versions older than this are garbage collected away.
-    pub gc_horizon: u64,
-    // Interval at which garbage collection is triggered.
-    // Duration::ZERO means automatic GC is disabled
-    #[serde(with = "humantime_serde")]
-    pub gc_period: Duration,
-    // Delta layer churn threshold to create L1 image layers.
-    pub image_creation_threshold: usize,
-    // Determines how much history is retained, to allow
-    // branching and read replicas at an older point in time.
-    // The unit is time.
-    // Page versions older than this are garbage collected away.
-    #[serde(with = "humantime_serde")]
-    pub pitr_interval: Duration,
-    /// Maximum amount of time to wait while opening a connection to receive wal, before erroring.
-    #[serde(with = "humantime_serde")]
-    pub walreceiver_connect_timeout: Duration,
-    /// Considers safekeepers stalled after no WAL updates were received longer than this threshold.
-    /// A stalled safekeeper will be changed to a newer one when it appears.
-    #[serde(with = "humantime_serde")]
-    pub lagging_wal_timeout: Duration,
-    /// Considers safekeepers lagging when their WAL is behind another safekeeper for more than this threshold.
-    /// A lagging safekeeper will be changed after `lagging_wal_timeout` time elapses since the last WAL update,
-    /// to avoid eager reconnects.
-    pub max_lsn_wal_lag: NonZeroU64,
-    pub eviction_policy: crate::models::EvictionPolicy,
-    pub min_resident_size_override: Option<u64>,
-    // See the corresponding metric's help string.
-    #[serde(with = "humantime_serde")]
-    pub evictions_low_residence_duration_metric_threshold: Duration,
-
-    /// If non-zero, the period between uploads of a heatmap from attached tenants.  This
-    /// may be disabled if a Tenant will not have secondary locations: only secondary
-    /// locations will use the heatmap uploaded by attached locations.
-    #[serde(with = "humantime_serde")]
-    pub heatmap_period: Duration,
-
-    /// If true then SLRU segments are dowloaded on demand, if false SLRU segments are included in basebackup
-    pub lazy_slru_download: bool,
-
-    pub timeline_get_throttle: crate::models::ThrottleConfig,
-
-    // How much WAL must be ingested before checking again whether a new image layer is required.
-    // Expresed in multiples of checkpoint distance.
-    pub image_layer_creation_check_threshold: u8,
-
-    /// Switch to a new aux file policy. Switching this flag requires the user has not written any aux file into
-    /// the storage before, and this flag cannot be switched back. Otherwise there will be data corruptions.
-    /// There is a `last_aux_file_policy` flag which gets persisted in `index_part.json` once the first aux
-    /// file is written.
-    pub switch_aux_file_policy: crate::models::AuxFilePolicy,
-
-    /// The length for an explicit LSN lease request.
-    /// Layers needed to reconstruct pages at LSN will not be GC-ed during this interval.
-    #[serde(with = "humantime_serde")]
-    pub lsn_lease_length: Duration,
-
-    /// The length for an implicit LSN lease granted as part of `get_lsn_by_timestamp` request.
-    /// Layers needed to reconstruct pages at LSN will not be GC-ed during this interval.
-    #[serde(with = "humantime_serde")]
-    pub lsn_lease_length_for_ts: Duration,
-}
-
-pub mod defaults {
-    use crate::models::ImageCompressionAlgorithm;
-
-    pub use storage_broker::DEFAULT_ENDPOINT as BROKER_DEFAULT_ENDPOINT;
-
-    pub const DEFAULT_WAIT_LSN_TIMEOUT: &str = "300 s";
-    pub const DEFAULT_WAL_REDO_TIMEOUT: &str = "60 s";
-
-    pub const DEFAULT_SUPERUSER: &str = "cloud_admin";
-
-    pub const DEFAULT_PAGE_CACHE_SIZE: usize = 8192;
-    pub const DEFAULT_MAX_FILE_DESCRIPTORS: usize = 100;
-
-    pub const DEFAULT_LOG_FORMAT: &str = "plain";
-
-    pub const DEFAULT_CONCURRENT_TENANT_WARMUP: usize = 8;
-
-    pub const DEFAULT_CONCURRENT_TENANT_SIZE_LOGICAL_SIZE_QUERIES: usize = 1;
-
-    pub const DEFAULT_METRIC_COLLECTION_INTERVAL: &str = "10 min";
-    pub const DEFAULT_METRIC_COLLECTION_ENDPOINT: Option<reqwest::Url> = None;
-    pub const DEFAULT_SYNTHETIC_SIZE_CALCULATION_INTERVAL: &str = "10 min";
-    pub const DEFAULT_BACKGROUND_TASK_MAXIMUM_DELAY: &str = "10s";
-
-    pub const DEFAULT_HEATMAP_UPLOAD_CONCURRENCY: usize = 8;
-    pub const DEFAULT_SECONDARY_DOWNLOAD_CONCURRENCY: usize = 1;
-
-    pub const DEFAULT_INGEST_BATCH_SIZE: u64 = 100;
-
-    pub const DEFAULT_MAX_VECTORED_READ_BYTES: usize = 128 * 1024; // 128 KiB
-
-    pub const DEFAULT_IMAGE_COMPRESSION: ImageCompressionAlgorithm =
-        ImageCompressionAlgorithm::Zstd { level: Some(1) };
-
-    pub const DEFAULT_VALIDATE_VECTORED_GET: bool = false;
-
-    pub const DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB: usize = 0;
-
-    pub const DEFAULT_IO_BUFFER_ALIGNMENT: usize = 512;
-}
-
-impl Default for ConfigToml {
-    fn default() -> Self {
-        use defaults::*;
-
-        Self {
-            listen_pg_addr: (DEFAULT_PG_LISTEN_ADDR.to_string()),
-            listen_http_addr: (DEFAULT_HTTP_LISTEN_ADDR.to_string()),
-            availability_zone: (None),
-            wait_lsn_timeout: (humantime::parse_duration(DEFAULT_WAIT_LSN_TIMEOUT)
-                .expect("cannot parse default wait lsn timeout")),
-            wal_redo_timeout: (humantime::parse_duration(DEFAULT_WAL_REDO_TIMEOUT)
-                .expect("cannot parse default wal redo timeout")),
-            superuser: (DEFAULT_SUPERUSER.to_string()),
-            page_cache_size: (DEFAULT_PAGE_CACHE_SIZE),
-            max_file_descriptors: (DEFAULT_MAX_FILE_DESCRIPTORS),
-            pg_distrib_dir: None, // Utf8PathBuf::from("./pg_install"), // TODO: formely, this was std::env::current_dir()
-            http_auth_type: (AuthType::Trust),
-            pg_auth_type: (AuthType::Trust),
-            auth_validation_public_key_path: (None),
-            remote_storage: None,
-            broker_endpoint: (storage_broker::DEFAULT_ENDPOINT
-                .parse()
-                .expect("failed to parse default broker endpoint")),
-            broker_keepalive_interval: (humantime::parse_duration(
-                storage_broker::DEFAULT_KEEPALIVE_INTERVAL,
-            )
-            .expect("cannot parse default keepalive interval")),
-            log_format: (LogFormat::from_str(DEFAULT_LOG_FORMAT).unwrap()),
-
-            concurrent_tenant_warmup: (NonZeroUsize::new(DEFAULT_CONCURRENT_TENANT_WARMUP)
-                .expect("Invalid default constant")),
-            concurrent_tenant_size_logical_size_queries: NonZeroUsize::new(1).unwrap(),
-            metric_collection_interval: (humantime::parse_duration(
-                DEFAULT_METRIC_COLLECTION_INTERVAL,
-            )
-            .expect("cannot parse default metric collection interval")),
-            synthetic_size_calculation_interval: (humantime::parse_duration(
-                DEFAULT_SYNTHETIC_SIZE_CALCULATION_INTERVAL,
-            )
-            .expect("cannot parse default synthetic size calculation interval")),
-            metric_collection_endpoint: (DEFAULT_METRIC_COLLECTION_ENDPOINT),
-
-            metric_collection_bucket: (None),
-
-            disk_usage_based_eviction: (None),
-
-            test_remote_failures: (0),
-
-            ondemand_download_behavior_treat_error_as_warn: (false),
-
-            background_task_maximum_delay: (humantime::parse_duration(
-                DEFAULT_BACKGROUND_TASK_MAXIMUM_DELAY,
-            )
-            .unwrap()),
-
-            control_plane_api: (None),
-            control_plane_api_token: (None),
-            control_plane_emergency_mode: (false),
-
-            heatmap_upload_concurrency: (DEFAULT_HEATMAP_UPLOAD_CONCURRENCY),
-            secondary_download_concurrency: (DEFAULT_SECONDARY_DOWNLOAD_CONCURRENCY),
-
-            ingest_batch_size: (DEFAULT_INGEST_BATCH_SIZE),
-
-            virtual_file_io_engine: None,
-
-            max_vectored_read_bytes: (MaxVectoredReadBytes(
-                NonZeroUsize::new(DEFAULT_MAX_VECTORED_READ_BYTES).unwrap(),
-            )),
-            image_compression: (DEFAULT_IMAGE_COMPRESSION),
-            ephemeral_bytes_per_memory_kb: (DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB),
-            l0_flush: None,
-            compact_level0_phase1_value_access: Default::default(),
-            virtual_file_direct_io: crate::models::virtual_file::DirectIoMode::default(),
-
-            io_buffer_alignment: DEFAULT_IO_BUFFER_ALIGNMENT,
-
-            tenant_config: TenantConfigToml::default(),
-        }
-    }
-}
-
-pub mod tenant_conf_defaults {
-
-    // FIXME: This current value is very low. I would imagine something like 1 GB or 10 GB
-    // would be more appropriate. But a low value forces the code to be exercised more,
-    // which is good for now to trigger bugs.
-    // This parameter actually determines L0 layer file size.
-    pub const DEFAULT_CHECKPOINT_DISTANCE: u64 = 256 * 1024 * 1024;
-    pub const DEFAULT_CHECKPOINT_TIMEOUT: &str = "10 m";
-
-    // FIXME the below configs are only used by legacy algorithm. The new algorithm
-    // has different parameters.
-
-    // Target file size, when creating image and delta layers.
-    // This parameter determines L1 layer file size.
-    pub const DEFAULT_COMPACTION_TARGET_SIZE: u64 = 128 * 1024 * 1024;
-
-    pub const DEFAULT_COMPACTION_PERIOD: &str = "20 s";
-    pub const DEFAULT_COMPACTION_THRESHOLD: usize = 10;
-    pub const DEFAULT_COMPACTION_ALGORITHM: crate::models::CompactionAlgorithm =
-        crate::models::CompactionAlgorithm::Legacy;
-
-    pub const DEFAULT_GC_HORIZON: u64 = 64 * 1024 * 1024;
-
-    // Large DEFAULT_GC_PERIOD is fine as long as PITR_INTERVAL is larger.
-    // If there's a need to decrease this value, first make sure that GC
-    // doesn't hold a layer map write lock for non-trivial operations.
-    // Relevant: https://github.com/neondatabase/neon/issues/3394
-    pub const DEFAULT_GC_PERIOD: &str = "1 hr";
-    pub const DEFAULT_IMAGE_CREATION_THRESHOLD: usize = 3;
-    pub const DEFAULT_PITR_INTERVAL: &str = "7 days";
-    pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "10 seconds";
-    pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "10 seconds";
-    // The default limit on WAL lag should be set to avoid causing disconnects under high throughput
-    // scenarios: since the broker stats are updated ~1/s, a value of 1GiB should be sufficient for
-    // throughputs up to 1GiB/s per timeline.
-    pub const DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG: u64 = 1024 * 1024 * 1024;
-    pub const DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD: &str = "24 hour";
-    // By default ingest enough WAL for two new L0 layers before checking if new image
-    // image layers should be created.
-    pub const DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD: u8 = 2;
-
-    pub const DEFAULT_INGEST_BATCH_SIZE: u64 = 100;
-}
-
-impl Default for TenantConfigToml {
-    fn default() -> Self {
-        use tenant_conf_defaults::*;
-        Self {
-            checkpoint_distance: DEFAULT_CHECKPOINT_DISTANCE,
-            checkpoint_timeout: humantime::parse_duration(DEFAULT_CHECKPOINT_TIMEOUT)
-                .expect("cannot parse default checkpoint timeout"),
-            compaction_target_size: DEFAULT_COMPACTION_TARGET_SIZE,
-            compaction_period: humantime::parse_duration(DEFAULT_COMPACTION_PERIOD)
-                .expect("cannot parse default compaction period"),
-            compaction_threshold: DEFAULT_COMPACTION_THRESHOLD,
-            compaction_algorithm: crate::models::CompactionAlgorithmSettings {
-                kind: DEFAULT_COMPACTION_ALGORITHM,
-            },
-            gc_horizon: DEFAULT_GC_HORIZON,
-            gc_period: humantime::parse_duration(DEFAULT_GC_PERIOD)
-                .expect("cannot parse default gc period"),
-            image_creation_threshold: DEFAULT_IMAGE_CREATION_THRESHOLD,
-            pitr_interval: humantime::parse_duration(DEFAULT_PITR_INTERVAL)
-                .expect("cannot parse default PITR interval"),
-            walreceiver_connect_timeout: humantime::parse_duration(
-                DEFAULT_WALRECEIVER_CONNECT_TIMEOUT,
-            )
-            .expect("cannot parse default walreceiver connect timeout"),
-            lagging_wal_timeout: humantime::parse_duration(DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT)
-                .expect("cannot parse default walreceiver lagging wal timeout"),
-            max_lsn_wal_lag: NonZeroU64::new(DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG)
-                .expect("cannot parse default max walreceiver Lsn wal lag"),
-            eviction_policy: crate::models::EvictionPolicy::NoEviction,
-            min_resident_size_override: None,
-            evictions_low_residence_duration_metric_threshold: humantime::parse_duration(
-                DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD,
-            )
-            .expect("cannot parse default evictions_low_residence_duration_metric_threshold"),
-            heatmap_period: Duration::ZERO,
-            lazy_slru_download: false,
-            timeline_get_throttle: crate::models::ThrottleConfig::disabled(),
-            image_layer_creation_check_threshold: DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD,
-            switch_aux_file_policy: crate::models::AuxFilePolicy::default_tenant_config(),
-            lsn_lease_length: LsnLease::DEFAULT_LENGTH,
-            lsn_lease_length_for_ts: LsnLease::DEFAULT_LENGTH_FOR_TS,
-        }
-    }
-}
--- a/libs/pageserver_api/src/controller_api.rs
+++ b/libs/pageserver_api/src/controller_api.rs
@@ -1,4 +1,4 @@
-use std::collections::{HashMap, HashSet};
+use std::collections::HashSet;
 use std::str::FromStr;
 use std::time::{Duration, Instant};

@@ -8,7 +8,6 @@ use std::time::{Duration, Instant};
 use serde::{Deserialize, Serialize};
 use utils::id::{NodeId, TenantId};

-use crate::models::PageserverUtilization;
 use crate::{
    models::{ShardParameters, TenantConfig},
    shard::{ShardStripeSize, TenantShardId},
@@ -56,8 +55,6 @@ pub struct NodeRegisterRequest {

    pub listen_http_addr: String,
    pub listen_http_port: u16,
-
-    pub availability_zone_id: String,
 }

 #[derive(Serialize, Deserialize)]
@@ -74,17 +71,6 @@ pub struct TenantPolicyRequest {
    pub scheduling: Option<ShardSchedulingPolicy>,
 }

-#[derive(Serialize, Deserialize)]
-pub struct ShardsPreferredAzsRequest {
-    #[serde(flatten)]
-    pub preferred_az_ids: HashMap<TenantShardId, String>,
-}
-
-#[derive(Serialize, Deserialize)]
-pub struct ShardsPreferredAzsResponse {
-    pub updated: Vec<TenantShardId>,
-}
-
 #[derive(Serialize, Deserialize, Debug)]
 pub struct TenantLocateResponseShard {
    pub shard_id: TenantShardId,
@@ -112,21 +98,6 @@ pub struct TenantDescribeResponse {
    pub config: TenantConfig,
 }

-#[derive(Serialize, Deserialize, Debug)]
-pub struct NodeShardResponse {
-    pub node_id: NodeId,
-    pub shards: Vec<NodeShard>,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-pub struct NodeShard {
-    pub tenant_shard_id: TenantShardId,
-    /// Whether the shard is observed secondary on a specific node. True = yes, False = no, None = not on this node.
-    pub is_observed_secondary: Option<bool>,
-    /// Whether the shard is intended to be a secondary on a specific node. True = yes, False = no, None = not on this node.
-    pub is_intended_secondary: Option<bool>,
-}
-
 #[derive(Serialize, Deserialize)]
 pub struct NodeDescribeResponse {
    pub id: NodeId,
@@ -158,12 +129,8 @@ pub struct TenantDescribeResponseShard {
    pub is_splitting: bool,

    pub scheduling_policy: ShardSchedulingPolicy,
-
-    pub preferred_az_id: Option<String>,
 }

-/// Migration request for a given tenant shard to a given node.
-///
 /// Explicitly migrating a particular shard is a low level operation
 /// TODO: higher level "Reschedule tenant" operation where the request
 /// specifies some constraints, e.g. asking it to get off particular node(s)
@@ -173,11 +140,23 @@ pub struct TenantShardMigrateRequest {
    pub node_id: NodeId,
 }

-#[derive(Serialize, Clone, Debug)]
+/// Utilisation score indicating how good a candidate a pageserver
+/// is for scheduling the next tenant. See [`crate::models::PageserverUtilization`].
+/// Lower values are better.
+#[derive(Serialize, Deserialize, Clone, Copy, Eq, PartialEq, PartialOrd, Ord, Debug)]
+pub struct UtilizationScore(pub u64);
+
+impl UtilizationScore {
+    pub fn worst() -> Self {
+        UtilizationScore(u64::MAX)
+    }
+}
+
+#[derive(Serialize, Clone, Copy, Debug)]
 #[serde(into = "NodeAvailabilityWrapper")]
 pub enum NodeAvailability {
    // Normal, happy state
-    Active(PageserverUtilization),
+    Active(UtilizationScore),
    // Node is warming up, but we expect it to become available soon. Covers
    // the time span between the re-attach response being composed on the storage controller
    // and the first successful heartbeat after the processing of the re-attach response
@@ -216,9 +195,7 @@ impl From<NodeAvailabilityWrapper> for NodeAvailability {
        match val {
            // Assume the worst utilisation score to begin with. It will later be updated by
            // the heartbeats.
-            NodeAvailabilityWrapper::Active => {
-                NodeAvailability::Active(PageserverUtilization::full())
-            }
+            NodeAvailabilityWrapper::Active => NodeAvailability::Active(UtilizationScore::worst()),
            NodeAvailabilityWrapper::WarmingUp => NodeAvailability::WarmingUp(Instant::now()),
            NodeAvailabilityWrapper::Offline => NodeAvailability::Offline,
        }
--- a/libs/pageserver_api/src/key.rs
+++ b/libs/pageserver_api/src/key.rs
@@ -108,41 +108,14 @@ impl Key {
        }
    }

-    /// This function checks more extensively what keys we can take on the write path.
-    /// If a key beginning with 00 does not have a global/default tablespace OID, it
-    /// will be rejected on the write path.
-    #[allow(dead_code)]
-    pub fn is_valid_key_on_write_path_strong(&self) -> bool {
-        use postgres_ffi::pg_constants::{DEFAULTTABLESPACE_OID, GLOBALTABLESPACE_OID};
-        if !self.is_i128_representable() {
-            return false;
-        }
-        if self.field1 == 0
-            && !(self.field2 == GLOBALTABLESPACE_OID
-                || self.field2 == DEFAULTTABLESPACE_OID
-                || self.field2 == 0)
-        {
-            return false; // User defined tablespaces are not supported
-        }
-        true
-    }
-
-    /// This is a weaker version of `is_valid_key_on_write_path_strong` that simply
-    /// checks if the key is i128 representable. Note that some keys can be successfully
-    /// ingested into the pageserver, but will cause errors on generating basebackup.
-    pub fn is_valid_key_on_write_path(&self) -> bool {
-        self.is_i128_representable()
-    }
-
-    pub fn is_i128_representable(&self) -> bool {
-        self.field2 <= 0xFFFF || self.field2 == 0xFFFFFFFF || self.field2 == 0x22222222
-    }
-
    /// 'field2' is used to store tablespaceid for relations and small enum numbers for other relish.
    /// As long as Neon does not support tablespace (because of lack of access to local file system),
    /// we can assume that only some predefined namespace OIDs are used which can fit in u16
    pub fn to_i128(&self) -> i128 {
-        assert!(self.is_i128_representable(), "invalid key: {self}");
+        assert!(
+            self.field2 <= 0xFFFF || self.field2 == 0xFFFFFFFF || self.field2 == 0x22222222,
+            "invalid key: {self}",
+        );
        (((self.field1 & 0x7F) as i128) << 120)
            | (((self.field2 & 0xFFFF) as i128) << 104)
            | ((self.field3 as i128) << 72)
--- a/libs/pageserver_api/src/models.rs
+++ b/libs/pageserver_api/src/models.rs
@@ -6,9 +6,8 @@ pub use utilization::PageserverUtilization;

 use std::{
    collections::HashMap,
-    fmt::Display,
    io::{BufRead, Read},
-    num::{NonZeroU32, NonZeroU64, NonZeroUsize},
+    num::{NonZeroU64, NonZeroUsize},
    str::FromStr,
    sync::atomic::AtomicUsize,
    time::{Duration, SystemTime},
@@ -62,7 +61,7 @@ use bytes::{Buf, BufMut, Bytes, BytesMut};
    serde::Serialize,
    serde::Deserialize,
    strum_macros::Display,
-    strum_macros::VariantNames,
+    strum_macros::EnumVariantNames,
    strum_macros::AsRefStr,
    strum_macros::IntoStaticStr,
 )]
@@ -305,10 +304,8 @@ pub struct TenantConfig {
    pub lsn_lease_length_for_ts: Option<String>,
 }

-/// The policy for the aux file storage.
-///
-/// It can be switched through `switch_aux_file_policy` tenant config.
-/// When the first aux file written, the policy will be persisted in the
+/// The policy for the aux file storage. It can be switched through `switch_aux_file_policy`
+/// tenant config. When the first aux file written, the policy will be persisted in the
 /// `index_part.json` file and has a limited migration path.
 ///
 /// Currently, we only allow the following migration path:
@@ -351,7 +348,7 @@ impl AuxFilePolicy {

    /// If a tenant writes aux files without setting `switch_aux_policy`, this value will be used.
    pub fn default_tenant_config() -> Self {
-        Self::V2
+        Self::V1
    }
 }

@@ -438,9 +435,7 @@ pub enum CompactionAlgorithm {
    Tiered,
 }

-#[derive(
-    Debug, Clone, Copy, PartialEq, Eq, serde_with::DeserializeFromStr, serde_with::SerializeDisplay,
-)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
 pub enum ImageCompressionAlgorithm {
    // Disabled for writes, support decompressing during read path
    Disabled,
@@ -475,33 +470,11 @@ impl FromStr for ImageCompressionAlgorithm {
    }
 }

-impl Display for ImageCompressionAlgorithm {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        match self {
-            ImageCompressionAlgorithm::Disabled => write!(f, "disabled"),
-            ImageCompressionAlgorithm::Zstd { level } => {
-                if let Some(level) = level {
-                    write!(f, "zstd({})", level)
-                } else {
-                    write!(f, "zstd")
-                }
-            }
-        }
-    }
-}
-
 #[derive(Eq, PartialEq, Debug, Clone, Serialize, Deserialize)]
 pub struct CompactionAlgorithmSettings {
    pub kind: CompactionAlgorithm,
 }

-#[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)]
-#[serde(tag = "mode", rename_all = "kebab-case", deny_unknown_fields)]
-pub enum L0FlushConfig {
-    #[serde(rename_all = "snake_case")]
-    Direct { max_concurrency: NonZeroUsize },
-}
-
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
 pub struct EvictionPolicyLayerAccessThreshold {
    #[serde(with = "humantime_serde")]
@@ -513,11 +486,12 @@ pub struct EvictionPolicyLayerAccessThreshold {
 #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
 pub struct ThrottleConfig {
    pub task_kinds: Vec<String>, // TaskKind
-    pub initial: u32,
+    pub initial: usize,
    #[serde(with = "humantime_serde")]
    pub refill_interval: Duration,
-    pub refill_amount: NonZeroU32,
-    pub max: u32,
+    pub refill_amount: NonZeroUsize,
+    pub max: usize,
+    pub fair: bool,
 }

 impl ThrottleConfig {
@@ -527,8 +501,9 @@ impl ThrottleConfig {
            // other values don't matter with emtpy `task_kinds`.
            initial: 0,
            refill_interval: Duration::from_millis(1),
-            refill_amount: NonZeroU32::new(1).unwrap(),
+            refill_amount: NonZeroUsize::new(1).unwrap(),
            max: 1,
+            fair: true,
        }
    }
    /// The requests per second allowed  by the given config.
@@ -746,14 +721,8 @@ pub struct TimelineInfo {

    pub walreceiver_status: String,

-    // ALWAYS add new fields at the end of the struct with `Option` to ensure forward/backward compatibility.
-    // Backward compatibility: you will get a JSON not containing the newly-added field.
-    // Forward compatibility: a previous version of the pageserver will receive a JSON. serde::Deserialize does
-    // not deny unknown fields by default so it's safe to set the field to some value, though it won't be
-    // read.
    /// The last aux file policy being used on this timeline
    pub last_aux_file_policy: Option<AuxFilePolicy>,
-    pub is_archived: Option<bool>,
 }

 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -898,9 +867,7 @@ pub struct WalRedoManagerStatus {
    pub process: Option<WalRedoManagerProcessStatus>,
 }

-/// The progress of a secondary tenant.
-///
-/// It is mostly useful when doing a long running download: e.g. initiating
+/// The progress of a secondary tenant is mostly useful when doing a long running download: e.g. initiating
 /// a download job, timing out while waiting for it to run, and then inspecting this status to understand
 /// what's happening.
 #[derive(Default, Debug, Serialize, Deserialize, Clone)]
@@ -1095,7 +1062,7 @@ impl TryFrom<u8> for PagestreamBeMessageTag {
    }
 }

-// A GetPage request contains two LSN values:
+// In the V2 protocol version, a GetPage request contains two LSN values:
 //
 // request_lsn: Get the page version at this point in time.  Lsn::Max is a special value that means
 // "get the latest version present". It's used by the primary server, which knows that no one else
@@ -1108,7 +1075,7 @@ impl TryFrom<u8> for PagestreamBeMessageTag {
 // passing an earlier LSN can speed up the request, by allowing the pageserver to process the
 // request without waiting for 'request_lsn' to arrive.
 //
-// The now-defunct V1 interface contained only one LSN, and a boolean 'latest' flag. The V1 interface was
+// The legacy V1 interface contained only one LSN, and a boolean 'latest' flag. The V1 interface was
 // sufficient for the primary; the 'lsn' was equivalent to the 'not_modified_since' value, and
 // 'latest' was set to true. The V2 interface was added because there was no correct way for a
 // standby to request a page at a particular non-latest LSN, and also include the
@@ -1116,11 +1083,15 @@ impl TryFrom<u8> for PagestreamBeMessageTag {
 // request, if the standby knows that the page hasn't been modified since, and risk getting an error
 // if that LSN has fallen behind the GC horizon, or requesting the current replay LSN, which could
 // require the pageserver unnecessarily to wait for the WAL to arrive up to that point. The new V2
-// interface allows sending both LSNs, and let the pageserver do the right thing. There was no
+// interface allows sending both LSNs, and let the pageserver do the right thing. There is no
 // difference in the responses between V1 and V2.
 //
+// The Request structs below reflect the V2 interface. If V1 is used, the parse function
+// maps the old format requests to the new format.
+//
 #[derive(Clone, Copy)]
 pub enum PagestreamProtocolVersion {
+    V1,
    V2,
 }

@@ -1259,17 +1230,36 @@ impl PagestreamFeMessage {
        bytes.into()
    }

-    pub fn parse<R: std::io::Read>(body: &mut R) -> anyhow::Result<PagestreamFeMessage> {
+    pub fn parse<R: std::io::Read>(
+        body: &mut R,
+        protocol_version: PagestreamProtocolVersion,
+    ) -> anyhow::Result<PagestreamFeMessage> {
        // these correspond to the NeonMessageTag enum in pagestore_client.h
        //
        // TODO: consider using protobuf or serde bincode for less error prone
        // serialization.
        let msg_tag = body.read_u8()?;

-        // these two fields are the same for every request type
-        let request_lsn = Lsn::from(body.read_u64::<BigEndian>()?);
-        let not_modified_since = Lsn::from(body.read_u64::<BigEndian>()?);
+        let (request_lsn, not_modified_since) = match protocol_version {
+            PagestreamProtocolVersion::V2 => (
+                Lsn::from(body.read_u64::<BigEndian>()?),
+                Lsn::from(body.read_u64::<BigEndian>()?),
+            ),
+            PagestreamProtocolVersion::V1 => {
+                // In the old protocol, each message starts with a boolean 'latest' flag,
+                // followed by 'lsn'. Convert that to the two LSNs, 'request_lsn' and
+                // 'not_modified_since', used in the new protocol version.
+                let latest = body.read_u8()? != 0;
+                let request_lsn = Lsn::from(body.read_u64::<BigEndian>()?);
+                if latest {
+                    (Lsn::MAX, request_lsn) // get latest version
+                } else {
+                    (request_lsn, request_lsn) // get version at specified LSN
+                }
+            }
+        };

+        // The rest of the messages are the same between V1 and V2
        match msg_tag {
            0 => Ok(PagestreamFeMessage::Exists(PagestreamExistsRequest {
                request_lsn,
@@ -1477,7 +1467,9 @@ mod tests {
        ];
        for msg in messages {
            let bytes = msg.serialize();
-            let reconstructed = PagestreamFeMessage::parse(&mut bytes.reader()).unwrap();
+            let reconstructed =
+                PagestreamFeMessage::parse(&mut bytes.reader(), PagestreamProtocolVersion::V2)
+                    .unwrap();
            assert!(msg == reconstructed);
        }
    }
@@ -1685,33 +1677,21 @@ mod tests {
    #[test]
    fn test_image_compression_algorithm_parsing() {
        use ImageCompressionAlgorithm::*;
-        let cases = [
-            ("disabled", Disabled),
-            ("zstd", Zstd { level: None }),
-            ("zstd(18)", Zstd { level: Some(18) }),
-            ("zstd(-3)", Zstd { level: Some(-3) }),
-        ];
-
-        for (display, expected) in cases {
-            assert_eq!(
-                ImageCompressionAlgorithm::from_str(display).unwrap(),
-                expected,
-                "parsing works"
-            );
-            assert_eq!(format!("{expected}"), display, "Display FromStr roundtrip");
-
-            let ser = serde_json::to_string(&expected).expect("serialization");
-            assert_eq!(
-                serde_json::from_str::<ImageCompressionAlgorithm>(&ser).unwrap(),
-                expected,
-                "serde roundtrip"
-            );
-
-            assert_eq!(
-                serde_json::Value::String(display.to_string()),
-                serde_json::to_value(expected).unwrap(),
-                "Display is the serde serialization"
-            );
-        }
+        assert_eq!(
+            ImageCompressionAlgorithm::from_str("disabled").unwrap(),
+            Disabled
+        );
+        assert_eq!(
+            ImageCompressionAlgorithm::from_str("zstd").unwrap(),
+            Zstd { level: None }
+        );
+        assert_eq!(
+            ImageCompressionAlgorithm::from_str("zstd(18)").unwrap(),
+            Zstd { level: Some(18) }
+        );
+        assert_eq!(
+            ImageCompressionAlgorithm::from_str("zstd(-3)").unwrap(),
+            Zstd { level: Some(-3) }
+        );
    }
 }
--- a/libs/pageserver_api/src/models/utilization.rs
+++ b/libs/pageserver_api/src/models/utilization.rs
@@ -38,7 +38,7 @@ pub struct PageserverUtilization {
    pub max_shard_count: u32,

    /// Cached result of [`Self::score`]
-    pub utilization_score: Option<u64>,
+    pub utilization_score: u64,

    /// When was this snapshot captured, pageserver local time.
    ///
@@ -50,8 +50,6 @@ fn unity_percent() -> Percent {
    Percent::new(0).unwrap()
 }

-pub type RawScore = u64;
-
 impl PageserverUtilization {
    const UTILIZATION_FULL: u64 = 1000000;

@@ -64,7 +62,7 @@ impl PageserverUtilization {
    /// - Negative values are forbidden
    /// - Values over UTILIZATION_FULL indicate an overloaded node, which may show degraded performance due to
    ///   layer eviction.
-    pub fn score(&self) -> RawScore {
+    pub fn score(&self) -> u64 {
        let disk_usable_capacity = ((self.disk_usage_bytes + self.free_space_bytes)
            * self.disk_usable_pct.get() as u64)
            / 100;
@@ -76,41 +74,8 @@ impl PageserverUtilization {
        std::cmp::max(disk_utilization_score, shard_utilization_score)
    }

-    pub fn cached_score(&mut self) -> RawScore {
-        match self.utilization_score {
-            None => {
-                let s = self.score();
-                self.utilization_score = Some(s);
-                s
-            }
-            Some(s) => s,
-        }
-    }
-
-    /// If a node is currently hosting more work than it can comfortably handle.  This does not indicate that
-    /// it will fail, but it is a strong signal that more work should not be added unless there is no alternative.
-    ///
-    /// When a node is overloaded, we may override soft affinity preferences and do things like scheduling
-    /// into a node in a less desirable AZ, if all the nodes in the preferred AZ are overloaded.
-    pub fn is_overloaded(score: RawScore) -> bool {
-        // Why the factor of two?  This is unscientific but reflects behavior of real systems:
-        // - In terms of shard counts, a node's preferred max count is a soft limit intended to keep
-        //   startup and housekeeping jobs nice and responsive.  We can go to double this limit if needed
-        //   until some more nodes are deployed.
-        // - In terms of disk space, the node's utilization heuristic assumes every tenant needs to
-        //   hold its biggest timeline fully on disk, which is tends to be an over estimate when
-        //   some tenants are very idle and have dropped layers from disk.  In practice going up to
-        //   double is generally better than giving up and scheduling in a sub-optimal AZ.
-        score >= 2 * Self::UTILIZATION_FULL
-    }
-
-    pub fn adjust_shard_count_max(&mut self, shard_count: u32) {
-        if self.shard_count < shard_count {
-            self.shard_count = shard_count;
-
-            // Dirty cache: this will be calculated next time someone retrives the score
-            self.utilization_score = None;
-        }
+    pub fn refresh_score(&mut self) {
+        self.utilization_score = self.score();
    }

    /// A utilization structure that has a full utilization score: use this as a placeholder when
@@ -123,38 +88,7 @@ impl PageserverUtilization {
            disk_usable_pct: Percent::new(100).unwrap(),
            shard_count: 1,
            max_shard_count: 1,
-            utilization_score: Some(Self::UTILIZATION_FULL),
-            captured_at: serde_system_time::SystemTime(SystemTime::now()),
-        }
-    }
-}
-
-/// Test helper
-pub mod test_utilization {
-    use super::PageserverUtilization;
-    use std::time::SystemTime;
-    use utils::{
-        serde_percent::Percent,
-        serde_system_time::{self},
-    };
-
-    // Parameters of the imaginary node used for test utilization instances
-    const TEST_DISK_SIZE: u64 = 1024 * 1024 * 1024 * 1024;
-    const TEST_SHARDS_MAX: u32 = 1000;
-
-    /// Unit test helper.  Unconditionally compiled because cfg(test) doesn't carry across crates.  Do
-    /// not abuse this function from non-test code.
-    ///
-    /// Emulates a node with a 1000 shard limit and a 1TB disk.
-    pub fn simple(shard_count: u32, disk_wanted_bytes: u64) -> PageserverUtilization {
-        PageserverUtilization {
-            disk_usage_bytes: disk_wanted_bytes,
-            free_space_bytes: TEST_DISK_SIZE - std::cmp::min(disk_wanted_bytes, TEST_DISK_SIZE),
-            disk_wanted_bytes,
-            disk_usable_pct: Percent::new(100).unwrap(),
-            shard_count,
-            max_shard_count: TEST_SHARDS_MAX,
-            utilization_score: None,
+            utilization_score: Self::UTILIZATION_FULL,
            captured_at: serde_system_time::SystemTime(SystemTime::now()),
        }
    }
@@ -186,7 +120,7 @@ mod tests {
            disk_usage_bytes: u64::MAX,
            free_space_bytes: 0,
            disk_wanted_bytes: u64::MAX,
-            utilization_score: Some(13),
+            utilization_score: 13,
            disk_usable_pct: Percent::new(90).unwrap(),
            shard_count: 100,
            max_shard_count: 200,
--- a/libs/postgres_backend/Cargo.toml
+++ b/libs/postgres_backend/Cargo.toml
@@ -18,6 +18,7 @@ tokio-rustls.workspace = true
 tracing.workspace = true

 pq_proto.workspace = true
+workspace_hack.workspace = true

 [dev-dependencies]
 once_cell.workspace = true
--- a/libs/postgres_backend/src/lib.rs
+++ b/libs/postgres_backend/src/lib.rs
@@ -69,10 +69,8 @@ impl QueryError {
 }

 /// Returns true if the given error is a normal consequence of a network issue,
-/// or the client closing the connection.
-///
-/// These errors can happen during normal operations,
-/// and don't indicate a bug in our code.
+/// or the client closing the connection. These errors can happen during normal
+/// operations, and don't indicate a bug in our code.
 pub fn is_expected_io_error(e: &io::Error) -> bool {
    use io::ErrorKind::*;
    matches!(
@@ -81,16 +79,17 @@ pub fn is_expected_io_error(e: &io::Error) -> bool {
    )
 }

+#[async_trait::async_trait]
 pub trait Handler<IO> {
    /// Handle single query.
    /// postgres_backend will issue ReadyForQuery after calling this (this
    /// might be not what we want after CopyData streaming, but currently we don't
    /// care). It will also flush out the output buffer.
-    fn process_query(
+    async fn process_query(
        &mut self,
        pgb: &mut PostgresBackend<IO>,
        query_string: &str,
-    ) -> impl Future<Output = Result<(), QueryError>>;
+    ) -> Result<(), QueryError>;

    /// Called on startup packet receival, allows to process params.
    ///
--- a/libs/postgres_backend/tests/simple_select.rs
+++ b/libs/postgres_backend/tests/simple_select.rs
@@ -23,6 +23,7 @@ async fn make_tcp_pair() -> (TcpStream, TcpStream) {

 struct TestHandler {}

+#[async_trait::async_trait]
 impl<IO: AsyncRead + AsyncWrite + Unpin + Send> Handler<IO> for TestHandler {
    // return single col 'hey' for any query
    async fn process_query(
--- a/libs/postgres_connection/Cargo.toml
+++ b/libs/postgres_connection/Cargo.toml
@@ -11,5 +11,7 @@ postgres.workspace = true
 tokio-postgres.workspace = true
 url.workspace = true

+workspace_hack.workspace = true
+
 [dev-dependencies]
 once_cell.workspace = true
--- a/libs/postgres_connection/src/lib.rs
+++ b/libs/postgres_connection/src/lib.rs
@@ -7,7 +7,6 @@ use std::fmt;
 use url::Host;

 /// Parses a string of format either `host:port` or `host` into a corresponding pair.
-///
 /// The `host` part should be a correct `url::Host`, while `port` (if present) should be
 /// a valid decimal u16 of digits only.
 pub fn parse_host_port<S: AsRef<str>>(host_port: S) -> Result<(Host, Option<u16>), anyhow::Error> {
--- a/libs/postgres_ffi/Cargo.toml
+++ b/libs/postgres_ffi/Cargo.toml
@@ -19,6 +19,8 @@ thiserror.workspace = true
 serde.workspace = true
 utils.workspace = true

+workspace_hack.workspace = true
+
 [dev-dependencies]
 env_logger.workspace = true
 postgres.workspace = true
--- a/libs/postgres_ffi/build.rs
+++ b/libs/postgres_ffi/build.rs
@@ -14,7 +14,7 @@ impl ParseCallbacks for PostgresFfiCallbacks {
    fn include_file(&self, filename: &str) {
        // This does the equivalent of passing bindgen::CargoCallbacks
        // to the builder .parse_callbacks() method.
-        let cargo_callbacks = bindgen::CargoCallbacks::new();
+        let cargo_callbacks = bindgen::CargoCallbacks;
        cargo_callbacks.include_file(filename)
    }

@@ -121,7 +121,6 @@ fn main() -> anyhow::Result<()> {
            .allowlist_type("XLogPageHeaderData")
            .allowlist_type("XLogLongPageHeaderData")
            .allowlist_var("XLOG_PAGE_MAGIC")
-            .allowlist_var("PG_MAJORVERSION_NUM")
            .allowlist_var("PG_CONTROL_FILE_SIZE")
            .allowlist_var("PG_CONTROLFILEDATA_OFFSETOF_CRC")
            .allowlist_type("PageHeaderData")
--- a/libs/postgres_ffi/src/lib.rs
+++ b/libs/postgres_ffi/src/lib.rs
@@ -44,9 +44,6 @@ macro_rules! postgres_ffi {
            // Re-export some symbols from bindings
            pub use bindings::DBState_DB_SHUTDOWNED;
            pub use bindings::{CheckPoint, ControlFileData, XLogRecord};
-
-            pub const ZERO_CHECKPOINT: bytes::Bytes =
-                bytes::Bytes::from_static(&[0u8; xlog_utils::SIZEOF_CHECKPOINT]);
        }
    };
 }
@@ -109,107 +106,6 @@ macro_rules! dispatch_pgversion {
    };
 }

-#[macro_export]
-macro_rules! enum_pgversion_dispatch {
-    ($name:expr, $typ:ident, $bind:ident, $code:block) => {
-        enum_pgversion_dispatch!(
-            name = $name,
-            bind = $bind,
-            typ = $typ,
-            code = $code,
-            pgversions = [
-                V14 : v14,
-                V15 : v15,
-                V16 : v16,
-            ]
-        )
-    };
-    (name = $name:expr,
-     bind = $bind:ident,
-     typ = $typ:ident,
-     code = $code:block,
-     pgversions = [$($variant:ident : $md:ident),+ $(,)?]) => {
-        match $name {
-            $(
-            self::$typ::$variant($bind) => {
-                use $crate::$md as pgv;
-                $code
-            }
-            ),+,
-        }
-    };
-}
-
-#[macro_export]
-macro_rules! enum_pgversion {
-    {$name:ident, pgv :: $t:ident} => {
-        enum_pgversion!{
-            name = $name,
-            typ = $t,
-            pgversions = [
-                V14 : v14,
-                V15 : v15,
-                V16 : v16,
-            ]
-        }
-    };
-    {$name:ident, pgv :: $p:ident :: $t:ident} => {
-        enum_pgversion!{
-            name = $name,
-            path = $p,
-            typ = $t,
-            pgversions = [
-                V14 : v14,
-                V15 : v15,
-                V16 : v16,
-            ]
-        }
-    };
-    {name = $name:ident,
-     typ = $t:ident,
-     pgversions = [$($variant:ident : $md:ident),+ $(,)?]} => {
-        pub enum $name {
-            $($variant ( $crate::$md::$t )),+
-        }
-        impl self::$name {
-            pub fn pg_version(&self) -> u32 {
-                enum_pgversion_dispatch!(self, $name, _ign, {
-                    pgv::bindings::PG_MAJORVERSION_NUM
-                })
-            }
-        }
-        $(
-        impl Into<self::$name> for $crate::$md::$t {
-            fn into(self) -> self::$name {
-                self::$name::$variant (self)
-            }
-        }
-        )+
-    };
-    {name = $name:ident,
-     path = $p:ident,
-     typ = $t:ident,
-     pgversions = [$($variant:ident : $md:ident),+ $(,)?]} => {
-        pub enum $name {
-            $($variant ($crate::$md::$p::$t)),+
-        }
-        impl $name {
-            pub fn pg_version(&self) -> u32 {
-                enum_pgversion_dispatch!(self, $name, _ign, {
-                    pgv::bindings::PG_MAJORVERSION_NUM
-                })
-            }
-        }
-        $(
-        impl Into<$name> for $crate::$md::$p::$t {
-            fn into(self) -> $name {
-                $name::$variant (self)
-            }
-        }
-        )+
-    };
-}
-
 pub mod pg_constants;
 pub mod relfile_utils;

@@ -240,9 +136,9 @@ pub const MAX_SEND_SIZE: usize = XLOG_BLCKSZ * 16;

 // Export some version independent functions that are used outside of this mod
 pub use v14::xlog_utils::encode_logical_message;
+pub use v14::xlog_utils::from_pg_timestamp;
 pub use v14::xlog_utils::get_current_timestamp;
 pub use v14::xlog_utils::to_pg_timestamp;
-pub use v14::xlog_utils::try_from_pg_timestamp;
 pub use v14::xlog_utils::XLogFileName;

 pub use v14::bindings::DBState_DB_SHUTDOWNED;
--- a/libs/postgres_ffi/src/xlog_utils.rs
+++ b/libs/postgres_ffi/src/xlog_utils.rs
@@ -135,8 +135,6 @@ pub fn get_current_timestamp() -> TimestampTz {
 mod timestamp_conversions {
    use std::time::Duration;

-    use anyhow::Context;
-
    use super::*;

    const UNIX_EPOCH_JDATE: u64 = 2440588; // == date2j(1970, 1, 1)
@@ -156,18 +154,18 @@ mod timestamp_conversions {
        }
    }

-    pub fn try_from_pg_timestamp(time: TimestampTz) -> anyhow::Result<SystemTime> {
+    pub fn from_pg_timestamp(time: TimestampTz) -> SystemTime {
        let time: u64 = time
            .try_into()
-            .context("timestamp before millenium (postgres epoch)")?;
+            .expect("timestamp before millenium (postgres epoch)");
        let since_unix_epoch = time + SECS_DIFF_UNIX_TO_POSTGRES_EPOCH * USECS_PER_SEC;
        SystemTime::UNIX_EPOCH
            .checked_add(Duration::from_micros(since_unix_epoch))
-            .context("SystemTime overflow")
+            .expect("SystemTime overflow")
    }
 }

-pub use timestamp_conversions::{to_pg_timestamp, try_from_pg_timestamp};
+pub use timestamp_conversions::{from_pg_timestamp, to_pg_timestamp};

 // Returns (aligned) end_lsn of the last record in data_dir with WAL segments.
 // start_lsn must point to some previously known record boundary (beginning of
@@ -547,14 +545,14 @@ mod tests {
    #[test]
    fn test_ts_conversion() {
        let now = SystemTime::now();
-        let round_trip = try_from_pg_timestamp(to_pg_timestamp(now)).unwrap();
+        let round_trip = from_pg_timestamp(to_pg_timestamp(now));

        let now_since = now.duration_since(SystemTime::UNIX_EPOCH).unwrap();
        let round_trip_since = round_trip.duration_since(SystemTime::UNIX_EPOCH).unwrap();
        assert_eq!(now_since.as_micros(), round_trip_since.as_micros());

        let now_pg = get_current_timestamp();
-        let round_trip_pg = to_pg_timestamp(try_from_pg_timestamp(now_pg).unwrap());
+        let round_trip_pg = to_pg_timestamp(from_pg_timestamp(now_pg));

        assert_eq!(now_pg, round_trip_pg);
    }
--- a/libs/postgres_ffi/wal_craft/Cargo.toml
+++ b/libs/postgres_ffi/wal_craft/Cargo.toml
@@ -14,6 +14,8 @@ postgres.workspace = true
 postgres_ffi.workspace = true
 camino-tempfile.workspace = true

+workspace_hack.workspace = true
+
 [dev-dependencies]
 regex.workspace = true
 utils.workspace = true
--- a/libs/pq_proto/Cargo.toml
+++ b/libs/pq_proto/Cargo.toml
@@ -11,7 +11,9 @@ itertools.workspace = true
 pin-project-lite.workspace = true
 postgres-protocol.workspace = true
 rand.workspace = true
-tokio = { workspace = true, features = ["io-util"] }
+tokio.workspace = true
 tracing.workspace = true
 thiserror.workspace = true
 serde.workspace = true
+
+workspace_hack.workspace = true
--- a/libs/remote_storage/Cargo.toml
+++ b/libs/remote_storage/Cargo.toml
@@ -32,7 +32,7 @@ scopeguard.workspace = true
 metrics.workspace = true
 utils.workspace = true
 pin-project-lite.workspace = true
-
+workspace_hack.workspace = true
 azure_core.workspace = true
 azure_identity.workspace = true
 azure_storage.workspace = true
@@ -46,4 +46,3 @@ sync_wrapper = { workspace = true, features = ["futures"] }
 camino-tempfile.workspace = true
 test-context.workspace = true
 rand.workspace = true
-tokio = { workspace = true, features = ["test-util"] }
--- a/libs/remote_storage/src/config.rs
+++ b/libs/remote_storage/src/config.rs
@@ -185,7 +185,7 @@ mod tests {
    use super::*;

    fn parse(input: &str) -> anyhow::Result<RemoteStorageConfig> {
-        let toml = input.parse::<toml_edit::DocumentMut>().unwrap();
+        let toml = input.parse::<toml_edit::Document>().unwrap();
        RemoteStorageConfig::from_toml(toml.as_item())
    }

@@ -235,31 +235,6 @@ timeout = '5s'";
        );
    }

-    #[test]
-    fn test_storage_class_serde_roundtrip() {
-        let classes = [
-            None,
-            Some(StorageClass::Standard),
-            Some(StorageClass::IntelligentTiering),
-        ];
-        for class in classes {
-            #[derive(Serialize, Deserialize)]
-            struct Wrapper {
-                #[serde(
-                    deserialize_with = "deserialize_storage_class",
-                    serialize_with = "serialize_storage_class"
-                )]
-                class: Option<StorageClass>,
-            }
-            let wrapped = Wrapper {
-                class: class.clone(),
-            };
-            let serialized = serde_json::to_string(&wrapped).unwrap();
-            let deserialized: Wrapper = serde_json::from_str(&serialized).unwrap();
-            assert_eq!(class, deserialized.class);
-        }
-    }
-
    #[test]
    fn test_azure_parsing() {
        let toml = "\
--- a/libs/remote_storage/src/lib.rs
+++ b/libs/remote_storage/src/lib.rs
@@ -45,8 +45,6 @@ pub use azure_core::Etag;

 pub use error::{DownloadError, TimeTravelError, TimeoutOrCancel};

-/// Default concurrency limit for S3 operations
-///
 /// Currently, sync happens with AWS S3, that has two limits on requests per second:
 /// ~200 RPS for IAM services
 /// <https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/UsingWithRDS.IAMDBAuth.html>
@@ -302,9 +300,7 @@ pub trait RemoteStorage: Send + Sync + 'static {
    ) -> Result<(), TimeTravelError>;
 }

-/// Data part of an ongoing [`Download`].
-///
-/// `DownloadStream` is sensitive to the timeout and cancellation used with the original
+/// `DownloadStream` is sensitive to the timeout and cancellation used with the original
 /// [`RemoteStorage::download`] request. The type yields `std::io::Result<Bytes>` to be compatible
 /// with `tokio::io::copy_buf`.
 // This has 'static because safekeepers do not use cancellation tokens (yet)
--- a/libs/safekeeper_api/Cargo.toml
+++ b/libs/safekeeper_api/Cargo.toml
@@ -9,3 +9,5 @@ serde.workspace = true
 serde_with.workspace = true
 const_format.workspace = true
 utils.workspace = true
+
+workspace_hack.workspace = true
--- a/libs/safekeeper_api/src/models.rs
+++ b/libs/safekeeper_api/src/models.rs
@@ -60,16 +60,3 @@ pub struct TimelineCopyRequest {
    pub target_timeline_id: TimelineId,
    pub until_lsn: Lsn,
 }
-
-#[derive(Debug, Clone, Deserialize, Serialize)]
-pub struct TimelineTermBumpRequest {
-    /// bump to
-    pub term: Option<u64>,
-}
-
-#[derive(Debug, Clone, Deserialize, Serialize)]
-pub struct TimelineTermBumpResponse {
-    // before the request
-    pub previous_term: u64,
-    pub current_term: u64,
-}
--- a/libs/tenant_size_model/Cargo.toml
+++ b/libs/tenant_size_model/Cargo.toml
@@ -9,3 +9,5 @@ license.workspace = true
 anyhow.workspace = true
 serde.workspace = true
 serde_json.workspace = true
+
+workspace_hack.workspace = true
--- a/libs/tenant_size_model/src/lib.rs
+++ b/libs/tenant_size_model/src/lib.rs
@@ -5,10 +5,9 @@
 mod calculation;
 pub mod svg;

-/// StorageModel is the input to the synthetic size calculation.
-///
-/// It represents a tree of timelines, with just the information that's needed
-/// for the calculation. This doesn't track timeline names or where each timeline
+/// StorageModel is the input to the synthetic size calculation. It represents
+/// a tree of timelines, with just the information that's needed for the
+/// calculation. This doesn't track timeline names or where each timeline
 /// begins and ends, for example. Instead, it consists of "points of interest"
 /// on the timelines. A point of interest could be the timeline start or end point,
 /// the oldest point on a timeline that needs to be retained because of PITR
--- a/libs/tracing-utils/Cargo.toml
+++ b/libs/tracing-utils/Cargo.toml
@@ -14,3 +14,5 @@ tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
 tracing.workspace = true
 tracing-opentelemetry.workspace = true
 tracing-subscriber.workspace = true
+
+workspace_hack.workspace = true
--- a/libs/utils/Cargo.toml
+++ b/libs/utils/Cargo.toml
@@ -14,6 +14,7 @@ testing = ["fail/failpoints"]
 arc-swap.workspace = true
 sentry.workspace = true
 async-compression.workspace = true
+async-trait.workspace = true
 anyhow.workspace = true
 bincode.workspace = true
 bytes.workspace = true
@@ -25,6 +26,7 @@ hyper = { workspace = true, features = ["full"] }
 fail.workspace = true
 futures = { workspace = true}
 jsonwebtoken.workspace = true
+leaky-bucket.workspace = true
 nix.workspace = true
 once_cell.workspace = true
 pin-project-lite.workspace = true
@@ -37,7 +39,7 @@ thiserror.workspace = true
 tokio.workspace = true
 tokio-tar.workspace = true
 tokio-util.workspace = true
-toml_edit = { workspace = true, features = ["serde"] }
+toml_edit.workspace = true
 tracing.workspace = true
 tracing-error.workspace = true
 tracing-subscriber = { workspace = true, features = ["json", "registry"] }
@@ -52,6 +54,7 @@ walkdir.workspace = true
 pq_proto.workspace = true
 postgres_connection.workspace = true
 metrics.workspace = true
+workspace_hack.workspace = true

 const_format.workspace = true

@@ -68,7 +71,6 @@ criterion.workspace = true
 hex-literal.workspace = true
 camino-tempfile.workspace = true
 serde_assert.workspace = true
-tokio = { workspace = true, features = ["test-util"] }

 [[bench]]
 name = "benchmarks"
--- a/libs/utils/src/circuit_breaker.rs
+++ b/libs/utils/src/circuit_breaker.rs
@@ -5,10 +5,8 @@ use std::{

 use metrics::IntCounter;

-/// Circuit breakers are for operations that are expensive and fallible.
-///
-/// If a circuit breaker fails repeatedly, we will stop attempting it for some
-/// period of time, to avoid denial-of-service from retries, and
+/// Circuit breakers are for operations that are expensive and fallible: if they fail repeatedly,
+/// we will stop attempting them for some period of time, to avoid denial-of-service from retries, and
 /// to mitigate the log spam from repeated failures.
 pub struct CircuitBreaker {
    /// An identifier that enables us to log useful errors when a circuit is broken
--- a/libs/utils/src/crashsafe.rs
+++ b/libs/utils/src/crashsafe.rs
@@ -1,4 +1,3 @@
-use std::os::fd::AsRawFd;
 use std::{
    borrow::Cow,
    fs::{self, File},
@@ -204,27 +203,6 @@ pub fn overwrite(
    Ok(())
 }

-/// Syncs the filesystem for the given file descriptor.
-#[cfg_attr(target_os = "macos", allow(unused_variables))]
-pub fn syncfs(fd: impl AsRawFd) -> anyhow::Result<()> {
-    // Linux guarantees durability for syncfs.
-    // POSIX doesn't have syncfs, and further does not actually guarantee durability of sync().
-    #[cfg(target_os = "linux")]
-    {
-        use anyhow::Context;
-        nix::unistd::syncfs(fd.as_raw_fd()).context("syncfs")?;
-    }
-    #[cfg(target_os = "macos")]
-    {
-        // macOS is not a production platform for Neon, don't even bother.
-    }
-    #[cfg(not(any(target_os = "linux", target_os = "macos")))]
-    {
-        compile_error!("Unsupported OS");
-    }
-    Ok(())
-}
-
 #[cfg(test)]
 mod tests {

--- a/libs/utils/src/id.rs
+++ b/libs/utils/src/id.rs
@@ -249,10 +249,8 @@ macro_rules! id_newtype {
    };
 }

-/// Neon timeline ID.
-///
-/// They are different from PostgreSQL timeline
-/// IDs, but serve a similar purpose: they differentiate
+/// Neon timeline IDs are different from PostgreSQL timeline
+/// IDs. They serve a similar purpose though: they differentiate
 /// between different "histories" of the same cluster.  However,
 /// PostgreSQL timeline IDs are a bit cumbersome, because they are only
 /// 32-bits wide, and they must be in ascending order in any given
--- a/libs/utils/src/leaky_bucket.rs
+++ b/libs/utils/src/leaky_bucket.rs
@@ -1,280 +0,0 @@
-//! This module implements the Generic Cell Rate Algorithm for a simplified
-//! version of the Leaky Bucket rate limiting system.
-//!
-//! # Leaky Bucket
-//!
-//! If the bucket is full, no new requests are allowed and are throttled/errored.
-//! If the bucket is partially full/empty, new requests are added to the bucket in
-//! terms of "tokens".
-//!
-//! Over time, tokens are removed from the bucket, naturally allowing new requests at a steady rate.
-//!
-//! The bucket size tunes the burst support. The drain rate tunes the steady-rate requests per second.
-//!
-//! # [GCRA](https://en.wikipedia.org/wiki/Generic_cell_rate_algorithm)
-//!
-//! GCRA is a continuous rate leaky-bucket impl that stores minimal state and requires
-//! no background jobs to drain tokens, as the design utilises timestamps to drain automatically over time.
-//!
-//! We store an "empty_at" timestamp as the only state. As time progresses, we will naturally approach
-//! the empty state. The full-bucket state is calculated from `empty_at - config.bucket_width`.
-//!
-//! Another explaination can be found here: <https://brandur.org/rate-limiting>
-
-use std::{sync::Mutex, time::Duration};
-
-use tokio::{sync::Notify, time::Instant};
-
-pub struct LeakyBucketConfig {
-    /// This is the "time cost" of a single request unit.
-    /// Should loosely represent how long it takes to handle a request unit in active resource time.
-    /// Loosely speaking this is the inverse of the steady-rate requests-per-second
-    pub cost: Duration,
-
-    /// total size of the bucket
-    pub bucket_width: Duration,
-}
-
-impl LeakyBucketConfig {
-    pub fn new(rps: f64, bucket_size: f64) -> Self {
-        let cost = Duration::from_secs_f64(rps.recip());
-        let bucket_width = cost.mul_f64(bucket_size);
-        Self { cost, bucket_width }
-    }
-}
-
-pub struct LeakyBucketState {
-    /// Bucket is represented by `allow_at..empty_at` where `allow_at = empty_at - config.bucket_width`.
-    ///
-    /// At any given time, `empty_at - now` represents the number of tokens in the bucket, multiplied by the "time_cost".
-    /// Adding `n` tokens to the bucket is done by moving `empty_at` forward by `n * config.time_cost`.
-    /// If `now < allow_at`, the bucket is considered filled and cannot accept any more tokens.
-    /// Draining the bucket will happen naturally as `now` moves forward.
-    ///
-    /// Let `n` be some "time cost" for the request,
-    /// If now is after empty_at, the bucket is empty and the empty_at is reset to now,
-    /// If now is within the `bucket window + n`, we are within time budget.
-    /// If now is before the `bucket window + n`, we have run out of budget.
-    ///
-    /// This is inspired by the generic cell rate algorithm (GCRA) and works
-    /// exactly the same as a leaky-bucket.
-    pub empty_at: Instant,
-}
-
-impl LeakyBucketState {
-    pub fn with_initial_tokens(config: &LeakyBucketConfig, initial_tokens: f64) -> Self {
-        LeakyBucketState {
-            empty_at: Instant::now() + config.cost.mul_f64(initial_tokens),
-        }
-    }
-
-    pub fn bucket_is_empty(&self, now: Instant) -> bool {
-        // if self.end is after now, the bucket is not empty
-        self.empty_at <= now
-    }
-
-    /// Immediately adds tokens to the bucket, if there is space.
-    ///
-    /// In a scenario where you are waiting for available rate,
-    /// rather than just erroring immediately, `started` corresponds to when this waiting started.
-    ///
-    /// `n` is the number of tokens that will be filled in the bucket.
-    ///
-    /// # Errors
-    ///
-    /// If there is not enough space, no tokens are added. Instead, an error is returned with the time when
-    /// there will be space again.
-    pub fn add_tokens(
-        &mut self,
-        config: &LeakyBucketConfig,
-        started: Instant,
-        n: f64,
-    ) -> Result<(), Instant> {
-        let now = Instant::now();
-
-        // invariant: started <= now
-        debug_assert!(started <= now);
-
-        // If the bucket was empty when we started our search,
-        // we should update the `empty_at` value accordingly.
-        // this prevents us from having negative tokens in the bucket.
-        let mut empty_at = self.empty_at;
-        if empty_at < started {
-            empty_at = started;
-        }
-
-        let n = config.cost.mul_f64(n);
-        let new_empty_at = empty_at + n;
-        let allow_at = new_empty_at.checked_sub(config.bucket_width);
-
-        //                     empty_at
-        //          allow_at    |   new_empty_at
-        //           /          |   /
-        // -------o-[---------o-|--]---------
-        //   now1 ^      now2 ^
-        //
-        // at now1, the bucket would be completely filled if we add n tokens.
-        // at now2, the bucket would be partially filled if we add n tokens.
-
-        match allow_at {
-            Some(allow_at) if now < allow_at => Err(allow_at),
-            _ => {
-                self.empty_at = new_empty_at;
-                Ok(())
-            }
-        }
-    }
-}
-
-pub struct RateLimiter {
-    pub config: LeakyBucketConfig,
-    pub state: Mutex<LeakyBucketState>,
-    /// a queue to provide this fair ordering.
-    pub queue: Notify,
-}
-
-struct Requeue<'a>(&'a Notify);
-
-impl Drop for Requeue<'_> {
-    fn drop(&mut self) {
-        self.0.notify_one();
-    }
-}
-
-impl RateLimiter {
-    pub fn with_initial_tokens(config: LeakyBucketConfig, initial_tokens: f64) -> Self {
-        RateLimiter {
-            state: Mutex::new(LeakyBucketState::with_initial_tokens(
-                &config,
-                initial_tokens,
-            )),
-            config,
-            queue: {
-                let queue = Notify::new();
-                queue.notify_one();
-                queue
-            },
-        }
-    }
-
-    pub fn steady_rps(&self) -> f64 {
-        self.config.cost.as_secs_f64().recip()
-    }
-
-    /// returns true if we did throttle
-    pub async fn acquire(&self, count: usize) -> bool {
-        let mut throttled = false;
-
-        let start = tokio::time::Instant::now();
-
-        // wait until we are the first in the queue
-        let mut notified = std::pin::pin!(self.queue.notified());
-        if !notified.as_mut().enable() {
-            throttled = true;
-            notified.await;
-        }
-
-        // notify the next waiter in the queue when we are done.
-        let _guard = Requeue(&self.queue);
-
-        loop {
-            let res = self
-                .state
-                .lock()
-                .unwrap()
-                .add_tokens(&self.config, start, count as f64);
-            match res {
-                Ok(()) => return throttled,
-                Err(ready_at) => {
-                    throttled = true;
-                    tokio::time::sleep_until(ready_at).await;
-                }
-            }
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use std::time::Duration;
-
-    use tokio::time::Instant;
-
-    use super::{LeakyBucketConfig, LeakyBucketState};
-
-    #[tokio::test(start_paused = true)]
-    async fn check() {
-        let config = LeakyBucketConfig {
-            // average 100rps
-            cost: Duration::from_millis(10),
-            // burst up to 100 requests
-            bucket_width: Duration::from_millis(1000),
-        };
-
-        let mut state = LeakyBucketState {
-            empty_at: Instant::now(),
-        };
-
-        // supports burst
-        {
-            // should work for 100 requests this instant
-            for _ in 0..100 {
-                state.add_tokens(&config, Instant::now(), 1.0).unwrap();
-            }
-            let ready = state.add_tokens(&config, Instant::now(), 1.0).unwrap_err();
-            assert_eq!(ready - Instant::now(), Duration::from_millis(10));
-        }
-
-        // doesn't overfill
-        {
-            // after 1s we should have an empty bucket again.
-            tokio::time::advance(Duration::from_secs(1)).await;
-            assert!(state.bucket_is_empty(Instant::now()));
-
-            // after 1s more, we should not over count the tokens and allow more than 200 requests.
-            tokio::time::advance(Duration::from_secs(1)).await;
-            for _ in 0..100 {
-                state.add_tokens(&config, Instant::now(), 1.0).unwrap();
-            }
-            let ready = state.add_tokens(&config, Instant::now(), 1.0).unwrap_err();
-            assert_eq!(ready - Instant::now(), Duration::from_millis(10));
-        }
-
-        // supports sustained rate over a long period
-        {
-            tokio::time::advance(Duration::from_secs(1)).await;
-
-            // should sustain 100rps
-            for _ in 0..2000 {
-                tokio::time::advance(Duration::from_millis(10)).await;
-                state.add_tokens(&config, Instant::now(), 1.0).unwrap();
-            }
-        }
-
-        // supports requesting more tokens than can be stored in the bucket
-        // we just wait a little bit longer upfront.
-        {
-            // start the bucket completely empty
-            tokio::time::advance(Duration::from_secs(5)).await;
-            assert!(state.bucket_is_empty(Instant::now()));
-
-            // requesting 200 tokens of space should take 200*cost = 2s
-            // but we already have 1s available, so we wait 1s from start.
-            let start = Instant::now();
-
-            let ready = state.add_tokens(&config, start, 200.0).unwrap_err();
-            assert_eq!(ready - Instant::now(), Duration::from_secs(1));
-
-            tokio::time::advance(Duration::from_millis(500)).await;
-            let ready = state.add_tokens(&config, start, 200.0).unwrap_err();
-            assert_eq!(ready - Instant::now(), Duration::from_millis(500));
-
-            tokio::time::advance(Duration::from_millis(500)).await;
-            state.add_tokens(&config, start, 200.0).unwrap();
-
-            // bucket should be completely full now
-            let ready = state.add_tokens(&config, Instant::now(), 1.0).unwrap_err();
-            assert_eq!(ready - Instant::now(), Duration::from_millis(10));
-        }
-    }
-}
--- a/libs/utils/src/lib.rs
+++ b/libs/utils/src/lib.rs
@@ -71,7 +71,6 @@ pub mod postgres_client;

 pub mod tracing_span_assert;

-pub mod leaky_bucket;
 pub mod rate_limit;

 /// Simple once-barrier and a guard which keeps barrier awaiting.
--- a/libs/utils/src/lock_file.rs
+++ b/libs/utils/src/lock_file.rs
@@ -100,9 +100,7 @@ pub enum LockFileRead {
 }

 /// Open & try to lock the lock file at the given `path`, returning a [handle][`LockFileRead`] to
-/// inspect its content.
-///
-/// It is not an `Err(...)` if the file does not exist or is already locked.
+/// inspect its content. It is not an `Err(...)` if the file does not exist or is already locked.
 /// Check the [`LockFileRead`] variants for details.
 pub fn read_and_hold_lock_file(path: &Utf8Path) -> anyhow::Result<LockFileRead> {
    let res = fs::OpenOptions::new().read(true).open(path);
--- a/libs/utils/src/logging.rs
+++ b/libs/utils/src/logging.rs
@@ -3,9 +3,9 @@ use std::str::FromStr;
 use anyhow::Context;
 use metrics::{IntCounter, IntCounterVec};
 use once_cell::sync::Lazy;
-use strum_macros::{EnumString, VariantNames};
+use strum_macros::{EnumString, EnumVariantNames};

-#[derive(EnumString, strum_macros::Display, VariantNames, Eq, PartialEq, Debug, Clone, Copy)]
+#[derive(EnumString, EnumVariantNames, Eq, PartialEq, Debug, Clone, Copy)]
 #[strum(serialize_all = "snake_case")]
 pub enum LogFormat {
    Plain,
@@ -188,7 +188,7 @@ impl Drop for TracingPanicHookGuard {
 }

 /// Named symbol for our panic hook, which logs the panic.
-fn tracing_panic_hook(info: &std::panic::PanicHookInfo) {
+fn tracing_panic_hook(info: &std::panic::PanicInfo) {
    // following rust 1.66.1 std implementation:
    // https://github.com/rust-lang/rust/blob/90743e7298aca107ddaa0c202a4d3604e29bfeb6/library/std/src/panicking.rs#L235-L288
    let location = info.location();
@@ -274,14 +274,6 @@ impl From<String> for SecretString {
    }
 }

-impl FromStr for SecretString {
-    type Err = std::convert::Infallible;
-
-    fn from_str(s: &str) -> Result<Self, Self::Err> {
-        Ok(Self(s.to_string()))
-    }
-}
-
 impl std::fmt::Debug for SecretString {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "[SECRET]")
--- a/libs/utils/src/pageserver_feedback.rs
+++ b/libs/utils/src/pageserver_feedback.rs
@@ -8,7 +8,6 @@ use tracing::{trace, warn};
 use crate::lsn::Lsn;

 /// Feedback pageserver sends to safekeeper and safekeeper resends to compute.
-///
 /// Serialized in custom flexible key/value format. In replication protocol, it
 /// is marked with NEON_STATUS_UPDATE_TAG_BYTE to differentiate from postgres
 /// Standby status update / Hot standby feedback messages.
--- a/libs/utils/src/poison.rs
+++ b/libs/utils/src/poison.rs
@@ -65,8 +65,6 @@ impl<T> Poison<T> {
    }
 }

-/// Armed pointer to a [`Poison`].
-///
 /// Use [`Self::data`] and [`Self::data_mut`] to access the wrapped state.
 /// Once modifications are done, use [`Self::disarm`].
 /// If [`Guard`] gets dropped instead of calling [`Self::disarm`], the state is poisoned
--- a/libs/utils/src/rate_limit.rs
+++ b/libs/utils/src/rate_limit.rs
@@ -5,15 +5,6 @@ use std::time::{Duration, Instant};
 pub struct RateLimit {
    last: Option<Instant>,
    interval: Duration,
-    dropped: u64,
-}
-
-pub struct RateLimitStats(u64);
-
-impl std::fmt::Display for RateLimitStats {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        write!(f, "{} dropped calls", self.0)
-    }
 }

 impl RateLimit {
@@ -21,27 +12,20 @@ impl RateLimit {
        Self {
            last: None,
            interval,
-            dropped: 0,
        }
    }

    /// Call `f` if the rate limit allows.
    /// Don't call it otherwise.
    pub fn call<F: FnOnce()>(&mut self, f: F) {
-        self.call2(|_| f())
-    }
-
-    pub fn call2<F: FnOnce(RateLimitStats)>(&mut self, f: F) {
        let now = Instant::now();
        match self.last {
            Some(last) if now - last <= self.interval => {
                // ratelimit
-                self.dropped += 1;
            }
            _ => {
                self.last = Some(now);
-                f(RateLimitStats(self.dropped));
-                self.dropped = 0;
+                f();
            }
        }
    }
--- a/libs/utils/src/shard.rs
+++ b/libs/utils/src/shard.rs
@@ -13,11 +13,10 @@ pub struct ShardNumber(pub u8);
 #[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Copy, Serialize, Deserialize, Debug, Hash)]
 pub struct ShardCount(pub u8);

-/// Combination of ShardNumber and ShardCount.
-///
-/// For use within the context of a particular tenant, when we need to know which shard we're
-/// dealing with, but do not need to know the full ShardIdentity (because we won't be doing
-/// any page->shard mapping), and do not need to know the fully qualified TenantShardId.
+/// Combination of ShardNumber and ShardCount.  For use within the context of a particular tenant,
+/// when we need to know which shard we're dealing with, but do not need to know the full
+/// ShardIdentity (because we won't be doing any page->shard mapping), and do not need to know
+/// the fully qualified TenantShardId.
 #[derive(Eq, PartialEq, PartialOrd, Ord, Clone, Copy, Hash)]
 pub struct ShardIndex {
    pub shard_number: ShardNumber,
--- a/libs/utils/src/simple_rcu.rs
+++ b/libs/utils/src/simple_rcu.rs
@@ -49,11 +49,12 @@ use std::sync::{RwLock, RwLockWriteGuard};

 use tokio::sync::watch;

-/// Rcu allows multiple readers to read and hold onto a value without blocking
-/// (for very long).
 ///
-/// Storing to the Rcu updates the value, making new readers immediately see
-/// the new value, but it also waits for all current readers to finish.
+/// Rcu allows multiple readers to read and hold onto a value without blocking
+/// (for very long).  Storing to the Rcu updates the value, making new readers
+/// immediately see the new value, but it also waits for all current readers to
+/// finish.
+///
 pub struct Rcu<V> {
    inner: RwLock<RcuInner<V>>,
 }
--- a/libs/utils/src/sync/heavier_once_cell.rs
+++ b/libs/utils/src/sync/heavier_once_cell.rs
@@ -5,9 +5,7 @@ use std::sync::{
 use tokio::sync::Semaphore;

 /// Custom design like [`tokio::sync::OnceCell`] but using [`OwnedSemaphorePermit`] instead of
-/// `SemaphorePermit`.
-///
-/// Allows use of `take` which does not require holding an outer mutex guard
+/// `SemaphorePermit`, allowing use of `take` which does not require holding an outer mutex guard
 /// for the duration of initialization.
 ///
 /// Has no unsafe, builds upon [`tokio::sync::Semaphore`] and [`std::sync::Mutex`].
--- a/libs/utils/src/toml_edit_ext.rs
+++ b/libs/utils/src/toml_edit_ext.rs
@@ -10,7 +10,7 @@ pub fn deserialize_item<T>(item: &toml_edit::Item) -> Result<T, Error>
 where
    T: serde::de::DeserializeOwned,
 {
-    let document: toml_edit::DocumentMut = match item {
+    let document: toml_edit::Document = match item {
        toml_edit::Item::Table(toml) => toml.clone().into(),
        toml_edit::Item::Value(toml_edit::Value::InlineTable(toml)) => {
            toml.clone().into_table().into()
--- a/libs/utils/src/vec_map.rs
+++ b/libs/utils/src/vec_map.rs
@@ -7,7 +7,6 @@ pub enum VecMapOrdering {
 }

 /// Ordered map datastructure implemented in a Vec.
-///
 /// Append only - can only add keys that are larger than the
 /// current max key.
 /// Ordering can be adjusted using [`VecMapOrdering`]
--- a/libs/utils/src/yielding_loop.rs
+++ b/libs/utils/src/yielding_loop.rs
@@ -6,10 +6,9 @@ pub enum YieldingLoopError {
    Cancelled,
 }

-/// Helper for long synchronous loops, e.g. over all tenants in the system.
-///
-/// Periodically yields to avoid blocking the executor, and after resuming
-/// checks the provided cancellation token to drop out promptly on shutdown.
+/// Helper for long synchronous loops, e.g. over all tenants in the system.  Periodically
+/// yields to avoid blocking the executor, and after resuming checks the provided
+/// cancellation token to drop out promptly on shutdown.
 #[inline(always)]
 pub async fn yielding_loop<I, T, F>(
    interval: usize,
@@ -24,7 +23,7 @@ where
    for (i, item) in iter.enumerate() {
        visitor(item);

-        if (i + 1) % interval == 0 {
+        if (i + 1) % interval == 0 {
            tokio::task::yield_now().await;
            if cancel.is_cancelled() {
                return Err(YieldingLoopError::Cancelled);
--- a/libs/walproposer/Cargo.toml
+++ b/libs/walproposer/Cargo.toml
@@ -9,6 +9,8 @@ anyhow.workspace = true
 utils.workspace = true
 postgres_ffi.workspace = true

+workspace_hack.workspace = true
+
 [build-dependencies]
 anyhow.workspace = true
 bindgen.workspace = true
--- a/libs/walproposer/build.rs
+++ b/libs/walproposer/build.rs
@@ -4,6 +4,7 @@
 use std::{env, path::PathBuf, process::Command};

 use anyhow::{anyhow, Context};
+use bindgen::CargoCallbacks;

 fn main() -> anyhow::Result<()> {
    // Tell cargo to invalidate the built crate whenever the wrapper changes
@@ -63,25 +64,16 @@ fn main() -> anyhow::Result<()> {
            .map_err(|s| anyhow!("Bad postgres server path {s:?}"))?
    };

-    let unwind_abi_functions = [
-        "log_internal",
-        "recovery_download",
-        "start_streaming",
-        "finish_sync_safekeepers",
-        "wait_event_set",
-        "WalProposerStart",
-    ];
-
    // The bindgen::Builder is the main entry point
    // to bindgen, and lets you build up options for
    // the resulting bindings.
-    let mut builder = bindgen::Builder::default()
+    let bindings = bindgen::Builder::default()
        // The input header we would like to generate
        // bindings for.
        .header("bindgen_deps.h")
        // Tell cargo to invalidate the built crate whenever any of the
        // included header files changed.
-        .parse_callbacks(Box::new(bindgen::CargoCallbacks::new()))
+        .parse_callbacks(Box::new(CargoCallbacks))
        .allowlist_type("WalProposer")
        .allowlist_type("WalProposerConfig")
        .allowlist_type("walproposer_api")
@@ -103,7 +95,6 @@ fn main() -> anyhow::Result<()> {
        .allowlist_var("ERROR")
        .allowlist_var("FATAL")
        .allowlist_var("PANIC")
-        .allowlist_var("PG_VERSION_NUM")
        .allowlist_var("WPEVENT")
        .allowlist_var("WL_LATCH_SET")
        .allowlist_var("WL_SOCKET_READABLE")
@@ -113,12 +104,7 @@ fn main() -> anyhow::Result<()> {
        .allowlist_var("WL_SOCKET_MASK")
        .clang_arg("-DWALPROPOSER_LIB")
        .clang_arg(format!("-I{pgxn_neon}"))
-        .clang_arg(format!("-I{inc_server_path}"));
-
-    for name in unwind_abi_functions {
-        builder = builder.override_abi(bindgen::Abi::CUnwind, name);
-    }
-    let bindings = builder
+        .clang_arg(format!("-I{inc_server_path}"))
        // Finish the builder and generate the bindings.
        .generate()
        // Unwrap the Result and panic on failure.
--- a/libs/walproposer/src/api_bindings.rs
+++ b/libs/walproposer/src/api_bindings.rs
@@ -33,7 +33,7 @@ extern "C" fn get_shmem_state(wp: *mut WalProposer) -> *mut WalproposerShmemStat
    }
 }

-extern "C-unwind" fn start_streaming(wp: *mut WalProposer, startpos: XLogRecPtr) {
+extern "C" fn start_streaming(wp: *mut WalProposer, startpos: XLogRecPtr) {
    unsafe {
        let callback_data = (*(*wp).config).callback_data;
        let api = callback_data as *mut Box<dyn ApiImpl>;
@@ -187,7 +187,7 @@ extern "C" fn conn_blocking_write(
    }
 }

-extern "C-unwind" fn recovery_download(wp: *mut WalProposer, sk: *mut Safekeeper) -> bool {
+extern "C" fn recovery_download(wp: *mut WalProposer, sk: *mut Safekeeper) -> bool {
    unsafe {
        let callback_data = (*(*(*sk).wp).config).callback_data;
        let api = callback_data as *mut Box<dyn ApiImpl>;
@@ -272,7 +272,7 @@ extern "C" fn rm_safekeeper_event_set(sk: *mut Safekeeper) {
    }
 }

-extern "C-unwind" fn wait_event_set(
+extern "C" fn wait_event_set(
    wp: *mut WalProposer,
    timeout: ::std::os::raw::c_long,
    event_sk: *mut *mut Safekeeper,
@@ -324,7 +324,7 @@ extern "C" fn get_redo_start_lsn(wp: *mut WalProposer) -> XLogRecPtr {
    }
 }

-extern "C-unwind" fn finish_sync_safekeepers(wp: *mut WalProposer, lsn: XLogRecPtr) {
+extern "C" fn finish_sync_safekeepers(wp: *mut WalProposer, lsn: XLogRecPtr) {
    unsafe {
        let callback_data = (*(*wp).config).callback_data;
        let api = callback_data as *mut Box<dyn ApiImpl>;
@@ -340,7 +340,7 @@ extern "C" fn process_safekeeper_feedback(wp: *mut WalProposer, sk: *mut Safekee
    }
 }

-extern "C-unwind" fn log_internal(
+extern "C" fn log_internal(
    wp: *mut WalProposer,
    level: ::std::os::raw::c_int,
    line: *const ::std::os::raw::c_char,
--- a/libs/walproposer/src/walproposer.rs
+++ b/libs/walproposer/src/walproposer.rs
@@ -282,11 +282,7 @@ mod tests {
    use std::cell::UnsafeCell;
    use utils::id::TenantTimelineId;

-    use crate::{
-        api_bindings::Level,
-        bindings::{NeonWALReadResult, PG_VERSION_NUM},
-        walproposer::Wrapper,
-    };
+    use crate::{api_bindings::Level, bindings::NeonWALReadResult, walproposer::Wrapper};

    use super::ApiImpl;

@@ -493,79 +489,41 @@ mod tests {

        let (sender, receiver) = sync_channel(1);

-        // Messages definitions are at walproposer.h
-        // xxx: it would be better to extract them from safekeeper crate and
-        // use serialization/deserialization here.
-        let greeting_tag = (b'g' as u64).to_ne_bytes();
-        let proto_version = 2_u32.to_ne_bytes();
-        let pg_version: [u8; 4] = PG_VERSION_NUM.to_ne_bytes();
-        let proposer_id = [0; 16];
-        let system_id = 0_u64.to_ne_bytes();
-        let tenant_id = ttid.tenant_id.as_arr();
-        let timeline_id = ttid.timeline_id.as_arr();
-        let pg_tli = 1_u32.to_ne_bytes();
-        let wal_seg_size = 16777216_u32.to_ne_bytes();
-        let proposer_greeting = [
-            greeting_tag.as_slice(),
-            proto_version.as_slice(),
-            pg_version.as_slice(),
-            proposer_id.as_slice(),
-            system_id.as_slice(),
-            tenant_id.as_slice(),
-            timeline_id.as_slice(),
-            pg_tli.as_slice(),
-            wal_seg_size.as_slice(),
-        ]
-        .concat();
-
-        let voting_tag = (b'v' as u64).to_ne_bytes();
-        let vote_request_term = 3_u64.to_ne_bytes();
-        let proposer_id = [0; 16];
-        let vote_request = [
-            voting_tag.as_slice(),
-            vote_request_term.as_slice(),
-            proposer_id.as_slice(),
-        ]
-        .concat();
-
-        let acceptor_greeting_term = 2_u64.to_ne_bytes();
-        let acceptor_greeting_node_id = 1_u64.to_ne_bytes();
-        let acceptor_greeting = [
-            greeting_tag.as_slice(),
-            acceptor_greeting_term.as_slice(),
-            acceptor_greeting_node_id.as_slice(),
-        ]
-        .concat();
-
-        let vote_response_term = 3_u64.to_ne_bytes();
-        let vote_given = 1_u64.to_ne_bytes();
-        let flush_lsn = 0x539_u64.to_ne_bytes();
-        let truncate_lsn = 0x539_u64.to_ne_bytes();
-        let th_len = 1_u32.to_ne_bytes();
-        let th_term = 2_u64.to_ne_bytes();
-        let th_lsn = 0x539_u64.to_ne_bytes();
-        let timeline_start_lsn = 0x539_u64.to_ne_bytes();
-        let vote_response = [
-            voting_tag.as_slice(),
-            vote_response_term.as_slice(),
-            vote_given.as_slice(),
-            flush_lsn.as_slice(),
-            truncate_lsn.as_slice(),
-            th_len.as_slice(),
-            th_term.as_slice(),
-            th_lsn.as_slice(),
-            timeline_start_lsn.as_slice(),
-        ]
-        .concat();
-
        let my_impl: Box<dyn ApiImpl> = Box::new(MockImpl {
            wait_events: Cell::new(WaitEventsData {
                sk: std::ptr::null_mut(),
                event_mask: 0,
            }),
-            expected_messages: vec![proposer_greeting, vote_request],
+            expected_messages: vec![
+                // TODO: When updating Postgres versions, this test will cause
+                // problems. Postgres version in message needs updating.
+                //
+                // Greeting(ProposerGreeting { protocol_version: 2, pg_version: 160003, proposer_id: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], system_id: 0, timeline_id: 9e4c8f36063c6c6e93bc20d65a820f3d, tenant_id: 9e4c8f36063c6c6e93bc20d65a820f3d, tli: 1, wal_seg_size: 16777216 })
+                vec![
+                    103, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 3, 113, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 158, 76, 143, 54, 6, 60, 108, 110,
+                    147, 188, 32, 214, 90, 130, 15, 61, 158, 76, 143, 54, 6, 60, 108, 110, 147,
+                    188, 32, 214, 90, 130, 15, 61, 1, 0, 0, 0, 0, 0, 0, 1,
+                ],
+                // VoteRequest(VoteRequest { term: 3 })
+                vec![
+                    118, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                    0, 0, 0, 0, 0, 0,
+                ],
+            ],
            expected_ptr: AtomicUsize::new(0),
-            safekeeper_replies: vec![acceptor_greeting, vote_response],
+            safekeeper_replies: vec![
+                // Greeting(AcceptorGreeting { term: 2, node_id: NodeId(1) })
+                vec![
+                    103, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
+                ],
+                // VoteResponse(VoteResponse { term: 3, vote_given: 1, flush_lsn: 0/539, truncate_lsn: 0/539, term_history: [(2, 0/539)], timeline_start_lsn: 0/539 })
+                vec![
+                    118, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 57,
+                    5, 0, 0, 0, 0, 0, 0, 57, 5, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0,
+                    0, 57, 5, 0, 0, 0, 0, 0, 0, 57, 5, 0, 0, 0, 0, 0, 0,
+                ],
+            ],
            replies_ptr: AtomicUsize::new(0),
            sync_channel: sender,
            shmem: UnsafeCell::new(crate::api_bindings::empty_shmem()),
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -8,7 +8,7 @@ license.workspace = true
 default = []
 # Enables test-only APIs, incuding failpoints. In particular, enables the `fail_point!` macro,
 # which adds some runtime cost to run tests on outage conditions
-testing = ["fail/failpoints", "pageserver_api/testing" ]
+testing = ["fail/failpoints"]

 [dependencies]
 anyhow.workspace = true
@@ -16,7 +16,6 @@ arc-swap.workspace = true
 async-compression.workspace = true
 async-stream.workspace = true
 async-trait.workspace = true
-bit_field.workspace = true
 byteorder.workspace = true
 bytes.workspace = true
 camino.workspace = true
@@ -37,6 +36,7 @@ humantime.workspace = true
 humantime-serde.workspace = true
 hyper.workspace = true
 itertools.workspace = true
+leaky-bucket.workspace = true
 md5.workspace = true
 nix.workspace = true
 # hack to get the number of worker threads tokio uses
@@ -52,7 +52,6 @@ rand.workspace = true
 range-set-blaze = { version = "0.1.16", features = ["alloc"] }
 regex.workspace = true
 scopeguard.workspace = true
-send-future.workspace = true
 serde.workspace = true
 serde_json = { workspace = true, features = ["raw_value"] }
 serde_path_to_error.workspace = true
@@ -101,7 +100,6 @@ procfs.workspace = true
 criterion.workspace = true
 hex-literal.workspace = true
 tokio = { workspace = true, features = ["process", "sync", "fs", "rt", "io-util", "time", "test-util"] }
-indoc.workspace = true

 [[bench]]
 name = "bench_layer_map"
--- a/pageserver/benches/bench_ingest.rs
+++ b/pageserver/benches/bench_ingest.rs
@@ -10,7 +10,6 @@ use pageserver::{
    page_cache,
    repository::Value,
    task_mgr::TaskKind,
-    tenant::storage_layer::inmemory_layer::SerializedBatch,
    tenant::storage_layer::InMemoryLayer,
    virtual_file,
 };
@@ -68,16 +67,12 @@ async fn ingest(
    let layer =
        InMemoryLayer::create(conf, timeline_id, tenant_shard_id, lsn, entered, &ctx).await?;

-    let data = Value::Image(Bytes::from(vec![0u8; put_size]));
-    let data_ser_size = data.serialized_size().unwrap() as usize;
+    let data = Value::Image(Bytes::from(vec![0u8; put_size])).ser()?;
    let ctx = RequestContext::new(
        pageserver::task_mgr::TaskKind::WalReceiverConnectionHandler,
        pageserver::context::DownloadBehavior::Download,
    );

-    const BATCH_SIZE: usize = 16;
-    let mut batch = Vec::new();
-
    for i in 0..put_count {
        lsn += put_size as u64;

@@ -100,17 +95,7 @@ async fn ingest(
            }
        }

-        batch.push((key.to_compact(), lsn, data_ser_size, data.clone()));
-        if batch.len() >= BATCH_SIZE {
-            let this_batch = std::mem::take(&mut batch);
-            let serialized = SerializedBatch::from_values(this_batch).unwrap();
-            layer.put_batch(serialized, &ctx).await?;
-        }
-    }
-    if !batch.is_empty() {
-        let this_batch = std::mem::take(&mut batch);
-        let serialized = SerializedBatch::from_values(this_batch).unwrap();
-        layer.put_batch(serialized, &ctx).await?;
+        layer.put_value(key.to_compact(), lsn, &data, &ctx).await?;
    }
    layer.freeze(lsn + 1).await;

@@ -164,11 +149,7 @@ fn criterion_benchmark(c: &mut Criterion) {
    let conf: &'static PageServerConf = Box::leak(Box::new(
        pageserver::config::PageServerConf::dummy_conf(temp_dir.path().to_path_buf()),
    ));
-    virtual_file::init(
-        16384,
-        virtual_file::io_engine_for_bench(),
-        pageserver_api::config::defaults::DEFAULT_IO_BUFFER_ALIGNMENT,
-    );
+    virtual_file::init(16384, virtual_file::io_engine_for_bench());
    page_cache::init(conf.page_cache_size);

    {
--- a/pageserver/client/Cargo.toml
+++ b/pageserver/client/Cargo.toml
@@ -7,6 +7,7 @@ license.workspace = true
 [dependencies]
 pageserver_api.workspace = true
 thiserror.workspace = true
+async-trait.workspace = true
 reqwest = { workspace = true, features = [ "stream" ] }
 utils.workspace = true
 serde.workspace = true
--- a/pageserver/client/src/lib.rs
+++ b/pageserver/client/src/lib.rs
@@ -1,20 +1,2 @@
 pub mod mgmt_api;
 pub mod page_service;
-
-/// For timeline_block_unblock_gc, distinguish the two different operations. This could be a bool.
-// If file structure is per-kind not per-feature then where to put this?
-#[derive(Clone, Copy)]
-pub enum BlockUnblock {
-    Block,
-    Unblock,
-}
-
-impl std::fmt::Display for BlockUnblock {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let s = match self {
-            BlockUnblock::Block => "block",
-            BlockUnblock::Unblock => "unblock",
-        };
-        f.write_str(s)
-    }
-}
--- a/pageserver/client/src/mgmt_api.rs
+++ b/pageserver/client/src/mgmt_api.rs
@@ -12,8 +12,6 @@ use utils::{

 pub use reqwest::Body as ReqwestBody;

-use crate::BlockUnblock;
-
 pub mod util;

 #[derive(Debug, Clone)]
@@ -421,24 +419,6 @@ impl Client {
        }
    }

-    pub async fn timeline_archival_config(
-        &self,
-        tenant_shard_id: TenantShardId,
-        timeline_id: TimelineId,
-        req: &TimelineArchivalConfigRequest,
-    ) -> Result<()> {
-        let uri = format!(
-            "{}/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/archival_config",
-            self.mgmt_api_endpoint
-        );
-
-        self.request(Method::POST, &uri, req)
-            .await?
-            .json()
-            .await
-            .map_err(Error::ReceiveBody)
-    }
-
    pub async fn timeline_detach_ancestor(
        &self,
        tenant_shard_id: TenantShardId,
@@ -456,20 +436,6 @@ impl Client {
            .map_err(Error::ReceiveBody)
    }

-    pub async fn timeline_block_unblock_gc(
-        &self,
-        tenant_shard_id: TenantShardId,
-        timeline_id: TimelineId,
-        dir: BlockUnblock,
-    ) -> Result<()> {
-        let uri = format!(
-            "{}/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/{dir}_gc",
-            self.mgmt_api_endpoint,
-        );
-
-        self.request(Method::POST, &uri, ()).await.map(|_| ())
-    }
-
    pub async fn tenant_reset(&self, tenant_shard_id: TenantShardId) -> Result<()> {
        let uri = format!(
            "{}/v1/tenant/{}/reset",
@@ -540,16 +506,6 @@ impl Client {
            .map_err(Error::ReceiveBody)
    }

-    /// Configs io buffer alignment at runtime.
-    pub async fn put_io_alignment(&self, align: usize) -> Result<()> {
-        let uri = format!("{}/v1/io_alignment", self.mgmt_api_endpoint);
-        self.request(Method::PUT, uri, align)
-            .await?
-            .json()
-            .await
-            .map_err(Error::ReceiveBody)
-    }
-
    pub async fn get_utilization(&self) -> Result<PageserverUtilization> {
        let uri = format!("{}/v1/utilization", self.mgmt_api_endpoint);
        self.get(uri)
--- a/pageserver/ctl/src/layer_map_analyzer.rs
+++ b/pageserver/ctl/src/layer_map_analyzer.rs
@@ -144,11 +144,7 @@ pub(crate) async fn main(cmd: &AnalyzeLayerMapCmd) -> Result<()> {
    let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error);

    // Initialize virtual_file (file desriptor cache) and page cache which are needed to access layer persistent B-Tree.
-    pageserver::virtual_file::init(
-        10,
-        virtual_file::api::IoEngineKind::StdFs,
-        pageserver_api::config::defaults::DEFAULT_IO_BUFFER_ALIGNMENT,
-    );
+    pageserver::virtual_file::init(10, virtual_file::api::IoEngineKind::StdFs);
    pageserver::page_cache::init(100);

    let mut total_delta_layers = 0usize;
--- a/pageserver/ctl/src/layers.rs
+++ b/pageserver/ctl/src/layers.rs
@@ -59,7 +59,7 @@ pub(crate) enum LayerCmd {

 async fn read_delta_file(path: impl AsRef<Path>, ctx: &RequestContext) -> Result<()> {
    let path = Utf8Path::from_path(path.as_ref()).expect("non-Unicode path");
-    virtual_file::init(10, virtual_file::api::IoEngineKind::StdFs, 1);
+    virtual_file::init(10, virtual_file::api::IoEngineKind::StdFs);
    page_cache::init(100);
    let file = VirtualFile::open(path, ctx).await?;
    let file_id = page_cache::next_file_id();
@@ -89,7 +89,6 @@ async fn read_delta_file(path: impl AsRef<Path>, ctx: &RequestContext) -> Result
    for (k, v) in all {
        let value = cursor.read_blob(v.pos(), ctx).await?;
        println!("key:{} value_len:{}", k, value.len());
-        assert!(k.is_i128_representable(), "invalid key: ");
    }
    // TODO(chi): special handling for last key?
    Ok(())
@@ -190,11 +189,7 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> {
            new_tenant_id,
            new_timeline_id,
        } => {
-            pageserver::virtual_file::init(
-                10,
-                virtual_file::api::IoEngineKind::StdFs,
-                pageserver_api::config::defaults::DEFAULT_IO_BUFFER_ALIGNMENT,
-            );
+            pageserver::virtual_file::init(10, virtual_file::api::IoEngineKind::StdFs);
            pageserver::page_cache::init(100);

            let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error);
--- a/pageserver/ctl/src/main.rs
+++ b/pageserver/ctl/src/main.rs
@@ -26,7 +26,7 @@ use pageserver::{
    tenant::{dump_layerfile_from_path, metadata::TimelineMetadata},
    virtual_file,
 };
-use pageserver_api::{config::defaults::DEFAULT_IO_BUFFER_ALIGNMENT, shard::TenantShardId};
+use pageserver_api::shard::TenantShardId;
 use postgres_ffi::ControlFileData;
 use remote_storage::{RemotePath, RemoteStorageConfig};
 use tokio_util::sync::CancellationToken;
@@ -174,7 +174,7 @@ async fn main() -> anyhow::Result<()> {
                println!("specified prefix '{}' failed validation", cmd.prefix);
                return Ok(());
            };
-            let toml_document = toml_edit::DocumentMut::from_str(&cmd.config_toml_str)?;
+            let toml_document = toml_edit::Document::from_str(&cmd.config_toml_str)?;
            let toml_item = toml_document
                .get("remote_storage")
                .expect("need remote_storage");
@@ -205,11 +205,7 @@ fn read_pg_control_file(control_file_path: &Utf8Path) -> anyhow::Result<()> {

 async fn print_layerfile(path: &Utf8Path) -> anyhow::Result<()> {
    // Basic initialization of things that don't change after startup
-    virtual_file::init(
-        10,
-        virtual_file::api::IoEngineKind::StdFs,
-        DEFAULT_IO_BUFFER_ALIGNMENT,
-    );
+    virtual_file::init(10, virtual_file::api::IoEngineKind::StdFs);
    page_cache::init(100);
    let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error);
    dump_layerfile_from_path(path, true, &ctx).await
--- a/pageserver/pagebench/src/cmd/getpage_latest_lsn.rs
+++ b/pageserver/pagebench/src/cmd/getpage_latest_lsn.rs
@@ -58,11 +58,6 @@ pub(crate) struct Args {
    /// [`pageserver_api::models::virtual_file::IoEngineKind`].
    #[clap(long)]
    set_io_engine: Option<pageserver_api::models::virtual_file::IoEngineKind>,
-
-    /// Before starting the benchmark, live-reconfigure the pageserver to use specified alignment for io buffers.
-    #[clap(long)]
-    set_io_alignment: Option<usize>,
-
    targets: Option<Vec<TenantTimelineId>>,
 }

@@ -129,10 +124,6 @@ async fn main_impl(
        mgmt_api_client.put_io_engine(engine_str).await?;
    }

-    if let Some(align) = args.set_io_alignment {
-        mgmt_api_client.put_io_alignment(align).await?;
-    }
-
    // discover targets
    let timelines: Vec<TenantTimelineId> = crate::util::cli::targets::discover(
        &mgmt_api_client,
--- a/pageserver/src/assert_u64_eq_usize.rs
+++ b/pageserver/src/assert_u64_eq_usize.rs
@@ -1,39 +0,0 @@
-//! `u64`` and `usize`` aren't guaranteed to be identical in Rust, but life is much simpler if that's the case.
-
-pub(crate) const _ASSERT_U64_EQ_USIZE: () = {
-    if std::mem::size_of::<usize>() != std::mem::size_of::<u64>() {
-        panic!("the traits defined in this module assume that usize and u64 can be converted to each other without loss of information");
-    }
-};
-
-pub(crate) trait U64IsUsize {
-    fn into_usize(self) -> usize;
-}
-
-impl U64IsUsize for u64 {
-    #[inline(always)]
-    fn into_usize(self) -> usize {
-        #[allow(clippy::let_unit_value)]
-        let _ = _ASSERT_U64_EQ_USIZE;
-        self as usize
-    }
-}
-
-pub(crate) trait UsizeIsU64 {
-    fn into_u64(self) -> u64;
-}
-
-impl UsizeIsU64 for usize {
-    #[inline(always)]
-    fn into_u64(self) -> u64 {
-        #[allow(clippy::let_unit_value)]
-        let _ = _ASSERT_U64_EQ_USIZE;
-        self as u64
-    }
-}
-
-pub const fn u64_to_usize(x: u64) -> usize {
-    #[allow(clippy::let_unit_value)]
-    let _ = _ASSERT_U64_EQ_USIZE;
-    x as usize
-}
--- a/pageserver/src/bin/pageserver.rs
+++ b/pageserver/src/bin/pageserver.rs
@@ -5,7 +5,6 @@
 use std::env;
 use std::env::{var, VarError};
 use std::io::Read;
-use std::str::FromStr;
 use std::sync::Arc;
 use std::time::Duration;

@@ -37,7 +36,6 @@ use pageserver::{
    virtual_file,
 };
 use postgres_backend::AuthType;
-use utils::crashsafe::syncfs;
 use utils::failpoint_support;
 use utils::logging::TracingErrorLayerEnablement;
 use utils::{
@@ -126,53 +124,19 @@ fn main() -> anyhow::Result<()> {
    // after setting up logging, log the effective IO engine choice and read path implementations
    info!(?conf.virtual_file_io_engine, "starting with virtual_file IO engine");
    info!(?conf.virtual_file_direct_io, "starting with virtual_file Direct IO settings");
-    info!(?conf.io_buffer_alignment, "starting with setting for IO buffer alignment");
+    info!(?conf.compact_level0_phase1_value_access, "starting with setting for compact_level0_phase1_value_access");

-    // The tenants directory contains all the pageserver local disk state.
-    // Create if not exists and make sure all the contents are durable before proceeding.
-    // Ensuring durability eliminates a whole bug class where we come up after an unclean shutdown.
-    // After unclea shutdown, we don't know if all the filesystem content we can read via syscalls is actually durable or not.
-    // Examples for that: OOM kill, systemd killing us during shutdown, self abort due to unrecoverable IO error.
    let tenants_path = conf.tenants_path();
-    {
-        let open = || {
-            nix::dir::Dir::open(
-                tenants_path.as_std_path(),
-                nix::fcntl::OFlag::O_DIRECTORY | nix::fcntl::OFlag::O_RDONLY,
-                nix::sys::stat::Mode::empty(),
-            )
-        };
-        let dirfd = match open() {
-            Ok(dirfd) => dirfd,
-            Err(e) => match e {
-                nix::errno::Errno::ENOENT => {
-                    utils::crashsafe::create_dir_all(&tenants_path).with_context(|| {
-                        format!("Failed to create tenants root dir at '{tenants_path}'")
-                    })?;
-                    open().context("open tenants dir after creating it")?
-                }
-                e => anyhow::bail!(e),
-            },
-        };
-
-        let started = Instant::now();
-        syncfs(dirfd)?;
-        let elapsed = started.elapsed();
-        info!(
-            elapsed_ms = elapsed.as_millis(),
-            "made tenant directory contents durable"
-        );
+    if !tenants_path.exists() {
+        utils::crashsafe::create_dir_all(conf.tenants_path())
+            .with_context(|| format!("Failed to create tenants root dir at '{tenants_path}'"))?;
    }

    // Initialize up failpoints support
    let scenario = failpoint_support::init();

    // Basic initialization of things that don't change after startup
-    virtual_file::init(
-        conf.max_file_descriptors,
-        conf.virtual_file_io_engine,
-        conf.io_buffer_alignment,
-    );
+    virtual_file::init(conf.max_file_descriptors, conf.virtual_file_io_engine);
    page_cache::init(conf.page_cache_size);

    start_pageserver(launch_ts, conf).context("Failed to start pageserver")?;
@@ -208,15 +172,27 @@ fn initialize_config(
        }
    };

-    let config_file_contents =
-        std::fs::read_to_string(cfg_file_path).context("read config file from filesystem")?;
-    let config_toml = serde_path_to_error::deserialize(
-        toml_edit::de::Deserializer::from_str(&config_file_contents)
-            .context("build toml deserializer")?,
-    )
-    .context("deserialize config toml")?;
-    let conf = PageServerConf::parse_and_validate(identity.id, config_toml, workdir)
-        .context("runtime-validation of config toml")?;
+    let config: toml_edit::Document = match std::fs::File::open(cfg_file_path) {
+        Ok(mut f) => {
+            let md = f.metadata().context("stat config file")?;
+            if md.is_file() {
+                let mut s = String::new();
+                f.read_to_string(&mut s).context("read config file")?;
+                s.parse().context("parse config file toml")?
+            } else {
+                anyhow::bail!("directory entry exists but is not a file: {cfg_file_path}");
+            }
+        }
+        Err(e) => {
+            anyhow::bail!("open pageserver config: {e}: {cfg_file_path}");
+        }
+    };
+
+    debug!("Using pageserver toml: {config}");
+
+    // Construct the runtime representation
+    let conf = PageServerConf::parse_and_validate(identity.id, &config, workdir)
+        .context("Failed to parse pageserver configuration")?;

    Ok(Box::leak(Box::new(conf)))
 }
--- a/pageserver/src/config.rs
+++ b/pageserver/src/config.rs
--- a/pageserver/src/consumption_metrics.rs
+++ b/pageserver/src/consumption_metrics.rs
@@ -1,8 +1,6 @@
 //! Periodically collect consumption metrics for all active tenants
 //! and push them to a HTTP endpoint.
 use crate::config::PageServerConf;
-use crate::consumption_metrics::metrics::MetricsKey;
-use crate::consumption_metrics::upload::KeyGen as _;
 use crate::context::{DownloadBehavior, RequestContext};
 use crate::task_mgr::{self, TaskKind, BACKGROUND_RUNTIME};
 use crate::tenant::size::CalculateSyntheticSizeError;
@@ -10,7 +8,6 @@ use crate::tenant::tasks::BackgroundLoopKind;
 use crate::tenant::{mgr::TenantManager, LogicalSizeCalculationCause, Tenant};
 use camino::Utf8PathBuf;
 use consumption_metrics::EventType;
-use itertools::Itertools as _;
 use pageserver_api::models::TenantState;
 use remote_storage::{GenericRemoteStorage, RemoteStorageConfig};
 use reqwest::Url;
@@ -22,8 +19,9 @@ use tokio_util::sync::CancellationToken;
 use tracing::*;
 use utils::id::NodeId;

-mod disk_cache;
 mod metrics;
+use crate::consumption_metrics::metrics::MetricsKey;
+mod disk_cache;
 mod upload;

 const DEFAULT_HTTP_REPORTING_TIMEOUT: Duration = Duration::from_secs(60);
@@ -145,12 +143,6 @@ async fn collect_metrics(
        // these are point in time, with variable "now"
        let metrics = metrics::collect_all_metrics(&tenant_manager, &cached_metrics, &ctx).await;

-        // Pre-generate event idempotency keys, to reuse them across the bucket
-        // and HTTP sinks.
-        let idempotency_keys = std::iter::repeat_with(|| node_id.as_str().generate())
-            .take(metrics.len())
-            .collect_vec();
-
        let metrics = Arc::new(metrics);

        // why not race cancellation here? because we are one of the last tasks, and if we are
@@ -169,14 +161,8 @@ async fn collect_metrics(
            }

            if let Some(bucket_client) = &bucket_client {
-                let res = upload::upload_metrics_bucket(
-                    bucket_client,
-                    &cancel,
-                    &node_id,
-                    &metrics,
-                    &idempotency_keys,
-                )
-                .await;
+                let res =
+                    upload::upload_metrics_bucket(bucket_client, &cancel, &node_id, &metrics).await;
                if let Err(e) = res {
                    tracing::error!("failed to upload to S3: {e:#}");
                }
@@ -188,9 +174,9 @@ async fn collect_metrics(
                &client,
                metric_collection_endpoint,
                &cancel,
+                &node_id,
                &metrics,
                &mut cached_metrics,
-                &idempotency_keys,
            )
            .await;
            if let Err(e) = res {
--- a/pageserver/src/consumption_metrics/upload.rs
+++ b/pageserver/src/consumption_metrics/upload.rs
@@ -24,16 +24,16 @@ pub(super) async fn upload_metrics_http(
    client: &reqwest::Client,
    metric_collection_endpoint: &reqwest::Url,
    cancel: &CancellationToken,
+    node_id: &str,
    metrics: &[RawMetric],
    cached_metrics: &mut Cache,
-    idempotency_keys: &[IdempotencyKey<'_>],
 ) -> anyhow::Result<()> {
    let mut uploaded = 0;
    let mut failed = 0;

    let started_at = std::time::Instant::now();

-    let mut iter = serialize_in_chunks(CHUNK_SIZE, metrics, idempotency_keys);
+    let mut iter = serialize_in_chunks(CHUNK_SIZE, metrics, node_id);

    while let Some(res) = iter.next() {
        let (chunk, body) = res?;
@@ -87,7 +87,6 @@ pub(super) async fn upload_metrics_bucket(
    cancel: &CancellationToken,
    node_id: &str,
    metrics: &[RawMetric],
-    idempotency_keys: &[IdempotencyKey<'_>],
 ) -> anyhow::Result<()> {
    if metrics.is_empty() {
        // Skip uploads if we have no metrics, so that readers don't have to handle the edge case
@@ -107,7 +106,7 @@ pub(super) async fn upload_metrics_bucket(

    // Serialize and write into compressed buffer
    let started_at = std::time::Instant::now();
-    for res in serialize_in_chunks(CHUNK_SIZE, metrics, idempotency_keys) {
+    for res in serialize_in_chunks(CHUNK_SIZE, metrics, node_id) {
        let (_chunk, body) = res?;
        gzip_writer.write_all(&body).await?;
    }
@@ -135,31 +134,29 @@ pub(super) async fn upload_metrics_bucket(
    Ok(())
 }

-/// Serializes the input metrics as JSON in chunks of chunk_size. The provided
-/// idempotency keys are injected into the corresponding metric events (reused
-/// across different metrics sinks), and must have the same length as input.
-fn serialize_in_chunks<'a>(
+// The return type is quite ugly, but we gain testability in isolation
+fn serialize_in_chunks<'a, F>(
    chunk_size: usize,
    input: &'a [RawMetric],
-    idempotency_keys: &'a [IdempotencyKey<'a>],
+    factory: F,
 ) -> impl ExactSizeIterator<Item = Result<(&'a [RawMetric], bytes::Bytes), serde_json::Error>> + 'a
+where
+    F: KeyGen<'a> + 'a,
 {
    use bytes::BufMut;

-    assert_eq!(input.len(), idempotency_keys.len());
-
-    struct Iter<'a> {
+    struct Iter<'a, F> {
        inner: std::slice::Chunks<'a, RawMetric>,
-        idempotency_keys: std::slice::Iter<'a, IdempotencyKey<'a>>,
        chunk_size: usize,

        // write to a BytesMut so that we can cheaply clone the frozen Bytes for retries
        buffer: bytes::BytesMut,
        // chunk amount of events are reused to produce the serialized document
        scratch: Vec<Event<Ids, Name>>,
+        factory: F,
    }

-    impl<'a> Iterator for Iter<'a> {
+    impl<'a, F: KeyGen<'a>> Iterator for Iter<'a, F> {
        type Item = Result<(&'a [RawMetric], bytes::Bytes), serde_json::Error>;

        fn next(&mut self) -> Option<Self::Item> {
@@ -170,14 +167,17 @@ fn serialize_in_chunks<'a>(
                self.scratch.extend(
                    chunk
                        .iter()
-                        .zip(&mut self.idempotency_keys)
-                        .map(|(raw_metric, key)| raw_metric.as_event(key)),
+                        .map(|raw_metric| raw_metric.as_event(&self.factory.generate())),
                );
            } else {
                // next rounds: update_in_place to reuse allocations
                assert_eq!(self.scratch.len(), self.chunk_size);
-                itertools::izip!(self.scratch.iter_mut(), chunk, &mut self.idempotency_keys)
-                    .for_each(|(slot, raw_metric, key)| raw_metric.update_in_place(slot, key));
+                self.scratch
+                    .iter_mut()
+                    .zip(chunk.iter())
+                    .for_each(|(slot, raw_metric)| {
+                        raw_metric.update_in_place(slot, &self.factory.generate())
+                    });
            }

            let res = serde_json::to_writer(
@@ -198,19 +198,18 @@ fn serialize_in_chunks<'a>(
        }
    }

-    impl<'a> ExactSizeIterator for Iter<'a> {}
+    impl<'a, F: KeyGen<'a>> ExactSizeIterator for Iter<'a, F> {}

    let buffer = bytes::BytesMut::new();
    let inner = input.chunks(chunk_size);
-    let idempotency_keys = idempotency_keys.iter();
    let scratch = Vec::new();

    Iter {
        inner,
-        idempotency_keys,
        chunk_size,
        buffer,
        scratch,
+        factory,
    }
 }

@@ -269,7 +268,7 @@ impl RawMetricExt for RawMetric {
    }
 }

-pub(crate) trait KeyGen<'a> {
+trait KeyGen<'a>: Copy {
    fn generate(&self) -> IdempotencyKey<'a>;
 }

@@ -390,10 +389,7 @@ mod tests {
        let examples = metric_samples();
        assert!(examples.len() > 1);

-        let now = Utc::now();
-        let idempotency_keys = (0..examples.len())
-            .map(|i| FixedGen::new(now, "1", i as u16).generate())
-            .collect::<Vec<_>>();
+        let factory = FixedGen::new(Utc::now(), "1", 42);

        // need to use Event here because serde_json::Value uses default hashmap, not linked
        // hashmap
@@ -402,13 +398,13 @@ mod tests {
            events: Vec<Event<Ids, Name>>,
        }

-        let correct = serialize_in_chunks(examples.len(), &examples, &idempotency_keys)
+        let correct = serialize_in_chunks(examples.len(), &examples, factory)
            .map(|res| res.unwrap().1)
            .flat_map(|body| serde_json::from_slice::<EventChunk>(&body).unwrap().events)
            .collect::<Vec<_>>();

        for chunk_size in 1..examples.len() {
-            let actual = serialize_in_chunks(chunk_size, &examples, &idempotency_keys)
+            let actual = serialize_in_chunks(chunk_size, &examples, factory)
                .map(|res| res.unwrap().1)
                .flat_map(|body| serde_json::from_slice::<EventChunk>(&body).unwrap().events)
                .collect::<Vec<_>>();
--- a/pageserver/src/context.rs
+++ b/pageserver/src/context.rs
@@ -1,9 +1,7 @@
-//! Defines [`RequestContext`].
-//!
-//! It is a structure that we use throughout the pageserver to propagate
-//! high-level context from places that _originate_ activity down to the
-//! shared code paths at the heart of the pageserver. It's inspired by
-//! Golang's `context.Context`.
+//! This module defines `RequestContext`, a structure that we use throughout
+//! the pageserver to propagate high-level context from places
+//! that _originate_ activity down to the shared code paths at the
+//! heart of the pageserver. It's inspired by Golang's `context.Context`.
 //!
 //! For example, in `Timeline::get(page_nr, lsn)` we need to answer the following questions:
 //! 1. What high-level activity ([`TaskKind`]) needs this page?
@@ -107,10 +105,8 @@ pub struct RequestContext {
 #[derive(Clone, Copy, PartialEq, Eq, Debug, enum_map::Enum, strum_macros::IntoStaticStr)]
 pub enum PageContentKind {
    Unknown,
-    DeltaLayerSummary,
    DeltaLayerBtreeNode,
    DeltaLayerValue,
-    ImageLayerSummary,
    ImageLayerBtreeNode,
    ImageLayerValue,
    InMemoryLayer,
--- a/pageserver/src/control_plane_client.rs
+++ b/pageserver/src/control_plane_client.rs
@@ -141,32 +141,12 @@ impl ControlPlaneGenerationsApi for ControlPlaneClient {
                        m.other
                    );

-                    let az_id = {
-                        let az_id_from_metadata = m
-                            .other
-                            .get("availability_zone_id")
-                            .and_then(|jv| jv.as_str().map(|str| str.to_owned()));
-
-                        match az_id_from_metadata {
-                            Some(az_id) => Some(az_id),
-                            None => {
-                                tracing::warn!("metadata.json does not contain an 'availability_zone_id' field");
-                                conf.availability_zone.clone()
-                            }
-                        }
-                    };
-
-                    if az_id.is_none() {
-                        panic!("Availablity zone id could not be inferred from metadata.json or pageserver config");
-                    }
-
                    Some(NodeRegisterRequest {
                        node_id: conf.id,
                        listen_pg_addr: m.postgres_host,
                        listen_pg_port: m.postgres_port,
                        listen_http_addr: m.http_host,
                        listen_http_port: m.http_port,
-                        availability_zone_id: az_id.expect("Checked above"),
                    })
                }
                Err(e) => {
--- a/pageserver/src/disk_usage_eviction_task.rs
+++ b/pageserver/src/disk_usage_eviction_task.rs
@@ -41,15 +41,19 @@
 // - The `#[allow(dead_code)]` above various structs are to suppress warnings about only the Debug impl
 //   reading these fields. We use the Debug impl for semi-structured logging, though.

-use std::{sync::Arc, time::SystemTime};
+use std::{
+    sync::Arc,
+    time::{Duration, SystemTime},
+};

 use anyhow::Context;
-use pageserver_api::{config::DiskUsageEvictionTaskConfig, shard::TenantShardId};
+use pageserver_api::shard::TenantShardId;
 use remote_storage::GenericRemoteStorage;
-use serde::Serialize;
+use serde::{Deserialize, Serialize};
 use tokio::time::Instant;
 use tokio_util::sync::CancellationToken;
 use tracing::{debug, error, info, instrument, warn, Instrument};
+use utils::serde_percent::Percent;
 use utils::{completion, id::TimelineId};

 use crate::{
@@ -65,9 +69,23 @@ use crate::{
    CancellableTask, DiskUsageEvictionTask,
 };

+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub struct DiskUsageEvictionTaskConfig {
+    pub max_usage_pct: Percent,
+    pub min_avail_bytes: u64,
+    #[serde(with = "humantime_serde")]
+    pub period: Duration,
+    #[cfg(feature = "testing")]
+    pub mock_statvfs: Option<crate::statvfs::mock::Behavior>,
+    /// Select sorting for evicted layers
+    #[serde(default)]
+    pub eviction_order: EvictionOrder,
+}
+
 /// Selects the sort order for eviction candidates *after* per tenant `min_resident_size`
 /// partitioning.
-#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(tag = "type", content = "args")]
 pub enum EvictionOrder {
    /// Order the layers to be evicted by how recently they have been accessed relatively within
    /// the set of resident layers of a tenant.
@@ -78,22 +96,23 @@ pub enum EvictionOrder {
        /// we read tenants is deterministic. If we find the need to use this as `false`, we need
        /// to ensure nondeterminism by adding in a random number to break the
        /// `relative_last_activity==0.0` ties.
+        #[serde(default = "default_highest_layer_count_loses_first")]
        highest_layer_count_loses_first: bool,
    },
 }

-impl From<pageserver_api::config::EvictionOrder> for EvictionOrder {
-    fn from(value: pageserver_api::config::EvictionOrder) -> Self {
-        match value {
-            pageserver_api::config::EvictionOrder::RelativeAccessed {
-                highest_layer_count_loses_first,
-            } => Self::RelativeAccessed {
-                highest_layer_count_loses_first,
-            },
+impl Default for EvictionOrder {
+    fn default() -> Self {
+        Self::RelativeAccessed {
+            highest_layer_count_loses_first: true,
        }
    }
 }

+fn default_highest_layer_count_loses_first() -> bool {
+    true
+}
+
 impl EvictionOrder {
    fn sort(&self, candidates: &mut [(EvictionPartition, EvictionCandidate)]) {
        use EvictionOrder::*;
@@ -276,7 +295,7 @@ async fn disk_usage_eviction_task_iteration(
        storage,
        usage_pre,
        tenant_manager,
-        task_config.eviction_order.into(),
+        task_config.eviction_order,
        cancel,
    )
    .await;
@@ -1238,6 +1257,7 @@ mod filesystem_level_usage {

    #[test]
    fn max_usage_pct_pressure() {
+        use super::EvictionOrder;
        use super::Usage as _;
        use std::time::Duration;
        use utils::serde_percent::Percent;
@@ -1249,7 +1269,7 @@ mod filesystem_level_usage {
                period: Duration::MAX,
                #[cfg(feature = "testing")]
                mock_statvfs: None,
-                eviction_order: pageserver_api::config::EvictionOrder::default(),
+                eviction_order: EvictionOrder::default(),
            },
            total_bytes: 100_000,
            avail_bytes: 0,
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -318,27 +318,6 @@ impl From<crate::tenant::DeleteTimelineError> for ApiError {
    }
 }

-impl From<crate::tenant::TimelineArchivalError> for ApiError {
-    fn from(value: crate::tenant::TimelineArchivalError) -> Self {
-        use crate::tenant::TimelineArchivalError::*;
-        match value {
-            NotFound => ApiError::NotFound(anyhow::anyhow!("timeline not found").into()),
-            Timeout => ApiError::Timeout("hit pageserver internal timeout".into()),
-            e @ HasArchivedParent(_) => {
-                ApiError::PreconditionFailed(e.to_string().into_boxed_str())
-            }
-            HasUnarchivedChildren(children) => ApiError::PreconditionFailed(
-                format!(
-                    "Cannot archive timeline which has non-archived child timelines: {children:?}"
-                )
-                .into_boxed_str(),
-            ),
-            a @ AlreadyInProgress => ApiError::Conflict(a.to_string()),
-            Other(e) => ApiError::InternalServerError(e),
-        }
-    }
-}
-
 impl From<crate::tenant::mgr::DeleteTimelineError> for ApiError {
    fn from(value: crate::tenant::mgr::DeleteTimelineError) -> Self {
        use crate::tenant::mgr::DeleteTimelineError::*;
@@ -426,8 +405,6 @@ async fn build_timeline_info_common(
    let current_logical_size = timeline.get_current_logical_size(logical_size_task_priority, ctx);
    let current_physical_size = Some(timeline.layer_size_sum().await);
    let state = timeline.current_state();
-    // Report is_archived = false if the timeline is still loading
-    let is_archived = timeline.is_archived().unwrap_or(false);
    let remote_consistent_lsn_projected = timeline
        .get_remote_consistent_lsn_projected()
        .unwrap_or(Lsn(0));
@@ -468,7 +445,6 @@ async fn build_timeline_info_common(
        pg_version: timeline.pg_version,

        state,
-        is_archived: Some(is_archived),

        walreceiver_status,

@@ -710,7 +686,9 @@ async fn timeline_archival_config_handler(

        tenant
            .apply_timeline_archival_config(timeline_id, request_data.state)
-            .await?;
+            .await
+            .context("applying archival config")
+            .map_err(ApiError::InternalServerError)?;
        Ok::<_, ApiError>(())
    }
    .instrument(info_span!("timeline_archival_config",
@@ -874,10 +852,7 @@ async fn get_timestamp_of_lsn_handler(

    match result {
        Some(time) => {
-            let time = format_rfc3339(
-                postgres_ffi::try_from_pg_timestamp(time).map_err(ApiError::InternalServerError)?,
-            )
-            .to_string();
+            let time = format_rfc3339(postgres_ffi::from_pg_timestamp(time)).to_string();
            json_response(StatusCode::OK, time)
        }
        None => Err(ApiError::NotFound(
@@ -1731,12 +1706,13 @@ async fn timeline_compact_handler(
        flags |= CompactFlags::ForceImageLayerCreation;
    }
    if Some(true) == parse_query_param::<_, bool>(&request, "enhanced_gc_bottom_most_compaction")? {
+        if !cfg!(feature = "testing") {
+            return Err(ApiError::InternalServerError(anyhow!(
+                "enhanced_gc_bottom_most_compaction is only available in testing mode"
+            )));
+        }
        flags |= CompactFlags::EnhancedGcBottomMostCompaction;
    }
-    if Some(true) == parse_query_param::<_, bool>(&request, "dry_run")? {
-        flags |= CompactFlags::DryRun;
-    }
-
    let wait_until_uploaded =
        parse_query_param::<_, bool>(&request, "wait_until_uploaded")?.unwrap_or(false);

@@ -2076,7 +2052,7 @@ async fn disk_usage_eviction_run(
        evict_bytes: u64,

        #[serde(default)]
-        eviction_order: pageserver_api::config::EvictionOrder,
+        eviction_order: crate::disk_usage_eviction_task::EvictionOrder,
    }

    #[derive(Debug, Clone, Copy, serde::Serialize)]
@@ -2112,7 +2088,7 @@ async fn disk_usage_eviction_run(
        &state.remote_storage,
        usage,
        &state.tenant_manager,
-        config.eviction_order.into(),
+        config.eviction_order,
        &cancel,
    )
    .await;
@@ -2354,20 +2330,6 @@ async fn put_io_engine_handler(
    json_response(StatusCode::OK, ())
 }

-async fn put_io_alignment_handler(
-    mut r: Request<Body>,
-    _cancel: CancellationToken,
-) -> Result<Response<Body>, ApiError> {
-    check_permission(&r, None)?;
-    let align: usize = json_request(&mut r).await?;
-    crate::virtual_file::set_io_buffer_alignment(align).map_err(|align| {
-        ApiError::PreconditionFailed(
-            format!("Requested io alignment ({align}) is not a power of two").into(),
-        )
-    })?;
-    json_response(StatusCode::OK, ())
-}
-
 /// Polled by control plane.
 ///
 /// See [`crate::utilization`].
@@ -2980,7 +2942,7 @@ pub fn make_router(
        )
        .put(
            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/compact",
-            |r| api_handler(r, timeline_compact_handler),
+            |r| testing_api_handler("run timeline compaction", r, timeline_compact_handler),
        )
        .put(
            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/checkpoint",
@@ -3055,9 +3017,6 @@ pub fn make_router(
            |r| api_handler(r, timeline_collect_keyspace),
        )
        .put("/v1/io_engine", |r| api_handler(r, put_io_engine_handler))
-        .put("/v1/io_alignment", |r| {
-            api_handler(r, put_io_alignment_handler)
-        })
        .put(
            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/force_aux_policy_switch",
            |r| api_handler(r, force_aux_policy_switch_handler),
--- a/pageserver/src/import_datadir.rs
+++ b/pageserver/src/import_datadir.rs
@@ -19,7 +19,6 @@ use crate::metrics::WAL_INGEST;
 use crate::pgdatadir_mapping::*;
 use crate::tenant::Timeline;
 use crate::walingest::WalIngest;
-use crate::walrecord::decode_wal_record;
 use crate::walrecord::DecodedWALRecord;
 use pageserver_api::reltag::{RelTag, SlruKind};
 use postgres_ffi::pg_constants;
@@ -311,13 +310,11 @@ async fn import_wal(

        let mut nrecords = 0;
        let mut modification = tline.begin_modification(last_lsn);
+        let mut decoded = DecodedWALRecord::default();
        while last_lsn <= endpoint {
            if let Some((lsn, recdata)) = waldecoder.poll_decode()? {
-                let mut decoded = DecodedWALRecord::default();
-                decode_wal_record(recdata, &mut decoded, tline.pg_version)?;
-
                walingest
-                    .ingest_record(decoded, lsn, &mut modification, ctx)
+                    .ingest_record(recdata, lsn, &mut modification, &mut decoded, ctx)
                    .await?;
                WAL_INGEST.records_committed.inc();

@@ -452,12 +449,11 @@ pub async fn import_wal_from_tar(
        waldecoder.feed_bytes(&bytes[offset..]);

        let mut modification = tline.begin_modification(last_lsn);
+        let mut decoded = DecodedWALRecord::default();
        while last_lsn <= end_lsn {
            if let Some((lsn, recdata)) = waldecoder.poll_decode()? {
-                let mut decoded = DecodedWALRecord::default();
-                decode_wal_record(recdata, &mut decoded, tline.pg_version)?;
                walingest
-                    .ingest_record(decoded, lsn, &mut modification, ctx)
+                    .ingest_record(recdata, lsn, &mut modification, &mut decoded, ctx)
                    .await?;
                modification.commit(ctx).await?;
                last_lsn = lsn;
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Conrad Ludgate	2cca1b3e4e	fix	2024-08-21 18:44:57 +01:00
Conrad Ludgate	471b3b300d	fix pin	2024-08-21 16:29:52 +01:00
Conrad Ludgate	fbd4b91169	asyncreadready	2024-08-21 16:16:49 +01:00
Conrad Ludgate	8cc45ad9bd	asrawfd things	2024-08-21 15:28:25 +01:00
Conrad Ludgate	aabbd55187	add ktls handling	2024-08-21 14:42:41 +01:00
Conrad Ludgate	987a859352	start integrating ktls	2024-08-21 14:11:58 +01:00
Conrad Ludgate	e171fd805b	add ktls dep	2024-08-21 13:51:02 +01:00
Conrad Ludgate	1e4702b26a	update rustls	2024-08-21 13:47:19 +01:00