Try to find the best parallelisation

CI(check-codestyle-rust): use parallel and cargo hack --partition
Revert "CI(check-codestyle-rust): use mold -run"
2026-03-09 11:20:38 +00:00 · 2024-09-14 12:19:11 +01:00 · 2024-09-05 13:35:12 +01:00 · 2024-09-05 11:31:01 +01:00 · 2024-09-05 11:30:21 +01:00 · 2024-09-05 00:18:35 +01:00
192 changed files with 2961 additions and 3484 deletions
--- a/.devcontainer/Dockerfile.devcontainer
+++ b/.devcontainer/Dockerfile.devcontainer
@@ -1 +0,0 @@
 FROM neondatabase/build-tools:pinned
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -1,23 +0,0 @@
 // https://containers.dev/implementors/json_reference/
 {
  "name": "Neon",
  "build": {
    "context": "..",
    "dockerfile": "Dockerfile.devcontainer"
  },
  "postCreateCommand": {
    "build neon": "BUILD_TYPE=debug CARGO_BUILD_FLAGS='--features=testing' mold -run make -s -j`nproc`",
    "install python deps": "./scripts/pysync"
  },
  "customizations": {
    "vscode": {
      "extensions": [
        "charliermarsh.ruff",
        "github.vscode-github-actions",
        "rust-lang.rust-analyzer"
      ]
    }
  }
 }
--- a/.github/actionlint.yml
+++ b/.github/actionlint.yml
@@ -7,13 +7,6 @@ self-hosted-runner:
    - small-arm64
    - us-east-2
 config-variables:
  - AZURE_DEV_CLIENT_ID
  - AZURE_DEV_REGISTRY_NAME
  - AZURE_DEV_SUBSCRIPTION_ID
  - AZURE_PROD_CLIENT_ID
  - AZURE_PROD_REGISTRY_NAME
  - AZURE_PROD_SUBSCRIPTION_ID
  - AZURE_TENANT_ID
  - BENCHMARK_PROJECT_ID_PUB
  - BENCHMARK_PROJECT_ID_SUB
  - REMOTE_STORAGE_AZURE_CONTAINER
--- a/.github/workflows/_push-to-acr.yml
+++ b/.github/workflows/_push-to-acr.yml
@@ -1,56 +0,0 @@
 name: Push images to ACR
 on:
  workflow_call:
    inputs:
      client_id:
        description: Client ID of Azure managed identity or Entra app
        required: true
        type: string
      image_tag:
        description: Tag for the container image
        required: true
        type: string
      images:
        description: Images to push
        required: true
        type: string
      registry_name:
        description: Name of the container registry
        required: true
        type: string
      subscription_id:
        description: Azure subscription ID
        required: true
        type: string
      tenant_id:
        description: Azure tenant ID
        required: true
        type: string
 jobs:
  push-to-acr:
    runs-on: ubuntu-22.04
    permissions:
      contents: read  # This is required for actions/checkout
      id-token: write # This is required for Azure Login to work.
    steps:
      - name: Azure login
        uses: azure/login@6c251865b4e6290e7b78be643ea2d005bc51f69a  # @v2.1.1
        with:
          client-id: ${{ inputs.client_id }}
          subscription-id: ${{ inputs.subscription_id }}
          tenant-id: ${{ inputs.tenant_id }}
      - name: Login to ACR
        run: |
          az acr login --name=${{ inputs.registry_name }}
      - name: Copy docker images to ACR ${{ inputs.registry_name }}
        run: |
          images='${{ inputs.images }}'
          for image in ${images}; do
            docker buildx imagetools create \
              -t ${{ inputs.registry_name }}.azurecr.io/neondatabase/${image}:${{ inputs.image_tag }} \
                                        neondatabase/${image}:${{ inputs.image_tag }}
          done
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -122,10 +122,12 @@ jobs:
  check-codestyle-rust:
    needs: [ check-permissions, build-build-tools-image ]
    # There's no reason to expect clippy or code formatting to be different on different platforms,
    # so it's enough to run these on x64 only.
    strategy:
      matrix:
        arch: [ x64, arm64 ]
-    runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'small-arm64' || 'small')) }}
+    runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}
    container:
      image: ${{ needs.build-build-tools-image.outputs.image }}
@@ -166,15 +168,25 @@ jobs:
            exit 1
          fi
          echo "CLIPPY_COMMON_ARGS=${CLIPPY_COMMON_ARGS}" >> $GITHUB_ENV
      - name: Run cargo clippy (debug)
-        run: cargo hack --feature-powerset clippy $CLIPPY_COMMON_ARGS
+        run: |
-      - name: Run cargo clippy (release)
+          parallel --jobs 8 "cargo hack --feature-powerset --partition {}/8 clippy --target-dir target/partition-{} $CLIPPY_COMMON_ARGS" ::: 1 2 3 4 5 6 7 8
-        run: cargo hack --feature-powerset clippy --release $CLIPPY_COMMON_ARGS
+
      # instead of running the full release build, running debug build again,
      # but with disabled `debug-assertions` to excersice release code paths
      - name: Run cargo clippy (debug, with debug-assertions=false)
        run: |
          for N in 4 8 10 12 14 16 18 20; do
            echo "Running clippy with debug-assertions=false for partition ${N}"
            time parallel --jobs ${N} "cargo hack --feature-powerset --partition {}/${N} clippy --target-dir target/partition-{} $CLIPPY_COMMON_ARGS -C debug-assertions=off" ::: $(seq -s " " 1 ${N})
            rm -rf target/partition-*
          done
      - name: Check documentation generation
        run: cargo doc --workspace --no-deps --document-private-items
        env:
-            RUSTDOCFLAGS: "-Dwarnings -Arustdoc::private_intra_doc_links"
+          RUSTDOCFLAGS: "-Dwarnings -Arustdoc::private_intra_doc_links"
      # Use `${{ !cancelled() }}` to run quck tests after the longer clippy run
      - name: Check formatting
@@ -286,7 +298,6 @@ jobs:
          PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
          TEST_RESULT_CONNSTR: "${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}"
          PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
          SYNC_AFTER_EACH_TEST: true
      # XXX: no coverage data handling here, since benchmarks are run on release builds,
      # while coverage is currently collected for the debug ones
@@ -794,6 +805,9 @@ jobs:
          docker compose -f ./docker-compose/docker-compose.yml down
  promote-images:
    permissions:
      contents: read  # This is required for actions/checkout
      id-token: write # This is required for Azure Login to work.
    needs: [ check-permissions, tag, test-images, vm-compute-node-image ]
    runs-on: ubuntu-22.04
@@ -820,6 +834,28 @@ jobs:
                                               neondatabase/vm-compute-node-${version}:${{ needs.tag.outputs.build-tag }}
          done
      - name: Azure login
        if: github.ref_name == 'main'
        uses: azure/login@6c251865b4e6290e7b78be643ea2d005bc51f69a  # @v2.1.1
        with:
          client-id: ${{ secrets.AZURE_DEV_CLIENT_ID }}
          tenant-id: ${{ secrets.AZURE_TENANT_ID }}
          subscription-id: ${{ secrets.AZURE_DEV_SUBSCRIPTION_ID }}
      - name: Login to ACR
        if: github.ref_name == 'main'
        run: |
          az acr login --name=neoneastus2
      - name: Copy docker images to ACR-dev
        if: github.ref_name == 'main'
        run: |
          for image in neon compute-tools {vm-,}compute-node-{v14,v15,v16}; do
            docker buildx imagetools create \
              -t neoneastus2.azurecr.io/neondatabase/${image}:${{ needs.tag.outputs.build-tag }} \
                                        neondatabase/${image}:${{ needs.tag.outputs.build-tag }}
          done
      - name: Add latest tag to images
        if: github.ref_name == 'main'
        run: |
@@ -857,30 +893,6 @@ jobs:
                                               369495373322.dkr.ecr.eu-central-1.amazonaws.com/${image}:${{ needs.tag.outputs.build-tag }}
          done
  push-to-acr-dev:
    if: github.ref_name == 'main'
    needs: [ tag, promote-images ]
    uses: ./.github/workflows/_push-to-acr.yml
    with:
      client_id: ${{ vars.AZURE_DEV_CLIENT_ID }}
      image_tag: ${{ needs.tag.outputs.build-tag }}
      images: neon compute-tools vm-compute-node-v14 vm-compute-node-v15 vm-compute-node-v16 compute-node-v14 compute-node-v15 compute-node-v16
      registry_name: ${{ vars.AZURE_DEV_REGISTRY_NAME }}
      subscription_id: ${{ vars.AZURE_DEV_SUBSCRIPTION_ID }}
      tenant_id: ${{ vars.AZURE_TENANT_ID }}
  push-to-acr-prod:
    if: github.ref_name == 'release'|| github.ref_name == 'release-proxy'
    needs: [ tag, promote-images ]
    uses: ./.github/workflows/_push-to-acr.yml
    with:
      client_id: ${{ vars.AZURE_PROD_CLIENT_ID }}
      image_tag: ${{ needs.tag.outputs.build-tag }}
      images: neon compute-tools vm-compute-node-v14 vm-compute-node-v15 vm-compute-node-v16 compute-node-v14 compute-node-v15 compute-node-v16
      registry_name: ${{ vars.AZURE_PROD_REGISTRY_NAME }}
      subscription_id: ${{ vars.AZURE_PROD_SUBSCRIPTION_ID }}
      tenant_id: ${{ vars.AZURE_TENANT_ID }}
  trigger-custom-extensions-build-and-wait:
    needs: [ check-permissions, tag ]
    runs-on: ubuntu-22.04
@@ -956,8 +968,8 @@ jobs:
          exit 1
  deploy:
-    needs: [ check-permissions, promote-images, tag, build-and-test-locally, trigger-custom-extensions-build-and-wait, push-to-acr-dev, push-to-acr-prod ]
+    needs: [ check-permissions, promote-images, tag, build-and-test-locally, trigger-custom-extensions-build-and-wait ]
-    if: (github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy') && !failure() && !cancelled()
+    if: github.ref_name == 'main' || github.ref_name == 'release'|| github.ref_name == 'release-proxy'
    runs-on: [ self-hosted, small ]
    container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
--- a/.github/workflows/label-for-external-users.yml
+++ b/.github/workflows/label-for-external-users.yml
@@ -7,11 +7,6 @@ on:
  pull_request_target:
    types:
      - opened
  workflow_dispatch:
    inputs:
      github-actor:
        description: 'GitHub username. If empty, the username of the current user will be used'
        required: false
 # No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job.
 permissions: {}
@@ -31,31 +26,12 @@ jobs:
      id: check-user
      env:
        GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
        ACTOR: ${{ inputs.github-actor || github.actor }}
      run: |
-        expected_error="User does not exist or is not a member of the organization"
+        if gh api -H "Accept: application/vnd.github+json" -H "X-GitHub-Api-Version: 2022-11-28" "/orgs/${GITHUB_REPOSITORY_OWNER}/members/${GITHUB_ACTOR}"; then
-        output_file=output.txt
+          is_member=true
-
+        else
-        for i in $(seq 1 10); do
+          is_member=false
-          if gh api "/orgs/${GITHUB_REPOSITORY_OWNER}/members/${ACTOR}" \
+        fi
              -H "Accept: application/vnd.github+json" \
              -H "X-GitHub-Api-Version: 2022-11-28" > ${output_file}; then
            is_member=true
            break
          elif grep -q "${expected_error}" ${output_file}; then
            is_member=false
            break
          elif [ $i -eq 10 ]; then
            title="Failed to get memmbership status for ${ACTOR}"
            message="The latest GitHub API error message: '$(cat ${output_file})'"
            echo "::error file=.github/workflows/label-for-external-users.yml,title=${title}::${message}"
            exit 1
          fi
          sleep 1
        done
        echo "is-member=${is_member}" | tee -a ${GITHUB_OUTPUT}
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -915,22 +915,25 @@ dependencies = [
 [[package]]
 name = "bindgen"
-version = "0.70.1"
+version = "0.65.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f49d8fed880d473ea71efb9bf597651e77201bdd4893efe54c9e5d65ae04ce6f"
+checksum = "cfdf7b466f9a4903edc73f95d6d2bcd5baf8ae620638762244d3f60143643cc5"
 dependencies = [
- "bitflags 2.4.1",
+ "bitflags 1.3.2",
 "cexpr",
 "clang-sys",
- "itertools 0.12.1",
+ "lazy_static",
 "lazycell",
 "log",
- "prettyplease 0.2.17",
+ "peeking_take_while",
 "prettyplease 0.2.6",
 "proc-macro2",
 "quote",
 "regex",
 "rustc-hash",
 "shlex",
 "syn 2.0.52",
 "which",
 ]
 [[package]]
@@ -1189,9 +1192,9 @@ dependencies = [
 [[package]]
 name = "comfy-table"
-version = "7.1.1"
+version = "6.1.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b34115915337defe99b2aff5c2ce6771e5fbc4079f4b506301f5cf394c8452f7"
+checksum = "6e7b787b0dc42e8111badfdbe4c3059158ccb2db8780352fa1b01e8ccf45cc4d"
 dependencies = [
 "crossterm",
 "strum",
@@ -1246,7 +1249,7 @@ dependencies = [
 "tokio-postgres",
 "tokio-stream",
 "tokio-util",
- "toml_edit",
+ "toml_edit 0.19.10",
 "tracing",
 "tracing-opentelemetry",
 "tracing-subscriber",
@@ -1360,8 +1363,8 @@ dependencies = [
 "tokio",
 "tokio-postgres",
 "tokio-util",
- "toml",
+ "toml 0.7.4",
- "toml_edit",
+ "toml_edit 0.19.10",
 "tracing",
 "url",
 "utils",
@@ -1485,22 +1488,25 @@ checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345"
 [[package]]
 name = "crossterm"
-version = "0.27.0"
+version = "0.25.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f476fe445d41c9e991fd07515a6f463074b782242ccf4a5b7b1d1012e70824df"
+checksum = "e64e6c0fbe2c17357405f7c758c1ef960fce08bdfb2c03d88d2a18d7e09c4b67"
 dependencies = [
- "bitflags 2.4.1",
+ "bitflags 1.3.2",
 "crossterm_winapi",
 "libc",
 "mio",
 "parking_lot 0.12.1",
 "signal-hook",
 "signal-hook-mio",
 "winapi",
 ]
 [[package]]
 name = "crossterm_winapi"
-version = "0.9.1"
+version = "0.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "acdd7c62a3665c7f6830a51635d9ac9b23ed385797f70a83bb8bafe9c572ab2b"
+checksum = "2ae1b35a484aa10e07fe0638d02301c5ad24de82d310ccbd2f3693da5f09bf1c"
 dependencies = [
 "winapi",
 ]
@@ -2721,12 +2727,6 @@ dependencies = [
 "hashbrown 0.14.5",
 ]
 [[package]]
 name = "indoc"
 version = "2.0.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5"
 [[package]]
 name = "infer"
 version = "0.2.3"
@@ -2943,6 +2943,12 @@ dependencies = [
 "spin 0.5.2",
 ]
 [[package]]
 name = "lazycell"
 version = "1.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
 [[package]]
 name = "libc"
 version = "0.2.150"
@@ -3141,7 +3147,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "fd01039851e82f8799046eabbb354056283fb265c8ec0996af940f4e85a380ff"
 dependencies = [
 "serde",
- "toml",
+ "toml 0.8.14",
 ]
 [[package]]
@@ -3657,7 +3663,7 @@ dependencies = [
 "thiserror",
 "tokio",
 "tokio-util",
- "toml_edit",
+ "toml_edit 0.19.10",
 "utils",
 "workspace_hack",
 ]
@@ -3695,7 +3701,6 @@ dependencies = [
 "humantime",
 "humantime-serde",
 "hyper 0.14.26",
 "indoc",
 "itertools 0.10.5",
 "md5",
 "metrics",
@@ -3744,7 +3749,7 @@ dependencies = [
 "tokio-stream",
 "tokio-tar",
 "tokio-util",
- "toml_edit",
+ "toml_edit 0.19.10",
 "tracing",
 "twox-hash",
 "url",
@@ -3761,7 +3766,6 @@ dependencies = [
 "bincode",
 "byteorder",
 "bytes",
 "camino",
 "chrono",
 "const_format",
 "enum-map",
@@ -3769,16 +3773,11 @@ dependencies = [
 "humantime",
 "humantime-serde",
 "itertools 0.10.5",
 "nix 0.27.1",
 "postgres_backend",
 "postgres_ffi",
 "rand 0.8.5",
 "remote_storage",
 "reqwest 0.12.4",
 "serde",
 "serde_json",
 "serde_with",
 "storage_broker",
 "strum",
 "strum_macros",
 "thiserror",
@@ -3907,9 +3906,8 @@ dependencies = [
 [[package]]
 name = "parquet"
-version = "53.0.0"
+version = "51.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
+source = "git+https://github.com/apache/arrow-rs?branch=master#2534976a564be3d2d56312dc88fb1b6ed4cef829"
 checksum = "f0fbf928021131daaa57d334ca8e3904fe9ae22f73c56244fc7db9b04eedc3d8"
 dependencies = [
 "ahash",
 "bytes",
@@ -3928,9 +3926,8 @@ dependencies = [
 [[package]]
 name = "parquet_derive"
-version = "53.0.0"
+version = "51.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
+source = "git+https://github.com/apache/arrow-rs?branch=master#2534976a564be3d2d56312dc88fb1b6ed4cef829"
 checksum = "86e9fcfae007533a06b580429a3f7e07cb833ec8aa37c041c16563e7918f057e"
 dependencies = [
 "parquet",
 "proc-macro2",
@@ -3967,6 +3964,12 @@ dependencies = [
 "sha2",
 ]
 [[package]]
 name = "peeking_take_while"
 version = "0.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099"
 [[package]]
 name = "pem"
 version = "3.0.3"
@@ -4120,7 +4123,7 @@ dependencies = [
 [[package]]
 name = "postgres"
 version = "0.19.4"
-source = "git+https://github.com/neondatabase/rust-postgres.git?rev=20031d7a9ee1addeae6e0968e3899ae6bf01cee2#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
+source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
 dependencies = [
 "bytes",
 "fallible-iterator",
@@ -4133,7 +4136,7 @@ dependencies = [
 [[package]]
 name = "postgres-protocol"
 version = "0.6.4"
-source = "git+https://github.com/neondatabase/rust-postgres.git?rev=20031d7a9ee1addeae6e0968e3899ae6bf01cee2#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
+source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
 dependencies = [
 "base64 0.20.0",
 "byteorder",
@@ -4152,7 +4155,7 @@ dependencies = [
 [[package]]
 name = "postgres-types"
 version = "0.2.4"
-source = "git+https://github.com/neondatabase/rust-postgres.git?rev=20031d7a9ee1addeae6e0968e3899ae6bf01cee2#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
+source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
 dependencies = [
 "bytes",
 "fallible-iterator",
@@ -4264,9 +4267,9 @@ dependencies = [
 [[package]]
 name = "prettyplease"
-version = "0.2.17"
+version = "0.2.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8d3928fb5db768cb86f891ff014f0144589297e3c6a1aba6ed7cecfdace270c7"
+checksum = "3b69d39aab54d069e7f2fe8cb970493e7834601ca2d8c65fd7bbd183578080d1"
 dependencies = [
 "proc-macro2",
 "syn 2.0.52",
@@ -4811,7 +4814,7 @@ dependencies = [
 "tokio",
 "tokio-stream",
 "tokio-util",
- "toml_edit",
+ "toml_edit 0.19.10",
 "tracing",
 "utils",
 ]
@@ -5321,7 +5324,7 @@ dependencies = [
 "tokio-stream",
 "tokio-tar",
 "tokio-util",
- "toml_edit",
+ "toml_edit 0.19.10",
 "tracing",
 "tracing-subscriber",
 "url",
@@ -5730,6 +5733,17 @@ dependencies = [
 "signal-hook-registry",
 ]
 [[package]]
 name = "signal-hook-mio"
 version = "0.2.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "29ad2e15f37ec9a6cc544097b78a1ec90001e9f71b81338ca39f430adaca99af"
 dependencies = [
 "libc",
 "mio",
 "signal-hook",
 ]
 [[package]]
 name = "signal-hook-registry"
 version = "1.4.1"
@@ -6042,21 +6056,21 @@ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
 [[package]]
 name = "strum"
-version = "0.26.3"
+version = "0.24.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06"
+checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f"
 [[package]]
 name = "strum_macros"
-version = "0.26.4"
+version = "0.24.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be"
+checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59"
 dependencies = [
- "heck 0.5.0",
+ "heck 0.4.1",
 "proc-macro2",
 "quote",
 "rustversion",
- "syn 2.0.52",
+ "syn 1.0.109",
 ]
 [[package]]
@@ -6067,9 +6081,8 @@ checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc"
 [[package]]
 name = "svg_fmt"
-version = "0.4.3"
+version = "0.4.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
+source = "git+https://github.com/nical/rust_debug?rev=28a7d96eecff2f28e75b1ea09f2d499a60d0e3b4#28a7d96eecff2f28e75b1ea09f2d499a60d0e3b4"
 checksum = "20e16a0f46cf5fd675563ef54f26e83e20f2366bcf027bcb3cc3ed2b98aaf2ca"
 [[package]]
 name = "syn"
@@ -6397,7 +6410,7 @@ dependencies = [
 [[package]]
 name = "tokio-postgres"
 version = "0.7.7"
-source = "git+https://github.com/neondatabase/rust-postgres.git?rev=20031d7a9ee1addeae6e0968e3899ae6bf01cee2#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
+source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
 dependencies = [
 "async-trait",
 "byteorder",
@@ -6508,6 +6521,18 @@ dependencies = [
 "tracing",
 ]
 [[package]]
 name = "toml"
 version = "0.7.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d6135d499e69981f9ff0ef2167955a5333c35e36f6937d382974566b3d5b94ec"
 dependencies = [
 "serde",
 "serde_spanned",
 "toml_datetime",
 "toml_edit 0.19.10",
 ]
 [[package]]
 name = "toml"
 version = "0.8.14"
@@ -6517,7 +6542,7 @@ dependencies = [
 "serde",
 "serde_spanned",
 "toml_datetime",
- "toml_edit",
+ "toml_edit 0.22.14",
 ]
 [[package]]
@@ -6529,6 +6554,19 @@ dependencies = [
 "serde",
 ]
 [[package]]
 name = "toml_edit"
 version = "0.19.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2380d56e8670370eee6566b0bfd4265f65b3f432e8c6d85623f728d4fa31f739"
 dependencies = [
 "indexmap 1.9.3",
 "serde",
 "serde_spanned",
 "toml_datetime",
 "winnow 0.4.6",
 ]
 [[package]]
 name = "toml_edit"
 version = "0.22.14"
@@ -6539,7 +6577,7 @@ dependencies = [
 "serde",
 "serde_spanned",
 "toml_datetime",
- "winnow",
+ "winnow 0.6.13",
 ]
 [[package]]
@@ -6952,7 +6990,7 @@ dependencies = [
 "tokio-stream",
 "tokio-tar",
 "tokio-util",
- "toml_edit",
+ "toml_edit 0.19.10",
 "tracing",
 "tracing-error",
 "tracing-subscriber",
@@ -7498,6 +7536,15 @@ version = "0.52.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8"
 [[package]]
 name = "winnow"
 version = "0.4.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "61de7bac303dc551fe038e2b3cef0f571087a47571ea6e79a87692ac99b99699"
 dependencies = [
 "memchr",
 ]
 [[package]]
 name = "winnow"
 version = "0.6.13"
@@ -7567,7 +7614,6 @@ dependencies = [
 "hyper 0.14.26",
 "indexmap 1.9.3",
 "itertools 0.10.5",
 "itertools 0.12.1",
 "lazy_static",
 "libc",
 "log",
@@ -7605,7 +7651,6 @@ dependencies = [
 "tokio",
 "tokio-rustls 0.24.0",
 "tokio-util",
 "toml_edit",
 "tonic",
 "tower",
 "tracing",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -64,7 +64,7 @@ aws-types = "1.2.0"
 axum = { version = "0.6.20", features = ["ws"] }
 base64 = "0.13.0"
 bincode = "1.3"
-bindgen = "0.70"
+bindgen = "0.65"
 bit_field = "0.10.2"
 bstr = "1.0"
 byteorder = "1.4"
@@ -73,7 +73,7 @@ camino = "1.1.6"
 cfg-if = "1.0.0"
 chrono = { version = "0.4", default-features = false, features = ["clock"] }
 clap = { version = "4.0", features = ["derive"] }
-comfy-table = "7.1"
+comfy-table = "6.1"
 const_format = "0.2"
 crc32c = "0.6"
 crossbeam-deque = "0.8.5"
@@ -103,7 +103,6 @@ humantime-serde = "1.1.1"
 hyper = "0.14"
 tokio-tungstenite = "0.20.0"
 indexmap = "2"
 indoc = "2"
 inotify = "0.10.2"
 ipnet = "2.9.0"
 itertools = "0.10"
@@ -123,8 +122,8 @@ opentelemetry = "0.20.0"
 opentelemetry-otlp = { version = "0.13.0", default-features=false, features = ["http-proto", "trace", "http", "reqwest-client"] }
 opentelemetry-semantic-conventions = "0.12.0"
 parking_lot = "0.12"
-parquet = { version = "53", default-features = false, features = ["zstd"] }
+parquet = { version = "51.0.0", default-features = false, features = ["zstd"] }
-parquet_derive = "53"
+parquet_derive = "51.0.0"
 pbkdf2 = { version = "0.12.1", features = ["simple", "std"] }
 pin-project-lite = "0.2"
 procfs = "0.16"
@@ -158,10 +157,11 @@ signal-hook = "0.3"
 smallvec = "1.11"
 smol_str = { version = "0.2.0", features = ["serde"] }
 socket2 = "0.5"
-strum = "0.26"
+strum = "0.24"
-strum_macros = "0.26"
+strum_macros = "0.24"
 "subtle"  = "2.5.0"
-svg_fmt = "0.4.3"
+# Our PR https://github.com/nical/rust_debug/pull/4 has been merged but no new version released yet
 svg_fmt = { git = "https://github.com/nical/rust_debug", rev = "28a7d96eecff2f28e75b1ea09f2d499a60d0e3b4" }
 sync_wrapper = "0.1.2"
 tar = "0.4"
 task-local-extensions = "0.1.4"
@@ -177,8 +177,8 @@ tokio-rustls = "0.25"
 tokio-stream = "0.1"
 tokio-tar = "0.3"
 tokio-util = { version = "0.7.10", features = ["io", "rt"] }
-toml = "0.8"
+toml = "0.7"
-toml_edit = "0.22"
+toml_edit = "0.19"
 tonic = {version = "0.9", features = ["tls", "tls-roots"]}
 tower-service = "0.3.2"
 tracing = "0.1"
@@ -201,21 +201,10 @@ env_logger = "0.10"
 log = "0.4"
 ## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed
-
+postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
-# We want to use the 'neon' branch for these, but there's currently one
+postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
-# incompatible change on the branch. See:
+postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
-#
+tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
 # - PR #8076 which contained changes that depended on the new changes in
 #   the rust-postgres crate, and
 # - PR #8654 which reverted those changes and made the code in proxy incompatible
 #   with the tip of the 'neon' branch again.
 #
 # When those proxy changes are re-applied (see PR #8747), we can switch using
 # the tip of the 'neon' branch again.
 postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "20031d7a9ee1addeae6e0968e3899ae6bf01cee2" }
 postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "20031d7a9ee1addeae6e0968e3899ae6bf01cee2" }
 postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "20031d7a9ee1addeae6e0968e3899ae6bf01cee2" }
 tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "20031d7a9ee1addeae6e0968e3899ae6bf01cee2" }
 ## Local libraries
 compute_api = { version = "0.1", path = "./libs/compute_api/" }
@@ -252,7 +241,11 @@ tonic-build = "0.9"
 [patch.crates-io]
 # Needed to get `tokio-postgres-rustls` to depend on our fork.
-tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev = "20031d7a9ee1addeae6e0968e3899ae6bf01cee2" }
+tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
 # bug fixes for UUID
 parquet = { git = "https://github.com/apache/arrow-rs", branch = "master" }
 parquet_derive = { git = "https://github.com/apache/arrow-rs", branch = "master" }
 ################# Binary contents sections
--- a/1
+++ b/1
@@ -87,7 +87,6 @@ RUN mkdir -p /data/.neon/ && \
       "pg_distrib_dir='/usr/local/'\n" \
       "listen_pg_addr='0.0.0.0:6400'\n" \
       "listen_http_addr='0.0.0.0:9898'\n" \
       "availability_zone='local'\n" \
  > /data/.neon/pageserver.toml && \
  chown -R neon:neon /data/.neon
--- a/Dockerfile.build-tools
+++ b/Dockerfile.build-tools
@@ -192,7 +192,7 @@ WORKDIR /home/nonroot
 # Rust
 # Please keep the version of llvm (installed above) in sync with rust llvm (`rustc --version --verbose | grep LLVM`)
-ENV RUSTC_VERSION=1.81.0
+ENV RUSTC_VERSION=1.80.1
 ENV RUSTUP_HOME="/home/nonroot/.rustup"
 ENV PATH="/home/nonroot/.cargo/bin:${PATH}"
 ARG RUSTFILT_VERSION=0.2.1
@@ -207,7 +207,7 @@ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux
    export PATH="$HOME/.cargo/bin:$PATH" && \
    . "$HOME/.cargo/env" && \
    cargo --version && rustup --version && \
-    rustup component add llvm-tools rustfmt clippy && \
+    rustup component add llvm-tools-preview rustfmt clippy && \
    cargo install rustfilt            --version ${RUSTFILT_VERSION} && \
    cargo install cargo-hakari        --version ${CARGO_HAKARI_VERSION} && \
    cargo install cargo-deny --locked --version ${CARGO_DENY_VERSION} && \
--- a/README.md
+++ b/README.md
@@ -64,12 +64,6 @@ brew install protobuf openssl flex bison icu4c pkg-config
 echo 'export PATH="$(brew --prefix openssl)/bin:$PATH"' >> ~/.zshrc
 ```
 If you get errors about missing `m4` you may have to install it manually:
 ```
 brew install m4
 brew link --force m4
 ```
 2. [Install Rust](https://www.rust-lang.org/tools/install)
 ```
 # recommended approach from https://www.rust-lang.org/tools/install
--- a/compute_tools/src/pg_helpers.rs
+++ b/compute_tools/src/pg_helpers.rs
@@ -22,10 +22,9 @@ use compute_api::spec::{Database, GenericOption, GenericOptions, PgIdent, Role};
 const POSTGRES_WAIT_TIMEOUT: Duration = Duration::from_millis(60 * 1000); // milliseconds
-/// Escape a string for including it in a SQL literal.
+/// Escape a string for including it in a SQL literal. Wrapping the result
-///
+/// with `E'{}'` or `'{}'` is not required, as it returns a ready-to-use
-/// Wrapping the result with `E'{}'` or `'{}'` is not required,
+/// SQL string literal, e.g. `'db'''` or `E'db\\'`.
 /// as it returns a ready-to-use SQL string literal, e.g. `'db'''` or `E'db\\'`.
 /// See <https://github.com/postgres/postgres/blob/da98d005cdbcd45af563d0c4ac86d0e9772cd15f/src/backend/utils/adt/quote.c#L47>
 /// for the original implementation.
 pub fn escape_literal(s: &str) -> String {
--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -640,8 +640,6 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local
        }
        Some(("branch", branch_match)) => {
            let tenant_id = get_tenant_id(branch_match, env)?;
            let new_timeline_id =
                parse_timeline_id(branch_match)?.unwrap_or(TimelineId::generate());
            let new_branch_name = branch_match
                .get_one::<String>("branch-name")
                .ok_or_else(|| anyhow!("No branch name provided"))?;
@@ -660,6 +658,7 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local
                .map(|lsn_str| Lsn::from_str(lsn_str))
                .transpose()
                .context("Failed to parse ancestor start Lsn from the request")?;
            let new_timeline_id = TimelineId::generate();
            let storage_controller = StorageController::from_env(env);
            let create_req = TimelineCreateRequest {
                new_timeline_id,
@@ -1571,6 +1570,7 @@ fn cli() -> Command {
                        .value_parser(value_parser!(PathBuf))
                        .value_name("config")
                )
                .arg(pg_version_arg.clone())
                .arg(force_arg)
        )
        .subcommand(
@@ -1583,7 +1583,6 @@ fn cli() -> Command {
            .subcommand(Command::new("branch")
                .about("Create a new timeline, using another timeline as a base, copying its data")
                .arg(tenant_id_arg.clone())
                .arg(timeline_id_arg.clone())
                .arg(branch_name_arg.clone())
                .arg(Arg::new("ancestor-branch-name").long("ancestor-branch-name")
                    .help("Use last Lsn of another timeline (and its data) as base when creating the new timeline. The timeline gets resolved by its branch name.").required(false))
--- a/control_plane/src/pageserver.rs
+++ b/control_plane/src/pageserver.rs
@@ -75,14 +75,14 @@ impl PageServerNode {
        }
    }
-    fn pageserver_make_identity_toml(&self, node_id: NodeId) -> toml_edit::DocumentMut {
+    fn pageserver_make_identity_toml(&self, node_id: NodeId) -> toml_edit::Document {
-        toml_edit::DocumentMut::from_str(&format!("id={node_id}")).unwrap()
+        toml_edit::Document::from_str(&format!("id={node_id}")).unwrap()
    }
    fn pageserver_init_make_toml(
        &self,
        conf: NeonLocalInitPageserverConf,
-    ) -> anyhow::Result<toml_edit::DocumentMut> {
+    ) -> anyhow::Result<toml_edit::Document> {
        assert_eq!(&PageServerConf::from(&conf), &self.conf, "during neon_local init, we derive the runtime state of ps conf (self.conf) from the --config flag fully");
        // TODO(christian): instead of what we do here, create a pageserver_api::config::ConfigToml (PR #7656)
@@ -137,9 +137,9 @@ impl PageServerNode {
        // Turn `overrides` into a toml document.
        // TODO: above code is legacy code, it should be refactored to use toml_edit directly.
-        let mut config_toml = toml_edit::DocumentMut::new();
+        let mut config_toml = toml_edit::Document::new();
        for fragment_str in overrides {
-            let fragment = toml_edit::DocumentMut::from_str(&fragment_str)
+            let fragment = toml_edit::Document::from_str(&fragment_str)
                .expect("all fragments in `overrides` are valid toml documents, this function controls that");
            for (key, item) in fragment.iter() {
                config_toml.insert(key, item.clone());
--- a/control_plane/storcon_cli/src/main.rs
+++ b/control_plane/storcon_cli/src/main.rs
@@ -4,8 +4,8 @@ use std::{str::FromStr, time::Duration};
 use clap::{Parser, Subcommand};
 use pageserver_api::{
    controller_api::{
-        NodeAvailabilityWrapper, NodeDescribeResponse, NodeShardResponse, ShardSchedulingPolicy,
+        NodeAvailabilityWrapper, NodeDescribeResponse, ShardSchedulingPolicy, TenantCreateRequest,
-        TenantCreateRequest, TenantDescribeResponse, TenantPolicyRequest,
+        TenantDescribeResponse, TenantPolicyRequest,
    },
    models::{
        EvictionPolicy, EvictionPolicyLayerAccessThreshold, LocationConfigSecondary,
@@ -80,10 +80,7 @@ enum Command {
    /// List nodes known to the storage controller
    Nodes {},
    /// List tenants known to the storage controller
-    Tenants {
+    Tenants {},
        /// If this field is set, it will list the tenants on a specific node
        node_id: Option<NodeId>,
    },
    /// Create a new tenant in the storage controller, and by extension on pageservers.
    TenantCreate {
        #[arg(long)]
@@ -339,7 +336,7 @@ async fn main() -> anyhow::Result<()> {
                        listen_pg_port,
                        listen_http_addr,
                        listen_http_port,
-                        availability_zone_id,
+                        availability_zone_id: Some(availability_zone_id),
                    }),
                )
                .await?;
@@ -406,41 +403,7 @@ async fn main() -> anyhow::Result<()> {
                )
                .await?;
        }
-        Command::Tenants {
+        Command::Tenants {} => {
            node_id: Some(node_id),
        } => {
            let describe_response = storcon_client
                .dispatch::<(), NodeShardResponse>(
                    Method::GET,
                    format!("control/v1/node/{node_id}/shards"),
                    None,
                )
                .await?;
            let shards = describe_response.shards;
            let mut table = comfy_table::Table::new();
            table.set_header([
                "Shard",
                "Intended Primary/Secondary",
                "Observed Primary/Secondary",
            ]);
            for shard in shards {
                table.add_row([
                    format!("{}", shard.tenant_shard_id),
                    match shard.is_intended_secondary {
                        None => "".to_string(),
                        Some(true) => "Secondary".to_string(),
                        Some(false) => "Primary".to_string(),
                    },
                    match shard.is_observed_secondary {
                        None => "".to_string(),
                        Some(true) => "Secondary".to_string(),
                        Some(false) => "Primary".to_string(),
                    },
                ]);
            }
            println!("{table}");
        }
        Command::Tenants { node_id: None } => {
            let mut resp = storcon_client
                .dispatch::<(), Vec<TenantDescribeResponse>>(
                    Method::GET,
--- a/libs/metrics/src/lib.rs
+++ b/libs/metrics/src/lib.rs
@@ -68,7 +68,6 @@ macro_rules! register_uint_gauge {
 static INTERNAL_REGISTRY: Lazy<Registry> = Lazy::new(Registry::new);
 /// Register a collector in the internal registry. MUST be called before the first call to `gather()`.
 ///
 /// Otherwise, we can have a deadlock in the `gather()` call, trying to register a new collector
 /// while holding the lock.
 pub fn register_internal(c: Box<dyn Collector>) -> prometheus::Result<()> {
--- a/libs/pageserver_api/Cargo.toml
+++ b/libs/pageserver_api/Cargo.toml
@@ -4,10 +4,6 @@ version = "0.1.0"
 edition.workspace = true
 license.workspace = true
 [features]
 # See pageserver/Cargo.toml
 testing = ["dep:nix"]
 [dependencies]
 serde.workspace = true
 serde_with.workspace = true
@@ -27,12 +23,6 @@ thiserror.workspace = true
 humantime-serde.workspace = true
 chrono = { workspace = true, features = ["serde"] }
 itertools.workspace = true
 storage_broker.workspace = true
 camino = {workspace = true, features = ["serde1"]}
 remote_storage.workspace = true
 postgres_backend.workspace = true
 nix = {workspace = true, optional = true}
 reqwest.workspace = true
 [dev-dependencies]
 bincode.workspace = true
--- a/libs/pageserver_api/src/config.rs
+++ b/libs/pageserver_api/src/config.rs
@@ -1,28 +1,15 @@
-use camino::Utf8PathBuf;
+use std::collections::HashMap;
 use const_format::formatcp;
 #[cfg(test)]
 mod tests;
 use const_format::formatcp;
 pub const DEFAULT_PG_LISTEN_PORT: u16 = 64000;
 pub const DEFAULT_PG_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_PG_LISTEN_PORT}");
 pub const DEFAULT_HTTP_LISTEN_PORT: u16 = 9898;
 pub const DEFAULT_HTTP_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_HTTP_LISTEN_PORT}");
 use postgres_backend::AuthType;
 use remote_storage::RemoteStorageConfig;
 use serde_with::serde_as;
 use std::{
    collections::HashMap,
    num::{NonZeroU64, NonZeroUsize},
    str::FromStr,
    time::Duration,
 };
 use utils::logging::LogFormat;
 use crate::models::ImageCompressionAlgorithm;
 use crate::models::LsnLease;
 // Certain metadata (e.g. externally-addressable name, AZ) is delivered
 // as a separate structure.  This information is not neeed by the pageserver
 // itself, it is only used for registering the pageserver with the control
@@ -42,476 +29,3 @@ pub struct NodeMetadata {
    #[serde(flatten)]
    pub other: HashMap<String, serde_json::Value>,
 }
 /// `pageserver.toml`
 ///
 /// We use serde derive with `#[serde(default)]` to generate a deserializer
 /// that fills in the default values for each config field.
 ///
 /// If there cannot be a static default value because we need to make runtime
 /// checks to determine the default, make it an `Option` (which defaults to None).
 /// The runtime check should be done in the consuming crate, i.e., `pageserver`.
 #[serde_as]
 #[derive(Clone, Debug, serde::Deserialize, serde::Serialize)]
 #[serde(default, deny_unknown_fields)]
 pub struct ConfigToml {
    // types mapped 1:1 into the runtime PageServerConfig type
    pub listen_pg_addr: String,
    pub listen_http_addr: String,
    pub availability_zone: Option<String>,
    #[serde(with = "humantime_serde")]
    pub wait_lsn_timeout: Duration,
    #[serde(with = "humantime_serde")]
    pub wal_redo_timeout: Duration,
    pub superuser: String,
    pub page_cache_size: usize,
    pub max_file_descriptors: usize,
    pub pg_distrib_dir: Option<Utf8PathBuf>,
    #[serde_as(as = "serde_with::DisplayFromStr")]
    pub http_auth_type: AuthType,
    #[serde_as(as = "serde_with::DisplayFromStr")]
    pub pg_auth_type: AuthType,
    pub auth_validation_public_key_path: Option<Utf8PathBuf>,
    pub remote_storage: Option<RemoteStorageConfig>,
    pub tenant_config: TenantConfigToml,
    #[serde_as(as = "serde_with::DisplayFromStr")]
    pub broker_endpoint: storage_broker::Uri,
    #[serde(with = "humantime_serde")]
    pub broker_keepalive_interval: Duration,
    #[serde_as(as = "serde_with::DisplayFromStr")]
    pub log_format: LogFormat,
    pub concurrent_tenant_warmup: NonZeroUsize,
    pub concurrent_tenant_size_logical_size_queries: NonZeroUsize,
    #[serde(with = "humantime_serde")]
    pub metric_collection_interval: Duration,
    pub metric_collection_endpoint: Option<reqwest::Url>,
    pub metric_collection_bucket: Option<RemoteStorageConfig>,
    #[serde(with = "humantime_serde")]
    pub synthetic_size_calculation_interval: Duration,
    pub disk_usage_based_eviction: Option<DiskUsageEvictionTaskConfig>,
    pub test_remote_failures: u64,
    pub ondemand_download_behavior_treat_error_as_warn: bool,
    #[serde(with = "humantime_serde")]
    pub background_task_maximum_delay: Duration,
    pub control_plane_api: Option<reqwest::Url>,
    pub control_plane_api_token: Option<String>,
    pub control_plane_emergency_mode: bool,
    pub heatmap_upload_concurrency: usize,
    pub secondary_download_concurrency: usize,
    pub virtual_file_io_engine: Option<crate::models::virtual_file::IoEngineKind>,
    pub ingest_batch_size: u64,
    pub max_vectored_read_bytes: MaxVectoredReadBytes,
    pub image_compression: ImageCompressionAlgorithm,
    pub ephemeral_bytes_per_memory_kb: usize,
    pub l0_flush: Option<crate::models::L0FlushConfig>,
    #[serde(skip_serializing)]
    // TODO(https://github.com/neondatabase/neon/issues/8184): remove after this field is removed from all pageserver.toml's
    pub compact_level0_phase1_value_access: serde::de::IgnoredAny,
    pub virtual_file_direct_io: crate::models::virtual_file::DirectIoMode,
    pub io_buffer_alignment: usize,
 }
 #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
 #[serde(deny_unknown_fields)]
 pub struct DiskUsageEvictionTaskConfig {
    pub max_usage_pct: utils::serde_percent::Percent,
    pub min_avail_bytes: u64,
    #[serde(with = "humantime_serde")]
    pub period: Duration,
    #[cfg(feature = "testing")]
    pub mock_statvfs: Option<statvfs::mock::Behavior>,
    /// Select sorting for evicted layers
    #[serde(default)]
    pub eviction_order: EvictionOrder,
 }
 pub mod statvfs {
    pub mod mock {
        #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
        #[serde(tag = "type")]
        pub enum Behavior {
            Success {
                blocksize: u64,
                total_blocks: u64,
                name_filter: Option<utils::serde_regex::Regex>,
            },
            #[cfg(feature = "testing")]
            Failure { mocked_error: MockedError },
        }
        #[cfg(feature = "testing")]
        #[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
        #[allow(clippy::upper_case_acronyms)]
        pub enum MockedError {
            EIO,
        }
        #[cfg(feature = "testing")]
        impl From<MockedError> for nix::Error {
            fn from(e: MockedError) -> Self {
                match e {
                    MockedError::EIO => nix::Error::EIO,
                }
            }
        }
    }
 }
 #[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
 #[serde(tag = "type", content = "args")]
 pub enum EvictionOrder {
    RelativeAccessed {
        highest_layer_count_loses_first: bool,
    },
 }
 impl Default for EvictionOrder {
    fn default() -> Self {
        Self::RelativeAccessed {
            highest_layer_count_loses_first: true,
        }
    }
 }
 #[derive(
    Eq,
    PartialEq,
    Debug,
    Copy,
    Clone,
    strum_macros::EnumString,
    strum_macros::Display,
    serde_with::DeserializeFromStr,
    serde_with::SerializeDisplay,
 )]
 #[strum(serialize_all = "kebab-case")]
 pub enum GetVectoredImpl {
    Sequential,
    Vectored,
 }
 #[derive(
    Eq,
    PartialEq,
    Debug,
    Copy,
    Clone,
    strum_macros::EnumString,
    strum_macros::Display,
    serde_with::DeserializeFromStr,
    serde_with::SerializeDisplay,
 )]
 #[strum(serialize_all = "kebab-case")]
 pub enum GetImpl {
    Legacy,
    Vectored,
 }
 #[derive(Copy, Clone, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
 #[serde(transparent)]
 pub struct MaxVectoredReadBytes(pub NonZeroUsize);
 /// A tenant's calcuated configuration, which is the result of merging a
 /// tenant's TenantConfOpt with the global TenantConf from PageServerConf.
 ///
 /// For storing and transmitting individual tenant's configuration, see
 /// TenantConfOpt.
 #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
 #[serde(deny_unknown_fields, default)]
 pub struct TenantConfigToml {
    // Flush out an inmemory layer, if it's holding WAL older than this
    // This puts a backstop on how much WAL needs to be re-digested if the
    // page server crashes.
    // This parameter actually determines L0 layer file size.
    pub checkpoint_distance: u64,
    // Inmemory layer is also flushed at least once in checkpoint_timeout to
    // eventually upload WAL after activity is stopped.
    #[serde(with = "humantime_serde")]
    pub checkpoint_timeout: Duration,
    // Target file size, when creating image and delta layers.
    // This parameter determines L1 layer file size.
    pub compaction_target_size: u64,
    // How often to check if there's compaction work to be done.
    // Duration::ZERO means automatic compaction is disabled.
    #[serde(with = "humantime_serde")]
    pub compaction_period: Duration,
    // Level0 delta layer threshold for compaction.
    pub compaction_threshold: usize,
    pub compaction_algorithm: crate::models::CompactionAlgorithmSettings,
    // Determines how much history is retained, to allow
    // branching and read replicas at an older point in time.
    // The unit is #of bytes of WAL.
    // Page versions older than this are garbage collected away.
    pub gc_horizon: u64,
    // Interval at which garbage collection is triggered.
    // Duration::ZERO means automatic GC is disabled
    #[serde(with = "humantime_serde")]
    pub gc_period: Duration,
    // Delta layer churn threshold to create L1 image layers.
    pub image_creation_threshold: usize,
    // Determines how much history is retained, to allow
    // branching and read replicas at an older point in time.
    // The unit is time.
    // Page versions older than this are garbage collected away.
    #[serde(with = "humantime_serde")]
    pub pitr_interval: Duration,
    /// Maximum amount of time to wait while opening a connection to receive wal, before erroring.
    #[serde(with = "humantime_serde")]
    pub walreceiver_connect_timeout: Duration,
    /// Considers safekeepers stalled after no WAL updates were received longer than this threshold.
    /// A stalled safekeeper will be changed to a newer one when it appears.
    #[serde(with = "humantime_serde")]
    pub lagging_wal_timeout: Duration,
    /// Considers safekeepers lagging when their WAL is behind another safekeeper for more than this threshold.
    /// A lagging safekeeper will be changed after `lagging_wal_timeout` time elapses since the last WAL update,
    /// to avoid eager reconnects.
    pub max_lsn_wal_lag: NonZeroU64,
    pub eviction_policy: crate::models::EvictionPolicy,
    pub min_resident_size_override: Option<u64>,
    // See the corresponding metric's help string.
    #[serde(with = "humantime_serde")]
    pub evictions_low_residence_duration_metric_threshold: Duration,
    /// If non-zero, the period between uploads of a heatmap from attached tenants.  This
    /// may be disabled if a Tenant will not have secondary locations: only secondary
    /// locations will use the heatmap uploaded by attached locations.
    #[serde(with = "humantime_serde")]
    pub heatmap_period: Duration,
    /// If true then SLRU segments are dowloaded on demand, if false SLRU segments are included in basebackup
    pub lazy_slru_download: bool,
    pub timeline_get_throttle: crate::models::ThrottleConfig,
    // How much WAL must be ingested before checking again whether a new image layer is required.
    // Expresed in multiples of checkpoint distance.
    pub image_layer_creation_check_threshold: u8,
    /// Switch to a new aux file policy. Switching this flag requires the user has not written any aux file into
    /// the storage before, and this flag cannot be switched back. Otherwise there will be data corruptions.
    /// There is a `last_aux_file_policy` flag which gets persisted in `index_part.json` once the first aux
    /// file is written.
    pub switch_aux_file_policy: crate::models::AuxFilePolicy,
    /// The length for an explicit LSN lease request.
    /// Layers needed to reconstruct pages at LSN will not be GC-ed during this interval.
    #[serde(with = "humantime_serde")]
    pub lsn_lease_length: Duration,
    /// The length for an implicit LSN lease granted as part of `get_lsn_by_timestamp` request.
    /// Layers needed to reconstruct pages at LSN will not be GC-ed during this interval.
    #[serde(with = "humantime_serde")]
    pub lsn_lease_length_for_ts: Duration,
 }
 pub mod defaults {
    use crate::models::ImageCompressionAlgorithm;
    pub use storage_broker::DEFAULT_ENDPOINT as BROKER_DEFAULT_ENDPOINT;
    pub const DEFAULT_WAIT_LSN_TIMEOUT: &str = "300 s";
    pub const DEFAULT_WAL_REDO_TIMEOUT: &str = "60 s";
    pub const DEFAULT_SUPERUSER: &str = "cloud_admin";
    pub const DEFAULT_PAGE_CACHE_SIZE: usize = 8192;
    pub const DEFAULT_MAX_FILE_DESCRIPTORS: usize = 100;
    pub const DEFAULT_LOG_FORMAT: &str = "plain";
    pub const DEFAULT_CONCURRENT_TENANT_WARMUP: usize = 8;
    pub const DEFAULT_CONCURRENT_TENANT_SIZE_LOGICAL_SIZE_QUERIES: usize = 1;
    pub const DEFAULT_METRIC_COLLECTION_INTERVAL: &str = "10 min";
    pub const DEFAULT_METRIC_COLLECTION_ENDPOINT: Option<reqwest::Url> = None;
    pub const DEFAULT_SYNTHETIC_SIZE_CALCULATION_INTERVAL: &str = "10 min";
    pub const DEFAULT_BACKGROUND_TASK_MAXIMUM_DELAY: &str = "10s";
    pub const DEFAULT_HEATMAP_UPLOAD_CONCURRENCY: usize = 8;
    pub const DEFAULT_SECONDARY_DOWNLOAD_CONCURRENCY: usize = 1;
    pub const DEFAULT_INGEST_BATCH_SIZE: u64 = 100;
    pub const DEFAULT_MAX_VECTORED_READ_BYTES: usize = 128 * 1024; // 128 KiB
    pub const DEFAULT_IMAGE_COMPRESSION: ImageCompressionAlgorithm =
        ImageCompressionAlgorithm::Zstd { level: Some(1) };
    pub const DEFAULT_VALIDATE_VECTORED_GET: bool = false;
    pub const DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB: usize = 0;
    pub const DEFAULT_IO_BUFFER_ALIGNMENT: usize = 512;
 }
 impl Default for ConfigToml {
    fn default() -> Self {
        use defaults::*;
        Self {
            listen_pg_addr: (DEFAULT_PG_LISTEN_ADDR.to_string()),
            listen_http_addr: (DEFAULT_HTTP_LISTEN_ADDR.to_string()),
            availability_zone: (None),
            wait_lsn_timeout: (humantime::parse_duration(DEFAULT_WAIT_LSN_TIMEOUT)
                .expect("cannot parse default wait lsn timeout")),
            wal_redo_timeout: (humantime::parse_duration(DEFAULT_WAL_REDO_TIMEOUT)
                .expect("cannot parse default wal redo timeout")),
            superuser: (DEFAULT_SUPERUSER.to_string()),
            page_cache_size: (DEFAULT_PAGE_CACHE_SIZE),
            max_file_descriptors: (DEFAULT_MAX_FILE_DESCRIPTORS),
            pg_distrib_dir: None, // Utf8PathBuf::from("./pg_install"), // TODO: formely, this was std::env::current_dir()
            http_auth_type: (AuthType::Trust),
            pg_auth_type: (AuthType::Trust),
            auth_validation_public_key_path: (None),
            remote_storage: None,
            broker_endpoint: (storage_broker::DEFAULT_ENDPOINT
                .parse()
                .expect("failed to parse default broker endpoint")),
            broker_keepalive_interval: (humantime::parse_duration(
                storage_broker::DEFAULT_KEEPALIVE_INTERVAL,
            )
            .expect("cannot parse default keepalive interval")),
            log_format: (LogFormat::from_str(DEFAULT_LOG_FORMAT).unwrap()),
            concurrent_tenant_warmup: (NonZeroUsize::new(DEFAULT_CONCURRENT_TENANT_WARMUP)
                .expect("Invalid default constant")),
            concurrent_tenant_size_logical_size_queries: NonZeroUsize::new(1).unwrap(),
            metric_collection_interval: (humantime::parse_duration(
                DEFAULT_METRIC_COLLECTION_INTERVAL,
            )
            .expect("cannot parse default metric collection interval")),
            synthetic_size_calculation_interval: (humantime::parse_duration(
                DEFAULT_SYNTHETIC_SIZE_CALCULATION_INTERVAL,
            )
            .expect("cannot parse default synthetic size calculation interval")),
            metric_collection_endpoint: (DEFAULT_METRIC_COLLECTION_ENDPOINT),
            metric_collection_bucket: (None),
            disk_usage_based_eviction: (None),
            test_remote_failures: (0),
            ondemand_download_behavior_treat_error_as_warn: (false),
            background_task_maximum_delay: (humantime::parse_duration(
                DEFAULT_BACKGROUND_TASK_MAXIMUM_DELAY,
            )
            .unwrap()),
            control_plane_api: (None),
            control_plane_api_token: (None),
            control_plane_emergency_mode: (false),
            heatmap_upload_concurrency: (DEFAULT_HEATMAP_UPLOAD_CONCURRENCY),
            secondary_download_concurrency: (DEFAULT_SECONDARY_DOWNLOAD_CONCURRENCY),
            ingest_batch_size: (DEFAULT_INGEST_BATCH_SIZE),
            virtual_file_io_engine: None,
            max_vectored_read_bytes: (MaxVectoredReadBytes(
                NonZeroUsize::new(DEFAULT_MAX_VECTORED_READ_BYTES).unwrap(),
            )),
            image_compression: (DEFAULT_IMAGE_COMPRESSION),
            ephemeral_bytes_per_memory_kb: (DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB),
            l0_flush: None,
            compact_level0_phase1_value_access: Default::default(),
            virtual_file_direct_io: crate::models::virtual_file::DirectIoMode::default(),
            io_buffer_alignment: DEFAULT_IO_BUFFER_ALIGNMENT,
            tenant_config: TenantConfigToml::default(),
        }
    }
 }
 pub mod tenant_conf_defaults {
    // FIXME: This current value is very low. I would imagine something like 1 GB or 10 GB
    // would be more appropriate. But a low value forces the code to be exercised more,
    // which is good for now to trigger bugs.
    // This parameter actually determines L0 layer file size.
    pub const DEFAULT_CHECKPOINT_DISTANCE: u64 = 256 * 1024 * 1024;
    pub const DEFAULT_CHECKPOINT_TIMEOUT: &str = "10 m";
    // FIXME the below configs are only used by legacy algorithm. The new algorithm
    // has different parameters.
    // Target file size, when creating image and delta layers.
    // This parameter determines L1 layer file size.
    pub const DEFAULT_COMPACTION_TARGET_SIZE: u64 = 128 * 1024 * 1024;
    pub const DEFAULT_COMPACTION_PERIOD: &str = "20 s";
    pub const DEFAULT_COMPACTION_THRESHOLD: usize = 10;
    pub const DEFAULT_COMPACTION_ALGORITHM: crate::models::CompactionAlgorithm =
        crate::models::CompactionAlgorithm::Legacy;
    pub const DEFAULT_GC_HORIZON: u64 = 64 * 1024 * 1024;
    // Large DEFAULT_GC_PERIOD is fine as long as PITR_INTERVAL is larger.
    // If there's a need to decrease this value, first make sure that GC
    // doesn't hold a layer map write lock for non-trivial operations.
    // Relevant: https://github.com/neondatabase/neon/issues/3394
    pub const DEFAULT_GC_PERIOD: &str = "1 hr";
    pub const DEFAULT_IMAGE_CREATION_THRESHOLD: usize = 3;
    pub const DEFAULT_PITR_INTERVAL: &str = "7 days";
    pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "10 seconds";
    pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "10 seconds";
    // The default limit on WAL lag should be set to avoid causing disconnects under high throughput
    // scenarios: since the broker stats are updated ~1/s, a value of 1GiB should be sufficient for
    // throughputs up to 1GiB/s per timeline.
    pub const DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG: u64 = 1024 * 1024 * 1024;
    pub const DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD: &str = "24 hour";
    // By default ingest enough WAL for two new L0 layers before checking if new image
    // image layers should be created.
    pub const DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD: u8 = 2;
    pub const DEFAULT_INGEST_BATCH_SIZE: u64 = 100;
 }
 impl Default for TenantConfigToml {
    fn default() -> Self {
        use tenant_conf_defaults::*;
        Self {
            checkpoint_distance: DEFAULT_CHECKPOINT_DISTANCE,
            checkpoint_timeout: humantime::parse_duration(DEFAULT_CHECKPOINT_TIMEOUT)
                .expect("cannot parse default checkpoint timeout"),
            compaction_target_size: DEFAULT_COMPACTION_TARGET_SIZE,
            compaction_period: humantime::parse_duration(DEFAULT_COMPACTION_PERIOD)
                .expect("cannot parse default compaction period"),
            compaction_threshold: DEFAULT_COMPACTION_THRESHOLD,
            compaction_algorithm: crate::models::CompactionAlgorithmSettings {
                kind: DEFAULT_COMPACTION_ALGORITHM,
            },
            gc_horizon: DEFAULT_GC_HORIZON,
            gc_period: humantime::parse_duration(DEFAULT_GC_PERIOD)
                .expect("cannot parse default gc period"),
            image_creation_threshold: DEFAULT_IMAGE_CREATION_THRESHOLD,
            pitr_interval: humantime::parse_duration(DEFAULT_PITR_INTERVAL)
                .expect("cannot parse default PITR interval"),
            walreceiver_connect_timeout: humantime::parse_duration(
                DEFAULT_WALRECEIVER_CONNECT_TIMEOUT,
            )
            .expect("cannot parse default walreceiver connect timeout"),
            lagging_wal_timeout: humantime::parse_duration(DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT)
                .expect("cannot parse default walreceiver lagging wal timeout"),
            max_lsn_wal_lag: NonZeroU64::new(DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG)
                .expect("cannot parse default max walreceiver Lsn wal lag"),
            eviction_policy: crate::models::EvictionPolicy::NoEviction,
            min_resident_size_override: None,
            evictions_low_residence_duration_metric_threshold: humantime::parse_duration(
                DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD,
            )
            .expect("cannot parse default evictions_low_residence_duration_metric_threshold"),
            heatmap_period: Duration::ZERO,
            lazy_slru_download: false,
            timeline_get_throttle: crate::models::ThrottleConfig::disabled(),
            image_layer_creation_check_threshold: DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD,
            switch_aux_file_policy: crate::models::AuxFilePolicy::default_tenant_config(),
            lsn_lease_length: LsnLease::DEFAULT_LENGTH,
            lsn_lease_length_for_ts: LsnLease::DEFAULT_LENGTH_FOR_TS,
        }
    }
 }
--- a/libs/pageserver_api/src/controller_api.rs
+++ b/libs/pageserver_api/src/controller_api.rs
@@ -1,4 +1,4 @@
-use std::collections::{HashMap, HashSet};
+use std::collections::HashSet;
 use std::str::FromStr;
 use std::time::{Duration, Instant};
@@ -57,7 +57,7 @@ pub struct NodeRegisterRequest {
    pub listen_http_addr: String,
    pub listen_http_port: u16,
-    pub availability_zone_id: String,
+    pub availability_zone_id: Option<String>,
 }
 #[derive(Serialize, Deserialize)]
@@ -74,17 +74,6 @@ pub struct TenantPolicyRequest {
    pub scheduling: Option<ShardSchedulingPolicy>,
 }
 #[derive(Serialize, Deserialize)]
 pub struct ShardsPreferredAzsRequest {
    #[serde(flatten)]
    pub preferred_az_ids: HashMap<TenantShardId, String>,
 }
 #[derive(Serialize, Deserialize)]
 pub struct ShardsPreferredAzsResponse {
    pub updated: Vec<TenantShardId>,
 }
 #[derive(Serialize, Deserialize, Debug)]
 pub struct TenantLocateResponseShard {
    pub shard_id: TenantShardId,
@@ -112,21 +101,6 @@ pub struct TenantDescribeResponse {
    pub config: TenantConfig,
 }
 #[derive(Serialize, Deserialize, Debug)]
 pub struct NodeShardResponse {
    pub node_id: NodeId,
    pub shards: Vec<NodeShard>,
 }
 #[derive(Serialize, Deserialize, Debug)]
 pub struct NodeShard {
    pub tenant_shard_id: TenantShardId,
    /// Whether the shard is observed secondary on a specific node. True = yes, False = no, None = not on this node.
    pub is_observed_secondary: Option<bool>,
    /// Whether the shard is intended to be a secondary on a specific node. True = yes, False = no, None = not on this node.
    pub is_intended_secondary: Option<bool>,
 }
 #[derive(Serialize, Deserialize)]
 pub struct NodeDescribeResponse {
    pub id: NodeId,
@@ -158,12 +132,8 @@ pub struct TenantDescribeResponseShard {
    pub is_splitting: bool,
    pub scheduling_policy: ShardSchedulingPolicy,
    pub preferred_az_id: Option<String>,
 }
 /// Migration request for a given tenant shard to a given node.
 ///
 /// Explicitly migrating a particular shard is a low level operation
 /// TODO: higher level "Reschedule tenant" operation where the request
 /// specifies some constraints, e.g. asking it to get off particular node(s)
--- a/libs/pageserver_api/src/key.rs
+++ b/libs/pageserver_api/src/key.rs
@@ -263,6 +263,15 @@ impl Key {
        field5: u8::MAX,
        field6: u32::MAX,
    };
    /// A key slightly smaller than [`Key::MAX`] for use in layer key ranges to avoid them to be confused with L0 layers
    pub const NON_L0_MAX: Key = Key {
        field1: u8::MAX,
        field2: u32::MAX,
        field3: u32::MAX,
        field4: u32::MAX,
        field5: u8::MAX,
        field6: u32::MAX - 1,
    };
    pub fn from_hex(s: &str) -> Result<Self> {
        if s.len() != 36 {
--- a/libs/pageserver_api/src/models.rs
+++ b/libs/pageserver_api/src/models.rs
@@ -6,7 +6,6 @@ pub use utilization::PageserverUtilization;
 use std::{
    collections::HashMap,
    fmt::Display,
    io::{BufRead, Read},
    num::{NonZeroU32, NonZeroU64, NonZeroUsize},
    str::FromStr,
@@ -62,7 +61,7 @@ use bytes::{Buf, BufMut, Bytes, BytesMut};
    serde::Serialize,
    serde::Deserialize,
    strum_macros::Display,
-    strum_macros::VariantNames,
+    strum_macros::EnumVariantNames,
    strum_macros::AsRefStr,
    strum_macros::IntoStaticStr,
 )]
@@ -305,10 +304,8 @@ pub struct TenantConfig {
    pub lsn_lease_length_for_ts: Option<String>,
 }
-/// The policy for the aux file storage.
+/// The policy for the aux file storage. It can be switched through `switch_aux_file_policy`
-///
+/// tenant config. When the first aux file written, the policy will be persisted in the
 /// It can be switched through `switch_aux_file_policy` tenant config.
 /// When the first aux file written, the policy will be persisted in the
 /// `index_part.json` file and has a limited migration path.
 ///
 /// Currently, we only allow the following migration path:
@@ -438,9 +435,7 @@ pub enum CompactionAlgorithm {
    Tiered,
 }
-#[derive(
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
    Debug, Clone, Copy, PartialEq, Eq, serde_with::DeserializeFromStr, serde_with::SerializeDisplay,
 )]
 pub enum ImageCompressionAlgorithm {
    // Disabled for writes, support decompressing during read path
    Disabled,
@@ -475,33 +470,11 @@ impl FromStr for ImageCompressionAlgorithm {
    }
 }
 impl Display for ImageCompressionAlgorithm {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            ImageCompressionAlgorithm::Disabled => write!(f, "disabled"),
            ImageCompressionAlgorithm::Zstd { level } => {
                if let Some(level) = level {
                    write!(f, "zstd({})", level)
                } else {
                    write!(f, "zstd")
                }
            }
        }
    }
 }
 #[derive(Eq, PartialEq, Debug, Clone, Serialize, Deserialize)]
 pub struct CompactionAlgorithmSettings {
    pub kind: CompactionAlgorithm,
 }
 #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)]
 #[serde(tag = "mode", rename_all = "kebab-case", deny_unknown_fields)]
 pub enum L0FlushConfig {
    #[serde(rename_all = "snake_case")]
    Direct { max_concurrency: NonZeroUsize },
 }
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
 pub struct EvictionPolicyLayerAccessThreshold {
    #[serde(with = "humantime_serde")]
@@ -898,9 +871,7 @@ pub struct WalRedoManagerStatus {
    pub process: Option<WalRedoManagerProcessStatus>,
 }
-/// The progress of a secondary tenant.
+/// The progress of a secondary tenant is mostly useful when doing a long running download: e.g. initiating
 ///
 /// It is mostly useful when doing a long running download: e.g. initiating
 /// a download job, timing out while waiting for it to run, and then inspecting this status to understand
 /// what's happening.
 #[derive(Default, Debug, Serialize, Deserialize, Clone)]
@@ -1685,33 +1656,21 @@ mod tests {
    #[test]
    fn test_image_compression_algorithm_parsing() {
        use ImageCompressionAlgorithm::*;
-        let cases = [
+        assert_eq!(
-            ("disabled", Disabled),
+            ImageCompressionAlgorithm::from_str("disabled").unwrap(),
-            ("zstd", Zstd { level: None }),
+            Disabled
-            ("zstd(18)", Zstd { level: Some(18) }),
+        );
-            ("zstd(-3)", Zstd { level: Some(-3) }),
+        assert_eq!(
-        ];
+            ImageCompressionAlgorithm::from_str("zstd").unwrap(),
-
+            Zstd { level: None }
-        for (display, expected) in cases {
+        );
-            assert_eq!(
+        assert_eq!(
-                ImageCompressionAlgorithm::from_str(display).unwrap(),
+            ImageCompressionAlgorithm::from_str("zstd(18)").unwrap(),
-                expected,
+            Zstd { level: Some(18) }
-                "parsing works"
+        );
-            );
+        assert_eq!(
-            assert_eq!(format!("{expected}"), display, "Display FromStr roundtrip");
+            ImageCompressionAlgorithm::from_str("zstd(-3)").unwrap(),
-
+            Zstd { level: Some(-3) }
-            let ser = serde_json::to_string(&expected).expect("serialization");
+        );
            assert_eq!(
                serde_json::from_str::<ImageCompressionAlgorithm>(&ser).unwrap(),
                expected,
                "serde roundtrip"
            );
            assert_eq!(
                serde_json::Value::String(display.to_string()),
                serde_json::to_value(expected).unwrap(),
                "Display is the serde serialization"
            );
        }
    }
 }
--- a/libs/pageserver_api/src/models/utilization.rs
+++ b/libs/pageserver_api/src/models/utilization.rs
@@ -89,19 +89,8 @@ impl PageserverUtilization {
    /// If a node is currently hosting more work than it can comfortably handle.  This does not indicate that
    /// it will fail, but it is a strong signal that more work should not be added unless there is no alternative.
    ///
    /// When a node is overloaded, we may override soft affinity preferences and do things like scheduling
    /// into a node in a less desirable AZ, if all the nodes in the preferred AZ are overloaded.
    pub fn is_overloaded(score: RawScore) -> bool {
-        // Why the factor of two?  This is unscientific but reflects behavior of real systems:
+        score >= Self::UTILIZATION_FULL
        // - In terms of shard counts, a node's preferred max count is a soft limit intended to keep
        //   startup and housekeeping jobs nice and responsive.  We can go to double this limit if needed
        //   until some more nodes are deployed.
        // - In terms of disk space, the node's utilization heuristic assumes every tenant needs to
        //   hold its biggest timeline fully on disk, which is tends to be an over estimate when
        //   some tenants are very idle and have dropped layers from disk.  In practice going up to
        //   double is generally better than giving up and scheduling in a sub-optimal AZ.
        score >= 2 * Self::UTILIZATION_FULL
    }
    pub fn adjust_shard_count_max(&mut self, shard_count: u32) {
--- a/libs/postgres_backend/src/lib.rs
+++ b/libs/postgres_backend/src/lib.rs
@@ -69,10 +69,8 @@ impl QueryError {
 }
 /// Returns true if the given error is a normal consequence of a network issue,
-/// or the client closing the connection.
+/// or the client closing the connection. These errors can happen during normal
-///
+/// operations, and don't indicate a bug in our code.
 /// These errors can happen during normal operations,
 /// and don't indicate a bug in our code.
 pub fn is_expected_io_error(e: &io::Error) -> bool {
    use io::ErrorKind::*;
    matches!(
@@ -81,16 +79,17 @@ pub fn is_expected_io_error(e: &io::Error) -> bool {
    )
 }
 #[async_trait::async_trait]
 pub trait Handler<IO> {
    /// Handle single query.
    /// postgres_backend will issue ReadyForQuery after calling this (this
    /// might be not what we want after CopyData streaming, but currently we don't
    /// care). It will also flush out the output buffer.
-    fn process_query(
+    async fn process_query(
        &mut self,
        pgb: &mut PostgresBackend<IO>,
        query_string: &str,
-    ) -> impl Future<Output = Result<(), QueryError>>;
+    ) -> Result<(), QueryError>;
    /// Called on startup packet receival, allows to process params.
    ///
--- a/libs/postgres_backend/tests/simple_select.rs
+++ b/libs/postgres_backend/tests/simple_select.rs
@@ -23,6 +23,7 @@ async fn make_tcp_pair() -> (TcpStream, TcpStream) {
 struct TestHandler {}
 #[async_trait::async_trait]
 impl<IO: AsyncRead + AsyncWrite + Unpin + Send> Handler<IO> for TestHandler {
    // return single col 'hey' for any query
    async fn process_query(
--- a/libs/postgres_connection/src/lib.rs
+++ b/libs/postgres_connection/src/lib.rs
@@ -7,7 +7,6 @@ use std::fmt;
 use url::Host;
 /// Parses a string of format either `host:port` or `host` into a corresponding pair.
 ///
 /// The `host` part should be a correct `url::Host`, while `port` (if present) should be
 /// a valid decimal u16 of digits only.
 pub fn parse_host_port<S: AsRef<str>>(host_port: S) -> Result<(Host, Option<u16>), anyhow::Error> {
--- a/libs/postgres_ffi/build.rs
+++ b/libs/postgres_ffi/build.rs
@@ -14,7 +14,7 @@ impl ParseCallbacks for PostgresFfiCallbacks {
    fn include_file(&self, filename: &str) {
        // This does the equivalent of passing bindgen::CargoCallbacks
        // to the builder .parse_callbacks() method.
-        let cargo_callbacks = bindgen::CargoCallbacks::new();
+        let cargo_callbacks = bindgen::CargoCallbacks;
        cargo_callbacks.include_file(filename)
    }
@@ -121,7 +121,6 @@ fn main() -> anyhow::Result<()> {
            .allowlist_type("XLogPageHeaderData")
            .allowlist_type("XLogLongPageHeaderData")
            .allowlist_var("XLOG_PAGE_MAGIC")
            .allowlist_var("PG_MAJORVERSION_NUM")
            .allowlist_var("PG_CONTROL_FILE_SIZE")
            .allowlist_var("PG_CONTROLFILEDATA_OFFSETOF_CRC")
            .allowlist_type("PageHeaderData")
--- a/libs/postgres_ffi/src/lib.rs
+++ b/libs/postgres_ffi/src/lib.rs
@@ -44,9 +44,6 @@ macro_rules! postgres_ffi {
            // Re-export some symbols from bindings
            pub use bindings::DBState_DB_SHUTDOWNED;
            pub use bindings::{CheckPoint, ControlFileData, XLogRecord};
            pub const ZERO_CHECKPOINT: bytes::Bytes =
                bytes::Bytes::from_static(&[0u8; xlog_utils::SIZEOF_CHECKPOINT]);
        }
    };
 }
@@ -109,107 +106,6 @@ macro_rules! dispatch_pgversion {
    };
 }
 #[macro_export]
 macro_rules! enum_pgversion_dispatch {
    ($name:expr, $typ:ident, $bind:ident, $code:block) => {
        enum_pgversion_dispatch!(
            name = $name,
            bind = $bind,
            typ = $typ,
            code = $code,
            pgversions = [
                V14 : v14,
                V15 : v15,
                V16 : v16,
            ]
        )
    };
    (name = $name:expr,
     bind = $bind:ident,
     typ = $typ:ident,
     code = $code:block,
     pgversions = [$($variant:ident : $md:ident),+ $(,)?]) => {
        match $name {
            $(
            self::$typ::$variant($bind) => {
                use $crate::$md as pgv;
                $code
            }
            ),+,
        }
    };
 }
 #[macro_export]
 macro_rules! enum_pgversion {
    {$name:ident, pgv :: $t:ident} => {
        enum_pgversion!{
            name = $name,
            typ = $t,
            pgversions = [
                V14 : v14,
                V15 : v15,
                V16 : v16,
            ]
        }
    };
    {$name:ident, pgv :: $p:ident :: $t:ident} => {
        enum_pgversion!{
            name = $name,
            path = $p,
            typ = $t,
            pgversions = [
                V14 : v14,
                V15 : v15,
                V16 : v16,
            ]
        }
    };
    {name = $name:ident,
     typ = $t:ident,
     pgversions = [$($variant:ident : $md:ident),+ $(,)?]} => {
        pub enum $name {
            $($variant ( $crate::$md::$t )),+
        }
        impl self::$name {
            pub fn pg_version(&self) -> u32 {
                enum_pgversion_dispatch!(self, $name, _ign, {
                    pgv::bindings::PG_MAJORVERSION_NUM
                })
            }
        }
        $(
        impl Into<self::$name> for $crate::$md::$t {
            fn into(self) -> self::$name {
                self::$name::$variant (self)
            }
        }
        )+
    };
    {name = $name:ident,
     path = $p:ident,
     typ = $t:ident,
     pgversions = [$($variant:ident : $md:ident),+ $(,)?]} => {
        pub enum $name {
            $($variant ($crate::$md::$p::$t)),+
        }
        impl $name {
            pub fn pg_version(&self) -> u32 {
                enum_pgversion_dispatch!(self, $name, _ign, {
                    pgv::bindings::PG_MAJORVERSION_NUM
                })
            }
        }
        $(
        impl Into<$name> for $crate::$md::$p::$t {
            fn into(self) -> $name {
                $name::$variant (self)
            }
        }
        )+
    };
 }
 pub mod pg_constants;
 pub mod relfile_utils;
--- a/libs/remote_storage/src/config.rs
+++ b/libs/remote_storage/src/config.rs
@@ -185,7 +185,7 @@ mod tests {
    use super::*;
    fn parse(input: &str) -> anyhow::Result<RemoteStorageConfig> {
-        let toml = input.parse::<toml_edit::DocumentMut>().unwrap();
+        let toml = input.parse::<toml_edit::Document>().unwrap();
        RemoteStorageConfig::from_toml(toml.as_item())
    }
@@ -235,31 +235,6 @@ timeout = '5s'";
        );
    }
    #[test]
    fn test_storage_class_serde_roundtrip() {
        let classes = [
            None,
            Some(StorageClass::Standard),
            Some(StorageClass::IntelligentTiering),
        ];
        for class in classes {
            #[derive(Serialize, Deserialize)]
            struct Wrapper {
                #[serde(
                    deserialize_with = "deserialize_storage_class",
                    serialize_with = "serialize_storage_class"
                )]
                class: Option<StorageClass>,
            }
            let wrapped = Wrapper {
                class: class.clone(),
            };
            let serialized = serde_json::to_string(&wrapped).unwrap();
            let deserialized: Wrapper = serde_json::from_str(&serialized).unwrap();
            assert_eq!(class, deserialized.class);
        }
    }
    #[test]
    fn test_azure_parsing() {
        let toml = "\
--- a/libs/remote_storage/src/lib.rs
+++ b/libs/remote_storage/src/lib.rs
@@ -45,8 +45,6 @@ pub use azure_core::Etag;
 pub use error::{DownloadError, TimeTravelError, TimeoutOrCancel};
 /// Default concurrency limit for S3 operations
 ///
 /// Currently, sync happens with AWS S3, that has two limits on requests per second:
 /// ~200 RPS for IAM services
 /// <https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/UsingWithRDS.IAMDBAuth.html>
@@ -302,9 +300,7 @@ pub trait RemoteStorage: Send + Sync + 'static {
    ) -> Result<(), TimeTravelError>;
 }
-/// Data part of an ongoing [`Download`].
+/// DownloadStream is sensitive to the timeout and cancellation used with the original
 ///
 /// `DownloadStream` is sensitive to the timeout and cancellation used with the original
 /// [`RemoteStorage::download`] request. The type yields `std::io::Result<Bytes>` to be compatible
 /// with `tokio::io::copy_buf`.
 // This has 'static because safekeepers do not use cancellation tokens (yet)
--- a/libs/safekeeper_api/src/models.rs
+++ b/libs/safekeeper_api/src/models.rs
@@ -60,16 +60,3 @@ pub struct TimelineCopyRequest {
    pub target_timeline_id: TimelineId,
    pub until_lsn: Lsn,
 }
 #[derive(Debug, Clone, Deserialize, Serialize)]
 pub struct TimelineTermBumpRequest {
    /// bump to
    pub term: Option<u64>,
 }
 #[derive(Debug, Clone, Deserialize, Serialize)]
 pub struct TimelineTermBumpResponse {
    // before the request
    pub previous_term: u64,
    pub current_term: u64,
 }
--- a/libs/tenant_size_model/src/lib.rs
+++ b/libs/tenant_size_model/src/lib.rs
@@ -5,10 +5,9 @@
 mod calculation;
 pub mod svg;
-/// StorageModel is the input to the synthetic size calculation.
+/// StorageModel is the input to the synthetic size calculation. It represents
-///
+/// a tree of timelines, with just the information that's needed for the
-/// It represents a tree of timelines, with just the information that's needed
+/// calculation. This doesn't track timeline names or where each timeline
 /// for the calculation. This doesn't track timeline names or where each timeline
 /// begins and ends, for example. Instead, it consists of "points of interest"
 /// on the timelines. A point of interest could be the timeline start or end point,
 /// the oldest point on a timeline that needs to be retained because of PITR
--- a/libs/utils/src/circuit_breaker.rs
+++ b/libs/utils/src/circuit_breaker.rs
@@ -5,10 +5,8 @@ use std::{
 use metrics::IntCounter;
-/// Circuit breakers are for operations that are expensive and fallible.
+/// Circuit breakers are for operations that are expensive and fallible: if they fail repeatedly,
-///
+/// we will stop attempting them for some period of time, to avoid denial-of-service from retries, and
 /// If a circuit breaker fails repeatedly, we will stop attempting it for some
 /// period of time, to avoid denial-of-service from retries, and
 /// to mitigate the log spam from repeated failures.
 pub struct CircuitBreaker {
    /// An identifier that enables us to log useful errors when a circuit is broken
--- a/libs/utils/src/crashsafe.rs
+++ b/libs/utils/src/crashsafe.rs
@@ -1,4 +1,3 @@
 use std::os::fd::AsRawFd;
 use std::{
    borrow::Cow,
    fs::{self, File},
@@ -204,27 +203,6 @@ pub fn overwrite(
    Ok(())
 }
 /// Syncs the filesystem for the given file descriptor.
 #[cfg_attr(target_os = "macos", allow(unused_variables))]
 pub fn syncfs(fd: impl AsRawFd) -> anyhow::Result<()> {
    // Linux guarantees durability for syncfs.
    // POSIX doesn't have syncfs, and further does not actually guarantee durability of sync().
    #[cfg(target_os = "linux")]
    {
        use anyhow::Context;
        nix::unistd::syncfs(fd.as_raw_fd()).context("syncfs")?;
    }
    #[cfg(target_os = "macos")]
    {
        // macOS is not a production platform for Neon, don't even bother.
    }
    #[cfg(not(any(target_os = "linux", target_os = "macos")))]
    {
        compile_error!("Unsupported OS");
    }
    Ok(())
 }
 #[cfg(test)]
 mod tests {
--- a/libs/utils/src/id.rs
+++ b/libs/utils/src/id.rs
@@ -249,10 +249,8 @@ macro_rules! id_newtype {
    };
 }
-/// Neon timeline ID.
+/// Neon timeline IDs are different from PostgreSQL timeline
-///
+/// IDs. They serve a similar purpose though: they differentiate
 /// They are different from PostgreSQL timeline
 /// IDs, but serve a similar purpose: they differentiate
 /// between different "histories" of the same cluster.  However,
 /// PostgreSQL timeline IDs are a bit cumbersome, because they are only
 /// 32-bits wide, and they must be in ascending order in any given
--- a/libs/utils/src/lock_file.rs
+++ b/libs/utils/src/lock_file.rs
@@ -100,9 +100,7 @@ pub enum LockFileRead {
 }
 /// Open & try to lock the lock file at the given `path`, returning a [handle][`LockFileRead`] to
-/// inspect its content.
+/// inspect its content. It is not an `Err(...)` if the file does not exist or is already locked.
 ///
 /// It is not an `Err(...)` if the file does not exist or is already locked.
 /// Check the [`LockFileRead`] variants for details.
 pub fn read_and_hold_lock_file(path: &Utf8Path) -> anyhow::Result<LockFileRead> {
    let res = fs::OpenOptions::new().read(true).open(path);
--- a/libs/utils/src/logging.rs
+++ b/libs/utils/src/logging.rs
@@ -3,9 +3,9 @@ use std::str::FromStr;
 use anyhow::Context;
 use metrics::{IntCounter, IntCounterVec};
 use once_cell::sync::Lazy;
-use strum_macros::{EnumString, VariantNames};
+use strum_macros::{EnumString, EnumVariantNames};
-#[derive(EnumString, strum_macros::Display, VariantNames, Eq, PartialEq, Debug, Clone, Copy)]
+#[derive(EnumString, EnumVariantNames, Eq, PartialEq, Debug, Clone, Copy)]
 #[strum(serialize_all = "snake_case")]
 pub enum LogFormat {
    Plain,
@@ -188,7 +188,7 @@ impl Drop for TracingPanicHookGuard {
 }
 /// Named symbol for our panic hook, which logs the panic.
-fn tracing_panic_hook(info: &std::panic::PanicHookInfo) {
+fn tracing_panic_hook(info: &std::panic::PanicInfo) {
    // following rust 1.66.1 std implementation:
    // https://github.com/rust-lang/rust/blob/90743e7298aca107ddaa0c202a4d3604e29bfeb6/library/std/src/panicking.rs#L235-L288
    let location = info.location();
@@ -274,14 +274,6 @@ impl From<String> for SecretString {
    }
 }
 impl FromStr for SecretString {
    type Err = std::convert::Infallible;
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Ok(Self(s.to_string()))
    }
 }
 impl std::fmt::Debug for SecretString {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "[SECRET]")
--- a/libs/utils/src/pageserver_feedback.rs
+++ b/libs/utils/src/pageserver_feedback.rs
@@ -8,7 +8,6 @@ use tracing::{trace, warn};
 use crate::lsn::Lsn;
 /// Feedback pageserver sends to safekeeper and safekeeper resends to compute.
 ///
 /// Serialized in custom flexible key/value format. In replication protocol, it
 /// is marked with NEON_STATUS_UPDATE_TAG_BYTE to differentiate from postgres
 /// Standby status update / Hot standby feedback messages.
--- a/libs/utils/src/poison.rs
+++ b/libs/utils/src/poison.rs
@@ -65,8 +65,6 @@ impl<T> Poison<T> {
    }
 }
 /// Armed pointer to a [`Poison`].
 ///
 /// Use [`Self::data`] and [`Self::data_mut`] to access the wrapped state.
 /// Once modifications are done, use [`Self::disarm`].
 /// If [`Guard`] gets dropped instead of calling [`Self::disarm`], the state is poisoned
--- a/libs/utils/src/shard.rs
+++ b/libs/utils/src/shard.rs
@@ -13,11 +13,10 @@ pub struct ShardNumber(pub u8);
 #[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Copy, Serialize, Deserialize, Debug, Hash)]
 pub struct ShardCount(pub u8);
-/// Combination of ShardNumber and ShardCount.
+/// Combination of ShardNumber and ShardCount.  For use within the context of a particular tenant,
-///
+/// when we need to know which shard we're dealing with, but do not need to know the full
-/// For use within the context of a particular tenant, when we need to know which shard we're
+/// ShardIdentity (because we won't be doing any page->shard mapping), and do not need to know
-/// dealing with, but do not need to know the full ShardIdentity (because we won't be doing
+/// the fully qualified TenantShardId.
 /// any page->shard mapping), and do not need to know the fully qualified TenantShardId.
 #[derive(Eq, PartialEq, PartialOrd, Ord, Clone, Copy, Hash)]
 pub struct ShardIndex {
    pub shard_number: ShardNumber,
--- a/libs/utils/src/simple_rcu.rs
+++ b/libs/utils/src/simple_rcu.rs
@@ -49,11 +49,12 @@ use std::sync::{RwLock, RwLockWriteGuard};
 use tokio::sync::watch;
 /// Rcu allows multiple readers to read and hold onto a value without blocking
 /// (for very long).
 ///
-/// Storing to the Rcu updates the value, making new readers immediately see
+/// Rcu allows multiple readers to read and hold onto a value without blocking
-/// the new value, but it also waits for all current readers to finish.
+/// (for very long).  Storing to the Rcu updates the value, making new readers
 /// immediately see the new value, but it also waits for all current readers to
 /// finish.
 ///
 pub struct Rcu<V> {
    inner: RwLock<RcuInner<V>>,
 }
--- a/libs/utils/src/sync/heavier_once_cell.rs
+++ b/libs/utils/src/sync/heavier_once_cell.rs
@@ -5,9 +5,7 @@ use std::sync::{
 use tokio::sync::Semaphore;
 /// Custom design like [`tokio::sync::OnceCell`] but using [`OwnedSemaphorePermit`] instead of
-/// `SemaphorePermit`.
+/// `SemaphorePermit`, allowing use of `take` which does not require holding an outer mutex guard
 ///
 /// Allows use of `take` which does not require holding an outer mutex guard
 /// for the duration of initialization.
 ///
 /// Has no unsafe, builds upon [`tokio::sync::Semaphore`] and [`std::sync::Mutex`].
--- a/libs/utils/src/toml_edit_ext.rs
+++ b/libs/utils/src/toml_edit_ext.rs
@@ -10,7 +10,7 @@ pub fn deserialize_item<T>(item: &toml_edit::Item) -> Result<T, Error>
 where
    T: serde::de::DeserializeOwned,
 {
-    let document: toml_edit::DocumentMut = match item {
+    let document: toml_edit::Document = match item {
        toml_edit::Item::Table(toml) => toml.clone().into(),
        toml_edit::Item::Value(toml_edit::Value::InlineTable(toml)) => {
            toml.clone().into_table().into()
--- a/libs/utils/src/vec_map.rs
+++ b/libs/utils/src/vec_map.rs
@@ -7,7 +7,6 @@ pub enum VecMapOrdering {
 }
 /// Ordered map datastructure implemented in a Vec.
 ///
 /// Append only - can only add keys that are larger than the
 /// current max key.
 /// Ordering can be adjusted using [`VecMapOrdering`]
--- a/libs/utils/src/yielding_loop.rs
+++ b/libs/utils/src/yielding_loop.rs
@@ -6,10 +6,9 @@ pub enum YieldingLoopError {
    Cancelled,
 }
-/// Helper for long synchronous loops, e.g. over all tenants in the system.
+/// Helper for long synchronous loops, e.g. over all tenants in the system.  Periodically
-///
+/// yields to avoid blocking the executor, and after resuming checks the provided
-/// Periodically yields to avoid blocking the executor, and after resuming
+/// cancellation token to drop out promptly on shutdown.
 /// checks the provided cancellation token to drop out promptly on shutdown.
 #[inline(always)]
 pub async fn yielding_loop<I, T, F>(
    interval: usize,
@@ -24,7 +23,7 @@ where
    for (i, item) in iter.enumerate() {
        visitor(item);
-        if (i + 1) % interval == 0 {
+        if i + 1 % interval == 0 {
            tokio::task::yield_now().await;
            if cancel.is_cancelled() {
                return Err(YieldingLoopError::Cancelled);
--- a/libs/walproposer/build.rs
+++ b/libs/walproposer/build.rs
@@ -4,6 +4,7 @@
 use std::{env, path::PathBuf, process::Command};
 use anyhow::{anyhow, Context};
 use bindgen::CargoCallbacks;
 fn main() -> anyhow::Result<()> {
    // Tell cargo to invalidate the built crate whenever the wrapper changes
@@ -63,25 +64,16 @@ fn main() -> anyhow::Result<()> {
            .map_err(|s| anyhow!("Bad postgres server path {s:?}"))?
    };
    let unwind_abi_functions = [
        "log_internal",
        "recovery_download",
        "start_streaming",
        "finish_sync_safekeepers",
        "wait_event_set",
        "WalProposerStart",
    ];
    // The bindgen::Builder is the main entry point
    // to bindgen, and lets you build up options for
    // the resulting bindings.
-    let mut builder = bindgen::Builder::default()
+    let bindings = bindgen::Builder::default()
        // The input header we would like to generate
        // bindings for.
        .header("bindgen_deps.h")
        // Tell cargo to invalidate the built crate whenever any of the
        // included header files changed.
-        .parse_callbacks(Box::new(bindgen::CargoCallbacks::new()))
+        .parse_callbacks(Box::new(CargoCallbacks))
        .allowlist_type("WalProposer")
        .allowlist_type("WalProposerConfig")
        .allowlist_type("walproposer_api")
@@ -113,12 +105,7 @@ fn main() -> anyhow::Result<()> {
        .allowlist_var("WL_SOCKET_MASK")
        .clang_arg("-DWALPROPOSER_LIB")
        .clang_arg(format!("-I{pgxn_neon}"))
-        .clang_arg(format!("-I{inc_server_path}"));
+        .clang_arg(format!("-I{inc_server_path}"))
    for name in unwind_abi_functions {
        builder = builder.override_abi(bindgen::Abi::CUnwind, name);
    }
    let bindings = builder
        // Finish the builder and generate the bindings.
        .generate()
        // Unwrap the Result and panic on failure.
--- a/libs/walproposer/src/api_bindings.rs
+++ b/libs/walproposer/src/api_bindings.rs
@@ -33,7 +33,7 @@ extern "C" fn get_shmem_state(wp: *mut WalProposer) -> *mut WalproposerShmemStat
    }
 }
-extern "C-unwind" fn start_streaming(wp: *mut WalProposer, startpos: XLogRecPtr) {
+extern "C" fn start_streaming(wp: *mut WalProposer, startpos: XLogRecPtr) {
    unsafe {
        let callback_data = (*(*wp).config).callback_data;
        let api = callback_data as *mut Box<dyn ApiImpl>;
@@ -187,7 +187,7 @@ extern "C" fn conn_blocking_write(
    }
 }
-extern "C-unwind" fn recovery_download(wp: *mut WalProposer, sk: *mut Safekeeper) -> bool {
+extern "C" fn recovery_download(wp: *mut WalProposer, sk: *mut Safekeeper) -> bool {
    unsafe {
        let callback_data = (*(*(*sk).wp).config).callback_data;
        let api = callback_data as *mut Box<dyn ApiImpl>;
@@ -272,7 +272,7 @@ extern "C" fn rm_safekeeper_event_set(sk: *mut Safekeeper) {
    }
 }
-extern "C-unwind" fn wait_event_set(
+extern "C" fn wait_event_set(
    wp: *mut WalProposer,
    timeout: ::std::os::raw::c_long,
    event_sk: *mut *mut Safekeeper,
@@ -324,7 +324,7 @@ extern "C" fn get_redo_start_lsn(wp: *mut WalProposer) -> XLogRecPtr {
    }
 }
-extern "C-unwind" fn finish_sync_safekeepers(wp: *mut WalProposer, lsn: XLogRecPtr) {
+extern "C" fn finish_sync_safekeepers(wp: *mut WalProposer, lsn: XLogRecPtr) {
    unsafe {
        let callback_data = (*(*wp).config).callback_data;
        let api = callback_data as *mut Box<dyn ApiImpl>;
@@ -340,7 +340,7 @@ extern "C" fn process_safekeeper_feedback(wp: *mut WalProposer, sk: *mut Safekee
    }
 }
-extern "C-unwind" fn log_internal(
+extern "C" fn log_internal(
    wp: *mut WalProposer,
    level: ::std::os::raw::c_int,
    line: *const ::std::os::raw::c_char,
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -8,7 +8,7 @@ license.workspace = true
 default = []
 # Enables test-only APIs, incuding failpoints. In particular, enables the `fail_point!` macro,
 # which adds some runtime cost to run tests on outage conditions
-testing = ["fail/failpoints", "pageserver_api/testing" ]
+testing = ["fail/failpoints"]
 [dependencies]
 anyhow.workspace = true
@@ -101,7 +101,6 @@ procfs.workspace = true
 criterion.workspace = true
 hex-literal.workspace = true
 tokio = { workspace = true, features = ["process", "sync", "fs", "rt", "io-util", "time", "test-util"] }
 indoc.workspace = true
 [[bench]]
 name = "bench_layer_map"
--- a/pageserver/benches/bench_ingest.rs
+++ b/pageserver/benches/bench_ingest.rs
@@ -4,7 +4,7 @@ use bytes::Bytes;
 use camino::Utf8PathBuf;
 use criterion::{criterion_group, criterion_main, Criterion};
 use pageserver::{
-    config::PageServerConf,
+    config::{defaults::DEFAULT_IO_BUFFER_ALIGNMENT, PageServerConf},
    context::{DownloadBehavior, RequestContext},
    l0_flush::{L0FlushConfig, L0FlushGlobalState},
    page_cache,
@@ -167,7 +167,7 @@ fn criterion_benchmark(c: &mut Criterion) {
    virtual_file::init(
        16384,
        virtual_file::io_engine_for_bench(),
-        pageserver_api::config::defaults::DEFAULT_IO_BUFFER_ALIGNMENT,
+        DEFAULT_IO_BUFFER_ALIGNMENT,
    );
    page_cache::init(conf.page_cache_size);
--- a/pageserver/client/src/lib.rs
+++ b/pageserver/client/src/lib.rs
@@ -1,20 +1,2 @@
 pub mod mgmt_api;
 pub mod page_service;
 /// For timeline_block_unblock_gc, distinguish the two different operations. This could be a bool.
 // If file structure is per-kind not per-feature then where to put this?
 #[derive(Clone, Copy)]
 pub enum BlockUnblock {
    Block,
    Unblock,
 }
 impl std::fmt::Display for BlockUnblock {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let s = match self {
            BlockUnblock::Block => "block",
            BlockUnblock::Unblock => "unblock",
        };
        f.write_str(s)
    }
 }
--- a/pageserver/client/src/mgmt_api.rs
+++ b/pageserver/client/src/mgmt_api.rs
@@ -12,8 +12,6 @@ use utils::{
 pub use reqwest::Body as ReqwestBody;
 use crate::BlockUnblock;
 pub mod util;
 #[derive(Debug, Clone)]
@@ -456,20 +454,6 @@ impl Client {
            .map_err(Error::ReceiveBody)
    }
    pub async fn timeline_block_unblock_gc(
        &self,
        tenant_shard_id: TenantShardId,
        timeline_id: TimelineId,
        dir: BlockUnblock,
    ) -> Result<()> {
        let uri = format!(
            "{}/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/{dir}_gc",
            self.mgmt_api_endpoint,
        );
        self.request(Method::POST, &uri, ()).await.map(|_| ())
    }
    pub async fn tenant_reset(&self, tenant_shard_id: TenantShardId) -> Result<()> {
        let uri = format!(
            "{}/v1/tenant/{}/reset",
--- a/pageserver/ctl/src/layer_map_analyzer.rs
+++ b/pageserver/ctl/src/layer_map_analyzer.rs
@@ -4,6 +4,7 @@
 use anyhow::Result;
 use camino::{Utf8Path, Utf8PathBuf};
 use pageserver::config::defaults::DEFAULT_IO_BUFFER_ALIGNMENT;
 use pageserver::context::{DownloadBehavior, RequestContext};
 use pageserver::task_mgr::TaskKind;
 use pageserver::tenant::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME};
@@ -147,7 +148,7 @@ pub(crate) async fn main(cmd: &AnalyzeLayerMapCmd) -> Result<()> {
    pageserver::virtual_file::init(
        10,
        virtual_file::api::IoEngineKind::StdFs,
-        pageserver_api::config::defaults::DEFAULT_IO_BUFFER_ALIGNMENT,
+        DEFAULT_IO_BUFFER_ALIGNMENT,
    );
    pageserver::page_cache::init(100);
--- a/pageserver/ctl/src/layers.rs
+++ b/pageserver/ctl/src/layers.rs
@@ -3,6 +3,7 @@ use std::path::{Path, PathBuf};
 use anyhow::Result;
 use camino::{Utf8Path, Utf8PathBuf};
 use clap::Subcommand;
 use pageserver::config::defaults::DEFAULT_IO_BUFFER_ALIGNMENT;
 use pageserver::context::{DownloadBehavior, RequestContext};
 use pageserver::task_mgr::TaskKind;
 use pageserver::tenant::block_io::BlockCursor;
@@ -193,7 +194,7 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> {
            pageserver::virtual_file::init(
                10,
                virtual_file::api::IoEngineKind::StdFs,
-                pageserver_api::config::defaults::DEFAULT_IO_BUFFER_ALIGNMENT,
+                DEFAULT_IO_BUFFER_ALIGNMENT,
            );
            pageserver::page_cache::init(100);
--- a/pageserver/ctl/src/main.rs
+++ b/pageserver/ctl/src/main.rs
@@ -20,13 +20,14 @@ use clap::{Parser, Subcommand};
 use index_part::IndexPartCmd;
 use layers::LayerCmd;
 use pageserver::{
    config::defaults::DEFAULT_IO_BUFFER_ALIGNMENT,
    context::{DownloadBehavior, RequestContext},
    page_cache,
    task_mgr::TaskKind,
    tenant::{dump_layerfile_from_path, metadata::TimelineMetadata},
    virtual_file,
 };
-use pageserver_api::{config::defaults::DEFAULT_IO_BUFFER_ALIGNMENT, shard::TenantShardId};
+use pageserver_api::shard::TenantShardId;
 use postgres_ffi::ControlFileData;
 use remote_storage::{RemotePath, RemoteStorageConfig};
 use tokio_util::sync::CancellationToken;
@@ -174,7 +175,7 @@ async fn main() -> anyhow::Result<()> {
                println!("specified prefix '{}' failed validation", cmd.prefix);
                return Ok(());
            };
-            let toml_document = toml_edit::DocumentMut::from_str(&cmd.config_toml_str)?;
+            let toml_document = toml_edit::Document::from_str(&cmd.config_toml_str)?;
            let toml_item = toml_document
                .get("remote_storage")
                .expect("need remote_storage");
--- a/pageserver/src/bin/pageserver.rs
+++ b/pageserver/src/bin/pageserver.rs
@@ -5,7 +5,6 @@
 use std::env;
 use std::env::{var, VarError};
 use std::io::Read;
 use std::str::FromStr;
 use std::sync::Arc;
 use std::time::Duration;
@@ -37,7 +36,6 @@ use pageserver::{
    virtual_file,
 };
 use postgres_backend::AuthType;
 use utils::crashsafe::syncfs;
 use utils::failpoint_support;
 use utils::logging::TracingErrorLayerEnablement;
 use utils::{
@@ -126,6 +124,7 @@ fn main() -> anyhow::Result<()> {
    // after setting up logging, log the effective IO engine choice and read path implementations
    info!(?conf.virtual_file_io_engine, "starting with virtual_file IO engine");
    info!(?conf.virtual_file_direct_io, "starting with virtual_file Direct IO settings");
    info!(?conf.compact_level0_phase1_value_access, "starting with setting for compact_level0_phase1_value_access");
    info!(?conf.io_buffer_alignment, "starting with setting for IO buffer alignment");
    // The tenants directory contains all the pageserver local disk state.
@@ -156,7 +155,23 @@ fn main() -> anyhow::Result<()> {
        };
        let started = Instant::now();
-        syncfs(dirfd)?;
+        // Linux guarantees durability for syncfs.
        // POSIX doesn't have syncfs, and further does not actually guarantee durability of sync().
        #[cfg(target_os = "linux")]
        {
            use std::os::fd::AsRawFd;
            nix::unistd::syncfs(dirfd.as_raw_fd()).context("syncfs")?;
        }
        #[cfg(target_os = "macos")]
        {
            // macOS is not a production platform for Neon, don't even bother.
            drop(dirfd);
        }
        #[cfg(not(any(target_os = "linux", target_os = "macos")))]
        {
            compile_error!("Unsupported OS");
        }
        let elapsed = started.elapsed();
        info!(
            elapsed_ms = elapsed.as_millis(),
@@ -208,15 +223,27 @@ fn initialize_config(
        }
    };
-    let config_file_contents =
+    let config: toml_edit::Document = match std::fs::File::open(cfg_file_path) {
-        std::fs::read_to_string(cfg_file_path).context("read config file from filesystem")?;
+        Ok(mut f) => {
-    let config_toml = serde_path_to_error::deserialize(
+            let md = f.metadata().context("stat config file")?;
-        toml_edit::de::Deserializer::from_str(&config_file_contents)
+            if md.is_file() {
-            .context("build toml deserializer")?,
+                let mut s = String::new();
-    )
+                f.read_to_string(&mut s).context("read config file")?;
-    .context("deserialize config toml")?;
+                s.parse().context("parse config file toml")?
-    let conf = PageServerConf::parse_and_validate(identity.id, config_toml, workdir)
+            } else {
-        .context("runtime-validation of config toml")?;
+                anyhow::bail!("directory entry exists but is not a file: {cfg_file_path}");
            }
        }
        Err(e) => {
            anyhow::bail!("open pageserver config: {e}: {cfg_file_path}");
        }
    };
    debug!("Using pageserver toml: {config}");
    // Construct the runtime representation
    let conf = PageServerConf::parse_and_validate(identity.id, &config, workdir)
        .context("Failed to parse pageserver configuration")?;
    Ok(Box::leak(Box::new(conf)))
 }
--- a/pageserver/src/config.rs
+++ b/pageserver/src/config.rs
--- a/pageserver/src/context.rs
+++ b/pageserver/src/context.rs
@@ -1,9 +1,7 @@
-//! Defines [`RequestContext`].
+//! This module defines `RequestContext`, a structure that we use throughout
-//!
+//! the pageserver to propagate high-level context from places
-//! It is a structure that we use throughout the pageserver to propagate
+//! that _originate_ activity down to the shared code paths at the
-//! high-level context from places that _originate_ activity down to the
+//! heart of the pageserver. It's inspired by Golang's `context.Context`.
 //! shared code paths at the heart of the pageserver. It's inspired by
 //! Golang's `context.Context`.
 //!
 //! For example, in `Timeline::get(page_nr, lsn)` we need to answer the following questions:
 //! 1. What high-level activity ([`TaskKind`]) needs this page?
--- a/pageserver/src/control_plane_client.rs
+++ b/pageserver/src/control_plane_client.rs
@@ -141,24 +141,10 @@ impl ControlPlaneGenerationsApi for ControlPlaneClient {
                        m.other
                    );
-                    let az_id = {
+                    let az_id = m
-                        let az_id_from_metadata = m
+                        .other
-                            .other
+                        .get("availability_zone_id")
-                            .get("availability_zone_id")
+                        .and_then(|jv| jv.as_str().map(|str| str.to_owned()));
                            .and_then(|jv| jv.as_str().map(|str| str.to_owned()));
                        match az_id_from_metadata {
                            Some(az_id) => Some(az_id),
                            None => {
                                tracing::warn!("metadata.json does not contain an 'availability_zone_id' field");
                                conf.availability_zone.clone()
                            }
                        }
                    };
                    if az_id.is_none() {
                        panic!("Availablity zone id could not be inferred from metadata.json or pageserver config");
                    }
                    Some(NodeRegisterRequest {
                        node_id: conf.id,
@@ -166,7 +152,7 @@ impl ControlPlaneGenerationsApi for ControlPlaneClient {
                        listen_pg_port: m.postgres_port,
                        listen_http_addr: m.http_host,
                        listen_http_port: m.http_port,
-                        availability_zone_id: az_id.expect("Checked above"),
+                        availability_zone_id: az_id,
                    })
                }
                Err(e) => {
--- a/pageserver/src/disk_usage_eviction_task.rs
+++ b/pageserver/src/disk_usage_eviction_task.rs
@@ -41,15 +41,19 @@
 // - The `#[allow(dead_code)]` above various structs are to suppress warnings about only the Debug impl
 //   reading these fields. We use the Debug impl for semi-structured logging, though.
-use std::{sync::Arc, time::SystemTime};
+use std::{
    sync::Arc,
    time::{Duration, SystemTime},
 };
 use anyhow::Context;
-use pageserver_api::{config::DiskUsageEvictionTaskConfig, shard::TenantShardId};
+use pageserver_api::shard::TenantShardId;
 use remote_storage::GenericRemoteStorage;
-use serde::Serialize;
+use serde::{Deserialize, Serialize};
 use tokio::time::Instant;
 use tokio_util::sync::CancellationToken;
 use tracing::{debug, error, info, instrument, warn, Instrument};
 use utils::serde_percent::Percent;
 use utils::{completion, id::TimelineId};
 use crate::{
@@ -65,9 +69,23 @@ use crate::{
    CancellableTask, DiskUsageEvictionTask,
 };
 #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
 pub struct DiskUsageEvictionTaskConfig {
    pub max_usage_pct: Percent,
    pub min_avail_bytes: u64,
    #[serde(with = "humantime_serde")]
    pub period: Duration,
    #[cfg(feature = "testing")]
    pub mock_statvfs: Option<crate::statvfs::mock::Behavior>,
    /// Select sorting for evicted layers
    #[serde(default)]
    pub eviction_order: EvictionOrder,
 }
 /// Selects the sort order for eviction candidates *after* per tenant `min_resident_size`
 /// partitioning.
-#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
 #[serde(tag = "type", content = "args")]
 pub enum EvictionOrder {
    /// Order the layers to be evicted by how recently they have been accessed relatively within
    /// the set of resident layers of a tenant.
@@ -78,22 +96,23 @@ pub enum EvictionOrder {
        /// we read tenants is deterministic. If we find the need to use this as `false`, we need
        /// to ensure nondeterminism by adding in a random number to break the
        /// `relative_last_activity==0.0` ties.
        #[serde(default = "default_highest_layer_count_loses_first")]
        highest_layer_count_loses_first: bool,
    },
 }
-impl From<pageserver_api::config::EvictionOrder> for EvictionOrder {
+impl Default for EvictionOrder {
-    fn from(value: pageserver_api::config::EvictionOrder) -> Self {
+    fn default() -> Self {
-        match value {
+        Self::RelativeAccessed {
-            pageserver_api::config::EvictionOrder::RelativeAccessed {
+            highest_layer_count_loses_first: true,
                highest_layer_count_loses_first,
            } => Self::RelativeAccessed {
                highest_layer_count_loses_first,
            },
        }
    }
 }
 fn default_highest_layer_count_loses_first() -> bool {
    true
 }
 impl EvictionOrder {
    fn sort(&self, candidates: &mut [(EvictionPartition, EvictionCandidate)]) {
        use EvictionOrder::*;
@@ -276,7 +295,7 @@ async fn disk_usage_eviction_task_iteration(
        storage,
        usage_pre,
        tenant_manager,
-        task_config.eviction_order.into(),
+        task_config.eviction_order,
        cancel,
    )
    .await;
@@ -1238,6 +1257,7 @@ mod filesystem_level_usage {
    #[test]
    fn max_usage_pct_pressure() {
        use super::EvictionOrder;
        use super::Usage as _;
        use std::time::Duration;
        use utils::serde_percent::Percent;
@@ -1249,7 +1269,7 @@ mod filesystem_level_usage {
                period: Duration::MAX,
                #[cfg(feature = "testing")]
                mock_statvfs: None,
-                eviction_order: pageserver_api::config::EvictionOrder::default(),
+                eviction_order: EvictionOrder::default(),
            },
            total_bytes: 100_000,
            avail_bytes: 0,
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -2076,7 +2076,7 @@ async fn disk_usage_eviction_run(
        evict_bytes: u64,
        #[serde(default)]
-        eviction_order: pageserver_api::config::EvictionOrder,
+        eviction_order: crate::disk_usage_eviction_task::EvictionOrder,
    }
    #[derive(Debug, Clone, Copy, serde::Serialize)]
@@ -2112,7 +2112,7 @@ async fn disk_usage_eviction_run(
        &state.remote_storage,
        usage,
        &state.tenant_manager,
-        config.eviction_order.into(),
+        config.eviction_order,
        &cancel,
    )
    .await;
--- a/pageserver/src/l0_flush.rs
+++ b/pageserver/src/l0_flush.rs
@@ -1,7 +1,9 @@
 use std::{num::NonZeroUsize, sync::Arc};
-#[derive(Debug, PartialEq, Eq, Clone)]
+#[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize)]
 #[serde(tag = "mode", rename_all = "kebab-case", deny_unknown_fields)]
 pub enum L0FlushConfig {
    #[serde(rename_all = "snake_case")]
    Direct { max_concurrency: NonZeroUsize },
 }
@@ -14,16 +16,6 @@ impl Default for L0FlushConfig {
    }
 }
 impl From<pageserver_api::models::L0FlushConfig> for L0FlushConfig {
    fn from(config: pageserver_api::models::L0FlushConfig) -> Self {
        match config {
            pageserver_api::models::L0FlushConfig::Direct { max_concurrency } => {
                Self::Direct { max_concurrency }
            }
        }
    }
 }
 #[derive(Clone)]
 pub struct L0FlushGlobalState(Arc<Inner>);
--- a/pageserver/src/metrics.rs
+++ b/pageserver/src/metrics.rs
@@ -9,7 +9,7 @@ use metrics::{
 use once_cell::sync::Lazy;
 use pageserver_api::shard::TenantShardId;
 use strum::{EnumCount, VariantNames};
-use strum_macros::{IntoStaticStr, VariantNames};
+use strum_macros::{EnumVariantNames, IntoStaticStr};
 use tracing::warn;
 use utils::id::TimelineId;
@@ -27,7 +27,7 @@ const CRITICAL_OP_BUCKETS: &[f64] = &[
 ];
 // Metrics collected on operations on the storage repository.
-#[derive(Debug, VariantNames, IntoStaticStr)]
+#[derive(Debug, EnumVariantNames, IntoStaticStr)]
 #[strum(serialize_all = "kebab_case")]
 pub(crate) enum StorageTimeOperation {
    #[strum(serialize = "layer flush")]
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -1199,6 +1199,7 @@ impl PageServerHandler {
    }
 }
 #[async_trait::async_trait]
 impl<IO> postgres_backend::Handler<IO> for PageServerHandler
 where
    IO: AsyncRead + AsyncWrite + Send + Sync + Unpin,
--- a/pageserver/src/pgdatadir_mapping.rs
+++ b/pageserver/src/pgdatadir_mapping.rs
@@ -729,12 +729,8 @@ impl Timeline {
        let current_policy = self.last_aux_file_policy.load();
        match current_policy {
            Some(AuxFilePolicy::V1) => {
-                let res = self.list_aux_files_v1(lsn, ctx).await?;
+                warn!("this timeline is using deprecated aux file policy V1 (policy=V1)");
-                let empty_str = if res.is_empty() { ", empty" } else { "" };
+                self.list_aux_files_v1(lsn, ctx).await
                warn!(
                    "this timeline is using deprecated aux file policy V1 (policy=v1{empty_str})"
                );
                Ok(res)
            }
            None => {
                let res = self.list_aux_files_v1(lsn, ctx).await?;
@@ -1021,10 +1017,9 @@ impl Timeline {
 }
 /// DatadirModification represents an operation to ingest an atomic set of
-/// updates to the repository.
+/// updates to the repository. It is created by the 'begin_record'
-///
+/// function. It is called for each WAL record, so that all the modifications
-/// It is created by the 'begin_record' function. It is called for each WAL
+/// by a one WAL record appear atomic.
 /// record, so that all the modifications by a one WAL record appear atomic.
 pub struct DatadirModification<'a> {
    /// The timeline this modification applies to. You can access this to
    /// read the state, but note that any pending updates are *not* reflected
@@ -1205,13 +1200,6 @@ impl<'a> DatadirModification<'a> {
        img: Bytes,
    ) -> anyhow::Result<()> {
        anyhow::ensure!(rel.relnode != 0, RelationError::InvalidRelnode);
        let key = rel_block_to_key(rel, blknum);
        if !key.is_valid_key_on_write_path() {
            anyhow::bail!(
                "the request contains data not supported by pageserver at {}",
                key
            );
        }
        self.put(rel_block_to_key(rel, blknum), Value::Image(img));
        Ok(())
    }
@@ -1223,34 +1211,14 @@ impl<'a> DatadirModification<'a> {
        blknum: BlockNumber,
        img: Bytes,
    ) -> anyhow::Result<()> {
-        let key = slru_block_to_key(kind, segno, blknum);
+        self.put(slru_block_to_key(kind, segno, blknum), Value::Image(img));
        if !key.is_valid_key_on_write_path() {
            anyhow::bail!(
                "the request contains data not supported by pageserver at {}",
                key
            );
        }
        self.put(key, Value::Image(img));
        Ok(())
    }
-    pub(crate) fn put_rel_page_image_zero(
+    pub(crate) fn put_rel_page_image_zero(&mut self, rel: RelTag, blknum: BlockNumber) {
-        &mut self,
+        self.pending_zero_data_pages
-        rel: RelTag,
+            .insert(rel_block_to_key(rel, blknum).to_compact());
        blknum: BlockNumber,
    ) -> anyhow::Result<()> {
        anyhow::ensure!(rel.relnode != 0, RelationError::InvalidRelnode);
        let key = rel_block_to_key(rel, blknum);
        if !key.is_valid_key_on_write_path() {
            anyhow::bail!(
                "the request contains data not supported by pageserver: {} @ {}",
                key,
                self.lsn
            );
        }
        self.pending_zero_data_pages.insert(key.to_compact());
        self.pending_bytes += ZERO_PAGE.len();
        Ok(())
    }
    pub(crate) fn put_slru_page_image_zero(
@@ -1258,18 +1226,10 @@ impl<'a> DatadirModification<'a> {
        kind: SlruKind,
        segno: u32,
        blknum: BlockNumber,
-    ) -> anyhow::Result<()> {
+    ) {
-        let key = slru_block_to_key(kind, segno, blknum);
+        self.pending_zero_data_pages
-        if !key.is_valid_key_on_write_path() {
+            .insert(slru_block_to_key(kind, segno, blknum).to_compact());
            anyhow::bail!(
                "the request contains data not supported by pageserver: {} @ {}",
                key,
                self.lsn
            );
        }
        self.pending_zero_data_pages.insert(key.to_compact());
        self.pending_bytes += ZERO_PAGE.len();
        Ok(())
    }
    /// Call this at the end of each WAL record.
@@ -1697,7 +1657,7 @@ impl<'a> DatadirModification<'a> {
                if aux_files_key_v1.is_empty() {
                    None
                } else {
-                    warn!("this timeline is using deprecated aux file policy V1 (detected existing v1 files)");
+                    warn!("this timeline is using deprecated aux file policy V1");
                    self.tline.do_switch_aux_policy(AuxFilePolicy::V1)?;
                    Some(AuxFilePolicy::V1)
                }
@@ -2084,7 +2044,6 @@ impl<'a> DatadirModification<'a> {
 /// This struct facilitates accessing either a committed key from the timeline at a
 /// specific LSN, or the latest uncommitted key from a pending modification.
 ///
 /// During WAL ingestion, the records from multiple LSNs may be batched in the same
 /// modification before being flushed to the timeline. Hence, the routines in WalIngest
 /// need to look up the keys in the modification first before looking them up in the
--- a/pageserver/src/statvfs.rs
+++ b/pageserver/src/statvfs.rs
@@ -60,7 +60,32 @@ pub mod mock {
    use regex::Regex;
    use tracing::log::info;
-    pub use pageserver_api::config::statvfs::mock::Behavior;
+    #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
    #[serde(tag = "type")]
    pub enum Behavior {
        Success {
            blocksize: u64,
            total_blocks: u64,
            name_filter: Option<utils::serde_regex::Regex>,
        },
        Failure {
            mocked_error: MockedError,
        },
    }
    #[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
    #[allow(clippy::upper_case_acronyms)]
    pub enum MockedError {
        EIO,
    }
    impl From<MockedError> for nix::Error {
        fn from(e: MockedError) -> Self {
            match e {
                MockedError::EIO => nix::Error::EIO,
            }
        }
    }
    pub fn get(tenants_dir: &Utf8Path, behavior: &Behavior) -> nix::Result<Statvfs> {
        info!("running mocked statvfs");
@@ -91,7 +116,6 @@ pub mod mock {
                    block_size: *blocksize,
                })
            }
            #[cfg(feature = "testing")]
            Behavior::Failure { mocked_error } => Err((*mocked_error).into()),
        }
    }
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -1,9 +1,8 @@
 //! Timeline repository implementation that keeps old data in layer files, and
 //! the recent changes in ephemeral files.
 //!
-//! See tenant/*_layer.rs files. The functions here are responsible for locating
+//! Timeline repository implementation that keeps old data in files on disk, and
-//! the correct layer for the get/put call, walking back the timeline branching
+//! the recent changes in memory. See tenant/*_layer.rs files.
-//! history as needed.
+//! The functions here are responsible for locating the correct layer for the
 //! get/put call, walking back the timeline branching history as needed.
 //!
 //! The files are stored in the .neon/tenants/<tenant_id>/timelines/<timeline_id>
 //! directory. See docs/pageserver-storage.md for how the files are managed.
@@ -7091,13 +7090,13 @@ mod tests {
            vec![
                // Image layer at GC horizon
                PersistentLayerKey {
-                    key_range: Key::MIN..Key::MAX,
+                    key_range: Key::MIN..Key::NON_L0_MAX,
                    lsn_range: Lsn(0x30)..Lsn(0x31),
                    is_delta: false
                },
-                // The delta layer below the horizon
+                // The delta layer covers the full range (with the layer key hack to avoid being recognized as L0)
                PersistentLayerKey {
-                    key_range: get_key(3)..get_key(4),
+                    key_range: Key::MIN..Key::NON_L0_MAX,
                    lsn_range: Lsn(0x30)..Lsn(0x48),
                    is_delta: true
                },
--- a/pageserver/src/tenant/config.rs
+++ b/pageserver/src/tenant/config.rs
@@ -9,10 +9,11 @@
 //! may lead to a data loss.
 //!
 use anyhow::bail;
 pub(crate) use pageserver_api::config::TenantConfigToml as TenantConf;
 use pageserver_api::models::AuxFilePolicy;
 use pageserver_api::models::CompactionAlgorithm;
 use pageserver_api::models::CompactionAlgorithmSettings;
 use pageserver_api::models::EvictionPolicy;
 use pageserver_api::models::LsnLease;
 use pageserver_api::models::{self, ThrottleConfig};
 use pageserver_api::shard::{ShardCount, ShardIdentity, ShardNumber, ShardStripeSize};
 use serde::de::IntoDeserializer;
@@ -22,6 +23,50 @@ use std::num::NonZeroU64;
 use std::time::Duration;
 use utils::generation::Generation;
 pub mod defaults {
    // FIXME: This current value is very low. I would imagine something like 1 GB or 10 GB
    // would be more appropriate. But a low value forces the code to be exercised more,
    // which is good for now to trigger bugs.
    // This parameter actually determines L0 layer file size.
    pub const DEFAULT_CHECKPOINT_DISTANCE: u64 = 256 * 1024 * 1024;
    pub const DEFAULT_CHECKPOINT_TIMEOUT: &str = "10 m";
    // FIXME the below configs are only used by legacy algorithm. The new algorithm
    // has different parameters.
    // Target file size, when creating image and delta layers.
    // This parameter determines L1 layer file size.
    pub const DEFAULT_COMPACTION_TARGET_SIZE: u64 = 128 * 1024 * 1024;
    pub const DEFAULT_COMPACTION_PERIOD: &str = "20 s";
    pub const DEFAULT_COMPACTION_THRESHOLD: usize = 10;
    pub const DEFAULT_COMPACTION_ALGORITHM: super::CompactionAlgorithm =
        super::CompactionAlgorithm::Legacy;
    pub const DEFAULT_GC_HORIZON: u64 = 64 * 1024 * 1024;
    // Large DEFAULT_GC_PERIOD is fine as long as PITR_INTERVAL is larger.
    // If there's a need to decrease this value, first make sure that GC
    // doesn't hold a layer map write lock for non-trivial operations.
    // Relevant: https://github.com/neondatabase/neon/issues/3394
    pub const DEFAULT_GC_PERIOD: &str = "1 hr";
    pub const DEFAULT_IMAGE_CREATION_THRESHOLD: usize = 3;
    pub const DEFAULT_PITR_INTERVAL: &str = "7 days";
    pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "10 seconds";
    pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "10 seconds";
    // The default limit on WAL lag should be set to avoid causing disconnects under high throughput
    // scenarios: since the broker stats are updated ~1/s, a value of 1GiB should be sufficient for
    // throughputs up to 1GiB/s per timeline.
    pub const DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG: u64 = 1024 * 1024 * 1024;
    pub const DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD: &str = "24 hour";
    // By default ingest enough WAL for two new L0 layers before checking if new image
    // image layers should be created.
    pub const DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD: u8 = 2;
    pub const DEFAULT_INGEST_BATCH_SIZE: u64 = 100;
 }
 #[derive(Debug, Copy, Clone, Serialize, Deserialize, PartialEq, Eq)]
 pub(crate) enum AttachmentMode {
    /// Our generation is current as far as we know, and as far as we know we are the only attached
@@ -236,20 +281,96 @@ impl LocationConf {
    }
 }
-impl Default for LocationConf {
+/// A tenant's calcuated configuration, which is the result of merging a
-    // TODO: this should be removed once tenant loading can guarantee that we are never
+/// tenant's TenantConfOpt with the global TenantConf from PageServerConf.
-    // loading from a directory without a configuration.
+///
-    // => tech debt since https://github.com/neondatabase/neon/issues/1555
+/// For storing and transmitting individual tenant's configuration, see
-    fn default() -> Self {
+/// TenantConfOpt.
-        Self {
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
-            mode: LocationMode::Attached(AttachedLocationConfig {
+pub struct TenantConf {
-                generation: Generation::none(),
+    // Flush out an inmemory layer, if it's holding WAL older than this
-                attach_mode: AttachmentMode::Single,
+    // This puts a backstop on how much WAL needs to be re-digested if the
-            }),
+    // page server crashes.
-            tenant_conf: TenantConfOpt::default(),
+    // This parameter actually determines L0 layer file size.
-            shard: ShardIdentity::unsharded(),
+    pub checkpoint_distance: u64,
-        }
+    // Inmemory layer is also flushed at least once in checkpoint_timeout to
-    }
+    // eventually upload WAL after activity is stopped.
    #[serde(with = "humantime_serde")]
    pub checkpoint_timeout: Duration,
    // Target file size, when creating image and delta layers.
    // This parameter determines L1 layer file size.
    pub compaction_target_size: u64,
    // How often to check if there's compaction work to be done.
    // Duration::ZERO means automatic compaction is disabled.
    #[serde(with = "humantime_serde")]
    pub compaction_period: Duration,
    // Level0 delta layer threshold for compaction.
    pub compaction_threshold: usize,
    pub compaction_algorithm: CompactionAlgorithmSettings,
    // Determines how much history is retained, to allow
    // branching and read replicas at an older point in time.
    // The unit is #of bytes of WAL.
    // Page versions older than this are garbage collected away.
    pub gc_horizon: u64,
    // Interval at which garbage collection is triggered.
    // Duration::ZERO means automatic GC is disabled
    #[serde(with = "humantime_serde")]
    pub gc_period: Duration,
    // Delta layer churn threshold to create L1 image layers.
    pub image_creation_threshold: usize,
    // Determines how much history is retained, to allow
    // branching and read replicas at an older point in time.
    // The unit is time.
    // Page versions older than this are garbage collected away.
    #[serde(with = "humantime_serde")]
    pub pitr_interval: Duration,
    /// Maximum amount of time to wait while opening a connection to receive wal, before erroring.
    #[serde(with = "humantime_serde")]
    pub walreceiver_connect_timeout: Duration,
    /// Considers safekeepers stalled after no WAL updates were received longer than this threshold.
    /// A stalled safekeeper will be changed to a newer one when it appears.
    #[serde(with = "humantime_serde")]
    pub lagging_wal_timeout: Duration,
    /// Considers safekeepers lagging when their WAL is behind another safekeeper for more than this threshold.
    /// A lagging safekeeper will be changed after `lagging_wal_timeout` time elapses since the last WAL update,
    /// to avoid eager reconnects.
    pub max_lsn_wal_lag: NonZeroU64,
    pub eviction_policy: EvictionPolicy,
    pub min_resident_size_override: Option<u64>,
    // See the corresponding metric's help string.
    #[serde(with = "humantime_serde")]
    pub evictions_low_residence_duration_metric_threshold: Duration,
    /// If non-zero, the period between uploads of a heatmap from attached tenants.  This
    /// may be disabled if a Tenant will not have secondary locations: only secondary
    /// locations will use the heatmap uploaded by attached locations.
    #[serde(with = "humantime_serde")]
    pub heatmap_period: Duration,
    /// If true then SLRU segments are dowloaded on demand, if false SLRU segments are included in basebackup
    pub lazy_slru_download: bool,
    pub timeline_get_throttle: pageserver_api::models::ThrottleConfig,
    // How much WAL must be ingested before checking again whether a new image layer is required.
    // Expresed in multiples of checkpoint distance.
    pub image_layer_creation_check_threshold: u8,
    /// Switch to a new aux file policy. Switching this flag requires the user has not written any aux file into
    /// the storage before, and this flag cannot be switched back. Otherwise there will be data corruptions.
    /// There is a `last_aux_file_policy` flag which gets persisted in `index_part.json` once the first aux
    /// file is written.
    pub switch_aux_file_policy: AuxFilePolicy,
    /// The length for an explicit LSN lease request.
    /// Layers needed to reconstruct pages at LSN will not be GC-ed during this interval.
    #[serde(with = "humantime_serde")]
    pub lsn_lease_length: Duration,
    /// The length for an implicit LSN lease granted as part of `get_lsn_by_timestamp` request.
    /// Layers needed to reconstruct pages at LSN will not be GC-ed during this interval.
    #[serde(with = "humantime_serde")]
    pub lsn_lease_length_for_ts: Duration,
 }
 /// Same as TenantConf, but this struct preserves the information about
@@ -424,6 +545,51 @@ impl TenantConfOpt {
    }
 }
 impl Default for TenantConf {
    fn default() -> Self {
        use defaults::*;
        Self {
            checkpoint_distance: DEFAULT_CHECKPOINT_DISTANCE,
            checkpoint_timeout: humantime::parse_duration(DEFAULT_CHECKPOINT_TIMEOUT)
                .expect("cannot parse default checkpoint timeout"),
            compaction_target_size: DEFAULT_COMPACTION_TARGET_SIZE,
            compaction_period: humantime::parse_duration(DEFAULT_COMPACTION_PERIOD)
                .expect("cannot parse default compaction period"),
            compaction_threshold: DEFAULT_COMPACTION_THRESHOLD,
            compaction_algorithm: CompactionAlgorithmSettings {
                kind: DEFAULT_COMPACTION_ALGORITHM,
            },
            gc_horizon: DEFAULT_GC_HORIZON,
            gc_period: humantime::parse_duration(DEFAULT_GC_PERIOD)
                .expect("cannot parse default gc period"),
            image_creation_threshold: DEFAULT_IMAGE_CREATION_THRESHOLD,
            pitr_interval: humantime::parse_duration(DEFAULT_PITR_INTERVAL)
                .expect("cannot parse default PITR interval"),
            walreceiver_connect_timeout: humantime::parse_duration(
                DEFAULT_WALRECEIVER_CONNECT_TIMEOUT,
            )
            .expect("cannot parse default walreceiver connect timeout"),
            lagging_wal_timeout: humantime::parse_duration(DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT)
                .expect("cannot parse default walreceiver lagging wal timeout"),
            max_lsn_wal_lag: NonZeroU64::new(DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG)
                .expect("cannot parse default max walreceiver Lsn wal lag"),
            eviction_policy: EvictionPolicy::NoEviction,
            min_resident_size_override: None,
            evictions_low_residence_duration_metric_threshold: humantime::parse_duration(
                DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD,
            )
            .expect("cannot parse default evictions_low_residence_duration_metric_threshold"),
            heatmap_period: Duration::ZERO,
            lazy_slru_download: false,
            timeline_get_throttle: crate::tenant::throttle::Config::disabled(),
            image_layer_creation_check_threshold: DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD,
            switch_aux_file_policy: AuxFilePolicy::default_tenant_config(),
            lsn_lease_length: LsnLease::DEFAULT_LENGTH,
            lsn_lease_length_for_ts: LsnLease::DEFAULT_LENGTH_FOR_TS,
        }
    }
 }
 impl TryFrom<&'_ models::TenantConfig> for TenantConfOpt {
    type Error = anyhow::Error;
@@ -452,8 +618,7 @@ impl TryFrom<toml_edit::Item> for TenantConfOpt {
                    .map_err(|e| anyhow::anyhow!("{}: {}", e.path(), e.inner().message()));
            }
            toml_edit::Item::Table(table) => {
-                let deserializer =
+                let deserializer = toml_edit::de::Deserializer::new(table.into());
                    toml_edit::de::Deserializer::from(toml_edit::DocumentMut::from(table));
                return serde_path_to_error::deserialize(deserializer)
                    .map_err(|e| anyhow::anyhow!("{}: {}", e.path(), e.inner().message()));
            }
--- a/pageserver/src/tenant/metadata.rs
+++ b/pageserver/src/tenant/metadata.rs
@@ -1,8 +1,7 @@
-//! Describes the legacy now hopefully no longer modified per-timeline metadata.
+//! Describes the legacy now hopefully no longer modified per-timeline metadata stored in
-//!
+//! `index_part.json` managed by [`remote_timeline_client`]. For many tenants and their timelines,
-//! It is stored in `index_part.json` managed by [`remote_timeline_client`]. For many tenants and
+//! this struct and it's original serialization format is still needed because they were written a
-//! their timelines, this struct and its original serialization format is still needed because
+//! long time ago.
 //! they were written a long time ago.
 //!
 //! Instead of changing and adding versioning to this, just change [`IndexPart`] with soft json
 //! versioning.
--- a/pageserver/src/tenant/mgr.rs
+++ b/pageserver/src/tenant/mgr.rs
@@ -282,10 +282,9 @@ impl BackgroundPurges {
 static TENANTS: Lazy<std::sync::RwLock<TenantsMap>> =
    Lazy::new(|| std::sync::RwLock::new(TenantsMap::Initializing));
-/// Responsible for storing and mutating the collection of all tenants
+/// The TenantManager is responsible for storing and mutating the collection of all tenants
-/// that this pageserver has state for.
+/// that this pageserver process has state for.  Every Tenant and SecondaryTenant instance
-///
+/// lives inside the TenantManager.
 /// Every Tenant and SecondaryTenant instance lives inside the TenantManager.
 ///
 /// The most important role of the TenantManager is to prevent conflicts: e.g. trying to attach
 /// the same tenant twice concurrently, or trying to configure the same tenant into secondary
@@ -2347,9 +2346,8 @@ pub enum TenantMapError {
    ShuttingDown,
 }
-/// Guards a particular tenant_id's content in the TenantsMap.
+/// Guards a particular tenant_id's content in the TenantsMap.  While this
-///
+/// structure exists, the TenantsMap will contain a [`TenantSlot::InProgress`]
 /// While this structure exists, the TenantsMap will contain a [`TenantSlot::InProgress`]
 /// for this tenant, which acts as a marker for any operations targeting
 /// this tenant to retry later, or wait for the InProgress state to end.
 ///
--- a/pageserver/src/tenant/remote_timeline_client.rs
+++ b/pageserver/src/tenant/remote_timeline_client.rs
@@ -2184,8 +2184,6 @@ pub fn remote_timeline_path(
    remote_timelines_path(tenant_shard_id).join(Utf8Path::new(&timeline_id.to_string()))
 }
 /// Obtains the path of the given Layer in the remote
 ///
 /// Note that the shard component of a remote layer path is _not_ always the same
 /// as in the TenantShardId of the caller: tenants may reference layers from a different
 /// ShardIndex.  Use the ShardIndex from the layer's metadata.
--- a/pageserver/src/tenant/remote_timeline_client/download.rs
+++ b/pageserver/src/tenant/remote_timeline_client/download.rs
@@ -548,7 +548,7 @@ pub(crate) async fn download_initdb_tar_zst(
        cancel,
    )
    .await
-    .inspect_err(|_e| {
+    .map_err(|e| {
        // Do a best-effort attempt at deleting the temporary file upon encountering an error.
        // We don't have async here nor do we want to pile on any extra errors.
        if let Err(e) = std::fs::remove_file(&temp_path) {
@@ -556,6 +556,7 @@ pub(crate) async fn download_initdb_tar_zst(
                warn!("error deleting temporary file {temp_path}: {e}");
            }
        }
        e
    })?;
    Ok((temp_path, file))
--- a/pageserver/src/tenant/remote_timeline_client/index.rs
+++ b/pageserver/src/tenant/remote_timeline_client/index.rs
@@ -1,5 +1,4 @@
 //! In-memory index to track the tenant files on the remote storage.
 //!
 //! Able to restore itself from the storage index parts, that are located in every timeline's remote directory and contain all data about
 //! remote timeline layers and its metadata.
--- a/pageserver/src/tenant/storage_layer.rs
+++ b/pageserver/src/tenant/storage_layer.rs
@@ -434,11 +434,10 @@ impl ReadableLayer {
    }
 }
-/// Layers contain a hint indicating whether they are likely to be used for reads.
+/// Layers contain a hint indicating whether they are likely to be used for reads.  This is a hint rather
-///
+/// than an authoritative value, so that we do not have to update it synchronously when changing the visibility
-/// This is a hint rather than an authoritative value, so that we do not have to update it synchronously
+/// of layers (for example when creating a branch that makes some previously covered layers visible).  It should
-/// when changing the visibility of layers (for example when creating a branch that makes some previously
+/// be used for cache management but not for correctness-critical checks.
 /// covered layers visible).  It should be used for cache management but not for correctness-critical checks.
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub enum LayerVisibilityHint {
    /// A Visible layer might be read while serving a read, because there is not an image layer between it
--- a/pageserver/src/tenant/storage_layer/delta_layer.rs
+++ b/pageserver/src/tenant/storage_layer/delta_layer.rs
@@ -39,7 +39,7 @@ use crate::tenant::disk_btree::{
 use crate::tenant::storage_layer::layer::S3_UPLOAD_LIMIT;
 use crate::tenant::timeline::GetVectoredError;
 use crate::tenant::vectored_blob_io::{
-    BlobFlag, StreamingVectoredReadPlanner, VectoredBlobReader, VectoredRead,
+    BlobFlag, MaxVectoredReadBytes, StreamingVectoredReadPlanner, VectoredBlobReader, VectoredRead,
    VectoredReadCoalesceMode, VectoredReadPlanner,
 };
 use crate::tenant::PageReconstructError;
@@ -52,7 +52,6 @@ use bytes::BytesMut;
 use camino::{Utf8Path, Utf8PathBuf};
 use futures::StreamExt;
 use itertools::Itertools;
 use pageserver_api::config::MaxVectoredReadBytes;
 use pageserver_api::keyspace::KeySpace;
 use pageserver_api::models::ImageCompressionAlgorithm;
 use pageserver_api::shard::TenantShardId;
@@ -136,11 +135,10 @@ impl Summary {
 // Flag indicating that this version initialize the page
 const WILL_INIT: u64 = 1;
-/// Struct representing reference to BLOB in layers.
+/// Struct representing reference to BLOB in layers. Reference contains BLOB
-///
+/// offset, and for WAL records it also contains `will_init` flag. The flag
-/// Reference contains BLOB offset, and for WAL records it also contains
+/// helps to determine the range of records that needs to be applied, without
-/// `will_init` flag. The flag helps to determine the range of records
+/// reading/deserializing records themselves.
 /// that needs to be applied, without reading/deserializing records themselves.
 #[derive(Debug, Serialize, Deserialize, Copy, Clone)]
 pub struct BlobRef(pub u64);
--- a/pageserver/src/tenant/storage_layer/image_layer.rs
+++ b/pageserver/src/tenant/storage_layer/image_layer.rs
@@ -1,9 +1,7 @@
 //! An ImageLayer represents an image or a snapshot of a key-range at
-//! one particular LSN.
+//! one particular LSN. It contains an image of all key-value pairs
-//!
+//! in its key-range. Any key that falls into the image layer's range
-//! It contains an image of all key-value pairs in its key-range. Any key
+//! but does not exist in the layer, does not exist.
 //! that falls into the image layer's range but does not exist in the layer,
 //! does not exist.
 //!
 //! An image layer is stored in a file on disk. The file is stored in
 //! timelines/<timeline_id> directory.  Currently, there are no
@@ -36,7 +34,8 @@ use crate::tenant::disk_btree::{
 };
 use crate::tenant::timeline::GetVectoredError;
 use crate::tenant::vectored_blob_io::{
-    BlobFlag, StreamingVectoredReadPlanner, VectoredBlobReader, VectoredRead, VectoredReadPlanner,
+    BlobFlag, MaxVectoredReadBytes, StreamingVectoredReadPlanner, VectoredBlobReader, VectoredRead,
    VectoredReadPlanner,
 };
 use crate::tenant::{PageReconstructError, Timeline};
 use crate::virtual_file::owned_buffers_io::io_buf_ext::IoBufExt;
@@ -47,7 +46,6 @@ use bytes::{Bytes, BytesMut};
 use camino::{Utf8Path, Utf8PathBuf};
 use hex;
 use itertools::Itertools;
 use pageserver_api::config::MaxVectoredReadBytes;
 use pageserver_api::keyspace::KeySpace;
 use pageserver_api::shard::{ShardIdentity, TenantShardId};
 use rand::{distributions::Alphanumeric, Rng};
--- a/pageserver/src/tenant/storage_layer/inmemory_layer.rs
+++ b/pageserver/src/tenant/storage_layer/inmemory_layer.rs
@@ -215,7 +215,7 @@ impl IndexEntry {
    const _ASSERT_DEFAULT_CHECKPOINT_DISTANCE_IS_VALID: () = {
        let res = Self::validate_checkpoint_distance(
-            pageserver_api::config::tenant_conf_defaults::DEFAULT_CHECKPOINT_DISTANCE,
+            crate::tenant::config::defaults::DEFAULT_CHECKPOINT_DISTANCE,
        );
        if res.is_err() {
            panic!("default checkpoint distance is valid")
--- a/pageserver/src/tenant/storage_layer/layer_desc.rs
+++ b/pageserver/src/tenant/storage_layer/layer_desc.rs
@@ -12,10 +12,8 @@ use serde::{Deserialize, Serialize};
 #[cfg(test)]
 use utils::id::TenantId;
-/// A unique identifier of a persistent layer.
+/// A unique identifier of a persistent layer. This is different from `LayerDescriptor`, which is only used in the
-///
+/// benchmarks. This struct contains all necessary information to find the image / delta layer. It also provides
 /// This is different from `LayerDescriptor`, which is only used in the benchmarks.
 /// This struct contains all necessary information to find the image / delta layer. It also provides
 /// a unified way to generate layer information like file name.
 #[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize, Hash)]
 pub struct PersistentLayerDesc {
--- a/pageserver/src/tenant/storage_layer/layer_name.rs
+++ b/pageserver/src/tenant/storage_layer/layer_name.rs
@@ -217,9 +217,8 @@ impl fmt::Display for ImageLayerName {
    }
 }
-/// LayerName is the logical identity of a layer within a LayerMap at a moment in time.
+/// LayerName is the logical identity of a layer within a LayerMap at a moment in time.  The
-///
+/// LayerName is not a unique filename, as the same LayerName may have multiple physical incarnations
 /// The LayerName is not a unique filename, as the same LayerName may have multiple physical incarnations
 /// over time (e.g. across shard splits or compression). The physical filenames of layers in local
 /// storage and object names in remote storage consist of the LayerName plus some extra qualifiers
 /// that uniquely identify the physical incarnation of a layer (see [crate::tenant::remote_timeline_client::remote_layer_path])
--- a/pageserver/src/tenant/storage_layer/merge_iterator.rs
+++ b/pageserver/src/tenant/storage_layer/merge_iterator.rs
@@ -226,11 +226,9 @@ impl<'a> IteratorWrapper<'a> {
    }
 }
-/// A merge iterator over delta/image layer iterators.
+/// A merge iterator over delta/image layer iterators. When duplicated records are
-///
+/// found, the iterator will not perform any deduplication, and the caller should handle
-/// When duplicated records are found, the iterator will not perform any
+/// these situation. By saying duplicated records, there are many possibilities:
 /// deduplication, and the caller should handle these situation. By saying
 /// duplicated records, there are many possibilities:
 ///
 /// * Two same delta at the same LSN.
 /// * Two same image at the same LSN.
--- a/pageserver/src/tenant/storage_layer/split_writer.rs
+++ b/pageserver/src/tenant/storage_layer/split_writer.rs
@@ -34,10 +34,9 @@ impl SplitWriterResult {
    }
 }
-/// An image writer that takes images and produces multiple image layers.
+/// An image writer that takes images and produces multiple image layers. The interface does not
-///
+/// guarantee atomicity (i.e., if the image layer generation fails, there might be leftover files
-/// The interface does not guarantee atomicity (i.e., if the image layer generation
+/// to be cleaned up)
 /// fails, there might be leftover files to be cleaned up)
 #[must_use]
 pub struct SplitImageLayerWriter {
    inner: ImageLayerWriter,
@@ -188,23 +187,22 @@ impl SplitImageLayerWriter {
            .await
    }
-    /// This function will be deprecated with #8841.
+    /// When split writer fails, the caller should call this function and handle partially generated layers.
    pub(crate) fn take(self) -> anyhow::Result<(Vec<SplitWriterResult>, ImageLayerWriter)> {
        Ok((self.generated_layers, self.inner))
    }
 }
-/// A delta writer that takes key-lsn-values and produces multiple delta layers.
+/// A delta writer that takes key-lsn-values and produces multiple delta layers. The interface does not
-///
+/// guarantee atomicity (i.e., if the delta layer generation fails, there might be leftover files
-/// The interface does not guarantee atomicity (i.e., if the delta layer generation fails,
+/// to be cleaned up).
 /// there might be leftover files to be cleaned up).
 ///
 /// Note that if updates of a single key exceed the target size limit, all of the updates will be batched
 /// into a single file. This behavior might change in the future. For reference, the legacy compaction algorithm
 /// will split them into multiple files based on size.
 #[must_use]
 pub struct SplitDeltaLayerWriter {
-    inner: Option<(Key, DeltaLayerWriter)>,
+    inner: DeltaLayerWriter,
    target_layer_size: u64,
    generated_layers: Vec<SplitWriterResult>,
    conf: &'static PageServerConf,
@@ -212,6 +210,7 @@ pub struct SplitDeltaLayerWriter {
    tenant_shard_id: TenantShardId,
    lsn_range: Range<Lsn>,
    last_key_written: Key,
    start_key: Key,
 }
 impl SplitDeltaLayerWriter {
@@ -219,18 +218,29 @@ impl SplitDeltaLayerWriter {
        conf: &'static PageServerConf,
        timeline_id: TimelineId,
        tenant_shard_id: TenantShardId,
        start_key: Key,
        lsn_range: Range<Lsn>,
        target_layer_size: u64,
        ctx: &RequestContext,
    ) -> anyhow::Result<Self> {
        Ok(Self {
            target_layer_size,
-            inner: None,
+            inner: DeltaLayerWriter::new(
                conf,
                timeline_id,
                tenant_shard_id,
                start_key,
                lsn_range.clone(),
                ctx,
            )
            .await?,
            generated_layers: Vec::new(),
            conf,
            timeline_id,
            tenant_shard_id,
            lsn_range,
            last_key_written: Key::MIN,
            start_key,
        })
    }
@@ -253,26 +263,9 @@ impl SplitDeltaLayerWriter {
        //
        // Also, keep all updates of a single key in a single file. TODO: split them using the legacy compaction
        // strategy. https://github.com/neondatabase/neon/issues/8837
        if self.inner.is_none() {
            self.inner = Some((
                key,
                DeltaLayerWriter::new(
                    self.conf,
                    self.timeline_id,
                    self.tenant_shard_id,
                    key,
                    self.lsn_range.clone(),
                    ctx,
                )
                .await?,
            ));
        }
        let (_, inner) = self.inner.as_mut().unwrap();
        let addition_size_estimation = KEY_SIZE as u64 + 8 /* LSN u64 size */ + 80 /* value size estimation */;
-        if inner.num_keys() >= 1
+        if self.inner.num_keys() >= 1
-            && inner.estimated_size() + addition_size_estimation >= self.target_layer_size
+            && self.inner.estimated_size() + addition_size_estimation >= self.target_layer_size
        {
            if key != self.last_key_written {
                let next_delta_writer = DeltaLayerWriter::new(
@@ -284,13 +277,13 @@ impl SplitDeltaLayerWriter {
                    ctx,
                )
                .await?;
-                let (start_key, prev_delta_writer) =
+                let prev_delta_writer = std::mem::replace(&mut self.inner, next_delta_writer);
                    std::mem::replace(&mut self.inner, Some((key, next_delta_writer))).unwrap();
                let layer_key = PersistentLayerKey {
-                    key_range: start_key..key,
+                    key_range: self.start_key..key,
                    lsn_range: self.lsn_range.clone(),
                    is_delta: true,
                };
                self.start_key = key;
                if discard(&layer_key).await {
                    drop(prev_delta_writer);
                    self.generated_layers
@@ -301,18 +294,17 @@ impl SplitDeltaLayerWriter {
                    self.generated_layers
                        .push(SplitWriterResult::Produced(delta_layer));
                }
-            } else if inner.estimated_size() >= S3_UPLOAD_LIMIT {
+            } else if self.inner.estimated_size() >= S3_UPLOAD_LIMIT {
                // We have to produce a very large file b/c a key is updated too often.
                anyhow::bail!(
                    "a single key is updated too often: key={}, estimated_size={}, and the layer file cannot be produced",
                    key,
-                    inner.estimated_size()
+                    self.inner.estimated_size()
                );
            }
        }
        self.last_key_written = key;
-        let (_, inner) = self.inner.as_mut().unwrap();
+        self.inner.put_value(key, lsn, val, ctx).await
        inner.put_value(key, lsn, val, ctx).await
    }
    pub async fn put_value(
@@ -331,6 +323,7 @@ impl SplitDeltaLayerWriter {
        self,
        tline: &Arc<Timeline>,
        ctx: &RequestContext,
        end_key: Key,
        discard: D,
    ) -> anyhow::Result<Vec<SplitWriterResult>>
    where
@@ -342,15 +335,11 @@ impl SplitDeltaLayerWriter {
            inner,
            ..
        } = self;
        let Some((start_key, inner)) = inner else {
            return Ok(generated_layers);
        };
        if inner.num_keys() == 0 {
            return Ok(generated_layers);
        }
        let end_key = self.last_key_written.next();
        let layer_key = PersistentLayerKey {
-            key_range: start_key..end_key,
+            key_range: self.start_key..end_key,
            lsn_range: self.lsn_range.clone(),
            is_delta: true,
        };
@@ -369,14 +358,15 @@ impl SplitDeltaLayerWriter {
        self,
        tline: &Arc<Timeline>,
        ctx: &RequestContext,
        end_key: Key,
    ) -> anyhow::Result<Vec<SplitWriterResult>> {
-        self.finish_with_discard_fn(tline, ctx, |_| async { false })
+        self.finish_with_discard_fn(tline, ctx, end_key, |_| async { false })
            .await
    }
-    /// This function will be deprecated with #8841.
+    /// When split writer fails, the caller should call this function and handle partially generated layers.
-    pub(crate) fn take(self) -> anyhow::Result<(Vec<SplitWriterResult>, Option<DeltaLayerWriter>)> {
+    pub(crate) fn take(self) -> anyhow::Result<(Vec<SplitWriterResult>, DeltaLayerWriter)> {
-        Ok((self.generated_layers, self.inner.map(|x| x.1)))
+        Ok((self.generated_layers, self.inner))
    }
 }
@@ -440,8 +430,10 @@ mod tests {
            tenant.conf,
            tline.timeline_id,
            tenant.tenant_shard_id,
            get_key(0),
            Lsn(0x18)..Lsn(0x20),
            4 * 1024 * 1024,
            &ctx,
        )
        .await
        .unwrap();
@@ -466,22 +458,11 @@ mod tests {
            )
            .await
            .unwrap();
-        let layers = delta_writer.finish(&tline, &ctx).await.unwrap();
+        let layers = delta_writer
            .finish(&tline, &ctx, get_key(10))
            .await
            .unwrap();
        assert_eq!(layers.len(), 1);
        assert_eq!(
            layers
                .into_iter()
                .next()
                .unwrap()
                .into_resident_layer()
                .layer_desc()
                .key(),
            PersistentLayerKey {
                key_range: get_key(0)..get_key(1),
                lsn_range: Lsn(0x18)..Lsn(0x20),
                is_delta: true
            }
        );
    }
    #[tokio::test]
@@ -518,8 +499,10 @@ mod tests {
            tenant.conf,
            tline.timeline_id,
            tenant.tenant_shard_id,
            get_key(0),
            Lsn(0x18)..Lsn(0x20),
            4 * 1024 * 1024,
            &ctx,
        )
        .await
        .unwrap();
@@ -548,7 +531,10 @@ mod tests {
            .finish(&tline, &ctx, get_key(N as u32))
            .await
            .unwrap();
-        let delta_layers = delta_writer.finish(&tline, &ctx).await.unwrap();
+        let delta_layers = delta_writer
            .finish(&tline, &ctx, get_key(N as u32))
            .await
            .unwrap();
        if discard {
            for layer in image_layers {
                layer.into_discarded_layer();
@@ -567,14 +553,6 @@ mod tests {
                .collect_vec();
            assert_eq!(image_layers.len(), N / 512 + 1);
            assert_eq!(delta_layers.len(), N / 512 + 1);
            assert_eq!(
                delta_layers.first().unwrap().layer_desc().key_range.start,
                get_key(0)
            );
            assert_eq!(
                delta_layers.last().unwrap().layer_desc().key_range.end,
                get_key(N as u32)
            );
            for idx in 0..image_layers.len() {
                assert_ne!(image_layers[idx].layer_desc().key_range.start, Key::MIN);
                assert_ne!(image_layers[idx].layer_desc().key_range.end, Key::MAX);
@@ -622,8 +600,10 @@ mod tests {
            tenant.conf,
            tline.timeline_id,
            tenant.tenant_shard_id,
            get_key(0),
            Lsn(0x18)..Lsn(0x20),
            4 * 1024,
            &ctx,
        )
        .await
        .unwrap();
@@ -662,35 +642,11 @@ mod tests {
            )
            .await
            .unwrap();
-        let layers = delta_writer.finish(&tline, &ctx).await.unwrap();
+        let layers = delta_writer
            .finish(&tline, &ctx, get_key(10))
            .await
            .unwrap();
        assert_eq!(layers.len(), 2);
        let mut layers_iter = layers.into_iter();
        assert_eq!(
            layers_iter
                .next()
                .unwrap()
                .into_resident_layer()
                .layer_desc()
                .key(),
            PersistentLayerKey {
                key_range: get_key(0)..get_key(1),
                lsn_range: Lsn(0x18)..Lsn(0x20),
                is_delta: true
            }
        );
        assert_eq!(
            layers_iter
                .next()
                .unwrap()
                .into_resident_layer()
                .layer_desc()
                .key(),
            PersistentLayerKey {
                key_range: get_key(1)..get_key(2),
                lsn_range: Lsn(0x18)..Lsn(0x20),
                is_delta: true
            }
        );
    }
    #[tokio::test]
@@ -710,8 +666,10 @@ mod tests {
            tenant.conf,
            tline.timeline_id,
            tenant.tenant_shard_id,
            get_key(0),
            Lsn(0x10)..Lsn(N as u64 * 16 + 0x10),
            4 * 1024 * 1024,
            &ctx,
        )
        .await
        .unwrap();
@@ -729,20 +687,10 @@ mod tests {
                .await
                .unwrap();
        }
-        let delta_layers = delta_writer.finish(&tline, &ctx).await.unwrap();
+        let delta_layers = delta_writer
            .finish(&tline, &ctx, get_key(N as u32))
            .await
            .unwrap();
        assert_eq!(delta_layers.len(), 1);
        let delta_layer = delta_layers
            .into_iter()
            .next()
            .unwrap()
            .into_resident_layer();
        assert_eq!(
            delta_layer.layer_desc().key(),
            PersistentLayerKey {
                key_range: get_key(0)..get_key(1),
                lsn_range: Lsn(0x10)..Lsn(N as u64 * 16 + 0x10),
                is_delta: true
            }
        );
    }
 }
--- a/pageserver/src/tenant/tasks.rs
+++ b/pageserver/src/tenant/tasks.rs
@@ -10,6 +10,7 @@ use crate::context::{DownloadBehavior, RequestContext};
 use crate::metrics::TENANT_TASK_EVENTS;
 use crate::task_mgr;
 use crate::task_mgr::{TaskKind, BACKGROUND_RUNTIME};
 use crate::tenant::config::defaults::DEFAULT_COMPACTION_PERIOD;
 use crate::tenant::throttle::Stats;
 use crate::tenant::timeline::CompactionError;
 use crate::tenant::{Tenant, TenantState};
@@ -455,11 +456,9 @@ async fn ingest_housekeeping_loop(tenant: Arc<Tenant>, cancel: CancellationToken
            // If compaction period is set to zero (to disable it), then we will use a reasonable default
            let period = if period == Duration::ZERO {
-                humantime::Duration::from_str(
+                humantime::Duration::from_str(DEFAULT_COMPACTION_PERIOD)
-                    pageserver_api::config::tenant_conf_defaults::DEFAULT_COMPACTION_PERIOD,
+                    .unwrap()
-                )
+                    .into()
                .unwrap()
                .into()
            } else {
                period
            };
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -66,6 +66,7 @@ use std::{
 use crate::{
    aux_file::AuxFileSizeEstimator,
    tenant::{
        config::defaults::DEFAULT_PITR_INTERVAL,
        layer_map::{LayerMap, SearchResult},
        metadata::TimelineMetadata,
        storage_layer::{inmemory_layer::IndexEntry, PersistentLayerDesc},
@@ -101,7 +102,6 @@ use crate::{
    pgdatadir_mapping::{AuxFilesDirectory, DirectoryKind},
    virtual_file::{MaybeFatalIo, VirtualFile},
 };
 use pageserver_api::config::tenant_conf_defaults::DEFAULT_PITR_INTERVAL;
 use crate::config::PageServerConf;
 use crate::keyspace::{KeyPartitioning, KeySpace};
@@ -2243,7 +2243,7 @@ impl Timeline {
            };
            if aux_file_policy == Some(AuxFilePolicy::V1) {
-                warn!("this timeline is using deprecated aux file policy V1 (when loading the timeline)");
+                warn!("this timeline is using deprecated aux file policy V1");
            }
            result.repartition_threshold =
--- a/pageserver/src/tenant/timeline/compaction.rs
+++ b/pageserver/src/tenant/timeline/compaction.rs
@@ -29,6 +29,7 @@ use utils::id::TimelineId;
 use crate::context::{AccessStatsBehavior, RequestContext, RequestContextBuilder};
 use crate::page_cache;
 use crate::tenant::config::defaults::{DEFAULT_CHECKPOINT_DISTANCE, DEFAULT_COMPACTION_THRESHOLD};
 use crate::tenant::remote_timeline_client::WaitCompletionError;
 use crate::tenant::storage_layer::merge_iterator::MergeIterator;
 use crate::tenant::storage_layer::split_writer::{
@@ -42,9 +43,6 @@ use crate::tenant::timeline::{drop_rlock, DeltaLayerWriter, ImageLayerWriter};
 use crate::tenant::timeline::{Layer, ResidentLayer};
 use crate::tenant::DeltaLayer;
 use crate::virtual_file::{MaybeFatalIo, VirtualFile};
 use pageserver_api::config::tenant_conf_defaults::{
    DEFAULT_CHECKPOINT_DISTANCE, DEFAULT_COMPACTION_THRESHOLD,
 };
 use crate::keyspace::KeySpace;
 use crate::repository::{Key, Value};
@@ -911,13 +909,137 @@ impl Timeline {
        // we're compacting, in key, LSN order.
        // If there's both a Value::Image and Value::WalRecord for the same (key,lsn),
        // then the Value::Image is ordered before Value::WalRecord.
-        let mut all_values_iter = {
+        //
-            let mut deltas = Vec::with_capacity(deltas_to_compact.len());
+        // TODO(https://github.com/neondatabase/neon/issues/8184): remove the page cached blob_io
-            for l in deltas_to_compact.iter() {
+        // option and validation code once we've reached confidence.
-                let l = l.get_as_delta(ctx).await.map_err(CompactionError::Other)?;
+        enum AllValuesIter<'a> {
-                deltas.push(l);
+            PageCachedBlobIo {
                all_keys_iter: VecIter<'a>,
            },
            StreamingKmergeBypassingPageCache {
                merge_iter: MergeIterator<'a>,
            },
            ValidatingStreamingKmergeBypassingPageCache {
                mode: CompactL0BypassPageCacheValidation,
                merge_iter: MergeIterator<'a>,
                all_keys_iter: VecIter<'a>,
            },
        }
        type VecIter<'a> = std::slice::Iter<'a, DeltaEntry<'a>>; // TODO: distinguished lifetimes
        impl AllValuesIter<'_> {
            async fn next_all_keys_iter(
                iter: &mut VecIter<'_>,
                ctx: &RequestContext,
            ) -> anyhow::Result<Option<(Key, Lsn, Value)>> {
                let Some(DeltaEntry {
                    key,
                    lsn,
                    val: value_ref,
                    ..
                }) = iter.next()
                else {
                    return Ok(None);
                };
                let value = value_ref.load(ctx).await?;
                Ok(Some((*key, *lsn, value)))
            }
            async fn next(
                &mut self,
                ctx: &RequestContext,
            ) -> anyhow::Result<Option<(Key, Lsn, Value)>> {
                match self {
                    AllValuesIter::PageCachedBlobIo { all_keys_iter: iter } => {
                      Self::next_all_keys_iter(iter, ctx).await
                    }
                    AllValuesIter::StreamingKmergeBypassingPageCache { merge_iter } => merge_iter.next().await,
                    AllValuesIter::ValidatingStreamingKmergeBypassingPageCache { mode, merge_iter, all_keys_iter } => async {
                        // advance both iterators
                        let all_keys_iter_item = Self::next_all_keys_iter(all_keys_iter, ctx).await;
                        let merge_iter_item = merge_iter.next().await;
                        // compare results & log warnings as needed
                        macro_rules! rate_limited_warn {
                            ($($arg:tt)*) => {{
                                if cfg!(debug_assertions) || cfg!(feature = "testing") {
                                    warn!($($arg)*);
                                    panic!("CompactL0BypassPageCacheValidation failure, check logs");
                                }
                                use once_cell::sync::Lazy;
                                use utils::rate_limit::RateLimit;
                                use std::sync::Mutex;
                                use std::time::Duration;
                                static LOGGED: Lazy<Mutex<RateLimit>> =
                                    Lazy::new(|| Mutex::new(RateLimit::new(Duration::from_secs(10))));
                                let mut rate_limit = LOGGED.lock().unwrap();
                                rate_limit.call(|| {
                                    warn!($($arg)*);
                                });
                            }}
                        }
                        match (&all_keys_iter_item, &merge_iter_item) {
                            (Err(_), Err(_)) => {
                                // don't bother asserting equivality of the errors
                            }
                            (Err(all_keys), Ok(merge)) => {
                                rate_limited_warn!(?merge, "all_keys_iter returned an error where merge did not: {all_keys:?}");
                            },
                            (Ok(all_keys), Err(merge)) => {
                                rate_limited_warn!(?all_keys, "merge returned an error where all_keys_iter did not: {merge:?}");
                            },
                            (Ok(None), Ok(None)) => { }
                            (Ok(Some(all_keys)), Ok(None)) => {
                                rate_limited_warn!(?all_keys, "merge returned None where all_keys_iter returned Some");
                            }
                            (Ok(None), Ok(Some(merge))) => {
                                rate_limited_warn!(?merge, "all_keys_iter returned None where merge returned Some");
                            }
                            (Ok(Some((all_keys_key, all_keys_lsn, all_keys_value))), Ok(Some((merge_key, merge_lsn, merge_value)))) => {
                                match mode {
                                    // TODO: in this mode, we still load the value from disk for both iterators, even though we only need the all_keys_iter one
                                    CompactL0BypassPageCacheValidation::KeyLsn => {
                                        let all_keys = (all_keys_key, all_keys_lsn);
                                        let merge = (merge_key, merge_lsn);
                                        if all_keys != merge {
                                            rate_limited_warn!(?all_keys, ?merge, "merge returned a different (Key,LSN) than all_keys_iter");
                                        }
                                    }
                                    CompactL0BypassPageCacheValidation::KeyLsnValue => {
                                        let all_keys = (all_keys_key, all_keys_lsn, all_keys_value);
                                        let merge = (merge_key, merge_lsn, merge_value);
                                        if all_keys != merge {
                                            rate_limited_warn!(?all_keys, ?merge, "merge returned a different (Key,LSN,Value) than all_keys_iter");
                                        }
                                    }
                                }
                            }
                        }
                        // in case of mismatch, trust the legacy all_keys_iter_item
                        all_keys_iter_item
                    }.instrument(info_span!("next")).await
                }
            }
        }
        let mut all_values_iter = match &self.conf.compact_level0_phase1_value_access {
            CompactL0Phase1ValueAccess::PageCachedBlobIo => AllValuesIter::PageCachedBlobIo {
                all_keys_iter: all_keys.iter(),
            },
            CompactL0Phase1ValueAccess::StreamingKmerge { validate } => {
                let merge_iter = {
                    let mut deltas = Vec::with_capacity(deltas_to_compact.len());
                    for l in deltas_to_compact.iter() {
                        let l = l.get_as_delta(ctx).await.map_err(CompactionError::Other)?;
                        deltas.push(l);
                    }
                    MergeIterator::create(&deltas, &[], ctx)
                };
                match validate {
                    None => AllValuesIter::StreamingKmergeBypassingPageCache { merge_iter },
                    Some(validate) => AllValuesIter::ValidatingStreamingKmergeBypassingPageCache {
                        mode: validate.clone(),
                        merge_iter,
                        all_keys_iter: all_keys.iter(),
                    },
                }
            }
            MergeIterator::create(&deltas, &[], ctx)
        };
        // This iterator walks through all keys and is needed to calculate size used by each key
@@ -994,7 +1116,7 @@ impl Timeline {
        let mut keys = 0;
        while let Some((key, lsn, value)) = all_values_iter
-            .next()
+            .next(ctx)
            .await
            .map_err(CompactionError::Other)?
        {
@@ -1311,6 +1433,43 @@ impl TryFrom<CompactLevel0Phase1StatsBuilder> for CompactLevel0Phase1Stats {
    }
 }
 #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)]
 #[serde(tag = "mode", rename_all = "kebab-case", deny_unknown_fields)]
 pub enum CompactL0Phase1ValueAccess {
    /// The old way.
    PageCachedBlobIo,
    /// The new way.
    StreamingKmerge {
        /// If set, we run both the old way and the new way, validate that
        /// they are identical (=> [`CompactL0BypassPageCacheValidation`]),
        /// and if the validation fails,
        /// - in tests: fail them with a panic or
        /// - in prod, log a rate-limited warning and use the old way's results.
        ///
        /// If not set, we only run the new way and trust its results.
        validate: Option<CompactL0BypassPageCacheValidation>,
    },
 }
 /// See [`CompactL0Phase1ValueAccess::StreamingKmerge`].
 #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)]
 #[serde(rename_all = "kebab-case")]
 pub enum CompactL0BypassPageCacheValidation {
    /// Validate that the series of (key, lsn) pairs are the same.
    KeyLsn,
    /// Validate that the entire output of old and new way is identical.
    KeyLsnValue,
 }
 impl Default for CompactL0Phase1ValueAccess {
    fn default() -> Self {
        CompactL0Phase1ValueAccess::StreamingKmerge {
            // TODO(https://github.com/neondatabase/neon/issues/8184): change to None once confident
            validate: Some(CompactL0BypassPageCacheValidation::KeyLsnValue),
        }
    }
 }
 impl Timeline {
    /// Entry point for new tiered compaction algorithm.
    ///
@@ -1809,6 +1968,7 @@ impl Timeline {
            .unwrap();
        // We don't want any of the produced layers to cover the full key range (i.e., MIN..MAX) b/c it will then be recognized
        // as an L0 layer.
        let hack_end_key = Key::NON_L0_MAX;
        let mut delta_layers = Vec::new();
        let mut image_layers = Vec::new();
        let mut downloaded_layers = Vec::new();
@@ -1854,8 +2014,10 @@ impl Timeline {
            self.conf,
            self.timeline_id,
            self.tenant_shard_id,
            Key::MIN,
            lowest_retain_lsn..end_lsn,
            self.get_compaction_target_size(),
            ctx,
        )
        .await?;
@@ -1962,7 +2124,7 @@ impl Timeline {
        let produced_image_layers = if let Some(writer) = image_layer_writer {
            if !dry_run {
                writer
-                    .finish_with_discard_fn(self, ctx, Key::MAX, discard)
+                    .finish_with_discard_fn(self, ctx, hack_end_key, discard)
                    .await?
            } else {
                let (layers, _) = writer.take()?;
@@ -1975,7 +2137,7 @@ impl Timeline {
        let produced_delta_layers = if !dry_run {
            delta_layer_writer
-                .finish_with_discard_fn(self, ctx, discard)
+                .finish_with_discard_fn(self, ctx, hack_end_key, discard)
                .await?
        } else {
            let (layers, _) = delta_layer_writer.take()?;
--- a/pageserver/src/tenant/vectored_blob_io.rs
+++ b/pageserver/src/tenant/vectored_blob_io.rs
@@ -16,6 +16,7 @@
 //! Note that the vectored blob api does *not* go through the page cache.
 use std::collections::BTreeMap;
 use std::num::NonZeroUsize;
 use bytes::BytesMut;
 use pageserver_api::key::Key;
@@ -28,6 +29,9 @@ use crate::context::RequestContext;
 use crate::tenant::blob_io::{BYTE_UNCOMPRESSED, BYTE_ZSTD, LEN_COMPRESSION_BIT_MASK};
 use crate::virtual_file::{self, VirtualFile};
 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
 pub struct MaxVectoredReadBytes(pub NonZeroUsize);
 /// Metadata bundled with the start and end offset of a blob.
 #[derive(Copy, Clone, Debug)]
 pub struct BlobMeta {
@@ -593,10 +597,8 @@ impl<'a> VectoredBlobReader<'a> {
    }
 }
-/// Read planner used in [`crate::tenant::storage_layer::image_layer::ImageLayerIterator`].
+/// Read planner used in [`crate::tenant::storage_layer::image_layer::ImageLayerIterator`]. It provides a streaming API for
-///
+/// getting read blobs. It returns a batch when `handle` gets called and when the current key would just exceed the read_size and
 /// It provides a streaming API for getting read blobs. It returns a batch when
 /// `handle` gets called and when the current key would just exceed the read_size and
 /// max_cnt constraints.
 pub struct StreamingVectoredReadPlanner {
    read_builder: Option<VectoredReadBuilder>,
--- a/pageserver/src/virtual_file.rs
+++ b/pageserver/src/virtual_file.rs
@@ -1,7 +1,6 @@
 //! VirtualFile is like a normal File, but it's not bound directly to
 //! a file descriptor.
 //!
-//! Instead, the file is opened when it's read from,
+//! VirtualFile is like a normal File, but it's not bound directly to
 //! a file descriptor. Instead, the file is opened when it's read from,
 //! and if too many files are open globally in the system, least-recently
 //! used ones are closed.
 //!
@@ -11,6 +10,7 @@
 //! This is similar to PostgreSQL's virtual file descriptor facility in
 //! src/backend/storage/file/fd.c
 //!
 use crate::config::defaults::DEFAULT_IO_BUFFER_ALIGNMENT;
 use crate::context::RequestContext;
 use crate::metrics::{StorageIoOperation, STORAGE_IO_SIZE, STORAGE_IO_TIME_METRIC};
@@ -19,7 +19,6 @@ use crate::tenant::TENANTS_SEGMENT_NAME;
 use camino::{Utf8Path, Utf8PathBuf};
 use once_cell::sync::OnceCell;
 use owned_buffers_io::io_buf_ext::FullSlice;
 use pageserver_api::config::defaults::DEFAULT_IO_BUFFER_ALIGNMENT;
 use pageserver_api::shard::TenantShardId;
 use std::fs::File;
 use std::io::{Error, ErrorKind, Seek, SeekFrom};
--- a/pageserver/src/virtual_file/io_engine.rs
+++ b/pageserver/src/virtual_file/io_engine.rs
@@ -84,14 +84,9 @@ pub(crate) fn get() -> IoEngine {
                        }
                    },
                    Err(std::env::VarError::NotPresent) => {
-                        #[cfg(target_os = "linux")]
+                        crate::config::defaults::DEFAULT_VIRTUAL_FILE_IO_ENGINE
-                        {
+                            .parse()
-                            IoEngineKind::TokioEpollUring
+                            .unwrap()
                        }
                        #[cfg(not(target_os = "linux"))]
                        {
                            IoEngineKind::StdFs
                        }
                    }
                    Err(std::env::VarError::NotUnicode(_)) => {
                        panic!("env var {env_var_name} is not unicode");
--- a/pageserver/src/walingest.rs
+++ b/pageserver/src/walingest.rs
@@ -25,7 +25,9 @@ use std::time::Duration;
 use std::time::SystemTime;
 use pageserver_api::shard::ShardIdentity;
-use postgres_ffi::{dispatch_pgversion, enum_pgversion, enum_pgversion_dispatch, TimestampTz};
+use postgres_ffi::v14::nonrelfile_utils::clogpage_precedes;
 use postgres_ffi::v14::nonrelfile_utils::slru_may_delete_clogsegment;
 use postgres_ffi::TimestampTz;
 use postgres_ffi::{fsm_logical_to_physical, page_is_new, page_set_lsn};
 use anyhow::{bail, Context, Result};
@@ -46,31 +48,16 @@ use pageserver_api::key::rel_block_to_key;
 use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind};
 use postgres_ffi::pg_constants;
 use postgres_ffi::relfile_utils::{FSM_FORKNUM, INIT_FORKNUM, MAIN_FORKNUM, VISIBILITYMAP_FORKNUM};
 use postgres_ffi::v14::nonrelfile_utils::mx_offset_to_member_segment;
 use postgres_ffi::v14::xlog_utils::*;
 use postgres_ffi::v14::CheckPoint;
 use postgres_ffi::TransactionId;
 use postgres_ffi::BLCKSZ;
 use utils::bin_ser::SerializeError;
 use utils::lsn::Lsn;
 enum_pgversion! {CheckPoint, pgv::CheckPoint}
 impl CheckPoint {
    fn encode(&self) -> Result<Bytes, SerializeError> {
        enum_pgversion_dispatch!(self, CheckPoint, cp, { cp.encode() })
    }
    fn update_next_xid(&mut self, xid: u32) -> bool {
        enum_pgversion_dispatch!(self, CheckPoint, cp, { cp.update_next_xid(xid) })
    }
    pub fn update_next_multixid(&mut self, multi_xid: u32, multi_offset: u32) -> bool {
        enum_pgversion_dispatch!(self, CheckPoint, cp, {
            cp.update_next_multixid(multi_xid, multi_offset)
        })
    }
 }
 pub struct WalIngest {
    shard: ShardIdentity,
    pg_version: u32,
    checkpoint: CheckPoint,
    checkpoint_modified: bool,
    warn_ingest_lag: WarnIngestLag,
@@ -91,16 +78,12 @@ impl WalIngest {
        // Fetch the latest checkpoint into memory, so that we can compare with it
        // quickly in `ingest_record` and update it when it changes.
        let checkpoint_bytes = timeline.get_checkpoint(startpoint, ctx).await?;
-        let pgversion = timeline.pg_version;
+        let checkpoint = CheckPoint::decode(&checkpoint_bytes)?;
-
+        trace!("CheckPoint.nextXid = {}", checkpoint.nextXid.value);
        let checkpoint = dispatch_pgversion!(pgversion, {
            let checkpoint = pgv::CheckPoint::decode(&checkpoint_bytes)?;
            trace!("CheckPoint.nextXid = {}", checkpoint.nextXid.value);
            <pgv::CheckPoint as Into<CheckPoint>>::into(checkpoint)
        });
        Ok(WalIngest {
            shard: *timeline.get_shard_identity(),
            pg_version: timeline.pg_version,
            checkpoint,
            checkpoint_modified: false,
            warn_ingest_lag: WarnIngestLag {
@@ -134,7 +117,7 @@ impl WalIngest {
        modification.set_lsn(lsn)?;
-        if decoded.is_dbase_create_copy(pg_version) {
+        if decoded.is_dbase_create_copy(self.pg_version) {
            // Records of this type should always be preceded by a commit(), as they
            // rely on reading data pages back from the Timeline.
            assert!(!modification.has_dirty_data_pages());
@@ -354,67 +337,70 @@ impl WalIngest {
            pg_constants::RM_XLOG_ID => {
                let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK;
-                enum_pgversion_dispatch!(&mut self.checkpoint, CheckPoint, cp, {
+                if info == pg_constants::XLOG_NEXTOID {
-                    if info == pg_constants::XLOG_NEXTOID {
+                    let next_oid = buf.get_u32_le();
-                        let next_oid = buf.get_u32_le();
+                    if self.checkpoint.nextOid != next_oid {
-                        if cp.nextOid != next_oid {
+                        self.checkpoint.nextOid = next_oid;
                            cp.nextOid = next_oid;
                            self.checkpoint_modified = true;
                        }
                    } else if info == pg_constants::XLOG_CHECKPOINT_ONLINE
                        || info == pg_constants::XLOG_CHECKPOINT_SHUTDOWN
                    {
                        let mut checkpoint_bytes = [0u8; pgv::xlog_utils::SIZEOF_CHECKPOINT];
                        buf.copy_to_slice(&mut checkpoint_bytes);
                        let xlog_checkpoint = pgv::CheckPoint::decode(&checkpoint_bytes)?;
                        trace!(
                            "xlog_checkpoint.oldestXid={}, checkpoint.oldestXid={}",
                            xlog_checkpoint.oldestXid,
                            cp.oldestXid
                        );
                        if (cp.oldestXid.wrapping_sub(xlog_checkpoint.oldestXid) as i32) < 0 {
                            cp.oldestXid = xlog_checkpoint.oldestXid;
                        }
                        trace!(
                            "xlog_checkpoint.oldestActiveXid={}, checkpoint.oldestActiveXid={}",
                            xlog_checkpoint.oldestActiveXid,
                            cp.oldestActiveXid
                        );
                        // A shutdown checkpoint has `oldestActiveXid == InvalidTransactionid`,
                        // because at shutdown, all in-progress transactions will implicitly
                        // end. Postgres startup code knows that, and allows hot standby to start
                        // immediately from a shutdown checkpoint.
                        //
                        // In Neon, Postgres hot standby startup always behaves as if starting from
                        // an online checkpoint. It needs a valid `oldestActiveXid` value, so
                        // instead of overwriting self.checkpoint.oldestActiveXid with
                        // InvalidTransactionid from the checkpoint WAL record, update it to a
                        // proper value, knowing that there are no in-progress transactions at this
                        // point, except for prepared transactions.
                        //
                        // See also the neon code changes in the InitWalRecovery() function.
                        if xlog_checkpoint.oldestActiveXid == pg_constants::INVALID_TRANSACTION_ID
                            && info == pg_constants::XLOG_CHECKPOINT_SHUTDOWN
                        {
                            let mut oldest_active_xid = cp.nextXid.value as u32;
                            for xid in modification.tline.list_twophase_files(lsn, ctx).await? {
                                if (xid.wrapping_sub(oldest_active_xid) as i32) < 0 {
                                    oldest_active_xid = xid;
                                }
                            }
                            cp.oldestActiveXid = oldest_active_xid;
                        } else {
                            cp.oldestActiveXid = xlog_checkpoint.oldestActiveXid;
                        }
                        // Write a new checkpoint key-value pair on every checkpoint record, even
                        // if nothing really changed. Not strictly required, but it seems nice to
                        // have some trace of the checkpoint records in the layer files at the same
                        // LSNs.
                        self.checkpoint_modified = true;
                    }
-                });
+                } else if info == pg_constants::XLOG_CHECKPOINT_ONLINE
                    || info == pg_constants::XLOG_CHECKPOINT_SHUTDOWN
                {
                    let mut checkpoint_bytes = [0u8; SIZEOF_CHECKPOINT];
                    buf.copy_to_slice(&mut checkpoint_bytes);
                    let xlog_checkpoint = CheckPoint::decode(&checkpoint_bytes)?;
                    trace!(
                        "xlog_checkpoint.oldestXid={}, checkpoint.oldestXid={}",
                        xlog_checkpoint.oldestXid,
                        self.checkpoint.oldestXid
                    );
                    if (self
                        .checkpoint
                        .oldestXid
                        .wrapping_sub(xlog_checkpoint.oldestXid) as i32)
                        < 0
                    {
                        self.checkpoint.oldestXid = xlog_checkpoint.oldestXid;
                    }
                    trace!(
                        "xlog_checkpoint.oldestActiveXid={}, checkpoint.oldestActiveXid={}",
                        xlog_checkpoint.oldestActiveXid,
                        self.checkpoint.oldestActiveXid
                    );
                    // A shutdown checkpoint has `oldestActiveXid == InvalidTransactionid`,
                    // because at shutdown, all in-progress transactions will implicitly
                    // end. Postgres startup code knows that, and allows hot standby to start
                    // immediately from a shutdown checkpoint.
                    //
                    // In Neon, Postgres hot standby startup always behaves as if starting from
                    // an online checkpoint. It needs a valid `oldestActiveXid` value, so
                    // instead of overwriting self.checkpoint.oldestActiveXid with
                    // InvalidTransactionid from the checkpoint WAL record, update it to a
                    // proper value, knowing that there are no in-progress transactions at this
                    // point, except for prepared transactions.
                    //
                    // See also the neon code changes in the InitWalRecovery() function.
                    if xlog_checkpoint.oldestActiveXid == pg_constants::INVALID_TRANSACTION_ID
                        && info == pg_constants::XLOG_CHECKPOINT_SHUTDOWN
                    {
                        let mut oldest_active_xid = self.checkpoint.nextXid.value as u32;
                        for xid in modification.tline.list_twophase_files(lsn, ctx).await? {
                            if (xid.wrapping_sub(oldest_active_xid) as i32) < 0 {
                                oldest_active_xid = xid;
                            }
                        }
                        self.checkpoint.oldestActiveXid = oldest_active_xid;
                    } else {
                        self.checkpoint.oldestActiveXid = xlog_checkpoint.oldestActiveXid;
                    }
                    // Write a new checkpoint key-value pair on every checkpoint record, even
                    // if nothing really changed. Not strictly required, but it seems nice to
                    // have some trace of the checkpoint records in the layer files at the same
                    // LSNs.
                    self.checkpoint_modified = true;
                }
            }
            pg_constants::RM_LOGICALMSG_ID => {
                let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK;
@@ -438,11 +424,7 @@ impl WalIngest {
                let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK;
                if info == pg_constants::XLOG_RUNNING_XACTS {
                    let xlrec = crate::walrecord::XlRunningXacts::decode(&mut buf);
-
+                    self.checkpoint.oldestActiveXid = xlrec.oldest_running_xid;
                    enum_pgversion_dispatch!(&mut self.checkpoint, CheckPoint, cp, {
                        cp.oldestActiveXid = xlrec.oldest_running_xid;
                    });
                    self.checkpoint_modified = true;
                }
            }
@@ -557,7 +539,7 @@ impl WalIngest {
            && blk.has_image
            && decoded.xl_rmid == pg_constants::RM_XLOG_ID
            && (decoded.xl_info == pg_constants::XLOG_FPI
-            || decoded.xl_info == pg_constants::XLOG_FPI_FOR_HINT)
+                || decoded.xl_info == pg_constants::XLOG_FPI_FOR_HINT)
            // compression of WAL is not yet supported: fall back to storing the original WAL record
            && !postgres_ffi::bkpimage_is_compressed(blk.bimg_info, modification.tline.pg_version)
            // do not materialize null pages because them most likely be soon replaced with real data
@@ -1222,7 +1204,7 @@ impl WalIngest {
            if rec.blkno % pg_constants::SLOTS_PER_FSM_PAGE != 0 {
                // Tail of last remaining FSM page has to be zeroed.
                // We are not precise here and instead of digging in FSM bitmap format just clear the whole page.
-                modification.put_rel_page_image_zero(rel, fsm_physical_page_no)?;
+                modification.put_rel_page_image_zero(rel, fsm_physical_page_no);
                fsm_physical_page_no += 1;
            }
            let nblocks = get_relsize(modification, rel, ctx).await?;
@@ -1244,7 +1226,7 @@ impl WalIngest {
            if rec.blkno % pg_constants::VM_HEAPBLOCKS_PER_PAGE != 0 {
                // Tail of last remaining vm page has to be zeroed.
                // We are not precise here and instead of digging in VM bitmap format just clear the whole page.
-                modification.put_rel_page_image_zero(rel, vm_page_no)?;
+                modification.put_rel_page_image_zero(rel, vm_page_no);
                vm_page_no += 1;
            }
            let nblocks = get_relsize(modification, rel, ctx).await?;
@@ -1260,17 +1242,12 @@ impl WalIngest {
    fn warn_on_ingest_lag(
        &mut self,
        conf: &crate::config::PageServerConf,
-        wal_timestamp: TimestampTz,
+        wal_timestmap: TimestampTz,
    ) {
        debug_assert_current_span_has_tenant_and_timeline_id();
        let now = SystemTime::now();
        let rate_limits = &mut self.warn_ingest_lag;
-
+        match try_from_pg_timestamp(wal_timestmap) {
        let ts = enum_pgversion_dispatch!(&self.checkpoint, CheckPoint, _cp, {
            pgv::xlog_utils::try_from_pg_timestamp(wal_timestamp)
        });
        match ts {
            Ok(ts) => {
                match now.duration_since(ts) {
                    Ok(lag) => {
@@ -1280,7 +1257,7 @@ impl WalIngest {
                                warn!(%rate_limit_stats, %lag, "ingesting record with timestamp lagging more than wait_lsn_timeout");
                            })
                        }
-                    }
+                    },
                    Err(e) => {
                        let delta_t = e.duration();
                        // determined by prod victoriametrics query: 1000 * (timestamp(node_time_seconds{neon_service="pageserver"}) - node_time_seconds)
@@ -1294,6 +1271,7 @@ impl WalIngest {
                        }
                    }
                };
            }
            Err(error) => {
                rate_limits.timestamp_invalid_msg_ratelimit.call2(|rate_limit_stats| {
@@ -1401,17 +1379,14 @@ impl WalIngest {
        // truncated, but a checkpoint record with the updated values isn't written until
        // later. In Neon, a server can start at any LSN, not just on a checkpoint record,
        // so we keep the oldestXid and oldestXidDB up-to-date.
-        enum_pgversion_dispatch!(&mut self.checkpoint, CheckPoint, cp, {
+        self.checkpoint.oldestXid = xlrec.oldest_xid;
-            cp.oldestXid = xlrec.oldest_xid;
+        self.checkpoint.oldestXidDB = xlrec.oldest_xid_db;
            cp.oldestXidDB = xlrec.oldest_xid_db;
        });
        self.checkpoint_modified = true;
        // TODO Treat AdvanceOldestClogXid() or write a comment why we don't need it
        let latest_page_number =
-            enum_pgversion_dispatch!(self.checkpoint, CheckPoint, cp, { cp.nextXid.value }) as u32
+            self.checkpoint.nextXid.value as u32 / pg_constants::CLOG_XACTS_PER_PAGE;
                / pg_constants::CLOG_XACTS_PER_PAGE;
        // Now delete all segments containing pages between xlrec.pageno
        // and latest_page_number.
@@ -1419,9 +1394,7 @@ impl WalIngest {
        // First, make an important safety check:
        // the current endpoint page must not be eligible for removal.
        // See SimpleLruTruncate() in slru.c
-        if dispatch_pgversion!(modification.tline.pg_version, {
+        if clogpage_precedes(latest_page_number, xlrec.pageno) {
            pgv::nonrelfile_utils::clogpage_precedes(latest_page_number, xlrec.pageno)
        }) {
            info!("could not truncate directory pg_xact apparent wraparound");
            return Ok(());
        }
@@ -1438,12 +1411,7 @@ impl WalIngest {
            .await?
        {
            let segpage = segno * pg_constants::SLRU_PAGES_PER_SEGMENT;
-
+            if slru_may_delete_clogsegment(segpage, xlrec.pageno) {
            let may_delete = dispatch_pgversion!(modification.tline.pg_version, {
                pgv::nonrelfile_utils::slru_may_delete_clogsegment(segpage, xlrec.pageno)
            });
            if may_delete {
                modification
                    .drop_slru_segment(SlruKind::Clog, segno, ctx)
                    .await?;
@@ -1562,23 +1530,14 @@ impl WalIngest {
        xlrec: &XlMultiXactTruncate,
        ctx: &RequestContext,
    ) -> Result<()> {
-        let (maxsegment, startsegment, endsegment) =
+        self.checkpoint.oldestMulti = xlrec.end_trunc_off;
-            enum_pgversion_dispatch!(&mut self.checkpoint, CheckPoint, cp, {
+        self.checkpoint.oldestMultiDB = xlrec.oldest_multi_db;
                cp.oldestMulti = xlrec.end_trunc_off;
                cp.oldestMultiDB = xlrec.oldest_multi_db;
                let maxsegment: i32 = pgv::nonrelfile_utils::mx_offset_to_member_segment(
                    pg_constants::MAX_MULTIXACT_OFFSET,
                );
                let startsegment: i32 =
                    pgv::nonrelfile_utils::mx_offset_to_member_segment(xlrec.start_trunc_memb);
                let endsegment: i32 =
                    pgv::nonrelfile_utils::mx_offset_to_member_segment(xlrec.end_trunc_memb);
                (maxsegment, startsegment, endsegment)
            });
        self.checkpoint_modified = true;
        // PerformMembersTruncation
        let maxsegment: i32 = mx_offset_to_member_segment(pg_constants::MAX_MULTIXACT_OFFSET);
        let startsegment: i32 = mx_offset_to_member_segment(xlrec.start_trunc_memb);
        let endsegment: i32 = mx_offset_to_member_segment(xlrec.end_trunc_memb);
        let mut segment: i32 = startsegment;
        // Delete all the segments except the last one. The last segment can still
@@ -1737,7 +1696,7 @@ impl WalIngest {
                    continue;
                }
-                modification.put_rel_page_image_zero(rel, gap_blknum)?;
+                modification.put_rel_page_image_zero(rel, gap_blknum);
            }
        }
        Ok(())
@@ -1803,7 +1762,7 @@ impl WalIngest {
            // fill the gap with zeros
            for gap_blknum in old_nblocks..blknum {
-                modification.put_slru_page_image_zero(kind, segno, gap_blknum)?;
+                modification.put_slru_page_image_zero(kind, segno, gap_blknum);
            }
        }
        Ok(())
@@ -1852,23 +1811,11 @@ mod tests {
        // TODO
    }
-    #[tokio::test]
+    static ZERO_CHECKPOINT: Bytes = Bytes::from_static(&[0u8; SIZEOF_CHECKPOINT]);
    async fn test_zeroed_checkpoint_decodes_correctly() -> Result<()> {
        for i in 14..=16 {
            dispatch_pgversion!(i, {
                pgv::CheckPoint::decode(&pgv::ZERO_CHECKPOINT)?;
            });
        }
        Ok(())
    }
    async fn init_walingest_test(tline: &Timeline, ctx: &RequestContext) -> Result<WalIngest> {
        let mut m = tline.begin_modification(Lsn(0x10));
-        m.put_checkpoint(dispatch_pgversion!(
+        m.put_checkpoint(ZERO_CHECKPOINT.clone())?;
            tline.pg_version,
            pgv::ZERO_CHECKPOINT.clone()
        ))?;
        m.put_relmap_file(0, 111, Bytes::from(""), ctx).await?; // dummy relmapper file
        m.commit(ctx).await?;
        let walingest = WalIngest::new(tline, Lsn(0x10), ctx).await?;
--- a/pageserver/src/walredo.rs
+++ b/pageserver/src/walredo.rs
@@ -43,12 +43,13 @@ use utils::lsn::Lsn;
 use utils::sync::gate::GateError;
 use utils::sync::heavier_once_cell;
 /// The real implementation that uses a Postgres process to
 /// perform WAL replay.
 ///
-/// Only one thread can use the process at a time, that is controlled by the
+/// This is the real implementation that uses a Postgres process to
-/// Mutex. In the future, we might want to launch a pool of processes to allow
+/// perform WAL replay. Only one thread can use the process at a time,
-/// concurrent replay of multiple records.
+/// that is controlled by the Mutex. In the future, we might want to
 /// launch a pool of processes to allow concurrent replay of multiple
 /// records.
 ///
 pub struct PostgresRedoManager {
    tenant_shard_id: TenantShardId,
    conf: &'static PageServerConf,
--- a/pgxn/neon/walproposer.c
+++ b/pgxn/neon/walproposer.c
@@ -1038,12 +1038,9 @@ DetermineEpochStartLsn(WalProposer *wp)
 		if (SkipXLogPageHeader(wp, wp->propEpochStartLsn) != wp->api.get_redo_start_lsn(wp))
 		{
 			/*
-			 * However, allow to proceed if last_log_term on the node which gave
+			 * However, allow to proceed if previously elected leader was me;
-			 * the highest vote (i.e. point where we are going to start writing)
+			 * plain restart of walproposer not intervened by concurrent
-			 * actually had been won by me; plain restart of walproposer not
+			 * compute (who could generate WAL) is ok.
 			 * intervened by concurrent compute which wrote WAL is ok.
 			 *
 			 * This avoids compute crash after manual term_bump.
 			 */
 			if (!((dth->n_entries >= 1) && (dth->entries[dth->n_entries - 1].term ==
 											pg_atomic_read_u64(&walprop_shared->mineLastElectedTerm))))
@@ -1445,17 +1442,12 @@ RecvAppendResponses(Safekeeper *sk)
 		if (sk->appendResponse.term > wp->propTerm)
 		{
 			/*
-			 *
+			 * Another compute with higher term is running. Panic to restart
-			 * Term has changed to higher one, probably another compute is
+			 * PG as we likely need to retake basebackup. However, don't dump
-			 * running. If this is the case we could PANIC as well because
+			 * core as this is kinda expected scenario.
 			 * likely it inserted some data and our basebackup is unsuitable
 			 * anymore. However, we also bump term manually (term_bump endpoint)
 			 * on safekeepers for migration purposes, in this case we do want
 			 * compute to stay alive. So restart walproposer with FATAL instead
 			 * of panicking; if basebackup is spoiled next election will notice
 			 * this.
 			 */
-			wp_log(FATAL, "WAL acceptor %s:%s with term " INT64_FORMAT " rejected our request, our term " INT64_FORMAT ", meaning another compute is running at the same time, and it conflicts with us",
+			disable_core_dump();
 			wp_log(PANIC, "WAL acceptor %s:%s with term " INT64_FORMAT " rejected our request, our term " INT64_FORMAT ", meaning another compute is running at the same time, and it conflicts with us",
 				   sk->host, sk->port,
 				   sk->appendResponse.term, wp->propTerm);
 		}
--- a/poetry.lock
+++ b/poetry.lock
@@ -985,38 +985,43 @@ files = [
 [[package]]
 name = "cryptography"
-version = "43.0.1"
+version = "42.0.4"
 description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers."
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "cryptography-43.0.1-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:8385d98f6a3bf8bb2d65a73e17ed87a3ba84f6991c155691c51112075f9ffc5d"},
+    {file = "cryptography-42.0.4-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:ffc73996c4fca3d2b6c1c8c12bfd3ad00def8621da24f547626bf06441400449"},
-    {file = "cryptography-43.0.1-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:27e613d7077ac613e399270253259d9d53872aaf657471473ebfc9a52935c062"},
+    {file = "cryptography-42.0.4-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:db4b65b02f59035037fde0998974d84244a64c3265bdef32a827ab9b63d61b18"},
-    {file = "cryptography-43.0.1-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:68aaecc4178e90719e95298515979814bda0cbada1256a4485414860bd7ab962"},
+    {file = "cryptography-42.0.4-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dad9c385ba8ee025bb0d856714f71d7840020fe176ae0229de618f14dae7a6e2"},
-    {file = "cryptography-43.0.1-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:de41fd81a41e53267cb020bb3a7212861da53a7d39f863585d13ea11049cf277"},
+    {file = "cryptography-42.0.4-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:69b22ab6506a3fe483d67d1ed878e1602bdd5912a134e6202c1ec672233241c1"},
-    {file = "cryptography-43.0.1-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:f98bf604c82c416bc829e490c700ca1553eafdf2912a91e23a79d97d9801372a"},
+    {file = "cryptography-42.0.4-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:e09469a2cec88fb7b078e16d4adec594414397e8879a4341c6ace96013463d5b"},
-    {file = "cryptography-43.0.1-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:61ec41068b7b74268fa86e3e9e12b9f0c21fcf65434571dbb13d954bceb08042"},
+    {file = "cryptography-42.0.4-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3e970a2119507d0b104f0a8e281521ad28fc26f2820687b3436b8c9a5fcf20d1"},
-    {file = "cryptography-43.0.1-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:014f58110f53237ace6a408b5beb6c427b64e084eb451ef25a28308270086494"},
+    {file = "cryptography-42.0.4-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:e53dc41cda40b248ebc40b83b31516487f7db95ab8ceac1f042626bc43a2f992"},
-    {file = "cryptography-43.0.1-cp37-abi3-win32.whl", hash = "sha256:2bd51274dcd59f09dd952afb696bf9c61a7a49dfc764c04dd33ef7a6b502a1e2"},
+    {file = "cryptography-42.0.4-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:c3a5cbc620e1e17009f30dd34cb0d85c987afd21c41a74352d1719be33380885"},
-    {file = "cryptography-43.0.1-cp37-abi3-win_amd64.whl", hash = "sha256:666ae11966643886c2987b3b721899d250855718d6d9ce41b521252a17985f4d"},
+    {file = "cryptography-42.0.4-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:6bfadd884e7280df24d26f2186e4e07556a05d37393b0f220a840b083dc6a824"},
-    {file = "cryptography-43.0.1-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:ac119bb76b9faa00f48128b7f5679e1d8d437365c5d26f1c2c3f0da4ce1b553d"},
+    {file = "cryptography-42.0.4-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:01911714117642a3f1792c7f376db572aadadbafcd8d75bb527166009c9f1d1b"},
-    {file = "cryptography-43.0.1-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1bbcce1a551e262dfbafb6e6252f1ae36a248e615ca44ba302df077a846a8806"},
+    {file = "cryptography-42.0.4-cp37-abi3-win32.whl", hash = "sha256:fb0cef872d8193e487fc6bdb08559c3aa41b659a7d9be48b2e10747f47863925"},
-    {file = "cryptography-43.0.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58d4e9129985185a06d849aa6df265bdd5a74ca6e1b736a77959b498e0505b85"},
+    {file = "cryptography-42.0.4-cp37-abi3-win_amd64.whl", hash = "sha256:c1f25b252d2c87088abc8bbc4f1ecbf7c919e05508a7e8628e6875c40bc70923"},
-    {file = "cryptography-43.0.1-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:d03a475165f3134f773d1388aeb19c2d25ba88b6a9733c5c590b9ff7bbfa2e0c"},
+    {file = "cryptography-42.0.4-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:15a1fb843c48b4a604663fa30af60818cd28f895572386e5f9b8a665874c26e7"},
-    {file = "cryptography-43.0.1-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:511f4273808ab590912a93ddb4e3914dfd8a388fed883361b02dea3791f292e1"},
+    {file = "cryptography-42.0.4-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1327f280c824ff7885bdeef8578f74690e9079267c1c8bd7dc5cc5aa065ae52"},
-    {file = "cryptography-43.0.1-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:80eda8b3e173f0f247f711eef62be51b599b5d425c429b5d4ca6a05e9e856baa"},
+    {file = "cryptography-42.0.4-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6ffb03d419edcab93b4b19c22ee80c007fb2d708429cecebf1dd3258956a563a"},
-    {file = "cryptography-43.0.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:38926c50cff6f533f8a2dae3d7f19541432610d114a70808f0926d5aaa7121e4"},
+    {file = "cryptography-42.0.4-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:1df6fcbf60560d2113b5ed90f072dc0b108d64750d4cbd46a21ec882c7aefce9"},
-    {file = "cryptography-43.0.1-cp39-abi3-win32.whl", hash = "sha256:a575913fb06e05e6b4b814d7f7468c2c660e8bb16d8d5a1faf9b33ccc569dd47"},
+    {file = "cryptography-42.0.4-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:44a64043f743485925d3bcac548d05df0f9bb445c5fcca6681889c7c3ab12764"},
-    {file = "cryptography-43.0.1-cp39-abi3-win_amd64.whl", hash = "sha256:d75601ad10b059ec832e78823b348bfa1a59f6b8d545db3a24fd44362a1564cb"},
+    {file = "cryptography-42.0.4-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:3c6048f217533d89f2f8f4f0fe3044bf0b2090453b7b73d0b77db47b80af8dff"},
-    {file = "cryptography-43.0.1-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:ea25acb556320250756e53f9e20a4177515f012c9eaea17eb7587a8c4d8ae034"},
+    {file = "cryptography-42.0.4-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:6d0fbe73728c44ca3a241eff9aefe6496ab2656d6e7a4ea2459865f2e8613257"},
-    {file = "cryptography-43.0.1-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:c1332724be35d23a854994ff0b66530119500b6053d0bd3363265f7e5e77288d"},
+    {file = "cryptography-42.0.4-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:887623fe0d70f48ab3f5e4dbf234986b1329a64c066d719432d0698522749929"},
-    {file = "cryptography-43.0.1-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:fba1007b3ef89946dbbb515aeeb41e30203b004f0b4b00e5e16078b518563289"},
+    {file = "cryptography-42.0.4-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:ce8613beaffc7c14f091497346ef117c1798c202b01153a8cc7b8e2ebaaf41c0"},
-    {file = "cryptography-43.0.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:5b43d1ea6b378b54a1dc99dd8a2b5be47658fe9a7ce0a58ff0b55f4b43ef2b84"},
+    {file = "cryptography-42.0.4-cp39-abi3-win32.whl", hash = "sha256:810bcf151caefc03e51a3d61e53335cd5c7316c0a105cc695f0959f2c638b129"},
-    {file = "cryptography-43.0.1-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:88cce104c36870d70c49c7c8fd22885875d950d9ee6ab54df2745f83ba0dc365"},
+    {file = "cryptography-42.0.4-cp39-abi3-win_amd64.whl", hash = "sha256:a0298bdc6e98ca21382afe914c642620370ce0470a01e1bef6dd9b5354c36854"},
-    {file = "cryptography-43.0.1-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:9d3cdb25fa98afdd3d0892d132b8d7139e2c087da1712041f6b762e4f807cc96"},
+    {file = "cryptography-42.0.4-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5f8907fcf57392cd917892ae83708761c6ff3c37a8e835d7246ff0ad251d9298"},
-    {file = "cryptography-43.0.1-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:e710bf40870f4db63c3d7d929aa9e09e4e7ee219e703f949ec4073b4294f6172"},
+    {file = "cryptography-42.0.4-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:12d341bd42cdb7d4937b0cabbdf2a94f949413ac4504904d0cdbdce4a22cbf88"},
-    {file = "cryptography-43.0.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7c05650fe8023c5ed0d46793d4b7d7e6cd9c04e68eabe5b0aeea836e37bdcec2"},
+    {file = "cryptography-42.0.4-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:1cdcdbd117681c88d717437ada72bdd5be9de117f96e3f4d50dab3f59fd9ab20"},
-    {file = "cryptography-43.0.1.tar.gz", hash = "sha256:203e92a75716d8cfb491dc47c79e17d0d9207ccffcbcb35f598fbe463ae3444d"},
+    {file = "cryptography-42.0.4-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:0e89f7b84f421c56e7ff69f11c441ebda73b8a8e6488d322ef71746224c20fce"},
    {file = "cryptography-42.0.4-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:f1e85a178384bf19e36779d91ff35c7617c885da487d689b05c1366f9933ad74"},
    {file = "cryptography-42.0.4-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:d2a27aca5597c8a71abbe10209184e1a8e91c1fd470b5070a2ea60cafec35bcd"},
    {file = "cryptography-42.0.4-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:4e36685cb634af55e0677d435d425043967ac2f3790ec652b2b88ad03b85c27b"},
    {file = "cryptography-42.0.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:f47be41843200f7faec0683ad751e5ef11b9a56a220d57f300376cd8aba81660"},
    {file = "cryptography-42.0.4.tar.gz", hash = "sha256:831a4b37accef30cccd34fcb916a5d7b5be3cbbe27268a02832c3e450aea39cb"},
 ]
 [package.dependencies]
@@ -1029,7 +1034,7 @@ nox = ["nox"]
 pep8test = ["check-sdist", "click", "mypy", "ruff"]
 sdist = ["build"]
 ssh = ["bcrypt (>=3.1.5)"]
-test = ["certifi", "cryptography-vectors (==43.0.1)", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"]
+test = ["certifi", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"]
 test-randomorder = ["pytest-randomly"]
 [[package]]
--- a/proxy/src/auth/backend.rs
+++ b/proxy/src/auth/backend.rs
@@ -311,9 +311,7 @@ async fn auth_quirks(
    let (allowed_ips, maybe_secret) = api.get_allowed_ips_and_secret(ctx, &info).await?;
    // check allowed list
-    if config.ip_allowlist_check_enabled
+    if !check_peer_addr_is_in_list(&ctx.peer_addr(), &allowed_ips) {
        && !check_peer_addr_is_in_list(&ctx.peer_addr(), &allowed_ips)
    {
        return Err(auth::AuthError::ip_address_not_allowed(ctx.peer_addr()));
    }
@@ -605,7 +603,6 @@ mod tests {
        rate_limiter_enabled: true,
        rate_limiter: AuthRateLimiter::new(&RateBucketInfo::DEFAULT_AUTH_SET),
        rate_limit_ip_subnet: 64,
        ip_allowlist_check_enabled: true,
    });
    async fn read_message(r: &mut (impl AsyncRead + Unpin), b: &mut BytesMut) -> PgMessage {
--- a/proxy/src/auth/credentials.rs
+++ b/proxy/src/auth/credentials.rs
@@ -538,17 +538,4 @@ mod tests {
        ));
        Ok(())
    }
    #[test]
    fn test_connection_blocker() {
        fn check(v: serde_json::Value) -> bool {
            let peer_addr = IpAddr::from([127, 0, 0, 1]);
            let ip_list: Vec<IpPattern> = serde_json::from_value(v).unwrap();
            check_peer_addr_is_in_list(&peer_addr, &ip_list)
        }
        assert!(check(json!([])));
        assert!(check(json!(["127.0.0.1"])));
        assert!(!check(json!(["255.255.255.255"])));
    }
 }
--- a/proxy/src/bin/local_proxy.rs
+++ b/proxy/src/bin/local_proxy.rs
@@ -224,7 +224,6 @@ fn build_config(args: &LocalProxyCliArgs) -> anyhow::Result<&'static ProxyConfig
            rate_limiter_enabled: false,
            rate_limiter: BucketRateLimiter::new(vec![]),
            rate_limit_ip_subnet: 64,
            ip_allowlist_check_enabled: true,
        },
        require_client_ip: false,
        handshake_timeout: Duration::from_secs(10),
--- a/proxy/src/bin/proxy.rs
+++ b/proxy/src/bin/proxy.rs
@@ -224,10 +224,6 @@ struct ProxyCliArgs {
    /// Whether to retry the wake_compute request
    #[clap(long, default_value = config::RetryConfig::WAKE_COMPUTE_DEFAULT_VALUES)]
    wake_compute_retry: String,
    /// Configure if this is a private access proxy for the POC: In that case the proxy will ignore the IP allowlist
    #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
    is_private_access_proxy: bool,
 }
 #[derive(clap::Args, Clone, Copy, Debug)]
@@ -686,7 +682,6 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
        rate_limiter_enabled: args.auth_rate_limit_enabled,
        rate_limiter: AuthRateLimiter::new(args.auth_rate_limit.clone()),
        rate_limit_ip_subnet: args.auth_rate_limit_ip_subnet,
        ip_allowlist_check_enabled: !args.is_private_access_proxy,
    };
    let config = Box::leak(Box::new(ProxyConfig {
--- a/proxy/src/cache/endpoints.rs
+++ b/proxy/src/cache/endpoints.rs
@@ -242,6 +242,6 @@ mod tests {
    #[test]
    fn test() {
        let s = "{\"branch_created\":null,\"endpoint_created\":{\"endpoint_id\":\"ep-rapid-thunder-w0qqw2q9\"},\"project_created\":null,\"type\":\"endpoint_created\"}";
-        serde_json::from_str::<ControlPlaneEventKey>(s).unwrap();
+        let _: ControlPlaneEventKey = serde_json::from_str(s).unwrap();
    }
 }
--- a/proxy/src/cache/timed_lru.rs
+++ b/proxy/src/cache/timed_lru.rs
@@ -16,7 +16,7 @@ use tracing::debug;
 // On the other hand, `hashlink` has good download stats and appears to be maintained.
 use hashlink::{linked_hash_map::RawEntryMut, LruCache};
-use super::{common::Cached, timed_lru, Cache};
+use super::{common::Cached, *};
 /// An implementation of timed LRU cache with fixed capacity.
 /// Key properties:
--- a/proxy/src/config.rs
+++ b/proxy/src/config.rs
@@ -64,7 +64,6 @@ pub struct AuthenticationConfig {
    pub rate_limiter_enabled: bool,
    pub rate_limiter: AuthRateLimiter,
    pub rate_limit_ip_subnet: u8,
    pub ip_allowlist_check_enabled: bool,
 }
 impl TlsConfig {
--- a/proxy/src/console/messages.rs
+++ b/proxy/src/console/messages.rs
@@ -395,7 +395,7 @@ mod tests {
                }
            }
        });
-        serde_json::from_str::<KickSession<'_>>(&json.to_string())?;
+        let _: KickSession<'_> = serde_json::from_str(&json.to_string())?;
        Ok(())
    }
@@ -403,7 +403,7 @@ mod tests {
    #[test]
    fn parse_db_info() -> anyhow::Result<()> {
        // with password
-        serde_json::from_value::<DatabaseInfo>(json!({
+        let _: DatabaseInfo = serde_json::from_value(json!({
            "host": "localhost",
            "port": 5432,
            "dbname": "postgres",
@@ -413,7 +413,7 @@ mod tests {
        }))?;
        // without password
-        serde_json::from_value::<DatabaseInfo>(json!({
+        let _: DatabaseInfo = serde_json::from_value(json!({
            "host": "localhost",
            "port": 5432,
            "dbname": "postgres",
@@ -422,7 +422,7 @@ mod tests {
        }))?;
        // new field (forward compatibility)
-        serde_json::from_value::<DatabaseInfo>(json!({
+        let _: DatabaseInfo = serde_json::from_value(json!({
            "host": "localhost",
            "port": 5432,
            "dbname": "postgres",
@@ -441,7 +441,7 @@ mod tests {
            "address": "0.0.0.0",
            "aux": dummy_aux(),
        });
-        serde_json::from_str::<WakeCompute>(&json.to_string())?;
+        let _: WakeCompute = serde_json::from_str(&json.to_string())?;
        Ok(())
    }
@@ -451,18 +451,18 @@ mod tests {
        let json = json!({
            "role_secret": "secret",
        });
-        serde_json::from_str::<GetRoleSecret>(&json.to_string())?;
+        let _: GetRoleSecret = serde_json::from_str(&json.to_string())?;
        let json = json!({
            "role_secret": "secret",
            "allowed_ips": ["8.8.8.8"],
        });
-        serde_json::from_str::<GetRoleSecret>(&json.to_string())?;
+        let _: GetRoleSecret = serde_json::from_str(&json.to_string())?;
        let json = json!({
            "role_secret": "secret",
            "allowed_ips": ["8.8.8.8"],
            "project_id": "project",
        });
-        serde_json::from_str::<GetRoleSecret>(&json.to_string())?;
+        let _: GetRoleSecret = serde_json::from_str(&json.to_string())?;
        Ok(())
    }
--- a/proxy/src/console/mgmt.rs
+++ b/proxy/src/console/mgmt.rs
@@ -78,7 +78,7 @@ pub(crate) type ComputeReady = DatabaseInfo;
 // TODO: replace with an http-based protocol.
 struct MgmtHandler;
-
+#[async_trait::async_trait]
 impl postgres_backend::Handler<tokio::net::TcpStream> for MgmtHandler {
    async fn process_query(
        &mut self,
--- a/proxy/src/console/provider/neon.rs
+++ b/proxy/src/console/provider/neon.rs
@@ -38,7 +38,10 @@ impl Api {
        locks: &'static ApiLocks<EndpointCacheKey>,
        wake_compute_endpoint_rate_limiter: Arc<WakeComputeRateLimiter>,
    ) -> Self {
-        let jwt = std::env::var("NEON_PROXY_TO_CONTROLPLANE_TOKEN").unwrap_or_default();
+        let jwt = match std::env::var("NEON_PROXY_TO_CONTROLPLANE_TOKEN") {
            Ok(v) => v,
            Err(_) => String::new(),
        };
        Self {
            endpoint,
            caches,
--- a/proxy/src/context.rs
+++ b/proxy/src/context.rs
@@ -6,7 +6,7 @@ use pq_proto::StartupMessageParams;
 use smol_str::SmolStr;
 use std::net::IpAddr;
 use tokio::sync::mpsc;
-use tracing::{debug, field::display, info, info_span, Span};
+use tracing::{field::display, info, info_span, Span};
 use try_lock::TryLock;
 use uuid::Uuid;
@@ -362,9 +362,7 @@ impl RequestMonitoringInner {
                });
        }
        if let Some(tx) = self.sender.take() {
-            tx.send(RequestData::from(&*self))
+            let _: Result<(), _> = tx.send(RequestData::from(&*self));
                .inspect_err(|e| debug!("tx send failed: {e}"))
                .ok();
        }
    }
@@ -373,9 +371,7 @@ impl RequestMonitoringInner {
        // Here we log the length of the session.
        self.disconnect_timestamp = Some(Utc::now());
        if let Some(tx) = self.disconnect_sender.take() {
-            tx.send(RequestData::from(&*self))
+            let _: Result<(), _> = tx.send(RequestData::from(&*self));
                .inspect_err(|e| debug!("tx send failed: {e}"))
                .ok();
        }
    }
 }
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Alexander Bayandin	ae50e9600f	Try to find the best parallelisation	2024-09-14 12:19:11 +01:00
Alexander Bayandin	cd613f5ab3	CI(check-codestyle-rust): use `parallel` and `cargo hack --partition`	2024-09-05 13:35:12 +01:00
Alexander Bayandin	539b3ad541	Revert "CI(check-codestyle-rust): use mold -run" This reverts commit `a0e923b70b`.	2024-09-05 11:31:01 +01:00
Alexander Bayandin	79fa640058	Revert "CI(check-codestyle-rust): try to use -j$(nproc)" This reverts commit `c41f9870a5`.	2024-09-05 11:30:21 +01:00
Alexander Bayandin	c41f9870a5	CI(check-codestyle-rust): try to use -j$(nproc)	2024-09-05 00:18:35 +01:00
Alexander Bayandin	a0e923b70b	CI(check-codestyle-rust): use mold -run	2024-09-04 23:58:25 +01:00
Alexander Bayandin	38f6107534	CI(check-codestyle-rust): revert back arm64 & use large runners	2024-09-04 22:48:44 +01:00
Alexander Bayandin	f25e07893c	CI(check-codestyle-rust): use `-C debug-assertions=off` instead of `--release`	2024-09-04 15:22:55 +01:00
Heikki Linnakangas	460d48437b	Run fewer redundant rust formatting checks We have no ARM64 or x64 specific code currently, so there's not much need to run clippy and other rust codestyle checks on both architectures. The check-codestyle-rust job took about 40 minutes on arm64 in the CI, which is now avoided. Also, skip running clippy in --release mode. It's pretty expensive to run, and there is very little difference between debug and release builds that could lead to different clippy warnings. The debug and release clippy checks took about 6 minutes each, so this saves another 6 minutes of runtime on CI. This doesn't make the overall CI runtime shorter, because other jobs still take longer than thesee did. But the 40 minutes spent on arm64 was actually pretty close to being the slowest job, and spending less on these tests is good anyway.	2024-09-04 14:12:07 +03:00