wip

2026-03-06 18:00:37 +00:00 · 2024-09-12 21:12:44 +01:00
184 changed files with 2668 additions and 5502 deletions
--- a/.devcontainer/Dockerfile.devcontainer
+++ b/.devcontainer/Dockerfile.devcontainer
@@ -0,0 +1 @@
+FROM neondatabase/build-tools:pinned
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -0,0 +1,23 @@
+// Dev container definition for Neon. Property reference: https://containers.dev/implementors/json_reference/
+{
+  "name": "Neon",
+  "build": {
+    "context": "..", // parent directory of .devcontainer/ (presumably the repository root — confirm)
+    "dockerfile": "Dockerfile.devcontainer" // the Dockerfile that sits next to this file in .devcontainer/
+  },
+
+  "postCreateCommand": { // object form: each entry is an independently named command (the reference says entries run in parallel)
+    "build neon": "BUILD_TYPE=debug CARGO_BUILD_FLAGS='--features=testing' mold -run make -s -j`nproc`", // debug build, passes --features=testing, runs make under mold with one job per CPU
+    "install python deps": "./scripts/pysync"
+  },
+
+  "customizations": {
+    "vscode": {
+      "extensions": [ // VS Code extension IDs requested for the container
+        "charliermarsh.ruff",
+        "github.vscode-github-actions",
+        "rust-lang.rust-analyzer"
+      ]
+    }
+  }
+}
--- a/.github/workflows/_build-and-test-locally.yml
+++ b/.github/workflows/_build-and-test-locally.yml
@@ -62,7 +62,7 @@ jobs:
          #
          git config --global --add safe.directory ${{ github.workspace }}
          git config --global --add safe.directory ${GITHUB_WORKSPACE}
-          for r in 14 15 16 17; do
+          for r in 14 15 16; do
            git config --global --add safe.directory "${{ github.workspace }}/vendor/postgres-v$r"
            git config --global --add safe.directory "${GITHUB_WORKSPACE}/vendor/postgres-v$r"
          done
@@ -83,10 +83,6 @@ jobs:
        id: pg_v16_rev
        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) >> $GITHUB_OUTPUT

-      - name: Set pg 17 revision for caching
-        id: pg_v17_rev
-        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v17) >> $GITHUB_OUTPUT
-
      # Set some environment variables used by all the steps.
      #
      # CARGO_FLAGS is extra options to pass to "cargo build", "cargo test" etc.
@@ -140,13 +136,6 @@ jobs:
          path: pg_install/v16
          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}

-      - name: Cache postgres v17 build
-        id: cache_pg_17
-        uses: actions/cache@v4
-        with:
-          path: pg_install/v17
-          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v17_rev.outputs.pg_rev }}-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
-
      - name: Build postgres v14
        if: steps.cache_pg_14.outputs.cache-hit != 'true'
        run: mold -run make postgres-v14 -j$(nproc)
@@ -159,10 +148,6 @@ jobs:
        if: steps.cache_pg_16.outputs.cache-hit != 'true'
        run: mold -run make postgres-v16 -j$(nproc)

-      - name: Build postgres v17
-        if: steps.cache_pg_17.outputs.cache-hit != 'true'
-        run: mold -run make postgres-v17 -j$(nproc)
-
      - name: Build neon extensions
        run: mold -run make neon-pg-ext -j$(nproc)

@@ -225,7 +210,7 @@ jobs:
        run: |
          PQ_LIB_DIR=$(pwd)/pg_install/v16/lib
          export PQ_LIB_DIR
-          LD_LIBRARY_PATH=$(pwd)/pg_install/v17/lib
+          LD_LIBRARY_PATH=$(pwd)/pg_install/v16/lib
          export LD_LIBRARY_PATH

          #nextest does not yet support running doctests
--- a/.github/workflows/_push-to-acr.yml
+++ b/.github/workflows/_push-to-acr.yml
@@ -52,5 +52,5 @@ jobs:
          for image in ${images}; do
            docker buildx imagetools create \
              -t ${{ inputs.registry_name }}.azurecr.io/neondatabase/${image}:${{ inputs.image_tag }} \
-                                                        neondatabase/${image}:${{ inputs.image_tag }}
+                                        neondatabase/${image}:${{ inputs.image_tag }}
          done
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -54,8 +54,8 @@ jobs:
      build-tag: ${{steps.build-tag.outputs.tag}}

    steps:
-      # Need `fetch-depth: 0` to count the number of commits in the branch
-      - uses: actions/checkout@v4
+      - name: Checkout
+        uses: actions/checkout@v4
        with:
          fetch-depth: 0

@@ -120,59 +120,6 @@ jobs:
      - name: Run mypy to check types
        run: poetry run mypy .

-  # Check that the vendor/postgres-* submodules point to the
-  # corresponding REL_*_STABLE_neon branches.
-  check-submodules:
-    runs-on: ubuntu-22.04
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-        with:
-          submodules: true
-
-      - uses: dorny/paths-filter@v3
-        id: check-if-submodules-changed
-        with:
-          filters: |
-            vendor:
-              - 'vendor/**'
-
-      - name: Check vendor/postgres-v14 submodule reference
-        if: steps.check-if-submodules-changed.outputs.vendor == 'true'
-        uses: jtmullen/submodule-branch-check-action@v1
-        with:
-          path: "vendor/postgres-v14"
-          fetch_depth: "50"
-          sub_fetch_depth: "50"
-          pass_if_unchanged: true
-
-      - name: Check vendor/postgres-v15 submodule reference
-        if: steps.check-if-submodules-changed.outputs.vendor == 'true'
-        uses: jtmullen/submodule-branch-check-action@v1
-        with:
-          path: "vendor/postgres-v15"
-          fetch_depth: "50"
-          sub_fetch_depth: "50"
-          pass_if_unchanged: true
-
-      - name: Check vendor/postgres-v16 submodule reference
-        if: steps.check-if-submodules-changed.outputs.vendor == 'true'
-        uses: jtmullen/submodule-branch-check-action@v1
-        with:
-          path: "vendor/postgres-v16"
-          fetch_depth: "50"
-          sub_fetch_depth: "50"
-          pass_if_unchanged: true
-
-      - name: Check vendor/postgres-v17 submodule reference
-        if: steps.check-if-submodules-changed.outputs.vendor == 'true'
-        uses: jtmullen/submodule-branch-check-action@v1
-        with:
-          path: "vendor/postgres-v17"
-          fetch_depth: "50"
-          sub_fetch_depth: "50"
-          pass_if_unchanged: true
-
  check-codestyle-rust:
    needs: [ check-permissions, build-build-tools-image ]
    strategy:
@@ -212,10 +159,6 @@ jobs:
      # This will catch compiler & clippy warnings in all feature combinations.
      # TODO: use cargo hack for build and test as well, but, that's quite expensive.
      # NB: keep clippy args in sync with ./run_clippy.sh
-      #
-      # The only difference between "clippy --debug" and "clippy --release" is that in --release mode,
-      # #[cfg(debug_assertions)] blocks are not built. It's not worth building everything for second
-      # time just for that, so skip "clippy --release".
      - run: |
          CLIPPY_COMMON_ARGS="$( source .neon_clippy_args; echo "$CLIPPY_COMMON_ARGS")"
          if [ "$CLIPPY_COMMON_ARGS" = "" ]; then
@@ -225,6 +168,8 @@ jobs:
          echo "CLIPPY_COMMON_ARGS=${CLIPPY_COMMON_ARGS}" >> $GITHUB_ENV
      - name: Run cargo clippy (debug)
        run: cargo hack --feature-powerset clippy $CLIPPY_COMMON_ARGS
+      - name: Run cargo clippy (release)
+        run: cargo hack --feature-powerset clippy --release $CLIPPY_COMMON_ARGS

      - name: Check documentation generation
        run: cargo doc --workspace --no-deps --document-private-items
@@ -266,7 +211,7 @@ jobs:
      build-tag: ${{ needs.tag.outputs.build-tag }}
      build-type: ${{ matrix.build-type }}
      # Run tests on all Postgres versions in release builds and only on the latest version in debug builds
-      pg-versions: ${{ matrix.build-type == 'release' && '["v14", "v15", "v16", "v17"]' || '["v17"]' }}
+      pg-versions: ${{ matrix.build-type == 'release' && '["v14", "v15", "v16"]' || '["v16"]' }}
    secrets: inherit

  # Keep `benchmarks` job outside of `build-and-test-locally` workflow to make job failures non-blocking
@@ -412,7 +357,6 @@ jobs:
            })

  coverage-report:
-    if: ${{ !startsWith(github.ref_name, 'release') }}
    needs: [ check-permissions, build-build-tools-image, build-and-test-locally ]
    runs-on: [ self-hosted, small ]
    container:
@@ -429,8 +373,8 @@ jobs:
        coverage-html: ${{ steps.upload-coverage-report-new.outputs.report-url }}
        coverage-json: ${{ steps.upload-coverage-report-new.outputs.summary-json }}
    steps:
-      # Need `fetch-depth: 0` for differential coverage (to get diff between two commits)
-      - uses: actions/checkout@v4
+      - name: Checkout
+        uses: actions/checkout@v4
        with:
          submodules: true
          fetch-depth: 0
@@ -531,9 +475,11 @@ jobs:
    runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}

    steps:
-      - uses: actions/checkout@v4
+      - name: Checkout
+        uses: actions/checkout@v4
        with:
          submodules: true
+          fetch-depth: 0

      - uses: ./.github/actions/set-docker-config-dir
      - uses: docker/setup-buildx-action@v3
@@ -602,15 +548,17 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
-        version: [ v14, v15, v16, v17 ]
+        version: [ v14, v15, v16 ]
        arch: [ x64, arm64 ]

    runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}

    steps:
-      - uses: actions/checkout@v4
+      - name: Checkout
+        uses: actions/checkout@v4
        with:
          submodules: true
+          fetch-depth: 0

      - uses: ./.github/actions/set-docker-config-dir
      - uses: docker/setup-buildx-action@v3
@@ -679,7 +627,7 @@ jobs:

      - name: Build compute-tools image
        # compute-tools are Postgres independent, so build it only once
-        if: matrix.version == 'v17'
+        if: matrix.version == 'v16'
        uses: docker/build-push-action@v6
        with:
          target: compute-tools-image
@@ -701,7 +649,7 @@ jobs:

    strategy:
      matrix:
-        version: [ v14, v15, v16, v17 ]
+        version: [ v14, v15, v16 ]

    steps:
      - uses: docker/login-action@v3
@@ -723,7 +671,7 @@ jobs:
                                             neondatabase/neon-test-extensions-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }}-arm64

      - name: Create multi-arch compute-tools image
-        if: matrix.version == 'v17'
+        if: matrix.version == 'v16'
        run: |
          docker buildx imagetools create -t neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }} \
                                             neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}-x64 \
@@ -741,7 +689,7 @@ jobs:
                                                                                neondatabase/compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }}

      - name: Push multi-arch compute-tools image to ECR
-        if: matrix.version == 'v17'
+        if: matrix.version == 'v16'
        run: |
          docker buildx imagetools create -t 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{ needs.tag.outputs.build-tag }} \
                                                                                neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}
@@ -752,12 +700,15 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
-        version: [ v14, v15, v16, v17 ]
+        version: [ v14, v15, v16 ]
    env:
      VM_BUILDER_VERSION: v0.29.3

    steps:
-      - uses: actions/checkout@v4
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0

      - name: Downloading vm-builder
        run: |
@@ -797,7 +748,10 @@ jobs:
    runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'small-arm64' || 'small')) }}

    steps:
-      - uses: actions/checkout@v4
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0

      - uses: ./.github/actions/set-docker-config-dir
      - uses: docker/login-action@v3
@@ -844,7 +798,7 @@ jobs:
    runs-on: ubuntu-22.04

    env:
-      VERSIONS: v14 v15 v16 v17
+      VERSIONS: v14 v15 v16

    steps:
      - uses: docker/login-action@v3
@@ -885,7 +839,7 @@ jobs:
            done
          done
          docker buildx imagetools create -t neondatabase/neon-test-extensions-v16:latest \
-                                              neondatabase/neon-test-extensions-v16:${{ needs.tag.outputs.build-tag }}
+                                             neondatabase/neon-test-extensions-v16:${{ needs.tag.outputs.build-tag }}

      - name: Login to prod ECR
        uses: docker/login-action@v3
@@ -898,7 +852,7 @@ jobs:
      - name: Copy all images to prod ECR
        if: github.ref_name == 'release'|| github.ref_name == 'release-proxy'
        run: |
-          for image in neon compute-tools {vm-,}compute-node-{v14,v15,v16,v17}; do
+          for image in neon compute-tools {vm-,}compute-node-{v14,v15,v16}; do
            docker buildx imagetools create -t 093970136003.dkr.ecr.eu-central-1.amazonaws.com/${image}:${{ needs.tag.outputs.build-tag }} \
                                               369495373322.dkr.ecr.eu-central-1.amazonaws.com/${image}:${{ needs.tag.outputs.build-tag }}
          done
@@ -910,7 +864,7 @@ jobs:
    with:
      client_id: ${{ vars.AZURE_DEV_CLIENT_ID }}
      image_tag: ${{ needs.tag.outputs.build-tag }}
-      images: neon compute-tools vm-compute-node-v14 vm-compute-node-v15 vm-compute-node-v16 vm-compute-node-v17 compute-node-v14 compute-node-v15 compute-node-v16 compute-node-v17
+      images: neon compute-tools vm-compute-node-v14 vm-compute-node-v15 vm-compute-node-v16 compute-node-v14 compute-node-v15 compute-node-v16
      registry_name: ${{ vars.AZURE_DEV_REGISTRY_NAME }}
      subscription_id: ${{ vars.AZURE_DEV_SUBSCRIPTION_ID }}
      tenant_id: ${{ vars.AZURE_TENANT_ID }}
@@ -922,7 +876,7 @@ jobs:
    with:
      client_id: ${{ vars.AZURE_PROD_CLIENT_ID }}
      image_tag: ${{ needs.tag.outputs.build-tag }}
-      images: neon compute-tools vm-compute-node-v14 vm-compute-node-v15 vm-compute-node-v16 vm-compute-node-v17 compute-node-v14 compute-node-v15 compute-node-v16 compute-node-v17
+      images: neon compute-tools vm-compute-node-v14 vm-compute-node-v15 vm-compute-node-v16 compute-node-v14 compute-node-v15 compute-node-v16
      registry_name: ${{ vars.AZURE_PROD_REGISTRY_NAME }}
      subscription_id: ${{ vars.AZURE_PROD_SUBSCRIPTION_ID }}
      tenant_id: ${{ vars.AZURE_TENANT_ID }}
@@ -1003,7 +957,6 @@ jobs:

  deploy:
    needs: [ check-permissions, promote-images, tag, build-and-test-locally, trigger-custom-extensions-build-and-wait, push-to-acr-dev, push-to-acr-prod ]
-    # `!failure() && !cancelled()` is required because the workflow depends on the job that can be skipped: `push-to-acr-dev` and `push-to-acr-prod`
    if: (github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy') && !failure() && !cancelled()

    runs-on: [ self-hosted, small ]
@@ -1018,12 +971,15 @@ jobs:
          #
          git config --global --add safe.directory ${{ github.workspace }}
          git config --global --add safe.directory ${GITHUB_WORKSPACE}
-          for r in 14 15 16 17; do
+          for r in 14 15 16; do
            git config --global --add safe.directory "${{ github.workspace }}/vendor/postgres-v$r"
            git config --global --add safe.directory "${GITHUB_WORKSPACE}/vendor/postgres-v$r"
          done

-      - uses: actions/checkout@v4
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0

      - name: Trigger deploy workflow
        env:
@@ -1102,8 +1058,7 @@ jobs:
  # The job runs on `release` branch and copies compatibility data and Neon artifact from the last *release PR* to the latest directory
  promote-compatibility-data:
    needs: [ deploy ]
-    # `!failure() && !cancelled()` is required because the workflow transitively depends on the job that can be skipped: `push-to-acr-dev` and `push-to-acr-prod`
-    if: github.ref_name == 'release' && !failure() && !cancelled()
+    if: github.ref_name == 'release'

    runs-on: ubuntu-22.04
    steps:
@@ -1162,7 +1117,6 @@ jobs:

              files_to_promote+=("s3://${BUCKET}/${s3_key}")

-              # TODO Add v17
              for pg_version in v14 v15 v16; do
                # We run less tests for debug builds, so we don't need to promote them
                if [ "${build_type}" == "debug" ] && { [ "${arch}" == "ARM64" ] || [ "${pg_version}" != "v16" ] ; }; then
@@ -1207,7 +1161,6 @@ jobs:
    # Usually we do `needs: [...]`
    needs:
      - build-and-test-locally
-      - check-submodules
      - check-codestyle-python
      - check-codestyle-rust
      - promote-images
--- a/.github/workflows/neon_extra_builds.yml
+++ b/.github/workflows/neon_extra_builds.yml
@@ -72,10 +72,6 @@ jobs:
        id: pg_v16_rev
        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) >> $GITHUB_OUTPUT

-      - name: Set pg 17 revision for caching
-        id: pg_v17_rev
-        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v17) >> $GITHUB_OUTPUT
-
      - name: Cache postgres v14 build
        id: cache_pg_14
        uses: actions/cache@v4
@@ -97,13 +93,6 @@ jobs:
          path: pg_install/v16
          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}

-      - name: Cache postgres v17 build
-        id: cache_pg_17
-        uses: actions/cache@v4
-        with:
-          path: pg_install/v17
-          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v17_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
-
      - name: Set extra env for macOS
        run: |
          echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
@@ -131,10 +120,6 @@ jobs:
        if: steps.cache_pg_16.outputs.cache-hit != 'true'
        run: make postgres-v16 -j$(sysctl -n hw.ncpu)

-      - name: Build postgres v17
-        if: steps.cache_pg_17.outputs.cache-hit != 'true'
-        run: make postgres-v17 -j$(sysctl -n hw.ncpu)
-
      - name: Build neon extensions
        run: make neon-pg-ext -j$(sysctl -n hw.ncpu)

@@ -181,7 +166,7 @@ jobs:
        run: make walproposer-lib -j$(nproc)

      - name: Produce the build stats
-        run: PQ_LIB_DIR=$(pwd)/pg_install/v17/lib cargo build --all --release --timings -j$(nproc)
+        run: PQ_LIB_DIR=$(pwd)/pg_install/v16/lib cargo build --all --release --timings -j$(nproc)

      - name: Upload the build stats
        id: upload-stats
--- a/.github/workflows/trigger-e2e-tests.yml
+++ b/.github/workflows/trigger-e2e-tests.yml
@@ -34,8 +34,8 @@ jobs:
      build-tag: ${{ steps.build-tag.outputs.tag }}

    steps:
-      # Need `fetch-depth: 0` to count the number of commits in the branch
-      - uses: actions/checkout@v4
+      - name: Checkout
+        uses: actions/checkout@v4
        with:
          fetch-depth: 0

--- a/.gitmodules
+++ b/.gitmodules
@@ -10,7 +10,3 @@
 	path = vendor/postgres-v16
 	url = https://github.com/neondatabase/postgres.git
 	branch = REL_16_STABLE_neon
-[submodule "vendor/postgres-v17"]
-	path = vendor/postgres-v17
-	url = https://github.com/neondatabase/postgres.git
-	branch = REL_17_STABLE_neon
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -76,6 +76,8 @@ clap = { version = "4.0", features = ["derive"] }
 comfy-table = "7.1"
 const_format = "0.2"
 crc32c = "0.6"
+crossbeam-deque = "0.8.5"
+crossbeam-utils = "0.8.5"
 dashmap = { version = "5.5.0", features = ["raw-api"] }
 either = "1.8"
 enum-map = "2.4.2"
@@ -93,7 +95,7 @@ hdrhistogram = "7.5.2"
 hex = "0.4"
 hex-literal = "0.4"
 hmac = "0.12.1"
-hostname = "0.4"
+hostname = "0.3.1"
 http = {version = "1.1.0", features = ["std"]}
 http-types = { version = "2", default-features = false }
 humantime = "2.1"
@@ -102,6 +104,7 @@ hyper = "0.14"
 tokio-tungstenite = "0.20.0"
 indexmap = "2"
 indoc = "2"
+inotify = "0.10.2"
 ipnet = "2.9.0"
 itertools = "0.10"
 jsonwebtoken = "9"
@@ -110,7 +113,7 @@ libc = "0.2"
 md5 = "0.7.0"
 measured = { version = "0.0.22", features=["lasso"] }
 measured-process = { version = "0.0.22" }
-memoffset = "0.9"
+memoffset = "0.8"
 nix = { version = "0.27", features = ["dir", "fs", "process", "socket", "signal", "poll"] }
 notify = "6.0.0"
 num_cpus = "1.15"
@@ -139,6 +142,7 @@ rpds = "0.13"
 rustc-hash = "1.1.0"
 rustls = "0.22"
 rustls-pemfile = "2"
+rustls-split = "0.3"
 scopeguard = "1.1"
 sysinfo = "0.29.2"
 sd-notify = "0.4.1"
@@ -160,6 +164,7 @@ strum_macros = "0.26"
 svg_fmt = "0.4.3"
 sync_wrapper = "0.1.2"
 tar = "0.4"
+task-local-extensions = "0.1.4"
 test-context = "0.3"
 thiserror = "1.0"
 tikv-jemallocator = "0.5"
--- a/Dockerfile
+++ b/Dockerfile
@@ -5,8 +5,6 @@
 ARG REPOSITORY=neondatabase
 ARG IMAGE=build-tools
 ARG TAG=pinned
-ARG DEFAULT_PG_VERSION=17
-ARG STABLE_PG_VERSION=16

 # Build Postgres
 FROM $REPOSITORY/$IMAGE:$TAG AS pg-build
@@ -15,7 +13,6 @@ WORKDIR /home/nonroot
 COPY --chown=nonroot vendor/postgres-v14 vendor/postgres-v14
 COPY --chown=nonroot vendor/postgres-v15 vendor/postgres-v15
 COPY --chown=nonroot vendor/postgres-v16 vendor/postgres-v16
-COPY --chown=nonroot vendor/postgres-v17 vendor/postgres-v17
 COPY --chown=nonroot pgxn pgxn
 COPY --chown=nonroot Makefile Makefile
 COPY --chown=nonroot scripts/ninstall.sh scripts/ninstall.sh
@@ -31,19 +28,16 @@ FROM $REPOSITORY/$IMAGE:$TAG AS build
 WORKDIR /home/nonroot
 ARG GIT_VERSION=local
 ARG BUILD_TAG
-ARG STABLE_PG_VERSION

 COPY --from=pg-build /home/nonroot/pg_install/v14/include/postgresql/server pg_install/v14/include/postgresql/server
 COPY --from=pg-build /home/nonroot/pg_install/v15/include/postgresql/server pg_install/v15/include/postgresql/server
 COPY --from=pg-build /home/nonroot/pg_install/v16/include/postgresql/server pg_install/v16/include/postgresql/server
-COPY --from=pg-build /home/nonroot/pg_install/v17/include/postgresql/server pg_install/v17/include/postgresql/server
 COPY --from=pg-build /home/nonroot/pg_install/v16/lib                       pg_install/v16/lib
-COPY --from=pg-build /home/nonroot/pg_install/v17/lib                       pg_install/v17/lib
 COPY --chown=nonroot . .

 ARG ADDITIONAL_RUSTFLAGS
 RUN set -e \
-    && PQ_LIB_DIR=$(pwd)/pg_install/v${STABLE_PG_VERSION}/lib RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment ${ADDITIONAL_RUSTFLAGS}" cargo build \
+    && PQ_LIB_DIR=$(pwd)/pg_install/v16/lib RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment ${ADDITIONAL_RUSTFLAGS}" cargo build \
      --bin pg_sni_router  \
      --bin pageserver  \
      --bin pagectl  \
@@ -58,7 +52,6 @@ RUN set -e \
 # Build final image
 #
 FROM debian:bullseye-slim
-ARG DEFAULT_PG_VERSION
 WORKDIR /data

 RUN set -e \
@@ -84,7 +77,6 @@ COPY --from=build --chown=neon:neon /home/nonroot/target/release/storage_scrubbe
 COPY --from=pg-build /home/nonroot/pg_install/v14 /usr/local/v14/
 COPY --from=pg-build /home/nonroot/pg_install/v15 /usr/local/v15/
 COPY --from=pg-build /home/nonroot/pg_install/v16 /usr/local/v16/
-COPY --from=pg-build /home/nonroot/pg_install/v17 /usr/local/v17/
 COPY --from=pg-build /home/nonroot/postgres_install.tar.gz /data/

 # By default, pageserver uses `.neon/` working directory in WORKDIR, so create one and fill it with the dummy config.
@@ -101,7 +93,7 @@ RUN mkdir -p /data/.neon/ && \

 # When running a binary that links with libpq, default to using our most recent postgres version.  Binaries
 # that want a particular postgres version will select it explicitly: this is just a default.
-ENV LD_LIBRARY_PATH=/usr/local/v${DEFAULT_PG_VERSION}/lib
+ENV LD_LIBRARY_PATH=/usr/local/v16/lib


 VOLUME ["/data"]
--- a/Dockerfile.compute-node
+++ b/Dockerfile.compute-node
@@ -55,27 +55,22 @@ RUN cd postgres && \
    # We could add the additional grant statements to the postgres repository but it would be hard to maintain,
    # whenever we need to pick up a new postgres version and we want to limit the changes in our postgres fork,
    # so we do it here.
+    old_list="pg_stat_statements--1.0--1.1.sql pg_stat_statements--1.1--1.2.sql pg_stat_statements--1.2--1.3.sql pg_stat_statements--1.3--1.4.sql pg_stat_statements--1.4--1.5.sql pg_stat_statements--1.4.sql pg_stat_statements--1.5--1.6.sql"; \
+    # the first loop is for pg_stat_statements extension versions <= 1.6
    for file in /usr/local/pgsql/share/extension/pg_stat_statements--*.sql; do \
        filename=$(basename "$file"); \
-        # Note that there are no downgrade scripts for pg_stat_statements, so we \
-        # don't have to modify any downgrade paths or (much) older versions: we only \
-        # have to make sure every creation of the pg_stat_statements_reset function \
-        # also adds execute permissions to the neon_superuser.
-        case $filename in \
-          pg_stat_statements--1.4.sql) \
-            # pg_stat_statements_reset is first created with 1.4
+        if echo "$old_list" | grep -q -F "$filename"; then \
            echo 'GRANT EXECUTE ON FUNCTION pg_stat_statements_reset() TO neon_superuser;' >> $file; \
-            ;; \
-          pg_stat_statements--1.6--1.7.sql) \
-            # Then with the 1.6-1.7 migration it is re-created with a new signature, thus add the permissions back
+        fi; \
+    done; \
+    # the second loop is for pg_stat_statements extension versions >= 1.7,
+    # where pg_stat_statements_reset() got 3 additional arguments
+    for file in /usr/local/pgsql/share/extension/pg_stat_statements--*.sql; do \
+        filename=$(basename "$file"); \
+        if ! echo "$old_list" | grep -q -F "$filename"; then \
            echo 'GRANT EXECUTE ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint) TO neon_superuser;' >> $file; \
-            ;; \
-          pg_stat_statements--1.10--1.11.sql) \
-            # Then with the 1.10-1.11 migration it is re-created with a new signature again, thus add the permissions back
-            echo 'GRANT EXECUTE ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint, boolean) TO neon_superuser;' >> $file; \
-            ;; \
-        esac; \
-    done;
+        fi; \
+    done

 #########################################################################################
 #
@@ -84,7 +79,6 @@ RUN cd postgres && \
 #
 #########################################################################################
 FROM build-deps AS postgis-build
-ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 RUN apt update && \
    apt install -y cmake gdal-bin libboost-dev libboost-thread-dev libboost-filesystem-dev \
@@ -93,11 +87,7 @@ RUN apt update && \
    protobuf-c-compiler xsltproc

 # SFCGAL > 1.3 requires CGAL > 5.2, Bullseye's libcgal-dev is 5.2
-RUN case "${PG_VERSION}" in "v17") \
-    mkdir -p /sfcgal && \
-    echo "Postgis doensn't yet support PG17 (needs 3.4.3, if not higher)" && exit 0;; \
-    esac && \
-    wget https://gitlab.com/Oslandia/SFCGAL/-/archive/v1.3.10/SFCGAL-v1.3.10.tar.gz -O SFCGAL.tar.gz && \
+RUN wget https://gitlab.com/Oslandia/SFCGAL/-/archive/v1.3.10/SFCGAL-v1.3.10.tar.gz -O SFCGAL.tar.gz && \
    echo "4e39b3b2adada6254a7bdba6d297bb28e1a9835a9f879b74f37e2dab70203232 SFCGAL.tar.gz" | sha256sum --check && \
    mkdir sfcgal-src && cd sfcgal-src && tar xzf ../SFCGAL.tar.gz --strip-components=1 -C . && \
    cmake -DCMAKE_BUILD_TYPE=Release . && make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -106,10 +96,7 @@ RUN case "${PG_VERSION}" in "v17") \

 ENV PATH="/usr/local/pgsql/bin:$PATH"

-RUN case "${PG_VERSION}" in "v17") \
-    echo "Postgis doensn't yet support PG17 (needs 3.4.3, if not higher)" && exit 0;; \
-    esac && \
-    wget https://download.osgeo.org/postgis/source/postgis-3.3.3.tar.gz -O postgis.tar.gz && \
+RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.3.tar.gz -O postgis.tar.gz && \
    echo "74eb356e3f85f14233791013360881b6748f78081cc688ff9d6f0f673a762d13 postgis.tar.gz" | sha256sum --check && \
    mkdir postgis-src && cd postgis-src && tar xzf ../postgis.tar.gz --strip-components=1 -C . && \
    find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt &&\
@@ -135,10 +122,7 @@ RUN case "${PG_VERSION}" in "v17") \
    cp /usr/local/pgsql/share/extension/address_standardizer.control /extensions/postgis && \
    cp /usr/local/pgsql/share/extension/address_standardizer_data_us.control /extensions/postgis

-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    wget https://github.com/pgRouting/pgrouting/archive/v3.4.2.tar.gz -O pgrouting.tar.gz && \
+RUN wget https://github.com/pgRouting/pgrouting/archive/v3.4.2.tar.gz -O pgrouting.tar.gz && \
    echo "cac297c07d34460887c4f3b522b35c470138760fe358e351ad1db4edb6ee306e pgrouting.tar.gz" | sha256sum --check && \
    mkdir pgrouting-src && cd pgrouting-src && tar xzf ../pgrouting.tar.gz --strip-components=1 -C . && \
    mkdir build && cd build && \
@@ -158,19 +142,12 @@ RUN case "${PG_VERSION}" in "v17") \
 #
 #########################################################################################
 FROM build-deps AS plv8-build
-ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    apt update && \
+RUN apt update && \
    apt install -y ninja-build python3-dev libncurses5 binutils clang

-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.10.tar.gz -O plv8.tar.gz && \
+RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.10.tar.gz -O plv8.tar.gz && \
    echo "7096c3290928561f0d4901b7a52794295dc47f6303102fae3f8e42dd575ad97d plv8.tar.gz" | sha256sum --check && \
    mkdir plv8-src && cd plv8-src && tar xzf ../plv8.tar.gz --strip-components=1 -C . && \
    # generate and copy upgrade scripts
@@ -195,13 +172,9 @@ RUN case "${PG_VERSION}" in "v17") \
 #
 #########################################################################################
 FROM build-deps AS h3-pg-build
-ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    case "$(uname -m)" in \
+RUN case "$(uname -m)" in \
      "x86_64") \
        export CMAKE_CHECKSUM=739d372726cb23129d57a539ce1432453448816e345e1545f6127296926b6754 \
        ;; \
@@ -219,11 +192,7 @@ RUN case "${PG_VERSION}" in "v17") \
      && /tmp/cmake-install.sh --skip-license --prefix=/usr/local/ \
      && rm /tmp/cmake-install.sh

-RUN case "${PG_VERSION}" in "v17") \
-        mkdir -p /h3/usr/ && \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    wget https://github.com/uber/h3/archive/refs/tags/v4.1.0.tar.gz -O h3.tar.gz && \
+RUN wget https://github.com/uber/h3/archive/refs/tags/v4.1.0.tar.gz -O h3.tar.gz && \
    echo "ec99f1f5974846bde64f4513cf8d2ea1b8d172d2218ab41803bf6a63532272bc h3.tar.gz" | sha256sum --check && \
    mkdir h3-src && cd h3-src && tar xzf ../h3.tar.gz --strip-components=1 -C . && \
    mkdir build && cd build && \
@@ -233,10 +202,7 @@ RUN case "${PG_VERSION}" in "v17") \
    cp -R /h3/usr / && \
    rm -rf build

-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.1.3.tar.gz -O h3-pg.tar.gz && \
+RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.1.3.tar.gz -O h3-pg.tar.gz && \
    echo "5c17f09a820859ffe949f847bebf1be98511fb8f1bd86f94932512c00479e324 h3-pg.tar.gz" | sha256sum --check && \
    mkdir h3-pg-src && cd h3-pg-src && tar xzf ../h3-pg.tar.gz --strip-components=1 -C . && \
    export PATH="/usr/local/pgsql/bin:$PATH" && \
@@ -252,13 +218,9 @@ RUN case "${PG_VERSION}" in "v17") \
 #
 #########################################################################################
 FROM build-deps AS unit-pg-build
-ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.7.tar.gz -O postgresql-unit.tar.gz && \
+RUN wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.7.tar.gz -O postgresql-unit.tar.gz && \
    echo "411d05beeb97e5a4abf17572bfcfbb5a68d98d1018918feff995f6ee3bb03e79 postgresql-unit.tar.gz" | sha256sum --check && \
    mkdir postgresql-unit-src && cd postgresql-unit-src && tar xzf ../postgresql-unit.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -277,7 +239,6 @@ RUN case "${PG_VERSION}" in "v17") \
 #
 #########################################################################################
 FROM build-deps AS vector-pg-build
-ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

 COPY patches/pgvector.patch /pgvector.patch
@@ -285,10 +246,7 @@ COPY patches/pgvector.patch /pgvector.patch
 # By default, pgvector Makefile uses `-march=native`. We don't want that,
 # because we build the images on different machines than where we run them.
 # Pass OPTFLAGS="" to remove it.
-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.7.2.tar.gz -O pgvector.tar.gz && \
+RUN wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.7.2.tar.gz -O pgvector.tar.gz && \
    echo "617fba855c9bcb41a2a9bc78a78567fd2e147c72afd5bf9d37b31b9591632b30 pgvector.tar.gz" | sha256sum --check && \
    mkdir pgvector-src && cd pgvector-src && tar xzf ../pgvector.tar.gz --strip-components=1 -C . && \
    patch -p1 < /pgvector.patch && \
@@ -303,14 +261,10 @@ RUN case "${PG_VERSION}" in "v17") \
 #
 #########################################################################################
 FROM build-deps AS pgjwt-pg-build
-ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

 # 9742dab1b2f297ad3811120db7b21451bca2d3c9 made on 13/11/2021
-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    wget https://github.com/michelp/pgjwt/archive/9742dab1b2f297ad3811120db7b21451bca2d3c9.tar.gz -O pgjwt.tar.gz && \
+RUN wget https://github.com/michelp/pgjwt/archive/9742dab1b2f297ad3811120db7b21451bca2d3c9.tar.gz -O pgjwt.tar.gz && \
    echo "cfdefb15007286f67d3d45510f04a6a7a495004be5b3aecb12cda667e774203f pgjwt.tar.gz" | sha256sum --check && \
    mkdir pgjwt-src && cd pgjwt-src && tar xzf ../pgjwt.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -323,13 +277,9 @@ RUN case "${PG_VERSION}" in "v17") \
 #
 #########################################################################################
 FROM build-deps AS hypopg-pg-build
-ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    wget https://github.com/HypoPG/hypopg/archive/refs/tags/1.4.0.tar.gz -O hypopg.tar.gz && \
+RUN wget https://github.com/HypoPG/hypopg/archive/refs/tags/1.4.0.tar.gz -O hypopg.tar.gz && \
    echo "0821011743083226fc9b813c1f2ef5897a91901b57b6bea85a78e466187c6819 hypopg.tar.gz" | sha256sum --check && \
    mkdir hypopg-src && cd hypopg-src && tar xzf ../hypopg.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -343,13 +293,9 @@ RUN case "${PG_VERSION}" in "v17") \
 #
 #########################################################################################
 FROM build-deps AS pg-hashids-pg-build
-ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    wget https://github.com/iCyberon/pg_hashids/archive/refs/tags/v1.2.1.tar.gz -O pg_hashids.tar.gz && \
+RUN wget https://github.com/iCyberon/pg_hashids/archive/refs/tags/v1.2.1.tar.gz -O pg_hashids.tar.gz && \
    echo "74576b992d9277c92196dd8d816baa2cc2d8046fe102f3dcd7f3c3febed6822a pg_hashids.tar.gz" | sha256sum --check && \
    mkdir pg_hashids-src && cd pg_hashids-src && tar xzf ../pg_hashids.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
@@ -363,15 +309,11 @@ RUN case "${PG_VERSION}" in "v17") \
 #
 #########################################################################################
 FROM build-deps AS rum-pg-build
-ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

 COPY patches/rum.patch /rum.patch

-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    wget https://github.com/postgrespro/rum/archive/refs/tags/1.3.13.tar.gz -O rum.tar.gz && \
+RUN wget https://github.com/postgrespro/rum/archive/refs/tags/1.3.13.tar.gz -O rum.tar.gz && \
    echo "6ab370532c965568df6210bd844ac6ba649f53055e48243525b0b7e5c4d69a7d rum.tar.gz" | sha256sum --check && \
    mkdir rum-src && cd rum-src && tar xzf ../rum.tar.gz --strip-components=1 -C . && \
    patch -p1 < /rum.patch && \
@@ -386,13 +328,9 @@ RUN case "${PG_VERSION}" in "v17") \
 #
 #########################################################################################
 FROM build-deps AS pgtap-pg-build
-ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    wget https://github.com/theory/pgtap/archive/refs/tags/v1.2.0.tar.gz -O pgtap.tar.gz && \
+RUN wget https://github.com/theory/pgtap/archive/refs/tags/v1.2.0.tar.gz -O pgtap.tar.gz && \
    echo "9c7c3de67ea41638e14f06da5da57bac6f5bd03fea05c165a0ec862205a5c052 pgtap.tar.gz" | sha256sum --check && \
    mkdir pgtap-src && cd pgtap-src && tar xzf ../pgtap.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -406,13 +344,9 @@ RUN case "${PG_VERSION}" in "v17") \
 #
 #########################################################################################
 FROM build-deps AS ip4r-pg-build
-ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    wget https://github.com/RhodiumToad/ip4r/archive/refs/tags/2.4.2.tar.gz -O ip4r.tar.gz && \
+RUN wget https://github.com/RhodiumToad/ip4r/archive/refs/tags/2.4.2.tar.gz -O ip4r.tar.gz && \
    echo "0f7b1f159974f49a47842a8ab6751aecca1ed1142b6d5e38d81b064b2ead1b4b ip4r.tar.gz" | sha256sum --check && \
    mkdir ip4r-src && cd ip4r-src && tar xzf ../ip4r.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -426,13 +360,9 @@ RUN case "${PG_VERSION}" in "v17") \
 #
 #########################################################################################
 FROM build-deps AS prefix-pg-build
-ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    wget https://github.com/dimitri/prefix/archive/refs/tags/v1.2.10.tar.gz -O prefix.tar.gz && \
+RUN wget https://github.com/dimitri/prefix/archive/refs/tags/v1.2.10.tar.gz -O prefix.tar.gz && \
    echo "4342f251432a5f6fb05b8597139d3ccde8dcf87e8ca1498e7ee931ca057a8575 prefix.tar.gz" | sha256sum --check && \
    mkdir prefix-src && cd prefix-src && tar xzf ../prefix.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -446,13 +376,9 @@ RUN case "${PG_VERSION}" in "v17") \
 #
 #########################################################################################
 FROM build-deps AS hll-pg-build
-ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    wget https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.18.tar.gz -O hll.tar.gz && \
+RUN wget https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.18.tar.gz -O hll.tar.gz && \
    echo "e2f55a6f4c4ab95ee4f1b4a2b73280258c5136b161fe9d059559556079694f0e hll.tar.gz" | sha256sum --check && \
    mkdir hll-src && cd hll-src && tar xzf ../hll.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -466,13 +392,9 @@ RUN case "${PG_VERSION}" in "v17") \
 #
 #########################################################################################
 FROM build-deps AS plpgsql-check-pg-build
-ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    wget https://github.com/okbob/plpgsql_check/archive/refs/tags/v2.5.3.tar.gz -O plpgsql_check.tar.gz && \
+RUN wget https://github.com/okbob/plpgsql_check/archive/refs/tags/v2.5.3.tar.gz -O plpgsql_check.tar.gz && \
    echo "6631ec3e7fb3769eaaf56e3dfedb829aa761abf163d13dba354b4c218508e1c0 plpgsql_check.tar.gz" | sha256sum --check && \
    mkdir plpgsql_check-src && cd plpgsql_check-src && tar xzf ../plpgsql_check.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
@@ -491,10 +413,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 ARG PG_VERSION
 ENV PATH="/usr/local/pgsql/bin:$PATH"

-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    case "${PG_VERSION}" in \
+RUN case "${PG_VERSION}" in \
      "v14" | "v15") \
        export TIMESCALEDB_VERSION=2.10.1 \
        export TIMESCALEDB_CHECKSUM=6fca72a6ed0f6d32d2b3523951ede73dc5f9b0077b38450a029a5f411fdb8c73 \
@@ -527,10 +446,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 ARG PG_VERSION
 ENV PATH="/usr/local/pgsql/bin:$PATH"

-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    case "${PG_VERSION}" in \
+RUN case "${PG_VERSION}" in \
      "v14") \
        export PG_HINT_PLAN_VERSION=14_1_4_1 \
        export PG_HINT_PLAN_CHECKSUM=c3501becf70ead27f70626bce80ea401ceac6a77e2083ee5f3ff1f1444ec1ad1 \
@@ -543,9 +459,6 @@ RUN case "${PG_VERSION}" in "v17") \
        export PG_HINT_PLAN_VERSION=16_1_6_0 \
        export PG_HINT_PLAN_CHECKSUM=fc85a9212e7d2819d4ae4ac75817481101833c3cfa9f0fe1f980984e12347d00 \
        ;; \
-      "v17") \
-        echo "TODO: PG17 pg_hint_plan support" && exit 0 \
-        ;; \
      *) \
        echo "Export the valid PG_HINT_PLAN_VERSION variable" && exit 1 \
        ;; \
@@ -565,14 +478,10 @@ RUN case "${PG_VERSION}" in "v17") \
 #
 #########################################################################################
 FROM build-deps AS pg-cron-pg-build
-ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

 ENV PATH="/usr/local/pgsql/bin/:$PATH"
-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    wget https://github.com/citusdata/pg_cron/archive/refs/tags/v1.6.0.tar.gz -O pg_cron.tar.gz && \
+RUN wget https://github.com/citusdata/pg_cron/archive/refs/tags/v1.6.0.tar.gz -O pg_cron.tar.gz && \
    echo "383a627867d730222c272bfd25cd5e151c578d73f696d32910c7db8c665cc7db pg_cron.tar.gz" | sha256sum --check && \
    mkdir pg_cron-src && cd pg_cron-src && tar xzf ../pg_cron.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -586,13 +495,9 @@ RUN case "${PG_VERSION}" in "v17") \
 #
 #########################################################################################
 FROM build-deps AS rdkit-pg-build
-ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    apt-get update && \
+RUN apt-get update && \
    apt-get install -y \
        cmake \
        libboost-iostreams1.74-dev \
@@ -602,10 +507,7 @@ RUN case "${PG_VERSION}" in "v17") \
        libeigen3-dev

 ENV PATH="/usr/local/pgsql/bin/:/usr/local/pgsql/:$PATH"
-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    wget https://github.com/rdkit/rdkit/archive/refs/tags/Release_2023_03_3.tar.gz -O rdkit.tar.gz && \
+RUN wget https://github.com/rdkit/rdkit/archive/refs/tags/Release_2023_03_3.tar.gz -O rdkit.tar.gz && \
    echo "bdbf9a2e6988526bfeb8c56ce3cdfe2998d60ac289078e2215374288185e8c8d rdkit.tar.gz" | sha256sum --check && \
    mkdir rdkit-src && cd rdkit-src && tar xzf ../rdkit.tar.gz --strip-components=1 -C . && \
    cmake \
@@ -642,14 +544,10 @@ RUN case "${PG_VERSION}" in "v17") \
 #
 #########################################################################################
 FROM build-deps AS pg-uuidv7-pg-build
-ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

 ENV PATH="/usr/local/pgsql/bin/:$PATH"
-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    wget https://github.com/fboulnois/pg_uuidv7/archive/refs/tags/v1.0.1.tar.gz -O pg_uuidv7.tar.gz && \
+RUN wget https://github.com/fboulnois/pg_uuidv7/archive/refs/tags/v1.0.1.tar.gz -O pg_uuidv7.tar.gz && \
    echo "0d0759ab01b7fb23851ecffb0bce27822e1868a4a5819bfd276101c716637a7a pg_uuidv7.tar.gz" | sha256sum --check && \
    mkdir pg_uuidv7-src && cd pg_uuidv7-src && tar xzf ../pg_uuidv7.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -663,14 +561,10 @@ RUN case "${PG_VERSION}" in "v17") \
 #
 #########################################################################################
 FROM build-deps AS pg-roaringbitmap-pg-build
-ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

 ENV PATH="/usr/local/pgsql/bin/:$PATH"
-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions is not supported yet by pg_roaringbitmap. Quit" && exit 0;; \
-    esac && \
-    wget https://github.com/ChenHuajun/pg_roaringbitmap/archive/refs/tags/v0.5.4.tar.gz -O pg_roaringbitmap.tar.gz && \
+RUN wget https://github.com/ChenHuajun/pg_roaringbitmap/archive/refs/tags/v0.5.4.tar.gz -O pg_roaringbitmap.tar.gz && \
    echo "b75201efcb1c2d1b014ec4ae6a22769cc7a224e6e406a587f5784a37b6b5a2aa pg_roaringbitmap.tar.gz" | sha256sum --check && \
    mkdir pg_roaringbitmap-src && cd pg_roaringbitmap-src && tar xzf ../pg_roaringbitmap.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -684,14 +578,10 @@ RUN case "${PG_VERSION}" in "v17") \
 #
 #########################################################################################
 FROM build-deps AS pg-semver-pg-build
-ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

 ENV PATH="/usr/local/pgsql/bin/:$PATH"
-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 is not supported yet by pg_semver. Quit" && exit 0;; \
-    esac && \
-    wget https://github.com/theory/pg-semver/archive/refs/tags/v0.32.1.tar.gz -O pg_semver.tar.gz && \
+RUN wget https://github.com/theory/pg-semver/archive/refs/tags/v0.32.1.tar.gz -O pg_semver.tar.gz && \
    echo "fbdaf7512026d62eec03fad8687c15ed509b6ba395bff140acd63d2e4fbe25d7 pg_semver.tar.gz" | sha256sum --check && \
    mkdir pg_semver-src && cd pg_semver-src && tar xzf ../pg_semver.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -730,14 +620,10 @@ RUN case "${PG_VERSION}" in \
 #
 #########################################################################################
 FROM build-deps AS pg-anon-pg-build
-ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

 ENV PATH="/usr/local/pgsql/bin/:$PATH"
-RUN case "${PG_VERSION}" in "v17") \
-    echo "postgresql_anonymizer does not yet support PG17" && exit 0;; \
-    esac && \
-    wget  https://github.com/neondatabase/postgresql_anonymizer/archive/refs/tags/neon_1.1.1.tar.gz -O pg_anon.tar.gz && \
+RUN wget  https://github.com/neondatabase/postgresql_anonymizer/archive/refs/tags/neon_1.1.1.tar.gz -O pg_anon.tar.gz && \
    echo "321ea8d5c1648880aafde850a2c576e4a9e7b9933a34ce272efc839328999fa9  pg_anon.tar.gz" | sha256sum --check && \
    mkdir pg_anon-src && cd pg_anon-src && tar xzf ../pg_anon.tar.gz --strip-components=1 -C . && \
    find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt &&\
@@ -755,7 +641,6 @@ RUN case "${PG_VERSION}" in "v17") \
 #
 #########################################################################################
 FROM build-deps AS rust-extensions-build
-ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

 RUN apt-get update && \
@@ -766,11 +651,9 @@ ENV HOME=/home/nonroot
 ENV PATH="/home/nonroot/.cargo/bin:/usr/local/pgsql/bin/:$PATH"
 USER nonroot
 WORKDIR /home/nonroot
+ARG PG_VERSION

-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 is not supported yet by pgrx. Quit" && exit 0;; \
-    esac && \
-    curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && \
+RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && \
    chmod +x rustup-init && \
    ./rustup-init -y --no-modify-path --profile minimal --default-toolchain stable && \
    rm rustup-init && \
@@ -789,10 +672,7 @@ USER root
 FROM rust-extensions-build AS pg-jsonschema-pg-build
 ARG PG_VERSION

-RUN case "${PG_VERSION}" in "v17") \
-    echo "pg_jsonschema does not yet have a release that supports pg17" && exit 0;; \
-    esac && \
-    wget https://github.com/supabase/pg_jsonschema/archive/refs/tags/v0.3.1.tar.gz -O pg_jsonschema.tar.gz && \
+RUN wget https://github.com/supabase/pg_jsonschema/archive/refs/tags/v0.3.1.tar.gz -O pg_jsonschema.tar.gz && \
    echo "61df3db1ed83cf24f6aa39c826f8818bfa4f0bd33b587fd6b2b1747985642297 pg_jsonschema.tar.gz" | sha256sum --check && \
    mkdir pg_jsonschema-src && cd pg_jsonschema-src && tar xzf ../pg_jsonschema.tar.gz --strip-components=1 -C . && \
    # see commit 252b3685a27a0f4c31a0f91e983c6314838e89e8
@@ -814,10 +694,7 @@ RUN case "${PG_VERSION}" in "v17") \
 FROM rust-extensions-build AS pg-graphql-pg-build
 ARG PG_VERSION

-RUN case "${PG_VERSION}" in "v17") \
-    echo "pg_graphql does not yet have a release that supports pg17 as of now" && exit 0;; \
-    esac && \
-    wget https://github.com/supabase/pg_graphql/archive/refs/tags/v1.5.7.tar.gz -O pg_graphql.tar.gz && \
+RUN wget https://github.com/supabase/pg_graphql/archive/refs/tags/v1.5.7.tar.gz -O pg_graphql.tar.gz && \
    echo "2b3e567a5b31019cb97ae0e33263c1bcc28580be5a444ac4c8ece5c4be2aea41 pg_graphql.tar.gz" | sha256sum --check && \
    mkdir pg_graphql-src && cd pg_graphql-src && tar xzf ../pg_graphql.tar.gz --strip-components=1 -C . && \
    sed -i 's/pgrx = "=0.11.3"/pgrx = { version = "0.11.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
@@ -837,10 +714,7 @@ FROM rust-extensions-build AS pg-tiktoken-pg-build
 ARG PG_VERSION

 # 26806147b17b60763039c6a6878884c41a262318 made on 26/09/2023
-RUN case "${PG_VERSION}" in "v17") \
-    echo "pg_tiktoken does not have versions, nor support for pg17" && exit 0;; \
-    esac && \
-    wget https://github.com/kelvich/pg_tiktoken/archive/26806147b17b60763039c6a6878884c41a262318.tar.gz -O pg_tiktoken.tar.gz && \
+RUN wget https://github.com/kelvich/pg_tiktoken/archive/26806147b17b60763039c6a6878884c41a262318.tar.gz -O pg_tiktoken.tar.gz && \
    echo "e64e55aaa38c259512d3e27c572da22c4637418cf124caba904cd50944e5004e pg_tiktoken.tar.gz" | sha256sum --check && \
    mkdir pg_tiktoken-src && cd pg_tiktoken-src && tar xzf ../pg_tiktoken.tar.gz --strip-components=1 -C . && \
    # TODO update pgrx version in the pg_tiktoken repo and remove this line
@@ -859,10 +733,7 @@ RUN case "${PG_VERSION}" in "v17") \
 FROM rust-extensions-build AS pg-pgx-ulid-build
 ARG PG_VERSION

-RUN case "${PG_VERSION}" in "v17") \
-    echo "pgx_ulid does not support pg17 as of the latest version (0.1.5)" && exit 0;; \
-    esac && \
-    wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.1.5.tar.gz -O pgx_ulid.tar.gz && \
+RUN wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.1.5.tar.gz -O pgx_ulid.tar.gz && \
    echo "9d1659a2da65af0133d5451c454de31b37364e3502087dadf579f790bc8bef17 pgx_ulid.tar.gz" | sha256sum --check && \
    mkdir pgx_ulid-src && cd pgx_ulid-src && tar xzf ../pgx_ulid.tar.gz --strip-components=1 -C . && \
    sed -i 's/pgrx       = "^0.11.2"/pgrx = { version = "=0.11.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
@@ -877,14 +748,10 @@ RUN case "${PG_VERSION}" in "v17") \
 #########################################################################################

 FROM build-deps AS wal2json-pg-build
-ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

 ENV PATH="/usr/local/pgsql/bin/:$PATH"
-RUN case "${PG_VERSION}" in "v17") \
-    echo "We'll need to update wal2json to 2.6+ for pg17 support" && exit 0;; \
-    esac && \
-    wget https://github.com/eulerto/wal2json/archive/refs/tags/wal2json_2_5.tar.gz && \
+RUN wget https://github.com/eulerto/wal2json/archive/refs/tags/wal2json_2_5.tar.gz && \
    echo "b516653575541cf221b99cf3f8be9b6821f6dbcfc125675c85f35090f824f00e wal2json_2_5.tar.gz" | sha256sum --check && \
    mkdir wal2json-src && cd wal2json-src && tar xzf ../wal2json_2_5.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -897,14 +764,10 @@ RUN case "${PG_VERSION}" in "v17") \
 #
 #########################################################################################
 FROM build-deps AS pg-ivm-build
-ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

 ENV PATH="/usr/local/pgsql/bin/:$PATH"
-RUN case "${PG_VERSION}" in "v17") \
-    echo "We'll need to update pg_ivm to 1.9+ for pg17 support" && exit 0;; \
-    esac && \
-    wget https://github.com/sraoss/pg_ivm/archive/refs/tags/v1.7.tar.gz -O pg_ivm.tar.gz && \
+RUN wget https://github.com/sraoss/pg_ivm/archive/refs/tags/v1.7.tar.gz -O pg_ivm.tar.gz && \
    echo "ebfde04f99203c7be4b0e873f91104090e2e83e5429c32ac242d00f334224d5e pg_ivm.tar.gz" | sha256sum --check && \
    mkdir pg_ivm-src && cd pg_ivm-src && tar xzf ../pg_ivm.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -918,14 +781,10 @@ RUN case "${PG_VERSION}" in "v17") \
 #
 #########################################################################################
 FROM build-deps AS pg-partman-build
-ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

 ENV PATH="/usr/local/pgsql/bin/:$PATH"
-RUN case "${PG_VERSION}" in "v17") \
-    echo "pg_partman doesn't support PG17 yet" && exit 0;; \
-    esac && \
-    wget https://github.com/pgpartman/pg_partman/archive/refs/tags/v5.0.1.tar.gz -O pg_partman.tar.gz && \
+RUN wget https://github.com/pgpartman/pg_partman/archive/refs/tags/v5.0.1.tar.gz -O pg_partman.tar.gz && \
    echo "75b541733a9659a6c90dbd40fccb904a630a32880a6e3044d0c4c5f4c8a65525 pg_partman.tar.gz" | sha256sum --check && \
    mkdir pg_partman-src && cd pg_partman-src && tar xzf ../pg_partman.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -995,8 +854,8 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) \
    case "${PG_VERSION}" in \
        "v14" | "v15") \
        ;; \
-        "v16" | "v17") \
-            echo "Skipping HNSW for PostgreSQL ${PG_VERSION}" && exit 0 \
+        "v16") \
+            echo "Skipping HNSW for PostgreSQL 16" && exit 0 \
        ;; \
        *) \
            echo "unexpected PostgreSQL version" && exit 1 \
@@ -1040,7 +899,7 @@ FROM neon-pg-ext-build AS postgres-cleanup-layer
 COPY --from=neon-pg-ext-build /usr/local/pgsql /usr/local/pgsql

 # Remove binaries from /bin/ that we won't use (or would manually copy & install otherwise)
-RUN cd /usr/local/pgsql/bin && rm -f ecpg raster2pgsql shp2pgsql pgtopo_export pgtopo_import pgsql2shp
+RUN cd /usr/local/pgsql/bin && rm -f ecpg raster2pgsql shp2pgsql pgtopo_export pgtopo_import pgsql2shp

 # Remove headers that we won't need anymore - we've completed installation of all extensions
 RUN rm -r /usr/local/pgsql/include
@@ -1059,10 +918,7 @@ RUN rm /usr/local/pgsql/lib/lib*.a

 FROM neon-pg-ext-build AS neon-pg-ext-test
 ARG PG_VERSION
-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    mkdir /ext-src
+RUN mkdir /ext-src

 #COPY --from=postgis-build /postgis.tar.gz /ext-src/
 #COPY --from=postgis-build /sfcgal/* /usr
@@ -1100,39 +956,18 @@ COPY --from=pg-anon-pg-build /pg_anon.tar.gz /ext-src
 COPY patches/pg_anon.patch /ext-src
 COPY --from=pg-ivm-build /pg_ivm.tar.gz /ext-src
 COPY --from=pg-partman-build /pg_partman.tar.gz /ext-src
-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    cd /ext-src/ && for f in *.tar.gz; \
+RUN cd /ext-src/ && for f in *.tar.gz; \
    do echo $f; dname=$(echo $f | sed 's/\.tar.*//')-src; \
    rm -rf $dname; mkdir $dname; tar xzf $f --strip-components=1 -C $dname \
    || exit 1; rm -f $f; done
-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    cd /ext-src/rum-src && patch -p1 <../rum.patch
-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    cd /ext-src/pgvector-src && patch -p1 <../pgvector.patch
+RUN cd /ext-src/pgvector-src && patch -p1 <../pgvector.patch
+RUN cd /ext-src/rum-src && patch -p1 <../rum.patch
 # cmake is required for the h3 test
-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    apt-get update && apt-get install -y cmake
-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    cd /ext-src/pg_hint_plan-src && patch -p1 < /ext-src/pg_hint_plan.patch
+RUN apt-get update && apt-get install -y cmake
+RUN cd /ext-src/pg_hint_plan-src && patch -p1 < /ext-src/pg_hint_plan.patch
 COPY --chmod=755 docker-compose/run-tests.sh /run-tests.sh
-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    patch -p1 </ext-src/pg_anon.patch
-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    patch -p1 </ext-src/pg_cron.patch
+RUN patch -p1 </ext-src/pg_anon.patch
+RUN patch -p1 </ext-src/pg_cron.patch
 ENV PATH=/usr/local/pgsql/bin:$PATH
 ENV PGHOST=compute
 ENV PGPORT=55433
--- a/Makefile
+++ b/Makefile
@@ -119,8 +119,6 @@ $(POSTGRES_INSTALL_DIR)/build/%/config.status:
 # I'm not sure why it wouldn't work, but this is the only place (apart from
 # the "build-all-versions" entry points) where direct mention of PostgreSQL
 # versions is used.
-.PHONY: postgres-configure-v17
-postgres-configure-v17: $(POSTGRES_INSTALL_DIR)/build/v17/config.status
 .PHONY: postgres-configure-v16
 postgres-configure-v16: $(POSTGRES_INSTALL_DIR)/build/v16/config.status
 .PHONY: postgres-configure-v15
@@ -217,31 +215,29 @@ neon-pg-clean-ext-%:
 # they depend on openssl and other libraries that are not included in our
 # Rust build.
 .PHONY: walproposer-lib
-walproposer-lib: neon-pg-ext-v17
+walproposer-lib: neon-pg-ext-v16
 	+@echo "Compiling walproposer-lib"
 	mkdir -p $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
-	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
+	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v16/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
 		-C $(POSTGRES_INSTALL_DIR)/build/walproposer-lib \
 		-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile walproposer-lib
-	cp $(POSTGRES_INSTALL_DIR)/v17/lib/libpgport.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
-	cp $(POSTGRES_INSTALL_DIR)/v17/lib/libpgcommon.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
+	cp $(POSTGRES_INSTALL_DIR)/v16/lib/libpgport.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
+	cp $(POSTGRES_INSTALL_DIR)/v16/lib/libpgcommon.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
+ifeq ($(UNAME_S),Linux)
 	$(AR) d $(POSTGRES_INSTALL_DIR)/build/walproposer-lib/libpgport.a \
 		pg_strong_random.o
 	$(AR) d $(POSTGRES_INSTALL_DIR)/build/walproposer-lib/libpgcommon.a \
-		checksum_helper.o \
-		cryptohash_openssl.o \
+		pg_crc32c.o \
 		hmac_openssl.o \
+		cryptohash_openssl.o \
+		scram-common.o \
 		md5_common.o \
-		parse_manifest.o \
-		scram-common.o
-ifeq ($(UNAME_S),Linux)
-	$(AR) d $(POSTGRES_INSTALL_DIR)/build/walproposer-lib/libpgcommon.a \
-		pg_crc32c.o
+		checksum_helper.o
 endif

 .PHONY: walproposer-lib-clean
 walproposer-lib-clean:
-	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config \
+	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v16/bin/pg_config \
 		-C $(POSTGRES_INSTALL_DIR)/build/walproposer-lib \
 		-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile clean

@@ -249,44 +245,38 @@ walproposer-lib-clean:
 neon-pg-ext: \
 	neon-pg-ext-v14 \
 	neon-pg-ext-v15 \
-	neon-pg-ext-v16 \
-	neon-pg-ext-v17
+	neon-pg-ext-v16

 .PHONY: neon-pg-clean-ext
 neon-pg-clean-ext: \
 	neon-pg-clean-ext-v14 \
 	neon-pg-clean-ext-v15 \
-	neon-pg-clean-ext-v16 \
-	neon-pg-clean-ext-v17
+	neon-pg-clean-ext-v16

 # shorthand to build all Postgres versions
 .PHONY: postgres
 postgres: \
 	postgres-v14 \
 	postgres-v15 \
-	postgres-v16 \
-	postgres-v17
+	postgres-v16

 .PHONY: postgres-headers
 postgres-headers: \
 	postgres-headers-v14 \
 	postgres-headers-v15 \
-	postgres-headers-v16 \
-	postgres-headers-v17
+	postgres-headers-v16

 .PHONY: postgres-clean
 postgres-clean: \
 	postgres-clean-v14 \
 	postgres-clean-v15 \
-	postgres-clean-v16 \
-	postgres-clean-v17
+	postgres-clean-v16

 .PHONY: postgres-check
 postgres-check: \
 	postgres-check-v14 \
 	postgres-check-v15 \
-	postgres-check-v16 \
-	postgres-check-v17
+	postgres-check-v16

 # This doesn't remove the effects of 'configure'.
 .PHONY: clean
@@ -331,13 +321,13 @@ postgres-%-pgindent: postgres-%-pg-bsd-indent postgres-%-typedefs.list
 	rm -f pg*.BAK

 # Indent pxgn/neon.
-.PHONY: neon-pgindent
-neon-pgindent: postgres-v17-pg-bsd-indent neon-pg-ext-v17
-	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-		FIND_TYPEDEF=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/find_typedef \
-		INDENT=$(POSTGRES_INSTALL_DIR)/build/v17/src/tools/pg_bsd_indent/pg_bsd_indent \
-		PGINDENT_SCRIPT=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/pgindent/pgindent \
-		-C $(POSTGRES_INSTALL_DIR)/build/neon-v17 \
+.PHONY: neon-pgindent
+neon-pgindent: postgres-v16-pg-bsd-indent neon-pg-ext-v16
+	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v16/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
+		FIND_TYPEDEF=$(ROOT_PROJECT_DIR)/vendor/postgres-v16/src/tools/find_typedef \
+		INDENT=$(POSTGRES_INSTALL_DIR)/build/v16/src/tools/pg_bsd_indent/pg_bsd_indent \
+		PGINDENT_SCRIPT=$(ROOT_PROJECT_DIR)/vendor/postgres-v16/src/tools/pgindent/pgindent \
+		-C $(POSTGRES_INSTALL_DIR)/build/neon-v16 \
 		-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile pgindent


--- a/compute_tools/Cargo.toml
+++ b/compute_tools/Cargo.toml
@@ -11,6 +11,7 @@ testing = []

 [dependencies]
 anyhow.workspace = true
+async-compression.workspace = true
 chrono.workspace = true
 cfg-if.workspace = true
 clap.workspace = true
@@ -23,6 +24,7 @@ num_cpus.workspace = true
 opentelemetry.workspace = true
 postgres.workspace = true
 regex.workspace = true
+serde.workspace = true
 serde_json.workspace = true
 signal-hook.workspace = true
 tar.workspace = true
@@ -41,6 +43,7 @@ url.workspace = true
 compute_api.workspace = true
 utils.workspace = true
 workspace_hack.workspace = true
+toml_edit.workspace = true
 remote_storage = { version = "0.1", path = "../libs/remote_storage/" }
 vm_monitor = { version = "0.1", path = "../libs/vm_monitor/" }
 zstd = "0.13"
--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -1052,19 +1052,26 @@ impl ComputeNode {
        let pg_process = self.start_postgres(pspec.storage_auth_token.clone())?;

        let config_time = Utc::now();
-        if pspec.spec.mode == ComputeMode::Primary && !pspec.spec.skip_pg_catalog_updates {
-            let pgdata_path = Path::new(&self.pgdata);
-            // temporarily reset max_cluster_size in config
-            // to avoid the possibility of hitting the limit, while we are applying config:
-            // creating new extensions, roles, etc...
-            config::with_compute_ctl_tmp_override(pgdata_path, "neon.max_cluster_size=-1", || {
+        if pspec.spec.mode == ComputeMode::Primary {
+            if !pspec.spec.skip_pg_catalog_updates {
+                let pgdata_path = Path::new(&self.pgdata);
+                // temporarily reset max_cluster_size in config
+                // to avoid the possibility of hitting the limit, while we are applying config:
+                // creating new extensions, roles, etc...
+                config::with_compute_ctl_tmp_override(
+                    pgdata_path,
+                    "neon.max_cluster_size=-1",
+                    || {
+                        self.pg_reload_conf()?;
+
+                        self.apply_config(&compute_state)?;
+
+                        Ok(())
+                    },
+                )?;
                self.pg_reload_conf()?;
-
-                self.apply_config(&compute_state)?;
-
-                Ok(())
-            })?;
-            self.pg_reload_conf()?;
+            }
+            self.post_apply_config()?;
        }

        let startup_end_time = Utc::now();
--- a/compute_tools/src/extension_server.rs
+++ b/compute_tools/src/extension_server.rs
@@ -124,7 +124,6 @@ fn parse_pg_version(human_version: &str) -> &str {
            "14" => return "v14",
            "15" => return "v15",
            "16" => return "v16",
-            "17" => return "v17",
            _ => {}
        },
        _ => {}
--- a/compute_tools/src/migrations/0011-grant_pg_show_replication_origin_status_to_neon_superuser.sql
+++ b/compute_tools/src/migrations/0011-grant_pg_show_replication_origin_status_to_neon_superuser.sql
@@ -1 +0,0 @@
-GRANT EXECUTE ON FUNCTION pg_show_replication_origin_status TO neon_superuser;
--- a/compute_tools/src/spec.rs
+++ b/compute_tools/src/spec.rs
@@ -793,9 +793,6 @@ pub fn handle_migrations(client: &mut Client) -> Result<()> {
        include_str!(
            "./migrations/0010-grant_snapshot_synchronization_funcs_to_neon_superuser.sql"
        ),
-        include_str!(
-            "./migrations/0011-grant_pg_show_replication_origin_status_to_neon_superuser.sql"
-        ),
    ];

    MigrationRunner::new(client, &migrations).run_migrations()?;
--- a/control_plane/Cargo.toml
+++ b/control_plane/Cargo.toml
@@ -9,10 +9,13 @@ anyhow.workspace = true
 camino.workspace = true
 clap.workspace = true
 comfy-table.workspace = true
+futures.workspace = true
 git-version.workspace = true
 humantime.workspace = true
 nix.workspace = true
 once_cell.workspace = true
+postgres.workspace = true
+hex.workspace = true
 humantime-serde.workspace = true
 hyper.workspace = true
 regex.workspace = true
@@ -20,6 +23,8 @@ reqwest = { workspace = true, features = ["blocking", "json"] }
 scopeguard.workspace = true
 serde.workspace = true
 serde_json.workspace = true
+serde_with.workspace = true
+tar.workspace = true
 thiserror.workspace = true
 toml.workspace = true
 toml_edit.workspace = true
--- a/control_plane/src/background_process.rs
+++ b/control_plane/src/background_process.rs
@@ -151,7 +151,7 @@ where
                    print!(".");
                    io::stdout().flush().unwrap();
                }
-                tokio::time::sleep(RETRY_INTERVAL).await;
+                thread::sleep(RETRY_INTERVAL);
            }
            Err(e) => {
                println!("error starting process {process_name:?}: {e:#}");
--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -34,14 +34,12 @@ use safekeeper_api::{
    DEFAULT_HTTP_LISTEN_PORT as DEFAULT_SAFEKEEPER_HTTP_PORT,
    DEFAULT_PG_LISTEN_PORT as DEFAULT_SAFEKEEPER_PG_PORT,
 };
-use std::borrow::Cow;
 use std::collections::{BTreeSet, HashMap};
 use std::path::PathBuf;
 use std::process::exit;
 use std::str::FromStr;
 use std::time::Duration;
 use storage_broker::DEFAULT_LISTEN_ADDR as DEFAULT_BROKER_ADDR;
-use tokio::task::JoinSet;
 use url::Host;
 use utils::{
    auth::{Claims, Scope},
@@ -89,35 +87,34 @@ fn main() -> Result<()> {

    // Check for 'neon init' command first.
    let subcommand_result = if sub_name == "init" {
-        handle_init(sub_args).map(|env| Some(Cow::Owned(env)))
+        handle_init(sub_args).map(Some)
    } else {
        // all other commands need an existing config
-
-        let env = LocalEnv::load_config(&local_env::base_path()).context("Error loading config")?;
+        let mut env =
+            LocalEnv::load_config(&local_env::base_path()).context("Error loading config")?;
        let original_env = env.clone();
-        let env = Box::leak(Box::new(env));
+
        let rt = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()
            .unwrap();

        let subcommand_result = match sub_name {
-            "tenant" => rt.block_on(handle_tenant(sub_args, env)),
-            "timeline" => rt.block_on(handle_timeline(sub_args, env)),
-            "start" => rt.block_on(handle_start_all(env, get_start_timeout(sub_args))),
-            "stop" => rt.block_on(handle_stop_all(sub_args, env)),
-            "pageserver" => rt.block_on(handle_pageserver(sub_args, env)),
-            "storage_controller" => rt.block_on(handle_storage_controller(sub_args, env)),
-            "storage_broker" => rt.block_on(handle_storage_broker(sub_args, env)),
-            "safekeeper" => rt.block_on(handle_safekeeper(sub_args, env)),
-            "endpoint" => rt.block_on(handle_endpoint(sub_args, env)),
-            "mappings" => handle_mappings(sub_args, env),
+            "tenant" => rt.block_on(handle_tenant(sub_args, &mut env)),
+            "timeline" => rt.block_on(handle_timeline(sub_args, &mut env)),
+            "start" => rt.block_on(handle_start_all(&env, get_start_timeout(sub_args))),
+            "stop" => rt.block_on(handle_stop_all(sub_args, &env)),
+            "pageserver" => rt.block_on(handle_pageserver(sub_args, &env)),
+            "storage_controller" => rt.block_on(handle_storage_controller(sub_args, &env)),
+            "safekeeper" => rt.block_on(handle_safekeeper(sub_args, &env)),
+            "endpoint" => rt.block_on(handle_endpoint(sub_args, &env)),
+            "mappings" => handle_mappings(sub_args, &mut env),
            "pg" => bail!("'pg' subcommand has been renamed to 'endpoint'"),
            _ => bail!("unexpected subcommand {sub_name}"),
        };

-        if &original_env != env {
-            subcommand_result.map(|()| Some(Cow::Borrowed(env)))
+        if original_env != env {
+            subcommand_result.map(|()| Some(env))
        } else {
            subcommand_result.map(|()| None)
        }
@@ -1248,122 +1245,49 @@ async fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->
    Ok(())
 }

-async fn handle_storage_broker(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
-    let (sub_name, sub_args) = match sub_match.subcommand() {
-        Some(broker_command_data) => broker_command_data,
-        None => bail!("no broker subcommand provided"),
-    };
-
-    match sub_name {
-        "start" => {
-            if let Err(e) = broker::start_broker_process(env, get_start_timeout(sub_args)).await {
-                eprintln!("broker start failed: {e}");
-                exit(1);
-            }
-        }
-
-        "stop" => {
-            if let Err(e) = broker::stop_broker_process(env) {
-                eprintln!("broker stop failed: {e}");
-                exit(1);
-            }
-        }
-
-        _ => bail!("Unexpected broker subcommand '{}'", sub_name),
-    }
-    Ok(())
-}
-
 async fn handle_start_all(
-    env: &'static local_env::LocalEnv,
+    env: &local_env::LocalEnv,
    retry_timeout: &Duration,
 ) -> anyhow::Result<()> {
-    let Err(errors) = handle_start_all_impl(env, *retry_timeout).await else {
-        neon_start_status_check(env, retry_timeout)
-            .await
-            .context("status check after successful startup of all services")?;
-        return Ok(());
-    };
-
-    eprintln!("startup failed because one or more services could not be started");
-
-    for e in errors {
-        eprintln!("{e}");
-        let debug_repr = format!("{e:?}");
-        for line in debug_repr.lines() {
-            eprintln!("  {line}");
-        }
-    }
-
-    try_stop_all(env, true).await;
-
-    exit(2);
-}
-
-/// Returns Ok() if and only if all services could be started successfully.
-/// Otherwise, returns the list of errors that occurred during startup.
-async fn handle_start_all_impl(
-    env: &'static local_env::LocalEnv,
-    retry_timeout: Duration,
-) -> Result<(), Vec<anyhow::Error>> {
    // Endpoints are not started automatically

-    let mut js = JoinSet::new();
+    broker::start_broker_process(env, retry_timeout).await?;

-    // force infalliblity through closure
-    #[allow(clippy::redundant_closure_call)]
-    (|| {
-        js.spawn(async move {
-            let retry_timeout = retry_timeout;
-            broker::start_broker_process(env, &retry_timeout).await
-        });
-
-        // Only start the storage controller if the pageserver is configured to need it
-        if env.control_plane_api.is_some() {
-            js.spawn(async move {
-                let storage_controller = StorageController::from_env(env);
-                storage_controller
-                    .start(NeonStorageControllerStartArgs::with_default_instance_id(
-                        retry_timeout.into(),
-                    ))
-                    .await
-                    .map_err(|e| e.context("start storage_controller"))
-            });
-        }
-
-        for ps_conf in &env.pageservers {
-            js.spawn(async move {
-                let pageserver = PageServerNode::from_env(env, ps_conf);
-                pageserver
-                    .start(&retry_timeout)
-                    .await
-                    .map_err(|e| e.context(format!("start pageserver {}", ps_conf.id)))
-            });
-        }
-
-        for node in env.safekeepers.iter() {
-            js.spawn(async move {
-                let safekeeper = SafekeeperNode::from_env(env, node);
-                safekeeper
-                    .start(vec![], &retry_timeout)
-                    .await
-                    .map_err(|e| e.context(format!("start safekeeper {}", safekeeper.id)))
-            });
-        }
-    })();
-
-    let mut errors = Vec::new();
-    while let Some(result) = js.join_next().await {
-        let result = result.expect("we don't panic or cancel the tasks");
-        if let Err(e) = result {
-            errors.push(e);
+    // Only start the storage controller if the pageserver is configured to need it
+    if env.control_plane_api.is_some() {
+        let storage_controller = StorageController::from_env(env);
+        if let Err(e) = storage_controller
+            .start(NeonStorageControllerStartArgs::with_default_instance_id(
+                (*retry_timeout).into(),
+            ))
+            .await
+        {
+            eprintln!("storage_controller start failed: {:#}", e);
+            try_stop_all(env, true).await;
+            exit(1);
        }
    }

-    if !errors.is_empty() {
-        return Err(errors);
+    for ps_conf in &env.pageservers {
+        let pageserver = PageServerNode::from_env(env, ps_conf);
+        if let Err(e) = pageserver.start(retry_timeout).await {
+            eprintln!("pageserver {} start failed: {:#}", ps_conf.id, e);
+            try_stop_all(env, true).await;
+            exit(1);
+        }
    }

+    for node in env.safekeepers.iter() {
+        let safekeeper = SafekeeperNode::from_env(env, node);
+        if let Err(e) = safekeeper.start(vec![], retry_timeout).await {
+            eprintln!("safekeeper {} start failed: {:#}", safekeeper.id, e);
+            try_stop_all(env, false).await;
+            exit(1);
+        }
+    }
+
+    neon_start_status_check(env, retry_timeout).await?;
+
    Ok(())
 }

@@ -1748,19 +1672,6 @@ fn cli() -> Command {
                            .arg(stop_mode_arg.clone())
                            .arg(instance_id))
        )
-        .subcommand(
-            Command::new("storage_broker")
-                .arg_required_else_help(true)
-                .about("Manage broker")
-                .subcommand(Command::new("start")
-                            .about("Start broker")
-                            .arg(timeout_arg.clone())
-                )
-                .subcommand(Command::new("stop")
-                            .about("Stop broker")
-                            .arg(stop_mode_arg.clone())
-                )
-        )
        .subcommand(
            Command::new("safekeeper")
                .arg_required_else_help(true)
--- a/control_plane/src/endpoint.rs
+++ b/control_plane/src/endpoint.rs
@@ -702,7 +702,7 @@ impl Endpoint {
                    }
                }
            }
-            tokio::time::sleep(ATTEMPT_INTERVAL).await;
+            std::thread::sleep(ATTEMPT_INTERVAL);
        }

        // disarm the scopeguard, let the child outlive this function (and neon_local invoction)
--- a/control_plane/src/local_env.rs
+++ b/control_plane/src/local_env.rs
@@ -342,7 +342,7 @@ impl LocalEnv {

        #[allow(clippy::manual_range_patterns)]
        match pg_version {
-            14 | 15 | 16 | 17 => Ok(path.join(format!("v{pg_version}"))),
+            14 | 15 | 16 => Ok(path.join(format!("v{pg_version}"))),
            _ => bail!("Unsupported postgres version: {}", pg_version),
        }
    }
--- a/control_plane/src/pageserver.rs
+++ b/control_plane/src/pageserver.rs
@@ -17,7 +17,9 @@ use std::time::Duration;

 use anyhow::{bail, Context};
 use camino::Utf8PathBuf;
-use pageserver_api::models::{self, AuxFilePolicy, TenantInfo, TimelineInfo};
+use pageserver_api::models::{
+    self, AuxFilePolicy, LocationConfig, TenantHistorySize, TenantInfo, TimelineInfo,
+};
 use pageserver_api::shard::TenantShardId;
 use pageserver_client::mgmt_api;
 use postgres_backend::AuthType;
@@ -322,6 +324,22 @@ impl PageServerNode {
        background_process::stop_process(immediate, "pageserver", &self.pid_file())
    }

+    /// Connect without TLS; under `NeonJWT` auth a fresh `PageServerApi` token is the password.
+    pub async fn page_server_psql_client(
+        &self,
+    ) -> anyhow::Result<(
+        tokio_postgres::Client,
+        tokio_postgres::Connection<tokio_postgres::Socket, tokio_postgres::tls::NoTlsStream>,
+    )> {
+        let mut conn_conf = self.pg_connection_config.clone();
+        if self.conf.pg_auth_type == AuthType::NeonJWT {
+            let claims = Claims::new(None, Scope::PageServerApi);
+            let jwt = self.env.generate_auth_token(&claims)?;
+            conn_conf = conn_conf.set_password(Some(jwt));
+        }
+        Ok(conn_conf.connect_no_tls().await?)
+    }
+
    pub async fn check_status(&self) -> mgmt_api::Result<()> {
        self.http_client.status().await
    }
@@ -522,6 +540,19 @@ impl PageServerNode {
        Ok(())
    }

+    /// Forward a `location_config` call for `tenant_shard_id` to this node's
+    /// HTTP client, converting the client's error into `anyhow::Error`.
+    pub async fn location_config(
+        &self,
+        tenant_shard_id: TenantShardId,
+        config: LocationConfig,
+        flush_ms: Option<Duration>,
+        lazy: bool,
+    ) -> anyhow::Result<()> {
+        let client = &self.http_client;
+        Ok(client.location_config(tenant_shard_id, config, flush_ms, lazy).await?)
+    }
+
    pub async fn timeline_list(
        &self,
        tenant_shard_id: &TenantShardId,
@@ -605,4 +636,14 @@ impl PageServerNode {

        Ok(())
    }
+
+    /// Ask this node's HTTP client for the synthetic size of `tenant_shard_id`,
+    /// converting the client's error into `anyhow::Error`.
+    pub async fn tenant_synthetic_size(
+        &self,
+        tenant_shard_id: TenantShardId,
+    ) -> anyhow::Result<TenantHistorySize> {
+        let size = self.http_client.tenant_synthetic_size(tenant_shard_id).await?;
+        Ok(size)
+    }
 }
--- a/control_plane/src/postgresql_conf.rs
+++ b/control_plane/src/postgresql_conf.rs
@@ -4,10 +4,13 @@
 /// NOTE: This doesn't implement the full, correct postgresql.conf syntax. Just
 /// enough to extract a few settings we need in Neon, assuming you don't do
 /// funny stuff like include-directives or funny escaping.
+use anyhow::{bail, Context, Result};
 use once_cell::sync::Lazy;
 use regex::Regex;
 use std::collections::HashMap;
 use std::fmt;
+use std::io::BufRead;
+use std::str::FromStr;

 /// In-memory representation of a postgresql.conf file
 #[derive(Default, Debug)]
@@ -16,16 +19,84 @@ pub struct PostgresConf {
    hash: HashMap<String, String>,
 }

+static CONF_LINE_RE: Lazy<Regex> =
+    Lazy::new(|| Regex::new(r"^((?:\w|\.)+)\s*=\s*('(?:[^'\\]|\\.|'')*'|\S+)$").unwrap());
 impl PostgresConf {
    pub fn new() -> PostgresConf {
        PostgresConf::default()
    }

+    /// Parse a postgresql.conf-style stream into a `PostgresConf`.
+    ///
+    /// Every input line is kept verbatim, in order. Additionally, each
+    /// non-comment line that matches `CONF_LINE_RE` and whose value de-escapes
+    /// successfully is recorded as a setting; all other lines are left out of
+    /// the settings map without an error.
+    ///
+    /// FIXME: This doesn't match exactly the flex/bison grammar in PostgreSQL.
+    /// But it's close enough for our usage.
+    ///
+    /// Fails only if reading a line from `read` fails.
+    pub fn read(read: impl std::io::Read) -> Result<PostgresConf> {
+        let mut conf = Self::new();
+        for raw_line in std::io::BufReader::new(read).lines() {
+            let raw_line = raw_line?;
+            let trimmed = raw_line.trim();
+            let setting = if trimmed.starts_with('#') {
+                None
+            } else {
+                CONF_LINE_RE.captures(trimmed).and_then(|caps| {
+                    let value = deescape_str(&caps[2]).ok()?;
+                    Some((caps[1].to_string(), value))
+                })
+            };
+            // A later line for the same key replaces the earlier entry, just as
+            // PostgreSQL lets each line override previous values of a setting.
+            if let Some((name, value)) = setting {
+                conf.hash.insert(name, value);
+            }
+            conf.lines.push(raw_line);
+        }
+        Ok(conf)
+    }
+
    /// Return the current value of 'option'
    pub fn get(&self, option: &str) -> Option<&str> {
        self.hash.get(option).map(|x| x.as_ref())
    }

+    /// Look up `field_name` and convert its value to `T` via `FromStr`.
+    ///
+    /// Fails if the option is absent or its value does not parse; `context`
+    /// is appended to the error message in both cases.
+    pub fn parse_field<T>(&self, field_name: &str, context: &str) -> Result<T>
+    where
+        T: FromStr,
+        <T as FromStr>::Err: std::error::Error + Send + Sync + 'static,
+    {
+        let raw = self
+            .get(field_name)
+            .with_context(|| format!("could not find '{}' option {}", field_name, context))?;
+        raw.parse::<T>()
+            .with_context(|| format!("could not parse '{}' option {}", field_name, context))
+    }
+
+    /// Like `parse_field`, but an absent option yields `Ok(None)` instead of an
+    /// error. A present value that fails to parse is still an error.
+    pub fn parse_field_optional<T>(&self, field_name: &str, context: &str) -> Result<Option<T>>
+    where
+        T: FromStr,
+        <T as FromStr>::Err: std::error::Error + Send + Sync + 'static,
+    {
+        match self.get(field_name) {
+            None => Ok(None),
+            Some(raw) => raw
+                .parse::<T>()
+                .map(Some)
+                .with_context(|| format!("could not parse '{}' option {}", field_name, context)),
+        }
+    }
+
    ///
    /// Note: if you call this multiple times for the same option, the config
    /// file will a line for each call. It would be nice to have a function
@@ -83,8 +154,48 @@ fn escape_str(s: &str) -> String {
    }
 }

+/// De-escape a possibly-quoted value.
+///
+/// A value that is not wrapped in single quotes is returned unchanged.
+/// Otherwise the outer quotes are dropped, `''` collapses to `'`, and
+/// backslash escapes are resolved: `\b`, `\f`, `\n`, `\r`, `\t` map to their
+/// control characters and any other escaped character stands for itself.
+/// Octal escapes are rejected with an error, and a trailing lone backslash
+/// is dropped.
+///
+/// See `DeescapeQuotedString` function in PostgreSQL sources for how PostgreSQL
+/// does this.
+fn deescape_str(s: &str) -> Result<String> {
+    let quoted = s.len() >= 2 && s.starts_with('\'') && s.ends_with('\'');
+    if !quoted {
+        return Ok(s.to_string());
+    }
+
+    let mut out = String::new();
+    let mut chars = s[1..(s.len() - 1)].chars().peekable();
+    while let Some(ch) = chars.next() {
+        match ch {
+            '\\' => match chars.next() {
+                Some('b') => out.push('\x08'),
+                Some('f') => out.push('\x0c'),
+                Some('n') => out.push('\n'),
+                Some('r') => out.push('\r'),
+                Some('t') => out.push('\t'),
+                // TODO
+                Some('0'..='7') => bail!("octal escapes not supported"),
+                Some(other) => out.push(other),
+                None => break,
+            },
+            // doubled quote becomes just one quote
+            '\'' if chars.peek() == Some(&'\'') => out.push(chars.next().unwrap()),
+            _ => out.push(ch),
+        }
+    }
+    Ok(out)
+}
+
 #[test]
-fn test_postgresql_conf_escapes() -> anyhow::Result<()> {
+fn test_postgresql_conf_escapes() -> Result<()> {
    assert_eq!(escape_str("foo bar"), "'foo bar'");
    // these don't need to be quoted
    assert_eq!(escape_str("foo"), "foo");
@@ -103,5 +214,13 @@ fn test_postgresql_conf_escapes() -> anyhow::Result<()> {
    assert_eq!(escape_str("fo\\o"), "'fo\\\\o'");
    assert_eq!(escape_str("10 cats"), "'10 cats'");

+    // Test de-escaping
+    assert_eq!(deescape_str(&escape_str("foo"))?, "foo");
+    assert_eq!(deescape_str(&escape_str("fo'o\nba\\r"))?, "fo'o\nba\\r");
+    assert_eq!(deescape_str("'\\b\\f\\n\\r\\t'")?, "\x08\x0c\n\r\t");
+
+    // octal-escapes are currently not supported
+    assert!(deescape_str("'foo\\7\\07\\007'").is_err());
+
    Ok(())
 }
--- a/control_plane/src/storage_controller.rs
+++ b/control_plane/src/storage_controller.rs
@@ -28,7 +28,6 @@ use utils::{
    auth::{encode_from_key_file, Claims, Scope},
    id::{NodeId, TenantId},
 };
-use whoami::username;

 pub struct StorageController {
    env: LocalEnv,
@@ -184,7 +183,7 @@ impl StorageController {
    /// to other versions if that one isn't found.  Some automated tests create circumstances
    /// where only one version is available in pg_distrib_dir, such as `test_remote_extensions`.
    async fn get_pg_dir(&self, dir_name: &str) -> anyhow::Result<Utf8PathBuf> {
-        let prefer_versions = [STORAGE_CONTROLLER_POSTGRES_VERSION, 16, 15, 14];
+        let prefer_versions = [STORAGE_CONTROLLER_POSTGRES_VERSION, 15, 14];

        for v in prefer_versions {
            let path = Utf8PathBuf::from_path_buf(self.env.pg_dir(v, dir_name)?).unwrap();
@@ -212,16 +211,7 @@ impl StorageController {
    /// Readiness check for our postgres process
    async fn pg_isready(&self, pg_bin_dir: &Utf8Path, postgres_port: u16) -> anyhow::Result<bool> {
        let bin_path = pg_bin_dir.join("pg_isready");
-        let args = [
-            "-h",
-            "localhost",
-            "-U",
-            &username(),
-            "-d",
-            DB_NAME,
-            "-p",
-            &format!("{}", postgres_port),
-        ];
+        let args = ["-h", "localhost", "-p", &format!("{}", postgres_port)];
        let exitcode = Command::new(bin_path).args(args).spawn()?.wait().await?;

        Ok(exitcode.success())
@@ -235,11 +225,7 @@ impl StorageController {
    ///
    /// Returns the database url
    pub async fn setup_database(&self, postgres_port: u16) -> anyhow::Result<String> {
-        let database_url = format!(
-            "postgresql://{}@localhost:{}/{DB_NAME}",
-            &username(),
-            postgres_port
-        );
+        let database_url = format!("postgresql://localhost:{}/{DB_NAME}", postgres_port);

        let pg_bin_dir = self.get_pg_bin_dir().await?;
        let createdb_path = pg_bin_dir.join("createdb");
@@ -249,10 +235,6 @@ impl StorageController {
                "localhost",
                "-p",
                &format!("{}", postgres_port),
-                "-U",
-                &username(),
-                "-O",
-                &username(),
                DB_NAME,
            ])
            .output()
@@ -289,7 +271,7 @@ impl StorageController {
            // But tokio-postgres fork doesn't have this upstream commit:
            // https://github.com/sfackler/rust-postgres/commit/cb609be758f3fb5af537f04b584a2ee0cebd5e79
            // => we should rebase our fork => TODO https://github.com/neondatabase/neon/issues/8399
-            .user(&username())
+            .user(&whoami::username())
            .dbname(DB_NAME)
            .connect(tokio_postgres::NoTls)
            .await
@@ -346,12 +328,6 @@ impl StorageController {
            let pg_log_path = pg_data_path.join("postgres.log");

            if !tokio::fs::try_exists(&pg_data_path).await? {
-                let initdb_args = ["-D", pg_data_path.as_ref(), "--username", &username()];
-                tracing::info!(
-                    "Initializing storage controller database with args: {:?}",
-                    initdb_args
-                );
-
                // Initialize empty database
                let initdb_path = pg_bin_dir.join("initdb");
                let mut child = Command::new(&initdb_path)
@@ -359,7 +335,7 @@ impl StorageController {
                        ("LD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
                        ("DYLD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
                    ])
-                    .args(initdb_args)
+                    .args(["-D", pg_data_path.as_ref()])
                    .spawn()
                    .expect("Failed to spawn initdb");
                let status = child.wait().await?;
@@ -388,14 +364,8 @@ impl StorageController {
                pg_data_path.as_ref(),
                "-l",
                pg_log_path.as_ref(),
-                "-U",
-                &username(),
                "start",
            ];
-            tracing::info!(
-                "Starting storage controller database with args: {:?}",
-                db_start_args
-            );

            background_process::start_process(
                "storage_controller_db",
--- a/control_plane/storcon_cli/Cargo.toml
+++ b/control_plane/storcon_cli/Cargo.toml
@@ -11,11 +11,14 @@ clap.workspace = true
 comfy-table.workspace = true
 futures.workspace = true
 humantime.workspace = true
+hyper.workspace = true
 pageserver_api.workspace = true
 pageserver_client.workspace = true
 reqwest.workspace = true
+serde.workspace = true
 serde_json = { workspace = true, features = ["raw_value"] }
 storage_controller_client.workspace = true
+thiserror.workspace = true
 tokio.workspace = true
 tracing.workspace = true
 utils.workspace = true
--- a/libs/compute_api/Cargo.toml
+++ b/libs/compute_api/Cargo.toml
@@ -8,6 +8,7 @@ license.workspace = true
 anyhow.workspace = true
 chrono.workspace = true
 serde.workspace = true
+serde_with.workspace = true
 serde_json.workspace = true
 regex.workspace = true

--- a/libs/consumption_metrics/Cargo.toml
+++ b/libs/consumption_metrics/Cargo.toml
@@ -5,6 +5,9 @@ edition = "2021"
 license = "Apache-2.0"

 [dependencies]
+anyhow.workspace = true
 chrono = { workspace = true, features = ["serde"] }
 rand.workspace = true
 serde.workspace = true
+serde_with.workspace = true
+utils.workspace = true
--- a/libs/consumption_metrics/src/lib.rs
+++ b/libs/consumption_metrics/src/lib.rs
@@ -5,7 +5,7 @@ use chrono::{DateTime, Utc};
 use rand::Rng;
 use serde::{Deserialize, Serialize};

-#[derive(Serialize, Deserialize, Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd)]
+#[derive(Serialize, serde::Deserialize, Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd)]
 #[serde(tag = "type")]
 pub enum EventType {
    #[serde(rename = "absolute")]
@@ -107,7 +107,7 @@ pub const CHUNK_SIZE: usize = 1000;

 // Just a wrapper around a slice of events
 // to serialize it as `{"events" : [ ] }
-#[derive(serde::Serialize, Deserialize)]
+#[derive(serde::Serialize, serde::Deserialize)]
 pub struct EventChunk<'a, T: Clone> {
    pub events: std::borrow::Cow<'a, [T]>,
 }
--- a/libs/desim/Cargo.toml
+++ b/libs/desim/Cargo.toml
@@ -12,4 +12,5 @@ bytes.workspace = true
 utils.workspace = true
 parking_lot.workspace = true
 hex.workspace = true
+scopeguard.workspace = true
 smallvec = { workspace = true, features = ["write"] }
--- a/libs/pageserver_api/src/config.rs
+++ b/libs/pageserver_api/src/config.rs
@@ -173,6 +173,40 @@ impl Default for EvictionOrder {
    }
 }

+#[derive(
+    Eq,
+    PartialEq,
+    Debug,
+    Copy,
+    Clone,
+    strum_macros::EnumString,
+    strum_macros::Display,
+    serde_with::DeserializeFromStr,
+    serde_with::SerializeDisplay,
+)]
+#[strum(serialize_all = "kebab-case")]
+pub enum GetVectoredImpl {
+    Sequential,
+    Vectored,
+}
+
+#[derive(
+    Eq,
+    PartialEq,
+    Debug,
+    Copy,
+    Clone,
+    strum_macros::EnumString,
+    strum_macros::Display,
+    serde_with::DeserializeFromStr,
+    serde_with::SerializeDisplay,
+)]
+#[strum(serialize_all = "kebab-case")]
+pub enum GetImpl {
+    Legacy,
+    Vectored,
+}
+
 #[derive(Copy, Clone, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
 #[serde(transparent)]
 pub struct MaxVectoredReadBytes(pub NonZeroUsize);
@@ -304,6 +338,8 @@ pub mod defaults {
    pub const DEFAULT_IMAGE_COMPRESSION: ImageCompressionAlgorithm =
        ImageCompressionAlgorithm::Zstd { level: Some(1) };

+    pub const DEFAULT_VALIDATE_VECTORED_GET: bool = false;
+
    pub const DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB: usize = 0;

    pub const DEFAULT_IO_BUFFER_ALIGNMENT: usize = 512;
@@ -340,10 +376,7 @@ impl Default for ConfigToml {

            concurrent_tenant_warmup: (NonZeroUsize::new(DEFAULT_CONCURRENT_TENANT_WARMUP)
                .expect("Invalid default constant")),
-            concurrent_tenant_size_logical_size_queries: NonZeroUsize::new(
-                DEFAULT_CONCURRENT_TENANT_SIZE_LOGICAL_SIZE_QUERIES,
-            )
-            .unwrap(),
+            concurrent_tenant_size_logical_size_queries: NonZeroUsize::new(1).unwrap(),
            metric_collection_interval: (humantime::parse_duration(
                DEFAULT_METRIC_COLLECTION_INTERVAL,
            )
@@ -434,6 +467,8 @@ pub mod tenant_conf_defaults {
    // By default ingest enough WAL for two new L0 layers before checking if new image
    // image layers should be created.
    pub const DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD: u8 = 2;
+
+    pub const DEFAULT_INGEST_BATCH_SIZE: u64 = 100;
 }

 impl Default for TenantConfigToml {
--- a/libs/pageserver_api/src/key.rs
+++ b/libs/pageserver_api/src/key.rs
@@ -1,8 +1,8 @@
 use anyhow::{bail, Result};
 use byteorder::{ByteOrder, BE};
 use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
-use postgres_ffi::Oid;
 use postgres_ffi::RepOriginId;
+use postgres_ffi::{Oid, TransactionId};
 use serde::{Deserialize, Serialize};
 use std::{fmt, ops::Range};

@@ -350,17 +350,7 @@ impl Key {
 // 02 00000000 00000000 00000000 00   00000000
 //
 // TwoPhaseFile:
-//
-// 02 00000000 00000000 00XXXXXX XX   XXXXXXXX
-//
-//                        \______XID_________/
-//
-// The 64-bit XID is stored a little awkwardly in field6, field5 and
-// field4. PostgreSQL v16 and below only stored a 32-bit XID, which
-// fit completely in field6, but starting with PostgreSQL v17, a full
-// 64-bit XID is used. Most pageserver code that accesses
-// TwoPhaseFiles now deals with 64-bit XIDs even on v16, the high bits
-// are just unused.
+// 02 00000000 00000000 00000000 00   XID
 //
 // ControlFile:
 // 03 00000000 00000000 00000000 00   00000000
@@ -592,36 +582,35 @@ pub const TWOPHASEDIR_KEY: Key = Key {
 };

 #[inline(always)]
-pub fn twophase_file_key(xid: u64) -> Key {
+pub fn twophase_file_key(xid: TransactionId) -> Key {
    Key {
        field1: 0x02,
        field2: 0,
        field3: 0,
-        field4: ((xid & 0xFFFFFF0000000000) >> 40) as u32,
-        field5: ((xid & 0x000000FF00000000) >> 32) as u8,
-        field6: (xid & 0x00000000FFFFFFFF) as u32,
+        field4: 0,
+        field5: 0,
+        field6: xid,
    }
 }

 #[inline(always)]
-pub fn twophase_key_range(xid: u64) -> Range<Key> {
-    // 64-bit XIDs really should not overflow
+pub fn twophase_key_range(xid: TransactionId) -> Range<Key> {
    let (next_xid, overflowed) = xid.overflowing_add(1);

    Key {
        field1: 0x02,
        field2: 0,
        field3: 0,
-        field4: ((xid & 0xFFFFFF0000000000) >> 40) as u32,
-        field5: ((xid & 0x000000FF00000000) >> 32) as u8,
-        field6: (xid & 0x00000000FFFFFFFF) as u32,
+        field4: 0,
+        field5: 0,
+        field6: xid,
    }..Key {
        field1: 0x02,
        field2: 0,
-        field3: u32::from(overflowed),
-        field4: ((next_xid & 0xFFFFFF0000000000) >> 40) as u32,
-        field5: ((next_xid & 0x000000FF00000000) >> 32) as u8,
-        field6: (next_xid & 0x00000000FFFFFFFF) as u32,
+        field3: 0,
+        field4: 0,
+        field5: u8::from(overflowed), // on wrap-around next_xid is 0; the carry keeps end > start
+        field6: next_xid,
    }
 }

--- a/libs/pageserver_api/src/models.rs
+++ b/libs/pageserver_api/src/models.rs
@@ -495,7 +495,7 @@ pub struct CompactionAlgorithmSettings {
    pub kind: CompactionAlgorithm,
 }

-#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)]
+#[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)]
 #[serde(tag = "mode", rename_all = "kebab-case", deny_unknown_fields)]
 pub enum L0FlushConfig {
    #[serde(rename_all = "snake_case")]
--- a/libs/postgres_backend/Cargo.toml
+++ b/libs/postgres_backend/Cargo.toml
@@ -5,8 +5,10 @@ edition.workspace = true
 license.workspace = true

 [dependencies]
+async-trait.workspace = true
 anyhow.workspace = true
 bytes.workspace = true
+futures.workspace = true
 rustls.workspace = true
 serde.workspace = true
 thiserror.workspace = true
--- a/libs/postgres_backend/src/lib.rs
+++ b/libs/postgres_backend/src/lib.rs
@@ -280,6 +280,16 @@ pub struct PostgresBackend<IO> {

 pub type PostgresBackendTCP = PostgresBackend<tokio::net::TcpStream>;

+/// Copy `query_string` into an owned byte vector, dropping the null terminator
+/// if there is one.
+///
+/// At most one trailing zero byte is removed; all other bytes are kept as-is.
+pub fn query_from_cstring(query_string: Bytes) -> Vec<u8> {
+    let raw: &[u8] = &query_string;
+    let without_null = raw.strip_suffix(&[0]).unwrap_or(raw);
+    without_null.to_vec()
+}
+
 /// Cast a byte slice to a string slice, dropping null terminator if there's one.
 fn cstr_to_str(bytes: &[u8]) -> anyhow::Result<&str> {
    let without_null = bytes.strip_suffix(&[0]).unwrap_or(bytes);
--- a/libs/postgres_ffi/Cargo.toml
+++ b/libs/postgres_ffi/Cargo.toml
@@ -5,10 +5,13 @@ edition.workspace = true
 license.workspace = true

 [dependencies]
+rand.workspace = true
 regex.workspace = true
 bytes.workspace = true
+byteorder.workspace = true
 anyhow.workspace = true
 crc32c.workspace = true
+hex.workspace = true
 once_cell.workspace = true
 log.workspace = true
 memoffset.workspace = true
--- a/libs/postgres_ffi/build.rs
+++ b/libs/postgres_ffi/build.rs
@@ -56,7 +56,7 @@ fn main() -> anyhow::Result<()> {
        PathBuf::from("pg_install")
    };

-    for pg_version in &["v14", "v15", "v16", "v17"] {
+    for pg_version in &["v14", "v15", "v16"] {
        let mut pg_install_dir_versioned = pg_install_dir.join(pg_version);
        if pg_install_dir_versioned.is_relative() {
            let cwd = env::current_dir().context("Failed to get current_dir")?;
--- a/libs/postgres_ffi/src/lib.rs
+++ b/libs/postgres_ffi/src/lib.rs
@@ -57,7 +57,6 @@ macro_rules! for_all_postgres_versions {
        $macro!(v14);
        $macro!(v15);
        $macro!(v16);
-        $macro!(v17);
    };
 }

@@ -92,7 +91,6 @@ macro_rules! dispatch_pgversion {
                14 : v14,
                15 : v15,
                16 : v16,
-                17 : v17,
            ]
        )
    };
@@ -123,7 +121,6 @@ macro_rules! enum_pgversion_dispatch {
                V14 : v14,
                V15 : v15,
                V16 : v16,
-                V17 : v17,
            ]
        )
    };
@@ -153,7 +150,6 @@ macro_rules! enum_pgversion {
                V14 : v14,
                V15 : v15,
                V16 : v16,
-                V17 : v17,
            ]
        }
    };
@@ -166,7 +162,6 @@ macro_rules! enum_pgversion {
                V14 : v14,
                V15 : v15,
                V16 : v16,
-                V17 : v17,
            ]
        }
    };
--- a/libs/postgres_ffi/src/pg_constants.rs
+++ b/libs/postgres_ffi/src/pg_constants.rs
@@ -9,8 +9,8 @@
 //! comments on them.
 //!

-use crate::PageHeaderData;
 use crate::BLCKSZ;
+use crate::{PageHeaderData, XLogRecord};

 //
 // From pg_tablespace_d.h
@@ -152,9 +152,6 @@ pub const XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED: u8 = (1 << 0) as u8;
 pub const XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED: u8 = (1 << 1) as u8;
 pub const XLH_DELETE_ALL_VISIBLE_CLEARED: u8 = (1 << 0) as u8;

-// From heapam_xlog.h
-pub const XLOG_HEAP2_REWRITE: u8 = 0x00;
-
 // From replication/message.h
 pub const XLOG_LOGICAL_MESSAGE: u8 = 0x00;

@@ -194,6 +191,8 @@ pub const XLR_RMGR_INFO_MASK: u8 = 0xF0;
 pub const XLOG_TBLSPC_CREATE: u8 = 0x00;
 pub const XLOG_TBLSPC_DROP: u8 = 0x10;

+pub const SIZEOF_XLOGRECORD: u32 = size_of::<XLogRecord>() as u32;
+
 //
 // from xlogrecord.h
 //
@@ -217,21 +216,18 @@ pub const BKPIMAGE_HAS_HOLE: u8 = 0x01; /* page image has "hole" */
 /* From transam.h */
 pub const FIRST_NORMAL_TRANSACTION_ID: u32 = 3;
 pub const INVALID_TRANSACTION_ID: u32 = 0;
+pub const FIRST_BOOTSTRAP_OBJECT_ID: u32 = 12000;
+pub const FIRST_NORMAL_OBJECT_ID: u32 = 16384;

-/* pg_control.h */
 pub const XLOG_CHECKPOINT_SHUTDOWN: u8 = 0x00;
 pub const XLOG_CHECKPOINT_ONLINE: u8 = 0x10;
-pub const XLOG_PARAMETER_CHANGE: u8 = 0x60;
-pub const XLOG_END_OF_RECOVERY: u8 = 0x90;
+pub const XLP_FIRST_IS_CONTRECORD: u16 = 0x0001;
+pub const XLP_LONG_HEADER: u16 = 0x0002;

 /* From xlog.h */
 pub const XLOG_REPLORIGIN_SET: u8 = 0x00;
 pub const XLOG_REPLORIGIN_DROP: u8 = 0x10;

-/* xlog_internal.h */
-pub const XLP_FIRST_IS_CONTRECORD: u16 = 0x0001;
-pub const XLP_LONG_HEADER: u16 = 0x0002;
-
 /* From replication/slot.h */
 pub const REPL_SLOT_ON_DISK_OFFSETOF_RESTART_LSN: usize = 4*4  /* offset of `slotdata` in ReplicationSlotOnDisk  */
   + 64 /* NameData */  + 4*4;
@@ -249,6 +245,33 @@ pub const VM_HEAPBLOCKS_PER_PAGE: u32 =
 /* From origin.c */
 pub const REPLICATION_STATE_MAGIC: u32 = 0x1257DADE;

+// List of subdirectories inside pgdata.
+// Copied from src/bin/initdb/initdb.c
+pub const PGDATA_SUBDIRS: [&str; 22] = [
+    "global",
+    "pg_wal/archive_status",
+    "pg_commit_ts",
+    "pg_dynshmem",
+    "pg_notify",
+    "pg_serial",
+    "pg_snapshots",
+    "pg_subtrans",
+    "pg_twophase",
+    "pg_multixact",
+    "pg_multixact/members",
+    "pg_multixact/offsets",
+    "base",
+    "base/1",
+    "pg_replslot",
+    "pg_tblspc",
+    "pg_stat",
+    "pg_stat_tmp",
+    "pg_xact",
+    "pg_logical",
+    "pg_logical/snapshots",
+    "pg_logical/mappings",
+];
+
 // Don't include postgresql.conf as it is inconvenient on node start:
 // we need postgresql.conf before basebackup to synchronize safekeepers
 // so no point in overwriting it during backup restore. Rest of the files
--- a/libs/postgres_ffi/src/pg_constants_v14.rs
+++ b/libs/postgres_ffi/src/pg_constants_v14.rs
@@ -5,33 +5,6 @@ pub const BKPIMAGE_IS_COMPRESSED: u8 = 0x02; /* page image is compressed */
 pub const BKPIMAGE_APPLY: u8 = 0x04; /* page image should be restored during replay */
 pub const SIZEOF_RELMAPFILE: usize = 512; /* sizeof(RelMapFile) in relmapper.c */

-// List of subdirectories inside pgdata.
-// Copied from src/bin/initdb/initdb.c
-pub const PGDATA_SUBDIRS: [&str; 22] = [
-    "global",
-    "pg_wal/archive_status",
-    "pg_commit_ts",
-    "pg_dynshmem",
-    "pg_notify",
-    "pg_serial",
-    "pg_snapshots",
-    "pg_subtrans",
-    "pg_twophase",
-    "pg_multixact",
-    "pg_multixact/members",
-    "pg_multixact/offsets",
-    "base",
-    "base/1",
-    "pg_replslot",
-    "pg_tblspc",
-    "pg_stat",
-    "pg_stat_tmp",
-    "pg_xact",
-    "pg_logical",
-    "pg_logical/snapshots",
-    "pg_logical/mappings",
-];
-
 pub fn bkpimg_is_compressed(bimg_info: u8) -> bool {
    (bimg_info & BKPIMAGE_IS_COMPRESSED) != 0
 }
--- a/libs/postgres_ffi/src/pg_constants_v15.rs
+++ b/libs/postgres_ffi/src/pg_constants_v15.rs
@@ -11,8 +11,6 @@ pub const BKPIMAGE_COMPRESS_ZSTD: u8 = 0x10; /* page image is compressed */

 pub const SIZEOF_RELMAPFILE: usize = 512; /* sizeof(RelMapFile) in relmapper.c */

-pub use super::super::v14::bindings::PGDATA_SUBDIRS;
-
 pub fn bkpimg_is_compressed(bimg_info: u8) -> bool {
    const ANY_COMPRESS_FLAG: u8 = BKPIMAGE_COMPRESS_PGLZ | BKPIMAGE_COMPRESS_LZ4 | BKPIMAGE_COMPRESS_ZSTD;

--- a/libs/postgres_ffi/src/pg_constants_v16.rs
+++ b/libs/postgres_ffi/src/pg_constants_v16.rs
@@ -11,8 +11,6 @@ pub const BKPIMAGE_COMPRESS_ZSTD: u8 = 0x10; /* page image is compressed */

 pub const SIZEOF_RELMAPFILE: usize = 524; /* sizeof(RelMapFile) in relmapper.c */

-pub use super::super::v14::bindings::PGDATA_SUBDIRS;
-
 pub fn bkpimg_is_compressed(bimg_info: u8) -> bool {
    const ANY_COMPRESS_FLAG: u8 = BKPIMAGE_COMPRESS_PGLZ | BKPIMAGE_COMPRESS_LZ4 | BKPIMAGE_COMPRESS_ZSTD;

--- a/libs/postgres_ffi/src/pg_constants_v17.rs
+++ b/libs/postgres_ffi/src/pg_constants_v17.rs
@@ -1,55 +0,0 @@
-pub const XACT_XINFO_HAS_DROPPED_STATS: u32 = 1u32 << 8;
-
-pub const XLOG_DBASE_CREATE_FILE_COPY: u8 = 0x00;
-pub const XLOG_DBASE_CREATE_WAL_LOG: u8 = 0x10;
-pub const XLOG_DBASE_DROP: u8 = 0x20;
-
-pub const BKPIMAGE_APPLY: u8 = 0x02; /* page image should be restored during replay */
-pub const BKPIMAGE_COMPRESS_PGLZ: u8 = 0x04; /* page image is compressed */
-pub const BKPIMAGE_COMPRESS_LZ4: u8 = 0x08; /* page image is compressed */
-pub const BKPIMAGE_COMPRESS_ZSTD: u8 = 0x10; /* page image is compressed */
-
-pub const SIZEOF_RELMAPFILE: usize = 524; /* sizeof(RelMapFile) in relmapper.c */
-
-// List of subdirectories inside pgdata.
-// Copied from src/bin/initdb/initdb.c
-pub const PGDATA_SUBDIRS: [&str; 23] = [
-    "global",
-    "pg_wal/archive_status",
-    "pg_wal/summaries",
-    "pg_commit_ts",
-    "pg_dynshmem",
-    "pg_notify",
-    "pg_serial",
-    "pg_snapshots",
-    "pg_subtrans",
-    "pg_twophase",
-    "pg_multixact",
-    "pg_multixact/members",
-    "pg_multixact/offsets",
-    "base",
-    "base/1",
-    "pg_replslot",
-    "pg_tblspc",
-    "pg_stat",
-    "pg_stat_tmp",
-    "pg_xact",
-    "pg_logical",
-    "pg_logical/snapshots",
-    "pg_logical/mappings",
-];
-
-pub fn bkpimg_is_compressed(bimg_info: u8) -> bool {
-    const ANY_COMPRESS_FLAG: u8 = BKPIMAGE_COMPRESS_PGLZ | BKPIMAGE_COMPRESS_LZ4 | BKPIMAGE_COMPRESS_ZSTD;
-
-    (bimg_info & ANY_COMPRESS_FLAG) != 0
-}
-
-
-pub const XLOG_HEAP2_PRUNE_ON_ACCESS: u8 = 0x10;
-pub const XLOG_HEAP2_PRUNE_VACUUM_SCAN: u8 = 0x20;
-pub const XLOG_HEAP2_PRUNE_VACUUM_CLEANUP: u8 = 0x30;
-
-
-pub const XLOG_OVERWRITE_CONTRECORD: u8 = 0xD0;
-pub const XLOG_CHECKPOINT_REDO: u8 = 0xE0;
--- a/libs/postgres_ffi/src/xlog_utils.rs
+++ b/libs/postgres_ffi/src/xlog_utils.rs
@@ -26,12 +26,11 @@ use bytes::{Buf, Bytes};
 use log::*;

 use serde::Serialize;
-use std::ffi::OsStr;
 use std::fs::File;
 use std::io::prelude::*;
 use std::io::ErrorKind;
 use std::io::SeekFrom;
-use std::path::Path;
+use std::path::{Path, PathBuf};
 use std::time::SystemTime;
 use utils::bin_ser::DeserializeError;
 use utils::bin_ser::SerializeError;
@@ -79,34 +78,19 @@ pub fn XLogFileName(tli: TimeLineID, logSegNo: XLogSegNo, wal_segsz_bytes: usize
    )
 }

-pub fn XLogFromFileName(
-    fname: &OsStr,
-    wal_seg_size: usize,
-) -> anyhow::Result<(XLogSegNo, TimeLineID)> {
-    if let Some(fname_str) = fname.to_str() {
-        let tli = u32::from_str_radix(&fname_str[0..8], 16)?;
-        let log = u32::from_str_radix(&fname_str[8..16], 16)? as XLogSegNo;
-        let seg = u32::from_str_radix(&fname_str[16..24], 16)? as XLogSegNo;
-        Ok((log * XLogSegmentsPerXLogId(wal_seg_size) + seg, tli))
-    } else {
-        anyhow::bail!("non-ut8 filename: {:?}", fname);
-    }
+/// Parse a WAL segment file name into `(segno, tli)`; panics on short or non-hex input.
+pub fn XLogFromFileName(fname: &str, wal_seg_size: usize) -> (XLogSegNo, TimeLineID) {
+    let hex = |r: std::ops::Range<usize>| u32::from_str_radix(&fname[r], 16).unwrap();
+    let (tli, log, seg) = (hex(0..8), hex(8..16) as XLogSegNo, hex(16..24) as XLogSegNo);
+    (log * XLogSegmentsPerXLogId(wal_seg_size) + seg, tli)
 }

-pub fn IsXLogFileName(fname: &OsStr) -> bool {
-    if let Some(fname) = fname.to_str() {
-        fname.len() == XLOG_FNAME_LEN && fname.chars().all(|c| c.is_ascii_hexdigit())
-    } else {
-        false
-    }
+pub fn IsXLogFileName(fname: &str) -> bool {
+    fname.len() == XLOG_FNAME_LEN && fname.chars().all(|c| c.is_ascii_hexdigit())
 }

-pub fn IsPartialXLogFileName(fname: &OsStr) -> bool {
-    if let Some(fname) = fname.to_str() {
-        fname.ends_with(".partial") && IsXLogFileName(OsStr::new(&fname[0..fname.len() - 8]))
-    } else {
-        false
-    }
+pub fn IsPartialXLogFileName(fname: &str) -> bool {
+    fname.strip_suffix(".partial").is_some_and(IsXLogFileName)
 }

 /// If LSN points to the beginning of the page, then shift it to first record,
@@ -276,6 +260,13 @@ fn open_wal_segment(seg_file_path: &Path) -> anyhow::Result<Option<File>> {
    }
 }

+/// Ad-hoc entry point: find the end of WAL under the current directory and print it.
+pub fn main() {
+    let data_dir = PathBuf::from(".");
+    let wal_end = find_end_of_wal(&data_dir, WAL_SEGMENT_SIZE, Lsn(0)).unwrap();
+    println!("wal_end={:?}", wal_end);
+}
+
 impl XLogRecord {
    pub fn from_slice(buf: &[u8]) -> Result<XLogRecord, DeserializeError> {
        use utils::bin_ser::LeSer;
--- a/libs/postgres_ffi/wal_craft/Cargo.toml
+++ b/libs/postgres_ffi/wal_craft/Cargo.toml
@@ -9,6 +9,7 @@ anyhow.workspace = true
 clap.workspace = true
 env_logger.workspace = true
 log.workspace = true
+once_cell.workspace = true
 postgres.workspace = true
 postgres_ffi.workspace = true
 camino-tempfile.workspace = true
--- a/libs/postgres_ffi/wal_craft/src/lib.rs
+++ b/libs/postgres_ffi/wal_craft/src/lib.rs
@@ -7,7 +7,6 @@ use postgres_ffi::{WAL_SEGMENT_SIZE, XLOG_BLCKSZ};
 use postgres_ffi::{
    XLOG_SIZE_OF_XLOG_LONG_PHD, XLOG_SIZE_OF_XLOG_RECORD, XLOG_SIZE_OF_XLOG_SHORT_PHD,
 };
-use std::ffi::OsStr;
 use std::path::{Path, PathBuf};
 use std::process::Command;
 use std::time::{Duration, Instant};
@@ -27,6 +26,7 @@ macro_rules! xlog_utils_test {

 postgres_ffi::for_all_postgres_versions! { xlog_utils_test }

+#[derive(Debug, Clone, PartialEq, Eq)]
 pub struct Conf {
    pub pg_version: u32,
    pub pg_distrib_dir: PathBuf,
@@ -53,7 +53,7 @@ impl Conf {

        #[allow(clippy::manual_range_patterns)]
        match self.pg_version {
-            14 | 15 | 16 | 17 => Ok(path.join(format!("v{}", self.pg_version))),
+            14 | 15 | 16 => Ok(path.join(format!("v{}", self.pg_version))),
            _ => bail!("Unsupported postgres version: {}", self.pg_version),
        }
    }
@@ -136,8 +136,8 @@ impl Conf {

    pub fn pg_waldump(
        &self,
-        first_segment_name: &OsStr,
-        last_segment_name: &OsStr,
+        first_segment_name: &str,
+        last_segment_name: &str,
    ) -> anyhow::Result<std::process::Output> {
        let first_segment_file = self.datadir.join(first_segment_name);
        let last_segment_file = self.datadir.join(last_segment_name);
--- a/libs/postgres_ffi/wal_craft/src/xlog_utils_test.rs
+++ b/libs/postgres_ffi/wal_craft/src/xlog_utils_test.rs
@@ -4,7 +4,6 @@ use super::*;
 use crate::{error, info};
 use regex::Regex;
 use std::cmp::min;
-use std::ffi::OsStr;
 use std::fs::{self, File};
 use std::io::Write;
 use std::{env, str::FromStr};
@@ -55,7 +54,7 @@ fn test_end_of_wal<C: crate::Crafter>(test_name: &str) {
        .wal_dir()
        .read_dir()
        .unwrap()
-        .map(|f| f.unwrap().file_name())
+        .map(|f| f.unwrap().file_name().into_string().unwrap())
        .filter(|fname| IsXLogFileName(fname))
        .max()
        .unwrap();
@@ -71,11 +70,11 @@ fn test_end_of_wal<C: crate::Crafter>(test_name: &str) {
            start_lsn
        );
        for file in fs::read_dir(cfg.wal_dir()).unwrap().flatten() {
-            let fname = file.file_name();
+            let fname = file.file_name().into_string().unwrap();
            if !IsXLogFileName(&fname) {
                continue;
            }
-            let (segno, _) = XLogFromFileName(&fname, WAL_SEGMENT_SIZE).unwrap();
+            let (segno, _) = XLogFromFileName(&fname, WAL_SEGMENT_SIZE);
            let seg_start_lsn = XLogSegNoOffsetToRecPtr(segno, 0, WAL_SEGMENT_SIZE);
            if seg_start_lsn > u64::from(*start_lsn) {
                continue;
@@ -94,10 +93,10 @@ fn test_end_of_wal<C: crate::Crafter>(test_name: &str) {
    }
 }

-fn find_pg_waldump_end_of_wal(cfg: &crate::Conf, last_segment: &OsStr) -> Lsn {
+fn find_pg_waldump_end_of_wal(cfg: &crate::Conf, last_segment: &str) -> Lsn {
    // Get the actual end of WAL by pg_waldump
    let waldump_output = cfg
-        .pg_waldump(OsStr::new("000000010000000000000001"), last_segment)
+        .pg_waldump("000000010000000000000001", last_segment)
        .unwrap()
        .stderr;
    let waldump_output = std::str::from_utf8(&waldump_output).unwrap();
@@ -118,7 +117,7 @@ fn find_pg_waldump_end_of_wal(cfg: &crate::Conf, last_segment: &OsStr) -> Lsn {

 fn check_end_of_wal(
    cfg: &crate::Conf,
-    last_segment: &OsStr,
+    last_segment: &str,
    start_lsn: Lsn,
    expected_end_of_wal: Lsn,
 ) {
@@ -133,8 +132,7 @@ fn check_end_of_wal(
    // Rename file to partial to actually find last valid lsn, then rename it back.
    fs::rename(
        cfg.wal_dir().join(last_segment),
-        cfg.wal_dir()
-            .join(format!("{}.partial", last_segment.to_str().unwrap())),
+        cfg.wal_dir().join(format!("{}.partial", last_segment)),
    )
    .unwrap();
    let wal_end = find_end_of_wal(&cfg.wal_dir(), WAL_SEGMENT_SIZE, start_lsn).unwrap();
@@ -144,8 +142,7 @@ fn check_end_of_wal(
    );
    assert_eq!(wal_end, expected_end_of_wal);
    fs::rename(
-        cfg.wal_dir()
-            .join(format!("{}.partial", last_segment.to_str().unwrap())),
+        cfg.wal_dir().join(format!("{}.partial", last_segment)),
        cfg.wal_dir().join(last_segment),
    )
    .unwrap();
--- a/libs/pq_proto/Cargo.toml
+++ b/libs/pq_proto/Cargo.toml
@@ -8,8 +8,10 @@ license.workspace = true
 bytes.workspace = true
 byteorder.workspace = true
 itertools.workspace = true
+pin-project-lite.workspace = true
 postgres-protocol.workspace = true
 rand.workspace = true
 tokio = { workspace = true, features = ["io-util"] }
+tracing.workspace = true
 thiserror.workspace = true
 serde.workspace = true
--- a/libs/remote_storage/Cargo.toml
+++ b/libs/remote_storage/Cargo.toml
@@ -13,11 +13,14 @@ aws-smithy-async.workspace = true
 aws-smithy-types.workspace = true
 aws-config.workspace = true
 aws-sdk-s3.workspace = true
+aws-credential-types.workspace = true
 bytes.workspace = true
 camino = { workspace = true, features = ["serde1"] }
+humantime.workspace = true
 humantime-serde.workspace = true
 hyper = { workspace = true, features = ["stream"] }
 futures.workspace = true
+rand.workspace = true
 serde.workspace = true
 serde_json.workspace = true
 tokio = { workspace = true, features = ["sync", "fs", "io-util"] }
--- a/libs/remote_storage/src/lib.rs
+++ b/libs/remote_storage/src/lib.rs
@@ -127,6 +127,10 @@ impl RemotePath {
        &self.0
    }

+    pub fn extension(&self) -> Option<&str> {
+        self.0.extension()
+    }
+
    pub fn strip_prefix(&self, p: &RemotePath) -> Result<&Utf8Path, std::path::StripPrefixError> {
        self.0.strip_prefix(&p.0)
    }
--- a/libs/safekeeper_api/Cargo.toml
+++ b/libs/safekeeper_api/Cargo.toml
@@ -6,5 +6,6 @@ license.workspace = true

 [dependencies]
 serde.workspace = true
+serde_with.workspace = true
 const_format.workspace = true
 utils.workspace = true
--- a/libs/tracing-utils/Cargo.toml
+++ b/libs/tracing-utils/Cargo.toml
@@ -9,9 +9,8 @@ hyper.workspace = true
 opentelemetry = { workspace = true, features=["rt-tokio"] }
 opentelemetry-otlp = { workspace = true, default-features=false, features = ["http-proto", "trace", "http", "reqwest-client"] }
 opentelemetry-semantic-conventions.workspace = true
+reqwest = { workspace = true, default-features = false, features = ["rustls-tls"] }
 tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
 tracing.workspace = true
 tracing-opentelemetry.workspace = true
-
-[dev-dependencies]
-tracing-subscriber.workspace = true    # For examples in docs
+tracing-subscriber.workspace = true
--- a/libs/utils/Cargo.toml
+++ b/libs/utils/Cargo.toml
@@ -42,6 +42,7 @@ tracing.workspace = true
 tracing-error.workspace = true
 tracing-subscriber = { workspace = true, features = ["json", "registry"] }
 rand.workspace = true
+serde_with.workspace = true
 strum.workspace = true
 strum_macros.workspace = true
 url.workspace = true
--- a/libs/utils/src/accum.rs
+++ b/libs/utils/src/accum.rs
@@ -0,0 +1,33 @@
+/// Folds values into a single result, much like `Iterator::reduce`, except
+/// that the values are fed in one at a time through [`Accum::accum`] instead
+/// of being pulled from an iterator.
+///
+/// For example, to calculate the smallest value among some integers:
+///
+/// ```
+/// use utils::accum::Accum;
+///
+/// let values = [1, 2, 3];
+///
+/// let mut min_value: Accum<u32> = Accum(None);
+/// for new_value in &values {
+///     min_value.accum(std::cmp::min, *new_value);
+/// }
+///
+/// assert_eq!(min_value.0.unwrap(), 1);
+/// ```
+pub struct Accum<T>(pub Option<T>);
+impl<T: Copy> Accum<T> {
+    /// Feed one more value into the accumulator.
+    pub fn accum<F>(&mut self, func: F, new_value: T)
+    where
+        F: FnOnce(T, T) -> T,
+    {
+        // The first value fed in is stored unchanged; each later one is
+        // merged with the stored value by `func(stored, new_value)`.
+        self.0 = Some(match self.0 {
+            Some(stored) => func(stored, new_value),
+            None => new_value,
+        });
+    }
+}
--- a/libs/utils/src/http/error.rs
+++ b/libs/utils/src/http/error.rs
@@ -82,7 +82,7 @@ impl ApiError {
                StatusCode::INTERNAL_SERVER_ERROR,
            ),
            ApiError::InternalServerError(err) => HttpErrorBody::response_from_msg_and_status(
-                format!("{err:#}"), // use alternative formatting so that we give the cause without backtrace
+                err.to_string(),
                StatusCode::INTERNAL_SERVER_ERROR,
            ),
        }
--- a/libs/utils/src/id.rs
+++ b/libs/utils/src/id.rs
@@ -88,6 +88,12 @@ impl<'de> Deserialize<'de> for Id {
 }

 impl Id {
+    pub fn get_from_buf(buf: &mut impl bytes::Buf) -> Id {
+        let mut arr = [0u8; 16];
+        buf.copy_to_slice(&mut arr);
+        Id::from(arr)
+    }
+
    pub fn from_slice(src: &[u8]) -> Result<Id, IdError> {
        if src.len() != 16 {
            return Err(IdError::SliceParseError(src.len()));
@@ -173,6 +179,10 @@ impl fmt::Debug for Id {
 macro_rules! id_newtype {
    ($t:ident) => {
        impl $t {
+            pub fn get_from_buf(buf: &mut impl bytes::Buf) -> $t {
+                $t(Id::get_from_buf(buf))
+            }
+
            pub fn from_slice(src: &[u8]) -> Result<$t, IdError> {
                Ok($t(Id::from_slice(src)?))
            }
--- a/libs/utils/src/leaky_bucket.rs
+++ b/libs/utils/src/leaky_bucket.rs
@@ -21,13 +21,7 @@
 //!
 //! Another explaination can be found here: <https://brandur.org/rate-limiting>

-use std::{
-    sync::{
-        atomic::{AtomicU64, Ordering},
-        Mutex,
-    },
-    time::Duration,
-};
+use std::{sync::Mutex, time::Duration};

 use tokio::{sync::Notify, time::Instant};

@@ -134,7 +128,6 @@ impl LeakyBucketState {

 pub struct RateLimiter {
    pub config: LeakyBucketConfig,
-    pub sleep_counter: AtomicU64,
    pub state: Mutex<LeakyBucketState>,
    /// a queue to provide this fair ordering.
    pub queue: Notify,
@@ -151,7 +144,6 @@ impl Drop for Requeue<'_> {
 impl RateLimiter {
    pub fn with_initial_tokens(config: LeakyBucketConfig, initial_tokens: f64) -> Self {
        RateLimiter {
-            sleep_counter: AtomicU64::new(0),
            state: Mutex::new(LeakyBucketState::with_initial_tokens(
                &config,
                initial_tokens,
@@ -171,16 +163,15 @@ impl RateLimiter {

    /// returns true if we did throttle
    pub async fn acquire(&self, count: usize) -> bool {
-        let start = tokio::time::Instant::now();
+        let mut throttled = false;

-        let start_count = self.sleep_counter.load(Ordering::Acquire);
-        let mut end_count = start_count;
+        let start = tokio::time::Instant::now();

        // wait until we are the first in the queue
        let mut notified = std::pin::pin!(self.queue.notified());
        if !notified.as_mut().enable() {
+            throttled = true;
            notified.await;
-            end_count = self.sleep_counter.load(Ordering::Acquire);
        }

        // notify the next waiter in the queue when we are done.
@@ -193,22 +184,9 @@ impl RateLimiter {
                .unwrap()
                .add_tokens(&self.config, start, count as f64);
            match res {
-                Ok(()) => return end_count > start_count,
+                Ok(()) => return throttled,
                Err(ready_at) => {
-                    struct Increment<'a>(&'a AtomicU64);
-
-                    impl Drop for Increment<'_> {
-                        fn drop(&mut self) {
-                            self.0.fetch_add(1, Ordering::AcqRel);
-                        }
-                    }
-
-                    // increment the counter after we finish sleeping (or cancel this task).
-                    // this ensures that tasks that have already started the acquire will observe
-                    // the new sleep count when they are allowed to resume on the notify.
-                    let _inc = Increment(&self.sleep_counter);
-                    end_count += 1;
-
+                    throttled = true;
                    tokio::time::sleep_until(ready_at).await;
                }
            }
--- a/libs/utils/src/lib.rs
+++ b/libs/utils/src/lib.rs
@@ -43,9 +43,16 @@ pub mod logging;
 pub mod lock_file;
 pub mod pid_file;

+// Misc
+pub mod accum;
+pub mod shutdown;
+
 // Utility for binding TcpListeners with proper socket options.
 pub mod tcp_listener;

+// Utility for putting a raw file descriptor into non-blocking mode
+pub mod nonblock;
+
 // Default signal handling
 pub mod sentry_init;
 pub mod signals;
--- a/libs/utils/src/lsn.rs
+++ b/libs/utils/src/lsn.rs
@@ -1,5 +1,6 @@
 #![warn(missing_docs)]

+use camino::Utf8Path;
 use serde::{de::Visitor, Deserialize, Serialize};
 use std::fmt;
 use std::ops::{Add, AddAssign};
@@ -144,6 +145,14 @@ impl Lsn {
        i128::from(self.0) - i128::from(other)
    }

+    /// Parse an LSN from a filename in the form `0000000000000000`
+    pub fn from_filename<F>(filename: F) -> Result<Self, LsnParseError>
+    where
+        F: AsRef<Utf8Path>,
+    {
+        Lsn::from_hex(filename.as_ref().as_str())
+    }
+
    /// Parse an LSN from a string in the form `0000000000000000`
    pub fn from_hex<S>(s: S) -> Result<Self, LsnParseError>
    where
--- a/libs/utils/src/nonblock.rs
+++ b/libs/utils/src/nonblock.rs
@@ -0,0 +1,17 @@
+use nix::fcntl::{fcntl, OFlag, F_GETFL, F_SETFL};
+use std::os::unix::io::RawFd;
+
+/// Put the file descriptor `fd` into non-blocking mode by adding `O_NONBLOCK`.
+/// A failure of either `fcntl` call is returned to the caller.
+pub fn set_nonblock(fd: RawFd) -> Result<(), std::io::Error> {
+    let current = fcntl(fd, F_GETFL)?;
+
+    // Keep every bit reported by F_GETFL, including ones `OFlag` has no name
+    // for: they should be valid for F_SETFL too, and leaving them out would
+    // make the F_SETFL effectively clear them, which is not what we want.
+    let wanted = OFlag::from_bits_retain(current) | OFlag::O_NONBLOCK;
+
+    fcntl(fd, F_SETFL(wanted))?;
+
+    Ok(())
+}
--- a/libs/utils/src/shutdown.rs
+++ b/libs/utils/src/shutdown.rs
@@ -0,0 +1,7 @@
+/// Terminate the calling process right away with exit status `code`.
+/// This goes through `_exit`, skipping atexit callbacks, C runtime destructors
+/// etc.; we mainly use it to protect coverage data from concurrent writes.
+pub fn exit_now(code: u8) -> ! {
+    // SAFETY: exiting is safe, the ffi is not safe
+    unsafe { nix::libc::_exit(nix::libc::c_int::from(code)) }
+}
--- a/libs/utils/src/vec_map.rs
+++ b/libs/utils/src/vec_map.rs
@@ -120,6 +120,32 @@ impl<K: Ord, V> VecMap<K, V> {
        Ok((None, delta_size))
    }

+    /// Clone the map into two halves divided at `cutoff`.
+    ///
+    /// The first half holds every entry that sorts before `cutoff`; the second
+    /// holds the entry at `cutoff` (if present) and everything after it.
+    pub fn split_at(&self, cutoff: &K) -> (Self, Self)
+    where
+        K: Clone,
+        V: Clone,
+    {
+        // A hit and a miss both yield the index where the second half starts.
+        let split_idx = match self.data.binary_search_by_key(&cutoff, extract_key) {
+            Ok(idx) | Err(idx) => idx,
+        };
+        let (before, from_cutoff) = self.data.split_at(split_idx);
+        (
+            VecMap {
+                data: before.to_vec(),
+                ordering: self.ordering,
+            },
+            VecMap {
+                data: from_cutoff.to_vec(),
+                ordering: self.ordering,
+            },
+        )
+    }
+
    /// Move items from `other` to the end of `self`, leaving `other` empty.
    /// If the `other` ordering is different from `self` ordering
    /// `ExtendOrderingError` error will be returned.
--- a/libs/vm_monitor/Cargo.toml
+++ b/libs/vm_monitor/Cargo.toml
@@ -15,11 +15,13 @@ anyhow.workspace = true
 axum.workspace = true
 clap.workspace = true
 futures.workspace = true
+inotify.workspace = true
 serde.workspace = true
 serde_json.workspace = true
 sysinfo.workspace = true
 tokio = { workspace = true, features = ["rt-multi-thread"] }
 tokio-postgres.workspace = true
+tokio-stream.workspace = true
 tokio-util.workspace = true
 tracing.workspace = true
 tracing-subscriber.workspace = true
--- a/libs/walproposer/build.rs
+++ b/libs/walproposer/build.rs
@@ -5,8 +5,6 @@ use std::{env, path::PathBuf, process::Command};

 use anyhow::{anyhow, Context};

-const WALPROPOSER_PG_VERSION: &str = "v17";
-
 fn main() -> anyhow::Result<()> {
    // Tell cargo to invalidate the built crate whenever the wrapper changes
    println!("cargo:rerun-if-changed=bindgen_deps.h");
@@ -38,10 +36,7 @@ fn main() -> anyhow::Result<()> {
    // Rebuild crate when libwalproposer.a changes
    println!("cargo:rerun-if-changed={walproposer_lib_search_str}/libwalproposer.a");

-    let pg_config_bin = pg_install_abs
-        .join(WALPROPOSER_PG_VERSION)
-        .join("bin")
-        .join("pg_config");
+    let pg_config_bin = pg_install_abs.join("v16").join("bin").join("pg_config");
    let inc_server_path: String = if pg_config_bin.exists() {
        let output = Command::new(pg_config_bin)
            .arg("--includedir-server")
@@ -58,7 +53,7 @@ fn main() -> anyhow::Result<()> {
            .into()
    } else {
        let server_path = pg_install_abs
-            .join(WALPROPOSER_PG_VERSION)
+            .join("v16")
            .join("include")
            .join("postgresql")
            .join("server")
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -15,6 +15,7 @@ anyhow.workspace = true
 arc-swap.workspace = true
 async-compression.workspace = true
 async-stream.workspace = true
+async-trait.workspace = true
 bit_field.workspace = true
 byteorder.workspace = true
 bytes.workspace = true
@@ -22,9 +23,12 @@ camino.workspace = true
 camino-tempfile.workspace = true
 chrono = { workspace = true, features = ["serde"] }
 clap = { workspace = true, features = ["string"] }
+const_format.workspace = true
 consumption_metrics.workspace = true
 crc32c.workspace = true
+crossbeam-utils.workspace = true
 either.workspace = true
+flate2.workspace = true
 fail.workspace = true
 futures.workspace = true
 git-version.workspace = true
@@ -53,6 +57,10 @@ serde.workspace = true
 serde_json = { workspace = true, features = ["raw_value"] }
 serde_path_to_error.workspace = true
 serde_with.workspace = true
+signal-hook.workspace = true
+smallvec = { workspace = true, features = ["write"] }
+svg_fmt.workspace = true
+sync_wrapper.workspace = true
 sysinfo.workspace = true
 tokio-tar.workspace = true
 thiserror.workspace = true
@@ -65,6 +73,7 @@ tokio-stream.workspace = true
 tokio-util.workspace = true
 toml_edit = { workspace = true, features = [ "serde" ] }
 tracing.workspace = true
+twox-hash.workspace = true
 url.workspace = true
 walkdir.workspace = true
 metrics.workspace = true
--- a/pageserver/compaction/Cargo.toml
+++ b/pageserver/compaction/Cargo.toml
@@ -9,19 +9,41 @@ default = []

 [dependencies]
 anyhow.workspace = true
+async-compression.workspace = true
 async-stream.workspace = true
+byteorder.workspace = true
+bytes.workspace = true
+chrono = { workspace = true, features = ["serde"] }
 clap = { workspace = true, features = ["string"] }
+const_format.workspace = true
+consumption_metrics.workspace = true
+crossbeam-utils.workspace = true
+either.workspace = true
+flate2.workspace = true
+fail.workspace = true
 futures.workspace = true
 git-version.workspace = true
+hex.workspace = true
+humantime.workspace = true
+humantime-serde.workspace = true
 itertools.workspace = true
 once_cell.workspace = true
 pageserver_api.workspace = true
 pin-project-lite.workspace = true
 rand.workspace = true
+smallvec = { workspace = true, features = ["write"] }
 svg_fmt.workspace = true
+sync_wrapper.workspace = true
+thiserror.workspace = true
 tokio = { workspace = true, features = ["process", "sync", "fs", "rt", "io-util", "time"] }
+tokio-io-timeout.workspace = true
+tokio-util.workspace = true
 tracing.workspace = true
+tracing-error.workspace = true
 tracing-subscriber.workspace = true
+url.workspace = true
+walkdir.workspace = true
+metrics.workspace = true
 utils.workspace = true
 workspace_hack.workspace = true

--- a/pageserver/ctl/Cargo.toml
+++ b/pageserver/ctl/Cargo.toml
@@ -8,6 +8,7 @@ license.workspace = true

 [dependencies]
 anyhow.workspace = true
+bytes.workspace = true
 camino.workspace = true
 clap = { workspace = true, features = ["string"] }
 git-version.workspace = true
@@ -23,4 +24,5 @@ toml_edit.workspace = true
 utils.workspace = true
 svg_fmt.workspace = true
 workspace_hack.workspace = true
+serde.workspace = true
 serde_json.workspace = true
--- a/pageserver/ctl/src/layer_map_analyzer.rs
+++ b/pageserver/ctl/src/layer_map_analyzer.rs
@@ -79,24 +79,16 @@ pub(crate) fn parse_filename(name: &str) -> Option<LayerFile> {
        return None;
    }
    let keys: Vec<&str> = split[0].split('-').collect();
-    let lsn_and_opt_generation: Vec<&str> = split[1].split('v').collect();
-    let lsns: Vec<&str> = lsn_and_opt_generation[0].split('-').collect();
-    let the_lsns: [&str; 2];
-
-    /*
-     * Generations add a -vX-XXXXXX postfix, which causes issues when we try to
-     * parse 'vX' as an LSN.
-     */
-    let is_delta = if lsns.len() == 1 || lsns[1].is_empty() {
-        the_lsns = [lsns[0], lsns[0]];
+    let mut lsns: Vec<&str> = split[1].split('-').collect();
+    let is_delta = if lsns.len() == 1 {
+        lsns.push(lsns[0]);
        false
    } else {
-        the_lsns = [lsns[0], lsns[1]];
        true
    };

    let key_range = Key::from_hex(keys[0]).unwrap()..Key::from_hex(keys[1]).unwrap();
-    let lsn_range = Lsn::from_hex(the_lsns[0]).unwrap()..Lsn::from_hex(the_lsns[1]).unwrap();
+    let lsn_range = Lsn::from_hex(lsns[0]).unwrap()..Lsn::from_hex(lsns[1]).unwrap();
    let holes = Vec::new();
    Some(LayerFile {
        key_range,
--- a/pageserver/src/basebackup.rs
+++ b/pageserver/src/basebackup.rs
@@ -30,8 +30,9 @@ use pageserver_api::reltag::{RelTag, SlruKind};

 use postgres_ffi::dispatch_pgversion;
 use postgres_ffi::pg_constants::{DEFAULTTABLESPACE_OID, GLOBALTABLESPACE_OID};
-use postgres_ffi::pg_constants::{PGDATA_SPECIAL_FILES, PG_HBA};
+use postgres_ffi::pg_constants::{PGDATA_SPECIAL_FILES, PGDATA_SUBDIRS, PG_HBA};
 use postgres_ffi::relfile_utils::{INIT_FORKNUM, MAIN_FORKNUM};
+use postgres_ffi::TransactionId;
 use postgres_ffi::XLogFileName;
 use postgres_ffi::PG_TLI;
 use postgres_ffi::{BLCKSZ, RELSEG_SIZE, WAL_SEGMENT_SIZE};
@@ -254,11 +255,8 @@ where

        let lazy_slru_download = self.timeline.get_lazy_slru_download() && !self.full_backup;

-        let pgversion = self.timeline.pg_version;
-        let subdirs = dispatch_pgversion!(pgversion, &pgv::bindings::PGDATA_SUBDIRS[..]);
-
        // Create pgdata subdirs structure
-        for dir in subdirs.iter() {
+        for dir in PGDATA_SUBDIRS.iter() {
            let header = new_tar_header_dir(dir)?;
            self.ar
                .append(&header, &mut io::empty())
@@ -608,7 +606,7 @@ where
    //
    // Extract twophase state files
    //
-    async fn add_twophase_file(&mut self, xid: u64) -> Result<(), BasebackupError> {
+    async fn add_twophase_file(&mut self, xid: TransactionId) -> Result<(), BasebackupError> {
        let img = self
            .timeline
            .get_twophase_file(xid, self.lsn, self.ctx)
@@ -619,11 +617,7 @@ where
        buf.extend_from_slice(&img[..]);
        let crc = crc32c::crc32c(&img[..]);
        buf.put_u32_le(crc);
-        let path = if self.timeline.pg_version < 17 {
-            format!("pg_twophase/{:>08X}", xid)
-        } else {
-            format!("pg_twophase/{:>016X}", xid)
-        };
+        let path = format!("pg_twophase/{:>08X}", xid);
        let header = new_tar_header(&path, buf.len() as u64)?;
        self.ar
            .append(&header, &buf[..])
--- a/pageserver/src/config.rs
+++ b/pageserver/src/config.rs
@@ -13,6 +13,7 @@ use pageserver_api::{
 use remote_storage::{RemotePath, RemoteStorageConfig};
 use std::env;
 use storage_broker::Uri;
+use utils::crashsafe::path_with_suffix_extension;
 use utils::logging::SecretString;

 use once_cell::sync::OnceCell;
@@ -32,7 +33,7 @@ use crate::tenant::storage_layer::inmemory_layer::IndexEntry;
 use crate::tenant::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME};
 use crate::virtual_file;
 use crate::virtual_file::io_engine;
-use crate::{TENANT_HEATMAP_BASENAME, TENANT_LOCATION_CONFIG_NAME};
+use crate::{TENANT_HEATMAP_BASENAME, TENANT_LOCATION_CONFIG_NAME, TIMELINE_DELETE_MARK_SUFFIX};

 /// Global state of pageserver.
 ///
@@ -256,6 +257,17 @@ impl PageServerConf {
            .join(timeline_id.to_string())
    }

+    pub(crate) fn timeline_delete_mark_file_path(
+        &self,
+        tenant_shard_id: TenantShardId,
+        timeline_id: TimelineId,
+    ) -> Utf8PathBuf {
+        path_with_suffix_extension(
+            self.timeline_path(&tenant_shard_id, &timeline_id),
+            TIMELINE_DELETE_MARK_SUFFIX,
+        )
+    }
+
    /// Turns storage remote path of a file into its local path.
    pub fn local_path(&self, remote_path: &RemotePath) -> Utf8PathBuf {
        remote_path.with_base(&self.workdir)
@@ -269,7 +281,7 @@ impl PageServerConf {

        #[allow(clippy::manual_range_patterns)]
        match pg_version {
-            14 | 15 | 16 | 17 => Ok(path.join(format!("v{pg_version}"))),
+            14 | 15 | 16 => Ok(path.join(format!("v{pg_version}"))),
            _ => bail!("Unsupported postgres version: {}", pg_version),
        }
    }
@@ -479,6 +491,11 @@ pub struct ConfigurableSemaphore {
 }

 impl ConfigurableSemaphore {
+    pub const DEFAULT_INITIAL: NonZeroUsize = match NonZeroUsize::new(1) {
+        Some(x) => x,
+        None => panic!("const unwrap is not yet stable"),
+    };
+
    /// Initializse using a non-zero amount of permits.
    ///
    /// Require a non-zero initial permits, because using permits == 0 is a crude way to disable a
@@ -499,6 +516,12 @@ impl ConfigurableSemaphore {
    }
 }

+impl Default for ConfigurableSemaphore {
+    fn default() -> Self {
+        Self::new(Self::DEFAULT_INITIAL)
+    }
+}
+
 impl PartialEq for ConfigurableSemaphore {
    fn eq(&self, other: &Self) -> bool {
        // the number of permits can be increased at runtime, so we cannot really fulfill the
--- a/pageserver/src/consumption_metrics.rs
+++ b/pageserver/src/consumption_metrics.rs
@@ -178,7 +178,7 @@ async fn collect_metrics(
                )
                .await;
                if let Err(e) = res {
-                    tracing::error!("failed to upload to remote storage: {e:#}");
+                    tracing::error!("failed to upload to S3: {e:#}");
                }
            }
        };
--- a/pageserver/src/import_datadir.rs
+++ b/pageserver/src/import_datadir.rs
@@ -580,11 +580,9 @@ async fn import_file(
        import_slru(modification, slru, file_path, reader, len, ctx).await?;
        debug!("imported multixact members slru");
    } else if file_path.starts_with("pg_twophase") {
-        let bytes = read_all_bytes(reader).await?;
+        let xid = u32::from_str_radix(file_name.as_ref(), 16)?;

-        // In PostgreSQL v17, this is a 64-bit FullTransactionid. In previous versions,
-        // it's a 32-bit TransactionId, which fits in u64 anyway.
-        let xid = u64::from_str_radix(file_name.as_ref(), 16)?;
+        let bytes = read_all_bytes(reader).await?;
        modification
            .put_twophase_file(xid, Bytes::copy_from_slice(&bytes[..]), ctx)
            .await?;
--- a/pageserver/src/metrics.rs
+++ b/pageserver/src/metrics.rs
@@ -1177,10 +1177,10 @@ pub(crate) mod virtual_file_io_engine {
 }

 struct GlobalAndPerTimelineHistogramTimer<'a, 'c> {
-    global_latency_histo: &'a Histogram,
+    global_metric: &'a Histogram,

    // Optional because not all op types are tracked per-timeline
-    per_timeline_latency_histo: Option<&'a Histogram>,
+    timeline_metric: Option<&'a Histogram>,

    ctx: &'c RequestContext,
    start: std::time::Instant,
@@ -1212,10 +1212,9 @@ impl<'a, 'c> Drop for GlobalAndPerTimelineHistogramTimer<'a, 'c> {
                elapsed
            }
        };
-        self.global_latency_histo
-            .observe(ex_throttled.as_secs_f64());
-        if let Some(per_timeline_getpage_histo) = self.per_timeline_latency_histo {
-            per_timeline_getpage_histo.observe(ex_throttled.as_secs_f64());
+        self.global_metric.observe(ex_throttled.as_secs_f64());
+        if let Some(timeline_metric) = self.timeline_metric {
+            timeline_metric.observe(ex_throttled.as_secs_f64());
        }
    }
 }
@@ -1241,32 +1240,10 @@ pub enum SmgrQueryType {

 #[derive(Debug)]
 pub(crate) struct SmgrQueryTimePerTimeline {
-    global_started: [IntCounter; SmgrQueryType::COUNT],
-    global_latency: [Histogram; SmgrQueryType::COUNT],
-    per_timeline_getpage_started: IntCounter,
-    per_timeline_getpage_latency: Histogram,
+    global_metrics: [Histogram; SmgrQueryType::COUNT],
+    per_timeline_getpage: Histogram,
 }

-static SMGR_QUERY_STARTED_GLOBAL: Lazy<IntCounterVec> = Lazy::new(|| {
-    register_int_counter_vec!(
-        // it's a counter, but, name is prepared to extend it to a histogram of queue depth
-        "pageserver_smgr_query_started_global_count",
-        "Number of smgr queries started, aggregated by query type.",
-        &["smgr_query_type"],
-    )
-    .expect("failed to define a metric")
-});
-
-static SMGR_QUERY_STARTED_PER_TENANT_TIMELINE: Lazy<IntCounterVec> = Lazy::new(|| {
-    register_int_counter_vec!(
-        // it's a counter, but, name is prepared to extend it to a histogram of queue depth
-        "pageserver_smgr_query_started_count",
-        "Number of smgr queries started, aggregated by query type and tenant/timeline.",
-        &["smgr_query_type", "tenant_id", "shard_id", "timeline_id"],
-    )
-    .expect("failed to define a metric")
-});
-
 static SMGR_QUERY_TIME_PER_TENANT_TIMELINE: Lazy<HistogramVec> = Lazy::new(|| {
    register_histogram_vec!(
        "pageserver_smgr_query_seconds",
@@ -1342,20 +1319,14 @@ impl SmgrQueryTimePerTimeline {
        let tenant_id = tenant_shard_id.tenant_id.to_string();
        let shard_slug = format!("{}", tenant_shard_id.shard_slug());
        let timeline_id = timeline_id.to_string();
-        let global_started = std::array::from_fn(|i| {
-            let op = SmgrQueryType::from_repr(i).unwrap();
-            SMGR_QUERY_STARTED_GLOBAL
-                .get_metric_with_label_values(&[op.into()])
-                .unwrap()
-        });
-        let global_latency = std::array::from_fn(|i| {
+        let global_metrics = std::array::from_fn(|i| {
            let op = SmgrQueryType::from_repr(i).unwrap();
            SMGR_QUERY_TIME_GLOBAL
                .get_metric_with_label_values(&[op.into()])
                .unwrap()
        });

-        let per_timeline_getpage_started = SMGR_QUERY_STARTED_PER_TENANT_TIMELINE
+        let per_timeline_getpage = SMGR_QUERY_TIME_PER_TENANT_TIMELINE
            .get_metric_with_label_values(&[
                SmgrQueryType::GetPageAtLsn.into(),
                &tenant_id,
@@ -1363,20 +1334,9 @@ impl SmgrQueryTimePerTimeline {
                &timeline_id,
            ])
            .unwrap();
-        let per_timeline_getpage_latency = SMGR_QUERY_TIME_PER_TENANT_TIMELINE
-            .get_metric_with_label_values(&[
-                SmgrQueryType::GetPageAtLsn.into(),
-                &tenant_id,
-                &shard_slug,
-                &timeline_id,
-            ])
-            .unwrap();
-
        Self {
-            global_started,
-            global_latency,
-            per_timeline_getpage_latency,
-            per_timeline_getpage_started,
+            global_metrics,
+            per_timeline_getpage,
        }
    }
    pub(crate) fn start_timer<'c: 'a, 'a>(
@@ -1384,11 +1344,8 @@ impl SmgrQueryTimePerTimeline {
        op: SmgrQueryType,
        ctx: &'c RequestContext,
    ) -> Option<impl Drop + '_> {
+        let global_metric = &self.global_metrics[op as usize];
        let start = Instant::now();
-
-        self.global_started[op as usize].inc();
-
-        // We subtract time spent throttled from the observed latency.
        match ctx.micros_spent_throttled.open() {
            Ok(()) => (),
            Err(error) => {
@@ -1407,16 +1364,15 @@ impl SmgrQueryTimePerTimeline {
            }
        }

-        let per_timeline_latency_histo = if matches!(op, SmgrQueryType::GetPageAtLsn) {
-            self.per_timeline_getpage_started.inc();
-            Some(&self.per_timeline_getpage_latency)
+        let timeline_metric = if matches!(op, SmgrQueryType::GetPageAtLsn) {
+            Some(&self.per_timeline_getpage)
        } else {
            None
        };

        Some(GlobalAndPerTimelineHistogramTimer {
-            global_latency_histo: &self.global_latency[op as usize],
-            per_timeline_latency_histo,
+            global_metric,
+            timeline_metric,
            ctx,
            start,
            op,
@@ -1467,12 +1423,9 @@ mod smgr_query_time_tests {
            let get_counts = || {
                let global: u64 = ops
                    .iter()
-                    .map(|op| metrics.global_latency[*op as usize].get_sample_count())
+                    .map(|op| metrics.global_metrics[*op as usize].get_sample_count())
                    .sum();
-                (
-                    global,
-                    metrics.per_timeline_getpage_latency.get_sample_count(),
-                )
+                (global, metrics.per_timeline_getpage.get_sample_count())
            };

            let (pre_global, pre_per_tenant_timeline) = get_counts();
@@ -1824,7 +1777,7 @@ pub(crate) static SECONDARY_MODE: Lazy<SecondaryModeMetrics> = Lazy::new(|| {
    .expect("failed to define a metric"),
    upload_heatmap_duration: register_histogram!(
        "pageserver_secondary_upload_heatmap_duration",
-        "Time to build and upload a heatmap, including any waiting inside the remote storage client"
+        "Time to build and upload a heatmap, including any waiting inside the S3 client"
    )
    .expect("failed to define a metric"),
    download_heatmap: register_int_counter!(
@@ -2623,12 +2576,6 @@ impl TimelineMetrics {
            let _ = STORAGE_IO_SIZE.remove_label_values(&[op, tenant_id, shard_id, timeline_id]);
        }

-        let _ = SMGR_QUERY_STARTED_PER_TENANT_TIMELINE.remove_label_values(&[
-            SmgrQueryType::GetPageAtLsn.into(),
-            tenant_id,
-            shard_id,
-            timeline_id,
-        ]);
        let _ = SMGR_QUERY_TIME_PER_TENANT_TIMELINE.remove_label_values(&[
            SmgrQueryType::GetPageAtLsn.into(),
            tenant_id,
@@ -2645,8 +2592,6 @@ pub(crate) fn remove_tenant_metrics(tenant_shard_id: &TenantShardId) {
        let _ = TENANT_SYNTHETIC_SIZE_METRIC.remove_label_values(&[&tid]);
    }

-    tenant_throttling::remove_tenant_metrics(tenant_shard_id);
-
    // we leave the BROKEN_TENANTS_SET entry if any
 }

@@ -3110,180 +3055,41 @@ pub mod tokio_epoll_uring {
 pub(crate) mod tenant_throttling {
    use metrics::{register_int_counter_vec, IntCounter};
    use once_cell::sync::Lazy;
-    use utils::shard::TenantShardId;

    use crate::tenant::{self, throttle::Metric};

-    struct GlobalAndPerTenantIntCounter {
-        global: IntCounter,
-        per_tenant: IntCounter,
-    }
-
-    impl GlobalAndPerTenantIntCounter {
-        #[inline(always)]
-        pub(crate) fn inc(&self) {
-            self.inc_by(1)
-        }
-        #[inline(always)]
-        pub(crate) fn inc_by(&self, n: u64) {
-            self.global.inc_by(n);
-            self.per_tenant.inc_by(n);
-        }
-    }
-
    pub(crate) struct TimelineGet {
-        count_accounted_start: GlobalAndPerTenantIntCounter,
-        count_accounted_finish: GlobalAndPerTenantIntCounter,
-        wait_time: GlobalAndPerTenantIntCounter,
-        count_throttled: GlobalAndPerTenantIntCounter,
+        wait_time: IntCounter,
+        count: IntCounter,
    }

-    static COUNT_ACCOUNTED_START: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
-        register_int_counter_vec!(
-            "pageserver_tenant_throttling_count_accounted_start_global",
-            "Count of tenant throttling starts, by kind of throttle.",
-            &["kind"]
-        )
-        .unwrap()
-    });
-    static COUNT_ACCOUNTED_START_PER_TENANT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
-        register_int_counter_vec!(
-            "pageserver_tenant_throttling_count_accounted_start",
-            "Count of tenant throttling starts, by kind of throttle.",
-            &["kind", "tenant_id", "shard_id"]
-        )
-        .unwrap()
-    });
-    static COUNT_ACCOUNTED_FINISH: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
-        register_int_counter_vec!(
-            "pageserver_tenant_throttling_count_accounted_finish_global",
-            "Count of tenant throttling finishes, by kind of throttle.",
-            &["kind"]
-        )
-        .unwrap()
-    });
-    static COUNT_ACCOUNTED_FINISH_PER_TENANT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
-        register_int_counter_vec!(
-            "pageserver_tenant_throttling_count_accounted_finish",
-            "Count of tenant throttling finishes, by kind of throttle.",
-            &["kind", "tenant_id", "shard_id"]
-        )
-        .unwrap()
-    });
-    static WAIT_USECS: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
-        register_int_counter_vec!(
+    pub(crate) static TIMELINE_GET: Lazy<TimelineGet> = Lazy::new(|| {
+        static WAIT_USECS: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
+            register_int_counter_vec!(
            "pageserver_tenant_throttling_wait_usecs_sum_global",
-            "Sum of microseconds that spent waiting throttle by kind of throttle.",
+            "Sum of microseconds that tenants spent waiting for a tenant throttle of a given kind.",
            &["kind"]
        )
-        .unwrap()
-    });
-    static WAIT_USECS_PER_TENANT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
-        register_int_counter_vec!(
-            "pageserver_tenant_throttling_wait_usecs_sum",
-            "Sum of microseconds that spent waiting throttle by kind of throttle.",
-            &["kind", "tenant_id", "shard_id"]
-        )
-        .unwrap()
+            .unwrap()
+        });
+
+        static WAIT_COUNT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
+            register_int_counter_vec!(
+                "pageserver_tenant_throttling_count_global",
+                "Count of tenant throttlings, by kind of throttle.",
+                &["kind"]
+            )
+            .unwrap()
+        });
+
+        let kind = "timeline_get";
+        TimelineGet {
+            wait_time: WAIT_USECS.with_label_values(&[kind]),
+            count: WAIT_COUNT.with_label_values(&[kind]),
+        }
    });

-    static WAIT_COUNT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
-        register_int_counter_vec!(
-            "pageserver_tenant_throttling_count_global",
-            "Count of tenant throttlings, by kind of throttle.",
-            &["kind"]
-        )
-        .unwrap()
-    });
-    static WAIT_COUNT_PER_TENANT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
-        register_int_counter_vec!(
-            "pageserver_tenant_throttling_count",
-            "Count of tenant throttlings, by kind of throttle.",
-            &["kind", "tenant_id", "shard_id"]
-        )
-        .unwrap()
-    });
-
-    const KIND: &str = "timeline_get";
-
-    impl TimelineGet {
-        pub(crate) fn new(tenant_shard_id: &TenantShardId) -> Self {
-            TimelineGet {
-                count_accounted_start: {
-                    GlobalAndPerTenantIntCounter {
-                        global: COUNT_ACCOUNTED_START.with_label_values(&[KIND]),
-                        per_tenant: COUNT_ACCOUNTED_START_PER_TENANT.with_label_values(&[
-                            KIND,
-                            &tenant_shard_id.tenant_id.to_string(),
-                            &tenant_shard_id.shard_slug().to_string(),
-                        ]),
-                    }
-                },
-                count_accounted_finish: {
-                    GlobalAndPerTenantIntCounter {
-                        global: COUNT_ACCOUNTED_FINISH.with_label_values(&[KIND]),
-                        per_tenant: COUNT_ACCOUNTED_FINISH_PER_TENANT.with_label_values(&[
-                            KIND,
-                            &tenant_shard_id.tenant_id.to_string(),
-                            &tenant_shard_id.shard_slug().to_string(),
-                        ]),
-                    }
-                },
-                wait_time: {
-                    GlobalAndPerTenantIntCounter {
-                        global: WAIT_USECS.with_label_values(&[KIND]),
-                        per_tenant: WAIT_USECS_PER_TENANT.with_label_values(&[
-                            KIND,
-                            &tenant_shard_id.tenant_id.to_string(),
-                            &tenant_shard_id.shard_slug().to_string(),
-                        ]),
-                    }
-                },
-                count_throttled: {
-                    GlobalAndPerTenantIntCounter {
-                        global: WAIT_COUNT.with_label_values(&[KIND]),
-                        per_tenant: WAIT_COUNT_PER_TENANT.with_label_values(&[
-                            KIND,
-                            &tenant_shard_id.tenant_id.to_string(),
-                            &tenant_shard_id.shard_slug().to_string(),
-                        ]),
-                    }
-                },
-            }
-        }
-    }
-
-    pub(crate) fn preinitialize_global_metrics() {
-        Lazy::force(&COUNT_ACCOUNTED_START);
-        Lazy::force(&COUNT_ACCOUNTED_FINISH);
-        Lazy::force(&WAIT_USECS);
-        Lazy::force(&WAIT_COUNT);
-    }
-
-    pub(crate) fn remove_tenant_metrics(tenant_shard_id: &TenantShardId) {
-        for m in &[
-            &COUNT_ACCOUNTED_START_PER_TENANT,
-            &COUNT_ACCOUNTED_FINISH_PER_TENANT,
-            &WAIT_USECS_PER_TENANT,
-            &WAIT_COUNT_PER_TENANT,
-        ] {
-            let _ = m.remove_label_values(&[
-                KIND,
-                &tenant_shard_id.tenant_id.to_string(),
-                &tenant_shard_id.shard_slug().to_string(),
-            ]);
-        }
-    }
-
-    impl Metric for TimelineGet {
-        #[inline(always)]
-        fn accounting_start(&self) {
-            self.count_accounted_start.inc();
-        }
-        #[inline(always)]
-        fn accounting_finish(&self) {
-            self.count_accounted_finish.inc();
-        }
+    impl Metric for &'static TimelineGet {
        #[inline(always)]
        fn observe_throttling(
            &self,
@@ -3291,7 +3097,7 @@ pub(crate) mod tenant_throttling {
        ) {
            let val = u64::try_from(wait_time.as_micros()).unwrap();
            self.wait_time.inc_by(val);
-            self.count_throttled.inc();
+            self.count.inc();
        }
    }
 }
@@ -3421,14 +3227,11 @@ pub fn preinitialize_metrics() {
    }

    // countervecs
-    [
-        &BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT,
-        &SMGR_QUERY_STARTED_GLOBAL,
-    ]
-    .into_iter()
-    .for_each(|c| {
-        Lazy::force(c);
-    });
+    [&BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT]
+        .into_iter()
+        .for_each(|c| {
+            Lazy::force(c);
+        });

    // gauges
    WALRECEIVER_ACTIVE_MANAGERS.get();
@@ -3450,8 +3253,7 @@ pub fn preinitialize_metrics() {

    // Custom
    Lazy::force(&RECONSTRUCT_TIME);
+    Lazy::force(&tenant_throttling::TIMELINE_GET);
    Lazy::force(&BASEBACKUP_QUERY_TIME);
    Lazy::force(&COMPUTE_COMMANDS_COUNTERS);
-
-    tenant_throttling::preinitialize_global_metrics();
 }
--- a/pageserver/src/pgdatadir_mapping.rs
+++ b/pageserver/src/pgdatadir_mapping.rs
@@ -633,7 +633,7 @@ impl Timeline {

    pub(crate) async fn get_twophase_file(
        &self,
-        xid: u64,
+        xid: TransactionId,
        lsn: Lsn,
        ctx: &RequestContext,
    ) -> Result<Bytes, PageReconstructError> {
@@ -646,19 +646,11 @@ impl Timeline {
        &self,
        lsn: Lsn,
        ctx: &RequestContext,
-    ) -> Result<HashSet<u64>, PageReconstructError> {
+    ) -> Result<HashSet<TransactionId>, PageReconstructError> {
        // fetch directory entry
        let buf = self.get(TWOPHASEDIR_KEY, lsn, ctx).await?;

-        if self.pg_version >= 17 {
-            Ok(TwoPhaseDirectoryV17::des(&buf)?.xids)
-        } else {
-            Ok(TwoPhaseDirectory::des(&buf)?
-                .xids
-                .iter()
-                .map(|x| u64::from(*x))
-                .collect())
-        }
+        Ok(TwoPhaseDirectory::des(&buf)?.xids)
    }

    pub(crate) async fn get_control_file(
@@ -910,13 +902,9 @@ impl Timeline {

        // Then pg_twophase
        result.add_key(TWOPHASEDIR_KEY);
-
-        let mut xids: Vec<u64> = self
-            .list_twophase_files(lsn, ctx)
-            .await?
-            .iter()
-            .cloned()
-            .collect();
+        let buf = self.get(TWOPHASEDIR_KEY, lsn, ctx).await?;
+        let twophase_dir = TwoPhaseDirectory::des(&buf)?;
+        let mut xids: Vec<TransactionId> = twophase_dir.xids.iter().cloned().collect();
        xids.sort_unstable();
        for xid in xids {
            result.add_key(twophase_file_key(xid));
@@ -1139,15 +1127,9 @@ impl<'a> DatadirModification<'a> {
        // Create AuxFilesDirectory
        self.init_aux_dir()?;

-        let buf = if self.tline.pg_version >= 17 {
-            TwoPhaseDirectoryV17::ser(&TwoPhaseDirectoryV17 {
-                xids: HashSet::new(),
-            })
-        } else {
-            TwoPhaseDirectory::ser(&TwoPhaseDirectory {
-                xids: HashSet::new(),
-            })
-        }?;
+        let buf = TwoPhaseDirectory::ser(&TwoPhaseDirectory {
+            xids: HashSet::new(),
+        })?;
        self.pending_directory_entries
            .push((DirectoryKind::TwoPhase, 0));
        self.put(TWOPHASEDIR_KEY, Value::Image(buf.into()));
@@ -1339,31 +1321,22 @@ impl<'a> DatadirModification<'a> {

    pub async fn put_twophase_file(
        &mut self,
-        xid: u64,
+        xid: TransactionId,
        img: Bytes,
        ctx: &RequestContext,
    ) -> anyhow::Result<()> {
        // Add it to the directory entry
-        let dirbuf = self.get(TWOPHASEDIR_KEY, ctx).await?;
-        let newdirbuf = if self.tline.pg_version >= 17 {
-            let mut dir = TwoPhaseDirectoryV17::des(&dirbuf)?;
-            if !dir.xids.insert(xid) {
-                anyhow::bail!("twophase file for xid {} already exists", xid);
-            }
-            self.pending_directory_entries
-                .push((DirectoryKind::TwoPhase, dir.xids.len()));
-            Bytes::from(TwoPhaseDirectoryV17::ser(&dir)?)
-        } else {
-            let xid = xid as u32;
-            let mut dir = TwoPhaseDirectory::des(&dirbuf)?;
-            if !dir.xids.insert(xid) {
-                anyhow::bail!("twophase file for xid {} already exists", xid);
-            }
-            self.pending_directory_entries
-                .push((DirectoryKind::TwoPhase, dir.xids.len()));
-            Bytes::from(TwoPhaseDirectory::ser(&dir)?)
-        };
-        self.put(TWOPHASEDIR_KEY, Value::Image(newdirbuf));
+        let buf = self.get(TWOPHASEDIR_KEY, ctx).await?;
+        let mut dir = TwoPhaseDirectory::des(&buf)?;
+        if !dir.xids.insert(xid) {
+            anyhow::bail!("twophase file for xid {} already exists", xid);
+        }
+        self.pending_directory_entries
+            .push((DirectoryKind::TwoPhase, dir.xids.len()));
+        self.put(
+            TWOPHASEDIR_KEY,
+            Value::Image(Bytes::from(TwoPhaseDirectory::ser(&dir)?)),
+        );

        self.put(twophase_file_key(xid), Value::Image(img));
        Ok(())
@@ -1666,32 +1639,22 @@ impl<'a> DatadirModification<'a> {
    /// This method is used for marking truncated SLRU files
    pub async fn drop_twophase_file(
        &mut self,
-        xid: u64,
+        xid: TransactionId,
        ctx: &RequestContext,
    ) -> anyhow::Result<()> {
        // Remove it from the directory entry
        let buf = self.get(TWOPHASEDIR_KEY, ctx).await?;
-        let newdirbuf = if self.tline.pg_version >= 17 {
-            let mut dir = TwoPhaseDirectoryV17::des(&buf)?;
+        let mut dir = TwoPhaseDirectory::des(&buf)?;

-            if !dir.xids.remove(&xid) {
-                warn!("twophase file for xid {} does not exist", xid);
-            }
-            self.pending_directory_entries
-                .push((DirectoryKind::TwoPhase, dir.xids.len()));
-            Bytes::from(TwoPhaseDirectoryV17::ser(&dir)?)
-        } else {
-            let xid: u32 = u32::try_from(xid)?;
-            let mut dir = TwoPhaseDirectory::des(&buf)?;
-
-            if !dir.xids.remove(&xid) {
-                warn!("twophase file for xid {} does not exist", xid);
-            }
-            self.pending_directory_entries
-                .push((DirectoryKind::TwoPhase, dir.xids.len()));
-            Bytes::from(TwoPhaseDirectory::ser(&dir)?)
-        };
-        self.put(TWOPHASEDIR_KEY, Value::Image(newdirbuf));
+        if !dir.xids.remove(&xid) {
+            warn!("twophase file for xid {} does not exist", xid);
+        }
+        self.pending_directory_entries
+            .push((DirectoryKind::TwoPhase, dir.xids.len()));
+        self.put(
+            TWOPHASEDIR_KEY,
+            Value::Image(Bytes::from(TwoPhaseDirectory::ser(&dir)?)),
+        );

        // Delete it
        self.delete(twophase_key_range(xid));
@@ -2161,21 +2124,11 @@ struct DbDirectory {
    dbdirs: HashMap<(Oid, Oid), bool>,
 }

-// The format of TwoPhaseDirectory changed in PostgreSQL v17, because the filenames of
-// pg_twophase files was expanded from 32-bit XIDs to 64-bit XIDs.  Previously, the files
-// were named like "pg_twophase/000002E5", now they're like
-// "pg_twophsae/0000000A000002E4".
-
 #[derive(Debug, Serialize, Deserialize)]
 struct TwoPhaseDirectory {
    xids: HashSet<TransactionId>,
 }

-#[derive(Debug, Serialize, Deserialize)]
-struct TwoPhaseDirectoryV17 {
-    xids: HashSet<u64>,
-}
-
 #[derive(Debug, Serialize, Deserialize, Default)]
 struct RelDirectory {
    // Set of relations that exist. (relfilenode, forknum)
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -18,6 +18,7 @@ use camino::Utf8Path;
 use camino::Utf8PathBuf;
 use enumset::EnumSet;
 use futures::stream::FuturesUnordered;
+use futures::FutureExt;
 use futures::StreamExt;
 use pageserver_api::models;
 use pageserver_api::models::AuxFilePolicy;
@@ -33,7 +34,6 @@ use remote_storage::GenericRemoteStorage;
 use remote_storage::TimeoutOrCancel;
 use std::collections::BTreeMap;
 use std::fmt;
-use std::future::Future;
 use std::sync::Weak;
 use std::time::SystemTime;
 use storage_broker::BrokerClientChannel;
@@ -140,7 +140,6 @@ pub mod metadata;
 pub mod remote_timeline_client;
 pub mod storage_layer;

-pub mod checks;
 pub mod config;
 pub mod mgr;
 pub mod secondary;
@@ -302,7 +301,7 @@ pub struct Tenant {
    /// Throttle applied at the top of [`Timeline::get`].
    /// All [`Tenant::timelines`] of a given [`Tenant`] instance share the same [`throttle::Throttle`] instance.
    pub(crate) timeline_get_throttle:
-        Arc<throttle::Throttle<crate::metrics::tenant_throttling::TimelineGet>>,
+        Arc<throttle::Throttle<&'static crate::metrics::tenant_throttling::TimelineGet>>,

    /// An ongoing timeline detach concurrency limiter.
    ///
@@ -1031,9 +1030,13 @@ impl Tenant {
        }

        Ok(TenantPreload {
-            timelines: self
-                .load_timelines_metadata(remote_timeline_ids, remote_storage, cancel)
-                .await?,
+            timelines: Self::load_timeline_metadata(
+                self,
+                remote_timeline_ids,
+                remote_storage,
+                cancel,
+            )
+            .await?,
        })
    }

@@ -1299,7 +1302,7 @@ impl Tenant {
        .await
    }

-    async fn load_timelines_metadata(
+    async fn load_timeline_metadata(
        self: &Arc<Tenant>,
        timeline_ids: HashSet<TimelineId>,
        remote_storage: &GenericRemoteStorage,
@@ -1307,10 +1310,33 @@ impl Tenant {
    ) -> anyhow::Result<HashMap<TimelineId, TimelinePreload>> {
        let mut part_downloads = JoinSet::new();
        for timeline_id in timeline_ids {
+            let client = RemoteTimelineClient::new(
+                remote_storage.clone(),
+                self.deletion_queue_client.clone(),
+                self.conf,
+                self.tenant_shard_id,
+                timeline_id,
+                self.generation,
+            );
            let cancel_clone = cancel.clone();
            part_downloads.spawn(
-                self.load_timeline_metadata(timeline_id, remote_storage.clone(), cancel_clone)
-                    .instrument(info_span!("download_index_part", %timeline_id)),
+                async move {
+                    debug!("starting index part download");
+
+                    let index_part = client.download_index_file(&cancel_clone).await;
+
+                    debug!("finished index part download");
+
+                    Result::<_, anyhow::Error>::Ok(TimelinePreload {
+                        client,
+                        timeline_id,
+                        index_part,
+                    })
+                }
+                .map(move |res| {
+                    res.with_context(|| format!("download index part for timeline {timeline_id}"))
+                })
+                .instrument(info_span!("download_index_part", %timeline_id)),
            );
        }

@@ -1321,7 +1347,8 @@ impl Tenant {
                next = part_downloads.join_next() => {
                    match next {
                        Some(result) => {
-                            let preload = result.context("join preload task")?;
+                            let preload_result = result.context("join preload task")?;
+                            let preload = preload_result?;
                            timeline_preloads.insert(preload.timeline_id, preload);
                        },
                        None => {
@@ -1338,36 +1365,6 @@ impl Tenant {
        Ok(timeline_preloads)
    }

-    fn load_timeline_metadata(
-        self: &Arc<Tenant>,
-        timeline_id: TimelineId,
-        remote_storage: GenericRemoteStorage,
-        cancel: CancellationToken,
-    ) -> impl Future<Output = TimelinePreload> {
-        let client = RemoteTimelineClient::new(
-            remote_storage.clone(),
-            self.deletion_queue_client.clone(),
-            self.conf,
-            self.tenant_shard_id,
-            timeline_id,
-            self.generation,
-        );
-        async move {
-            debug_assert_current_span_has_tenant_and_timeline_id();
-            debug!("starting index part download");
-
-            let index_part = client.download_index_file(&cancel).await;
-
-            debug!("finished index part download");
-
-            TimelinePreload {
-                client,
-                timeline_id,
-                index_part,
-            }
-        }
-    }
-
    pub(crate) async fn apply_timeline_archival_config(
        &self,
        timeline_id: TimelineId,
@@ -1576,9 +1573,6 @@ impl Tenant {
        image_layer_desc: Vec<(Lsn, Vec<(pageserver_api::key::Key, bytes::Bytes)>)>,
        end_lsn: Lsn,
    ) -> anyhow::Result<Arc<Timeline>> {
-        use checks::check_valid_layermap;
-        use itertools::Itertools;
-
        let tline = self
            .create_test_timeline(new_timeline_id, initdb_lsn, pg_version, ctx)
            .await?;
@@ -1593,18 +1587,6 @@ impl Tenant {
                .force_create_image_layer(lsn, images, Some(initdb_lsn), ctx)
                .await?;
        }
-        let layer_names = tline
-            .layers
-            .read()
-            .await
-            .layer_map()
-            .unwrap()
-            .iter_historic_layers()
-            .map(|layer| layer.layer_name())
-            .collect_vec();
-        if let Some(err) = check_valid_layermap(&layer_names) {
-            bail!("invalid layermap: {err}");
-        }
        Ok(tline)
    }

@@ -2833,7 +2815,7 @@ impl Tenant {
            gate: Gate::default(),
            timeline_get_throttle: Arc::new(throttle::Throttle::new(
                Tenant::get_timeline_get_throttle_config(conf, &attached_conf.tenant_conf),
-                crate::metrics::tenant_throttling::TimelineGet::new(&tenant_shard_id),
+                &crate::metrics::tenant_throttling::TIMELINE_GET,
            )),
            tenant_conf: Arc::new(ArcSwap::from_pointee(attached_conf)),
            ongoing_timeline_detach: std::sync::Mutex::default(),
@@ -3215,9 +3197,6 @@ impl Tenant {
        image_layer_desc: Vec<(Lsn, Vec<(pageserver_api::key::Key, bytes::Bytes)>)>,
        end_lsn: Lsn,
    ) -> anyhow::Result<Arc<Timeline>> {
-        use checks::check_valid_layermap;
-        use itertools::Itertools;
-
        let tline = self
            .branch_timeline_test(src_timeline, dst_id, ancestor_lsn, ctx)
            .await?;
@@ -3238,18 +3217,6 @@ impl Tenant {
                .force_create_image_layer(lsn, images, Some(ancestor_lsn), ctx)
                .await?;
        }
-        let layer_names = tline
-            .layers
-            .read()
-            .await
-            .layer_map()
-            .unwrap()
-            .iter_historic_layers()
-            .map(|layer| layer.layer_name())
-            .collect_vec();
-        if let Some(err) = check_valid_layermap(&layer_names) {
-            bail!("invalid layermap: {err}");
-        }
        Ok(tline)
    }

@@ -4197,18 +4164,9 @@ pub(crate) mod harness {
            let records_neon = records.iter().all(|r| apply_neon::can_apply_in_neon(&r.1));
            if records_neon {
                // For Neon wal records, we can decode without spawning postgres, so do so.
-                let mut page = match (base_img, records.first()) {
-                    (Some((_lsn, img)), _) => {
-                        let mut page = BytesMut::new();
-                        page.extend_from_slice(&img);
-                        page
-                    }
-                    (_, Some((_lsn, rec))) if rec.will_init() => BytesMut::new(),
-                    _ => {
-                        panic!("Neon WAL redo requires base image or will init record");
-                    }
-                };
-
+                let base_img = base_img.expect("Neon WAL redo requires base image").1;
+                let mut page = BytesMut::new();
+                page.extend_from_slice(&base_img);
                for (record_lsn, record) in records {
                    apply_neon::apply_in_neon(&record, record_lsn, key, &mut page)?;
                }
@@ -8512,135 +8470,4 @@ mod tests {

        Ok(())
    }
-
-    // Regression test for https://github.com/neondatabase/neon/issues/9012
-    // Create an image arrangement where we have to read at different LSN ranges
-    // from a delta layer. This is achieved by overlapping an image layer on top of
-    // a delta layer. Like so:
-    //
-    //     A      B
-    // +----------------+ -> delta_layer
-    // |                |                           ^ lsn
-    // |       =========|-> nested_image_layer      |
-    // |       C        |                           |
-    // +----------------+                           |
-    // ======== -> baseline_image_layer             +-------> key
-    //
-    //
-    // When querying the key range [A, B) we need to read at different LSN ranges
-    // for [A, C) and [C, B). This test checks that the described edge case is handled correctly.
-    #[tokio::test]
-    async fn test_vectored_read_with_nested_image_layer() -> anyhow::Result<()> {
-        let harness = TenantHarness::create("test_vectored_read_with_nested_image_layer").await?;
-        let (tenant, ctx) = harness.load().await;
-
-        let will_init_keys = [2, 6];
-        fn get_key(id: u32) -> Key {
-            let mut key = Key::from_hex("110000000033333333444444445500000000").unwrap();
-            key.field6 = id;
-            key
-        }
-
-        let mut expected_key_values = HashMap::new();
-
-        let baseline_image_layer_lsn = Lsn(0x10);
-        let mut baseline_img_layer = Vec::new();
-        for i in 0..5 {
-            let key = get_key(i);
-            let value = format!("value {i}@{baseline_image_layer_lsn}");
-
-            let removed = expected_key_values.insert(key, value.clone());
-            assert!(removed.is_none());
-
-            baseline_img_layer.push((key, Bytes::from(value)));
-        }
-
-        let nested_image_layer_lsn = Lsn(0x50);
-        let mut nested_img_layer = Vec::new();
-        for i in 5..10 {
-            let key = get_key(i);
-            let value = format!("value {i}@{nested_image_layer_lsn}");
-
-            let removed = expected_key_values.insert(key, value.clone());
-            assert!(removed.is_none());
-
-            nested_img_layer.push((key, Bytes::from(value)));
-        }
-
-        let mut delta_layer_spec = Vec::default();
-        let delta_layer_start_lsn = Lsn(0x20);
-        let mut delta_layer_end_lsn = delta_layer_start_lsn;
-
-        for i in 0..10 {
-            let key = get_key(i);
-            let key_in_nested = nested_img_layer
-                .iter()
-                .any(|(key_with_img, _)| *key_with_img == key);
-            let lsn = {
-                if key_in_nested {
-                    Lsn(nested_image_layer_lsn.0 + 0x10)
-                } else {
-                    delta_layer_start_lsn
-                }
-            };
-
-            let will_init = will_init_keys.contains(&i);
-            if will_init {
-                delta_layer_spec.push((key, lsn, Value::WalRecord(NeonWalRecord::wal_init())));
-
-                expected_key_values.insert(key, "".to_string());
-            } else {
-                let delta = format!("@{lsn}");
-                delta_layer_spec.push((
-                    key,
-                    lsn,
-                    Value::WalRecord(NeonWalRecord::wal_append(&delta)),
-                ));
-
-                expected_key_values
-                    .get_mut(&key)
-                    .expect("An image exists for each key")
-                    .push_str(delta.as_str());
-            }
-            delta_layer_end_lsn = std::cmp::max(delta_layer_start_lsn, lsn);
-        }
-
-        delta_layer_end_lsn = Lsn(delta_layer_end_lsn.0 + 1);
-
-        assert!(
-            nested_image_layer_lsn > delta_layer_start_lsn
-                && nested_image_layer_lsn < delta_layer_end_lsn
-        );
-
-        let tline = tenant
-            .create_test_timeline_with_layers(
-                TIMELINE_ID,
-                baseline_image_layer_lsn,
-                DEFAULT_PG_VERSION,
-                &ctx,
-                vec![DeltaLayerTestDesc::new_with_inferred_key_range(
-                    delta_layer_start_lsn..delta_layer_end_lsn,
-                    delta_layer_spec,
-                )], // delta layers
-                vec![
-                    (baseline_image_layer_lsn, baseline_img_layer),
-                    (nested_image_layer_lsn, nested_img_layer),
-                ], // image layers
-                delta_layer_end_lsn,
-            )
-            .await?;
-
-        let keyspace = KeySpace::single(get_key(0)..get_key(10));
-        let results = tline
-            .get_vectored(keyspace, delta_layer_end_lsn, &ctx)
-            .await
-            .expect("No vectored errors");
-        for (key, res) in results {
-            let value = res.expect("No key errors");
-            let expected_value = expected_key_values.remove(&key).expect("No unknown keys");
-            assert_eq!(value, Bytes::from(expected_value));
-        }
-
-        Ok(())
-    }
 }
--- a/pageserver/src/tenant/checks.rs
+++ b/pageserver/src/tenant/checks.rs
@@ -1,55 +0,0 @@
-use std::collections::BTreeSet;
-
-use itertools::Itertools;
-
-use super::storage_layer::LayerName;
-
-/// Checks whether a layer map is valid (i.e., is a valid result of the current compaction algorithm if nothing goes wrong).
-/// The function checks if we can split the LSN range of a delta layer only at the LSNs of the delta layers. For example,
-///
-/// ```plain
-/// |       |                 |       |
-/// |   1   |    |   2   |    |   3   |
-/// |       |    |       |    |       |
-/// ```
-///
-/// This is not a valid layer map because the LSN range of layer 1 intersects with the LSN range of layer 2. 1 and 2 should have
-/// the same LSN range.
-///
-/// The exception is that when layer 2 only contains a single key, it could be split over the LSN range. For example,
-///
-/// ```plain
-/// |       |    |   2   |    |       |
-/// |   1   |    |-------|    |   3   |
-/// |       |    |   4   |    |       |
-///
-/// If layer 2 and 4 contain the same single key, this is also a valid layer map.
-pub fn check_valid_layermap(metadata: &[LayerName]) -> Option<String> {
-    let mut lsn_split_point = BTreeSet::new(); // TODO: use a better data structure (range tree / range set?)
-    let mut all_delta_layers = Vec::new();
-    for name in metadata {
-        if let LayerName::Delta(layer) = name {
-            if layer.key_range.start.next() != layer.key_range.end {
-                all_delta_layers.push(layer.clone());
-            }
-        }
-    }
-    for layer in &all_delta_layers {
-        let lsn_range = &layer.lsn_range;
-        lsn_split_point.insert(lsn_range.start);
-        lsn_split_point.insert(lsn_range.end);
-    }
-    for layer in &all_delta_layers {
-        let lsn_range = layer.lsn_range.clone();
-        let intersects = lsn_split_point.range(lsn_range).collect_vec();
-        if intersects.len() > 1 {
-            let err = format!(
-                "layer violates the layer map LSN split assumption: layer {} intersects with LSN [{}]",
-                layer,
-                intersects.into_iter().map(|lsn| lsn.to_string()).join(", ")
-            );
-            return Some(err);
-        }
-    }
-    None
-}
--- a/pageserver/src/tenant/gc_block.rs
+++ b/pageserver/src/tenant/gc_block.rs
@@ -1,29 +1,11 @@
-use std::{collections::HashMap, time::Duration};
+use std::collections::HashMap;

-use super::remote_timeline_client::index::GcBlockingReason;
-use tokio::time::Instant;
 use utils::id::TimelineId;

-type TimelinesBlocked = HashMap<TimelineId, enumset::EnumSet<GcBlockingReason>>;
+use super::remote_timeline_client::index::GcBlockingReason;

-#[derive(Default)]
-struct Storage {
-    timelines_blocked: TimelinesBlocked,
-    /// The deadline before which we are blocked from GC so that
-    /// leases have a chance to be renewed.
-    lsn_lease_deadline: Option<Instant>,
-}
+type Storage = HashMap<TimelineId, enumset::EnumSet<GcBlockingReason>>;

-impl Storage {
-    fn is_blocked_by_lsn_lease_deadline(&self) -> bool {
-        self.lsn_lease_deadline
-            .map(|d| Instant::now() < d)
-            .unwrap_or(false)
-    }
-}
-
-/// GcBlock provides persistent (per-timeline) gc blocking and facilitates transient time based gc
-/// blocking.
 #[derive(Default)]
 pub(crate) struct GcBlock {
    /// The timelines which have current reasons to block gc.
@@ -31,12 +13,6 @@ pub(crate) struct GcBlock {
    /// LOCK ORDER: this is held locked while scheduling the next index_part update. This is done
    /// to keep the this field up to date with RemoteTimelineClient `upload_queue.dirty`.
    reasons: std::sync::Mutex<Storage>,
-
-    /// GC background task or manually run `Tenant::gc_iteration` holds a lock on this.
-    ///
-    /// Do not add any more features taking and forbidding taking this lock. It should be
-    /// `tokio::sync::Notify`, but that is rarely used. On the other side, [`GcBlock::insert`]
-    /// synchronizes with gc attempts by locking and unlocking this mutex.
    blocking: tokio::sync::Mutex<()>,
 }

@@ -66,20 +42,6 @@ impl GcBlock {
        }
    }

-    /// Sets a deadline before which we cannot proceed to GC due to lsn lease.
-    ///
-    /// We do this as the leases mapping are not persisted to disk. By delaying GC by lease
-    /// length, we guarantee that all the leases we granted before will have a chance to renew
-    /// when we run GC for the first time after restart / transition from AttachedMulti to AttachedSingle.
-    pub(super) fn set_lsn_lease_deadline(&self, lsn_lease_length: Duration) {
-        let deadline = Instant::now() + lsn_lease_length;
-        let mut g = self.reasons.lock().unwrap();
-        g.lsn_lease_deadline = Some(deadline);
-    }
-
-    /// Describe the current gc blocking reasons.
-    ///
-    /// TODO: make this json serializable.
    pub(crate) fn summary(&self) -> Option<BlockingReasons> {
        let g = self.reasons.lock().unwrap();

@@ -102,7 +64,7 @@ impl GcBlock {
    ) -> anyhow::Result<bool> {
        let (added, uploaded) = {
            let mut g = self.reasons.lock().unwrap();
-            let set = g.timelines_blocked.entry(timeline.timeline_id).or_default();
+            let set = g.entry(timeline.timeline_id).or_default();
            let added = set.insert(reason);

            // LOCK ORDER: intentionally hold the lock, see self.reasons.
@@ -133,7 +95,7 @@ impl GcBlock {

        let (remaining_blocks, uploaded) = {
            let mut g = self.reasons.lock().unwrap();
-            match g.timelines_blocked.entry(timeline.timeline_id) {
+            match g.entry(timeline.timeline_id) {
                Entry::Occupied(mut oe) => {
                    let set = oe.get_mut();
                    set.remove(reason);
@@ -147,7 +109,7 @@ impl GcBlock {
                }
            }

-            let remaining_blocks = g.timelines_blocked.len();
+            let remaining_blocks = g.len();

            // LOCK ORDER: intentionally hold the lock while scheduling; see self.reasons
            let uploaded = timeline
@@ -172,11 +134,11 @@ impl GcBlock {
    pub(crate) fn before_delete(&self, timeline: &super::Timeline) {
        let unblocked = {
            let mut g = self.reasons.lock().unwrap();
-            if g.timelines_blocked.is_empty() {
+            if g.is_empty() {
                return;
            }

-            g.timelines_blocked.remove(&timeline.timeline_id);
+            g.remove(&timeline.timeline_id);

            BlockingReasons::clean_and_summarize(g).is_none()
        };
@@ -187,11 +149,10 @@ impl GcBlock {
    }

    /// Initialize with the non-deleted timelines of this tenant.
-    pub(crate) fn set_scanned(&self, scanned: TimelinesBlocked) {
+    pub(crate) fn set_scanned(&self, scanned: Storage) {
        let mut g = self.reasons.lock().unwrap();
-        assert!(g.timelines_blocked.is_empty());
-        g.timelines_blocked
-            .extend(scanned.into_iter().filter(|(_, v)| !v.is_empty()));
+        assert!(g.is_empty());
+        g.extend(scanned.into_iter().filter(|(_, v)| !v.is_empty()));

        if let Some(reasons) = BlockingReasons::clean_and_summarize(g) {
            tracing::info!(summary=?reasons, "initialized with gc blocked");
@@ -205,7 +166,6 @@ pub(super) struct Guard<'a> {

 #[derive(Debug)]
 pub(crate) struct BlockingReasons {
-    tenant_blocked_by_lsn_lease_deadline: bool,
    timelines: usize,
    reasons: enumset::EnumSet<GcBlockingReason>,
 }
@@ -214,8 +174,8 @@ impl std::fmt::Display for BlockingReasons {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
-            "tenant_blocked_by_lsn_lease_deadline: {}, {} timelines block for {:?}",
-            self.tenant_blocked_by_lsn_lease_deadline, self.timelines, self.reasons
+            "{} timelines block for {:?}",
+            self.timelines, self.reasons
        )
    }
 }
@@ -223,15 +183,13 @@ impl std::fmt::Display for BlockingReasons {
 impl BlockingReasons {
    fn clean_and_summarize(mut g: std::sync::MutexGuard<'_, Storage>) -> Option<Self> {
        let mut reasons = enumset::EnumSet::empty();
-        g.timelines_blocked.retain(|_key, value| {
+        g.retain(|_key, value| {
            reasons = reasons.union(*value);
            !value.is_empty()
        });
-        let blocked_by_lsn_lease_deadline = g.is_blocked_by_lsn_lease_deadline();
-        if !g.timelines_blocked.is_empty() || blocked_by_lsn_lease_deadline {
+        if !g.is_empty() {
            Some(BlockingReasons {
-                tenant_blocked_by_lsn_lease_deadline: blocked_by_lsn_lease_deadline,
-                timelines: g.timelines_blocked.len(),
+                timelines: g.len(),
                reasons,
            })
        } else {
@@ -240,17 +198,14 @@ impl BlockingReasons {
    }

    fn summarize(g: &std::sync::MutexGuard<'_, Storage>) -> Option<Self> {
-        let blocked_by_lsn_lease_deadline = g.is_blocked_by_lsn_lease_deadline();
-        if g.timelines_blocked.is_empty() && !blocked_by_lsn_lease_deadline {
+        if g.is_empty() {
            None
        } else {
            let reasons = g
-                .timelines_blocked
                .values()
                .fold(enumset::EnumSet::empty(), |acc, next| acc.union(*next));
            Some(BlockingReasons {
-                tenant_blocked_by_lsn_lease_deadline: blocked_by_lsn_lease_deadline,
-                timelines: g.timelines_blocked.len(),
+                timelines: g.len(),
                reasons,
            })
        }
--- a/pageserver/src/tenant/mgr.rs
+++ b/pageserver/src/tenant/mgr.rs
@@ -949,12 +949,6 @@ impl TenantManager {
                (LocationMode::Attached(attach_conf), Some(TenantSlot::Attached(tenant))) => {
                    match attach_conf.generation.cmp(&tenant.generation) {
                        Ordering::Equal => {
-                            if attach_conf.attach_mode == AttachmentMode::Single {
-                                tenant
-                                    .gc_block
-                                    .set_lsn_lease_deadline(tenant.get_lsn_lease_length());
-                            }
-
                            // A transition from Attached to Attached in the same generation, we may
                            // take our fast path and just provide the updated configuration
                            // to the tenant.
--- a/pageserver/src/tenant/storage_layer.rs
+++ b/pageserver/src/tenant/storage_layer.rs
@@ -276,16 +276,6 @@ pub(crate) enum LayerId {
    InMemoryLayerId(InMemoryLayerFileId),
 }

-/// Uniquely identify a layer visit by the layer
-/// and LSN floor (or start LSN) of the reads.
-/// The layer itself is not enough since we may
-/// have different LSN lower bounds for delta layer reads.
-#[derive(Debug, PartialEq, Eq, Clone, Hash)]
-struct LayerToVisitId {
-    layer_id: LayerId,
-    lsn_floor: Lsn,
-}
-
 /// Layer wrapper for the read path. Note that it is valid
 /// to use these layers even after external operations have
 /// been performed on them (compaction, freeze, etc.).
@@ -297,9 +287,9 @@ pub(crate) enum ReadableLayer {

 /// A partial description of a read to be done.
 #[derive(Debug, Clone)]
-struct LayerVisit {
+struct ReadDesc {
    /// An id used to resolve the readable layer within the fringe
-    layer_to_visit_id: LayerToVisitId,
+    layer_id: LayerId,
    /// Lsn range for the read, used for selecting the next read
    lsn_range: Range<Lsn>,
 }
@@ -313,12 +303,12 @@ struct LayerVisit {
 /// a two layer indexing scheme.
 #[derive(Debug)]
 pub(crate) struct LayerFringe {
-    planned_visits_by_lsn: BinaryHeap<LayerVisit>,
-    visit_reads: HashMap<LayerToVisitId, LayerVisitReads>,
+    planned_reads_by_lsn: BinaryHeap<ReadDesc>,
+    layers: HashMap<LayerId, LayerKeyspace>,
 }

 #[derive(Debug)]
-struct LayerVisitReads {
+struct LayerKeyspace {
    layer: ReadableLayer,
    target_keyspace: KeySpaceRandomAccum,
 }
@@ -326,23 +316,23 @@ struct LayerVisitReads {
 impl LayerFringe {
    pub(crate) fn new() -> Self {
        LayerFringe {
-            planned_visits_by_lsn: BinaryHeap::new(),
-            visit_reads: HashMap::new(),
+            planned_reads_by_lsn: BinaryHeap::new(),
+            layers: HashMap::new(),
        }
    }

    pub(crate) fn next_layer(&mut self) -> Option<(ReadableLayer, KeySpace, Range<Lsn>)> {
-        let read_desc = match self.planned_visits_by_lsn.pop() {
+        let read_desc = match self.planned_reads_by_lsn.pop() {
            Some(desc) => desc,
            None => return None,
        };

-        let removed = self.visit_reads.remove_entry(&read_desc.layer_to_visit_id);
+        let removed = self.layers.remove_entry(&read_desc.layer_id);

        match removed {
            Some((
                _,
-                LayerVisitReads {
+                LayerKeyspace {
                    layer,
                    mut target_keyspace,
                },
@@ -361,24 +351,20 @@ impl LayerFringe {
        keyspace: KeySpace,
        lsn_range: Range<Lsn>,
    ) {
-        let layer_to_visit_id = LayerToVisitId {
-            layer_id: layer.id(),
-            lsn_floor: lsn_range.start,
-        };
-
-        let entry = self.visit_reads.entry(layer_to_visit_id.clone());
+        let layer_id = layer.id();
+        let entry = self.layers.entry(layer_id.clone());
        match entry {
            Entry::Occupied(mut entry) => {
                entry.get_mut().target_keyspace.add_keyspace(keyspace);
            }
            Entry::Vacant(entry) => {
-                self.planned_visits_by_lsn.push(LayerVisit {
+                self.planned_reads_by_lsn.push(ReadDesc {
                    lsn_range,
-                    layer_to_visit_id: layer_to_visit_id.clone(),
+                    layer_id: layer_id.clone(),
                });
                let mut accum = KeySpaceRandomAccum::new();
                accum.add_keyspace(keyspace);
-                entry.insert(LayerVisitReads {
+                entry.insert(LayerKeyspace {
                    layer,
                    target_keyspace: accum,
                });
@@ -393,7 +379,7 @@ impl Default for LayerFringe {
    }
 }

-impl Ord for LayerVisit {
+impl Ord for ReadDesc {
    fn cmp(&self, other: &Self) -> Ordering {
        let ord = self.lsn_range.end.cmp(&other.lsn_range.end);
        if ord == std::cmp::Ordering::Equal {
@@ -404,19 +390,19 @@ impl Ord for LayerVisit {
    }
 }

-impl PartialOrd for LayerVisit {
+impl PartialOrd for ReadDesc {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
 }

-impl PartialEq for LayerVisit {
+impl PartialEq for ReadDesc {
    fn eq(&self, other: &Self) -> bool {
        self.lsn_range == other.lsn_range
    }
 }

-impl Eq for LayerVisit {}
+impl Eq for ReadDesc {}

 impl ReadableLayer {
    pub(crate) fn id(&self) -> LayerId {
--- a/pageserver/src/tenant/storage_layer/image_layer.rs
+++ b/pageserver/src/tenant/storage_layer/image_layer.rs
@@ -38,7 +38,7 @@ use crate::tenant::timeline::GetVectoredError;
 use crate::tenant::vectored_blob_io::{
    BlobFlag, StreamingVectoredReadPlanner, VectoredBlobReader, VectoredRead, VectoredReadPlanner,
 };
-use crate::tenant::PageReconstructError;
+use crate::tenant::{PageReconstructError, Timeline};
 use crate::virtual_file::owned_buffers_io::io_buf_ext::IoBufExt;
 use crate::virtual_file::{self, VirtualFile};
 use crate::{IMAGE_FILE_MAGIC, STORAGE_FORMAT_VERSION, TEMP_FILE_SUFFIX};
@@ -58,6 +58,7 @@ use std::io::SeekFrom;
 use std::ops::Range;
 use std::os::unix::prelude::FileExt;
 use std::str::FromStr;
+use std::sync::Arc;
 use tokio::sync::OnceCell;
 use tokio_stream::StreamExt;
 use tracing::*;
@@ -69,7 +70,9 @@ use utils::{
 };

 use super::layer_name::ImageLayerName;
-use super::{AsLayerDesc, LayerName, PersistentLayerDesc, ValuesReconstructState};
+use super::{
+    AsLayerDesc, Layer, LayerName, PersistentLayerDesc, ResidentLayer, ValuesReconstructState,
+};

 ///
 /// Header stored in the beginning of the file
@@ -797,9 +800,10 @@ impl ImageLayerWriterInner {
    ///
    async fn finish(
        self,
+        timeline: &Arc<Timeline>,
        ctx: &RequestContext,
        end_key: Option<Key>,
-    ) -> anyhow::Result<(PersistentLayerDesc, Utf8PathBuf)> {
+    ) -> anyhow::Result<ResidentLayer> {
        let index_start_blk =
            ((self.blob_writer.size() + PAGE_SZ as u64 - 1) / PAGE_SZ as u64) as u32;

@@ -875,9 +879,12 @@ impl ImageLayerWriterInner {
        // fsync the file
        file.sync_all().await?;

-        trace!("created image layer {}", self.path);
+        // FIXME: why not carry the virtualfile here, it supports renaming?
+        let layer = Layer::finish_creating(self.conf, timeline, desc, &self.path)?;

-        Ok((desc, self.path))
+        info!("created image layer {}", layer.local_path());
+
+        Ok(layer)
    }
 }

@@ -956,18 +963,24 @@ impl ImageLayerWriter {
    ///
    pub(crate) async fn finish(
        mut self,
+        timeline: &Arc<Timeline>,
        ctx: &RequestContext,
-    ) -> anyhow::Result<(PersistentLayerDesc, Utf8PathBuf)> {
-        self.inner.take().unwrap().finish(ctx, None).await
+    ) -> anyhow::Result<super::ResidentLayer> {
+        self.inner.take().unwrap().finish(timeline, ctx, None).await
    }

    /// Finish writing the image layer with an end key, used in [`super::split_writer::SplitImageLayerWriter`]. The end key determines the end of the image layer's covered range and is exclusive.
    pub(super) async fn finish_with_end_key(
        mut self,
+        timeline: &Arc<Timeline>,
        end_key: Key,
        ctx: &RequestContext,
-    ) -> anyhow::Result<(PersistentLayerDesc, Utf8PathBuf)> {
-        self.inner.take().unwrap().finish(ctx, Some(end_key)).await
+    ) -> anyhow::Result<super::ResidentLayer> {
+        self.inner
+            .take()
+            .unwrap()
+            .finish(timeline, ctx, Some(end_key))
+            .await
    }
 }

@@ -1071,7 +1084,7 @@ mod test {
        tenant::{
            config::TenantConf,
            harness::{TenantHarness, TIMELINE_ID},
-            storage_layer::{Layer, ResidentLayer},
+            storage_layer::ResidentLayer,
            vectored_blob_io::StreamingVectoredReadPlanner,
            Tenant, Timeline,
        },
@@ -1142,8 +1155,7 @@ mod test {

                key = key.next();
            }
-            let (desc, path) = writer.finish(&ctx).await.unwrap();
-            Layer::finish_creating(tenant.conf, &timeline, desc, &path).unwrap()
+            writer.finish(&timeline, &ctx).await.unwrap()
        };
        let original_size = resident.metadata().file_size;

@@ -1205,9 +1217,7 @@ mod test {
                .await
                .unwrap();
            let replacement = if wrote_keys > 0 {
-                let (desc, path) = filtered_writer.finish(&ctx).await.unwrap();
-                let resident = Layer::finish_creating(tenant.conf, &timeline, desc, &path).unwrap();
-                Some(resident)
+                Some(filtered_writer.finish(&timeline, &ctx).await.unwrap())
            } else {
                None
            };
@@ -1280,8 +1290,7 @@ mod test {
        for (key, img) in images {
            writer.put_image(key, img, ctx).await?;
        }
-        let (desc, path) = writer.finish(ctx).await?;
-        let img_layer = Layer::finish_creating(tenant.conf, tline, desc, &path)?;
+        let img_layer = writer.finish(tline, ctx).await?;

        Ok::<_, anyhow::Error>(img_layer)
    }
--- a/pageserver/src/tenant/storage_layer/layer.rs
+++ b/pageserver/src/tenant/storage_layer/layer.rs
@@ -439,30 +439,11 @@ impl Layer {

    fn record_access(&self, ctx: &RequestContext) {
        if self.0.access_stats.record_access(ctx) {
-            // Visibility was modified to Visible: maybe log about this
-            match ctx.task_kind() {
-                TaskKind::CalculateSyntheticSize
-                | TaskKind::GarbageCollector
-                | TaskKind::MgmtRequest => {
-                    // This situation is expected in code paths do binary searches of the LSN space to resolve
-                    // an LSN to a timestamp, which happens during GC, during GC cutoff calculations in synthetic size,
-                    // and on-demand for certain HTTP API requests.
-                }
-                _ => {
-                    // In all other contexts, it is unusual to do I/O involving layers which are not visible at
-                    // some branch tip, so we log the fact that we are accessing something that the visibility
-                    // calculation thought should not be visible.
-                    //
-                    // This case is legal in brief time windows: for example an in-flight getpage request can hold on to a layer object
-                    // which was covered by a concurrent compaction.
-                    tracing::info!(
-                        "Layer {} became visible as a result of access",
-                        self.0.desc.key()
-                    );
-                }
-            }
-
-            // Update the timeline's visible bytes count
+            // Visibility was modified to Visible
+            tracing::info!(
+                "Layer {} became visible as a result of access",
+                self.0.desc.key()
+            );
            if let Some(tl) = self.0.timeline.upgrade() {
                tl.metrics
                    .visible_physical_size_gauge
--- a/pageserver/src/tenant/storage_layer/layer/tests.rs
+++ b/pageserver/src/tenant/storage_layer/layer/tests.rs
@@ -1025,15 +1025,6 @@ fn access_stats() {
    assert_eq!(access_stats.latest_activity(), lowres_time(atime));
    access_stats.set_visibility(LayerVisibilityHint::Visible);
    assert_eq!(access_stats.latest_activity(), lowres_time(atime));
-
-    // Recording access implicitly makes layer visible, if it wasn't already
-    let atime = UNIX_EPOCH + Duration::from_secs(2200000000);
-    access_stats.set_visibility(LayerVisibilityHint::Covered);
-    assert_eq!(access_stats.visibility(), LayerVisibilityHint::Covered);
-    assert!(access_stats.record_access_at(atime));
-    access_stats.set_visibility(LayerVisibilityHint::Visible);
-    assert!(!access_stats.record_access_at(atime));
-    access_stats.set_visibility(LayerVisibilityHint::Visible);
 }

 #[test]
--- a/pageserver/src/tenant/storage_layer/split_writer.rs
+++ b/pageserver/src/tenant/storage_layer/split_writer.rs
@@ -121,11 +121,11 @@ impl SplitImageLayerWriter {
                self.generated_layers
                    .push(SplitWriterResult::Discarded(layer_key));
            } else {
-                let (desc, path) = prev_image_writer.finish_with_end_key(key, ctx).await?;
-
-                let layer = Layer::finish_creating(self.conf, tline, desc, &path)?;
-                self.generated_layers
-                    .push(SplitWriterResult::Produced(layer));
+                self.generated_layers.push(SplitWriterResult::Produced(
+                    prev_image_writer
+                        .finish_with_end_key(tline, key, ctx)
+                        .await?,
+                ));
            }
        }
        self.inner.put_image(key, img, ctx).await
@@ -170,9 +170,9 @@ impl SplitImageLayerWriter {
        if discard(&layer_key).await {
            generated_layers.push(SplitWriterResult::Discarded(layer_key));
        } else {
-            let (desc, path) = inner.finish_with_end_key(end_key, ctx).await?;
-            let layer = Layer::finish_creating(self.conf, tline, desc, &path)?;
-            generated_layers.push(SplitWriterResult::Produced(layer));
+            generated_layers.push(SplitWriterResult::Produced(
+                inner.finish_with_end_key(tline, end_key, ctx).await?,
+            ));
        }
        Ok(generated_layers)
    }
--- a/pageserver/src/tenant/tasks.rs
+++ b/pageserver/src/tenant/tasks.rs
@@ -163,6 +163,8 @@ async fn compaction_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
    // How many errors we have seen consequtively
    let mut error_run_count = 0;

+    let mut last_throttle_flag_reset_at = Instant::now();
+
    TENANT_TASK_EVENTS.with_label_values(&["start"]).inc();
    async {
        let ctx = RequestContext::todo_child(TaskKind::Compaction, DownloadBehavior::Download);
@@ -189,6 +191,8 @@ async fn compaction_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
                }
            }

+
+
            let sleep_duration;
            if period == Duration::ZERO {
                #[cfg(not(feature = "testing"))]
@@ -203,18 +207,12 @@ async fn compaction_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
                };

                // Run compaction
-                let IterationResult { output, elapsed } = iteration
-                    .run(tenant.compaction_iteration(&cancel, &ctx))
-                    .await;
+                let IterationResult { output, elapsed } = iteration.run(tenant.compaction_iteration(&cancel, &ctx)).await;
                match output {
                    Ok(has_pending_task) => {
                        error_run_count = 0;
                        // schedule the next compaction immediately in case there is a pending compaction task
-                        sleep_duration = if has_pending_task {
-                            Duration::ZERO
-                        } else {
-                            period
-                        };
+                        sleep_duration = if has_pending_task { Duration::ZERO } else { period };
                    }
                    Err(e) => {
                        let wait_duration = backoff::exponential_backoff_duration_seconds(
@@ -235,20 +233,38 @@ async fn compaction_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
                }

                // the duration is recorded by performance tests by enabling debug in this function
-                tracing::debug!(
-                    elapsed_ms = elapsed.as_millis(),
-                    "compaction iteration complete"
-                );
+                tracing::debug!(elapsed_ms=elapsed.as_millis(), "compaction iteration complete");
            };

+
            // Perhaps we did no work and the walredo process has been idle for some time:
            // give it a chance to shut down to avoid leaving walredo process running indefinitely.
-            // TODO: move this to a separate task (housekeeping loop) that isn't affected by the back-off,
-            // so we get some upper bound guarantee on when walredo quiesce / this throttling reporting here happens.
            if let Some(walredo_mgr) = &tenant.walredo_mgr {
                walredo_mgr.maybe_quiesce(period * 10);
            }

+            // TODO: move this (and walredo quiesce) to a separate task that isn't affected by the back-off,
+            // so we get some upper bound guarantee on when walredo quiesce / this throttling reporting here happens.
+            info_span!(parent: None, "timeline_get_throttle", tenant_id=%tenant.tenant_shard_id, shard_id=%tenant.tenant_shard_id.shard_slug()).in_scope(|| {
+                let now = Instant::now();
+                let prev = std::mem::replace(&mut last_throttle_flag_reset_at, now);
+                let Stats { count_accounted, count_throttled, sum_throttled_usecs } = tenant.timeline_get_throttle.reset_stats();
+                if count_throttled == 0 {
+                    return;
+                }
+                let allowed_rps = tenant.timeline_get_throttle.steady_rps();
+                let delta = now - prev;
+                info!(
+                    n_seconds=%format_args!("{:.3}",
+                    delta.as_secs_f64()),
+                    count_accounted,
+                    count_throttled,
+                    sum_throttled_usecs,
+                    allowed_rps=%format_args!("{allowed_rps:.0}"),
+                    "shard was throttled in the last n_seconds"
+                );
+            });
+
            // Sleep
            if tokio::time::timeout(sleep_duration, cancel.cancelled())
                .await
@@ -330,7 +346,6 @@ async fn gc_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
            RequestContext::todo_child(TaskKind::GarbageCollector, DownloadBehavior::Download);

        let mut first = true;
-        tenant.gc_block.set_lsn_lease_deadline(tenant.get_lsn_lease_length());
        loop {
            tokio::select! {
                _ = cancel.cancelled() => {
@@ -348,6 +363,7 @@ async fn gc_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
                first = false;

                let delays = async {
+                    delay_by_lease_length(tenant.get_lsn_lease_length(), &cancel).await?;
                    random_init_delay(period, &cancel).await?;
                    Ok::<_, Cancelled>(())
                };
@@ -421,7 +437,6 @@ async fn gc_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
 async fn ingest_housekeeping_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
    TENANT_TASK_EVENTS.with_label_values(&["start"]).inc();
    async {
-    let mut last_throttle_flag_reset_at = Instant::now();
        loop {
            tokio::select! {
                _ = cancel.cancelled() => {
@@ -468,29 +483,6 @@ async fn ingest_housekeeping_loop(tenant: Arc<Tenant>, cancel: CancellationToken
                kind: BackgroundLoopKind::IngestHouseKeeping,
            };
            iteration.run(tenant.ingest_housekeeping()).await;
-
-            // TODO: rename the background loop kind to something more generic, like, tenant housekeeping.
-            // Or just spawn another background loop for this throttle, it's not like it's super costly.
-            info_span!(parent: None, "timeline_get_throttle", tenant_id=%tenant.tenant_shard_id, shard_id=%tenant.tenant_shard_id.shard_slug()).in_scope(|| {
-                let now = Instant::now();
-                let prev = std::mem::replace(&mut last_throttle_flag_reset_at, now);
-                let Stats { count_accounted_start, count_accounted_finish, count_throttled, sum_throttled_usecs} = tenant.timeline_get_throttle.reset_stats();
-                if count_throttled == 0 {
-                    return;
-                }
-                let allowed_rps = tenant.timeline_get_throttle.steady_rps();
-                let delta = now - prev;
-                info!(
-                    n_seconds=%format_args!("{:.3}",
-                    delta.as_secs_f64()),
-                    count_accounted = count_accounted_finish,  // don't break existing log scraping
-                    count_throttled,
-                    sum_throttled_usecs,
-                    count_accounted_start, // log after pre-existing fields to not break existing log scraping
-                    allowed_rps=%format_args!("{allowed_rps:.0}"),
-                    "shard was throttled in the last n_seconds"
-                );
-            });
        }
    }
    .await;
@@ -546,12 +538,28 @@ pub(crate) async fn random_init_delay(
        let mut rng = rand::thread_rng();
        rng.gen_range(Duration::ZERO..=period)
    };
+
    match tokio::time::timeout(d, cancel.cancelled()).await {
        Ok(_) => Err(Cancelled),
        Err(_) => Ok(()),
    }
 }

+/// Delays GC by the given lease length at restart; returns `Err(Cancelled)` if `cancel` fires first.
+///
+/// We do this because the lease mappings are not persisted to disk. By delaying GC by the default
+/// lease length, we guarantee that all the leases we granted before the restart will have expired
+/// when we run GC for the first time after the restart.
+pub(crate) async fn delay_by_lease_length(
+    length: Duration,
+    cancel: &CancellationToken,
+) -> Result<(), Cancelled> {
+    match tokio::time::timeout(length, cancel.cancelled()).await {
+        Ok(_) => Err(Cancelled), // `cancel.cancelled()` completed before `length` elapsed
+        Err(_) => Ok(()), // the timeout elapsed, i.e. the full delay was waited out
+    }
+}
+
 struct Iteration {
    started_at: Instant,
    period: Duration,
--- a/pageserver/src/tenant/throttle.rs
+++ b/pageserver/src/tenant/throttle.rs
@@ -24,10 +24,8 @@ use crate::{context::RequestContext, task_mgr::TaskKind};
 pub struct Throttle<M: Metric> {
    inner: ArcSwap<Inner>,
    metric: M,
-    /// will be turned into [`Stats::count_accounted_start`]
-    count_accounted_start: AtomicU64,
-    /// will be turned into [`Stats::count_accounted_finish`]
-    count_accounted_finish: AtomicU64,
+    /// will be turned into [`Stats::count_accounted`]
+    count_accounted: AtomicU64,
    /// will be turned into [`Stats::count_throttled`]
    count_throttled: AtomicU64,
    /// will be turned into [`Stats::sum_throttled_usecs`]
@@ -45,21 +43,17 @@ pub struct Observation {
    pub wait_time: Duration,
 }
 pub trait Metric {
-    fn accounting_start(&self);
-    fn accounting_finish(&self);
    fn observe_throttling(&self, observation: &Observation);
 }

 /// See [`Throttle::reset_stats`].
 pub struct Stats {
-    /// Number of requests that started [`Throttle::throttle`] calls.
-    pub count_accounted_start: u64,
-    /// Number of requests that finished [`Throttle::throttle`] calls.
-    pub count_accounted_finish: u64,
-    /// Subset of the `accounted` requests that were actually throttled.
-    /// Note that the numbers are stored as two independent atomics, so, there might be a slight drift.
+    // Number of requests that were subject to throttling, i.e., requests of the configured [`Config::task_kinds`].
+    pub count_accounted: u64,
+    // Subset of the `accounted` requests that were actually throttled.
+    // Note that the numbers are stored as two independent atomics, so, there might be a slight drift.
    pub count_throttled: u64,
-    /// Sum of microseconds that throttled requests spent waiting for throttling.
+    // Sum of microseconds that throttled requests spent waiting for throttling.
    pub sum_throttled_usecs: u64,
 }

@@ -71,8 +65,7 @@ where
        Self {
            inner: ArcSwap::new(Arc::new(Self::new_inner(config))),
            metric,
-            count_accounted_start: AtomicU64::new(0),
-            count_accounted_finish: AtomicU64::new(0),
+            count_accounted: AtomicU64::new(0),
            count_throttled: AtomicU64::new(0),
            sum_throttled_usecs: AtomicU64::new(0),
        }
@@ -124,13 +117,11 @@ where
    /// This method allows retrieving & resetting that flag.
    /// Useful for periodic reporting.
    pub fn reset_stats(&self) -> Stats {
-        let count_accounted_start = self.count_accounted_start.swap(0, Ordering::Relaxed);
-        let count_accounted_finish = self.count_accounted_finish.swap(0, Ordering::Relaxed);
+        let count_accounted = self.count_accounted.swap(0, Ordering::Relaxed);
        let count_throttled = self.count_throttled.swap(0, Ordering::Relaxed);
        let sum_throttled_usecs = self.sum_throttled_usecs.swap(0, Ordering::Relaxed);
        Stats {
-            count_accounted_start,
-            count_accounted_finish,
+            count_accounted,
            count_throttled,
            sum_throttled_usecs,
        }
@@ -148,12 +139,9 @@ where
        };
        let start = std::time::Instant::now();

-        self.metric.accounting_start();
-        self.count_accounted_start.fetch_add(1, Ordering::Relaxed);
        let did_throttle = inner.rate_limiter.acquire(key_count).await;
-        self.count_accounted_finish.fetch_add(1, Ordering::Relaxed);
-        self.metric.accounting_finish();

+        self.count_accounted.fetch_add(1, Ordering::Relaxed);
        if did_throttle {
            self.count_throttled.fetch_add(1, Ordering::Relaxed);
            let now = Instant::now();
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -196,8 +196,9 @@ fn drop_wlock<T>(rlock: tokio::sync::RwLockWriteGuard<'_, T>) {
 /// The outward-facing resources required to build a Timeline
 pub struct TimelineResources {
    pub remote_client: RemoteTimelineClient,
-    pub timeline_get_throttle:
-        Arc<crate::tenant::throttle::Throttle<crate::metrics::tenant_throttling::TimelineGet>>,
+    pub timeline_get_throttle: Arc<
+        crate::tenant::throttle::Throttle<&'static crate::metrics::tenant_throttling::TimelineGet>,
+    >,
    pub l0_flush_global_state: l0_flush::L0FlushGlobalState,
 }

@@ -405,8 +406,9 @@ pub struct Timeline {
    gc_lock: tokio::sync::Mutex<()>,

    /// Cloned from [`super::Tenant::timeline_get_throttle`] on construction.
-    timeline_get_throttle:
-        Arc<crate::tenant::throttle::Throttle<crate::metrics::tenant_throttling::TimelineGet>>,
+    timeline_get_throttle: Arc<
+        crate::tenant::throttle::Throttle<&'static crate::metrics::tenant_throttling::TimelineGet>,
+    >,

    /// Keep aux directory cache to avoid it's reconstruction on each update
    pub(crate) aux_files: tokio::sync::Mutex<AuxFilesState>,
@@ -4011,9 +4013,7 @@ impl Timeline {
        if wrote_keys {
            // Normal path: we have written some data into the new image layer for this
            // partition, so flush it to disk.
-            let (desc, path) = image_layer_writer.finish(ctx).await?;
-            let image_layer = Layer::finish_creating(self.conf, self, desc, &path)?;
-            info!("created image layer for rel {}", image_layer.local_path());
+            let image_layer = image_layer_writer.finish(self, ctx).await?;
            Ok(ImageLayerCreationOutcome {
                image: Some(image_layer),
                next_start_key: img_range.end,
@@ -4101,12 +4101,7 @@ impl Timeline {
        if wrote_any_image {
            // Normal path: we have written some data into the new image layer for this
            // partition, so flush it to disk.
-            let (desc, path) = image_layer_writer.finish(ctx).await?;
-            let image_layer = Layer::finish_creating(self.conf, self, desc, &path)?;
-            info!(
-                "created image layer for metadata {}",
-                image_layer.local_path()
-            );
+            let image_layer = image_layer_writer.finish(self, ctx).await?;
            Ok(ImageLayerCreationOutcome {
                image: Some(image_layer),
                next_start_key: img_range.end,
@@ -4314,9 +4309,7 @@ impl Timeline {
        timer.stop_and_record();

        // Creating image layers may have caused some previously visible layers to be covered
-        if !image_layers.is_empty() {
-            self.update_layer_visibility().await?;
-        }
+        self.update_layer_visibility().await?;

        Ok(image_layers)
    }
@@ -5378,8 +5371,7 @@ impl Timeline {
    /// Force create an image layer and place it into the layer map.
    ///
    /// DO NOT use this function directly. Use [`Tenant::branch_timeline_test_with_layers`]
-    /// or [`Tenant::create_test_timeline_with_layers`] to ensure all these layers are
-    /// placed into the layer map in one run AND be validated.
+    /// or [`Tenant::create_test_timeline_with_layers`] to ensure all these layers are placed into the layer map in one run.
    #[cfg(test)]
    pub(super) async fn force_create_image_layer(
        self: &Arc<Timeline>,
@@ -5411,9 +5403,8 @@ impl Timeline {
        for (key, img) in images {
            image_layer_writer.put_image(key, img, ctx).await?;
        }
-        let (desc, path) = image_layer_writer.finish(ctx).await?;
-        let image_layer = Layer::finish_creating(self.conf, self, desc, &path)?;
-        info!("force created image layer {}", image_layer.local_path());
+        let image_layer = image_layer_writer.finish(self, ctx).await?;
+
        {
            let mut guard = self.layers.write().await;
            guard.open_mut().unwrap().force_insert_layer(image_layer);
@@ -5425,8 +5416,7 @@ impl Timeline {
    /// Force create a delta layer and place it into the layer map.
    ///
    /// DO NOT use this function directly. Use [`Tenant::branch_timeline_test_with_layers`]
-    /// or [`Tenant::create_test_timeline_with_layers`] to ensure all these layers are
-    /// placed into the layer map in one run AND be validated.
+    /// or [`Tenant::create_test_timeline_with_layers`] to ensure all these layers are placed into the layer map in one run.
    #[cfg(test)]
    pub(super) async fn force_create_delta_layer(
        self: &Arc<Timeline>,
@@ -5452,6 +5442,33 @@ impl Timeline {
        if let Some(check_start_lsn) = check_start_lsn {
            assert!(deltas.lsn_range.start >= check_start_lsn);
        }
+        // check if the delta layer does not violate the LSN invariant, the legacy compaction should always produce a batch of
+        // layers of the same start/end LSN, and so should the force inserted layer
+        {
+            /// Checks whether `a` overlaps with `b`, treating both as half-open ranges `[start, end)`.
+            pub fn overlaps_with<T: Ord>(a: &Range<T>, b: &Range<T>) -> bool {
+                !(a.end <= b.start || b.end <= a.start)
+            }
+
+            if deltas.key_range.start.next() != deltas.key_range.end {
+                let guard = self.layers.read().await;
+                let mut invalid_layers =
+                    guard.layer_map()?.iter_historic_layers().filter(|layer| {
+                        layer.is_delta()
+                        && overlaps_with(&layer.lsn_range, &deltas.lsn_range)
+                        && layer.lsn_range != deltas.lsn_range
+                        // skip single-key layer files
+                        && layer.key_range.start.next() != layer.key_range.end
+                    });
+                if let Some(layer) = invalid_layers.next() {
+                    // If a delta layer overlaps with another delta layer AND their LSN range is not the same, panic
+                    panic!(
+                        "inserted layer violates delta layer LSN invariant: current_lsn_range={}..{}, conflict_lsn_range={}..{}",
+                        deltas.lsn_range.start, deltas.lsn_range.end, layer.lsn_range.start, layer.lsn_range.end
+                    );
+                }
+            }
+        }
        let mut delta_layer_writer = DeltaLayerWriter::new(
            self.conf,
            self.timeline_id,
@@ -5466,7 +5483,7 @@ impl Timeline {
        }
        let (desc, path) = delta_layer_writer.finish(deltas.key_range.end, ctx).await?;
        let delta_layer = Layer::finish_creating(self.conf, self, desc, &path)?;
-        info!("force created delta layer {}", delta_layer.local_path());
+
        {
            let mut guard = self.layers.write().await;
            guard.open_mut().unwrap().force_insert_layer(delta_layer);
--- a/pageserver/src/tenant/timeline/compaction.rs
+++ b/pageserver/src/tenant/timeline/compaction.rs
@@ -29,7 +29,6 @@ use utils::id::TimelineId;

 use crate::context::{AccessStatsBehavior, RequestContext, RequestContextBuilder};
 use crate::page_cache;
-use crate::tenant::checks::check_valid_layermap;
 use crate::tenant::remote_timeline_client::WaitCompletionError;
 use crate::tenant::storage_layer::merge_iterator::MergeIterator;
 use crate::tenant::storage_layer::split_writer::{
@@ -564,12 +563,10 @@ impl Timeline {
                .await?;

            if keys_written > 0 {
-                let (desc, path) = image_layer_writer
-                    .finish(ctx)
+                let new_layer = image_layer_writer
+                    .finish(self, ctx)
                    .await
                    .map_err(CompactionError::Other)?;
-                let new_layer = Layer::finish_creating(self.conf, self, desc, &path)
-                    .map_err(CompactionError::Other)?;
                tracing::info!(layer=%new_layer, "Rewrote layer, {} -> {} bytes",
                    layer.metadata().file_size,
                    new_layer.metadata().file_size);
@@ -1789,12 +1786,20 @@ impl Timeline {
                stat.visit_image_layer(desc.file_size());
            }
        }
-        let layer_names: Vec<crate::tenant::storage_layer::LayerName> = layer_selection
-            .iter()
-            .map(|layer| layer.layer_desc().layer_name())
-            .collect_vec();
-        if let Some(err) = check_valid_layermap(&layer_names) {
-            bail!("cannot run gc-compaction because {}", err);
+        for layer in &layer_selection {
+            let desc = layer.layer_desc();
+            let key_range = &desc.key_range;
+            if desc.is_delta() && key_range.start.next() != key_range.end {
+                let lsn_range = desc.lsn_range.clone();
+                let intersects = lsn_split_point.range(lsn_range).collect_vec();
+                if intersects.len() > 1 {
+                    bail!(
+                        "cannot run gc-compaction because it violates the layer map LSN split assumption: layer {} intersects with LSN [{}]",
+                        desc.key(),
+                        intersects.into_iter().map(|lsn| lsn.to_string()).join(", ")
+                    );
+                }
+            }
        }
        // The maximum LSN we are processing in this compaction loop
        let end_lsn = layer_selection
--- a/pageserver/src/tenant/timeline/delete.rs
+++ b/pageserver/src/tenant/timeline/delete.rs
@@ -135,6 +135,25 @@ async fn delete_remote_layers_and_index(timeline: &Timeline) -> anyhow::Result<(
        .context("delete_all")
 }

+// This function removes remaining traces of a timeline on disk.
+// As written it only removes the delete mark file; it does not touch the
+// metadata file or the timeline directory.
+// Note: a missing delete mark is not treated as an error (see the
+// `fs_ext::ignore_not_found` call below; presumably it maps NotFound to Ok).
+async fn cleanup_remaining_timeline_fs_traces(
+    conf: &PageServerConf,
+    tenant_shard_id: TenantShardId,
+    timeline_id: TimelineId,
+) -> anyhow::Result<()> {
+    // Remove delete mark
+    // TODO: once we are confident that no more exist in the field, remove this
+    // line.  It cleans up a legacy marker file that might in rare cases be present.
+    tokio::fs::remove_file(conf.timeline_delete_mark_file_path(tenant_shard_id, timeline_id))
+        .await
+        .or_else(fs_ext::ignore_not_found)
+        .context("remove delete mark")
+}
+
 /// It is important that this gets called when DeletionGuard is being held.
 /// For more context see comments in [`DeleteTimelineFlow::prepare`]
 async fn remove_timeline_from_tenant(
@@ -175,10 +194,12 @@ async fn remove_timeline_from_tenant(
 /// 7. Delete mark file
 ///
 /// It is resumable from any step in case a crash/restart occurs.
-/// There are two entrypoints to the process:
+/// There are three entrypoints to the process:
 /// 1. [`DeleteTimelineFlow::run`] this is the main one called by a management api handler.
 /// 2. [`DeleteTimelineFlow::resume_deletion`] is called during restarts when local metadata is still present
 ///    and we possibly neeed to continue deletion of remote files.
+/// 3. [`DeleteTimelineFlow::cleanup_remaining_timeline_fs_traces`] is used when we deleted remote
+///    index but still have local metadata, timeline directory and delete mark.
 ///
 /// Note the only other place that messes around timeline delete mark is the logic that scans directory with timelines during tenant load.
 #[derive(Default)]
@@ -290,6 +311,18 @@ impl DeleteTimelineFlow {
        Ok(())
    }

+    #[instrument(skip_all, fields(%timeline_id))]
+    pub async fn cleanup_remaining_timeline_fs_traces(
+        tenant: &Tenant,
+        timeline_id: TimelineId,
+    ) -> anyhow::Result<()> {
+        let r =
+            cleanup_remaining_timeline_fs_traces(tenant.conf, tenant.tenant_shard_id, timeline_id)
+                .await;
+        info!("Done"); // logged on success and failure alike; `r` carries the actual outcome
+        r
+    }
+
    fn prepare(
        tenant: &Tenant,
        timeline_id: TimelineId,
--- a/pageserver/src/walingest.rs
+++ b/pageserver/src/walingest.rs
@@ -237,26 +237,6 @@ impl WalIngest {
                                .await?;
                        }
                    }
-                } else if pg_version == 17 {
-                    if info == postgres_ffi::v17::bindings::XLOG_DBASE_CREATE_WAL_LOG {
-                        debug!("XLOG_DBASE_CREATE_WAL_LOG: noop");
-                    } else if info == postgres_ffi::v17::bindings::XLOG_DBASE_CREATE_FILE_COPY {
-                        // The XLOG record was renamed between v14 and v15,
-                        // but the record format is the same.
-                        // So we can reuse XlCreateDatabase here.
-                        debug!("XLOG_DBASE_CREATE_FILE_COPY");
-                        let createdb = XlCreateDatabase::decode(&mut buf);
-                        self.ingest_xlog_dbase_create(modification, &createdb, ctx)
-                            .await?;
-                    } else if info == postgres_ffi::v17::bindings::XLOG_DBASE_DROP {
-                        let dropdb = XlDropDatabase::decode(&mut buf);
-                        for tablespace_id in dropdb.tablespace_ids {
-                            trace!("Drop db {}, {}", tablespace_id, dropdb.db_id);
-                            modification
-                                .drop_dbdir(tablespace_id, dropdb.db_id, ctx)
-                                .await?;
-                        }
-                    }
                }
            }
            pg_constants::RM_TBLSPC_ID => {
@@ -266,11 +246,7 @@ impl WalIngest {
                let info = decoded.xl_info & !pg_constants::XLR_INFO_MASK;

                if info == pg_constants::CLOG_ZEROPAGE {
-                    let pageno = if pg_version < 17 {
-                        buf.get_u32_le()
-                    } else {
-                        buf.get_u64_le() as u32
-                    };
+                    let pageno = buf.get_u32_le();
                    let segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT;
                    let rpageno = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT;
                    self.put_slru_page_image(
@@ -284,7 +260,7 @@ impl WalIngest {
                    .await?;
                } else {
                    assert!(info == pg_constants::CLOG_TRUNCATE);
-                    let xlrec = XlClogTruncate::decode(&mut buf, pg_version);
+                    let xlrec = XlClogTruncate::decode(&mut buf);
                    self.ingest_clog_truncate_record(modification, &xlrec, ctx)
                        .await?;
                }
@@ -323,21 +299,12 @@ impl WalIngest {
                        parsed_xact.xid,
                        lsn,
                    );
-
-                    let xid: u64 = if pg_version >= 17 {
-                        self.adjust_to_full_transaction_id(parsed_xact.xid)?
-                    } else {
-                        parsed_xact.xid as u64
-                    };
-                    modification.drop_twophase_file(xid, ctx).await?;
-                } else if info == pg_constants::XLOG_XACT_PREPARE {
-                    let xid: u64 = if pg_version >= 17 {
-                        self.adjust_to_full_transaction_id(decoded.xl_xid)?
-                    } else {
-                        decoded.xl_xid as u64
-                    };
                    modification
-                        .put_twophase_file(xid, Bytes::copy_from_slice(&buf[..]), ctx)
+                        .drop_twophase_file(parsed_xact.xid, ctx)
+                        .await?;
+                } else if info == pg_constants::XLOG_XACT_PREPARE {
+                    modification
+                        .put_twophase_file(decoded.xl_xid, Bytes::copy_from_slice(&buf[..]), ctx)
                        .await?;
                }
            }
@@ -345,11 +312,7 @@ impl WalIngest {
                let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK;

                if info == pg_constants::XLOG_MULTIXACT_ZERO_OFF_PAGE {
-                    let pageno = if pg_version < 17 {
-                        buf.get_u32_le()
-                    } else {
-                        buf.get_u64_le() as u32
-                    };
+                    let pageno = buf.get_u32_le();
                    let segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT;
                    let rpageno = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT;
                    self.put_slru_page_image(
@@ -362,11 +325,7 @@ impl WalIngest {
                    )
                    .await?;
                } else if info == pg_constants::XLOG_MULTIXACT_ZERO_MEM_PAGE {
-                    let pageno = if pg_version < 17 {
-                        buf.get_u32_le()
-                    } else {
-                        buf.get_u64_le() as u32
-                    };
+                    let pageno = buf.get_u32_le();
                    let segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT;
                    let rpageno = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT;
                    self.put_slru_page_image(
@@ -395,20 +354,6 @@ impl WalIngest {
            pg_constants::RM_XLOG_ID => {
                let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK;

-                if info == pg_constants::XLOG_PARAMETER_CHANGE {
-                    if let CheckPoint::V17(cp) = &mut self.checkpoint {
-                        let rec = v17::XlParameterChange::decode(&mut buf);
-                        cp.wal_level = rec.wal_level;
-                        self.checkpoint_modified = true;
-                    }
-                } else if info == pg_constants::XLOG_END_OF_RECOVERY {
-                    if let CheckPoint::V17(cp) = &mut self.checkpoint {
-                        let rec = v17::XlEndOfRecovery::decode(&mut buf);
-                        cp.wal_level = rec.wal_level;
-                        self.checkpoint_modified = true;
-                    }
-                }
-
                enum_pgversion_dispatch!(&mut self.checkpoint, CheckPoint, cp, {
                    if info == pg_constants::XLOG_NEXTOID {
                        let next_oid = buf.get_u32_le();
@@ -452,24 +397,12 @@ impl WalIngest {
                        if xlog_checkpoint.oldestActiveXid == pg_constants::INVALID_TRANSACTION_ID
                            && info == pg_constants::XLOG_CHECKPOINT_SHUTDOWN
                        {
-                            let oldest_active_xid = if pg_version >= 17 {
-                                let mut oldest_active_full_xid = cp.nextXid.value;
-                                for xid in modification.tline.list_twophase_files(lsn, ctx).await? {
-                                    if xid < oldest_active_full_xid {
-                                        oldest_active_full_xid = xid;
-                                    }
+                            let mut oldest_active_xid = cp.nextXid.value as u32;
+                            for xid in modification.tline.list_twophase_files(lsn, ctx).await? {
+                                if (xid.wrapping_sub(oldest_active_xid) as i32) < 0 {
+                                    oldest_active_xid = xid;
                                }
-                                oldest_active_full_xid as u32
-                            } else {
-                                let mut oldest_active_xid = cp.nextXid.value as u32;
-                                for xid in modification.tline.list_twophase_files(lsn, ctx).await? {
-                                    let narrow_xid = xid as u32;
-                                    if (narrow_xid.wrapping_sub(oldest_active_xid) as i32) < 0 {
-                                        oldest_active_xid = narrow_xid;
-                                    }
-                                }
-                                oldest_active_xid
-                            };
+                            }
                            cp.oldestActiveXid = oldest_active_xid;
                        } else {
                            cp.oldestActiveXid = xlog_checkpoint.oldestActiveXid;
@@ -582,25 +515,6 @@ impl WalIngest {
        Ok(modification.len() > prev_len)
    }

-    /// This is the same as AdjustToFullTransactionId(xid) in PostgreSQL
-    fn adjust_to_full_transaction_id(&self, xid: TransactionId) -> Result<u64> {
-        let next_full_xid =
-            enum_pgversion_dispatch!(&self.checkpoint, CheckPoint, cp, { cp.nextXid.value });
-
-        let next_xid = (next_full_xid) as u32;
-        let mut epoch = (next_full_xid >> 32) as u32;
-
-        if xid > next_xid {
-            // Wraparound occurred, must be from a prev epoch.
-            if epoch == 0 {
-                bail!("apparent XID wraparound with prepared transaction XID {xid}, nextXid is {next_full_xid}");
-            }
-            epoch -= 1;
-        }
-
-        Ok((epoch as u64) << 32 | xid as u64)
-    }
-
    /// Do not store this block, but observe it for the purposes of updating our relation size state.
    async fn observe_decoded_block(
        &mut self,
@@ -901,73 +815,6 @@ impl WalIngest {
                    bail!("Unknown RMGR {} for Heap decoding", decoded.xl_rmid);
                }
            }
-            17 => {
-                if decoded.xl_rmid == pg_constants::RM_HEAP_ID {
-                    let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;
-
-                    if info == pg_constants::XLOG_HEAP_INSERT {
-                        let xlrec = v17::XlHeapInsert::decode(buf);
-                        assert_eq!(0, buf.remaining());
-                        if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
-                            new_heap_blkno = Some(decoded.blocks[0].blkno);
-                        }
-                    } else if info == pg_constants::XLOG_HEAP_DELETE {
-                        let xlrec = v17::XlHeapDelete::decode(buf);
-                        if (xlrec.flags & pg_constants::XLH_DELETE_ALL_VISIBLE_CLEARED) != 0 {
-                            new_heap_blkno = Some(decoded.blocks[0].blkno);
-                        }
-                    } else if info == pg_constants::XLOG_HEAP_UPDATE
-                        || info == pg_constants::XLOG_HEAP_HOT_UPDATE
-                    {
-                        let xlrec = v17::XlHeapUpdate::decode(buf);
-                        // the size of tuple data is inferred from the size of the record.
-                        // we can't validate the remaining number of bytes without parsing
-                        // the tuple data.
-                        if (xlrec.flags & pg_constants::XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED) != 0 {
-                            old_heap_blkno = Some(decoded.blocks.last().unwrap().blkno);
-                        }
-                        if (xlrec.flags & pg_constants::XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED) != 0 {
-                            // PostgreSQL only uses XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED on a
-                            // non-HOT update where the new tuple goes to different page than
-                            // the old one. Otherwise, only XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED is
-                            // set.
-                            new_heap_blkno = Some(decoded.blocks[0].blkno);
-                        }
-                    } else if info == pg_constants::XLOG_HEAP_LOCK {
-                        let xlrec = v17::XlHeapLock::decode(buf);
-                        if (xlrec.flags & pg_constants::XLH_LOCK_ALL_FROZEN_CLEARED) != 0 {
-                            old_heap_blkno = Some(decoded.blocks[0].blkno);
-                            flags = pg_constants::VISIBILITYMAP_ALL_FROZEN;
-                        }
-                    }
-                } else if decoded.xl_rmid == pg_constants::RM_HEAP2_ID {
-                    let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;
-                    if info == pg_constants::XLOG_HEAP2_MULTI_INSERT {
-                        let xlrec = v17::XlHeapMultiInsert::decode(buf);
-
-                        let offset_array_len =
-                            if decoded.xl_info & pg_constants::XLOG_HEAP_INIT_PAGE > 0 {
-                                // the offsets array is omitted if XLOG_HEAP_INIT_PAGE is set
-                                0
-                            } else {
-                                size_of::<u16>() * xlrec.ntuples as usize
-                            };
-                        assert_eq!(offset_array_len, buf.remaining());
-
-                        if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
-                            new_heap_blkno = Some(decoded.blocks[0].blkno);
-                        }
-                    } else if info == pg_constants::XLOG_HEAP2_LOCK_UPDATED {
-                        let xlrec = v17::XlHeapLockUpdated::decode(buf);
-                        if (xlrec.flags & pg_constants::XLH_LOCK_ALL_FROZEN_CLEARED) != 0 {
-                            old_heap_blkno = Some(decoded.blocks[0].blkno);
-                            flags = pg_constants::VISIBILITYMAP_ALL_FROZEN;
-                        }
-                    }
-                } else {
-                    bail!("Unknown RMGR {} for Heap decoding", decoded.xl_rmid);
-                }
-            }
            _ => {}
        }

@@ -1076,26 +923,26 @@ impl WalIngest {
        assert_eq!(decoded.xl_rmid, pg_constants::RM_NEON_ID);

        match pg_version {
-            16 | 17 => {
+            16 => {
                let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;

                match info {
                    pg_constants::XLOG_NEON_HEAP_INSERT => {
-                        let xlrec = v17::rm_neon::XlNeonHeapInsert::decode(buf);
+                        let xlrec = v16::rm_neon::XlNeonHeapInsert::decode(buf);
                        assert_eq!(0, buf.remaining());
                        if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
                            new_heap_blkno = Some(decoded.blocks[0].blkno);
                        }
                    }
                    pg_constants::XLOG_NEON_HEAP_DELETE => {
-                        let xlrec = v17::rm_neon::XlNeonHeapDelete::decode(buf);
+                        let xlrec = v16::rm_neon::XlNeonHeapDelete::decode(buf);
                        if (xlrec.flags & pg_constants::XLH_DELETE_ALL_VISIBLE_CLEARED) != 0 {
                            new_heap_blkno = Some(decoded.blocks[0].blkno);
                        }
                    }
                    pg_constants::XLOG_NEON_HEAP_UPDATE
                    | pg_constants::XLOG_NEON_HEAP_HOT_UPDATE => {
-                        let xlrec = v17::rm_neon::XlNeonHeapUpdate::decode(buf);
+                        let xlrec = v16::rm_neon::XlNeonHeapUpdate::decode(buf);
                        // the size of tuple data is inferred from the size of the record.
                        // we can't validate the remaining number of bytes without parsing
                        // the tuple data.
@@ -1111,7 +958,7 @@ impl WalIngest {
                        }
                    }
                    pg_constants::XLOG_NEON_HEAP_MULTI_INSERT => {
-                        let xlrec = v17::rm_neon::XlNeonHeapMultiInsert::decode(buf);
+                        let xlrec = v16::rm_neon::XlNeonHeapMultiInsert::decode(buf);

                        let offset_array_len =
                            if decoded.xl_info & pg_constants::XLOG_HEAP_INIT_PAGE > 0 {
@@ -1127,7 +974,7 @@ impl WalIngest {
                        }
                    }
                    pg_constants::XLOG_NEON_HEAP_LOCK => {
-                        let xlrec = v17::rm_neon::XlNeonHeapLock::decode(buf);
+                        let xlrec = v16::rm_neon::XlNeonHeapLock::decode(buf);
                        if (xlrec.flags & pg_constants::XLH_LOCK_ALL_FROZEN_CLEARED) != 0 {
                            old_heap_blkno = Some(decoded.blocks[0].blkno);
                            flags = pg_constants::VISIBILITYMAP_ALL_FROZEN;
--- a/pageserver/src/walrecord.rs
+++ b/pageserver/src/walrecord.rs
@@ -174,7 +174,6 @@ impl DecodedWALRecord {
                }
                15 => info == postgres_ffi::v15::bindings::XLOG_DBASE_CREATE_FILE_COPY,
                16 => info == postgres_ffi::v16::bindings::XLOG_DBASE_CREATE_FILE_COPY,
-                17 => info == postgres_ffi::v17::bindings::XLOG_DBASE_CREATE_FILE_COPY,
                _ => {
                    panic!("Unsupported postgres version {pg_version}")
                }
@@ -342,47 +341,16 @@ pub mod v14 {
            }
        }
    }
-
-    #[repr(C)]
-    #[derive(Debug)]
-    pub struct XlParameterChange {
-        pub max_connections: i32,
-        pub max_worker_processes: i32,
-        pub max_wal_senders: i32,
-        pub max_prepared_xacts: i32,
-        pub max_locks_per_xact: i32,
-        pub wal_level: i32,
-        pub wal_log_hints: bool,
-        pub track_commit_timestamp: bool,
-        pub _padding: [u8; 2],
-    }
-
-    impl XlParameterChange {
-        pub fn decode(buf: &mut Bytes) -> XlParameterChange {
-            XlParameterChange {
-                max_connections: buf.get_i32_le(),
-                max_worker_processes: buf.get_i32_le(),
-                max_wal_senders: buf.get_i32_le(),
-                max_prepared_xacts: buf.get_i32_le(),
-                max_locks_per_xact: buf.get_i32_le(),
-                wal_level: buf.get_i32_le(),
-                wal_log_hints: buf.get_u8() != 0,
-                track_commit_timestamp: buf.get_u8() != 0,
-                _padding: [buf.get_u8(), buf.get_u8()],
-            }
-        }
-    }
 }

 pub mod v15 {
    pub use super::v14::{
        XlHeapDelete, XlHeapInsert, XlHeapLock, XlHeapLockUpdated, XlHeapMultiInsert, XlHeapUpdate,
-        XlParameterChange,
    };
 }

 pub mod v16 {
-    pub use super::v14::{XlHeapInsert, XlHeapLockUpdated, XlHeapMultiInsert, XlParameterChange};
+    pub use super::v14::{XlHeapInsert, XlHeapLockUpdated, XlHeapMultiInsert};
    use bytes::{Buf, Bytes};
    use postgres_ffi::{OffsetNumber, TransactionId};

@@ -561,37 +529,6 @@ pub mod v16 {
    }
 }

-pub mod v17 {
-    pub use super::v14::XlHeapLockUpdated;
-    use bytes::{Buf, Bytes};
-    pub use postgres_ffi::{TimeLineID, TimestampTz};
-
-    pub use super::v16::rm_neon;
-    pub use super::v16::{
-        XlHeapDelete, XlHeapInsert, XlHeapLock, XlHeapMultiInsert, XlHeapUpdate, XlParameterChange,
-    };
-
-    #[repr(C)]
-    #[derive(Debug)]
-    pub struct XlEndOfRecovery {
-        pub end_time: TimestampTz,
-        pub this_time_line_id: TimeLineID,
-        pub prev_time_line_id: TimeLineID,
-        pub wal_level: i32,
-    }
-
-    impl XlEndOfRecovery {
-        pub fn decode(buf: &mut Bytes) -> XlEndOfRecovery {
-            XlEndOfRecovery {
-                end_time: buf.get_i64_le(),
-                this_time_line_id: buf.get_u32_le(),
-                prev_time_line_id: buf.get_u32_le(),
-                wal_level: buf.get_i32_le(),
-            }
-        }
-    }
-}
-
 #[repr(C)]
 #[derive(Debug)]
 pub struct XlSmgrCreate {
@@ -809,13 +746,9 @@ pub struct XlClogTruncate {
 }

 impl XlClogTruncate {
-    pub fn decode(buf: &mut Bytes, pg_version: u32) -> XlClogTruncate {
+    pub fn decode(buf: &mut Bytes) -> XlClogTruncate {
        XlClogTruncate {
-            pageno: if pg_version < 17 {
-                buf.get_u32_le()
-            } else {
-                buf.get_u64_le() as u32
-            },
+            pageno: buf.get_u32_le(),
            oldest_xid: buf.get_u32_le(),
            oldest_xid_db: buf.get_u32_le(),
        }
--- a/pageserver/src/walredo.rs
+++ b/pageserver/src/walredo.rs
@@ -35,7 +35,6 @@ use anyhow::Context;
 use bytes::{Bytes, BytesMut};
 use pageserver_api::models::{WalRedoManagerProcessStatus, WalRedoManagerStatus};
 use pageserver_api::shard::TenantShardId;
-use std::future::Future;
 use std::sync::Arc;
 use std::time::Duration;
 use std::time::Instant;
@@ -297,97 +296,6 @@ impl PostgresRedoManager {
        }
    }

-    async fn do_with_walredo_process<
-        F: FnOnce(Arc<Process>) -> Fut,
-        Fut: Future<Output = Result<O, Error>>,
-        O,
-    >(
-        &self,
-        pg_version: u32,
-        closure: F,
-    ) -> Result<O, Error> {
-        let proc: Arc<Process> = match self.redo_process.get_or_init_detached().await {
-            Ok(guard) => match &*guard {
-                ProcessOnceCell::Spawned(proc) => Arc::clone(proc),
-                ProcessOnceCell::ManagerShutDown => {
-                    return Err(Error::Cancelled);
-                }
-            },
-            Err(permit) => {
-                let start = Instant::now();
-                // acquire guard before spawning process, so that we don't spawn new processes
-                // if the gate is already closed.
-                let _launched_processes_guard = match self.launched_processes.enter() {
-                    Ok(guard) => guard,
-                    Err(GateError::GateClosed) => unreachable!(
-                        "shutdown sets the once cell to `ManagerShutDown` state before closing the gate"
-                    ),
-                };
-                let proc = Arc::new(Process {
-                    process: process::WalRedoProcess::launch(
-                        self.conf,
-                        self.tenant_shard_id,
-                        pg_version,
-                    )
-                    .context("launch walredo process")?,
-                    _launched_processes_guard,
-                });
-                let duration = start.elapsed();
-                WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM.observe(duration.as_secs_f64());
-                info!(
-                    elapsed_ms = duration.as_millis(),
-                    pid = proc.id(),
-                    "launched walredo process"
-                );
-                self.redo_process
-                    .set(ProcessOnceCell::Spawned(Arc::clone(&proc)), permit);
-                proc
-            }
-        };
-
-        // async closures are unstable, would support &Process
-        let result = closure(proc.clone()).await;
-
-        if result.is_err() {
-            // Avoid concurrent callers hitting the same issue by taking `proc` out of the rotation.
-            // Note that there may be other tasks concurrent with us that also hold `proc`.
-            // We have to deal with that here.
-            // Also read the doc comment on field `self.redo_process`.
-            //
-            // NB: there may still be other concurrent threads using `proc`.
-            // The last one will send SIGKILL when the underlying Arc reaches refcount 0.
-            //
-            // NB: the drop impl blocks the dropping thread with a wait() system call for
-            // the child process. In some ways the blocking is actually good: if we
-            // deferred the waiting into the background / to tokio if we used `tokio::process`,
-            // it could happen that if walredo always fails immediately, we spawn processes faster
-            // than we can SIGKILL & `wait` for them to exit. By doing it the way we do here,
-            // we limit this risk of run-away to at most $num_runtimes * $num_executor_threads.
-            // This probably needs revisiting at some later point.
-            match self.redo_process.get() {
-                None => (),
-                Some(guard) => {
-                    match &*guard {
-                        ProcessOnceCell::ManagerShutDown => {}
-                        ProcessOnceCell::Spawned(guard_proc) => {
-                            if Arc::ptr_eq(&proc, guard_proc) {
-                                // We're the first to observe an error from `proc`, it's our job to take it out of rotation.
-                                guard.take_and_deinit();
-                            } else {
-                                // Another task already spawned another redo process (further up in this method)
-                                // and put it into `redo_process`. Do nothing, our view of the world is behind.
-                            }
-                        }
-                    }
-                }
-            }
-            // The last task that does this `drop()` of `proc` will do a blocking `wait()` syscall.
-            drop(proc);
-        }
-
-        result
-    }
-
    ///
    /// Process one request for WAL redo using wal-redo postgres
    ///
@@ -411,63 +319,130 @@ impl PostgresRedoManager {
        const MAX_RETRY_ATTEMPTS: u32 = 1;
        let mut n_attempts = 0u32;
        loop {
-            let base_img = &base_img;
-            let closure = |proc: Arc<Process>| async move {
-                let started_at = std::time::Instant::now();
-
-                // Relational WAL records are applied using wal-redo-postgres
-                let result = proc
-                    .apply_wal_records(rel, blknum, base_img, records, wal_redo_timeout)
-                    .await
-                    .context("apply_wal_records");
-
-                let duration = started_at.elapsed();
-
-                let len = records.len();
-                let nbytes = records.iter().fold(0, |acumulator, record| {
-                    acumulator
-                        + match &record.1 {
-                            NeonWalRecord::Postgres { rec, .. } => rec.len(),
-                            _ => unreachable!("Only PostgreSQL records are accepted in this batch"),
-                        }
-                });
-
-                WAL_REDO_TIME.observe(duration.as_secs_f64());
-                WAL_REDO_RECORDS_HISTOGRAM.observe(len as f64);
-                WAL_REDO_BYTES_HISTOGRAM.observe(nbytes as f64);
-
-                debug!(
-                    "postgres applied {} WAL records ({} bytes) in {} us to reconstruct page image at LSN {}",
-                    len,
-                    nbytes,
-                    duration.as_micros(),
-                    lsn
-                );
-
-                if let Err(e) = result.as_ref() {
-                    error!(
-                        "error applying {} WAL records {}..{} ({} bytes) to key {key}, from base image with LSN {} to reconstruct page image at LSN {} n_attempts={}: {:?}",
-                        records.len(),
-                        records.first().map(|p| p.0).unwrap_or(Lsn(0)),
-                        records.last().map(|p| p.0).unwrap_or(Lsn(0)),
-                        nbytes,
-                        base_img_lsn,
-                        lsn,
-                        n_attempts,
-                        e,
+            let proc: Arc<Process> = match self.redo_process.get_or_init_detached().await {
+                Ok(guard) => match &*guard {
+                    ProcessOnceCell::Spawned(proc) => Arc::clone(proc),
+                    ProcessOnceCell::ManagerShutDown => {
+                        return Err(Error::Cancelled);
+                    }
+                },
+                Err(permit) => {
+                    let start = Instant::now();
+                    // acquire guard before spawning process, so that we don't spawn new processes
+                    // if the gate is already closed.
+                    let _launched_processes_guard = match self.launched_processes.enter() {
+                                Ok(guard) => guard,
+                                Err(GateError::GateClosed) => unreachable!(
+                                    "shutdown sets the once cell to `ManagerShutDown` state before closing the gate"
+                                ),
+                            };
+                    let proc = Arc::new(Process {
+                        process: process::WalRedoProcess::launch(
+                            self.conf,
+                            self.tenant_shard_id,
+                            pg_version,
+                        )
+                        .context("launch walredo process")?,
+                        _launched_processes_guard,
+                    });
+                    let duration = start.elapsed();
+                    WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM.observe(duration.as_secs_f64());
+                    info!(
+                        duration_ms = duration.as_millis(),
+                        pid = proc.id(),
+                        "launched walredo process"
                    );
+                    self.redo_process
+                        .set(ProcessOnceCell::Spawned(Arc::clone(&proc)), permit);
+                    proc
                }
-
-                result.map_err(Error::Other)
            };
-            let result = self.do_with_walredo_process(pg_version, closure).await;

-            if result.is_ok() && n_attempts != 0 {
+            let started_at = std::time::Instant::now();
+
+            // Relational WAL records are applied using wal-redo-postgres
+            let result = proc
+                .apply_wal_records(rel, blknum, &base_img, records, wal_redo_timeout)
+                .await
+                .context("apply_wal_records");
+
+            let duration = started_at.elapsed();
+
+            let len = records.len();
+            let nbytes = records.iter().fold(0, |acumulator, record| {
+                acumulator
+                    + match &record.1 {
+                        NeonWalRecord::Postgres { rec, .. } => rec.len(),
+                        _ => unreachable!("Only PostgreSQL records are accepted in this batch"),
+                    }
+            });
+
+            WAL_REDO_TIME.observe(duration.as_secs_f64());
+            WAL_REDO_RECORDS_HISTOGRAM.observe(len as f64);
+            WAL_REDO_BYTES_HISTOGRAM.observe(nbytes as f64);
+
+            debug!(
+                "postgres applied {} WAL records ({} bytes) in {} us to reconstruct page image at LSN {}",
+                len,
+                nbytes,
+                duration.as_micros(),
+                lsn
+            );
+
+            // If something went wrong, don't try to reuse the process. Kill it, and
+            // next request will launch a new one.
+            if let Err(e) = result.as_ref() {
+                error!(
+                    "error applying {} WAL records {}..{} ({} bytes) to key {key}, from base image with LSN {} to reconstruct page image at LSN {} n_attempts={}: {:?}",
+                    records.len(),
+                    records.first().map(|p| p.0).unwrap_or(Lsn(0)),
+                    records.last().map(|p| p.0).unwrap_or(Lsn(0)),
+                    nbytes,
+                    base_img_lsn,
+                    lsn,
+                    n_attempts,
+                    e,
+                );
+                // Avoid concurrent callers hitting the same issue by taking `proc` out of the rotation.
+                // Note that there may be other tasks concurrent with us that also hold `proc`.
+                // We have to deal with that here.
+                // Also read the doc comment on field `self.redo_process`.
+                //
+                // NB: there may still be other concurrent threads using `proc`.
+                // The last one will send SIGKILL when the underlying Arc reaches refcount 0.
+                //
+                // NB: the drop impl blocks the dropping thread with a wait() system call for
+                // the child process. In some ways the blocking is actually good: if we
+                // deferred the waiting into the background / to tokio if we used `tokio::process`,
+                // it could happen that if walredo always fails immediately, we spawn processes faster
+                // than we can SIGKILL & `wait` for them to exit. By doing it the way we do here,
+                // we limit this risk of run-away to at most $num_runtimes * $num_executor_threads.
+                // This probably needs revisiting at some later point.
+                match self.redo_process.get() {
+                    None => (),
+                    Some(guard) => {
+                        match &*guard {
+                            ProcessOnceCell::ManagerShutDown => {}
+                            ProcessOnceCell::Spawned(guard_proc) => {
+                                if Arc::ptr_eq(&proc, guard_proc) {
+                                    // We're the first to observe an error from `proc`, it's our job to take it out of rotation.
+                                    guard.take_and_deinit();
+                                } else {
+                                    // Another task already spawned another redo process (further up in this method)
+                                    // and put it into `redo_process`. Do nothing, our view of the world is behind.
+                                }
+                            }
+                        }
+                    }
+                }
+                // The last task that does this `drop()` of `proc` will do a blocking `wait()` syscall.
+                drop(proc);
+            } else if n_attempts != 0 {
                info!(n_attempts, "retried walredo succeeded");
            }
            n_attempts += 1;
            if n_attempts > MAX_RETRY_ATTEMPTS || result.is_ok() {
-                return result;
+                return result.map_err(Error::Other);
            }
        }
    }
--- a/pgxn/neon/Makefile
+++ b/pgxn/neon/Makefile
@@ -23,7 +23,7 @@ SHLIB_LINK_INTERNAL = $(libpq)
 SHLIB_LINK = -lcurl

 EXTENSION = neon
-DATA = neon--1.0.sql neon--1.0--1.1.sql neon--1.1--1.2.sql neon--1.2--1.3.sql neon--1.3--1.2.sql neon--1.2--1.1.sql neon--1.1--1.0.sql  neon--1.3--1.4.sql neon--1.4--1.3.sql neon--1.4--1.5.sql neon--1.5--1.4.sql
+DATA = neon--1.0.sql neon--1.0--1.1.sql neon--1.1--1.2.sql neon--1.2--1.3.sql neon--1.3--1.2.sql neon--1.2--1.1.sql neon--1.1--1.0.sql  neon--1.3--1.4.sql neon--1.4--1.3.sql
 PGFILEDESC = "neon - cloud storage for PostgreSQL"

 EXTRA_CLEAN = \
--- a/pgxn/neon/bitmap.h
+++ b/pgxn/neon/bitmap.h
@@ -1,12 +0,0 @@
-#ifndef NEON_BITMAP_H
-#define NEON_BITMAP_H
-
-/*
- * Utilities for manipulating bits8* as bitmaps.
- */
-
-#define BITMAP_ISSET(bm, bit) ((bm)[(bit) >> 3] & (1 << ((bit) & 7)))
-#define BITMAP_SET(bm, bit) (bm)[(bit) >> 3] |= (1 << ((bit) & 7))
-#define BITMAP_CLR(bm, bit) (bm)[(bit) >> 3] &= ~(1 << ((bit) & 7))
-
-#endif //NEON_BITMAP_H
--- a/pgxn/neon/file_cache.c
+++ b/pgxn/neon/file_cache.c
@@ -27,7 +27,6 @@
 #include "pagestore_client.h"
 #include "common/hashfn.h"
 #include "pgstat.h"
-#include "port/pg_iovec.h"
 #include "postmaster/bgworker.h"
 #include RELFILEINFO_HDR
 #include "storage/buf_internals.h"
@@ -41,7 +40,6 @@
 #include "utils/guc.h"

 #include "hll.h"
-#include "bitmap.h"

 #define CriticalAssert(cond) do if (!(cond)) elog(PANIC, "Assertion %s failed at %s:%d: ", #cond, __FILE__, __LINE__); while (0)

@@ -471,99 +469,6 @@ lfc_cache_contains(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno)
 	return found;
 }

-/*
- * Check if page is present in the cache.
- * Returns true if page is found in local cache.
- */
-int
-lfc_cache_containsv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
-					int nblocks, bits8 *bitmap)
-{
-	BufferTag	tag;
-	FileCacheEntry *entry;
-	uint32		chunk_offs;
-	int			found = 0;
-	uint32		hash;
-	int			i = 0;
-
-	if (lfc_maybe_disabled())	/* fast exit if file cache is disabled */
-		return 0;
-
-	CopyNRelFileInfoToBufTag(tag, rinfo);
-	tag.forkNum = forkNum;
-
-	CriticalAssert(BufTagGetRelNumber(&tag) != InvalidRelFileNumber);
-
-	tag.blockNum = (blkno + i) & ~(BLOCKS_PER_CHUNK - 1);
-	hash = get_hash_value(lfc_hash, &tag);
-	chunk_offs = (blkno + i) & (BLOCKS_PER_CHUNK - 1);
-
-	LWLockAcquire(lfc_lock, LW_SHARED);
-
-	while (true)
-	{
-		int		this_chunk = Min(nblocks, BLOCKS_PER_CHUNK - chunk_offs);
-		if (LFC_ENABLED())
-		{
-			entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, NULL);
-
-			if (entry != NULL)
-			{
-				for (; chunk_offs < BLOCKS_PER_CHUNK && i < nblocks; chunk_offs++, i++)
-				{
-					if ((entry->bitmap[chunk_offs >> 5] & 
-						(1 << (chunk_offs & 31))) != 0)
-					{
-						BITMAP_SET(bitmap, i);
-						found++;
-					}
-				}
-			}
-			else
-			{
-				i += this_chunk;
-			}
-		}
-		else
-		{
-			return found;
-		}
-
-		/*
-		 * Break out of the iteration before doing expensive stuff for
-		 * a next iteration
-		 */
-		if (i + 1 >= nblocks)
-			break;
-
-		/*
-		 * Prepare for the next iteration. We don't unlock here, as that'd
-		 * probably be more expensive than the gains it'd get us.
-		 */
-		tag.blockNum = (blkno + i) & ~(BLOCKS_PER_CHUNK - 1);
-		hash = get_hash_value(lfc_hash, &tag);
-		chunk_offs = (blkno + i) & (BLOCKS_PER_CHUNK - 1);
-	}
-
-	LWLockRelease(lfc_lock);
-
-#if USE_ASSERT_CHECKING
-	do {
-		int count = 0;
-
-		for (int j = 0; j < nblocks; j++)
-		{
-			if (BITMAP_ISSET(bitmap, j))
-				count++;
-		}
-
-		Assert(count == found);
-	} while (false);
-#endif
-
-	return found;
-}
-
 /*
 * Evict a page (if present) from the local file cache
 */
@@ -643,171 +548,91 @@ lfc_evict(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno)
 }

 /*
- * Try to read pages from local cache.
- * Returns the number of pages read from the local cache, and sets bits in
- * 'read' for the pages which were read. This may scribble over buffers not
- * marked in 'read', so be careful with operation ordering.
- *
- * In case of error local file cache is disabled (lfc->limit is set to zero),
- * and -1 is returned. Note that 'read' and the buffers may be touched and in
- * an otherwise invalid state.
- *
- * If the mask argument is supplied, bits will be set at the offsets of pages
- * that were present and read from the LFC.
+ * Try to read a page from the local cache.
+ * Returns true if the page was found in the local cache and read successfully.
+ * On a read error the local file cache is disabled (lfc->limit is set to zero).
 */
-int
-lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
-				 void **buffers, BlockNumber nblocks, bits8 *mask)
+bool
+lfc_read(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
+		 char *buffer)
 {
 	BufferTag	tag;
 	FileCacheEntry *entry;
 	ssize_t		rc;
+	int			chunk_offs = blkno & (BLOCKS_PER_CHUNK - 1);
 	bool		result = true;
 	uint32		hash;
 	uint64		generation;
 	uint32		entry_offset;
-	int			blocks_read = 0;
-	int			buf_offset = 0;

 	if (lfc_maybe_disabled())	/* fast exit if file cache is disabled */
-		return 0;
+		return false;

 	if (!lfc_ensure_opened())
-		return 0;
+		return false;

 	CopyNRelFileInfoToBufTag(tag, rinfo);
 	tag.forkNum = forkNum;
+	tag.blockNum = blkno & ~(BLOCKS_PER_CHUNK - 1);

 	CriticalAssert(BufTagGetRelNumber(&tag) != InvalidRelFileNumber);
+	hash = get_hash_value(lfc_hash, &tag);

-	/* 
-	 * For every chunk that has blocks we're interested in, we
-	 * 1. get the chunk header
-	 * 2. Check if the chunk actually has the blocks we're interested in
-	 * 3. Read the blocks we're looking for (in one preadv), assuming they exist
-	 * 4. Update the statistics for the read call.
-	 *
-	 * If there is an error, we do an early return.
-	 */
-	while (nblocks > 0)
+	LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
+
+	if (!LFC_ENABLED())
 	{
-		struct iovec iov[PG_IOV_MAX];
-		int		chunk_offs = blkno & (BLOCKS_PER_CHUNK - 1);
-		int		blocks_in_chunk = Min(nblocks, BLOCKS_PER_CHUNK - (blkno % BLOCKS_PER_CHUNK));
-		int		iteration_hits = 0;
-		int		iteration_misses = 0;
-		Assert(blocks_in_chunk > 0);
-
-		for (int i = 0; i < blocks_in_chunk; i++)
-		{
-			iov[i].iov_base = buffers[buf_offset + i];
-			iov[i].iov_len = BLCKSZ;
-		}
-
-		tag.blockNum = blkno - chunk_offs;
-		hash = get_hash_value(lfc_hash, &tag);
-
-		LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
-
-		/* We can return the blocks we've read before LFC got disabled;
-		 * assuming we read any. */
-		if (!LFC_ENABLED())
-		{
-			LWLockRelease(lfc_lock);
-			return blocks_read;
-		}
-
-		entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, NULL);
-
-		/* Approximate working set for the blocks assumed in this entry */
-		for (int i = 0; i < blocks_in_chunk; i++)
-		{
-			tag.blockNum = blkno + i;
-			addSHLL(&lfc_ctl->wss_estimation, hash_bytes((uint8_t const*)&tag, sizeof(tag)));
-		}
-
-		if (entry == NULL)
-		{
-			/* Pages are not cached */
-			lfc_ctl->misses += blocks_in_chunk;
-			pgBufferUsage.file_cache.misses += blocks_in_chunk;
-			LWLockRelease(lfc_lock);
-
-			buf_offset += blocks_in_chunk;
-			nblocks -= blocks_in_chunk;
-			blkno += blocks_in_chunk;
-
-			continue;
-		}
-
-		/* Unlink entry from LRU list to pin it for the duration of IO operation */
-		if (entry->access_count++ == 0)
-			dlist_delete(&entry->list_node);
-
-		generation = lfc_ctl->generation;
-		entry_offset = entry->offset;
-
 		LWLockRelease(lfc_lock);
-
-		for (int i = 0; i < blocks_in_chunk; i++)
-		{
-			/*
-			 * If the page is valid, we consider it "read".
-			 * All other pages will be fetched separately by the next cache
-			 */
-			if (entry->bitmap[(chunk_offs + i) / 32] & (1 << ((chunk_offs + i) % 32)))
-			{
-				BITMAP_SET(mask, buf_offset + i);
-				iteration_hits++;
-			}
-			else
-				iteration_misses++;
-		}
-
-		Assert(iteration_hits + iteration_misses > 0);
-
-		if (iteration_hits != 0)
-		{
-			rc = preadv(lfc_desc, iov, blocks_in_chunk,
-						((off_t) entry_offset * BLOCKS_PER_CHUNK + chunk_offs) * BLCKSZ);
-
-			if (rc != (BLCKSZ * blocks_in_chunk))
-			{
-				lfc_disable("read");
-				return -1;
-			}
-		}
-
-		/* Place entry to the head of LRU list */
-		LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
-
-		if (lfc_ctl->generation == generation)
-		{
-			CriticalAssert(LFC_ENABLED());
-			lfc_ctl->hits += iteration_hits;
-			lfc_ctl->misses += iteration_misses;
-			pgBufferUsage.file_cache.hits += iteration_hits;
-			pgBufferUsage.file_cache.misses += iteration_misses;
-			CriticalAssert(entry->access_count > 0);
-			if (--entry->access_count == 0)
-				dlist_push_tail(&lfc_ctl->lru, &entry->list_node);
-		}
-		else
-		{
-			/* generation mismatch, assume error condition */
-			LWLockRelease(lfc_lock);
-			return -1;
-		}
-
-		LWLockRelease(lfc_lock);
-
-		buf_offset += blocks_in_chunk;
-		nblocks -= blocks_in_chunk;
-		blkno += blocks_in_chunk;
-		blocks_read += iteration_hits;
+		return false;
 	}

-	return blocks_read;
+	entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, NULL);
+
+	/* Approximate working set */
+	tag.blockNum = blkno;
+	addSHLL(&lfc_ctl->wss_estimation, hash_bytes((uint8_t const*)&tag, sizeof(tag)));
+
+	if (entry == NULL || (entry->bitmap[chunk_offs >> 5] & (1 << (chunk_offs & 31))) == 0)
+	{
+		/* Page is not cached */
+		lfc_ctl->misses += 1;
+		pgBufferUsage.file_cache.misses += 1;
+		LWLockRelease(lfc_lock);
+		return false;
+	}
+	/* Unlink entry from LRU list to pin it for the duration of IO operation */
+	if (entry->access_count++ == 0)
+		dlist_delete(&entry->list_node);
+	generation = lfc_ctl->generation;
+	entry_offset = entry->offset;
+
+	LWLockRelease(lfc_lock);
+
+	rc = pread(lfc_desc, buffer, BLCKSZ, ((off_t) entry_offset * BLOCKS_PER_CHUNK + chunk_offs) * BLCKSZ);
+	if (rc != BLCKSZ)
+	{
+		lfc_disable("read");
+		return false;
+	}
+
+	/* Unpin the entry: put it back at the tail (most recently used end) of the LRU list */
+	LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
+
+	if (lfc_ctl->generation == generation)
+	{
+		CriticalAssert(LFC_ENABLED());
+		lfc_ctl->hits += 1;
+		pgBufferUsage.file_cache.hits += 1;
+		CriticalAssert(entry->access_count > 0);
+		if (--entry->access_count == 0)
+			dlist_push_tail(&lfc_ctl->lru, &entry->list_node);
+	}
+	else
+		result = false;
+
+	LWLockRelease(lfc_lock);
+
+	return result;
 }

 /*
@@ -815,17 +640,20 @@ lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 * If cache is full then evict some other page.
 */
 void
-lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
-		   const void *const *buffers, BlockNumber nblocks)
+#if PG_MAJORVERSION_NUM < 16
+lfc_write(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, char *buffer)
+#else
+lfc_write(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, const void *buffer)
+#endif
 {
 	BufferTag	tag;
 	FileCacheEntry *entry;
 	ssize_t		rc;
 	bool		found;
+	int			chunk_offs = blkno & (BLOCKS_PER_CHUNK - 1);
 	uint32		hash;
 	uint64		generation;
 	uint32		entry_offset;
-	int			buf_offset = 0;

 	if (lfc_maybe_disabled())	/* fast exit if file cache is disabled */
 		return;
@@ -833,142 +661,110 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 	if (!lfc_ensure_opened())
 		return;

-	CopyNRelFileInfoToBufTag(tag, rinfo);
 	tag.forkNum = forkNum;
+	tag.blockNum = blkno & ~(BLOCKS_PER_CHUNK - 1);
+	CopyNRelFileInfoToBufTag(tag, rinfo);

 	CriticalAssert(BufTagGetRelNumber(&tag) != InvalidRelFileNumber);
+	hash = get_hash_value(lfc_hash, &tag);

-	/* 
-	 * For every chunk that has blocks we're interested in, we
-	 * 1. get the chunk header
-	 * 2. Check if the chunk actually has the blocks we're interested in
-	 * 3. Read the blocks we're looking for (in one preadv), assuming they exist
-	 * 4. Update the statistics for the read call.
-	 *
-	 * If there is an error, we do an early return.
-	 */
-	while (nblocks > 0)
+	LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
+
+	if (!LFC_ENABLED())
 	{
-		struct iovec iov[PG_IOV_MAX];
-		int		chunk_offs = blkno & (BLOCKS_PER_CHUNK - 1);
-		int		blocks_in_chunk = Min(nblocks, BLOCKS_PER_CHUNK - (blkno % BLOCKS_PER_CHUNK));
-		Assert(blocks_in_chunk > 0);
+		LWLockRelease(lfc_lock);
+		return;
+	}

-		for (int i = 0; i < blocks_in_chunk; i++)
+	entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_ENTER, &found);
+
+	if (found)
+	{
+		/*
+		 * Unlink entry from LRU list to pin it for the duration of IO
+		 * operation
+		 */
+		if (entry->access_count++ == 0)
+			dlist_delete(&entry->list_node);
+	}
+	else
+	{
+		/*
+		 * We have two choices if all cache pages are pinned (i.e. used in IO
+		 * operations):
+		 *
+		 * 1) Wait until some of these operations complete and their pages
+		 * are unpinned.
+		 *
+		 * 2) Allocate one more chunk, so that the specified cache size is
+		 * more of a recommendation than a hard limit.
+		 *
+		 * The probability of such an event (all pages being pinned) is
+		 * considered very small: it would take a very large number of
+		 * concurrent IO operations, and those are limited by
+		 * max_connections. We therefore prefer not to complicate the code
+		 * and use the second approach.
+		 */
+		if (lfc_ctl->used >= lfc_ctl->limit && !dlist_is_empty(&lfc_ctl->lru))
 		{
-			iov[i].iov_base = unconstify(void *, buffers[buf_offset + i]);
-			iov[i].iov_len = BLCKSZ;
+			/* Cache overflow: evict least recently used chunk */
+			FileCacheEntry *victim = dlist_container(FileCacheEntry, list_node, dlist_pop_head_node(&lfc_ctl->lru));
+
+			CriticalAssert(victim->access_count == 0);
+			entry->offset = victim->offset; /* grab victim's chunk */
+			hash_search_with_hash_value(lfc_hash, &victim->key, victim->hash, HASH_REMOVE, NULL);
+			neon_log(DEBUG2, "Swap file cache page");
 		}
+		else if (!dlist_is_empty(&lfc_ctl->holes))
+		{
+			/* We can reuse a hole that was left behind when the LFC was shrunk previously */
+			FileCacheEntry *hole = dlist_container(FileCacheEntry, list_node, dlist_pop_head_node(&lfc_ctl->holes));
+			uint32		offset = hole->offset;
+			bool		found;

-		tag.blockNum = blkno & ~(BLOCKS_PER_CHUNK - 1);
-		hash = get_hash_value(lfc_hash, &tag);
+			hash_search_with_hash_value(lfc_hash, &hole->key, hole->hash, HASH_REMOVE, &found);
+			CriticalAssert(found);

+			lfc_ctl->used += 1;
+			entry->offset = offset;	/* reuse the hole */
+		}
+		else
+		{
+			lfc_ctl->used += 1;
+			entry->offset = lfc_ctl->size++;	/* allocate new chunk at end
+												 * of file */
+		}
+		entry->access_count = 1;
+		entry->hash = hash;
+		memset(entry->bitmap, 0, sizeof entry->bitmap);
+	}
+
+	generation = lfc_ctl->generation;
+	entry_offset = entry->offset;
+	lfc_ctl->writes += 1;
+	LWLockRelease(lfc_lock);
+
+	rc = pwrite(lfc_desc, buffer, BLCKSZ, ((off_t) entry_offset * BLOCKS_PER_CHUNK + chunk_offs) * BLCKSZ);
+	if (rc != BLCKSZ)
+	{
+		lfc_disable("write");
+	}
+	else
+	{
 		LWLockAcquire(lfc_lock, LW_EXCLUSIVE);

-		if (!LFC_ENABLED())
+		if (lfc_ctl->generation == generation)
 		{
-			LWLockRelease(lfc_lock);
-			return;
+			CriticalAssert(LFC_ENABLED());
+			/* Unpin the entry: put it back at the tail (most recently used end) of the LRU list */
+			CriticalAssert(entry->access_count > 0);
+			if (--entry->access_count == 0)
+				dlist_push_tail(&lfc_ctl->lru, &entry->list_node);
+
+			entry->bitmap[chunk_offs >> 5] |= (1 << (chunk_offs & 31));
 		}

-		entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_ENTER, &found);
-
-		if (found)
-		{
-			/*
-			 * Unlink entry from LRU list to pin it for the duration of IO
-			 * operation
-			 */
-			if (entry->access_count++ == 0)
-				dlist_delete(&entry->list_node);
-		}
-		else
-		{
-			/*
-			 * We have two choices if all cache pages are pinned (i.e. used in IO
-			 * operations):
-			 *
-			 * 1) Wait until some of this operation is completed and pages is
-			 * unpinned.
-			 *
-			 * 2) Allocate one more chunk, so that specified cache size is more
-			 * recommendation than hard limit.
-			 *
-			 * As far as probability of such event (that all pages are pinned) is
-			 * considered to be very very small: there are should be very large
-			 * number of concurrent IO operations and them are limited by
-			 * max_connections, we prefer not to complicate code and use second
-			 * approach.
-			 */
-			if (lfc_ctl->used >= lfc_ctl->limit && !dlist_is_empty(&lfc_ctl->lru))
-			{
-				/* Cache overflow: evict least recently used chunk */
-				FileCacheEntry *victim = dlist_container(FileCacheEntry, list_node, dlist_pop_head_node(&lfc_ctl->lru));
-	
-				CriticalAssert(victim->access_count == 0);
-				entry->offset = victim->offset; /* grab victim's chunk */
-				hash_search_with_hash_value(lfc_hash, &victim->key, victim->hash, HASH_REMOVE, NULL);
-				neon_log(DEBUG2, "Swap file cache page");
-			}
-			else if (!dlist_is_empty(&lfc_ctl->holes))
-			{
-				/* We can reuse a hole that was left behind when the LFC was shrunk previously */
-				FileCacheEntry *hole = dlist_container(FileCacheEntry, list_node, dlist_pop_head_node(&lfc_ctl->holes));
-				uint32		offset = hole->offset;
-				bool		found;
-	
-				hash_search_with_hash_value(lfc_hash, &hole->key, hole->hash, HASH_REMOVE, &found);
-				CriticalAssert(found);
-	
-				lfc_ctl->used += 1;
-				entry->offset = offset;	/* reuse the hole */
-			}
-			else
-			{
-				lfc_ctl->used += 1;
-				entry->offset = lfc_ctl->size++;	/* allocate new chunk at end
-													 * of file */
-			}
-			entry->access_count = 1;
-			entry->hash = hash;
-			memset(entry->bitmap, 0, sizeof entry->bitmap);
-		}
-
-		generation = lfc_ctl->generation;
-		entry_offset = entry->offset;
-		lfc_ctl->writes += blocks_in_chunk;
 		LWLockRelease(lfc_lock);
-
-		rc = pwritev(lfc_desc, iov, blocks_in_chunk,
-					 ((off_t) entry_offset * BLOCKS_PER_CHUNK + chunk_offs) * BLCKSZ);
-		if (rc != BLCKSZ * blocks_in_chunk)
-		{
-			lfc_disable("write");
-		}
-		else
-		{
-			LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
-
-			if (lfc_ctl->generation == generation)
-			{
-				CriticalAssert(LFC_ENABLED());
-				/* Place entry to the head of LRU list */
-				CriticalAssert(entry->access_count > 0);
-				if (--entry->access_count == 0)
-					dlist_push_tail(&lfc_ctl->lru, &entry->list_node);
-
-				for (int i = 0; i < blocks_in_chunk; i++)
-				{
-					entry->bitmap[(chunk_offs + i) >> 5] |=
-						(1 << ((chunk_offs + i) & 31));
-				}
-			}
-
-			LWLockRelease(lfc_lock);
-		}
-		blkno += blocks_in_chunk;
-		buf_offset += blocks_in_chunk;
-		nblocks -= blocks_in_chunk;
 	}
 }

@@ -1263,7 +1059,7 @@ approximate_working_set_size_seconds(PG_FUNCTION_ARGS)
 		int32 dc;
 		time_t duration = PG_ARGISNULL(0) ? (time_t)-1 : PG_GETARG_INT32(0);
 		LWLockAcquire(lfc_lock, LW_SHARED);
-		dc = (int32) estimateSHLL(&lfc_ctl->wss_estimation, duration, 1.0);
+		dc = (int32) estimateSHLL(&lfc_ctl->wss_estimation, duration);
 		LWLockRelease(lfc_lock);
 		PG_RETURN_INT32(dc);
 	}
@@ -1280,7 +1076,7 @@ approximate_working_set_size(PG_FUNCTION_ARGS)
 		int32 dc;
 		bool reset = PG_GETARG_BOOL(0);
 		LWLockAcquire(lfc_lock, reset ? LW_EXCLUSIVE : LW_SHARED);
-		dc = (int32) estimateSHLL(&lfc_ctl->wss_estimation, (time_t)-1, 1.0);
+		dc = (int32) estimateSHLL(&lfc_ctl->wss_estimation, (time_t)-1);
 		if (reset)
 			memset(lfc_ctl->wss_estimation.regs, 0, sizeof lfc_ctl->wss_estimation.regs);
 		LWLockRelease(lfc_lock);
@@ -1288,21 +1084,3 @@ approximate_working_set_size(PG_FUNCTION_ARGS)
 	}
 	PG_RETURN_NULL();
 }
-
-PG_FUNCTION_INFO_V1(approximate_optimal_cache_size);
-
-Datum
-approximate_optimal_cache_size(PG_FUNCTION_ARGS)
-{
-	if (lfc_size_limit != 0)
-	{
-		int32 dc;
-		time_t duration = PG_ARGISNULL(0) ? (time_t)-1 : PG_GETARG_INT32(0);
-		double min_hit_ratio = PG_ARGISNULL(1) ? 1.0 : PG_GETARG_FLOAT8(1);
-		LWLockAcquire(lfc_lock, LW_SHARED);
-		dc = (int32) estimateSHLL(&lfc_ctl->wss_estimation, duration, min_hit_ratio);
-		LWLockRelease(lfc_lock);
-		PG_RETURN_INT32(dc);
-	}
-	PG_RETURN_NULL();
-}
--- a/pgxn/neon/hll.c
+++ b/pgxn/neon/hll.c
@@ -6,7 +6,7 @@
 * Portions Copyright (c) 2014-2023, PostgreSQL Global Development Group
 *
 * Implements https://hal.science/hal-00465313/document
- *
+ * 
 * Based on Hideaki Ohno's C++ implementation.  This is probably not ideally
 * suited to estimating the cardinality of very large sets;  in particular, we
 * have not attempted to further optimize the implementation as described in
@@ -126,69 +126,22 @@ addSHLL(HyperLogLogState *cState, uint32 hash)
 	/* Compute the rank of the remaining 32 - "k" (registerWidth) bits */
 	count = rho(hash << HLL_BIT_WIDTH, HLL_C_BITS);

-	if (cState->regs[index][count].ts)
-	{
-		/* update histgoram */
-		int64_t delta = (now - cState->regs[index][count].ts)/USECS_PER_SEC;
-		uint32_t new_histogram[HIST_SIZE] = {0};
-		for (int i = 0; i < HIST_SIZE; i++) {
-			/* Use middle point of interval */
-			uint32 interval_log2 = pg_ceil_log2_32((delta + (HIST_MIN_INTERVAL*((1<<i) + ((1<<i)/2))/2)) / HIST_MIN_INTERVAL);
-			uint32 cell = Min(interval_log2, HIST_SIZE-1);
-			new_histogram[cell] += cState->regs[index][count].histogram[i];
-		}
-		memcpy(cState->regs[index][count].histogram, new_histogram, sizeof new_histogram);
-	}
-	cState->regs[index][count].ts = now;
-	cState->regs[index][count].histogram[0] += 1; // most recent access always goes to first histogram backet
-}
-
-static uint32_t
-getAccessCount(const HyperLogLogRegister* reg, time_t duration)
-{
-	uint32_t count = 0;
-    /* Simplest solution is to take in account all points fro overlapped interval */
-	for (size_t i = 0; i < HIST_SIZE && HIST_MIN_INTERVAL*((1 << i)/2) <= duration; i++) {
-		count += reg->histogram[i];
-	}
-	return count;
+	cState->regs[index][count] = now;
 }

 static uint8
-getMaximum(const HyperLogLogRegister* reg, TimestampTz since, time_t duration, double min_hit_ratio)
+getMaximum(const TimestampTz* reg, TimestampTz since)
 {
 	uint8 max = 0;
-	size_t i, j;
-	if (min_hit_ratio == 1.0)
+
+	for (size_t i = 0; i < HLL_C_BITS + 1; i++)
 	{
-		for (i = 0; i < HLL_C_BITS + 1; i++)
+		if (reg[i] >= since)
 		{
-			if (reg[i].ts >= since)
-			{
-				max = i;
-			}
-		}
-	}
-	else
-	{
-		uint32_t total_count = 0;
-		for (i = 0; i < HLL_C_BITS + 1; i++)
-		{
-			total_count += getAccessCount(&reg[i], duration);
-		}
-		if (total_count != 0)
-		{
-			const double threshold = total_count * (1 - min_hit_ratio);
-			for (i = 0; i < HLL_C_BITS + 1; i++)
-			{
-				// Take in account only bits with access frequncy exceeding maximal miss rate (1 - hit rate)
-				if (reg[i].ts >= since && getAccessCount(&reg[i], duration) >= threshold)
-				{
-					max = i;
-				}
-			}
+			max = i;
 		}
 	}
+
 	return max;
 }

@@ -197,7 +150,7 @@ getMaximum(const HyperLogLogRegister* reg, TimestampTz since, time_t duration, d
 * Estimates cardinality, based on elements added so far
 */
 double
-estimateSHLL(HyperLogLogState *cState, time_t duration, double min_hit_ratio)
+estimateSHLL(HyperLogLogState *cState, time_t duration)
 {
 	double		result;
 	double		sum = 0.0;
@@ -208,7 +161,7 @@ estimateSHLL(HyperLogLogState *cState, time_t duration, double min_hit_ratio)

 	for (i = 0; i < HLL_N_REGISTERS; i++)
 	{
-		R[i] = getMaximum(cState->regs[i], since, duration, min_hit_ratio);
+		R[i] = getMaximum(cState->regs[i], since);
 		sum += 1.0 / pow(2.0, R[i]);
 	}

--- a/pgxn/neon/hll.h
+++ b/pgxn/neon/hll.h
@@ -53,14 +53,6 @@
 #define HLL_C_BITS      (32 - HLL_BIT_WIDTH)
 #define HLL_N_REGISTERS (1 << HLL_BIT_WIDTH)

-/*
- * Number of histogram cells. We use exponential histogram with first interval
- * equals to one minutes. Autoscaler request LFC  statistic with intervals 1,2,...,60 minutes
- * so 2^8=64 seems to be enough for our needs.
- */
-#define HIST_SIZE         8
-#define HIST_MIN_INTERVAL 60 /* seconds */
-
 /*
 * HyperLogLog is an approximate technique for computing the number of distinct
 * entries in a set.  Importantly, it does this by using a fixed amount of
@@ -77,21 +69,18 @@
 * modified timestamp >= the query timestamp. This value is the number of bits
 * for this register in the normal HLL calculation.
 *
- * The memory usage is 2^B * (C + 1) * sizeof(HyperLogLogRegister), or 920kiB.
+ * The memory usage is 2^B * (C + 1) * sizeof(TimestampTz), or 184kiB.
+ * Usage could be halved if we decide to reduce the required time dimension
+ * precision; as 32 bits in second precision should be enough for statistics.
+ * However, that is not yet implemented.
 */
-typedef struct
-{
-	TimestampTz ts; /* last access timestamp */
-	uint32_t    histogram[HIST_SIZE]; /* access counter exponential histogram */
-} HyperLogLogRegister;
-
 typedef struct HyperLogLogState
 {
-	HyperLogLogRegister regs[HLL_N_REGISTERS][HLL_C_BITS + 1];
+	TimestampTz regs[HLL_N_REGISTERS][HLL_C_BITS + 1];
 } HyperLogLogState;

 extern void   initSHLL(HyperLogLogState *cState);
 extern void   addSHLL(HyperLogLogState *cState, uint32 hash);
-extern double estimateSHLL(HyperLogLogState *cState, time_t dutration, double min_hit_ratio);
+extern double estimateSHLL(HyperLogLogState *cState, time_t dutration);

 #endif
--- a/pgxn/neon/libpagestore.c
+++ b/pgxn/neon/libpagestore.c
@@ -537,11 +537,7 @@ pageserver_connect(shardno_t shard_no, int elevel)
 		/* No more polling needed; connection succeeded */
 		shard->last_connect_time = GetCurrentTimestamp();

-#if PG_MAJORVERSION_NUM >= 17
-		shard->wes_read = CreateWaitEventSet(NULL, 3);
-#else
 		shard->wes_read = CreateWaitEventSet(TopMemoryContext, 3);
-#endif
 		AddWaitEventToSet(shard->wes_read, WL_LATCH_SET, PGINVALID_SOCKET,
 						  MyLatch, NULL);
 		AddWaitEventToSet(shard->wes_read, WL_EXIT_ON_PM_DEATH, PGINVALID_SOCKET,
--- a/pgxn/neon/neon--1.4--1.5.sql
+++ b/pgxn/neon/neon--1.4--1.5.sql
@@ -1,10 +0,0 @@
-\echo Use "ALTER EXTENSION neon UPDATE TO '1.5'" to load this file. \quit
-
-- returns minimal LFC cache size (in 8kb pages) provided specified hit rate
-CREATE FUNCTION approximate_optimal_cache_size(duration_sec integer default null, min_hit_ration float8 default null)
-RETURNS integer
-AS 'MODULE_PATHNAME', 'approximate_optimal_cache_size'
-LANGUAGE C PARALLEL SAFE;
-
-GRANT EXECUTE ON FUNCTION approximate_optimal_cache_size(integer,float8) TO pg_monitor;
-
--- a/pgxn/neon/neon--1.5--1.4.sql
+++ b/pgxn/neon/neon--1.5--1.4.sql
@@ -1 +0,0 @@
-DROP FUNCTION IF EXISTS approximate_optimal_cache_size(integer,float8) CASCADE;
--- a/Show More
+++ b/Show More
				`@@ -1 +0,0 @@`
				`GRANT EXECUTE ON FUNCTION pg_show_replication_origin_status TO neon_superuser;`
				`@@ -1 +0,0 @@`
				`DROP FUNCTION IF EXISTS approximate_optimal_cache_size(integer,float8) CASCADE;`