revert pageserver parts of "feat(walredo): use posix_spawn by moving close_fds() work to walredo C code (#6574)"

The addition of close_range() to the C code remains; it doesn't matter for the purposes of this reproducer. This reverts parts of commit 1be5e564ce.
2026-05-27 01:50:38 +00:00 · 2024-02-15 18:48:04 +00:00
123 changed files with 2556 additions and 6484 deletions
--- a/.github/actions/allure-report-generate/action.yml
+++ b/.github/actions/allure-report-generate/action.yml
@@ -59,7 +59,7 @@ runs:
        BUCKET: neon-github-public-dev

    # TODO: We can replace with a special docker image with Java and Allure pre-installed
-    - uses: actions/setup-java@v4
+    - uses: actions/setup-java@v3
      with:
        distribution: 'temurin'
        java-version: '17'
@@ -76,8 +76,8 @@ runs:
          rm -f ${ALLURE_ZIP}
        fi
      env:
-        ALLURE_VERSION: 2.27.0
-        ALLURE_ZIP_SHA256: b071858fb2fa542c65d8f152c5c40d26267b2dfb74df1f1608a589ecca38e777
+        ALLURE_VERSION: 2.24.0
+        ALLURE_ZIP_SHA256: 60b1d6ce65d9ef24b23cf9c2c19fd736a123487c38e54759f1ed1a7a77353c90

    # Potentially we could have several running build for the same key (for example, for the main branch), so we use improvised lock for this
    - name: Acquire lock
@@ -180,7 +180,7 @@ runs:
        fi

    - name: Cache poetry deps
-      uses: actions/cache@v4
+      uses: actions/cache@v3
      with:
        path: ~/.cache/pypoetry/virtualenvs
        key: v2-${{ runner.os }}-python-deps-${{ hashFiles('poetry.lock') }}
@@ -215,7 +215,7 @@ runs:
          rm -rf ${WORKDIR}
        fi

-    - uses: actions/github-script@v7
+    - uses: actions/github-script@v6
      if: always()
      env:
        REPORT_URL: ${{ steps.generate-report.outputs.report-url }}
--- a/.github/actions/run-python-test-set/action.yml
+++ b/.github/actions/run-python-test-set/action.yml
@@ -80,13 +80,13 @@ runs:

    - name: Checkout
      if: inputs.needs_postgres_source == 'true'
-      uses: actions/checkout@v4
+      uses: actions/checkout@v3
      with:
        submodules: true
        fetch-depth: 1

    - name: Cache poetry deps
-      uses: actions/cache@v4
+      uses: actions/cache@v3
      with:
        path: ~/.cache/pypoetry/virtualenvs
        key: v2-${{ runner.os }}-python-deps-${{ hashFiles('poetry.lock') }}
--- a/.github/workflows/approved-for-ci-run.yml
+++ b/.github/workflows/approved-for-ci-run.yml
@@ -64,7 +64,7 @@ jobs:
    steps:
      - run: gh pr --repo "${GITHUB_REPOSITORY}" edit "${PR_NUMBER}" --remove-label "approved-for-ci-run"

-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v3
        with:
          ref: main
          token: ${{ secrets.CI_ACCESS_TOKEN }}
--- a/.github/workflows/benchmarking.yml
+++ b/.github/workflows/benchmarking.yml
@@ -66,7 +66,7 @@ jobs:
      options: --init

    steps:
-    - uses: actions/checkout@v4
+    - uses: actions/checkout@v3

    - name: Download Neon artifact
      uses: ./.github/actions/download
@@ -221,7 +221,7 @@ jobs:
    timeout-minutes: 480

    steps:
-    - uses: actions/checkout@v4
+    - uses: actions/checkout@v3

    - name: Download Neon artifact
      uses: ./.github/actions/download
@@ -366,7 +366,7 @@ jobs:
      options: --init

    steps:
-    - uses: actions/checkout@v4
+    - uses: actions/checkout@v3

    - name: Download Neon artifact
      uses: ./.github/actions/download
@@ -465,7 +465,7 @@ jobs:
      options: --init

    steps:
-    - uses: actions/checkout@v4
+    - uses: actions/checkout@v3

    - name: Download Neon artifact
      uses: ./.github/actions/download
@@ -562,7 +562,7 @@ jobs:
      options: --init

    steps:
-    - uses: actions/checkout@v4
+    - uses: actions/checkout@v3

    - name: Download Neon artifact
      uses: ./.github/actions/download
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -69,7 +69,7 @@ jobs:

    steps:
      - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@v3
        with:
          fetch-depth: 0

@@ -106,13 +106,13 @@ jobs:

    steps:
      - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@v3
        with:
          submodules: false
          fetch-depth: 1

      - name: Cache poetry deps
-        uses: actions/cache@v4
+        uses: actions/cache@v3
        with:
          path: ~/.cache/pypoetry/virtualenvs
          key: v2-${{ runner.os }}-python-deps-${{ hashFiles('poetry.lock') }}
@@ -138,7 +138,7 @@ jobs:

    steps:
      - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@v3
        with:
          submodules: true
          fetch-depth: 1
@@ -146,7 +146,7 @@ jobs:
 #      Disabled for now
 #      - name: Restore cargo deps cache
 #        id: cache_cargo
-#        uses: actions/cache@v4
+#        uses: actions/cache@v3
 #        with:
 #          path: |
 #            !~/.cargo/registry/src
@@ -231,7 +231,7 @@ jobs:
          done

      - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@v3
        with:
          submodules: true
          fetch-depth: 1
@@ -303,7 +303,7 @@ jobs:
      # compressed crates.
 #      - name: Cache cargo deps
 #        id: cache_cargo
-#        uses: actions/cache@v4
+#        uses: actions/cache@v3
 #        with:
 #          path: |
 #            ~/.cargo/registry/
@@ -317,21 +317,21 @@ jobs:

      - name: Cache postgres v14 build
        id: cache_pg_14
-        uses: actions/cache@v4
+        uses: actions/cache@v3
        with:
          path: pg_install/v14
          key: v1-${{ runner.os }}-${{ matrix.build_type }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}

      - name: Cache postgres v15 build
        id: cache_pg_15
-        uses: actions/cache@v4
+        uses: actions/cache@v3
        with:
          path: pg_install/v15
          key: v1-${{ runner.os }}-${{ matrix.build_type }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}

      - name: Cache postgres v16 build
        id: cache_pg_16
-        uses: actions/cache@v4
+        uses: actions/cache@v3
        with:
          path: pg_install/v16
          key: v1-${{ runner.os }}-${{ matrix.build_type }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
@@ -451,7 +451,7 @@ jobs:
        pg_version: [ v14, v15, v16 ]
    steps:
      - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@v3
        with:
          submodules: true
          fetch-depth: 1
@@ -472,14 +472,9 @@ jobs:
          CHECK_ONDISK_DATA_COMPATIBILITY: nonempty
          BUILD_TAG: ${{ needs.tag.outputs.build-tag }}
          PAGESERVER_VIRTUAL_FILE_IO_ENGINE: std-fs
-          PAGESERVER_GET_VECTORED_IMPL: vectored

-      # Temporary disable this step until we figure out why it's so flaky
-      # Ref https://github.com/neondatabase/neon/issues/4540
      - name: Merge and upload coverage data
-        if: |
-          false &&
-          matrix.build_type == 'debug' && matrix.pg_version == 'v14'
+        if: matrix.build_type == 'debug' && matrix.pg_version == 'v14'
        uses: ./.github/actions/save-coverage-data

  get-benchmarks-durations:
@@ -493,10 +488,10 @@ jobs:
    if: github.ref_name == 'main' || contains(github.event.pull_request.labels.*.name, 'run-benchmarks')
    steps:
      - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@v3

      - name: Cache poetry deps
-        uses: actions/cache@v4
+        uses: actions/cache@v3
        with:
          path: ~/.cache/pypoetry/virtualenvs
          key: v1-${{ runner.os }}-python-deps-${{ hashFiles('poetry.lock') }}
@@ -530,7 +525,7 @@ jobs:
        build_type: [ release ]
    steps:
      - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@v3

      - name: Pytest benchmarks
        uses: ./.github/actions/run-python-test-set
@@ -559,7 +554,7 @@ jobs:
      options: --init

    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v3

      - name: Create Allure report
        if: ${{ !cancelled() }}
@@ -570,7 +565,7 @@ jobs:
        env:
          REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}

-      - uses: actions/github-script@v7
+      - uses: actions/github-script@v6
        if: ${{ !cancelled() }}
        with:
          # Retry script for 5XX server errors: https://github.com/actions/github-script#retries
@@ -610,7 +605,7 @@ jobs:
        coverage-json: ${{ steps.upload-coverage-report-new.outputs.summary-json }}
    steps:
      - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@v3
        with:
          submodules: true
          fetch-depth: 0
@@ -679,7 +674,7 @@ jobs:
          REPORT_URL=https://${BUCKET}.s3.amazonaws.com/code-coverage/${COMMIT_SHA}/lcov/summary.json
          echo "summary-json=${REPORT_URL}" >> $GITHUB_OUTPUT

-      - uses: actions/github-script@v7
+      - uses: actions/github-script@v6
        env:
          REPORT_URL_NEW: ${{ steps.upload-coverage-report-new.outputs.report-url }}
          COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
@@ -905,7 +900,7 @@ jobs:

    steps:
      - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@v3
        with:
          fetch-depth: 0

@@ -1119,7 +1114,7 @@ jobs:
          done

      - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@v3
        with:
          submodules: false
          fetch-depth: 0
@@ -1142,7 +1137,7 @@ jobs:

      - name: Create git tag
        if: github.ref_name == 'release'
-        uses: actions/github-script@v7
+        uses: actions/github-script@v6
        with:
          # Retry script for 5XX server errors: https://github.com/actions/github-script#retries
          retries: 5
@@ -1156,7 +1151,7 @@ jobs:

      - name: Create GitHub release
        if: github.ref_name == 'release'
-        uses: actions/github-script@v7
+        uses: actions/github-script@v6
        with:
          # Retry script for 5XX server errors: https://github.com/actions/github-script#retries
          retries: 5
--- a/.github/workflows/neon_extra_builds.yml
+++ b/.github/workflows/neon_extra_builds.yml
@@ -57,21 +57,21 @@ jobs:

      - name: Cache postgres v14 build
        id: cache_pg_14
-        uses: actions/cache@v4
+        uses: actions/cache@v3
        with:
          path: pg_install/v14
          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}

      - name: Cache postgres v15 build
        id: cache_pg_15
-        uses: actions/cache@v4
+        uses: actions/cache@v3
        with:
          path: pg_install/v15
          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}

      - name: Cache postgres v16 build
        id: cache_pg_16
-        uses: actions/cache@v4
+        uses: actions/cache@v3
        with:
          path: pg_install/v16
          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
@@ -82,7 +82,7 @@ jobs:
          echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV

      - name: Cache cargo deps
-        uses: actions/cache@v4
+        uses: actions/cache@v3
        with:
          path: |
            ~/.cargo/registry
@@ -172,21 +172,21 @@ jobs:

      - name: Cache postgres v14 build
        id: cache_pg_14
-        uses: actions/cache@v4
+        uses: actions/cache@v3
        with:
          path: pg_install/v14
          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}

      - name: Cache postgres v15 build
        id: cache_pg_15
-        uses: actions/cache@v4
+        uses: actions/cache@v3
        with:
          path: pg_install/v15
          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}

      - name: Cache postgres v16 build
        id: cache_pg_16
-        uses: actions/cache@v4
+        uses: actions/cache@v3
        with:
          path: pg_install/v16
          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
@@ -356,7 +356,7 @@ jobs:
          echo "report-url=${REPORT_URL}" >> $GITHUB_OUTPUT

      - name: Publish build stats report
-        uses: actions/github-script@v7
+        uses: actions/github-script@v6
        env:
          REPORT_URL: ${{ steps.upload-stats.outputs.report-url }}
          SHA: ${{ github.event.pull_request.head.sha || github.sha }}
--- a/.github/workflows/pg_clients.yml
+++ b/.github/workflows/pg_clients.yml
@@ -28,7 +28,7 @@ jobs:

    steps:
    - name: Checkout
-      uses: actions/checkout@v4
+      uses: actions/checkout@v3

    - uses: actions/setup-python@v4
      with:
@@ -38,7 +38,7 @@ jobs:
      uses: snok/install-poetry@v1

    - name: Cache poetry deps
-      uses: actions/cache@v4
+      uses: actions/cache@v3
      with:
        path: ~/.cache/pypoetry/virtualenvs
        key: v2-${{ runner.os }}-python-deps-ubunutu-latest-${{ hashFiles('poetry.lock') }}
@@ -82,7 +82,7 @@ jobs:
    # It will be fixed after switching to gen2 runner
    - name: Upload python test logs
      if: always()
-      uses: actions/upload-artifact@v4
+      uses: actions/upload-artifact@v3
      with:
        retention-days: 7
        name: python-test-pg_clients-${{ runner.os }}-stage-logs
--- a/.github/workflows/trigger-e2e-tests.yml
+++ b/.github/workflows/trigger-e2e-tests.yml
@@ -9,7 +9,7 @@ on:
 defaults:
  run:
    shell: bash -euxo pipefail {0}
-
+    
 env:
  # A concurrency group that we use for e2e-tests runs, matches `concurrency.group` above with `github.repository` as a prefix
  E2E_CONCURRENCY_GROUP: ${{ github.repository }}-e2e-tests-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
@@ -37,7 +37,7 @@ jobs:

    steps:
      - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@v3
        with:
          fetch-depth: 0

@@ -115,3 +115,4 @@ jobs:
                \"concurrency_group\": \"${{ env.E2E_CONCURRENCY_GROUP }}\"
              }
            }"
+ 
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -286,7 +286,6 @@ dependencies = [
 "git-version",
 "hyper",
 "metrics",
- "once_cell",
 "pageserver_api",
 "pageserver_client",
 "postgres_connection",
@@ -1157,6 +1156,16 @@ version = "0.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2da6da31387c7e4ef160ffab6d5e7f00c42626fe39aea70a7b0f1773f7dd6c1b"

+[[package]]
+name = "close_fds"
+version = "0.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3bc416f33de9d59e79e57560f450d21ff8393adcf1cdfc3e6d8fb93d5f88a2ed"
+dependencies = [
+ "cfg-if",
+ "libc",
+]
+
 [[package]]
 name = "colorchoice"
 version = "1.0.0"
@@ -1814,7 +1823,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e875f1719c16de097dee81ed675e2d9bb63096823ed3f0ca827b7dea3028bbbb"
 dependencies = [
 "enumset_derive",
- "serde",
 ]

 [[package]]
@@ -2759,17 +2767,6 @@ version = "1.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"

-[[package]]
-name = "leaky-bucket"
-version = "1.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8eb491abd89e9794d50f93c8db610a29509123e3fbbc9c8c67a528e9391cd853"
-dependencies = [
- "parking_lot 0.12.1",
- "tokio",
- "tracing",
-]
-
 [[package]]
 name = "libc"
 version = "0.2.150"
@@ -3461,7 +3458,6 @@ name = "pageserver"
 version = "0.1.0"
 dependencies = [
 "anyhow",
- "arc-swap",
 "async-compression",
 "async-stream",
 "async-trait",
@@ -3471,6 +3467,7 @@ dependencies = [
 "camino-tempfile",
 "chrono",
 "clap",
+ "close_fds",
 "const_format",
 "consumption_metrics",
 "crc32c",
@@ -3489,7 +3486,6 @@ dependencies = [
 "humantime-serde",
 "hyper",
 "itertools",
- "leaky-bucket",
 "md5",
 "metrics",
 "nix 0.27.1",
@@ -3552,7 +3548,6 @@ dependencies = [
 "enum-map",
 "hex",
 "humantime-serde",
- "itertools",
 "postgres_ffi",
 "rand 0.8.5",
 "serde",
@@ -6363,7 +6358,6 @@ dependencies = [
 "hex-literal",
 "hyper",
 "jsonwebtoken",
- "leaky-bucket",
 "metrics",
 "nix 0.27.1",
 "once_cell",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -66,6 +66,7 @@ camino = "1.1.6"
 cfg-if = "1.0.0"
 chrono = { version = "0.4", default-features = false, features = ["clock"] }
 clap = { version = "4.0", features = ["derive"] }
+close_fds = "0.3.2"
 comfy-table = "6.1"
 const_format = "0.2"
 crc32c = "0.6"
@@ -97,7 +98,6 @@ ipnet = "2.9.0"
 itertools = "0.10"
 jsonwebtoken = "9"
 lasso = "0.7"
-leaky-bucket = "1.0.1"
 libc = "0.2"
 md5 = "0.7.0"
 memoffset = "0.8"
--- a/2
+++ b/2
@@ -47,7 +47,7 @@ COPY --chown=nonroot . .
 # Show build caching stats to check if it was used in the end.
 # Has to be the part of the same RUN since cachepot daemon is killed in the end of this RUN, losing the compilation stats.
 RUN set -e \
-    && RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment" cargo build  \
+    && mold -run cargo build  \
      --bin pg_sni_router  \
      --bin pageserver  \
      --bin pagectl  \
--- a/Dockerfile.compute-node
+++ b/Dockerfile.compute-node
@@ -769,24 +769,6 @@ RUN wget https://github.com/eulerto/wal2json/archive/refs/tags/wal2json_2_5.tar.
    make -j $(getconf _NPROCESSORS_ONLN) && \
    make -j $(getconf _NPROCESSORS_ONLN) install

-#########################################################################################
-#
-# Layer "pg_ivm"
-# compile pg_ivm extension
-#
-#########################################################################################
-FROM build-deps AS pg-ivm-build
-COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
-
-ENV PATH "/usr/local/pgsql/bin/:$PATH"
-RUN wget https://github.com/sraoss/pg_ivm/archive/refs/tags/v1.7.tar.gz -O pg_ivm.tar.gz && \
-    echo "ebfde04f99203c7be4b0e873f91104090e2e83e5429c32ac242d00f334224d5e pg_ivm.tar.gz" | sha256sum --check && \
-    mkdir pg_ivm-src && cd pg_ivm-src && tar xvzf ../pg_ivm.tar.gz --strip-components=1 -C . && \
-    make -j $(getconf _NPROCESSORS_ONLN) && \
-    make -j $(getconf _NPROCESSORS_ONLN) install && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_ivm.control
-
-
 #########################################################################################
 #
 # Layer "neon-pg-ext-build"
@@ -828,7 +810,6 @@ COPY --from=pg-semver-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg-embedding-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=wal2json-pg-build /usr/local/pgsql /usr/local/pgsql
 COPY --from=pg-anon-pg-build /usr/local/pgsql/ /usr/local/pgsql/
-COPY --from=pg-ivm-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY pgxn/ pgxn/

 RUN make -j $(getconf _NPROCESSORS_ONLN) \
--- a/16
+++ b/16
@@ -159,8 +159,8 @@ neon-pg-ext-%: postgres-%
 		-C $(POSTGRES_INSTALL_DIR)/build/neon-utils-$* \
 		-f $(ROOT_PROJECT_DIR)/pgxn/neon_utils/Makefile install

-.PHONY: neon-pg-clean-ext-%
-neon-pg-clean-ext-%:
+.PHONY: neon-pg-ext-clean-%
+neon-pg-ext-clean-%:
 	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config \
 	-C $(POSTGRES_INSTALL_DIR)/build/neon-$* \
 	-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile clean
@@ -216,11 +216,11 @@ neon-pg-ext: \
 	neon-pg-ext-v15 \
 	neon-pg-ext-v16

-.PHONY: neon-pg-clean-ext
-neon-pg-clean-ext: \
-	neon-pg-clean-ext-v14 \
-	neon-pg-clean-ext-v15 \
-	neon-pg-clean-ext-v16
+.PHONY: neon-pg-ext-clean
+neon-pg-ext-clean: \
+	neon-pg-ext-clean-v14 \
+	neon-pg-ext-clean-v15 \
+	neon-pg-ext-clean-v16

 # shorthand to build all Postgres versions
 .PHONY: postgres
@@ -249,7 +249,7 @@ postgres-check: \

 # This doesn't remove the effects of 'configure'.
 .PHONY: clean
-clean: postgres-clean neon-pg-clean-ext
+clean: postgres-clean neon-pg-ext-clean
 	$(CARGO_CMD_PREFIX) cargo clean

 # This removes everything
--- a/README.md
+++ b/README.md
@@ -249,16 +249,6 @@ testing locally, it is convenient to run just one set of permutations, like this
 DEFAULT_PG_VERSION=15 BUILD_TYPE=release ./scripts/pytest
 ```

-## Flamegraphs
-
-You may find yourself in need of flamegraphs for software in this repository.
-You can use [`flamegraph-rs`](https://github.com/flamegraph-rs/flamegraph) or the original [`flamegraph.pl`](https://github.com/brendangregg/FlameGraph). Your choice!
-
->[!IMPORTANT]
-> If you're using `lld` or `mold`, you need the `--no-rosegment` linker argument.
-> It's a [general thing with Rust / lld / mold](https://crbug.com/919499#c16), not specific to this repository.
-> See [this PR for further instructions](https://github.com/neondatabase/neon/pull/6764).
-
 ## Documentation

 [docs](/docs) Contains a top-level overview of all available markdown documentation.
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -45,6 +45,7 @@ use std::{thread, time::Duration};
 use anyhow::{Context, Result};
 use chrono::Utc;
 use clap::Arg;
+use nix::sys::signal::{kill, Signal};
 use signal_hook::consts::{SIGQUIT, SIGTERM};
 use signal_hook::{consts::SIGINT, iterator::Signals};
 use tracing::{error, info};
@@ -52,9 +53,7 @@ use url::Url;

 use compute_api::responses::ComputeStatus;

-use compute_tools::compute::{
-    forward_termination_signal, ComputeNode, ComputeState, ParsedSpec, PG_PID,
-};
+use compute_tools::compute::{ComputeNode, ComputeState, ParsedSpec, PG_PID, SYNC_SAFEKEEPERS_PID};
 use compute_tools::configurator::launch_configurator;
 use compute_tools::extension_server::get_pg_version;
 use compute_tools::http::api::launch_http_server;
@@ -395,15 +394,6 @@ fn main() -> Result<()> {
        info!("synced safekeepers at lsn {lsn}");
    }

-    let mut state = compute.state.lock().unwrap();
-    if state.status == ComputeStatus::TerminationPending {
-        state.status = ComputeStatus::Terminated;
-        compute.state_changed.notify_all();
-        // we were asked to terminate gracefully, don't exit to avoid restart
-        delay_exit = true
-    }
-    drop(state);
-
    if let Err(err) = compute.check_for_core_dumps() {
        error!("error while checking for core dumps: {err:?}");
    }
@@ -533,7 +523,16 @@ fn cli() -> clap::Command {
 /// wait for termination which would be easy then.
 fn handle_exit_signal(sig: i32) {
    info!("received {sig} termination signal");
-    forward_termination_signal();
+    let ss_pid = SYNC_SAFEKEEPERS_PID.load(Ordering::SeqCst);
+    if ss_pid != 0 {
+        let ss_pid = nix::unistd::Pid::from_raw(ss_pid as i32);
+        kill(ss_pid, Signal::SIGTERM).ok();
+    }
+    let pg_pid = PG_PID.load(Ordering::SeqCst);
+    if pg_pid != 0 {
+        let pg_pid = nix::unistd::Pid::from_raw(pg_pid as i32);
+        kill(pg_pid, Signal::SIGTERM).ok();
+    }
    exit(1);
 }

--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -28,8 +28,6 @@ use compute_api::responses::{ComputeMetrics, ComputeStatus};
 use compute_api::spec::{ComputeFeature, ComputeMode, ComputeSpec};
 use utils::measured_stream::MeasuredReader;

-use nix::sys::signal::{kill, Signal};
-
 use remote_storage::{DownloadError, RemotePath};

 use crate::checker::create_availability_check_data;
@@ -326,8 +324,7 @@ impl ComputeNode {
        let spec = compute_state.pspec.as_ref().expect("spec must be set");
        let start_time = Instant::now();

-        let shard0_connstr = spec.pageserver_connstr.split(',').next().unwrap();
-        let mut config = postgres::Config::from_str(shard0_connstr)?;
+        let mut config = postgres::Config::from_str(&spec.pageserver_connstr)?;

        // Use the storage auth token from the config file, if given.
        // Note: this overrides any password set in the connection string.
@@ -1324,17 +1321,3 @@ LIMIT 100",
        Ok(remote_ext_metrics)
    }
 }
-
-pub fn forward_termination_signal() {
-    let ss_pid = SYNC_SAFEKEEPERS_PID.load(Ordering::SeqCst);
-    if ss_pid != 0 {
-        let ss_pid = nix::unistd::Pid::from_raw(ss_pid as i32);
-        kill(ss_pid, Signal::SIGTERM).ok();
-    }
-    let pg_pid = PG_PID.load(Ordering::SeqCst);
-    if pg_pid != 0 {
-        let pg_pid = nix::unistd::Pid::from_raw(pg_pid as i32);
-        // use 'immediate' shutdown (SIGQUIT): https://www.postgresql.org/docs/current/server-shutdown.html
-        kill(pg_pid, Signal::SIGQUIT).ok();
-    }
-}
--- a/compute_tools/src/config.rs
+++ b/compute_tools/src/config.rs
@@ -51,9 +51,6 @@ pub fn write_postgres_conf(
    if let Some(s) = &spec.pageserver_connstring {
        writeln!(file, "neon.pageserver_connstring={}", escape_conf_value(s))?;
    }
-    if let Some(stripe_size) = spec.shard_stripe_size {
-        writeln!(file, "neon.stripe_size={stripe_size}")?;
-    }
    if !spec.safekeeper_connstrings.is_empty() {
        writeln!(
            file,
--- a/compute_tools/src/http/api.rs
+++ b/compute_tools/src/http/api.rs
@@ -5,7 +5,6 @@ use std::net::SocketAddr;
 use std::sync::Arc;
 use std::thread;

-use crate::compute::forward_termination_signal;
 use crate::compute::{ComputeNode, ComputeState, ParsedSpec};
 use compute_api::requests::ConfigurationRequest;
 use compute_api::responses::{ComputeStatus, ComputeStatusResponse, GenericAPIError};
@@ -124,17 +123,6 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
            }
        }

-        (&Method::POST, "/terminate") => {
-            info!("serving /terminate POST request");
-            match handle_terminate_request(compute).await {
-                Ok(()) => Response::new(Body::empty()),
-                Err((msg, code)) => {
-                    error!("error handling /terminate request: {msg}");
-                    render_json_error(&msg, code)
-                }
-            }
-        }
-
        // download extension files from remote extension storage on demand
        (&Method::POST, route) if route.starts_with("/extension_server/") => {
            info!("serving {:?} POST request", route);
@@ -309,49 +297,6 @@ fn render_json_error(e: &str, status: StatusCode) -> Response<Body> {
        .unwrap()
 }

-async fn handle_terminate_request(compute: &Arc<ComputeNode>) -> Result<(), (String, StatusCode)> {
-    {
-        let mut state = compute.state.lock().unwrap();
-        if state.status == ComputeStatus::Terminated {
-            return Ok(());
-        }
-        if state.status != ComputeStatus::Empty && state.status != ComputeStatus::Running {
-            let msg = format!(
-                "invalid compute status for termination request: {:?}",
-                state.status.clone()
-            );
-            return Err((msg, StatusCode::PRECONDITION_FAILED));
-        }
-        state.status = ComputeStatus::TerminationPending;
-        compute.state_changed.notify_all();
-        drop(state);
-    }
-    forward_termination_signal();
-    info!("sent signal and notified waiters");
-
-    // Spawn a blocking thread to wait for compute to become Terminated.
-    // This is needed to do not block the main pool of workers and
-    // be able to serve other requests while some particular request
-    // is waiting for compute to finish configuration.
-    let c = compute.clone();
-    task::spawn_blocking(move || {
-        let mut state = c.state.lock().unwrap();
-        while state.status != ComputeStatus::Terminated {
-            state = c.state_changed.wait(state).unwrap();
-            info!(
-                "waiting for compute to become Terminated, current status: {:?}",
-                state.status
-            );
-        }
-
-        Ok(())
-    })
-    .await
-    .unwrap()?;
-    info!("terminated Postgres");
-    Ok(())
-}
-
 // Main Hyper HTTP server function that runs it and blocks waiting on it forever.
 #[tokio::main]
 async fn serve(port: u16, state: Arc<ComputeNode>) {
--- a/compute_tools/src/http/openapi_spec.yaml
+++ b/compute_tools/src/http/openapi_spec.yaml
@@ -168,29 +168,6 @@ paths:
              schema:
                $ref: "#/components/schemas/GenericError"

-  /terminate:
-    post:
-      tags:
-      - Terminate
-      summary: Terminate Postgres and wait for it to exit
-      description: ""
-      operationId: terminate
-      responses:
-        200:
-          description: Result
-        412:
-          description: "wrong state"
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/GenericError"
-        500:
-          description: "Unexpected error"
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/GenericError"
-
 components:
  securitySchemes:
    JWT:
--- a/control_plane/attachment_service/Cargo.toml
+++ b/control_plane/attachment_service/Cargo.toml
@@ -4,11 +4,6 @@ version = "0.1.0"
 edition.workspace = true
 license.workspace = true

-[features]
-default = []
-# Enables test-only APIs and behaviors
-testing = []
-
 [dependencies]
 anyhow.workspace = true
 aws-config.workspace = true
@@ -18,7 +13,6 @@ clap.workspace = true
 futures.workspace = true
 git-version.workspace = true
 hyper.workspace = true
-once_cell.workspace = true
 pageserver_api.workspace = true
 pageserver_client.workspace = true
 postgres_connection.workspace = true
--- a/control_plane/attachment_service/src/compute_hook.rs
+++ b/control_plane/attachment_service/src/compute_hook.rs
@@ -3,7 +3,7 @@ use std::{collections::HashMap, time::Duration};
 use control_plane::endpoint::{ComputeControlPlane, EndpointStatus};
 use control_plane::local_env::LocalEnv;
 use hyper::{Method, StatusCode};
-use pageserver_api::shard::{ShardIndex, ShardNumber, TenantShardId};
+use pageserver_api::shard::{ShardCount, ShardIndex, ShardNumber, TenantShardId};
 use postgres_connection::parse_host_port;
 use serde::{Deserialize, Serialize};
 use tokio_util::sync::CancellationToken;
@@ -77,7 +77,7 @@ impl ComputeHookTenant {
        self.shards
            .sort_by_key(|(shard, _node_id)| shard.shard_number);

-        if self.shards.len() == shard_count.count() as usize || shard_count.is_unsharded() {
+        if self.shards.len() == shard_count.0 as usize || shard_count == ShardCount(0) {
            // We have pageservers for all the shards: emit a configuration update
            return Some(ComputeHookNotifyRequest {
                tenant_id,
@@ -94,7 +94,7 @@ impl ComputeHookTenant {
            tracing::info!(
                "ComputeHookTenant::maybe_reconfigure: not enough shards ({}/{})",
                self.shards.len(),
-                shard_count.count()
+                shard_count.0
            );
        }

@@ -155,7 +155,7 @@ impl ComputeHook {

        for (endpoint_name, endpoint) in &cplane.endpoints {
            if endpoint.tenant_id == tenant_id && endpoint.status() == EndpointStatus::Running {
-                tracing::info!("Reconfiguring endpoint {}", endpoint_name,);
+                tracing::info!("🔁 Reconfiguring endpoint {}", endpoint_name,);
                endpoint.reconfigure(compute_pageservers.clone()).await?;
            }
        }
@@ -177,7 +177,7 @@ impl ComputeHook {
            req
        };

-        tracing::info!(
+        tracing::debug!(
            "Sending notify request to {} ({:?})",
            url,
            reconfigure_request
@@ -266,7 +266,7 @@ impl ComputeHook {
    /// periods, but we don't retry forever.  The **caller** is responsible for handling failures and
    /// ensuring that they eventually call again to ensure that the compute is eventually notified of
    /// the proper pageserver nodes for a tenant.
-    #[tracing::instrument(skip_all, fields(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), node_id))]
+    #[tracing::instrument(skip_all, fields(tenant_shard_id, node_id))]
    pub(super) async fn notify(
        &self,
        tenant_shard_id: TenantShardId,
@@ -298,7 +298,7 @@ impl ComputeHook {
        let Some(reconfigure_request) = reconfigure_request else {
            // The tenant doesn't yet have pageservers for all its shards: we won't notify anything
            // until it does.
-            tracing::info!("Tenant isn't yet ready to emit a notification");
+            tracing::debug!("Tenant isn't yet ready to emit a notification",);
            return Ok(());
        };

--- a/control_plane/attachment_service/src/http.rs
+++ b/control_plane/attachment_service/src/http.rs
@@ -114,10 +114,7 @@ async fn handle_tenant_create(
    mut req: Request<Body>,
 ) -> Result<Response<Body>, ApiError> {
    let create_req = json_request::<TenantCreateRequest>(&mut req).await?;
-    json_response(
-        StatusCode::CREATED,
-        service.tenant_create(create_req).await?,
-    )
+    json_response(StatusCode::OK, service.tenant_create(create_req).await?)
 }

 // For tenant and timeline deletions, which both implement an "initially return 202, then 404 once
@@ -199,7 +196,7 @@ async fn handle_tenant_timeline_create(
    let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
    let create_req = json_request::<TimelineCreateRequest>(&mut req).await?;
    json_response(
-        StatusCode::CREATED,
+        StatusCode::OK,
        service
            .tenant_timeline_create(tenant_id, create_req)
            .await?,
@@ -336,22 +333,6 @@ async fn handle_tenant_drop(req: Request<Body>) -> Result<Response<Body>, ApiErr
    json_response(StatusCode::OK, state.service.tenant_drop(tenant_id).await?)
 }

-async fn handle_tenants_dump(req: Request<Body>) -> Result<Response<Body>, ApiError> {
-    let state = get_state(&req);
-    state.service.tenants_dump()
-}
-
-async fn handle_scheduler_dump(req: Request<Body>) -> Result<Response<Body>, ApiError> {
-    let state = get_state(&req);
-    state.service.scheduler_dump()
-}
-
-async fn handle_consistency_check(req: Request<Body>) -> Result<Response<Body>, ApiError> {
-    let state = get_state(&req);
-
-    json_response(StatusCode::OK, state.service.consistency_check().await?)
-}
-
 /// Status endpoint is just used for checking that our HTTP listener is up
 async fn handle_status(_req: Request<Body>) -> Result<Response<Body>, ApiError> {
    json_response(StatusCode::OK, ())
@@ -440,13 +421,6 @@ pub fn make_router(
        .post("/debug/v1/node/:node_id/drop", |r| {
            request_span(r, handle_node_drop)
        })
-        .get("/debug/v1/tenant", |r| request_span(r, handle_tenants_dump))
-        .get("/debug/v1/scheduler", |r| {
-            request_span(r, handle_scheduler_dump)
-        })
-        .post("/debug/v1/consistency_check", |r| {
-            request_span(r, handle_consistency_check)
-        })
        .get("/control/v1/tenant/:tenant_id/locate", |r| {
            tenant_service_handler(r, handle_tenant_locate)
        })
--- a/control_plane/attachment_service/src/lib.rs
+++ b/control_plane/attachment_service/src/lib.rs
@@ -3,7 +3,6 @@ use utils::seqwait::MonotonicCounter;

 mod compute_hook;
 pub mod http;
-pub mod metrics;
 mod node;
 pub mod persistence;
 mod reconciler;
@@ -12,7 +11,7 @@ mod schema;
 pub mod service;
 mod tenant_state;

-#[derive(Clone, Serialize, Deserialize, Debug)]
+#[derive(Clone, Serialize, Deserialize)]
 enum PlacementPolicy {
    /// Cheapest way to attach a tenant: just one pageserver, no secondary
    Single,
@@ -23,7 +22,7 @@ enum PlacementPolicy {
    Detached,
 }

-#[derive(Ord, PartialOrd, Eq, PartialEq, Copy, Clone, Serialize)]
+#[derive(Ord, PartialOrd, Eq, PartialEq, Copy, Clone)]
 struct Sequence(u64);

 impl Sequence {
@@ -38,12 +37,6 @@ impl std::fmt::Display for Sequence {
    }
 }

-impl std::fmt::Debug for Sequence {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        write!(f, "{}", self.0)
-    }
-}
-
 impl MonotonicCounter<Sequence> for Sequence {
    fn cnt_advance(&mut self, v: Sequence) {
        assert!(*self <= v);
--- a/control_plane/attachment_service/src/main.rs
+++ b/control_plane/attachment_service/src/main.rs
@@ -6,7 +6,6 @@
 ///
 use anyhow::{anyhow, Context};
 use attachment_service::http::make_router;
-use attachment_service::metrics::preinitialize_metrics;
 use attachment_service::persistence::Persistence;
 use attachment_service::service::{Config, Service};
 use aws_config::{self, BehaviorVersion, Region};
@@ -16,7 +15,6 @@ use diesel::Connection;
 use metrics::launch_timestamp::LaunchTimestamp;
 use std::sync::Arc;
 use tokio::signal::unix::SignalKind;
-use tokio_util::sync::CancellationToken;
 use utils::auth::{JwtAuth, SwappableJwtAuth};
 use utils::logging::{self, LogFormat};

@@ -206,8 +204,6 @@ async fn async_main() -> anyhow::Result<()> {
        logging::Output::Stdout,
    )?;

-    preinitialize_metrics();
-
    let args = Cli::parse();
    tracing::info!(
        "version: {}, launch_timestamp: {}, build_tag {}, state at {}, listening on {}",
@@ -241,23 +237,15 @@ async fn async_main() -> anyhow::Result<()> {
    let auth = secrets
        .public_key
        .map(|jwt_auth| Arc::new(SwappableJwtAuth::new(jwt_auth)));
-    let router = make_router(service.clone(), auth)
+    let router = make_router(service, auth)
        .build()
        .map_err(|err| anyhow!(err))?;
    let router_service = utils::http::RouterService::new(router).unwrap();
+    let server = hyper::Server::from_tcp(http_listener)?.serve(router_service);

-    // Start HTTP server
-    let server_shutdown = CancellationToken::new();
-    let server = hyper::Server::from_tcp(http_listener)?
-        .serve(router_service)
-        .with_graceful_shutdown({
-            let server_shutdown = server_shutdown.clone();
-            async move {
-                server_shutdown.cancelled().await;
-            }
-        });
    tracing::info!("Serving on {0}", args.listen);
-    let server_task = tokio::task::spawn(server);
+
+    tokio::task::spawn(server);

    // Wait until we receive a signal
    let mut sigint = tokio::signal::unix::signal(SignalKind::interrupt())?;
@@ -278,16 +266,5 @@ async fn async_main() -> anyhow::Result<()> {
        }
    }

-    // Stop HTTP server first, so that we don't have to service requests
-    // while shutting down Service
-    server_shutdown.cancel();
-    if let Err(e) = server_task.await {
-        tracing::error!("Error joining HTTP server task: {e}")
-    }
-    tracing::info!("Joined HTTP server task");
-
-    service.shutdown().await;
-    tracing::info!("Service shutdown complete");
-
    std::process::exit(0);
 }
--- a/control_plane/attachment_service/src/metrics.rs
+++ b/control_plane/attachment_service/src/metrics.rs
@@ -1,32 +0,0 @@
-use metrics::{register_int_counter, register_int_counter_vec, IntCounter, IntCounterVec};
-use once_cell::sync::Lazy;
-
-pub(crate) struct ReconcilerMetrics {
-    pub(crate) spawned: IntCounter,
-    pub(crate) complete: IntCounterVec,
-}
-
-impl ReconcilerMetrics {
-    // Labels used on [`Self::complete`]
-    pub(crate) const SUCCESS: &'static str = "ok";
-    pub(crate) const ERROR: &'static str = "success";
-    pub(crate) const CANCEL: &'static str = "cancel";
-}
-
-pub(crate) static RECONCILER: Lazy<ReconcilerMetrics> = Lazy::new(|| ReconcilerMetrics {
-    spawned: register_int_counter!(
-        "storage_controller_reconcile_spawn",
-        "Count of how many times we spawn a reconcile task",
-    )
-    .expect("failed to define a metric"),
-    complete: register_int_counter_vec!(
-        "storage_controller_reconcile_complete",
-        "Reconciler tasks completed, broken down by success/failure/cancelled",
-        &["status"],
-    )
-    .expect("failed to define a metric"),
-});
-
-pub fn preinitialize_metrics() {
-    Lazy::force(&RECONCILER);
-}
--- a/control_plane/attachment_service/src/node.rs
+++ b/control_plane/attachment_service/src/node.rs
@@ -1,16 +1,9 @@
 use control_plane::attachment_service::{NodeAvailability, NodeSchedulingPolicy};
-use serde::Serialize;
 use utils::id::NodeId;

 use crate::persistence::NodePersistence;

-/// Represents the in-memory description of a Node.
-///
-/// Scheduling statistics are maintened separately in [`crate::scheduler`].
-///
-/// The persistent subset of the Node is defined in [`crate::persistence::NodePersistence`]: the
-/// implementation of serialization on this type is only for debug dumps.
-#[derive(Clone, Serialize, Eq, PartialEq)]
+#[derive(Clone)]
 pub(crate) struct Node {
    pub(crate) id: NodeId,

--- a/control_plane/attachment_service/src/persistence.rs
+++ b/control_plane/attachment_service/src/persistence.rs
@@ -222,7 +222,7 @@ impl Persistence {
            let tenant_shard_id = TenantShardId {
                tenant_id: TenantId::from_str(tsp.tenant_id.as_str())?,
                shard_number: ShardNumber(tsp.shard_number as u8),
-                shard_count: ShardCount::new(tsp.shard_count as u8),
+                shard_count: ShardCount(tsp.shard_count as u8),
            };

            tenants_map.insert(tenant_shard_id, tsp);
@@ -318,7 +318,7 @@ impl Persistence {
                tenant_id: TenantId::from_str(tsp.tenant_id.as_str())
                    .map_err(|e| DatabaseError::Logical(format!("Malformed tenant id: {e}")))?,
                shard_number: ShardNumber(tsp.shard_number as u8),
-                shard_count: ShardCount::new(tsp.shard_count as u8),
+                shard_count: ShardCount(tsp.shard_count as u8),
            };
            result.insert(tenant_shard_id, Generation::new(tsp.generation as u32));
        }
@@ -340,7 +340,7 @@ impl Persistence {
                let updated = diesel::update(tenant_shards)
                    .filter(tenant_id.eq(tenant_shard_id.tenant_id.to_string()))
                    .filter(shard_number.eq(tenant_shard_id.shard_number.0 as i32))
-                    .filter(shard_count.eq(tenant_shard_id.shard_count.literal() as i32))
+                    .filter(shard_count.eq(tenant_shard_id.shard_count.0 as i32))
                    .set((
                        generation.eq(generation + 1),
                        generation_pageserver.eq(node_id.0 as i64),
@@ -362,7 +362,7 @@ impl Persistence {
            let updated = diesel::update(tenant_shards)
                .filter(tenant_id.eq(tenant_shard_id.tenant_id.to_string()))
                .filter(shard_number.eq(tenant_shard_id.shard_number.0 as i32))
-                .filter(shard_count.eq(tenant_shard_id.shard_count.literal() as i32))
+                .filter(shard_count.eq(tenant_shard_id.shard_count.0 as i32))
                .set((
                    generation_pageserver.eq(i64::MAX),
                    placement_policy.eq(serde_json::to_string(&PlacementPolicy::Detached).unwrap()),
@@ -392,19 +392,21 @@ impl Persistence {
            conn.transaction(|conn| -> DatabaseResult<()> {
                // Mark parent shards as splitting

+                let expect_parent_records = std::cmp::max(1, old_shard_count.0);
+
                let updated = diesel::update(tenant_shards)
                    .filter(tenant_id.eq(split_tenant_id.to_string()))
-                    .filter(shard_count.eq(old_shard_count.literal() as i32))
+                    .filter(shard_count.eq(old_shard_count.0 as i32))
                    .set((splitting.eq(1),))
                    .execute(conn)?;
                if u8::try_from(updated)
                    .map_err(|_| DatabaseError::Logical(
                        format!("Overflow existing shard count {} while splitting", updated))
-                    )? != old_shard_count.count() {
+                    )? != expect_parent_records {
                    // Perhaps a deletion or another split raced with this attempt to split, mutating
                    // the parent shards that we intend to split. In this case the split request should fail.
                    return Err(DatabaseError::Logical(
-                        format!("Unexpected existing shard count {updated} when preparing tenant for split (expected {})", old_shard_count.count())
+                        format!("Unexpected existing shard count {updated} when preparing tenant for split (expected {expect_parent_records})")
                    ));
                }

@@ -416,7 +418,7 @@ impl Persistence {
                    let mut parent = crate::schema::tenant_shards::table
                        .filter(tenant_id.eq(parent_shard_id.tenant_id.to_string()))
                        .filter(shard_number.eq(parent_shard_id.shard_number.0 as i32))
-                        .filter(shard_count.eq(parent_shard_id.shard_count.literal() as i32))
+                        .filter(shard_count.eq(parent_shard_id.shard_count.0 as i32))
                        .load::<TenantShardPersistence>(conn)?;
                    let parent = if parent.len() != 1 {
                        return Err(DatabaseError::Logical(format!(
@@ -457,7 +459,7 @@ impl Persistence {
                // Drop parent shards
                diesel::delete(tenant_shards)
                    .filter(tenant_id.eq(split_tenant_id.to_string()))
-                    .filter(shard_count.eq(old_shard_count.literal() as i32))
+                    .filter(shard_count.eq(old_shard_count.0 as i32))
                    .execute(conn)?;

                // Clear sharding flag
@@ -477,7 +479,7 @@ impl Persistence {
 }

 /// Parts of [`crate::tenant_state::TenantState`] that are stored durably
-#[derive(Queryable, Selectable, Insertable, Serialize, Deserialize, Clone, Eq, PartialEq)]
+#[derive(Queryable, Selectable, Insertable, Serialize, Deserialize, Clone)]
 #[diesel(table_name = crate::schema::tenant_shards)]
 pub(crate) struct TenantShardPersistence {
    #[serde(default)]
--- a/control_plane/attachment_service/src/reconciler.rs
+++ b/control_plane/attachment_service/src/reconciler.rs
@@ -13,7 +13,6 @@ use tokio_util::sync::CancellationToken;
 use utils::generation::Generation;
 use utils::id::{NodeId, TimelineId};
 use utils::lsn::Lsn;
-use utils::sync::gate::GateGuard;

 use crate::compute_hook::{ComputeHook, NotifyError};
 use crate::node::Node;
@@ -27,7 +26,7 @@ pub(super) struct Reconciler {
    pub(super) tenant_shard_id: TenantShardId,
    pub(crate) shard: ShardIdentity,
    pub(crate) generation: Generation,
-    pub(crate) intent: TargetState,
+    pub(crate) intent: IntentState,
    pub(crate) config: TenantConfig,
    pub(crate) observed: ObservedState,

@@ -54,46 +53,14 @@ pub(super) struct Reconciler {
    /// the tenant is changed.
    pub(crate) cancel: CancellationToken,

-    /// Reconcilers are registered with a Gate so that during a graceful shutdown we
-    /// can wait for all the reconcilers to respond to their cancellation tokens.
-    pub(crate) _gate_guard: GateGuard,
-
    /// Access to persistent storage for updating generation numbers
    pub(crate) persistence: Arc<Persistence>,
 }

-/// This is a snapshot of [`crate::tenant_state::IntentState`], but it does not do any
-/// reference counting for Scheduler.  The IntentState is what the scheduler works with,
-/// and the TargetState is just the instruction for a particular Reconciler run.
-#[derive(Debug)]
-pub(crate) struct TargetState {
-    pub(crate) attached: Option<NodeId>,
-    pub(crate) secondary: Vec<NodeId>,
-}
-
-impl TargetState {
-    pub(crate) fn from_intent(intent: &IntentState) -> Self {
-        Self {
-            attached: *intent.get_attached(),
-            secondary: intent.get_secondary().clone(),
-        }
-    }
-
-    fn all_pageservers(&self) -> Vec<NodeId> {
-        let mut result = self.secondary.clone();
-        if let Some(node_id) = &self.attached {
-            result.push(*node_id);
-        }
-        result
-    }
-}
-
 #[derive(thiserror::Error, Debug)]
 pub(crate) enum ReconcileError {
    #[error(transparent)]
    Notify(#[from] NotifyError),
-    #[error("Cancelled")]
-    Cancel,
    #[error(transparent)]
    Other(#[from] anyhow::Error),
 }
@@ -296,7 +263,7 @@ impl Reconciler {
                secondary_conf,
                tenant_conf: config.clone(),
                shard_number: shard.number.0,
-                shard_count: shard.count.literal(),
+                shard_count: shard.count.0,
                shard_stripe_size: shard.stripe_size.0,
            }
        }
@@ -491,7 +458,7 @@ impl Reconciler {
                    generation: None,
                    secondary_conf: None,
                    shard_number: self.shard.number.0,
-                    shard_count: self.shard.count.literal(),
+                    shard_count: self.shard.count.0,
                    shard_stripe_size: self.shard.stripe_size.0,
                    tenant_conf: self.config.clone(),
                },
@@ -499,9 +466,6 @@ impl Reconciler {
        }

        for (node_id, conf) in changes {
-            if self.cancel.is_cancelled() {
-                return Err(ReconcileError::Cancel);
-            }
            self.location_config(node_id, conf, None).await?;
        }

@@ -542,7 +506,7 @@ pub(crate) fn attached_location_conf(
        generation: generation.into(),
        secondary_conf: None,
        shard_number: shard.number.0,
-        shard_count: shard.count.literal(),
+        shard_count: shard.count.0,
        shard_stripe_size: shard.stripe_size.0,
        tenant_conf: config.clone(),
    }
@@ -557,7 +521,7 @@ pub(crate) fn secondary_location_conf(
        generation: None,
        secondary_conf: Some(LocationConfigSecondary { warm: true }),
        shard_number: shard.number.0,
-        shard_count: shard.count.literal(),
+        shard_count: shard.count.0,
        shard_stripe_size: shard.stripe_size.0,
        tenant_conf: config.clone(),
    }
--- a/control_plane/attachment_service/src/scheduler.rs
+++ b/control_plane/attachment_service/src/scheduler.rs
@@ -1,8 +1,9 @@
-use crate::{node::Node, tenant_state::TenantState};
-use serde::Serialize;
-use std::collections::HashMap;
+use pageserver_api::shard::TenantShardId;
+use std::collections::{BTreeMap, HashMap};
 use utils::{http::error::ApiError, id::NodeId};

+use crate::{node::Node, tenant_state::TenantState};
+
 /// Scenarios in which we cannot find a suitable location for a tenant shard
 #[derive(thiserror::Error, Debug)]
 pub enum ScheduleError {
@@ -18,179 +19,52 @@ impl From<ScheduleError> for ApiError {
    }
 }

-#[derive(Serialize, Eq, PartialEq)]
-struct SchedulerNode {
-    /// How many shards are currently scheduled on this node, via their [`crate::tenant_state::IntentState`].
-    shard_count: usize,
-
-    /// Whether this node is currently elegible to have new shards scheduled (this is derived
-    /// from a node's availability state and scheduling policy).
-    may_schedule: bool,
-}
-
-/// This type is responsible for selecting which node is used when a tenant shard needs to choose a pageserver
-/// on which to run.
-///
-/// The type has no persistent state of its own: this is all populated at startup.  The Serialize
-/// impl is only for debug dumps.
-#[derive(Serialize)]
 pub(crate) struct Scheduler {
-    nodes: HashMap<NodeId, SchedulerNode>,
+    tenant_counts: HashMap<NodeId, usize>,
 }

 impl Scheduler {
-    pub(crate) fn new<'a>(nodes: impl Iterator<Item = &'a Node>) -> Self {
-        let mut scheduler_nodes = HashMap::new();
-        for node in nodes {
-            scheduler_nodes.insert(
-                node.id,
-                SchedulerNode {
-                    shard_count: 0,
-                    may_schedule: node.may_schedule(),
-                },
-            );
+    pub(crate) fn new(
+        tenants: &BTreeMap<TenantShardId, TenantState>,
+        nodes: &HashMap<NodeId, Node>,
+    ) -> Self {
+        let mut tenant_counts = HashMap::new();
+        for node_id in nodes.keys() {
+            tenant_counts.insert(*node_id, 0);
        }

-        Self {
-            nodes: scheduler_nodes,
-        }
-    }
-
-    /// For debug/support: check that our internal statistics are in sync with the state of
-    /// the nodes & tenant shards.
-    ///
-    /// If anything is inconsistent, log details and return an error.
-    pub(crate) fn consistency_check<'a>(
-        &self,
-        nodes: impl Iterator<Item = &'a Node>,
-        shards: impl Iterator<Item = &'a TenantState>,
-    ) -> anyhow::Result<()> {
-        let mut expect_nodes: HashMap<NodeId, SchedulerNode> = HashMap::new();
-        for node in nodes {
-            expect_nodes.insert(
-                node.id,
-                SchedulerNode {
-                    shard_count: 0,
-                    may_schedule: node.may_schedule(),
-                },
-            );
-        }
-
-        for shard in shards {
-            if let Some(node_id) = shard.intent.get_attached() {
-                match expect_nodes.get_mut(node_id) {
-                    Some(node) => node.shard_count += 1,
-                    None => anyhow::bail!(
-                        "Tenant {} references nonexistent node {}",
-                        shard.tenant_shard_id,
-                        node_id
-                    ),
-                }
-            }
-
-            for node_id in shard.intent.get_secondary() {
-                match expect_nodes.get_mut(node_id) {
-                    Some(node) => node.shard_count += 1,
-                    None => anyhow::bail!(
-                        "Tenant {} references nonexistent node {}",
-                        shard.tenant_shard_id,
-                        node_id
-                    ),
-                }
+        for tenant in tenants.values() {
+            if let Some(ps) = tenant.intent.attached {
+                let entry = tenant_counts.entry(ps).or_insert(0);
+                *entry += 1;
            }
        }

-        for (node_id, expect_node) in &expect_nodes {
-            let Some(self_node) = self.nodes.get(node_id) else {
-                anyhow::bail!("Node {node_id} not found in Self")
-            };
-
-            if self_node != expect_node {
-                tracing::error!("Inconsistency detected in scheduling state for node {node_id}");
-                tracing::error!("Expected state: {}", serde_json::to_string(expect_node)?);
-                tracing::error!("Self state: {}", serde_json::to_string(self_node)?);
-
-                anyhow::bail!("Inconsistent state on {node_id}");
+        for (node_id, node) in nodes {
+            if !node.may_schedule() {
+                tenant_counts.remove(node_id);
            }
        }

-        if expect_nodes.len() != self.nodes.len() {
-            // We just checked that all the expected nodes are present.  If the lengths don't match,
-            // it means that we have nodes in Self that are unexpected.
-            for node_id in self.nodes.keys() {
-                if !expect_nodes.contains_key(node_id) {
-                    anyhow::bail!("Node {node_id} found in Self but not in expected nodes");
-                }
-            }
-        }
-
-        Ok(())
-    }
-
-    /// Increment the reference count of a node.  This reference count is used to guide scheduling
-    /// decisions, not for memory management: it represents one tenant shard whose IntentState targets
-    /// this node.
-    ///
-    /// It is an error to call this for a node that is not known to the scheduler (i.e. passed into
-    /// [`Self::new`] or [`Self::node_upsert`])
-    pub(crate) fn node_inc_ref(&mut self, node_id: NodeId) {
-        let Some(node) = self.nodes.get_mut(&node_id) else {
-            tracing::error!("Scheduler missing node {node_id}");
-            debug_assert!(false);
-            return;
-        };
-
-        node.shard_count += 1;
-    }
-
-    /// Decrement a node's reference count.  Inverse of [`Self::node_inc_ref`].
-    pub(crate) fn node_dec_ref(&mut self, node_id: NodeId) {
-        let Some(node) = self.nodes.get_mut(&node_id) else {
-            debug_assert!(false);
-            tracing::error!("Scheduler missing node {node_id}");
-            return;
-        };
-
-        node.shard_count -= 1;
-    }
-
-    pub(crate) fn node_upsert(&mut self, node: &Node) {
-        use std::collections::hash_map::Entry::*;
-        match self.nodes.entry(node.id) {
-            Occupied(mut entry) => {
-                entry.get_mut().may_schedule = node.may_schedule();
-            }
-            Vacant(entry) => {
-                entry.insert(SchedulerNode {
-                    shard_count: 0,
-                    may_schedule: node.may_schedule(),
-                });
-            }
-        }
-    }
-
-    pub(crate) fn node_remove(&mut self, node_id: NodeId) {
-        if self.nodes.remove(&node_id).is_none() {
-            tracing::warn!(node_id=%node_id, "Removed non-existent node from scheduler");
-        }
+        Self { tenant_counts }
    }

    pub(crate) fn schedule_shard(
        &mut self,
        hard_exclude: &[NodeId],
    ) -> Result<NodeId, ScheduleError> {
-        if self.nodes.is_empty() {
+        if self.tenant_counts.is_empty() {
            return Err(ScheduleError::NoPageservers);
        }

        let mut tenant_counts: Vec<(NodeId, usize)> = self
-            .nodes
+            .tenant_counts
            .iter()
            .filter_map(|(k, v)| {
-                if hard_exclude.contains(k) || !v.may_schedule {
+                if hard_exclude.contains(k) {
                    None
                } else {
-                    Some((*k, v.shard_count))
+                    Some((*k, *v))
                }
            })
            .collect();
@@ -199,108 +73,17 @@ impl Scheduler {
        tenant_counts.sort_by_key(|i| (i.1, i.0));

        if tenant_counts.is_empty() {
-            // After applying constraints, no pageservers were left.  We log some detail about
-            // the state of nodes to help understand why this happened.  This is not logged as an error because
-            // it is legitimately possible for enough nodes to be Offline to prevent scheduling a shard.
-            tracing::info!("Scheduling failure, while excluding {hard_exclude:?}, node states:");
-            for (node_id, node) in &self.nodes {
-                tracing::info!(
-                    "Node {node_id}: may_schedule={} shards={}",
-                    node.may_schedule,
-                    node.shard_count
-                );
-            }
-
+            // After applying constraints, no pageservers were left
            return Err(ScheduleError::ImpossibleConstraint);
        }

+        for (node_id, count) in &tenant_counts {
+            tracing::info!("tenant_counts[{node_id}]={count}");
+        }
+
        let node_id = tenant_counts.first().unwrap().0;
-        tracing::info!(
-            "scheduler selected node {node_id} (elegible nodes {:?}, exclude: {hard_exclude:?})",
-            tenant_counts.iter().map(|i| i.0 .0).collect::<Vec<_>>()
-        );
-
-        // Note that we do not update shard count here to reflect the scheduling: that
-        // is IntentState's job when the scheduled location is used.
-
+        tracing::info!("scheduler selected node {node_id}");
+        *self.tenant_counts.get_mut(&node_id).unwrap() += 1;
        Ok(node_id)
    }
 }
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use std::collections::HashMap;
-
-    use control_plane::attachment_service::{NodeAvailability, NodeSchedulingPolicy};
-    use utils::id::NodeId;
-
-    use crate::{node::Node, tenant_state::IntentState};
-
-    #[test]
-    fn scheduler_basic() -> anyhow::Result<()> {
-        let mut nodes = HashMap::new();
-        nodes.insert(
-            NodeId(1),
-            Node {
-                id: NodeId(1),
-                availability: NodeAvailability::Active,
-                scheduling: NodeSchedulingPolicy::Active,
-                listen_http_addr: String::new(),
-                listen_http_port: 0,
-                listen_pg_addr: String::new(),
-                listen_pg_port: 0,
-            },
-        );
-
-        nodes.insert(
-            NodeId(2),
-            Node {
-                id: NodeId(2),
-                availability: NodeAvailability::Active,
-                scheduling: NodeSchedulingPolicy::Active,
-                listen_http_addr: String::new(),
-                listen_http_port: 0,
-                listen_pg_addr: String::new(),
-                listen_pg_port: 0,
-            },
-        );
-
-        let mut scheduler = Scheduler::new(nodes.values());
-        let mut t1_intent = IntentState::new();
-        let mut t2_intent = IntentState::new();
-
-        let scheduled = scheduler.schedule_shard(&[])?;
-        t1_intent.set_attached(&mut scheduler, Some(scheduled));
-        let scheduled = scheduler.schedule_shard(&[])?;
-        t2_intent.set_attached(&mut scheduler, Some(scheduled));
-
-        assert_eq!(scheduler.nodes.get(&NodeId(1)).unwrap().shard_count, 1);
-        assert_eq!(scheduler.nodes.get(&NodeId(2)).unwrap().shard_count, 1);
-
-        let scheduled = scheduler.schedule_shard(&t1_intent.all_pageservers())?;
-        t1_intent.push_secondary(&mut scheduler, scheduled);
-
-        assert_eq!(scheduler.nodes.get(&NodeId(1)).unwrap().shard_count, 1);
-        assert_eq!(scheduler.nodes.get(&NodeId(2)).unwrap().shard_count, 2);
-
-        t1_intent.clear(&mut scheduler);
-        assert_eq!(scheduler.nodes.get(&NodeId(1)).unwrap().shard_count, 0);
-        assert_eq!(scheduler.nodes.get(&NodeId(2)).unwrap().shard_count, 1);
-
-        if cfg!(debug_assertions) {
-            // Dropping an IntentState without clearing it causes a panic in debug mode,
-            // because we have failed to properly update scheduler shard counts.
-            let result = std::panic::catch_unwind(move || {
-                drop(t2_intent);
-            });
-            assert!(result.is_err());
-        } else {
-            t2_intent.clear(&mut scheduler);
-            assert_eq!(scheduler.nodes.get(&NodeId(1)).unwrap().shard_count, 0);
-            assert_eq!(scheduler.nodes.get(&NodeId(2)).unwrap().shard_count, 0);
-        }
-
-        Ok(())
-    }
-}
--- a/control_plane/attachment_service/src/service.rs
+++ b/control_plane/attachment_service/src/service.rs
--- a/control_plane/attachment_service/src/tenant_state.rs
+++ b/control_plane/attachment_service/src/tenant_state.rs
@@ -1,47 +1,27 @@
 use std::{collections::HashMap, sync::Arc, time::Duration};

-use crate::{metrics, persistence::TenantShardPersistence};
 use control_plane::attachment_service::NodeAvailability;
 use pageserver_api::{
    models::{LocationConfig, LocationConfigMode, TenantConfig},
    shard::{ShardIdentity, TenantShardId},
 };
-use serde::Serialize;
 use tokio::task::JoinHandle;
 use tokio_util::sync::CancellationToken;
-use tracing::{instrument, Instrument};
 use utils::{
    generation::Generation,
    id::NodeId,
    seqwait::{SeqWait, SeqWaitError},
-    sync::gate::Gate,
 };

 use crate::{
    compute_hook::ComputeHook,
    node::Node,
-    persistence::{split_state::SplitState, Persistence},
-    reconciler::{
-        attached_location_conf, secondary_location_conf, ReconcileError, Reconciler, TargetState,
-    },
+    persistence::Persistence,
+    reconciler::{attached_location_conf, secondary_location_conf, ReconcileError, Reconciler},
    scheduler::{ScheduleError, Scheduler},
    service, PlacementPolicy, Sequence,
 };

-/// Serialization helper
-fn read_mutex_content<S, T>(v: &std::sync::Mutex<T>, serializer: S) -> Result<S::Ok, S::Error>
-where
-    S: serde::ser::Serializer,
-    T: Clone + std::fmt::Display,
-{
-    serializer.collect_str(&v.lock().unwrap())
-}
-
-/// In-memory state for a particular tenant shard.
-///
-/// This struct implement Serialize for debugging purposes, but is _not_ persisted
-/// itself: see [`crate::persistence`] for the subset of tenant shard state that is persisted.
-#[derive(Serialize)]
 pub(crate) struct TenantState {
    pub(crate) tenant_shard_id: TenantShardId,

@@ -76,29 +56,20 @@ pub(crate) struct TenantState {
    /// If a reconcile task is currently in flight, it may be joined here (it is
    /// only safe to join if either the result has been received or the reconciler's
    /// cancellation token has been fired)
-    #[serde(skip)]
    pub(crate) reconciler: Option<ReconcilerHandle>,

-    /// If a tenant is being split, then all shards with that TenantId will have a
-    /// SplitState set, this acts as a guard against other operations such as background
-    /// reconciliation, and timeline creation.
-    pub(crate) splitting: SplitState,
-
    /// Optionally wait for reconciliation to complete up to a particular
    /// sequence number.
-    #[serde(skip)]
    pub(crate) waiter: std::sync::Arc<SeqWait<Sequence, Sequence>>,

    /// Indicates sequence number for which we have encountered an error reconciling.  If
    /// this advances ahead of [`Self::waiter`] then a reconciliation error has occurred,
    /// and callers should stop waiting for `waiter` and propagate the error.
-    #[serde(skip)]
    pub(crate) error_waiter: std::sync::Arc<SeqWait<Sequence, Sequence>>,

    /// The most recent error from a reconcile on this tenant
    /// TODO: generalize to an array of recent events
    /// TOOD: use a ArcSwap instead of mutex for faster reads?
-    #[serde(serialize_with = "read_mutex_content")]
    pub(crate) last_error: std::sync::Arc<std::sync::Mutex<String>>,

    /// If we have a pending compute notification that for some reason we weren't able to send,
@@ -108,112 +79,13 @@ pub(crate) struct TenantState {
    pub(crate) pending_compute_notification: bool,
 }

-#[derive(Default, Clone, Debug, Serialize)]
+#[derive(Default, Clone, Debug)]
 pub(crate) struct IntentState {
-    attached: Option<NodeId>,
-    secondary: Vec<NodeId>,
+    pub(crate) attached: Option<NodeId>,
+    pub(crate) secondary: Vec<NodeId>,
 }

-impl IntentState {
-    pub(crate) fn new() -> Self {
-        Self {
-            attached: None,
-            secondary: vec![],
-        }
-    }
-    pub(crate) fn single(scheduler: &mut Scheduler, node_id: Option<NodeId>) -> Self {
-        if let Some(node_id) = node_id {
-            scheduler.node_inc_ref(node_id);
-        }
-        Self {
-            attached: node_id,
-            secondary: vec![],
-        }
-    }
-
-    pub(crate) fn set_attached(&mut self, scheduler: &mut Scheduler, new_attached: Option<NodeId>) {
-        if self.attached != new_attached {
-            if let Some(old_attached) = self.attached.take() {
-                scheduler.node_dec_ref(old_attached);
-            }
-            if let Some(new_attached) = &new_attached {
-                scheduler.node_inc_ref(*new_attached);
-            }
-            self.attached = new_attached;
-        }
-    }
-
-    pub(crate) fn push_secondary(&mut self, scheduler: &mut Scheduler, new_secondary: NodeId) {
-        debug_assert!(!self.secondary.contains(&new_secondary));
-        scheduler.node_inc_ref(new_secondary);
-        self.secondary.push(new_secondary);
-    }
-
-    /// It is legal to call this with a node that is not currently a secondary: that is a no-op
-    pub(crate) fn remove_secondary(&mut self, scheduler: &mut Scheduler, node_id: NodeId) {
-        let index = self.secondary.iter().position(|n| *n == node_id);
-        if let Some(index) = index {
-            scheduler.node_dec_ref(node_id);
-            self.secondary.remove(index);
-        }
-    }
-
-    pub(crate) fn clear_secondary(&mut self, scheduler: &mut Scheduler) {
-        for secondary in self.secondary.drain(..) {
-            scheduler.node_dec_ref(secondary);
-        }
-    }
-
-    pub(crate) fn clear(&mut self, scheduler: &mut Scheduler) {
-        if let Some(old_attached) = self.attached.take() {
-            scheduler.node_dec_ref(old_attached);
-        }
-
-        self.clear_secondary(scheduler);
-    }
-
-    pub(crate) fn all_pageservers(&self) -> Vec<NodeId> {
-        let mut result = Vec::new();
-        if let Some(p) = self.attached {
-            result.push(p)
-        }
-
-        result.extend(self.secondary.iter().copied());
-
-        result
-    }
-
-    pub(crate) fn get_attached(&self) -> &Option<NodeId> {
-        &self.attached
-    }
-
-    pub(crate) fn get_secondary(&self) -> &Vec<NodeId> {
-        &self.secondary
-    }
-
-    /// When a node goes offline, we update intents to avoid using it
-    /// as their attached pageserver.
-    ///
-    /// Returns true if a change was made
-    pub(crate) fn notify_offline(&mut self, node_id: NodeId) -> bool {
-        if self.attached == Some(node_id) {
-            self.attached = None;
-            self.secondary.push(node_id);
-            true
-        } else {
-            false
-        }
-    }
-}
-
-impl Drop for IntentState {
-    fn drop(&mut self) {
-        // Must clear before dropping, to avoid leaving stale refcounts in the Scheduler
-        debug_assert!(self.attached.is_none() && self.secondary.is_empty());
-    }
-}
-
-#[derive(Default, Clone, Serialize)]
+#[derive(Default, Clone)]
 pub(crate) struct ObservedState {
    pub(crate) locations: HashMap<NodeId, ObservedStateLocation>,
 }
@@ -227,7 +99,7 @@ pub(crate) struct ObservedState {
 ///       what it is (e.g. we failed partway through configuring it)
 ///     * Instance exists with conf==Some: this tells us what we last successfully configured on this node,
 ///       and that configuration will still be present unless something external interfered.
-#[derive(Clone, Serialize)]
+#[derive(Clone)]
 pub(crate) struct ObservedStateLocation {
    /// If None, it means we do not know the status of this shard's location on this node, but
    /// we know that we might have some state on this node.
@@ -303,6 +175,46 @@ pub(crate) struct ReconcileResult {
    pub(crate) pending_compute_notification: bool,
 }

+impl IntentState {
+    pub(crate) fn new() -> Self {
+        Self {
+            attached: None,
+            secondary: vec![],
+        }
+    }
+    pub(crate) fn all_pageservers(&self) -> Vec<NodeId> {
+        let mut result = Vec::new();
+        if let Some(p) = self.attached {
+            result.push(p)
+        }
+
+        result.extend(self.secondary.iter().copied());
+
+        result
+    }
+
+    pub(crate) fn single(node_id: Option<NodeId>) -> Self {
+        Self {
+            attached: node_id,
+            secondary: vec![],
+        }
+    }
+
+    /// When a node goes offline, we update intents to avoid using it
+    /// as their attached pageserver.
+    ///
+    /// Returns true if a change was made
+    pub(crate) fn notify_offline(&mut self, node_id: NodeId) -> bool {
+        if self.attached == Some(node_id) {
+            self.attached = None;
+            self.secondary.push(node_id);
+            true
+        } else {
+            false
+        }
+    }
+}
+
 impl ObservedState {
    pub(crate) fn new() -> Self {
        Self {
@@ -326,7 +238,6 @@ impl TenantState {
            observed: ObservedState::default(),
            config: TenantConfig::default(),
            reconciler: None,
-            splitting: SplitState::Idle,
            sequence: Sequence(1),
            waiter: Arc::new(SeqWait::new(Sequence(0))),
            error_waiter: Arc::new(SeqWait::new(Sequence(0))),
@@ -396,12 +307,12 @@ impl TenantState {
                // Should have exactly one attached, and zero secondaries
                if self.intent.attached.is_none() {
                    let node_id = scheduler.schedule_shard(&used_pageservers)?;
-                    self.intent.set_attached(scheduler, Some(node_id));
+                    self.intent.attached = Some(node_id);
                    used_pageservers.push(node_id);
                    modified = true;
                }
                if !self.intent.secondary.is_empty() {
-                    self.intent.clear_secondary(scheduler);
+                    self.intent.secondary.clear();
                    modified = true;
                }
            }
@@ -409,14 +320,14 @@ impl TenantState {
                // Should have exactly one attached, and N secondaries
                if self.intent.attached.is_none() {
                    let node_id = scheduler.schedule_shard(&used_pageservers)?;
-                    self.intent.set_attached(scheduler, Some(node_id));
+                    self.intent.attached = Some(node_id);
                    used_pageservers.push(node_id);
                    modified = true;
                }

                while self.intent.secondary.len() < secondary_count {
                    let node_id = scheduler.schedule_shard(&used_pageservers)?;
-                    self.intent.push_secondary(scheduler, node_id);
+                    self.intent.secondary.push(node_id);
                    used_pageservers.push(node_id);
                    modified = true;
                }
@@ -424,12 +335,12 @@ impl TenantState {
            Detached => {
                // Should have no attached or secondary pageservers
                if self.intent.attached.is_some() {
-                    self.intent.set_attached(scheduler, None);
+                    self.intent.attached = None;
                    modified = true;
                }

                if !self.intent.secondary.is_empty() {
-                    self.intent.clear_secondary(scheduler);
+                    self.intent.secondary.clear();
                    modified = true;
                }
            }
@@ -504,8 +415,6 @@ impl TenantState {
        false
    }

-    #[allow(clippy::too_many_arguments)]
-    #[instrument(skip_all, fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug()))]
    pub(crate) fn maybe_reconcile(
        &mut self,
        result_tx: tokio::sync::mpsc::UnboundedSender<ReconcileResult>,
@@ -513,8 +422,6 @@ impl TenantState {
        compute_hook: &Arc<ComputeHook>,
        service_config: &service::Config,
        persistence: &Arc<Persistence>,
-        gate: &Gate,
-        cancel: &CancellationToken,
    ) -> Option<ReconcilerWaiter> {
        // If there are any ambiguous observed states, and the nodes they refer to are available,
        // we should reconcile to clean them up.
@@ -536,14 +443,6 @@ impl TenantState {
            return None;
        }

-        // If we are currently splitting, then never start a reconciler task: the splitting logic
-        // requires that shards are not interfered with while it runs. Do this check here rather than
-        // up top, so that we only log this message if we would otherwise have done a reconciliation.
-        if !matches!(self.splitting, SplitState::Idle) {
-            tracing::info!("Refusing to reconcile, splitting in progress");
-            return None;
-        }
-
        // Reconcile already in flight for the current sequence?
        if let Some(handle) = &self.reconciler {
            if handle.sequence == self.sequence {
@@ -561,101 +460,70 @@ impl TenantState {
        // doing our sequence's work.
        let old_handle = self.reconciler.take();

-        let Ok(gate_guard) = gate.enter() else {
-            // Shutting down, don't start a reconciler
-            return None;
-        };
-
-        let reconciler_cancel = cancel.child_token();
+        let cancel = CancellationToken::new();
        let mut reconciler = Reconciler {
            tenant_shard_id: self.tenant_shard_id,
            shard: self.shard,
            generation: self.generation,
-            intent: TargetState::from_intent(&self.intent),
+            intent: self.intent.clone(),
            config: self.config.clone(),
            observed: self.observed.clone(),
            pageservers: pageservers.clone(),
            compute_hook: compute_hook.clone(),
            service_config: service_config.clone(),
-            _gate_guard: gate_guard,
-            cancel: reconciler_cancel.clone(),
+            cancel: cancel.clone(),
            persistence: persistence.clone(),
            compute_notify_failure: false,
        };

        let reconcile_seq = self.sequence;

-        tracing::info!(seq=%reconcile_seq, "Spawning Reconciler for sequence {}", self.sequence);
+        tracing::info!("Spawning Reconciler for sequence {}", self.sequence);
        let must_notify = self.pending_compute_notification;
-        let reconciler_span = tracing::info_span!(parent: None, "reconciler", seq=%reconcile_seq,
-                                                        tenant_id=%reconciler.tenant_shard_id.tenant_id,
-                                                        shard_id=%reconciler.tenant_shard_id.shard_slug());
-        metrics::RECONCILER.spawned.inc();
-        let join_handle = tokio::task::spawn(
-            async move {
-                // Wait for any previous reconcile task to complete before we start
-                if let Some(old_handle) = old_handle {
-                    old_handle.cancel.cancel();
-                    if let Err(e) = old_handle.handle.await {
-                        // We can't do much with this other than log it: the task is done, so
-                        // we may proceed with our work.
-                        tracing::error!("Unexpected join error waiting for reconcile task: {e}");
-                    }
+        let join_handle = tokio::task::spawn(async move {
+            // Wait for any previous reconcile task to complete before we start
+            if let Some(old_handle) = old_handle {
+                old_handle.cancel.cancel();
+                if let Err(e) = old_handle.handle.await {
+                    // We can't do much with this other than log it: the task is done, so
+                    // we may proceed with our work.
+                    tracing::error!("Unexpected join error waiting for reconcile task: {e}");
                }
-
-                // Early check for cancellation before doing any work
-                // TODO: wrap all remote API operations in cancellation check
-                // as well.
-                if reconciler.cancel.is_cancelled() {
-                    metrics::RECONCILER
-                        .complete
-                        .with_label_values(&[metrics::ReconcilerMetrics::CANCEL])
-                        .inc();
-                    return;
-                }
-
-                // Attempt to make observed state match intent state
-                let result = reconciler.reconcile().await;
-
-                // If we know we had a pending compute notification from some previous action, send a notification irrespective
-                // of whether the above reconcile() did any work
-                if result.is_ok() && must_notify {
-                    // If this fails we will send the need to retry in [`ReconcileResult::pending_compute_notification`]
-                    reconciler.compute_notify().await.ok();
-                }
-
-                // Update result counter
-                match &result {
-                    Ok(_) => metrics::RECONCILER
-                        .complete
-                        .with_label_values(&[metrics::ReconcilerMetrics::SUCCESS]),
-                    Err(ReconcileError::Cancel) => metrics::RECONCILER
-                        .complete
-                        .with_label_values(&[metrics::ReconcilerMetrics::CANCEL]),
-                    Err(_) => metrics::RECONCILER
-                        .complete
-                        .with_label_values(&[metrics::ReconcilerMetrics::ERROR]),
-                }
-                .inc();
-
-                result_tx
-                    .send(ReconcileResult {
-                        sequence: reconcile_seq,
-                        result,
-                        tenant_shard_id: reconciler.tenant_shard_id,
-                        generation: reconciler.generation,
-                        observed: reconciler.observed,
-                        pending_compute_notification: reconciler.compute_notify_failure,
-                    })
-                    .ok();
            }
-            .instrument(reconciler_span),
-        );
+
+            // Early check for cancellation before doing any work
+            // TODO: wrap all remote API operations in cancellation check
+            // as well.
+            if reconciler.cancel.is_cancelled() {
+                return;
+            }
+
+            // Attempt to make observed state match intent state
+            let result = reconciler.reconcile().await;
+
+            // If we know we had a pending compute notification from some previous action, send a notification irrespective
+            // of whether the above reconcile() did any work
+            if result.is_ok() && must_notify {
+                // If this fails we will send the need to retry in [`ReconcileResult::pending_compute_notification`]
+                reconciler.compute_notify().await.ok();
+            }
+
+            result_tx
+                .send(ReconcileResult {
+                    sequence: reconcile_seq,
+                    result,
+                    tenant_shard_id: reconciler.tenant_shard_id,
+                    generation: reconciler.generation,
+                    observed: reconciler.observed,
+                    pending_compute_notification: reconciler.compute_notify_failure,
+                })
+                .ok();
+        });

        self.reconciler = Some(ReconcilerHandle {
            sequence: self.sequence,
            handle: join_handle,
-            cancel: reconciler_cancel,
+            cancel,
        });

        Some(ReconcilerWaiter {
@@ -680,18 +548,4 @@ impl TenantState {

        debug_assert!(!self.intent.all_pageservers().contains(&node_id));
    }
-
-    pub(crate) fn to_persistent(&self) -> TenantShardPersistence {
-        TenantShardPersistence {
-            tenant_id: self.tenant_shard_id.tenant_id.to_string(),
-            shard_number: self.tenant_shard_id.shard_number.0 as i32,
-            shard_count: self.tenant_shard_id.shard_count.literal() as i32,
-            shard_stripe_size: self.shard.stripe_size.0 as i32,
-            generation: self.generation.into().unwrap_or(0) as i32,
-            generation_pageserver: i64::MAX,
-            placement_policy: serde_json::to_string(&self.policy).unwrap(),
-            config: serde_json::to_string(&self.config).unwrap(),
-            splitting: SplitState::default(),
-        }
-    }
 }
--- a/control_plane/src/attachment_service.rs
+++ b/control_plane/src/attachment_service.rs
@@ -113,7 +113,7 @@ pub struct TenantShardMigrateRequest {
    pub node_id: NodeId,
 }

-#[derive(Serialize, Deserialize, Clone, Copy, Eq, PartialEq)]
+#[derive(Serialize, Deserialize, Clone, Copy)]
 pub enum NodeAvailability {
    // Normal, happy state
    Active,
@@ -137,7 +137,7 @@ impl FromStr for NodeAvailability {

 /// FIXME: this is a duplicate of the type in the attachment_service crate, because the
 /// type needs to be defined with diesel traits in there.
-#[derive(Serialize, Deserialize, Clone, Copy, Eq, PartialEq)]
+#[derive(Serialize, Deserialize, Clone, Copy)]
 pub enum NodeSchedulingPolicy {
    Active,
    Filling,
--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -450,7 +450,7 @@ async fn handle_tenant(
                    new_tenant_id: TenantShardId::unsharded(tenant_id),
                    generation: None,
                    shard_parameters: ShardParameters {
-                        count: ShardCount::new(shard_count),
+                        count: ShardCount(shard_count),
                        stripe_size: shard_stripe_size
                            .map(ShardStripeSize)
                            .unwrap_or(ShardParameters::DEFAULT_STRIPE_SIZE),
@@ -652,10 +652,6 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local
            let name = import_match
                .get_one::<String>("node-name")
                .ok_or_else(|| anyhow!("No node name provided"))?;
-            let update_catalog = import_match
-                .get_one::<bool>("update-catalog")
-                .cloned()
-                .unwrap_or_default();

            // Parse base inputs
            let base_tarfile = import_match
@@ -698,7 +694,6 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local
                None,
                pg_version,
                ComputeMode::Primary,
-                !update_catalog,
            )?;
            println!("Done");
        }
@@ -836,10 +831,6 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
                .get_one::<String>("endpoint_id")
                .map(String::to_string)
                .unwrap_or_else(|| format!("ep-{branch_name}"));
-            let update_catalog = sub_args
-                .get_one::<bool>("update-catalog")
-                .cloned()
-                .unwrap_or_default();

            let lsn = sub_args
                .get_one::<String>("lsn")
@@ -889,7 +880,6 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
                http_port,
                pg_version,
                mode,
-                !update_catalog,
            )?;
        }
        "start" => {
@@ -928,11 +918,6 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
                .get(endpoint_id.as_str())
                .ok_or_else(|| anyhow::anyhow!("endpoint {endpoint_id} not found"))?;

-            let create_test_user = sub_args
-                .get_one::<bool>("create-test-user")
-                .cloned()
-                .unwrap_or_default();
-
            cplane.check_conflicting_endpoints(
                endpoint.mode,
                endpoint.tenant_id,
@@ -987,7 +972,6 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
                    pageservers,
                    remote_ext_config,
                    stripe_size.0 as usize,
-                    create_test_user,
                )
                .await?;
        }
@@ -1473,18 +1457,6 @@ fn cli() -> Command {
        .required(false)
        .default_value("1");

-    let update_catalog = Arg::new("update-catalog")
-        .value_parser(value_parser!(bool))
-        .long("update-catalog")
-        .help("If set, will set up the catalog for neon_superuser")
-        .required(false);
-
-    let create_test_user = Arg::new("create-test-user")
-        .value_parser(value_parser!(bool))
-        .long("create-test-user")
-        .help("If set, will create test user `user` and `neondb` database. Requires `update-catalog = true`")
-        .required(false);
-
    Command::new("Neon CLI")
        .arg_required_else_help(true)
        .version(GIT_VERSION)
@@ -1545,7 +1517,6 @@ fn cli() -> Command {
                .arg(Arg::new("end-lsn").long("end-lsn")
                    .help("Lsn the basebackup ends at"))
                .arg(pg_version_arg.clone())
-                .arg(update_catalog.clone())
            )
        ).subcommand(
            Command::new("tenant")
@@ -1659,7 +1630,6 @@ fn cli() -> Command {
                            .required(false))
                    .arg(pg_version_arg.clone())
                    .arg(hot_standby_arg.clone())
-                    .arg(update_catalog)
                )
                .subcommand(Command::new("start")
                    .about("Start postgres.\n If the endpoint doesn't exist yet, it is created.")
@@ -1667,7 +1637,6 @@ fn cli() -> Command {
                    .arg(endpoint_pageserver_id_arg.clone())
                    .arg(safekeepers_arg)
                    .arg(remote_ext_config_args)
-                    .arg(create_test_user)
                )
                .subcommand(Command::new("reconfigure")
                            .about("Reconfigure the endpoint")
--- a/control_plane/src/endpoint.rs
+++ b/control_plane/src/endpoint.rs
@@ -41,15 +41,11 @@ use std::net::SocketAddr;
 use std::net::TcpStream;
 use std::path::PathBuf;
 use std::process::Command;
-use std::str::FromStr;
 use std::sync::Arc;
 use std::time::Duration;

 use anyhow::{anyhow, bail, Context, Result};
-use compute_api::spec::Database;
-use compute_api::spec::PgIdent;
 use compute_api::spec::RemoteExtSpec;
-use compute_api::spec::Role;
 use nix::sys::signal::kill;
 use nix::sys::signal::Signal;
 use serde::{Deserialize, Serialize};
@@ -126,7 +122,6 @@ impl ComputeControlPlane {
        http_port: Option<u16>,
        pg_version: u32,
        mode: ComputeMode,
-        skip_pg_catalog_updates: bool,
    ) -> Result<Arc<Endpoint>> {
        let pg_port = pg_port.unwrap_or_else(|| self.get_port());
        let http_port = http_port.unwrap_or_else(|| self.get_port() + 1);
@@ -145,7 +140,7 @@ impl ComputeControlPlane {
            // before and after start are the same. So, skip catalog updates,
            // with this we basically test a case of waking up an idle compute, where
            // we also skip catalog updates in the cloud.
-            skip_pg_catalog_updates,
+            skip_pg_catalog_updates: true,
            features: vec![],
        });

@@ -160,7 +155,7 @@ impl ComputeControlPlane {
                http_port,
                pg_port,
                pg_version,
-                skip_pg_catalog_updates,
+                skip_pg_catalog_updates: true,
                features: vec![],
            })?,
        )?;
@@ -505,7 +500,6 @@ impl Endpoint {
        pageservers: Vec<(Host, u16)>,
        remote_ext_config: Option<&String>,
        shard_stripe_size: usize,
-        create_test_user: bool,
    ) -> Result<()> {
        if self.status() == EndpointStatus::Running {
            anyhow::bail!("The endpoint is already running");
@@ -557,26 +551,8 @@ impl Endpoint {
                cluster_id: None, // project ID: not used
                name: None,       // project name: not used
                state: None,
-                roles: if create_test_user {
-                    vec![Role {
-                        name: PgIdent::from_str("test").unwrap(),
-                        encrypted_password: None,
-                        options: None,
-                    }]
-                } else {
-                    Vec::new()
-                },
-                databases: if create_test_user {
-                    vec![Database {
-                        name: PgIdent::from_str("neondb").unwrap(),
-                        owner: PgIdent::from_str("test").unwrap(),
-                        options: None,
-                        restrict_conn: false,
-                        invalid: false,
-                    }]
-                } else {
-                    Vec::new()
-                },
+                roles: vec![],
+                databases: vec![],
                settings: None,
                postgresql_conf: Some(postgresql_conf),
            },
@@ -601,16 +577,11 @@ impl Endpoint {
            .open(self.endpoint_path().join("compute.log"))?;

        // Launch compute_ctl
-        let conn_str = self.connstr("cloud_admin", "postgres");
-        println!("Starting postgres node at '{}'", conn_str);
-        if create_test_user {
-            let conn_str = self.connstr("user", "neondb");
-            println!("Also at '{}'", conn_str);
-        }
+        println!("Starting postgres node at '{}'", self.connstr());
        let mut cmd = Command::new(self.env.neon_distrib_dir.join("compute_ctl"));
        cmd.args(["--http-port", &self.http_address.port().to_string()])
            .args(["--pgdata", self.pgdata().to_str().unwrap()])
-            .args(["--connstr", &conn_str])
+            .args(["--connstr", &self.connstr()])
            .args([
                "--spec-path",
                self.endpoint_path().join("spec.json").to_str().unwrap(),
@@ -681,9 +652,7 @@ impl Endpoint {
                        }
                        ComputeStatus::Empty
                        | ComputeStatus::ConfigurationPending
-                        | ComputeStatus::Configuration
-                        | ComputeStatus::TerminationPending
-                        | ComputeStatus::Terminated => {
+                        | ComputeStatus::Configuration => {
                            bail!("unexpected compute status: {:?}", state.status)
                        }
                    }
@@ -814,13 +783,13 @@ impl Endpoint {
        Ok(())
    }

-    pub fn connstr(&self, user: &str, db_name: &str) -> String {
+    pub fn connstr(&self) -> String {
        format!(
            "postgresql://{}@{}:{}/{}",
-            user,
+            "cloud_admin",
            self.pg_address.ip(),
            self.pg_address.port(),
-            db_name
+            "postgres"
        )
    }
 }
--- a/control_plane/src/pageserver.rs
+++ b/control_plane/src/pageserver.rs
@@ -400,11 +400,6 @@ impl PageServerNode {
                .map(|x| x.parse::<bool>())
                .transpose()
                .context("Failed to parse 'lazy_slru_download' as bool")?,
-            timeline_get_throttle: settings
-                .remove("timeline_get_throttle")
-                .map(serde_json::from_str)
-                .transpose()
-                .context("parse `timeline_get_throttle` from json")?,
        };
        if !settings.is_empty() {
            bail!("Unrecognized tenant settings: {settings:?}")
@@ -510,11 +505,6 @@ impl PageServerNode {
                    .map(|x| x.parse::<bool>())
                    .transpose()
                    .context("Failed to parse 'lazy_slru_download' as bool")?,
-                timeline_get_throttle: settings
-                    .remove("timeline_get_throttle")
-                    .map(serde_json::from_str)
-                    .transpose()
-                    .context("parse `timeline_get_throttle` from json")?,
            }
        };

--- a/libs/compute_api/src/responses.rs
+++ b/libs/compute_api/src/responses.rs
@@ -52,10 +52,6 @@ pub enum ComputeStatus {
    // compute will exit soon or is waiting for
    // control-plane to terminate it.
    Failed,
-    // Termination requested
-    TerminationPending,
-    // Terminated Postgres
-    Terminated,
 }

 fn rfc3339_serialize<S>(x: &Option<DateTime<Utc>>, s: S) -> Result<S::Ok, S::Error>
--- a/libs/metrics/src/lib.rs
+++ b/libs/metrics/src/lib.rs
@@ -201,11 +201,6 @@ impl<P: Atomic> GenericCounterPairVec<P> {
    pub fn with_label_values(&self, vals: &[&str]) -> GenericCounterPair<P> {
        self.get_metric_with_label_values(vals).unwrap()
    }
-
-    pub fn remove_label_values(&self, res: &mut [Result<()>; 2], vals: &[&str]) {
-        res[0] = self.inc.remove_label_values(vals);
-        res[1] = self.dec.remove_label_values(vals);
-    }
 }

 impl<P: Atomic> GenericCounterPair<P> {
@@ -252,15 +247,6 @@ impl<P: Atomic> GenericCounterPair<P> {
    }
 }

-impl<P: Atomic> Clone for GenericCounterPair<P> {
-    fn clone(&self) -> Self {
-        Self {
-            inc: self.inc.clone(),
-            dec: self.dec.clone(),
-        }
-    }
-}
-
 /// Guard returned by [`GenericCounterPair::guard`]
 pub struct GenericCounterPairGuard<P: Atomic>(GenericCounter<P>);

--- a/libs/pageserver_api/Cargo.toml
+++ b/libs/pageserver_api/Cargo.toml
@@ -21,7 +21,6 @@ hex.workspace = true
 thiserror.workspace = true
 humantime-serde.workspace = true
 chrono.workspace = true
-itertools.workspace = true

 workspace_hack.workspace = true

--- a/libs/pageserver_api/src/keyspace.rs
+++ b/libs/pageserver_api/src/keyspace.rs
@@ -2,7 +2,6 @@ use postgres_ffi::BLCKSZ;
 use std::ops::Range;

 use crate::key::Key;
-use itertools::Itertools;

 ///
 /// Represents a set of Keys, in a compact form.
@@ -64,36 +63,9 @@ impl KeySpace {
        KeyPartitioning { parts }
    }

-    /// Merge another keyspace into the current one.
-    /// Note: the keyspaces must not ovelap (enforced via assertions)
-    pub fn merge(&mut self, other: &KeySpace) {
-        let all_ranges = self
-            .ranges
-            .iter()
-            .merge_by(other.ranges.iter(), |lhs, rhs| lhs.start < rhs.start);
-
-        let mut accum = KeySpaceAccum::new();
-        let mut prev: Option<&Range<Key>> = None;
-        for range in all_ranges {
-            if let Some(prev) = prev {
-                let overlap =
-                    std::cmp::max(range.start, prev.start) < std::cmp::min(range.end, prev.end);
-                assert!(
-                    !overlap,
-                    "Attempt to merge ovelapping keyspaces: {:?} overlaps {:?}",
-                    prev, range
-                );
-            }
-
-            accum.add_range(range.clone());
-            prev = Some(range);
-        }
-
-        self.ranges = accum.to_keyspace().ranges;
-    }
-
-    /// Remove all keys in `other` from `self`.
-    /// This can involve splitting or removing of existing ranges.
+    /// Update the keyspace such that it doesn't contain any range
+    /// that is overlapping with `other`. This can involve splitting or
+    /// removing of existing ranges.
    pub fn remove_overlapping_with(&mut self, other: &KeySpace) {
        let (self_start, self_end) = match (self.start(), self.end()) {
            (Some(start), Some(end)) => (start, end),
@@ -248,7 +220,16 @@ impl KeySpaceAccum {
    }

    pub fn consume_keyspace(&mut self) -> KeySpace {
-        std::mem::take(self).to_keyspace()
+        if let Some(accum) = self.accum.take() {
+            self.ranges.push(accum);
+        }
+
+        let mut prev_accum = KeySpaceAccum::new();
+        std::mem::swap(self, &mut prev_accum);
+
+        KeySpace {
+            ranges: prev_accum.ranges,
+        }
    }

    pub fn size(&self) -> u64 {
@@ -298,13 +279,6 @@ impl KeySpaceRandomAccum {
        }
        KeySpace { ranges }
    }
-
-    pub fn consume_keyspace(&mut self) -> KeySpace {
-        let mut prev_accum = KeySpaceRandomAccum::new();
-        std::mem::swap(self, &mut prev_accum);
-
-        prev_accum.to_keyspace()
-    }
 }

 pub fn key_range_size(key_range: &Range<Key>) -> u32 {
--- a/libs/pageserver_api/src/models.rs
+++ b/libs/pageserver_api/src/models.rs
@@ -180,7 +180,7 @@ pub enum TimelineState {
    Broken { reason: String, backtrace: String },
 }

-#[derive(Serialize, Deserialize, Clone)]
+#[derive(Serialize, Deserialize)]
 pub struct TimelineCreateRequest {
    pub new_timeline_id: TimelineId,
    #[serde(default)]
@@ -214,14 +214,14 @@ impl ShardParameters {
    pub const DEFAULT_STRIPE_SIZE: ShardStripeSize = ShardStripeSize(256 * 1024 / 8);

    pub fn is_unsharded(&self) -> bool {
-        self.count.is_unsharded()
+        self.count == ShardCount(0)
    }
 }

 impl Default for ShardParameters {
    fn default() -> Self {
        Self {
-            count: ShardCount::new(0),
+            count: ShardCount(0),
            stripe_size: Self::DEFAULT_STRIPE_SIZE,
        }
    }
@@ -283,7 +283,6 @@ pub struct TenantConfig {
    pub gc_feedback: Option<bool>,
    pub heatmap_period: Option<String>,
    pub lazy_slru_download: Option<bool>,
-    pub timeline_get_throttle: Option<ThrottleConfig>,
 }

 #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
@@ -310,35 +309,6 @@ pub struct EvictionPolicyLayerAccessThreshold {
    pub threshold: Duration,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
-pub struct ThrottleConfig {
-    pub task_kinds: Vec<String>, // TaskKind
-    pub initial: usize,
-    #[serde(with = "humantime_serde")]
-    pub refill_interval: Duration,
-    pub refill_amount: NonZeroUsize,
-    pub max: usize,
-    pub fair: bool,
-}
-
-impl ThrottleConfig {
-    pub fn disabled() -> Self {
-        Self {
-            task_kinds: vec![], // effectively disables the throttle
-            // other values don't matter with emtpy `task_kinds`.
-            initial: 0,
-            refill_interval: Duration::from_millis(1),
-            refill_amount: NonZeroUsize::new(1).unwrap(),
-            max: 1,
-            fair: true,
-        }
-    }
-    /// The requests per second allowed  by the given config.
-    pub fn steady_rps(&self) -> f64 {
-        (self.refill_amount.get() as f64) / (self.refill_interval.as_secs_f64()) / 1e3
-    }
-}
-
 /// A flattened analog of a `pagesever::tenant::LocationMode`, which
 /// lists out all possible states (and the virtual "Detached" state)
 /// in a flat form rather than using rust-style enums.
@@ -720,7 +690,6 @@ pub enum PagestreamFeMessage {
    GetPage(PagestreamGetPageRequest),
    DbSize(PagestreamDbSizeRequest),
    GetSlruSegment(PagestreamGetSlruSegmentRequest),
-    GetVectoredPages(PagestreamGetVectoredPagesRequest),
 }

 // Wrapped in libpq CopyData
@@ -732,7 +701,6 @@ pub enum PagestreamBeMessage {
    Error(PagestreamErrorResponse),
    DbSize(PagestreamDbSizeResponse),
    GetSlruSegment(PagestreamGetSlruSegmentResponse),
-    GetVectoredPages(PagestreamGetVectoredPagesResponse),
 }

 // Keep in sync with `pagestore_client.h`
@@ -744,7 +712,6 @@ enum PagestreamBeMessageTag {
    Error = 103,
    DbSize = 104,
    GetSlruSegment = 105,
-    GetVectoredPages = 106,
 }
 impl TryFrom<u8> for PagestreamBeMessageTag {
    type Error = u8;
@@ -756,7 +723,6 @@ impl TryFrom<u8> for PagestreamBeMessageTag {
            103 => Ok(PagestreamBeMessageTag::Error),
            104 => Ok(PagestreamBeMessageTag::DbSize),
            105 => Ok(PagestreamBeMessageTag::GetSlruSegment),
-            106 => Ok(PagestreamBeMessageTag::GetVectoredPages),
            _ => Err(value),
        }
    }
@@ -799,15 +765,6 @@ pub struct PagestreamGetSlruSegmentRequest {
    pub segno: u32,
 }

-#[derive(Debug, PartialEq, Eq)]
-pub struct PagestreamGetVectoredPagesRequest {
-    pub latest: bool,
-    pub lsn: Lsn,
-    pub rel: RelTag,
-    pub blkno: u32,
-    pub count: u8,
-}
-
 #[derive(Debug)]
 pub struct PagestreamExistsResponse {
    pub exists: bool,
@@ -828,12 +785,6 @@ pub struct PagestreamGetSlruSegmentResponse {
    pub segment: Bytes,
 }

-#[derive(Debug)]
-pub struct PagestreamGetVectoredPagesResponse {
-    pub page_count: u8,
-    pub pages: Bytes,
-}
-
 #[derive(Debug)]
 pub struct PagestreamErrorResponse {
    pub message: String,
@@ -905,18 +856,6 @@ impl PagestreamFeMessage {
                bytes.put_u8(req.kind);
                bytes.put_u32(req.segno);
            }
-
-            Self::GetVectoredPages(req) => {
-                bytes.put_u8(5);
-                bytes.put_u8(u8::from(req.latest));
-                bytes.put_u64(req.lsn.0);
-                bytes.put_u32(req.rel.spcnode);
-                bytes.put_u32(req.rel.dbnode);
-                bytes.put_u32(req.rel.relnode);
-                bytes.put_u8(req.rel.forknum);
-                bytes.put_u32(req.blkno);
-                bytes.put_u8(req.count);
-            }
        }

        bytes.into()
@@ -975,20 +914,6 @@ impl PagestreamFeMessage {
                    segno: body.read_u32::<BigEndian>()?,
                },
            )),
-            5 => Ok(PagestreamFeMessage::GetVectoredPages(
-                PagestreamGetVectoredPagesRequest {
-                    latest: body.read_u8()? != 0,
-                    lsn: Lsn::from(body.read_u64::<BigEndian>()?),
-                    rel: RelTag {
-                        spcnode: body.read_u32::<BigEndian>()?,
-                        dbnode: body.read_u32::<BigEndian>()?,
-                        relnode: body.read_u32::<BigEndian>()?,
-                        forknum: body.read_u8()?,
-                    },
-                    blkno: body.read_u32::<BigEndian>()?,
-                    count: body.read_u8()?,
-                },
-            )),
            _ => bail!("unknown smgr message tag: {:?}", msg_tag),
        }
    }
@@ -1030,12 +955,6 @@ impl PagestreamBeMessage {
                bytes.put_u32((resp.segment.len() / BLCKSZ as usize) as u32);
                bytes.put(&resp.segment[..]);
            }
-
-            Self::GetVectoredPages(resp) => {
-                bytes.put_u8(Tag::GetVectoredPages as u8);
-                bytes.put_u8(resp.page_count);
-                bytes.put(&resp.pages[..]);
-            }
        }

        bytes.into()
@@ -1084,15 +1003,6 @@ impl PagestreamBeMessage {
                        segment: segment.into(),
                    })
                }
-                Tag::GetVectoredPages => {
-                    let page_count = buf.read_u8()?;
-                    let mut pages = vec![0; page_count as usize * 8192];
-                    buf.read_exact(&mut pages)?;
-                    Self::GetVectoredPages(PagestreamGetVectoredPagesResponse {
-                        page_count,
-                        pages: pages.into(),
-                    })
-                }
            };
        let remaining = buf.into_inner();
        if !remaining.is_empty() {
@@ -1112,7 +1022,6 @@ impl PagestreamBeMessage {
            Self::Error(_) => "Error",
            Self::DbSize(_) => "DbSize",
            Self::GetSlruSegment(_) => "GetSlruSegment",
-            Self::GetVectoredPages(_) => "GetVectoredPages",
        }
    }
 }
--- a/libs/pageserver_api/src/shard.rs
+++ b/libs/pageserver_api/src/shard.rs
@@ -13,41 +13,10 @@ use utils::id::TenantId;
 pub struct ShardNumber(pub u8);

 #[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Copy, Serialize, Deserialize, Debug, Hash)]
-pub struct ShardCount(u8);
+pub struct ShardCount(pub u8);

 impl ShardCount {
    pub const MAX: Self = Self(u8::MAX);
-
-    /// The internal value of a ShardCount may be zero, which means "1 shard, but use
-    /// legacy format for TenantShardId that excludes the shard suffix", also known
-    /// as `TenantShardId::unsharded`.
-    ///
-    /// This method returns the actual number of shards, i.e. if our internal value is
-    /// zero, we return 1 (unsharded tenants have 1 shard).
-    pub fn count(&self) -> u8 {
-        if self.0 > 0 {
-            self.0
-        } else {
-            1
-        }
-    }
-
-    /// The literal internal value: this is **not** the number of shards in the
-    /// tenant, as we have a special zero value for legacy unsharded tenants.  Use
-    /// [`Self::count`] if you want to know the cardinality of shards.
-    pub fn literal(&self) -> u8 {
-        self.0
-    }
-
-    pub fn is_unsharded(&self) -> bool {
-        self.0 == 0
-    }
-
-    /// `v` may be zero, or the number of shards in the tenant.  `v` is what
-    /// [`Self::literal`] would return.
-    pub fn new(val: u8) -> Self {
-        Self(val)
-    }
 }

 impl ShardNumber {
@@ -117,7 +86,7 @@ impl TenantShardId {
    }

    pub fn is_unsharded(&self) -> bool {
-        self.shard_number == ShardNumber(0) && self.shard_count.is_unsharded()
+        self.shard_number == ShardNumber(0) && self.shard_count == ShardCount(0)
    }

    /// Convenience for dropping the tenant_id and just getting the ShardIndex: this
@@ -502,12 +471,10 @@ impl ShardIdentity {
    pub fn is_key_disposable(&self, key: &Key) -> bool {
        if key_is_shard0(key) {
            // Q: Why can't we dispose of shard0 content if we're not shard 0?
-            // A1: because the WAL ingestion logic currently ingests some shard 0
-            //     content on all shards, even though it's only read on shard 0.  If we
-            //     dropped it, then subsequent WAL ingest to these keys would encounter
-            //     an error.
-            // A2: because key_is_shard0 also covers relation size keys, which are written
-            //     on all shards even though they're only maintained accurately on shard 0.
+            // A: because the WAL ingestion logic currently ingests some shard 0
+            //    content on all shards, even though it's only read on shard 0.  If we
+            //    dropped it, then subsequent WAL ingest to these keys would encounter
+            //    an error.
            false
        } else {
            !self.is_key_local(key)
--- a/libs/utils/Cargo.toml
+++ b/libs/utils/Cargo.toml
@@ -25,7 +25,6 @@ hyper = { workspace = true, features = ["full"] }
 fail.workspace = true
 futures = { workspace = true}
 jsonwebtoken.workspace = true
-leaky-bucket.workspace = true
 nix.workspace = true
 once_cell.workspace = true
 pin-project-lite.workspace = true
--- a/libs/utils/src/auth.rs
+++ b/libs/utils/src/auth.rs
@@ -29,9 +29,6 @@ pub enum Scope {
    // Should only be used e.g. for status check.
    // Currently also used for connection from any pageserver to any safekeeper.
    SafekeeperData,
-    // The scope used by pageservers in upcalls to storage controller and cloud control plane
-    #[serde(rename = "generations_api")]
-    GenerationsApi,
 }

 /// JWT payload. See docs/authentication.md for the format
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -12,7 +12,6 @@ testing = ["fail/failpoints"]

 [dependencies]
 anyhow.workspace = true
-arc-swap.workspace = true
 async-compression.workspace = true
 async-stream.workspace = true
 async-trait.workspace = true
@@ -22,6 +21,7 @@ camino.workspace = true
 camino-tempfile.workspace = true
 chrono = { workspace = true, features = ["serde"] }
 clap = { workspace = true, features = ["string"] }
+close_fds.workspace = true
 const_format.workspace = true
 consumption_metrics.workspace = true
 crc32c.workspace = true
@@ -36,7 +36,6 @@ humantime.workspace = true
 humantime-serde.workspace = true
 hyper.workspace = true
 itertools.workspace = true
-leaky-bucket.workspace = true
 md5.workspace = true
 nix.workspace = true
 # hack to get the number of worker threads tokio uses
@@ -84,7 +83,7 @@ workspace_hack.workspace = true
 reqwest.workspace = true
 rpds.workspace = true
 enum-map.workspace = true
-enumset = { workspace = true, features = ["serde"]}
+enumset.workspace = true
 strum.workspace = true
 strum_macros.workspace = true

--- a/pageserver/benches/bench_walredo.rs
+++ b/pageserver/benches/bench_walredo.rs
@@ -6,28 +6,14 @@
 //! There are two sets of inputs; `short` and `medium`. They were collected on postgres v14 by
 //! logging what happens when a sequential scan is requested on a small table, then picking out two
 //! suitable from logs.
-//!
-//!
-//! Reference data (git blame to see commit) on an i3en.3xlarge
-// ```text
-//! short/short/1           time:   [39.175 µs 39.348 µs 39.536 µs]
-//! short/short/2           time:   [51.227 µs 51.487 µs 51.755 µs]
-//! short/short/4           time:   [76.048 µs 76.362 µs 76.674 µs]
-//! short/short/8           time:   [128.94 µs 129.82 µs 130.74 µs]
-//! short/short/16          time:   [227.84 µs 229.00 µs 230.28 µs]
-//! short/short/32          time:   [455.97 µs 457.81 µs 459.90 µs]
-//! short/short/64          time:   [902.46 µs 904.84 µs 907.32 µs]
-//! short/short/128         time:   [1.7416 ms 1.7487 ms 1.7561 ms]
-//! ``

-use std::sync::Arc;
+use std::sync::{Arc, Barrier};

 use bytes::{Buf, Bytes};
 use pageserver::{
    config::PageServerConf, repository::Key, walrecord::NeonWalRecord, walredo::PostgresRedoManager,
 };
 use pageserver_api::shard::TenantShardId;
-use tokio::task::JoinSet;
 use utils::{id::TenantId, lsn::Lsn};

 use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
@@ -53,11 +39,11 @@ fn redo_scenarios(c: &mut Criterion) {
            .build()
            .unwrap();
        tracing::info!("executing first");
-        rt.block_on(short().execute(&manager)).unwrap();
+        short().execute(rt.handle(), &manager).unwrap();
        tracing::info!("first executed");
    }

-    let thread_counts = [1, 2, 4, 8, 16, 32, 64, 128];
+    let thread_counts = [1, 2, 4, 8, 16];

    let mut group = c.benchmark_group("short");
    group.sampling_mode(criterion::SamplingMode::Flat);
@@ -88,69 +74,114 @@ fn redo_scenarios(c: &mut Criterion) {
    drop(group);
 }

-/// Sets up a multi-threaded tokio runtime with default worker thread count,
-/// then, spawn `requesters` tasks that repeatedly:
-/// - get input from `input_factor()`
-/// - call `manager.request_redo()` with their input
-///
-/// This stress-tests the scalability of a single walredo manager at high tokio-level concurrency.
-///
-/// Using tokio's default worker thread count means the results will differ on machines
-/// with different core countrs. We don't care about that, the performance will always
-/// be different on different hardware. To compare performance of different software versions,
-/// use the same hardware.
+/// Sets up `threads` number of requesters to `request_redo`, with the given input.
 fn add_multithreaded_walredo_requesters(
    b: &mut criterion::Bencher,
-    nrequesters: usize,
+    threads: u32,
    manager: &Arc<PostgresRedoManager>,
    input_factory: fn() -> Request,
 ) {
-    assert_ne!(nrequesters, 0);
+    assert_ne!(threads, 0);

-    let rt = tokio::runtime::Builder::new_multi_thread()
-        .enable_all()
-        .build()
-        .unwrap();
+    if threads == 1 {
+        let rt = tokio::runtime::Builder::new_current_thread()
+            .enable_all()
+            .build()
+            .unwrap();
+        let handle = rt.handle();
+        b.iter_batched_ref(
+            || Some(input_factory()),
+            |input| execute_all(input.take(), handle, manager),
+            criterion::BatchSize::PerIteration,
+        );
+    } else {
+        let (work_tx, work_rx) = std::sync::mpsc::sync_channel(threads as usize);

-    let barrier = Arc::new(tokio::sync::Barrier::new(nrequesters + 1));
+        let work_rx = std::sync::Arc::new(std::sync::Mutex::new(work_rx));

-    let mut requesters = JoinSet::new();
-    for _ in 0..nrequesters {
-        let _entered = rt.enter();
-        let manager = manager.clone();
-        let barrier = barrier.clone();
-        requesters.spawn(async move {
-            loop {
-                let input = input_factory();
-                barrier.wait().await;
-                let page = input.execute(&manager).await.unwrap();
-                assert_eq!(page.remaining(), 8192);
-                barrier.wait().await;
-            }
-        });
+        let barrier = Arc::new(Barrier::new(threads as usize + 1));
+
+        let jhs = (0..threads)
+            .map(|_| {
+                std::thread::spawn({
+                    let manager = manager.clone();
+                    let barrier = barrier.clone();
+                    let work_rx = work_rx.clone();
+                    move || {
+                        let rt = tokio::runtime::Builder::new_current_thread()
+                            .enable_all()
+                            .build()
+                            .unwrap();
+                        let handle = rt.handle();
+                        loop {
+                            // queue up and wait if we want to go another round
+                            if work_rx.lock().unwrap().recv().is_err() {
+                                break;
+                            }
+
+                            let input = Some(input_factory());
+
+                            barrier.wait();
+
+                            execute_all(input, handle, &manager).unwrap();
+
+                            barrier.wait();
+                        }
+                    }
+                })
+            })
+            .collect::<Vec<_>>();
+
+        let _jhs = JoinOnDrop(jhs);
+
+        b.iter_batched(
+            || {
+                for _ in 0..threads {
+                    work_tx.send(()).unwrap()
+                }
+            },
+            |()| {
+                // start the work
+                barrier.wait();
+
+                // wait for work to complete
+                barrier.wait();
+            },
+            criterion::BatchSize::PerIteration,
+        );
+
+        drop(work_tx);
    }
+}

-    let do_one_iteration = || {
-        rt.block_on(async {
-            barrier.wait().await;
-            // wait for work to complete
-            barrier.wait().await;
-        })
-    };
+struct JoinOnDrop(Vec<std::thread::JoinHandle<()>>);

-    b.iter_batched(
-        || {
-            // warmup
-            do_one_iteration();
-        },
-        |()| {
-            // work loop
-            do_one_iteration();
-        },
-        criterion::BatchSize::PerIteration,
-    );
+impl Drop for JoinOnDrop {
+    // it's not really needless because we want to join all, then check for panics
+    #[allow(clippy::needless_collect)]
+    fn drop(&mut self) {
+        // first join all
+        let results = self.0.drain(..).map(|jh| jh.join()).collect::<Vec<_>>();
+        // then check the results; panicking here is not great, but it does get the message across
+        // to the user, and sets an exit value.
+        results.into_iter().try_for_each(|res| res).unwrap();
+    }
+}

-    rt.block_on(requesters.shutdown());
+fn execute_all<I>(
+    input: I,
+    handle: &tokio::runtime::Handle,
+    manager: &PostgresRedoManager,
+) -> anyhow::Result<()>
+where
+    I: IntoIterator<Item = Request>,
+{
+    // just fire all requests as fast as possible
+    input.into_iter().try_for_each(|req| {
+        let page = req.execute(handle, manager)?;
+        assert_eq!(page.remaining(), 8192);
+        anyhow::Ok(())
+    })
 }

 criterion_group!(benches, redo_scenarios);
@@ -462,7 +493,11 @@ struct Request {
 }

 impl Request {
-    async fn execute(self, manager: &PostgresRedoManager) -> anyhow::Result<Bytes> {
+    fn execute(
+        self,
+        rt: &tokio::runtime::Handle,
+        manager: &PostgresRedoManager,
+    ) -> anyhow::Result<Bytes> {
        let Request {
            key,
            lsn,
@@ -471,8 +506,6 @@ impl Request {
            pg_version,
        } = self;

-        manager
-            .request_redo(key, lsn, base_img, records, pg_version)
-            .await
+        rt.block_on(manager.request_redo(key, lsn, base_img, records, pg_version))
    }
 }
--- a/pageserver/client/src/page_service.rs
+++ b/pageserver/client/src/page_service.rs
@@ -4,8 +4,7 @@ use futures::SinkExt;
 use pageserver_api::{
    models::{
        PagestreamBeMessage, PagestreamFeMessage, PagestreamGetPageRequest,
-        PagestreamGetPageResponse, PagestreamGetVectoredPagesRequest,
-        PagestreamGetVectoredPagesResponse,
+        PagestreamGetPageResponse,
    },
    reltag::RelTag,
 };
@@ -158,39 +157,7 @@ impl PagestreamClient {
            PagestreamBeMessage::Exists(_)
            | PagestreamBeMessage::Nblocks(_)
            | PagestreamBeMessage::DbSize(_)
-            | PagestreamBeMessage::GetSlruSegment(_)
-            | PagestreamBeMessage::GetVectoredPages(_) => {
-                anyhow::bail!(
-                    "unexpected be message kind in response to getpage request: {}",
-                    msg.kind()
-                )
-            }
-        }
-    }
-
-    pub async fn getpages(
-        &mut self,
-        req: PagestreamGetVectoredPagesRequest,
-    ) -> anyhow::Result<PagestreamGetVectoredPagesResponse> {
-        let req = PagestreamFeMessage::GetVectoredPages(req);
-        let req: bytes::Bytes = req.serialize();
-        // let mut req = tokio_util::io::ReaderStream::new(&req);
-        let mut req = tokio_stream::once(Ok(req));
-
-        self.copy_both.send_all(&mut req).await?;
-
-        let next: Option<Result<bytes::Bytes, _>> = self.copy_both.next().await;
-        let next: bytes::Bytes = next.unwrap()?;
-
-        let msg = PagestreamBeMessage::deserialize(next)?;
-        match msg {
-            PagestreamBeMessage::GetVectoredPages(p) => Ok(p),
-            PagestreamBeMessage::Error(e) => anyhow::bail!("Error: {:?}", e),
-            PagestreamBeMessage::Exists(_)
-            | PagestreamBeMessage::Nblocks(_)
-            | PagestreamBeMessage::DbSize(_)
-            | PagestreamBeMessage::GetSlruSegment(_)
-            | PagestreamBeMessage::GetPage(_) => {
+            | PagestreamBeMessage::GetSlruSegment(_) => {
                anyhow::bail!(
                    "unexpected be message kind in response to getpage request: {}",
                    msg.kind()
--- a/pageserver/pagebench/src/cmd/basebackup.rs
+++ b/pageserver/pagebench/src/cmd/basebackup.rs
@@ -8,7 +8,7 @@ use utils::lsn::Lsn;
 use rand::prelude::*;
 use tokio::sync::Barrier;
 use tokio::task::JoinSet;
-use tracing::{info, instrument};
+use tracing::{debug, info, instrument};

 use std::collections::HashMap;
 use std::num::NonZeroUsize;
@@ -28,8 +28,6 @@ pub(crate) struct Args {
    #[clap(long, default_value = "localhost:64000")]
    page_service_host_port: String,
    #[clap(long)]
-    page_service_connstring: Option<String>,
-    #[clap(long)]
    pageserver_jwt: Option<String>,
    #[clap(long, default_value = "1")]
    num_clients: NonZeroUsize,
@@ -232,17 +230,12 @@ async fn client(
 ) {
    start_work_barrier.wait().await;

-    let connstr = match &args.page_service_connstring {
-        Some(connstr) => connstr.clone(),
-        None => crate::util::connstring::connstring(
-            &args.page_service_host_port,
-            args.pageserver_jwt.as_deref(),
-        ),
-    };
-
-    let client = pageserver_client::page_service::Client::new(connstr)
-        .await
-        .unwrap();
+    let client = pageserver_client::page_service::Client::new(crate::util::connstring::connstring(
+        &args.page_service_host_port,
+        args.pageserver_jwt.as_deref(),
+    ))
+    .await
+    .unwrap();

    while let Some(Work { lsn, gzip }) = work.recv().await {
        let start = Instant::now();
@@ -270,7 +263,7 @@ async fn client(
                }
            })
            .await;
-        info!("basebackup size is {} bytes", size.load(Ordering::Relaxed));
+        debug!("basebackup size is {} bytes", size.load(Ordering::Relaxed));
        let elapsed = start.elapsed();
        live_stats.inc();
        STATS.with(|stats| {
--- a/pageserver/pagebench/src/cmd/getpage_latest_lsn.rs
+++ b/pageserver/pagebench/src/cmd/getpage_latest_lsn.rs
@@ -1,18 +1,20 @@
 use anyhow::Context;
 use camino::Utf8PathBuf;
+use futures::future::join_all;
 use pageserver_api::key::{is_rel_block_key, key_to_rel_block, Key};
 use pageserver_api::keyspace::KeySpaceAccum;
-use pageserver_api::models::{PagestreamGetPageRequest, PagestreamGetVectoredPagesRequest};
+use pageserver_api::models::PagestreamGetPageRequest;

 use tokio_util::sync::CancellationToken;
 use utils::id::TenantTimelineId;
 use utils::lsn::Lsn;

 use rand::prelude::*;
+use tokio::sync::Barrier;
 use tokio::task::JoinSet;
-use tracing::info;
+use tracing::{info, instrument};

-use std::collections::HashSet;
+use std::collections::{HashMap, HashSet};
 use std::future::Future;
 use std::num::NonZeroUsize;
 use std::pin::Pin;
@@ -36,12 +38,8 @@ pub(crate) struct Args {
    num_clients: NonZeroUsize,
    #[clap(long)]
    runtime: Option<humantime::Duration>,
-    /// Each client sends requests at the given rate.
-    ///
-    /// If a request takes too long and we should be issuing a new request already,
-    /// we skip that request and account it as `MISSED`.
    #[clap(long)]
-    per_client_rate: Option<usize>,
+    per_target_rate_limit: Option<usize>,
    /// Probability for sending `latest=true` in the request (uniform distribution).
    #[clap(long, default_value = "1")]
    req_latest_probability: f64,
@@ -57,24 +55,18 @@ pub(crate) struct Args {
    /// [`pageserver_api::models::virtual_file::IoEngineKind`].
    #[clap(long)]
    set_io_engine: Option<pageserver_api::models::virtual_file::IoEngineKind>,
-    #[clap(long)]
-    vectored_read_size: Option<u8>,
    targets: Option<Vec<TenantTimelineId>>,
 }

 #[derive(Debug, Default)]
 struct LiveStats {
    completed_requests: AtomicU64,
-    missed: AtomicU64,
 }

 impl LiveStats {
-    fn request_done(&self) {
+    fn inc(&self) {
        self.completed_requests.fetch_add(1, Ordering::Relaxed);
    }
-    fn missed(&self, n: u64) {
-        self.missed.fetch_add(n, Ordering::Relaxed);
-    }
 }

 #[derive(Clone, serde::Serialize, serde::Deserialize)]
@@ -228,12 +220,13 @@ async fn main_impl(

    let live_stats = Arc::new(LiveStats::default());

+    let num_client_tasks = args.num_clients.get() * timelines.len();
    let num_live_stats_dump = 1;
-    let num_work_sender_tasks = args.num_clients.get() * timelines.len();
+    let num_work_sender_tasks = 1;
    let num_main_impl = 1;

    let start_work_barrier = Arc::new(tokio::sync::Barrier::new(
-        num_live_stats_dump + num_work_sender_tasks + num_main_impl,
+        num_client_tasks + num_live_stats_dump + num_work_sender_tasks + num_main_impl,
    ));

    tokio::spawn({
@@ -245,12 +238,10 @@ async fn main_impl(
                let start = std::time::Instant::now();
                tokio::time::sleep(std::time::Duration::from_secs(1)).await;
                let completed_requests = stats.completed_requests.swap(0, Ordering::Relaxed);
-                let missed = stats.missed.swap(0, Ordering::Relaxed);
                let elapsed = start.elapsed();
                info!(
-                    "RPS: {:.0}   MISSED: {:.0}",
-                    completed_requests as f64 / elapsed.as_secs_f64(),
-                    missed as f64 / elapsed.as_secs_f64()
+                    "RPS: {:.0}",
+                    completed_requests as f64 / elapsed.as_secs_f64()
                );
            }
        }
@@ -258,128 +249,127 @@ async fn main_impl(

    let cancel = CancellationToken::new();

-    let rps_period = args
-        .per_client_rate
-        .map(|rps_limit| Duration::from_secs_f64(1.0 / (rps_limit as f64)));
-    let make_worker: &dyn Fn(WorkerId) -> Pin<Box<dyn Send + Future<Output = ()>>> = &|worker_id| {
-        let live_stats = live_stats.clone();
-        let start_work_barrier = start_work_barrier.clone();
-        let ranges: Vec<KeyRange> = all_ranges
-            .iter()
-            .filter(|r| r.timeline == worker_id.timeline)
-            .cloned()
-            .collect();
-        let weights =
-            rand::distributions::weighted::WeightedIndex::new(ranges.iter().map(|v| v.len()))
-                .unwrap();
-
-        let cancel = cancel.clone();
-        Box::pin(async move {
-            let client =
-                pageserver_client::page_service::Client::new(args.page_service_connstring.clone())
-                    .await
-                    .unwrap();
-            let mut client = client
-                .pagestream(worker_id.timeline.tenant_id, worker_id.timeline.timeline_id)
-                .await
-                .unwrap();
-
-            start_work_barrier.wait().await;
-            let client_start = Instant::now();
-            let mut ticks_processed = 0;
-            while !cancel.is_cancelled() {
-                // Detect if a request took longer than the RPS rate
-                if let Some(period) = &rps_period {
-                    let periods_passed_until_now =
-                        usize::try_from(client_start.elapsed().as_micros() / period.as_micros())
-                            .unwrap();
-
-                    if periods_passed_until_now > ticks_processed {
-                        live_stats.missed((periods_passed_until_now - ticks_processed) as u64);
-                    }
-                    ticks_processed = periods_passed_until_now;
-                }
-
-                let start = Instant::now();
-                if let Some(size) = args.vectored_read_size {
-                    assert!(size > 0);
-                    let req = {
-                        let mut rng = rand::thread_rng();
-                        let r = &ranges[weights.sample(&mut rng)];
-                        let key: i128 = rng.gen_range(r.start..r.end);
-                        let key = Key::from_i128(key);
-                        assert!(is_rel_block_key(&key));
-                        let (rel_tag, block_no) =
-                            key_to_rel_block(key).expect("we filter non-rel-block keys out above");
-
-                        PagestreamGetVectoredPagesRequest {
-                            latest: rng.gen_bool(args.req_latest_probability),
-                            lsn: r.timeline_lsn,
-                            rel: rel_tag,
-                            blkno: block_no,
-                            count: size
-                        }
-                    };
-                    client.getpages(req).await.unwrap();
-                } else {
-                    let req = {
-                        let mut rng = rand::thread_rng();
-                        let r = &ranges[weights.sample(&mut rng)];
-                        let key: i128 = rng.gen_range(r.start..r.end);
-                        let key = Key::from_i128(key);
-                        assert!(is_rel_block_key(&key));
-                        let (rel_tag, block_no) =
-                            key_to_rel_block(key).expect("we filter non-rel-block keys out above");
-
-                        PagestreamGetPageRequest {
-                            latest: rng.gen_bool(args.req_latest_probability),
-                            lsn: r.timeline_lsn,
-                            rel: rel_tag,
-                            blkno: block_no,
-                        }
-                    };
-                    client.getpage(req).await.unwrap();
-                }
-                let end = Instant::now();
-                live_stats.request_done();
-                ticks_processed += 1;
-                STATS.with(|stats| {
-                    stats
-                        .borrow()
-                        .lock()
-                        .unwrap()
-                        .observe(end.duration_since(start))
-                        .unwrap();
-                });
-
-                if let Some(period) = &rps_period {
-                    let next_at = client_start
-                        + Duration::from_micros(
-                            (ticks_processed) as u64 * u64::try_from(period.as_micros()).unwrap(),
-                        );
-                    tokio::time::sleep_until(next_at.into()).await;
-                }
-            }
-        })
-    };
-
-    info!("spawning workers");
-    let mut workers = JoinSet::new();
+    let mut work_senders: HashMap<WorkerId, _> = HashMap::new();
+    let mut tasks = Vec::new();
    for timeline in timelines.iter().cloned() {
        for num_client in 0..args.num_clients.get() {
+            let (sender, receiver) = tokio::sync::mpsc::channel(10); // TODO: not sure what the implications of this are
            let worker_id = WorkerId {
                timeline,
                num_client,
            };
-            workers.spawn(make_worker(worker_id));
+            work_senders.insert(worker_id, sender);
+            tasks.push(tokio::spawn(client(
+                args,
+                worker_id,
+                Arc::clone(&start_work_barrier),
+                receiver,
+                Arc::clone(&live_stats),
+                cancel.clone(),
+            )));
        }
    }
-    let workers = async move {
-        while let Some(res) = workers.join_next().await {
-            res.unwrap();
+
+    let work_sender: Pin<Box<dyn Send + Future<Output = ()>>> = {
+        let start_work_barrier = start_work_barrier.clone();
+        let cancel = cancel.clone();
+        match args.per_target_rate_limit {
+            None => Box::pin(async move {
+                let weights = rand::distributions::weighted::WeightedIndex::new(
+                    all_ranges.iter().map(|v| v.len()),
+                )
+                .unwrap();
+
+                start_work_barrier.wait().await;
+
+                while !cancel.is_cancelled() {
+                    let (timeline, req) = {
+                        let mut rng = rand::thread_rng();
+                        let r = &all_ranges[weights.sample(&mut rng)];
+                        let key: i128 = rng.gen_range(r.start..r.end);
+                        let key = Key::from_i128(key);
+                        let (rel_tag, block_no) =
+                            key_to_rel_block(key).expect("we filter non-rel-block keys out above");
+                        (
+                            WorkerId {
+                                timeline: r.timeline,
+                                num_client: rng.gen_range(0..args.num_clients.get()),
+                            },
+                            PagestreamGetPageRequest {
+                                latest: rng.gen_bool(args.req_latest_probability),
+                                lsn: r.timeline_lsn,
+                                rel: rel_tag,
+                                blkno: block_no,
+                            },
+                        )
+                    };
+                    let sender = work_senders.get(&timeline).unwrap();
+                    // TODO: what if this blocks?
+                    if sender.send(req).await.is_err() {
+                        assert!(cancel.is_cancelled(), "client has gone away unexpectedly");
+                    }
+                }
+            }),
+            Some(rps_limit) => Box::pin(async move {
+                let period = Duration::from_secs_f64(1.0 / (rps_limit as f64));
+                let make_task: &dyn Fn(WorkerId) -> Pin<Box<dyn Send + Future<Output = ()>>> =
+                    &|worker_id| {
+                        let sender = work_senders.get(&worker_id).unwrap();
+                        let ranges: Vec<KeyRange> = all_ranges
+                            .iter()
+                            .filter(|r| r.timeline == worker_id.timeline)
+                            .cloned()
+                            .collect();
+                        let weights = rand::distributions::weighted::WeightedIndex::new(
+                            ranges.iter().map(|v| v.len()),
+                        )
+                        .unwrap();
+
+                        let cancel = cancel.clone();
+                        Box::pin(async move {
+                            let mut ticker = tokio::time::interval(period);
+                            ticker.set_missed_tick_behavior(
+                                /* TODO review this choice */
+                                tokio::time::MissedTickBehavior::Burst,
+                            );
+                            while !cancel.is_cancelled() {
+                                ticker.tick().await;
+                                let req = {
+                                    let mut rng = rand::thread_rng();
+                                    let r = &ranges[weights.sample(&mut rng)];
+                                    let key: i128 = rng.gen_range(r.start..r.end);
+                                    let key = Key::from_i128(key);
+                                    assert!(is_rel_block_key(&key));
+                                    let (rel_tag, block_no) = key_to_rel_block(key)
+                                        .expect("we filter non-rel-block keys out above");
+                                    PagestreamGetPageRequest {
+                                        latest: rng.gen_bool(args.req_latest_probability),
+                                        lsn: r.timeline_lsn,
+                                        rel: rel_tag,
+                                        blkno: block_no,
+                                    }
+                                };
+                                if sender.send(req).await.is_err() {
+                                    assert!(
+                                        cancel.is_cancelled(),
+                                        "client has gone away unexpectedly"
+                                    );
+                                }
+                            }
+                        })
+                    };
+
+                let tasks: Vec<_> = work_senders.keys().map(|tl| make_task(*tl)).collect();
+
+                start_work_barrier.wait().await;
+
+                join_all(tasks).await;
+            }),
        }
    };

+    let work_sender_task = tokio::spawn(work_sender);
+
    info!("waiting for everything to become ready");
    start_work_barrier.wait().await;
    info!("work started");
@@ -387,13 +377,20 @@ async fn main_impl(
        tokio::time::sleep(runtime.into()).await;
        info!("runtime over, signalling cancellation");
        cancel.cancel();
-        workers.await;
+        work_sender_task.await.unwrap();
        info!("work sender exited");
    } else {
-        workers.await;
+        work_sender_task.await.unwrap();
        unreachable!("work sender never terminates");
    }

+    info!("joining clients");
+    for t in tasks {
+        t.await.unwrap();
+    }
+
+    info!("all clients stopped");
+
    let output = Output {
        total: {
            let mut agg_stats = request_stats::Stats::new();
@@ -410,3 +407,49 @@ async fn main_impl(

    anyhow::Ok(())
 }
+
+#[instrument(skip_all)]
+async fn client( // One worker: opens a pagestream for `id.timeline`, then executes the getpage requests it receives on `work`.
+    args: &'static Args,
+    id: WorkerId,
+    start_work_barrier: Arc<Barrier>, // waited on once, after the pagestream is open and before the first request is received
+    mut work: tokio::sync::mpsc::Receiver<PagestreamGetPageRequest>, // requests to execute, in arrival order
+    live_stats: Arc<LiveStats>, // incremented once per completed request
+    cancel: CancellationToken, // cancellation abandons the request loop and proceeds to shutdown
+) {
+    let WorkerId {
+        timeline,
+        num_client: _, // only the timeline part of the id is used in this function
+    } = id;
+    let client = pageserver_client::page_service::Client::new(args.page_service_connstring.clone())
+        .await
+        .unwrap(); // panics if the client cannot be created
+    let mut client = client
+        .pagestream(timeline.tenant_id, timeline.timeline_id)
+        .await
+        .unwrap(); // panics if the pagestream cannot be opened
+
+    let do_requests = async {
+        start_work_barrier.wait().await; // the pagestream is already open when this worker reaches the barrier
+        while let Some(req) = work.recv().await { // loop ends once recv() yields None
+            let start = Instant::now();
+            client
+                .getpage(req)
+                .await
+                .with_context(|| format!("getpage for {timeline}"))
+                .unwrap(); // a failed getpage panics this worker task
+            let elapsed = start.elapsed(); // duration of this single getpage call
+            live_stats.inc();
+            STATS.with(|stats| {
+                stats.borrow().lock().unwrap().observe(elapsed).unwrap(); // record the latency sample in STATS
+            });
+        }
+    };
+    tokio::select! { // whichever completes first: the request loop or cancellation
+        res = do_requests => { res },
+        _ = cancel.cancelled() => {
+            // fallthrough to shutdown
+        }
+    }
+    client.shutdown().await; // reached on both select! branches
+}
--- a/pageserver/src/auth.rs
+++ b/pageserver/src/auth.rs
@@ -14,12 +14,8 @@ pub fn check_permission(claims: &Claims, tenant_id: Option<TenantId>) -> Result<
        }
        (Scope::PageServerApi, None) => Ok(()), // access to management api for PageServerApi scope
        (Scope::PageServerApi, Some(_)) => Ok(()), // access to tenant api using PageServerApi scope
-        (Scope::SafekeeperData | Scope::GenerationsApi, _) => Err(AuthError(
-            format!(
-                "JWT scope '{:?}' is ineligible for Pageserver auth",
-                claims.scope
-            )
-            .into(),
+        (Scope::SafekeeperData, _) => Err(AuthError(
+            "SafekeeperData scope makes no sense for Pageserver".into(),
        )),
    }
 }
--- a/pageserver/src/basebackup.rs
+++ b/pageserver/src/basebackup.rs
@@ -143,7 +143,6 @@ where
    ar: &'a mut Builder<&'b mut W>,
    buf: Vec<u8>,
    current_segment: Option<(SlruKind, u32)>,
-    total_blocks: usize,
 }

 impl<'a, 'b, W> SlruSegmentsBuilder<'a, 'b, W>
@@ -155,7 +154,6 @@ where
            ar,
            buf: Vec::new(),
            current_segment: None,
-            total_blocks: 0,
        }
    }

@@ -201,8 +199,7 @@ where
        let header = new_tar_header(&segname, self.buf.len() as u64)?;
        self.ar.append(&header, self.buf.as_slice()).await?;

-        self.total_blocks += nblocks;
-        debug!("Added to basebackup slru {} relsize {}", segname, nblocks);
+        trace!("Added to basebackup slru {} relsize {}", segname, nblocks);

        self.buf.clear();

@@ -210,15 +207,11 @@ where
    }

    async fn finish(mut self) -> anyhow::Result<()> {
-        let res = if self.current_segment.is_none() || self.buf.is_empty() {
-            Ok(())
-        } else {
-            self.flush().await
-        };
+        if self.current_segment.is_none() || self.buf.is_empty() { // no segment started, or nothing buffered for it
+            return Ok(()); // nothing left to append to the archive
+        }

-        info!("Collected {} SLRU blocks", self.total_blocks);
-
-        res
+        self.flush().await // emit the still-buffered segment; its result is the result of finish()
    }
 }

@@ -268,7 +261,10 @@ where
            let mut slru_builder = SlruSegmentsBuilder::new(&mut self.ar);

            for part in slru_partitions.parts {
-                let blocks = self.timeline.get_vectored(part, self.lsn, self.ctx).await?;
+                let blocks = self
+                    .timeline
+                    .get_vectored(&part.ranges, self.lsn, self.ctx)
+                    .await?;

                for (key, block) in blocks {
                    slru_builder.add_block(&key, block?).await?;
--- a/pageserver/src/config.rs
+++ b/pageserver/src/config.rs
@@ -33,7 +33,6 @@ use utils::{
 use crate::disk_usage_eviction_task::DiskUsageEvictionTaskConfig;
 use crate::tenant::config::TenantConf;
 use crate::tenant::config::TenantConfOpt;
-use crate::tenant::timeline::GetVectoredImpl;
 use crate::tenant::{
    TENANTS_SEGMENT_NAME, TENANT_DELETED_MARKER_FILE_NAME, TIMELINES_SEGMENT_NAME,
 };
@@ -85,12 +84,6 @@ pub mod defaults {

    pub const DEFAULT_VIRTUAL_FILE_IO_ENGINE: &str = "std-fs";

-    pub const DEFAULT_GET_VECTORED_IMPL: &str = "sequential";
-
-    pub const DEFAULT_MAX_VECTORED_READ_SIZE: usize = 128 * 1024; // 128 KiB
-
-    pub const DEFAULT_VALIDATE_VECTORED_GET: bool = true;
-
    ///
    /// Default built-in configuration file.
    ///
@@ -128,12 +121,6 @@ pub mod defaults {

 #virtual_file_io_engine = '{DEFAULT_VIRTUAL_FILE_IO_ENGINE}'

-#get_vectored_impl = '{DEFAULT_GET_VECTORED_IMPL}'
-
-#max_vectored_read_size = '{DEFAULT_MAX_VECTORED_READ_SIZE}'
-
-#validate_vectored_get = '{DEFAULT_VALIDATE_VECTORED_GET}'
-
 [tenant_config]
 #checkpoint_distance = {DEFAULT_CHECKPOINT_DISTANCE} # in bytes
 #checkpoint_timeout = {DEFAULT_CHECKPOINT_TIMEOUT}
@@ -269,12 +256,6 @@ pub struct PageServerConf {
    pub ingest_batch_size: u64,

    pub virtual_file_io_engine: virtual_file::IoEngineKind,
-
-    pub get_vectored_impl: GetVectoredImpl,
-
-    pub max_vectored_read_size: usize,
-
-    pub validate_vectored_get: bool,
 }

 /// We do not want to store this in a PageServerConf because the latter may be logged
@@ -361,12 +342,6 @@ struct PageServerConfigBuilder {
    ingest_batch_size: BuilderValue<u64>,

    virtual_file_io_engine: BuilderValue<virtual_file::IoEngineKind>,
-
-    get_vectored_impl: BuilderValue<GetVectoredImpl>,
-
-    max_vectored_read_size: BuilderValue<usize>,
-
-    validate_vectored_get: BuilderValue<bool>,
 }

 impl Default for PageServerConfigBuilder {
@@ -444,10 +419,6 @@ impl Default for PageServerConfigBuilder {
            ingest_batch_size: Set(DEFAULT_INGEST_BATCH_SIZE),

            virtual_file_io_engine: Set(DEFAULT_VIRTUAL_FILE_IO_ENGINE.parse().unwrap()),
-
-            get_vectored_impl: Set(DEFAULT_GET_VECTORED_IMPL.parse().unwrap()),
-            max_vectored_read_size: Set(DEFAULT_MAX_VECTORED_READ_SIZE),
-            validate_vectored_get: Set(DEFAULT_VALIDATE_VECTORED_GET),
        }
    }
 }
@@ -608,18 +579,6 @@ impl PageServerConfigBuilder {
        self.virtual_file_io_engine = BuilderValue::Set(value);
    }

-    pub fn get_vectored_impl(&mut self, value: GetVectoredImpl) {
-        self.get_vectored_impl = BuilderValue::Set(value);
-    }
-
-    pub fn get_max_vectored_read_size(&mut self, value: usize) {
-        self.max_vectored_read_size = BuilderValue::Set(value);
-    }
-
-    pub fn get_validate_vectored_get(&mut self, value: bool) {
-        self.validate_vectored_get = BuilderValue::Set(value);
-    }
-
    pub fn build(self) -> anyhow::Result<PageServerConf> {
        let concurrent_tenant_warmup = self
            .concurrent_tenant_warmup
@@ -730,15 +689,6 @@ impl PageServerConfigBuilder {
            virtual_file_io_engine: self
                .virtual_file_io_engine
                .ok_or(anyhow!("missing virtual_file_io_engine"))?,
-            get_vectored_impl: self
-                .get_vectored_impl
-                .ok_or(anyhow!("missing get_vectored_impl"))?,
-            max_vectored_read_size: self
-                .max_vectored_read_size
-                .ok_or(anyhow!("missing max_vectored_read_size"))?,
-            validate_vectored_get: self
-                .validate_vectored_get
-                .ok_or(anyhow!("missing validate_vectored_get"))?,
        })
    }
 }
@@ -993,15 +943,6 @@ impl PageServerConf {
                "virtual_file_io_engine" => {
                    builder.virtual_file_io_engine(parse_toml_from_str("virtual_file_io_engine", item)?)
                }
-                "get_vectored_impl" => {
-                    builder.get_vectored_impl(parse_toml_from_str("get_vectored_impl", item)?)
-                }
-                "max_vectored_read_size" => {
-                    builder.get_max_vectored_read_size(parse_toml_u64("max_vectored_read_size", item)? as usize)
-                }
-                "validate_vectored_get" => {
-                    builder.get_validate_vectored_get(parse_toml_bool("validate_vectored_get", item)?)
-                }
                _ => bail!("unrecognized pageserver option '{key}'"),
            }
        }
@@ -1076,9 +1017,6 @@ impl PageServerConf {
            secondary_download_concurrency: defaults::DEFAULT_SECONDARY_DOWNLOAD_CONCURRENCY,
            ingest_batch_size: defaults::DEFAULT_INGEST_BATCH_SIZE,
            virtual_file_io_engine: DEFAULT_VIRTUAL_FILE_IO_ENGINE.parse().unwrap(),
-            get_vectored_impl: defaults::DEFAULT_GET_VECTORED_IMPL.parse().unwrap(),
-            max_vectored_read_size: defaults::DEFAULT_MAX_VECTORED_READ_SIZE,
-            validate_vectored_get: defaults::DEFAULT_VALIDATE_VECTORED_GET,
        }
    }
 }
@@ -1312,9 +1250,6 @@ background_task_maximum_delay = '334 s'
                secondary_download_concurrency: defaults::DEFAULT_SECONDARY_DOWNLOAD_CONCURRENCY,
                ingest_batch_size: defaults::DEFAULT_INGEST_BATCH_SIZE,
                virtual_file_io_engine: DEFAULT_VIRTUAL_FILE_IO_ENGINE.parse().unwrap(),
-                get_vectored_impl: defaults::DEFAULT_GET_VECTORED_IMPL.parse().unwrap(),
-                max_vectored_read_size: defaults::DEFAULT_MAX_VECTORED_READ_SIZE,
-                validate_vectored_get: defaults::DEFAULT_VALIDATE_VECTORED_GET,
            },
            "Correct defaults should be used when no config values are provided"
        );
@@ -1379,9 +1314,6 @@ background_task_maximum_delay = '334 s'
                secondary_download_concurrency: defaults::DEFAULT_SECONDARY_DOWNLOAD_CONCURRENCY,
                ingest_batch_size: 100,
                virtual_file_io_engine: DEFAULT_VIRTUAL_FILE_IO_ENGINE.parse().unwrap(),
-                get_vectored_impl: defaults::DEFAULT_GET_VECTORED_IMPL.parse().unwrap(),
-                max_vectored_read_size: defaults::DEFAULT_MAX_VECTORED_READ_SIZE,
-                validate_vectored_get: defaults::DEFAULT_VALIDATE_VECTORED_GET,
            },
            "Should be able to parse all basic config values correctly"
        );
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -83,12 +83,12 @@ use utils::{
 // This is not functionally necessary (clients will retry), but avoids generating a lot of
 // failed API calls while tenants are activating.
 #[cfg(not(feature = "testing"))]
-pub(crate) const ACTIVE_TENANT_TIMEOUT: Duration = Duration::from_millis(5000);
+const ACTIVE_TENANT_TIMEOUT: Duration = Duration::from_millis(5000);

 // Tests run on slow/oversubscribed nodes, and may need to wait much longer for tenants to
 // finish attaching, if calls to remote storage are slow.
 #[cfg(feature = "testing")]
-pub(crate) const ACTIVE_TENANT_TIMEOUT: Duration = Duration::from_millis(30000);
+const ACTIVE_TENANT_TIMEOUT: Duration = Duration::from_millis(30000);

 pub struct State {
    conf: &'static PageServerConf,
@@ -571,16 +571,10 @@ async fn timeline_list_handler(
        parse_query_param(&request, "force-await-initial-logical-size")?;
    check_permission(&request, Some(tenant_shard_id.tenant_id))?;

-    let state = get_state(&request);
    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);

    let response_data = async {
-        let tenant = state
-            .tenant_manager
-            .get_attached_tenant_shard(tenant_shard_id, false)?;
-
-        tenant.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await?;
-
+        let tenant = mgr::get_tenant(tenant_shard_id, true)?;
        let timelines = tenant.list_timelines();

        let mut response_data = Vec::with_capacity(timelines.len());
@@ -622,7 +616,7 @@ async fn timeline_preserve_initdb_handler(
    // location where timeline recreation cand find it.

    async {
-        let tenant = mgr::get_tenant(tenant_shard_id, false)?;
+        let tenant = mgr::get_tenant(tenant_shard_id, true)?;

        let timeline = tenant
            .get_timeline(timeline_id, false)
@@ -1142,7 +1136,7 @@ async fn tenant_shard_split_handler(

    let new_shards = state
        .tenant_manager
-        .shard_split(tenant_shard_id, ShardCount::new(req.new_shard_count), &ctx)
+        .shard_split(tenant_shard_id, ShardCount(req.new_shard_count), &ctx)
        .await
        .map_err(ApiError::InternalServerError)?;

--- a/pageserver/src/metrics.rs
+++ b/pageserver/src/metrics.rs
@@ -4,8 +4,8 @@ use metrics::{
    register_counter_vec, register_gauge_vec, register_histogram, register_histogram_vec,
    register_int_counter, register_int_counter_pair_vec, register_int_counter_vec,
    register_int_gauge, register_int_gauge_vec, register_uint_gauge, register_uint_gauge_vec,
-    Counter, CounterVec, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPair,
-    IntCounterPairVec, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec,
+    Counter, CounterVec, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPairVec,
+    IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec,
 };
 use once_cell::sync::Lazy;
 use pageserver_api::shard::TenantShardId;
@@ -1266,12 +1266,13 @@ pub(crate) static LIVE_CONNECTIONS_COUNT: Lazy<IntGaugeVec> = Lazy::new(|| {

 // remote storage metrics

-static REMOTE_TIMELINE_CLIENT_CALLS: Lazy<IntCounterPairVec> = Lazy::new(|| {
-    register_int_counter_pair_vec!(
-        "pageserver_remote_timeline_client_calls_started",
-        "Number of started calls to remote timeline client.",
-        "pageserver_remote_timeline_client_calls_finished",
-        "Number of finshed calls to remote timeline client.",
+/// NB: increment _after_ recording the current value into [`REMOTE_TIMELINE_CLIENT_CALLS_STARTED_HIST`].
+static REMOTE_TIMELINE_CLIENT_CALLS_UNFINISHED_GAUGE: Lazy<IntGaugeVec> = Lazy::new(|| {
+    register_int_gauge_vec!(
+        "pageserver_remote_timeline_client_calls_unfinished",
+        "Number of ongoing calls to remote timeline client. \
+         Used to populate pageserver_remote_timeline_client_calls_started. \
+         This metric is not useful for sampling from Prometheus, but useful in tests.",
        &[
            "tenant_id",
            "shard_id",
@@ -1280,7 +1281,23 @@ static REMOTE_TIMELINE_CLIENT_CALLS: Lazy<IntCounterPairVec> = Lazy::new(|| {
            "op_kind"
        ],
    )
-    .unwrap()
+    .expect("failed to define a metric")
+});
+
+static REMOTE_TIMELINE_CLIENT_CALLS_STARTED_HIST: Lazy<HistogramVec> = Lazy::new(|| { // registered lazily, on first access
+    register_histogram_vec!(
+        "pageserver_remote_timeline_client_calls_started",
+        "When calling a remote timeline client method, we record the current value \
+         of the calls_unfinished gauge in this histogram. Plot the histogram \
+         over time in a heatmap to visualize how many operations were ongoing \
+         at a given instant. It gives you a better idea of the queue depth \
+         than plotting the gauge directly, since operations may complete faster \
+         than the sampling interval.",
+        &["file_kind", "op_kind"], // only two labels: unlike the calls_unfinished gauge, not split per tenant/shard/timeline
+        // The calls_unfinished gauge is an integer gauge, hence we have integer buckets.
+        vec![0.0, 1.0, 2.0, 4.0, 6.0, 8.0, 10.0, 15.0, 20.0, 40.0, 60.0, 80.0, 100.0, 500.0],
+    )
+    .expect("failed to define a metric") // a registration error panics at first use of this static
 });

 static REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER: Lazy<IntCounterVec> =
@@ -2061,7 +2078,7 @@ pub(crate) struct RemoteTimelineClientMetrics {
    shard_id: String,
    timeline_id: String,
    remote_physical_size_gauge: Mutex<Option<PerTimelineRemotePhysicalSizeGauge>>,
-    calls: Mutex<HashMap<(&'static str, &'static str), IntCounterPair>>,
+    calls_unfinished_gauge: Mutex<HashMap<(&'static str, &'static str), IntGauge>>,
    bytes_started_counter: Mutex<HashMap<(&'static str, &'static str), IntCounter>>,
    bytes_finished_counter: Mutex<HashMap<(&'static str, &'static str), IntCounter>>,
 }
@@ -2072,7 +2089,7 @@ impl RemoteTimelineClientMetrics {
            tenant_id: tenant_shard_id.tenant_id.to_string(),
            shard_id: format!("{}", tenant_shard_id.shard_slug()),
            timeline_id: timeline_id.to_string(),
-            calls: Mutex::new(HashMap::default()),
+            calls_unfinished_gauge: Mutex::new(HashMap::default()),
            bytes_started_counter: Mutex::new(HashMap::default()),
            bytes_finished_counter: Mutex::new(HashMap::default()),
            remote_physical_size_gauge: Mutex::new(None),
@@ -2112,15 +2129,15 @@ impl RemoteTimelineClientMetrics {
            .unwrap()
    }

-    fn calls_counter_pair(
+    fn calls_unfinished_gauge(
        &self,
        file_kind: &RemoteOpFileKind,
        op_kind: &RemoteOpKind,
-    ) -> IntCounterPair {
-        let mut guard = self.calls.lock().unwrap();
+    ) -> IntGauge {
+        let mut guard = self.calls_unfinished_gauge.lock().unwrap();
        let key = (file_kind.as_str(), op_kind.as_str());
        let metric = guard.entry(key).or_insert_with(move || {
-            REMOTE_TIMELINE_CLIENT_CALLS
+            REMOTE_TIMELINE_CLIENT_CALLS_UNFINISHED_GAUGE
                .get_metric_with_label_values(&[
                    &self.tenant_id,
                    &self.shard_id,
@@ -2133,6 +2150,17 @@ impl RemoteTimelineClientMetrics {
        metric.clone()
    }

+    fn calls_started_hist( // Returns the calls_started histogram child for the given (file_kind, op_kind) pair.
+        &self,
+        file_kind: &RemoteOpFileKind,
+        op_kind: &RemoteOpKind,
+    ) -> Histogram {
+        let key = (file_kind.as_str(), op_kind.as_str());
+        REMOTE_TIMELINE_CLIENT_CALLS_STARTED_HIST
+            .get_metric_with_label_values(&[key.0, key.1]) // looked up on every call; no fields of self are used, unlike calls_unfinished_gauge which caches per instance
+            .unwrap() // panics if the label lookup fails
+    }
+
    fn bytes_started_counter(
        &self,
        file_kind: &RemoteOpFileKind,
@@ -2203,7 +2231,7 @@ impl RemoteTimelineClientMetrics {
 #[must_use]
 pub(crate) struct RemoteTimelineClientCallMetricGuard {
    /// Decremented on drop.
-    calls_counter_pair: Option<IntCounterPair>,
+    calls_unfinished_metric: Option<IntGauge>,
    /// If Some(), this references the bytes_finished metric, and we increment it by the given `u64` on drop.
    bytes_finished: Option<(IntCounter, u64)>,
 }
@@ -2213,10 +2241,10 @@ impl RemoteTimelineClientCallMetricGuard {
    /// The caller vouches to do the metric updates manually.
    pub fn will_decrement_manually(mut self) {
        let RemoteTimelineClientCallMetricGuard {
-            calls_counter_pair,
+            calls_unfinished_metric,
            bytes_finished,
        } = &mut self;
-        calls_counter_pair.take();
+        calls_unfinished_metric.take(); // leaves None behind, so the guard's Drop impl will not decrement the gauge
        bytes_finished.take();
    }
 }
@@ -2224,10 +2252,10 @@ impl RemoteTimelineClientCallMetricGuard {
 impl Drop for RemoteTimelineClientCallMetricGuard {
    fn drop(&mut self) {
        let RemoteTimelineClientCallMetricGuard {
-            calls_counter_pair,
+            calls_unfinished_metric,
            bytes_finished,
        } = self;
-        if let Some(guard) = calls_counter_pair.take() {
+        if let Some(guard) = calls_unfinished_metric.take() {
            guard.dec();
        }
        if let Some((bytes_finished_metric, value)) = bytes_finished {
@@ -2260,8 +2288,10 @@ impl RemoteTimelineClientMetrics {
        op_kind: &RemoteOpKind,
        size: RemoteTimelineClientMetricsCallTrackSize,
    ) -> RemoteTimelineClientCallMetricGuard {
-        let calls_counter_pair = self.calls_counter_pair(file_kind, op_kind);
-        calls_counter_pair.inc();
+        let calls_unfinished_metric = self.calls_unfinished_gauge(file_kind, op_kind);
+        self.calls_started_hist(file_kind, op_kind)
+            .observe(calls_unfinished_metric.get() as f64);
+        calls_unfinished_metric.inc(); // NB: inc after the histogram, see comment on underlying metric

        let bytes_finished = match size {
            RemoteTimelineClientMetricsCallTrackSize::DontTrackSize { reason: _reason } => {
@@ -2275,7 +2305,7 @@ impl RemoteTimelineClientMetrics {
            }
        };
        RemoteTimelineClientCallMetricGuard {
-            calls_counter_pair: Some(calls_counter_pair),
+            calls_unfinished_metric: Some(calls_unfinished_metric),
            bytes_finished,
        }
    }
@@ -2289,8 +2319,12 @@ impl RemoteTimelineClientMetrics {
        op_kind: &RemoteOpKind,
        size: RemoteTimelineClientMetricsCallTrackSize,
    ) {
-        let calls_counter_pair = self.calls_counter_pair(file_kind, op_kind);
-        calls_counter_pair.dec();
+        let calls_unfinished_metric = self.calls_unfinished_gauge(file_kind, op_kind);
+        debug_assert!(
+            calls_unfinished_metric.get() > 0,
+            "begin and end should cancel out"
+        );
+        calls_unfinished_metric.dec();
        match size {
            RemoteTimelineClientMetricsCallTrackSize::DontTrackSize { reason: _reason } => {}
            RemoteTimelineClientMetricsCallTrackSize::Bytes(size) => {
@@ -2307,15 +2341,18 @@ impl Drop for RemoteTimelineClientMetrics {
            shard_id,
            timeline_id,
            remote_physical_size_gauge,
-            calls,
+            calls_unfinished_gauge,
            bytes_started_counter,
            bytes_finished_counter,
        } = self;
-        for ((a, b), _) in calls.get_mut().unwrap().drain() {
-            let mut res = [Ok(()), Ok(())];
-            REMOTE_TIMELINE_CLIENT_CALLS
-                .remove_label_values(&mut res, &[tenant_id, shard_id, timeline_id, a, b]);
-            // don't care about results
+        for ((a, b), _) in calls_unfinished_gauge.get_mut().unwrap().drain() {
+            let _ = REMOTE_TIMELINE_CLIENT_CALLS_UNFINISHED_GAUGE.remove_label_values(&[
+                tenant_id,
+                shard_id,
+                timeline_id,
+                a,
+                b,
+            ]);
        }
        for ((a, b), _) in bytes_started_counter.get_mut().unwrap().drain() {
            let _ = REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER.remove_label_values(&[
@@ -2459,56 +2496,6 @@ pub mod tokio_epoll_uring {
    }
 }

-pub(crate) mod tenant_throttling {
-    use metrics::{register_int_counter_vec, IntCounter};
-    use once_cell::sync::Lazy;
-
-    use crate::tenant::{self, throttle::Metric};
-
-    pub(crate) struct TimelineGet {
-        wait_time: IntCounter,
-        count: IntCounter,
-    }
-
-    pub(crate) static TIMELINE_GET: Lazy<TimelineGet> = Lazy::new(|| {
-        static WAIT_USECS: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
-            register_int_counter_vec!(
-            "pageserver_tenant_throttling_wait_usecs_sum_global",
-            "Sum of microseconds that tenants spent waiting for a tenant throttle of a given kind.",
-            &["kind"]
-        )
-            .unwrap()
-        });
-
-        static WAIT_COUNT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
-            register_int_counter_vec!(
-                "pageserver_tenant_throttling_count_global",
-                "Count of tenant throttlings, by kind of throttle.",
-                &["kind"]
-            )
-            .unwrap()
-        });
-
-        let kind = "timeline_get";
-        TimelineGet {
-            wait_time: WAIT_USECS.with_label_values(&[kind]),
-            count: WAIT_COUNT.with_label_values(&[kind]),
-        }
-    });
-
-    impl Metric for &'static TimelineGet {
-        #[inline(always)]
-        fn observe_throttling(
-            &self,
-            tenant::throttle::Observation { wait_time }: &tenant::throttle::Observation,
-        ) {
-            let val = u64::try_from(wait_time.as_micros()).unwrap();
-            self.wait_time.inc_by(val);
-            self.count.inc();
-        }
-    }
-}
-
 pub fn preinitialize_metrics() {
    // Python tests need these and on some we do alerting.
    //
@@ -2570,5 +2557,4 @@ pub fn preinitialize_metrics() {

    // Custom
    Lazy::force(&RECONSTRUCT_TIME);
-    Lazy::force(&tenant_throttling::TIMELINE_GET);
 }
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -17,8 +17,6 @@ use futures::stream::FuturesUnordered;
 use futures::Stream;
 use futures::StreamExt;
 use pageserver_api::key::Key;
-use pageserver_api::models::PagestreamGetVectoredPagesRequest;
-use pageserver_api::models::PagestreamGetVectoredPagesResponse;
 use pageserver_api::models::TenantState;
 use pageserver_api::models::{
    PagestreamBeMessage, PagestreamDbSizeRequest, PagestreamDbSizeResponse,
@@ -28,7 +26,7 @@ use pageserver_api::models::{
    PagestreamNblocksResponse,
 };
 use pageserver_api::shard::ShardIndex;
-use pageserver_api::shard::ShardNumber;
+use pageserver_api::shard::{ShardCount, ShardNumber};
 use postgres_backend::{self, is_expected_io_error, AuthType, PostgresBackend, QueryError};
 use pq_proto::framed::ConnectionError;
 use pq_proto::FeStartupPacket;
@@ -73,7 +71,6 @@ use crate::tenant::mgr;
 use crate::tenant::mgr::get_active_tenant_with_timeout;
 use crate::tenant::mgr::GetActiveTenantError;
 use crate::tenant::mgr::ShardSelector;
-use crate::tenant::timeline::GetVectoredError;
 use crate::tenant::timeline::WaitLsnError;
 use crate::tenant::GetTimelineError;
 use crate::tenant::PageReconstructError;
@@ -337,10 +334,6 @@ enum PageStreamError {
    #[error("Read error")]
    Read(#[source] PageReconstructError),

-    /// Something went wrong reading a page: this likely indicates a pageserver bug
-    #[error("Vectored read error")]
-    VectoredRead(#[source] GetVectoredError),
-
    /// Ran out of time waiting for an LSN
    #[error("LSN timeout: {0}")]
    LsnTimeout(WaitLsnError),
@@ -364,15 +357,6 @@ impl From<PageReconstructError> for PageStreamError {
    }
 }

-impl From<GetVectoredError> for PageStreamError {
-    fn from(value: GetVectoredError) -> Self {
-        match value {
-            GetVectoredError::Cancelled => Self::Shutdown,
-            e => Self::VectoredRead(e),
-        }
-    }
-}
-
 impl From<GetActiveTimelineError> for PageStreamError {
    fn from(value: GetActiveTimelineError) -> Self {
        match value {
@@ -682,15 +666,6 @@ impl PageServerHandler {
                        span,
                    )
                }
-                PagestreamFeMessage::GetVectoredPages(req) => {
-                    let span = tracing::info_span!("handle_get_vectored_pages_request", rel = %req.rel, blkno = %req.blkno, req_lsn = %req.lsn, req_count = %req.count);
-                    (
-                        self.handle_get_pages_at_lsn_request(tenant_id, timeline_id, &req, &ctx)
-                            .instrument(span.clone())
-                            .await,
-                        span,
-                    )
-                }
            };

            match response {
@@ -1023,7 +998,7 @@ impl PageServerHandler {
    ) -> Result<&Arc<Timeline>, Key> {
        let key = if let Some((first_idx, first_timeline)) = self.shard_timelines.iter().next() {
            // Fastest path: single sharded case
-            if first_idx.shard_count.count() == 1 {
+            if first_idx.shard_count < ShardCount(2) {
                return Ok(&first_timeline.timeline);
            }

@@ -1186,80 +1161,6 @@ impl PageServerHandler {
        }))
    }

-    #[instrument(skip_all, fields(shard_id))]
-    async fn handle_get_pages_at_lsn_request(
-        &mut self,
-        tenant_id: TenantId,
-        timeline_id: TimelineId,
-        req: &PagestreamGetVectoredPagesRequest,
-        ctx: &RequestContext,
-    ) -> Result<PagestreamBeMessage, PageStreamError> {
-        // This is cheeky and relies on not using sharding :)
-        // A real solution has to split the requested key sequence between shards.
-        let get_page_request = PagestreamGetPageRequest {
-            latest: req.latest,
-            lsn: req.lsn,
-            rel: req.rel,
-            blkno: req.blkno,
-        };
-
-        let timeline = match self.get_cached_timeline_for_page(&get_page_request) {
-            Ok(tl) => tl,
-            Err(key) => {
-                match self
-                    .load_timeline_for_page(tenant_id, timeline_id, key)
-                    .await
-                {
-                    Ok(t) => t,
-                    Err(GetActiveTimelineError::Tenant(GetActiveTenantError::NotFound(_))) => {
-                        // We already know this tenant exists in general, because we resolved it at
-                        // start of connection.  Getting a NotFound here indicates that the shard containing
-                        // the requested page is not present on this node: the client's knowledge of shard->pageserver
-                        // mapping is out of date.
-                        //
-                        // Closing the connection by returning ``::Reconnect` has the side effect of rate-limiting above message, via
-                        // client's reconnect backoff, as well as hopefully prompting the client to load its updated configuration
-                        // and talk to a different pageserver.
-                        return Err(PageStreamError::Reconnect(
-                            "getpage@lsn request routed to wrong shard".into(),
-                        ));
-                    }
-                    Err(e) => return Err(e.into()),
-                }
-            }
-        };
-
-        // load_timeline_for_page sets shard_id, but get_cached_timeline_for_page doesn't
-        set_tracing_field_shard_id(timeline);
-
-        let _timer = timeline
-            .query_metrics
-            .start_timer(metrics::SmgrQueryType::GetPageAtLsn);
-
-        let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
-        let lsn =
-            Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest, &latest_gc_cutoff_lsn, ctx)
-                .await?;
-
-        let (page_count, pages_buf) = timeline
-            .get_rel_pages_at_lsn(
-                req.rel,
-                req.blkno,
-                req.count,
-                Version::Lsn(lsn),
-                req.latest,
-                ctx,
-            )
-            .await?;
-
-        Ok(PagestreamBeMessage::GetVectoredPages(
-            PagestreamGetVectoredPagesResponse {
-                page_count,
-                pages: pages_buf,
-            },
-        ))
-    }
-
    #[instrument(skip_all, fields(shard_id))]
    async fn handle_get_slru_segment_request(
        &mut self,
--- a/pageserver/src/pgdatadir_mapping.rs
+++ b/pageserver/src/pgdatadir_mapping.rs
@@ -11,12 +11,10 @@ use crate::context::RequestContext;
 use crate::keyspace::{KeySpace, KeySpaceAccum};
 use crate::repository::*;
 use crate::span::debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id;
-use crate::tenant::timeline::GetVectoredError;
 use crate::walrecord::NeonWalRecord;
-use anyhow::{anyhow, ensure, Context};
+use anyhow::{ensure, Context};
 use bytes::{Buf, Bytes, BytesMut};
 use enum_map::Enum;
-use itertools::Itertools;
 use pageserver_api::key::{
    dbdir_key_range, is_rel_block_key, is_slru_block_key, rel_block_to_key, rel_dir_to_key,
    rel_key_range, rel_size_to_key, relmap_file_key, slru_block_to_key, slru_dir_to_key,
@@ -28,7 +26,7 @@ use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
 use postgres_ffi::BLCKSZ;
 use postgres_ffi::{Oid, TimestampTz, TransactionId};
 use serde::{Deserialize, Serialize};
-use std::collections::{hash_map, BTreeMap, HashMap, HashSet};
+use std::collections::{hash_map, HashMap, HashSet};
 use std::ops::ControlFlow;
 use std::ops::Range;
 use strum::IntoEnumIterator;
@@ -199,41 +197,6 @@ impl Timeline {
        version.get(self, key, ctx).await
    }

-    pub(crate) async fn get_rel_pages_at_lsn(
-        &self,
-        tag: RelTag,
-        blknum: BlockNumber,
-        count: u8,
-        version: Version<'_>,
-        latest: bool,
-        ctx: &RequestContext,
-    ) -> Result<(u8, Bytes), GetVectoredError> {
-        if tag.relnode == 0 {
-            return Err(GetVectoredError::Other(
-                RelationError::InvalidRelnode.into(),
-            ));
-        }
-
-        let nblocks = self
-            .get_rel_size(tag, version, latest, ctx)
-            .await
-            .map_err(|e| GetVectoredError::Other(anyhow!(e)))?;
-        if blknum + (count - 1) as u32 >= nblocks {
-            debug!(
-                "read beyond EOF at {} blk {} at {}, size is {}: returning all-zeros page",
-                tag,
-                blknum,
-                version.get_lsn(),
-                nblocks
-            );
-            return Ok((1, ZERO_PAGE.clone()));
-        }
-
-        let start_key = rel_block_to_key(tag, blknum);
-        let end_key = start_key.add(count as u32);
-        version.get_vectored(self, start_key..end_key, ctx).await
-    }
-
    // Get size of a database in blocks
    pub(crate) async fn get_db_size(
        &self,
@@ -1529,7 +1492,7 @@ impl<'a> DatadirModification<'a> {
            return Ok(());
        }

-        let mut writer = self.tline.writer().await;
+        let writer = self.tline.writer().await;

        // Flush relation and  SLRU data blocks, keep metadata.
        let mut retained_pending_updates = HashMap::<_, Vec<_>>::new();
@@ -1568,23 +1531,13 @@ impl<'a> DatadirModification<'a> {
    /// All the modifications in this atomic update are stamped by the specified LSN.
    ///
    pub async fn commit(&mut self, ctx: &RequestContext) -> anyhow::Result<()> {
-        let mut writer = self.tline.writer().await;
+        let writer = self.tline.writer().await;

        let pending_nblocks = self.pending_nblocks;
        self.pending_nblocks = 0;

        if !self.pending_updates.is_empty() {
-            let prev_pending_updates = std::mem::take(&mut self.pending_updates);
-
-            // The put_batch call below expects expects the inputs to be sorted by Lsn,
-            // so we do that first.
-            let lsn_ordered_batch: Vec<(Key, Lsn, Value)> = prev_pending_updates
-                .into_iter()
-                .map(|(key, vals)| vals.into_iter().map(move |(lsn, val)| (key, lsn, val)))
-                .kmerge_by(|lhs, rhs| lhs.1 .0 < rhs.1 .0)
-                .collect();
-
-            writer.put_batch(lsn_ordered_batch, ctx).await?;
+            writer.put_batch(&self.pending_updates, ctx).await?;
            self.pending_updates.clear();
        }

@@ -1645,55 +1598,6 @@ impl<'a> DatadirModification<'a> {
        self.tline.get(key, lsn, ctx).await
    }

-    async fn get_vectored(
-        &self,
-        key_range: Range<Key>,
-        ctx: &RequestContext,
-    ) -> Result<BTreeMap<Key, Result<Bytes, PageReconstructError>>, GetVectoredError> {
-        // Have we already updated the same key? Read the latest pending updated
-        // version in that case.
-        //
-        // Note: we don't check pending_deletions. It is an error to request a
-        // value that has been removed, deletion only avoids leaking storage.
-        let mut results: BTreeMap<Key, Result<Bytes, PageReconstructError>> = BTreeMap::new();
-        let mut keys_in_modification = KeySpaceAccum::new();
-
-        let key = key_range.start;
-        while key != key_range.end {
-            if let Some(values) = self.pending_updates.get(&key) {
-                if let Some((_, value)) = values.last() {
-                    keys_in_modification.add_key(key);
-
-                    match value {
-                        Value::Image(img) => {
-                            results.insert(key, Ok(img.clone()));
-                        }
-                        _ => {
-                            results.insert(
-                                key,
-                                Err(PageReconstructError::from(anyhow::anyhow!(
-                                    "unexpected pending WAL record"
-                                ))),
-                            );
-                        }
-                    }
-                }
-            }
-        }
-
-        let lsn = Lsn::max(self.tline.get_last_record_lsn(), self.lsn);
-
-        let mut keyspace = KeySpace {
-            ranges: vec![key_range],
-        };
-        keyspace.remove_overlapping_with(&keys_in_modification.to_keyspace());
-
-        let pages = self.tline.get_vectored(keyspace, lsn, ctx).await?;
-        results.extend(pages.into_iter());
-
-        Ok(results)
-    }
-
    fn put(&mut self, key: Key, val: Value) {
        let values = self.pending_updates.entry(key).or_default();
        // Replace the previous value if it exists at the same lsn
@@ -1737,43 +1641,6 @@ impl<'a> Version<'a> {
        }
    }

-    async fn get_vectored(
-        &self,
-        timeline: &Timeline,
-        key_range: Range<Key>,
-        ctx: &RequestContext,
-    ) -> Result<(u8, Bytes), GetVectoredError> {
-        let pages = match self {
-            Version::Lsn(lsn) => {
-                timeline
-                    .get_vectored(
-                        KeySpace {
-                            ranges: vec![key_range],
-                        },
-                        *lsn,
-                        ctx,
-                    )
-                    .await
-            }
-            Version::Modified(modification) => modification.get_vectored(key_range, ctx).await,
-        }?;
-
-        let mut buf = BytesMut::new();
-        let page_count: u8 = pages.len().try_into().expect("too many pages returned");
-        for page in pages {
-            match page {
-                (_key, Ok(bytes)) => {
-                    buf.extend_from_slice(&bytes[..]);
-                }
-                (_key, Err(err)) => {
-                    return Err(GetVectoredError::Other(anyhow!(err)));
-                }
-            }
-        }
-
-        Ok((page_count, buf.freeze()))
-    }
-
    fn get_lsn(&self) -> Lsn {
        match self {
            Version::Lsn(lsn) => *lsn,
--- a/pageserver/src/task_mgr.rs
+++ b/pageserver/src/task_mgr.rs
@@ -188,7 +188,6 @@ task_local! {
    serde::Serialize,
    serde::Deserialize,
    strum_macros::IntoStaticStr,
-    strum_macros::EnumString,
 )]
 pub enum TaskKind {
    // Pageserver startup, i.e., `main`
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
--- a/pageserver/src/tenant/config.rs
+++ b/pageserver/src/tenant/config.rs
@@ -9,8 +9,8 @@
 //! may lead to a data loss.
 //!
 use anyhow::bail;
+use pageserver_api::models;
 use pageserver_api::models::EvictionPolicy;
-use pageserver_api::models::{self, ThrottleConfig};
 use pageserver_api::shard::{ShardCount, ShardIdentity, ShardNumber, ShardStripeSize};
 use serde::de::IntoDeserializer;
 use serde::{Deserialize, Serialize};
@@ -251,7 +251,7 @@ impl LocationConf {
        } else {
            ShardIdentity::new(
                ShardNumber(conf.shard_number),
-                ShardCount::new(conf.shard_count),
+                ShardCount(conf.shard_count),
                ShardStripeSize(conf.shard_stripe_size),
            )?
        };
@@ -285,7 +285,7 @@ impl Default for LocationConf {
 ///
 /// For storing and transmitting individual tenant's configuration, see
 /// TenantConfOpt.
-#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
 pub struct TenantConf {
    // Flush out an inmemory layer, if it's holding WAL older than this
    // This puts a backstop on how much WAL needs to be re-digested if the
@@ -348,13 +348,11 @@ pub struct TenantConf {

    /// If true then SLRU segments are dowloaded on demand, if false SLRU segments are included in basebackup
    pub lazy_slru_download: bool,
-
-    pub timeline_get_throttle: pageserver_api::models::ThrottleConfig,
 }

 /// Same as TenantConf, but this struct preserves the information about
 /// which parameters are set and which are not.
-#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
 pub struct TenantConfOpt {
    #[serde(skip_serializing_if = "Option::is_none")]
    #[serde(default)]
@@ -439,9 +437,6 @@ pub struct TenantConfOpt {
    #[serde(skip_serializing_if = "Option::is_none")]
    #[serde(default)]
    pub lazy_slru_download: Option<bool>,
-
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub timeline_get_throttle: Option<pageserver_api::models::ThrottleConfig>,
 }

 impl TenantConfOpt {
@@ -490,10 +485,6 @@ impl TenantConfOpt {
            lazy_slru_download: self
                .lazy_slru_download
                .unwrap_or(global_conf.lazy_slru_download),
-            timeline_get_throttle: self
-                .timeline_get_throttle
-                .clone()
-                .unwrap_or(global_conf.timeline_get_throttle),
        }
    }
 }
@@ -533,7 +524,6 @@ impl Default for TenantConf {
            gc_feedback: false,
            heatmap_period: Duration::ZERO,
            lazy_slru_download: false,
-            timeline_get_throttle: crate::tenant::throttle::Config::disabled(),
        }
    }
 }
@@ -606,7 +596,6 @@ impl From<TenantConfOpt> for models::TenantConfig {
            gc_feedback: value.gc_feedback,
            heatmap_period: value.heatmap_period.map(humantime),
            lazy_slru_download: value.lazy_slru_download,
-            timeline_get_throttle: value.timeline_get_throttle.map(ThrottleConfig::from),
        }
    }
 }
--- a/pageserver/src/tenant/delete.rs
+++ b/pageserver/src/tenant/delete.rs
@@ -246,8 +246,6 @@ async fn cleanup_remaining_fs_traces(

    rm(conf.tenant_deleted_mark_file_path(tenant_shard_id), false).await?;

-    rm(conf.tenant_heatmap_path(tenant_shard_id), false).await?;
-
    fail::fail_point!("tenant-delete-before-remove-tenant-dir", |_| {
        Err(anyhow::anyhow!(
            "failpoint: tenant-delete-before-remove-tenant-dir"
--- a/pageserver/src/tenant/layer_map.rs
+++ b/pageserver/src/tenant/layer_map.rs
@@ -52,7 +52,8 @@ use crate::repository::Key;
 use crate::tenant::storage_layer::InMemoryLayer;
 use anyhow::Result;
 use pageserver_api::keyspace::KeySpaceAccum;
-use std::collections::{HashMap, VecDeque};
+use std::cmp::Ordering;
+use std::collections::{BTreeMap, VecDeque};
 use std::iter::Peekable;
 use std::ops::Range;
 use std::sync::Arc;
@@ -146,28 +147,43 @@ impl Drop for BatchedUpdates<'_> {
 }

 /// Return value of LayerMap::search
-#[derive(Eq, PartialEq, Debug, Hash)]
+#[derive(Eq, PartialEq, Debug)]
 pub struct SearchResult {
    pub layer: Arc<PersistentLayerDesc>,
    pub lsn_floor: Lsn,
 }

-/// Return value of [`LayerMap::range_search`]
-///
-/// Contains a mapping from a layer description to a keyspace
-/// accumulator that contains all the keys which intersect the layer
-/// from the original search space. Keys that were not found are accumulated
-/// in a separate key space accumulator.
-#[derive(Debug)]
+pub struct OrderedSearchResult(SearchResult);
+
+impl Ord for OrderedSearchResult {
+    fn cmp(&self, other: &Self) -> Ordering {
+        self.0.lsn_floor.cmp(&other.0.lsn_floor)
+    }
+}
+
+impl PartialOrd for OrderedSearchResult {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl PartialEq for OrderedSearchResult {
+    fn eq(&self, other: &Self) -> bool {
+        self.0.lsn_floor == other.0.lsn_floor
+    }
+}
+
+impl Eq for OrderedSearchResult {}
+
 pub struct RangeSearchResult {
-    pub found: HashMap<SearchResult, KeySpaceAccum>,
+    pub found: BTreeMap<OrderedSearchResult, KeySpaceAccum>,
    pub not_found: KeySpaceAccum,
 }

 impl RangeSearchResult {
    fn new() -> Self {
        Self {
-            found: HashMap::new(),
+            found: BTreeMap::new(),
            not_found: KeySpaceAccum::new(),
        }
    }
@@ -298,7 +314,7 @@ where
            Some(search_result) => self
                .result
                .found
-                .entry(search_result)
+                .entry(OrderedSearchResult(search_result))
                .or_default()
                .add_range(covered_range),
            None => self.pad_range(covered_range),
@@ -346,35 +362,6 @@ where
    }
 }

-#[derive(PartialEq, Eq, Hash, Debug, Clone)]
-pub enum InMemoryLayerHandle {
-    Open {
-        lsn_floor: Lsn,
-        end_lsn: Lsn,
-    },
-    Frozen {
-        idx: usize,
-        lsn_floor: Lsn,
-        end_lsn: Lsn,
-    },
-}
-
-impl InMemoryLayerHandle {
-    pub fn get_lsn_floor(&self) -> Lsn {
-        match self {
-            InMemoryLayerHandle::Open { lsn_floor, .. } => *lsn_floor,
-            InMemoryLayerHandle::Frozen { lsn_floor, .. } => *lsn_floor,
-        }
-    }
-
-    pub fn get_end_lsn(&self) -> Lsn {
-        match self {
-            InMemoryLayerHandle::Open { end_lsn, .. } => *end_lsn,
-            InMemoryLayerHandle::Frozen { end_lsn, .. } => *end_lsn,
-        }
-    }
-}
-
 impl LayerMap {
    ///
    /// Find the latest layer (by lsn.end) that covers the given
@@ -569,43 +556,6 @@ impl LayerMap {
        self.historic.iter()
    }

-    /// Get a handle for the first in memory layer that matches the provided predicate.
-    /// The handle should be used with [`Self::get_in_memory_layer`] to retrieve the actual layer.
-    ///
-    /// Note: [`Self::find_in_memory_layer`] and [`Self::get_in_memory_layer`] should be called during
-    /// the same exclusive region established by holding the layer manager lock.
-    pub fn find_in_memory_layer<Pred>(&self, mut pred: Pred) -> Option<InMemoryLayerHandle>
-    where
-        Pred: FnMut(&Arc<InMemoryLayer>) -> bool,
-    {
-        if let Some(open) = &self.open_layer {
-            if pred(open) {
-                return Some(InMemoryLayerHandle::Open {
-                    lsn_floor: open.get_lsn_range().start,
-                    end_lsn: open.get_lsn_range().end,
-                });
-            }
-        }
-
-        let pos = self.frozen_layers.iter().rev().position(pred);
-        pos.map(|rev_idx| {
-            let idx = self.frozen_layers.len() - 1 - rev_idx;
-            InMemoryLayerHandle::Frozen {
-                idx,
-                lsn_floor: self.frozen_layers[idx].get_lsn_range().start,
-                end_lsn: self.frozen_layers[idx].get_lsn_range().end,
-            }
-        })
-    }
-
-    /// Get the layer pointed to by the provided handle.
-    pub fn get_in_memory_layer(&self, handle: &InMemoryLayerHandle) -> Option<Arc<InMemoryLayer>> {
-        match handle {
-            InMemoryLayerHandle::Open { .. } => self.open_layer.clone(),
-            InMemoryLayerHandle::Frozen { idx, .. } => self.frozen_layers.get(*idx).cloned(),
-        }
-    }
-
    ///
    /// Divide the whole given range of keys into sub-ranges based on the latest
    /// image layer that covers each range at the specified lsn (inclusive).
@@ -919,8 +869,6 @@ impl LayerMap {

 #[cfg(test)]
 mod tests {
-    use pageserver_api::keyspace::KeySpace;
-
    use super::*;

    #[derive(Clone)]
@@ -947,15 +895,15 @@ mod tests {

    fn assert_range_search_result_eq(lhs: RangeSearchResult, rhs: RangeSearchResult) {
        assert_eq!(lhs.not_found.to_keyspace(), rhs.not_found.to_keyspace());
-        let lhs: HashMap<SearchResult, KeySpace> = lhs
+        let lhs: Vec<_> = lhs
            .found
            .into_iter()
-            .map(|(search_result, accum)| (search_result, accum.to_keyspace()))
+            .map(|(search_result, accum)| (search_result.0, accum.to_keyspace()))
            .collect();
-        let rhs: HashMap<SearchResult, KeySpace> = rhs
+        let rhs: Vec<_> = rhs
            .found
            .into_iter()
-            .map(|(search_result, accum)| (search_result, accum.to_keyspace()))
+            .map(|(search_result, accum)| (search_result.0, accum.to_keyspace()))
            .collect();

        assert_eq!(lhs, rhs);
@@ -975,7 +923,7 @@ mod tests {
                Some(res) => {
                    range_search_result
                        .found
-                        .entry(res)
+                        .entry(OrderedSearchResult(res))
                        .or_default()
                        .add_key(key);
                }
--- a/pageserver/src/tenant/metadata.rs
+++ b/pageserver/src/tenant/metadata.rs
@@ -294,6 +294,17 @@ pub enum LoadMetadataError {
    Decode(#[from] anyhow::Error),
 }

+pub fn load_metadata(
+    conf: &'static PageServerConf,
+    tenant_shard_id: &TenantShardId,
+    timeline_id: &TimelineId,
+) -> Result<TimelineMetadata, LoadMetadataError> {
+    let metadata_path = conf.metadata_path(tenant_shard_id, timeline_id);
+    let metadata_bytes = std::fs::read(metadata_path)?;
+
+    Ok(TimelineMetadata::from_bytes(&metadata_bytes)?)
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;
--- a/pageserver/src/tenant/mgr.rs
+++ b/pageserver/src/tenant/mgr.rs
@@ -32,7 +32,6 @@ use crate::control_plane_client::{
    ControlPlaneClient, ControlPlaneGenerationsApi, RetryForeverError,
 };
 use crate::deletion_queue::DeletionQueueClient;
-use crate::http::routes::ACTIVE_TENANT_TIMEOUT;
 use crate::metrics::{TENANT, TENANT_MANAGER as METRICS};
 use crate::task_mgr::{self, TaskKind};
 use crate::tenant::config::{
@@ -485,7 +484,7 @@ pub async fn init_tenant_mgr(
                            TenantSlot::Secondary(SecondaryTenant::new(
                                tenant_shard_id,
                                location_conf.shard,
-                                location_conf.tenant_conf.clone(),
+                                location_conf.tenant_conf,
                                &SecondaryLocationConfig { warm: false },
                            )),
                        );
@@ -795,7 +794,7 @@ pub(crate) async fn set_new_tenant_config(
    info!("configuring tenant {tenant_id}");
    let tenant = get_tenant(tenant_shard_id, true)?;

-    if !tenant.tenant_shard_id().shard_count.is_unsharded() {
+    if tenant.tenant_shard_id().shard_count > ShardCount(0) {
        // Note that we use ShardParameters::default below.
        return Err(SetNewTenantConfigError::Other(anyhow::anyhow!(
            "This API may only be used on single-sharded tenants, use the /location_config API for sharded tenants"
@@ -806,7 +805,7 @@ pub(crate) async fn set_new_tenant_config(
    // API to use is the location_config/ endpoint, which lets the caller provide
    // the full LocationConf.
    let location_conf = LocationConf::attached_single(
-        new_tenant_conf.clone(),
+        new_tenant_conf,
        tenant.generation,
        &ShardParameters::default(),
    );
@@ -1377,7 +1376,7 @@ impl TenantManager {
        result
    }

-    #[instrument(skip_all, fields(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), new_shard_count=%new_shard_count.literal()))]
+    #[instrument(skip_all, fields(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), new_shard_count=%new_shard_count.0))]
    pub(crate) async fn shard_split(
        &self,
        tenant_shard_id: TenantShardId,
@@ -1387,10 +1386,11 @@ impl TenantManager {
        let tenant = get_tenant(tenant_shard_id, true)?;

        // Plan: identify what the new child shards will be
-        if new_shard_count.count() <= tenant_shard_id.shard_count.count() {
+        let effective_old_shard_count = std::cmp::max(tenant_shard_id.shard_count.0, 1);
+        if new_shard_count <= ShardCount(effective_old_shard_count) {
            anyhow::bail!("Requested shard count is not an increase");
        }
-        let expansion_factor = new_shard_count.count() / tenant_shard_id.shard_count.count();
+        let expansion_factor = new_shard_count.0 / effective_old_shard_count;
        if !expansion_factor.is_power_of_two() {
            anyhow::bail!("Requested split is not a power of two");
        }
@@ -1467,7 +1467,7 @@ impl TenantManager {
                    attach_mode: AttachmentMode::Single,
                }),
                shard: child_shard_identity,
-                tenant_conf: parent_tenant_conf.clone(),
+                tenant_conf: parent_tenant_conf,
            };

            self.upsert_location(
@@ -1490,16 +1490,6 @@ impl TenantManager {
                peek_slot.and_then(|s| s.get_attached()).cloned()
            };
            if let Some(t) = child_shard {
-                // Wait for the child shard to become active: this should be very quick because it only
-                // has to download the index_part that we just uploaded when creating it.
-                if let Err(e) = t.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await {
-                    // This is not fatal: we have durably created the child shard.  It just makes the
-                    // split operation less seamless for clients, as we will may detach the parent
-                    // shard before the child shards are fully ready to serve requests.
-                    tracing::warn!("Failed to wait for shard {child_shard_id} to activate: {e}");
-                    continue;
-                }
-
                let timelines = t.timelines.lock().unwrap().clone();
                for timeline in timelines.values() {
                    let Some(target_lsn) = target_lsns.get(&timeline.timeline_id) else {
--- a/pageserver/src/tenant/remote_timeline_client.rs
+++ b/pageserver/src/tenant/remote_timeline_client.rs
@@ -614,7 +614,7 @@ impl RemoteTimelineClient {
            metadata,
        );
        let op = UploadOp::UploadMetadata(index_part, disk_consistent_lsn);
-        self.metric_begin(&op);
+        self.calls_unfinished_metric_begin(&op);
        upload_queue.queued_operations.push_back(op);
        upload_queue.latest_files_changes_since_metadata_upload_scheduled = 0;

@@ -654,7 +654,7 @@ impl RemoteTimelineClient {
            metadata.generation, metadata.shard
        );
        let op = UploadOp::UploadLayer(layer, metadata);
-        self.metric_begin(&op);
+        self.calls_unfinished_metric_begin(&op);
        upload_queue.queued_operations.push_back(op);
    }

@@ -823,14 +823,10 @@ impl RemoteTimelineClient {
        }

        // schedule the actual deletions
-        if with_metadata.is_empty() {
-            // avoid scheduling the op & bumping the metric
-            return;
-        }
        let op = UploadOp::Delete(Delete {
            layers: with_metadata,
        });
-        self.metric_begin(&op);
+        self.calls_unfinished_metric_begin(&op);
        upload_queue.queued_operations.push_back(op);
    }

@@ -1520,10 +1516,10 @@ impl RemoteTimelineClient {
                .await;
        }

-        self.metric_end(&task.op);
+        self.calls_unfinished_metric_end(&task.op);
    }

-    fn metric_impl(
+    fn calls_unfinished_metric_impl(
        &self,
        op: &UploadOp,
    ) -> Option<(
@@ -1560,17 +1556,17 @@ impl RemoteTimelineClient {
        Some(res)
    }

-    fn metric_begin(&self, op: &UploadOp) {
-        let (file_kind, op_kind, track_bytes) = match self.metric_impl(op) {
+    fn calls_unfinished_metric_begin(&self, op: &UploadOp) {
+        let (file_kind, op_kind, track_bytes) = match self.calls_unfinished_metric_impl(op) {
            Some(x) => x,
            None => return,
        };
        let guard = self.metrics.call_begin(&file_kind, &op_kind, track_bytes);
-        guard.will_decrement_manually(); // in metric_end(), see right below
+        guard.will_decrement_manually(); // in calls_unfinished_metric_end()
    }

-    fn metric_end(&self, op: &UploadOp) {
-        let (file_kind, op_kind, track_bytes) = match self.metric_impl(op) {
+    fn calls_unfinished_metric_end(&self, op: &UploadOp) {
+        let (file_kind, op_kind, track_bytes) = match self.calls_unfinished_metric_impl(op) {
            Some(x) => x,
            None => return,
        };
@@ -1655,7 +1651,7 @@ impl RemoteTimelineClient {

                // Tear down queued ops
                for op in qi.queued_operations.into_iter() {
-                    self.metric_end(&op);
+                    self.calls_unfinished_metric_end(&op);
                    // Dropping UploadOp::Barrier() here will make wait_completion() return with an Err()
                    // which is exactly what we want to happen.
                    drop(op);
--- a/pageserver/src/tenant/remote_timeline_client/download.rs
+++ b/pageserver/src/tenant/remote_timeline_client/download.rs
@@ -81,7 +81,15 @@ pub async fn download_layer_file<'a>(
                .with_context(|| format!("create a destination file for layer '{temp_file_path}'"))
                .map_err(DownloadError::Other)?;

-            let download = storage.download(&remote_path, cancel).await?;
+            let download = storage
+                .download(&remote_path, cancel)
+                .await
+                .with_context(|| {
+                    format!(
+                        "open a download stream for layer with remote storage path '{remote_path:?}'"
+                    )
+                })
+                .map_err(DownloadError::Other)?;

            let mut destination_file =
                tokio::io::BufWriter::with_capacity(super::BUFFER_SIZE, destination_file);
@@ -90,11 +98,9 @@ pub async fn download_layer_file<'a>(

            let bytes_amount = tokio::io::copy_buf(&mut reader, &mut destination_file)
                .await
-                .with_context(|| {
-                    format!(
+                .with_context(|| format!(
                    "download layer at remote path '{remote_path:?}' into file {temp_file_path:?}"
-                )
-                })
+                ))
                .map_err(DownloadError::Other);

            match bytes_amount {
--- a/pageserver/src/tenant/secondary.rs
+++ b/pageserver/src/tenant/secondary.rs
@@ -133,7 +133,7 @@ impl SecondaryTenant {
    }

    pub(crate) fn set_tenant_conf(&self, config: &TenantConfOpt) {
-        *(self.tenant_conf.lock().unwrap()) = config.clone();
+        *(self.tenant_conf.lock().unwrap()) = *config;
    }

    /// For API access: generate a LocationConfig equivalent to the one that would be used to
@@ -144,13 +144,13 @@ impl SecondaryTenant {

        let conf = models::LocationConfigSecondary { warm: conf.warm };

-        let tenant_conf = self.tenant_conf.lock().unwrap().clone();
+        let tenant_conf = *self.tenant_conf.lock().unwrap();
        models::LocationConfig {
            mode: models::LocationConfigMode::Secondary,
            generation: None,
            secondary_conf: Some(conf),
            shard_number: self.tenant_shard_id.shard_number.0,
-            shard_count: self.tenant_shard_id.shard_count.literal(),
+            shard_count: self.tenant_shard_id.shard_count.0,
            shard_stripe_size: self.shard_identity.stripe_size.0,
            tenant_conf: tenant_conf.into(),
        }
--- a/pageserver/src/tenant/storage_layer.rs
+++ b/pageserver/src/tenant/storage_layer.rs
@@ -8,21 +8,15 @@ pub(crate) mod layer;
 mod layer_desc;

 use crate::context::{AccessStatsBehavior, RequestContext};
-use crate::repository::Value;
 use crate::task_mgr::TaskKind;
 use crate::walrecord::NeonWalRecord;
 use bytes::Bytes;
 use enum_map::EnumMap;
 use enumset::EnumSet;
 use once_cell::sync::Lazy;
-use pageserver_api::key::Key;
-use pageserver_api::keyspace::{KeySpace, KeySpaceRandomAccum};
 use pageserver_api::models::{
    LayerAccessKind, LayerResidenceEvent, LayerResidenceEventReason, LayerResidenceStatus,
 };
-use std::cmp::{Ordering, Reverse};
-use std::collections::hash_map::Entry;
-use std::collections::{BinaryHeap, HashMap};
 use std::ops::Range;
 use std::sync::Mutex;
 use std::time::{Duration, SystemTime, UNIX_EPOCH};
@@ -40,11 +34,6 @@ pub use layer_desc::{PersistentLayerDesc, PersistentLayerKey};

 pub(crate) use layer::{EvictionError, Layer, ResidentLayer};

-use super::layer_map::InMemoryLayerHandle;
-use super::timeline::layer_manager::LayerManager;
-use super::timeline::GetVectoredError;
-use super::PageReconstructError;
-
 pub fn range_overlaps<T>(a: &Range<T>, b: &Range<T>) -> bool
 where
    T: PartialOrd<T>,
@@ -78,287 +67,6 @@ pub struct ValueReconstructState {
    pub img: Option<(Lsn, Bytes)>,
 }

-#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
-pub(crate) enum ValueReconstructSituation {
-    Complete,
-    #[default]
-    Continue,
-}
-
-/// Reconstruct data accumulated for a single key during a vectored get
-#[derive(Debug, Default, Clone)]
-pub(crate) struct VectoredValueReconstructState {
-    pub(crate) records: Vec<(Lsn, NeonWalRecord)>,
-    pub(crate) img: Option<(Lsn, Bytes)>,
-
-    situation: ValueReconstructSituation,
-}
-
-impl VectoredValueReconstructState {
-    fn get_cached_lsn(&self) -> Option<Lsn> {
-        self.img.as_ref().map(|img| img.0)
-    }
-}
-
-impl From<VectoredValueReconstructState> for ValueReconstructState {
-    fn from(mut state: VectoredValueReconstructState) -> Self {
-        // walredo expects the records to be descending in terms of Lsn
-        state.records.sort_by_key(|(lsn, _)| Reverse(*lsn));
-
-        ValueReconstructState {
-            records: state.records,
-            img: state.img,
-        }
-    }
-}
-
-/// Bag of data accumulated during a vectored get
-pub(crate) struct ValuesReconstructState {
-    pub(crate) keys: HashMap<Key, Result<VectoredValueReconstructState, PageReconstructError>>,
-
-    keys_done: KeySpaceRandomAccum,
-}
-
-impl ValuesReconstructState {
-    pub(crate) fn new() -> Self {
-        Self {
-            keys: HashMap::new(),
-            keys_done: KeySpaceRandomAccum::new(),
-        }
-    }
-
-    /// Associate a key with the error which it encountered and mark it as done
-    pub(crate) fn on_key_error(&mut self, key: Key, err: PageReconstructError) {
-        let previous = self.keys.insert(key, Err(err));
-        if let Some(Ok(state)) = previous {
-            if state.situation == ValueReconstructSituation::Continue {
-                self.keys_done.add_key(key);
-            }
-        }
-    }
-
-    /// Update the state collected for a given key.
-    /// Returns true if this was the last value needed for the key and false otherwise.
-    ///
-    /// If the key is done after the update, mark it as such.
-    pub(crate) fn update_key(
-        &mut self,
-        key: &Key,
-        lsn: Lsn,
-        value: Value,
-    ) -> ValueReconstructSituation {
-        let state = self
-            .keys
-            .entry(*key)
-            .or_insert(Ok(VectoredValueReconstructState::default()));
-
-        if let Ok(state) = state {
-            let key_done = match state.situation {
-                ValueReconstructSituation::Complete => unreachable!(),
-                ValueReconstructSituation::Continue => match value {
-                    Value::Image(img) => {
-                        state.img = Some((lsn, img));
-                        true
-                    }
-                    Value::WalRecord(rec) => {
-                        let reached_cache =
-                            state.get_cached_lsn().map(|clsn| clsn + 1) == Some(lsn);
-                        let will_init = rec.will_init();
-                        state.records.push((lsn, rec));
-                        will_init || reached_cache
-                    }
-                },
-            };
-
-            if key_done && state.situation == ValueReconstructSituation::Continue {
-                state.situation = ValueReconstructSituation::Complete;
-                self.keys_done.add_key(*key);
-            }
-
-            state.situation
-        } else {
-            ValueReconstructSituation::Complete
-        }
-    }
-
-    /// Returns the Lsn at which this key is cached if one exists.
-    /// The read path should go no further than this Lsn for the given key.
-    pub(crate) fn get_cached_lsn(&self, key: &Key) -> Option<Lsn> {
-        self.keys
-            .get(key)
-            .and_then(|k| k.as_ref().ok())
-            .and_then(|state| state.get_cached_lsn())
-    }
-
-    /// Returns the key space describing the keys that have
-    /// been marked as completed since the last call to this function.
-    pub(crate) fn consume_done_keys(&mut self) -> KeySpace {
-        self.keys_done.consume_keyspace()
-    }
-}
-
-impl Default for ValuesReconstructState {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
-/// Description of layer to be read - the layer map can turn
-/// this description into the actual layer.
-#[derive(PartialEq, Eq, Hash, Debug, Clone)]
-pub(crate) enum ReadableLayerDesc {
-    Persistent {
-        desc: PersistentLayerDesc,
-        lsn_floor: Lsn,
-        lsn_ceil: Lsn,
-    },
-    InMemory {
-        handle: InMemoryLayerHandle,
-        lsn_ceil: Lsn,
-    },
-}
-
-/// Wraper for 'ReadableLayerDesc' sorted by Lsn
-#[derive(Debug)]
-struct ReadableLayerDescOrdered(ReadableLayerDesc);
-
-/// Data structure which maintains a fringe of layers for the
-/// read path. The fringe is the set of layers which intersects
-/// the current keyspace that the search is descending on.
-/// Each layer tracks the keyspace that intersects it.
-///
-/// The fringe must appear sorted by Lsn. Hence, it uses
-/// a two layer indexing scheme.
-#[derive(Debug)]
-pub(crate) struct LayerFringe {
-    layers_by_lsn: BinaryHeap<ReadableLayerDescOrdered>,
-    layers: HashMap<ReadableLayerDesc, KeySpace>,
-}
-
-impl LayerFringe {
-    pub(crate) fn new() -> Self {
-        LayerFringe {
-            layers_by_lsn: BinaryHeap::new(),
-            layers: HashMap::new(),
-        }
-    }
-
-    pub(crate) fn next_layer(&mut self) -> Option<(ReadableLayerDesc, KeySpace)> {
-        let handle = match self.layers_by_lsn.pop() {
-            Some(h) => h,
-            None => return None,
-        };
-
-        let removed = self.layers.remove_entry(&handle.0);
-        match removed {
-            Some((layer, keyspace)) => Some((layer, keyspace)),
-            None => unreachable!("fringe internals are always consistent"),
-        }
-    }
-
-    pub(crate) fn update(&mut self, layer: ReadableLayerDesc, keyspace: KeySpace) {
-        let entry = self.layers.entry(layer.clone());
-        match entry {
-            Entry::Occupied(mut entry) => {
-                entry.get_mut().merge(&keyspace);
-            }
-            Entry::Vacant(entry) => {
-                self.layers_by_lsn
-                    .push(ReadableLayerDescOrdered(entry.key().clone()));
-                entry.insert(keyspace);
-            }
-        }
-    }
-}
-
-impl Default for LayerFringe {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
-impl Ord for ReadableLayerDescOrdered {
-    fn cmp(&self, other: &Self) -> Ordering {
-        let ord = self.0.get_lsn_ceil().cmp(&other.0.get_lsn_ceil());
-        if ord == std::cmp::Ordering::Equal {
-            self.0
-                .get_lsn_floor()
-                .cmp(&other.0.get_lsn_floor())
-                .reverse()
-        } else {
-            ord
-        }
-    }
-}
-
-impl PartialOrd for ReadableLayerDescOrdered {
-    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
-        Some(self.cmp(other))
-    }
-}
-
-impl PartialEq for ReadableLayerDescOrdered {
-    fn eq(&self, other: &Self) -> bool {
-        self.0.get_lsn_floor() == other.0.get_lsn_floor()
-            && self.0.get_lsn_ceil() == other.0.get_lsn_ceil()
-    }
-}
-
-impl Eq for ReadableLayerDescOrdered {}
-
-impl ReadableLayerDesc {
-    pub(crate) fn get_lsn_floor(&self) -> Lsn {
-        match self {
-            ReadableLayerDesc::Persistent { lsn_floor, .. } => *lsn_floor,
-            ReadableLayerDesc::InMemory { handle, .. } => handle.get_lsn_floor(),
-        }
-    }
-
-    pub(crate) fn get_lsn_ceil(&self) -> Lsn {
-        match self {
-            ReadableLayerDesc::Persistent { lsn_ceil, .. } => *lsn_ceil,
-            ReadableLayerDesc::InMemory { lsn_ceil, .. } => *lsn_ceil,
-        }
-    }
-
-    pub(crate) async fn get_values_reconstruct_data(
-        &self,
-        layer_manager: &LayerManager,
-        keyspace: KeySpace,
-        reconstruct_state: &mut ValuesReconstructState,
-        ctx: &RequestContext,
-    ) -> Result<(), GetVectoredError> {
-        match self {
-            ReadableLayerDesc::Persistent {
-                desc,
-                lsn_floor,
-                lsn_ceil,
-            } => {
-                let layer = layer_manager.get_from_desc(desc);
-                layer
-                    .get_values_reconstruct_data(
-                        keyspace,
-                        *lsn_floor,
-                        *lsn_ceil,
-                        reconstruct_state,
-                        ctx,
-                    )
-                    .await
-            }
-            ReadableLayerDesc::InMemory { handle, lsn_ceil } => {
-                let layer = layer_manager
-                    .layer_map()
-                    .get_in_memory_layer(handle)
-                    .unwrap();
-
-                layer
-                    .get_values_reconstruct_data(keyspace, *lsn_ceil, reconstruct_state, ctx)
-                    .await
-            }
-        }
-    }
-}
-
 /// Return value from [`Layer::get_value_reconstruct_data`]
 #[derive(Clone, Copy, Debug)]
 pub enum ValueReconstructResult {
--- a/pageserver/src/tenant/storage_layer/delta_layer.rs
+++ b/pageserver/src/tenant/storage_layer/delta_layer.rs
@@ -35,18 +35,12 @@ use crate::tenant::blob_io::BlobWriter;
 use crate::tenant::block_io::{BlockBuf, BlockCursor, BlockLease, BlockReader, FileBlockReader};
 use crate::tenant::disk_btree::{DiskBtreeBuilder, DiskBtreeReader, VisitDirection};
 use crate::tenant::storage_layer::{Layer, ValueReconstructResult, ValueReconstructState};
-use crate::tenant::timeline::GetVectoredError;
-use crate::tenant::vectored_blob_io::{
-    BlobFlag, VectoredBlobReader, VectoredRead, VectoredReadPlanner,
-};
-use crate::tenant::{PageReconstructError, Timeline};
+use crate::tenant::Timeline;
 use crate::virtual_file::{self, VirtualFile};
 use crate::{walrecord, TEMP_FILE_SUFFIX};
 use crate::{DELTA_FILE_MAGIC, STORAGE_FORMAT_VERSION};
-use anyhow::{anyhow, bail, ensure, Context, Result};
-use bytes::BytesMut;
+use anyhow::{bail, ensure, Context, Result};
 use camino::{Utf8Path, Utf8PathBuf};
-use pageserver_api::keyspace::KeySpace;
 use pageserver_api::models::LayerAccessKind;
 use pageserver_api::shard::TenantShardId;
 use rand::{distributions::Alphanumeric, Rng};
@@ -65,9 +59,7 @@ use utils::{
    lsn::Lsn,
 };

-use super::{
-    AsLayerDesc, LayerAccessStats, PersistentLayerDesc, ResidentLayer, ValuesReconstructState,
-};
+use super::{AsLayerDesc, LayerAccessStats, PersistentLayerDesc, ResidentLayer};

 ///
 /// Header stored in the beginning of the file
@@ -215,7 +207,6 @@ pub struct DeltaLayerInner {
    // values copied from summary
    index_start_blk: u32,
    index_root_blk: u32,
-    vectored_blob_reader: VectoredBlobReader,

    /// Reader object for reading blocks from the file.
    file: FileBlockReader,
@@ -251,7 +242,7 @@ impl DeltaLayer {
            return Ok(());
        }

-        let inner = self.load(LayerAccessKind::Dump, 0, ctx).await?;
+        let inner = self.load(LayerAccessKind::Dump, ctx).await?;

        inner.dump(ctx).await
    }
@@ -287,25 +278,20 @@ impl DeltaLayer {
    async fn load(
        &self,
        access_kind: LayerAccessKind,
-        max_vectored_read_size: usize,
        ctx: &RequestContext,
    ) -> Result<&Arc<DeltaLayerInner>> {
        self.access_stats.record_access(access_kind, ctx);
        // Quick exit if already loaded
        self.inner
-            .get_or_try_init(|| self.load_inner(max_vectored_read_size, ctx))
+            .get_or_try_init(|| self.load_inner(ctx))
            .await
            .with_context(|| format!("Failed to load delta layer {}", self.path()))
    }

-    async fn load_inner(
-        &self,
-        max_vectored_read_size: usize,
-        ctx: &RequestContext,
-    ) -> Result<Arc<DeltaLayerInner>> {
+    async fn load_inner(&self, ctx: &RequestContext) -> Result<Arc<DeltaLayerInner>> {
        let path = self.path();

-        let loaded = DeltaLayerInner::load(&path, None, max_vectored_read_size, ctx)
+        let loaded = DeltaLayerInner::load(&path, None, ctx)
            .await
            .and_then(|res| res)?;

@@ -706,16 +692,15 @@ impl DeltaLayerInner {
    pub(super) async fn load(
        path: &Utf8Path,
        summary: Option<Summary>,
-        max_vectored_read_size: usize,
        ctx: &RequestContext,
    ) -> Result<Result<Self, anyhow::Error>, anyhow::Error> {
        let file = match VirtualFile::open(path).await {
            Ok(file) => file,
            Err(e) => return Ok(Err(anyhow::Error::new(e).context("open layer file"))),
        };
-        let block_reader = FileBlockReader::new(file);
+        let file = FileBlockReader::new(file);

-        let summary_blk = match block_reader.read_blk(0, ctx).await {
+        let summary_blk = match file.read_blk(0, ctx).await {
            Ok(blk) => blk,
            Err(e) => return Ok(Err(anyhow::Error::new(e).context("read first block"))),
        };
@@ -737,16 +722,8 @@ impl DeltaLayerInner {
            }
        }

-        // TODO: don't open file twice
-        let file = match VirtualFile::open(path).await {
-            Ok(file) => file,
-            Err(e) => return Ok(Err(anyhow::Error::new(e).context("open layer file"))),
-        };
-        let vectored_blob_reader = VectoredBlobReader::new(file, max_vectored_read_size);
-
        Ok(Ok(DeltaLayerInner {
-            file: block_reader,
-            vectored_blob_reader,
+            file,
            index_start_blk: actual_summary.index_start_blk,
            index_root_blk: actual_summary.index_root_blk,
        }))
@@ -841,174 +818,6 @@ impl DeltaLayerInner {
        }
    }

-    // Look up the keys in the provided keyspace and update
-    // the reconstruct state with whatever is found.
-    //
-    // If the key is cached, go no further than the cached Lsn.
-    //
-    // Currently, the index is visited for each range, but this
-    // can be further optimised to visit the index only once.
-    pub(super) async fn get_values_reconstruct_data(
-        &self,
-        keyspace: KeySpace,
-        start_lsn: Lsn,
-        end_lsn: Lsn,
-        reconstruct_state: &mut ValuesReconstructState,
-        ctx: &RequestContext,
-    ) -> Result<(), GetVectoredError> {
-        let reads = self
-            .plan_reads(keyspace, start_lsn..end_lsn, reconstruct_state, ctx)
-            .await
-            .map_err(GetVectoredError::Other)?;
-
-        self.do_reads_and_update_state(reads, reconstruct_state)
-            .await;
-
-        Ok(())
-    }
-
-    async fn plan_reads(
-        &self,
-        keyspace: KeySpace,
-        lsn_range: Range<Lsn>,
-        reconstruct_state: &mut ValuesReconstructState,
-        ctx: &RequestContext,
-    ) -> anyhow::Result<Vec<VectoredRead>> {
-        let mut planner = VectoredReadPlanner::new(self.vectored_blob_reader.get_max_read_size());
-
-        let file = &self.file;
-        let tree_reader = DiskBtreeReader::<_, DELTA_KEY_SIZE>::new(
-            self.index_start_blk,
-            self.index_root_blk,
-            file,
-        );
-
-        for range in keyspace.ranges.iter() {
-            let mut range_end_handled = false;
-
-            let start_key = DeltaKey::from_key_lsn(&range.start, lsn_range.start);
-            tree_reader
-                .visit(
-                    &start_key.0,
-                    VisitDirection::Forwards,
-                    |raw_key, value| {
-                        let key = Key::from_slice(&raw_key[..KEY_SIZE]);
-                        let lsn = DeltaKey::extract_lsn_from_buf(raw_key);
-                        let blob_ref = BlobRef(value);
-
-                        assert!(key >= range.start && lsn >= lsn_range.start);
-
-                        let cached_lsn = reconstruct_state.get_cached_lsn(&key);
-                        let flag = {
-                            if cached_lsn >= Some(lsn) {
-                                BlobFlag::Ignore
-                            } else if blob_ref.will_init() {
-                                BlobFlag::Replaces
-                            } else {
-                                BlobFlag::None
-                            }
-                        };
-
-                        if key >= range.end || (key.next() == range.end && lsn >= lsn_range.end) {
-                            planner.handle_range_end(blob_ref.pos());
-                            range_end_handled = true;
-                            false
-                        } else {
-                            planner.handle(key, lsn, blob_ref.pos(), flag);
-                            true
-                        }
-                    },
-                    &RequestContextBuilder::extend(ctx)
-                        .page_content_kind(PageContentKind::DeltaLayerBtreeNode)
-                        .build(),
-                )
-                .await
-                .map_err(|err| anyhow!(err))?;
-
-            if !range_end_handled {
-                let payload_end = self.index_start_blk as u64 * PAGE_SZ as u64;
-                tracing::info!("Handling range end fallback at {}", payload_end);
-                planner.handle_range_end(payload_end);
-            }
-        }
-
-        Ok(planner.finish())
-    }
-
-    async fn do_reads_and_update_state(
-        &self,
-        reads: Vec<VectoredRead>,
-        reconstruct_state: &mut ValuesReconstructState,
-    ) {
-        let mut ignore_key_with_err = None;
-
-        let mut buf = Some(BytesMut::with_capacity(
-            self.vectored_blob_reader.get_max_read_size(),
-        ));
-
-        for read in reads.into_iter().rev() {
-            let res = self
-                .vectored_blob_reader
-                .read_blobs(&read, buf.take().expect("Should have a buffer"))
-                .await;
-
-            let blobs_buf = match res {
-                Ok(blobs_buf) => blobs_buf,
-                Err(err) => {
-                    let kind = err.kind();
-                    for (_, blob_meta) in read.blobs_at.as_slice() {
-                        reconstruct_state.on_key_error(
-                            blob_meta.key,
-                            PageReconstructError::from(anyhow!(
-                                "Failed to read blobs from virtual file {}: {}",
-                                self.vectored_blob_reader.get_file_ref().path,
-                                kind
-                            )),
-                        );
-                    }
-
-                    // We have "lost" the buffer since the lower level IO api
-                    // doesn't return the buffer on error. Allocate a new one.
-                    buf = Some(BytesMut::with_capacity(
-                        self.vectored_blob_reader.get_max_read_size(),
-                    ));
-
-                    continue;
-                }
-            };
-
-            for meta in blobs_buf.blobs.iter().rev() {
-                if Some(meta.meta.key) == ignore_key_with_err {
-                    continue;
-                }
-
-                let value = Value::des(&blobs_buf.buf[meta.start..meta.end]);
-                let value = match value {
-                    Ok(v) => v,
-                    Err(e) => {
-                        reconstruct_state.on_key_error(
-                            meta.meta.key,
-                            PageReconstructError::from(anyhow!(e).context(format!(
-                                "Failed to deserialize blob from virtual file {}",
-                                self.vectored_blob_reader.get_file_ref().path,
-                            ))),
-                        );
-
-                        ignore_key_with_err = Some(meta.meta.key);
-                        continue;
-                    }
-                };
-
-                // Invariant: once a key reaches [`ValueReconstructSituation::Complete`]
-                // state, no further updates shall be made to it. The call below will
-                // panic if the invariant is violated.
-                reconstruct_state.update_key(&meta.meta.key, meta.meta.lsn, value);
-            }
-
-            buf = Some(blobs_buf.buf);
-        }
-    }
-
    pub(super) async fn load_keys<'a>(
        &'a self,
        ctx: &RequestContext,
--- a/pageserver/src/tenant/storage_layer/image_layer.rs
+++ b/pageserver/src/tenant/storage_layer/image_layer.rs
@@ -26,25 +26,20 @@
 use crate::config::PageServerConf;
 use crate::context::{PageContentKind, RequestContext, RequestContextBuilder};
 use crate::page_cache::PAGE_SZ;
-use crate::repository::{Key, Value, KEY_SIZE};
+use crate::repository::{Key, KEY_SIZE};
 use crate::tenant::blob_io::BlobWriter;
 use crate::tenant::block_io::{BlockBuf, BlockReader, FileBlockReader};
 use crate::tenant::disk_btree::{DiskBtreeBuilder, DiskBtreeReader, VisitDirection};
 use crate::tenant::storage_layer::{
    LayerAccessStats, ValueReconstructResult, ValueReconstructState,
 };
-use crate::tenant::timeline::GetVectoredError;
-use crate::tenant::vectored_blob_io::{
-    BlobFlag, VectoredBlobReader, VectoredRead, VectoredReadPlanner,
-};
-use crate::tenant::{PageReconstructError, Timeline};
+use crate::tenant::Timeline;
 use crate::virtual_file::{self, VirtualFile};
 use crate::{IMAGE_FILE_MAGIC, STORAGE_FORMAT_VERSION, TEMP_FILE_SUFFIX};
-use anyhow::{anyhow, bail, ensure, Context, Result};
-use bytes::{Bytes, BytesMut};
+use anyhow::{bail, ensure, Context, Result};
+use bytes::Bytes;
 use camino::{Utf8Path, Utf8PathBuf};
 use hex;
-use pageserver_api::keyspace::KeySpace;
 use pageserver_api::models::LayerAccessKind;
 use pageserver_api::shard::TenantShardId;
 use rand::{distributions::Alphanumeric, Rng};
@@ -64,7 +59,7 @@ use utils::{
 };

 use super::filename::ImageFileName;
-use super::{AsLayerDesc, Layer, PersistentLayerDesc, ResidentLayer, ValuesReconstructState};
+use super::{AsLayerDesc, Layer, PersistentLayerDesc, ResidentLayer};

 ///
 /// Header stored in the beginning of the file
@@ -157,7 +152,6 @@ pub struct ImageLayerInner {

    /// Reader object for reading blocks from the file.
    file: FileBlockReader,
-    vectored_blob_reader: VectoredBlobReader,
 }

 impl std::fmt::Debug for ImageLayerInner {
@@ -214,7 +208,7 @@ impl ImageLayer {
            return Ok(());
        }

-        let inner = self.load(LayerAccessKind::Dump, 0, ctx).await?;
+        let inner = self.load(LayerAccessKind::Dump, ctx).await?;

        inner.dump(ctx).await?;

@@ -244,32 +238,21 @@ impl ImageLayer {
    async fn load(
        &self,
        access_kind: LayerAccessKind,
-        max_vectored_read_size: usize,
        ctx: &RequestContext,
    ) -> Result<&ImageLayerInner> {
        self.access_stats.record_access(access_kind, ctx);
        self.inner
-            .get_or_try_init(|| self.load_inner(max_vectored_read_size, ctx))
+            .get_or_try_init(|| self.load_inner(ctx))
            .await
            .with_context(|| format!("Failed to load image layer {}", self.path()))
    }

-    async fn load_inner(
-        &self,
-        max_vectored_read_size: usize,
-        ctx: &RequestContext,
-    ) -> Result<ImageLayerInner> {
+    async fn load_inner(&self, ctx: &RequestContext) -> Result<ImageLayerInner> {
        let path = self.path();

-        let loaded = ImageLayerInner::load(
-            &path,
-            self.desc.image_layer_lsn(),
-            None,
-            max_vectored_read_size,
-            ctx,
-        )
-        .await
-        .and_then(|res| res)?;
+        let loaded = ImageLayerInner::load(&path, self.desc.image_layer_lsn(), None, ctx)
+            .await
+            .and_then(|res| res)?;

        // not production code
        let actual_filename = path.file_name().unwrap().to_owned();
@@ -376,15 +359,14 @@ impl ImageLayerInner {
        path: &Utf8Path,
        lsn: Lsn,
        summary: Option<Summary>,
-        max_vectored_read_size: usize,
        ctx: &RequestContext,
    ) -> Result<Result<Self, anyhow::Error>, anyhow::Error> {
        let file = match VirtualFile::open(path).await {
            Ok(file) => file,
            Err(e) => return Ok(Err(anyhow::Error::new(e).context("open layer file"))),
        };
-        let block_reader = FileBlockReader::new(file);
-        let summary_blk = match block_reader.read_blk(0, ctx).await {
+        let file = FileBlockReader::new(file);
+        let summary_blk = match file.read_blk(0, ctx).await {
            Ok(blk) => blk,
            Err(e) => return Ok(Err(anyhow::Error::new(e).context("read first block"))),
        };
@@ -410,19 +392,11 @@ impl ImageLayerInner {
            }
        }

-        // TODO: don't open file twice
-        let file = match VirtualFile::open(path).await {
-            Ok(file) => file,
-            Err(e) => return Ok(Err(anyhow::Error::new(e).context("open layer file"))),
-        };
-        let vectored_blob_reader = VectoredBlobReader::new(file, max_vectored_read_size);
-
        Ok(Ok(ImageLayerInner {
            index_start_blk: actual_summary.index_start_blk,
            index_root_blk: actual_summary.index_root_blk,
            lsn,
-            file: block_reader,
-            vectored_blob_reader,
+            file,
        }))
    }

@@ -464,124 +438,6 @@ impl ImageLayerInner {
            Ok(ValueReconstructResult::Missing)
        }
    }
-
-    // Look up the keys in the provided keyspace and update
-    // the reconstruct state with whatever is found.
-    pub(super) async fn get_values_reconstruct_data(
-        &self,
-        keyspace: KeySpace,
-        reconstruct_state: &mut ValuesReconstructState,
-        ctx: &RequestContext,
-    ) -> Result<(), GetVectoredError> {
-        let reads = self
-            .plan_reads(keyspace, ctx)
-            .await
-            .map_err(GetVectoredError::Other)?;
-
-        self.do_reads_and_update_state(reads, reconstruct_state)
-            .await;
-
-        Ok(())
-    }
-
-    async fn plan_reads(
-        &self,
-        keyspace: KeySpace,
-        ctx: &RequestContext,
-    ) -> anyhow::Result<Vec<VectoredRead>> {
-        let mut planner = VectoredReadPlanner::new(self.vectored_blob_reader.get_max_read_size());
-
-        let file = &self.file;
-        let tree_reader = DiskBtreeReader::new(self.index_start_blk, self.index_root_blk, file);
-
-        for range in keyspace.ranges.iter() {
-            let mut range_end_handled = false;
-
-            let mut search_key: [u8; KEY_SIZE] = [0u8; KEY_SIZE];
-            range.start.write_to_byte_slice(&mut search_key);
-
-            tree_reader
-                .visit(
-                    &search_key,
-                    VisitDirection::Forwards,
-                    |raw_key, offset| {
-                        let key = Key::from_slice(&raw_key[..KEY_SIZE]);
-                        assert!(key >= range.start);
-
-                        if key >= range.end {
-                            planner.handle_range_end(offset);
-                            range_end_handled = true;
-                            false
-                        } else {
-                            planner.handle(key, self.lsn, offset, BlobFlag::None);
-                            true
-                        }
-                    },
-                    &RequestContextBuilder::extend(ctx)
-                        .page_content_kind(PageContentKind::ImageLayerBtreeNode)
-                        .build(),
-                )
-                .await
-                .map_err(|err| GetVectoredError::Other(anyhow!(err)))?;
-
-            if !range_end_handled {
-                let payload_end = self.index_start_blk as u64 * PAGE_SZ as u64;
-                planner.handle_range_end(payload_end);
-            }
-        }
-
-        Ok(planner.finish())
-    }
-
-    async fn do_reads_and_update_state(
-        &self,
-        reads: Vec<VectoredRead>,
-        reconstruct_state: &mut ValuesReconstructState,
-    ) {
-        let mut buf = Some(BytesMut::with_capacity(
-            self.vectored_blob_reader.get_max_read_size(),
-        ));
-        for read in reads.into_iter().rev() {
-            let res = self
-                .vectored_blob_reader
-                .read_blobs(&read, buf.take().expect("Should have a buffer"))
-                .await;
-
-            match res {
-                Ok(blobs_buf) => {
-                    for meta in blobs_buf.blobs.iter().rev() {
-                        let img_buf = Bytes::copy_from_slice(&blobs_buf.buf[meta.start..meta.end]);
-                        reconstruct_state.update_key(
-                            &meta.meta.key,
-                            self.lsn,
-                            Value::Image(img_buf),
-                        );
-                    }
-
-                    buf = Some(blobs_buf.buf);
-                }
-                Err(err) => {
-                    let kind = err.kind();
-                    for (_, blob_meta) in read.blobs_at.as_slice() {
-                        reconstruct_state.on_key_error(
-                            blob_meta.key,
-                            PageReconstructError::from(anyhow!(
-                                "Failed to read blobs from virtual file {}: {}",
-                                self.vectored_blob_reader.get_file_ref().path,
-                                kind
-                            )),
-                        );
-                    }
-
-                    // We have "lost" the buffer since the lower level IO api
-                    // doesn't return the buffer on error. Allocate a new one.
-                    buf = Some(BytesMut::with_capacity(
-                        self.vectored_blob_reader.get_max_read_size(),
-                    ));
-                }
-            };
-        }
-    }
 }

 /// A builder object for constructing a new image layer.
--- a/pageserver/src/tenant/storage_layer/inmemory_layer.rs
+++ b/pageserver/src/tenant/storage_layer/inmemory_layer.rs
@@ -9,15 +9,13 @@ use crate::context::{PageContentKind, RequestContext, RequestContextBuilder};
 use crate::repository::{Key, Value};
 use crate::tenant::block_io::BlockReader;
 use crate::tenant::ephemeral_file::EphemeralFile;
-use crate::tenant::storage_layer::ValueReconstructResult;
-use crate::tenant::timeline::GetVectoredError;
-use crate::tenant::{PageReconstructError, Timeline};
+use crate::tenant::storage_layer::{ValueReconstructResult, ValueReconstructState};
+use crate::tenant::Timeline;
 use crate::walrecord;
-use anyhow::{anyhow, ensure, Result};
-use pageserver_api::keyspace::KeySpace;
+use anyhow::{ensure, Result};
 use pageserver_api::models::InMemoryLayerInfo;
 use pageserver_api::shard::TenantShardId;
-use std::collections::{BinaryHeap, HashMap, HashSet};
+use std::collections::HashMap;
 use std::sync::{Arc, OnceLock};
 use tracing::*;
 use utils::{bin_ser::BeSer, id::TimelineId, lsn::Lsn, vec_map::VecMap};
@@ -27,10 +25,7 @@ use std::fmt::Write as _;
 use std::ops::Range;
 use tokio::sync::{RwLock, RwLockWriteGuard};

-use super::{
-    DeltaLayerWriter, ResidentLayer, ValueReconstructSituation, ValueReconstructState,
-    ValuesReconstructState,
-};
+use super::{DeltaLayerWriter, ResidentLayer};

 pub struct InMemoryLayer {
    conf: &'static PageServerConf,
@@ -207,91 +202,6 @@ impl InMemoryLayer {
            Ok(ValueReconstructResult::Complete)
        }
    }
-
-    // Look up the keys in the provided keyspace and update
-    // the reconstruct state with whatever is found.
-    //
-    // If the key is cached, go no further than the cached Lsn.
-    pub(crate) async fn get_values_reconstruct_data(
-        &self,
-        keyspace: KeySpace,
-        end_lsn: Lsn,
-        reconstruct_state: &mut ValuesReconstructState,
-        ctx: &RequestContext,
-    ) -> Result<(), GetVectoredError> {
-        let ctx = RequestContextBuilder::extend(ctx)
-            .page_content_kind(PageContentKind::InMemoryLayer)
-            .build();
-
-        let inner = self.inner.read().await;
-        let reader = inner.file.block_cursor();
-
-        #[derive(Eq, PartialEq, Ord, PartialOrd)]
-        struct BlockRead {
-            key: Key,
-            lsn: Lsn,
-            block_offset: u64,
-        }
-
-        let mut planned_block_reads = BinaryHeap::new();
-
-        for range in keyspace.ranges.iter() {
-            let mut key = range.start;
-            while key < range.end {
-                if let Some(vec_map) = inner.index.get(&key) {
-                    let lsn_range = match reconstruct_state.get_cached_lsn(&key) {
-                        Some(cached_lsn) => (cached_lsn + 1)..end_lsn,
-                        None => self.start_lsn..end_lsn,
-                    };
-
-                    let slice = vec_map.slice_range(lsn_range);
-                    for (entry_lsn, pos) in slice.iter().rev() {
-                        planned_block_reads.push(BlockRead {
-                            key,
-                            lsn: *entry_lsn,
-                            block_offset: *pos,
-                        });
-                    }
-                }
-
-                key = key.next();
-            }
-        }
-
-        let keyspace_size = keyspace.total_size();
-
-        let mut completed_keys = HashSet::new();
-        while completed_keys.len() < keyspace_size && !planned_block_reads.is_empty() {
-            let block_read = planned_block_reads.pop().unwrap();
-            if completed_keys.contains(&block_read.key) {
-                continue;
-            }
-
-            let buf = reader.read_blob(block_read.block_offset, &ctx).await;
-            if let Err(e) = buf {
-                reconstruct_state
-                    .on_key_error(block_read.key, PageReconstructError::from(anyhow!(e)));
-                completed_keys.insert(block_read.key);
-                continue;
-            }
-
-            let value = Value::des(&buf.unwrap());
-            if let Err(e) = value {
-                reconstruct_state
-                    .on_key_error(block_read.key, PageReconstructError::from(anyhow!(e)));
-                completed_keys.insert(block_read.key);
-                continue;
-            }
-
-            let key_situation =
-                reconstruct_state.update_key(&block_read.key, block_read.lsn, value.unwrap());
-            if key_situation == ValueReconstructSituation::Complete {
-                completed_keys.insert(block_read.key);
-            }
-        }
-
-        Ok(())
-    }
 }

 impl std::fmt::Display for InMemoryLayer {
@@ -336,17 +246,32 @@ impl InMemoryLayer {

    /// Common subroutine of the public put_wal_record() and put_page_image() functions.
    /// Adds the page version to the in-memory tree
-
    pub(crate) async fn put_value(
        &self,
        key: Key,
        lsn: Lsn,
-        buf: &[u8],
+        val: &Value,
        ctx: &RequestContext,
    ) -> Result<()> {
        let mut inner = self.inner.write().await;
        self.assert_writable();
-        self.put_value_locked(&mut inner, key, lsn, buf, ctx).await
+        self.put_value_locked(&mut inner, key, lsn, val, ctx).await
+    }
+
+    pub(crate) async fn put_values(
+        &self,
+        values: &HashMap<Key, Vec<(Lsn, Value)>>,
+        ctx: &RequestContext,
+    ) -> Result<()> {
+        let mut inner = self.inner.write().await;
+        self.assert_writable();
+        for (key, vals) in values {
+            for (lsn, val) in vals {
+                self.put_value_locked(&mut inner, *key, *lsn, val, ctx)
+                    .await?;
+            }
+        }
+        Ok(())
    }

    async fn put_value_locked(
@@ -354,16 +279,22 @@ impl InMemoryLayer {
        locked_inner: &mut RwLockWriteGuard<'_, InMemoryLayerInner>,
        key: Key,
        lsn: Lsn,
-        buf: &[u8],
+        val: &Value,
        ctx: &RequestContext,
    ) -> Result<()> {
        trace!("put_value key {} at {}/{}", key, self.timeline_id, lsn);

        let off = {
+            // Avoid doing allocations for "small" values.
+            // In the regression test suite, the limit of 256 avoided allocations in 95% of cases:
+            // https://github.com/neondatabase/neon/pull/5056#discussion_r1301975061
+            let mut buf = smallvec::SmallVec::<[u8; 256]>::new();
+            buf.clear();
+            val.ser_into(&mut buf)?;
            locked_inner
                .file
                .write_blob(
-                    buf,
+                    &buf,
                    &RequestContextBuilder::extend(ctx)
                        .page_content_kind(PageContentKind::InMemoryLayer)
                        .build(),
@@ -391,12 +322,7 @@ impl InMemoryLayer {
    pub async fn freeze(&self, end_lsn: Lsn) {
        let inner = self.inner.write().await;

-        assert!(
-            self.start_lsn < end_lsn,
-            "{} >= {}",
-            self.start_lsn,
-            end_lsn
-        );
+        assert!(self.start_lsn < end_lsn);
        self.end_lsn.set(end_lsn).expect("end_lsn set only once");

        for vec_map in inner.index.values() {
--- a/pageserver/src/tenant/storage_layer/layer.rs
+++ b/pageserver/src/tenant/storage_layer/layer.rs
@@ -1,6 +1,5 @@
 use anyhow::Context;
 use camino::{Utf8Path, Utf8PathBuf};
-use pageserver_api::keyspace::KeySpace;
 use pageserver_api::models::{
    HistoricLayerInfo, LayerAccessKind, LayerResidenceEventReason, LayerResidenceStatus,
 };
@@ -17,14 +16,13 @@ use crate::config::PageServerConf;
 use crate::context::RequestContext;
 use crate::repository::Key;
 use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
-use crate::tenant::timeline::GetVectoredError;
 use crate::tenant::{remote_timeline_client::LayerFileMetadata, Timeline};

 use super::delta_layer::{self, DeltaEntry};
 use super::image_layer;
 use super::{
    AsLayerDesc, LayerAccessStats, LayerAccessStatsReset, LayerFileName, PersistentLayerDesc,
-    ValueReconstructResult, ValueReconstructState, ValuesReconstructState,
+    ValueReconstructResult, ValueReconstructState,
 };

 use utils::generation::Generation;
@@ -264,37 +262,6 @@ impl Layer {
            .with_context(|| format!("get_value_reconstruct_data for layer {self}"))
    }

-    pub(crate) async fn get_values_reconstruct_data(
-        &self,
-        keyspace: KeySpace,
-        start_lsn: Lsn,
-        end_lsn: Lsn,
-        reconstruct_data: &mut ValuesReconstructState,
-        ctx: &RequestContext,
-    ) -> Result<(), GetVectoredError> {
-        let layer = self
-            .0
-            .get_or_maybe_download(true, Some(ctx))
-            .await
-            .map_err(|err| GetVectoredError::Other(anyhow::anyhow!(err)))?;
-
-        self.0
-            .access_stats
-            .record_access(LayerAccessKind::GetValueReconstructData, ctx);
-
-        layer
-            .get_values_reconstruct_data(
-                keyspace,
-                start_lsn,
-                end_lsn,
-                reconstruct_data,
-                &self.0,
-                ctx,
-            )
-            .instrument(tracing::debug_span!("get_values_reconstruct_data", layer=%self))
-            .await
-    }
-
    /// Download the layer if evicted.
    ///
    /// Will not error when the layer is already downloaded.
@@ -1210,7 +1177,7 @@ pub(crate) enum EvictionError {

 /// Error internal to the [`LayerInner::get_or_maybe_download`]
 #[derive(Debug, thiserror::Error)]
-pub(crate) enum DownloadError {
+enum DownloadError {
    #[error("timeline has already shutdown")]
    TimelineShutdown,
    #[error("no remote storage configured")]
@@ -1307,14 +1274,9 @@ impl DownloadedLayer {
                    owner.desc.key_range.clone(),
                    owner.desc.lsn_range.clone(),
                ));
-                delta_layer::DeltaLayerInner::load(
-                    &owner.path,
-                    summary,
-                    owner.conf.max_vectored_read_size,
-                    ctx,
-                )
-                .await
-                .map(|res| res.map(LayerKind::Delta))
+                delta_layer::DeltaLayerInner::load(&owner.path, summary, ctx)
+                    .await
+                    .map(|res| res.map(LayerKind::Delta))
            } else {
                let lsn = owner.desc.image_layer_lsn();
                let summary = Some(image_layer::Summary::expected(
@@ -1323,15 +1285,9 @@ impl DownloadedLayer {
                    owner.desc.key_range.clone(),
                    lsn,
                ));
-                image_layer::ImageLayerInner::load(
-                    &owner.path,
-                    lsn,
-                    summary,
-                    owner.conf.max_vectored_read_size,
-                    ctx,
-                )
-                .await
-                .map(|res| res.map(LayerKind::Image))
+                image_layer::ImageLayerInner::load(&owner.path, lsn, summary, ctx)
+                    .await
+                    .map(|res| res.map(LayerKind::Image))
            };

            match res {
@@ -1381,29 +1337,6 @@ impl DownloadedLayer {
        }
    }

-    async fn get_values_reconstruct_data(
-        &self,
-        keyspace: KeySpace,
-        start_lsn: Lsn,
-        end_lsn: Lsn,
-        reconstruct_data: &mut ValuesReconstructState,
-        owner: &Arc<LayerInner>,
-        ctx: &RequestContext,
-    ) -> Result<(), GetVectoredError> {
-        use LayerKind::*;
-
-        match self.get(owner, ctx).await.map_err(GetVectoredError::from)? {
-            Delta(d) => {
-                d.get_values_reconstruct_data(keyspace, start_lsn, end_lsn, reconstruct_data, ctx)
-                    .await
-            }
-            Image(i) => {
-                i.get_values_reconstruct_data(keyspace, reconstruct_data, ctx)
-                    .await
-            }
-        }
-    }
-
    async fn dump(&self, owner: &Arc<LayerInner>, ctx: &RequestContext) -> anyhow::Result<()> {
        use LayerKind::*;
        match self.get(owner, ctx).await? {
--- a/pageserver/src/tenant/storage_layer/layer_desc.rs
+++ b/pageserver/src/tenant/storage_layer/layer_desc.rs
@@ -15,7 +15,7 @@ use utils::id::TenantId;
 /// A unique identifier of a persistent layer. This is different from `LayerDescriptor`, which is only used in the
 /// benchmarks. This struct contains all necessary information to find the image / delta layer. It also provides
 /// a unified way to generate layer information like file name.
-#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize, Hash)]
+#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
 pub struct PersistentLayerDesc {
    pub tenant_shard_id: TenantShardId,
    pub timeline_id: TimelineId,
--- a/pageserver/src/tenant/tasks.rs
+++ b/pageserver/src/tenant/tasks.rs
@@ -9,7 +9,6 @@ use crate::context::{DownloadBehavior, RequestContext};
 use crate::metrics::TENANT_TASK_EVENTS;
 use crate::task_mgr;
 use crate::task_mgr::{TaskKind, BACKGROUND_RUNTIME};
-use crate::tenant::throttle::Stats;
 use crate::tenant::timeline::CompactionError;
 use crate::tenant::{Tenant, TenantState};
 use tokio_util::sync::CancellationToken;
@@ -140,8 +139,6 @@ async fn compaction_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
    // How many errors we have seen consequtively
    let mut error_run_count = 0;

-    let mut last_throttle_flag_reset_at = Instant::now();
-
    TENANT_TASK_EVENTS.with_label_values(&["start"]).inc();
    async {
        let ctx = RequestContext::todo_child(TaskKind::Compaction, DownloadBehavior::Download);
@@ -206,27 +203,6 @@ async fn compaction_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
                walredo_mgr.maybe_quiesce(period * 10);
            }

-            // TODO: move this (and walredo quiesce) to a separate task that isn't affected by the back-off,
-            // so we get some upper bound guarantee on when walredo quiesce / this throttling reporting here happens.
-            info_span!(parent: None, "timeline_get_throttle", tenant_id=%tenant.tenant_shard_id, shard_id=%tenant.tenant_shard_id.shard_slug()).in_scope(|| {
-                let now = Instant::now();
-                let prev = std::mem::replace(&mut last_throttle_flag_reset_at, now);
-                let Stats { count_accounted, count_throttled, sum_throttled_usecs } = tenant.timeline_get_throttle.reset_stats();
-                if count_throttled == 0 {
-                    return;
-                }
-                let allowed_rps = tenant.timeline_get_throttle.steady_rps();
-                let delta = now - prev;
-                warn!(
-                    n_seconds=%format_args!("{:.3}",
-                    delta.as_secs_f64()),
-                    count_accounted,
-                    count_throttled,
-                    sum_throttled_usecs,
-                    allowed_rps=%format_args!("{allowed_rps:.0}"),
-                    "shard was throttled in the last n_seconds")
-            });
-
            // Sleep
            if tokio::time::timeout(sleep_duration, cancel.cancelled())
                .await
--- a/pageserver/src/tenant/throttle.rs
+++ b/pageserver/src/tenant/throttle.rs
@@ -1,162 +0,0 @@
-use std::{
-    str::FromStr,
-    sync::{
-        atomic::{AtomicU64, Ordering},
-        Arc,
-    },
-    time::{Duration, Instant},
-};
-
-use arc_swap::ArcSwap;
-use enumset::EnumSet;
-use tracing::error;
-
-use crate::{context::RequestContext, task_mgr::TaskKind};
-
-/// Throttle for `async` functions.
-///
-/// Runtime reconfigurable.
-///
-/// To share a throttle among multiple entities, wrap it in an [`Arc`].
-///
-/// The intial use case for this is tenant-wide throttling of getpage@lsn requests.
-pub struct Throttle<M: Metric> {
-    inner: ArcSwap<Inner>,
-    metric: M,
-    /// will be turned into [`Stats::count_accounted`]
-    count_accounted: AtomicU64,
-    /// will be turned into [`Stats::count_throttled`]
-    count_throttled: AtomicU64,
-    /// will be turned into [`Stats::sum_throttled_usecs`]
-    sum_throttled_usecs: AtomicU64,
-}
-
-pub struct Inner {
-    task_kinds: EnumSet<TaskKind>,
-    rate_limiter: Arc<leaky_bucket::RateLimiter>,
-    config: Config,
-}
-
-pub type Config = pageserver_api::models::ThrottleConfig;
-
-pub struct Observation {
-    pub wait_time: Duration,
-}
-pub trait Metric {
-    fn observe_throttling(&self, observation: &Observation);
-}
-
-/// See [`Throttle::reset_stats`].
-pub struct Stats {
-    // Number of requests that were subject to throttling, i.e., requests of the configured [`Config::task_kinds`].
-    pub count_accounted: u64,
-    // Subset of the `accounted` requests that were actually throttled.
-    // Note that the numbers are stored as two independent atomics, so, there might be a slight drift.
-    pub count_throttled: u64,
-    // Sum of microseconds that throttled requests spent waiting for throttling.
-    pub sum_throttled_usecs: u64,
-}
-
-impl<M> Throttle<M>
-where
-    M: Metric,
-{
-    pub fn new(config: Config, metric: M) -> Self {
-        Self {
-            inner: ArcSwap::new(Arc::new(Self::new_inner(config))),
-            metric,
-            count_accounted: AtomicU64::new(0),
-            count_throttled: AtomicU64::new(0),
-            sum_throttled_usecs: AtomicU64::new(0),
-        }
-    }
-    fn new_inner(config: Config) -> Inner {
-        let Config {
-            task_kinds,
-            initial,
-            refill_interval,
-            refill_amount,
-            max,
-            fair,
-        } = &config;
-        let task_kinds: EnumSet<TaskKind> = task_kinds
-            .iter()
-            .filter_map(|s| match TaskKind::from_str(s) {
-                Ok(v) => Some(v),
-                Err(e) => {
-                    // TODO: avoid this failure mode
-                    error!(
-                        "cannot parse task kind, ignoring for rate limiting {}",
-                        utils::error::report_compact_sources(&e)
-                    );
-                    None
-                }
-            })
-            .collect();
-        Inner {
-            task_kinds,
-            rate_limiter: Arc::new(
-                leaky_bucket::RateLimiter::builder()
-                    .initial(*initial)
-                    .interval(*refill_interval)
-                    .refill(refill_amount.get())
-                    .max(*max)
-                    .fair(*fair)
-                    .build(),
-            ),
-            config,
-        }
-    }
-    pub fn reconfigure(&self, config: Config) {
-        self.inner.store(Arc::new(Self::new_inner(config)));
-    }
-
-    /// The [`Throttle`] keeps an internal flag that is true if there was ever any actual throttling.
-    /// This method allows retrieving & resetting that flag.
-    /// Useful for periodic reporting.
-    pub fn reset_stats(&self) -> Stats {
-        let count_accounted = self.count_accounted.swap(0, Ordering::Relaxed);
-        let count_throttled = self.count_throttled.swap(0, Ordering::Relaxed);
-        let sum_throttled_usecs = self.sum_throttled_usecs.swap(0, Ordering::Relaxed);
-        Stats {
-            count_accounted,
-            count_throttled,
-            sum_throttled_usecs,
-        }
-    }
-
-    /// See [`Config::steady_rps`].
-    pub fn steady_rps(&self) -> f64 {
-        self.inner.load().config.steady_rps()
-    }
-
-    pub async fn throttle(&self, ctx: &RequestContext, key_count: usize) {
-        let inner = self.inner.load_full(); // clones the `Inner` Arc
-        if !inner.task_kinds.contains(ctx.task_kind()) {
-            return;
-        };
-        let start = std::time::Instant::now();
-        let mut did_throttle = false;
-        let acquire = inner.rate_limiter.acquire(key_count);
-        // turn off runtime-induced preemption (aka coop) so our `did_throttle` is accurate
-        let acquire = tokio::task::unconstrained(acquire);
-        let mut acquire = std::pin::pin!(acquire);
-        std::future::poll_fn(|cx| {
-            use std::future::Future;
-            let poll = acquire.as_mut().poll(cx);
-            did_throttle = did_throttle || poll.is_pending();
-            poll
-        })
-        .await;
-        self.count_accounted.fetch_add(1, Ordering::Relaxed);
-        if did_throttle {
-            self.count_throttled.fetch_add(1, Ordering::Relaxed);
-            let now = Instant::now();
-            let wait_time = now - start;
-            self.sum_throttled_usecs
-                .fetch_add(wait_time.as_micros() as u64, Ordering::Relaxed);
-            let observation = Observation { wait_time };
-            self.metric.observe_throttling(&observation);
-        }
-    }
-}
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
--- a/pageserver/src/tenant/timeline/delete.rs
+++ b/pageserver/src/tenant/timeline/delete.rs
@@ -419,7 +419,6 @@ impl DeleteTimelineFlow {
                TimelineResources {
                    remote_client,
                    deletion_queue_client,
-                    timeline_get_throttle: tenant.timeline_get_throttle.clone(),
                },
                // Important. We dont pass ancestor above because it can be missing.
                // Thus we need to skip the validation here.
--- a/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs
+++ b/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs
@@ -343,6 +343,23 @@ pub(super) async fn handle_walreceiver_connection(
                            modification.commit(&ctx).await?;
                            uncommitted_records = 0;
                            filtered_records = 0;
+
+                            //
+                            // We should check checkpoint distance after appending each ingest_batch_size bytes because otherwise
+                            // layer size can become much larger than `checkpoint_distance`.
+                            // This can happen because wal-sender sends WAL in 125kb chunks and some WAL records can cause a large
+                            // amount of data to be written to key-value storage. So performing this check only after processing
+                            // all WAL records in the chunk can cause huge L0 layer files.
+                            //
+                            timeline
+                                .check_checkpoint_distance()
+                                .await
+                                .with_context(|| {
+                                    format!(
+                                        "Failed to check checkpoint distance for timeline {}",
+                                        timeline.timeline_id
+                                    )
+                                })?;
                        }
                    }

@@ -389,6 +406,16 @@ pub(super) async fn handle_walreceiver_connection(
            }
        }

+        timeline
+            .check_checkpoint_distance()
+            .await
+            .with_context(|| {
+                format!(
+                    "Failed to check checkpoint distance for timeline {}",
+                    timeline.timeline_id
+                )
+            })?;
+
        if let Some(last_lsn) = status_update {
            let timeline_remote_consistent_lsn = timeline
                .get_remote_consistent_lsn_visible()
--- a/pageserver/src/tenant/vectored_blob_io.rs
+++ b/pageserver/src/tenant/vectored_blob_io.rs
@@ -1,412 +0,0 @@
-//!
-//! Utilities for vectored reading of variable-sized "blobs".
-//!
-//! The "blob" api is an abstraction on top of the "block" api,
-//! with the main difference being that blobs do not have a fixed
-//! size (each blob is prefixed with 1 or 4 byte length field)
-//!
-//! The vectored apis provided in this module allow for planning
-//! and executing disk IO which covers multiple blobs.
-//!
-//! Reads are planned with [`VectoredReadPlanner`] which will coalesce
-//! adjacent blocks into a single disk IO request and exectuted by
-//! [`VectoredBlobReader`] which does all the required offset juggling
-//! and returns a buffer housing all the blobs and a list of offsets.
-//!
-//! Note that the vectored blob api does *not* go through the page cache.
-
-use std::collections::BTreeMap;
-
-use bytes::BytesMut;
-use pageserver_api::key::Key;
-use utils::lsn::Lsn;
-use utils::vec_map::VecMap;
-
-use crate::virtual_file::VirtualFile;
-
-/// Metadata bundled with the start and end offset of a blob.
-#[derive(Copy, Clone, Debug)]
-pub struct BlobMeta {
-    pub key: Key,
-    pub lsn: Lsn,
-}
-
-/// Blob offsets into [`VectoredBlobsBuf::buf`]
-pub struct VectoredBlob {
-    pub start: usize,
-    pub end: usize,
-    pub meta: BlobMeta,
-}
-
-/// Return type of [`VectoredBlobReader::read_blobs`]
-pub struct VectoredBlobsBuf {
-    /// Buffer for all blobs in this read
-    pub buf: BytesMut,
-    /// Offsets into the buffer and metadata for all blobs in this read
-    pub blobs: Vec<VectoredBlob>,
-}
-
-/// Description of one disk read for multiple blobs.
-/// Used as the argument form [`VectoredBlobReader::read_blobs`]
-#[derive(Debug)]
-pub struct VectoredRead {
-    pub start: u64,
-    pub end: u64,
-    /// Starting offsets and metadata for each blob in this read
-    pub blobs_at: VecMap<u64, BlobMeta>,
-
-    max_read_size: usize,
-}
-
-#[derive(Eq, PartialEq)]
-enum VectoredReadExtended {
-    Yes,
-    No,
-}
-
-impl VectoredRead {
-    fn new(start_offset: u64, end_offset: u64, meta: BlobMeta, max_read_size: usize) -> Self {
-        let mut blobs_at = VecMap::default();
-        blobs_at
-            .append(start_offset, meta)
-            .expect("First insertion always succeeds");
-
-        Self {
-            start: start_offset,
-            end: end_offset,
-            blobs_at,
-            max_read_size,
-        }
-    }
-
-    /// Attempt to extend the current read with a new blob if the start
-    /// offset matches with the current end of the vectored read
-    /// and the resuting size is below the max read size
-    fn extend(&mut self, start: u64, end: u64, meta: BlobMeta) -> VectoredReadExtended {
-        let size = (end - start) as usize;
-        if self.end == start && self.size() + size <= self.max_read_size {
-            self.end = end;
-            self.blobs_at
-                .append(start, meta)
-                .expect("LSNs are ordered within vectored reads");
-
-            return VectoredReadExtended::Yes;
-        }
-
-        VectoredReadExtended::No
-    }
-
-    fn size(&self) -> usize {
-        (self.end - self.start) as usize
-    }
-}
-
-#[derive(Copy, Clone, Debug)]
-pub enum BlobFlag {
-    None,
-    Ignore,
-    Replaces,
-}
-
-/// Planner for vectored blob reads.
-///
-/// Blob offsets are received via [`VectoredReadPlanner::handle`]
-/// and coalesced into disk reads.
-///
-/// The implementation is very simple:
-/// * Collect all blob offsets in an ordered structure
-/// * Iterate over the collected blobs and coalesce them into reads at the end
-pub struct VectoredReadPlanner {
-    // Track all the blob offsets. Start offsets must be ordered.
-    blobs: BTreeMap<Key, Vec<(Lsn, u64, u64)>>,
-    // Arguments for previous blob passed into [`VectoredReadPlanner::handle`]
-    prev: Option<(Key, Lsn, u64, BlobFlag)>,
-
-    max_read_size: usize,
-}
-
-impl VectoredReadPlanner {
-    pub fn new(max_read_size: usize) -> Self {
-        Self {
-            blobs: BTreeMap::new(),
-            prev: None,
-            max_read_size,
-        }
-    }
-
-    /// Include a new blob in the read plan.
-    ///
-    /// Notes:
-    /// * This function should be called for each blob in the desired *inclusive* range.
-    /// See `DeltaLayerInner::plan_reads` and `ImageLayerInner::plan_reads`.
-    /// * Calls to this function should be for monotonically continuous (key, lsn) tuples.
-    ///
-    /// The `flag` argument has two interesting values:
-    /// * [`BlobFlag::Replaces`]: The blob for this key should replace all existing blobs.
-    /// This is used for WAL records that `will_init`.
-    /// * [`BlobFlag::Ignore`]: This blob should not be included in the read. This happens
-    /// if the blob is cached.
-    pub fn handle(&mut self, key: Key, lsn: Lsn, offset: u64, flag: BlobFlag) {
-        // Implementation note: internally lag behind by one blob such that
-        // we have a start and end offset when initialising [`VectoredRead`]
-        let (prev_key, prev_lsn, prev_offset, prev_flag) = match self.prev {
-            None => {
-                self.prev = Some((key, lsn, offset, flag));
-                return;
-            }
-            Some(prev) => prev,
-        };
-
-        self.add_blob(prev_key, prev_lsn, prev_offset, offset, prev_flag);
-
-        self.prev = Some((key, lsn, offset, flag));
-    }
-
-    pub fn handle_range_end(&mut self, offset: u64) {
-        if let Some((prev_key, prev_lsn, prev_offset, prev_flag)) = self.prev {
-            self.add_blob(prev_key, prev_lsn, prev_offset, offset, prev_flag);
-        }
-
-        self.prev = None;
-    }
-
-    fn add_blob(&mut self, key: Key, lsn: Lsn, start_offset: u64, end_offset: u64, flag: BlobFlag) {
-        match flag {
-            BlobFlag::None => {
-                let blobs_for_key = self.blobs.entry(key).or_default();
-                blobs_for_key.push((lsn, start_offset, end_offset));
-            }
-            BlobFlag::Replaces => {
-                let blobs_for_key = self.blobs.entry(key).or_default();
-                blobs_for_key.clear();
-                blobs_for_key.push((lsn, start_offset, end_offset));
-            }
-            BlobFlag::Ignore => {}
-        }
-    }
-
-    pub fn finish(self) -> Vec<VectoredRead> {
-        let mut current_read: Option<VectoredRead> = None;
-        let mut reads = Vec::new();
-
-        for (key, blobs_for_key) in self.blobs {
-            for (lsn, start_offset, end_offset) in blobs_for_key {
-                let extended = match &mut current_read {
-                    Some(read) => read.extend(start_offset, end_offset, BlobMeta { key, lsn }),
-                    None => VectoredReadExtended::No,
-                };
-
-                if extended == VectoredReadExtended::No {
-                    let next_read = VectoredRead::new(
-                        start_offset,
-                        end_offset,
-                        BlobMeta { key, lsn },
-                        self.max_read_size,
-                    );
-
-                    let prev_read = current_read.replace(next_read);
-
-                    if let Some(read) = prev_read {
-                        reads.push(read);
-                    }
-                }
-            }
-        }
-
-        if let Some(read) = current_read {
-            reads.push(read);
-        }
-
-        reads
-    }
-}
-
-/// Disk reader for vectored blob spans (does not go through the page cache)
-pub struct VectoredBlobReader {
-    file: VirtualFile,
-    max_vectored_read_size: usize,
-}
-
-impl VectoredBlobReader {
-    pub fn new(file: VirtualFile, max_vectored_read_size: usize) -> Self {
-        Self {
-            file,
-            max_vectored_read_size,
-        }
-    }
-
-    pub fn get_max_read_size(&self) -> usize {
-        self.max_vectored_read_size
-    }
-
-    pub fn get_file_ref(&self) -> &VirtualFile {
-        &self.file
-    }
-
-    /// Read the requested blobs into the buffer.
-    ///
-    /// We have to deal with the fact that blobs are not fixed size.
-    /// Each blob is prefixed by a size header.
-    ///
-    /// The success return value is a struct which contains the buffer
-    /// filled from disk and a list of offsets at which each blob lies
-    /// in the buffer.
-    pub async fn read_blobs(
-        &self,
-        read: &VectoredRead,
-        buf: BytesMut,
-    ) -> Result<VectoredBlobsBuf, std::io::Error> {
-        // tracing::info!("read_blobs(read={read:?}, read_size={})", read.size());
-
-        assert!(read.size() > 0);
-        assert!(
-            read.size() <= buf.capacity(),
-            "{} > {}",
-            read.size(),
-            buf.capacity()
-        );
-        let buf = self
-            .file
-            .read_exact_at_n(buf, read.start, read.size())
-            .await?;
-
-        let blobs_at = read.blobs_at.as_slice();
-        let start_offset = blobs_at.first().expect("VectoredRead is never empty").0;
-
-        let mut metas = Vec::new();
-        let pairs = blobs_at.iter().zip(
-            blobs_at
-                .iter()
-                .map(Some)
-                .skip(1)
-                .chain(std::iter::once(None)),
-        );
-        for ((offset, meta), next) in pairs {
-            let offset_in_buf = offset - start_offset;
-            let first_len_byte = buf[offset_in_buf as usize];
-
-            // Each blob is prefixed by a header containing it's size.
-            // Extract the size and skip that header to find the start of the data.
-            let (size_length, blob_size) = if first_len_byte < 0x80 {
-                (1, first_len_byte as u64)
-            } else {
-                let mut blob_size_buf = [0u8; 4];
-                let offset_in_buf = offset_in_buf as usize;
-
-                blob_size_buf.copy_from_slice(&buf[offset_in_buf..offset_in_buf + 4]);
-                blob_size_buf[0] &= 0x7f;
-                (4, u32::from_be_bytes(blob_size_buf) as u64)
-            };
-
-            let start = offset_in_buf + size_length;
-            let end = match next {
-                Some((next_blob_start_offset, _)) => next_blob_start_offset - start_offset,
-                None => start + blob_size,
-            };
-
-            assert_eq!(end - start, blob_size);
-
-            metas.push(VectoredBlob {
-                start: start as usize,
-                end: end as usize,
-                meta: *meta,
-            })
-        }
-
-        Ok(VectoredBlobsBuf { buf, blobs: metas })
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    fn validate_read(read: &VectoredRead, offset_range: &[(Key, Lsn, u64, BlobFlag)]) {
-        assert_eq!(read.start, offset_range.first().unwrap().2);
-
-        let expected_offsets_in_read: Vec<_> = offset_range.iter().map(|o| o.2).collect();
-
-        let offsets_in_read: Vec<_> = read
-            .blobs_at
-            .as_slice()
-            .iter()
-            .map(|(offset, _)| *offset)
-            .collect();
-
-        assert_eq!(expected_offsets_in_read, offsets_in_read);
-    }
-
-    #[test]
-    fn planner_max_read_size_test() {
-        let max_read_size = 128 * 1024;
-        let key = Key::MIN;
-        let lsn = Lsn(0);
-
-        let blob_descriptions = vec![
-            (key, lsn, 0, BlobFlag::None),
-            (key, lsn, 32 * 1024, BlobFlag::None),
-            (key, lsn, 96 * 1024, BlobFlag::None), // Last in read 1
-            (key, lsn, 128 * 1024, BlobFlag::None), // Last in read 2
-            (key, lsn, 198 * 1024, BlobFlag::None), // Last in read 3
-            (key, lsn, 268 * 1024, BlobFlag::None), // Last in read 4
-            (key, lsn, 396 * 1024, BlobFlag::None), // Last in read 5
-            (key, lsn, 652 * 1024, BlobFlag::None), // Last in read 6
-        ];
-
-        let ranges = [
-            &blob_descriptions[0..3],
-            &blob_descriptions[3..4],
-            &blob_descriptions[4..5],
-            &blob_descriptions[5..6],
-            &blob_descriptions[6..7],
-            &blob_descriptions[7..],
-        ];
-
-        let mut planner = VectoredReadPlanner::new(max_read_size);
-        for (key, lsn, offset, flag) in blob_descriptions.clone() {
-            planner.handle(key, lsn, offset, flag);
-        }
-
-        planner.handle_range_end(652 * 1024);
-
-        let reads = planner.finish();
-        assert_eq!(reads.len(), 6);
-
-        for (idx, read) in reads.iter().enumerate() {
-            validate_read(read, ranges[idx]);
-        }
-    }
-
-    #[test]
-    fn planner_replacement_test() {
-        let max_read_size = 128 * 1024;
-        let first_key = Key::MIN;
-        let second_key = first_key.next();
-        let lsn = Lsn(0);
-
-        let blob_descriptions = vec![
-            (first_key, lsn, 0, BlobFlag::None),    // First in read 1
-            (first_key, lsn, 1024, BlobFlag::None), // Last in read 1
-            (second_key, lsn, 2 * 1024, BlobFlag::Replaces),
-            (second_key, lsn, 3 * 1024, BlobFlag::None),
-            (second_key, lsn, 4 * 1024, BlobFlag::Replaces), // First in read 2
-            (second_key, lsn, 5 * 1024, BlobFlag::None),     // Last in read 2
-        ];
-
-        let ranges = [&blob_descriptions[0..2], &blob_descriptions[4..]];
-
-        let mut planner = VectoredReadPlanner::new(max_read_size);
-        for (key, lsn, offset, flag) in blob_descriptions.clone() {
-            planner.handle(key, lsn, offset, flag);
-        }
-
-        planner.handle_range_end(6 * 1024);
-
-        let reads = planner.finish();
-        assert_eq!(reads.len(), 2);
-
-        for (idx, read) in reads.iter().enumerate() {
-            validate_read(read, ranges[idx]);
-        }
-    }
-}
--- a/pageserver/src/virtual_file.rs
+++ b/pageserver/src/virtual_file.rs
@@ -562,18 +562,7 @@ impl VirtualFile {
        B: IoBufMut + Send,
    {
        let (buf, res) =
-            read_exact_at_impl(buf, offset, None, |buf, offset| self.read_at(buf, offset)).await;
-        res.map(|()| buf)
-    }
-
-    pub async fn read_exact_at_n<B>(&self, buf: B, offset: u64, count: usize) -> Result<B, Error>
-    where
-        B: IoBufMut + Send,
-    {
-        let (buf, res) = read_exact_at_impl(buf, offset, Some(count), |buf, offset| {
-            self.read_at(buf, offset)
-        })
-        .await;
+            read_exact_at_impl(buf, offset, |buf, offset| self.read_at(buf, offset)).await;
        res.map(|()| buf)
    }

@@ -707,7 +696,6 @@ impl VirtualFile {
 pub async fn read_exact_at_impl<B, F, Fut>(
    buf: B,
    mut offset: u64,
-    count: Option<usize>,
    mut read_at: F,
 ) -> (B, std::io::Result<()>)
 where
@@ -715,15 +703,7 @@ where
    F: FnMut(tokio_epoll_uring::Slice<B>, u64) -> Fut,
    Fut: std::future::Future<Output = (tokio_epoll_uring::Slice<B>, std::io::Result<usize>)>,
 {
-    let mut buf: tokio_epoll_uring::Slice<B> = match count {
-        Some(count) => {
-            assert!(count <= buf.bytes_total());
-            assert!(count > 0);
-            buf.slice(..count) // may include uninitialized memory
-        }
-        None => buf.slice_full(), // includes all the uninitialized memory
-    };
-
+    let mut buf: tokio_epoll_uring::Slice<B> = buf.slice_full(); // includes all the uninitialized memory
    while buf.bytes_total() != 0 {
        let res;
        (buf, res) = read_at(buf, offset).await;
@@ -813,7 +793,7 @@ mod test_read_exact_at_impl {
                result: Ok(vec![b'a', b'b', b'c', b'd', b'e']),
            }]),
        }));
-        let (buf, res) = read_exact_at_impl(buf, 0, None, |buf, offset| {
+        let (buf, res) = read_exact_at_impl(buf, 0, |buf, offset| {
            let mock_read_at = Arc::clone(&mock_read_at);
            async move { mock_read_at.lock().await.read_at(buf, offset).await }
        })
@@ -822,33 +802,13 @@ mod test_read_exact_at_impl {
        assert_eq!(buf, vec![b'a', b'b', b'c', b'd', b'e']);
    }

-    #[tokio::test]
-    async fn test_with_count() {
-        let buf = Vec::with_capacity(5);
-        let mock_read_at = Arc::new(tokio::sync::Mutex::new(MockReadAt {
-            expectations: VecDeque::from(vec![Expectation {
-                offset: 0,
-                bytes_total: 3,
-                result: Ok(vec![b'a', b'b', b'c']),
-            }]),
-        }));
-
-        let (buf, res) = read_exact_at_impl(buf, 0, Some(3), |buf, offset| {
-            let mock_read_at = Arc::clone(&mock_read_at);
-            async move { mock_read_at.lock().await.read_at(buf, offset).await }
-        })
-        .await;
-        assert!(res.is_ok());
-        assert_eq!(buf, vec![b'a', b'b', b'c']);
-    }
-
    #[tokio::test]
    async fn test_empty_buf_issues_no_syscall() {
        let buf = Vec::new();
        let mock_read_at = Arc::new(tokio::sync::Mutex::new(MockReadAt {
            expectations: VecDeque::new(),
        }));
-        let (_buf, res) = read_exact_at_impl(buf, 0, None, |buf, offset| {
+        let (_buf, res) = read_exact_at_impl(buf, 0, |buf, offset| {
            let mock_read_at = Arc::clone(&mock_read_at);
            async move { mock_read_at.lock().await.read_at(buf, offset).await }
        })
@@ -873,7 +833,7 @@ mod test_read_exact_at_impl {
                },
            ]),
        }));
-        let (buf, res) = read_exact_at_impl(buf, 0, None, |buf, offset| {
+        let (buf, res) = read_exact_at_impl(buf, 0, |buf, offset| {
            let mock_read_at = Arc::clone(&mock_read_at);
            async move { mock_read_at.lock().await.read_at(buf, offset).await }
        })
@@ -904,7 +864,7 @@ mod test_read_exact_at_impl {
                },
            ]),
        }));
-        let (_buf, res) = read_exact_at_impl(buf, 0, None, |buf, offset| {
+        let (_buf, res) = read_exact_at_impl(buf, 0, |buf, offset| {
            let mock_read_at = Arc::clone(&mock_read_at);
            async move { mock_read_at.lock().await.read_at(buf, offset).await }
        })
--- a/pageserver/src/walredo/process.rs
+++ b/pageserver/src/walredo/process.rs
@@ -9,7 +9,7 @@ use bytes::Bytes;
 use nix::poll::{PollFd, PollFlags};
 use pageserver_api::{reltag::RelTag, shard::TenantShardId};
 use postgres_ffi::BLCKSZ;
-use std::os::fd::AsRawFd;
+use std::os::{fd::AsRawFd, unix::process::CommandExt};
 #[cfg(feature = "testing")]
 use std::sync::atomic::AtomicUsize;
 use std::{
@@ -26,6 +26,40 @@ mod no_leak_child;
 /// The IPC protocol that pageserver and walredo process speak over their shared pipe.
 mod protocol;

+///
+/// Command with the ability to not pass all file descriptors to the child process
+///
+trait CloseFileDescriptors: CommandExt {
+    ///
+    /// Close file descriptors (other than stdin, stdout, stderr) in child process
+    ///
+    fn close_fds(&mut self) -> &mut Command;
+}
+
+impl<C: CommandExt> CloseFileDescriptors for C {
+    fn close_fds(&mut self) -> &mut Command {
+        // SAFETY: Code executed inside pre_exec should have async-signal-safety,
+        // which means it should be safe to execute inside a signal handler.
+        // The precise meaning depends on platform. See `man signal-safety`
+        // for the linux definition.
+        //
+        // The set_fds_cloexec_threadsafe function is documented to be
+        // async-signal-safe.
+        //
+        // Aside from this function, the rest of the code is re-entrant and
+        // doesn't make any syscalls. We're just passing constants.
+        //
+        // NOTE: It's easy to indirectly cause a malloc or lock a mutex,
+        // which is not async-signal-safe. Be careful.
+        unsafe {
+            self.pre_exec(move || {
+                close_fds::set_fds_cloexec_threadsafe(3, &[]);
+                Ok(())
+            })
+        }
+    }
+}
+
 pub struct WalRedoProcess {
    #[allow(dead_code)]
    conf: &'static PageServerConf,
@@ -79,14 +113,16 @@ impl WalRedoProcess {
            .env_clear()
            .env("LD_LIBRARY_PATH", &pg_lib_dir_path)
            .env("DYLD_LIBRARY_PATH", &pg_lib_dir_path)
-            // NB: The redo process is not trusted after we sent it the first
-            // walredo work. Before that, it is trusted. Specifically, we trust
-            // it to
-            // 1. close all file descriptors except stdin, stdout, stderr because
-            //    pageserver might not be 100% diligent in setting FD_CLOEXEC on all
-            //    the files it opens, and
-            // 2. to use seccomp to sandbox itself before processing the first
-            //    walredo request.
+            // The redo process is not trusted, and runs in seccomp mode that
+            // doesn't allow it to open any files. We have to also make sure it
+            // doesn't inherit any file descriptors from the pageserver, that
+            // would allow an attacker to read any files that happen to be open
+            // in the pageserver.
+            //
+            // The Rust standard library makes sure to mark any file descriptors it opens
+            // as close-on-exec by default, but that's not enough, since we use
+            // libraries that directly call libc open without setting that flag.
+            .close_fds()
            .spawn_no_leak_child(tenant_shard_id)
            .context("spawn process")?;
        WAL_REDO_PROCESS_COUNTERS.started.inc();
--- a/poetry.lock
+++ b/poetry.lock
@@ -158,28 +158,6 @@ files = [
 attrs = ">=16.0.0"
 pluggy = ">=0.4.0"

-[[package]]
-name = "anyio"
-version = "4.3.0"
-description = "High level compatibility layer for multiple asynchronous event loop implementations"
-optional = false
-python-versions = ">=3.8"
-files = [
-    {file = "anyio-4.3.0-py3-none-any.whl", hash = "sha256:048e05d0f6caeed70d731f3db756d35dcc1f35747c8c403364a8332c630441b8"},
-    {file = "anyio-4.3.0.tar.gz", hash = "sha256:f75253795a87df48568485fd18cdd2a3fa5c4f7c5be8e5e36637733fce06fed6"},
-]
-
-[package.dependencies]
-exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""}
-idna = ">=2.8"
-sniffio = ">=1.1"
-typing-extensions = {version = ">=4.1", markers = "python_version < \"3.11\""}
-
-[package.extras]
-doc = ["Sphinx (>=7)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"]
-test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17)"]
-trio = ["trio (>=0.23)"]
-
 [[package]]
 name = "async-timeout"
 version = "4.0.3"
@@ -858,43 +836,43 @@ files = [

 [[package]]
 name = "cryptography"
-version = "42.0.2"
+version = "42.0.0"
 description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers."
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "cryptography-42.0.2-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:701171f825dcab90969596ce2af253143b93b08f1a716d4b2a9d2db5084ef7be"},
-    {file = "cryptography-42.0.2-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:61321672b3ac7aade25c40449ccedbc6db72c7f5f0fdf34def5e2f8b51ca530d"},
-    {file = "cryptography-42.0.2-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ea2c3ffb662fec8bbbfce5602e2c159ff097a4631d96235fcf0fb00e59e3ece4"},
-    {file = "cryptography-42.0.2-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b15c678f27d66d247132cbf13df2f75255627bcc9b6a570f7d2fd08e8c081d2"},
-    {file = "cryptography-42.0.2-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:8e88bb9eafbf6a4014d55fb222e7360eef53e613215085e65a13290577394529"},
-    {file = "cryptography-42.0.2-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:a047682d324ba56e61b7ea7c7299d51e61fd3bca7dad2ccc39b72bd0118d60a1"},
-    {file = "cryptography-42.0.2-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:36d4b7c4be6411f58f60d9ce555a73df8406d484ba12a63549c88bd64f7967f1"},
-    {file = "cryptography-42.0.2-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:a00aee5d1b6c20620161984f8ab2ab69134466c51f58c052c11b076715e72929"},
-    {file = "cryptography-42.0.2-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:b97fe7d7991c25e6a31e5d5e795986b18fbbb3107b873d5f3ae6dc9a103278e9"},
-    {file = "cryptography-42.0.2-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:5fa82a26f92871eca593b53359c12ad7949772462f887c35edaf36f87953c0e2"},
-    {file = "cryptography-42.0.2-cp37-abi3-win32.whl", hash = "sha256:4b063d3413f853e056161eb0c7724822a9740ad3caa24b8424d776cebf98e7ee"},
-    {file = "cryptography-42.0.2-cp37-abi3-win_amd64.whl", hash = "sha256:841ec8af7a8491ac76ec5a9522226e287187a3107e12b7d686ad354bb78facee"},
-    {file = "cryptography-42.0.2-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:55d1580e2d7e17f45d19d3b12098e352f3a37fe86d380bf45846ef257054b242"},
-    {file = "cryptography-42.0.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:28cb2c41f131a5758d6ba6a0504150d644054fd9f3203a1e8e8d7ac3aea7f73a"},
-    {file = "cryptography-42.0.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b9097a208875fc7bbeb1286d0125d90bdfed961f61f214d3f5be62cd4ed8a446"},
-    {file = "cryptography-42.0.2-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:44c95c0e96b3cb628e8452ec060413a49002a247b2b9938989e23a2c8291fc90"},
-    {file = "cryptography-42.0.2-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:2f9f14185962e6a04ab32d1abe34eae8a9001569ee4edb64d2304bf0d65c53f3"},
-    {file = "cryptography-42.0.2-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:09a77e5b2e8ca732a19a90c5bca2d124621a1edb5438c5daa2d2738bfeb02589"},
-    {file = "cryptography-42.0.2-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:ad28cff53f60d99a928dfcf1e861e0b2ceb2bc1f08a074fdd601b314e1cc9e0a"},
-    {file = "cryptography-42.0.2-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:130c0f77022b2b9c99d8cebcdd834d81705f61c68e91ddd614ce74c657f8b3ea"},
-    {file = "cryptography-42.0.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:fa3dec4ba8fb6e662770b74f62f1a0c7d4e37e25b58b2bf2c1be4c95372b4a33"},
-    {file = "cryptography-42.0.2-cp39-abi3-win32.whl", hash = "sha256:3dbd37e14ce795b4af61b89b037d4bc157f2cb23e676fa16932185a04dfbf635"},
-    {file = "cryptography-42.0.2-cp39-abi3-win_amd64.whl", hash = "sha256:8a06641fb07d4e8f6c7dda4fc3f8871d327803ab6542e33831c7ccfdcb4d0ad6"},
-    {file = "cryptography-42.0.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:087887e55e0b9c8724cf05361357875adb5c20dec27e5816b653492980d20380"},
-    {file = "cryptography-42.0.2-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:a7ef8dd0bf2e1d0a27042b231a3baac6883cdd5557036f5e8df7139255feaac6"},
-    {file = "cryptography-42.0.2-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:4383b47f45b14459cab66048d384614019965ba6c1a1a141f11b5a551cace1b2"},
-    {file = "cryptography-42.0.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:fbeb725c9dc799a574518109336acccaf1303c30d45c075c665c0793c2f79a7f"},
-    {file = "cryptography-42.0.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:320948ab49883557a256eab46149df79435a22d2fefd6a66fe6946f1b9d9d008"},
-    {file = "cryptography-42.0.2-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:5ef9bc3d046ce83c4bbf4c25e1e0547b9c441c01d30922d812e887dc5f125c12"},
-    {file = "cryptography-42.0.2-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:52ed9ebf8ac602385126c9a2fe951db36f2cb0c2538d22971487f89d0de4065a"},
-    {file = "cryptography-42.0.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:141e2aa5ba100d3788c0ad7919b288f89d1fe015878b9659b307c9ef867d3a65"},
-    {file = "cryptography-42.0.2.tar.gz", hash = "sha256:e0ec52ba3c7f1b7d813cd52649a5b3ef1fc0d433219dc8c93827c57eab6cf888"},
+    {file = "cryptography-42.0.0-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:c640b0ef54138fde761ec99a6c7dc4ce05e80420262c20fa239e694ca371d434"},
+    {file = "cryptography-42.0.0-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:678cfa0d1e72ef41d48993a7be75a76b0725d29b820ff3cfd606a5b2b33fda01"},
+    {file = "cryptography-42.0.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:146e971e92a6dd042214b537a726c9750496128453146ab0ee8971a0299dc9bd"},
+    {file = "cryptography-42.0.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87086eae86a700307b544625e3ba11cc600c3c0ef8ab97b0fda0705d6db3d4e3"},
+    {file = "cryptography-42.0.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:0a68bfcf57a6887818307600c3c0ebc3f62fbb6ccad2240aa21887cda1f8df1b"},
+    {file = "cryptography-42.0.0-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:5a217bca51f3b91971400890905a9323ad805838ca3fa1e202a01844f485ee87"},
+    {file = "cryptography-42.0.0-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:ca20550bb590db16223eb9ccc5852335b48b8f597e2f6f0878bbfd9e7314eb17"},
+    {file = "cryptography-42.0.0-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:33588310b5c886dfb87dba5f013b8d27df7ffd31dc753775342a1e5ab139e59d"},
+    {file = "cryptography-42.0.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9515ea7f596c8092fdc9902627e51b23a75daa2c7815ed5aa8cf4f07469212ec"},
+    {file = "cryptography-42.0.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:35cf6ed4c38f054478a9df14f03c1169bb14bd98f0b1705751079b25e1cb58bc"},
+    {file = "cryptography-42.0.0-cp37-abi3-win32.whl", hash = "sha256:8814722cffcfd1fbd91edd9f3451b88a8f26a5fd41b28c1c9193949d1c689dc4"},
+    {file = "cryptography-42.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:a2a8d873667e4fd2f34aedab02ba500b824692c6542e017075a2efc38f60a4c0"},
+    {file = "cryptography-42.0.0-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:8fedec73d590fd30c4e3f0d0f4bc961aeca8390c72f3eaa1a0874d180e868ddf"},
+    {file = "cryptography-42.0.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:be41b0c7366e5549265adf2145135dca107718fa44b6e418dc7499cfff6b4689"},
+    {file = "cryptography-42.0.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ca482ea80626048975360c8e62be3ceb0f11803180b73163acd24bf014133a0"},
+    {file = "cryptography-42.0.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:c58115384bdcfe9c7f644c72f10f6f42bed7cf59f7b52fe1bf7ae0a622b3a139"},
+    {file = "cryptography-42.0.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:56ce0c106d5c3fec1038c3cca3d55ac320a5be1b44bf15116732d0bc716979a2"},
+    {file = "cryptography-42.0.0-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:324721d93b998cb7367f1e6897370644751e5580ff9b370c0a50dc60a2003513"},
+    {file = "cryptography-42.0.0-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:d97aae66b7de41cdf5b12087b5509e4e9805ed6f562406dfcf60e8481a9a28f8"},
+    {file = "cryptography-42.0.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:85f759ed59ffd1d0baad296e72780aa62ff8a71f94dc1ab340386a1207d0ea81"},
+    {file = "cryptography-42.0.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:206aaf42e031b93f86ad60f9f5d9da1b09164f25488238ac1dc488334eb5e221"},
+    {file = "cryptography-42.0.0-cp39-abi3-win32.whl", hash = "sha256:74f18a4c8ca04134d2052a140322002fef535c99cdbc2a6afc18a8024d5c9d5b"},
+    {file = "cryptography-42.0.0-cp39-abi3-win_amd64.whl", hash = "sha256:14e4b909373bc5bf1095311fa0f7fcabf2d1a160ca13f1e9e467be1ac4cbdf94"},
+    {file = "cryptography-42.0.0-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3005166a39b70c8b94455fdbe78d87a444da31ff70de3331cdec2c568cf25b7e"},
+    {file = "cryptography-42.0.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:be14b31eb3a293fc6e6aa2807c8a3224c71426f7c4e3639ccf1a2f3ffd6df8c3"},
+    {file = "cryptography-42.0.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:bd7cf7a8d9f34cc67220f1195884151426ce616fdc8285df9054bfa10135925f"},
+    {file = "cryptography-42.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:c310767268d88803b653fffe6d6f2f17bb9d49ffceb8d70aed50ad45ea49ab08"},
+    {file = "cryptography-42.0.0-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:bdce70e562c69bb089523e75ef1d9625b7417c6297a76ac27b1b8b1eb51b7d0f"},
+    {file = "cryptography-42.0.0-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:e9326ca78111e4c645f7e49cbce4ed2f3f85e17b61a563328c85a5208cf34440"},
+    {file = "cryptography-42.0.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:69fd009a325cad6fbfd5b04c711a4da563c6c4854fc4c9544bff3088387c77c0"},
+    {file = "cryptography-42.0.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:988b738f56c665366b1e4bfd9045c3efae89ee366ca3839cd5af53eaa1401bce"},
+    {file = "cryptography-42.0.0.tar.gz", hash = "sha256:6cf9b76d6e93c62114bd19485e5cb003115c134cf9ce91f8ac924c44f8c8c3f4"},
 ]

 [package.dependencies]
@@ -1095,100 +1073,6 @@ files = [
    {file = "graphql_core-3.2.1-py3-none-any.whl", hash = "sha256:f83c658e4968998eed1923a2e3e3eddd347e005ac0315fbb7ca4d70ea9156323"},
 ]

-[[package]]
-name = "h11"
-version = "0.14.0"
-description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1"
-optional = false
-python-versions = ">=3.7"
-files = [
-    {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"},
-    {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"},
-]
-
-[[package]]
-name = "h2"
-version = "4.1.0"
-description = "HTTP/2 State-Machine based protocol implementation"
-optional = false
-python-versions = ">=3.6.1"
-files = [
-    {file = "h2-4.1.0-py3-none-any.whl", hash = "sha256:03a46bcf682256c95b5fd9e9a99c1323584c3eec6440d379b9903d709476bc6d"},
-    {file = "h2-4.1.0.tar.gz", hash = "sha256:a83aca08fbe7aacb79fec788c9c0bac936343560ed9ec18b82a13a12c28d2abb"},
-]
-
-[package.dependencies]
-hpack = ">=4.0,<5"
-hyperframe = ">=6.0,<7"
-
-[[package]]
-name = "hpack"
-version = "4.0.0"
-description = "Pure-Python HPACK header compression"
-optional = false
-python-versions = ">=3.6.1"
-files = [
-    {file = "hpack-4.0.0-py3-none-any.whl", hash = "sha256:84a076fad3dc9a9f8063ccb8041ef100867b1878b25ef0ee63847a5d53818a6c"},
-    {file = "hpack-4.0.0.tar.gz", hash = "sha256:fc41de0c63e687ebffde81187a948221294896f6bdc0ae2312708df339430095"},
-]
-
-[[package]]
-name = "httpcore"
-version = "1.0.3"
-description = "A minimal low-level HTTP client."
-optional = false
-python-versions = ">=3.8"
-files = [
-    {file = "httpcore-1.0.3-py3-none-any.whl", hash = "sha256:9a6a501c3099307d9fd76ac244e08503427679b1e81ceb1d922485e2f2462ad2"},
-    {file = "httpcore-1.0.3.tar.gz", hash = "sha256:5c0f9546ad17dac4d0772b0808856eb616eb8b48ce94f49ed819fd6982a8a544"},
-]
-
-[package.dependencies]
-certifi = "*"
-h11 = ">=0.13,<0.15"
-
-[package.extras]
-asyncio = ["anyio (>=4.0,<5.0)"]
-http2 = ["h2 (>=3,<5)"]
-socks = ["socksio (==1.*)"]
-trio = ["trio (>=0.22.0,<0.24.0)"]
-
-[[package]]
-name = "httpx"
-version = "0.26.0"
-description = "The next generation HTTP client."
-optional = false
-python-versions = ">=3.8"
-files = [
-    {file = "httpx-0.26.0-py3-none-any.whl", hash = "sha256:8915f5a3627c4d47b73e8202457cb28f1266982d1159bd5779d86a80c0eab1cd"},
-    {file = "httpx-0.26.0.tar.gz", hash = "sha256:451b55c30d5185ea6b23c2c793abf9bb237d2a7dfb901ced6ff69ad37ec1dfaf"},
-]
-
-[package.dependencies]
-anyio = "*"
-certifi = "*"
-h2 = {version = ">=3,<5", optional = true, markers = "extra == \"http2\""}
-httpcore = "==1.*"
-idna = "*"
-sniffio = "*"
-
-[package.extras]
-brotli = ["brotli", "brotlicffi"]
-cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"]
-http2 = ["h2 (>=3,<5)"]
-socks = ["socksio (==1.*)"]
-
-[[package]]
-name = "hyperframe"
-version = "6.0.1"
-description = "HTTP/2 framing layer for Python"
-optional = false
-python-versions = ">=3.6.1"
-files = [
-    {file = "hyperframe-6.0.1-py3-none-any.whl", hash = "sha256:0ec6bafd80d8ad2195c4f03aacba3a8265e57bc4cff261e802bf39970ed02a15"},
-    {file = "hyperframe-6.0.1.tar.gz", hash = "sha256:ae510046231dc8e9ecb1a6586f63d2347bf4c8905914aa84ba585ae85f28a914"},
-]
-
 [[package]]
 name = "idna"
 version = "3.3"
@@ -2025,20 +1909,6 @@ pytest = [
    {version = ">=6.2.4", markers = "python_version >= \"3.10\""},
 ]

-[[package]]
-name = "pytest-repeat"
-version = "0.9.3"
-description = "pytest plugin for repeating tests"
-optional = false
-python-versions = ">=3.7"
-files = [
-    {file = "pytest_repeat-0.9.3-py3-none-any.whl", hash = "sha256:26ab2df18226af9d5ce441c858f273121e92ff55f5bb311d25755b8d7abdd8ed"},
-    {file = "pytest_repeat-0.9.3.tar.gz", hash = "sha256:ffd3836dfcd67bb270bec648b330e20be37d2966448c4148c4092d1e8aba8185"},
-]
-
-[package.dependencies]
-pytest = "*"
-
 [[package]]
 name = "pytest-rerunfailures"
 version = "13.0"
@@ -2182,6 +2052,7 @@ files = [
    {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
    {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
    {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
+    {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"},
    {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
    {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
    {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
@@ -2271,28 +2142,28 @@ pyasn1 = ">=0.1.3"

 [[package]]
 name = "ruff"
-version = "0.2.2"
+version = "0.1.11"
 description = "An extremely fast Python linter and code formatter, written in Rust."
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "ruff-0.2.2-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:0a9efb032855ffb3c21f6405751d5e147b0c6b631e3ca3f6b20f917572b97eb6"},
-    {file = "ruff-0.2.2-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:d450b7fbff85913f866a5384d8912710936e2b96da74541c82c1b458472ddb39"},
-    {file = "ruff-0.2.2-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ecd46e3106850a5c26aee114e562c329f9a1fbe9e4821b008c4404f64ff9ce73"},
-    {file = "ruff-0.2.2-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5e22676a5b875bd72acd3d11d5fa9075d3a5f53b877fe7b4793e4673499318ba"},
-    {file = "ruff-0.2.2-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1695700d1e25a99d28f7a1636d85bafcc5030bba9d0578c0781ba1790dbcf51c"},
-    {file = "ruff-0.2.2-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:b0c232af3d0bd8f521806223723456ffebf8e323bd1e4e82b0befb20ba18388e"},
-    {file = "ruff-0.2.2-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f63d96494eeec2fc70d909393bcd76c69f35334cdbd9e20d089fb3f0640216ca"},
-    {file = "ruff-0.2.2-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6a61ea0ff048e06de273b2e45bd72629f470f5da8f71daf09fe481278b175001"},
-    {file = "ruff-0.2.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e1439c8f407e4f356470e54cdecdca1bd5439a0673792dbe34a2b0a551a2fe3"},
-    {file = "ruff-0.2.2-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:940de32dc8853eba0f67f7198b3e79bc6ba95c2edbfdfac2144c8235114d6726"},
-    {file = "ruff-0.2.2-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:0c126da55c38dd917621552ab430213bdb3273bb10ddb67bc4b761989210eb6e"},
-    {file = "ruff-0.2.2-py3-none-musllinux_1_2_i686.whl", hash = "sha256:3b65494f7e4bed2e74110dac1f0d17dc8e1f42faaa784e7c58a98e335ec83d7e"},
-    {file = "ruff-0.2.2-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:1ec49be4fe6ddac0503833f3ed8930528e26d1e60ad35c2446da372d16651ce9"},
-    {file = "ruff-0.2.2-py3-none-win32.whl", hash = "sha256:d920499b576f6c68295bc04e7b17b6544d9d05f196bb3aac4358792ef6f34325"},
-    {file = "ruff-0.2.2-py3-none-win_amd64.whl", hash = "sha256:cc9a91ae137d687f43a44c900e5d95e9617cb37d4c989e462980ba27039d239d"},
-    {file = "ruff-0.2.2-py3-none-win_arm64.whl", hash = "sha256:c9d15fc41e6054bfc7200478720570078f0b41c9ae4f010bcc16bd6f4d1aacdd"},
-    {file = "ruff-0.2.2.tar.gz", hash = "sha256:e62ed7f36b3068a30ba39193a14274cd706bc486fad521276458022f7bccb31d"},
+    {file = "ruff-0.1.11-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:a7f772696b4cdc0a3b2e527fc3c7ccc41cdcb98f5c80fdd4f2b8c50eb1458196"},
+    {file = "ruff-0.1.11-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:934832f6ed9b34a7d5feea58972635c2039c7a3b434fe5ba2ce015064cb6e955"},
+    {file = "ruff-0.1.11-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ea0d3e950e394c4b332bcdd112aa566010a9f9c95814844a7468325290aabfd9"},
+    {file = "ruff-0.1.11-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9bd4025b9c5b429a48280785a2b71d479798a69f5c2919e7d274c5f4b32c3607"},
+    {file = "ruff-0.1.11-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e1ad00662305dcb1e987f5ec214d31f7d6a062cae3e74c1cbccef15afd96611d"},
+    {file = "ruff-0.1.11-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:4b077ce83f47dd6bea1991af08b140e8b8339f0ba8cb9b7a484c30ebab18a23f"},
+    {file = "ruff-0.1.11-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c4a88efecec23c37b11076fe676e15c6cdb1271a38f2b415e381e87fe4517f18"},
+    {file = "ruff-0.1.11-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5b25093dad3b055667730a9b491129c42d45e11cdb7043b702e97125bcec48a1"},
+    {file = "ruff-0.1.11-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:231d8fb11b2cc7c0366a326a66dafc6ad449d7fcdbc268497ee47e1334f66f77"},
+    {file = "ruff-0.1.11-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:09c415716884950080921dd6237767e52e227e397e2008e2bed410117679975b"},
+    {file = "ruff-0.1.11-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:0f58948c6d212a6b8d41cd59e349751018797ce1727f961c2fa755ad6208ba45"},
+    {file = "ruff-0.1.11-py3-none-musllinux_1_2_i686.whl", hash = "sha256:190a566c8f766c37074d99640cd9ca3da11d8deae2deae7c9505e68a4a30f740"},
+    {file = "ruff-0.1.11-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:6464289bd67b2344d2a5d9158d5eb81025258f169e69a46b741b396ffb0cda95"},
+    {file = "ruff-0.1.11-py3-none-win32.whl", hash = "sha256:9b8f397902f92bc2e70fb6bebfa2139008dc72ae5177e66c383fa5426cb0bf2c"},
+    {file = "ruff-0.1.11-py3-none-win_amd64.whl", hash = "sha256:eb85ee287b11f901037a6683b2374bb0ec82928c5cbc984f575d0437979c521a"},
+    {file = "ruff-0.1.11-py3-none-win_arm64.whl", hash = "sha256:97ce4d752f964ba559c7023a86e5f8e97f026d511e48013987623915431c7ea9"},
+    {file = "ruff-0.1.11.tar.gz", hash = "sha256:f9d4d88cb6eeb4dfe20f9f0519bd2eaba8119bde87c3d5065c541dbae2b5a2cb"},
 ]

 [[package]]
@@ -2354,17 +2225,6 @@ files = [
    {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"},
 ]

-[[package]]
-name = "sniffio"
-version = "1.3.0"
-description = "Sniff out which async library your code is running under"
-optional = false
-python-versions = ">=3.7"
-files = [
-    {file = "sniffio-1.3.0-py3-none-any.whl", hash = "sha256:eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384"},
-    {file = "sniffio-1.3.0.tar.gz", hash = "sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101"},
-]
-
 [[package]]
 name = "sshpubkeys"
 version = "3.3.1"
@@ -2571,6 +2431,16 @@ files = [
    {file = "wrapt-1.14.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8ad85f7f4e20964db4daadcab70b47ab05c7c1cf2a7c1e51087bfaa83831854c"},
    {file = "wrapt-1.14.1-cp310-cp310-win32.whl", hash = "sha256:a9a52172be0b5aae932bef82a79ec0a0ce87288c7d132946d645eba03f0ad8a8"},
    {file = "wrapt-1.14.1-cp310-cp310-win_amd64.whl", hash = "sha256:6d323e1554b3d22cfc03cd3243b5bb815a51f5249fdcbb86fda4bf62bab9e164"},
+    {file = "wrapt-1.14.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ecee4132c6cd2ce5308e21672015ddfed1ff975ad0ac8d27168ea82e71413f55"},
+    {file = "wrapt-1.14.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2020f391008ef874c6d9e208b24f28e31bcb85ccff4f335f15a3251d222b92d9"},
+    {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2feecf86e1f7a86517cab34ae6c2f081fd2d0dac860cb0c0ded96d799d20b335"},
+    {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:240b1686f38ae665d1b15475966fe0472f78e71b1b4903c143a842659c8e4cb9"},
+    {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9008dad07d71f68487c91e96579c8567c98ca4c3881b9b113bc7b33e9fd78b8"},
+    {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6447e9f3ba72f8e2b985a1da758767698efa72723d5b59accefd716e9e8272bf"},
+    {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:acae32e13a4153809db37405f5eba5bac5fbe2e2ba61ab227926a22901051c0a"},
+    {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:49ef582b7a1152ae2766557f0550a9fcbf7bbd76f43fbdc94dd3bf07cc7168be"},
+    {file = "wrapt-1.14.1-cp311-cp311-win32.whl", hash = "sha256:358fe87cc899c6bb0ddc185bf3dbfa4ba646f05b1b0b9b5a27c2cb92c2cea204"},
+    {file = "wrapt-1.14.1-cp311-cp311-win_amd64.whl", hash = "sha256:26046cd03936ae745a502abf44dac702a5e6880b2b01c29aea8ddf3353b68224"},
    {file = "wrapt-1.14.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:43ca3bbbe97af00f49efb06e352eae40434ca9d915906f77def219b88e85d907"},
    {file = "wrapt-1.14.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:6b1a564e6cb69922c7fe3a678b9f9a3c54e72b469875aa8018f18b4d1dd1adf3"},
    {file = "wrapt-1.14.1-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:00b6d4ea20a906c0ca56d84f93065b398ab74b927a7a3dbd470f6fc503f95dc3"},
@@ -2808,4 +2678,4 @@ cffi = ["cffi (>=1.11)"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.9"
-content-hash = "af9d5b45310c12411bfe67cb9677d2236808d0780ca1bd81525d2763a928f7f9"
+content-hash = "e99954cbbfef8dcc5e13cea7103c87657639a192f2372983bdb8c5d624c2e447"
--- a/proxy/src/bin/pg_sni_router.rs
+++ b/proxy/src/bin/pg_sni_router.rs
@@ -171,8 +171,16 @@ async fn task_main(
                    .context("failed to set socket option")?;

                info!(%peer_addr, "serving");
-                let ctx = RequestMonitoring::new(session_id, peer_addr.ip(), "sni_router", "sni");
-                handle_client(ctx, dest_suffix, tls_config, tls_server_end_point, socket).await
+                let mut ctx =
+                    RequestMonitoring::new(session_id, peer_addr.ip(), "sni_router", "sni");
+                handle_client(
+                    &mut ctx,
+                    dest_suffix,
+                    tls_config,
+                    tls_server_end_point,
+                    socket,
+                )
+                .await
            }
            .unwrap_or_else(|e| {
                // Acknowledge that the task has finished with an error.
@@ -240,7 +248,7 @@ async fn ssl_handshake<S: AsyncRead + AsyncWrite + Unpin>(
 }

 async fn handle_client(
-    mut ctx: RequestMonitoring,
+    ctx: &mut RequestMonitoring,
    dest_suffix: Arc<String>,
    tls_config: Arc<rustls::ServerConfig>,
    tls_server_end_point: TlsServerEndPoint,
--- a/proxy/src/console/provider.rs
+++ b/proxy/src/console/provider.rs
@@ -87,22 +87,6 @@ pub mod errors {
    impl ReportableError for ApiError {
        fn get_error_kind(&self) -> crate::error::ErrorKind {
            match self {
-                ApiError::Console {
-                    status: http::StatusCode::NOT_FOUND | http::StatusCode::NOT_ACCEPTABLE,
-                    ..
-                } => crate::error::ErrorKind::User,
-                ApiError::Console {
-                    status: http::StatusCode::LOCKED,
-                    text,
-                } if text.contains("quota exceeded")
-                    || text.contains("the limit for current plan reached") =>
-                {
-                    crate::error::ErrorKind::User
-                }
-                ApiError::Console {
-                    status: http::StatusCode::TOO_MANY_REQUESTS,
-                    ..
-                } => crate::error::ErrorKind::ServiceRateLimit,
                ApiError::Console { .. } => crate::error::ErrorKind::ControlPlane,
                ApiError::Transport(_) => crate::error::ErrorKind::ControlPlane,
            }
@@ -238,7 +222,7 @@ pub mod errors {
            match self {
                WakeComputeError::BadComputeAddress(_) => crate::error::ErrorKind::ControlPlane,
                WakeComputeError::ApiError(e) => e.get_error_kind(),
-                WakeComputeError::TimeoutError => crate::error::ErrorKind::ServiceRateLimit,
+                WakeComputeError::TimeoutError => crate::error::ErrorKind::RateLimit,
            }
        }
    }
--- a/proxy/src/context.rs
+++ b/proxy/src/context.rs
@@ -147,13 +147,15 @@ impl RequestMonitoring {
        self.success = true;
    }

-    pub fn log(self) {}
-}
-
-impl Drop for RequestMonitoring {
-    fn drop(&mut self) {
+    pub fn log(&mut self) {
        if let Some(tx) = self.sender.take() {
            let _: Result<(), _> = tx.send(self.clone());
        }
    }
 }
+
+// Dropping the context reports it via `log`. `log` takes the sender out of `self`,
+// so nothing further is sent here if `log` was already called explicitly.
+impl Drop for RequestMonitoring {
+    fn drop(&mut self) {
+        self.log()
+    }
+}
--- a/proxy/src/error.rs
+++ b/proxy/src/error.rs
@@ -37,12 +37,9 @@ pub enum ErrorKind {
    /// Network error between user and proxy. Not necessarily user error
    ClientDisconnect,

-    /// Proxy self-imposed user rate limits
+    /// Proxy self-imposed rate limits
    RateLimit,

-    /// Proxy self-imposed service-wise rate limits
-    ServiceRateLimit,
-
    /// internal errors
    Service,

@@ -57,12 +54,25 @@ pub enum ErrorKind {
 }

 impl ErrorKind {
+    /// Human-readable description of this error kind.
+    pub fn to_str(&self) -> &'static str {
+        let description = match self {
+            Self::User => "request failed due to user error",
+            Self::ClientDisconnect => "client disconnected",
+            Self::RateLimit => "request cancelled due to rate limit",
+            Self::Service => "internal service error",
+            Self::ControlPlane => "non-retryable control plane error",
+            Self::Postgres => "postgres error",
+            Self::Compute => {
+                "non-retryable compute connection error (or exhausted retry capacity)"
+            }
+        };
+        description
+    }
+
    pub fn to_metric_label(&self) -> &'static str {
        match self {
            ErrorKind::User => "user",
            ErrorKind::ClientDisconnect => "clientdisconnect",
            ErrorKind::RateLimit => "ratelimit",
-            ErrorKind::ServiceRateLimit => "serviceratelimit",
            ErrorKind::Service => "service",
            ErrorKind::ControlPlane => "controlplane",
            ErrorKind::Postgres => "postgres",
@@ -75,6 +85,12 @@ pub trait ReportableError: fmt::Display + Send + 'static {
    fn get_error_kind(&self) -> ErrorKind;
 }

+// A tokio timeout (`Elapsed`) is classified as `ErrorKind::RateLimit`.
+impl ReportableError for tokio::time::error::Elapsed {
+    fn get_error_kind(&self) -> ErrorKind {
+        ErrorKind::RateLimit
+    }
+}
+
 impl ReportableError for tokio_postgres::error::Error {
    fn get_error_kind(&self) -> ErrorKind {
        if self.as_db_error().is_some() {
--- a/proxy/src/proxy/tests.rs
+++ b/proxy/src/proxy/tests.rs
@@ -132,8 +132,9 @@ struct Scram(scram::ServerSecret);

 impl Scram {
    fn new(password: &str) -> anyhow::Result<Self> {
-        let secret =
-            scram::ServerSecret::build(password).context("failed to generate scram secret")?;
+        let salt = rand::random::<[u8; 16]>();
+        let secret = scram::ServerSecret::build(password, &salt, 256)
+            .context("failed to generate scram secret")?;
        Ok(Scram(secret))
    }

--- a/proxy/src/scram.rs
+++ b/proxy/src/scram.rs
@@ -12,6 +12,9 @@ mod messages;
 mod secret;
 mod signature;

+#[cfg(any(test, doc))]
+mod password;
+
 pub use exchange::{exchange, Exchange};
 pub use key::ScramKey;
 pub use secret::ServerSecret;
@@ -56,21 +59,27 @@ fn sha256<'a>(parts: impl IntoIterator<Item = &'a [u8]>) -> [u8; 32] {

 #[cfg(test)]
 mod tests {
-    use postgres_protocol::authentication::sasl::{ChannelBinding, ScramSha256};
-
    use crate::sasl::{Mechanism, Step};

-    use super::{Exchange, ServerSecret};
+    use super::{password::SaltedPassword, Exchange, ServerSecret};

    #[test]
-    fn snapshot() {
+    fn happy_path() {
        let iterations = 4096;
-        let salt = "QSXCR+Q6sek8bf92";
-        let stored_key = "FO+9jBb3MUukt6jJnzjPZOWc5ow/Pu6JtPyju0aqaE8=";
-        let server_key = "qxJ1SbmSAi5EcS0J5Ck/cKAm/+Ixa+Kwp63f4OHDgzo=";
-        let secret = format!("SCRAM-SHA-256${iterations}:{salt}${stored_key}:{server_key}",);
-        let secret = ServerSecret::parse(&secret).unwrap();
+        let salt_base64 = "QSXCR+Q6sek8bf92";
+        let pw = SaltedPassword::new(
+            b"pencil",
+            base64::decode(salt_base64).unwrap().as_slice(),
+            iterations,
+        );

+        let secret = ServerSecret {
+            iterations,
+            salt_base64: salt_base64.to_owned(),
+            stored_key: pw.client_key().sha256(),
+            server_key: pw.server_key(),
+            doomed: false,
+        };
        const NONCE: [u8; 18] = [
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
        ];
@@ -112,33 +121,4 @@ mod tests {
            ]
        );
    }
-
-    fn run_round_trip_test(server_password: &str, client_password: &str) {
-        let scram_secret = ServerSecret::build(server_password).unwrap();
-        let sasl_client =
-            ScramSha256::new(client_password.as_bytes(), ChannelBinding::unsupported());
-
-        let outcome = super::exchange(
-            &scram_secret,
-            sasl_client,
-            crate::config::TlsServerEndPoint::Undefined,
-        )
-        .unwrap();
-
-        match outcome {
-            crate::sasl::Outcome::Success(_) => {}
-            crate::sasl::Outcome::Failure(r) => panic!("{r}"),
-        }
-    }
-
-    #[test]
-    fn round_trip() {
-        run_round_trip_test("pencil", "pencil")
-    }
-
-    #[test]
-    #[should_panic(expected = "password doesn't match")]
-    fn failure() {
-        run_round_trip_test("pencil", "eraser")
-    }
 }
--- a/proxy/src/scram/key.rs
+++ b/proxy/src/scram/key.rs
@@ -3,7 +3,7 @@
 /// Faithfully taken from PostgreSQL.
 pub const SCRAM_KEY_LEN: usize = 32;

-/// One of the keys derived from the user's password.
+/// One of the keys derived from the [password](super::password::SaltedPassword).
 /// We use the same structure for all keys, i.e.
 /// `ClientKey`, `StoredKey`, and `ServerKey`.
 #[derive(Clone, Default, PartialEq, Eq, Debug)]
--- a/proxy/src/scram/password.rs
+++ b/proxy/src/scram/password.rs
@@ -0,0 +1,74 @@
+//! Password hashing routines.
+
+use super::key::ScramKey;
+
+pub const SALTED_PASSWORD_LEN: usize = 32;
+
+/// Salted password hash (PBKDF2 output) from which each SCRAM [key](super::key) is derived.
+#[repr(transparent)]
+pub struct SaltedPassword {
+    bytes: [u8; SALTED_PASSWORD_LEN],
+}
+
+impl SaltedPassword {
+    /// Hash `password` with `salt` via PBKDF2-HMAC-SHA-256, running `iterations` rounds.
+    ///
+    /// See `scram-common.c : scram_SaltedPassword` for details.
+    /// Further reading: <https://datatracker.ietf.org/doc/html/rfc2898> (see `PBKDF2`).
+    pub fn new(password: &[u8], salt: &[u8], iterations: u32) -> SaltedPassword {
+        let bytes = pbkdf2::pbkdf2_hmac_array::<sha2::Sha256, SALTED_PASSWORD_LEN>(
+            password, salt, iterations,
+        );
+        SaltedPassword { bytes }
+    }
+
+    /// Derive `ClientKey` from a salted hashed password.
+    pub fn client_key(&self) -> ScramKey {
+        let label: &[u8] = b"Client Key";
+        super::hmac_sha256(&self.bytes, [label]).into()
+    }
+
+    /// Derive `ServerKey` from a salted hashed password.
+    pub fn server_key(&self) -> ScramKey {
+        let label: &[u8] = b"Server Key";
+        super::hmac_sha256(&self.bytes, [label]).into()
+    }
+}
+
+impl From<[u8; SALTED_PASSWORD_LEN]> for SaltedPassword {
+    /// Wrap already-computed hash bytes as-is.
+    #[inline(always)]
+    fn from(raw: [u8; SALTED_PASSWORD_LEN]) -> Self {
+        SaltedPassword { bytes: raw }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::SaltedPassword;
+
+    /// Hand-rolled single-block PBKDF2 built on `hmac_sha256`, kept as a reference
+    /// implementation to cross-check [`SaltedPassword::new`].
+    fn legacy_pbkdf2_impl(password: &[u8], salt: &[u8], iterations: u32) -> SaltedPassword {
+        // The first round hashes the salt followed by the big-endian block index 1.
+        let block_index = 1_u32.to_be_bytes();
+        let mut round = super::super::hmac_sha256(password, [salt, &block_index]);
+        let mut acc = round;
+
+        // Every later round re-hashes the previous output and is XOR-ed into the accumulator.
+        for _ in 1..iterations {
+            round = super::super::hmac_sha256(password, [round.as_ref()]);
+            acc.iter_mut().zip(round.iter()).for_each(|(a, r)| *a ^= r);
+        }
+
+        acc.into()
+    }
+
+    /// The library-backed hash must match both a fixed known answer and the legacy routine.
+    #[test]
+    fn pbkdf2() {
+        let password = "a-very-secure-password".as_bytes();
+        let salt = "such-a-random-salt".as_bytes();
+        let iterations = 4096;
+        let known_answer = [
+            203, 18, 206, 81, 4, 154, 193, 100, 147, 41, 211, 217, 177, 203, 69, 210, 194, 211,
+            101, 1, 248, 156, 96, 0, 8, 223, 30, 87, 158, 41, 20, 42,
+        ];
+
+        let actual = SaltedPassword::new(password, salt, iterations);
+        let expected = legacy_pbkdf2_impl(password, salt, iterations);
+
+        assert_eq!(actual.bytes, known_answer);
+        assert_eq!(actual.bytes, expected.bytes);
+    }
+}
--- a/proxy/src/scram/secret.rs
+++ b/proxy/src/scram/secret.rs
@@ -3,7 +3,7 @@
 use super::base64_decode_array;
 use super::key::ScramKey;

-/// Server secret is produced from user's password,
+/// Server secret is produced from [password](super::password::SaltedPassword)
 /// and is used throughout the authentication process.
 #[derive(Clone, Eq, PartialEq, Debug)]
 pub struct ServerSecret {
@@ -59,10 +59,21 @@ impl ServerSecret {
    /// Build a new server secret from the prerequisites.
    /// XXX: We only use this function in tests.
    #[cfg(test)]
-    pub fn build(password: &str) -> Option<Self> {
-        Self::parse(&postgres_protocol::password::scram_sha_256(
-            password.as_bytes(),
-        ))
+    pub fn build(password: &str, salt: &[u8], iterations: u32) -> Option<Self> {
+        // TODO: implement proper password normalization required by the RFC
+        if !password.is_ascii() {
+            return None;
+        }
+
+        let password = super::password::SaltedPassword::new(password.as_bytes(), salt, iterations);
+
+        Some(Self {
+            iterations,
+            salt_base64: base64::encode(salt),
+            stored_key: password.client_key().sha256(),
+            server_key: password.server_key(),
+            doomed: false,
+        })
    }
 }

@@ -92,4 +103,20 @@ mod tests {
        assert_eq!(base64::encode(parsed.stored_key), stored_key);
        assert_eq!(base64::encode(parsed.server_key), server_key);
    }
+
+    /// `ServerSecret::build` must reproduce known-answer keys for a fixed password,
+    /// salt and iteration count.
+    #[test]
+    fn build_scram_secret() {
+        let salt = b"salt";
+        let built = ServerSecret::build("password", salt, 4096).unwrap();
+
+        let stored_key_b64 = base64::encode(built.stored_key.as_ref());
+        let server_key_b64 = base64::encode(built.server_key.as_ref());
+
+        assert_eq!(built.iterations, 4096);
+        assert_eq!(built.salt_base64, base64::encode(salt));
+        assert_eq!(
+            stored_key_b64,
+            "lF4cRm/Jky763CN4HtxdHnjV4Q8AWTNlKvGmEFFU8IQ="
+        );
+        assert_eq!(
+            server_key_b64,
+            "ub8OgRsftnk2ccDMOt7ffHXNcikRkQkq1lh4xaAqrSw="
+        );
+    }
 }
--- a/proxy/src/serverless.rs
+++ b/proxy/src/serverless.rs
@@ -88,10 +88,7 @@ pub async fn task_main(
            return Ok(());
        }
    };
-    let mut tls_server_config = rustls::ServerConfig::clone(&tls_config.to_server_config());
-    // prefer http2, but support http/1.1
-    tls_server_config.alpn_protocols = vec![b"h2".to_vec(), b"http/1.1".to_vec()];
-    let tls_acceptor: tokio_rustls::TlsAcceptor = Arc::new(tls_server_config).into();
+    let tls_acceptor: tokio_rustls::TlsAcceptor = tls_config.to_server_config().into();

    let mut addr_incoming = AddrIncoming::from_listener(ws_listener)?;
    let _ = addr_incoming.set_nodelay(true);
--- a/proxy/src/serverless/sql_over_http.rs
+++ b/proxy/src/serverless/sql_over_http.rs
@@ -12,7 +12,7 @@ use hyper::StatusCode;
 use hyper::{Body, HeaderMap, Request};
 use serde_json::json;
 use serde_json::Value;
-use tokio::try_join;
+use tokio::join;
 use tokio_postgres::error::DbError;
 use tokio_postgres::error::ErrorPosition;
 use tokio_postgres::GenericClient;
@@ -32,9 +32,11 @@ use crate::auth::ComputeUserInfoParseError;
 use crate::config::ProxyConfig;
 use crate::config::TlsConfig;
 use crate::context::RequestMonitoring;
+use crate::error::ReportableError;
 use crate::metrics::HTTP_CONTENT_LENGTH;
 use crate::metrics::NUM_CONNECTION_REQUESTS_GAUGE;
 use crate::proxy::NeonOptions;
+use crate::serverless::backend::HttpConnError;
 use crate::DbName;
 use crate::RoleName;

@@ -164,12 +166,9 @@ fn get_conn_info(
    let mut options = Option::None;

    for (key, value) in pairs {
-        match &*key {
-            "options" => {
-                options = Some(NeonOptions::parse_options_raw(&value));
-            }
-            "application_name" => ctx.set_application(Some(value.into())),
-            _ => {}
+        if key == "options" {
+            options = Some(NeonOptions::parse_options_raw(&value));
+            break;
        }
    }

@@ -285,10 +284,8 @@ pub async fn handle(
                )?
            }
        },
-        Err(_) => {
-            // TODO: when http error classification is done, distinguish between
-            // timeout on sql vs timeout in proxy/cplane
-            // ctx.set_error_kind(crate::error::ErrorKind::RateLimit);
+        Err(e) => {
+            ctx.set_error_kind(e.get_error_kind());

            let message = format!(
                "HTTP-Connection timed out, execution time exeeded {} seconds",
@@ -402,11 +399,16 @@ async fn handle_inner(
        // not strictly necessary to mark success here,
        // but it's just insurance for if we forget it somewhere else
        ctx.latency_timer.success();
-        Ok::<_, anyhow::Error>(client)
+        Ok::<_, HttpConnError>(client)
    };

    // Run both operations in parallel
-    let (payload, mut client) = try_join!(fetch_and_process_request, authenticate_and_connect)?;
+    let (payload_result, auth_and_connect_result) =
+        join!(fetch_and_process_request, authenticate_and_connect,);
+
+    // Both futures have completed; propagate a payload error first, then an auth/connect error.
+    let payload = payload_result?; // Handle errors appropriately
+    let mut client = auth_and_connect_result?; // Handle errors appropriately

    let mut response = Response::builder()
        .status(StatusCode::OK)
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -38,22 +38,17 @@ pytest-rerunfailures = "^13.0"
 types-pytest-lazy-fixture = "^0.6.3.3"
 pytest-split = "^0.8.1"
 zstandard = "^0.21.0"
-httpx = {extras = ["http2"], version = "^0.26.0"}
-pytest-repeat = "^0.9.3"

 [tool.poetry.group.dev.dependencies]
 mypy = "==1.3.0"
-ruff = "^0.2.2"
+ruff = "^0.1.11"

 [build-system]
 requires = ["poetry-core>=1.0.0"]
 build-backend = "poetry.core.masonry.api"

 [tool.mypy]
-exclude = [
-    "^vendor/",
-    "^target/",
-]
+exclude = "^vendor/"
 check_untyped_defs = true
 # Help mypy find imports when running against list of individual files.
 # Without this line it would behave differently when executed on the entire project.
@@ -77,13 +72,7 @@ ignore_missing_imports = true

 [tool.ruff]
 target-version = "py39"
-extend-exclude = [
-    "vendor/",
-    "target/",
-]
-line-length = 100 # this setting is rather guidance, it won't fail if it can't make the shorter
-
-[tool.ruff.lint]
+extend-exclude = ["vendor/"]
 ignore = [
    "E501", # Line too long, we don't want to be too strict about it
 ]
@@ -94,3 +83,4 @@ select = [
    "W", # pycodestyle
    "B", # bugbear
 ]
+line-length = 100 # this setting is rather guidance, it won't fail if it can't make the shorter
--- a/safekeeper/src/auth.rs
+++ b/safekeeper/src/auth.rs
@@ -12,12 +12,8 @@ pub fn check_permission(claims: &Claims, tenant_id: Option<TenantId>) -> Result<
            }
            Ok(())
        }
-        (Scope::PageServerApi | Scope::GenerationsApi, _) => Err(AuthError(
-            format!(
-                "JWT scope '{:?}' is ineligible for Safekeeper auth",
-                claims.scope
-            )
-            .into(),
+        (Scope::PageServerApi, _) => Err(AuthError(
+            "PageServerApi scope makes no sense for Safekeeper".into(),
        )),
        (Scope::SafekeeperData, _) => Ok(()),
    }
--- a/safekeeper/src/metrics.rs
+++ b/safekeeper/src/metrics.rs
@@ -695,11 +695,9 @@ impl Collector for TimelineCollector {

        // report total number of timelines
        self.timelines_count.set(timelines_count as i64);
-        mfs.extend(self.timelines_count.collect());
-
        self.active_timelines_count
            .set(active_timelines_count as i64);
-        mfs.extend(self.active_timelines_count.collect());
+        mfs.extend(self.timelines_count.collect());

        mfs
    }
--- a/scripts/comment-test-report.js
+++ b/scripts/comment-test-report.js
@@ -188,7 +188,7 @@ const reportSummary = async (params) => {
 }

 const parseCoverageSummary = async ({ summaryJsonUrl, coverageUrl, fetch }) => {
-    let summary = `\n### Code coverage* ([full report](${coverageUrl}))\n`
+    let summary = `\n### Code coverage ([full report](${coverageUrl}))\n`

    const coverage = await (await fetch(summaryJsonUrl)).json()
    for (const covType of Object.keys(coverage).sort()) {
@@ -198,7 +198,7 @@ const parseCoverageSummary = async ({ summaryJsonUrl, coverageUrl, fetch }) => {

        summary += `- \`${covType}s\`: \`${coverage[covType]["_summary"]}\`\n`
    }
-    summary += "\n\\* collected from Rust tests only\n"
+
    summary += `\n___\n`

    return summary
--- a/test_runner/fixtures/metrics.py
+++ b/test_runner/fixtures/metrics.py
@@ -4,8 +4,6 @@ from typing import Dict, List, Optional, Tuple
 from prometheus_client.parser import text_string_to_metric_families
 from prometheus_client.samples import Sample

-from fixtures.log_helper import log
-

 class Metrics:
    metrics: Dict[str, List[Sample]]
@@ -33,60 +31,6 @@ class Metrics:
        return res[0]


-class MetricsGetter:
-    """
-    Mixin for types that implement a `get_metrics` function and would like associated
-    helpers for querying the metrics
-    """
-
-    def get_metrics(self) -> Metrics:
-        raise NotImplementedError()
-
-    def get_metric_value(
-        self, name: str, filter: Optional[Dict[str, str]] = None
-    ) -> Optional[float]:
-        metrics = self.get_metrics()
-        results = metrics.query_all(name, filter=filter)
-        if not results:
-            log.info(f'could not find metric "{name}"')
-            return None
-        assert len(results) == 1, f"metric {name} with given filters is not unique, got: {results}"
-        return results[0].value
-
-    def get_metrics_values(
-        self, names: list[str], filter: Optional[Dict[str, str]] = None, absence_ok=False
-    ) -> Dict[str, float]:
-        """
-        When fetching multiple named metrics, it is more efficient to use this
-        than to call `get_metric_value` repeatedly.
-
-        Throws RuntimeError if no metrics matching `names` are found, or if
-        not all of `names` are found: this method is intended for loading sets
-        of metrics whose existence is coupled.
-
-        If it's expected that there may be no results for some of the metrics,
-        specify `absence_ok=True`. The returned dict will then not contain values
-        for these metrics.
-        """
-        metrics = self.get_metrics()
-        samples = []
-        for name in names:
-            samples.extend(metrics.query_all(name, filter=filter))
-
-        result = {}
-        for sample in samples:
-            if sample.name in result:
-                raise RuntimeError(f"Multiple values found for {sample.name}")
-            result[sample.name] = sample.value
-
-        if not absence_ok:
-            if len(result) != len(names):
-                log.info(f"Metrics found: {metrics.metrics}")
-                raise RuntimeError(f"could not find all metrics {' '.join(names)}")
-
-        return result
-
-
 def parse_metrics(text: str, name: str = "") -> Metrics:
    metrics = Metrics(name)
    gen = text_string_to_metric_families(text)
@@ -103,8 +47,7 @@ def histogram(prefix_without_trailing_underscore: str) -> List[str]:


 PAGESERVER_PER_TENANT_REMOTE_TIMELINE_CLIENT_METRICS: Tuple[str, ...] = (
-    "pageserver_remote_timeline_client_calls_started_total",
-    "pageserver_remote_timeline_client_calls_finished_total",
+    "pageserver_remote_timeline_client_calls_unfinished",
    "pageserver_remote_physical_size",
    "pageserver_remote_timeline_client_bytes_started_total",
    "pageserver_remote_timeline_client_bytes_finished_total",
@@ -133,6 +76,7 @@ PAGESERVER_GLOBAL_METRICS: Tuple[str, ...] = (
    *histogram("pageserver_getpage_get_reconstruct_data_seconds"),
    *histogram("pageserver_wait_lsn_seconds"),
    *histogram("pageserver_remote_operation_seconds"),
+    *histogram("pageserver_remote_timeline_client_calls_started"),
    *histogram("pageserver_io_operations_seconds"),
    "pageserver_tenant_states_count",
 )
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -27,7 +27,6 @@ from urllib.parse import quote, urlparse

 import asyncpg
 import backoff
-import httpx
 import jwt
 import psycopg2
 import pytest
@@ -47,7 +46,6 @@ from urllib3.util.retry import Retry
 from fixtures import overlayfs
 from fixtures.broker import NeonBroker
 from fixtures.log_helper import log
-from fixtures.metrics import Metrics, MetricsGetter, parse_metrics
 from fixtures.pageserver.allowed_errors import (
    DEFAULT_PAGESERVER_ALLOWED_ERRORS,
    scan_pageserver_log_for_errors,
@@ -488,11 +486,6 @@ class NeonEnvBuilder:

        self.pageserver_virtual_file_io_engine: Optional[str] = pageserver_virtual_file_io_engine

-        self.pageserver_get_vectored_impl: Optional[str] = None
-        if os.getenv("PAGESERVER_GET_VECTORED_IMPL", "") == "vectored":
-            self.pageserver_get_vectored_impl = "vectored"
-            log.debug('Overriding pageserver get_vectored_impl config to "vectored"')
-
        assert test_name.startswith(
            "test_"
        ), "Unexpectedly instantiated from outside a test function"
@@ -1060,8 +1053,6 @@ class NeonEnv:
            }
            if self.pageserver_virtual_file_io_engine is not None:
                ps_cfg["virtual_file_io_engine"] = self.pageserver_virtual_file_io_engine
-            if config.pageserver_get_vectored_impl is not None:
-                ps_cfg["get_vectored_impl"] = config.pageserver_get_vectored_impl

            # Create a corresponding NeonPageserver object
            self.pageservers.append(
@@ -1922,7 +1913,7 @@ class Pagectl(AbstractNeonCli):
        return IndexPartDump.from_json(parsed)


-class NeonAttachmentService(MetricsGetter):
+class NeonAttachmentService:
    def __init__(self, env: NeonEnv, auth_enabled: bool):
        self.env = env
        self.running = False
@@ -1960,11 +1951,6 @@ class NeonAttachmentService(MetricsGetter):

        return headers

-    def get_metrics(self) -> Metrics:
-        res = self.request("GET", f"{self.env.attachment_service_api}/metrics")
-        res.raise_for_status()
-        return parse_metrics(res.text)
-
    def ready(self) -> bool:
        resp = self.request("GET", f"{self.env.attachment_service_api}/ready")
        if resp.status_code == 503:
@@ -2108,17 +2094,6 @@ class NeonAttachmentService(MetricsGetter):
        log.info(f"Migrated tenant {tenant_shard_id} to pageserver {dest_ps_id}")
        assert self.env.get_tenant_pageserver(tenant_shard_id).id == dest_ps_id

-    def consistency_check(self):
-        """
-        Throw an exception if the service finds any inconsistencies in its state
-        """
-        response = self.request(
-            "POST",
-            f"{self.env.attachment_service_api}/debug/v1/consistency_check",
-        )
-        response.raise_for_status()
-        log.info("Attachment service passed consistency check")
-
    def __enter__(self) -> "NeonAttachmentService":
        return self

@@ -2864,34 +2839,9 @@ class NeonProxy(PgProtocol):
        )

        if expected_code is not None:
-            assert response.status_code == expected_code, f"response: {response.json()}"
+            assert response.status_code == kwargs["expected_code"], f"response: {response.json()}"
        return response.json()

-    async def http2_query(self, query, args, **kwargs):
-        # TODO maybe use default values if not provided
-        user = kwargs["user"]
-        password = kwargs["password"]
-        expected_code = kwargs.get("expected_code")
-
-        connstr = f"postgresql://{user}:{password}@{self.domain}:{self.proxy_port}/postgres"
-        async with httpx.AsyncClient(
-            http2=True, verify=str(self.test_output_dir / "proxy.crt")
-        ) as client:
-            response = await client.post(
-                f"https://{self.domain}:{self.external_http_port}/sql",
-                json={"query": query, "params": args},
-                headers={
-                    "Content-Type": "application/sql",
-                    "Neon-Connection-String": connstr,
-                    "Neon-Pool-Opt-In": "true",
-                },
-            )
-            assert response.http_version == "HTTP/2"
-
-            if expected_code is not None:
-                assert response.status_code == expected_code, f"response: {response.json()}"
-            return response.json()
-
    def get_metrics(self) -> str:
        request_result = requests.get(f"http://{self.host}:{self.http_port}/metrics")
        request_result.raise_for_status()
--- a/test_runner/fixtures/pageserver/allowed_errors.py
+++ b/test_runner/fixtures/pageserver/allowed_errors.py
@@ -82,11 +82,6 @@ DEFAULT_PAGESERVER_ALLOWED_ERRORS = (
    # During shutdown, DownloadError::Cancelled may be logged as an error.  Cleaning this
    # up is tracked in https://github.com/neondatabase/neon/issues/6096
    ".*Cancelled, shutting down.*",
-    # Open layers are only rolled at Lsn boundaries to avoid name clashses.
-    # Hence, we can overshoot the soft limit set by checkpoint distance.
-    # This is especially pronounced in tests that set small checkpoint
-    # distances.
-    ".*Flushed oversized open layer with size.*",
 )


--- a/test_runner/fixtures/pageserver/http.py
+++ b/test_runner/fixtures/pageserver/http.py
@@ -12,7 +12,7 @@ from requests.adapters import HTTPAdapter
 from urllib3.util.retry import Retry

 from fixtures.log_helper import log
-from fixtures.metrics import Metrics, MetricsGetter, parse_metrics
+from fixtures.metrics import Metrics, parse_metrics
 from fixtures.pg_version import PgVersion
 from fixtures.types import Lsn, TenantId, TenantShardId, TimelineId
 from fixtures.utils import Fn
@@ -125,7 +125,7 @@ class TenantConfig:
        )


-class PageserverHttpClient(requests.Session, MetricsGetter):
+class PageserverHttpClient(requests.Session):
    def __init__(
        self,
        port: int,
@@ -694,33 +694,71 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
            },
        ).value

-    def get_remote_timeline_client_queue_count(
+    def get_remote_timeline_client_metric(
        self,
+        metric_name: str,
        tenant_id: TenantId,
        timeline_id: TimelineId,
        file_kind: str,
        op_kind: str,
-    ) -> Optional[int]:
-        metrics = [
-            "pageserver_remote_timeline_client_calls_started_total",
-            "pageserver_remote_timeline_client_calls_finished_total",
-        ]
-        res = self.get_metrics_values(
-            metrics,
+    ) -> Optional[float]:
+        metrics = self.get_metrics()
+        matches = metrics.query_all(
+            name=metric_name,
            filter={
                "tenant_id": str(tenant_id),
                "timeline_id": str(timeline_id),
                "file_kind": str(file_kind),
                "op_kind": str(op_kind),
            },
-            absence_ok=True,
        )
-        if len(res) != 2:
+        if len(matches) == 0:
+            value = None
+        elif len(matches) == 1:
+            value = matches[0].value
+            assert value is not None
+        else:
+            assert len(matches) < 2, "above filter should uniquely identify metric"
+        return value
+
+    def get_metric_value(
+        self, name: str, filter: Optional[Dict[str, str]] = None
+    ) -> Optional[float]:
+        metrics = self.get_metrics()
+        results = metrics.query_all(name, filter=filter)
+        if not results:
+            log.info(f'could not find metric "{name}"')
            return None
-        inc, dec = [res[metric] for metric in metrics]
-        queue_count = int(inc) - int(dec)
-        assert queue_count >= 0
-        return queue_count
+        assert len(results) == 1, f"metric {name} with given filters is not unique, got: {results}"
+        return results[0].value
+
+    def get_metrics_values(
+        self, names: list[str], filter: Optional[Dict[str, str]] = None
+    ) -> Dict[str, float]:
+        """
+        When fetching multiple named metrics, it is more efficient to use this
+        than to call `get_metric_value` repeatedly.
+
+        Throws RuntimeError if no metrics matching `names` are found, or if
+        not all of `names` are found: this method is intended for loading sets
+        of metrics whose existence is coupled.
+        """
+        metrics = self.get_metrics()
+        samples = []
+        for name in names:
+            samples.extend(metrics.query_all(name, filter=filter))
+
+        result = {}
+        for sample in samples:
+            if sample.name in result:
+                raise RuntimeError(f"Multiple values found for {sample.name}")
+            result[sample.name] = sample.value
+
+        if len(result) != len(names):
+            log.info(f"Metrics found: {metrics.metrics}")
+            raise RuntimeError(f"could not find all metrics {' '.join(names)}")
+
+        return result

    def layer_map_info(
        self,
--- a/test_runner/fixtures/pageserver/utils.py
+++ b/test_runner/fixtures/pageserver/utils.py
@@ -1,8 +1,7 @@
 import time
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Dict, List, Optional, Union

 from mypy_boto3_s3.type_defs import (
-    DeleteObjectOutputTypeDef,
    EmptyResponseMetadataTypeDef,
    ListObjectsV2OutputTypeDef,
    ObjectTypeDef,
@@ -221,40 +220,16 @@ def wait_for_upload_queue_empty(
 ):
    while True:
        all_metrics = pageserver_http.get_metrics()
-        started = all_metrics.query_all(
-            "pageserver_remote_timeline_client_calls_started_total",
+        tl = all_metrics.query_all(
+            "pageserver_remote_timeline_client_calls_unfinished",
            {
                "tenant_id": str(tenant_id),
                "timeline_id": str(timeline_id),
            },
        )
-        finished = all_metrics.query_all(
-            "pageserver_remote_timeline_client_calls_finished_total",
-            {
-                "tenant_id": str(tenant_id),
-                "timeline_id": str(timeline_id),
-            },
-        )
-        assert len(started) == len(finished)
-        # this is `started left join finished`; if match, subtracting start from finished, resulting in queue depth
-        remaining_labels = ["shard_id", "file_kind", "op_kind"]
-        tl: List[Tuple[Any, float]] = []
-        for s in started:
-            found = False
-            for f in finished:
-                if all([s.labels[label] == f.labels[label] for label in remaining_labels]):
-                    assert (
-                        not found
-                    ), "duplicate match, remaining_labels don't uniquely identify sample"
-                    tl.append((s.labels, int(s.value) - int(f.value)))
-                    found = True
-            if not found:
-                tl.append((s.labels, int(s.value)))
-        assert len(tl) == len(started), "something broken with join logic"
-        log.info(f"upload queue for {tenant_id}/{timeline_id}:")
-        for labels, queue_count in tl:
-            log.info(f"  {labels}: {queue_count}")
-        if all(queue_count == 0 for (_, queue_count) in tl):
+        assert len(tl) > 0
+        log.info(f"upload queue for {tenant_id}/{timeline_id}: {tl}")
+        if all(m.value == 0 for m in tl):
            return
        time.sleep(0.2)

@@ -356,6 +331,7 @@ def list_prefix(
    """
    # For local_fs we need to properly handle empty directories, which we currently dont, so for simplicity stick to s3 api.
    assert isinstance(remote, S3Storage), "localfs is currently not supported"
+    assert remote.client is not None

    prefix_in_bucket = remote.prefix_in_bucket or ""
    if not prefix:
@@ -374,29 +350,6 @@ def list_prefix(
    return response


-def remote_storage_delete_key(
-    remote: RemoteStorage,
-    key: str,
-) -> DeleteObjectOutputTypeDef:
-    """
-    Note that this function takes into account prefix_in_bucket.
-    """
-    # For local_fs we need to use a different implementation. As we don't need local_fs, just don't support it for now.
-    assert isinstance(remote, S3Storage), "localfs is currently not supported"
-
-    prefix_in_bucket = remote.prefix_in_bucket or ""
-
-    # real s3 tests have uniqie per test prefix
-    # mock_s3 tests use special pageserver prefix for pageserver stuff
-    key = "/".join((prefix_in_bucket, key))
-
-    response = remote.client.delete_object(
-        Bucket=remote.bucket_name,
-        Key=key,
-    )
-    return response
-
-
 def enable_remote_storage_versioning(
    remote: RemoteStorage,
 ) -> EmptyResponseMetadataTypeDef:
@@ -405,6 +358,7 @@ def enable_remote_storage_versioning(
    """
    # local_fs has no support for versioning
    assert isinstance(remote, S3Storage), "localfs is currently not supported"
+    assert remote.client is not None

    # The SDK supports enabling versioning on normal S3 as well but we don't want to change
    # these settings from a test in a live bucket (also, our access isn't enough nor should it be)
--- a/Show More
+++ b/Show More