poetry lock

Merge remote-tracking branch 'origin/main' into arpad/less_async_trait
Adjust timeouts
2026-02-09 13:40:38 +00:00 · 2024-05-06 12:58:34 +02:00 · 2024-05-06 12:54:51 +02:00 · 2024-04-08 17:02:40 +02:00 · 2024-04-08 16:58:47 +02:00 · 2024-04-08 12:29:24 +01:00
229 changed files with 5426 additions and 9720 deletions
--- a/.config/nextest.toml
+++ b/.config/nextest.toml
@@ -1,2 +1,2 @@
 [profile.default]
-slow-timeout = { period = "60s", terminate-after = 3 }
+slow-timeout = { period = "20s", terminate-after = 3 }
--- a/.github/actionlint.yml
+++ b/.github/actionlint.yml
@@ -1,11 +1,12 @@
 self-hosted-runner:
  labels:
    - arm64
+    - dev
    - gen3
    - large
-    - large-arm64
+    # Remove `macos-14` from the list after https://github.com/rhysd/actionlint/pull/392 is merged.
+    - macos-14
    - small
-    - small-arm64
    - us-east-2
 config-variables:
  - REMOTE_STORAGE_AZURE_CONTAINER
--- a/.github/actions/run-python-test-set/action.yml
+++ b/.github/actions/run-python-test-set/action.yml
@@ -48,6 +48,10 @@ inputs:
    description: 'benchmark durations JSON'
    required: false
    default: '{}'
+  session_timeout:
+    description: 'Session timeout for the test suite'
+    required: false
+    default: ''

 runs:
  using: "composite"
@@ -107,6 +111,7 @@ runs:
        ALLOW_FORWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'forward compatibility breakage')
        RERUN_FLAKY: ${{ inputs.rerun_flaky }}
        PG_VERSION: ${{ inputs.pg_version }}
+        SESSION_TIMEOUT: ${{ inputs.session_timeout }}
      shell: bash -euxo pipefail {0}
      run: |
        # PLATFORM will be embedded in the perf test report
@@ -168,6 +173,10 @@ runs:
          EXTRA_PARAMS="--durations-path $TEST_OUTPUT/benchmark_durations.json $EXTRA_PARAMS"
        fi

+        if [ -n "${SESSION_TIMEOUT}" ]; then
+          EXTRA_PARAMS="--session-timeout ${SESSION_TIMEOUT} ${EXTRA_PARAMS}"
+        fi
+
        if [[ "${{ inputs.build_type }}" == "debug" ]]; then
          cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
        elif [[ "${{ inputs.build_type }}" == "release" ]]; then
--- a/.github/workflows/build-build-tools-image.yml
+++ b/.github/workflows/build-build-tools-image.yml
@@ -39,7 +39,7 @@ jobs:
      matrix:
        arch: [ x64, arm64 ]

-    runs-on: ${{ fromJson(format('["self-hosted", "gen3", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}
+    runs-on: ${{ fromJson(format('["self-hosted", "dev", "{0}"]', matrix.arch)) }}

    env:
      IMAGE_TAG: ${{ inputs.image-tag }}
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -236,6 +236,27 @@ jobs:
          submodules: true
          fetch-depth: 1

+      - name: Check Postgres submodules revision
+        shell: bash -euo pipefail {0}
+        run: |
+          # This is a temporary solution to ensure that the Postgres submodules revisions are correct (i.e. that they were updated intentionally).
+          # Eventually it will be replaced by a regression test https://github.com/neondatabase/neon/pull/4603
+
+          FAILED=false
+          for postgres in postgres-v14 postgres-v15 postgres-v16; do
+            expected=$(cat vendor/revisions.json | jq --raw-output '."'"${postgres}"'"')
+            actual=$(git rev-parse "HEAD:vendor/${postgres}")
+            if [ "${expected}" != "${actual}" ]; then
+              echo >&2 "Expected ${postgres} rev to be at '${expected}', but it is at '${actual}'"
+              FAILED=true
+            fi
+          done
+
+          if [ "${FAILED}" = "true" ]; then
+            echo >&2 "Please update vendor/revisions.json if these changes are intentional"
+            exit 1
+          fi
+
      - name: Set pg 14 revision for caching
        id: pg_v14_rev
        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) >> $GITHUB_OUTPUT
@@ -341,9 +362,6 @@ jobs:
        env:
          NEXTEST_RETRIES: 3
        run: |
-          #nextest does not yet support running doctests
-          cargo test --doc $CARGO_FLAGS $CARGO_FEATURES
-
          for io_engine in std-fs tokio-epoll-uring ; do
            NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES
          done
@@ -443,7 +461,8 @@ jobs:

      - name: Pytest regression tests
        uses: ./.github/actions/run-python-test-set
-        timeout-minutes: 60
+        # Hard timeout to prevent hanging tests; we also set a softer, shorter pytest timeout (via `session_timeout`)
+        timeout-minutes: 110
        with:
          build_type: ${{ matrix.build_type }}
          test_selection: regress
@@ -453,6 +472,8 @@ jobs:
          real_s3_region: eu-central-1
          rerun_flaky: true
          pg_version: ${{ matrix.pg_version }}
+          # Set pytest session timeout to 25 minutes
+          session_timeout: '1500'
        env:
          TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
          CHECK_ONDISK_DATA_COMPATIBILITY: nonempty
@@ -546,27 +567,9 @@ jobs:
      # XXX: no coverage data handling here, since benchmarks are run on release builds,
      # while coverage is currently collected for the debug ones

-  report-benchmarks-failures:
-    needs: [ benchmarks, create-test-report ]
-    if: github.ref_name == 'main' && needs.benchmarks.result == 'failure'
-    runs-on: ubuntu-latest
-
-    steps:
-    - uses: slackapi/slack-github-action@v1
-      with:
-        channel-id: C060CNA47S9 # on-call-staging-storage-stream
-        slack-message: |
-          Benchmarks failed on main: ${{ github.event.head_commit.url }}
-
-          Allure report: ${{ needs.create-test-report.outputs.report-url }}
-      env:
-        SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
-
  create-test-report:
    needs: [ check-permissions, regress-tests, coverage-report, benchmarks, build-build-tools-image ]
    if: ${{ !cancelled() && contains(fromJSON('["skipped", "success"]'), needs.check-permissions.result) }}
-    outputs:
-      report-url: ${{ steps.create-allure-report.outputs.report-url }}

    runs-on: [ self-hosted, gen3, small ]
    container:
--- a/.github/workflows/neon_extra_builds.yml
+++ b/.github/workflows/neon_extra_builds.yml
@@ -136,7 +136,7 @@ jobs:
  check-linux-arm-build:
    needs: [ check-permissions, build-build-tools-image ]
    timeout-minutes: 90
-    runs-on: [ self-hosted, small-arm64 ]
+    runs-on: [ self-hosted, dev, arm64 ]

    env:
      # Use release build only, to have less debug info around
@@ -232,20 +232,20 @@ jobs:

      - name: Run cargo build
        run: |
-          mold -run cargo build --locked $CARGO_FLAGS $CARGO_FEATURES --bins --tests -j$(nproc)
+          mold -run cargo build --locked $CARGO_FLAGS $CARGO_FEATURES --bins --tests

      - name: Run cargo test
        env:
          NEXTEST_RETRIES: 3
        run: |
-          cargo nextest run $CARGO_FEATURES -j$(nproc)
+          cargo nextest run $CARGO_FEATURES

          # Run separate tests for real S3
          export ENABLE_REAL_S3_REMOTE_STORAGE=nonempty
          export REMOTE_STORAGE_S3_BUCKET=neon-github-ci-tests
          export REMOTE_STORAGE_S3_REGION=eu-central-1
          # Avoid `$CARGO_FEATURES` since there's no `testing` feature in the e2e tests now
-          cargo nextest run --package remote_storage --test test_real_s3 -j$(nproc)
+          cargo nextest run --package remote_storage --test test_real_s3

          # Run separate tests for real Azure Blob Storage
          # XXX: replace region with `eu-central-1`-like region
@@ -255,12 +255,12 @@ jobs:
          export REMOTE_STORAGE_AZURE_CONTAINER="${{ vars.REMOTE_STORAGE_AZURE_CONTAINER }}"
          export REMOTE_STORAGE_AZURE_REGION="${{ vars.REMOTE_STORAGE_AZURE_REGION }}"
          # Avoid `$CARGO_FEATURES` since there's no `testing` feature in the e2e tests now
-          cargo nextest run --package remote_storage --test test_real_azure -j$(nproc)
+          cargo nextest run --package remote_storage --test test_real_azure

  check-codestyle-rust-arm:
    needs: [ check-permissions, build-build-tools-image ]
    timeout-minutes: 90
-    runs-on: [ self-hosted, small-arm64 ]
+    runs-on: [ self-hosted, dev, arm64 ]

    container:
      image: ${{ needs.build-build-tools-image.outputs.image }}
@@ -269,11 +269,6 @@ jobs:
        password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
      options: --init

-    strategy:
-      fail-fast: false
-      matrix:
-        build_type: [ debug, release ]
-
    steps:
      - name: Fix git ownership
        run: |
@@ -310,35 +305,31 @@ jobs:
            exit 1
          fi
          echo "CLIPPY_COMMON_ARGS=${CLIPPY_COMMON_ARGS}" >> $GITHUB_ENV
-
      - name: Run cargo clippy (debug)
-        if: matrix.build_type == 'debug'
        run: cargo hack --feature-powerset clippy $CLIPPY_COMMON_ARGS
      - name: Run cargo clippy (release)
-        if: matrix.build_type == 'release'
        run: cargo hack --feature-powerset clippy --release $CLIPPY_COMMON_ARGS

      - name: Check documentation generation
-        if: matrix.build_type == 'release'
-        run: cargo doc --workspace --no-deps --document-private-items -j$(nproc)
+        run: cargo doc --workspace --no-deps --document-private-items
        env:
            RUSTDOCFLAGS: "-Dwarnings -Arustdoc::private_intra_doc_links"

      # Use `${{ !cancelled() }}` to run quck tests after the longer clippy run
      - name: Check formatting
-        if: ${{ !cancelled() && matrix.build_type == 'release' }}
+        if: ${{ !cancelled() }}
        run: cargo fmt --all -- --check

      # https://github.com/facebookincubator/cargo-guppy/tree/bec4e0eb29dcd1faac70b1b5360267fc02bf830e/tools/cargo-hakari#2-keep-the-workspace-hack-up-to-date-in-ci
      - name: Check rust dependencies
-        if: ${{ !cancelled() && matrix.build_type == 'release' }}
+        if: ${{ !cancelled() }}
        run: |
          cargo hakari generate --diff  # workspace-hack Cargo.toml is up-to-date
          cargo hakari manage-deps --dry-run  # all workspace crates depend on workspace-hack

      # https://github.com/EmbarkStudios/cargo-deny
      - name: Check rust licenses/bans/advisories/sources
-        if: ${{ !cancelled() && matrix.build_type == 'release' }}
+        if: ${{ !cancelled() }}
        run: cargo deny check

  gather-rust-build-stats:
@@ -347,7 +338,7 @@ jobs:
      contains(github.event.pull_request.labels.*.name, 'run-extra-build-stats') ||
      contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
      github.ref_name == 'main'
-    runs-on: [ self-hosted, large ]
+    runs-on: [ self-hosted, gen3, large ]
    container:
      image: ${{ needs.build-build-tools-image.outputs.image }}
      credentials:
@@ -378,7 +369,7 @@ jobs:
        run: make walproposer-lib -j$(nproc)

      - name: Produce the build stats
-        run: cargo build --all --release --timings -j$(nproc)
+        run: cargo build --all --release --timings

      - name: Upload the build stats
        id: upload-stats
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -25,9 +25,9 @@ checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"

 [[package]]
 name = "ahash"
-version = "0.8.11"
+version = "0.8.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
+checksum = "d713b3834d76b85304d4d525563c1276e2e30dc97cc67bfb4585a4a29fc2c89f"
 dependencies = [
 "cfg-if",
 "const-random",
@@ -284,9 +284,9 @@ checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"

 [[package]]
 name = "aws-config"
-version = "1.3.0"
+version = "1.1.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "baaa0be6ee7d90b775ae6ccb6d2ba182b91219ec2001f92338773a094246af1d"
+checksum = "8b30c39ebe61f75d1b3785362b1586b41991873c9ab3e317a9181c246fb71d82"
 dependencies = [
 "aws-credential-types",
 "aws-runtime",
@@ -309,15 +309,14 @@ dependencies = [
 "time",
 "tokio",
 "tracing",
- "url",
 "zeroize",
 ]

 [[package]]
 name = "aws-credential-types"
-version = "1.2.0"
+version = "1.1.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e16838e6c9e12125face1c1eff1343c75e3ff540de98ff7ebd61874a89bcfeb9"
+checksum = "fa8587ae17c8e967e4b05a62d495be2fb7701bec52a97f7acfe8a29f938384c8"
 dependencies = [
 "aws-smithy-async",
 "aws-smithy-runtime-api",
@@ -327,9 +326,9 @@ dependencies = [

 [[package]]
 name = "aws-runtime"
-version = "1.2.1"
+version = "1.1.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "785da4a15e7b166b505fd577e4560c7a7cd8fbdf842eb1336cbcbf8944ce56f1"
+checksum = "b13dc54b4b49f8288532334bba8f87386a40571c47c37b1304979b556dc613c8"
 dependencies = [
 "aws-credential-types",
 "aws-sigv4",
@@ -374,11 +373,10 @@ dependencies = [

 [[package]]
 name = "aws-sdk-s3"
-version = "1.26.0"
+version = "1.14.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7bc5ce518d4b8d16e0408de7bdf1b3097cec61a7daa979750a208f8d9934386d"
+checksum = "951f7730f51a2155c711c85c79f337fbc02a577fa99d2a0a8059acfce5392113"
 dependencies = [
- "ahash",
 "aws-credential-types",
 "aws-runtime",
 "aws-sigv4",
@@ -393,25 +391,20 @@ dependencies = [
 "aws-smithy-xml",
 "aws-types",
 "bytes",
- "fastrand 2.0.0",
- "hex",
- "hmac",
 "http 0.2.9",
 "http-body 0.4.5",
- "lru",
 "once_cell",
 "percent-encoding",
 "regex-lite",
- "sha2",
 "tracing",
 "url",
 ]

 [[package]]
 name = "aws-sdk-sso"
-version = "1.22.0"
+version = "1.12.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ca3d6c4cba4e009391b72b0fcf12aff04ea3c9c3aa2ecaafa330326a8bd7e601"
+checksum = "f486420a66caad72635bc2ce0ff6581646e0d32df02aa39dc983bfe794955a5b"
 dependencies = [
 "aws-credential-types",
 "aws-runtime",
@@ -431,9 +424,9 @@ dependencies = [

 [[package]]
 name = "aws-sdk-ssooidc"
-version = "1.22.0"
+version = "1.12.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "73400dc239d14f63d932f4ca7b55af5e9ef1f857f7d70655249ccc287adb2570"
+checksum = "39ddccf01d82fce9b4a15c8ae8608211ee7db8ed13a70b514bbfe41df3d24841"
 dependencies = [
 "aws-credential-types",
 "aws-runtime",
@@ -453,9 +446,9 @@ dependencies = [

 [[package]]
 name = "aws-sdk-sts"
-version = "1.22.0"
+version = "1.12.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "10f8858308af76fba3e5ffcf1bb56af5471574d2bdfaf0159470c25bc2f760e5"
+checksum = "1a591f8c7e6a621a501b2b5d2e88e1697fcb6274264523a6ad4d5959889a41ce"
 dependencies = [
 "aws-credential-types",
 "aws-runtime",
@@ -476,9 +469,9 @@ dependencies = [

 [[package]]
 name = "aws-sigv4"
-version = "1.2.1"
+version = "1.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "58b56f1cbe6fd4d0c2573df72868f20ab1c125ca9c9dbce17927a463433a2e57"
+checksum = "11d6f29688a4be9895c0ba8bef861ad0c0dac5c15e9618b9b7a6c233990fc263"
 dependencies = [
 "aws-credential-types",
 "aws-smithy-eventstream",
@@ -505,9 +498,9 @@ dependencies = [

 [[package]]
 name = "aws-smithy-async"
-version = "1.2.1"
+version = "1.1.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "62220bc6e97f946ddd51b5f1361f78996e704677afc518a4ff66b7a72ea1378c"
+checksum = "d26ea8fa03025b2face2b3038a63525a10891e3d8829901d502e5384a0d8cd46"
 dependencies = [
 "futures-util",
 "pin-project-lite",
@@ -516,9 +509,9 @@ dependencies = [

 [[package]]
 name = "aws-smithy-checksums"
-version = "0.60.7"
+version = "0.60.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "83fa43bc04a6b2441968faeab56e68da3812f978a670a5db32accbdcafddd12f"
+checksum = "be2acd1b9c6ae5859999250ed5a62423aedc5cf69045b844432de15fa2f31f2b"
 dependencies = [
 "aws-smithy-http",
 "aws-smithy-types",
@@ -548,9 +541,9 @@ dependencies = [

 [[package]]
 name = "aws-smithy-http"
-version = "0.60.8"
+version = "0.60.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4a7de001a1b9a25601016d8057ea16e31a45fdca3751304c8edf4ad72e706c08"
+checksum = "3f10fa66956f01540051b0aa7ad54574640f748f9839e843442d99b970d3aff9"
 dependencies = [
 "aws-smithy-eventstream",
 "aws-smithy-runtime-api",
@@ -588,9 +581,9 @@ dependencies = [

 [[package]]
 name = "aws-smithy-runtime"
-version = "1.5.0"
+version = "1.1.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c9ac79e9f3a4d576f3cd4a470a0275b138d9e7b11b1cd514a6858ae0a79dd5bb"
+checksum = "ec81002d883e5a7fd2bb063d6fb51c4999eb55d404f4fff3dd878bf4733b9f01"
 dependencies = [
 "aws-smithy-async",
 "aws-smithy-http",
@@ -601,7 +594,6 @@ dependencies = [
 "h2 0.3.26",
 "http 0.2.9",
 "http-body 0.4.5",
- "http-body 1.0.0",
 "hyper 0.14.26",
 "hyper-rustls 0.24.0",
 "once_cell",
@@ -614,9 +606,9 @@ dependencies = [

 [[package]]
 name = "aws-smithy-runtime-api"
-version = "1.6.0"
+version = "1.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "04ec42c2f5c0e7796a2848dde4d9f3bf8ce12ccbb3d5aa40c52fa0cdd61a1c47"
+checksum = "9acb931e0adaf5132de878f1398d83f8677f90ba70f01f65ff87f6d7244be1c5"
 dependencies = [
 "aws-smithy-async",
 "aws-smithy-types",
@@ -631,19 +623,16 @@ dependencies = [

 [[package]]
 name = "aws-smithy-types"
-version = "1.1.9"
+version = "1.1.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "baf98d97bba6ddaba180f1b1147e202d8fe04940403a95a3f826c790f931bbd1"
+checksum = "abe14dceea1e70101d38fbf2a99e6a34159477c0fb95e68e05c66bd7ae4c3729"
 dependencies = [
 "base64-simd",
 "bytes",
 "bytes-utils",
 "futures-core",
 "http 0.2.9",
- "http 1.1.0",
 "http-body 0.4.5",
- "http-body 1.0.0",
- "http-body-util",
 "itoa",
 "num-integer",
 "pin-project-lite",
@@ -657,18 +646,18 @@ dependencies = [

 [[package]]
 name = "aws-smithy-xml"
-version = "0.60.8"
+version = "0.60.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d123fbc2a4adc3c301652ba8e149bf4bc1d1725affb9784eb20c953ace06bf55"
+checksum = "872c68cf019c0e4afc5de7753c4f7288ce4b71663212771bf5e4542eb9346ca9"
 dependencies = [
 "xmlparser",
 ]

 [[package]]
 name = "aws-types"
-version = "1.2.0"
+version = "1.1.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5a43b56df2c529fe44cb4d92bd64d0479883fb9608ff62daede4df5405381814"
+checksum = "0dbf2f3da841a8930f159163175cf6a3d16ddde517c1b0fba7aa776822800f40"
 dependencies = [
 "aws-credential-types",
 "aws-smithy-async",
@@ -1359,7 +1348,6 @@ dependencies = [
 "tokio-postgres",
 "tokio-util",
 "toml",
- "toml_edit",
 "tracing",
 "url",
 "utils",
@@ -2946,15 +2934,6 @@ version = "0.4.20"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"

-[[package]]
-name = "lru"
-version = "0.12.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d3262e75e648fce39813cb56ac41f3c3e3f65217ebf3844d818d1f9398cfb0dc"
-dependencies = [
- "hashbrown 0.14.0",
-]
-
 [[package]]
 name = "match_cfg"
 version = "0.1.0"
@@ -3659,7 +3638,6 @@ dependencies = [
 "arc-swap",
 "async-compression",
 "async-stream",
- "async-trait",
 "byteorder",
 "bytes",
 "camino",
@@ -4128,7 +4106,6 @@ name = "postgres_backend"
 version = "0.1.0"
 dependencies = [
 "anyhow",
- "async-trait",
 "bytes",
 "futures",
 "once_cell",
@@ -4392,7 +4369,6 @@ dependencies = [
 "hyper 1.2.0",
 "hyper-tungstenite",
 "hyper-util",
- "indexmap 2.0.1",
 "ipnet",
 "itertools",
 "lasso",
@@ -5952,7 +5928,7 @@ checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc"
 [[package]]
 name = "svg_fmt"
 version = "0.4.2"
-source = "git+https://github.com/nical/rust_debug?rev=28a7d96eecff2f28e75b1ea09f2d499a60d0e3b4#28a7d96eecff2f28e75b1ea09f2d499a60d0e3b4"
+source = "git+https://github.com/neondatabase/fork--nical--rust_debug?branch=neon#c1820b28664b5df68de7f043fccf2ed5d67b6ae8"

 [[package]]
 name = "syn"
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -52,14 +52,14 @@ azure_storage_blobs = "0.19"
 flate2 = "1.0.26"
 async-stream = "0.3"
 async-trait = "0.1"
-aws-config = { version = "1.3", default-features = false, features=["rustls"] }
-aws-sdk-s3 = "1.26"
+aws-config = { version = "1.1.4", default-features = false, features=["rustls"] }
+aws-sdk-s3 = "1.14"
 aws-sdk-iam = "1.15.0"
-aws-smithy-async = { version = "1.2.1", default-features = false, features=["rt-tokio"] }
-aws-smithy-types = "1.1.9"
-aws-credential-types = "1.2.0"
-aws-sigv4 = { version = "1.2.1", features = ["sign-http"] }
-aws-types = "1.2.0"
+aws-smithy-async = { version = "1.1.4", default-features = false, features=["rt-tokio"] }
+aws-smithy-types = "1.1.4"
+aws-credential-types = "1.1.4"
+aws-sigv4 = { version = "1.2.0", features = ["sign-http"] }
+aws-types = "1.1.7"
 axum = { version = "0.6.20", features = ["ws"] }
 base64 = "0.13.0"
 bincode = "1.3"
@@ -99,7 +99,6 @@ humantime = "2.1"
 humantime-serde = "1.1.1"
 hyper = "0.14"
 hyper-tungstenite = "0.13.0"
-indexmap = "2"
 inotify = "0.10.2"
 ipnet = "2.9.0"
 itertools = "0.10"
@@ -158,8 +157,8 @@ socket2 = "0.5"
 strum = "0.24"
 strum_macros = "0.24"
 "subtle"  = "2.5.0"
-# Our PR https://github.com/nical/rust_debug/pull/4 has been merged but no new version released yet
-svg_fmt = { git = "https://github.com/nical/rust_debug", rev = "28a7d96eecff2f28e75b1ea09f2d499a60d0e3b4" }
+# https://github.com/nical/rust_debug/pull/4
+svg_fmt = { git = "https://github.com/neondatabase/fork--nical--rust_debug", branch = "neon" }
 sync_wrapper = "0.1.2"
 tar = "0.4"
 task-local-extensions = "0.1.4"
--- a/Dockerfile.build-tools
+++ b/Dockerfile.build-tools
@@ -87,7 +87,7 @@ RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-$(uname -m).zip" -o "aws
    && rm awscliv2.zip

 # Mold: A Modern Linker
-ENV MOLD_VERSION v2.31.0
+ENV MOLD_VERSION v2.4.0
 RUN set -e \
    && git clone https://github.com/rui314/mold.git \
    && mkdir mold/build \
--- a/Makefile
+++ b/Makefile
@@ -81,14 +81,11 @@ $(POSTGRES_INSTALL_DIR)/build/%/config.status:
 		echo "'git submodule update --init --recursive --depth 2 --progress .' in project root.\n"; \
 		exit 1; }
 	mkdir -p $(POSTGRES_INSTALL_DIR)/build/$*
-
-	VERSION=$*; \
-	EXTRA_VERSION=$$(cd $(ROOT_PROJECT_DIR)/vendor/postgres-$$VERSION && git rev-parse HEAD); \
-	(cd $(POSTGRES_INSTALL_DIR)/build/$$VERSION && \
-	env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-$$VERSION/configure \
+	(cd $(POSTGRES_INSTALL_DIR)/build/$* && \
+	env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-$*/configure \
 		CFLAGS='$(PG_CFLAGS)' \
-		$(PG_CONFIGURE_OPTS) --with-extra-version=" ($$EXTRA_VERSION)" \
-		--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/$$VERSION > configure.log)
+		$(PG_CONFIGURE_OPTS) \
+		--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/$* > configure.log)

 # nicer alias to run 'configure'
 # Note: I've been unable to use templates for this part of our configuration.
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -51,7 +51,6 @@ use tracing::{error, info, warn};
 use url::Url;

 use compute_api::responses::ComputeStatus;
-use compute_api::spec::ComputeSpec;

 use compute_tools::compute::{
    forward_termination_signal, ComputeNode, ComputeState, ParsedSpec, PG_PID,
@@ -70,34 +69,6 @@ use compute_tools::swap::resize_swap;
 const BUILD_TAG_DEFAULT: &str = "latest";

 fn main() -> Result<()> {
-    let (build_tag, clap_args) = init()?;
-
-    let (pg_handle, start_pg_result) = {
-        // Enter startup tracing context
-        let _startup_context_guard = startup_context_from_env();
-
-        let cli_args = process_cli(&clap_args)?;
-
-        let cli_spec = try_spec_from_cli(&clap_args, &cli_args)?;
-
-        let wait_spec_result = wait_spec(build_tag, cli_args, cli_spec)?;
-
-        start_postgres(&clap_args, wait_spec_result)?
-
-        // Startup is finished, exit the startup tracing span
-    };
-
-    // PostgreSQL is now running, if startup was successful. Wait until it exits.
-    let wait_pg_result = wait_postgres(pg_handle)?;
-
-    let delay_exit = cleanup_after_postgres_exit(start_pg_result)?;
-
-    maybe_delay_exit(delay_exit);
-
-    deinit_and_exit(wait_pg_result);
-}
-
-fn init() -> Result<(String, clap::ArgMatches)> {
    init_tracing_and_logging(DEFAULT_LOG_LEVEL)?;

    let mut signals = Signals::new([SIGINT, SIGTERM, SIGQUIT])?;
@@ -112,15 +83,9 @@ fn init() -> Result<(String, clap::ArgMatches)> {
        .to_string();
    info!("build_tag: {build_tag}");

-    Ok((build_tag, cli().get_matches()))
-}
-
-fn process_cli(matches: &clap::ArgMatches) -> Result<ProcessCliResult> {
-    let pgbin_default = "postgres";
-    let pgbin = matches
-        .get_one::<String>("pgbin")
-        .map(|s| s.as_str())
-        .unwrap_or(pgbin_default);
+    let matches = cli().get_matches();
+    let pgbin_default = String::from("postgres");
+    let pgbin = matches.get_one::<String>("pgbin").unwrap_or(&pgbin_default);

    let ext_remote_storage = matches
        .get_one::<String>("remote-ext-config")
@@ -148,30 +113,6 @@ fn process_cli(matches: &clap::ArgMatches) -> Result<ProcessCliResult> {
    let spec_path = matches.get_one::<String>("spec-path");
    let resize_swap_on_bind = matches.get_flag("resize-swap-on-bind");

-    Ok(ProcessCliResult {
-        connstr,
-        pgdata,
-        pgbin,
-        ext_remote_storage,
-        http_port,
-        spec_json,
-        spec_path,
-        resize_swap_on_bind,
-    })
-}
-
-struct ProcessCliResult<'clap> {
-    connstr: &'clap str,
-    pgdata: &'clap str,
-    pgbin: &'clap str,
-    ext_remote_storage: Option<&'clap str>,
-    http_port: u16,
-    spec_json: Option<&'clap String>,
-    spec_path: Option<&'clap String>,
-    resize_swap_on_bind: bool,
-}
-
-fn startup_context_from_env() -> Option<opentelemetry::ContextGuard> {
    // Extract OpenTelemetry context for the startup actions from the
    // TRACEPARENT and TRACESTATE env variables, and attach it to the current
    // tracing context.
@@ -208,7 +149,7 @@ fn startup_context_from_env() -> Option<opentelemetry::ContextGuard> {
    if let Ok(val) = std::env::var("TRACESTATE") {
        startup_tracing_carrier.insert("tracestate".to_string(), val);
    }
-    if !startup_tracing_carrier.is_empty() {
+    let startup_context_guard = if !startup_tracing_carrier.is_empty() {
        use opentelemetry::propagation::TextMapPropagator;
        use opentelemetry::sdk::propagation::TraceContextPropagator;
        let guard = TraceContextPropagator::new()
@@ -218,17 +159,8 @@ fn startup_context_from_env() -> Option<opentelemetry::ContextGuard> {
        Some(guard)
    } else {
        None
-    }
-}
+    };

-fn try_spec_from_cli(
-    matches: &clap::ArgMatches,
-    ProcessCliResult {
-        spec_json,
-        spec_path,
-        ..
-    }: &ProcessCliResult,
-) -> Result<CliSpecParams> {
    let compute_id = matches.get_one::<String>("compute-id");
    let control_plane_uri = matches.get_one::<String>("control-plane-uri");

@@ -269,34 +201,6 @@ fn try_spec_from_cli(
        }
    };

-    Ok(CliSpecParams {
-        spec,
-        live_config_allowed,
-    })
-}
-
-struct CliSpecParams {
-    /// If a spec was provided via CLI or file, the [`ComputeSpec`]
-    spec: Option<ComputeSpec>,
-    live_config_allowed: bool,
-}
-
-fn wait_spec(
-    build_tag: String,
-    ProcessCliResult {
-        connstr,
-        pgdata,
-        pgbin,
-        ext_remote_storage,
-        resize_swap_on_bind,
-        http_port,
-        ..
-    }: ProcessCliResult,
-    CliSpecParams {
-        spec,
-        live_config_allowed,
-    }: CliSpecParams,
-) -> Result<WaitSpecResult> {
    let mut new_state = ComputeState::new();
    let spec_set;

@@ -335,6 +239,8 @@ fn wait_spec(
    let _http_handle =
        launch_http_server(http_port, &compute).expect("cannot launch http endpoint thread");

+    let extension_server_port: u16 = http_port;
+
    if !spec_set {
        // No spec provided, hang waiting for it.
        info!("no compute spec provided, waiting");
@@ -363,29 +269,6 @@ fn wait_spec(
        state.start_time = now;
    }

-    Ok(WaitSpecResult {
-        compute,
-        http_port,
-        resize_swap_on_bind,
-    })
-}
-
-struct WaitSpecResult {
-    compute: Arc<ComputeNode>,
-    // passed through from ProcessCliResult
-    http_port: u16,
-    resize_swap_on_bind: bool,
-}
-
-fn start_postgres(
-    // need to allow unused because `matches` is only used if target_os = "linux"
-    #[allow(unused_variables)] matches: &clap::ArgMatches,
-    WaitSpecResult {
-        compute,
-        http_port,
-        resize_swap_on_bind,
-    }: WaitSpecResult,
-) -> Result<(Option<PostgresHandle>, StartPostgresResult)> {
    // We got all we need, update the state.
    let mut state = compute.state.lock().unwrap();
    state.status = ComputeStatus::Init;
@@ -435,10 +318,10 @@ fn start_postgres(
        }
    }

-    let extension_server_port: u16 = http_port;
-
    // Start Postgres
    let mut pg = None;
+    let mut exit_code = None;
+
    if !prestartup_failed {
        pg = match compute.start_compute(extension_server_port) {
            Ok(pg) => Some(pg),
@@ -493,7 +376,7 @@ fn start_postgres(
            // This token is used internally by the monitor to clean up all threads
            let token = CancellationToken::new();

-            let vm_monitor = rt.as_ref().map(|rt| {
+            let vm_monitor = &rt.as_ref().map(|rt| {
                rt.spawn(vm_monitor::start(
                    Box::leak(Box::new(vm_monitor::Args {
                        cgroup: cgroup.cloned(),
@@ -506,41 +389,12 @@ fn start_postgres(
        }
    }

-    Ok((
-        pg,
-        StartPostgresResult {
-            delay_exit,
-            compute,
-            #[cfg(target_os = "linux")]
-            rt,
-            #[cfg(target_os = "linux")]
-            token,
-            #[cfg(target_os = "linux")]
-            vm_monitor,
-        },
-    ))
-}
-
-type PostgresHandle = (std::process::Child, std::thread::JoinHandle<()>);
-
-struct StartPostgresResult {
-    delay_exit: bool,
-    // passed through from WaitSpecResult
-    compute: Arc<ComputeNode>,
-
-    #[cfg(target_os = "linux")]
-    rt: Option<tokio::runtime::Runtime>,
-    #[cfg(target_os = "linux")]
-    token: tokio_util::sync::CancellationToken,
-    #[cfg(target_os = "linux")]
-    vm_monitor: Option<tokio::task::JoinHandle<Result<()>>>,
-}
-
-fn wait_postgres(pg: Option<PostgresHandle>) -> Result<WaitPostgresResult> {
    // Wait for the child Postgres process forever. In this state Ctrl+C will
    // propagate to Postgres and it will be shut down as well.
-    let mut exit_code = None;
    if let Some((mut pg, logs_handle)) = pg {
+        // Startup is finished, exit the startup tracing span
+        drop(startup_context_guard);
+
        let ecode = pg
            .wait()
            .expect("failed to start waiting on Postgres process");
@@ -555,25 +409,6 @@ fn wait_postgres(pg: Option<PostgresHandle>) -> Result<WaitPostgresResult> {
        exit_code = ecode.code()
    }

-    Ok(WaitPostgresResult { exit_code })
-}
-
-struct WaitPostgresResult {
-    exit_code: Option<i32>,
-}
-
-fn cleanup_after_postgres_exit(
-    StartPostgresResult {
-        mut delay_exit,
-        compute,
-        #[cfg(target_os = "linux")]
-        vm_monitor,
-        #[cfg(target_os = "linux")]
-        token,
-        #[cfg(target_os = "linux")]
-        rt,
-    }: StartPostgresResult,
-) -> Result<bool> {
    // Terminate the vm_monitor so it releases the file watcher on
    // /sys/fs/cgroup/neon-postgres.
    // Note: the vm-monitor only runs on linux because it requires cgroups.
@@ -615,19 +450,13 @@ fn cleanup_after_postgres_exit(
        error!("error while checking for core dumps: {err:?}");
    }

-    Ok(delay_exit)
-}
-
-fn maybe_delay_exit(delay_exit: bool) {
    // If launch failed, keep serving HTTP requests for a while, so the cloud
    // control plane can get the actual error.
    if delay_exit {
        info!("giving control plane 30s to collect the error before shutdown");
        thread::sleep(Duration::from_secs(30));
    }
-}

-fn deinit_and_exit(WaitPostgresResult { exit_code }: WaitPostgresResult) -> ! {
    // Shutdown trace pipeline gracefully, so that it has a chance to send any
    // pending traces before we exit. Shutting down OTEL tracing provider may
    // hang for quite some time, see, for example:
--- a/control_plane/Cargo.toml
+++ b/control_plane/Cargo.toml
@@ -28,7 +28,6 @@ serde_with.workspace = true
 tar.workspace = true
 thiserror.workspace = true
 toml.workspace = true
-toml_edit.workspace = true
 tokio.workspace = true
 tokio-postgres.workspace = true
 tokio-util.workspace = true
--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -9,11 +9,8 @@ use anyhow::{anyhow, bail, Context, Result};
 use clap::{value_parser, Arg, ArgAction, ArgMatches, Command, ValueEnum};
 use compute_api::spec::ComputeMode;
 use control_plane::endpoint::ComputeControlPlane;
-use control_plane::local_env::{
-    InitForceMode, LocalEnv, NeonBroker, NeonLocalInitConf, NeonLocalInitPageserverConf,
-    SafekeeperConf,
-};
-use control_plane::pageserver::PageServerNode;
+use control_plane::local_env::{InitForceMode, LocalEnv};
+use control_plane::pageserver::{PageServerNode, PAGESERVER_REMOTE_STORAGE_DIR};
 use control_plane::safekeeper::SafekeeperNode;
 use control_plane::storage_controller::StorageController;
 use control_plane::{broker, local_env};
@@ -55,6 +52,44 @@ const DEFAULT_PG_VERSION: &str = "15";

 const DEFAULT_PAGESERVER_CONTROL_PLANE_API: &str = "http://127.0.0.1:1234/upcall/v1/";

+fn default_conf(num_pageservers: u16) -> String {
+    let mut template = format!(
+        r#"
+# Default built-in configuration, defined in main.rs
+control_plane_api = '{DEFAULT_PAGESERVER_CONTROL_PLANE_API}'
+
+[broker]
+listen_addr = '{DEFAULT_BROKER_ADDR}'
+
+[[safekeepers]]
+id = {DEFAULT_SAFEKEEPER_ID}
+pg_port = {DEFAULT_SAFEKEEPER_PG_PORT}
+http_port = {DEFAULT_SAFEKEEPER_HTTP_PORT}
+
+"#,
+    );
+
+    for i in 0..num_pageservers {
+        let pageserver_id = NodeId(DEFAULT_PAGESERVER_ID.0 + i as u64);
+        let pg_port = DEFAULT_PAGESERVER_PG_PORT + i;
+        let http_port = DEFAULT_PAGESERVER_HTTP_PORT + i;
+
+        template += &format!(
+            r#"
+[[pageservers]]
+id = {pageserver_id}
+listen_pg_addr = '127.0.0.1:{pg_port}'
+listen_http_addr = '127.0.0.1:{http_port}'
+pg_auth_type = '{trust_auth}'
+http_auth_type = '{trust_auth}'
+"#,
+            trust_auth = AuthType::Trust,
+        )
+    }
+
+    template
+}
+
 ///
 /// Timelines tree element used as a value in the HashMap.
 ///
@@ -98,7 +133,7 @@ fn main() -> Result<()> {
        let subcommand_result = match sub_name {
            "tenant" => rt.block_on(handle_tenant(sub_args, &mut env)),
            "timeline" => rt.block_on(handle_timeline(sub_args, &mut env)),
-            "start" => rt.block_on(handle_start_all(&env)),
+            "start" => rt.block_on(handle_start_all(sub_args, &env)),
            "stop" => rt.block_on(handle_stop_all(sub_args, &env)),
            "pageserver" => rt.block_on(handle_pageserver(sub_args, &env)),
            "storage_controller" => rt.block_on(handle_storage_controller(sub_args, &env)),
@@ -117,7 +152,7 @@ fn main() -> Result<()> {
    };

    match subcommand_result {
-        Ok(Some(updated_env)) => updated_env.persist_config()?,
+        Ok(Some(updated_env)) => updated_env.persist_config(&updated_env.base_data_dir)?,
        Ok(None) => (),
        Err(e) => {
            eprintln!("command failed: {e:?}");
@@ -306,65 +341,48 @@ fn parse_timeline_id(sub_match: &ArgMatches) -> anyhow::Result<Option<TimelineId
 }

 fn handle_init(init_match: &ArgMatches) -> anyhow::Result<LocalEnv> {
-    let num_pageservers = init_match.get_one::<u16>("num-pageservers");
-
-    let force = init_match.get_one("force").expect("we set a default value");
-
-    // Create the in-memory `LocalEnv` that we'd normally load from disk in `load_config`.
-    let init_conf: NeonLocalInitConf = if let Some(config_path) =
-        init_match.get_one::<PathBuf>("config")
-    {
-        // User (likely the Python test suite) provided a description of the environment.
-        if num_pageservers.is_some() {
-            bail!("Cannot specify both --num-pageservers and --config, use key `pageservers` in the --config file instead");
-        }
+    let num_pageservers = init_match
+        .get_one::<u16>("num-pageservers")
+        .expect("num-pageservers arg has a default");
+    // Create config file
+    let toml_file: String = if let Some(config_path) = init_match.get_one::<PathBuf>("config") {
        // load and parse the file
-        let contents = std::fs::read_to_string(config_path).with_context(|| {
+        std::fs::read_to_string(config_path).with_context(|| {
            format!(
                "Could not read configuration file '{}'",
                config_path.display()
            )
-        })?;
-        toml_edit::de::from_str(&contents)?
+        })?
    } else {
-        // User (likely interactive) did not provide a description of the environment, give them the default
-        NeonLocalInitConf {
-            control_plane_api: Some(Some(DEFAULT_PAGESERVER_CONTROL_PLANE_API.parse().unwrap())),
-            broker: NeonBroker {
-                listen_addr: DEFAULT_BROKER_ADDR.parse().unwrap(),
-            },
-            safekeepers: vec![SafekeeperConf {
-                id: DEFAULT_SAFEKEEPER_ID,
-                pg_port: DEFAULT_SAFEKEEPER_PG_PORT,
-                http_port: DEFAULT_SAFEKEEPER_HTTP_PORT,
-                ..Default::default()
-            }],
-            pageservers: (0..num_pageservers.copied().unwrap_or(1))
-                .map(|i| {
-                    let pageserver_id = NodeId(DEFAULT_PAGESERVER_ID.0 + i as u64);
-                    let pg_port = DEFAULT_PAGESERVER_PG_PORT + i;
-                    let http_port = DEFAULT_PAGESERVER_HTTP_PORT + i;
-                    NeonLocalInitPageserverConf {
-                        id: pageserver_id,
-                        listen_pg_addr: format!("127.0.0.1:{pg_port}"),
-                        listen_http_addr: format!("127.0.0.1:{http_port}"),
-                        pg_auth_type: AuthType::Trust,
-                        http_auth_type: AuthType::Trust,
-                        other: Default::default(),
-                    }
-                })
-                .collect(),
-            pg_distrib_dir: None,
-            neon_distrib_dir: None,
-            default_tenant_id: TenantId::from_array(std::array::from_fn(|_| 0)),
-            storage_controller: None,
-            control_plane_compute_hook_api: None,
-        }
+        // Built-in default config
+        default_conf(*num_pageservers)
    };

-    LocalEnv::init(init_conf, force)
-        .context("materialize initial neon_local environment on disk")?;
-    Ok(LocalEnv::load_config().expect("freshly written config should be loadable"))
+    let pg_version = init_match
+        .get_one::<u32>("pg-version")
+        .copied()
+        .context("Failed to parse postgres version from the argument string")?;
+
+    let mut env =
+        LocalEnv::parse_config(&toml_file).context("Failed to create neon configuration")?;
+    let force = init_match.get_one("force").expect("we set a default value");
+    env.init(pg_version, force)
+        .context("Failed to initialize neon repository")?;
+
+    // Create remote storage location for default LocalFs remote storage
+    std::fs::create_dir_all(env.base_data_dir.join(PAGESERVER_REMOTE_STORAGE_DIR))?;
+
+    // Initialize pageserver, create initial tenant and timeline.
+    for ps_conf in &env.pageservers {
+        PageServerNode::from_env(&env, ps_conf)
+            .initialize(&pageserver_config_overrides(init_match))
+            .unwrap_or_else(|e| {
+                eprintln!("pageserver init failed: {e:?}");
+                exit(1);
+            });
+    }
+
+    Ok(env)
 }

 /// The default pageserver is the one where CLI tenant/timeline operations are sent by default.
@@ -379,6 +397,15 @@ fn get_default_pageserver(env: &local_env::LocalEnv) -> PageServerNode {
    PageServerNode::from_env(env, ps_conf)
 }

+fn pageserver_config_overrides(init_match: &ArgMatches) -> Vec<&str> {
+    init_match
+        .get_many::<String>("pageserver-config-override")
+        .into_iter()
+        .flatten()
+        .map(String::as_str)
+        .collect()
+}
+
 async fn handle_tenant(
    tenant_match: &ArgMatches,
    env: &mut local_env::LocalEnv,
@@ -1049,7 +1076,10 @@ fn get_pageserver(env: &local_env::LocalEnv, args: &ArgMatches) -> Result<PageSe
 async fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
    match sub_match.subcommand() {
        Some(("start", subcommand_args)) => {
-            if let Err(e) = get_pageserver(env, subcommand_args)?.start().await {
+            if let Err(e) = get_pageserver(env, subcommand_args)?
+                .start(&pageserver_config_overrides(subcommand_args))
+                .await
+            {
                eprintln!("pageserver start failed: {e}");
                exit(1);
            }
@@ -1075,7 +1105,10 @@ async fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->
                exit(1);
            }

-            if let Err(e) = pageserver.start().await {
+            if let Err(e) = pageserver
+                .start(&pageserver_config_overrides(subcommand_args))
+                .await
+            {
                eprintln!("pageserver start failed: {e}");
                exit(1);
            }
@@ -1202,7 +1235,7 @@ async fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->
    Ok(())
 }

-async fn handle_start_all(env: &local_env::LocalEnv) -> anyhow::Result<()> {
+async fn handle_start_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> anyhow::Result<()> {
    // Endpoints are not started automatically

    broker::start_broker_process(env).await?;
@@ -1219,7 +1252,10 @@ async fn handle_start_all(env: &local_env::LocalEnv) -> anyhow::Result<()> {

    for ps_conf in &env.pageservers {
        let pageserver = PageServerNode::from_env(env, ps_conf);
-        if let Err(e) = pageserver.start().await {
+        if let Err(e) = pageserver
+            .start(&pageserver_config_overrides(sub_match))
+            .await
+        {
            eprintln!("pageserver {} start failed: {:#}", ps_conf.id, e);
            try_stop_all(env, true).await;
            exit(1);
@@ -1360,6 +1396,13 @@ fn cli() -> Command {
        .required(false)
        .value_name("stop-mode");

+    let pageserver_config_args = Arg::new("pageserver-config-override")
+        .long("pageserver-config-override")
+        .num_args(1)
+        .action(ArgAction::Append)
+        .help("Additional pageserver's configuration options or overrides, refer to pageserver's 'config-override' CLI parameter docs for more")
+        .required(false);
+
    let remote_ext_config_args = Arg::new("remote-ext-config")
        .long("remote-ext-config")
        .num_args(1)
@@ -1393,7 +1436,9 @@ fn cli() -> Command {
    let num_pageservers_arg = Arg::new("num-pageservers")
        .value_parser(value_parser!(u16))
        .long("num-pageservers")
-        .help("How many pageservers to create (default 1)");
+        .help("How many pageservers to create (default 1)")
+        .required(false)
+        .default_value("1");

    let update_catalog = Arg::new("update-catalog")
        .value_parser(value_parser!(bool))
@@ -1419,13 +1464,14 @@ fn cli() -> Command {
        .subcommand(
            Command::new("init")
                .about("Initialize a new Neon repository, preparing configs for services to start with")
+                .arg(pageserver_config_args.clone())
                .arg(num_pageservers_arg.clone())
                .arg(
                    Arg::new("config")
                        .long("config")
                        .required(false)
                        .value_parser(value_parser!(PathBuf))
-                        .value_name("config")
+                        .value_name("config"),
                )
                .arg(pg_version_arg.clone())
                .arg(force_arg)
@@ -1507,6 +1553,7 @@ fn cli() -> Command {
                .subcommand(Command::new("status"))
                .subcommand(Command::new("start")
                    .about("Start local pageserver")
+                    .arg(pageserver_config_args.clone())
                )
                .subcommand(Command::new("stop")
                    .about("Stop local pageserver")
@@ -1514,6 +1561,7 @@ fn cli() -> Command {
                )
                .subcommand(Command::new("restart")
                    .about("Restart local pageserver")
+                    .arg(pageserver_config_args.clone())
                )
        )
        .subcommand(
@@ -1628,6 +1676,7 @@ fn cli() -> Command {
        .subcommand(
            Command::new("start")
                .about("Start page server and safekeepers")
+                .arg(pageserver_config_args)
        )
        .subcommand(
            Command::new("stop")
--- a/control_plane/src/local_env.rs
+++ b/control_plane/src/local_env.rs
@@ -3,7 +3,7 @@
 //! Now it also provides init method which acts like a stub for proper installation
 //! script which will use local paths.

-use anyhow::{bail, Context};
+use anyhow::{bail, ensure, Context};

 use clap::ValueEnum;
 use postgres_backend::AuthType;
@@ -23,8 +23,6 @@ use utils::{
    id::{NodeId, TenantId, TenantTimelineId, TimelineId},
 };

-use crate::pageserver::PageServerNode;
-use crate::pageserver::PAGESERVER_REMOTE_STORAGE_DIR;
 use crate::safekeeper::SafekeeperNode;

 pub const DEFAULT_PG_VERSION: u32 = 15;
@@ -36,7 +34,7 @@ pub const DEFAULT_PG_VERSION: u32 = 15;
 // to 'neon_local init --config=<path>' option. See control_plane/simple.conf for
 // an example.
 //
-#[derive(PartialEq, Eq, Clone, Debug)]
+#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
 pub struct LocalEnv {
    // Base directory for all the nodes (the pageserver, safekeepers and
    // compute endpoints).
@@ -44,99 +42,59 @@ pub struct LocalEnv {
    // This is not stored in the config file. Rather, this is the path where the
    // config file itself is. It is read from the NEON_REPO_DIR env variable or
    // '.neon' if not given.
+    #[serde(skip)]
    pub base_data_dir: PathBuf,

    // Path to postgres distribution. It's expected that "bin", "include",
    // "lib", "share" from postgres distribution are there. If at some point
    // in time we will be able to run against vanilla postgres we may split that
    // to four separate paths and match OS-specific installation layout.
+    #[serde(default)]
    pub pg_distrib_dir: PathBuf,

    // Path to pageserver binary.
+    #[serde(default)]
    pub neon_distrib_dir: PathBuf,

    // Default tenant ID to use with the 'neon_local' command line utility, when
    // --tenant_id is not explicitly specified.
+    #[serde(default)]
    pub default_tenant_id: Option<TenantId>,

    // used to issue tokens during e.g pg start
+    #[serde(default)]
    pub private_key_path: PathBuf,

    pub broker: NeonBroker,

    // Configuration for the storage controller (1 per neon_local environment)
+    #[serde(default)]
    pub storage_controller: NeonStorageControllerConf,

    /// This Vec must always contain at least one pageserver
-    /// Populdated by [`Self::load_config`] from the individual `pageserver.toml`s.
-    /// NB: not used anymore except for informing users that they need to change their `.neon/config`.
    pub pageservers: Vec<PageServerConf>,

+    #[serde(default)]
    pub safekeepers: Vec<SafekeeperConf>,

    // Control plane upcall API for pageserver: if None, we will not run storage_controller  If set, this will
    // be propagated into each pageserver's configuration.
+    #[serde(default)]
    pub control_plane_api: Option<Url>,

    // Control plane upcall API for storage controller.  If set, this will be propagated into the
    // storage controller's configuration.
+    #[serde(default)]
    pub control_plane_compute_hook_api: Option<Url>,

    /// Keep human-readable aliases in memory (and persist them to config), to hide ZId hex strings from the user.
+    #[serde(default)]
    // A `HashMap<String, HashMap<TenantId, TimelineId>>` would be more appropriate here,
    // but deserialization into a generic toml object as `toml::Value::try_from` fails with an error.
    // https://toml.io/en/v1.0.0 does not contain a concept of "a table inside another table".
-    pub branch_name_mappings: HashMap<String, Vec<(TenantId, TimelineId)>>,
-}
-
-/// On-disk state stored in `.neon/config`.
-#[derive(PartialEq, Eq, Clone, Debug, Default, Serialize, Deserialize)]
-#[serde(default, deny_unknown_fields)]
-pub struct OnDiskConfig {
-    pub pg_distrib_dir: PathBuf,
-    pub neon_distrib_dir: PathBuf,
-    pub default_tenant_id: Option<TenantId>,
-    pub private_key_path: PathBuf,
-    pub broker: NeonBroker,
-    pub storage_controller: NeonStorageControllerConf,
-    #[serde(
-        skip_serializing,
-        deserialize_with = "fail_if_pageservers_field_specified"
-    )]
-    pub pageservers: Vec<PageServerConf>,
-    pub safekeepers: Vec<SafekeeperConf>,
-    pub control_plane_api: Option<Url>,
-    pub control_plane_compute_hook_api: Option<Url>,
    branch_name_mappings: HashMap<String, Vec<(TenantId, TimelineId)>>,
 }

-fn fail_if_pageservers_field_specified<'de, D>(_: D) -> Result<Vec<PageServerConf>, D::Error>
-where
-    D: serde::Deserializer<'de>,
-{
-    Err(serde::de::Error::custom(
-        "The 'pageservers' field is no longer used; pageserver.toml is now authoritative; \
-         Please remove the `pageservers` from your .neon/config.",
-    ))
-}
-
-/// The description of the neon_local env to be initialized by `neon_local init --config`.
-#[derive(Clone, Debug, Deserialize)]
-#[serde(deny_unknown_fields)]
-pub struct NeonLocalInitConf {
-    // TODO: do we need this? Seems unused
-    pub pg_distrib_dir: Option<PathBuf>,
-    // TODO: do we need this? Seems unused
-    pub neon_distrib_dir: Option<PathBuf>,
-    pub default_tenant_id: TenantId,
-    pub broker: NeonBroker,
-    pub storage_controller: Option<NeonStorageControllerConf>,
-    pub pageservers: Vec<NeonLocalInitPageserverConf>,
-    pub safekeepers: Vec<SafekeeperConf>,
-    pub control_plane_api: Option<Option<Url>>,
-    pub control_plane_compute_hook_api: Option<Option<Url>>,
-}
-
 /// Broker config for cluster internal communication.
 #[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
 #[serde(default)]
@@ -152,9 +110,6 @@ pub struct NeonStorageControllerConf {
    /// Heartbeat timeout before marking a node offline
    #[serde(with = "humantime_serde")]
    pub max_unavailable: Duration,
-
-    /// Threshold for auto-splitting a tenant into shards
-    pub split_threshold: Option<u64>,
 }

 impl NeonStorageControllerConf {
@@ -167,7 +122,6 @@ impl Default for NeonStorageControllerConf {
    fn default() -> Self {
        Self {
            max_unavailable: Self::DEFAULT_MAX_UNAVAILABLE_INTERVAL,
-            split_threshold: None,
        }
    }
 }
@@ -187,18 +141,24 @@ impl NeonBroker {
    }
 }

-// neon_local needs to know this subset of pageserver configuration.
-// For legacy reasons, this information is duplicated from `pageserver.toml` into `.neon/config`.
-// It can get stale if `pageserver.toml` is changed.
-// TODO(christian): don't store this at all in `.neon/config`, always load it from `pageserver.toml`
 #[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
 #[serde(default, deny_unknown_fields)]
 pub struct PageServerConf {
+    // node id
    pub id: NodeId,
+
+    // Pageserver connection settings
    pub listen_pg_addr: String,
    pub listen_http_addr: String,
+
+    // auth type used for the PG and HTTP ports
    pub pg_auth_type: AuthType,
    pub http_auth_type: AuthType,
+
+    pub(crate) virtual_file_io_engine: Option<String>,
+    pub(crate) get_vectored_impl: Option<String>,
+    pub(crate) get_impl: Option<String>,
+    pub(crate) validate_vectored_get: Option<bool>,
 }

 impl Default for PageServerConf {
@@ -209,40 +169,10 @@ impl Default for PageServerConf {
            listen_http_addr: String::new(),
            pg_auth_type: AuthType::Trust,
            http_auth_type: AuthType::Trust,
-        }
-    }
-}
-
-/// The toml that can be passed to `neon_local init --config`.
-/// This is a subset of the `pageserver.toml` configuration.
-// TODO(christian): use pageserver_api::config::ConfigToml (PR #7656)
-#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)]
-pub struct NeonLocalInitPageserverConf {
-    pub id: NodeId,
-    pub listen_pg_addr: String,
-    pub listen_http_addr: String,
-    pub pg_auth_type: AuthType,
-    pub http_auth_type: AuthType,
-    #[serde(flatten)]
-    pub other: HashMap<String, toml::Value>,
-}
-
-impl From<&NeonLocalInitPageserverConf> for PageServerConf {
-    fn from(conf: &NeonLocalInitPageserverConf) -> Self {
-        let NeonLocalInitPageserverConf {
-            id,
-            listen_pg_addr,
-            listen_http_addr,
-            pg_auth_type,
-            http_auth_type,
-            other: _,
-        } = conf;
-        Self {
-            id: *id,
-            listen_pg_addr: listen_pg_addr.clone(),
-            listen_http_addr: listen_http_addr.clone(),
-            pg_auth_type: *pg_auth_type,
-            http_auth_type: *http_auth_type,
+            virtual_file_io_engine: None,
+            get_vectored_impl: None,
+            get_impl: None,
+            validate_vectored_get: None,
        }
    }
 }
@@ -430,7 +360,44 @@ impl LocalEnv {
            .collect()
    }

-    ///  Construct `Self` from on-disk state.
+    /// Create a LocalEnv from a config file.
+    ///
+    /// Unlike 'load_config', this function fills in any defaults that are missing
+    /// from the config file.
+    pub fn parse_config(toml: &str) -> anyhow::Result<Self> {
+        let mut env: LocalEnv = toml::from_str(toml)?;
+
+        // Find postgres binaries.
+        // Follow POSTGRES_DISTRIB_DIR if set, otherwise look in "pg_install".
+        // Note that later in the code we assume, that distrib dirs follow the same pattern
+        // for all postgres versions.
+        if env.pg_distrib_dir == Path::new("") {
+            if let Some(postgres_bin) = env::var_os("POSTGRES_DISTRIB_DIR") {
+                env.pg_distrib_dir = postgres_bin.into();
+            } else {
+                let cwd = env::current_dir()?;
+                env.pg_distrib_dir = cwd.join("pg_install")
+            }
+        }
+
+        // Find neon binaries.
+        if env.neon_distrib_dir == Path::new("") {
+            env::current_exe()?
+                .parent()
+                .unwrap()
+                .clone_into(&mut env.neon_distrib_dir);
+        }
+
+        if env.pageservers.is_empty() {
+            anyhow::bail!("Configuration must contain at least one pageserver");
+        }
+
+        env.base_data_dir = base_path();
+
+        Ok(env)
+    }
+
+    /// Locate and load config
    pub fn load_config() -> anyhow::Result<Self> {
        let repopath = base_path();

@@ -444,129 +411,38 @@ impl LocalEnv {
        // TODO: check that it looks like a neon repository

        // load and parse file
-        let config_file_contents = fs::read_to_string(repopath.join("config"))?;
-        let on_disk_config: OnDiskConfig = toml::from_str(config_file_contents.as_str())?;
-        let mut env = {
-            let OnDiskConfig {
-                pg_distrib_dir,
-                neon_distrib_dir,
-                default_tenant_id,
-                private_key_path,
-                broker,
-                storage_controller,
-                pageservers,
-                safekeepers,
-                control_plane_api,
-                control_plane_compute_hook_api,
-                branch_name_mappings,
-            } = on_disk_config;
-            LocalEnv {
-                base_data_dir: repopath.clone(),
-                pg_distrib_dir,
-                neon_distrib_dir,
-                default_tenant_id,
-                private_key_path,
-                broker,
-                storage_controller,
-                pageservers,
-                safekeepers,
-                control_plane_api,
-                control_plane_compute_hook_api,
-                branch_name_mappings,
-            }
-        };
+        let config = fs::read_to_string(repopath.join("config"))?;
+        let mut env: LocalEnv = toml::from_str(config.as_str())?;

-        // The source of truth for pageserver configuration is the pageserver.toml.
-        assert!(
-            env.pageservers.is_empty(),
-            "we ensure this during deserialization"
-        );
-        env.pageservers = {
-            let iter = std::fs::read_dir(&repopath).context("open dir")?;
-            let mut pageservers = Vec::new();
-            for res in iter {
-                let dentry = res?;
-                const PREFIX: &str = "pageserver_";
-                let dentry_name = dentry
-                    .file_name()
-                    .into_string()
-                    .ok()
-                    .with_context(|| format!("non-utf8 dentry: {:?}", dentry.path()))
-                    .unwrap();
-                if !dentry_name.starts_with(PREFIX) {
-                    continue;
-                }
-                if !dentry.file_type().context("determine file type")?.is_dir() {
-                    anyhow::bail!("expected a directory, got {:?}", dentry.path());
-                }
-                let id = dentry_name[PREFIX.len()..]
-                    .parse::<NodeId>()
-                    .with_context(|| format!("parse id from {:?}", dentry.path()))?;
-                // TODO(christian): use pageserver_api::config::ConfigToml (PR #7656)
-                #[derive(serde::Serialize, serde::Deserialize)]
-                // (allow unknown fields, unlike PageServerConf)
-                struct PageserverConfigTomlSubset {
-                    id: NodeId,
-                    listen_pg_addr: String,
-                    listen_http_addr: String,
-                    pg_auth_type: AuthType,
-                    http_auth_type: AuthType,
-                }
-                let config_toml_path = dentry.path().join("pageserver.toml");
-                let config_toml: PageserverConfigTomlSubset = toml_edit::de::from_str(
-                    &std::fs::read_to_string(&config_toml_path)
-                        .with_context(|| format!("read {:?}", config_toml_path))?,
-                )
-                .context("parse pageserver.toml")?;
-                let PageserverConfigTomlSubset {
-                    id: config_toml_id,
-                    listen_pg_addr,
-                    listen_http_addr,
-                    pg_auth_type,
-                    http_auth_type,
-                } = config_toml;
-                let conf = PageServerConf {
-                    id: {
-                        anyhow::ensure!(
-                            config_toml_id == id,
-                            "id mismatch: config_toml.id={config_toml_id} id={id}",
-                        );
-                        id
-                    },
-                    listen_pg_addr,
-                    listen_http_addr,
-                    pg_auth_type,
-                    http_auth_type,
-                };
-                pageservers.push(conf);
-            }
-            pageservers
-        };
+        env.base_data_dir = repopath;

        Ok(env)
    }

-    pub fn persist_config(&self) -> anyhow::Result<()> {
-        Self::persist_config_impl(
-            &self.base_data_dir,
-            &OnDiskConfig {
-                pg_distrib_dir: self.pg_distrib_dir.clone(),
-                neon_distrib_dir: self.neon_distrib_dir.clone(),
-                default_tenant_id: self.default_tenant_id,
-                private_key_path: self.private_key_path.clone(),
-                broker: self.broker.clone(),
-                storage_controller: self.storage_controller.clone(),
-                pageservers: vec![], // it's skip_serializing anyway
-                safekeepers: self.safekeepers.clone(),
-                control_plane_api: self.control_plane_api.clone(),
-                control_plane_compute_hook_api: self.control_plane_compute_hook_api.clone(),
-                branch_name_mappings: self.branch_name_mappings.clone(),
-            },
-        )
-    }
+    pub fn persist_config(&self, base_path: &Path) -> anyhow::Result<()> {
+        // Currently, the user first passes a config file with 'neon_local init --config=<path>'
+        // We read that in, in `parse_config`, and fill any missing defaults. Then it's saved
+        // to .neon/config. TODO: We lose any formatting and comments along the way, which is
+        // a bit sad.
+        let mut conf_content = r#"# This file describes a local deployment of the page server
+# and safekeeeper node. It is read by the 'neon_local' command-line
+# utility.
+"#
+        .to_string();
+
+        // Convert the LocalEnv to a toml file.
+        //
+        // This could be as simple as this:
+        //
+        // conf_content += &toml::to_string_pretty(env)?;
+        //
+        // But it results in a "values must be emitted before tables". I'm not sure
+        // why, AFAICS the table, i.e. 'safekeepers: Vec<SafekeeperConf>' is last.
+        // Maybe rust reorders the fields to avoid padding or something?
+        // In any case, converting to toml::Value first, and serializing that, works.
+        // See https://github.com/alexcrichton/toml-rs/issues/142
+        conf_content += &toml::to_string_pretty(&toml::Value::try_from(self)?)?;

-    pub fn persist_config_impl(base_path: &Path, config: &OnDiskConfig) -> anyhow::Result<()> {
-        let conf_content = &toml::to_string_pretty(config)?;
        let target_config_path = base_path.join("config");
        fs::write(&target_config_path, conf_content).with_context(|| {
            format!(
@@ -591,13 +467,17 @@ impl LocalEnv {
        }
    }

-    /// Materialize the [`NeonLocalInitConf`] to disk. Called during [`neon_local init`].
-    pub fn init(conf: NeonLocalInitConf, force: &InitForceMode) -> anyhow::Result<()> {
-        let base_path = base_path();
-        assert_ne!(base_path, Path::new(""));
-        let base_path = &base_path;
+    //
+    // Initialize a new Neon repository
+    //
+    pub fn init(&mut self, pg_version: u32, force: &InitForceMode) -> anyhow::Result<()> {
+        // check if config already exists
+        let base_path = &self.base_data_dir;
+        ensure!(
+            base_path != Path::new(""),
+            "repository base path is missing"
+        );

-        // create base_path dir
        if base_path.exists() {
            match force {
                InitForceMode::MustNotExist => {
@@ -629,96 +509,70 @@ impl LocalEnv {
                }
            }
        }
+
+        if !self.pg_bin_dir(pg_version)?.join("postgres").exists() {
+            bail!(
+                "Can't find postgres binary at {}",
+                self.pg_bin_dir(pg_version)?.display()
+            );
+        }
+        for binary in ["pageserver", "safekeeper"] {
+            if !self.neon_distrib_dir.join(binary).exists() {
+                bail!(
+                    "Can't find binary '{binary}' in neon distrib dir '{}'",
+                    self.neon_distrib_dir.display()
+                );
+            }
+        }
+
        if !base_path.exists() {
            fs::create_dir(base_path)?;
        }

-        let NeonLocalInitConf {
-            pg_distrib_dir,
-            neon_distrib_dir,
-            default_tenant_id,
-            broker,
-            storage_controller,
-            pageservers,
-            safekeepers,
-            control_plane_api,
-            control_plane_compute_hook_api,
-        } = conf;
-
-        // Find postgres binaries.
-        // Follow POSTGRES_DISTRIB_DIR if set, otherwise look in "pg_install".
-        // Note that later in the code we assume, that distrib dirs follow the same pattern
-        // for all postgres versions.
-        let pg_distrib_dir = pg_distrib_dir.unwrap_or_else(|| {
-            if let Some(postgres_bin) = env::var_os("POSTGRES_DISTRIB_DIR") {
-                postgres_bin.into()
-            } else {
-                let cwd = env::current_dir().unwrap();
-                cwd.join("pg_install")
-            }
-        });
-
-        // Find neon binaries.
-        let neon_distrib_dir = neon_distrib_dir
-            .unwrap_or_else(|| env::current_exe().unwrap().parent().unwrap().to_owned());
-
        // Generate keypair for JWT.
        //
        // The keypair is only needed if authentication is enabled in any of the
        // components. For convenience, we generate the keypair even if authentication
        // is not enabled, so that you can easily enable it after the initialization
-        // step.
-        generate_auth_keys(
-            base_path.join("auth_private_key.pem").as_path(),
-            base_path.join("auth_public_key.pem").as_path(),
-        )
-        .context("generate auth keys")?;
-        let private_key_path = PathBuf::from("auth_private_key.pem");
-
-        // create the runtime type because the remaining initialization code below needs
-        // a LocalEnv instance op operation
-        // TODO: refactor to avoid this, LocalEnv should only be constructed from on-disk state
-        let env = LocalEnv {
-            base_data_dir: base_path.clone(),
-            pg_distrib_dir,
-            neon_distrib_dir,
-            default_tenant_id: Some(default_tenant_id),
-            private_key_path,
-            broker,
-            storage_controller: storage_controller.unwrap_or_default(),
-            pageservers: pageservers.iter().map(Into::into).collect(),
-            safekeepers,
-            control_plane_api: control_plane_api.unwrap_or_default(),
-            control_plane_compute_hook_api: control_plane_compute_hook_api.unwrap_or_default(),
-            branch_name_mappings: Default::default(),
-        };
-
-        // create endpoints dir
-        fs::create_dir_all(env.endpoints_path())?;
-
-        // create safekeeper dirs
-        for safekeeper in &env.safekeepers {
-            fs::create_dir_all(SafekeeperNode::datadir_path_by_id(&env, safekeeper.id))?;
+        // step. However, if the key generation fails, we treat it as non-fatal if
+        // authentication was not enabled.
+        if self.private_key_path == PathBuf::new() {
+            match generate_auth_keys(
+                base_path.join("auth_private_key.pem").as_path(),
+                base_path.join("auth_public_key.pem").as_path(),
+            ) {
+                Ok(()) => {
+                    self.private_key_path = PathBuf::from("auth_private_key.pem");
+                }
+                Err(e) => {
+                    if !self.auth_keys_needed() {
+                        eprintln!("Could not generate keypair for JWT authentication: {e}");
+                        eprintln!("Continuing anyway because authentication was not enabled");
+                        self.private_key_path = PathBuf::from("auth_private_key.pem");
+                    } else {
+                        return Err(e);
+                    }
+                }
+            }
        }

-        // initialize pageserver state
-        for (i, ps) in pageservers.into_iter().enumerate() {
-            let runtime_ps = &env.pageservers[i];
-            assert_eq!(&PageServerConf::from(&ps), runtime_ps);
-            fs::create_dir(env.pageserver_data_dir(ps.id))?;
-            PageServerNode::from_env(&env, runtime_ps)
-                .initialize(ps)
-                .context("pageserver init failed")?;
+        fs::create_dir_all(self.endpoints_path())?;
+
+        for safekeeper in &self.safekeepers {
+            fs::create_dir_all(SafekeeperNode::datadir_path_by_id(self, safekeeper.id))?;
        }

-        // setup remote remote location for default LocalFs remote storage
-        std::fs::create_dir_all(env.base_data_dir.join(PAGESERVER_REMOTE_STORAGE_DIR))?;
+        self.persist_config(base_path)
+    }

-        env.persist_config()
+    fn auth_keys_needed(&self) -> bool {
+        self.pageservers.iter().any(|ps| {
+            ps.pg_auth_type == AuthType::NeonJWT || ps.http_auth_type == AuthType::NeonJWT
+        }) || self.safekeepers.iter().any(|sk| sk.auth_enabled)
    }
 }

-pub fn base_path() -> PathBuf {
+fn base_path() -> PathBuf {
    match std::env::var_os("NEON_REPO_DIR") {
        Some(val) => PathBuf::from(val),
        None => PathBuf::from(".neon"),
@@ -761,3 +615,31 @@ fn generate_auth_keys(private_key_path: &Path, public_key_path: &Path) -> anyhow
    }
    Ok(())
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn simple_conf_parsing() {
+        let simple_conf_toml = include_str!("../simple.conf");
+        let simple_conf_parse_result = LocalEnv::parse_config(simple_conf_toml);
+        assert!(
+            simple_conf_parse_result.is_ok(),
+            "failed to parse simple config {simple_conf_toml}, reason: {simple_conf_parse_result:?}"
+        );
+
+        let string_to_replace = "listen_addr = '127.0.0.1:50051'";
+        let spoiled_url_str = "listen_addr = '!@$XOXO%^&'";
+        let spoiled_url_toml = simple_conf_toml.replace(string_to_replace, spoiled_url_str);
+        assert!(
+            spoiled_url_toml.contains(spoiled_url_str),
+            "Failed to replace string {string_to_replace} in the toml file {simple_conf_toml}"
+        );
+        let spoiled_url_parse_result = LocalEnv::parse_config(&spoiled_url_toml);
+        assert!(
+            spoiled_url_parse_result.is_err(),
+            "expected toml with invalid Url {spoiled_url_toml} to fail the parsing, but got {spoiled_url_parse_result:?}"
+        );
+    }
+}
--- a/control_plane/src/pageserver.rs
+++ b/control_plane/src/pageserver.rs
@@ -4,21 +4,21 @@
 //!
 //!   .neon/
 //!
+use std::borrow::Cow;
 use std::collections::HashMap;

 use std::io;
 use std::io::Write;
 use std::num::NonZeroU64;
 use std::path::PathBuf;
-use std::str::FromStr;
+use std::process::Command;
 use std::time::Duration;

 use anyhow::{bail, Context};
 use camino::Utf8PathBuf;
 use futures::SinkExt;
 use pageserver_api::models::{
-    self, AuxFilePolicy, LocationConfig, ShardParameters, TenantHistorySize, TenantInfo,
-    TimelineInfo,
+    self, LocationConfig, ShardParameters, TenantHistorySize, TenantInfo, TimelineInfo,
 };
 use pageserver_api::shard::TenantShardId;
 use pageserver_client::mgmt_api;
@@ -30,7 +30,7 @@ use utils::{
    lsn::Lsn,
 };

-use crate::local_env::{NeonLocalInitPageserverConf, PageServerConf};
+use crate::local_env::PageServerConf;
 use crate::{background_process, local_env::LocalEnv};

 /// Directory within .neon which will be used by default for LocalFs remote storage.
@@ -74,23 +74,71 @@ impl PageServerNode {
        }
    }

-    fn pageserver_init_make_toml(
-        &self,
-        conf: NeonLocalInitPageserverConf,
-    ) -> anyhow::Result<toml_edit::Document> {
-        assert_eq!(&PageServerConf::from(&conf), &self.conf, "during neon_local init, we derive the runtime state of ps conf (self.conf) from the --config flag fully");
-
-        // TODO(christian): instead of what we do here, create a pageserver_api::config::ConfigToml (PR #7656)
-
+    /// Merge overrides provided by the user on the command line with our default overrides derived from neon_local configuration.
+    ///
+    /// These all end up on the command line of the `pageserver` binary.
+    fn neon_local_overrides(&self, cli_overrides: &[&str]) -> Vec<String> {
        // FIXME: the paths should be shell-escaped to handle paths with spaces, quotas etc.
        let pg_distrib_dir_param = format!(
            "pg_distrib_dir='{}'",
            self.env.pg_distrib_dir_raw().display()
        );

+        let PageServerConf {
+            id,
+            listen_pg_addr,
+            listen_http_addr,
+            pg_auth_type,
+            http_auth_type,
+            virtual_file_io_engine,
+            get_vectored_impl,
+            get_impl,
+            validate_vectored_get,
+        } = &self.conf;
+
+        let id = format!("id={}", id);
+
+        let http_auth_type_param = format!("http_auth_type='{}'", http_auth_type);
+        let listen_http_addr_param = format!("listen_http_addr='{}'", listen_http_addr);
+
+        let pg_auth_type_param = format!("pg_auth_type='{}'", pg_auth_type);
+        let listen_pg_addr_param = format!("listen_pg_addr='{}'", listen_pg_addr);
+        let virtual_file_io_engine = if let Some(virtual_file_io_engine) = virtual_file_io_engine {
+            format!("virtual_file_io_engine='{virtual_file_io_engine}'")
+        } else {
+            String::new()
+        };
+        let get_vectored_impl = if let Some(get_vectored_impl) = get_vectored_impl {
+            format!("get_vectored_impl='{get_vectored_impl}'")
+        } else {
+            String::new()
+        };
+        let get_impl = if let Some(get_impl) = get_impl {
+            format!("get_impl='{get_impl}'")
+        } else {
+            String::new()
+        };
+        let validate_vectored_get = if let Some(validate_vectored_get) = validate_vectored_get {
+            format!("validate_vectored_get={validate_vectored_get}")
+        } else {
+            String::new()
+        };
+
        let broker_endpoint_param = format!("broker_endpoint='{}'", self.env.broker.client_url());

-        let mut overrides = vec![pg_distrib_dir_param, broker_endpoint_param];
+        let mut overrides = vec![
+            id,
+            pg_distrib_dir_param,
+            http_auth_type_param,
+            pg_auth_type_param,
+            listen_http_addr_param,
+            listen_pg_addr_param,
+            broker_endpoint_param,
+            virtual_file_io_engine,
+            get_vectored_impl,
+            get_impl,
+            validate_vectored_get,
+        ];

        if let Some(control_plane_api) = &self.env.control_plane_api {
            overrides.push(format!(
@@ -100,7 +148,7 @@ impl PageServerNode {

            // Storage controller uses the same auth as pageserver: if JWT is enabled
            // for us, we will also need it to talk to them.
-            if matches!(conf.http_auth_type, AuthType::NeonJWT) {
+            if matches!(http_auth_type, AuthType::NeonJWT) {
                let jwt_token = self
                    .env
                    .generate_auth_token(&Claims::new(None, Scope::GenerationsApi))
@@ -109,40 +157,31 @@ impl PageServerNode {
            }
        }

-        if !conf.other.contains_key("remote_storage") {
+        if !cli_overrides
+            .iter()
+            .any(|c| c.starts_with("remote_storage"))
+        {
            overrides.push(format!(
                "remote_storage={{local_path='../{PAGESERVER_REMOTE_STORAGE_DIR}'}}"
            ));
        }

-        if conf.http_auth_type != AuthType::Trust || conf.pg_auth_type != AuthType::Trust {
+        if *http_auth_type != AuthType::Trust || *pg_auth_type != AuthType::Trust {
            // Keys are generated in the toplevel repo dir, pageservers' workdirs
            // are one level below that, so refer to keys with ../
            overrides.push("auth_validation_public_key_path='../auth_public_key.pem'".to_owned());
        }

        // Apply the user-provided overrides
-        overrides.push(
-            toml_edit::ser::to_string_pretty(&conf)
-                .expect("we deserialized this from toml earlier"),
-        );
+        overrides.extend(cli_overrides.iter().map(|&c| c.to_owned()));

-        // Turn `overrides` into a toml document.
-        // TODO: above code is legacy code, it should be refactored to use toml_edit directly.
-        let mut config_toml = toml_edit::Document::new();
-        for fragment_str in overrides {
-            let fragment = toml_edit::Document::from_str(&fragment_str)
-                .expect("all fragments in `overrides` are valid toml documents, this function controls that");
-            for (key, item) in fragment.iter() {
-                config_toml.insert(key, item.clone());
-            }
-        }
-        Ok(config_toml)
+        overrides
    }

    /// Initializes a pageserver node by creating its config with the overrides provided.
-    pub fn initialize(&self, conf: NeonLocalInitPageserverConf) -> anyhow::Result<()> {
-        self.pageserver_init(conf)
+    pub fn initialize(&self, config_overrides: &[&str]) -> anyhow::Result<()> {
+        // First, run `pageserver --init` and wait for it to write a config into FS and exit.
+        self.pageserver_init(config_overrides)
            .with_context(|| format!("Failed to run init for pageserver node {}", self.conf.id))
    }

@@ -158,11 +197,11 @@ impl PageServerNode {
            .expect("non-Unicode path")
    }

-    pub async fn start(&self) -> anyhow::Result<()> {
-        self.start_node().await
+    pub async fn start(&self, config_overrides: &[&str]) -> anyhow::Result<()> {
+        self.start_node(config_overrides, false).await
    }

-    fn pageserver_init(&self, conf: NeonLocalInitPageserverConf) -> anyhow::Result<()> {
+    fn pageserver_init(&self, config_overrides: &[&str]) -> anyhow::Result<()> {
        let datadir = self.repo_path();
        let node_id = self.conf.id;
        println!(
@@ -173,20 +212,29 @@ impl PageServerNode {
        );
        io::stdout().flush()?;

-        let config = self
-            .pageserver_init_make_toml(conf)
-            .context("make pageserver toml")?;
-        let config_file_path = datadir.join("pageserver.toml");
-        let mut config_file = std::fs::OpenOptions::new()
-            .create_new(true)
-            .write(true)
-            .open(&config_file_path)
-            .with_context(|| format!("open pageserver toml for write: {config_file_path:?}"))?;
-        config_file
-            .write_all(config.to_string().as_bytes())
-            .context("write pageserver toml")?;
-        drop(config_file);
-        // TODO: invoke a TBD config-check command to validate that pageserver will start with the written config
+        if !datadir.exists() {
+            std::fs::create_dir(&datadir)?;
+        }
+
+        let datadir_path_str = datadir.to_str().with_context(|| {
+            format!("Cannot start pageserver node {node_id} in path that has no string representation: {datadir:?}")
+        })?;
+        let mut args = self.pageserver_basic_args(config_overrides, datadir_path_str);
+        args.push(Cow::Borrowed("--init"));
+
+        let init_output = Command::new(self.env.pageserver_bin())
+            .args(args.iter().map(Cow::as_ref))
+            .envs(self.pageserver_env_variables()?)
+            .output()
+            .with_context(|| format!("Failed to run pageserver init for node {node_id}"))?;
+
+        anyhow::ensure!(
+            init_output.status.success(),
+            "Pageserver init for node {} did not finish successfully, stdout: {}, stderr: {}",
+            node_id,
+            String::from_utf8_lossy(&init_output.stdout),
+            String::from_utf8_lossy(&init_output.stderr),
+        );

        // Write metadata file, used by pageserver on startup to register itself with
        // the storage controller
@@ -214,7 +262,11 @@ impl PageServerNode {
        Ok(())
    }

-    async fn start_node(&self) -> anyhow::Result<()> {
+    async fn start_node(
+        &self,
+        config_overrides: &[&str],
+        update_config: bool,
+    ) -> anyhow::Result<()> {
        // TODO: using a thread here because start_process() is not async but we need to call check_status()
        let datadir = self.repo_path();
        print!(
@@ -231,12 +283,15 @@ impl PageServerNode {
                self.conf.id, datadir,
            )
        })?;
-        let args = vec!["-D", datadir_path_str];
+        let mut args = self.pageserver_basic_args(config_overrides, datadir_path_str);
+        if update_config {
+            args.push(Cow::Borrowed("--update-config"));
+        }
        background_process::start_process(
            "pageserver",
            &datadir,
            &self.env.pageserver_bin(),
-            args,
+            args.iter().map(Cow::as_ref),
            self.pageserver_env_variables()?,
            background_process::InitialPidFile::Expect(self.pid_file()),
            || async {
@@ -253,6 +308,22 @@ impl PageServerNode {
        Ok(())
    }

+    fn pageserver_basic_args<'a>(
+        &self,
+        config_overrides: &'a [&'a str],
+        datadir_path_str: &'a str,
+    ) -> Vec<Cow<'a, str>> {
+        let mut args = vec![Cow::Borrowed("-D"), Cow::Borrowed(datadir_path_str)];
+
+        let overrides = self.neon_local_overrides(config_overrides);
+        for config_override in overrides {
+            args.push(Cow::Borrowed("-c"));
+            args.push(Cow::Owned(config_override));
+        }
+
+        args
+    }
+
    fn pageserver_env_variables(&self) -> anyhow::Result<Vec<(String, String)>> {
        // FIXME: why is this tied to pageserver's auth type? Whether or not the safekeeper
        // needs a token, and how to generate that token, seems independent to whether
@@ -378,11 +449,11 @@ impl PageServerNode {
                .map(serde_json::from_str)
                .transpose()
                .context("parse `timeline_get_throttle` from json")?,
-            switch_aux_file_policy: settings
-                .remove("switch_aux_file_policy")
-                .map(|x| x.parse::<AuxFilePolicy>())
+            switch_to_aux_file_v2: settings
+                .remove("switch_to_aux_file_v2")
+                .map(|x| x.parse::<bool>())
                .transpose()
-                .context("Failed to parse 'switch_aux_file_policy'")?,
+                .context("Failed to parse 'switch_to_aux_file_v2' as bool")?,
        };
        if !settings.is_empty() {
            bail!("Unrecognized tenant settings: {settings:?}")
@@ -501,11 +572,11 @@ impl PageServerNode {
                    .map(serde_json::from_str)
                    .transpose()
                    .context("parse `timeline_get_throttle` from json")?,
-                switch_aux_file_policy: settings
-                    .remove("switch_aux_file_policy")
-                    .map(|x| x.parse::<AuxFilePolicy>())
+                switch_to_aux_file_v2: settings
+                    .remove("switch_to_aux_file_v2")
+                    .map(|x| x.parse::<bool>())
                    .transpose()
-                    .context("Failed to parse 'switch_aux_file_policy'")?,
+                    .context("Failed to parse 'switch_to_aux_file_v2' as bool")?,
            }
        };

--- a/control_plane/src/storage_controller.rs
+++ b/control_plane/src/storage_controller.rs
@@ -305,10 +305,6 @@ impl StorageController {
            ));
        }

-        if let Some(split_threshold) = self.config.split_threshold.as_ref() {
-            args.push(format!("--split-threshold={split_threshold}"))
-        }
-
        background_process::start_process(
            COMMAND,
            &self.env.base_data_dir,
--- a/libs/metrics/src/lib.rs
+++ b/libs/metrics/src/lib.rs
@@ -480,15 +480,6 @@ impl<A: CounterPairAssoc> CounterPairVec<A> {
        let id = self.vec.with_labels(labels);
        self.vec.remove_metric(id)
    }
-
-    pub fn sample(&self, labels: <A::LabelGroupSet as LabelGroupSet>::Group<'_>) -> u64 {
-        let id = self.vec.with_labels(labels);
-        let metric = self.vec.get_metric(id);
-
-        let inc = metric.inc.count.load(std::sync::atomic::Ordering::Relaxed);
-        let dec = metric.dec.count.load(std::sync::atomic::Ordering::Relaxed);
-        inc.saturating_sub(dec)
-    }
 }

 impl<T, A> ::measured::metric::group::MetricGroup<T> for CounterPairVec<A>
--- a/libs/pageserver_api/src/keyspace.rs
+++ b/libs/pageserver_api/src/keyspace.rs
@@ -240,7 +240,7 @@ impl<'a> ShardedRange<'a> {
    /// pages that would not actually be stored on this node.
    ///
    /// Don't use this function in code that works with physical entities like layer files.
-    pub fn raw_size(range: &Range<Key>) -> u32 {
+    fn raw_size(range: &Range<Key>) -> u32 {
        if is_contiguous_range(range) {
            contiguous_range_len(range)
        } else {
--- a/libs/pageserver_api/src/models.rs
+++ b/libs/pageserver_api/src/models.rs
@@ -1,4 +1,3 @@
-pub mod detach_ancestor;
 pub mod partitioning;
 pub mod utilization;

@@ -9,7 +8,6 @@ use std::{
    collections::HashMap,
    io::{BufRead, Read},
    num::{NonZeroU64, NonZeroUsize},
-    str::FromStr,
    time::{Duration, SystemTime},
 };

@@ -305,31 +303,7 @@ pub struct TenantConfig {
    pub lazy_slru_download: Option<bool>,
    pub timeline_get_throttle: Option<ThrottleConfig>,
    pub image_layer_creation_check_threshold: Option<u8>,
-    pub switch_aux_file_policy: Option<AuxFilePolicy>,
-}
-
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
-pub enum AuxFilePolicy {
-    V1,
-    V2,
-    CrossValidation,
-}
-
-impl FromStr for AuxFilePolicy {
-    type Err = anyhow::Error;
-
-    fn from_str(s: &str) -> Result<Self, Self::Err> {
-        let s = s.to_lowercase();
-        if s == "v1" {
-            Ok(Self::V1)
-        } else if s == "v2" {
-            Ok(Self::V2)
-        } else if s == "crossvalidation" || s == "cross_validation" {
-            Ok(Self::CrossValidation)
-        } else {
-            anyhow::bail!("cannot parse {} to aux file policy", s)
-        }
-    }
+    pub switch_to_aux_file_v2: Option<bool>,
 }

 #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
@@ -745,16 +719,6 @@ impl HistoricLayerInfo {
        };
        *field = value;
    }
-    pub fn layer_file_size(&self) -> u64 {
-        match self {
-            HistoricLayerInfo::Delta {
-                layer_file_size, ..
-            } => *layer_file_size,
-            HistoricLayerInfo::Image {
-                layer_file_size, ..
-            } => *layer_file_size,
-        }
-    }
 }

 #[derive(Debug, Serialize, Deserialize)]
@@ -786,6 +750,9 @@ pub struct TimelineGcRequest {
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct WalRedoManagerProcessStatus {
    pub pid: u32,
+    /// The strum-generated `into::<&'static str>()` for `pageserver::walredo::ProcessKind`.
+    /// `ProcessKind`s are a transitory thing, so they have no enum representation in `pageserver_api`.
+    pub kind: Cow<'static, str>,
 }

 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -824,55 +791,6 @@ pub struct TenantScanRemoteStorageResponse {
    pub shards: Vec<TenantScanRemoteStorageShard>,
 }

-#[derive(Serialize, Deserialize, Debug, Clone)]
-#[serde(rename_all = "snake_case")]
-pub enum TenantSorting {
-    ResidentSize,
-    MaxLogicalSize,
-}
-
-impl Default for TenantSorting {
-    fn default() -> Self {
-        Self::ResidentSize
-    }
-}
-
-#[derive(Serialize, Deserialize, Debug, Clone)]
-pub struct TopTenantShardsRequest {
-    // How would you like to sort the tenants?
-    pub order_by: TenantSorting,
-
-    // How many results?
-    pub limit: usize,
-
-    // Omit tenants with more than this many shards (e.g. if this is the max number of shards
-    // that the caller would ever split to)
-    pub where_shards_lt: Option<ShardCount>,
-
-    // Omit tenants where the ordering metric is less than this (this is an optimization to
-    // let us quickly exclude numerous tiny shards)
-    pub where_gt: Option<u64>,
-}
-
-#[derive(Serialize, Deserialize, Debug, PartialEq, Eq)]
-pub struct TopTenantShardItem {
-    pub id: TenantShardId,
-
-    /// Total size of layers on local disk for all timelines in this tenant
-    pub resident_size: u64,
-
-    /// Total size of layers in remote storage for all timelines in this tenant
-    pub physical_size: u64,
-
-    /// The largest logical size of a timeline within this tenant
-    pub max_logical_size: u64,
-}
-
-#[derive(Serialize, Deserialize, Debug, Default)]
-pub struct TopTenantShardsResponse {
-    pub shards: Vec<TopTenantShardItem>,
-}
-
 pub mod virtual_file {
    #[derive(
        Copy,
--- a/libs/pageserver_api/src/models/detach_ancestor.rs
+++ b/libs/pageserver_api/src/models/detach_ancestor.rs
@@ -1,6 +0,0 @@
-use utils::id::TimelineId;
-
-#[derive(Default, serde::Serialize)]
-pub struct AncestorDetached {
-    pub reparented_timelines: Vec<TimelineId>,
-}
--- a/libs/pageserver_api/src/shard.rs
+++ b/libs/pageserver_api/src/shard.rs
@@ -125,7 +125,7 @@ impl ShardCount {

    /// `v` may be zero, or the number of shards in the tenant.  `v` is what
    /// [`Self::literal`] would return.
-    pub const fn new(val: u8) -> Self {
+    pub fn new(val: u8) -> Self {
        Self(val)
    }
 }
--- a/libs/postgres_backend/Cargo.toml
+++ b/libs/postgres_backend/Cargo.toml
@@ -5,7 +5,6 @@ edition.workspace = true
 license.workspace = true

 [dependencies]
-async-trait.workspace = true
 anyhow.workspace = true
 bytes.workspace = true
 futures.workspace = true
--- a/libs/postgres_backend/src/lib.rs
+++ b/libs/postgres_backend/src/lib.rs
@@ -78,17 +78,16 @@ pub fn is_expected_io_error(e: &io::Error) -> bool {
    )
 }

-#[async_trait::async_trait]
 pub trait Handler<IO> {
    /// Handle single query.
    /// postgres_backend will issue ReadyForQuery after calling this (this
    /// might be not what we want after CopyData streaming, but currently we don't
    /// care). It will also flush out the output buffer.
-    async fn process_query(
+    fn process_query(
        &mut self,
        pgb: &mut PostgresBackend<IO>,
        query_string: &str,
-    ) -> Result<(), QueryError>;
+    ) -> impl Future<Output = Result<(), QueryError>> + Send;

    /// Called on startup packet receival, allows to process params.
    ///
--- a/libs/postgres_backend/tests/simple_select.rs
+++ b/libs/postgres_backend/tests/simple_select.rs
@@ -22,7 +22,6 @@ async fn make_tcp_pair() -> (TcpStream, TcpStream) {

 struct TestHandler {}

-#[async_trait::async_trait]
 impl<IO: AsyncRead + AsyncWrite + Unpin + Send> Handler<IO> for TestHandler {
    // return single col 'hey' for any query
    async fn process_query(
--- a/libs/remote_storage/src/azure_blob.rs
+++ b/libs/remote_storage/src/azure_blob.rs
@@ -29,7 +29,6 @@ use http_types::{StatusCode, Url};
 use tokio_util::sync::CancellationToken;
 use tracing::debug;

-use crate::RemoteStorageActivity;
 use crate::{
    error::Cancelled, s3_bucket::RequestKind, AzureConfig, ConcurrencyLimiter, Download,
    DownloadError, Listing, ListingMode, RemotePath, RemoteStorage, StorageMetadata,
@@ -526,10 +525,6 @@ impl RemoteStorage for AzureBlobStorage {
        // https://learn.microsoft.com/en-us/azure/storage/blobs/point-in-time-restore-overview
        Err(TimeTravelError::Unimplemented)
    }
-
-    fn activity(&self) -> RemoteStorageActivity {
-        self.concurrency_limiter.activity()
-    }
 }

 pin_project_lite::pin_project! {
--- a/libs/remote_storage/src/lib.rs
+++ b/libs/remote_storage/src/lib.rs
@@ -263,17 +263,6 @@ pub trait RemoteStorage: Send + Sync + 'static {
        done_if_after: SystemTime,
        cancel: &CancellationToken,
    ) -> Result<(), TimeTravelError>;
-
-    /// Query how busy we currently are: may be used by callers which wish to politely
-    /// back off if there are already a lot of operations underway.
-    fn activity(&self) -> RemoteStorageActivity;
-}
-
-pub struct RemoteStorageActivity {
-    pub read_available: usize,
-    pub read_total: usize,
-    pub write_available: usize,
-    pub write_total: usize,
 }

 /// DownloadStream is sensitive to the timeout and cancellation used with the original
@@ -455,15 +444,6 @@ impl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {
            }
        }
    }
-
-    pub fn activity(&self) -> RemoteStorageActivity {
-        match self {
-            Self::LocalFs(s) => s.activity(),
-            Self::AwsS3(s) => s.activity(),
-            Self::AzureBlob(s) => s.activity(),
-            Self::Unreliable(s) => s.activity(),
-        }
-    }
 }

 impl GenericRemoteStorage {
@@ -794,9 +774,6 @@ struct ConcurrencyLimiter {
    // The helps to ensure we don't exceed the thresholds.
    write: Arc<Semaphore>,
    read: Arc<Semaphore>,
-
-    write_total: usize,
-    read_total: usize,
 }

 impl ConcurrencyLimiter {
@@ -825,21 +802,10 @@ impl ConcurrencyLimiter {
        Arc::clone(self.for_kind(kind)).acquire_owned().await
    }

-    fn activity(&self) -> RemoteStorageActivity {
-        RemoteStorageActivity {
-            read_available: self.read.available_permits(),
-            read_total: self.read_total,
-            write_available: self.write.available_permits(),
-            write_total: self.write_total,
-        }
-    }
-
    fn new(limit: usize) -> ConcurrencyLimiter {
        Self {
            read: Arc::new(Semaphore::new(limit)),
            write: Arc::new(Semaphore::new(limit)),
-            read_total: limit,
-            write_total: limit,
        }
    }
 }
--- a/libs/remote_storage/src/local_fs.rs
+++ b/libs/remote_storage/src/local_fs.rs
@@ -23,8 +23,8 @@ use tokio_util::{io::ReaderStream, sync::CancellationToken};
 use utils::crashsafe::path_with_suffix_extension;

 use crate::{
-    Download, DownloadError, Listing, ListingMode, RemotePath, RemoteStorageActivity,
-    TimeTravelError, TimeoutOrCancel, REMOTE_STORAGE_PREFIX_SEPARATOR,
+    Download, DownloadError, Listing, ListingMode, RemotePath, TimeTravelError, TimeoutOrCancel,
+    REMOTE_STORAGE_PREFIX_SEPARATOR,
 };

 use super::{RemoteStorage, StorageMetadata};
@@ -605,16 +605,6 @@ impl RemoteStorage for LocalFs {
    ) -> Result<(), TimeTravelError> {
        Err(TimeTravelError::Unimplemented)
    }
-
-    fn activity(&self) -> RemoteStorageActivity {
-        // LocalFS has no concurrency limiting: give callers the impression that plenty of units are available
-        RemoteStorageActivity {
-            read_available: 16,
-            read_total: 16,
-            write_available: 16,
-            write_total: 16,
-        }
-    }
 }

 fn storage_metadata_path(original_path: &Utf8Path) -> Utf8PathBuf {
--- a/libs/remote_storage/src/s3_bucket.rs
+++ b/libs/remote_storage/src/s3_bucket.rs
@@ -27,7 +27,7 @@ use aws_config::{
 };
 use aws_credential_types::provider::SharedCredentialsProvider;
 use aws_sdk_s3::{
-    config::{AsyncSleep, IdentityCache, Region, SharedAsyncSleep},
+    config::{AsyncSleep, Builder, IdentityCache, Region, SharedAsyncSleep},
    error::SdkError,
    operation::get_object::GetObjectError,
    types::{Delete, DeleteMarkerEntry, ObjectIdentifier, ObjectVersion, StorageClass},
@@ -47,8 +47,8 @@ use utils::backoff;
 use super::StorageMetadata;
 use crate::{
    error::Cancelled, support::PermitCarrying, ConcurrencyLimiter, Download, DownloadError,
-    Listing, ListingMode, RemotePath, RemoteStorage, RemoteStorageActivity, S3Config,
-    TimeTravelError, TimeoutOrCancel, MAX_KEYS_PER_DELETE, REMOTE_STORAGE_PREFIX_SEPARATOR,
+    Listing, ListingMode, RemotePath, RemoteStorage, S3Config, TimeTravelError, TimeoutOrCancel,
+    MAX_KEYS_PER_DELETE, REMOTE_STORAGE_PREFIX_SEPARATOR,
 };

 pub(super) mod metrics;
@@ -75,13 +75,13 @@ struct GetObjectRequest {
 }
 impl S3Bucket {
    /// Creates the S3 storage, errors if incorrect AWS S3 configuration provided.
-    pub fn new(remote_storage_config: &S3Config, timeout: Duration) -> anyhow::Result<Self> {
+    pub fn new(aws_config: &S3Config, timeout: Duration) -> anyhow::Result<Self> {
        tracing::debug!(
            "Creating s3 remote storage for S3 bucket {}",
-            remote_storage_config.bucket_name
+            aws_config.bucket_name
        );

-        let region = Some(Region::new(remote_storage_config.bucket_region.clone()));
+        let region = Some(Region::new(aws_config.bucket_region.clone()));

        let provider_conf = ProviderConfig::without_region().with_region(region.clone());

@@ -113,38 +113,6 @@ impl S3Bucket {
        // AWS SDK requires us to specify how the RetryConfig should sleep when it wants to back off
        let sleep_impl: Arc<dyn AsyncSleep> = Arc::new(TokioSleep::new());

-        let sdk_config_loader: aws_config::ConfigLoader = aws_config::defaults(
-            #[allow(deprecated)] /* TODO: https://github.com/neondatabase/neon/issues/7665 */
-            BehaviorVersion::v2023_11_09(),
-        )
-        .region(region)
-        .identity_cache(IdentityCache::lazy().build())
-        .credentials_provider(SharedCredentialsProvider::new(credentials_provider))
-        .sleep_impl(SharedAsyncSleep::from(sleep_impl));
-
-        let sdk_config: aws_config::SdkConfig = std::thread::scope(|s| {
-            s.spawn(|| {
-                // TODO: make this function async.
-                tokio::runtime::Builder::new_current_thread()
-                    .enable_all()
-                    .build()
-                    .unwrap()
-                    .block_on(sdk_config_loader.load())
-            })
-            .join()
-            .unwrap()
-        });
-
-        let mut s3_config_builder = aws_sdk_s3::config::Builder::from(&sdk_config);
-
-        // Technically, the `remote_storage_config.endpoint` field only applies to S3 interactions.
-        // (In case we ever re-use the `sdk_config` for more than just the S3 client in the future)
-        if let Some(custom_endpoint) = remote_storage_config.endpoint.clone() {
-            s3_config_builder = s3_config_builder
-                .endpoint_url(custom_endpoint)
-                .force_path_style(true);
-        }
-
        // We do our own retries (see [`backoff::retry`]).  However, for the AWS SDK to enable rate limiting in response to throttling
        // responses (e.g. 429 on too many ListObjectsv2 requests), we must provide a retry config.  We set it to use at most one
        // attempt, and enable 'Adaptive' mode, which causes rate limiting to be enabled.
@@ -152,36 +120,42 @@ impl S3Bucket {
        retry_config
            .set_max_attempts(Some(1))
            .set_mode(Some(RetryMode::Adaptive));
-        s3_config_builder = s3_config_builder.retry_config(retry_config.build());

-        let s3_config = s3_config_builder.build();
-        let client = aws_sdk_s3::Client::from_conf(s3_config);
+        let mut config_builder = Builder::default()
+            .behavior_version(BehaviorVersion::v2023_11_09())
+            .region(region)
+            .identity_cache(IdentityCache::lazy().build())
+            .credentials_provider(SharedCredentialsProvider::new(credentials_provider))
+            .retry_config(retry_config.build())
+            .sleep_impl(SharedAsyncSleep::from(sleep_impl));

-        let prefix_in_bucket = remote_storage_config
-            .prefix_in_bucket
-            .as_deref()
-            .map(|prefix| {
-                let mut prefix = prefix;
-                while prefix.starts_with(REMOTE_STORAGE_PREFIX_SEPARATOR) {
-                    prefix = &prefix[1..]
-                }
+        if let Some(custom_endpoint) = aws_config.endpoint.clone() {
+            config_builder = config_builder
+                .endpoint_url(custom_endpoint)
+                .force_path_style(true);
+        }

-                let mut prefix = prefix.to_string();
-                while prefix.ends_with(REMOTE_STORAGE_PREFIX_SEPARATOR) {
-                    prefix.pop();
-                }
-                prefix
-            });
+        let client = Client::from_conf(config_builder.build());

+        let prefix_in_bucket = aws_config.prefix_in_bucket.as_deref().map(|prefix| {
+            let mut prefix = prefix;
+            while prefix.starts_with(REMOTE_STORAGE_PREFIX_SEPARATOR) {
+                prefix = &prefix[1..]
+            }
+
+            let mut prefix = prefix.to_string();
+            while prefix.ends_with(REMOTE_STORAGE_PREFIX_SEPARATOR) {
+                prefix.pop();
+            }
+            prefix
+        });
        Ok(Self {
            client,
-            bucket_name: remote_storage_config.bucket_name.clone(),
-            max_keys_per_list_response: remote_storage_config.max_keys_per_list_response,
+            bucket_name: aws_config.bucket_name.clone(),
+            max_keys_per_list_response: aws_config.max_keys_per_list_response,
            prefix_in_bucket,
-            concurrency_limiter: ConcurrencyLimiter::new(
-                remote_storage_config.concurrency_limit.get(),
-            ),
-            upload_storage_class: remote_storage_config.upload_storage_class.clone(),
+            concurrency_limiter: ConcurrencyLimiter::new(aws_config.concurrency_limit.get()),
+            upload_storage_class: aws_config.upload_storage_class.clone(),
            timeout,
        })
    }
@@ -975,10 +949,6 @@ impl RemoteStorage for S3Bucket {
        }
        Ok(())
    }
-
-    fn activity(&self) -> RemoteStorageActivity {
-        self.concurrency_limiter.activity()
-    }
 }

 /// On drop (cancellation) count towards [`metrics::BucketMetrics::cancelled_waits`].
--- a/libs/remote_storage/src/simulate_failures.rs
+++ b/libs/remote_storage/src/simulate_failures.rs
@@ -12,7 +12,7 @@ use tokio_util::sync::CancellationToken;

 use crate::{
    Download, DownloadError, GenericRemoteStorage, Listing, ListingMode, RemotePath, RemoteStorage,
-    RemoteStorageActivity, StorageMetadata, TimeTravelError,
+    StorageMetadata, TimeTravelError,
 };

 pub struct UnreliableWrapper {
@@ -213,8 +213,4 @@ impl RemoteStorage for UnreliableWrapper {
            .time_travel_recover(prefix, timestamp, done_if_after, cancel)
            .await
    }
-
-    fn activity(&self) -> RemoteStorageActivity {
-        self.inner.activity()
-    }
 }
--- a/libs/utils/src/poison.rs
+++ b/libs/utils/src/poison.rs
@@ -3,7 +3,7 @@
 //!  # Example
 //!
 //!  ```
-//!  # tokio::runtime::Builder::new_current_thread().enable_all().build().unwrap().block_on(async {
+//!  # tokio_test::block_on(async {
 //!  use utils::poison::Poison;
 //!  use std::time::Duration;
 //!
--- a/libs/walproposer/src/api_bindings.rs
+++ b/libs/walproposer/src/api_bindings.rs
@@ -50,14 +50,6 @@ extern "C" fn get_flush_rec_ptr(wp: *mut WalProposer) -> XLogRecPtr {
    }
 }

-extern "C" fn update_donor(wp: *mut WalProposer, donor: *mut Safekeeper, donor_lsn: XLogRecPtr) {
-    unsafe {
-        let callback_data = (*(*wp).config).callback_data;
-        let api = callback_data as *mut Box<dyn ApiImpl>;
-        (*api).update_donor(&mut (*donor), donor_lsn)
-    }
-}
-
 extern "C" fn get_current_timestamp(wp: *mut WalProposer) -> TimestampTz {
    unsafe {
        let callback_data = (*(*wp).config).callback_data;
@@ -399,7 +391,6 @@ pub(crate) fn create_api() -> walproposer_api {
        get_shmem_state: Some(get_shmem_state),
        start_streaming: Some(start_streaming),
        get_flush_rec_ptr: Some(get_flush_rec_ptr),
-        update_donor: Some(update_donor),
        get_current_timestamp: Some(get_current_timestamp),
        conn_error_message: Some(conn_error_message),
        conn_status: Some(conn_status),
@@ -430,32 +421,6 @@ pub(crate) fn create_api() -> walproposer_api {
    }
 }

-pub fn empty_shmem() -> crate::bindings::WalproposerShmemState {
-    let empty_feedback = crate::bindings::PageserverFeedback {
-        present: false,
-        currentClusterSize: 0,
-        last_received_lsn: 0,
-        disk_consistent_lsn: 0,
-        remote_consistent_lsn: 0,
-        replytime: 0,
-        shard_number: 0,
-    };
-
-    crate::bindings::WalproposerShmemState {
-        propEpochStartLsn: crate::bindings::pg_atomic_uint64 { value: 0 },
-        donor_name: [0; 64],
-        donor_conninfo: [0; 1024],
-        donor_lsn: 0,
-        mutex: 0,
-        mineLastElectedTerm: crate::bindings::pg_atomic_uint64 { value: 0 },
-        backpressureThrottlingTime: crate::bindings::pg_atomic_uint64 { value: 0 },
-        currentClusterSize: crate::bindings::pg_atomic_uint64 { value: 0 },
-        shard_ps_feedback: [empty_feedback; 128],
-        num_shards: 0,
-        min_ps_feedback: empty_feedback,
-    }
-}
-
 impl std::fmt::Display for Level {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        write!(f, "{:?}", self)
--- a/libs/walproposer/src/walproposer.rs
+++ b/libs/walproposer/src/walproposer.rs
@@ -1,5 +1,8 @@
 use std::ffi::CString;

+use postgres_ffi::WAL_SEGMENT_SIZE;
+use utils::{id::TenantTimelineId, lsn::Lsn};
+
 use crate::{
    api_bindings::{create_api, take_vec_u8, Level},
    bindings::{
@@ -7,8 +10,6 @@ use crate::{
        WalProposerCreate, WalProposerFree, WalProposerPoll, WalProposerStart,
    },
 };
-use postgres_ffi::WAL_SEGMENT_SIZE;
-use utils::{id::TenantTimelineId, lsn::Lsn};

 /// Rust high-level wrapper for C walproposer API. Many methods are not required
 /// for simple cases, hence todo!() in default implementations.
@@ -27,10 +28,6 @@ pub trait ApiImpl {
        todo!()
    }

-    fn update_donor(&self, _donor: &mut Safekeeper, _donor_lsn: u64) {
-        todo!()
-    }
-
    fn get_current_timestamp(&self) -> i64 {
        todo!()
    }
@@ -277,7 +274,6 @@ mod tests {
        sync::{atomic::AtomicUsize, mpsc::sync_channel},
    };

-    use std::cell::UnsafeCell;
    use utils::id::TenantTimelineId;

    use crate::{api_bindings::Level, bindings::NeonWALReadResult, walproposer::Wrapper};
@@ -301,8 +297,6 @@ mod tests {
        replies_ptr: AtomicUsize,
        // channel to send LSN to the main thread
        sync_channel: std::sync::mpsc::SyncSender<u64>,
-        // Shmem state, used for storing donor info
-        shmem: UnsafeCell<crate::bindings::WalproposerShmemState>,
    }

    impl MockImpl {
@@ -333,22 +327,11 @@ mod tests {
    }

    impl ApiImpl for MockImpl {
-        fn get_shmem_state(&self) -> *mut crate::bindings::WalproposerShmemState {
-            self.shmem.get()
-        }
-
        fn get_current_timestamp(&self) -> i64 {
            println!("get_current_timestamp");
            0
        }

-        fn update_donor(&self, donor: &mut crate::bindings::Safekeeper, donor_lsn: u64) {
-            let mut shmem = unsafe { *self.get_shmem_state() };
-            shmem.propEpochStartLsn.value = donor_lsn;
-            shmem.donor_conninfo = donor.conninfo;
-            shmem.donor_lsn = donor_lsn;
-        }
-
        fn conn_status(
            &self,
            _: &mut crate::bindings::Safekeeper,
@@ -524,7 +507,6 @@ mod tests {
            ],
            replies_ptr: AtomicUsize::new(0),
            sync_channel: sender,
-            shmem: UnsafeCell::new(crate::api_bindings::empty_shmem()),
        });
        let config = crate::walproposer::Config {
            ttid,
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -15,7 +15,6 @@ anyhow.workspace = true
 arc-swap.workspace = true
 async-compression.workspace = true
 async-stream.workspace = true
-async-trait.workspace = true
 byteorder.workspace = true
 bytes.workspace = true
 camino.workspace = true
--- a/pageserver/benches/bench_layer_map.rs
+++ b/pageserver/benches/bench_layer_map.rs
@@ -1,7 +1,7 @@
 use pageserver::keyspace::{KeyPartitioning, KeySpace};
 use pageserver::repository::Key;
 use pageserver::tenant::layer_map::LayerMap;
-use pageserver::tenant::storage_layer::LayerName;
+use pageserver::tenant::storage_layer::LayerFileName;
 use pageserver::tenant::storage_layer::PersistentLayerDesc;
 use pageserver_api::shard::TenantShardId;
 use rand::prelude::{SeedableRng, SliceRandom, StdRng};
@@ -28,7 +28,7 @@ fn build_layer_map(filename_dump: PathBuf) -> LayerMap {
    let mut updates = layer_map.batch_update();
    for fname in filenames {
        let fname = fname.unwrap();
-        let fname = LayerName::from_str(&fname).unwrap();
+        let fname = LayerFileName::from_str(&fname).unwrap();
        let layer = PersistentLayerDesc::from(fname);

        let lsn_range = layer.get_lsn_range();
--- a/pageserver/benches/bench_walredo.rs
+++ b/pageserver/benches/bench_walredo.rs
@@ -30,27 +30,47 @@
 //! 2024-04-15 on i3en.3xlarge
 //!
 //! ```text
-//! short/1           time:   [24.584 µs 24.737 µs 24.922 µs]
-//! short/2           time:   [33.479 µs 33.660 µs 33.888 µs]
-//! short/4           time:   [42.713 µs 43.046 µs 43.440 µs]
-//! short/8           time:   [71.814 µs 72.478 µs 73.240 µs]
-//! short/16          time:   [132.73 µs 134.45 µs 136.22 µs]
-//! short/32          time:   [258.31 µs 260.73 µs 263.27 µs]
-//! short/64          time:   [511.61 µs 514.44 µs 517.51 µs]
-//! short/128         time:   [992.64 µs 998.23 µs 1.0042 ms]
-//! medium/1          time:   [110.11 µs 110.50 µs 110.96 µs]
-//! medium/2          time:   [153.06 µs 153.85 µs 154.99 µs]
-//! medium/4          time:   [317.51 µs 319.92 µs 322.85 µs]
-//! medium/8          time:   [638.30 µs 644.68 µs 652.12 µs]
-//! medium/16         time:   [1.2651 ms 1.2773 ms 1.2914 ms]
-//! medium/32         time:   [2.5117 ms 2.5410 ms 2.5720 ms]
-//! medium/64         time:   [4.8088 ms 4.8555 ms 4.9047 ms]
-//! medium/128        time:   [8.8311 ms 8.9849 ms 9.1263 ms]
+//! async-short/1           time:   [24.584 µs 24.737 µs 24.922 µs]
+//! async-short/2           time:   [33.479 µs 33.660 µs 33.888 µs]
+//! async-short/4           time:   [42.713 µs 43.046 µs 43.440 µs]
+//! async-short/8           time:   [71.814 µs 72.478 µs 73.240 µs]
+//! async-short/16          time:   [132.73 µs 134.45 µs 136.22 µs]
+//! async-short/32          time:   [258.31 µs 260.73 µs 263.27 µs]
+//! async-short/64          time:   [511.61 µs 514.44 µs 517.51 µs]
+//! async-short/128         time:   [992.64 µs 998.23 µs 1.0042 ms]
+//! async-medium/1          time:   [110.11 µs 110.50 µs 110.96 µs]
+//! async-medium/2          time:   [153.06 µs 153.85 µs 154.99 µs]
+//! async-medium/4          time:   [317.51 µs 319.92 µs 322.85 µs]
+//! async-medium/8          time:   [638.30 µs 644.68 µs 652.12 µs]
+//! async-medium/16         time:   [1.2651 ms 1.2773 ms 1.2914 ms]
+//! async-medium/32         time:   [2.5117 ms 2.5410 ms 2.5720 ms]
+//! async-medium/64         time:   [4.8088 ms 4.8555 ms 4.9047 ms]
+//! async-medium/128        time:   [8.8311 ms 8.9849 ms 9.1263 ms]
+//! sync-short/1            time:   [25.503 µs 25.626 µs 25.771 µs]
+//! sync-short/2            time:   [30.850 µs 31.013 µs 31.208 µs]
+//! sync-short/4            time:   [45.543 µs 45.856 µs 46.193 µs]
+//! sync-short/8            time:   [84.114 µs 84.639 µs 85.220 µs]
+//! sync-short/16           time:   [185.22 µs 186.15 µs 187.13 µs]
+//! sync-short/32           time:   [377.43 µs 378.87 µs 380.46 µs]
+//! sync-short/64           time:   [756.49 µs 759.04 µs 761.70 µs]
+//! sync-short/128          time:   [1.4825 ms 1.4874 ms 1.4923 ms]
+//! sync-medium/1           time:   [105.66 µs 106.01 µs 106.43 µs]
+//! sync-medium/2           time:   [153.10 µs 153.84 µs 154.72 µs]
+//! sync-medium/4           time:   [327.13 µs 329.44 µs 332.27 µs]
+//! sync-medium/8           time:   [654.26 µs 658.73 µs 663.63 µs]
+//! sync-medium/16          time:   [1.2682 ms 1.2748 ms 1.2816 ms]
+//! sync-medium/32          time:   [2.4456 ms 2.4595 ms 2.4731 ms]
+//! sync-medium/64          time:   [4.6523 ms 4.6890 ms 4.7256 ms]
+//! sync-medium/128         time:   [8.7215 ms 8.8323 ms 8.9344 ms]
 //! ```

 use bytes::{Buf, Bytes};
 use criterion::{BenchmarkId, Criterion};
-use pageserver::{config::PageServerConf, walrecord::NeonWalRecord, walredo::PostgresRedoManager};
+use pageserver::{
+    config::PageServerConf,
+    walrecord::NeonWalRecord,
+    walredo::{PostgresRedoManager, ProcessKind},
+};
 use pageserver_api::{key::Key, shard::TenantShardId};
 use std::{
    sync::Arc,
@@ -60,32 +80,39 @@ use tokio::{sync::Barrier, task::JoinSet};
 use utils::{id::TenantId, lsn::Lsn};

 fn bench(c: &mut Criterion) {
-    {
-        let nclients = [1, 2, 4, 8, 16, 32, 64, 128];
-        for nclients in nclients {
-            let mut group = c.benchmark_group("short");
-            group.bench_with_input(
-                BenchmarkId::from_parameter(nclients),
-                &nclients,
-                |b, nclients| {
-                    let redo_work = Arc::new(Request::short_input());
-                    b.iter_custom(|iters| bench_impl(Arc::clone(&redo_work), iters, *nclients));
-                },
-            );
+    for process_kind in &[ProcessKind::Async, ProcessKind::Sync] {
+        {
+            let nclients = [1, 2, 4, 8, 16, 32, 64, 128];
+            for nclients in nclients {
+                let mut group = c.benchmark_group(format!("{process_kind}-short"));
+                group.bench_with_input(
+                    BenchmarkId::from_parameter(nclients),
+                    &nclients,
+                    |b, nclients| {
+                        let redo_work = Arc::new(Request::short_input());
+                        b.iter_custom(|iters| {
+                            bench_impl(*process_kind, Arc::clone(&redo_work), iters, *nclients)
+                        });
+                    },
+                );
+            }
        }
-    }
-    {
-        let nclients = [1, 2, 4, 8, 16, 32, 64, 128];
-        for nclients in nclients {
-            let mut group = c.benchmark_group("medium");
-            group.bench_with_input(
-                BenchmarkId::from_parameter(nclients),
-                &nclients,
-                |b, nclients| {
-                    let redo_work = Arc::new(Request::medium_input());
-                    b.iter_custom(|iters| bench_impl(Arc::clone(&redo_work), iters, *nclients));
-                },
-            );
+
+        {
+            let nclients = [1, 2, 4, 8, 16, 32, 64, 128];
+            for nclients in nclients {
+                let mut group = c.benchmark_group(format!("{process_kind}-medium"));
+                group.bench_with_input(
+                    BenchmarkId::from_parameter(nclients),
+                    &nclients,
+                    |b, nclients| {
+                        let redo_work = Arc::new(Request::medium_input());
+                        b.iter_custom(|iters| {
+                            bench_impl(*process_kind, Arc::clone(&redo_work), iters, *nclients)
+                        });
+                    },
+                );
+            }
        }
    }
 }
@@ -93,10 +120,16 @@ criterion::criterion_group!(benches, bench);
 criterion::criterion_main!(benches);

 // Returns the sum of each client's wall-clock time spent executing their share of the n_redos.
-fn bench_impl(redo_work: Arc<Request>, n_redos: u64, nclients: u64) -> Duration {
+fn bench_impl(
+    process_kind: ProcessKind,
+    redo_work: Arc<Request>,
+    n_redos: u64,
+    nclients: u64,
+) -> Duration {
    let repo_dir = camino_tempfile::tempdir_in(env!("CARGO_TARGET_TMPDIR")).unwrap();

-    let conf = PageServerConf::dummy_conf(repo_dir.path().to_path_buf());
+    let mut conf = PageServerConf::dummy_conf(repo_dir.path().to_path_buf());
+    conf.walredo_process_kind = process_kind;
    let conf = Box::leak(Box::new(conf));
    let tenant_shard_id = TenantShardId::unsharded(TenantId::generate());

@@ -125,13 +158,27 @@ fn bench_impl(redo_work: Arc<Request>, n_redos: u64, nclients: u64) -> Duration
        });
    }

-    rt.block_on(async move {
+    let elapsed = rt.block_on(async move {
        let mut total_wallclock_time = Duration::ZERO;
        while let Some(res) = tasks.join_next().await {
            total_wallclock_time += res.unwrap();
        }
        total_wallclock_time
-    })
+    });
+
+    // consistency check to ensure process kind setting worked
+    if nredos_per_client > 0 {
+        assert_eq!(
+            manager
+                .status()
+                .process
+                .map(|p| p.kind)
+                .expect("the benchmark work causes a walredo process to be spawned"),
+            std::borrow::Cow::Borrowed(process_kind.into())
+        );
+    }
+
+    elapsed
 }

 async fn client(
--- a/pageserver/client/src/mgmt_api.rs
+++ b/pageserver/client/src/mgmt_api.rs
@@ -486,18 +486,6 @@ impl Client {
            .map_err(Error::ReceiveBody)
    }

-    pub async fn top_tenant_shards(
-        &self,
-        request: TopTenantShardsRequest,
-    ) -> Result<TopTenantShardsResponse> {
-        let uri = format!("{}/v1/top_tenants", self.mgmt_api_endpoint);
-        self.request(Method::POST, uri, request)
-            .await?
-            .json()
-            .await
-            .map_err(Error::ReceiveBody)
-    }
-
    pub async fn layer_map_info(
        &self,
        tenant_shard_id: TenantShardId,
--- a/pageserver/compaction/src/compact_tiered.rs
+++ b/pageserver/compaction/src/compact_tiered.rs
@@ -24,9 +24,7 @@ use tracing::{debug, info};
 use std::collections::{HashSet, VecDeque};
 use std::ops::Range;

-use crate::helpers::{
-    accum_key_values, keyspace_total_size, merge_delta_keys_buffered, overlaps_with,
-};
+use crate::helpers::{accum_key_values, keyspace_total_size, merge_delta_keys, overlaps_with};
 use crate::interface::*;
 use utils::lsn::Lsn;

@@ -106,13 +104,7 @@ pub async fn compact_tiered<E: CompactionJobExecutor>(
            ctx,
        )
        .await?;
-        if current_level_target_height == u64::MAX {
-            // our target height includes all possible lsns
-            info!(
-                level = current_level_no,
-                depth = depth,
-                "compaction loop reached max current_level_target_height"
-            );
+        if target_file_size == u64::MAX {
            break;
        }
        current_level_no += 1;
@@ -530,6 +522,8 @@ where
        // If we have accumulated only a narrow band of keyspace, create an
        // image layer. Otherwise write a delta layer.

+        // FIXME: deal with the case of lots of values for same key
+
        // FIXME: we are ignoring images here. Did we already divide the work
        // so that we won't encounter them here?

@@ -541,100 +535,42 @@ where
            }
        }
        // Open stream
-        let key_value_stream =
-            std::pin::pin!(merge_delta_keys_buffered::<E>(deltas.as_slice(), ctx)
-                .await?
-                .map(Result::<_, anyhow::Error>::Ok));
+        let key_value_stream = std::pin::pin!(merge_delta_keys::<E>(deltas.as_slice(), ctx));
        let mut new_jobs = Vec::new();

        // Slide a window through the keyspace
-        let mut key_accum =
-            std::pin::pin!(accum_key_values(key_value_stream, self.target_file_size));
+        let mut key_accum = std::pin::pin!(accum_key_values(key_value_stream));
        let mut all_in_window: bool = false;
        let mut window = Window::new();
-
-        // Helper function to create a job for a new delta layer with given key-lsn
-        // rectangle.
-        let create_delta_job = |key_range, lsn_range: &Range<Lsn>, new_jobs: &mut Vec<_>| {
-            // The inputs for the job are all the input layers of the original job that
-            // overlap with the rectangle.
-            let batch_layers: Vec<LayerId> = job
-                .input_layers
-                .iter()
-                .filter(|layer_id| {
-                    overlaps_with(self.layers[layer_id.0].layer.key_range(), &key_range)
-                })
-                .cloned()
-                .collect();
-            assert!(!batch_layers.is_empty());
-            new_jobs.push(CompactionJob {
-                key_range,
-                lsn_range: lsn_range.clone(),
-                strategy: CompactionStrategy::CreateDelta,
-                input_layers: batch_layers,
-                completed: false,
-            });
-        };
-
        loop {
-            if all_in_window && window.is_empty() {
+            if all_in_window && window.elems.is_empty() {
                // All done!
                break;
            }
-
-            // If we now have enough keyspace for next delta layer in the window, create a
-            // new delta layer
            if let Some(key_range) = window.choose_next_delta(self.target_file_size, !all_in_window)
            {
-                create_delta_job(key_range, &job.lsn_range, &mut new_jobs);
-                continue;
-            }
-            assert!(!all_in_window);
-
-            // Process next key in the key space
-            match key_accum.next().await.transpose()? {
-                None => {
-                    all_in_window = true;
-                }
-                Some(next_key) if next_key.partition_lsns.is_empty() => {
-                    // Normal case: extend the window by the key
+                let batch_layers: Vec<LayerId> = job
+                    .input_layers
+                    .iter()
+                    .filter(|layer_id| {
+                        overlaps_with(self.layers[layer_id.0].layer.key_range(), &key_range)
+                    })
+                    .cloned()
+                    .collect();
+                assert!(!batch_layers.is_empty());
+                new_jobs.push(CompactionJob {
+                    key_range,
+                    lsn_range: job.lsn_range.clone(),
+                    strategy: CompactionStrategy::CreateDelta,
+                    input_layers: batch_layers,
+                    completed: false,
+                });
+            } else {
+                assert!(!all_in_window);
+                if let Some(next_key) = key_accum.next().await.transpose()? {
                    window.feed(next_key.key, next_key.size);
-                }
-                Some(next_key) => {
-                    // A key with too large size impact for a single delta layer. This
-                    // case occurs if you make a huge number of updates for a single key.
-                    //
-                    // Drain the window with has_more = false to make a clean cut before
-                    // the key, and then make dedicated delta layers for the single key.
-                    //
-                    // We cannot cluster the key with the others, because we don't want
-                    // layer files to overlap with each other in the lsn,key space (no
-                    // overlaps for the rectangles).
-                    let key = next_key.key;
-                    debug!("key {key} with size impact larger than the layer size");
-                    while !window.is_empty() {
-                        let has_more = false;
-                        let key_range = window.choose_next_delta(self.target_file_size, has_more)
-                            .expect("with has_more==false, choose_next_delta always returns something for a non-empty Window");
-                        create_delta_job(key_range, &job.lsn_range, &mut new_jobs);
-                    }
-
-                    // Not really required: but here for future resilience:
-                    // We make a "gap" here, so any structure the window holds should
-                    // probably be reset.
-                    window = Window::new();
-
-                    let mut prior_lsn = job.lsn_range.start;
-                    let mut lsn_ranges = Vec::new();
-                    for (lsn, _size) in next_key.partition_lsns.iter() {
-                        lsn_ranges.push(prior_lsn..*lsn);
-                        prior_lsn = *lsn;
-                    }
-                    lsn_ranges.push(prior_lsn..job.lsn_range.end);
-                    for lsn_range in lsn_ranges {
-                        let key_range = key..key.next();
-                        create_delta_job(key_range, &lsn_range, &mut new_jobs);
-                    }
+                } else {
+                    all_in_window = true;
                }
            }
        }
@@ -856,10 +792,6 @@ where
        self.elems.front().unwrap().accum_size - self.splitoff_size
    }

-    fn is_empty(&self) -> bool {
-        self.elems.is_empty()
-    }
-
    fn commit_upto(&mut self, mut upto: usize) {
        while upto > 1 {
            let popped = self.elems.pop_front().unwrap();
--- a/pageserver/compaction/src/helpers.rs
+++ b/pageserver/compaction/src/helpers.rs
@@ -9,12 +9,10 @@ use pageserver_api::shard::ShardIdentity;
 use pin_project_lite::pin_project;
 use std::collections::BinaryHeap;
 use std::collections::VecDeque;
-use std::fmt::Display;
 use std::future::Future;
 use std::ops::{DerefMut, Range};
 use std::pin::Pin;
 use std::task::{ready, Poll};
-use utils::lsn::Lsn;

 pub fn keyspace_total_size<K>(
    keyspace: &CompactionKeySpace<K>,
@@ -110,40 +108,17 @@ pub fn merge_delta_keys<'a, E: CompactionJobExecutor>(
    }
 }

-pub async fn merge_delta_keys_buffered<'a, E: CompactionJobExecutor + 'a>(
-    layers: &'a [E::DeltaLayer],
-    ctx: &'a E::RequestContext,
-) -> anyhow::Result<impl Stream<Item = <E::DeltaLayer as CompactionDeltaLayer<E>>::DeltaEntry<'a>>>
-{
-    let mut keys = Vec::new();
-    for l in layers {
-        // Boxing and casting to LoadFuture is required to obtain the right Sync bound.
-        // If we do l.load_keys(ctx).await? directly, there is a compilation error.
-        let load_future: LoadFuture<'a, _> = Box::pin(l.load_keys(ctx));
-        keys.extend(load_future.await?.into_iter());
-    }
-    keys.sort_by_key(|k| (k.key(), k.lsn()));
-    let stream = futures::stream::iter(keys.into_iter());
-    Ok(stream)
-}
-
 enum LazyLoadLayer<'a, E: CompactionJobExecutor> {
    Loaded(VecDeque<<E::DeltaLayer as CompactionDeltaLayer<E>>::DeltaEntry<'a>>),
    Unloaded(&'a E::DeltaLayer),
 }
 impl<'a, E: CompactionJobExecutor> LazyLoadLayer<'a, E> {
-    fn min_key(&self) -> E::Key {
+    fn key(&self) -> E::Key {
        match self {
            Self::Loaded(entries) => entries.front().unwrap().key(),
            Self::Unloaded(dl) => dl.key_range().start,
        }
    }
-    fn min_lsn(&self) -> Lsn {
-        match self {
-            Self::Loaded(entries) => entries.front().unwrap().lsn(),
-            Self::Unloaded(dl) => dl.lsn_range().start,
-        }
-    }
 }
 impl<'a, E: CompactionJobExecutor> PartialOrd for LazyLoadLayer<'a, E> {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
@@ -153,12 +128,12 @@ impl<'a, E: CompactionJobExecutor> PartialOrd for LazyLoadLayer<'a, E> {
 impl<'a, E: CompactionJobExecutor> Ord for LazyLoadLayer<'a, E> {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        // reverse order so that we get a min-heap
-        (other.min_key(), other.min_lsn()).cmp(&(self.min_key(), self.min_lsn()))
+        other.key().cmp(&self.key())
    }
 }
 impl<'a, E: CompactionJobExecutor> PartialEq for LazyLoadLayer<'a, E> {
    fn eq(&self, other: &Self) -> bool {
-        self.cmp(other) == std::cmp::Ordering::Equal
+        self.key().eq(&other.key())
    }
 }
 impl<'a, E: CompactionJobExecutor> Eq for LazyLoadLayer<'a, E> {}
@@ -235,16 +210,11 @@ pub struct KeySize<K> {
    pub key: K,
    pub num_values: u64,
    pub size: u64,
-    /// The lsns to partition at (if empty then no per-lsn partitioning)
-    pub partition_lsns: Vec<(Lsn, u64)>,
 }

-pub fn accum_key_values<'a, I, K, D, E>(
-    input: I,
-    target_size: u64,
-) -> impl Stream<Item = Result<KeySize<K>, E>>
+pub fn accum_key_values<'a, I, K, D, E>(input: I) -> impl Stream<Item = Result<KeySize<K>, E>>
 where
-    K: Eq + PartialOrd + Display + Copy,
+    K: Eq,
    I: Stream<Item = Result<D, E>>,
    D: CompactionDeltaEntry<'a, K>,
 {
@@ -254,39 +224,25 @@ where

        if let Some(first) = input.next().await {
            let first = first?;
-            let mut part_size = first.size();
            let mut accum: KeySize<K> = KeySize {
                key: first.key(),
                num_values: 1,
-                size: part_size,
-                partition_lsns: Vec::new(),
+                size: first.size(),
            };
-            let mut last_key = accum.key;
            while let Some(this) = input.next().await {
                let this = this?;
                if this.key() == accum.key {
-                    let add_size = this.size();
-                    if part_size + add_size > target_size {
-                        accum.partition_lsns.push((this.lsn(), part_size));
-                        part_size = 0;
-                    }
-                    part_size += add_size;
-                    accum.size += add_size;
+                    accum.size += this.size();
                    accum.num_values += 1;
                } else {
-                    assert!(last_key <= accum.key, "last_key={last_key} <= accum.key={}", accum.key);
-                    last_key = accum.key;
                    yield accum;
-                    part_size = this.size();
                    accum = KeySize {
                        key: this.key(),
                        num_values: 1,
-                        size: part_size,
-                        partition_lsns: Vec::new(),
+                        size: this.size(),
                    };
                }
            }
-            assert!(last_key <= accum.key, "last_key={last_key} <= accum.key={}", accum.key);
            yield accum;
        }
    }
--- a/pageserver/compaction/src/identify_levels.rs
+++ b/pageserver/compaction/src/identify_levels.rs
@@ -184,12 +184,6 @@ impl<L> Level<L> {
        }
        let mut events: Vec<Event<K>> = Vec::new();
        for (idx, l) in self.layers.iter().enumerate() {
-            let key_range = l.key_range();
-            if key_range.end == key_range.start.next() && l.is_delta() {
-                // Ignore single-key delta layers as they can be stacked on top of each other
-                // as that is the only way to cut further.
-                continue;
-            }
            events.push(Event {
                key: l.key_range().start,
                layer_idx: idx,
--- a/pageserver/compaction/tests/tests.rs
+++ b/pageserver/compaction/tests/tests.rs
@@ -1,35 +1,23 @@
-use once_cell::sync::OnceCell;
 use pageserver_compaction::interface::CompactionLayer;
 use pageserver_compaction::simulator::MockTimeline;
-use utils::logging;
-
-static LOG_HANDLE: OnceCell<()> = OnceCell::new();
-
-pub(crate) fn setup_logging() {
-    LOG_HANDLE.get_or_init(|| {
-        logging::init(
-            logging::LogFormat::Test,
-            logging::TracingErrorLayerEnablement::EnableWithRustLogFilter,
-            logging::Output::Stdout,
-        )
-        .expect("Failed to init test logging")
-    });
-}

 /// Test the extreme case that there are so many updates for a single key that
 /// even if we produce an extremely narrow delta layer, spanning just that one
 /// key, we still too many records to fit in the target file size. We need to
 /// split in the LSN dimension too in that case.
+///
+/// TODO: The code to avoid this problem has not been implemented yet! So the
+/// assertion currently fails, but we need to make it not fail.
+#[ignore]
 #[tokio::test]
 async fn test_many_updates_for_single_key() {
-    setup_logging();
    let mut executor = MockTimeline::new();
-    executor.target_file_size = 1_000_000; // 1 MB
+    executor.target_file_size = 10_000_000; // 10 MB

-    // Ingest 10 MB of updates to a single key.
+    // Ingest 100 MB of updates to a single key.
    for _ in 1..1000 {
        executor.ingest_uniform(100, 10, &(0..100_000)).unwrap();
-        executor.ingest_uniform(1000, 10, &(0..1)).unwrap();
+        executor.ingest_uniform(10_000, 10, &(0..1)).unwrap();
        executor.compact().await.unwrap();
    }

@@ -39,32 +27,9 @@ async fn test_many_updates_for_single_key() {
    }
    for l in executor.live_layers.iter() {
        assert!(l.file_size() < executor.target_file_size * 2);
-        // Sanity check that none of the delta layers are empty either.
+        // sanity check that none of the delta layers are stupidly small either
        if l.is_delta() {
-            assert!(l.file_size() > 0);
+            assert!(l.file_size() > executor.target_file_size / 2);
        }
    }
 }
-
-#[tokio::test]
-async fn test_simple_updates() {
-    setup_logging();
-    let mut executor = MockTimeline::new();
-    executor.target_file_size = 500_000; // 500 KB
-
-    // Ingest some traffic.
-    for _ in 1..400 {
-        executor.ingest_uniform(100, 500, &(0..100_000)).unwrap();
-    }
-
-    for l in executor.live_layers.iter() {
-        println!("layer {}: {}", l.short_id(), l.file_size());
-    }
-
-    println!("Running compaction...");
-    executor.compact().await.unwrap();
-
-    for l in executor.live_layers.iter() {
-        println!("layer {}: {}", l.short_id(), l.file_size());
-    }
-}
--- a/pageserver/ctl/src/draw_timeline_dir.rs
+++ b/pageserver/ctl/src/draw_timeline_dir.rs
@@ -28,8 +28,6 @@
 //! # From an `index_part.json` in S3
 //! (jq -r '.layer_metadata | keys[]' | cargo  run -p pagectl draw-timeline ) < index_part.json-00000016 > out.svg
 //!
-//! # enrich with lines for gc_cutoff and a child branch point
-//! cat <(jq -r '.historic_layers[] | .layer_file_name' < layers.json) <(echo -e 'gc_cutoff:0000001CE3FE32C9\nbranch:0000001DE3FE32C9') | cargo run --bin pagectl draw-timeline >| out.svg
 //! ```
 //!
 //! ## Viewing
@@ -50,8 +48,9 @@
 //! ```
 //!

-use anyhow::{Context, Result};
+use anyhow::Result;
 use pageserver::repository::Key;
+use pageserver::METADATA_FILE_NAME;
 use std::cmp::Ordering;
 use std::io::{self, BufRead};
 use std::path::PathBuf;
@@ -82,11 +81,6 @@ fn parse_filename(name: &str) -> (Range<Key>, Range<Lsn>) {
    let split: Vec<&str> = name.split("__").collect();
    let keys: Vec<&str> = split[0].split('-').collect();
    let mut lsns: Vec<&str> = split[1].split('-').collect();
-
-    if lsns.last().expect("should").len() == 8 {
-        lsns.pop();
-    }
-
    if lsns.len() == 1 {
        lsns.push(lsns[0]);
    }
@@ -96,33 +90,6 @@ fn parse_filename(name: &str) -> (Range<Key>, Range<Lsn>) {
    (keys, lsns)
 }

-#[derive(Clone, Copy)]
-enum LineKind {
-    GcCutoff,
-    Branch,
-}
-
-impl From<LineKind> for Fill {
-    fn from(value: LineKind) -> Self {
-        match value {
-            LineKind::GcCutoff => Fill::Color(rgb(255, 0, 0)),
-            LineKind::Branch => Fill::Color(rgb(0, 255, 0)),
-        }
-    }
-}
-
-impl FromStr for LineKind {
-    type Err = anyhow::Error;
-
-    fn from_str(s: &str) -> std::prelude::v1::Result<Self, Self::Err> {
-        Ok(match s {
-            "gc_cutoff" => LineKind::GcCutoff,
-            "branch" => LineKind::Branch,
-            _ => anyhow::bail!("unsupported linekind: {s}"),
-        })
-    }
-}
-
 pub fn main() -> Result<()> {
    // Parse layer filenames from stdin
    struct Layer {
@@ -132,32 +99,15 @@ pub fn main() -> Result<()> {
    }
    let mut files: Vec<Layer> = vec![];
    let stdin = io::stdin();
-
-    let mut lines: Vec<(Lsn, LineKind)> = vec![];
-
-    for (lineno, line) in stdin.lock().lines().enumerate() {
-        let lineno = lineno + 1;
-
+    for line in stdin.lock().lines() {
        let line = line.unwrap();
-        if let Some((kind, lsn)) = line.split_once(':') {
-            let (kind, lsn) = LineKind::from_str(kind)
-                .context("parse kind")
-                .and_then(|kind| {
-                    if lsn.contains('/') {
-                        Lsn::from_str(lsn)
-                    } else {
-                        Lsn::from_hex(lsn)
-                    }
-                    .map(|lsn| (kind, lsn))
-                    .context("parse lsn")
-                })
-                .with_context(|| format!("parse {line:?} on {lineno}"))?;
-            lines.push((lsn, kind));
-            continue;
-        }
        let line = PathBuf::from_str(&line).unwrap();
        let filename = line.file_name().unwrap();
        let filename = filename.to_str().unwrap();
+        if filename == METADATA_FILE_NAME {
+            // Don't try and parse "metadata" like a key-lsn range
+            continue;
+        }
        let (key_range, lsn_range) = parse_filename(filename);
        files.push(Layer {
            filename: filename.to_owned(),
@@ -167,9 +117,8 @@ pub fn main() -> Result<()> {
    }

    // Collect all coordinates
-    let mut keys: Vec<Key> = Vec::with_capacity(files.len());
-    let mut lsns: Vec<Lsn> = Vec::with_capacity(files.len() + lines.len());
-
+    let mut keys: Vec<Key> = vec![];
+    let mut lsns: Vec<Lsn> = vec![];
    for Layer {
        key_range: keyr,
        lsn_range: lsnr,
@@ -182,8 +131,6 @@ pub fn main() -> Result<()> {
        lsns.push(lsnr.end);
    }

-    lsns.extend(lines.iter().map(|(lsn, _)| *lsn));
-
    // Analyze
    let key_map = build_coordinate_compression_map(keys);
    let lsn_map = build_coordinate_compression_map(lsns);
@@ -197,13 +144,10 @@ pub fn main() -> Result<()> {
    println!(
        "{}",
        BeginSvg {
-            w: (key_map.len() + 10) as f32,
+            w: key_map.len() as f32,
            h: stretch * lsn_map.len() as f32
        }
    );
-
-    let xmargin = 0.05; // Height-dependent margin to disambiguate overlapping deltas
-
    for Layer {
        filename,
        key_range: keyr,
@@ -225,6 +169,7 @@ pub fn main() -> Result<()> {
        let mut lsn_diff = (lsn_end - lsn_start) as f32;
        let mut fill = Fill::None;
        let mut ymargin = 0.05 * lsn_diff; // Height-dependent margin to disambiguate overlapping deltas
+        let xmargin = 0.05; // Constant horizontal margin to disambiguate adjacent layers
        let mut lsn_offset = 0.0;

        // Fill in and thicken rectangle if it's an
@@ -244,7 +189,7 @@ pub fn main() -> Result<()> {
        println!(
            "    {}",
            rectangle(
-                5.0 + key_start as f32 + stretch * xmargin,
+                key_start as f32 + stretch * xmargin,
                stretch * (lsn_max as f32 - (lsn_end as f32 - ymargin - lsn_offset)),
                key_diff as f32 - stretch * 2.0 * xmargin,
                stretch * (lsn_diff - 2.0 * ymargin)
@@ -255,26 +200,6 @@ pub fn main() -> Result<()> {
            .comment(filename)
        );
    }
-
-    for (lsn, kind) in lines {
-        let lsn_start = *lsn_map.get(&lsn).unwrap();
-        let lsn_end = lsn_start;
-        let stretch = 2.0;
-        let lsn_diff = 0.3;
-        let lsn_offset = -lsn_diff / 2.0;
-        let ymargin = 0.05;
-        println!(
-            "{}",
-            rectangle(
-                0.0f32 + stretch * xmargin,
-                stretch * (lsn_map.len() as f32 - (lsn_end as f32 - ymargin - lsn_offset)),
-                (key_map.len() + 10) as f32,
-                stretch * (lsn_diff - 2.0 * ymargin)
-            )
-            .fill(kind)
-        );
-    }
-
    println!("{}", EndSvg);

    eprintln!("num_images: {}", num_images);
--- a/pageserver/ctl/src/index_part.rs
+++ b/pageserver/ctl/src/index_part.rs
@@ -3,7 +3,7 @@ use std::collections::HashMap;
 use anyhow::Context;
 use camino::Utf8PathBuf;
 use pageserver::tenant::remote_timeline_client::index::IndexLayerMetadata;
-use pageserver::tenant::storage_layer::LayerName;
+use pageserver::tenant::storage_layer::LayerFileName;
 use pageserver::tenant::{metadata::TimelineMetadata, IndexPart};
 use utils::lsn::Lsn;

@@ -19,7 +19,7 @@ pub(crate) async fn main(cmd: &IndexPartCmd) -> anyhow::Result<()> {
            let des: IndexPart = IndexPart::from_s3_bytes(&bytes).context("deserialize")?;
            #[derive(serde::Serialize)]
            struct Output<'a> {
-                layer_metadata: &'a HashMap<LayerName, IndexLayerMetadata>,
+                layer_metadata: &'a HashMap<LayerFileName, IndexLayerMetadata>,
                disk_consistent_lsn: Lsn,
                timeline_metadata: &'a TimelineMetadata,
            }
--- a/pageserver/ctl/src/layer_map_analyzer.rs
+++ b/pageserver/ctl/src/layer_map_analyzer.rs
@@ -100,7 +100,7 @@ pub(crate) fn parse_filename(name: &str) -> Option<LayerFile> {

 // Finds the max_holes largest holes, ignoring any that are smaller than MIN_HOLE_LENGTH"
 async fn get_holes(path: &Utf8Path, max_holes: usize, ctx: &RequestContext) -> Result<Vec<Hole>> {
-    let file = VirtualFile::open(path, ctx).await?;
+    let file = VirtualFile::open(path).await?;
    let file_id = page_cache::next_file_id();
    let block_reader = FileBlockReader::new(&file, file_id);
    let summary_blk = block_reader.read_blk(0, ctx).await?;
--- a/pageserver/ctl/src/layers.rs
+++ b/pageserver/ctl/src/layers.rs
@@ -61,7 +61,7 @@ async fn read_delta_file(path: impl AsRef<Path>, ctx: &RequestContext) -> Result
    let path = Utf8Path::from_path(path.as_ref()).expect("non-Unicode path");
    virtual_file::init(10, virtual_file::api::IoEngineKind::StdFs);
    page_cache::init(100);
-    let file = VirtualFile::open(path, ctx).await?;
+    let file = VirtualFile::open(path).await?;
    let file_id = page_cache::next_file_id();
    let block_reader = FileBlockReader::new(&file, file_id);
    let summary_blk = block_reader.read_blk(0, ctx).await?;
--- a/pageserver/pagebench/src/cmd/ondemand_download_churn.rs
+++ b/pageserver/pagebench/src/cmd/ondemand_download_churn.rs
@@ -2,11 +2,9 @@ use pageserver_api::{models::HistoricLayerInfo, shard::TenantShardId};

 use pageserver_client::mgmt_api;
 use rand::seq::SliceRandom;
-use tokio_util::sync::CancellationToken;
 use tracing::{debug, info};
 use utils::id::{TenantTimelineId, TimelineId};

-use std::{f64, sync::Arc};
 use tokio::{
    sync::{mpsc, OwnedSemaphorePermit},
    task::JoinSet,
@@ -14,7 +12,10 @@ use tokio::{

 use std::{
    num::NonZeroUsize,
-    sync::atomic::{AtomicU64, Ordering},
+    sync::{
+        atomic::{AtomicU64, Ordering},
+        Arc,
+    },
    time::{Duration, Instant},
 };

@@ -50,31 +51,19 @@ pub(crate) fn main(args: Args) -> anyhow::Result<()> {
    Ok(())
 }

-#[derive(serde::Serialize)]
-struct Output {
-    downloads_count: u64,
-    downloads_bytes: u64,
-    evictions_count: u64,
-    timeline_restarts: u64,
-    #[serde(with = "humantime_serde")]
-    runtime: Duration,
-}
-
 #[derive(Debug, Default)]
 struct LiveStats {
-    evictions_count: AtomicU64,
-    downloads_count: AtomicU64,
-    downloads_bytes: AtomicU64,
+    evictions: AtomicU64,
+    downloads: AtomicU64,
    timeline_restarts: AtomicU64,
 }

 impl LiveStats {
    fn eviction_done(&self) {
-        self.evictions_count.fetch_add(1, Ordering::Relaxed);
+        self.evictions.fetch_add(1, Ordering::Relaxed);
    }
-    fn download_done(&self, size: u64) {
-        self.downloads_count.fetch_add(1, Ordering::Relaxed);
-        self.downloads_bytes.fetch_add(size, Ordering::Relaxed);
+    fn download_done(&self) {
+        self.downloads.fetch_add(1, Ordering::Relaxed);
    }
    fn timeline_restart_done(&self) {
        self.timeline_restarts.fetch_add(1, Ordering::Relaxed);
@@ -103,49 +92,28 @@ async fn main_impl(args: Args) -> anyhow::Result<()> {
    )
    .await?;

-    let token = CancellationToken::new();
    let mut tasks = JoinSet::new();

-    let periodic_stats = Arc::new(LiveStats::default());
-    let total_stats = Arc::new(LiveStats::default());
-
-    let start = Instant::now();
+    let live_stats = Arc::new(LiveStats::default());
    tasks.spawn({
-        let periodic_stats = Arc::clone(&periodic_stats);
-        let total_stats = Arc::clone(&total_stats);
-        let cloned_token = token.clone();
+        let live_stats = Arc::clone(&live_stats);
        async move {
            let mut last_at = Instant::now();
            loop {
-                if cloned_token.is_cancelled() {
-                    return;
-                }
                tokio::time::sleep_until((last_at + Duration::from_secs(1)).into()).await;
                let now = Instant::now();
                let delta: Duration = now - last_at;
                last_at = now;

                let LiveStats {
-                    evictions_count,
-                    downloads_count,
-                    downloads_bytes,
+                    evictions,
+                    downloads,
                    timeline_restarts,
-                } = &*periodic_stats;
-                let evictions_count = evictions_count.swap(0, Ordering::Relaxed);
-                let downloads_count = downloads_count.swap(0, Ordering::Relaxed);
-                let downloads_bytes = downloads_bytes.swap(0, Ordering::Relaxed);
+                } = &*live_stats;
+                let evictions = evictions.swap(0, Ordering::Relaxed) as f64 / delta.as_secs_f64();
+                let downloads = downloads.swap(0, Ordering::Relaxed) as f64 / delta.as_secs_f64();
                let timeline_restarts = timeline_restarts.swap(0, Ordering::Relaxed);
-
-                total_stats.evictions_count.fetch_add(evictions_count, Ordering::Relaxed);
-                total_stats.downloads_count.fetch_add(downloads_count, Ordering::Relaxed);
-                total_stats.downloads_bytes.fetch_add(downloads_bytes, Ordering::Relaxed);
-                total_stats.timeline_restarts.fetch_add(timeline_restarts, Ordering::Relaxed);
-
-                let evictions_per_s = evictions_count as f64 / delta.as_secs_f64();
-                let downloads_per_s = downloads_count as f64 / delta.as_secs_f64();
-                let downloads_mibs_per_s = downloads_bytes as f64 / delta.as_secs_f64() / ((1 << 20) as f64);
-
-                info!("evictions={evictions_per_s:.2}/s downloads={downloads_per_s:.2}/s download_bytes={downloads_mibs_per_s:.2}MiB/s timeline_restarts={timeline_restarts}");
+                info!("evictions={evictions:.2}/s downloads={downloads:.2}/s timeline_restarts={timeline_restarts}");
            }
        }
    });
@@ -156,42 +124,14 @@ async fn main_impl(args: Args) -> anyhow::Result<()> {
                args,
                Arc::clone(&mgmt_api_client),
                tl,
-                Arc::clone(&periodic_stats),
-                token.clone(),
+                Arc::clone(&live_stats),
            ));
        }
    }
-    if let Some(runtime) = args.runtime {
-        tokio::spawn(async move {
-            tokio::time::sleep(runtime.into()).await;
-            token.cancel();
-        });
-    }

    while let Some(res) = tasks.join_next().await {
        res.unwrap();
    }
-    let end = Instant::now();
-    let duration: Duration = end - start;
-
-    let output = {
-        let LiveStats {
-            evictions_count,
-            downloads_count,
-            downloads_bytes,
-            timeline_restarts,
-        } = &*total_stats;
-        Output {
-            downloads_count: downloads_count.load(Ordering::Relaxed),
-            downloads_bytes: downloads_bytes.load(Ordering::Relaxed),
-            evictions_count: evictions_count.load(Ordering::Relaxed),
-            timeline_restarts: timeline_restarts.load(Ordering::Relaxed),
-            runtime: duration,
-        }
-    };
-    let output = serde_json::to_string_pretty(&output).unwrap();
-    println!("{output}");
-
    Ok(())
 }

@@ -200,7 +140,6 @@ async fn timeline_actor(
    mgmt_api_client: Arc<pageserver_client::mgmt_api::Client>,
    timeline: TenantTimelineId,
    live_stats: Arc<LiveStats>,
-    token: CancellationToken,
 ) {
    // TODO: support sharding
    let tenant_shard_id = TenantShardId::unsharded(timeline.tenant_id);
@@ -210,7 +149,7 @@ async fn timeline_actor(
        layers: Vec<mpsc::Sender<OwnedSemaphorePermit>>,
        concurrency: Arc<tokio::sync::Semaphore>,
    }
-    while !token.is_cancelled() {
+    loop {
        debug!("restarting timeline");
        let layer_map_info = mgmt_api_client
            .layer_map_info(tenant_shard_id, timeline.timeline_id)
@@ -246,7 +185,7 @@ async fn timeline_actor(

        live_stats.timeline_restart_done();

-        while !token.is_cancelled() {
+        loop {
            assert!(!timeline.joinset.is_empty());
            if let Some(res) = timeline.joinset.try_join_next() {
                debug!(?res, "a layer actor exited, should not happen");
@@ -316,7 +255,7 @@ async fn layer_actor(
                    .layer_ondemand_download(tenant_shard_id, timeline_id, layer.layer_file_name())
                    .await
                    .unwrap();
-                live_stats.download_done(layer.layer_file_size());
+                live_stats.download_done();
                did_it
            }
        };
--- a/pageserver/src/aux_file.rs
+++ b/pageserver/src/aux_file.rs
@@ -1,39 +1,14 @@
-use std::sync::Arc;
-
-use ::metrics::IntGauge;
-use bytes::{Buf, BufMut, Bytes};
 use pageserver_api::key::{Key, AUX_KEY_PREFIX, METADATA_KEY_SIZE};
 use tracing::warn;

-// BEGIN Copyright (c) 2017 Servo Contributors
-
-/// Const version of FNV hash.
-#[inline]
-#[must_use]
-pub const fn fnv_hash(bytes: &[u8]) -> u128 {
-    const INITIAL_STATE: u128 = 0x6c62272e07bb014262b821756295c58d;
-    const PRIME: u128 = 0x0000000001000000000000000000013B;
-
-    let mut hash = INITIAL_STATE;
-    let mut i = 0;
-    while i < bytes.len() {
-        hash ^= bytes[i] as u128;
-        hash = hash.wrapping_mul(PRIME);
-        i += 1;
-    }
-    hash
-}
-
-// END Copyright (c) 2017 Servo Contributors
-
-/// Create a metadata key from a hash, encoded as [AUX_KEY_PREFIX, 2B directory prefix, least significant 13B of FNV hash].
+/// Create a metadata key from a hash, encoded as [AUX_KEY_PREFIX, 2B directory prefix, first 13B of 128b xxhash].
 fn aux_hash_to_metadata_key(dir_level1: u8, dir_level2: u8, data: &[u8]) -> Key {
-    let mut key: [u8; 16] = [0; METADATA_KEY_SIZE];
-    let hash = fnv_hash(data).to_be_bytes();
+    let mut key = [0; METADATA_KEY_SIZE];
+    let hash = twox_hash::xxh3::hash128(data).to_be_bytes();
    key[0] = AUX_KEY_PREFIX;
    key[1] = dir_level1;
    key[2] = dir_level2;
-    key[3..16].copy_from_slice(&hash[3..16]);
+    key[3..16].copy_from_slice(&hash[0..13]);
    Key::from_metadata_key_fixed_size(&key)
 }

@@ -86,133 +61,6 @@ pub fn encode_aux_file_key(path: &str) -> Key {
    }
 }

-const AUX_FILE_ENCODING_VERSION: u8 = 0x01;
-
-pub fn decode_file_value(val: &[u8]) -> anyhow::Result<Vec<(&str, &[u8])>> {
-    let mut ptr = val;
-    if ptr.is_empty() {
-        // empty value = no files
-        return Ok(Vec::new());
-    }
-    assert_eq!(
-        ptr.get_u8(),
-        AUX_FILE_ENCODING_VERSION,
-        "unsupported aux file value"
-    );
-    let mut files = vec![];
-    while ptr.has_remaining() {
-        let key_len = ptr.get_u32() as usize;
-        let key = &ptr[..key_len];
-        ptr.advance(key_len);
-        let val_len = ptr.get_u32() as usize;
-        let content = &ptr[..val_len];
-        ptr.advance(val_len);
-
-        let path = std::str::from_utf8(key)?;
-        files.push((path, content));
-    }
-    Ok(files)
-}
-
-/// Decode an aux file key-value pair into a list of files. The returned `Bytes` contains reference
-/// to the original value slice. Be cautious about memory consumption.
-pub fn decode_file_value_bytes(val: &Bytes) -> anyhow::Result<Vec<(String, Bytes)>> {
-    let mut ptr = val.clone();
-    if ptr.is_empty() {
-        // empty value = no files
-        return Ok(Vec::new());
-    }
-    assert_eq!(
-        ptr.get_u8(),
-        AUX_FILE_ENCODING_VERSION,
-        "unsupported aux file value"
-    );
-    let mut files = vec![];
-    while ptr.has_remaining() {
-        let key_len = ptr.get_u32() as usize;
-        let key = ptr.slice(..key_len);
-        ptr.advance(key_len);
-        let val_len = ptr.get_u32() as usize;
-        let content = ptr.slice(..val_len);
-        ptr.advance(val_len);
-
-        let path = std::str::from_utf8(&key)?.to_string();
-        files.push((path, content));
-    }
-    Ok(files)
-}
-
-pub fn encode_file_value(files: &[(&str, &[u8])]) -> anyhow::Result<Vec<u8>> {
-    if files.is_empty() {
-        // no files = empty value
-        return Ok(Vec::new());
-    }
-    let mut encoded = vec![];
-    encoded.put_u8(AUX_FILE_ENCODING_VERSION);
-    for (path, content) in files {
-        if path.len() > u32::MAX as usize {
-            anyhow::bail!("{} exceeds path size limit", path);
-        }
-        encoded.put_u32(path.len() as u32);
-        encoded.put_slice(path.as_bytes());
-        if content.len() > u32::MAX as usize {
-            anyhow::bail!("{} exceeds content size limit", path);
-        }
-        encoded.put_u32(content.len() as u32);
-        encoded.put_slice(content);
-    }
-    Ok(encoded)
-}
-
-/// An estimation of the size of aux files.
-pub struct AuxFileSizeEstimator {
-    aux_file_size_gauge: IntGauge,
-    size: Arc<std::sync::Mutex<Option<isize>>>,
-}
-
-impl AuxFileSizeEstimator {
-    pub fn new(aux_file_size_gauge: IntGauge) -> Self {
-        Self {
-            aux_file_size_gauge,
-            size: Arc::new(std::sync::Mutex::new(None)),
-        }
-    }
-
-    pub fn on_base_backup(&self, new_size: usize) {
-        let mut guard = self.size.lock().unwrap();
-        *guard = Some(new_size as isize);
-        self.report(new_size as isize);
-    }
-
-    pub fn on_add(&self, file_size: usize) {
-        let mut guard = self.size.lock().unwrap();
-        if let Some(size) = &mut *guard {
-            *size += file_size as isize;
-            self.report(*size);
-        }
-    }
-
-    pub fn on_remove(&self, file_size: usize) {
-        let mut guard = self.size.lock().unwrap();
-        if let Some(size) = &mut *guard {
-            *size -= file_size as isize;
-            self.report(*size);
-        }
-    }
-
-    pub fn on_update(&self, old_size: usize, new_size: usize) {
-        let mut guard = self.size.lock().unwrap();
-        if let Some(size) = &mut *guard {
-            *size += new_size as isize - old_size as isize;
-            self.report(*size);
-        }
-    }
-
-    pub fn report(&self, size: isize) {
-        self.aux_file_size_gauge.set(size as i64);
-    }
-}
-
 #[cfg(test)]
 mod tests {
    use super::*;
@@ -221,19 +69,15 @@ mod tests {
    fn test_hash_portable() {
        // AUX file encoding requires the hash to be portable across all platforms. This test case checks
        // if the algorithm produces the same hash across different environments.
-
        assert_eq!(
-            265160408618497461376862998434862070044,
-            super::fnv_hash("test1".as_bytes())
+            305317690835051308206966631765527126151,
+            twox_hash::xxh3::hash128("test1".as_bytes())
        );
        assert_eq!(
-            295486155126299629456360817749600553988,
-            super::fnv_hash("test/test2".as_bytes())
-        );
-        assert_eq!(
-            144066263297769815596495629667062367629,
-            super::fnv_hash("".as_bytes())
+            85104974691013376326742244813280798847,
+            twox_hash::xxh3::hash128("test/test2".as_bytes())
        );
+        assert_eq!(0, twox_hash::xxh3::hash128("".as_bytes()));
    }

    #[test]
@@ -241,45 +85,28 @@ mod tests {
        // To correct retrieve AUX files, the generated keys for the same file must be the same for all versions
        // of the page server.
        assert_eq!(
-            "62000001017F8B83D94F7081693471ABF91C",
-            encode_aux_file_key("pg_logical/mappings/test1").to_string(),
+            "6200000101E5B20C5F8DD5AA3289D6D9EAFA",
+            encode_aux_file_key("pg_logical/mappings/test1").to_string()
        );
        assert_eq!(
-            "62000001027F8E83D94F7081693471ABFCCD",
-            encode_aux_file_key("pg_logical/snapshots/test2").to_string(),
+            "620000010239AAC544893139B26F501B97E6",
+            encode_aux_file_key("pg_logical/snapshots/test2").to_string()
        );
        assert_eq!(
-            "62000001032E07BB014262B821756295C58D",
-            encode_aux_file_key("pg_logical/replorigin_checkpoint").to_string(),
+            "620000010300000000000000000000000000",
+            encode_aux_file_key("pg_logical/replorigin_checkpoint").to_string()
        );
        assert_eq!(
-            "62000001FF4F38E1C74754E7D03C1A660178",
-            encode_aux_file_key("pg_logical/unsupported").to_string(),
+            "62000001FF8635AF2134B7266EC5B4189FD6",
+            encode_aux_file_key("pg_logical/unsupported").to_string()
        );
        assert_eq!(
-            "62000002017F8D83D94F7081693471ABFB92",
+            "6200000201772D0E5D71DE14DA86142A1619",
            encode_aux_file_key("pg_replslot/test3").to_string()
        );
        assert_eq!(
-            "620000FFFF2B6ECC8AEF93F643DC44F15E03",
-            encode_aux_file_key("other_file_not_supported").to_string(),
-        );
-    }
-
-    #[test]
-    fn test_value_encoding() {
-        let files = vec![
-            ("pg_logical/1.file", "1111".as_bytes()),
-            ("pg_logical/2.file", "2222".as_bytes()),
-        ];
-        assert_eq!(
-            files,
-            decode_file_value(&encode_file_value(&files).unwrap()).unwrap()
-        );
-        let files = vec![];
-        assert_eq!(
-            files,
-            decode_file_value(&encode_file_value(&files).unwrap()).unwrap()
+            "620000FFFF1866EBEB53B807B26A2416F317",
+            encode_aux_file_key("other_file_not_supported").to_string()
        );
    }
 }
--- a/pageserver/src/basebackup.rs
+++ b/pageserver/src/basebackup.rs
@@ -601,7 +601,7 @@ where
        // add zenith.signal file
        let mut zenith_signal = String::new();
        if self.prev_record_lsn == Lsn(0) {
-            if self.timeline.is_ancestor_lsn(self.lsn) {
+            if self.lsn == self.timeline.get_ancestor_lsn() {
                write!(zenith_signal, "PREV LSN: none")
                    .map_err(|e| BasebackupError::Server(e.into()))?;
            } else {
--- a/pageserver/src/bin/pageserver.rs
+++ b/pageserver/src/bin/pageserver.rs
@@ -3,7 +3,6 @@
 //! Main entry point for the Page Server executable.

 use std::env::{var, VarError};
-use std::io::Read;
 use std::sync::Arc;
 use std::time::Duration;
 use std::{env, ops::ControlFlow, str::FromStr};
@@ -152,34 +151,37 @@ fn initialize_config(
    workdir: &Utf8Path,
 ) -> anyhow::Result<ControlFlow<(), &'static PageServerConf>> {
    let init = arg_matches.get_flag("init");
+    let update_config = init || arg_matches.get_flag("update-config");

-    let file_contents: Option<toml_edit::Document> = match std::fs::File::open(cfg_file_path) {
-        Ok(mut f) => {
-            if init {
-                anyhow::bail!("config file already exists: {cfg_file_path}");
-            }
-            let md = f.metadata().context("stat config file")?;
-            if md.is_file() {
-                let mut s = String::new();
-                f.read_to_string(&mut s).context("read config file")?;
-                Some(s.parse().context("parse config file toml")?)
-            } else {
-                anyhow::bail!("directory entry exists but is not a file: {cfg_file_path}");
-            }
-        }
-        Err(e) if e.kind() == std::io::ErrorKind::NotFound => None,
-        Err(e) => {
-            anyhow::bail!("open pageserver config: {e}: {cfg_file_path}");
+    let (mut toml, config_file_exists) = if cfg_file_path.is_file() {
+        if init {
+            anyhow::bail!(
+                "Config file '{cfg_file_path}' already exists, cannot init it, use --update-config to update it",
+            );
        }
+        // Use the existing config file as the base document; CLI overrides are applied on top of it
+        let cfg_file_contents = std::fs::read_to_string(cfg_file_path)
+            .with_context(|| format!("Failed to read pageserver config at '{cfg_file_path}'"))?;
+        (
+            cfg_file_contents
+                .parse::<toml_edit::Document>()
+                .with_context(|| {
+                    format!("Failed to parse '{cfg_file_path}' as pageserver config")
+                })?,
+            true,
+        )
+    } else if cfg_file_path.exists() {
+        anyhow::bail!("Config file '{cfg_file_path}' exists but is not a regular file");
+    } else {
+        // No config file exists yet, so start from the built-in default config
+        (
+            DEFAULT_CONFIG_FILE
+                .parse::<toml_edit::Document>()
+                .context("could not parse built-in config file")?,
+            false,
+        )
    };

-    let mut effective_config = file_contents.unwrap_or_else(|| {
-        DEFAULT_CONFIG_FILE
-            .parse()
-            .expect("unit tests ensure this works")
-    });
-
-    // Patch with overrides from the command line
    if let Some(values) = arg_matches.get_many::<String>("config-override") {
        for option_line in values {
            let doc = toml_edit::Document::from_str(option_line).with_context(|| {
@@ -187,21 +189,22 @@ fn initialize_config(
            })?;

            for (key, item) in doc.iter() {
-                effective_config.insert(key, item.clone());
+                if config_file_exists && update_config && key == "id" && toml.contains_key(key) {
+                    anyhow::bail!("Pageserver config file exists at '{cfg_file_path}' and has node id already, it cannot be overridden");
+                }
+                toml.insert(key, item.clone());
            }
        }
    }

-    debug!("Resulting toml: {effective_config}");
-
-    // Construct the runtime representation
-    let conf = PageServerConf::parse_and_validate(&effective_config, workdir)
+    debug!("Resulting toml: {toml}");
+    let conf = PageServerConf::parse_and_validate(&toml, workdir)
        .context("Failed to parse pageserver configuration")?;

-    if init {
+    if update_config {
        info!("Writing pageserver config to '{cfg_file_path}'");

-        std::fs::write(cfg_file_path, effective_config.to_string())
+        std::fs::write(cfg_file_path, toml.to_string())
            .with_context(|| format!("Failed to write pageserver config to '{cfg_file_path}'"))?;
        info!("Config successfully written to '{cfg_file_path}'")
    }
@@ -284,6 +287,7 @@ fn start_pageserver(
    ))
    .unwrap();
    pageserver::preinitialize_metrics();
+    pageserver::metrics::wal_redo::set_process_kind_metric(conf.walredo_process_kind);

    // If any failpoints were set from FAILPOINTS environment variable,
    // print them to the log for debugging purposes
@@ -515,12 +519,16 @@ fn start_pageserver(
        }
    });

-    let secondary_controller = secondary::spawn_tasks(
-        tenant_manager.clone(),
-        remote_storage.clone(),
-        background_jobs_barrier.clone(),
-        shutdown_pageserver.clone(),
-    );
+    let secondary_controller = if let Some(remote_storage) = &remote_storage {
+        secondary::spawn_tasks(
+            tenant_manager.clone(),
+            remote_storage.clone(),
+            background_jobs_barrier.clone(),
+            shutdown_pageserver.clone(),
+        )
+    } else {
+        secondary::null_controller()
+    };

    // shared state between the disk-usage backed eviction background task and the http endpoint
    // that allows triggering disk-usage based eviction manually. note that the http endpoint
@@ -528,13 +536,15 @@ fn start_pageserver(
    // been configured.
    let disk_usage_eviction_state: Arc<disk_usage_eviction_task::State> = Arc::default();

-    launch_disk_usage_global_eviction_task(
-        conf,
-        remote_storage.clone(),
-        disk_usage_eviction_state.clone(),
-        tenant_manager.clone(),
-        background_jobs_barrier.clone(),
-    )?;
+    if let Some(remote_storage) = &remote_storage {
+        launch_disk_usage_global_eviction_task(
+            conf,
+            remote_storage.clone(),
+            disk_usage_eviction_state.clone(),
+            tenant_manager.clone(),
+            background_jobs_barrier.clone(),
+        )?;
+    }

    // Start up the service to handle HTTP mgmt API request. We created the
    // listener earlier already.
@@ -647,20 +657,17 @@ fn start_pageserver(
            None,
            "libpq endpoint listener",
            true,
-            {
-                let tenant_manager = tenant_manager.clone();
-                async move {
-                    page_service::libpq_listener_main(
-                        tenant_manager,
-                        broker_client,
-                        pg_auth,
-                        pageserver_listener,
-                        conf.pg_auth_type,
-                        libpq_ctx,
-                        task_mgr::shutdown_token(),
-                    )
-                    .await
-                }
+            async move {
+                page_service::libpq_listener_main(
+                    conf,
+                    broker_client,
+                    pg_auth,
+                    pageserver_listener,
+                    conf.pg_auth_type,
+                    libpq_ctx,
+                    task_mgr::shutdown_token(),
+                )
+                .await
            },
        );
    }
@@ -689,7 +696,14 @@ fn start_pageserver(
            // Right now that tree doesn't reach very far, and `task_mgr` is used instead.
            // The plan is to change that over time.
            shutdown_pageserver.take();
-            pageserver::shutdown_pageserver(&tenant_manager, deletion_queue.clone(), 0).await;
+            let bg_remote_storage = remote_storage.clone();
+            let bg_deletion_queue = deletion_queue.clone();
+            pageserver::shutdown_pageserver(
+                &tenant_manager,
+                bg_remote_storage.map(|_| bg_deletion_queue),
+                0,
+            )
+            .await;
            unreachable!()
        })
    }
@@ -697,11 +711,12 @@ fn start_pageserver(

 fn create_remote_storage_client(
    conf: &'static PageServerConf,
-) -> anyhow::Result<GenericRemoteStorage> {
+) -> anyhow::Result<Option<GenericRemoteStorage>> {
    let config = if let Some(config) = &conf.remote_storage_config {
        config
    } else {
-        anyhow::bail!("no remote storage configured, this is a deprecated configuration");
+        tracing::warn!("no remote storage configured, this is a deprecated configuration");
+        return Ok(None);
    };

    // Create the client
@@ -721,7 +736,7 @@ fn create_remote_storage_client(
            GenericRemoteStorage::unreliable_wrapper(remote_storage, conf.test_remote_failures);
    }

-    Ok(remote_storage)
+    Ok(Some(remote_storage))
 }

 fn cli() -> Command {
@@ -743,13 +758,18 @@ fn cli() -> Command {
        // See `settings.md` for more details on the extra configuration patameters pageserver can process
        .arg(
            Arg::new("config-override")
-                .long("config-override")
                .short('c')
                .num_args(1)
                .action(ArgAction::Append)
                .help("Additional configuration overrides of the ones from the toml config file (or new ones to add there). \
                Any option has to be a valid toml document, example: `-c=\"foo='hey'\"` `-c=\"foo={value=1}\"`"),
        )
+        .arg(
+            Arg::new("update-config")
+                .long("update-config")
+                .action(ArgAction::SetTrue)
+                .help("Update the config file when started"),
+        )
        .arg(
            Arg::new("enabled-features")
                .long("enabled-features")
--- a/pageserver/src/config.rs
+++ b/pageserver/src/config.rs
@@ -99,7 +99,7 @@ pub mod defaults {

    pub const DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB: usize = 0;

-    pub const DEFAULT_WALREDO_PROCESS_KIND: &str = "async";
+    pub const DEFAULT_WALREDO_PROCESS_KIND: &str = "sync";

    ///
    /// Default built-in configuration file.
--- a/pageserver/src/deletion_queue.rs
+++ b/pageserver/src/deletion_queue.rs
@@ -38,7 +38,7 @@ use deleter::DeleterMessage;
 use list_writer::ListWriterQueueMessage;
 use validator::ValidatorQueueMessage;

-use crate::{config::PageServerConf, tenant::storage_layer::LayerName};
+use crate::{config::PageServerConf, tenant::storage_layer::LayerFileName};

 // TODO: configurable for how long to wait before executing deletions

@@ -479,7 +479,7 @@ impl DeletionQueueClient {
        tenant_shard_id: TenantShardId,
        timeline_id: TimelineId,
        current_generation: Generation,
-        layers: Vec<(LayerName, LayerFileMetadata)>,
+        layers: Vec<(LayerFileName, LayerFileMetadata)>,
    ) -> Result<(), DeletionQueueError> {
        if current_generation.is_none() {
            debug!("Enqueuing deletions in legacy mode, skipping queue");
@@ -511,7 +511,7 @@ impl DeletionQueueClient {
        tenant_shard_id: TenantShardId,
        timeline_id: TimelineId,
        current_generation: Generation,
-        layers: Vec<(LayerName, LayerFileMetadata)>,
+        layers: Vec<(LayerFileName, LayerFileMetadata)>,
    ) -> Result<(), DeletionQueueError> {
        metrics::DELETION_QUEUE
            .keys_submitted
@@ -632,7 +632,7 @@ impl DeletionQueue {
    ///
    /// If remote_storage is None, then the returned workers will also be None.
    pub fn new<C>(
-        remote_storage: GenericRemoteStorage,
+        remote_storage: Option<GenericRemoteStorage>,
        control_plane_client: Option<C>,
        conf: &'static PageServerConf,
    ) -> (Self, Option<DeletionQueueWorkers<C>>)
@@ -658,6 +658,23 @@ impl DeletionQueue {
        // longer to flush after Tenants have all been torn down.
        let cancel = CancellationToken::new();

+        let remote_storage = match remote_storage {
+            None => {
+                return (
+                    Self {
+                        client: DeletionQueueClient {
+                            tx,
+                            executor_tx,
+                            lsn_table: lsn_table.clone(),
+                        },
+                        cancel,
+                    },
+                    None,
+                )
+            }
+            Some(r) => r,
+        };
+
        (
            Self {
                client: DeletionQueueClient {
@@ -717,20 +734,20 @@ mod test {
    use crate::{
        control_plane_client::RetryForeverError,
        repository::Key,
-        tenant::{harness::TenantHarness, storage_layer::DeltaLayerName},
+        tenant::{harness::TenantHarness, storage_layer::DeltaFileName},
    };

    use super::*;
    pub const TIMELINE_ID: TimelineId =
        TimelineId::from_array(hex!("11223344556677881122334455667788"));

-    pub const EXAMPLE_LAYER_NAME: LayerName = LayerName::Delta(DeltaLayerName {
+    pub const EXAMPLE_LAYER_NAME: LayerFileName = LayerFileName::Delta(DeltaFileName {
        key_range: Key::from_i128(0x0)..Key::from_i128(0xFFFFFFFFFFFFFFFF),
        lsn_range: Lsn(0x00000000016B59D8)..Lsn(0x00000000016B5A51),
    });

    // When you need a second layer in a test.
-    pub const EXAMPLE_LAYER_NAME_ALT: LayerName = LayerName::Delta(DeltaLayerName {
+    pub const EXAMPLE_LAYER_NAME_ALT: LayerFileName = LayerFileName::Delta(DeltaFileName {
        key_range: Key::from_i128(0x0)..Key::from_i128(0xFFFFFFFFFFFFFFFF),
        lsn_range: Lsn(0x00000000016B5A51)..Lsn(0x00000000016B5A61),
    });
@@ -748,7 +765,7 @@ mod test {
        /// Simulate a pageserver restart by destroying and recreating the deletion queue
        async fn restart(&mut self) {
            let (deletion_queue, workers) = DeletionQueue::new(
-                self.storage.clone(),
+                Some(self.storage.clone()),
                Some(self.mock_control_plane.clone()),
                self.harness.conf,
            );
@@ -780,7 +797,7 @@ mod test {
        /// Returns remote layer file name, suitable for use in assert_remote_files
        fn write_remote_layer(
            &self,
-            file_name: LayerName,
+            file_name: LayerFileName,
            gen: Generation,
        ) -> anyhow::Result<String> {
            let tenant_shard_id = self.harness.tenant_shard_id;
@@ -858,7 +875,7 @@ mod test {
        let mock_control_plane = MockControlPlane::new();

        let (deletion_queue, worker) = DeletionQueue::new(
-            storage.clone(),
+            Some(storage.clone()),
            Some(mock_control_plane.clone()),
            harness.conf,
        );
@@ -935,7 +952,7 @@ mod test {
        let client = ctx.deletion_queue.new_client();
        client.recover(HashMap::new())?;

-        let layer_file_name_1: LayerName = "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap();
+        let layer_file_name_1: LayerFileName = "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap();
        let tenant_shard_id = ctx.harness.tenant_shard_id;

        let content: Vec<u8> = "victim1 contents".into();
--- a/pageserver/src/deletion_queue/list_writer.rs
+++ b/pageserver/src/deletion_queue/list_writer.rs
@@ -34,7 +34,7 @@ use crate::deletion_queue::TEMP_SUFFIX;
 use crate::metrics;
 use crate::tenant::remote_timeline_client::remote_layer_path;
 use crate::tenant::remote_timeline_client::LayerFileMetadata;
-use crate::tenant::storage_layer::LayerName;
+use crate::tenant::storage_layer::LayerFileName;
 use crate::virtual_file::on_fatal_io_error;
 use crate::virtual_file::MaybeFatalIo;

@@ -59,7 +59,7 @@ pub(super) struct DeletionOp {
    // `layers` and `objects` are both just lists of objects.  `layers` is used if you do not
    // have a config object handy to project it to a remote key, and need the consuming worker
    // to do it for you.
-    pub(super) layers: Vec<(LayerName, LayerFileMetadata)>,
+    pub(super) layers: Vec<(LayerFileName, LayerFileMetadata)>,
    pub(super) objects: Vec<RemotePath>,

    /// The _current_ generation of the Tenant shard attachment in which we are enqueuing
--- a/pageserver/src/disk_usage_eviction_task.rs
+++ b/pageserver/src/disk_usage_eviction_task.rs
@@ -64,7 +64,7 @@ use crate::{
        mgr::TenantManager,
        remote_timeline_client::LayerFileMetadata,
        secondary::SecondaryTenant,
-        storage_layer::{AsLayerDesc, EvictionError, Layer, LayerName},
+        storage_layer::{AsLayerDesc, EvictionError, Layer, LayerFileName},
    },
 };

@@ -540,12 +540,7 @@ pub(crate) async fn disk_usage_eviction_task_iteration_impl<U: Usage>(
                    js.spawn(async move {
                        layer
                            .secondary_tenant
-                            .evict_layer(
-                                tenant_manager.get_conf(),
-                                layer.timeline_id,
-                                layer.name,
-                                layer.metadata,
-                            )
+                            .evict_layer(tenant_manager.get_conf(), layer.timeline_id, layer.name)
                            .await;
                        Ok(file_size)
                    });
@@ -604,7 +599,7 @@ pub(crate) async fn disk_usage_eviction_task_iteration_impl<U: Usage>(
 pub(crate) struct EvictionSecondaryLayer {
    pub(crate) secondary_tenant: Arc<SecondaryTenant>,
    pub(crate) timeline_id: TimelineId,
-    pub(crate) name: LayerName,
+    pub(crate) name: LayerFileName,
    pub(crate) metadata: LayerFileMetadata,
 }

@@ -637,9 +632,9 @@ impl EvictionLayer {
        }
    }

-    pub(crate) fn get_name(&self) -> LayerName {
+    pub(crate) fn get_name(&self) -> LayerFileName {
        match self {
-            Self::Attached(l) => l.layer_desc().layer_name(),
+            Self::Attached(l) => l.layer_desc().filename(),
            Self::Secondary(sl) => sl.name.clone(),
        }
    }
--- a/pageserver/src/http/openapi_spec.yml
+++ b/pageserver/src/http/openapi_spec.yml
@@ -420,6 +420,25 @@ paths:
          description: Tenant scheduled to load successfully

  /v1/tenant/{tenant_id}/synthetic_size:
+    parameters:
+      - name: tenant_id
+        in: path
+        required: true
+        schema:
+          type: string
+    get:
+      description: |
+        Calculate tenant's synthetic size
+      responses:
+        "200":
+          description: Tenant's synthetic size
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/SyntheticSizeResponse"
+
+  # This route has no handler. TODO: remove?
+  /v1/tenant/{tenant_id}/size:
    parameters:
      - name: tenant_id
        in: path
@@ -449,9 +468,19 @@ paths:
          content:
            application/json:
              schema:
-                $ref: "#/components/schemas/SyntheticSizeResponse"
-            text/html:
-              description: SVG representation of the tenant and it's timelines.
+                type: object
+                required:
+                  - id
+                  - size
+                properties:
+                  id:
+                    type: string
+                    format: hex
+                  size:
+                    type: integer
+                    nullable: true
+                    description: |
+                      Size metric in bytes or null if inputs_only=true was given.
        "401":
          description: Unauthorized Error
          content:
@@ -900,9 +929,6 @@ components:
          format: hex
        size:
          type: integer
-          nullable: true
-          description: |
-            Size metric in bytes or null if inputs_only=true was given.
        segment_sizes:
          type: array
          items:
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -1,8 +1,6 @@
 //!
 //! Management HTTP API
 //!
-use std::cmp::Reverse;
-use std::collections::BinaryHeap;
 use std::collections::HashMap;
 use std::str::FromStr;
 use std::sync::Arc;
@@ -26,11 +24,7 @@ use pageserver_api::models::TenantScanRemoteStorageShard;
 use pageserver_api::models::TenantShardLocation;
 use pageserver_api::models::TenantShardSplitRequest;
 use pageserver_api::models::TenantShardSplitResponse;
-use pageserver_api::models::TenantSorting;
 use pageserver_api::models::TenantState;
-use pageserver_api::models::TopTenantShardItem;
-use pageserver_api::models::TopTenantShardsRequest;
-use pageserver_api::models::TopTenantShardsResponse;
 use pageserver_api::models::{
    DownloadRemoteLayersTaskSpawnRequest, LocationConfigMode, TenantAttachRequest,
    TenantLoadRequest, TenantLocationConfigRequest,
@@ -69,7 +63,6 @@ use crate::tenant::remote_timeline_client::list_remote_timelines;
 use crate::tenant::secondary::SecondaryController;
 use crate::tenant::size::ModelInputs;
 use crate::tenant::storage_layer::LayerAccessStatsReset;
-use crate::tenant::storage_layer::LayerName;
 use crate::tenant::timeline::CompactFlags;
 use crate::tenant::timeline::Timeline;
 use crate::tenant::SpawnMode;
@@ -110,7 +103,7 @@ pub struct State {
    tenant_manager: Arc<TenantManager>,
    auth: Option<Arc<SwappableJwtAuth>>,
    allowlist_routes: Vec<Uri>,
-    remote_storage: GenericRemoteStorage,
+    remote_storage: Option<GenericRemoteStorage>,
    broker_client: storage_broker::BrokerClientChannel,
    disk_usage_eviction_state: Arc<disk_usage_eviction_task::State>,
    deletion_queue_client: DeletionQueueClient,
@@ -124,7 +117,7 @@ impl State {
        conf: &'static PageServerConf,
        tenant_manager: Arc<TenantManager>,
        auth: Option<Arc<SwappableJwtAuth>>,
-        remote_storage: GenericRemoteStorage,
+        remote_storage: Option<GenericRemoteStorage>,
        broker_client: storage_broker::BrokerClientChannel,
        disk_usage_eviction_state: Arc<disk_usage_eviction_task::State>,
        deletion_queue_client: DeletionQueueClient,
@@ -819,6 +812,12 @@ async fn tenant_attach_handler(

    let generation = get_request_generation(state, maybe_body.as_ref().and_then(|r| r.generation))?;

+    if state.remote_storage.is_none() {
+        return Err(ApiError::BadRequest(anyhow!(
+            "attach_tenant is not possible because pageserver was configured without remote storage"
+        )));
+    }
+
    let tenant_shard_id = TenantShardId::unsharded(tenant_id);
    let shard_params = ShardParameters::default();
    let location_conf = LocationConf::attached_single(tenant_conf, generation, &shard_params);
@@ -1229,15 +1228,13 @@ async fn layer_download_handler(
    let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
    let layer_file_name = get_request_param(&request, "layer_file_name")?;
    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
-    let layer_name = LayerName::from_str(layer_file_name)
-        .map_err(|s| ApiError::BadRequest(anyhow::anyhow!(s)))?;
    let state = get_state(&request);

    let timeline =
        active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id)
            .await?;
    let downloaded = timeline
-        .download_layer(&layer_name)
+        .download_layer(layer_file_name)
        .await
        .map_err(ApiError::InternalServerError)?;

@@ -1261,14 +1258,11 @@ async fn evict_timeline_layer_handler(
    let layer_file_name = get_request_param(&request, "layer_file_name")?;
    let state = get_state(&request);

-    let layer_name = LayerName::from_str(layer_file_name)
-        .map_err(|s| ApiError::BadRequest(anyhow::anyhow!(s)))?;
-
    let timeline =
        active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id)
            .await?;
    let evicted = timeline
-        .evict_layer(&layer_name)
+        .evict_layer(layer_file_name)
        .await
        .map_err(ApiError::InternalServerError)?;

@@ -1643,6 +1637,12 @@ async fn tenant_time_travel_remote_storage_handler(
        )));
    }

+    let Some(storage) = state.remote_storage.as_ref() else {
+        return Err(ApiError::InternalServerError(anyhow::anyhow!(
+            "remote storage not configured, cannot run time travel"
+        )));
+    };
+
    if timestamp > done_if_after {
        return Err(ApiError::BadRequest(anyhow!(
            "The done_if_after timestamp comes before the timestamp to recover to"
@@ -1652,7 +1652,7 @@ async fn tenant_time_travel_remote_storage_handler(
    tracing::info!("Issuing time travel request internally. timestamp={timestamp_raw}, done_if_after={done_if_after_raw}");

    remote_timeline_client::upload::time_travel_recover_tenant(
-        &state.remote_storage,
+        storage,
        &tenant_shard_id,
        timestamp,
        done_if_after,
@@ -1709,7 +1709,12 @@ async fn timeline_gc_handler(
    let gc_req: TimelineGcRequest = json_request(&mut request).await?;

    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
-    let gc_result = mgr::immediate_gc(tenant_shard_id, timeline_id, gc_req, cancel, &ctx).await?;
+    let wait_task_done = mgr::immediate_gc(tenant_shard_id, timeline_id, gc_req, cancel, &ctx)?;
+    let gc_result = wait_task_done
+        .await
+        .context("wait for gc task")
+        .map_err(ApiError::InternalServerError)?
+        .map_err(ApiError::InternalServerError)?;

    json_response(StatusCode::OK, gc_result)
 }
@@ -1822,81 +1827,17 @@ async fn timeline_download_remote_layers_handler_get(
    json_response(StatusCode::OK, info)
 }

-async fn timeline_detach_ancestor_handler(
-    request: Request<Body>,
-    _cancel: CancellationToken,
-) -> Result<Response<Body>, ApiError> {
-    use crate::tenant::timeline::detach_ancestor::Options;
-    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
-    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
-    let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
-
-    let span = tracing::info_span!("detach_ancestor", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), %timeline_id);
-
-    async move {
-        let mut options = Options::default();
-
-        let rewrite_concurrency =
-            parse_query_param::<_, std::num::NonZeroUsize>(&request, "rewrite_concurrency")?;
-        let copy_concurrency =
-            parse_query_param::<_, std::num::NonZeroUsize>(&request, "copy_concurrency")?;
-
-        [
-            (&mut options.rewrite_concurrency, rewrite_concurrency),
-            (&mut options.copy_concurrency, copy_concurrency),
-        ]
-        .into_iter()
-        .filter_map(|(target, val)| val.map(|val| (target, val)))
-        .for_each(|(target, val)| *target = val);
-
-        let state = get_state(&request);
-
-        let tenant = state
-            .tenant_manager
-            .get_attached_tenant_shard(tenant_shard_id)?;
-
-        tenant.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await?;
-
-        let ctx = RequestContext::new(TaskKind::DetachAncestor, DownloadBehavior::Download);
-        let ctx = &ctx;
-
-        let timeline = tenant
-            .get_timeline(timeline_id, true)
-            .map_err(|e| ApiError::NotFound(e.into()))?;
-
-        let (_guard, prepared) = timeline
-            .prepare_to_detach_from_ancestor(&tenant, options, ctx)
-            .await
-            .map_err(|e| ApiError::InternalServerError(e.into()))?;
-
-        let res = state
-            .tenant_manager
-            .complete_detaching_timeline_ancestor(tenant_shard_id, timeline_id, prepared, ctx)
-            .await;
-
-        match res {
-            Ok(reparented_timelines) => {
-                let resp = pageserver_api::models::detach_ancestor::AncestorDetached {
-                    reparented_timelines,
-                };
-
-                json_response(StatusCode::OK, resp)
-            }
-            Err(e) => Err(ApiError::InternalServerError(
-                e.context("timeline detach completion"),
-            )),
-        }
-    }
-    .instrument(span)
-    .await
-}
-
 async fn deletion_queue_flush(
    r: Request<Body>,
    cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
    let state = get_state(&r);

+    if state.remote_storage.is_none() {
+        // Nothing to do if remote storage is disabled.
+        return json_response(StatusCode::OK, ());
+    }
+
    let execute = parse_query_param(&r, "execute")?.unwrap_or(false);

    let flush = async {
@@ -2061,11 +2002,18 @@ async fn disk_usage_eviction_run(
    };

    let state = get_state(&r);
+
+    let Some(storage) = state.remote_storage.as_ref() else {
+        return Err(ApiError::InternalServerError(anyhow::anyhow!(
+            "remote storage not configured, cannot run eviction iteration"
+        )));
+    };
+
    let eviction_state = state.disk_usage_eviction_state.clone();

    let res = crate::disk_usage_eviction_task::disk_usage_eviction_task_iteration_impl(
        &eviction_state,
-        &state.remote_storage,
+        storage,
        usage,
        &state.tenant_manager,
        config.eviction_order,
@@ -2102,23 +2050,29 @@ async fn tenant_scan_remote_handler(
    let state = get_state(&request);
    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;

+    let Some(remote_storage) = state.remote_storage.as_ref() else {
+        return Err(ApiError::BadRequest(anyhow::anyhow!(
+            "Remote storage not configured"
+        )));
+    };
+
    let mut response = TenantScanRemoteStorageResponse::default();

    let (shards, _other_keys) =
-        list_remote_tenant_shards(&state.remote_storage, tenant_id, cancel.clone())
+        list_remote_tenant_shards(remote_storage, tenant_id, cancel.clone())
            .await
            .map_err(|e| ApiError::InternalServerError(anyhow::anyhow!(e)))?;

    for tenant_shard_id in shards {
        let (timeline_ids, _other_keys) =
-            list_remote_timelines(&state.remote_storage, tenant_shard_id, cancel.clone())
+            list_remote_timelines(remote_storage, tenant_shard_id, cancel.clone())
                .await
                .map_err(|e| ApiError::InternalServerError(anyhow::anyhow!(e)))?;

        let mut generation = Generation::none();
        for timeline_id in timeline_ids {
            match download_index_part(
-                &state.remote_storage,
+                remote_storage,
                &tenant_shard_id,
                &timeline_id,
                Generation::MAX,
@@ -2329,97 +2283,6 @@ async fn get_utilization(
        .map_err(ApiError::InternalServerError)
 }

-/// Report on the largest tenants on this pageserver, for the storage controller to identify
-/// candidates for splitting
-async fn post_top_tenants(
-    mut r: Request<Body>,
-    _cancel: CancellationToken,
-) -> Result<Response<Body>, ApiError> {
-    check_permission(&r, None)?;
-    let request: TopTenantShardsRequest = json_request(&mut r).await?;
-    let state = get_state(&r);
-
-    fn get_size_metric(sizes: &TopTenantShardItem, order_by: &TenantSorting) -> u64 {
-        match order_by {
-            TenantSorting::ResidentSize => sizes.resident_size,
-            TenantSorting::MaxLogicalSize => sizes.max_logical_size,
-        }
-    }
-
-    #[derive(Eq, PartialEq)]
-    struct HeapItem {
-        metric: u64,
-        sizes: TopTenantShardItem,
-    }
-
-    impl PartialOrd for HeapItem {
-        fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
-            Some(self.cmp(other))
-        }
-    }
-
-    /// Heap items have reverse ordering on their metric: this enables using BinaryHeap, which
-    /// supports popping the greatest item but not the smallest.
-    impl Ord for HeapItem {
-        fn cmp(&self, other: &Self) -> std::cmp::Ordering {
-            Reverse(self.metric).cmp(&Reverse(other.metric))
-        }
-    }
-
-    let mut top_n: BinaryHeap<HeapItem> = BinaryHeap::with_capacity(request.limit);
-
-    // FIXME: this is a lot of clones to take this tenant list
-    for (tenant_shard_id, tenant_slot) in state.tenant_manager.list() {
-        if let Some(shards_lt) = request.where_shards_lt {
-            // Ignore tenants which already have >= this many shards
-            if tenant_shard_id.shard_count >= shards_lt {
-                continue;
-            }
-        }
-
-        let sizes = match tenant_slot {
-            TenantSlot::Attached(tenant) => tenant.get_sizes(),
-            TenantSlot::Secondary(_) | TenantSlot::InProgress(_) => {
-                continue;
-            }
-        };
-        let metric = get_size_metric(&sizes, &request.order_by);
-
-        if let Some(gt) = request.where_gt {
-            // Ignore tenants whose metric is <= the lower size threshold, to do less sorting work
-            if metric <= gt {
-                continue;
-            }
-        };
-
-        match top_n.peek() {
-            None => {
-                // Top N list is empty: candidate becomes first member
-                top_n.push(HeapItem { metric, sizes });
-            }
-            Some(i) if i.metric > metric && top_n.len() < request.limit => {
-                // Lowest item in list is greater than our candidate, but we aren't at limit yet: push to end
-                top_n.push(HeapItem { metric, sizes });
-            }
-            Some(i) if i.metric > metric => {
-                // List is at limit and lowest value is greater than our candidate, drop it.
-            }
-            Some(_) => top_n.push(HeapItem { metric, sizes }),
-        }
-
-        while top_n.len() > request.limit {
-            top_n.pop();
-        }
-    }
-
-    json_response(
-        StatusCode::OK,
-        TopTenantShardsResponse {
-            shards: top_n.into_iter().map(|i| i.sizes).collect(),
-        },
-    )
-}
-
 /// Common functionality of all the HTTP API handlers.
 ///
 /// - Adds a tracing span to each request (by `request_span`)
@@ -2652,10 +2515,6 @@ pub fn make_router(
            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/download_remote_layers",
            |r| api_handler(r, timeline_download_remote_layers_handler_get),
        )
-        .put(
-            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/detach_ancestor",
-            |r| api_handler(r, timeline_detach_ancestor_handler),
-        )
        .delete("/v1/tenant/:tenant_shard_id/timeline/:timeline_id", |r| {
            api_handler(r, timeline_delete_handler)
        })
@@ -2706,6 +2565,5 @@ pub fn make_router(
        )
        .put("/v1/io_engine", |r| api_handler(r, put_io_engine_handler))
        .get("/v1/utilization", |r| api_handler(r, get_utilization))
-        .post("/v1/top_tenants", |r| api_handler(r, post_top_tenants))
        .any(handler_404))
 }
--- a/pageserver/src/lib.rs
+++ b/pageserver/src/lib.rs
@@ -57,7 +57,7 @@ pub use crate::metrics::preinitialize_metrics;
 #[tracing::instrument(skip_all, fields(%exit_code))]
 pub async fn shutdown_pageserver(
    tenant_manager: &TenantManager,
-    mut deletion_queue: DeletionQueue,
+    deletion_queue: Option<DeletionQueue>,
    exit_code: i32,
 ) {
    use std::time::Duration;
@@ -89,7 +89,9 @@ pub async fn shutdown_pageserver(
    .await;

    // Best effort to persist any outstanding deletions, to avoid leaking objects
-    deletion_queue.shutdown(Duration::from_secs(5)).await;
+    if let Some(mut deletion_queue) = deletion_queue {
+        deletion_queue.shutdown(Duration::from_secs(5)).await;
+    }

    // Shut down the HTTP endpoint last, so that you can still check the server's
    // status while it's shutting down.
@@ -112,6 +114,10 @@ pub async fn shutdown_pageserver(
    std::process::exit(exit_code);
 }

+/// The name of the metadata file pageserver creates per timeline.
+/// Full path: `tenants/<tenant_id>/timelines/<timeline_id>/metadata`.
+pub const METADATA_FILE_NAME: &str = "metadata";
+
 /// Per-tenant configuration file.
 /// Full path: `tenants/<tenant_id>/config`.
 pub(crate) const TENANT_CONFIG_NAME: &str = "config";
--- a/pageserver/src/metrics.rs
+++ b/pageserver/src/metrics.rs
@@ -585,15 +585,6 @@ static CURRENT_LOGICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
    .expect("failed to define current logical size metric")
 });

-static AUX_FILE_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
-    register_int_gauge_vec!(
-        "pageserver_aux_file_estimated_size",
-        "The size of all aux files for a timeline in aux file v2 store.",
-        &["tenant_id", "shard_id", "timeline_id"]
-    )
-    .expect("failed to define a metric")
-});
-
 pub(crate) mod initial_logical_size {
    use metrics::{register_int_counter, register_int_counter_vec, IntCounter, IntCounterVec};
    use once_cell::sync::Lazy;
@@ -1521,80 +1512,29 @@ static REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER: Lazy<IntCounterVec> = Lazy
 });

 pub(crate) struct TenantManagerMetrics {
-    tenant_slots_attached: UIntGauge,
-    tenant_slots_secondary: UIntGauge,
-    tenant_slots_inprogress: UIntGauge,
+    pub(crate) tenant_slots: UIntGauge,
    pub(crate) tenant_slot_writes: IntCounter,
    pub(crate) unexpected_errors: IntCounter,
 }

-impl TenantManagerMetrics {
-    /// Helpers for tracking slots.  Note that these do not track the lifetime of TenantSlot objects
-    /// exactly: they track the lifetime of the slots _in the tenant map_.
-    pub(crate) fn slot_inserted(&self, slot: &TenantSlot) {
-        match slot {
-            TenantSlot::Attached(_) => {
-                self.tenant_slots_attached.inc();
-            }
-            TenantSlot::Secondary(_) => {
-                self.tenant_slots_secondary.inc();
-            }
-            TenantSlot::InProgress(_) => {
-                self.tenant_slots_inprogress.inc();
-            }
-        }
-    }
-
-    pub(crate) fn slot_removed(&self, slot: &TenantSlot) {
-        match slot {
-            TenantSlot::Attached(_) => {
-                self.tenant_slots_attached.dec();
-            }
-            TenantSlot::Secondary(_) => {
-                self.tenant_slots_secondary.dec();
-            }
-            TenantSlot::InProgress(_) => {
-                self.tenant_slots_inprogress.dec();
-            }
-        }
-    }
-
-    #[cfg(all(debug_assertions, not(test)))]
-    pub(crate) fn slots_total(&self) -> u64 {
-        self.tenant_slots_attached.get()
-            + self.tenant_slots_secondary.get()
-            + self.tenant_slots_inprogress.get()
-    }
-}
-
 pub(crate) static TENANT_MANAGER: Lazy<TenantManagerMetrics> = Lazy::new(|| {
-    let tenant_slots = register_uint_gauge_vec!(
+    TenantManagerMetrics {
+    tenant_slots: register_uint_gauge!(
        "pageserver_tenant_manager_slots",
        "How many slots currently exist, including all attached, secondary and in-progress operations",
-        &["mode"]
    )
-    .expect("failed to define a metric");
-    TenantManagerMetrics {
-        tenant_slots_attached: tenant_slots
-            .get_metric_with_label_values(&["attached"])
-            .unwrap(),
-        tenant_slots_secondary: tenant_slots
-            .get_metric_with_label_values(&["secondary"])
-            .unwrap(),
-        tenant_slots_inprogress: tenant_slots
-            .get_metric_with_label_values(&["inprogress"])
-            .unwrap(),
-        tenant_slot_writes: register_int_counter!(
-            "pageserver_tenant_manager_slot_writes",
-            "Writes to a tenant slot, including all of create/attach/detach/delete"
-        )
-        .expect("failed to define a metric"),
-        unexpected_errors: register_int_counter!(
-            "pageserver_tenant_manager_unexpected_errors_total",
-            "Number of unexpected conditions encountered: nonzero value indicates a non-fatal bug."
-        )
-        .expect("failed to define a metric"),
-    }
+    .expect("failed to define a metric"),
+    tenant_slot_writes: register_int_counter!(
+        "pageserver_tenant_manager_slot_writes",
+        "Writes to a tenant slot, including all of create/attach/detach/delete"
+    )
+    .expect("failed to define a metric"),
+    unexpected_errors: register_int_counter!(
+        "pageserver_tenant_manager_unexpected_errors_total",
+        "Number of unexpected conditions encountered: nonzero value indicates a non-fatal bug."
+    )
+    .expect("failed to define a metric"),
+}
 });

 pub(crate) struct DeletionQueueMetrics {
@@ -1999,6 +1939,29 @@ impl Default for WalRedoProcessCounters {
 pub(crate) static WAL_REDO_PROCESS_COUNTERS: Lazy<WalRedoProcessCounters> =
    Lazy::new(WalRedoProcessCounters::default);

+#[cfg(not(test))]
+pub mod wal_redo {
+    use super::*;
+
+    static PROCESS_KIND: Lazy<std::sync::Mutex<UIntGaugeVec>> = Lazy::new(|| {
+        std::sync::Mutex::new(
+            register_uint_gauge_vec!(
+                "pageserver_wal_redo_process_kind",
+                "The configured process kind for walredo",
+                &["kind"],
+            )
+            .unwrap(),
+        )
+    });
+
+    pub fn set_process_kind_metric(kind: crate::walredo::ProcessKind) {
+        // use guard to avoid races around the next two steps
+        let guard = PROCESS_KIND.lock().unwrap();
+        guard.reset();
+        guard.with_label_values(&[&format!("{kind}")]).set(1);
+    }
+}
+
 /// Similar to `prometheus::HistogramTimer` but does not record on drop.
 pub(crate) struct StorageTimeMetricsTimer {
    metrics: StorageTimeMetrics,
@@ -2098,10 +2061,9 @@ pub(crate) struct TimelineMetrics {
    pub garbage_collect_histo: StorageTimeMetrics,
    pub find_gc_cutoffs_histo: StorageTimeMetrics,
    pub last_record_gauge: IntGauge,
-    pub resident_physical_size_gauge: UIntGauge,
+    resident_physical_size_gauge: UIntGauge,
    /// copy of LayeredTimeline.current_logical_size
    pub current_logical_size_gauge: UIntGauge,
-    pub aux_file_size_gauge: IntGauge,
    pub directory_entries_count_gauge: Lazy<UIntGauge, Box<dyn Send + Fn() -> UIntGauge>>,
    pub evictions: IntCounter,
    pub evictions_with_low_residence_duration: std::sync::RwLock<EvictionsWithLowResidenceDuration>,
@@ -2174,9 +2136,6 @@ impl TimelineMetrics {
        let current_logical_size_gauge = CURRENT_LOGICAL_SIZE
            .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
            .unwrap();
-        let aux_file_size_gauge = AUX_FILE_SIZE
-            .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
-            .unwrap();
        // TODO use impl Trait syntax here once we have ability to use it: https://github.com/rust-lang/rust/issues/63065
        let directory_entries_count_gauge_closure = {
            let tenant_shard_id = *tenant_shard_id;
@@ -2214,7 +2173,6 @@ impl TimelineMetrics {
            last_record_gauge,
            resident_physical_size_gauge,
            current_logical_size_gauge,
-            aux_file_size_gauge,
            directory_entries_count_gauge,
            evictions,
            evictions_with_low_residence_duration: std::sync::RwLock::new(
@@ -2255,7 +2213,6 @@ impl TimelineMetrics {
            let _ = metric.remove_label_values(&[tenant_id, shard_id, timeline_id]);
        }
        let _ = EVICTIONS.remove_label_values(&[tenant_id, shard_id, timeline_id]);
-        let _ = AUX_FILE_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);

        self.evictions_with_low_residence_duration
            .write()
@@ -2312,45 +2269,43 @@ use pin_project_lite::pin_project;
 use std::collections::HashMap;
 use std::num::NonZeroUsize;
 use std::pin::Pin;
-use std::sync::atomic::AtomicU64;
 use std::sync::{Arc, Mutex};
 use std::task::{Context, Poll};
 use std::time::{Duration, Instant};

 use crate::context::{PageContentKind, RequestContext};
 use crate::task_mgr::TaskKind;
-use crate::tenant::mgr::TenantSlot;

 /// Maintain a per timeline gauge in addition to the global gauge.
-pub(crate) struct PerTimelineRemotePhysicalSizeGauge {
-    last_set: AtomicU64,
+struct PerTimelineRemotePhysicalSizeGauge {
+    last_set: u64,
    gauge: UIntGauge,
 }

 impl PerTimelineRemotePhysicalSizeGauge {
    fn new(per_timeline_gauge: UIntGauge) -> Self {
        Self {
-            last_set: AtomicU64::new(0),
+            last_set: per_timeline_gauge.get(),
            gauge: per_timeline_gauge,
        }
    }
-    pub(crate) fn set(&self, sz: u64) {
+    fn set(&mut self, sz: u64) {
        self.gauge.set(sz);
-        let prev = self.last_set.swap(sz, std::sync::atomic::Ordering::Relaxed);
-        if sz < prev {
-            REMOTE_PHYSICAL_SIZE_GLOBAL.sub(prev - sz);
+        if sz < self.last_set {
+            REMOTE_PHYSICAL_SIZE_GLOBAL.sub(self.last_set - sz);
        } else {
-            REMOTE_PHYSICAL_SIZE_GLOBAL.add(sz - prev);
+            REMOTE_PHYSICAL_SIZE_GLOBAL.add(sz - self.last_set);
        };
+        self.last_set = sz;
    }
-    pub(crate) fn get(&self) -> u64 {
+    fn get(&self) -> u64 {
        self.gauge.get()
    }
 }

 impl Drop for PerTimelineRemotePhysicalSizeGauge {
    fn drop(&mut self) {
-        REMOTE_PHYSICAL_SIZE_GLOBAL.sub(self.last_set.load(std::sync::atomic::Ordering::Relaxed));
+        REMOTE_PHYSICAL_SIZE_GLOBAL.sub(self.last_set);
    }
 }

@@ -2358,7 +2313,7 @@ pub(crate) struct RemoteTimelineClientMetrics {
    tenant_id: String,
    shard_id: String,
    timeline_id: String,
-    pub(crate) remote_physical_size_gauge: PerTimelineRemotePhysicalSizeGauge,
+    remote_physical_size_gauge: Mutex<Option<PerTimelineRemotePhysicalSizeGauge>>,
    calls: Mutex<HashMap<(&'static str, &'static str), IntCounterPair>>,
    bytes_started_counter: Mutex<HashMap<(&'static str, &'static str), IntCounter>>,
    bytes_finished_counter: Mutex<HashMap<(&'static str, &'static str), IntCounter>>,
@@ -2366,27 +2321,38 @@ pub(crate) struct RemoteTimelineClientMetrics {

 impl RemoteTimelineClientMetrics {
    pub fn new(tenant_shard_id: &TenantShardId, timeline_id: &TimelineId) -> Self {
-        let tenant_id_str = tenant_shard_id.tenant_id.to_string();
-        let shard_id_str = format!("{}", tenant_shard_id.shard_slug());
-        let timeline_id_str = timeline_id.to_string();
-
-        let remote_physical_size_gauge = PerTimelineRemotePhysicalSizeGauge::new(
-            REMOTE_PHYSICAL_SIZE
-                .get_metric_with_label_values(&[&tenant_id_str, &shard_id_str, &timeline_id_str])
-                .unwrap(),
-        );
-
        RemoteTimelineClientMetrics {
-            tenant_id: tenant_id_str,
-            shard_id: shard_id_str,
-            timeline_id: timeline_id_str,
+            tenant_id: tenant_shard_id.tenant_id.to_string(),
+            shard_id: format!("{}", tenant_shard_id.shard_slug()),
+            timeline_id: timeline_id.to_string(),
            calls: Mutex::new(HashMap::default()),
            bytes_started_counter: Mutex::new(HashMap::default()),
            bytes_finished_counter: Mutex::new(HashMap::default()),
-            remote_physical_size_gauge,
+            remote_physical_size_gauge: Mutex::new(None),
        }
    }

+    pub(crate) fn remote_physical_size_set(&self, sz: u64) {
+        let mut guard = self.remote_physical_size_gauge.lock().unwrap();
+        let gauge = guard.get_or_insert_with(|| {
+            PerTimelineRemotePhysicalSizeGauge::new(
+                REMOTE_PHYSICAL_SIZE
+                    .get_metric_with_label_values(&[
+                        &self.tenant_id,
+                        &self.shard_id,
+                        &self.timeline_id,
+                    ])
+                    .unwrap(),
+            )
+        });
+        gauge.set(sz);
+    }
+
+    pub(crate) fn remote_physical_size_get(&self) -> u64 {
+        let guard = self.remote_physical_size_gauge.lock().unwrap();
+        guard.as_ref().map(|gauge| gauge.get()).unwrap_or(0)
+    }
+
    pub fn remote_operation_time(
        &self,
        file_kind: &RemoteOpFileKind,
@@ -2911,8 +2877,6 @@ pub fn preinitialize_metrics() {
        &WALRECEIVER_CANDIDATES_REMOVED,
        &tokio_epoll_uring::THREAD_LOCAL_LAUNCH_FAILURES,
        &tokio_epoll_uring::THREAD_LOCAL_LAUNCH_SUCCESSES,
-        &REMOTE_ONDEMAND_DOWNLOADED_LAYERS,
-        &REMOTE_ONDEMAND_DOWNLOADED_BYTES,
    ]
    .into_iter()
    .for_each(|c| {
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -32,7 +32,6 @@ use std::str;
 use std::str::FromStr;
 use std::sync::Arc;
 use std::time::Duration;
-use std::time::Instant;
 use tokio::io::AsyncWriteExt;
 use tokio::io::{AsyncRead, AsyncWrite};
 use tokio_util::io::StreamReader;
@@ -50,6 +49,7 @@ use utils::{
 use crate::auth::check_permission;
 use crate::basebackup;
 use crate::basebackup::BasebackupError;
+use crate::config::PageServerConf;
 use crate::context::{DownloadBehavior, RequestContext};
 use crate::import_datadir::import_wal_from_tar;
 use crate::metrics;
@@ -59,15 +59,13 @@ use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
 use crate::span::debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id;
 use crate::task_mgr;
 use crate::task_mgr::TaskKind;
+use crate::tenant::mgr;
+use crate::tenant::mgr::get_active_tenant_with_timeout;
 use crate::tenant::mgr::GetActiveTenantError;
-use crate::tenant::mgr::GetTenantError;
-use crate::tenant::mgr::ShardResolveResult;
 use crate::tenant::mgr::ShardSelector;
-use crate::tenant::mgr::TenantManager;
 use crate::tenant::timeline::WaitLsnError;
 use crate::tenant::GetTimelineError;
 use crate::tenant::PageReconstructError;
-use crate::tenant::Tenant;
 use crate::tenant::Timeline;
 use crate::trace::Tracer;
 use pageserver_api::key::rel_block_to_key;
@@ -137,7 +135,7 @@ async fn read_tar_eof(mut reader: (impl AsyncRead + Unpin)) -> anyhow::Result<()
 /// Listens for connections, and launches a new handler task for each.
 ///
 pub async fn libpq_listener_main(
-    tenant_manager: Arc<TenantManager>,
+    conf: &'static PageServerConf,
    broker_client: storage_broker::BrokerClientChannel,
    auth: Option<Arc<SwappableJwtAuth>>,
    listener: TcpListener,
@@ -182,7 +180,7 @@ pub async fn libpq_listener_main(
                    "serving compute connection task",
                    false,
                    page_service_conn_main(
-                        tenant_manager.clone(),
+                        conf,
                        broker_client.clone(),
                        local_auth,
                        socket,
@@ -205,7 +203,7 @@ pub async fn libpq_listener_main(

 #[instrument(skip_all, fields(peer_addr))]
 async fn page_service_conn_main(
-    tenant_manager: Arc<TenantManager>,
+    conf: &'static PageServerConf,
    broker_client: storage_broker::BrokerClientChannel,
    auth: Option<Arc<SwappableJwtAuth>>,
    socket: tokio::net::TcpStream,
@@ -262,8 +260,7 @@ async fn page_service_conn_main(
    // and create a child per-query context when it invokes process_query.
    // But it's in a shared crate, so, we store connection_ctx inside PageServerHandler
    // and create the per-query context in process_query ourselves.
-    let mut conn_handler =
-        PageServerHandler::new(tenant_manager, broker_client, auth, connection_ctx);
+    let mut conn_handler = PageServerHandler::new(conf, broker_client, auth, connection_ctx);
    let pgbackend = PostgresBackend::new_from_io(socket, peer_addr, auth_type, None)?;

    match pgbackend
@@ -294,12 +291,11 @@ struct HandlerTimeline {
 }

 struct PageServerHandler {
+    _conf: &'static PageServerConf,
    broker_client: storage_broker::BrokerClientChannel,
    auth: Option<Arc<SwappableJwtAuth>>,
    claims: Option<Claims>,

-    tenant_manager: Arc<TenantManager>,
-
    /// The context created for the lifetime of the connection
    /// services by this PageServerHandler.
    /// For each query received over the connection,
@@ -385,13 +381,13 @@ impl From<WaitLsnError> for QueryError {

 impl PageServerHandler {
    pub fn new(
-        tenant_manager: Arc<TenantManager>,
+        conf: &'static PageServerConf,
        broker_client: storage_broker::BrokerClientChannel,
        auth: Option<Arc<SwappableJwtAuth>>,
        connection_ctx: RequestContext,
    ) -> Self {
        PageServerHandler {
-            tenant_manager,
+            _conf: conf,
            broker_client,
            auth,
            claims: None,
@@ -556,9 +552,13 @@ impl PageServerHandler {
    {
        debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id();

-        let tenant = self
-            .get_active_tenant_with_timeout(tenant_id, ShardSelector::First, ACTIVE_TENANT_TIMEOUT)
-            .await?;
+        let tenant = mgr::get_active_tenant_with_timeout(
+            tenant_id,
+            ShardSelector::First,
+            ACTIVE_TENANT_TIMEOUT,
+            &task_mgr::shutdown_token(),
+        )
+        .await?;

        // Make request tracer if needed
        let mut tracer = if tenant.get_trace_read_requests() {
@@ -726,9 +726,13 @@ impl PageServerHandler {

        // Create empty timeline
        info!("creating new timeline");
-        let tenant = self
-            .get_active_tenant_with_timeout(tenant_id, ShardSelector::Zero, ACTIVE_TENANT_TIMEOUT)
-            .await?;
+        let tenant = get_active_tenant_with_timeout(
+            tenant_id,
+            ShardSelector::Zero,
+            ACTIVE_TENANT_TIMEOUT,
+            &task_mgr::shutdown_token(),
+        )
+        .await?;
        let timeline = tenant
            .create_empty_timeline(timeline_id, base_lsn, pg_version, &ctx)
            .await?;
@@ -1366,72 +1370,20 @@ impl PageServerHandler {
        timeline_id: TimelineId,
        selector: ShardSelector,
    ) -> Result<Arc<Timeline>, GetActiveTimelineError> {
-        let tenant = self
-            .get_active_tenant_with_timeout(tenant_id, selector, ACTIVE_TENANT_TIMEOUT)
-            .await
-            .map_err(GetActiveTimelineError::Tenant)?;
+        let tenant = get_active_tenant_with_timeout(
+            tenant_id,
+            selector,
+            ACTIVE_TENANT_TIMEOUT,
+            &task_mgr::shutdown_token(),
+        )
+        .await
+        .map_err(GetActiveTimelineError::Tenant)?;
        let timeline = tenant.get_timeline(timeline_id, true)?;
        set_tracing_field_shard_id(&timeline);
        Ok(timeline)
    }
-
-    /// Get a shard's [`Tenant`] in its active state, if present.  If we don't find the shard and some
-    /// slots for this tenant are `InProgress` then we will wait.
-    /// If we find the [`Tenant`] and it's not yet in state [`TenantState::Active`], we will wait.
-    ///
-    /// `timeout` is used as a total timeout for the whole wait operation.
-    async fn get_active_tenant_with_timeout(
-        &self,
-        tenant_id: TenantId,
-        shard_selector: ShardSelector,
-        timeout: Duration,
-    ) -> Result<Arc<Tenant>, GetActiveTenantError> {
-        let wait_start = Instant::now();
-        let deadline = wait_start + timeout;
-
-        // Resolve TenantId to TenantShardId.  This is usually a quick one-shot thing, the loop is
-        // for handling the rare case that the slot we're accessing is InProgress.
-        let tenant_shard = loop {
-            let resolved = self
-                .tenant_manager
-                .resolve_attached_shard(&tenant_id, shard_selector);
-            match resolved {
-                ShardResolveResult::Found(tenant_shard) => break tenant_shard,
-                ShardResolveResult::NotFound => {
-                    return Err(GetActiveTenantError::NotFound(GetTenantError::NotFound(
-                        tenant_id,
-                    )));
-                }
-                ShardResolveResult::InProgress(barrier) => {
-                    // We can't authoritatively answer right now: wait for InProgress state
-                    // to end, then try again
-                    tokio::select! {
-                        _ = self.await_connection_cancelled() => {
-                            return Err(GetActiveTenantError::Cancelled)
-                        },
-                        _  = barrier.wait() => {
-                            // The barrier completed: proceed around the loop to try looking up again
-                        },
-                        _ = tokio::time::sleep(deadline.duration_since(Instant::now())) => {
-                            return Err(GetActiveTenantError::WaitForActiveTimeout {
-                                latest_state: None,
-                                wait_time: timeout,
-                            });
-                        }
-                    }
-                }
-            };
-        };
-
-        tracing::debug!("Waiting for tenant to enter active state...");
-        tenant_shard
-            .wait_to_become_active(deadline.duration_since(Instant::now()))
-            .await?;
-        Ok(tenant_shard)
-    }
 }

-#[async_trait::async_trait]
 impl<IO> postgres_backend::Handler<IO> for PageServerHandler
 where
    IO: AsyncRead + AsyncWrite + Send + Sync + Unpin,
@@ -1818,13 +1770,13 @@ where

            self.check_permission(Some(tenant_id))?;

-            let tenant = self
-                .get_active_tenant_with_timeout(
-                    tenant_id,
-                    ShardSelector::Zero,
-                    ACTIVE_TENANT_TIMEOUT,
-                )
-                .await?;
+            let tenant = get_active_tenant_with_timeout(
+                tenant_id,
+                ShardSelector::Zero,
+                ACTIVE_TENANT_TIMEOUT,
+                &task_mgr::shutdown_token(),
+            )
+            .await?;
            pgb.write_message_noflush(&BeMessage::RowDescription(&[
                RowDescriptor::int8_col(b"checkpoint_distance"),
                RowDescriptor::int8_col(b"checkpoint_timeout"),
--- a/pageserver/src/pgdatadir_mapping.rs
+++ b/pageserver/src/pgdatadir_mapping.rs
@@ -10,9 +10,9 @@ use super::tenant::{PageReconstructError, Timeline};
 use crate::context::RequestContext;
 use crate::keyspace::{KeySpace, KeySpaceAccum};
 use crate::metrics::WAL_INGEST;
+use crate::repository::*;
 use crate::span::debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id;
 use crate::walrecord::NeonWalRecord;
-use crate::{aux_file, repository::*};
 use anyhow::{ensure, Context};
 use bytes::{Buf, Bytes, BytesMut};
 use enum_map::Enum;
@@ -24,7 +24,6 @@ use pageserver_api::key::{
    AUX_FILES_KEY, CHECKPOINT_KEY, CONTROLFILE_KEY, DBDIR_KEY, TWOPHASEDIR_KEY,
 };
 use pageserver_api::keyspace::SparseKeySpace;
-use pageserver_api::models::AuxFilePolicy;
 use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind};
 use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
 use postgres_ffi::BLCKSZ;
@@ -671,7 +670,7 @@ impl Timeline {
        self.get(CHECKPOINT_KEY, lsn, ctx).await
    }

-    async fn list_aux_files_v1(
+    pub(crate) async fn list_aux_files(
        &self,
        lsn: Lsn,
        ctx: &RequestContext,
@@ -689,67 +688,6 @@ impl Timeline {
        }
    }

-    async fn list_aux_files_v2(
-        &self,
-        lsn: Lsn,
-        ctx: &RequestContext,
-    ) -> Result<HashMap<String, Bytes>, PageReconstructError> {
-        let kv = self
-            .scan(KeySpace::single(Key::metadata_aux_key_range()), lsn, ctx)
-            .await
-            .context("scan")?;
-        let mut result = HashMap::new();
-        let mut sz = 0;
-        for (_, v) in kv {
-            let v = v.context("get value")?;
-            let v = aux_file::decode_file_value_bytes(&v).context("value decode")?;
-            for (fname, content) in v {
-                sz += fname.len();
-                sz += content.len();
-                result.insert(fname, content);
-            }
-        }
-        self.aux_file_size_estimator.on_base_backup(sz);
-        Ok(result)
-    }
-
-    pub(crate) async fn list_aux_files(
-        &self,
-        lsn: Lsn,
-        ctx: &RequestContext,
-    ) -> Result<HashMap<String, Bytes>, PageReconstructError> {
-        match self.get_switch_aux_file_policy() {
-            AuxFilePolicy::V1 => self.list_aux_files_v1(lsn, ctx).await,
-            AuxFilePolicy::V2 => self.list_aux_files_v2(lsn, ctx).await,
-            AuxFilePolicy::CrossValidation => {
-                let v1_result = self.list_aux_files_v1(lsn, ctx).await;
-                let v2_result = self.list_aux_files_v2(lsn, ctx).await;
-                match (v1_result, v2_result) {
-                    (Ok(v1), Ok(v2)) => {
-                        if v1 != v2 {
-                            tracing::error!(
-                                "unmatched aux file v1 v2 result:\nv1 {v1:?}\nv2 {v2:?}"
-                            );
-                            return Err(PageReconstructError::Other(anyhow::anyhow!(
-                                "unmatched aux file v1 v2 result"
-                            )));
-                        }
-                        Ok(v1)
-                    }
-                    (Ok(_), Err(v2)) => {
-                        tracing::error!("aux file v1 returns Ok while aux file v2 returns an err");
-                        Err(v2)
-                    }
-                    (Err(v1), Ok(_)) => {
-                        tracing::error!("aux file v2 returns Ok while aux file v1 returns an err");
-                        Err(v1)
-                    }
-                    (Err(_), Err(v2)) => Err(v2),
-                }
-            }
-        }
-    }
-
    /// Does the same as get_current_logical_size but counted on demand.
    /// Used to initialize the logical size tracking on startup.
    ///
@@ -1451,9 +1389,6 @@ impl<'a> DatadirModification<'a> {
    }

    pub fn init_aux_dir(&mut self) -> anyhow::Result<()> {
-        if let AuxFilePolicy::V2 = self.tline.get_switch_aux_file_policy() {
-            return Ok(());
-        }
        let buf = AuxFilesDirectory::ser(&AuxFilesDirectory {
            files: HashMap::new(),
        })?;
@@ -1469,144 +1404,90 @@ impl<'a> DatadirModification<'a> {
        content: &[u8],
        ctx: &RequestContext,
    ) -> anyhow::Result<()> {
-        let policy = self.tline.get_switch_aux_file_policy();
-        if let AuxFilePolicy::V2 | AuxFilePolicy::CrossValidation = policy {
-            let key = aux_file::encode_aux_file_key(path);
-            // retrieve the key from the engine
-            let old_val = match self.get(key, ctx).await {
-                Ok(val) => Some(val),
-                Err(PageReconstructError::MissingKey(_)) => None,
-                Err(e) => return Err(e.into()),
-            };
-            let files: Vec<(&str, &[u8])> = if let Some(ref old_val) = old_val {
-                aux_file::decode_file_value(old_val)?
+        let file_path = path.to_string();
+        let content = if content.is_empty() {
+            None
+        } else {
+            Some(Bytes::copy_from_slice(content))
+        };
+
+        let n_files;
+        let mut aux_files = self.tline.aux_files.lock().await;
+        if let Some(mut dir) = aux_files.dir.take() {
+            // We already updated aux files in `self`: emit a delta and update our latest value.
+            dir.upsert(file_path.clone(), content.clone());
+            n_files = dir.files.len();
+            if aux_files.n_deltas == MAX_AUX_FILE_DELTAS {
+                self.put(
+                    AUX_FILES_KEY,
+                    Value::Image(Bytes::from(
+                        AuxFilesDirectory::ser(&dir).context("serialize")?,
+                    )),
+                );
+                aux_files.n_deltas = 0;
            } else {
-                Vec::new()
-            };
-            let mut other_files = Vec::with_capacity(files.len());
-            let mut modifying_file = None;
-            for file @ (p, content) in files {
-                if path == p {
-                    assert!(
-                        modifying_file.is_none(),
-                        "duplicated entries found for {}",
-                        path
+                self.put(
+                    AUX_FILES_KEY,
+                    Value::WalRecord(NeonWalRecord::AuxFile { file_path, content }),
+                );
+                aux_files.n_deltas += 1;
+            }
+            aux_files.dir = Some(dir);
+        } else {
+            // Check if the AUX_FILES_KEY is initialized
+            match self.get(AUX_FILES_KEY, ctx).await {
+                Ok(dir_bytes) => {
+                    let mut dir = AuxFilesDirectory::des(&dir_bytes)?;
+                    // Key is already set, we may append a delta
+                    self.put(
+                        AUX_FILES_KEY,
+                        Value::WalRecord(NeonWalRecord::AuxFile {
+                            file_path: file_path.clone(),
+                            content: content.clone(),
+                        }),
                    );
-                    modifying_file = Some(content);
-                } else {
-                    other_files.push(file);
+                    dir.upsert(file_path, content);
+                    n_files = dir.files.len();
+                    aux_files.dir = Some(dir);
                }
-            }
-            let mut new_files = other_files;
-            match (modifying_file, content.is_empty()) {
-                (Some(old_content), false) => {
-                    self.tline
-                        .aux_file_size_estimator
-                        .on_update(old_content.len(), content.len());
-                    new_files.push((path, content));
+                Err(
+                    e @ (PageReconstructError::AncestorStopping(_)
+                    | PageReconstructError::Cancelled
+                    | PageReconstructError::AncestorLsnTimeout(_)),
+                ) => {
+                    // Important that we do not interpret a shutdown error as "not found" and thereby
+                    // reset the map.
+                    return Err(e.into());
                }
-                (Some(old_content), true) => {
-                    self.tline
-                        .aux_file_size_estimator
-                        .on_remove(old_content.len());
-                    // not adding the file key to the final `new_files` vec.
-                }
-                (None, false) => {
-                    self.tline.aux_file_size_estimator.on_add(content.len());
-                    new_files.push((path, content));
-                }
-                (None, true) => anyhow::bail!("removing non-existing aux file: {}", path),
-            }
-            let new_val = aux_file::encode_file_value(&new_files)?;
-            self.put(key, Value::Image(new_val.into()));
-        }
+                // Note: we added missing key error variant in https://github.com/neondatabase/neon/pull/7393 but
+                // the original code assumes all other errors are missing keys. Therefore, we keep the code path
+                // the same for now, though in theory, we should only match the `MissingKey` variant.
+                Err(
+                    PageReconstructError::Other(_)
+                    | PageReconstructError::WalRedo(_)
+                    | PageReconstructError::MissingKey { .. },
+                ) => {
+                    // Key is missing, we must insert an image as the basis for subsequent deltas.

-        if let AuxFilePolicy::V1 | AuxFilePolicy::CrossValidation = policy {
-            let file_path = path.to_string();
-            let content = if content.is_empty() {
-                None
-            } else {
-                Some(Bytes::copy_from_slice(content))
-            };
-
-            let n_files;
-            let mut aux_files = self.tline.aux_files.lock().await;
-            if let Some(mut dir) = aux_files.dir.take() {
-                // We already updated aux files in `self`: emit a delta and update our latest value.
-                dir.upsert(file_path.clone(), content.clone());
-                n_files = dir.files.len();
-                if aux_files.n_deltas == MAX_AUX_FILE_DELTAS {
+                    let mut dir = AuxFilesDirectory {
+                        files: HashMap::new(),
+                    };
+                    dir.upsert(file_path, content);
                    self.put(
                        AUX_FILES_KEY,
                        Value::Image(Bytes::from(
                            AuxFilesDirectory::ser(&dir).context("serialize")?,
                        )),
                    );
-                    aux_files.n_deltas = 0;
-                } else {
-                    self.put(
-                        AUX_FILES_KEY,
-                        Value::WalRecord(NeonWalRecord::AuxFile { file_path, content }),
-                    );
-                    aux_files.n_deltas += 1;
-                }
-                aux_files.dir = Some(dir);
-            } else {
-                // Check if the AUX_FILES_KEY is initialized
-                match self.get(AUX_FILES_KEY, ctx).await {
-                    Ok(dir_bytes) => {
-                        let mut dir = AuxFilesDirectory::des(&dir_bytes)?;
-                        // Key is already set, we may append a delta
-                        self.put(
-                            AUX_FILES_KEY,
-                            Value::WalRecord(NeonWalRecord::AuxFile {
-                                file_path: file_path.clone(),
-                                content: content.clone(),
-                            }),
-                        );
-                        dir.upsert(file_path, content);
-                        n_files = dir.files.len();
-                        aux_files.dir = Some(dir);
-                    }
-                    Err(
-                        e @ (PageReconstructError::AncestorStopping(_)
-                        | PageReconstructError::Cancelled
-                        | PageReconstructError::AncestorLsnTimeout(_)),
-                    ) => {
-                        // Important that we do not interpret a shutdown error as "not found" and thereby
-                        // reset the map.
-                        return Err(e.into());
-                    }
-                    // Note: we added missing key error variant in https://github.com/neondatabase/neon/pull/7393 but
-                    // the original code assumes all other errors are missing keys. Therefore, we keep the code path
-                    // the same for now, though in theory, we should only match the `MissingKey` variant.
-                    Err(
-                        PageReconstructError::Other(_)
-                        | PageReconstructError::WalRedo(_)
-                        | PageReconstructError::MissingKey { .. },
-                    ) => {
-                        // Key is missing, we must insert an image as the basis for subsequent deltas.
-
-                        let mut dir = AuxFilesDirectory {
-                            files: HashMap::new(),
-                        };
-                        dir.upsert(file_path, content);
-                        self.put(
-                            AUX_FILES_KEY,
-                            Value::Image(Bytes::from(
-                                AuxFilesDirectory::ser(&dir).context("serialize")?,
-                            )),
-                        );
-                        n_files = 1;
-                        aux_files.dir = Some(dir);
-                    }
+                    n_files = 1;
+                    aux_files.dir = Some(dir);
                }
            }
-
-            self.pending_directory_entries
-                .push((DirectoryKind::AuxFiles, n_files));
        }

+        self.pending_directory_entries
+            .push((DirectoryKind::AuxFiles, n_files));
+
        Ok(())
    }

@@ -1697,7 +1578,7 @@ impl<'a> DatadirModification<'a> {
        }

        if !self.pending_deletions.is_empty() {
-            writer.delete_batch(&self.pending_deletions, ctx).await?;
+            writer.delete_batch(&self.pending_deletions).await?;
            self.pending_deletions.clear();
        }

--- a/pageserver/src/repository.rs
+++ b/pageserver/src/repository.rs
@@ -33,6 +33,7 @@ impl Value {
    }
 }

+#[cfg(test)]
 #[derive(Debug, PartialEq)]
 pub(crate) enum InvalidInput {
    TooShortValue,
@@ -41,8 +42,10 @@ pub(crate) enum InvalidInput {

 /// We could have a ValueRef where everything is `serde(borrow)`. Before implementing that, lets
 /// use this type for querying if a slice looks some particular way.
+#[cfg(test)]
 pub(crate) struct ValueBytes;

+#[cfg(test)]
 impl ValueBytes {
    pub(crate) fn will_init(raw: &[u8]) -> Result<bool, InvalidInput> {
        if raw.len() < 12 {
--- a/pageserver/src/task_mgr.rs
+++ b/pageserver/src/task_mgr.rs
@@ -319,9 +319,6 @@ pub enum TaskKind {
    // Eviction. One per timeline.
    Eviction,

-    // Ingest housekeeping (flushing ephemeral layers on time threshold or disk pressure)
-    IngestHousekeeping,
-
    /// See [`crate::disk_usage_eviction_task`].
    DiskUsageEviction,

@@ -370,8 +367,6 @@ pub enum TaskKind {

    #[cfg(test)]
    UnitTest,
-
-    DetachAncestor,
 }

 #[derive(Default)]
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -21,7 +21,6 @@ use futures::FutureExt;
 use futures::StreamExt;
 use pageserver_api::models;
 use pageserver_api::models::TimelineState;
-use pageserver_api::models::TopTenantShardItem;
 use pageserver_api::models::WalRedoManagerStatus;
 use pageserver_api::shard::ShardIdentity;
 use pageserver_api::shard::ShardStripeSize;
@@ -191,7 +190,7 @@ pub const TENANT_DELETED_MARKER_FILE_NAME: &str = "deleted";
 #[derive(Clone)]
 pub struct TenantSharedResources {
    pub broker_client: storage_broker::BrokerClientChannel,
-    pub remote_storage: GenericRemoteStorage,
+    pub remote_storage: Option<GenericRemoteStorage>,
    pub deletion_queue_client: DeletionQueueClient,
 }

@@ -293,7 +292,7 @@ pub struct Tenant {
    walredo_mgr: Option<Arc<WalRedoManager>>,

    // provides access to timeline data sitting in the remote storage
-    pub(crate) remote_storage: GenericRemoteStorage,
+    pub(crate) remote_storage: Option<GenericRemoteStorage>,

    // Access to global deletion queue for when this tenant wants to schedule a deletion
    deletion_queue_client: DeletionQueueClient,
@@ -323,9 +322,6 @@ pub struct Tenant {
    /// All [`Tenant::timelines`] of a given [`Tenant`] instance share the same [`throttle::Throttle`] instance.
    pub(crate) timeline_get_throttle:
        Arc<throttle::Throttle<&'static crate::metrics::tenant_throttling::TimelineGet>>,
-
-    /// An ongoing timeline detach must be checked during attempts to GC or compact a timeline.
-    ongoing_timeline_detach: std::sync::Mutex<Option<(TimelineId, utils::completion::Barrier)>>,
 }

 impl std::fmt::Debug for Tenant {
@@ -552,22 +548,21 @@ impl Tenant {
        );

        if let Some(index_part) = index_part.as_ref() {
-            timeline.remote_client.init_upload_queue(index_part)?;
-        } else {
+            timeline
+                .remote_client
+                .as_ref()
+                .unwrap()
+                .init_upload_queue(index_part)?;
+        } else if self.remote_storage.is_some() {
            // No data on the remote storage, but we have local metadata file. We can end up
            // here with timeline_create being interrupted before finishing index part upload.
            // By doing what we do here, the index part upload is retried.
            // If control plane retries timeline creation in the meantime, the mgmt API handler
            // for timeline creation will coalesce on the upload we queue here.
-
            // FIXME: this branch should be dead code as we no longer write local metadata.
-
-            timeline
-                .remote_client
-                .init_upload_queue_for_empty_remote(&metadata)?;
-            timeline
-                .remote_client
-                .schedule_index_upload_for_full_metadata_update(&metadata)?;
+            let rtc = timeline.remote_client.as_ref().unwrap();
+            rtc.init_upload_queue_for_empty_remote(&metadata)?;
+            rtc.schedule_index_upload_for_full_metadata_update(&metadata)?;
        }

        timeline
@@ -779,14 +774,14 @@ impl Tenant {
                    AttachType::Normal
                };

-                let preload = match &mode {
-                    SpawnMode::Create => {
+                let preload = match (&mode, &remote_storage) {
+                    (SpawnMode::Create, _) => {
                        None
                    },
-                    SpawnMode::Eager | SpawnMode::Lazy => {
+                    (SpawnMode::Eager | SpawnMode::Lazy, Some(remote_storage)) => {
                        let _preload_timer = TENANT.preload.start_timer();
                        let res = tenant_clone
-                            .preload(&remote_storage, task_mgr::shutdown_token())
+                            .preload(remote_storage, task_mgr::shutdown_token())
                            .await;
                        match res {
                            Ok(p) => Some(p),
@@ -796,7 +791,10 @@ impl Tenant {
                            }
                        }
                    }
-
+                    (_, None) => {
+                        let _preload_timer = TENANT.preload.start_timer();
+                        None
+                    }
                };

                // Remote preload is complete.
@@ -1020,7 +1018,7 @@ impl Tenant {
                index_part,
                remote_metadata,
                TimelineResources {
-                    remote_client,
+                    remote_client: Some(remote_client),
                    deletion_queue_client: self.deletion_queue_client.clone(),
                    timeline_get_throttle: self.timeline_get_throttle.clone(),
                },
@@ -1046,7 +1044,7 @@ impl Tenant {
                Arc::clone(self),
                timeline_id,
                &index_part.metadata,
-                remote_timeline_client,
+                Some(remote_timeline_client),
                self.deletion_queue_client.clone(),
            )
            .instrument(tracing::info_span!("timeline_delete", %timeline_id))
@@ -1138,7 +1136,9 @@ impl Tenant {
        let mut size = 0;

        for timeline in self.list_timelines() {
-            size += timeline.remote_client.get_remote_physical_size();
+            if let Some(remote_client) = &timeline.remote_client {
+                size += remote_client.get_remote_physical_size();
+            }
        }

        size
@@ -1188,7 +1188,6 @@ impl Tenant {
    pub fn create_broken_tenant(
        conf: &'static PageServerConf,
        tenant_shard_id: TenantShardId,
-        remote_storage: GenericRemoteStorage,
        reason: String,
    ) -> Arc<Tenant> {
        Arc::new(Tenant::new(
@@ -1203,7 +1202,7 @@ impl Tenant {
            ShardIdentity::broken(tenant_shard_id.shard_number, tenant_shard_id.shard_count),
            None,
            tenant_shard_id,
-            remote_storage,
+            None,
            DeletionQueueClient::broken(),
        ))
    }
@@ -1396,7 +1395,13 @@ impl Tenant {
        tline.freeze_and_flush().await.context("freeze_and_flush")?;

        // Make sure the freeze_and_flush reaches remote storage.
-        tline.remote_client.wait_completion().await.unwrap();
+        tline
+            .remote_client
+            .as_ref()
+            .unwrap()
+            .wait_completion()
+            .await
+            .unwrap();

        let tl = uninit_tl.finish_creation()?;
        // The non-test code would call tl.activate() here.
@@ -1462,19 +1467,20 @@ impl Tenant {
                    return Err(CreateTimelineError::Conflict);
                }

-                // Wait for uploads to complete, so that when we return Ok, the timeline
-                // is known to be durable on remote storage. Just like we do at the end of
-                // this function, after we have created the timeline ourselves.
-                //
-                // We only really care that the initial version of `index_part.json` has
-                // been uploaded. That's enough to remember that the timeline
-                // exists. However, there is no function to wait specifically for that so
-                // we just wait for all in-progress uploads to finish.
-                existing
-                    .remote_client
-                    .wait_completion()
-                    .await
-                    .context("wait for timeline uploads to complete")?;
+                if let Some(remote_client) = existing.remote_client.as_ref() {
+                    // Wait for uploads to complete, so that when we return Ok, the timeline
+                    // is known to be durable on remote storage. Just like we do at the end of
+                    // this function, after we have created the timeline ourselves.
+                    //
+                    // We only really care that the initial version of `index_part.json` has
+                    // been uploaded. That's enough to remember that the timeline
+                    // exists. However, there is no function to wait specifically for that so
+                    // we just wait for all in-progress uploads to finish.
+                    remote_client
+                        .wait_completion()
+                        .await
+                        .context("wait for timeline uploads to complete")?;
+                }

                return Ok(existing);
            }
@@ -1550,14 +1556,14 @@ impl Tenant {
        // the timeline is visible in [`Self::timelines`], but it is _not_ durable yet.  We must
        // not send a success to the caller until it is.  The same applies to handling retries,
        // see the handling of [`TimelineExclusionError::AlreadyExists`] above.
-        let kind = ancestor_timeline_id
-            .map(|_| "branched")
-            .unwrap_or("bootstrapped");
-        loaded_timeline
-            .remote_client
-            .wait_completion()
-            .await
-            .with_context(|| format!("wait for {} timeline initial uploads to complete", kind))?;
+        if let Some(remote_client) = loaded_timeline.remote_client.as_ref() {
+            let kind = ancestor_timeline_id
+                .map(|_| "branched")
+                .unwrap_or("bootstrapped");
+            remote_client.wait_completion().await.with_context(|| {
+                format!("wait for {} timeline initial uploads to complete", kind)
+            })?;
+        }

        loaded_timeline.activate(self.clone(), broker_client, None, ctx);

@@ -1670,34 +1676,6 @@ impl Tenant {
        Ok(())
    }

-    // Call through to all timelines to freeze ephemeral layers if needed.  Usually
-    // this happens during ingest: this background housekeeping is for freezing layers
-    // that are open but haven't been written to for some time.
-    async fn ingest_housekeeping(&self) {
-        // Scan through the hashmap and collect a list of all the timelines,
-        // while holding the lock. Then drop the lock and actually perform the
-        // compactions.  We don't want to block everything else while the
-        // compaction runs.
-        let timelines = {
-            self.timelines
-                .lock()
-                .unwrap()
-                .values()
-                .filter_map(|timeline| {
-                    if timeline.is_active() {
-                        Some(timeline.clone())
-                    } else {
-                        None
-                    }
-                })
-                .collect::<Vec<_>>()
-        };
-
-        for timeline in &timelines {
-            timeline.maybe_freeze_ephemeral_layer().await;
-        }
-    }
-
    pub fn current_state(&self) -> TenantState {
        self.state.borrow().clone()
    }
@@ -2152,26 +2130,32 @@ impl Tenant {
    ) -> anyhow::Result<()> {
        let timelines = self.timelines.lock().unwrap().clone();
        for timeline in timelines.values() {
+            let Some(tl_client) = &timeline.remote_client else {
+                anyhow::bail!("Remote storage is mandatory");
+            };
+
+            let Some(remote_storage) = &self.remote_storage else {
+                anyhow::bail!("Remote storage is mandatory");
+            };
+
            // We do not block timeline creation/deletion during splits inside the pageserver: it is up to higher levels
            // to ensure that they do not start a split if currently in the process of doing these.

            // Upload an index from the parent: this is partly to provide freshness for the
            // child tenants that will copy it, and partly for general ease-of-debugging: there will
            // always be a parent shard index in the same generation as we wrote the child shard index.
-            timeline
-                .remote_client
-                .schedule_index_upload_for_file_changes()?;
-            timeline.remote_client.wait_completion().await?;
+            tl_client.schedule_index_upload_for_file_changes()?;
+            tl_client.wait_completion().await?;

            // Shut down the timeline's remote client: this means that the indices we write
            // for child shards will not be invalidated by the parent shard deleting layers.
-            timeline.remote_client.shutdown().await;
+            tl_client.shutdown().await;

            // Download methods can still be used after shutdown, as they don't flow through the remote client's
            // queue.  In principal the RemoteTimelineClient could provide this without downloading it, but this
            // operation is rare, so it's simpler to just download it (and robustly guarantees that the index
            // we use here really is the remotely persistent one).
-            let result = timeline.remote_client
+            let result = tl_client
                .download_index_file(&self.cancel)
                .instrument(info_span!("download_index_file", tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug(), timeline_id=%timeline.timeline_id))
                .await?;
@@ -2184,7 +2168,7 @@ impl Tenant {

            for child_shard in child_shards {
                upload_index_part(
-                    &self.remote_storage,
+                    remote_storage,
                    child_shard,
                    &timeline.timeline_id,
                    self.generation,
@@ -2197,31 +2181,6 @@ impl Tenant {

        Ok(())
    }
-
-    pub(crate) fn get_sizes(&self) -> TopTenantShardItem {
-        let mut result = TopTenantShardItem {
-            id: self.tenant_shard_id,
-            resident_size: 0,
-            physical_size: 0,
-            max_logical_size: 0,
-        };
-
-        for timeline in self.timelines.lock().unwrap().values() {
-            result.resident_size += timeline.metrics.resident_physical_size_gauge.get();
-
-            result.physical_size += timeline
-                .remote_client
-                .metrics
-                .remote_physical_size_gauge
-                .get();
-            result.max_logical_size = std::cmp::max(
-                result.max_logical_size,
-                timeline.metrics.current_logical_size_gauge.get(),
-            );
-        }
-
-        result
-    }
 }

 /// Given a Vec of timelines and their ancestors (timeline_id, ancestor_id),
@@ -2485,7 +2444,7 @@ impl Tenant {
        shard_identity: ShardIdentity,
        walredo_mgr: Option<Arc<WalRedoManager>>,
        tenant_shard_id: TenantShardId,
-        remote_storage: GenericRemoteStorage,
+        remote_storage: Option<GenericRemoteStorage>,
        deletion_queue_client: DeletionQueueClient,
    ) -> Tenant {
        let (state, mut rx) = watch::channel(state);
@@ -2570,7 +2529,6 @@ impl Tenant {
                &crate::metrics::tenant_throttling::TIMELINE_GET,
            )),
            tenant_conf: Arc::new(ArcSwap::from_pointee(attached_conf)),
-            ongoing_timeline_detach: std::sync::Mutex::default(),
        }
    }

@@ -2810,7 +2768,7 @@ impl Tenant {
        // See comments in [`Tenant::branch_timeline`] for more information about why branch
        // creation task can run concurrently with timeline's GC iteration.
        for timeline in gc_timelines {
-            if cancel.is_cancelled() {
+            if task_mgr::is_shutdown_requested() || cancel.is_cancelled() {
                // We were requested to shut down. Stop and return with the progress we
                // made.
                break;
@@ -3129,10 +3087,11 @@ impl Tenant {
        // We still need to upload its metadata eagerly: if other nodes `attach` the tenant and miss this timeline, their GC
        // could get incorrect information and remove more layers, than needed.
        // See also https://github.com/neondatabase/neon/issues/3865
-        new_timeline
-            .remote_client
-            .schedule_index_upload_for_full_metadata_update(&metadata)
-            .context("branch initial metadata upload")?;
+        if let Some(remote_client) = new_timeline.remote_client.as_ref() {
+            remote_client
+                .schedule_index_upload_for_full_metadata_update(&metadata)
+                .context("branch initial metadata upload")?;
+        }

        Ok(new_timeline)
    }
@@ -3164,6 +3123,11 @@ impl Tenant {
        pgdata_path: &Utf8PathBuf,
        timeline_id: &TimelineId,
    ) -> anyhow::Result<()> {
+        let Some(storage) = &self.remote_storage else {
+            // No remote storage?  No upload.
+            return Ok(());
+        };
+
        let temp_path = timelines_path.join(format!(
            "{INITDB_PATH}.upload-{timeline_id}.{TEMP_FILE_SUFFIX}"
        ));
@@ -3187,7 +3151,7 @@ impl Tenant {
        backoff::retry(
            || async {
                self::remote_timeline_client::upload_initdb_dir(
-                    &self.remote_storage,
+                    storage,
                    &self.tenant_shard_id.tenant_id,
                    timeline_id,
                    pgdata_zstd.try_clone().await?,
@@ -3244,6 +3208,9 @@ impl Tenant {
            }
        }
        if let Some(existing_initdb_timeline_id) = load_existing_initdb {
+            let Some(storage) = &self.remote_storage else {
+                bail!("no storage configured but load_existing_initdb set to {existing_initdb_timeline_id}");
+            };
            if existing_initdb_timeline_id != timeline_id {
                let source_path = &remote_initdb_archive_path(
                    &self.tenant_shard_id.tenant_id,
@@ -3253,7 +3220,7 @@ impl Tenant {
                    &remote_initdb_archive_path(&self.tenant_shard_id.tenant_id, &timeline_id);

                // if this fails, it will get retried by retried control plane requests
-                self.remote_storage
+                storage
                    .copy_object(source_path, dest_path, &self.cancel)
                    .await
                    .context("copy initdb tar")?;
@@ -3261,7 +3228,7 @@ impl Tenant {
            let (initdb_tar_zst_path, initdb_tar_zst) =
                self::remote_timeline_client::download_initdb_tar_zst(
                    self.conf,
-                    &self.remote_storage,
+                    storage,
                    &self.tenant_shard_id,
                    &existing_initdb_timeline_id,
                    &self.cancel,
@@ -3356,14 +3323,20 @@ impl Tenant {

    /// Call this before constructing a timeline, to build its required structures
    fn build_timeline_resources(&self, timeline_id: TimelineId) -> TimelineResources {
-        let remote_client = RemoteTimelineClient::new(
-            self.remote_storage.clone(),
-            self.deletion_queue_client.clone(),
-            self.conf,
-            self.tenant_shard_id,
-            timeline_id,
-            self.generation,
-        );
+        let remote_client = if let Some(remote_storage) = self.remote_storage.as_ref() {
+            let remote_client = RemoteTimelineClient::new(
+                remote_storage.clone(),
+                self.deletion_queue_client.clone(),
+                self.conf,
+                self.tenant_shard_id,
+                timeline_id,
+                self.generation,
+            );
+            Some(remote_client)
+        } else {
+            None
+        };
+
        TimelineResources {
            remote_client,
            deletion_queue_client: self.deletion_queue_client.clone(),
@@ -3387,9 +3360,9 @@ impl Tenant {
        let tenant_shard_id = self.tenant_shard_id;

        let resources = self.build_timeline_resources(new_timeline_id);
-        resources
-            .remote_client
-            .init_upload_queue_for_empty_remote(new_metadata)?;
+        if let Some(remote_client) = &resources.remote_client {
+            remote_client.init_upload_queue_for_empty_remote(new_metadata)?;
+        }

        let timeline_struct = self
            .create_timeline_struct(
@@ -3557,7 +3530,9 @@ impl Tenant {
            tracing::info!(timeline_id=%timeline.timeline_id, "Flushing...");
            timeline.freeze_and_flush().await?;
            tracing::info!(timeline_id=%timeline.timeline_id, "Waiting for uploads...");
-            timeline.remote_client.wait_completion().await?;
+            if let Some(client) = &timeline.remote_client {
+                client.wait_completion().await?;
+            }

            Ok(())
        }
@@ -3751,7 +3726,7 @@ pub(crate) mod harness {
                image_layer_creation_check_threshold: Some(
                    tenant_conf.image_layer_creation_check_threshold,
                ),
-                switch_aux_file_policy: Some(tenant_conf.switch_aux_file_policy),
+                switch_to_aux_file_v2: Some(tenant_conf.switch_to_aux_file_v2),
            }
        }
    }
@@ -3871,7 +3846,7 @@ pub(crate) mod harness {
                ShardIdentity::unsharded(),
                Some(walredo_mgr),
                self.tenant_shard_id,
-                self.remote_storage.clone(),
+                Some(self.remote_storage.clone()),
                self.deletion_queue.new_client(),
            ));

--- a/pageserver/src/tenant/blob_io.rs
+++ b/pageserver/src/tenant/blob_io.rs
@@ -299,7 +299,7 @@ mod tests {
        // Write part (in block to drop the file)
        let mut offsets = Vec::new();
        {
-            let file = VirtualFile::create(pathbuf.as_path(), &ctx).await?;
+            let file = VirtualFile::create(pathbuf.as_path()).await?;
            let mut wtr = BlobWriter::<BUFFERED>::new(file, 0);
            for blob in blobs.iter() {
                let (_, res) = wtr.write_blob(blob.clone(), &ctx).await;
@@ -314,7 +314,7 @@ mod tests {
            wtr.flush_buffer(&ctx).await?;
        }

-        let file = VirtualFile::open(pathbuf.as_path(), &ctx).await?;
+        let file = VirtualFile::open(pathbuf.as_path()).await?;
        let rdr = BlockReaderRef::VirtualFile(&file);
        let rdr = BlockCursor::new(rdr);
        for (idx, (blob, offset)) in blobs.iter().zip(offsets.iter()).enumerate() {
--- a/pageserver/src/tenant/block_io.rs
+++ b/pageserver/src/tenant/block_io.rs
@@ -102,7 +102,7 @@ impl<'a> BlockReaderRef<'a> {
            #[cfg(test)]
            TestDisk(r) => r.read_blk(blknum),
            #[cfg(test)]
-            VirtualFile(r) => r.read_blk(blknum, ctx).await,
+            VirtualFile(r) => r.read_blk(blknum).await,
        }
    }
 }
@@ -177,11 +177,10 @@ impl<'a> FileBlockReader<'a> {
        &self,
        buf: PageWriteGuard<'static>,
        blkno: u32,
-        ctx: &RequestContext,
    ) -> Result<PageWriteGuard<'static>, std::io::Error> {
        assert!(buf.len() == PAGE_SZ);
        self.file
-            .read_exact_at_page(buf, blkno as u64 * PAGE_SZ as u64, ctx)
+            .read_exact_at_page(buf, blkno as u64 * PAGE_SZ as u64)
            .await
    }
    /// Read a block.
@@ -207,7 +206,7 @@ impl<'a> FileBlockReader<'a> {
            ReadBufResult::Found(guard) => Ok(guard.into()),
            ReadBufResult::NotFound(write_guard) => {
                // Read the page from disk into the buffer
-                let write_guard = self.fill_buffer(write_guard, blknum, ctx).await?;
+                let write_guard = self.fill_buffer(write_guard, blknum).await?;
                Ok(write_guard.mark_valid().into())
            }
        }
--- a/pageserver/src/tenant/config.rs
+++ b/pageserver/src/tenant/config.rs
@@ -9,7 +9,6 @@
 //! may lead to a data loss.
 //!
 use anyhow::bail;
-use pageserver_api::models::AuxFilePolicy;
 use pageserver_api::models::CompactionAlgorithm;
 use pageserver_api::models::EvictionPolicy;
 use pageserver_api::models::{self, ThrottleConfig};
@@ -371,9 +370,9 @@ pub struct TenantConf {
    // Expresed in multiples of checkpoint distance.
    pub image_layer_creation_check_threshold: u8,

-    /// Switch to a new aux file policy. Switching this flag requires the user has not written any aux file into
+    /// Switch to aux file v2. Switching this flag requires the user has not written any aux file into
    /// the storage before, and this flag cannot be switched back. Otherwise there will be data corruptions.
-    pub switch_aux_file_policy: AuxFilePolicy,
+    pub switch_to_aux_file_v2: bool,
 }

 /// Same as TenantConf, but this struct preserves the information about
@@ -472,7 +471,7 @@ pub struct TenantConfOpt {

    #[serde(skip_serializing_if = "Option::is_none")]
    #[serde(default)]
-    pub switch_aux_file_policy: Option<AuxFilePolicy>,
+    pub switch_to_aux_file_v2: Option<bool>,
 }

 impl TenantConfOpt {
@@ -530,9 +529,9 @@ impl TenantConfOpt {
            image_layer_creation_check_threshold: self
                .image_layer_creation_check_threshold
                .unwrap_or(global_conf.image_layer_creation_check_threshold),
-            switch_aux_file_policy: self
-                .switch_aux_file_policy
-                .unwrap_or(global_conf.switch_aux_file_policy),
+            switch_to_aux_file_v2: self
+                .switch_to_aux_file_v2
+                .unwrap_or(global_conf.switch_to_aux_file_v2),
        }
    }
 }
@@ -574,7 +573,7 @@ impl Default for TenantConf {
            lazy_slru_download: false,
            timeline_get_throttle: crate::tenant::throttle::Config::disabled(),
            image_layer_creation_check_threshold: DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD,
-            switch_aux_file_policy: AuxFilePolicy::V1,
+            switch_to_aux_file_v2: false,
        }
    }
 }
@@ -649,7 +648,7 @@ impl From<TenantConfOpt> for models::TenantConfig {
            lazy_slru_download: value.lazy_slru_download,
            timeline_get_throttle: value.timeline_get_throttle.map(ThrottleConfig::from),
            image_layer_creation_check_threshold: value.image_layer_creation_check_threshold,
-            switch_aux_file_policy: value.switch_aux_file_policy,
+            switch_to_aux_file_v2: value.switch_to_aux_file_v2,
        }
    }
 }
--- a/pageserver/src/tenant/delete.rs
+++ b/pageserver/src/tenant/delete.rs
@@ -181,23 +181,25 @@ async fn ensure_timelines_dir_empty(timelines_path: &Utf8Path) -> Result<(), Del

 async fn remove_tenant_remote_delete_mark(
    conf: &PageServerConf,
-    remote_storage: &GenericRemoteStorage,
+    remote_storage: Option<&GenericRemoteStorage>,
    tenant_shard_id: &TenantShardId,
    cancel: &CancellationToken,
 ) -> Result<(), DeleteTenantError> {
-    let path = remote_tenant_delete_mark_path(conf, tenant_shard_id)?;
-    backoff::retry(
-        || async { remote_storage.delete(&path, cancel).await },
-        TimeoutOrCancel::caused_by_cancel,
-        FAILED_UPLOAD_WARN_THRESHOLD,
-        FAILED_REMOTE_OP_RETRIES,
-        "remove_tenant_remote_delete_mark",
-        cancel,
-    )
-    .await
-    .ok_or_else(|| anyhow::Error::new(TimeoutOrCancel::Cancel))
-    .and_then(|x| x)
-    .context("remove_tenant_remote_delete_mark")?;
+    if let Some(remote_storage) = remote_storage {
+        let path = remote_tenant_delete_mark_path(conf, tenant_shard_id)?;
+        backoff::retry(
+            || async { remote_storage.delete(&path, cancel).await },
+            TimeoutOrCancel::caused_by_cancel,
+            FAILED_UPLOAD_WARN_THRESHOLD,
+            FAILED_REMOTE_OP_RETRIES,
+            "remove_tenant_remote_delete_mark",
+            cancel,
+        )
+        .await
+        .ok_or_else(|| anyhow::Error::new(TimeoutOrCancel::Cancel))
+        .and_then(|x| x)
+        .context("remove_tenant_remote_delete_mark")?;
+    }
    Ok(())
 }

@@ -295,7 +297,7 @@ impl DeleteTenantFlow {
    #[instrument(skip_all)]
    pub(crate) async fn run(
        conf: &'static PageServerConf,
-        remote_storage: GenericRemoteStorage,
+        remote_storage: Option<GenericRemoteStorage>,
        tenants: &'static std::sync::RwLock<TenantsMap>,
        tenant: Arc<Tenant>,
        cancel: &CancellationToken,
@@ -306,7 +308,9 @@ impl DeleteTenantFlow {

        let mut guard = Self::prepare(&tenant).await?;

-        if let Err(e) = Self::run_inner(&mut guard, conf, &remote_storage, &tenant, cancel).await {
+        if let Err(e) =
+            Self::run_inner(&mut guard, conf, remote_storage.as_ref(), &tenant, cancel).await
+        {
            tenant.set_broken(format!("{e:#}")).await;
            return Err(e);
        }
@@ -323,7 +327,7 @@ impl DeleteTenantFlow {
    async fn run_inner(
        guard: &mut OwnedMutexGuard<Self>,
        conf: &'static PageServerConf,
-        remote_storage: &GenericRemoteStorage,
+        remote_storage: Option<&GenericRemoteStorage>,
        tenant: &Tenant,
        cancel: &CancellationToken,
    ) -> Result<(), DeleteTenantError> {
@@ -335,9 +339,14 @@ impl DeleteTenantFlow {
            ))?
        });

-        create_remote_delete_mark(conf, remote_storage, &tenant.tenant_shard_id, cancel)
-            .await
-            .context("remote_mark")?;
+        // IDEA: implement detach as delete without remote storage. Then they would use the same lock (deletion_progress) so won't contend.
+        // Though sounds scary, different mark name?
+        // Detach currently uses remove_dir_all so in case of a crash we can end up in a weird state.
+        if let Some(remote_storage) = &remote_storage {
+            create_remote_delete_mark(conf, remote_storage, &tenant.tenant_shard_id, cancel)
+                .await
+                .context("remote_mark")?
+        }

        fail::fail_point!("tenant-delete-before-create-local-mark", |_| {
            Err(anyhow::anyhow!(
@@ -474,7 +483,7 @@ impl DeleteTenantFlow {
    fn schedule_background(
        guard: OwnedMutexGuard<Self>,
        conf: &'static PageServerConf,
-        remote_storage: GenericRemoteStorage,
+        remote_storage: Option<GenericRemoteStorage>,
        tenants: &'static std::sync::RwLock<TenantsMap>,
        tenant: Arc<Tenant>,
    ) {
@@ -503,7 +512,7 @@ impl DeleteTenantFlow {
    async fn background(
        mut guard: OwnedMutexGuard<Self>,
        conf: &PageServerConf,
-        remote_storage: GenericRemoteStorage,
+        remote_storage: Option<GenericRemoteStorage>,
        tenants: &'static std::sync::RwLock<TenantsMap>,
        tenant: &Arc<Tenant>,
    ) -> Result<(), DeleteTenantError> {
@@ -542,7 +551,7 @@ impl DeleteTenantFlow {

        remove_tenant_remote_delete_mark(
            conf,
-            &remote_storage,
+            remote_storage.as_ref(),
            &tenant.tenant_shard_id,
            &task_mgr::shutdown_token(),
        )
@@ -576,20 +585,9 @@ impl DeleteTenantFlow {

                    // FIXME: we should not be modifying this from outside of mgr.rs.
                    // This will go away when we simplify deletion (https://github.com/neondatabase/neon/issues/5080)
-
-                    // Update stats
-                    match &removed {
-                        TenantsMapRemoveResult::Occupied(slot) => {
-                            crate::metrics::TENANT_MANAGER.slot_removed(slot);
-                        }
-                        TenantsMapRemoveResult::InProgress(barrier) => {
-                            crate::metrics::TENANT_MANAGER
-                                .slot_removed(&TenantSlot::InProgress(barrier.clone()));
-                        }
-                        TenantsMapRemoveResult::Vacant => {
-                            // Nothing changed in map, no metric update
-                        }
-                    }
+                    crate::metrics::TENANT_MANAGER
+                        .tenant_slots
+                        .set(locked.len() as u64);

                    match removed {
                        TenantsMapRemoveResult::Occupied(TenantSlot::Attached(tenant)) => {
--- a/pageserver/src/tenant/ephemeral_file.rs
+++ b/pageserver/src/tenant/ephemeral_file.rs
@@ -28,7 +28,6 @@ impl EphemeralFile {
        conf: &PageServerConf,
        tenant_shard_id: TenantShardId,
        timeline_id: TimelineId,
-        ctx: &RequestContext,
    ) -> Result<EphemeralFile, io::Error> {
        static NEXT_FILENAME: AtomicU64 = AtomicU64::new(1);
        let filename_disambiguator =
@@ -46,7 +45,6 @@ impl EphemeralFile {
                .read(true)
                .write(true)
                .create(true),
-            ctx,
        )
        .await?;

@@ -155,7 +153,7 @@ mod tests {
    async fn test_ephemeral_blobs() -> Result<(), io::Error> {
        let (conf, tenant_id, timeline_id, ctx) = harness("ephemeral_blobs")?;

-        let mut file = EphemeralFile::create(conf, tenant_id, timeline_id, &ctx).await?;
+        let mut file = EphemeralFile::create(conf, tenant_id, timeline_id).await?;

        let pos_foo = file.write_blob(b"foo", &ctx).await?;
        assert_eq!(
--- a/pageserver/src/tenant/ephemeral_file/page_caching.rs
+++ b/pageserver/src/tenant/ephemeral_file/page_caching.rs
@@ -78,7 +78,7 @@ impl RW {
                    page_cache::ReadBufResult::NotFound(write_guard) => {
                        let write_guard = writer
                            .file
-                            .read_exact_at_page(write_guard, blknum as u64 * PAGE_SZ as u64, ctx)
+                            .read_exact_at_page(write_guard, blknum as u64 * PAGE_SZ as u64)
                            .await?;
                        let read_guard = write_guard.mark_valid();
                        return Ok(BlockLease::PageReadGuard(read_guard));
--- a/pageserver/src/tenant/metadata.rs
+++ b/pageserver/src/tenant/metadata.rs
@@ -207,24 +207,6 @@ impl TimelineMetadata {
        self.body.ancestor_lsn
    }

-    /// When reparenting, the `ancestor_lsn` does not change.
-    pub fn reparent(&mut self, timeline: &TimelineId) {
-        assert!(self.body.ancestor_timeline.is_some());
-        // no assertion for redoing this: it's fine, we may have to repeat this multiple times over
-        self.body.ancestor_timeline = Some(*timeline);
-    }
-
-    pub fn detach_from_ancestor(&mut self, branchpoint: &(TimelineId, Lsn)) {
-        if let Some(ancestor) = self.body.ancestor_timeline {
-            assert_eq!(ancestor, branchpoint.0);
-        }
-        if self.body.ancestor_lsn != Lsn(0) {
-            assert_eq!(self.body.ancestor_lsn, branchpoint.1);
-        }
-        self.body.ancestor_timeline = None;
-        self.body.ancestor_lsn = Lsn(0);
-    }
-
    pub fn latest_gc_cutoff_lsn(&self) -> Lsn {
        self.body.latest_gc_cutoff_lsn
    }
--- a/pageserver/src/tenant/mgr.rs
+++ b/pageserver/src/tenant/mgr.rs
@@ -16,9 +16,10 @@ use std::cmp::Ordering;
 use std::collections::{BTreeMap, HashMap};
 use std::ops::Deref;
 use std::sync::Arc;
-use std::time::Duration;
+use std::time::{Duration, Instant};
 use sysinfo::SystemExt;
 use tokio::fs;
+use utils::timeout::{timeout_cancellable, TimeoutCancellableError};

 use anyhow::Context;
 use once_cell::sync::Lazy;
@@ -46,7 +47,7 @@ use crate::tenant::span::debug_assert_current_span_has_tenant_id;
 use crate::tenant::storage_layer::inmemory_layer;
 use crate::tenant::timeline::ShutdownMode;
 use crate::tenant::{AttachedTenantConf, SpawnMode, Tenant, TenantState};
-use crate::{InitializationOrder, IGNORED_TENANT_FILE_NAME, TEMP_FILE_SUFFIX};
+use crate::{InitializationOrder, IGNORED_TENANT_FILE_NAME, METADATA_FILE_NAME, TEMP_FILE_SUFFIX};

 use utils::crashsafe::path_with_suffix_extension;
 use utils::fs_ext::PathExt;
@@ -55,7 +56,6 @@ use utils::id::{TenantId, TimelineId};

 use super::delete::DeleteTenantError;
 use super::secondary::SecondaryTenant;
-use super::timeline::detach_ancestor::PreparedTimelineDetach;
 use super::TenantSharedResources;

 /// For a tenant that appears in TenantsMap, it may either be
@@ -118,7 +118,6 @@ pub(crate) enum TenantsMapRemoveResult {

 /// When resolving a TenantId to a shard, we may be looking for the 0th
 /// shard, or we might be looking for whichever shard holds a particular page.
-#[derive(Copy, Clone)]
 pub(crate) enum ShardSelector {
    /// Only return the 0th shard, if it is present.  If a non-0th shard is present,
    /// ignore it.
@@ -169,14 +168,6 @@ impl TenantStartupMode {
    }
 }

-/// Result type for looking up a TenantId to a specific shard
-pub(crate) enum ShardResolveResult {
-    NotFound,
-    Found(Arc<Tenant>),
-    // Wait for this barrrier, then query again
-    InProgress(utils::completion::Barrier),
-}
-
 impl TenantsMap {
    /// Convenience function for typical usage, where we want to get a `Tenant` object, for
    /// working with attached tenants.  If the TenantId is in the map but in Secondary state,
@@ -190,6 +181,51 @@ impl TenantsMap {
        }
    }

+    /// A page service client sends a TenantId, and to look up the correct Tenant we must
+    /// resolve this to a fully qualified TenantShardId.
+    fn resolve_attached_shard(
+        &self,
+        tenant_id: &TenantId,
+        selector: ShardSelector,
+    ) -> Option<TenantShardId> {
+        let mut want_shard = None;
+        match self {
+            TenantsMap::Initializing => None,
+            TenantsMap::Open(m) | TenantsMap::ShuttingDown(m) => {
+                for slot in m.range(TenantShardId::tenant_range(*tenant_id)) {
+                    // Ignore all slots that don't contain an attached tenant
+                    let tenant = match &slot.1 {
+                        TenantSlot::Attached(t) => t,
+                        _ => continue,
+                    };
+
+                    match selector {
+                        ShardSelector::First => return Some(*slot.0),
+                        ShardSelector::Zero if slot.0.shard_number == ShardNumber(0) => {
+                            return Some(*slot.0)
+                        }
+                        ShardSelector::Page(key) => {
+                            // First slot we see for this tenant, calculate the expected shard number
+                            // for the key: we will use this for checking if this and subsequent
+                            // slots contain the key, rather than recalculating the hash each time.
+                            if want_shard.is_none() {
+                                want_shard = Some(tenant.shard_identity.get_shard_number(&key));
+                            }
+
+                            if Some(tenant.shard_identity.number) == want_shard {
+                                return Some(*slot.0);
+                            }
+                        }
+                        _ => continue,
+                    }
+                }
+
+                // Fall through: we didn't find an acceptable shard
+                None
+            }
+        }
+    }
+
    /// Only for use from DeleteTenantFlow.  This method directly removes a TenantSlot from the map.
    ///
    /// The normal way to remove a tenant is using a SlotGuard, which will gracefully remove the guarded
@@ -210,7 +246,6 @@ impl TenantsMap {
        }
    }

-    #[cfg(all(debug_assertions, not(test)))]
    pub(crate) fn len(&self) -> usize {
        match self {
            TenantsMap::Initializing => 0,
@@ -354,17 +389,22 @@ async fn init_load_generations(
    // deletion list entries may still be valid.  We provide that by pushing a recovery operation into
    // the queue. Sequential processing of te queue ensures that recovery is done before any new tenant deletions
    // are processed, even though we don't block on recovery completing here.
-    let attached_tenants = generations
-        .iter()
-        .flat_map(|(id, start_mode)| {
-            match start_mode {
-                TenantStartupMode::Attached((_mode, generation)) => Some(generation),
-                TenantStartupMode::Secondary => None,
-            }
-            .map(|gen| (*id, *gen))
-        })
-        .collect();
-    resources.deletion_queue_client.recover(attached_tenants)?;
+    //
+    // Must only do this if remote storage is enabled, otherwise deletion queue
+    // is not running and channel push will fail.
+    if resources.remote_storage.is_some() {
+        let attached_tenants = generations
+            .iter()
+            .flat_map(|(id, start_mode)| {
+                match start_mode {
+                    TenantStartupMode::Attached((_mode, generation)) => Some(generation),
+                    TenantStartupMode::Secondary => None,
+                }
+                .map(|gen| (*id, *gen))
+            })
+            .collect();
+        resources.deletion_queue_client.recover(attached_tenants)?;
+    }

    Ok(Some(generations))
 }
@@ -418,6 +458,53 @@ fn load_tenant_config(
        }
    };

+    // Clean up legacy `metadata` files.
+    // Doing it here because every single tenant directory is visited here.
+    // In any later code, there's different treatment of tenant dirs
+    // ... depending on whether the tenant is in re-attach response or not
+    // ... depending on whether the tenant is ignored or not
+    assert_eq!(
+        &conf.tenant_path(&tenant_shard_id),
+        &tenant_dir_path,
+        "later use of conf....path() methods would be dubious"
+    );
+    let timelines: Vec<TimelineId> = match conf.timelines_path(&tenant_shard_id).read_dir_utf8() {
+        Ok(iter) => {
+            let mut timelines = Vec::new();
+            for res in iter {
+                let p = res?;
+                let Some(timeline_id) = p.file_name().parse::<TimelineId>().ok() else {
+                    // skip any entries that aren't TimelineId, such as
+                    // - *.___temp dirs
+                    // - unfinished initdb uploads (test_non_uploaded_root_timeline_is_deleted_after_restart)
+                    continue;
+                };
+                timelines.push(timeline_id);
+            }
+            timelines
+        }
+        Err(e) if e.kind() == std::io::ErrorKind::NotFound => vec![],
+        Err(e) => return Err(anyhow::anyhow!(e)),
+    };
+    for timeline_id in timelines {
+        let timeline_path = &conf.timeline_path(&tenant_shard_id, &timeline_id);
+        let metadata_path = timeline_path.join(METADATA_FILE_NAME);
+        match std::fs::remove_file(&metadata_path) {
+            Ok(()) => {
+                crashsafe::fsync(timeline_path)
+                    .context("fsync timeline dir after removing legacy metadata file")?;
+                info!("removed legacy metadata file at {metadata_path}");
+            }
+            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
+                // something removed the file earlier, or it was never there
+                // We don't care, this software version doesn't write it again, so, we're good.
+            }
+            Err(e) => {
+                anyhow::bail!("remove legacy metadata file: {e}: {metadata_path}");
+            }
+        }
+    }
+
    let tenant_ignore_mark_file = tenant_dir_path.join(IGNORED_TENANT_FILE_NAME);
    if tenant_ignore_mark_file.exists() {
        info!("Found an ignore mark file {tenant_ignore_mark_file:?}, skipping the tenant");
@@ -522,7 +609,6 @@ pub async fn init_tenant_mgr(
                    TenantSlot::Attached(Tenant::create_broken_tenant(
                        conf,
                        tenant_shard_id,
-                        resources.remote_storage.clone(),
                        format!("{}", e),
                    )),
                );
@@ -660,7 +746,6 @@ pub async fn init_tenant_mgr(
            }
        };

-        METRICS.slot_inserted(&slot);
        tenants.insert(tenant_shard_id, slot);
    }

@@ -668,7 +753,7 @@ pub async fn init_tenant_mgr(

    let mut tenants_map = TENANTS.write().unwrap();
    assert!(matches!(&*tenants_map, &TenantsMap::Initializing));
-
+    METRICS.tenant_slots.set(tenants.len() as u64);
    *tenants_map = TenantsMap::Open(tenants);

    Ok(TenantManager {
@@ -715,7 +800,6 @@ fn tenant_spawn(
        "Cannot load tenant, ignore mark found at {tenant_ignore_mark:?}"
    );

-    let remote_storage = resources.remote_storage.clone();
    let tenant = match Tenant::spawn(
        conf,
        tenant_shard_id,
@@ -730,7 +814,7 @@ fn tenant_spawn(
        Ok(tenant) => tenant,
        Err(e) => {
            error!("Failed to spawn tenant {tenant_shard_id}, reason: {e:#}");
-            Tenant::create_broken_tenant(conf, tenant_shard_id, remote_storage, format!("{e:#}"))
+            Tenant::create_broken_tenant(conf, tenant_shard_id, format!("{e:#}"))
        }
    };

@@ -740,14 +824,6 @@ fn tenant_spawn(
 async fn shutdown_all_tenants0(tenants: &std::sync::RwLock<TenantsMap>) {
    let mut join_set = JoinSet::new();

-    #[cfg(all(debug_assertions, not(test)))]
-    {
-        // Check that our metrics properly tracked the size of the tenants map.  This is a convenient location to check,
-        // as it happens implicitly at the end of tests etc.
-        let m = tenants.read().unwrap();
-        debug_assert_eq!(METRICS.slots_total(), m.len() as u64);
-    }
-
    // Atomically, 1. create the shutdown tasks and 2. prevent creation of new tenants.
    let (total_in_progress, total_attached) = {
        let mut m = tenants.write().unwrap();
@@ -1921,167 +1997,6 @@ impl TenantManager {
            })
            .collect())
    }
-
-    /// Completes an earlier prepared timeline detach ancestor.
-    pub(crate) async fn complete_detaching_timeline_ancestor(
-        &self,
-        tenant_shard_id: TenantShardId,
-        timeline_id: TimelineId,
-        prepared: PreparedTimelineDetach,
-        ctx: &RequestContext,
-    ) -> Result<Vec<TimelineId>, anyhow::Error> {
-        struct RevertOnDropSlot(Option<SlotGuard>);
-
-        impl Drop for RevertOnDropSlot {
-            fn drop(&mut self) {
-                if let Some(taken) = self.0.take() {
-                    taken.revert();
-                }
-            }
-        }
-
-        impl RevertOnDropSlot {
-            fn into_inner(mut self) -> SlotGuard {
-                self.0.take().unwrap()
-            }
-        }
-
-        impl std::ops::Deref for RevertOnDropSlot {
-            type Target = SlotGuard;
-
-            fn deref(&self) -> &Self::Target {
-                self.0.as_ref().unwrap()
-            }
-        }
-
-        let slot_guard = tenant_map_acquire_slot(&tenant_shard_id, TenantSlotAcquireMode::Any)?;
-        let slot_guard = RevertOnDropSlot(Some(slot_guard));
-
-        let tenant = {
-            let Some(old_slot) = slot_guard.get_old_value() else {
-                anyhow::bail!(
-                    "Tenant not found when trying to complete detaching timeline ancestor"
-                );
-            };
-
-            let Some(tenant) = old_slot.get_attached() else {
-                anyhow::bail!("Tenant is not in attached state");
-            };
-
-            if !tenant.is_active() {
-                anyhow::bail!("Tenant is not active");
-            }
-
-            tenant.clone()
-        };
-
-        let timeline = tenant.get_timeline(timeline_id, true)?;
-
-        let reparented = timeline
-            .complete_detaching_timeline_ancestor(&tenant, prepared, ctx)
-            .await?;
-
-        let mut slot_guard = slot_guard.into_inner();
-
-        let (_guard, progress) = utils::completion::channel();
-        match tenant.shutdown(progress, ShutdownMode::Hard).await {
-            Ok(()) => {
-                slot_guard.drop_old_value()?;
-            }
-            Err(_barrier) => {
-                slot_guard.revert();
-                // this really should not happen, at all, unless shutdown was already going?
-                anyhow::bail!("Cannot restart Tenant, already shutting down");
-            }
-        }
-
-        let tenant_path = self.conf.tenant_path(&tenant_shard_id);
-        let config = Tenant::load_tenant_config(self.conf, &tenant_shard_id)?;
-
-        let shard_identity = config.shard;
-        let tenant = tenant_spawn(
-            self.conf,
-            tenant_shard_id,
-            &tenant_path,
-            self.resources.clone(),
-            AttachedTenantConf::try_from(config)?,
-            shard_identity,
-            None,
-            self.tenants,
-            SpawnMode::Eager,
-            ctx,
-        )?;
-
-        slot_guard.upsert(TenantSlot::Attached(tenant))?;
-
-        Ok(reparented)
-    }
-
-    /// A page service client sends a TenantId, and to look up the correct Tenant we must
-    /// resolve this to a fully qualified TenantShardId.
-    ///
-    /// During shard splits: we shall see parent shards in InProgress state and skip them, and
-    /// instead match on child shards which should appear in Attached state.  Very early in a shard
-    /// split, or in other cases where a shard is InProgress, we will return our own InProgress result
-    /// to instruct the caller to wait for that to finish before querying again.
-    pub(crate) fn resolve_attached_shard(
-        &self,
-        tenant_id: &TenantId,
-        selector: ShardSelector,
-    ) -> ShardResolveResult {
-        let tenants = self.tenants.read().unwrap();
-        let mut want_shard = None;
-        let mut any_in_progress = None;
-
-        match &*tenants {
-            TenantsMap::Initializing => ShardResolveResult::NotFound,
-            TenantsMap::Open(m) | TenantsMap::ShuttingDown(m) => {
-                for slot in m.range(TenantShardId::tenant_range(*tenant_id)) {
-                    // Ignore all slots that don't contain an attached tenant
-                    let tenant = match &slot.1 {
-                        TenantSlot::Attached(t) => t,
-                        TenantSlot::InProgress(barrier) => {
-                            // We might still find a usable shard, but in case we don't, remember that
-                            // we saw at least one InProgress slot, so that we can distinguish this case
-                            // from a simple NotFound in our return value.
-                            any_in_progress = Some(barrier.clone());
-                            continue;
-                        }
-                        _ => continue,
-                    };
-
-                    match selector {
-                        ShardSelector::First => return ShardResolveResult::Found(tenant.clone()),
-                        ShardSelector::Zero if slot.0.shard_number == ShardNumber(0) => {
-                            return ShardResolveResult::Found(tenant.clone())
-                        }
-                        ShardSelector::Page(key) => {
-                            // First slot we see for this tenant, calculate the expected shard number
-                            // for the key: we will use this for checking if this and subsequent
-                            // slots contain the key, rather than recalculating the hash each time.
-                            if want_shard.is_none() {
-                                want_shard = Some(tenant.shard_identity.get_shard_number(&key));
-                            }
-
-                            if Some(tenant.shard_identity.number) == want_shard {
-                                return ShardResolveResult::Found(tenant.clone());
-                            }
-                        }
-                        _ => continue,
-                    }
-                }
-
-                // Fall through: we didn't find a slot that was in Attached state & matched our selector.  If
-                // we found one or more InProgress slot, indicate to caller that they should retry later.  Otherwise
-                // this requested shard simply isn't found.
-                if let Some(barrier) = any_in_progress {
-                    ShardResolveResult::InProgress(barrier)
-                } else {
-                    ShardResolveResult::NotFound
-                }
-            }
-        }
-    }
 }

 #[derive(Debug, thiserror::Error)]
@@ -2130,6 +2045,105 @@ pub(crate) enum GetActiveTenantError {
    Broken(String),
 }

+/// Get a [`Tenant`] in its active state, resolving `tenant_id` to a shard via `shard_selector`. If the slot is
+/// [`TenantSlot::InProgress`], wait for up to `timeout` (or until `cancel` fires) for that to pass. If the [`Tenant`]
+/// is then not [`TenantState::Active`], wait for whatever remains of `timeout` for it to become active.
+pub(crate) async fn get_active_tenant_with_timeout(
+    tenant_id: TenantId,
+    shard_selector: ShardSelector,
+    timeout: Duration,
+    cancel: &CancellationToken,
+) -> Result<Arc<Tenant>, GetActiveTenantError> {
+    enum WaitFor {
+        Barrier(utils::completion::Barrier),
+        Tenant(Arc<Tenant>),
+    }
+
+    let wait_start = Instant::now();
+    let deadline = wait_start + timeout;
+
+    let (wait_for, tenant_shard_id) = {
+        let locked = TENANTS.read().unwrap();
+
+        // Resolve TenantId to a TenantShardId using the caller's shard selector
+        let tenant_shard_id = locked
+            .resolve_attached_shard(&tenant_id, shard_selector)
+            .ok_or(GetActiveTenantError::NotFound(GetTenantError::NotFound(
+                tenant_id,
+            )))?;
+
+        let peek_slot = tenant_map_peek_slot(&locked, &tenant_shard_id, TenantSlotPeekMode::Read)
+            .map_err(GetTenantError::MapState)?;
+        match peek_slot {
+            Some(TenantSlot::Attached(tenant)) => {
+                match tenant.current_state() {
+                    TenantState::Active => {
+                        // Fast path: the tenant is already active, so no async waiting is needed.
+                        return Ok(tenant.clone());
+                    }
+                    _ => {
+                        tenant.activate_now();
+                        (WaitFor::Tenant(tenant.clone()), tenant_shard_id)
+                    }
+                }
+            }
+            Some(TenantSlot::Secondary(_)) => {
+                return Err(GetActiveTenantError::NotFound(GetTenantError::NotActive(
+                    tenant_shard_id,
+                )))
+            }
+            Some(TenantSlot::InProgress(barrier)) => {
+                (WaitFor::Barrier(barrier.clone()), tenant_shard_id)
+            }
+            None => {
+                return Err(GetActiveTenantError::NotFound(GetTenantError::NotFound(
+                    tenant_id,
+                )))
+            }
+        }
+    };
+
+    let tenant = match wait_for {
+        WaitFor::Barrier(barrier) => {
+            tracing::debug!("Waiting for tenant InProgress state to pass...");
+            timeout_cancellable(
+                deadline.duration_since(Instant::now()),
+                cancel,
+                barrier.wait(),
+            )
+            .await
+            .map_err(|e| match e {
+                TimeoutCancellableError::Timeout => GetActiveTenantError::WaitForActiveTimeout {
+                    latest_state: None, // only the slot barrier was awaited, so there is no tenant state to report
+                    wait_time: wait_start.elapsed(),
+                },
+                TimeoutCancellableError::Cancelled => GetActiveTenantError::Cancelled,
+            })?;
+            { // the barrier has passed: take the lock again and re-read the slot
+                let locked = TENANTS.read().unwrap();
+                let peek_slot =
+                    tenant_map_peek_slot(&locked, &tenant_shard_id, TenantSlotPeekMode::Read)
+                        .map_err(GetTenantError::MapState)?;
+                match peek_slot {
+                    Some(TenantSlot::Attached(tenant)) => tenant.clone(),
+                    _ => {
+                        return Err(GetActiveTenantError::NotFound(GetTenantError::NotActive(
+                            tenant_shard_id,
+                        )))
+                    }
+                }
+            }
+        }
+        WaitFor::Tenant(tenant) => tenant,
+    };
+
+    tracing::debug!("Waiting for tenant to enter active state...");
+    tenant
+        .wait_to_become_active(deadline.duration_since(Instant::now()))
+        .await?;
+    Ok(tenant)
+}
+
 #[derive(Debug, thiserror::Error)]
 pub(crate) enum DeleteTimelineError {
    #[error("Tenant {0}")]
@@ -2156,7 +2170,7 @@ pub(crate) async fn load_tenant(
    tenant_id: TenantId,
    generation: Generation,
    broker_client: storage_broker::BrokerClientChannel,
-    remote_storage: GenericRemoteStorage,
+    remote_storage: Option<GenericRemoteStorage>,
    deletion_queue_client: DeletionQueueClient,
    ctx: &RequestContext,
 ) -> Result<(), TenantMapInsertError> {
@@ -2414,13 +2428,10 @@ impl SlotGuard {
                TenantsMap::Open(m) => m,
            };

-            METRICS.slot_inserted(&new_value);
-
            let replaced = m.insert(self.tenant_shard_id, new_value);
            self.upserted = true;
-            if let Some(replaced) = replaced.as_ref() {
-                METRICS.slot_removed(replaced);
-            }
+
+            METRICS.tenant_slots.set(m.len() as u64);

            replaced
        };
@@ -2530,13 +2541,9 @@ impl Drop for SlotGuard {
                }

                if self.old_value_is_shutdown() {
-                    METRICS.slot_removed(entry.get());
                    entry.remove();
                } else {
-                    let inserting = self.old_value.take().unwrap();
-                    METRICS.slot_inserted(&inserting);
-                    let replaced = entry.insert(inserting);
-                    METRICS.slot_removed(&replaced);
+                    entry.insert(self.old_value.take().unwrap());
                }
            }
            Entry::Vacant(_) => {
@@ -2547,6 +2554,8 @@ impl Drop for SlotGuard {
                );
            }
        }
+
+        METRICS.tenant_slots.set(m.len() as u64);
    }
 }

@@ -2626,9 +2635,7 @@ fn tenant_map_acquire_slot_impl(
            }
            _ => {
                let (completion, barrier) = utils::completion::channel();
-                let inserting = TenantSlot::InProgress(barrier);
-                METRICS.slot_inserted(&inserting);
-                v.insert(inserting);
+                v.insert(TenantSlot::InProgress(barrier));
                tracing::debug!("Vacant, inserted InProgress");
                Ok(SlotGuard::new(*tenant_shard_id, None, completion))
            }
@@ -2664,10 +2671,7 @@ fn tenant_map_acquire_slot_impl(
                _ => {
                    // Happy case: the slot was not in any state that violated our mode
                    let (completion, barrier) = utils::completion::channel();
-                    let in_progress = TenantSlot::InProgress(barrier);
-                    METRICS.slot_inserted(&in_progress);
-                    let old_value = o.insert(in_progress);
-                    METRICS.slot_removed(&old_value);
+                    let old_value = o.insert(TenantSlot::InProgress(barrier));
                    tracing::debug!("Occupied, replaced with InProgress");
                    Ok(SlotGuard::new(
                        *tenant_shard_id,
@@ -2760,73 +2764,86 @@ use {
    utils::http::error::ApiError,
 };

-#[instrument(skip_all, fields(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), %timeline_id))]
-pub(crate) async fn immediate_gc(
+pub(crate) fn immediate_gc(
    tenant_shard_id: TenantShardId,
    timeline_id: TimelineId,
    gc_req: TimelineGcRequest,
    cancel: CancellationToken,
    ctx: &RequestContext,
-) -> Result<GcResult, ApiError> {
-    let tenant = {
-        let guard = TENANTS.read().unwrap();
-        guard
-            .get(&tenant_shard_id)
-            .cloned()
-            .with_context(|| format!("tenant {tenant_shard_id}"))
-            .map_err(|e| ApiError::NotFound(e.into()))?
-    };
+) -> Result<tokio::sync::oneshot::Receiver<Result<GcResult, anyhow::Error>>, ApiError> {
+    let guard = TENANTS.read().unwrap();
+
+    let tenant = guard
+        .get(&tenant_shard_id)
+        .cloned()
+        .with_context(|| format!("tenant {tenant_shard_id}"))
+        .map_err(|e| ApiError::NotFound(e.into()))?;

    let gc_horizon = gc_req.gc_horizon.unwrap_or_else(|| tenant.get_gc_horizon());
    // Use tenant's pitr setting
    let pitr = tenant.get_pitr_interval();

-    tenant.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await?;
-
    // Run in task_mgr to avoid race with tenant_detach operation
-    let ctx: RequestContext =
-        ctx.detached_child(TaskKind::GarbageCollector, DownloadBehavior::Download);
+    let ctx = ctx.detached_child(TaskKind::GarbageCollector, DownloadBehavior::Download);
+    let (task_done, wait_task_done) = tokio::sync::oneshot::channel();
+    let span = info_span!("manual_gc", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), %timeline_id);

-    let _gate_guard = tenant.gate.enter().map_err(|_| ApiError::ShuttingDown)?;
+    // TODO: spawning is redundant now, need to hold the gate
+    task_mgr::spawn(
+        &tokio::runtime::Handle::current(),
+        TaskKind::GarbageCollector,
+        Some(tenant_shard_id),
+        Some(timeline_id),
+        &format!("timeline_gc_handler garbage collection run for tenant {tenant_shard_id} timeline {timeline_id}"),
+        false,
+        async move {
+            fail::fail_point!("immediate_gc_task_pre");

-    fail::fail_point!("immediate_gc_task_pre");
+            #[allow(unused_mut)]
+            let mut result = tenant
+                .gc_iteration(Some(timeline_id), gc_horizon, pitr, &cancel, &ctx)
+                .await;
+                // FIXME: `gc_iteration` can return an error for multiple reasons; we should handle it
+                // better once the types support it.

-    #[allow(unused_mut)]
-    let mut result = tenant
-        .gc_iteration(Some(timeline_id), gc_horizon, pitr, &cancel, &ctx)
-        .await;
-    // FIXME: `gc_iteration` can return an error for multiple reasons; we should handle it
-    // better once the types support it.
+            #[cfg(feature = "testing")]
+            {
+                // we need to synchronize with drop completion for python tests without polling for
+                // log messages
+                if let Ok(result) = result.as_mut() {
+                    let mut js = tokio::task::JoinSet::new();
+                    for layer in std::mem::take(&mut result.doomed_layers) {
+                        js.spawn(layer.wait_drop());
+                    }
+                    tracing::info!(total = js.len(), "starting to wait for the gc'd layers to be dropped");
+                    while let Some(res) = js.join_next().await {
+                        res.expect("wait_drop should not panic");
+                    }
+                }

-    #[cfg(feature = "testing")]
-    {
-        // we need to synchronize with drop completion for python tests without polling for
-        // log messages
-        if let Ok(result) = result.as_mut() {
-            let mut js = tokio::task::JoinSet::new();
-            for layer in std::mem::take(&mut result.doomed_layers) {
-                js.spawn(layer.wait_drop());
+                let timeline = tenant.get_timeline(timeline_id, false).ok();
+                let rtc = timeline.as_ref().and_then(|x| x.remote_client.as_ref());
+
+                if let Some(rtc) = rtc {
+                    // layer drops schedule actions on remote timeline client to actually do the
+                    // deletions; don't care about the shutdown error, just exit fast
+                    drop(rtc.wait_completion().await);
+                }
            }
-            tracing::info!(
-                total = js.len(),
-                "starting to wait for the gc'd layers to be dropped"
-            );
-            while let Some(res) = js.join_next().await {
-                res.expect("wait_drop should not panic");
+
+            match task_done.send(result) {
+                Ok(_) => (),
+                Err(result) => error!("failed to send gc result: {result:?}"),
            }
+            Ok(())
        }
+        .instrument(span)
+    );

-        let timeline = tenant.get_timeline(timeline_id, false).ok();
-        let rtc = timeline.as_ref().map(|x| &x.remote_client);
+    // don't drop the guard until after we've spawned the task, so that timeline shutdown will wait for the task
+    drop(guard);

-        if let Some(rtc) = rtc {
-            // layer drops schedule actions on remote timeline client to actually do the
-            // deletions; don't care about the shutdown error, just exit fast
-            drop(rtc.wait_completion().await);
-        }
-    }
-
-    result.map_err(ApiError::InternalServerError)
+    Ok(wait_task_done)
 }

 #[cfg(test)]
--- a/pageserver/src/tenant/remote_timeline_client.rs
+++ b/pageserver/src/tenant/remote_timeline_client.rs
@@ -240,7 +240,7 @@ use utils::id::{TenantId, TimelineId};
 use self::index::IndexPart;

 use super::metadata::MetadataUpdate;
-use super::storage_layer::{Layer, LayerName, ResidentLayer};
+use super::storage_layer::{Layer, LayerFileName, ResidentLayer};
 use super::upload_queue::SetDeletedFlagProgress;
 use super::Generation;

@@ -317,7 +317,7 @@ pub struct RemoteTimelineClient {

    upload_queue: Mutex<UploadQueue>,

-    pub(crate) metrics: Arc<RemoteTimelineClientMetrics>,
+    metrics: Arc<RemoteTimelineClientMetrics>,

    storage_impl: GenericRemoteStorage,

@@ -437,19 +437,6 @@ impl RemoteTimelineClient {
        }
    }

-    /// Returns true if this timeline was previously detached at this Lsn and the remote timeline
-    /// client is currently initialized.
-    pub(crate) fn is_previous_ancestor_lsn(&self, lsn: Lsn) -> bool {
-        // technically this is a dirty read, but given how timeline detach ancestor is implemented
-        // via tenant restart, the lineage has always been uploaded.
-        self.upload_queue
-            .lock()
-            .unwrap()
-            .initialized_mut()
-            .map(|uq| uq.latest_lineage.is_previous_ancestor_lsn(lsn))
-            .unwrap_or(false)
-    }
-
    fn update_remote_physical_size_gauge(&self, current_remote_index_part: Option<&IndexPart>) {
        let size: u64 = if let Some(current_remote_index_part) = current_remote_index_part {
            current_remote_index_part
@@ -461,11 +448,11 @@ impl RemoteTimelineClient {
        } else {
            0
        };
-        self.metrics.remote_physical_size_gauge.set(size);
+        self.metrics.remote_physical_size_set(size);
    }

    pub fn get_remote_physical_size(&self) -> u64 {
-        self.metrics.remote_physical_size_gauge.get()
+        self.metrics.remote_physical_size_get()
    }

    //
@@ -516,7 +503,7 @@ impl RemoteTimelineClient {
    /// On success, returns the size of the downloaded file.
    pub async fn download_layer_file(
        &self,
-        layer_file_name: &LayerName,
+        layer_file_name: &LayerFileName,
        layer_metadata: &LayerFileMetadata,
        cancel: &CancellationToken,
        ctx: &RequestContext,
@@ -583,7 +570,7 @@ impl RemoteTimelineClient {
        // ahead of what's _actually_ on the remote during index upload.
        upload_queue.latest_metadata = metadata.clone();

-        self.schedule_index_upload(upload_queue);
+        self.schedule_index_upload(upload_queue, upload_queue.latest_metadata.clone());

        Ok(())
    }
@@ -604,7 +591,7 @@ impl RemoteTimelineClient {

        upload_queue.latest_metadata.apply(update);

-        self.schedule_index_upload(upload_queue);
+        self.schedule_index_upload(upload_queue, upload_queue.latest_metadata.clone());

        Ok(())
    }
@@ -624,14 +611,18 @@ impl RemoteTimelineClient {
        let upload_queue = guard.initialized_mut()?;

        if upload_queue.latest_files_changes_since_metadata_upload_scheduled > 0 {
-            self.schedule_index_upload(upload_queue);
+            self.schedule_index_upload(upload_queue, upload_queue.latest_metadata.clone());
        }

        Ok(())
    }

    /// Launch an index-file upload operation in the background (internal function)
-    fn schedule_index_upload(self: &Arc<Self>, upload_queue: &mut UploadQueueInitialized) {
+    fn schedule_index_upload(
+        self: &Arc<Self>,
+        upload_queue: &mut UploadQueueInitialized,
+        metadata: TimelineMetadata,
+    ) {
        let disk_consistent_lsn = upload_queue.latest_metadata.disk_consistent_lsn();

        info!(
@@ -640,8 +631,12 @@ impl RemoteTimelineClient {
            upload_queue.latest_files_changes_since_metadata_upload_scheduled,
        );

-        let index_part = IndexPart::from(&*upload_queue);
-        let op = UploadOp::UploadMetadata(Box::new(index_part), disk_consistent_lsn);
+        let index_part = IndexPart::new(
+            upload_queue.latest_files.clone(),
+            disk_consistent_lsn,
+            metadata,
+        );
+        let op = UploadOp::UploadMetadata(index_part, disk_consistent_lsn);
        self.metric_begin(&op);
        upload_queue.queued_operations.push_back(op);
        upload_queue.latest_files_changes_since_metadata_upload_scheduled = 0;
@@ -650,67 +645,9 @@ impl RemoteTimelineClient {
        self.launch_queued_tasks(upload_queue);
    }

-    pub(crate) async fn schedule_reparenting_and_wait(
-        self: &Arc<Self>,
-        new_parent: &TimelineId,
-    ) -> anyhow::Result<()> {
-        // FIXME: because of how Timeline::schedule_uploads works when called from layer flushing
-        // and reads the in-memory part we cannot do the detaching like this
-        let receiver = {
-            let mut guard = self.upload_queue.lock().unwrap();
-            let upload_queue = guard.initialized_mut()?;
-
-            let Some(prev) = upload_queue.latest_metadata.ancestor_timeline() else {
-                return Err(anyhow::anyhow!(
-                    "cannot reparent without a current ancestor"
-                ));
-            };
-
-            upload_queue.latest_metadata.reparent(new_parent);
-            upload_queue.latest_lineage.record_previous_ancestor(&prev);
-
-            self.schedule_index_upload(upload_queue);
-
-            self.schedule_barrier0(upload_queue)
-        };
-
-        Self::wait_completion0(receiver).await
-    }
-
-    /// Schedules uploading a new version of `index_part.json` with the given layers added,
-    /// detaching from ancestor and waits for it to complete.
    ///
-    /// This is used with `Timeline::detach_ancestor` functionality.
-    pub(crate) async fn schedule_adding_existing_layers_to_index_detach_and_wait(
-        self: &Arc<Self>,
-        layers: &[Layer],
-        adopted: (TimelineId, Lsn),
-    ) -> anyhow::Result<()> {
-        let barrier = {
-            let mut guard = self.upload_queue.lock().unwrap();
-            let upload_queue = guard.initialized_mut()?;
-
-            upload_queue.latest_metadata.detach_from_ancestor(&adopted);
-            upload_queue.latest_lineage.record_detaching(&adopted);
-
-            for layer in layers {
-                upload_queue
-                    .latest_files
-                    .insert(layer.layer_desc().layer_name(), layer.metadata());
-            }
-
-            self.schedule_index_upload(upload_queue);
-
-            let barrier = self.schedule_barrier0(upload_queue);
-            self.launch_queued_tasks(upload_queue);
-            barrier
-        };
-
-        Self::wait_completion0(barrier).await
-    }
-
-    /// Launch an upload operation in the background; the file is added to be included in next
-    /// `index_part.json` upload.
+    /// Launch an upload operation in the background.
+    ///
    pub(crate) fn schedule_layer_file_upload(
        self: &Arc<Self>,
        layer: ResidentLayer,
@@ -732,15 +669,13 @@ impl RemoteTimelineClient {

        upload_queue
            .latest_files
-            .insert(layer.layer_desc().layer_name(), metadata.clone());
+            .insert(layer.layer_desc().filename(), metadata.clone());
        upload_queue.latest_files_changes_since_metadata_upload_scheduled += 1;

        info!(
-            gen=?metadata.generation,
-            shard=?metadata.shard,
-            "scheduled layer file upload {layer}",
+            "scheduled layer file upload {layer} gen={:?} shard={:?}",
+            metadata.generation, metadata.shard
        );
-
        let op = UploadOp::UploadLayer(layer, metadata);
        self.metric_begin(&op);
        upload_queue.queued_operations.push_back(op);
@@ -756,7 +691,7 @@ impl RemoteTimelineClient {
    /// successfully.
    pub fn schedule_layer_file_deletion(
        self: &Arc<Self>,
-        names: &[LayerName],
+        names: &[LayerFileName],
    ) -> anyhow::Result<()> {
        let mut guard = self.upload_queue.lock().unwrap();
        let upload_queue = guard.initialized_mut()?;
@@ -784,7 +719,7 @@ impl RemoteTimelineClient {
        // the layer files as "dangling". this is fine, at worst case we create work for the
        // scrubber.

-        let names = gc_layers.iter().map(|x| x.layer_desc().layer_name());
+        let names = gc_layers.iter().map(|x| x.layer_desc().filename());

        self.schedule_unlinking_of_layers_from_index_part0(upload_queue, names);

@@ -799,10 +734,14 @@ impl RemoteTimelineClient {
        self: &Arc<Self>,
        upload_queue: &mut UploadQueueInitialized,
        names: I,
-    ) -> Vec<(LayerName, LayerFileMetadata)>
+    ) -> Vec<(LayerFileName, LayerFileMetadata)>
    where
-        I: IntoIterator<Item = LayerName>,
+        I: IntoIterator<Item = LayerFileName>,
    {
+        // Deleting layers doesn't affect the values stored in TimelineMetadata,
+        // so we don't need to update it. Just serialize it.
+        let metadata = upload_queue.latest_metadata.clone();
+
        // Decorate our list of names with each name's metadata, dropping
        // names that are unexpectedly missing from our metadata.  This metadata
        // is later used when physically deleting layers, to construct key paths.
@@ -841,7 +780,7 @@ impl RemoteTimelineClient {
        // index_part update, because that needs to be uploaded before we can actually delete the
        // files.
        if upload_queue.latest_files_changes_since_metadata_upload_scheduled > 0 {
-            self.schedule_index_upload(upload_queue);
+            self.schedule_index_upload(upload_queue, metadata);
        }

        with_metadata
@@ -851,7 +790,7 @@ impl RemoteTimelineClient {
    /// `index_part.json` with [`Self::schedule_gc_update`] or [`Self::schedule_compaction_update`].
    pub(crate) fn schedule_deletion_of_unlinked(
        self: &Arc<Self>,
-        layers: Vec<(LayerName, LayerFileMetadata)>,
+        layers: Vec<(LayerFileName, LayerFileMetadata)>,
    ) -> anyhow::Result<()> {
        let mut guard = self.upload_queue.lock().unwrap();
        let upload_queue = guard.initialized_mut()?;
@@ -864,7 +803,7 @@ impl RemoteTimelineClient {
    fn schedule_deletion_of_unlinked0(
        self: &Arc<Self>,
        upload_queue: &mut UploadQueueInitialized,
-        mut with_metadata: Vec<(LayerName, LayerFileMetadata)>,
+        mut with_metadata: Vec<(LayerFileName, LayerFileMetadata)>,
    ) {
        // Filter out any layers which were not created by this tenant shard.  These are
        // layers that originate from some ancestor shard after a split, and may still
@@ -933,7 +872,7 @@ impl RemoteTimelineClient {
            self.schedule_layer_file_upload0(upload_queue, layer.clone());
        }

-        let names = compacted_from.iter().map(|x| x.layer_desc().layer_name());
+        let names = compacted_from.iter().map(|x| x.layer_desc().filename());

        self.schedule_unlinking_of_layers_from_index_part0(upload_queue, names);
        self.launch_queued_tasks(upload_queue);
@@ -943,18 +882,12 @@ impl RemoteTimelineClient {

    /// Wait for all previously scheduled uploads/deletions to complete
    pub(crate) async fn wait_completion(self: &Arc<Self>) -> anyhow::Result<()> {
-        let receiver = {
+        let mut receiver = {
            let mut guard = self.upload_queue.lock().unwrap();
            let upload_queue = guard.initialized_mut()?;
            self.schedule_barrier0(upload_queue)
        };

-        Self::wait_completion0(receiver).await
-    }
-
-    async fn wait_completion0(
-        mut receiver: tokio::sync::watch::Receiver<()>,
-    ) -> anyhow::Result<()> {
        if receiver.changed().await.is_err() {
            anyhow::bail!("wait_completion aborted because upload queue was stopped");
        }
@@ -1070,7 +1003,8 @@ impl RemoteTimelineClient {
            let deleted_at = Utc::now().naive_utc();
            stopped.deleted_at = SetDeletedFlagProgress::InProgress(deleted_at);

-            let mut index_part = IndexPart::from(&stopped.upload_queue_for_deletion);
+            let mut index_part = IndexPart::try_from(&stopped.upload_queue_for_deletion)
+                .context("IndexPart serialize")?;
            index_part.deleted_at = Some(deleted_at);
            index_part
        };
@@ -1127,11 +1061,6 @@ impl RemoteTimelineClient {
        Ok(())
    }

-    pub(crate) fn is_deleting(&self) -> bool {
-        let mut locked = self.upload_queue.lock().unwrap();
-        locked.stopped_mut().is_ok()
-    }
-
    pub(crate) async fn preserve_initdb_archive(
        self: &Arc<Self>,
        tenant_id: &TenantId,
@@ -1156,93 +1085,6 @@ impl RemoteTimelineClient {
        Ok(())
    }

-    /// Uploads the given layer **without** adding it to be part of a future `index_part.json` upload.
-    ///
-    /// This is not normally needed.
-    pub(crate) async fn upload_layer_file(
-        self: &Arc<Self>,
-        uploaded: &ResidentLayer,
-        cancel: &CancellationToken,
-    ) -> anyhow::Result<()> {
-        let remote_path = remote_layer_path(
-            &self.tenant_shard_id.tenant_id,
-            &self.timeline_id,
-            self.tenant_shard_id.to_index(),
-            &uploaded.layer_desc().layer_name(),
-            uploaded.metadata().generation,
-        );
-
-        backoff::retry(
-            || async {
-                upload::upload_timeline_layer(
-                    &self.storage_impl,
-                    uploaded.local_path(),
-                    &remote_path,
-                    uploaded.metadata().file_size(),
-                    cancel,
-                )
-                .await
-            },
-            TimeoutOrCancel::caused_by_cancel,
-            FAILED_UPLOAD_WARN_THRESHOLD,
-            FAILED_REMOTE_OP_RETRIES,
-            "upload a layer without adding it to latest files",
-            cancel,
-        )
-        .await
-        .ok_or_else(|| anyhow::Error::new(TimeoutOrCancel::Cancel))
-        .and_then(|x| x)
-        .context("upload a layer without adding it to latest files")
-    }
-
-    /// Copies the `adopted` remote existing layer to the remote path of `adopted_as`. The layer is
-    /// not added to be part of a future `index_part.json` upload.
-    pub(crate) async fn copy_timeline_layer(
-        self: &Arc<Self>,
-        adopted: &Layer,
-        adopted_as: &Layer,
-        cancel: &CancellationToken,
-    ) -> anyhow::Result<()> {
-        let source_remote_path = remote_layer_path(
-            &self.tenant_shard_id.tenant_id,
-            &adopted
-                .get_timeline_id()
-                .expect("Source timeline should be alive"),
-            self.tenant_shard_id.to_index(),
-            &adopted.layer_desc().layer_name(),
-            adopted.metadata().generation,
-        );
-
-        let target_remote_path = remote_layer_path(
-            &self.tenant_shard_id.tenant_id,
-            &self.timeline_id,
-            self.tenant_shard_id.to_index(),
-            &adopted_as.layer_desc().layer_name(),
-            adopted_as.metadata().generation,
-        );
-
-        backoff::retry(
-            || async {
-                upload::copy_timeline_layer(
-                    &self.storage_impl,
-                    &source_remote_path,
-                    &target_remote_path,
-                    cancel,
-                )
-                .await
-            },
-            TimeoutOrCancel::caused_by_cancel,
-            FAILED_UPLOAD_WARN_THRESHOLD,
-            FAILED_REMOTE_OP_RETRIES,
-            "copy timeline layer",
-            cancel,
-        )
-        .await
-        .ok_or_else(|| anyhow::Error::new(TimeoutOrCancel::Cancel))
-        .and_then(|x| x)
-        .context("remote copy timeline layer")
-    }
-
    async fn flush_deletion_queue(&self) -> Result<(), DeletionQueueError> {
        match tokio::time::timeout(
            DELETION_QUEUE_FLUSH_TIMEOUT,
@@ -1414,7 +1256,7 @@ impl RemoteTimelineClient {
        while let Some(next_op) = upload_queue.queued_operations.front() {
            // Can we run this task now?
            let can_run_now = match next_op {
-                UploadOp::UploadLayer(..) => {
+                UploadOp::UploadLayer(_, _) => {
                    // Can always be scheduled.
                    true
                }
@@ -1541,25 +1383,13 @@ impl RemoteTimelineClient {

            let upload_result: anyhow::Result<()> = match &task.op {
                UploadOp::UploadLayer(ref layer, ref layer_metadata) => {
-                    let local_path = layer.local_path();
-
-                    // We should only be uploading layers created by this `Tenant`'s lifetime, so
-                    // the metadata in the upload should always match our current generation.
-                    assert_eq!(layer_metadata.generation, self.generation);
-
-                    let remote_path = remote_layer_path(
-                        &self.tenant_shard_id.tenant_id,
-                        &self.timeline_id,
-                        layer_metadata.shard,
-                        &layer.layer_desc().layer_name(),
-                        layer_metadata.generation,
-                    );
-
+                    let path = layer.local_path();
                    upload::upload_timeline_layer(
+                        self.conf,
                        &self.storage_impl,
-                        local_path,
-                        &remote_path,
-                        layer_metadata.file_size(),
+                        path,
+                        layer_metadata,
+                        self.generation,
                        &self.cancel,
                    )
                    .measure_remote_op(
@@ -1835,7 +1665,6 @@ impl RemoteTimelineClient {
                        latest_files: initialized.latest_files.clone(),
                        latest_files_changes_since_metadata_upload_scheduled: 0,
                        latest_metadata: initialized.latest_metadata.clone(),
-                        latest_lineage: initialized.latest_lineage.clone(),
                        projected_remote_consistent_lsn: None,
                        visible_remote_consistent_lsn: initialized
                            .visible_remote_consistent_lsn
@@ -1921,14 +1750,14 @@ pub fn remote_layer_path(
    tenant_id: &TenantId,
    timeline_id: &TimelineId,
    shard: ShardIndex,
-    layer_file_name: &LayerName,
+    layer_file_name: &LayerFileName,
    generation: Generation,
 ) -> RemotePath {
    // Generation-aware key format
    let path = format!(
        "tenants/{tenant_id}{0}/{TIMELINES_SEGMENT_NAME}/{timeline_id}/{1}{2}",
        shard.get_suffix(),
-        layer_file_name,
+        layer_file_name.file_name(),
        generation.get_suffix()
    );

@@ -1989,6 +1818,29 @@ pub fn parse_remote_index_path(path: RemotePath) -> Option<Generation> {
    }
 }

+/// Builds the remote storage path of a local file: its path relative to the pageserver workdir
+/// (which includes the tenant and timeline ids, keeping it unique) plus the generation suffix.
+///
+/// Errors if `local_path` is not under `conf.workdir` or if `RemotePath::new` rejects the result.
+pub fn remote_path(
+    conf: &PageServerConf,
+    local_path: &Utf8Path,
+    generation: Generation,
+) -> anyhow::Result<RemotePath> {
+    let stripped = local_path
+        .strip_prefix(&conf.workdir)
+        .context("Failed to strip workdir prefix")?;
+
+    let suffixed = format!("{0}{1}", stripped, generation.get_suffix());
+
+    RemotePath::new(Utf8Path::new(&suffixed)).with_context(|| {
+        format!(
+            "to resolve remote part of path {:?} for base {:?}",
+            local_path, conf.workdir
+        )
+    })
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;
@@ -1996,7 +1848,6 @@ mod tests {
        context::RequestContext,
        tenant::{
            harness::{TenantHarness, TIMELINE_ID},
-            storage_layer::layer::local_layer_path,
            Tenant, Timeline,
        },
        DEFAULT_PG_VERSION,
@@ -2025,8 +1876,8 @@ mod tests {
        TimelineMetadata::from_bytes(&metadata.to_bytes().unwrap()).unwrap()
    }

-    fn assert_file_list(a: &HashSet<LayerName>, b: &[&str]) {
-        let mut avec: Vec<String> = a.iter().map(|x| x.to_string()).collect();
+    fn assert_file_list(a: &HashSet<LayerFileName>, b: &[&str]) {
+        let mut avec: Vec<String> = a.iter().map(|x| x.file_name()).collect();
        avec.sort();

        let mut bvec = b.to_vec();
@@ -2137,7 +1988,7 @@ mod tests {
            tenant_ctx: _tenant_ctx,
        } = test_setup;

-        let client = &timeline.remote_client;
+        let client = timeline.remote_client.as_ref().unwrap();

        // Download back the index.json, and check that the list of files is correct
        let initial_index_part = match client
@@ -2152,7 +2003,7 @@ mod tests {
            .layer_metadata
            .keys()
            .map(|f| f.to_owned())
-            .collect::<HashSet<LayerName>>();
+            .collect::<HashSet<LayerFileName>>();
        let initial_layer = {
            assert!(initial_layers.len() == 1);
            initial_layers.into_iter().next().unwrap()
@@ -2178,21 +2029,12 @@ mod tests {
            ("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59DA-00000000016B5A53".parse().unwrap(), dummy_contents("baz"))
        ]
        .into_iter()
-        .map(|(name, contents): (LayerName, Vec<u8>)| {
-
-            let local_path = local_layer_path(
-                harness.conf,
-                &timeline.tenant_shard_id,
-                &timeline.timeline_id,
-                &name,
-                &generation,
-            );
-            std::fs::write(&local_path, &contents).unwrap();
+        .map(|(name, contents): (LayerFileName, Vec<u8>)| {
+            std::fs::write(timeline_path.join(name.file_name()), &contents).unwrap();

            Layer::for_resident(
                harness.conf,
                &timeline,
-                local_path,
                name,
                LayerFileMetadata::new(contents.len() as u64, generation, shard),
            )
@@ -2259,9 +2101,9 @@ mod tests {
                .map(|f| f.to_owned())
                .collect(),
            &[
-                &initial_layer.to_string(),
-                &layers[0].layer_desc().layer_name().to_string(),
-                &layers[1].layer_desc().layer_name().to_string(),
+                &initial_layer.file_name(),
+                &layers[0].layer_desc().filename().file_name(),
+                &layers[1].layer_desc().filename().file_name(),
            ],
        );
        assert_eq!(index_part.metadata, metadata);
@@ -2275,7 +2117,7 @@ mod tests {
        // keep using schedule_layer_file_deletion because we don't have a way to wait for the
        // spawn_blocking started by the drop.
        client
-            .schedule_layer_file_deletion(&[layers[0].layer_desc().layer_name()])
+            .schedule_layer_file_deletion(&[layers[0].layer_desc().filename()])
            .unwrap();
        {
            let mut guard = client.upload_queue.lock().unwrap();
@@ -2293,9 +2135,9 @@ mod tests {
        }
        assert_remote_files(
            &[
-                &initial_layer.to_string(),
-                &layers[0].layer_desc().layer_name().to_string(),
-                &layers[1].layer_desc().layer_name().to_string(),
+                &initial_layer.file_name(),
+                &layers[0].layer_desc().filename().file_name(),
+                &layers[1].layer_desc().filename().file_name(),
                "index_part.json",
            ],
            &remote_timeline_dir,
@@ -2308,9 +2150,9 @@ mod tests {

        assert_remote_files(
            &[
-                &initial_layer.to_string(),
-                &layers[1].layer_desc().layer_name().to_string(),
-                &layers[2].layer_desc().layer_name().to_string(),
+                &initial_layer.file_name(),
+                &layers[1].layer_desc().filename().file_name(),
+                &layers[2].layer_desc().filename().file_name(),
                "index_part.json",
            ],
            &remote_timeline_dir,
@@ -2328,23 +2170,20 @@ mod tests {
            timeline,
            ..
        } = TestSetup::new("metrics").await.unwrap();
-        let client = &timeline.remote_client;
+        let client = timeline.remote_client.as_ref().unwrap();
+        let timeline_path = harness.timeline_path(&TIMELINE_ID);

-        let layer_file_name_1: LayerName = "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap();
-        let local_path = local_layer_path(
-            harness.conf,
-            &timeline.tenant_shard_id,
-            &timeline.timeline_id,
-            &layer_file_name_1,
-            &harness.generation,
-        );
+        let layer_file_name_1: LayerFileName = "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap();
        let content_1 = dummy_contents("foo");
-        std::fs::write(&local_path, &content_1).unwrap();
+        std::fs::write(
+            timeline_path.join(layer_file_name_1.file_name()),
+            &content_1,
+        )
+        .unwrap();

        let layer_file_1 = Layer::for_resident(
            harness.conf,
            &timeline,
-            local_path,
            layer_file_name_1.clone(),
            LayerFileMetadata::new(content_1.len() as u64, harness.generation, harness.shard),
        );
@@ -2413,7 +2252,12 @@ mod tests {

    async fn inject_index_part(test_state: &TestSetup, generation: Generation) -> IndexPart {
        // An empty IndexPart, just sufficient to ensure deserialization will succeed
-        let example_index_part = IndexPart::example();
+        let example_metadata = TimelineMetadata::example();
+        let example_index_part = IndexPart::new(
+            HashMap::new(),
+            example_metadata.disk_consistent_lsn(),
+            example_metadata,
+        );

        let index_part_bytes = serde_json::to_vec(&example_index_part).unwrap();

--- a/pageserver/src/tenant/remote_timeline_client/download.rs
+++ b/pageserver/src/tenant/remote_timeline_client/download.rs
@@ -21,8 +21,7 @@ use crate::config::PageServerConf;
 use crate::context::RequestContext;
 use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
 use crate::tenant::remote_timeline_client::{remote_layer_path, remote_timelines_path};
-use crate::tenant::storage_layer::layer::local_layer_path;
-use crate::tenant::storage_layer::LayerName;
+use crate::tenant::storage_layer::LayerFileName;
 use crate::tenant::Generation;
 use crate::virtual_file::{on_fatal_io_error, MaybeFatalIo, VirtualFile};
 use crate::TEMP_FILE_SUFFIX;
@@ -48,7 +47,7 @@ pub async fn download_layer_file<'a>(
    storage: &'a GenericRemoteStorage,
    tenant_shard_id: TenantShardId,
    timeline_id: TimelineId,
-    layer_file_name: &'a LayerName,
+    layer_file_name: &'a LayerFileName,
    layer_metadata: &'a LayerFileMetadata,
    cancel: &CancellationToken,
    ctx: &RequestContext,
@@ -56,13 +55,7 @@ pub async fn download_layer_file<'a>(
    debug_assert_current_span_has_tenant_and_timeline_id();

    let timeline_path = conf.timeline_path(&tenant_shard_id, &timeline_id);
-    let local_path = local_layer_path(
-        conf,
-        &tenant_shard_id,
-        &timeline_id,
-        layer_file_name,
-        &layer_metadata.generation,
-    );
+    let local_path = timeline_path.join(layer_file_name.file_name());

    let remote_path = remote_layer_path(
        &tenant_shard_id.tenant_id,
@@ -112,17 +105,14 @@ pub async fn download_layer_file<'a>(
    // We use fatal_err() below because the after the rename above,
    // the in-memory state of the filesystem already has the layer file in its final place,
    // and subsequent pageserver code could think it's durable while it really isn't.
-    let work = {
-        let ctx = ctx.detached_child(ctx.task_kind(), ctx.download_behavior());
-        async move {
-            let timeline_dir = VirtualFile::open(&timeline_path, &ctx)
-                .await
-                .fatal_err("VirtualFile::open for timeline dir fsync");
-            timeline_dir
-                .sync_all()
-                .await
-                .fatal_err("VirtualFile::sync_all timeline dir");
-        }
+    let work = async move {
+        let timeline_dir = VirtualFile::open(&timeline_path)
+            .await
+            .fatal_err("VirtualFile::open for timeline dir fsync");
+        timeline_dir
+            .sync_all()
+            .await
+            .fatal_err("VirtualFile::sync_all timeline dir");
    };
    crate::virtual_file::io_engine::get()
        .spawn_blocking_and_block_on_if_std(work)
@@ -199,7 +189,7 @@ async fn download_object<'a>(
            use crate::virtual_file::owned_buffers_io::{self, util::size_tracking_writer};
            use bytes::BytesMut;
            async {
-                let destination_file = VirtualFile::create(dst_path, ctx)
+                let destination_file = VirtualFile::create(dst_path)
                    .await
                    .with_context(|| format!("create a destination file for layer '{dst_path}'"))
                    .map_err(DownloadError::Other)?;
--- a/pageserver/src/tenant/remote_timeline_client/index.rs
+++ b/pageserver/src/tenant/remote_timeline_client/index.rs
@@ -6,10 +6,10 @@ use std::collections::HashMap;

 use chrono::NaiveDateTime;
 use serde::{Deserialize, Serialize};
-use utils::id::TimelineId;
+use utils::bin_ser::SerializeError;

 use crate::tenant::metadata::TimelineMetadata;
-use crate::tenant::storage_layer::LayerName;
+use crate::tenant::storage_layer::LayerFileName;
 use crate::tenant::upload_queue::UploadQueueInitialized;
 use crate::tenant::Generation;
 use pageserver_api::shard::ShardIndex;
@@ -76,7 +76,7 @@ pub struct IndexPart {
    ///
    /// Older versions of `IndexPart` will not have this property or have only a part of metadata
    /// that latest version stores.
-    pub layer_metadata: HashMap<LayerName, IndexLayerMetadata>,
+    pub layer_metadata: HashMap<LayerFileName, IndexLayerMetadata>,

    // 'disk_consistent_lsn' is a copy of the 'disk_consistent_lsn' in the metadata.
    // It's duplicated for convenience when reading the serialized structure, but is
@@ -85,9 +85,6 @@ pub struct IndexPart {

    #[serde(rename = "metadata_bytes")]
    pub metadata: TimelineMetadata,
-
-    #[serde(default)]
-    pub(crate) lineage: Lineage,
 }

 impl IndexPart {
@@ -100,23 +97,22 @@ impl IndexPart {
    /// - 3: no longer deserialize `timeline_layers` (serialized format is the same, but timeline_layers
    ///      is always generated from the keys of `layer_metadata`)
    /// - 4: timeline_layers is fully removed.
-    /// - 5: lineage was added
-    const LATEST_VERSION: usize = 5;
+    const LATEST_VERSION: usize = 4;

    // Versions we may see when reading from a bucket.
-    pub const KNOWN_VERSIONS: &'static [usize] = &[1, 2, 3, 4, 5];
+    pub const KNOWN_VERSIONS: &'static [usize] = &[1, 2, 3, 4];

    pub const FILE_NAME: &'static str = "index_part.json";

-    fn new(
-        layers_and_metadata: &HashMap<LayerName, LayerFileMetadata>,
+    pub fn new(
+        layers_and_metadata: HashMap<LayerFileName, LayerFileMetadata>,
        disk_consistent_lsn: Lsn,
        metadata: TimelineMetadata,
-        lineage: Lineage,
    ) -> Self {
+        // Transform LayerFileMetadata into IndexLayerMetadata
        let layer_metadata = layers_and_metadata
-            .iter()
-            .map(|(k, v)| (k.to_owned(), IndexLayerMetadata::from(v)))
+            .into_iter()
+            .map(|(k, v)| (k, IndexLayerMetadata::from(v)))
            .collect();

        Self {
@@ -125,7 +121,6 @@ impl IndexPart {
            disk_consistent_lsn,
            metadata,
            deleted_at: None,
-            lineage,
        }
    }

@@ -146,26 +141,20 @@ impl IndexPart {
    pub fn to_s3_bytes(&self) -> serde_json::Result<Vec<u8>> {
        serde_json::to_vec(self)
    }
-
-    #[cfg(test)]
-    pub(crate) fn example() -> Self {
-        let example_metadata = TimelineMetadata::example();
-        Self::new(
-            &HashMap::new(),
-            example_metadata.disk_consistent_lsn(),
-            example_metadata,
-            Default::default(),
-        )
-    }
 }

-impl From<&UploadQueueInitialized> for IndexPart {
-    fn from(uq: &UploadQueueInitialized) -> Self {
-        let disk_consistent_lsn = uq.latest_metadata.disk_consistent_lsn();
-        let metadata = uq.latest_metadata.clone();
-        let lineage = uq.latest_lineage.clone();
+impl TryFrom<&UploadQueueInitialized> for IndexPart {
+    type Error = SerializeError;

-        Self::new(&uq.latest_files, disk_consistent_lsn, metadata, lineage)
+    fn try_from(upload_queue: &UploadQueueInitialized) -> Result<Self, Self::Error> {
+        let disk_consistent_lsn = upload_queue.latest_metadata.disk_consistent_lsn();
+        let metadata = upload_queue.latest_metadata.clone();
+        // Nothing here can fail: the file map is cloned and the result is always `Ok`.
+        Ok(Self::new(
+            upload_queue.latest_files.clone(),
+            disk_consistent_lsn,
+            metadata,
+        ))
     }
 }

@@ -183,8 +172,8 @@ pub struct IndexLayerMetadata {
    pub shard: ShardIndex,
 }

-impl From<&LayerFileMetadata> for IndexLayerMetadata {
-    fn from(other: &LayerFileMetadata) -> Self {
+impl From<LayerFileMetadata> for IndexLayerMetadata {
+    fn from(other: LayerFileMetadata) -> Self {
        IndexLayerMetadata {
            file_size: other.file_size,
            generation: other.generation,
@@ -193,76 +182,8 @@ impl From<&LayerFileMetadata> for IndexLayerMetadata {
    }
 }

-/// Limited history of earlier ancestors.
-///
-/// A timeline can have more than 1 earlier ancestor, in the rare case that it was repeatedly
-/// reparented by having an later timeline be detached from it's ancestor.
-#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize, Default)]
-pub(crate) struct Lineage {
-    /// Has the `reparenting_history` been truncated to [`Lineage::REMEMBER_AT_MOST`].
-    #[serde(skip_serializing_if = "is_false", default)]
-    reparenting_history_truncated: bool,
-
-    /// Earlier ancestors, truncated when [`Self::reparenting_history_truncated`]
-    ///
-    /// These are stored in case we want to support WAL based DR on the timeline. There can be many
-    /// of these and at most one [`Self::original_ancestor`]. There cannot be more reparentings
-    /// after [`Self::original_ancestor`] has been set.
-    #[serde(skip_serializing_if = "Vec::is_empty", default)]
-    reparenting_history: Vec<TimelineId>,
-
-    /// The ancestor from which this timeline has been detached from and when.
-    ///
-    /// If you are adding support for detaching from a hierarchy, consider changing the ancestry
-    /// into a `Vec<(TimelineId, Lsn)>` to be a path instead.
-    #[serde(skip_serializing_if = "Option::is_none", default)]
-    original_ancestor: Option<(TimelineId, Lsn, NaiveDateTime)>,
-}
-
-fn is_false(b: &bool) -> bool {
-    !b
-}
-
-impl Lineage {
-    const REMEMBER_AT_MOST: usize = 100;
-
-    pub(crate) fn record_previous_ancestor(&mut self, old_ancestor: &TimelineId) {
-        if self.reparenting_history.last() == Some(old_ancestor) {
-            // do not re-record it
-            return;
-        }
-
-        let drop_oldest = self.reparenting_history.len() + 1 >= Self::REMEMBER_AT_MOST;
-
-        self.reparenting_history_truncated |= drop_oldest;
-        if drop_oldest {
-            self.reparenting_history.remove(0);
-        }
-        self.reparenting_history.push(*old_ancestor);
-    }
-
-    pub(crate) fn record_detaching(&mut self, branchpoint: &(TimelineId, Lsn)) {
-        assert!(self.original_ancestor.is_none());
-
-        self.original_ancestor =
-            Some((branchpoint.0, branchpoint.1, chrono::Utc::now().naive_utc()));
-    }
-
-    /// The queried lsn is most likely the basebackup lsn, and this answers question "is it allowed
-    /// to start a read/write primary at this lsn".
-    ///
-    /// Returns true if the Lsn was previously a branch point.
-    pub(crate) fn is_previous_ancestor_lsn(&self, lsn: Lsn) -> bool {
-        self.original_ancestor
-            .as_ref()
-            .is_some_and(|(_, ancestor_lsn, _)| lsn == *ancestor_lsn)
-    }
-}
-
 #[cfg(test)]
 mod tests {
-    use std::str::FromStr;
-
    use super::*;

    #[test]
@@ -298,7 +219,6 @@ mod tests {
            disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
            metadata: TimelineMetadata::from_bytes(&[113,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]).unwrap(),
            deleted_at: None,
-            lineage: Lineage::default(),
        };

        let part = IndexPart::from_s3_bytes(example.as_bytes()).unwrap();
@@ -339,7 +259,6 @@ mod tests {
            disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
            metadata: TimelineMetadata::from_bytes(&[113,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]).unwrap(),
            deleted_at: None,
-            lineage: Lineage::default(),
        };

        let part = IndexPart::from_s3_bytes(example.as_bytes()).unwrap();
@@ -381,8 +300,7 @@ mod tests {
            disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
            metadata: TimelineMetadata::from_bytes(&[113,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]).unwrap(),
            deleted_at: Some(chrono::NaiveDateTime::parse_from_str(
-                "2023-07-31T09:00:00.123000000", "%Y-%m-%dT%H:%M:%S.%f").unwrap()),
-            lineage: Lineage::default(),
+                "2023-07-31T09:00:00.123000000", "%Y-%m-%dT%H:%M:%S.%f").unwrap())
        };

        let part = IndexPart::from_s3_bytes(example.as_bytes()).unwrap();
@@ -427,7 +345,6 @@ mod tests {
            ])
            .unwrap(),
            deleted_at: None,
-            lineage: Lineage::default(),
        };

        let empty_layers_parsed = IndexPart::from_s3_bytes(empty_layers_json.as_bytes()).unwrap();
@@ -466,58 +383,11 @@ mod tests {
            ]),
            disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
            metadata: TimelineMetadata::from_bytes(&[113,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]).unwrap(),
-            deleted_at: Some(parse_naive_datetime("2023-07-31T09:00:00.123000000")),
-            lineage: Lineage::default(),
+            deleted_at: Some(chrono::NaiveDateTime::parse_from_str(
+                "2023-07-31T09:00:00.123000000", "%Y-%m-%dT%H:%M:%S.%f").unwrap()),
        };

        let part = IndexPart::from_s3_bytes(example.as_bytes()).unwrap();
        assert_eq!(part, expected);
    }
-
-    #[test]
-    fn v5_indexpart_is_parsed() {
-        let example = r#"{
-            "version":5,
-            "layer_metadata":{
-                "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000014EF420-00000000014EF499":{"file_size":23289856,"generation":1},
-                "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000014EF499-00000000015A7619":{"file_size":1015808,"generation":1}},
-                "disk_consistent_lsn":"0/15A7618",
-                "metadata_bytes":[226,88,25,241,0,46,0,4,0,0,0,0,1,90,118,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,78,244,32,0,0,0,0,1,78,244,32,0,0,0,16,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
-                "lineage":{
-                    "original_ancestor":["e2bfd8c633d713d279e6fcd2bcc15b6d","0/15A7618","2024-05-07T18:52:36.322426563"],
-                    "reparenting_history":["e1bfd8c633d713d279e6fcd2bcc15b6d"]
-                }
-        }"#;
-
-        let expected = IndexPart {
-            version: 5,
-            layer_metadata: HashMap::from([
-                ("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000014EF420-00000000014EF499".parse().unwrap(), IndexLayerMetadata {
-                    file_size: 23289856,
-                    generation: Generation::new(1),
-                    shard: ShardIndex::unsharded(),
-                }),
-                ("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000014EF499-00000000015A7619".parse().unwrap(), IndexLayerMetadata {
-                    file_size: 1015808,
-                    generation: Generation::new(1),
-                    shard: ShardIndex::unsharded(),
-                })
-            ]),
-            disk_consistent_lsn: Lsn::from_str("0/15A7618").unwrap(),
-            metadata: TimelineMetadata::from_bytes(&[226,88,25,241,0,46,0,4,0,0,0,0,1,90,118,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,78,244,32,0,0,0,0,1,78,244,32,0,0,0,16,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]).unwrap(),
-            deleted_at: None,
-            lineage: Lineage {
-                reparenting_history_truncated: false,
-                reparenting_history: vec![TimelineId::from_str("e1bfd8c633d713d279e6fcd2bcc15b6d").unwrap()],
-                original_ancestor: Some((TimelineId::from_str("e2bfd8c633d713d279e6fcd2bcc15b6d").unwrap(), Lsn::from_str("0/15A7618").unwrap(), parse_naive_datetime("2024-05-07T18:52:36.322426563"))),
-            },
-        };
-
-        let part = IndexPart::from_s3_bytes(example.as_bytes()).unwrap();
-        assert_eq!(part, expected);
-    }
-
-    fn parse_naive_datetime(s: &str) -> NaiveDateTime {
-        chrono::NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S.%f").unwrap()
-    }
 }
--- a/pageserver/src/tenant/remote_timeline_client/upload.rs
+++ b/pageserver/src/tenant/remote_timeline_client/upload.rs
@@ -12,13 +12,18 @@ use tokio_util::sync::CancellationToken;
 use utils::backoff;

 use super::Generation;
-use crate::tenant::remote_timeline_client::{
-    index::IndexPart, remote_index_path, remote_initdb_archive_path,
-    remote_initdb_preserved_archive_path,
+use crate::{
+    config::PageServerConf,
+    tenant::remote_timeline_client::{
+        index::IndexPart, remote_index_path, remote_initdb_archive_path,
+        remote_initdb_preserved_archive_path, remote_path,
+    },
 };
-use remote_storage::{GenericRemoteStorage, RemotePath, TimeTravelError};
+use remote_storage::{GenericRemoteStorage, TimeTravelError};
 use utils::id::{TenantId, TimelineId};

+use super::index::LayerFileMetadata;
+
 use tracing::info;

 /// Serializes and uploads the given index part data to the remote storage.
@@ -60,10 +65,11 @@ pub(crate) async fn upload_index_part<'a>(
 ///
 /// On an error, bumps the retries count and reschedules the entire task.
 pub(super) async fn upload_timeline_layer<'a>(
+    conf: &'static PageServerConf,
    storage: &'a GenericRemoteStorage,
-    local_path: &'a Utf8Path,
-    remote_path: &'a RemotePath,
-    metadata_size: u64,
+    source_path: &'a Utf8Path,
+    known_metadata: &'a LayerFileMetadata,
+    generation: Generation,
    cancel: &CancellationToken,
 ) -> anyhow::Result<()> {
    fail_point!("before-upload-layer", |_| {
@@ -72,7 +78,8 @@ pub(super) async fn upload_timeline_layer<'a>(

    pausable_failpoint!("before-upload-layer-pausable");

-    let source_file_res = fs::File::open(&local_path).await;
+    let storage_path = remote_path(conf, source_path, generation)?;
+    let source_file_res = fs::File::open(&source_path).await;
    let source_file = match source_file_res {
        Ok(source_file) => source_file,
        Err(e) if e.kind() == ErrorKind::NotFound => {
@@ -83,49 +90,34 @@ pub(super) async fn upload_timeline_layer<'a>(
            // it has been written to disk yet.
            //
            // This is tested against `test_compaction_delete_before_upload`
-            info!(path = %local_path, "File to upload doesn't exist. Likely the file has been deleted and an upload is not required any more.");
+            info!(path = %source_path, "File to upload doesn't exist. Likely the file has been deleted and an upload is not required any more.");
            return Ok(());
        }
-        Err(e) => Err(e).with_context(|| format!("open a source file for layer {local_path:?}"))?,
+        Err(e) => {
+            Err(e).with_context(|| format!("open a source file for layer {source_path:?}"))?
+        }
    };

    let fs_size = source_file
        .metadata()
        .await
-        .with_context(|| format!("get the source file metadata for layer {local_path:?}"))?
+        .with_context(|| format!("get the source file metadata for layer {source_path:?}"))?
        .len();

+    let metadata_size = known_metadata.file_size();
    if metadata_size != fs_size {
-        bail!("File {local_path:?} has its current FS size {fs_size} diferent from initially determined {metadata_size}");
+        bail!("File {source_path:?} has its current FS size {fs_size} diferent from initially determined {metadata_size}");
    }

    let fs_size = usize::try_from(fs_size)
-        .with_context(|| format!("convert {local_path:?} size {fs_size} usize"))?;
+        .with_context(|| format!("convert {source_path:?} size {fs_size} usize"))?;

    let reader = tokio_util::io::ReaderStream::with_capacity(source_file, super::BUFFER_SIZE);

    storage
-        .upload(reader, fs_size, remote_path, None, cancel)
+        .upload(reader, fs_size, &storage_path, None, cancel)
        .await
-        .with_context(|| format!("upload layer from local path '{local_path}'"))
-}
-
-pub(super) async fn copy_timeline_layer(
-    storage: &GenericRemoteStorage,
-    source_path: &RemotePath,
-    target_path: &RemotePath,
-    cancel: &CancellationToken,
-) -> anyhow::Result<()> {
-    fail_point!("before-copy-layer", |_| {
-        bail!("failpoint before-copy-layer")
-    });
-
-    pausable_failpoint!("before-copy-layer-pausable");
-
-    storage
-        .copy_object(source_path, target_path, cancel)
-        .await
-        .with_context(|| format!("copy layer {source_path} to {target_path}"))
+        .with_context(|| format!("upload layer from local path '{source_path}'"))
 }

 /// Uploads the given `initdb` data to the remote storage.
--- a/pageserver/src/tenant/secondary.rs
+++ b/pageserver/src/tenant/secondary.rs
@@ -21,9 +21,8 @@ use self::{
 use super::{
    config::{SecondaryLocationConfig, TenantConfOpt},
    mgr::TenantManager,
-    remote_timeline_client::LayerFileMetadata,
    span::debug_assert_current_span_has_tenant_id,
-    storage_layer::{layer::local_layer_path, LayerName},
+    storage_layer::LayerFileName,
 };

 use pageserver_api::{
@@ -182,8 +181,7 @@ impl SecondaryTenant {
        self: &Arc<Self>,
        conf: &PageServerConf,
        timeline_id: TimelineId,
-        name: LayerName,
-        metadata: LayerFileMetadata,
+        name: LayerFileName,
    ) {
        debug_assert_current_span_has_tenant_id();

@@ -197,13 +195,9 @@ impl SecondaryTenant {

        let now = SystemTime::now();

-        let local_path = local_layer_path(
-            conf,
-            &self.tenant_shard_id,
-            &timeline_id,
-            &name,
-            &metadata.generation,
-        );
+        let path = conf
+            .timeline_path(&self.tenant_shard_id, &timeline_id)
+            .join(name.file_name());

        let this = self.clone();

@@ -214,7 +208,7 @@ impl SecondaryTenant {
            // it, the secondary downloader could have seen an updated heatmap that
            // resulted in a layer being deleted.
            // Other local I/O errors are process-fatal: these should never happen.
-            let deleted = std::fs::remove_file(local_path);
+            let deleted = std::fs::remove_file(path);

            let not_found = deleted
                .as_ref()
--- a/pageserver/src/tenant/secondary/downloader.rs
+++ b/pageserver/src/tenant/secondary/downloader.rs
@@ -22,11 +22,11 @@ use crate::{
            FAILED_REMOTE_OP_RETRIES,
        },
        span::debug_assert_current_span_has_tenant_id,
-        storage_layer::{layer::local_layer_path, LayerName},
+        storage_layer::LayerFileName,
        tasks::{warn_when_period_overrun, BackgroundLoopKind},
    },
    virtual_file::{on_fatal_io_error, MaybeFatalIo, VirtualFile},
-    TEMP_FILE_SUFFIX,
+    METADATA_FILE_NAME, TEMP_FILE_SUFFIX,
 };

 use super::{
@@ -45,10 +45,10 @@ use crate::tenant::{

 use camino::Utf8PathBuf;
 use chrono::format::{DelayedFormat, StrftimeItems};
-use futures::{Future, StreamExt};
+use futures::Future;
 use pageserver_api::models::SecondaryProgress;
 use pageserver_api::shard::TenantShardId;
-use remote_storage::{DownloadError, Etag, GenericRemoteStorage, RemoteStorageActivity};
+use remote_storage::{DownloadError, Etag, GenericRemoteStorage};

 use tokio_util::sync::CancellationToken;
 use tracing::{info_span, instrument, warn, Instrument};
@@ -71,12 +71,6 @@ use super::{
 /// `<ttps://github.com/neondatabase/neon/issues/6200>`
 const DOWNLOAD_FRESHEN_INTERVAL: Duration = Duration::from_millis(60000);

-/// Range of concurrency we may use when downloading layers within a timeline.  This is independent
-/// for each tenant we're downloading: the concurrency of _tenants_ is defined separately in
-/// `PageServerConf::secondary_download_concurrency`
-const MAX_LAYER_CONCURRENCY: usize = 16;
-const MIN_LAYER_CONCURRENCY: usize = 1;
-
 pub(super) async fn downloader_task(
    tenant_manager: Arc<TenantManager>,
    remote_storage: GenericRemoteStorage,
@@ -85,15 +79,14 @@ pub(super) async fn downloader_task(
    cancel: CancellationToken,
    root_ctx: RequestContext,
 ) {
-    // How many tenants' secondary download operations we will run concurrently
-    let tenant_concurrency = tenant_manager.get_conf().secondary_download_concurrency;
+    let concurrency = tenant_manager.get_conf().secondary_download_concurrency;

    let generator = SecondaryDownloader {
        tenant_manager,
        remote_storage,
        root_ctx,
    };
-    let mut scheduler = Scheduler::new(generator, tenant_concurrency);
+    let mut scheduler = Scheduler::new(generator, concurrency);

    scheduler
        .run(command_queue, background_jobs_can_start, cancel)
@@ -118,7 +111,7 @@ impl OnDiskState {
        _conf: &'static PageServerConf,
        _tenant_shard_id: &TenantShardId,
        _imeline_id: &TimelineId,
-        _ame: LayerName,
+        _ame: LayerFileName,
        metadata: LayerFileMetadata,
        access_time: SystemTime,
    ) -> Self {
@@ -131,10 +124,10 @@ impl OnDiskState {

 #[derive(Debug, Clone, Default)]
 pub(super) struct SecondaryDetailTimeline {
-    pub(super) on_disk_layers: HashMap<LayerName, OnDiskState>,
+    pub(super) on_disk_layers: HashMap<LayerFileName, OnDiskState>,

    /// We remember when layers were evicted, to prevent re-downloading them.
-    pub(super) evicted_at: HashMap<LayerName, SystemTime>,
+    pub(super) evicted_at: HashMap<LayerFileName, SystemTime>,
 }

 /// This state is written by the secondary downloader, it is opaque
@@ -628,12 +621,12 @@ impl<'a> TenantDownloader<'a> {
                let layers_in_heatmap = heatmap_timeline
                    .layers
                    .iter()
-                    .map(|l| (&l.name, l.metadata.generation))
+                    .map(|l| &l.name)
                    .collect::<HashSet<_>>();
                let layers_on_disk = timeline_state
                    .on_disk_layers
                    .iter()
-                    .map(|l| (l.0, l.1.metadata.generation))
+                    .map(|l| l.0)
                    .collect::<HashSet<_>>();

                let mut layer_count = layers_on_disk.len();
@@ -644,24 +637,16 @@ impl<'a> TenantDownloader<'a> {
                    .sum();

                // Remove on-disk layers that are no longer present in heatmap
-                for (layer_file_name, generation) in layers_on_disk.difference(&layers_in_heatmap) {
+                for layer in layers_on_disk.difference(&layers_in_heatmap) {
                    layer_count -= 1;
                    layer_byte_count -= timeline_state
                        .on_disk_layers
-                        .get(layer_file_name)
+                        .get(layer)
                        .unwrap()
                        .metadata
                        .file_size();

-                    let local_path = local_layer_path(
-                        self.conf,
-                        self.secondary_state.get_tenant_shard_id(),
-                        timeline_id,
-                        layer_file_name,
-                        generation,
-                    );
-
-                    delete_layers.push((*timeline_id, (*layer_file_name).clone(), local_path));
+                    delete_layers.push((*timeline_id, (*layer).clone()));
                }

                progress.bytes_downloaded += layer_byte_count;
@@ -676,7 +661,11 @@ impl<'a> TenantDownloader<'a> {
        }

        // Execute accumulated deletions
-        for (timeline_id, layer_name, local_path) in delete_layers {
+        for (timeline_id, layer_name) in delete_layers {
+            let timeline_path = self
+                .conf
+                .timeline_path(self.secondary_state.get_tenant_shard_id(), &timeline_id);
+            let local_path = timeline_path.join(layer_name.to_string());
            tracing::info!(timeline_id=%timeline_id, "Removing secondary local layer {layer_name} because it's absent in heatmap",);

            tokio::fs::remove_file(&local_path)
@@ -765,6 +754,9 @@ impl<'a> TenantDownloader<'a> {
    ) -> Result<(), UpdateError> {
        debug_assert_current_span_has_tenant_and_timeline_id();
        let tenant_shard_id = self.secondary_state.get_tenant_shard_id();
+        let timeline_path = self
+            .conf
+            .timeline_path(tenant_shard_id, &timeline.timeline_id);

        // Accumulate updates to the state
        let mut touched = Vec::new();
@@ -799,8 +791,6 @@ impl<'a> TenantDownloader<'a> {

        tracing::debug!(timeline_id=%timeline.timeline_id, "Downloading layers, {} in heatmap", timeline.layers.len());

-        let mut download_futs = Vec::new();
-
        // Download heatmap layers that are not present on local disk, or update their
        // access time if they are already present.
        for layer in timeline.layers {
@@ -816,14 +806,10 @@ impl<'a> TenantDownloader<'a> {
                if cfg!(debug_assertions) {
                    // Debug for https://github.com/neondatabase/neon/issues/6966: check that the files we think
                    // are already present on disk are really there.
-                    let local_path = local_layer_path(
-                        self.conf,
-                        tenant_shard_id,
-                        &timeline.timeline_id,
-                        &layer.name,
-                        &layer.metadata.generation,
-                    );
-
+                    let local_path = self
+                        .conf
+                        .timeline_path(tenant_shard_id, &timeline.timeline_id)
+                        .join(layer.name.file_name());
                    match tokio::fs::metadata(&local_path).await {
                        Ok(meta) => {
                            tracing::debug!(
@@ -883,33 +869,61 @@ impl<'a> TenantDownloader<'a> {
                }
            }

-            download_futs.push(self.download_layer(
-                tenant_shard_id,
-                &timeline.timeline_id,
-                layer,
+            // Failpoint for simulating slow remote storage
+            failpoint_support::sleep_millis_async!(
+                "secondary-layer-download-sleep",
+                &self.secondary_state.cancel
+            );
+
+            // Note: no backoff::retry wrapper here because download_layer_file does its own retries internally
+            let downloaded_bytes = match download_layer_file(
+                self.conf,
+                self.remote_storage,
+                *tenant_shard_id,
+                timeline.timeline_id,
+                &layer.name,
+                &LayerFileMetadata::from(&layer.metadata),
+                &self.secondary_state.cancel,
                ctx,
-            ));
-        }
-
-        // Break up layer downloads into chunks, so that for each chunk we can re-check how much
-        // concurrency to use based on activity level of remote storage.
-        while !download_futs.is_empty() {
-            let chunk =
-                download_futs.split_off(download_futs.len().saturating_sub(MAX_LAYER_CONCURRENCY));
-
-            let concurrency = Self::layer_concurrency(self.remote_storage.activity());
-
-            let mut result_stream = futures::stream::iter(chunk).buffered(concurrency);
-            let mut result_stream = std::pin::pin!(result_stream);
-            while let Some(result) = result_stream.next().await {
-                match result {
-                    Err(e) => return Err(e),
-                    Ok(None) => {
-                        // No error, but we didn't download the layer.  Don't mark it touched
-                    }
-                    Ok(Some(layer)) => touched.push(layer),
+            )
+            .await
+            {
+                Ok(bytes) => bytes,
+                Err(DownloadError::NotFound) => {
+                    // A heatmap might be out of date and refer to a layer that doesn't exist any more.
+                    // This is harmless: continue to download the next layer. It is expected during compaction
+                    // GC.
+                    tracing::debug!(
+                        "Skipped downloading missing layer {}, raced with compaction/gc?",
+                        layer.name
+                    );
+                    continue;
                }
+                Err(e) => return Err(e.into()),
+            };
+
+            if downloaded_bytes != layer.metadata.file_size {
+                let local_path = timeline_path.join(layer.name.to_string());
+
+                tracing::warn!(
+                    "Downloaded layer {} with unexpected size {} != {}.  Removing download.",
+                    layer.name,
+                    downloaded_bytes,
+                    layer.metadata.file_size
+                );
+
+                tokio::fs::remove_file(&local_path)
+                    .await
+                    .or_else(fs_ext::ignore_not_found)?;
+            } else {
+                tracing::info!("Downloaded layer {}, size {}", layer.name, downloaded_bytes);
+                let mut progress = self.secondary_state.progress.lock().unwrap();
+                progress.bytes_downloaded += downloaded_bytes;
+                progress.layers_downloaded += 1;
            }
+
+            SECONDARY_MODE.download_layer.inc();
+            touched.push(layer)
        }

        // Write updates to state to record layers we just downloaded or touched.
@@ -941,90 +955,6 @@ impl<'a> TenantDownloader<'a> {

        Ok(())
    }
-
-    async fn download_layer(
-        &self,
-        tenant_shard_id: &TenantShardId,
-        timeline_id: &TimelineId,
-        layer: HeatMapLayer,
-        ctx: &RequestContext,
-    ) -> Result<Option<HeatMapLayer>, UpdateError> {
-        // Failpoint for simulating slow remote storage
-        failpoint_support::sleep_millis_async!(
-            "secondary-layer-download-sleep",
-            &self.secondary_state.cancel
-        );
-
-        // Note: no backoff::retry wrapper here because download_layer_file does its own retries internally
-        let downloaded_bytes = match download_layer_file(
-            self.conf,
-            self.remote_storage,
-            *tenant_shard_id,
-            *timeline_id,
-            &layer.name,
-            &LayerFileMetadata::from(&layer.metadata),
-            &self.secondary_state.cancel,
-            ctx,
-        )
-        .await
-        {
-            Ok(bytes) => bytes,
-            Err(DownloadError::NotFound) => {
-                // A heatmap might be out of date and refer to a layer that doesn't exist any more.
-                // This is harmless: continue to download the next layer. It is expected during compaction
-                // GC.
-                tracing::debug!(
-                    "Skipped downloading missing layer {}, raced with compaction/gc?",
-                    layer.name
-                );
-                return Ok(None);
-            }
-            Err(e) => return Err(e.into()),
-        };
-
-        if downloaded_bytes != layer.metadata.file_size {
-            let local_path = local_layer_path(
-                self.conf,
-                tenant_shard_id,
-                timeline_id,
-                &layer.name,
-                &layer.metadata.generation,
-            );
-
-            tracing::warn!(
-                "Downloaded layer {} with unexpected size {} != {}.  Removing download.",
-                layer.name,
-                downloaded_bytes,
-                layer.metadata.file_size
-            );
-
-            tokio::fs::remove_file(&local_path)
-                .await
-                .or_else(fs_ext::ignore_not_found)?;
-        } else {
-            tracing::info!("Downloaded layer {}, size {}", layer.name, downloaded_bytes);
-            let mut progress = self.secondary_state.progress.lock().unwrap();
-            progress.bytes_downloaded += downloaded_bytes;
-            progress.layers_downloaded += 1;
-        }
-
-        SECONDARY_MODE.download_layer.inc();
-
-        Ok(Some(layer))
-    }
-
-    /// Calculate the currently allowed parallelism of layer download tasks, based on activity level of the remote storage
-    fn layer_concurrency(activity: RemoteStorageActivity) -> usize {
-        // When less than 75% of units are available, use minimum concurrency.  Else, do a linear mapping
-        // of our concurrency range to the units available within the remaining 25%.
-        let clamp_at = (activity.read_total * 3) / 4;
-        if activity.read_available > clamp_at {
-            (MAX_LAYER_CONCURRENCY * (activity.read_available - clamp_at))
-                / (activity.read_total - clamp_at)
-        } else {
-            MIN_LAYER_CONCURRENCY
-        }
-    }
 }

 /// Scan local storage and build up Layer objects based on the metadata in a HeatMapTimeline
@@ -1056,7 +986,7 @@ async fn init_timeline_state(

    // As we iterate through layers found on disk, we will look up their metadata from this map.
    // Layers not present in metadata will be discarded.
-    let heatmap_metadata: HashMap<&LayerName, &HeatMapLayer> =
+    let heatmap_metadata: HashMap<&LayerFileName, &HeatMapLayer> =
        heatmap.layers.iter().map(|l| (&l.name, l)).collect();

    while let Some(dentry) = dir
@@ -1074,7 +1004,11 @@ async fn init_timeline_state(
            .fatal_err(&format!("Read metadata on {}", file_path));

        let file_name = file_path.file_name().expect("created it from the dentry");
-        if crate::is_temporary(&file_path)
+        if file_name == METADATA_FILE_NAME {
+            // Secondary mode doesn't use local metadata files, but they might have been left behind by an attached tenant.
+            warn!(path=?dentry.path(), "found legacy metadata file, these should have been removed in load_tenant_config");
+            continue;
+        } else if crate::is_temporary(&file_path)
            || is_temp_download_file(&file_path)
            || is_ephemeral_file(file_name)
        {
@@ -1089,7 +1023,7 @@ async fn init_timeline_state(
            continue;
        }

-        match LayerName::from_str(file_name) {
+        match LayerFileName::from_str(file_name) {
            Ok(name) => {
                let remote_meta = heatmap_metadata.get(&name);
                match remote_meta {
@@ -1147,58 +1081,3 @@ async fn init_timeline_state(

    detail
 }
-
-#[cfg(test)]
-mod test {
-    use super::*;
-
-    #[test]
-    fn layer_concurrency() {
-        // Totally idle
-        assert_eq!(
-            TenantDownloader::layer_concurrency(RemoteStorageActivity {
-                read_available: 16,
-                read_total: 16,
-                write_available: 16,
-                write_total: 16
-            }),
-            MAX_LAYER_CONCURRENCY
-        );
-
-        // Totally busy
-        assert_eq!(
-            TenantDownloader::layer_concurrency(RemoteStorageActivity {
-                read_available: 0,
-                read_total: 16,
-
-                write_available: 16,
-                write_total: 16
-            }),
-            MIN_LAYER_CONCURRENCY
-        );
-
-        // Edge of the range at which we interpolate
-        assert_eq!(
-            TenantDownloader::layer_concurrency(RemoteStorageActivity {
-                read_available: 12,
-                read_total: 16,
-
-                write_available: 16,
-                write_total: 16
-            }),
-            MIN_LAYER_CONCURRENCY
-        );
-
-        // Midpoint of the range in which we interpolate
-        assert_eq!(
-            TenantDownloader::layer_concurrency(RemoteStorageActivity {
-                read_available: 14,
-                read_total: 16,
-
-                write_available: 16,
-                write_total: 16
-            }),
-            MAX_LAYER_CONCURRENCY / 2
-        );
-    }
-}
--- a/pageserver/src/tenant/secondary/heatmap.rs
+++ b/pageserver/src/tenant/secondary/heatmap.rs
@@ -1,6 +1,8 @@
 use std::time::SystemTime;

-use crate::tenant::{remote_timeline_client::index::IndexLayerMetadata, storage_layer::LayerName};
+use crate::tenant::{
+    remote_timeline_client::index::IndexLayerMetadata, storage_layer::LayerFileName,
+};

 use serde::{Deserialize, Serialize};
 use serde_with::{serde_as, DisplayFromStr, TimestampSeconds};
@@ -15,14 +17,6 @@ pub(super) struct HeatMapTenant {
    pub(super) generation: Generation,

    pub(super) timelines: Vec<HeatMapTimeline>,
-
-    /// Uploaders provide their own upload period in the heatmap, as a hint to downloaders
-    /// of how frequently it is worthwhile to check for updates.
-    ///
-    /// This is optional for backward compat, and because we sometimes might upload
-    /// a heatmap explicitly via API for a tenant that has no periodic upload configured.
-    #[serde(default)]
-    pub(super) upload_period_ms: Option<u128>,
 }

 #[serde_as]
@@ -37,7 +31,7 @@ pub(crate) struct HeatMapTimeline {
 #[serde_as]
 #[derive(Serialize, Deserialize)]
 pub(crate) struct HeatMapLayer {
-    pub(super) name: LayerName,
+    pub(super) name: LayerFileName,
    pub(super) metadata: IndexLayerMetadata,

    #[serde_as(as = "TimestampSeconds<i64>")]
@@ -48,7 +42,7 @@ pub(crate) struct HeatMapLayer {

 impl HeatMapLayer {
    pub(crate) fn new(
-        name: LayerName,
+        name: LayerFileName,
        metadata: IndexLayerMetadata,
        access_time: SystemTime,
    ) -> Self {
@@ -89,21 +83,4 @@ impl HeatMapTenant {

        stats
    }
-
-    pub(crate) fn strip_atimes(self) -> Self {
-        Self {
-            timelines: self
-                .timelines
-                .into_iter()
-                .map(|mut tl| {
-                    for layer in &mut tl.layers {
-                        layer.access_time = SystemTime::UNIX_EPOCH;
-                    }
-                    tl
-                })
-                .collect(),
-            generation: self.generation,
-            upload_period_ms: self.upload_period_ms,
-        }
-    }
 }
--- a/pageserver/src/tenant/secondary/heatmap_uploader.rs
+++ b/pageserver/src/tenant/secondary/heatmap_uploader.rs
@@ -80,7 +80,7 @@ impl RunningJob for WriteInProgress {

 struct UploadPending {
    tenant: Arc<Tenant>,
-    last_upload: Option<LastUploadState>,
+    last_digest: Option<md5::Digest>,
    target_time: Option<Instant>,
    period: Option<Duration>,
 }
@@ -94,7 +94,7 @@ impl scheduler::PendingJob for UploadPending {
 struct WriteComplete {
    tenant_shard_id: TenantShardId,
    completed_at: Instant,
-    uploaded: Option<LastUploadState>,
+    digest: Option<md5::Digest>,
    next_upload: Option<Instant>,
 }

@@ -115,7 +115,10 @@ struct UploaderTenantState {
    tenant: Weak<Tenant>,

    /// Digest of the serialized heatmap that we last successfully uploaded
-    last_upload_state: Option<LastUploadState>,
+    ///
+    /// md5 is generally a bad hash.  We use it because it's convenient for interop with AWS S3's ETag,
+    /// which is also an md5sum.
+    last_digest: Option<md5::Digest>,

    /// When the last upload attempt completed (may have been successful or failed)
    last_upload: Option<Instant>,
@@ -184,7 +187,7 @@ impl JobGenerator<UploadPending, WriteInProgress, WriteComplete, UploadCommand>
                    tenant: Arc::downgrade(&tenant),
                    last_upload: None,
                    next_upload: Some(now.checked_add(period_warmup(period)).unwrap_or(now)),
-                    last_upload_state: None,
+                    last_digest: None,
                });

            // Decline to do the upload if insufficient time has passed
@@ -192,10 +195,10 @@ impl JobGenerator<UploadPending, WriteInProgress, WriteComplete, UploadCommand>
                return;
            }

-            let last_upload = state.last_upload_state.clone();
+            let last_digest = state.last_digest;
            result.jobs.push(UploadPending {
                tenant,
-                last_upload,
+                last_digest,
                target_time: state.next_upload,
                period: Some(period),
            });
@@ -215,7 +218,7 @@ impl JobGenerator<UploadPending, WriteInProgress, WriteComplete, UploadCommand>
    ) {
        let UploadPending {
            tenant,
-            last_upload,
+            last_digest,
            target_time,
            period,
        } = job;
@@ -228,16 +231,16 @@ impl JobGenerator<UploadPending, WriteInProgress, WriteComplete, UploadCommand>
            let _completion = completion;

            let started_at = Instant::now();
-            let uploaded = match upload_tenant_heatmap(remote_storage, &tenant, last_upload.clone()).await {
-                Ok(UploadHeatmapOutcome::Uploaded(uploaded)) => {
+            let digest = match upload_tenant_heatmap(remote_storage, &tenant, last_digest).await {
+                Ok(UploadHeatmapOutcome::Uploaded(digest)) => {
                    let duration = Instant::now().duration_since(started_at);
                    SECONDARY_MODE
                        .upload_heatmap_duration
                        .observe(duration.as_secs_f64());
                    SECONDARY_MODE.upload_heatmap.inc();
-                    Some(uploaded)
+                    Some(digest)
                }
-                Ok(UploadHeatmapOutcome::NoChange | UploadHeatmapOutcome::Skipped) => last_upload,
+                Ok(UploadHeatmapOutcome::NoChange | UploadHeatmapOutcome::Skipped) => last_digest,
                Err(UploadHeatmapError::Upload(e)) => {
                    tracing::warn!(
                        "Failed to upload heatmap for tenant {}: {e:#}",
@@ -248,11 +251,11 @@ impl JobGenerator<UploadPending, WriteInProgress, WriteComplete, UploadCommand>
                        .upload_heatmap_duration
                        .observe(duration.as_secs_f64());
                    SECONDARY_MODE.upload_heatmap_errors.inc();
-                    last_upload
+                    last_digest
                }
                Err(UploadHeatmapError::Cancelled) => {
                    tracing::info!("Cancelled heatmap upload, shutting down");
-                    last_upload
+                    last_digest
                }
            };

@@ -274,7 +277,7 @@ impl JobGenerator<UploadPending, WriteInProgress, WriteComplete, UploadCommand>
            WriteComplete {
                    tenant_shard_id: *tenant.get_tenant_shard_id(),
                    completed_at: now,
-                    uploaded,
+                    digest,
                    next_upload,
                }
        }.instrument(info_span!(parent: None, "heatmap_upload", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug()))))
@@ -296,7 +299,7 @@ impl JobGenerator<UploadPending, WriteInProgress, WriteComplete, UploadCommand>

        Ok(UploadPending {
            // Ignore our state for last digest: this forces an upload even if nothing has changed
-            last_upload: None,
+            last_digest: None,
            tenant,
            target_time: None,
            period: None,
@@ -309,7 +312,7 @@ impl JobGenerator<UploadPending, WriteInProgress, WriteComplete, UploadCommand>
        let WriteComplete {
            tenant_shard_id,
            completed_at,
-            uploaded,
+            digest,
            next_upload,
        } = completion;
        use std::collections::hash_map::Entry;
@@ -319,7 +322,7 @@ impl JobGenerator<UploadPending, WriteInProgress, WriteComplete, UploadCommand>
            }
            Entry::Occupied(mut entry) => {
                entry.get_mut().last_upload = Some(completed_at);
-                entry.get_mut().last_upload_state = uploaded;
+                entry.get_mut().last_digest = digest;
                entry.get_mut().next_upload = next_upload
            }
        }
@@ -328,7 +331,7 @@ impl JobGenerator<UploadPending, WriteInProgress, WriteComplete, UploadCommand>

 enum UploadHeatmapOutcome {
    /// We successfully wrote to remote storage, with this digest.
-    Uploaded(LastUploadState),
+    Uploaded(md5::Digest),
    /// We did not upload because the heatmap digest was unchanged since the last upload
    NoChange,
    /// We skipped the upload for some reason, such as tenant/timeline not ready
@@ -344,25 +347,12 @@ enum UploadHeatmapError {
    Upload(#[from] anyhow::Error),
 }

-/// Digests describing the heatmap we most recently uploaded successfully.
-///
-/// md5 is generally a bad hash.  We use it because it's convenient for interop with AWS S3's ETag,
-/// which is also an md5sum.
-#[derive(Clone)]
-struct LastUploadState {
-    // Digest of json-encoded HeatMapTenant
-    uploaded_digest: md5::Digest,
-
-    // Digest without atimes set.
-    layers_only_digest: md5::Digest,
-}
-
 /// The inner upload operation.  This will skip if `last_digest` is Some and matches the digest
 /// of the object we would have uploaded.
 async fn upload_tenant_heatmap(
    remote_storage: GenericRemoteStorage,
    tenant: &Arc<Tenant>,
-    last_upload: Option<LastUploadState>,
+    last_digest: Option<md5::Digest>,
 ) -> Result<UploadHeatmapOutcome, UploadHeatmapError> {
    debug_assert_current_span_has_tenant_id();

@@ -378,7 +368,6 @@ async fn upload_tenant_heatmap(
    let mut heatmap = HeatMapTenant {
        timelines: Vec::new(),
        generation,
-        upload_period_ms: tenant.get_heatmap_period().map(|p| p.as_millis()),
    };
    let timelines = tenant.timelines.lock().unwrap().clone();

@@ -407,31 +396,15 @@ async fn upload_tenant_heatmap(

    // Serialize the heatmap
    let bytes = serde_json::to_vec(&heatmap).map_err(|e| anyhow::anyhow!(e))?;
+    let bytes = bytes::Bytes::from(bytes);
+    let size = bytes.len();

    // Drop out early if nothing changed since our last upload
    let digest = md5::compute(&bytes);
-    if Some(&digest) == last_upload.as_ref().map(|d| &d.uploaded_digest) {
+    if Some(digest) == last_digest {
        return Ok(UploadHeatmapOutcome::NoChange);
    }

-    // Calculate a digest that omits atimes, so that we can distinguish actual changes in
-    // layers from changes only in atimes.
-    let heatmap_size_bytes = heatmap.get_stats().bytes;
-    let layers_only_bytes =
-        serde_json::to_vec(&heatmap.strip_atimes()).map_err(|e| anyhow::anyhow!(e))?;
-    let layers_only_digest = md5::compute(&layers_only_bytes);
-    if heatmap_size_bytes < tenant.get_checkpoint_distance() {
-        // For small tenants, skip upload if only atimes changed. This avoids doing frequent
-        // uploads from long-idle tenants whose atimes are just incremented by periodic
-        // size calculations.
-        if Some(&layers_only_digest) == last_upload.as_ref().map(|d| &d.layers_only_digest) {
-            return Ok(UploadHeatmapOutcome::NoChange);
-        }
-    }
-
-    let bytes = bytes::Bytes::from(bytes);
-    let size = bytes.len();
-
    let path = remote_heatmap_path(tenant.get_tenant_shard_id());

    let cancel = &tenant.cancel;
@@ -463,8 +436,5 @@ async fn upload_tenant_heatmap(

    tracing::info!("Successfully uploaded {size} byte heatmap to {path}");

-    Ok(UploadHeatmapOutcome::Uploaded(LastUploadState {
-        uploaded_digest: digest,
-        layers_only_digest,
-    }))
+    Ok(UploadHeatmapOutcome::Uploaded(digest))
 }
--- a/pageserver/src/tenant/storage_layer.rs
+++ b/pageserver/src/tenant/storage_layer.rs
@@ -1,11 +1,11 @@
 //! Common traits and structs for layers

 pub mod delta_layer;
+mod filename;
 pub mod image_layer;
 pub(crate) mod inmemory_layer;
 pub(crate) mod layer;
 mod layer_desc;
-mod layer_name;

 use crate::context::{AccessStatsBehavior, RequestContext};
 use crate::repository::Value;
@@ -34,10 +34,10 @@ use utils::rate_limit::RateLimit;
 use utils::{id::TimelineId, lsn::Lsn};

 pub use delta_layer::{DeltaLayer, DeltaLayerWriter, ValueRef};
+pub use filename::{DeltaFileName, ImageFileName, LayerFileName};
 pub use image_layer::{ImageLayer, ImageLayerWriter};
 pub use inmemory_layer::InMemoryLayer;
 pub use layer_desc::{PersistentLayerDesc, PersistentLayerKey};
-pub use layer_name::{DeltaLayerName, ImageLayerName, LayerName};

 pub(crate) use layer::{EvictionError, Layer, ResidentLayer};

@@ -646,8 +646,8 @@ pub mod tests {

    use super::*;

-    impl From<DeltaLayerName> for PersistentLayerDesc {
-        fn from(value: DeltaLayerName) -> Self {
+    impl From<DeltaFileName> for PersistentLayerDesc {
+        fn from(value: DeltaFileName) -> Self {
            PersistentLayerDesc::new_delta(
                TenantShardId::from([0; 18]),
                TimelineId::from_array([0; 16]),
@@ -658,8 +658,8 @@ pub mod tests {
        }
    }

-    impl From<ImageLayerName> for PersistentLayerDesc {
-        fn from(value: ImageLayerName) -> Self {
+    impl From<ImageFileName> for PersistentLayerDesc {
+        fn from(value: ImageFileName) -> Self {
            PersistentLayerDesc::new_img(
                TenantShardId::from([0; 18]),
                TimelineId::from_array([0; 16]),
@@ -670,11 +670,11 @@ pub mod tests {
        }
    }

-    impl From<LayerName> for PersistentLayerDesc {
-        fn from(value: LayerName) -> Self {
+    impl From<LayerFileName> for PersistentLayerDesc {
+        fn from(value: LayerFileName) -> Self {
            match value {
-                LayerName::Delta(d) => Self::from(d),
-                LayerName::Image(i) => Self::from(i),
+                LayerFileName::Delta(d) => Self::from(d),
+                LayerFileName::Image(i) => Self::from(i),
            }
        }
    }
--- a/pageserver/src/tenant/storage_layer/delta_layer.rs
+++ b/pageserver/src/tenant/storage_layer/delta_layer.rs
@@ -57,7 +57,6 @@ use std::fs::File;
 use std::io::SeekFrom;
 use std::ops::Range;
 use std::os::unix::fs::FileExt;
-use std::str::FromStr;
 use std::sync::Arc;
 use tokio::sync::OnceCell;
 use tracing::*;
@@ -69,8 +68,7 @@ use utils::{
 };

 use super::{
-    AsLayerDesc, LayerAccessStats, LayerName, PersistentLayerDesc, ResidentLayer,
-    ValuesReconstructState,
+    AsLayerDesc, LayerAccessStats, PersistentLayerDesc, ResidentLayer, ValuesReconstructState,
 };

 ///
@@ -311,13 +309,13 @@ impl DeltaLayer {
            .and_then(|res| res)?;

        // not production code
-        let actual_layer_name = LayerName::from_str(path.file_name().unwrap()).unwrap();
-        let expected_layer_name = self.layer_desc().layer_name();
+        let actual_filename = path.file_name().unwrap().to_owned();
+        let expected_filename = self.layer_desc().filename().file_name();

-        if actual_layer_name != expected_layer_name {
+        if actual_filename != expected_filename {
            println!("warning: filename does not match what is expected from in-file summary");
-            println!("actual: {:?}", actual_layer_name.to_string());
-            println!("expected: {:?}", expected_layer_name.to_string());
+            println!("actual: {:?}", actual_filename);
+            println!("expected: {:?}", expected_filename);
        }

        Ok(Arc::new(loaded))
@@ -394,7 +392,6 @@ impl DeltaLayerWriterInner {
        tenant_shard_id: TenantShardId,
        key_start: Key,
        lsn_range: Range<Lsn>,
-        ctx: &RequestContext,
    ) -> anyhow::Result<Self> {
        // Create the file initially with a temporary filename. We don't know
        // the end key yet, so we cannot form the final filename yet. We will
@@ -405,7 +402,7 @@ impl DeltaLayerWriterInner {
        let path =
            DeltaLayer::temp_path_for(conf, &tenant_shard_id, &timeline_id, key_start, &lsn_range);

-        let mut file = VirtualFile::create(&path, ctx).await?;
+        let mut file = VirtualFile::create(&path).await?;
        // make room for the header block
        file.seek(SeekFrom::Start(PAGE_SZ as u64)).await?;
        let blob_writer = BlobWriter::new(file, PAGE_SZ as u64);
@@ -587,7 +584,6 @@ impl DeltaLayerWriter {
        tenant_shard_id: TenantShardId,
        key_start: Key,
        lsn_range: Range<Lsn>,
-        ctx: &RequestContext,
    ) -> anyhow::Result<Self> {
        Ok(Self {
            inner: Some(
@@ -597,7 +593,6 @@ impl DeltaLayerWriter {
                    tenant_shard_id,
                    key_start,
                    lsn_range,
-                    ctx,
                )
                .await?,
            ),
@@ -704,7 +699,6 @@ impl DeltaLayer {
        let mut file = VirtualFile::open_with_options(
            path,
            virtual_file::OpenOptions::new().read(true).write(true),
-            ctx,
        )
        .await
        .with_context(|| format!("Failed to open file '{}'", path))?;
@@ -738,7 +732,7 @@ impl DeltaLayerInner {
        max_vectored_read_bytes: Option<MaxVectoredReadBytes>,
        ctx: &RequestContext,
    ) -> Result<Result<Self, anyhow::Error>, anyhow::Error> {
-        let file = match VirtualFile::open(path, ctx).await {
+        let file = match VirtualFile::open(path).await {
            Ok(file) => file,
            Err(e) => return Ok(Err(anyhow::Error::new(e).context("open layer file"))),
        };
@@ -912,7 +906,7 @@ impl DeltaLayerInner {
        .await
        .map_err(GetVectoredError::Other)?;

-        self.do_reads_and_update_state(reads, reconstruct_state, ctx)
+        self.do_reads_and_update_state(reads, reconstruct_state)
            .await;

        reconstruct_state.on_lsn_advanced(&keyspace, self.lsn_range.start);
@@ -1016,7 +1010,6 @@ impl DeltaLayerInner {
        &self,
        reads: Vec<VectoredRead>,
        reconstruct_state: &mut ValuesReconstructState,
-        ctx: &RequestContext,
    ) {
        let vectored_blob_reader = VectoredBlobReader::new(&self.file);
        let mut ignore_key_with_err = None;
@@ -1034,7 +1027,7 @@ impl DeltaLayerInner {
        // track when a key is done.
        for read in reads.into_iter().rev() {
            let res = vectored_blob_reader
-                .read_blobs(&read, buf.take().expect("Should have a buffer"), ctx)
+                .read_blobs(&read, buf.take().expect("Should have a buffer"))
                .await;

            let blobs_buf = match res {
@@ -1146,15 +1139,15 @@ impl DeltaLayerInner {
        Ok(all_keys)
    }

-    /// Using the given writer, write out a version which has the earlier Lsns than `until`.
-    ///
-    /// Return the amount of key value records pushed to the writer.
+    /// Using the given writer, write out a truncated version, keeping only the entries whose
+    /// LSN is lower than `truncate_at` (entries at or above `truncate_at` are omitted).
+    #[cfg(test)]
    pub(super) async fn copy_prefix(
        &self,
        writer: &mut DeltaLayerWriter,
-        until: Lsn,
+        truncate_at: Lsn,
        ctx: &RequestContext,
-    ) -> anyhow::Result<usize> {
+    ) -> anyhow::Result<()> {
        use crate::tenant::vectored_blob_io::{
            BlobMeta, VectoredReadBuilder, VectoredReadExtended,
        };
@@ -1218,8 +1211,6 @@ impl DeltaLayerInner {
        // FIXME: buffering of DeltaLayerWriter
        let mut per_blob_copy = Vec::new();

-        let mut records = 0;
-
        while let Some(item) = stream.try_next().await? {
            tracing::debug!(?item, "popped");
            let offset = item
@@ -1238,7 +1229,7 @@ impl DeltaLayerInner {

            prev = Option::from(item);

-            let actionable = actionable.filter(|x| x.0.lsn < until);
+            let actionable = actionable.filter(|x| x.0.lsn < truncate_at);

            let builder = if let Some((meta, offsets)) = actionable {
                // extend or create a new builder
@@ -1279,7 +1270,7 @@ impl DeltaLayerInner {

                buf.clear();
                buf.reserve(read.size());
-                let res = reader.read_blobs(&read, buf, ctx).await?;
+                let res = reader.read_blobs(&read, buf).await?;

                for blob in res.blobs {
                    let key = blob.meta.key;
@@ -1306,7 +1297,7 @@ impl DeltaLayerInner {
                    let will_init = crate::repository::ValueBytes::will_init(data)
                        .inspect_err(|_e| {
                            #[cfg(feature = "testing")]
-                            tracing::error!(data=?utils::Hex(data), err=?_e, %key, %lsn, "failed to parse will_init out of serialized value");
+                            tracing::error!(data=?utils::Hex(data), err=?_e, "failed to parse will_init out of serialized value");
                        })
                        .unwrap_or(false);

@@ -1323,10 +1314,7 @@ impl DeltaLayerInner {
                        )
                        .await;
                    per_blob_copy = tmp;
-
                    res?;
-
-                    records += 1;
                }

                buffer = Some(res.buf);
@@ -1338,7 +1326,7 @@ impl DeltaLayerInner {
            "with the sentinel above loop should had handled all"
        );

-        Ok(records)
+        Ok(())
    }

    pub(super) async fn dump(&self, ctx: &RequestContext) -> anyhow::Result<()> {
@@ -1411,6 +1399,7 @@ impl DeltaLayerInner {
        Ok(())
    }

+    #[cfg(test)]
    fn stream_index_forwards<'a, R>(
        &'a self,
        reader: &'a DiskBtreeReader<R, DELTA_KEY_SIZE>,
@@ -1796,7 +1785,6 @@ mod test {
            harness.tenant_shard_id,
            entries_meta.key_range.start,
            entries_meta.lsn_range.clone(),
-            &ctx,
        )
        .await?;

@@ -1854,7 +1842,7 @@ mod test {

            for read in vectored_reads {
                let blobs_buf = vectored_blob_reader
-                    .read_blobs(&read, buf.take().expect("Should have a buffer"), &ctx)
+                    .read_blobs(&read, buf.take().expect("Should have a buffer"))
                    .await?;
                for meta in blobs_buf.blobs.iter() {
                    let value = &blobs_buf.buf[meta.start..meta.end];
@@ -1984,7 +1972,6 @@ mod test {
                tenant.tenant_shard_id,
                Key::MIN,
                Lsn(0x11)..truncate_at,
-                ctx,
            )
            .await
            .unwrap();
--- a/pageserver/src/tenant/storage_layer/layer_name.rs
+++ b/pageserver/src/tenant/storage_layer/layer_name.rs
@@ -2,42 +2,40 @@
 //! Helper functions for dealing with filenames of the image and delta layer files.
 //!
 use crate::repository::Key;
-use std::borrow::Cow;
 use std::cmp::Ordering;
 use std::fmt;
 use std::ops::Range;
 use std::str::FromStr;

-use regex::Regex;
 use utils::lsn::Lsn;

 use super::PersistentLayerDesc;

 // Note: Timeline::load_layer_map() relies on this sort order
 #[derive(PartialEq, Eq, Clone, Hash)]
-pub struct DeltaLayerName {
+pub struct DeltaFileName {
    pub key_range: Range<Key>,
    pub lsn_range: Range<Lsn>,
 }

-impl std::fmt::Debug for DeltaLayerName {
+impl std::fmt::Debug for DeltaFileName {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        use super::RangeDisplayDebug;

-        f.debug_struct("DeltaLayerName")
+        f.debug_struct("DeltaFileName")
            .field("key_range", &RangeDisplayDebug(&self.key_range))
            .field("lsn_range", &self.lsn_range)
            .finish()
    }
 }

-impl PartialOrd for DeltaLayerName {
+impl PartialOrd for DeltaFileName {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
 }

-impl Ord for DeltaLayerName {
+impl Ord for DeltaFileName {
    fn cmp(&self, other: &Self) -> Ordering {
        let mut cmp = self.key_range.start.cmp(&other.key_range.start);
        if cmp != Ordering::Equal {
@@ -57,14 +55,16 @@ impl Ord for DeltaLayerName {
    }
 }

-/// Represents the region of the LSN-Key space covered by a DeltaLayer
+/// Represents the filename of a DeltaLayer
 ///
 /// ```text
 ///    <key start>-<key end>__<LSN start>-<LSN end>
 /// ```
-impl DeltaLayerName {
-    /// Parse the part of a delta layer's file name that represents the LayerName. Returns None
-    /// if the filename does not match the expected pattern.
+impl DeltaFileName {
+    ///
+    /// Parse a string as a delta file name. Returns None if the filename does not
+    /// match the expected pattern.
+    ///
    pub fn parse_str(fname: &str) -> Option<Self> {
        let mut parts = fname.split("__");
        let mut key_parts = parts.next()?.split('-');
@@ -74,19 +74,10 @@ impl DeltaLayerName {
        let key_end_str = key_parts.next()?;
        let lsn_start_str = lsn_parts.next()?;
        let lsn_end_str = lsn_parts.next()?;
-
        if parts.next().is_some() || key_parts.next().is_some() || key_parts.next().is_some() {
            return None;
        }

-        if key_start_str.len() != 36
-            || key_end_str.len() != 36
-            || lsn_start_str.len() != 16
-            || lsn_end_str.len() != 16
-        {
-            return None;
-        }
-
        let key_start = Key::from_hex(key_start_str).ok()?;
        let key_end = Key::from_hex(key_end_str).ok()?;

@@ -103,14 +94,14 @@ impl DeltaLayerName {
            // or panic?
        }

-        Some(DeltaLayerName {
+        Some(DeltaFileName {
            key_range: key_start..key_end,
            lsn_range: start_lsn..end_lsn,
        })
    }
 }

-impl fmt::Display for DeltaLayerName {
+impl fmt::Display for DeltaFileName {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(
            f,
@@ -124,29 +115,29 @@ impl fmt::Display for DeltaLayerName {
 }

 #[derive(PartialEq, Eq, Clone, Hash)]
-pub struct ImageLayerName {
+pub struct ImageFileName {
    pub key_range: Range<Key>,
    pub lsn: Lsn,
 }

-impl std::fmt::Debug for ImageLayerName {
+impl std::fmt::Debug for ImageFileName {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        use super::RangeDisplayDebug;

-        f.debug_struct("ImageLayerName")
+        f.debug_struct("ImageFileName")
            .field("key_range", &RangeDisplayDebug(&self.key_range))
            .field("lsn", &self.lsn)
            .finish()
    }
 }

-impl PartialOrd for ImageLayerName {
+impl PartialOrd for ImageFileName {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
 }

-impl Ord for ImageLayerName {
+impl Ord for ImageFileName {
    fn cmp(&self, other: &Self) -> Ordering {
        let mut cmp = self.key_range.start.cmp(&other.key_range.start);
        if cmp != Ordering::Equal {
@@ -162,7 +153,7 @@ impl Ord for ImageLayerName {
    }
 }

-impl ImageLayerName {
+impl ImageFileName {
    pub fn lsn_as_range(&self) -> Range<Lsn> {
        // Saves from having to copypaste this all over
        PersistentLayerDesc::image_layer_lsn_range(self.lsn)
@@ -170,14 +161,16 @@ impl ImageLayerName {
 }

 ///
-/// Represents the part of the Key-LSN space covered by an ImageLayer
+/// Represents the filename of an ImageLayer
 ///
 /// ```text
 ///    <key start>-<key end>__<LSN>
 /// ```
-impl ImageLayerName {
-    /// Parse a string as then LayerName part of an image layer file name. Returns None if the
-    /// filename does not match the expected pattern.
+impl ImageFileName {
+    ///
+    /// Parse a string as an image file name. Returns None if the filename does not
+    /// match the expected pattern.
+    ///
    pub fn parse_str(fname: &str) -> Option<Self> {
        let mut parts = fname.split("__");
        let mut key_parts = parts.next()?.split('-');
@@ -189,23 +182,19 @@ impl ImageLayerName {
            return None;
        }

-        if key_start_str.len() != 36 || key_end_str.len() != 36 || lsn_str.len() != 16 {
-            return None;
-        }
-
        let key_start = Key::from_hex(key_start_str).ok()?;
        let key_end = Key::from_hex(key_end_str).ok()?;

        let lsn = Lsn::from_hex(lsn_str).ok()?;

-        Some(ImageLayerName {
+        Some(ImageFileName {
            key_range: key_start..key_end,
            lsn,
        })
    }
 }

-impl fmt::Display for ImageLayerName {
+impl fmt::Display for ImageFileName {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(
            f,
@@ -216,24 +205,21 @@ impl fmt::Display for ImageLayerName {
        )
    }
 }
-
-/// LayerName is the logical identity of a layer within a LayerMap at a moment in time.  The
-/// LayerName is not a unique filename, as the same LayerName may have multiple physical incarnations
-/// over time (e.g. across shard splits or compression). The physical filenames of layers in local
-/// storage and object names in remote storage consist of the LayerName plus some extra qualifiers
-/// that uniquely identify the physical incarnation of a layer (see [crate::tenant::remote_timeline_client::remote_layer_path])
-/// and [`crate::tenant::storage_layer::layer::local_layer_path`])
 #[derive(Debug, PartialEq, Eq, Hash, Clone)]
-pub enum LayerName {
-    Image(ImageLayerName),
-    Delta(DeltaLayerName),
+pub enum LayerFileName {
+    Image(ImageFileName),
+    Delta(DeltaFileName),
 }

-impl LayerName {
+impl LayerFileName {
+    pub fn file_name(&self) -> String {
+        self.to_string()
+    }
+
    /// Determines if this layer file is considered to be in future meaning we will discard these
    /// layers during timeline initialization from the given disk_consistent_lsn.
    pub(crate) fn is_in_future(&self, disk_consistent_lsn: Lsn) -> bool {
-        use LayerName::*;
+        use LayerFileName::*;
        match self {
            Image(file_name) if file_name.lsn > disk_consistent_lsn => true,
            Delta(file_name) if file_name.lsn_range.end > disk_consistent_lsn + 1 => true,
@@ -242,7 +228,7 @@ impl LayerName {
    }

    pub(crate) fn kind(&self) -> &'static str {
-        use LayerName::*;
+        use LayerFileName::*;
        match self {
            Delta(_) => "delta",
            Image(_) => "image",
@@ -250,7 +236,7 @@ impl LayerName {
    }
 }

-impl fmt::Display for LayerName {
+impl fmt::Display for LayerFileName {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Image(fname) => write!(f, "{fname}"),
@@ -259,36 +245,23 @@ impl fmt::Display for LayerName {
    }
 }

-impl From<ImageLayerName> for LayerName {
-    fn from(fname: ImageLayerName) -> Self {
+impl From<ImageFileName> for LayerFileName {
+    fn from(fname: ImageFileName) -> Self {
        Self::Image(fname)
    }
 }
-impl From<DeltaLayerName> for LayerName {
-    fn from(fname: DeltaLayerName) -> Self {
+impl From<DeltaFileName> for LayerFileName {
+    fn from(fname: DeltaFileName) -> Self {
        Self::Delta(fname)
    }
 }

-impl FromStr for LayerName {
+impl FromStr for LayerFileName {
    type Err = String;

-    /// Conversion from either a physical layer filename, or the string-ization of
-    /// Self. When loading a physical layer filename, we drop any extra information
-    /// not needed to build Self.
    fn from_str(value: &str) -> Result<Self, Self::Err> {
-        let gen_suffix_regex = Regex::new("^(?<base>.+)(?<gen>-v1-[0-9a-f]{8})$").unwrap();
-        let file_name: Cow<str> = match gen_suffix_regex.captures(value) {
-            Some(captures) => captures
-                .name("base")
-                .expect("Non-optional group")
-                .as_str()
-                .into(),
-            None => value.into(),
-        };
-
-        let delta = DeltaLayerName::parse_str(&file_name);
-        let image = ImageLayerName::parse_str(&file_name);
+        let delta = DeltaFileName::parse_str(value);
+        let image = ImageFileName::parse_str(value);
        let ok = match (delta, image) {
            (None, None) => {
                return Err(format!(
@@ -303,7 +276,7 @@ impl FromStr for LayerName {
    }
 }

-impl serde::Serialize for LayerName {
+impl serde::Serialize for LayerFileName {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
@@ -315,19 +288,19 @@ impl serde::Serialize for LayerName {
    }
 }

-impl<'de> serde::Deserialize<'de> for LayerName {
+impl<'de> serde::Deserialize<'de> for LayerFileName {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
-        deserializer.deserialize_string(LayerNameVisitor)
+        deserializer.deserialize_string(LayerFileNameVisitor)
    }
 }

-struct LayerNameVisitor;
+struct LayerFileNameVisitor;

-impl<'de> serde::de::Visitor<'de> for LayerNameVisitor {
-    type Value = LayerName;
+impl<'de> serde::de::Visitor<'de> for LayerFileNameVisitor {
+    type Value = LayerFileName;

    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        write!(
@@ -342,42 +315,3 @@ impl<'de> serde::de::Visitor<'de> for LayerNameVisitor {
        v.parse().map_err(|e| E::custom(e))
    }
 }
-
-#[cfg(test)]
-mod test {
-    use super::*;
-    #[test]
-    fn image_layer_parse() -> anyhow::Result<()> {
-        let expected = LayerName::Image(ImageLayerName {
-            key_range: Key::from_i128(0)
-                ..Key::from_hex("000000067F00000001000004DF0000000006").unwrap(),
-            lsn: Lsn::from_hex("00000000014FED58").unwrap(),
-        });
-        let parsed = LayerName::from_str("000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58-v1-00000001").map_err(|s| anyhow::anyhow!(s))?;
-        assert_eq!(parsed, expected,);
-
-        // Omitting generation suffix is valid
-        let parsed = LayerName::from_str("000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58").map_err(|s| anyhow::anyhow!(s))?;
-        assert_eq!(parsed, expected,);
-
-        Ok(())
-    }
-
-    #[test]
-    fn delta_layer_parse() -> anyhow::Result<()> {
-        let expected = LayerName::Delta(DeltaLayerName {
-            key_range: Key::from_i128(0)
-                ..Key::from_hex("000000067F00000001000004DF0000000006").unwrap(),
-            lsn_range: Lsn::from_hex("00000000014FED58").unwrap()
-                ..Lsn::from_hex("000000000154C481").unwrap(),
-        });
-        let parsed = LayerName::from_str("000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58-000000000154C481-v1-00000001").map_err(|s| anyhow::anyhow!(s))?;
-        assert_eq!(parsed, expected);
-
-        // Omitting generation suffix is valid
-        let parsed = LayerName::from_str("000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58-000000000154C481").map_err(|s| anyhow::anyhow!(s))?;
-        assert_eq!(parsed, expected);
-
-        Ok(())
-    }
-}
--- a/pageserver/src/tenant/storage_layer/image_layer.rs
+++ b/pageserver/src/tenant/storage_layer/image_layer.rs
@@ -54,7 +54,6 @@ use std::fs::File;
 use std::io::SeekFrom;
 use std::ops::Range;
 use std::os::unix::prelude::FileExt;
-use std::str::FromStr;
 use std::sync::Arc;
 use tokio::sync::OnceCell;
 use tokio_stream::StreamExt;
@@ -66,10 +65,8 @@ use utils::{
    lsn::Lsn,
 };

-use super::layer_name::ImageLayerName;
-use super::{
-    AsLayerDesc, Layer, LayerName, PersistentLayerDesc, ResidentLayer, ValuesReconstructState,
-};
+use super::filename::ImageFileName;
+use super::{AsLayerDesc, Layer, PersistentLayerDesc, ResidentLayer, ValuesReconstructState};

 ///
 /// Header stored in the beginning of the file
@@ -234,7 +231,7 @@ impl ImageLayer {
        conf: &PageServerConf,
        timeline_id: TimelineId,
        tenant_shard_id: TenantShardId,
-        fname: &ImageLayerName,
+        fname: &ImageFileName,
    ) -> Utf8PathBuf {
        let rand_string: String = rand::thread_rng()
            .sample_iter(&Alphanumeric)
@@ -270,13 +267,13 @@ impl ImageLayer {
            .and_then(|res| res)?;

        // not production code
-        let actual_layer_name = LayerName::from_str(path.file_name().unwrap()).unwrap();
-        let expected_layer_name = self.layer_desc().layer_name();
+        let actual_filename = path.file_name().unwrap().to_owned();
+        let expected_filename = self.layer_desc().filename().file_name();

-        if actual_layer_name != expected_layer_name {
+        if actual_filename != expected_filename {
            println!("warning: filename does not match what is expected from in-file summary");
-            println!("actual: {:?}", actual_layer_name.to_string());
-            println!("expected: {:?}", expected_layer_name.to_string());
+            println!("actual: {:?}", actual_filename);
+            println!("expected: {:?}", expected_filename);
        }

        Ok(loaded)
@@ -343,7 +340,6 @@ impl ImageLayer {
        let mut file = VirtualFile::open_with_options(
            path,
            virtual_file::OpenOptions::new().read(true).write(true),
-            ctx,
        )
        .await
        .with_context(|| format!("Failed to open file '{}'", path))?;
@@ -378,7 +374,7 @@ impl ImageLayerInner {
        max_vectored_read_bytes: Option<MaxVectoredReadBytes>,
        ctx: &RequestContext,
    ) -> Result<Result<Self, anyhow::Error>, anyhow::Error> {
-        let file = match VirtualFile::open(path, ctx).await {
+        let file = match VirtualFile::open(path).await {
            Ok(file) => file,
            Err(e) => return Ok(Err(anyhow::Error::new(e).context("open layer file"))),
        };
@@ -475,7 +471,7 @@ impl ImageLayerInner {
            .await
            .map_err(GetVectoredError::Other)?;

-        self.do_reads_and_update_state(reads, reconstruct_state, ctx)
+        self.do_reads_and_update_state(reads, reconstruct_state)
            .await;

        Ok(())
@@ -538,7 +534,6 @@ impl ImageLayerInner {
        &self,
        reads: Vec<VectoredRead>,
        reconstruct_state: &mut ValuesReconstructState,
-        ctx: &RequestContext,
    ) {
        let max_vectored_read_bytes = self
            .max_vectored_read_bytes
@@ -567,7 +562,7 @@ impl ImageLayerInner {
            }

            let buf = BytesMut::with_capacity(buf_size);
-            let res = vectored_blob_reader.read_blobs(&read, buf, ctx).await;
+            let res = vectored_blob_reader.read_blobs(&read, buf).await;

            match res {
                Ok(blobs_buf) => {
@@ -633,7 +628,6 @@ impl ImageLayerWriterInner {
        tenant_shard_id: TenantShardId,
        key_range: &Range<Key>,
        lsn: Lsn,
-        ctx: &RequestContext,
    ) -> anyhow::Result<Self> {
        // Create the file initially with a temporary filename.
        // We'll atomically rename it to the final name when we're done.
@@ -641,7 +635,7 @@ impl ImageLayerWriterInner {
            conf,
            timeline_id,
            tenant_shard_id,
-            &ImageLayerName {
+            &ImageFileName {
                key_range: key_range.clone(),
                lsn,
            },
@@ -653,7 +647,6 @@ impl ImageLayerWriterInner {
                virtual_file::OpenOptions::new()
                    .write(true)
                    .create_new(true),
-                ctx,
            )
            .await?
        };
@@ -808,11 +801,10 @@ impl ImageLayerWriter {
        tenant_shard_id: TenantShardId,
        key_range: &Range<Key>,
        lsn: Lsn,
-        ctx: &RequestContext,
    ) -> anyhow::Result<ImageLayerWriter> {
        Ok(Self {
            inner: Some(
-                ImageLayerWriterInner::new(conf, timeline_id, tenant_shard_id, key_range, lsn, ctx)
+                ImageLayerWriterInner::new(conf, timeline_id, tenant_shard_id, key_range, lsn)
                    .await?,
            ),
        })
--- a/pageserver/src/tenant/storage_layer/inmemory_layer.rs
+++ b/pageserver/src/tenant/storage_layer/inmemory_layer.rs
@@ -473,11 +473,10 @@ impl InMemoryLayer {
        timeline_id: TimelineId,
        tenant_shard_id: TenantShardId,
        start_lsn: Lsn,
-        ctx: &RequestContext,
    ) -> Result<InMemoryLayer> {
        trace!("initializing new empty InMemoryLayer for writing on timeline {timeline_id} at {start_lsn}");

-        let file = EphemeralFile::create(conf, tenant_shard_id, timeline_id, ctx).await?;
+        let file = EphemeralFile::create(conf, tenant_shard_id, timeline_id).await?;
        let key = InMemoryLayerFileId(file.page_cache_file_id());

        Ok(InMemoryLayer {
@@ -643,7 +642,6 @@ impl InMemoryLayer {
            self.tenant_shard_id,
            Key::MIN,
            self.start_lsn..end_lsn,
-            ctx,
        )
        .await?;

--- a/pageserver/src/tenant/storage_layer/layer.rs
+++ b/pageserver/src/tenant/storage_layer/layer.rs
@@ -4,13 +4,12 @@ use pageserver_api::keyspace::KeySpace;
 use pageserver_api::models::{
    HistoricLayerInfo, LayerAccessKind, LayerResidenceEventReason, LayerResidenceStatus,
 };
-use pageserver_api::shard::{ShardIndex, TenantShardId};
+use pageserver_api::shard::ShardIndex;
 use std::ops::Range;
 use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
 use std::sync::{Arc, Weak};
 use std::time::{Duration, SystemTime};
 use tracing::Instrument;
-use utils::id::TimelineId;
 use utils::lsn::Lsn;
 use utils::sync::heavier_once_cell;

@@ -25,7 +24,7 @@ use crate::tenant::{remote_timeline_client::LayerFileMetadata, Timeline};
 use super::delta_layer::{self, DeltaEntry};
 use super::image_layer;
 use super::{
-    AsLayerDesc, LayerAccessStats, LayerAccessStatsReset, LayerName, PersistentLayerDesc,
+    AsLayerDesc, LayerAccessStats, LayerAccessStatsReset, LayerFileName, PersistentLayerDesc,
    ValueReconstructResult, ValueReconstructState, ValuesReconstructState,
 };

@@ -124,39 +123,14 @@ impl PartialEq for Layer {
    }
 }

-pub(crate) fn local_layer_path(
-    conf: &PageServerConf,
-    tenant_shard_id: &TenantShardId,
-    timeline_id: &TimelineId,
-    layer_file_name: &LayerName,
-    generation: &Generation,
-) -> Utf8PathBuf {
-    let timeline_path = conf.timeline_path(tenant_shard_id, timeline_id);
-
-    if generation.is_none() {
-        // Without a generation, we may only use legacy path style
-        timeline_path.join(layer_file_name.to_string())
-    } else {
-        timeline_path.join(format!("{}-v1{}", layer_file_name, generation.get_suffix()))
-    }
-}
-
 impl Layer {
    /// Creates a layer value for a file we know to not be resident.
    pub(crate) fn for_evicted(
        conf: &'static PageServerConf,
        timeline: &Arc<Timeline>,
-        file_name: LayerName,
+        file_name: LayerFileName,
        metadata: LayerFileMetadata,
    ) -> Self {
-        let local_path = local_layer_path(
-            conf,
-            &timeline.tenant_shard_id,
-            &timeline.timeline_id,
-            &file_name,
-            &metadata.generation,
-        );
-
        let desc = PersistentLayerDesc::from_filename(
            timeline.tenant_shard_id,
            timeline.timeline_id,
@@ -169,7 +143,6 @@ impl Layer {
        let owner = Layer(Arc::new(LayerInner::new(
            conf,
            timeline,
-            local_path,
            access_stats,
            desc,
            None,
@@ -186,8 +159,7 @@ impl Layer {
    pub(crate) fn for_resident(
        conf: &'static PageServerConf,
        timeline: &Arc<Timeline>,
-        local_path: Utf8PathBuf,
-        file_name: LayerName,
+        file_name: LayerFileName,
        metadata: LayerFileMetadata,
    ) -> ResidentLayer {
        let desc = PersistentLayerDesc::from_filename(
@@ -212,7 +184,6 @@ impl Layer {
            LayerInner::new(
                conf,
                timeline,
-                local_path,
                access_stats,
                desc,
                Some(inner),
@@ -254,19 +225,9 @@ impl Layer {
                LayerResidenceStatus::Resident,
                LayerResidenceEventReason::LayerCreate,
            );
-
-            let local_path = local_layer_path(
-                conf,
-                &timeline.tenant_shard_id,
-                &timeline.timeline_id,
-                &desc.layer_name(),
-                &timeline.generation,
-            );
-
            LayerInner::new(
                conf,
                timeline,
-                local_path,
                access_stats,
                desc,
                Some(inner),
@@ -449,13 +410,6 @@ impl Layer {
        self.0.metadata()
    }

-    pub(crate) fn get_timeline_id(&self) -> Option<TimelineId> {
-        self.0
-            .timeline
-            .upgrade()
-            .map(|timeline| timeline.timeline_id)
-    }
-
    /// Traditional debug dumping facility
    #[allow(unused)]
    pub(crate) async fn dump(&self, verbose: bool, ctx: &RequestContext) -> anyhow::Result<()> {
@@ -585,6 +539,9 @@ struct LayerInner {
    /// [`Timeline::gate`] at the same time.
    timeline: Weak<Timeline>,

+    /// Cached knowledge of [`Timeline::remote_client`] being `Some`.
+    have_remote_client: bool,
+
    access_stats: LayerAccessStats,

    /// This custom OnceCell is backed by std mutex, but only held for short time periods.
@@ -684,7 +641,7 @@ impl Drop for LayerInner {
        let span = tracing::info_span!(parent: None, "layer_delete", tenant_id = %self.layer_desc().tenant_shard_id.tenant_id, shard_id=%self.layer_desc().tenant_shard_id.shard_slug(), timeline_id = %self.layer_desc().timeline_id);

        let path = std::mem::take(&mut self.path);
-        let file_name = self.layer_desc().layer_name();
+        let file_name = self.layer_desc().filename();
        let file_size = self.layer_desc().file_size;
        let timeline = self.timeline.clone();
        let meta = self.metadata();
@@ -729,40 +686,42 @@ impl Drop for LayerInner {
            if removed {
                timeline.metrics.resident_physical_size_sub(file_size);
            }
-            let res = timeline
-                .remote_client
-                .schedule_deletion_of_unlinked(vec![(file_name, meta)]);
+            if let Some(remote_client) = timeline.remote_client.as_ref() {
+                let res = remote_client.schedule_deletion_of_unlinked(vec![(file_name, meta)]);

-            if let Err(e) = res {
-                // test_timeline_deletion_with_files_stuck_in_upload_queue is good at
-                // demonstrating this deadlock (without spawn_blocking): stop will drop
-                // queued items, which will have ResidentLayer's, and those drops would try
-                // to re-entrantly lock the RemoteTimelineClient inner state.
-                if !timeline.is_active() {
-                    tracing::info!("scheduling deletion on drop failed: {e:#}");
+                if let Err(e) = res {
+                    // test_timeline_deletion_with_files_stuck_in_upload_queue is good at
+                    // demonstrating this deadlock (without spawn_blocking): stop will drop
+                    // queued items, which will have ResidentLayer's, and those drops would try
+                    // to re-entrantly lock the RemoteTimelineClient inner state.
+                    if !timeline.is_active() {
+                        tracing::info!("scheduling deletion on drop failed: {e:#}");
+                    } else {
+                        tracing::warn!("scheduling deletion on drop failed: {e:#}");
+                    }
+                    LAYER_IMPL_METRICS.inc_deletes_failed(DeleteFailed::DeleteSchedulingFailed);
                } else {
-                    tracing::warn!("scheduling deletion on drop failed: {e:#}");
+                    LAYER_IMPL_METRICS.inc_completed_deletes();
                }
-                LAYER_IMPL_METRICS.inc_deletes_failed(DeleteFailed::DeleteSchedulingFailed);
-            } else {
-                LAYER_IMPL_METRICS.inc_completed_deletes();
            }
        });
    }
 }

 impl LayerInner {
-    #[allow(clippy::too_many_arguments)]
    fn new(
        conf: &'static PageServerConf,
        timeline: &Arc<Timeline>,
-        local_path: Utf8PathBuf,
        access_stats: LayerAccessStats,
        desc: PersistentLayerDesc,
        downloaded: Option<Arc<DownloadedLayer>>,
        generation: Generation,
        shard: ShardIndex,
    ) -> Self {
+        let path = conf
+            .timeline_path(&timeline.tenant_shard_id, &timeline.timeline_id)
+            .join(desc.filename().to_string());
+
        let (inner, version, init_status) = if let Some(inner) = downloaded {
            let version = inner.version;
            let resident = ResidentOrWantedEvicted::Resident(inner);
@@ -777,12 +736,11 @@ impl LayerInner {

        LayerInner {
            conf,
-            debug_str: {
-                format!("timelines/{}/{}", timeline.timeline_id, desc.layer_name()).into()
-            },
-            path: local_path,
+            debug_str: { format!("timelines/{}/{}", timeline.timeline_id, desc.filename()).into() },
+            path,
            desc,
            timeline: Arc::downgrade(timeline),
+            have_remote_client: timeline.remote_client.is_some(),
            access_stats,
            wanted_deleted: AtomicBool::new(false),
            inner,
@@ -811,6 +769,8 @@ impl LayerInner {
    /// in a new attempt to evict OR join the previously started attempt.
    #[tracing::instrument(level = tracing::Level::DEBUG, skip_all, ret, err(level = tracing::Level::DEBUG), fields(layer=%self))]
    pub(crate) async fn evict_and_wait(&self, timeout: Duration) -> Result<(), EvictionError> {
+        assert!(self.have_remote_client);
+
        let mut rx = self.status.as_ref().unwrap().subscribe();

        {
@@ -967,6 +927,10 @@ impl LayerInner {
            return Err(DownloadError::NotFile(ft));
        }

+        if timeline.remote_client.as_ref().is_none() {
+            return Err(DownloadError::NoRemoteStorage);
+        }
+
        if let Some(ctx) = ctx {
            self.check_expected_download(ctx)?;
        }
@@ -1103,10 +1067,14 @@ impl LayerInner {
        permit: heavier_once_cell::InitPermit,
        ctx: &RequestContext,
    ) -> anyhow::Result<Arc<DownloadedLayer>> {
-        let result = timeline
+        let client = timeline
            .remote_client
+            .as_ref()
+            .expect("checked before download_init_and_wait");
+
+        let result = client
            .download_layer_file(
-                &self.desc.layer_name(),
+                &self.desc.filename(),
                &self.metadata(),
                &timeline.cancel,
                ctx,
@@ -1243,7 +1211,7 @@ impl LayerInner {
    }

    fn info(&self, reset: LayerAccessStatsReset) -> HistoricLayerInfo {
-        let layer_name = self.desc.layer_name().to_string();
+        let layer_file_name = self.desc.filename().file_name();

        let resident = self
            .inner
@@ -1257,7 +1225,7 @@ impl LayerInner {
            let lsn_range = &self.desc.lsn_range;

            HistoricLayerInfo::Delta {
-                layer_file_name: layer_name,
+                layer_file_name,
                layer_file_size: self.desc.file_size,
                lsn_start: lsn_range.start,
                lsn_end: lsn_range.end,
@@ -1268,7 +1236,7 @@ impl LayerInner {
            let lsn = self.desc.image_layer_lsn();

            HistoricLayerInfo::Image {
-                layer_file_name: layer_name,
+                layer_file_name,
                layer_file_size: self.desc.file_size,
                lsn_start: lsn,
                remote: !resident,
@@ -1279,10 +1247,20 @@ impl LayerInner {

    /// `DownloadedLayer` is being dropped, so it calls this method.
    fn on_downloaded_layer_drop(self: Arc<LayerInner>, only_version: usize) {
+        let can_evict = self.have_remote_client;
+
        // we cannot know without inspecting LayerInner::inner if we should evict or not, even
        // though here it is very likely
        let span = tracing::info_span!(parent: None, "layer_evict", tenant_id = %self.desc.tenant_shard_id.tenant_id, shard_id = %self.desc.tenant_shard_id.shard_slug(), timeline_id = %self.desc.timeline_id, layer=%self, version=%only_version);

+        if !can_evict {
+            // it would be nice to assert this case out, but we are in drop
+            span.in_scope(|| {
+                tracing::error!("bug in struct Layer: ResidentOrWantedEvicted has been downgraded while we have no remote storage");
+            });
+            return;
+        }
+
        // NOTE: this scope *must* never call `self.inner.get` because evict_and_wait might
        // drop while the `self.inner` is being locked, leading to a deadlock.

@@ -1554,6 +1532,8 @@ pub(crate) enum EvictionError {
 pub(crate) enum DownloadError {
    #[error("timeline has already shutdown")]
    TimelineShutdown,
+    #[error("no remote storage configured")]
+    NoRemoteStorage,
    #[error("context denies downloading")]
    ContextAndConfigReallyDeniesDownloads,
    #[error("downloading is really required but not allowed by this method")]
@@ -1817,23 +1797,25 @@ impl ResidentLayer {
        }
    }

-    /// Returns the amount of keys and values written to the writer.
-    pub(crate) async fn copy_delta_prefix(
+    /// FIXME: truncate is a bad name because we are not truncating anything, but copying the
+    /// filtered parts.
+    #[cfg(test)]
+    pub(super) async fn copy_delta_prefix(
        &self,
        writer: &mut super::delta_layer::DeltaLayerWriter,
-        until: Lsn,
+        truncate_at: Lsn,
        ctx: &RequestContext,
-    ) -> anyhow::Result<usize> {
+    ) -> anyhow::Result<()> {
        use LayerKind::*;

        let owner = &self.owner.0;

        match self.downloaded.get(owner, ctx).await? {
            Delta(ref d) => d
-                .copy_prefix(writer, until, ctx)
+                .copy_prefix(writer, truncate_at, ctx)
                .await
-                .with_context(|| format!("copy_delta_prefix until {until} of {self}")),
-            Image(_) => anyhow::bail!(format!("cannot copy_lsn_prefix of image layer {self}")),
+                .with_context(|| format!("truncate {self}")),
+            Image(_) => anyhow::bail!(format!("cannot truncate image layer {self}")),
        }
    }

--- a/pageserver/src/tenant/storage_layer/layer/tests.rs
+++ b/pageserver/src/tenant/storage_layer/layer/tests.rs
@@ -145,7 +145,7 @@ async fn smoke_test() {
        .await
        .expect("the local layer file still exists");

-    let rtc = &timeline.remote_client;
+    let rtc = timeline.remote_client.as_ref().unwrap();

    {
        let layers = &[layer];
@@ -761,7 +761,13 @@ async fn eviction_cancellation_on_drop() {
    timeline.freeze_and_flush().await.unwrap();

    // wait for the upload to complete so our Arc::strong_count assertion holds
-    timeline.remote_client.wait_completion().await.unwrap();
+    timeline
+        .remote_client
+        .as_ref()
+        .unwrap()
+        .wait_completion()
+        .await
+        .unwrap();

    let (evicted_layer, not_evicted) = {
        let mut layers = {
--- a/pageserver/src/tenant/storage_layer/layer_desc.rs
+++ b/pageserver/src/tenant/storage_layer/layer_desc.rs
@@ -5,7 +5,7 @@ use utils::{id::TimelineId, lsn::Lsn};

 use crate::repository::Key;

-use super::{DeltaLayerName, ImageLayerName, LayerName};
+use super::{DeltaFileName, ImageFileName, LayerFileName};

 use serde::{Deserialize, Serialize};

@@ -51,7 +51,7 @@ impl PersistentLayerDesc {
    }

    pub fn short_id(&self) -> impl Display {
-        self.layer_name()
+        self.filename()
    }

    #[cfg(test)]
@@ -103,14 +103,14 @@ impl PersistentLayerDesc {
    pub fn from_filename(
        tenant_shard_id: TenantShardId,
        timeline_id: TimelineId,
-        filename: LayerName,
+        filename: LayerFileName,
        file_size: u64,
    ) -> Self {
        match filename {
-            LayerName::Image(i) => {
+            LayerFileName::Image(i) => {
                Self::new_img(tenant_shard_id, timeline_id, i.key_range, i.lsn, file_size)
            }
-            LayerName::Delta(d) => Self::new_delta(
+            LayerFileName::Delta(d) => Self::new_delta(
                tenant_shard_id,
                timeline_id,
                d.key_range,
@@ -132,34 +132,34 @@ impl PersistentLayerDesc {
        lsn..(lsn + 1)
    }

-    /// Get a delta layer name for this layer.
+    /// Get a delta file name for this layer.
    ///
    /// Panic: if this is not a delta layer.
-    pub fn delta_layer_name(&self) -> DeltaLayerName {
+    pub fn delta_file_name(&self) -> DeltaFileName {
        assert!(self.is_delta);
-        DeltaLayerName {
+        DeltaFileName {
            key_range: self.key_range.clone(),
            lsn_range: self.lsn_range.clone(),
        }
    }

-    /// Get a image layer name for this layer.
+    /// Get an image file name for this layer.
    ///
    /// Panic: if this is not an image layer, or the lsn range is invalid
-    pub fn image_layer_name(&self) -> ImageLayerName {
+    pub fn image_file_name(&self) -> ImageFileName {
        assert!(!self.is_delta);
        assert!(self.lsn_range.start + 1 == self.lsn_range.end);
-        ImageLayerName {
+        ImageFileName {
            key_range: self.key_range.clone(),
            lsn: self.lsn_range.start,
        }
    }

-    pub fn layer_name(&self) -> LayerName {
+    pub fn filename(&self) -> LayerFileName {
        if self.is_delta {
-            self.delta_layer_name().into()
+            self.delta_file_name().into()
        } else {
-            self.image_layer_name().into()
+            self.image_file_name().into()
        }
    }

--- a/pageserver/src/tenant/tasks.rs
+++ b/pageserver/src/tenant/tasks.rs
@@ -2,7 +2,6 @@
 //! such as compaction and GC

 use std::ops::ControlFlow;
-use std::str::FromStr;
 use std::sync::Arc;
 use std::time::{Duration, Instant};

@@ -10,11 +9,9 @@ use crate::context::{DownloadBehavior, RequestContext};
 use crate::metrics::TENANT_TASK_EVENTS;
 use crate::task_mgr;
 use crate::task_mgr::{TaskKind, BACKGROUND_RUNTIME};
-use crate::tenant::config::defaults::DEFAULT_COMPACTION_PERIOD;
 use crate::tenant::throttle::Stats;
 use crate::tenant::timeline::CompactionError;
 use crate::tenant::{Tenant, TenantState};
-use rand::Rng;
 use tokio_util::sync::CancellationToken;
 use tracing::*;
 use utils::{backoff, completion};
@@ -41,13 +38,12 @@ static CONCURRENT_BACKGROUND_TASKS: once_cell::sync::Lazy<tokio::sync::Semaphore
        tokio::sync::Semaphore::new(permits)
    });

-#[derive(Debug, PartialEq, Eq, Clone, Copy, strum_macros::IntoStaticStr, enum_map::Enum)]
+#[derive(Debug, PartialEq, Eq, Clone, Copy, strum_macros::IntoStaticStr)]
 #[strum(serialize_all = "snake_case")]
 pub(crate) enum BackgroundLoopKind {
    Compaction,
    Gc,
    Eviction,
-    IngestHouseKeeping,
    ConsumptionMetricsCollectMetrics,
    ConsumptionMetricsSyntheticSizeWorker,
    InitialLogicalSizeCalculation,
@@ -57,25 +53,19 @@ pub(crate) enum BackgroundLoopKind {

 impl BackgroundLoopKind {
    fn as_static_str(&self) -> &'static str {
-        self.into()
+        let s: &'static str = self.into();
+        s
    }
 }

-static PERMIT_GAUGES: once_cell::sync::Lazy<
-    enum_map::EnumMap<BackgroundLoopKind, metrics::IntCounterPair>,
-> = once_cell::sync::Lazy::new(|| {
-    enum_map::EnumMap::from_array(std::array::from_fn(|i| {
-        let kind = <BackgroundLoopKind as enum_map::Enum>::from_usize(i);
-        crate::metrics::BACKGROUND_LOOP_SEMAPHORE_WAIT_GAUGE.with_label_values(&[kind.into()])
-    }))
-});
-
 /// Cancellation safe.
 pub(crate) async fn concurrent_background_tasks_rate_limit_permit(
    loop_kind: BackgroundLoopKind,
    _ctx: &RequestContext,
 ) -> tokio::sync::SemaphorePermit<'static> {
-    let _guard = PERMIT_GAUGES[loop_kind].guard();
+    let _guard = crate::metrics::BACKGROUND_LOOP_SEMAPHORE_WAIT_GAUGE
+        .with_label_values(&[loop_kind.as_static_str()])
+        .guard();

    pausable_failpoint!(
        "initial-size-calculation-permit-pause",
@@ -142,30 +132,6 @@ pub fn start_background_loops(
            }
        },
    );
-
-    task_mgr::spawn(
-        BACKGROUND_RUNTIME.handle(),
-        TaskKind::IngestHousekeeping,
-        Some(tenant_shard_id),
-        None,
-        &format!("ingest housekeeping for tenant {tenant_shard_id}"),
-        false,
-        {
-            let tenant = Arc::clone(tenant);
-            let background_jobs_can_start = background_jobs_can_start.cloned();
-            async move {
-                let cancel = task_mgr::shutdown_token();
-                tokio::select! {
-                    _ = cancel.cancelled() => { return Ok(()) },
-                    _ = completion::Barrier::maybe_wait(background_jobs_can_start) => {}
-                };
-                ingest_housekeeping_loop(tenant, cancel)
-                    .instrument(info_span!("ingest_housekeeping_loop", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug()))
-                    .await;
-                Ok(())
-            }
-        },
-    );
 }

 ///
@@ -413,61 +379,6 @@ async fn gc_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
    TENANT_TASK_EVENTS.with_label_values(&["stop"]).inc();
 }

-async fn ingest_housekeeping_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
-    TENANT_TASK_EVENTS.with_label_values(&["start"]).inc();
-    async {
-        loop {
-            tokio::select! {
-                _ = cancel.cancelled() => {
-                    return;
-                },
-                tenant_wait_result = wait_for_active_tenant(&tenant) => match tenant_wait_result {
-                    ControlFlow::Break(()) => return,
-                    ControlFlow::Continue(()) => (),
-                },
-            }
-
-            // We run ingest housekeeping with the same frequency as compaction: it is not worth
-            // having a distinct setting.  But we don't run it in the same task, because compaction
-            // blocks on acquiring the background job semaphore.
-            let period = tenant.get_compaction_period();
-
-            // If compaction period is set to zero (to disable it), then we will use a reasonable default
-            let period = if period == Duration::ZERO {
-                humantime::Duration::from_str(DEFAULT_COMPACTION_PERIOD)
-                    .unwrap()
-                    .into()
-            } else {
-                period
-            };
-
-            // Jitter the period by +/- 5%
-            let period =
-                rand::thread_rng().gen_range((period * (95)) / 100..(period * (105)) / 100);
-
-            // Always sleep first: we do not need to do ingest housekeeping early in the lifetime of
-            // a tenant, since it won't have started writing any ephemeral files yet.
-            if tokio::time::timeout(period, cancel.cancelled())
-                .await
-                .is_ok()
-            {
-                break;
-            }
-
-            let started_at = Instant::now();
-            tenant.ingest_housekeeping().await;
-
-            warn_when_period_overrun(
-                started_at.elapsed(),
-                period,
-                BackgroundLoopKind::IngestHouseKeeping,
-            );
-        }
-    }
-    .await;
-    TENANT_TASK_EVENTS.with_label_values(&["stop"]).inc();
-}
-
 async fn wait_for_active_tenant(tenant: &Arc<Tenant>) -> ControlFlow<()> {
    // if the tenant has a proper status already, no need to wait for anything
    if tenant.current_state() == TenantState::Active {
@@ -509,6 +420,8 @@ pub(crate) async fn random_init_delay(
    period: Duration,
    cancel: &CancellationToken,
 ) -> Result<(), Cancelled> {
+    use rand::Rng;
+
    if period == Duration::ZERO {
        return Ok(());
    }
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -1,6 +1,5 @@
 mod compaction;
 pub mod delete;
-pub(crate) mod detach_ancestor;
 mod eviction_task;
 mod init;
 pub mod layer_manager;
@@ -23,9 +22,8 @@ use pageserver_api::{
    },
    keyspace::{KeySpaceAccum, SparseKeyPartitioning},
    models::{
-        AuxFilePolicy, CompactionAlgorithm, DownloadRemoteLayersTaskInfo,
-        DownloadRemoteLayersTaskSpawnRequest, EvictionPolicy, InMemoryLayerInfo, LayerMapInfo,
-        TimelineState,
+        CompactionAlgorithm, DownloadRemoteLayersTaskInfo, DownloadRemoteLayersTaskSpawnRequest,
+        EvictionPolicy, InMemoryLayerInfo, LayerMapInfo, TimelineState,
    },
    reltag::BlockNumber,
    shard::{ShardIdentity, ShardNumber, TenantShardId},
@@ -60,13 +58,9 @@ use std::{
    ops::ControlFlow,
 };

-use crate::tenant::timeline::init::LocalLayerFileMetadata;
-use crate::{
-    aux_file::AuxFileSizeEstimator,
-    tenant::{
-        layer_map::{LayerMap, SearchResult},
-        metadata::TimelineMetadata,
-    },
+use crate::tenant::{
+    layer_map::{LayerMap, SearchResult},
+    metadata::TimelineMetadata,
 };
 use crate::{
    context::{DownloadBehavior, RequestContext},
@@ -78,7 +72,7 @@ use crate::{
    disk_usage_eviction_task::finite_f32,
    tenant::storage_layer::{
        AsLayerDesc, DeltaLayerWriter, EvictionError, ImageLayerWriter, InMemoryLayer, Layer,
-        LayerAccessStatsReset, LayerName, ResidentLayer, ValueReconstructResult,
+        LayerAccessStatsReset, LayerFileName, ResidentLayer, ValueReconstructResult,
        ValueReconstructState, ValuesReconstructState,
    },
 };
@@ -200,7 +194,7 @@ fn drop_wlock<T>(rlock: tokio::sync::RwLockWriteGuard<'_, T>) {

 /// The outward-facing resources required to build a Timeline
 pub struct TimelineResources {
-    pub remote_client: RemoteTimelineClient,
+    pub remote_client: Option<RemoteTimelineClient>,
    pub deletion_queue_client: DeletionQueueClient,
    pub timeline_get_throttle: Arc<
        crate::tenant::throttle::Throttle<&'static crate::metrics::tenant_throttling::TimelineGet>,
@@ -272,7 +266,7 @@ pub struct Timeline {

    /// Remote storage client.
    /// See [`remote_timeline_client`](super::remote_timeline_client) module comment for details.
-    pub remote_client: Arc<RemoteTimelineClient>,
+    pub remote_client: Option<Arc<RemoteTimelineClient>>,

    // What page versions do we hold in the repository? If we get a
    // request > last_record_lsn, we need to wait until we receive all
@@ -412,8 +406,6 @@ pub struct Timeline {

    /// Keep aux directory cache to avoid it's reconstruction on each update
    pub(crate) aux_files: tokio::sync::Mutex<AuxFilesState>,
-
-    pub(crate) aux_file_size_estimator: AuxFileSizeEstimator,
 }

 pub struct WalReceiverInfo {
@@ -870,13 +862,9 @@ impl Timeline {
                // Initialise the reconstruct state for the key with the cache
                // entry returned above.
                let mut reconstruct_state = ValuesReconstructState::new();
-
-                // Only add the cached image to the reconstruct state when it exists.
-                if cached_page_img.is_some() {
-                    let mut key_state = VectoredValueReconstructState::default();
-                    key_state.img = cached_page_img;
-                    reconstruct_state.keys.insert(key, Ok(key_state));
-                }
+                let mut key_state = VectoredValueReconstructState::default();
+                key_state.img = cached_page_img;
+                reconstruct_state.keys.insert(key, Ok(key_state));

                let vectored_res = self
                    .get_vectored_impl(keyspace.clone(), lsn, reconstruct_state, ctx)
@@ -1088,7 +1076,7 @@ impl Timeline {
        // We should generalize this into Keyspace::contains in the future.
        for range in &keyspace.ranges {
            if range.start.field1 < METADATA_KEY_BEGIN_PREFIX
-                || range.end.field1 > METADATA_KEY_END_PREFIX
+                || range.end.field1 >= METADATA_KEY_END_PREFIX
            {
                return Err(GetVectoredError::Other(anyhow::anyhow!(
                    "only metadata keyspace can be scanned"
@@ -1225,17 +1213,11 @@ impl Timeline {
        }
        reconstruct_timer.stop_and_record();

-        // For aux file keys (v1 or v2) the vectored read path does not return an error
-        // when they're missing. Instead they are omitted from the resulting btree
-        // (this is a requirement, not a bug). Skip updating the metric in these cases
-        // to avoid infinite results.
-        if !results.is_empty() {
-            // Note that this is an approximation. Tracking the exact number of layers visited
-            // per key requires virtually unbounded memory usage and is inefficient
-            // (i.e. segment tree tracking each range queried from a layer)
-            crate::metrics::VEC_READ_NUM_LAYERS_VISITED
-                .observe(layers_visited as f64 / results.len() as f64);
-        }
+        // Note that this is an approximation. Tracking the exact number of layers visited
+        // per key requires virtually unbounded memory usage and is inefficient
+        // (i.e. segment tree tracking each range queried from a layer)
+        crate::metrics::VEC_READ_NUM_LAYERS_VISITED
+            .observe(layers_visited as f64 / results.len() as f64);

        Ok(results)
    }
@@ -1375,14 +1357,22 @@ impl Timeline {
    /// not validated with control plane yet.
    /// See [`Self::get_remote_consistent_lsn_visible`].
    pub(crate) fn get_remote_consistent_lsn_projected(&self) -> Option<Lsn> {
-        self.remote_client.remote_consistent_lsn_projected()
+        if let Some(remote_client) = &self.remote_client {
+            remote_client.remote_consistent_lsn_projected()
+        } else {
+            None
+        }
    }

    /// remote_consistent_lsn which the tenant is guaranteed not to go backward from,
    /// i.e. a value of remote_consistent_lsn_projected which has undergone
    /// generation validation in the deletion queue.
    pub(crate) fn get_remote_consistent_lsn_visible(&self) -> Option<Lsn> {
-        self.remote_client.remote_consistent_lsn_visible()
+        if let Some(remote_client) = &self.remote_client {
+            remote_client.remote_consistent_lsn_visible()
+        } else {
+            None
+        }
    }

    /// The sum of the file size of all historic layers in the layer map.
@@ -1504,21 +1494,15 @@ impl Timeline {
    /// Flush to disk all data that was written with the put_* functions
    #[instrument(skip(self), fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug(), timeline_id=%self.timeline_id))]
    pub(crate) async fn freeze_and_flush(&self) -> anyhow::Result<()> {
-        self.freeze_and_flush0().await
-    }
-
-    // This exists to provide a non-span creating version of `freeze_and_flush` we can call without
-    // polluting the span hierarchy.
-    pub(crate) async fn freeze_and_flush0(&self) -> anyhow::Result<()> {
        let to_lsn = self.freeze_inmem_layer(false).await;
        self.flush_frozen_layers_and_wait(to_lsn).await
    }

-    // Check if an open ephemeral layer should be closed: this provides
-    // background enforcement of checkpoint interval if there is no active WAL receiver, to avoid keeping
-    // an ephemeral layer open forever when idle.  It also freezes layers if the global limit on
-    // ephemeral layer bytes has been breached.
-    pub(super) async fn maybe_freeze_ephemeral_layer(&self) {
+    /// If there is no writer, and conditions for rolling the latest layer are met, then freeze it.
+    ///
+    /// This is for use in background housekeeping, to provide guarantees of layers closing eventually
+    /// even if there are no ongoing writes to drive that.
+    async fn maybe_freeze_ephemeral_layer(&self) {
        let Ok(_write_guard) = self.write_lock.try_lock() else {
            // If the write lock is held, there is an active wal receiver: rolling open layers
            // is their responsibility while they hold this lock.
@@ -1545,11 +1529,13 @@ impl Timeline {
                // we are a sharded tenant and have skipped some WAL
                let last_freeze_ts = *self.last_freeze_ts.read().unwrap();
                if last_freeze_ts.elapsed() >= self.get_checkpoint_timeout() {
-                    // Only do this if have been layer-less longer than get_checkpoint_timeout, so that a shard
-                    // without any data ingested (yet) doesn't write a remote index as soon as it
+                    // This should be somewhat rare, so we log it at INFO level.
+                    //
+                    // We checked for checkpoint timeout so that a shard without any
+                    // data ingested (yet) doesn't write a remote index as soon as it
                    // sees its LSN advance: we only do this if we've been layer-less
                    // for some time.
-                    tracing::debug!(
+                    tracing::info!(
                        "Advancing disk_consistent_lsn past WAL ingest gap {} -> {}",
                        disk_consistent_lsn,
                        last_record_lsn
@@ -1639,6 +1625,11 @@ impl Timeline {
            (guard, permit)
        };

+        // Prior to compaction, check if an open ephemeral layer should be closed: this provides
+        // background enforcement of checkpoint interval if there is no active WAL receiver, to avoid keeping
+        // an ephemeral layer open forever when idle.
+        self.maybe_freeze_ephemeral_layer().await;
+
        // this wait probably never needs any "long time spent" logging, because we already nag if
        // compaction task goes over it's period (20s) which is quite often in production.
        let (_guard, _permit) = tokio::select! {
@@ -1752,14 +1743,16 @@ impl Timeline {
            match self.freeze_and_flush().await {
                Ok(_) => {
                    // drain the upload queue
-                    // if we did not wait for completion here, it might be our shutdown process
-                    // didn't wait for remote uploads to complete at all, as new tasks can forever
-                    // be spawned.
-                    //
-                    // what is problematic is the shutting down of RemoteTimelineClient, because
-                    // obviously it does not make sense to stop while we wait for it, but what
-                    // about corner cases like s3 suddenly hanging up?
-                    self.remote_client.shutdown().await;
+                    if let Some(client) = self.remote_client.as_ref() {
+                        // if we did not wait for completion here, it might be our shutdown process
+                        // didn't wait for remote uploads to complete at all, as new tasks can forever
+                        // be spawned.
+                        //
+                        // what is problematic is the shutting down of RemoteTimelineClient, because
+                        // obviously it does not make sense to stop while we wait for it, but what
+                        // about corner cases like s3 suddenly hanging up?
+                        client.shutdown().await;
+                    }
                }
                Err(e) => {
                    // Non-fatal.  Shutdown is infallible.  Failures to flush just mean that
@@ -1775,16 +1768,18 @@ impl Timeline {

        // Transition the remote_client into a state where it's only useful for timeline deletion.
        // (The deletion use case is why we can't just hook up remote_client to Self::cancel).)
-        self.remote_client.stop();
-        // As documented in remote_client.stop()'s doc comment, it's our responsibility
-        // to shut down the upload queue tasks.
-        // TODO: fix that, task management should be encapsulated inside remote_client.
-        task_mgr::shutdown_tasks(
-            Some(TaskKind::RemoteUploadTask),
-            Some(self.tenant_shard_id),
-            Some(self.timeline_id),
-        )
-        .await;
+        if let Some(remote_client) = self.remote_client.as_ref() {
+            remote_client.stop();
+            // As documented in remote_client.stop()'s doc comment, it's our responsibility
+            // to shut down the upload queue tasks.
+            // TODO: fix that, task management should be encapsulated inside remote_client.
+            task_mgr::shutdown_tasks(
+                Some(TaskKind::RemoteUploadTask),
+                Some(self.tenant_shard_id),
+                Some(self.timeline_id),
+            )
+            .await;
+        }

        // TODO: work toward making this a no-op. See this funciton's doc comment for more context.
        tracing::debug!("Waiting for tasks...");
@@ -1904,12 +1899,16 @@ impl Timeline {
    #[instrument(skip_all, fields(tenant_id = %self.tenant_shard_id.tenant_id, shard_id = %self.tenant_shard_id.shard_slug(), timeline_id = %self.timeline_id))]
    pub(crate) async fn download_layer(
        &self,
-        layer_file_name: &LayerName,
+        layer_file_name: &str,
    ) -> anyhow::Result<Option<bool>> {
        let Some(layer) = self.find_layer(layer_file_name).await else {
            return Ok(None);
        };

+        if self.remote_client.is_none() {
+            return Ok(Some(false));
+        }
+
        layer.download().await?;

        Ok(Some(true))
@@ -1918,10 +1917,7 @@ impl Timeline {
    /// Evict just one layer.
    ///
    /// Returns `Ok(None)` in the case where the layer could not be found by its `layer_file_name`.
-    pub(crate) async fn evict_layer(
-        &self,
-        layer_file_name: &LayerName,
-    ) -> anyhow::Result<Option<bool>> {
+    pub(crate) async fn evict_layer(&self, layer_file_name: &str) -> anyhow::Result<Option<bool>> {
        let _gate = self
            .gate
            .enter()
@@ -1995,12 +1991,13 @@ const REPARTITION_FREQ_IN_CHECKPOINT_DISTANCE: u64 = 10;

 // Private functions
 impl Timeline {
-    pub(crate) fn get_switch_aux_file_policy(&self) -> AuxFilePolicy {
+    #[allow(dead_code)]
+    pub(crate) fn get_switch_to_aux_file_v2(&self) -> bool {
        let tenant_conf = self.tenant_conf.load();
        tenant_conf
            .tenant_conf
-            .switch_aux_file_policy
-            .unwrap_or(self.conf.default_tenant_conf.switch_aux_file_policy)
+            .switch_to_aux_file_v2
+            .unwrap_or(self.conf.default_tenant_conf.switch_to_aux_file_v2)
    }

    pub(crate) fn get_lazy_slru_download(&self) -> bool {
@@ -2150,16 +2147,6 @@ impl Timeline {
        };

        Arc::new_cyclic(|myself| {
-            let metrics = TimelineMetrics::new(
-                &tenant_shard_id,
-                &timeline_id,
-                crate::metrics::EvictionsWithLowResidenceDurationBuilder::new(
-                    "mtime",
-                    evictions_low_residence_duration_metric_threshold,
-                ),
-            );
-            let aux_file_metrics = metrics.aux_file_size_gauge.clone();
-
            let mut result = Timeline {
                conf,
                tenant_conf,
@@ -2174,7 +2161,7 @@ impl Timeline {
                walredo_mgr,
                walreceiver: Mutex::new(None),

-                remote_client: Arc::new(resources.remote_client),
+                remote_client: resources.remote_client.map(Arc::new),

                // initialize in-memory 'last_record_lsn' from 'disk_consistent_lsn'.
                last_record_lsn: SeqWait::new(RecordLsn {
@@ -2191,7 +2178,14 @@ impl Timeline {
                ancestor_timeline: ancestor,
                ancestor_lsn: metadata.ancestor_lsn(),

-                metrics,
+                metrics: TimelineMetrics::new(
+                    &tenant_shard_id,
+                    &timeline_id,
+                    crate::metrics::EvictionsWithLowResidenceDurationBuilder::new(
+                        "mtime",
+                        evictions_low_residence_duration_metric_threshold,
+                    ),
+                ),

                query_metrics: crate::metrics::SmgrQueryTimePerTimeline::new(
                    &tenant_shard_id,
@@ -2255,8 +2249,6 @@ impl Timeline {
                    dir: None,
                    n_deltas: 0,
                }),
-
-                aux_file_size_estimator: AuxFileSizeEstimator::new(aux_file_metrics),
            };
            result.repartition_threshold =
                result.get_checkpoint_distance() / REPARTITION_FREQ_IN_CHECKPOINT_DISTANCE;
@@ -2387,13 +2379,13 @@ impl Timeline {
        index_part: Option<IndexPart>,
    ) -> anyhow::Result<()> {
        use init::{Decision::*, Discovered, DismissedLayer};
-        use LayerName::*;
+        use LayerFileName::*;

        let mut guard = self.layers.write().await;

        let timer = self.metrics.load_layer_map_histo.start_timer();

-        // Scan timeline directory and create ImageLayerName and DeltaFilename
+        // Scan timeline directory and create ImageFileName and DeltaFilename
        // structs representing all files on disk
        let timeline_path = self
            .conf
@@ -2417,8 +2409,12 @@ impl Timeline {

                for discovered in discovered {
                    let (name, kind) = match discovered {
-                        Discovered::Layer(layer_file_name, local_path, file_size) => {
-                            discovered_layers.push((layer_file_name, local_path, file_size));
+                        Discovered::Layer(file_name, file_size) => {
+                            discovered_layers.push((file_name, file_size));
+                            continue;
+                        }
+                        Discovered::Metadata => {
+                            warn!("found legacy metadata file, these should have been removed in load_tenant_config");
                            continue;
                        }
                        Discovered::IgnoredBackup => {
@@ -2464,32 +2460,31 @@ impl Timeline {
                        Ok(UseRemote { local, remote }) => {
                            // Remote is authoritative, but we may still choose to retain
                            // the local file if the contents appear to match
-                            if local.metadata.file_size() == remote.file_size() {
+                            if local.file_size() == remote.file_size() {
                                // Use the local file, but take the remote metadata so that we pick up
                                // the correct generation.
-                                UseLocal(LocalLayerFileMetadata {
-                                    metadata: remote,
-                                    local_path: local.local_path,
-                                })
+                                UseLocal(remote)
                            } else {
-                                init::cleanup_local_file_for_remote(&local, &remote)?;
+                                path.push(name.file_name());
+                                init::cleanup_local_file_for_remote(&path, &local, &remote)?;
+                                path.pop();
                                UseRemote { local, remote }
                            }
                        }
                        Ok(decision) => decision,
                        Err(DismissedLayer::Future { local }) => {
-                            if let Some(local) = local {
-                                init::cleanup_future_layer(
-                                    &local.local_path,
-                                    &name,
-                                    disk_consistent_lsn,
-                                )?;
+                            if local.is_some() {
+                                path.push(name.file_name());
+                                init::cleanup_future_layer(&path, &name, disk_consistent_lsn)?;
+                                path.pop();
                            }
                            needs_cleanup.push(name);
                            continue;
                        }
                        Err(DismissedLayer::LocalOnly(local)) => {
-                            init::cleanup_local_only_file(&name, &local)?;
+                            path.push(name.file_name());
+                            init::cleanup_local_only_file(&path, &name, &local)?;
+                            path.pop();
                            // this file never existed remotely, we will have to do rework
                            continue;
                        }
@@ -2503,10 +2498,9 @@ impl Timeline {
                    tracing::debug!(layer=%name, ?decision, "applied");

                    let layer = match decision {
-                        UseLocal(local) => {
-                            total_physical_size += local.metadata.file_size();
-                            Layer::for_resident(conf, &this, local.local_path, name, local.metadata)
-                                .drop_eviction_guard()
+                        UseLocal(m) => {
+                            total_physical_size += m.file_size();
+                            Layer::for_resident(conf, &this, name, m).drop_eviction_guard()
                        }
                        Evicted(remote) | UseRemote { remote, .. } => {
                            Layer::for_evicted(conf, &this, name, remote)
@@ -2526,36 +2520,36 @@ impl Timeline {

        guard.initialize_local_layers(loaded_layers, disk_consistent_lsn + 1);

-        self.remote_client
-            .schedule_layer_file_deletion(&needs_cleanup)?;
-        self.remote_client
-            .schedule_index_upload_for_file_changes()?;
-        // This barrier orders above DELETEs before any later operations.
-        // This is critical because code executing after the barrier might
-        // create again objects with the same key that we just scheduled for deletion.
-        // For example, if we just scheduled deletion of an image layer "from the future",
-        // later compaction might run again and re-create the same image layer.
-        // "from the future" here means an image layer whose LSN is > IndexPart::disk_consistent_lsn.
-        // "same" here means same key range and LSN.
-        //
-        // Without a barrier between above DELETEs and the re-creation's PUTs,
-        // the upload queue may execute the PUT first, then the DELETE.
-        // In our example, we will end up with an IndexPart referencing a non-existent object.
-        //
-        // 1. a future image layer is created and uploaded
-        // 2. ps restart
-        // 3. the future layer from (1) is deleted during load layer map
-        // 4. image layer is re-created and uploaded
-        // 5. deletion queue would like to delete (1) but actually deletes (4)
-        // 6. delete by name works as expected, but it now deletes the wrong (later) version
-        //
-        // See https://github.com/neondatabase/neon/issues/5878
-        //
-        // NB: generation numbers naturally protect against this because they disambiguate
-        //     (1) and (4)
-        self.remote_client.schedule_barrier()?;
-        // Tenant::create_timeline will wait for these uploads to happen before returning, or
-        // on retry.
+        if let Some(rtc) = self.remote_client.as_ref() {
+            rtc.schedule_layer_file_deletion(&needs_cleanup)?;
+            rtc.schedule_index_upload_for_file_changes()?;
+            // This barrier orders above DELETEs before any later operations.
+            // This is critical because code executing after the barrier might
+            // create again objects with the same key that we just scheduled for deletion.
+            // For example, if we just scheduled deletion of an image layer "from the future",
+            // later compaction might run again and re-create the same image layer.
+            // "from the future" here means an image layer whose LSN is > IndexPart::disk_consistent_lsn.
+            // "same" here means same key range and LSN.
+            //
+            // Without a barrier between above DELETEs and the re-creation's PUTs,
+            // the upload queue may execute the PUT first, then the DELETE.
+            // In our example, we will end up with an IndexPart referencing a non-existent object.
+            //
+            // 1. a future image layer is created and uploaded
+            // 2. ps restart
+            // 3. the future layer from (1) is deleted during load layer map
+            // 4. image layer is re-created and uploaded
+            // 5. deletion queue would like to delete (1) but actually deletes (4)
+            // 6. delete by name works as expected, but it now deletes the wrong (later) version
+            //
+            // See https://github.com/neondatabase/neon/issues/5878
+            //
+            // NB: generation numbers naturally protect against this because they disambiguate
+            //     (1) and (4)
+            rtc.schedule_barrier()?;
+            // Tenant::create_timeline will wait for these uploads to happen before returning, or
+            // on retry.
+        }

        info!(
            "loaded layer map with {} layers at {}, total physical size: {}",
@@ -2614,7 +2608,6 @@ impl Timeline {
                            // Don't make noise.
                        } else {
                            warn!("unexpected: cancel_wait_for_background_loop_concurrency_limit_semaphore not set, priority-boosting of logical size calculation will not work");
-                            debug_assert!(false);
                        }
                    }
                };
@@ -2988,11 +2981,11 @@ impl Timeline {
        }
    }

-    async fn find_layer(&self, layer_name: &LayerName) -> Option<Layer> {
+    async fn find_layer(&self, layer_file_name: &str) -> Option<Layer> {
        let guard = self.layers.read().await;
        for historic_layer in guard.layer_map().iter_historic_layers() {
-            let historic_layer_name = historic_layer.layer_name();
-            if layer_name == &historic_layer_name {
+            let historic_layer_name = historic_layer.filename().file_name();
+            if layer_file_name == historic_layer_name {
                return Some(guard.get_from_desc(&historic_layer));
            }
        }
@@ -3008,6 +3001,9 @@ impl Timeline {
    /// should treat this as a cue to simply skip doing any heatmap uploading
    /// for this timeline.
    pub(crate) async fn generate_heatmap(&self) -> Option<HeatMapTimeline> {
+        // no point in heatmaps without remote client
+        let _remote_client = self.remote_client.as_ref()?;
+
        if !self.is_active() {
            return None;
        }
@@ -3018,8 +3014,8 @@ impl Timeline {
            let last_activity_ts = layer.access_stats().latest_activity_or_now();

            HeatMapLayer::new(
-                layer.layer_desc().layer_name(),
-                (&layer.metadata()).into(),
+                layer.layer_desc().filename(),
+                layer.metadata().into(),
                last_activity_ts,
            )
        });
@@ -3028,15 +3024,6 @@ impl Timeline {

        Some(HeatMapTimeline::new(self.timeline_id, layers))
    }
-
-    /// Returns true if the given lsn is or was an ancestor branchpoint.
-    pub(crate) fn is_ancestor_lsn(&self, lsn: Lsn) -> bool {
-        // upon timeline detach, we set the ancestor_lsn to Lsn::INVALID and the store the original
-        // branchpoint in the value in IndexPart::lineage
-        self.ancestor_lsn == lsn
-            || (self.ancestor_lsn == Lsn::INVALID
-                && self.remote_client.is_previous_ancestor_lsn(lsn))
-    }
 }

 type TraversalId = Arc<str>;
@@ -3174,7 +3161,7 @@ impl Timeline {
            if let Some(open_layer) = &layers.open_layer {
                let start_lsn = open_layer.get_lsn_range().start;
                if cont_lsn > start_lsn {
-                    //info!("CHECKING for {} at {} on open layer {}", key, cont_lsn, open_layer.layer_name().display());
+                    //info!("CHECKING for {} at {} on open layer {}", key, cont_lsn, open_layer.filename().display());
                    // Get all the data needed to reconstruct the page version from this layer.
                    // But if we have an older cached page image, no need to go past that.
                    let lsn_floor = max(cached_lsn + 1, start_lsn);
@@ -3203,7 +3190,7 @@ impl Timeline {
            for frozen_layer in layers.frozen_layers.iter().rev() {
                let start_lsn = frozen_layer.get_lsn_range().start;
                if cont_lsn > start_lsn {
-                    //info!("CHECKING for {} at {} on frozen layer {}", key, cont_lsn, frozen_layer.layer_name().display());
+                    //info!("CHECKING for {} at {} on frozen layer {}", key, cont_lsn, frozen_layer.filename().display());
                    let lsn_floor = max(cached_lsn + 1, start_lsn);

                    let frozen_layer = frozen_layer.clone();
@@ -3530,7 +3517,7 @@ impl Timeline {
        Ok(ancestor)
    }

-    pub(crate) fn get_ancestor_timeline(&self) -> anyhow::Result<Arc<Timeline>> {
+    fn get_ancestor_timeline(&self) -> anyhow::Result<Arc<Timeline>> {
        let ancestor = self.ancestor_timeline.as_ref().with_context(|| {
            format!(
                "Ancestor is missing. Timeline id: {} Ancestor id {:?}",
@@ -3548,11 +3535,7 @@ impl Timeline {
    ///
    /// Get a handle to the latest layer for appending.
    ///
-    async fn get_layer_for_write(
-        &self,
-        lsn: Lsn,
-        ctx: &RequestContext,
-    ) -> anyhow::Result<Arc<InMemoryLayer>> {
+    async fn get_layer_for_write(&self, lsn: Lsn) -> anyhow::Result<Arc<InMemoryLayer>> {
        let mut guard = self.layers.write().await;
        let layer = guard
            .get_layer_for_write(
@@ -3561,7 +3544,6 @@ impl Timeline {
                self.conf,
                self.timeline_id,
                self.tenant_shard_id,
-                ctx,
            )
            .await?;
        Ok(layer)
@@ -3826,8 +3808,8 @@ impl Timeline {
                );
                self.create_delta_layer(
                    &frozen_layer,
-                    Some(metadata_keyspace.0.ranges[0].clone()),
                    ctx,
+                    Some(metadata_keyspace.0.ranges[0].clone()),
                )
                .await?
            } else {
@@ -3856,7 +3838,7 @@ impl Timeline {
            // Normal case, write out a L0 delta layer file.
            // `create_delta_layer` will not modify the layer map.
            // We will remove frozen layer and add delta layer in one atomic operation later.
-            let Some(layer) = self.create_delta_layer(&frozen_layer, None, ctx).await? else {
+            let Some(layer) = self.create_delta_layer(&frozen_layer, ctx, None).await? else {
                panic!("delta layer cannot be empty if no filter is applied");
            };
            (
@@ -3955,23 +3937,29 @@ impl Timeline {
            x.unwrap()
        ));

-        for layer in layers_to_upload {
-            self.remote_client.schedule_layer_file_upload(layer)?;
+        if let Some(remote_client) = &self.remote_client {
+            for layer in layers_to_upload {
+                remote_client.schedule_layer_file_upload(layer)?;
+            }
+            remote_client.schedule_index_upload_for_metadata_update(&update)?;
        }
-        self.remote_client
-            .schedule_index_upload_for_metadata_update(&update)?;

        Ok(())
    }

    pub(crate) async fn preserve_initdb_archive(&self) -> anyhow::Result<()> {
-        self.remote_client
-            .preserve_initdb_archive(
-                &self.tenant_shard_id.tenant_id,
-                &self.timeline_id,
-                &self.cancel,
-            )
-            .await
+        if let Some(remote_client) = &self.remote_client {
+            remote_client
+                .preserve_initdb_archive(
+                    &self.tenant_shard_id.tenant_id,
+                    &self.timeline_id,
+                    &self.cancel,
+                )
+                .await?;
+        } else {
+            bail!("No remote storage configured, but was asked to backup the initdb archive for {} / {}", self.tenant_shard_id.tenant_id, self.timeline_id);
+        }
+        Ok(())
    }

    // Write out the given frozen in-memory layer as a new L0 delta file. This L0 file will not be tracked
@@ -3979,8 +3967,8 @@ impl Timeline {
    async fn create_delta_layer(
        self: &Arc<Self>,
        frozen_layer: &Arc<InMemoryLayer>,
-        key_range: Option<Range<Key>>,
        ctx: &RequestContext,
+        key_range: Option<Range<Key>>,
    ) -> anyhow::Result<Option<ResidentLayer>> {
        let self_clone = Arc::clone(self);
        let frozen_layer = Arc::clone(frozen_layer);
@@ -4003,7 +3991,6 @@ impl Timeline {
                &self_clone
                    .conf
                    .timeline_path(&self_clone.tenant_shard_id, &self_clone.timeline_id),
-                &ctx,
            )
            .await
            .fatal_err("VirtualFile::open for timeline dir fsync");
@@ -4197,7 +4184,6 @@ impl Timeline {
                self.tenant_shard_id,
                &img_range,
                lsn,
-                ctx,
            )
            .await?;

@@ -4231,7 +4217,7 @@ impl Timeline {

                    // Maybe flush `key_rest_accum`
                    if key_request_accum.raw_size() >= Timeline::MAX_GET_VECTORED_KEYS
-                        || (last_key_in_range && key_request_accum.raw_size() > 0)
+                        || last_key_in_range
                    {
                        let results = self
                            .get_vectored(key_request_accum.consume_keyspace(), lsn, ctx)
@@ -4302,7 +4288,6 @@ impl Timeline {
                &self
                    .conf
                    .timeline_path(&self.tenant_shard_id, &self.timeline_id),
-                ctx,
            )
            .await
            .fatal_err("VirtualFile::open for timeline dir fsync");
@@ -4327,16 +4312,6 @@ impl Timeline {
    /// this Timeline is shut down.  Calling this function will cause the initial
    /// logical size calculation to skip waiting for the background jobs barrier.
    pub(crate) async fn await_initial_logical_size(self: Arc<Self>) {
-        if !self.shard_identity.is_shard_zero() {
-            // We don't populate logical size on shard >0: skip waiting for it.
-            return;
-        }
-
-        if self.remote_client.is_deleting() {
-            // The timeline was created in a deletion-resume state, we don't expect logical size to be populated
-            return;
-        }
-
        if let Some(await_bg_cancel) = self
            .current_logical_size
            .cancel_wait_for_background_loop_concurrency_limit_semaphore
@@ -4348,10 +4323,9 @@ impl Timeline {
            // the logical size cancellation to skip the concurrency limit semaphore.
            // TODO: this is an unexpected case.  We should restructure so that it
            // can't happen.
-            tracing::warn!(
+            tracing::info!(
                "await_initial_logical_size: can't get semaphore cancel token, skipping"
            );
-            debug_assert!(false);
        }

        tokio::select!(
@@ -4359,48 +4333,6 @@ impl Timeline {
            _ = self.cancel.cancelled() => {}
        )
    }
-
-    /// Detach this timeline from its ancestor by copying all of ancestors layers as this
-    /// Timelines layers up to the ancestor_lsn.
-    ///
-    /// Requires a timeline that:
-    /// - has an ancestor to detach from
-    /// - the ancestor does not have an ancestor -- follows from the original RFC limitations, not
-    /// a technical requirement
-    ///
-    /// After the operation has been started, it cannot be canceled. Upon restart it needs to be
-    /// polled again until completion.
-    ///
-    /// During the operation all timelines sharing the data with this timeline will be reparented
-    /// from our ancestor to be branches of this timeline.
-    pub(crate) async fn prepare_to_detach_from_ancestor(
-        self: &Arc<Timeline>,
-        tenant: &crate::tenant::Tenant,
-        options: detach_ancestor::Options,
-        ctx: &RequestContext,
-    ) -> Result<
-        (
-            completion::Completion,
-            detach_ancestor::PreparedTimelineDetach,
-        ),
-        detach_ancestor::Error,
-    > {
-        detach_ancestor::prepare(self, tenant, options, ctx).await
-    }
-
-    /// Completes the ancestor detach. This method is to be called while holding the
-    /// TenantManager's tenant slot, so during this method we cannot be deleted nor can any
-    /// timeline be deleted. After this method returns successfully, tenant must be reloaded.
-    ///
-    /// Pageserver receiving a SIGKILL during this operation is not supported (yet).
-    pub(crate) async fn complete_detaching_timeline_ancestor(
-        self: &Arc<Timeline>,
-        tenant: &crate::tenant::Tenant,
-        prepared: detach_ancestor::PreparedTimelineDetach,
-        ctx: &RequestContext,
-    ) -> Result<Vec<TimelineId>, anyhow::Error> {
-        detach_ancestor::complete(self, tenant, prepared, ctx).await
-    }
 }

 /// Top-level failure to compact.
@@ -4500,44 +4432,30 @@ impl Timeline {
        // deletion will happen later, the layer file manager calls garbage_collect_on_drop
        guard.finish_compact_l0(&remove_layers, &insert_layers, &self.metrics);

-        self.remote_client
-            .schedule_compaction_update(&remove_layers, new_deltas)?;
+        if let Some(remote_client) = self.remote_client.as_ref() {
+            remote_client.schedule_compaction_update(&remove_layers, new_deltas)?;
+        }

        drop_wlock(guard);

        Ok(())
    }

-    async fn rewrite_layers(
-        self: &Arc<Self>,
-        replace_layers: Vec<(Layer, ResidentLayer)>,
-        drop_layers: Vec<Layer>,
-    ) -> anyhow::Result<()> {
-        let mut guard = self.layers.write().await;
-
-        guard.rewrite_layers(&replace_layers, &drop_layers, &self.metrics);
-
-        let upload_layers: Vec<_> = replace_layers.into_iter().map(|r| r.1).collect();
-
-        self.remote_client
-            .schedule_compaction_update(&drop_layers, &upload_layers)?;
-
-        Ok(())
-    }
-
    /// Schedules the uploads of the given image layers
    fn upload_new_image_layers(
        self: &Arc<Self>,
        new_images: impl IntoIterator<Item = ResidentLayer>,
    ) -> anyhow::Result<()> {
+        let Some(remote_client) = &self.remote_client else {
+            return Ok(());
+        };
        for layer in new_images {
-            self.remote_client.schedule_layer_file_upload(layer)?;
+            remote_client.schedule_layer_file_upload(layer)?;
        }
        // should any new image layer been created, not uploading index_part will
        // result in a mismatch between remote_physical_size and layermap calculated
        // size, which will fail some tests, but should not be an issue otherwise.
-        self.remote_client
-            .schedule_index_upload_for_file_changes()?;
+        remote_client.schedule_index_upload_for_file_changes()?;
        Ok(())
    }

@@ -4635,9 +4553,11 @@ impl Timeline {
    pub(super) async fn gc(&self) -> anyhow::Result<GcResult> {
        // this is most likely the background tasks, but it might be the spawned task from
        // immediate_gc
+        let cancel = crate::task_mgr::shutdown_token();
        let _g = tokio::select! {
            guard = self.gc_lock.lock() => guard,
            _ = self.cancel.cancelled() => return Ok(GcResult::default()),
+            _ = cancel.cancelled() => return Ok(GcResult::default()),
        };
        let timer = self.metrics.garbage_collect_histo.start_timer();

@@ -4679,8 +4599,6 @@ impl Timeline {
        retain_lsns: Vec<Lsn>,
        new_gc_cutoff: Lsn,
    ) -> anyhow::Result<GcResult> {
-        // FIXME: if there is an ongoing detach_from_ancestor, we should just skip gc
-
        let now = SystemTime::now();
        let mut result: GcResult = GcResult::default();

@@ -4734,7 +4652,7 @@ impl Timeline {
            if l.get_lsn_range().end > horizon_cutoff {
                debug!(
                    "keeping {} because it's newer than horizon_cutoff {}",
-                    l.layer_name(),
+                    l.filename(),
                    horizon_cutoff,
                );
                result.layers_needed_by_cutoff += 1;
@@ -4745,7 +4663,7 @@ impl Timeline {
            if l.get_lsn_range().end > pitr_cutoff {
                debug!(
                    "keeping {} because it's newer than pitr_cutoff {}",
-                    l.layer_name(),
+                    l.filename(),
                    pitr_cutoff,
                );
                result.layers_needed_by_pitr += 1;
@@ -4764,7 +4682,7 @@ impl Timeline {
                if &l.get_lsn_range().start <= retain_lsn {
                    debug!(
                        "keeping {} because it's still might be referenced by child branch forked at {} is_dropped: xx is_incremental: {}",
-                        l.layer_name(),
+                        l.filename(),
                        retain_lsn,
                        l.is_incremental(),
                    );
@@ -4795,7 +4713,7 @@ impl Timeline {
            if !layers
                .image_layer_exists(&l.get_key_range(), &(l.get_lsn_range().end..new_gc_cutoff))
            {
-                debug!("keeping {} because it is the latest layer", l.layer_name());
+                debug!("keeping {} because it is the latest layer", l.filename());
                result.layers_not_updated += 1;
                continue 'outer;
            }
@@ -4803,7 +4721,7 @@ impl Timeline {
            // We didn't find any reason to keep this file, so remove it.
            debug!(
                "garbage collecting {} is_dropped: xx is_incremental: {}",
-                l.layer_name(),
+                l.filename(),
                l.is_incremental(),
            );
            layers_to_remove.push(l);
@@ -4823,7 +4741,9 @@ impl Timeline {

            result.layers_removed = gc_layers.len() as u64;

-            self.remote_client.schedule_gc_update(&gc_layers)?;
+            if let Some(remote_client) = self.remote_client.as_ref() {
+                remote_client.schedule_gc_update(&gc_layers)?;
+            }

            guard.finish_gc_timeline(&gc_layers);

@@ -5207,7 +5127,7 @@ impl<'a> TimelineWriter<'a> {
        let buf_size: u64 = buf.len().try_into().expect("oversized value buf");

        let action = self.get_open_layer_action(lsn, buf_size);
-        let layer = self.handle_open_layer_action(lsn, action, ctx).await?;
+        let layer = self.handle_open_layer_action(lsn, action).await?;
        let res = layer.put_value(key, lsn, &buf, ctx).await;

        if res.is_ok() {
@@ -5230,15 +5150,14 @@ impl<'a> TimelineWriter<'a> {
        &mut self,
        at: Lsn,
        action: OpenLayerAction,
-        ctx: &RequestContext,
    ) -> anyhow::Result<&Arc<InMemoryLayer>> {
        match action {
            OpenLayerAction::Roll => {
                let freeze_at = self.write_guard.as_ref().unwrap().max_lsn.unwrap();
                self.roll_layer(freeze_at).await?;
-                self.open_layer(at, ctx).await?;
+                self.open_layer(at).await?;
            }
-            OpenLayerAction::Open => self.open_layer(at, ctx).await?,
+            OpenLayerAction::Open => self.open_layer(at).await?,
            OpenLayerAction::None => {
                assert!(self.write_guard.is_some());
            }
@@ -5247,8 +5166,8 @@ impl<'a> TimelineWriter<'a> {
        Ok(&self.write_guard.as_ref().unwrap().open_layer)
    }

-    async fn open_layer(&mut self, at: Lsn, ctx: &RequestContext) -> anyhow::Result<()> {
-        let layer = self.tl.get_layer_for_write(at, ctx).await?;
+    async fn open_layer(&mut self, at: Lsn) -> anyhow::Result<()> {
+        let layer = self.tl.get_layer_for_write(at).await?;
        let initial_size = layer.size().await?;

        let last_freeze_at = self.last_freeze_at.load();
@@ -5325,14 +5244,10 @@ impl<'a> TimelineWriter<'a> {
        Ok(())
    }

-    pub(crate) async fn delete_batch(
-        &mut self,
-        batch: &[(Range<Key>, Lsn)],
-        ctx: &RequestContext,
-    ) -> anyhow::Result<()> {
+    pub(crate) async fn delete_batch(&mut self, batch: &[(Range<Key>, Lsn)]) -> anyhow::Result<()> {
        if let Some((_, lsn)) = batch.first() {
            let action = self.get_open_layer_action(*lsn, 0);
-            let layer = self.handle_open_layer_action(*lsn, action, ctx).await?;
+            let layer = self.handle_open_layer_action(*lsn, action).await?;
            layer.put_tombstones(batch).await?;
        }

--- a/pageserver/src/tenant/timeline/compaction.rs
+++ b/pageserver/src/tenant/timeline/compaction.rs
@@ -15,8 +15,7 @@ use anyhow::{anyhow, Context};
 use enumset::EnumSet;
 use fail::fail_point;
 use itertools::Itertools;
-use pageserver_api::keyspace::ShardedRange;
-use pageserver_api::shard::{ShardCount, ShardIdentity, TenantShardId};
+use pageserver_api::shard::{ShardIdentity, TenantShardId};
 use tokio_util::sync::CancellationToken;
 use tracing::{debug, info, info_span, trace, warn, Instrument};
 use utils::id::TimelineId;
@@ -94,7 +93,7 @@ impl Timeline {
        // Define partitioning schema if needed

        // FIXME: the match should only cover repartitioning, not the next steps
-        let partition_count = match self
+        match self
            .repartition(
                self.get_last_record_lsn(),
                self.get_compaction_target_size(),
@@ -147,7 +146,6 @@ impl Timeline {
                assert!(sparse_layers.is_empty());

                self.upload_new_image_layers(dense_layers)?;
-                dense_partitioning.parts.len()
            }
            Err(err) => {
                // no partitioning? This is normal, if the timeline was just created
@@ -159,148 +157,9 @@ impl Timeline {
                if !self.cancel.is_cancelled() {
                    tracing::error!("could not compact, repartitioning keyspace failed: {err:?}");
                }
-                1
            }
        };

-        if self.shard_identity.count >= ShardCount::new(2) {
-            // Limit the number of layer rewrites to the number of partitions: this means its
-            // runtime should be comparable to a full round of image layer creations, rather than
-            // being potentially much longer.
-            let rewrite_max = partition_count;
-
-            self.compact_shard_ancestors(rewrite_max, ctx).await?;
-        }
-
-        Ok(())
-    }
-
-    /// Check for layers that are elegible to be rewritten:
-    /// - Shard splitting: After a shard split, ancestor layers beyond pitr_interval, so that
-    ///   we don't indefinitely retain keys in this shard that aren't needed.
-    /// - For future use: layers beyond pitr_interval that are in formats we would
-    ///   rather not maintain compatibility with indefinitely.
-    ///
-    /// Note: this phase may read and write many gigabytes of data: use rewrite_max to bound
-    /// how much work it will try to do in each compaction pass.
-    async fn compact_shard_ancestors(
-        self: &Arc<Self>,
-        rewrite_max: usize,
-        _ctx: &RequestContext,
-    ) -> anyhow::Result<()> {
-        let mut drop_layers = Vec::new();
-        let layers_to_rewrite: Vec<Layer> = Vec::new();
-
-        // We will use the PITR cutoff as a condition for rewriting layers.
-        let pitr_cutoff = self.gc_info.read().unwrap().cutoffs.pitr;
-
-        let layers = self.layers.read().await;
-        for layer_desc in layers.layer_map().iter_historic_layers() {
-            let layer = layers.get_from_desc(&layer_desc);
-            if layer.metadata().shard.shard_count == self.shard_identity.count {
-                // This layer does not belong to a historic ancestor, no need to re-image it.
-                continue;
-            }
-
-            // This layer was created on an ancestor shard: check if it contains any data for this shard.
-            let sharded_range = ShardedRange::new(layer_desc.get_key_range(), &self.shard_identity);
-            let layer_local_page_count = sharded_range.page_count();
-            let layer_raw_page_count = ShardedRange::raw_size(&layer_desc.get_key_range());
-            if layer_local_page_count == 0 {
-                // This ancestral layer only covers keys that belong to other shards.
-                // We include the full metadata in the log: if we had some critical bug that caused
-                // us to incorrectly drop layers, this would simplify manually debugging + reinstating those layers.
-                info!(%layer, old_metadata=?layer.metadata(),
-                    "dropping layer after shard split, contains no keys for this shard.",
-                );
-
-                if cfg!(debug_assertions) {
-                    // Expensive, exhaustive check of keys in this layer: this guards against ShardedRange's calculations being
-                    // wrong.  If ShardedRange claims the local page count is zero, then no keys in this layer
-                    // should be !is_key_disposable()
-                    let range = layer_desc.get_key_range();
-                    let mut key = range.start;
-                    while key < range.end {
-                        debug_assert!(self.shard_identity.is_key_disposable(&key));
-                        key = key.next();
-                    }
-                }
-
-                drop_layers.push(layer);
-                continue;
-            } else if layer_local_page_count != u32::MAX
-                && layer_local_page_count == layer_raw_page_count
-            {
-                debug!(%layer,
-                    "layer is entirely shard local ({} keys), no need to filter it",
-                    layer_local_page_count
-                );
-                continue;
-            }
-
-            // Don't bother re-writing a layer unless it will at least halve its size
-            if layer_local_page_count != u32::MAX
-                && layer_local_page_count > layer_raw_page_count / 2
-            {
-                debug!(%layer,
-                    "layer is already mostly local ({}/{}), not rewriting",
-                    layer_local_page_count,
-                    layer_raw_page_count
-                );
-            }
-
-            // Don't bother re-writing a layer if it is within the PITR window: it will age-out eventually
-            // without incurring the I/O cost of a rewrite.
-            if layer_desc.get_lsn_range().end >= pitr_cutoff {
-                debug!(%layer, "Skipping rewrite of layer still in PITR window ({} >= {})",
-                    layer_desc.get_lsn_range().end, pitr_cutoff);
-                continue;
-            }
-
-            if layer_desc.is_delta() {
-                // We do not yet implement rewrite of delta layers
-                debug!(%layer, "Skipping rewrite of delta layer");
-                continue;
-            }
-
-            // Only rewrite layers if they would have different remote paths: either they belong to this
-            // shard but an old generation, or they belonged to another shard.  This also implicitly
-            // guarantees that the layer is persistent in remote storage (as only remote persistent
-            // layers are carried across shard splits, any local-only layer would be in the current generation)
-            if layer.metadata().generation == self.generation
-                && layer.metadata().shard.shard_count == self.shard_identity.count
-            {
-                debug!(%layer, "Skipping rewrite, is not from old generation");
-                continue;
-            }
-
-            if layers_to_rewrite.len() >= rewrite_max {
-                tracing::info!(%layer, "Will rewrite layer on a future compaction, already rewrote {}",
-                    layers_to_rewrite.len()
-                );
-                continue;
-            }
-
-            // Fall through: all our conditions for doing a rewrite passed.
-            // TODO: implement rewriting
-            tracing::debug!(%layer, "Would rewrite layer");
-        }
-
-        // Drop the layers read lock: we will acquire it for write in [`Self::rewrite_layers`]
-        drop(layers);
-
-        // TODO: collect layers to rewrite
-        let replace_layers = Vec::new();
-
-        // Update the LayerMap so that readers will use the new layers, and enqueue it for writing to remote storage
-        self.rewrite_layers(replace_layers, drop_layers).await?;
-
-        // We wait for all uploads to complete before finishing this compaction stage.  This is not
-        // necessary for correctness, but it simplifies testing, and avoids proceeding with another
-        // Timeline's compaction while this timeline's uploads may be generating lots of disk I/O
-        // load.
-        self.remote_client.wait_completion().await?;
-
        Ok(())
    }

@@ -698,7 +557,6 @@ impl Timeline {
                                debug!("Create new layer {}..{}", lsn_range.start, lsn_range.end);
                                lsn_range.clone()
                            },
-                            ctx,
                        )
                        .await?,
                    );
@@ -754,7 +612,6 @@ impl Timeline {
                &self
                    .conf
                    .timeline_path(&self.tenant_shard_id, &self.timeline_id),
-                ctx,
            )
            .await
            .fatal_err("VirtualFile::open for timeline dir fsync");
@@ -1093,7 +950,6 @@ impl CompactionJobExecutor for TimelineAdaptor {
            self.timeline.tenant_shard_id,
            key_range.start,
            lsn_range.clone(),
-            ctx,
        )
        .await?;

@@ -1168,7 +1024,6 @@ impl TimelineAdaptor {
            self.timeline.tenant_shard_id,
            key_range,
            lsn,
-            ctx,
        )
        .await?;

--- a/pageserver/src/tenant/timeline/delete.rs
+++ b/pageserver/src/tenant/timeline/delete.rs
@@ -26,21 +26,19 @@ use super::{Timeline, TimelineResources};
 /// during attach or pageserver restart.
 /// See comment in persist_index_part_with_deleted_flag.
 async fn set_deleted_in_remote_index(timeline: &Timeline) -> Result<(), DeleteTimelineError> {
-    match timeline
-        .remote_client
-        .persist_index_part_with_deleted_flag()
-        .await
-    {
-        // If we (now, or already) marked it successfully as deleted, we can proceed
-        Ok(()) | Err(PersistIndexPartWithDeletedFlagError::AlreadyDeleted(_)) => (),
-        // Bail out otherwise
-        //
-        // AlreadyInProgress shouldn't happen, because the 'delete_lock' prevents
-        // two tasks from performing the deletion at the same time. The first task
-        // that starts deletion should run it to completion.
-        Err(e @ PersistIndexPartWithDeletedFlagError::AlreadyInProgress(_))
-        | Err(e @ PersistIndexPartWithDeletedFlagError::Other(_)) => {
-            return Err(DeleteTimelineError::Other(anyhow::anyhow!(e)));
+    if let Some(remote_client) = timeline.remote_client.as_ref() {
+        match remote_client.persist_index_part_with_deleted_flag().await {
+            // If we (now, or already) marked it successfully as deleted, we can proceed
+            Ok(()) | Err(PersistIndexPartWithDeletedFlagError::AlreadyDeleted(_)) => (),
+            // Bail out otherwise
+            //
+            // AlreadyInProgress shouldn't happen, because the 'delete_lock' prevents
+            // two tasks from performing the deletion at the same time. The first task
+            // that starts deletion should run it to completion.
+            Err(e @ PersistIndexPartWithDeletedFlagError::AlreadyInProgress(_))
+            | Err(e @ PersistIndexPartWithDeletedFlagError::Other(_)) => {
+                return Err(DeleteTimelineError::Other(anyhow::anyhow!(e)));
+            }
        }
    }
    Ok(())
@@ -119,11 +117,11 @@ pub(super) async fn delete_local_timeline_directory(

 /// Removes remote layers and an index file after them.
 async fn delete_remote_layers_and_index(timeline: &Timeline) -> anyhow::Result<()> {
-    timeline
-        .remote_client
-        .delete_all()
-        .await
-        .context("delete_all")
+    if let Some(remote_client) = &timeline.remote_client {
+        remote_client.delete_all().await.context("delete_all")?
+    };
+
+    Ok(())
 }

 // This function removs remaining traces of a timeline on disk.
@@ -262,7 +260,7 @@ impl DeleteTimelineFlow {
        tenant: Arc<Tenant>,
        timeline_id: TimelineId,
        local_metadata: &TimelineMetadata,
-        remote_client: RemoteTimelineClient,
+        remote_client: Option<RemoteTimelineClient>,
        deletion_queue_client: DeletionQueueClient,
    ) -> anyhow::Result<()> {
        // Note: here we even skip populating layer map. Timeline is essentially uninitialized.
@@ -424,10 +422,6 @@ impl DeleteTimelineFlow {
    pub(crate) fn is_finished(&self) -> bool {
        matches!(self, Self::Finished)
    }
-
-    pub(crate) fn is_not_started(&self) -> bool {
-        matches!(self, Self::NotStarted)
-    }
 }

 struct DeletionGuard(OwnedMutexGuard<DeleteTimelineFlow>);
--- a/pageserver/src/tenant/timeline/detach_ancestor.rs
+++ b/pageserver/src/tenant/timeline/detach_ancestor.rs
@@ -1,529 +0,0 @@
-use std::sync::Arc;
-
-use super::{layer_manager::LayerManager, Timeline};
-use crate::{
-    context::{DownloadBehavior, RequestContext},
-    task_mgr::TaskKind,
-    tenant::{
-        storage_layer::{AsLayerDesc as _, DeltaLayerWriter, Layer, ResidentLayer},
-        Tenant,
-    },
-    virtual_file::{MaybeFatalIo, VirtualFile},
-};
-use tokio_util::sync::CancellationToken;
-use tracing::Instrument;
-use utils::{completion, generation::Generation, id::TimelineId, lsn::Lsn};
-
-#[derive(Debug, thiserror::Error)]
-pub(crate) enum Error {
-    #[error("no ancestors")]
-    NoAncestor,
-    #[error("too many ancestors")]
-    TooManyAncestors,
-    #[error("shutting down, please retry later")]
-    ShuttingDown,
-    #[error("flushing failed")]
-    FlushAncestor(#[source] anyhow::Error),
-    #[error("layer download failed")]
-    RewrittenDeltaDownloadFailed(#[source] anyhow::Error),
-    #[error("copying LSN prefix locally failed")]
-    CopyDeltaPrefix(#[source] anyhow::Error),
-    #[error("upload rewritten layer")]
-    UploadRewritten(#[source] anyhow::Error),
-
-    #[error("ancestor is already being detached by: {}", .0)]
-    OtherTimelineDetachOngoing(TimelineId),
-
-    #[error("remote copying layer failed")]
-    CopyFailed(#[source] anyhow::Error),
-
-    #[error("unexpected error")]
-    Unexpected(#[source] anyhow::Error),
-}
-
-pub(crate) struct PreparedTimelineDetach {
-    layers: Vec<Layer>,
-}
-
-/// TODO: this should be part of PageserverConf because we cannot easily modify cplane arguments.
-#[derive(Debug)]
-pub(crate) struct Options {
-    pub(crate) rewrite_concurrency: std::num::NonZeroUsize,
-    pub(crate) copy_concurrency: std::num::NonZeroUsize,
-}
-
-impl Default for Options {
-    fn default() -> Self {
-        Self {
-            rewrite_concurrency: std::num::NonZeroUsize::new(2).unwrap(),
-            copy_concurrency: std::num::NonZeroUsize::new(10).unwrap(),
-        }
-    }
-}
-
-/// See [`Timeline::prepare_to_detach_from_ancestor`]
-pub(super) async fn prepare(
-    detached: &Arc<Timeline>,
-    tenant: &Tenant,
-    options: Options,
-    ctx: &RequestContext,
-) -> Result<(completion::Completion, PreparedTimelineDetach), Error> {
-    use Error::*;
-
-    let Some((ancestor, ancestor_lsn)) = detached
-        .ancestor_timeline
-        .as_ref()
-        .map(|tl| (tl.clone(), detached.ancestor_lsn))
-    else {
-        return Err(NoAncestor);
-    };
-
-    if !ancestor_lsn.is_valid() {
-        return Err(NoAncestor);
-    }
-
-    if ancestor.ancestor_timeline.is_some() {
-        // non-technical requirement; we could flatten N ancestors just as easily but we chose
-        // not to
-        return Err(TooManyAncestors);
-    }
-
-    // before we acquire the gate, we must mark the ancestor as having a detach operation
-    // ongoing which will block other concurrent detach operations so we don't get to ackward
-    // situations where there would be two branches trying to reparent earlier branches.
-    let (guard, barrier) = completion::channel();
-
-    {
-        let mut guard = tenant.ongoing_timeline_detach.lock().unwrap();
-        if let Some((tl, other)) = guard.as_ref() {
-            if !other.is_ready() {
-                return Err(OtherTimelineDetachOngoing(*tl));
-            }
-        }
-        *guard = Some((detached.timeline_id, barrier));
-    }
-
-    let _gate_entered = detached.gate.enter().map_err(|_| ShuttingDown)?;
-
-    if ancestor_lsn >= ancestor.get_disk_consistent_lsn() {
-        let span =
-            tracing::info_span!("freeze_and_flush", ancestor_timeline_id=%ancestor.timeline_id);
-        async {
-            let started_at = std::time::Instant::now();
-            let freeze_and_flush = ancestor.freeze_and_flush0();
-            let mut freeze_and_flush = std::pin::pin!(freeze_and_flush);
-
-            let res =
-                tokio::time::timeout(std::time::Duration::from_secs(1), &mut freeze_and_flush)
-                    .await;
-
-            let res = match res {
-                Ok(res) => res,
-                Err(_elapsed) => {
-                    tracing::info!("freezing and flushing ancestor is still ongoing");
-                    freeze_and_flush.await
-                }
-            };
-
-            res.map_err(FlushAncestor)?;
-
-            // we do not need to wait for uploads to complete but we do need `struct Layer`,
-            // copying delta prefix is unsupported currently for `InMemoryLayer`.
-            tracing::info!(
-                elapsed_ms = started_at.elapsed().as_millis(),
-                "froze and flushed the ancestor"
-            );
-            Ok(())
-        }
-        .instrument(span)
-        .await?;
-    }
-
-    let end_lsn = ancestor_lsn + 1;
-
-    let (filtered_layers, straddling_branchpoint, rest_of_historic) = {
-        // we do not need to start from our layers, because they can only be layers that come
-        // *after* ancestor_lsn
-        let layers = tokio::select! {
-            guard = ancestor.layers.read() => guard,
-            _ = detached.cancel.cancelled() => {
-                return Err(ShuttingDown);
-            }
-            _ = ancestor.cancel.cancelled() => {
-                return Err(ShuttingDown);
-            }
-        };
-
-        // between retries, these can change if compaction or gc ran in between. this will mean
-        // we have to redo work.
-        partition_work(ancestor_lsn, &layers)
-    };
-
-    // TODO: layers are already sorted by something: use that to determine how much of remote
-    // copies are already done.
-    tracing::info!(filtered=%filtered_layers, to_rewrite = straddling_branchpoint.len(), historic=%rest_of_historic.len(), "collected layers");
-
-    // TODO: copying and lsn prefix copying could be done at the same time with a single fsync after
-    let mut new_layers: Vec<Layer> =
-        Vec::with_capacity(straddling_branchpoint.len() + rest_of_historic.len());
-
-    {
-        tracing::debug!(to_rewrite = %straddling_branchpoint.len(), "copying prefix of delta layers");
-
-        let mut tasks = tokio::task::JoinSet::new();
-
-        let mut wrote_any = false;
-
-        let limiter = Arc::new(tokio::sync::Semaphore::new(
-            options.rewrite_concurrency.get(),
-        ));
-
-        for layer in straddling_branchpoint {
-            let limiter = limiter.clone();
-            let timeline = detached.clone();
-            let ctx = ctx.detached_child(TaskKind::DetachAncestor, DownloadBehavior::Download);
-
-            tasks.spawn(async move {
-                let _permit = limiter.acquire().await;
-                let copied =
-                    upload_rewritten_layer(end_lsn, &layer, &timeline, &timeline.cancel, &ctx)
-                        .await?;
-                Ok(copied)
-            });
-        }
-
-        while let Some(res) = tasks.join_next().await {
-            match res {
-                Ok(Ok(Some(copied))) => {
-                    wrote_any = true;
-                    tracing::info!(layer=%copied, "rewrote and uploaded");
-                    new_layers.push(copied);
-                }
-                Ok(Ok(None)) => {}
-                Ok(Err(e)) => return Err(e),
-                Err(je) => return Err(Unexpected(je.into())),
-            }
-        }
-
-        // FIXME: the fsync should be mandatory, after both rewrites and copies
-        if wrote_any {
-            let timeline_dir = VirtualFile::open(
-                &detached
-                    .conf
-                    .timeline_path(&detached.tenant_shard_id, &detached.timeline_id),
-                ctx,
-            )
-            .await
-            .fatal_err("VirtualFile::open for timeline dir fsync");
-            timeline_dir
-                .sync_all()
-                .await
-                .fatal_err("VirtualFile::sync_all timeline dir");
-        }
-    }
-
-    let mut tasks = tokio::task::JoinSet::new();
-    let limiter = Arc::new(tokio::sync::Semaphore::new(options.copy_concurrency.get()));
-
-    for adopted in rest_of_historic {
-        let limiter = limiter.clone();
-        let timeline = detached.clone();
-
-        tasks.spawn(
-            async move {
-                let _permit = limiter.acquire().await;
-                let owned =
-                    remote_copy(&adopted, &timeline, timeline.generation, &timeline.cancel).await?;
-                tracing::info!(layer=%owned, "remote copied");
-                Ok(owned)
-            }
-            .in_current_span(),
-        );
-    }
-
-    while let Some(res) = tasks.join_next().await {
-        match res {
-            Ok(Ok(owned)) => {
-                new_layers.push(owned);
-            }
-            Ok(Err(failed)) => {
-                return Err(failed);
-            }
-            Err(je) => return Err(Unexpected(je.into())),
-        }
-    }
-
-    // TODO: fsync directory again if we hardlinked something
-
-    let prepared = PreparedTimelineDetach { layers: new_layers };
-
-    Ok((guard, prepared))
-}
-
-fn partition_work(
-    ancestor_lsn: Lsn,
-    source_layermap: &LayerManager,
-) -> (usize, Vec<Layer>, Vec<Layer>) {
-    let mut straddling_branchpoint = vec![];
-    let mut rest_of_historic = vec![];
-
-    let mut later_by_lsn = 0;
-
-    for desc in source_layermap.layer_map().iter_historic_layers() {
-        // off by one chances here:
-        // - start is inclusive
-        // - end is exclusive
-        if desc.lsn_range.start > ancestor_lsn {
-            later_by_lsn += 1;
-            continue;
-        }
-
-        let target = if desc.lsn_range.start <= ancestor_lsn
-            && desc.lsn_range.end > ancestor_lsn
-            && desc.is_delta
-        {
-            // TODO: image layer at Lsn optimization
-            &mut straddling_branchpoint
-        } else {
-            &mut rest_of_historic
-        };
-
-        target.push(source_layermap.get_from_desc(&desc));
-    }
-
-    (later_by_lsn, straddling_branchpoint, rest_of_historic)
-}
-
-async fn upload_rewritten_layer(
-    end_lsn: Lsn,
-    layer: &Layer,
-    target: &Arc<Timeline>,
-    cancel: &CancellationToken,
-    ctx: &RequestContext,
-) -> Result<Option<Layer>, Error> {
-    use Error::UploadRewritten;
-    let copied = copy_lsn_prefix(end_lsn, layer, target, ctx).await?;
-
-    let Some(copied) = copied else {
-        return Ok(None);
-    };
-
-    // FIXME: better shuttingdown error
-    target
-        .remote_client
-        .upload_layer_file(&copied, cancel)
-        .await
-        .map_err(UploadRewritten)?;
-
-    Ok(Some(copied.into()))
-}
-
-async fn copy_lsn_prefix(
-    end_lsn: Lsn,
-    layer: &Layer,
-    target_timeline: &Arc<Timeline>,
-    ctx: &RequestContext,
-) -> Result<Option<ResidentLayer>, Error> {
-    use Error::{CopyDeltaPrefix, RewrittenDeltaDownloadFailed};
-
-    tracing::debug!(%layer, %end_lsn, "copying lsn prefix");
-
-    let mut writer = DeltaLayerWriter::new(
-        target_timeline.conf,
-        target_timeline.timeline_id,
-        target_timeline.tenant_shard_id,
-        layer.layer_desc().key_range.start,
-        layer.layer_desc().lsn_range.start..end_lsn,
-        ctx,
-    )
-    .await
-    .map_err(CopyDeltaPrefix)?;
-
-    let resident = layer
-        .download_and_keep_resident()
-        .await
-        // likely shutdown
-        .map_err(RewrittenDeltaDownloadFailed)?;
-
-    let records = resident
-        .copy_delta_prefix(&mut writer, end_lsn, ctx)
-        .await
-        .map_err(CopyDeltaPrefix)?;
-
-    drop(resident);
-
-    tracing::debug!(%layer, records, "copied records");
-
-    if records == 0 {
-        drop(writer);
-        // TODO: we might want to store an empty marker in remote storage for this
-        // layer so that we will not needlessly walk `layer` on repeated attempts.
-        Ok(None)
-    } else {
-        // reuse the key instead of adding more holes between layers by using the real
-        // highest key in the layer.
-        let reused_highest_key = layer.layer_desc().key_range.end;
-        let copied = writer
-            .finish(reused_highest_key, target_timeline, ctx)
-            .await
-            .map_err(CopyDeltaPrefix)?;
-
-        tracing::debug!(%layer, %copied, "new layer produced");
-
-        Ok(Some(copied))
-    }
-}
-
-/// Creates a new Layer instance for the adopted layer, and ensures it is found from the remote
-/// storage on successful return without the adopted layer being added to `index_part.json`.
-async fn remote_copy(
-    adopted: &Layer,
-    adoptee: &Arc<Timeline>,
-    generation: Generation,
-    cancel: &CancellationToken,
-) -> Result<Layer, Error> {
-    use Error::CopyFailed;
-
-    // depending if Layer::keep_resident we could hardlink
-
-    let mut metadata = adopted.metadata();
-    debug_assert!(metadata.generation <= generation);
-    metadata.generation = generation;
-
-    let owned = crate::tenant::storage_layer::Layer::for_evicted(
-        adoptee.conf,
-        adoptee,
-        adopted.layer_desc().layer_name(),
-        metadata,
-    );
-
-    // FIXME: better shuttingdown error
-    adoptee
-        .remote_client
-        .copy_timeline_layer(adopted, &owned, cancel)
-        .await
-        .map(move |()| owned)
-        .map_err(CopyFailed)
-}
-
-/// See [`Timeline::complete_detaching_timeline_ancestor`].
-pub(super) async fn complete(
-    detached: &Arc<Timeline>,
-    tenant: &Tenant,
-    prepared: PreparedTimelineDetach,
-    _ctx: &RequestContext,
-) -> Result<Vec<TimelineId>, anyhow::Error> {
-    let PreparedTimelineDetach { layers } = prepared;
-
-    let ancestor = detached
-        .get_ancestor_timeline()
-        .expect("must still have a ancestor");
-    let ancestor_lsn = detached.get_ancestor_lsn();
-
-    // publish the prepared layers before we reparent any of the timelines, so that on restart
-    // reparented timelines find layers. also do the actual detaching.
-    //
-    // if we crash after this operation, we will at least come up having detached a timeline, but
-    // we cannot go back and reparent the timelines which would had been reparented in normal
-    // execution.
-    //
-    // this is not perfect, but it avoids us a retry happening after a compaction or gc on restart
-    // which could give us a completely wrong layer combination.
-    detached
-        .remote_client
-        .schedule_adding_existing_layers_to_index_detach_and_wait(
-            &layers,
-            (ancestor.timeline_id, ancestor_lsn),
-        )
-        .await?;
-
-    let mut tasks = tokio::task::JoinSet::new();
-
-    // because we are now keeping the slot in progress, it is unlikely that there will be any
-    // timeline deletions during this time. if we raced one, then we'll just ignore it.
-    tenant
-        .timelines
-        .lock()
-        .unwrap()
-        .values()
-        .filter_map(|tl| {
-            if Arc::ptr_eq(tl, detached) {
-                return None;
-            }
-
-            if !tl.is_active() {
-                return None;
-            }
-
-            let tl_ancestor = tl.ancestor_timeline.as_ref()?;
-            let is_same = Arc::ptr_eq(&ancestor, tl_ancestor);
-            let is_earlier = tl.get_ancestor_lsn() <= ancestor_lsn;
-
-            let is_deleting = tl
-                .delete_progress
-                .try_lock()
-                .map(|flow| !flow.is_not_started())
-                .unwrap_or(true);
-
-            if is_same && is_earlier && !is_deleting {
-                Some(tl.clone())
-            } else {
-                None
-            }
-        })
-        .for_each(|timeline| {
-            // important in this scope: we are holding the Tenant::timelines lock
-            let span = tracing::info_span!("reparent", reparented=%timeline.timeline_id);
-            let new_parent = detached.timeline_id;
-
-            tasks.spawn(
-                async move {
-                    let res = timeline
-                        .remote_client
-                        .schedule_reparenting_and_wait(&new_parent)
-                        .await;
-
-                    match res {
-                        Ok(()) => Some(timeline),
-                        Err(e) => {
-                            // with the use of tenant slot, we no longer expect these.
-                            tracing::warn!("reparenting failed: {e:#}");
-                            None
-                        }
-                    }
-                }
-                .instrument(span),
-            );
-        });
-
-    let reparenting_candidates = tasks.len();
-    let mut reparented = Vec::with_capacity(tasks.len());
-
-    while let Some(res) = tasks.join_next().await {
-        match res {
-            Ok(Some(timeline)) => {
-                tracing::info!(reparented=%timeline.timeline_id, "reparenting done");
-                reparented.push(timeline.timeline_id);
-            }
-            Ok(None) => {
-                // lets just ignore this for now. one or all reparented timelines could had
-                // started deletion, and that is fine.
-            }
-            Err(je) if je.is_cancelled() => unreachable!("not used"),
-            Err(je) if je.is_panic() => {
-                // ignore; it's better to continue with a single reparenting failing (or even
-                // all of them) in order to get to the goal state.
-                //
-                // these timelines will never be reparentable, but they can be always detached as
-                // separate tree roots.
-            }
-            Err(je) => tracing::error!("unexpected join error: {je:?}"),
-        }
-    }
-
-    if reparenting_candidates != reparented.len() {
-        tracing::info!("failed to reparent some candidates");
-    }
-
-    Ok(reparented)
-}
--- a/pageserver/src/tenant/timeline/eviction_task.rs
+++ b/pageserver/src/tenant/timeline/eviction_task.rs
@@ -23,7 +23,7 @@ use std::{
 use pageserver_api::models::{EvictionPolicy, EvictionPolicyLayerAccessThreshold};
 use tokio::time::Instant;
 use tokio_util::sync::CancellationToken;
-use tracing::{debug, info, info_span, instrument, warn, Instrument};
+use tracing::{debug, error, info, info_span, instrument, warn, Instrument};

 use crate::{
    context::{DownloadBehavior, RequestContext},
@@ -211,6 +211,11 @@ impl Timeline {

        // So, we just need to deal with this.

+        if self.remote_client.is_none() {
+            error!("no remote storage configured, cannot evict layers");
+            return ControlFlow::Continue(());
+        }
+
        let mut js = tokio::task::JoinSet::new();
        {
            let guard = self.layers.read().await;
--- a/pageserver/src/tenant/timeline/init.rs
+++ b/pageserver/src/tenant/timeline/init.rs
@@ -6,12 +6,13 @@ use crate::{
            self,
            index::{IndexPart, LayerFileMetadata},
        },
-        storage_layer::LayerName,
+        storage_layer::LayerFileName,
        Generation,
    },
+    METADATA_FILE_NAME,
 };
 use anyhow::Context;
-use camino::{Utf8Path, Utf8PathBuf};
+use camino::Utf8Path;
 use pageserver_api::shard::ShardIndex;
 use std::{collections::HashMap, str::FromStr};
 use utils::lsn::Lsn;
@@ -19,13 +20,15 @@ use utils::lsn::Lsn;
 /// Identified files in the timeline directory.
 pub(super) enum Discovered {
    /// The only one we care about
-    Layer(LayerName, Utf8PathBuf, u64),
+    Layer(LayerFileName, u64),
    /// Old ephmeral files from previous launches, should be removed
    Ephemeral(String),
    /// Old temporary timeline files, unsure what these really are, should be removed
    Temporary(String),
    /// Temporary on-demand download files, should be removed
    TemporaryDownload(String),
+    /// "metadata" file we persist locally and include in `index_part.json`
+    Metadata,
    /// Backup file from previously future layers
    IgnoredBackup,
    /// Unrecognized, warn about these
@@ -40,13 +43,15 @@ pub(super) fn scan_timeline_dir(path: &Utf8Path) -> anyhow::Result<Vec<Discovere
        let direntry = direntry?;
        let file_name = direntry.file_name().to_string();

-        let discovered = match LayerName::from_str(&file_name) {
+        let discovered = match LayerFileName::from_str(&file_name) {
            Ok(file_name) => {
                let file_size = direntry.metadata()?.len();
-                Discovered::Layer(file_name, direntry.path().to_owned(), file_size)
+                Discovered::Layer(file_name, file_size)
            }
            Err(_) => {
-                if file_name.ends_with(".old") {
+                if file_name == METADATA_FILE_NAME {
+                    Discovered::Metadata
+                } else if file_name.ends_with(".old") {
                    // ignore these
                    Discovered::IgnoredBackup
                } else if remote_timeline_client::is_temp_download_file(direntry.path()) {
@@ -67,28 +72,6 @@ pub(super) fn scan_timeline_dir(path: &Utf8Path) -> anyhow::Result<Vec<Discovere
    Ok(ret)
 }

-/// Whereas `LayerFileMetadata` describes the metadata we would store in remote storage,
-/// this structure extends it with metadata describing the layer's presence in local storage.
-#[derive(Clone, Debug)]
-pub(super) struct LocalLayerFileMetadata {
-    pub(super) metadata: LayerFileMetadata,
-    pub(super) local_path: Utf8PathBuf,
-}
-
-impl LocalLayerFileMetadata {
-    pub fn new(
-        local_path: Utf8PathBuf,
-        file_size: u64,
-        generation: Generation,
-        shard: ShardIndex,
-    ) -> Self {
-        Self {
-            local_path,
-            metadata: LayerFileMetadata::new(file_size, generation, shard),
-        }
-    }
-}
-
 /// Decision on what to do with a layer file after considering its local and remote metadata.
 #[derive(Clone, Debug)]
 pub(super) enum Decision {
@@ -97,11 +80,11 @@ pub(super) enum Decision {
    /// The layer is present locally, but local metadata does not match remote; we must
    /// delete it and treat it as evicted.
    UseRemote {
-        local: LocalLayerFileMetadata,
+        local: LayerFileMetadata,
        remote: LayerFileMetadata,
    },
    /// The layer is present locally, and metadata matches.
-    UseLocal(LocalLayerFileMetadata),
+    UseLocal(LayerFileMetadata),
 }

 /// A layer needs to be left out of the layer map.
@@ -109,42 +92,39 @@ pub(super) enum Decision {
 pub(super) enum DismissedLayer {
    /// The related layer is is in future compared to disk_consistent_lsn, it must not be loaded.
    Future {
-        /// `None` if the layer is only known through [`IndexPart`].
-        local: Option<LocalLayerFileMetadata>,
+        /// The local metadata. `None` if the layer is only known through [`IndexPart`].
+        local: Option<LayerFileMetadata>,
    },
    /// The layer only exists locally.
    ///
    /// In order to make crash safe updates to layer map, we must dismiss layers which are only
    /// found locally or not yet included in the remote `index_part.json`.
-    LocalOnly(LocalLayerFileMetadata),
+    LocalOnly(LayerFileMetadata),
 }

 /// Merges local discoveries and remote [`IndexPart`] to a collection of decisions.
 pub(super) fn reconcile(
-    discovered: Vec<(LayerName, Utf8PathBuf, u64)>,
+    discovered: Vec<(LayerFileName, u64)>,
    index_part: Option<&IndexPart>,
    disk_consistent_lsn: Lsn,
    generation: Generation,
    shard: ShardIndex,
-) -> Vec<(LayerName, Result<Decision, DismissedLayer>)> {
+) -> Vec<(LayerFileName, Result<Decision, DismissedLayer>)> {
    use Decision::*;

-    // name => (local_metadata, remote_metadata)
-    type Collected =
-        HashMap<LayerName, (Option<LocalLayerFileMetadata>, Option<LayerFileMetadata>)>;
+    // name => (local, remote)
+    type Collected = HashMap<LayerFileName, (Option<LayerFileMetadata>, Option<LayerFileMetadata>)>;

    let mut discovered = discovered
        .into_iter()
-        .map(|(layer_name, local_path, file_size)| {
+        .map(|(name, file_size)| {
            (
-                layer_name,
+                name,
                // The generation and shard here will be corrected to match IndexPart in the merge below, unless
                // it is not in IndexPart, in which case using our current generation makes sense
                // because it will be uploaded in this generation.
                (
-                    Some(LocalLayerFileMetadata::new(
-                        local_path, file_size, generation, shard,
-                    )),
+                    Some(LayerFileMetadata::new(file_size, generation, shard)),
                    None,
                ),
            )
@@ -173,7 +153,7 @@ pub(super) fn reconcile(
                Err(DismissedLayer::Future { local })
            } else {
                match (local, remote) {
-                    (Some(local), Some(remote)) if local.metadata != remote => {
+                    (Some(local), Some(remote)) if local != remote => {
                        Ok(UseRemote { local, remote })
                    }
                    (Some(x), Some(_)) => Ok(UseLocal(x)),
@@ -197,12 +177,12 @@ pub(super) fn cleanup(path: &Utf8Path, kind: &str) -> anyhow::Result<()> {
 }

 pub(super) fn cleanup_local_file_for_remote(
-    local: &LocalLayerFileMetadata,
+    path: &Utf8Path,
+    local: &LayerFileMetadata,
    remote: &LayerFileMetadata,
 ) -> anyhow::Result<()> {
-    let local_size = local.metadata.file_size();
+    let local_size = local.file_size();
    let remote_size = remote.file_size();
-    let path = &local.local_path;

    let file_name = path.file_name().expect("must be file path");
    tracing::warn!("removing local file {file_name:?} because it has unexpected length {local_size}; length in remote index is {remote_size}");
@@ -219,7 +199,7 @@ pub(super) fn cleanup_local_file_for_remote(

 pub(super) fn cleanup_future_layer(
    path: &Utf8Path,
-    name: &LayerName,
+    name: &LayerFileName,
    disk_consistent_lsn: Lsn,
 ) -> anyhow::Result<()> {
    // future image layers are allowed to be produced always for not yet flushed to disk
@@ -231,14 +211,12 @@ pub(super) fn cleanup_future_layer(
 }

 pub(super) fn cleanup_local_only_file(
-    name: &LayerName,
-    local: &LocalLayerFileMetadata,
+    path: &Utf8Path,
+    name: &LayerFileName,
+    local: &LayerFileMetadata,
 ) -> anyhow::Result<()> {
    let kind = name.kind();
-    tracing::info!(
-        "found local-only {kind} layer {name}, metadata {:?}",
-        local.metadata
-    );
-    std::fs::remove_file(&local.local_path)?;
+    tracing::info!("found local-only {kind} layer {name}, metadata {local:?}");
+    std::fs::remove_file(path)?;
    Ok(())
 }
--- a/pageserver/src/tenant/timeline/layer_manager.rs
+++ b/pageserver/src/tenant/timeline/layer_manager.rs
@@ -9,7 +9,6 @@ use utils::{

 use crate::{
    config::PageServerConf,
-    context::RequestContext,
    metrics::TimelineMetrics,
    tenant::{
        layer_map::{BatchedUpdates, LayerMap},
@@ -70,7 +69,6 @@ impl LayerManager {
        conf: &'static PageServerConf,
        timeline_id: TimelineId,
        tenant_shard_id: TenantShardId,
-        ctx: &RequestContext,
    ) -> Result<Arc<InMemoryLayer>> {
        ensure!(lsn.is_aligned());

@@ -107,7 +105,7 @@ impl LayerManager {
            );

            let new_layer =
-                InMemoryLayer::create(conf, timeline_id, tenant_shard_id, start_lsn, ctx).await?;
+                InMemoryLayer::create(conf, timeline_id, tenant_shard_id, start_lsn).await?;
            let layer = Arc::new(new_layer);

            self.layer_map.open_layer = Some(layer.clone());
@@ -207,24 +205,6 @@ impl LayerManager {
        updates.flush();
    }

-    /// Called when compaction is completed.
-    pub(crate) fn rewrite_layers(
-        &mut self,
-        rewrite_layers: &[(Layer, ResidentLayer)],
-        drop_layers: &[Layer],
-        _metrics: &TimelineMetrics,
-    ) {
-        let mut updates = self.layer_map.batch_update();
-
-        // TODO: implement rewrites (currently this code path only used for drops)
-        assert!(rewrite_layers.is_empty());
-
-        for l in drop_layers {
-            Self::delete_historic_layer(l, &mut updates, &mut self.layer_fmgr);
-        }
-        updates.flush();
-    }
-
    /// Called when garbage collect has selected the layers to be removed.
    pub(crate) fn finish_gc_timeline(&mut self, gc_layers: &[Layer]) {
        let mut updates = self.layer_map.batch_update();
@@ -296,7 +276,7 @@ impl<T: AsLayerDesc + Clone> LayerFileManager<T> {
        // A layer's descriptor is present in the LayerMap => the LayerFileManager contains a layer for the descriptor.
        self.0
            .get(&desc.key())
-            .with_context(|| format!("get layer from desc: {}", desc.layer_name()))
+            .with_context(|| format!("get layer from desc: {}", desc.filename()))
            .expect("not found")
            .clone()
    }
--- a/pageserver/src/tenant/upload_queue.rs
+++ b/pageserver/src/tenant/upload_queue.rs
@@ -1,9 +1,8 @@
-use super::storage_layer::LayerName;
+use super::storage_layer::LayerFileName;
 use super::storage_layer::ResidentLayer;
 use crate::tenant::metadata::TimelineMetadata;
 use crate::tenant::remote_timeline_client::index::IndexPart;
 use crate::tenant::remote_timeline_client::index::LayerFileMetadata;
-use crate::tenant::remote_timeline_client::index::Lineage;
 use std::collections::{HashMap, VecDeque};
 use std::fmt::Debug;

@@ -46,7 +45,7 @@ pub(crate) struct UploadQueueInitialized {

    /// All layer files stored in the remote storage, taking into account all
    /// in-progress and queued operations
-    pub(crate) latest_files: HashMap<LayerName, LayerFileMetadata>,
+    pub(crate) latest_files: HashMap<LayerFileName, LayerFileMetadata>,

    /// How many file uploads or deletions been scheduled, since the
    /// last (scheduling of) metadata index upload?
@@ -57,9 +56,6 @@ pub(crate) struct UploadQueueInitialized {
    /// DANGER: do not return to outside world, e.g., safekeepers.
    pub(crate) latest_metadata: TimelineMetadata,

-    /// Part of the flattened "next" `index_part.json`.
-    pub(crate) latest_lineage: Lineage,
-
    /// `disk_consistent_lsn` from the last metadata file that was successfully
    /// uploaded. `Lsn(0)` if nothing was uploaded yet.
    /// Unlike `latest_files` or `latest_metadata`, this value is never ahead.
@@ -93,7 +89,7 @@ pub(crate) struct UploadQueueInitialized {
    /// Putting this behind a testing feature to catch problems in tests, but assuming we could have a
    /// bug causing leaks, then it's better to not leave this enabled for production builds.
    #[cfg(feature = "testing")]
-    pub(crate) dangling_files: HashMap<LayerName, Generation>,
+    pub(crate) dangling_files: HashMap<LayerFileName, Generation>,

    /// Set to true when we have inserted the `UploadOp::Shutdown` into the `inprogress_tasks`.
    pub(crate) shutting_down: bool,
@@ -175,7 +171,6 @@ impl UploadQueue {
            latest_files: HashMap::new(),
            latest_files_changes_since_metadata_upload_scheduled: 0,
            latest_metadata: metadata.clone(),
-            latest_lineage: Lineage::default(),
            projected_remote_consistent_lsn: None,
            visible_remote_consistent_lsn: Arc::new(AtomicLsn::new(0)),
            // what follows are boring default initializations
@@ -223,7 +218,6 @@ impl UploadQueue {
            latest_files: files,
            latest_files_changes_since_metadata_upload_scheduled: 0,
            latest_metadata: index_part.metadata.clone(),
-            latest_lineage: index_part.lineage.clone(),
            projected_remote_consistent_lsn: Some(index_part.metadata.disk_consistent_lsn()),
            visible_remote_consistent_lsn: Arc::new(
                index_part.metadata.disk_consistent_lsn().into(),
@@ -287,7 +281,7 @@ pub(crate) struct UploadTask {
 /// for timeline deletion, which skips this queue and goes directly to DeletionQueue.
 #[derive(Debug)]
 pub(crate) struct Delete {
-    pub(crate) layers: Vec<(LayerName, LayerFileMetadata)>,
+    pub(crate) layers: Vec<(LayerFileName, LayerFileMetadata)>,
 }

 #[derive(Debug)]
@@ -296,7 +290,7 @@ pub(crate) enum UploadOp {
    UploadLayer(ResidentLayer, LayerFileMetadata),

    /// Upload the metadata file
-    UploadMetadata(Box<IndexPart>, Lsn),
+    UploadMetadata(IndexPart, Lsn),

    /// Delete layer files
    Delete(Delete),
--- a/pageserver/src/tenant/vectored_blob_io.rs
+++ b/pageserver/src/tenant/vectored_blob_io.rs
@@ -23,7 +23,6 @@ use pageserver_api::key::Key;
 use utils::lsn::Lsn;
 use utils::vec_map::VecMap;

-use crate::context::RequestContext;
 use crate::virtual_file::VirtualFile;

 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
@@ -286,7 +285,6 @@ impl<'a> VectoredBlobReader<'a> {
        &self,
        read: &VectoredRead,
        buf: BytesMut,
-        ctx: &RequestContext,
    ) -> Result<VectoredBlobsBuf, std::io::Error> {
        assert!(read.size() > 0);
        assert!(
@@ -297,7 +295,7 @@ impl<'a> VectoredBlobReader<'a> {
        );
        let buf = self
            .file
-            .read_exact_at_n(buf, read.start, read.size(), ctx)
+            .read_exact_at_n(buf, read.start, read.size())
            .await?;

        let blobs_at = read.blobs_at.as_slice();
--- a/pageserver/src/virtual_file.rs
+++ b/pageserver/src/virtual_file.rs
@@ -344,23 +344,16 @@ macro_rules! with_file {

 impl VirtualFile {
    /// Open a file in read-only mode. Like File::open.
-    pub async fn open(
-        path: &Utf8Path,
-        ctx: &RequestContext,
-    ) -> Result<VirtualFile, std::io::Error> {
-        Self::open_with_options(path, OpenOptions::new().read(true), ctx).await
+    pub async fn open(path: &Utf8Path) -> Result<VirtualFile, std::io::Error> {
+        Self::open_with_options(path, OpenOptions::new().read(true)).await
    }

    /// Create a new file for writing. If the file exists, it will be truncated.
    /// Like File::create.
-    pub async fn create(
-        path: &Utf8Path,
-        ctx: &RequestContext,
-    ) -> Result<VirtualFile, std::io::Error> {
+    pub async fn create(path: &Utf8Path) -> Result<VirtualFile, std::io::Error> {
        Self::open_with_options(
            path,
            OpenOptions::new().write(true).create(true).truncate(true),
-            ctx,
        )
        .await
    }
@@ -373,7 +366,6 @@ impl VirtualFile {
    pub async fn open_with_options(
        path: &Utf8Path,
        open_options: &OpenOptions,
-        _ctx: &RequestContext, /* TODO: carry a pointer to the metrics in the RequestContext instead of the parsing https://github.com/neondatabase/neon/issues/6107 */
    ) -> Result<VirtualFile, std::io::Error> {
        let path_str = path.to_string();
        let parts = path_str.split('/').collect::<Vec<&str>>();
@@ -584,34 +576,21 @@ impl VirtualFile {
        Ok(self.pos)
    }

-    pub async fn read_exact_at<B>(
-        &self,
-        buf: B,
-        offset: u64,
-        ctx: &RequestContext,
-    ) -> Result<B, Error>
+    pub async fn read_exact_at<B>(&self, buf: B, offset: u64) -> Result<B, Error>
    where
        B: IoBufMut + Send,
    {
-        let (buf, res) = read_exact_at_impl(buf, offset, None, |buf, offset| {
-            self.read_at(buf, offset, ctx)
-        })
-        .await;
+        let (buf, res) =
+            read_exact_at_impl(buf, offset, None, |buf, offset| self.read_at(buf, offset)).await;
        res.map(|()| buf)
    }

-    pub async fn read_exact_at_n<B>(
-        &self,
-        buf: B,
-        offset: u64,
-        count: usize,
-        ctx: &RequestContext,
-    ) -> Result<B, Error>
+    pub async fn read_exact_at_n<B>(&self, buf: B, offset: u64, count: usize) -> Result<B, Error>
    where
        B: IoBufMut + Send,
    {
        let (buf, res) = read_exact_at_impl(buf, offset, Some(count), |buf, offset| {
-            self.read_at(buf, offset, ctx)
+            self.read_at(buf, offset)
        })
        .await;
        res.map(|()| buf)
@@ -622,13 +601,12 @@ impl VirtualFile {
        &self,
        page: PageWriteGuard<'static>,
        offset: u64,
-        ctx: &RequestContext,
    ) -> Result<PageWriteGuard<'static>, Error> {
        let buf = PageWriteGuardBuf {
            page,
            init_up_to: 0,
        };
-        let res = self.read_exact_at(buf, offset, ctx).await;
+        let res = self.read_exact_at(buf, offset).await;
        res.map(|PageWriteGuardBuf { page, .. }| page)
            .map_err(|e| Error::new(ErrorKind::Other, e))
    }
@@ -721,12 +699,7 @@ impl VirtualFile {
        (buf, Ok(n))
    }

-    pub(crate) async fn read_at<B>(
-        &self,
-        buf: B,
-        offset: u64,
-        _ctx: &RequestContext, /* TODO: use for metrics: https://github.com/neondatabase/neon/issues/6107 */
-    ) -> (B, Result<usize, Error>)
+    pub(crate) async fn read_at<B>(&self, buf: B, offset: u64) -> (B, Result<usize, Error>)
    where
        B: tokio_epoll_uring::BoundedBufMut + Send,
    {
@@ -1047,21 +1020,20 @@ impl VirtualFile {
    pub(crate) async fn read_blk(
        &self,
        blknum: u32,
-        ctx: &RequestContext,
    ) -> Result<crate::tenant::block_io::BlockLease<'_>, std::io::Error> {
        use crate::page_cache::PAGE_SZ;
        let buf = vec![0; PAGE_SZ];
        let buf = self
-            .read_exact_at(buf, blknum as u64 * (PAGE_SZ as u64), ctx)
+            .read_exact_at(buf, blknum as u64 * (PAGE_SZ as u64))
            .await?;
        Ok(crate::tenant::block_io::BlockLease::Vec(buf))
    }

-    async fn read_to_end(&mut self, buf: &mut Vec<u8>, ctx: &RequestContext) -> Result<(), Error> {
+    async fn read_to_end(&mut self, buf: &mut Vec<u8>) -> Result<(), Error> {
        let mut tmp = vec![0; 128];
        loop {
            let res;
-            (tmp, res) = self.read_at(tmp, self.pos, ctx).await;
+            (tmp, res) = self.read_at(tmp, self.pos).await;
            match res {
                Ok(0) => return Ok(()),
                Ok(n) => {
@@ -1187,6 +1159,7 @@ mod tests {
    use rand::seq::SliceRandom;
    use rand::thread_rng;
    use rand::Rng;
+    use std::future::Future;
    use std::io::Write;
    use std::os::unix::fs::FileExt;
    use std::sync::Arc;
@@ -1203,14 +1176,9 @@ mod tests {
    }

    impl MaybeVirtualFile {
-        async fn read_exact_at(
-            &self,
-            mut buf: Vec<u8>,
-            offset: u64,
-            ctx: &RequestContext,
-        ) -> Result<Vec<u8>, Error> {
+        async fn read_exact_at(&self, mut buf: Vec<u8>, offset: u64) -> Result<Vec<u8>, Error> {
            match self {
-                MaybeVirtualFile::VirtualFile(file) => file.read_exact_at(buf, offset, ctx).await,
+                MaybeVirtualFile::VirtualFile(file) => file.read_exact_at(buf, offset).await,
                MaybeVirtualFile::File(file) => file.read_exact_at(&mut buf, offset).map(|()| buf),
            }
        }
@@ -1262,13 +1230,13 @@ mod tests {

        // Helper function to slurp contents of a file, starting at the current position,
        // into a string
-        async fn read_string(&mut self, ctx: &RequestContext) -> Result<String, Error> {
+        async fn read_string(&mut self) -> Result<String, Error> {
            use std::io::Read;
            let mut buf = String::new();
            match self {
                MaybeVirtualFile::VirtualFile(file) => {
                    let mut buf = Vec::new();
-                    file.read_to_end(&mut buf, ctx).await?;
+                    file.read_to_end(&mut buf).await?;
                    return Ok(String::from_utf8(buf).unwrap());
                }
                MaybeVirtualFile::File(file) => {
@@ -1279,14 +1247,9 @@ mod tests {
        }

        // Helper function to slurp a portion of a file into a string
-        async fn read_string_at(
-            &mut self,
-            pos: u64,
-            len: usize,
-            ctx: &RequestContext,
-        ) -> Result<String, Error> {
+        async fn read_string_at(&mut self, pos: u64, len: usize) -> Result<String, Error> {
            let buf = vec![0; len];
-            let buf = self.read_exact_at(buf, pos, ctx).await?;
+            let buf = self.read_exact_at(buf, pos).await?;
            Ok(String::from_utf8(buf).unwrap())
        }
    }
@@ -1300,101 +1263,73 @@ mod tests {
        // results with VirtualFiles as with native Files. (Except that with
        // native files, you will run out of file descriptors if the ulimit
        // is low enough.)
-        struct A;
-
-        impl Adapter for A {
-            async fn open(
-                path: Utf8PathBuf,
-                opts: OpenOptions,
-                ctx: &RequestContext,
-            ) -> Result<MaybeVirtualFile, anyhow::Error> {
-                let vf = VirtualFile::open_with_options(&path, &opts, ctx).await?;
-                Ok(MaybeVirtualFile::VirtualFile(vf))
-            }
-        }
-        test_files::<A>("virtual_files").await
+        test_files("virtual_files", |path, open_options| async move {
+            let vf = VirtualFile::open_with_options(&path, &open_options).await?;
+            Ok(MaybeVirtualFile::VirtualFile(vf))
+        })
+        .await
    }

    #[tokio::test]
    async fn test_physical_files() -> anyhow::Result<()> {
-        struct B;
-
-        impl Adapter for B {
-            async fn open(
-                path: Utf8PathBuf,
-                opts: OpenOptions,
-                _ctx: &RequestContext,
-            ) -> Result<MaybeVirtualFile, anyhow::Error> {
-                Ok(MaybeVirtualFile::File({
-                    let owned_fd = opts.open(path.as_std_path()).await?;
-                    File::from(owned_fd)
-                }))
-            }
-        }
-
-        test_files::<B>("physical_files").await
+        test_files("physical_files", |path, open_options| async move {
+            Ok(MaybeVirtualFile::File({
+                let owned_fd = open_options.open(path.as_std_path()).await?;
+                File::from(owned_fd)
+            }))
+        })
+        .await
    }

-    /// This is essentially a closure which returns a MaybeVirtualFile, but because rust edition
-    /// 2024 is not yet out with new lifetime capture or outlives rules, this is a async function
-    /// in trait which benefits from the new lifetime capture rules already.
-    trait Adapter {
-        async fn open(
-            path: Utf8PathBuf,
-            opts: OpenOptions,
-            ctx: &RequestContext,
-        ) -> Result<MaybeVirtualFile, anyhow::Error>;
-    }
-
-    async fn test_files<A>(testname: &str) -> anyhow::Result<()>
+    async fn test_files<OF, FT>(testname: &str, openfunc: OF) -> anyhow::Result<()>
    where
-        A: Adapter,
+        OF: Fn(Utf8PathBuf, OpenOptions) -> FT,
+        FT: Future<Output = Result<MaybeVirtualFile, std::io::Error>>,
    {
        let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error);
        let testdir = crate::config::PageServerConf::test_repo_dir(testname);
        std::fs::create_dir_all(&testdir)?;

        let path_a = testdir.join("file_a");
-        let mut file_a = A::open(
+        let mut file_a = openfunc(
            path_a.clone(),
            OpenOptions::new()
                .write(true)
                .create(true)
                .truncate(true)
                .to_owned(),
-            &ctx,
        )
        .await?;
        file_a.write_all(b"foobar".to_vec(), &ctx).await?;

        // cannot read from a file opened in write-only mode
-        let _ = file_a.read_string(&ctx).await.unwrap_err();
+        let _ = file_a.read_string().await.unwrap_err();

        // Close the file and re-open for reading
-        let mut file_a = A::open(path_a, OpenOptions::new().read(true).to_owned(), &ctx).await?;
+        let mut file_a = openfunc(path_a, OpenOptions::new().read(true).to_owned()).await?;

        // cannot write to a file opened in read-only mode
        let _ = file_a.write_all(b"bar".to_vec(), &ctx).await.unwrap_err();

        // Try simple read
-        assert_eq!("foobar", file_a.read_string(&ctx).await?);
+        assert_eq!("foobar", file_a.read_string().await?);

        // It's positioned at the EOF now.
-        assert_eq!("", file_a.read_string(&ctx).await?);
+        assert_eq!("", file_a.read_string().await?);

        // Test seeks.
        assert_eq!(file_a.seek(SeekFrom::Start(1)).await?, 1);
-        assert_eq!("oobar", file_a.read_string(&ctx).await?);
+        assert_eq!("oobar", file_a.read_string().await?);

        assert_eq!(file_a.seek(SeekFrom::End(-2)).await?, 4);
-        assert_eq!("ar", file_a.read_string(&ctx).await?);
+        assert_eq!("ar", file_a.read_string().await?);

        assert_eq!(file_a.seek(SeekFrom::Start(1)).await?, 1);
        assert_eq!(file_a.seek(SeekFrom::Current(2)).await?, 3);
-        assert_eq!("bar", file_a.read_string(&ctx).await?);
+        assert_eq!("bar", file_a.read_string().await?);

        assert_eq!(file_a.seek(SeekFrom::Current(-5)).await?, 1);
-        assert_eq!("oobar", file_a.read_string(&ctx).await?);
+        assert_eq!("oobar", file_a.read_string().await?);

        // Test erroneous seeks to before byte 0
        file_a.seek(SeekFrom::End(-7)).await.unwrap_err();
@@ -1402,11 +1337,11 @@ mod tests {
        file_a.seek(SeekFrom::Current(-2)).await.unwrap_err();

        // the erroneous seek should have left the position unchanged
-        assert_eq!("oobar", file_a.read_string(&ctx).await?);
+        assert_eq!("oobar", file_a.read_string().await?);

        // Create another test file, and try FileExt functions on it.
        let path_b = testdir.join("file_b");
-        let mut file_b = A::open(
+        let mut file_b = openfunc(
            path_b.clone(),
            OpenOptions::new()
                .read(true)
@@ -1414,13 +1349,12 @@ mod tests {
                .create(true)
                .truncate(true)
                .to_owned(),
-            &ctx,
        )
        .await?;
        file_b.write_all_at(b"BAR".to_vec(), 3, &ctx).await?;
        file_b.write_all_at(b"FOO".to_vec(), 0, &ctx).await?;

-        assert_eq!(file_b.read_string_at(2, 3, &ctx).await?, "OBA");
+        assert_eq!(file_b.read_string_at(2, 3).await?, "OBA");

        // Open a lot of files, enough to cause some evictions. (Or to be precise,
        // open the same file many times. The effect is the same.)
@@ -1430,13 +1364,9 @@ mod tests {

        let mut vfiles = Vec::new();
        for _ in 0..100 {
-            let mut vfile = A::open(
-                path_b.clone(),
-                OpenOptions::new().read(true).to_owned(),
-                &ctx,
-            )
-            .await?;
-            assert_eq!("FOOBAR", vfile.read_string(&ctx).await?);
+            let mut vfile =
+                openfunc(path_b.clone(), OpenOptions::new().read(true).to_owned()).await?;
+            assert_eq!("FOOBAR", vfile.read_string().await?);
            vfiles.push(vfile);
        }

@@ -1445,13 +1375,13 @@ mod tests {

        // The underlying file descriptor for 'file_a' should be closed now. Try to read
        // from it again. We left the file positioned at offset 1 above.
-        assert_eq!("oobar", file_a.read_string(&ctx).await?);
+        assert_eq!("oobar", file_a.read_string().await?);

        // Check that all the other FDs still work too. Use them in random order for
        // good measure.
        vfiles.as_mut_slice().shuffle(&mut thread_rng());
        for vfile in vfiles.iter_mut() {
-            assert_eq!("OOBAR", vfile.read_string_at(1, 5, &ctx).await?);
+            assert_eq!("OOBAR", vfile.read_string_at(1, 5).await?);
        }

        Ok(())
@@ -1467,7 +1397,6 @@ mod tests {
        const THREADS: usize = 100;
        const SAMPLE: [u8; SIZE] = [0xADu8; SIZE];

-        let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error);
        let testdir = crate::config::PageServerConf::test_repo_dir("vfile_concurrency");
        std::fs::create_dir_all(&testdir)?;

@@ -1481,12 +1410,8 @@ mod tests {
        // Open the file many times.
        let mut files = Vec::new();
        for _ in 0..VIRTUAL_FILES {
-            let f = VirtualFile::open_with_options(
-                &test_file_path,
-                OpenOptions::new().read(true),
-                &ctx,
-            )
-            .await?;
+            let f = VirtualFile::open_with_options(&test_file_path, OpenOptions::new().read(true))
+                .await?;
            files.push(f);
        }
        let files = Arc::new(files);
@@ -1500,13 +1425,12 @@ mod tests {
        let mut hdls = Vec::new();
        for _threadno in 0..THREADS {
            let files = files.clone();
-            let ctx = ctx.detached_child(TaskKind::UnitTest, DownloadBehavior::Error);
            let hdl = rt.spawn(async move {
                let mut buf = vec![0u8; SIZE];
                let mut rng = rand::rngs::OsRng;
                for _ in 1..1000 {
                    let f = &files[rng.gen_range(0..files.len())];
-                    buf = f.read_exact_at(buf, 0, &ctx).await.unwrap();
+                    buf = f.read_exact_at(buf, 0).await.unwrap();
                    assert!(buf == SAMPLE);
                }
            });
@@ -1522,7 +1446,6 @@ mod tests {

    #[tokio::test]
    async fn test_atomic_overwrite_basic() {
-        let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error);
        let testdir = crate::config::PageServerConf::test_repo_dir("test_atomic_overwrite_basic");
        std::fs::create_dir_all(&testdir).unwrap();

@@ -1532,8 +1455,8 @@ mod tests {
        VirtualFile::crashsafe_overwrite(path.clone(), tmp_path.clone(), b"foo".to_vec())
            .await
            .unwrap();
-        let mut file = MaybeVirtualFile::from(VirtualFile::open(&path, &ctx).await.unwrap());
-        let post = file.read_string(&ctx).await.unwrap();
+        let mut file = MaybeVirtualFile::from(VirtualFile::open(&path).await.unwrap());
+        let post = file.read_string().await.unwrap();
        assert_eq!(post, "foo");
        assert!(!tmp_path.exists());
        drop(file);
@@ -1541,8 +1464,8 @@ mod tests {
        VirtualFile::crashsafe_overwrite(path.clone(), tmp_path.clone(), b"bar".to_vec())
            .await
            .unwrap();
-        let mut file = MaybeVirtualFile::from(VirtualFile::open(&path, &ctx).await.unwrap());
-        let post = file.read_string(&ctx).await.unwrap();
+        let mut file = MaybeVirtualFile::from(VirtualFile::open(&path).await.unwrap());
+        let post = file.read_string().await.unwrap();
        assert_eq!(post, "bar");
        assert!(!tmp_path.exists());
        drop(file);
@@ -1550,7 +1473,6 @@ mod tests {

    #[tokio::test]
    async fn test_atomic_overwrite_preexisting_tmp() {
-        let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error);
        let testdir =
            crate::config::PageServerConf::test_repo_dir("test_atomic_overwrite_preexisting_tmp");
        std::fs::create_dir_all(&testdir).unwrap();
@@ -1565,8 +1487,8 @@ mod tests {
            .await
            .unwrap();

-        let mut file = MaybeVirtualFile::from(VirtualFile::open(&path, &ctx).await.unwrap());
-        let post = file.read_string(&ctx).await.unwrap();
+        let mut file = MaybeVirtualFile::from(VirtualFile::open(&path).await.unwrap());
+        let post = file.read_string().await.unwrap();
        assert_eq!(post, "foo");
        assert!(!tmp_path.exists());
        drop(file);
--- a/pageserver/src/walredo.rs
+++ b/pageserver/src/walredo.rs
@@ -153,7 +153,10 @@ impl PostgresRedoManager {
            process: self
                .redo_process
                .get()
-                .map(|p| WalRedoManagerProcessStatus { pid: p.id() }),
+                .map(|p| WalRedoManagerProcessStatus {
+                    pid: p.id(),
+                    kind: std::borrow::Cow::Borrowed(p.kind().into()),
+                }),
        }
    }
 }
--- a/pageserver/src/walredo/process.rs
+++ b/pageserver/src/walredo/process.rs
@@ -1,10 +1,7 @@
-/// Layer of indirection previously used to support multiple implementations.
-/// Subject to removal: <https://github.com/neondatabase/neon/issues/7753>
 use std::time::Duration;

 use bytes::Bytes;
 use pageserver_api::{reltag::RelTag, shard::TenantShardId};
-use tracing::warn;
 use utils::lsn::Lsn;

 use crate::{config::PageServerConf, walrecord::NeonWalRecord};
@@ -15,6 +12,7 @@ mod protocol;

 mod process_impl {
    pub(super) mod process_async;
+    pub(super) mod process_std;
 }

 #[derive(
@@ -36,7 +34,10 @@ pub enum Kind {
    Async,
 }

-pub(crate) struct Process(process_impl::process_async::WalRedoProcess);
+pub(crate) enum Process {
+    Sync(process_impl::process_std::WalRedoProcess),
+    Async(process_impl::process_async::WalRedoProcess),
+}

 impl Process {
    #[inline(always)]
@@ -45,17 +46,18 @@ impl Process {
        tenant_shard_id: TenantShardId,
        pg_version: u32,
    ) -> anyhow::Result<Self> {
-        if conf.walredo_process_kind != Kind::Async {
-            warn!(
-                configured = %conf.walredo_process_kind,
-                "the walredo_process_kind setting has been turned into a no-op, using async implementation"
-            );
-        }
-        Ok(Self(process_impl::process_async::WalRedoProcess::launch(
-            conf,
-            tenant_shard_id,
-            pg_version,
-        )?))
+        Ok(match conf.walredo_process_kind {
+            Kind::Sync => Self::Sync(process_impl::process_std::WalRedoProcess::launch(
+                conf,
+                tenant_shard_id,
+                pg_version,
+            )?),
+            Kind::Async => Self::Async(process_impl::process_async::WalRedoProcess::launch(
+                conf,
+                tenant_shard_id,
+                pg_version,
+            )?),
+        })
    }

    #[inline(always)]
@@ -67,12 +69,29 @@ impl Process {
        records: &[(Lsn, NeonWalRecord)],
        wal_redo_timeout: Duration,
    ) -> anyhow::Result<Bytes> {
-        self.0
-            .apply_wal_records(rel, blknum, base_img, records, wal_redo_timeout)
-            .await
+        match self {
+            Process::Sync(p) => {
+                p.apply_wal_records(rel, blknum, base_img, records, wal_redo_timeout)
+                    .await
+            }
+            Process::Async(p) => {
+                p.apply_wal_records(rel, blknum, base_img, records, wal_redo_timeout)
+                    .await
+            }
+        }
    }

    pub(crate) fn id(&self) -> u32 {
-        self.0.id()
+        match self {
+            Process::Sync(p) => p.id(),
+            Process::Async(p) => p.id(),
+        }
+    }
+
+    pub(crate) fn kind(&self) -> Kind {
+        match self {
+            Process::Sync(_) => Kind::Sync,
+            Process::Async(_) => Kind::Async,
+        }
    }
 }
--- a/pageserver/src/walredo/process/process_impl/process_std.rs
+++ b/pageserver/src/walredo/process/process_impl/process_std.rs
@@ -0,0 +1,405 @@
+use self::no_leak_child::NoLeakChild;
+use crate::{
+    config::PageServerConf,
+    metrics::{WalRedoKillCause, WAL_REDO_PROCESS_COUNTERS, WAL_REDO_RECORD_COUNTER},
+    walrecord::NeonWalRecord,
+    walredo::process::{no_leak_child, protocol},
+};
+use anyhow::Context;
+use bytes::Bytes;
+use nix::poll::{PollFd, PollFlags};
+use pageserver_api::{reltag::RelTag, shard::TenantShardId};
+use postgres_ffi::BLCKSZ;
+use std::os::fd::AsRawFd;
+#[cfg(feature = "testing")]
+use std::sync::atomic::AtomicUsize;
+use std::{
+    collections::VecDeque,
+    io::{Read, Write},
+    process::{ChildStdin, ChildStdout, Command, Stdio},
+    sync::{Mutex, MutexGuard},
+    time::Duration,
+};
+use tracing::{debug, error, instrument, Instrument};
+use utils::{lsn::Lsn, nonblock::set_nonblock};
+
+pub struct WalRedoProcess {
+    #[allow(dead_code)]
+    conf: &'static PageServerConf,
+    tenant_shard_id: TenantShardId,
+    // Some() on construction, only becomes None on Drop.
+    child: Option<NoLeakChild>,
+    stdout: Mutex<ProcessOutput>,
+    stdin: Mutex<ProcessInput>,
+    /// Counter to separate same sized walredo inputs failing at the same millisecond.
+    #[cfg(feature = "testing")]
+    dump_sequence: AtomicUsize,
+}
+
+struct ProcessInput {
+    stdin: ChildStdin,
+    n_requests: usize,
+}
+
+struct ProcessOutput {
+    stdout: ChildStdout,
+    pending_responses: VecDeque<Option<Bytes>>,
+    n_processed_responses: usize,
+}
+
+impl WalRedoProcess {
+    //
+    // Start postgres binary in special WAL redo mode.
+    //
+    #[instrument(skip_all,fields(pg_version=pg_version))]
+    pub(crate) fn launch(
+        conf: &'static PageServerConf,
+        tenant_shard_id: TenantShardId,
+        pg_version: u32,
+    ) -> anyhow::Result<Self> {
+        crate::span::debug_assert_current_span_has_tenant_id();
+
+        let pg_bin_dir_path = conf.pg_bin_dir(pg_version).context("pg_bin_dir")?; // TODO these should be infallible.
+        let pg_lib_dir_path = conf.pg_lib_dir(pg_version).context("pg_lib_dir")?;
+
+        use no_leak_child::NoLeakChildCommandExt;
+        // Start postgres itself
+        let child = Command::new(pg_bin_dir_path.join("postgres"))
+            // the first arg must be --wal-redo so the child process enters into walredo mode
+            .arg("--wal-redo")
+            // the child doesn't process this arg, but having it in the argv helps identify the
+            // walredo process for a particular tenant when debugging a pageserver
+            .args(["--tenant-shard-id", &format!("{tenant_shard_id}")])
+            .stdin(Stdio::piped())
+            .stderr(Stdio::piped())
+            .stdout(Stdio::piped())
+            .env_clear()
+            .env("LD_LIBRARY_PATH", &pg_lib_dir_path)
+            .env("DYLD_LIBRARY_PATH", &pg_lib_dir_path)
+            // NB: The redo process is not trusted after we sent it the first
+            // walredo work. Before that, it is trusted. Specifically, we trust
+            // it to
+            // 1. close all file descriptors except stdin, stdout, stderr because
+            //    pageserver might not be 100% diligent in setting FD_CLOEXEC on all
+            //    the files it opens, and
+            // 2. to use seccomp to sandbox itself before processing the first
+            //    walredo request.
+            .spawn_no_leak_child(tenant_shard_id)
+            .context("spawn process")?;
+        WAL_REDO_PROCESS_COUNTERS.started.inc();
+        let mut child = scopeguard::guard(child, |child| {
+            error!("killing wal-redo-postgres process due to a problem during launch");
+            child.kill_and_wait(WalRedoKillCause::Startup);
+        });
+
+        let stdin = child.stdin.take().unwrap();
+        let stdout = child.stdout.take().unwrap();
+        let stderr = child.stderr.take().unwrap();
+        let stderr = tokio::process::ChildStderr::from_std(stderr)
+            .context("convert to tokio::ChildStderr")?;
+        macro_rules! set_nonblock_or_log_err {
+        ($file:ident) => {{
+            let res = set_nonblock($file.as_raw_fd());
+            if let Err(e) = &res {
+                error!(error = %e, file = stringify!($file), pid = child.id(), "set_nonblock failed");
+            }
+            res
+        }};
+    }
+        set_nonblock_or_log_err!(stdin)?;
+        set_nonblock_or_log_err!(stdout)?;
+
+        // all fallible operations post-spawn are complete, so get rid of the guard
+        let child = scopeguard::ScopeGuard::into_inner(child);
+
+        tokio::spawn(
+            async move {
+                scopeguard::defer! {
+                    debug!("wal-redo-postgres stderr_logger_task finished");
+                    crate::metrics::WAL_REDO_PROCESS_COUNTERS.active_stderr_logger_tasks_finished.inc();
+                }
+                debug!("wal-redo-postgres stderr_logger_task started");
+                crate::metrics::WAL_REDO_PROCESS_COUNTERS.active_stderr_logger_tasks_started.inc();
+
+                use tokio::io::AsyncBufReadExt;
+                let mut stderr_lines = tokio::io::BufReader::new(stderr);
+                let mut buf = Vec::new();
+                let res = loop {
+                    buf.clear();
+                    // TODO we don't trust the process to cap its stderr length.
+                    // Currently it can do unbounded Vec allocation.
+                    match stderr_lines.read_until(b'\n', &mut buf).await {
+                        Ok(0) => break Ok(()), // eof
+                        Ok(num_bytes) => {
+                            let output = String::from_utf8_lossy(&buf[..num_bytes]);
+                            error!(%output, "received output");
+                        }
+                        Err(e) => {
+                            break Err(e);
+                        }
+                    }
+                };
+                match res {
+                    Ok(()) => (),
+                    Err(e) => {
+                        error!(error=?e, "failed to read from walredo stderr");
+                    }
+                }
+            }.instrument(tracing::info_span!(parent: None, "wal-redo-postgres-stderr", pid = child.id(), tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %pg_version))
+        );
+
+        Ok(Self {
+            conf,
+            tenant_shard_id,
+            child: Some(child),
+            stdin: Mutex::new(ProcessInput {
+                stdin,
+                n_requests: 0,
+            }),
+            stdout: Mutex::new(ProcessOutput {
+                stdout,
+                pending_responses: VecDeque::new(),
+                n_processed_responses: 0,
+            }),
+            #[cfg(feature = "testing")]
+            dump_sequence: AtomicUsize::default(),
+        })
+    }
+
+    pub(crate) fn id(&self) -> u32 {
+        self.child
+            .as_ref()
+            .expect("must not call this during Drop")
+            .id()
+    }
+
+    // Apply given WAL records ('records') over an old page image. Returns
+    // new page image.
+    //
+    #[instrument(skip_all, fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug(), pid=%self.id()))]
+    pub(crate) async fn apply_wal_records(
+        &self,
+        rel: RelTag,
+        blknum: u32,
+        base_img: &Option<Bytes>,
+        records: &[(Lsn, NeonWalRecord)],
+        wal_redo_timeout: Duration,
+    ) -> anyhow::Result<Bytes> {
+        let tag = protocol::BufferTag { rel, blknum };
+        let input = self.stdin.lock().unwrap();
+
+        // Serialize all the messages to send the WAL redo process first.
+        //
+        // This could be problematic if there are millions of records to replay,
+        // but in practice the number of records is usually so small that it doesn't
+        // matter, and it's better to keep this code simple.
+        //
+        // Most requests start with a before-image with BLCKSZ bytes, followed by
+        // some other WAL records. Start with a buffer that can hold that
+        // comfortably.
+        let mut writebuf: Vec<u8> = Vec::with_capacity((BLCKSZ as usize) * 3);
+        protocol::build_begin_redo_for_block_msg(tag, &mut writebuf);
+        if let Some(img) = base_img {
+            protocol::build_push_page_msg(tag, img, &mut writebuf);
+        }
+        for (lsn, rec) in records.iter() {
+            if let NeonWalRecord::Postgres {
+                will_init: _,
+                rec: postgres_rec,
+            } = rec
+            {
+                protocol::build_apply_record_msg(*lsn, postgres_rec, &mut writebuf);
+            } else {
+                anyhow::bail!("tried to pass neon wal record to postgres WAL redo");
+            }
+        }
+        protocol::build_get_page_msg(tag, &mut writebuf);
+        WAL_REDO_RECORD_COUNTER.inc_by(records.len() as u64);
+
+        let res = self.apply_wal_records0(&writebuf, input, wal_redo_timeout);
+
+        if res.is_err() {
+            // not all of these can be caused by this particular input, but failures are so rare
+            // in tests that we capture all of them.
+            self.record_and_log(&writebuf);
+        }
+
+        res
+    }
+
+    fn apply_wal_records0(
+        &self,
+        writebuf: &[u8],
+        input: MutexGuard<ProcessInput>,
+        wal_redo_timeout: Duration,
+    ) -> anyhow::Result<Bytes> {
+        let mut proc = { input }; // TODO: remove this legacy rename, but this keeps the patch small.
+        let mut nwrite = 0usize;
+
+        while nwrite < writebuf.len() {
+            let mut stdin_pollfds = [PollFd::new(&proc.stdin, PollFlags::POLLOUT)];
+            let n = loop {
+                match nix::poll::poll(&mut stdin_pollfds[..], wal_redo_timeout.as_millis() as i32) {
+                    Err(nix::errno::Errno::EINTR) => continue,
+                    res => break res,
+                }
+            }?;
+
+            if n == 0 {
+                anyhow::bail!("WAL redo timed out");
+            }
+
+            // If 'stdin' is writeable, do write.
+            let in_revents = stdin_pollfds[0].revents().unwrap();
+            if in_revents & (PollFlags::POLLERR | PollFlags::POLLOUT) != PollFlags::empty() {
+                nwrite += proc.stdin.write(&writebuf[nwrite..])?;
+            }
+            if in_revents.contains(PollFlags::POLLHUP) {
+                // We still have more data to write, but the process closed the pipe.
+                anyhow::bail!("WAL redo process closed its stdin unexpectedly");
+            }
+        }
+        let request_no = proc.n_requests;
+        proc.n_requests += 1;
+        drop(proc);
+
+        // To improve walredo performance we separate sending requests from receiving
+        // responses. They are protected by different mutexes (input and output).
+        // If threads T1, T2, T3 send requests D1, D2, D3 to the walredo process,
+        // there is no guarantee that T1 will be the first to be granted the output mutex.
+        // To address this we maintain the number of sent requests, the number of processed
+        // responses, and a ring buffer of pending responses. After sending its request
+        // (under the input mutex), a thread remembers its request number. It then releases
+        // the input mutex, locks the output mutex, and reads responses into the ring buffer
+        // up to and including its own request number. Then it takes the corresponding element
+        // from the pending-responses ring buffer and removes all empty elements from the front,
+        // advancing the processed-responses counter.
+
+        let mut output = self.stdout.lock().unwrap();
+        let n_processed_responses = output.n_processed_responses;
+        while n_processed_responses + output.pending_responses.len() <= request_no {
+            // We expect the WAL redo process to respond with an 8k page image. We read it
+            // into this buffer.
+            let mut resultbuf = vec![0; BLCKSZ.into()];
+            let mut nresult: usize = 0; // # of bytes read into 'resultbuf' so far
+            while nresult < BLCKSZ.into() {
+                let mut stdout_pollfds = [PollFd::new(&output.stdout, PollFlags::POLLIN)];
+                // Wait for response data on stdout. Anything the child writes to its stderr is
+                // forwarded to the page server's log by the logger task spawned in `launch`.
+                let n = loop {
+                    match nix::poll::poll(
+                        &mut stdout_pollfds[..],
+                        wal_redo_timeout.as_millis() as i32,
+                    ) {
+                        Err(nix::errno::Errno::EINTR) => continue,
+                        res => break res,
+                    }
+                }?;
+
+                if n == 0 {
+                    anyhow::bail!("WAL redo timed out");
+                }
+
+                // If we have some data in stdout, read it to the result buffer.
+                let out_revents = stdout_pollfds[0].revents().unwrap();
+                if out_revents & (PollFlags::POLLERR | PollFlags::POLLIN) != PollFlags::empty() {
+                    nresult += output.stdout.read(&mut resultbuf[nresult..])?;
+                }
+                if out_revents.contains(PollFlags::POLLHUP) {
+                    anyhow::bail!("WAL redo process closed its stdout unexpectedly");
+                }
+            }
+            output
+                .pending_responses
+                .push_back(Some(Bytes::from(resultbuf)));
+        }
+        // Replace our request's response with None in `pending_responses`.
+        // Then make space in the ring buffer by clearing out any sequence of contiguous
+        // `None`'s from the front of `pending_responses`.
+        // NB: We can't just pop_front() because the front may hold other requests' responses:
+        // another requester might have grabbed the output mutex before us:
+        // T1: grab input mutex
+        // T1: send request_no 23
+        // T1: release input mutex
+        // T2: grab input mutex
+        // T2: send request_no 24
+        // T2: release input mutex
+        // T2: grab output mutex
+        // T2: n_processed_responses + output.pending_responses.len() <= request_no
+        //            23                                0                   24
+        // T2: enters poll loop that reads stdout
+        // T2: put response for 23 into pending_responses
+        // T2: put response for 24 into pending_responses
+        // pending_responses now looks like this: Front Some(response_23) Some(response_24) Back
+        // T2: takes its response_24
+        // pending_responses now looks like this: Front Some(response_23) None Back
+        // T2: does the while loop below
+        // pending_responses now looks like this: Front Some(response_23) None Back
+        // T2: releases output mutex
+        // T1: grabs output mutex
+        // T1: n_processed_responses + output.pending_responses.len() > request_no
+        //            23                                2                   23
+        // T1: skips poll loop that reads stdout
+        // T1: takes its response_23
+        // pending_responses now looks like this: Front None None Back
+        // T1: does the while loop below
+        // pending_responses now looks like this: Front Back
+        // n_processed_responses now has value 25
+        let res = output.pending_responses[request_no - n_processed_responses]
+            .take()
+            .expect("we own this request_no, nobody else is supposed to take it");
+        while let Some(front) = output.pending_responses.front() {
+            if front.is_none() {
+                output.pending_responses.pop_front();
+                output.n_processed_responses += 1;
+            } else {
+                break;
+            }
+        }
+        Ok(res)
+    }
+
+    #[cfg(feature = "testing")]
+    fn record_and_log(&self, writebuf: &[u8]) {
+        use std::sync::atomic::Ordering;
+
+        let millis = std::time::SystemTime::now()
+            .duration_since(std::time::SystemTime::UNIX_EPOCH)
+            .unwrap()
+            .as_millis();
+
+        let seq = self.dump_sequence.fetch_add(1, Ordering::Relaxed);
+
+        // these files will be collected to an allure report
+        let filename = format!("walredo-{millis}-{}-{seq}.walredo", writebuf.len());
+
+        let path = self.conf.tenant_path(&self.tenant_shard_id).join(&filename);
+
+        let res = std::fs::OpenOptions::new()
+            .write(true)
+            .create_new(true)
+            .read(true)
+            .open(path)
+            .and_then(|mut f| f.write_all(writebuf));
+
+        // trip up allowed_errors
+        if let Err(e) = res {
+            tracing::error!(target=%filename, length=writebuf.len(), "failed to write out the walredo errored input: {e}");
+        } else {
+            tracing::error!(filename, "erroring walredo input saved");
+        }
+    }
+
+    #[cfg(not(feature = "testing"))]
+    fn record_and_log(&self, _: &[u8]) {}
+}
+
+impl Drop for WalRedoProcess {
+    fn drop(&mut self) {
+        self.child
+            .take()
+            .expect("we only do this once")
+            .kill_and_wait(WalRedoKillCause::WalRedoProcessDrop);
+        // no way to wait for stderr_logger_task from Drop because that is async only
+    }
+}
--- a/pgxn/neon/Makefile
+++ b/pgxn/neon/Makefile
@@ -14,8 +14,7 @@ OBJS = \
 	relsize_cache.o \
 	walproposer.o \
 	walproposer_pg.o \
-	control_plane_connector.o \
-	walsender_hooks.o
+	control_plane_connector.o

 PG_CPPFLAGS = -I$(libpq_srcdir)
 SHLIB_LINK_INTERNAL = $(libpq)
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Arpad Müller	e97e86eb43	poetry lock	2024-05-06 12:58:34 +02:00
Arpad Müller	c9370d48de	Merge remote-tracking branch 'origin/main' into arpad/less_async_trait	2024-05-06 12:54:51 +02:00
Arpad Müller	6f714c308b	Adjust timeouts	2024-04-08 17:02:40 +02:00
Arpad Müller	2e6afaa642	Merge remote-tracking branch 'origin/main' into arpad/less_async_trait	2024-04-08 16:58:47 +02:00
Alexander Bayandin	8f0a0440ba	CI: reduce session timeout to 30 minutes	2024-04-08 12:29:24 +01:00
Alexander Bayandin	987dc01ed7	CI: set fix timeout value in seconds for regression tests	2024-04-05 15:05:07 +01:00
Alexander Bayandin	719e4ad580	Bump pytest-timeout from 2.1.0 to 2.3.1	2024-04-05 14:58:58 +01:00
Alexander Bayandin	e61b2a08b3	CI: set pytest timeout for regression test suite	2024-04-05 12:54:47 +01:00
Arpad Müller	cc89b46ae5	Merge branch 'main' into arpad/less_async_trait	2024-04-04 16:30:09 +02:00
Arpad Müller	d5cbdd2e90	Remove it here as well	2024-04-04 12:36:28 +02:00
Arpad Müller	6ad9c3560e	Merge branch 'main' into arpad/less_async_trait	2024-04-04 12:27:38 +02:00
Arpad Müller	9dc3b09e57	Remove async-trait from Cargo.toml of crates it became unused in	2024-04-03 23:25:06 +02:00
Arpad Müller	fe762e35d8	Remove async_trait from Handler trait as well	2024-04-03 23:21:16 +02:00
Arpad Müller	0c4988a92c	Remove async_trait from CompactionDeltaLayer	2024-04-03 23:21:16 +02:00