layer file migration

Signed-off-by: Alex Chi Z <chi@neon.tech>
rfc: new encryption
2026-02-06 20:20:37 +00:00 · 2025-04-25 15:44:42 -04:00 · 2025-04-25 15:40:31 -04:00 · 2025-04-15 09:31:09 +01:00 · 2025-04-14 13:09:00 +01:00
180 changed files with 1633 additions and 4453 deletions
--- a/.github/actionlint.yml
+++ b/.github/actionlint.yml
@@ -6,7 +6,6 @@ self-hosted-runner:
    - small
    - small-metal
    - small-arm64
-    - unit-perf
    - us-east-2
 config-variables:
  - AWS_ECR_REGION
--- a/.github/actions/allure-report-generate/action.yml
+++ b/.github/actions/allure-report-generate/action.yml
@@ -70,7 +70,6 @@ runs:

    - name: Install Allure
      shell: bash -euxo pipefail {0}
-      working-directory: /tmp
      run: |
        if ! which allure; then
          ALLURE_ZIP=allure-${ALLURE_VERSION}.zip
--- a/.github/actions/run-python-test-set/action.yml
+++ b/.github/actions/run-python-test-set/action.yml
@@ -113,6 +113,8 @@ runs:
        TEST_OUTPUT: /tmp/test_output
        BUILD_TYPE: ${{ inputs.build_type }}
        COMPATIBILITY_SNAPSHOT_DIR: /tmp/compatibility_snapshot_pg${{ inputs.pg_version }}
+        ALLOW_BACKWARD_COMPATIBILITY_BREAKAGE: ${{ contains(github.event.pull_request.labels.*.name, 'backward compatibility breakage') }}  # needs ${{ }}; a bare expression is passed as a literal string
+        ALLOW_FORWARD_COMPATIBILITY_BREAKAGE: ${{ contains(github.event.pull_request.labels.*.name, 'forward compatibility breakage') }}
        RERUN_FAILED: ${{ inputs.rerun_failed }}
        PG_VERSION: ${{ inputs.pg_version }}
        SANITIZERS: ${{ inputs.sanitizers }}
--- a/.github/workflows/_build-and-test-locally.yml
+++ b/.github/workflows/_build-and-test-locally.yml
@@ -272,13 +272,10 @@ jobs:
          # run pageserver tests with different settings
          for get_vectored_concurrent_io in sequential sidecar-task; do
            for io_engine in std-fs tokio-epoll-uring ; do
-                for io_mode in buffered direct direct-rw ; do
-                  NEON_PAGESERVER_UNIT_TEST_GET_VECTORED_CONCURRENT_IO=$get_vectored_concurrent_io \
-                  NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine \
-                  NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOMODE=$io_mode \
-                  ${cov_prefix} \
-                  cargo nextest run $CARGO_FLAGS $CARGO_FEATURES  -E 'package(pageserver)'
-              done
+              NEON_PAGESERVER_UNIT_TEST_GET_VECTORED_CONCURRENT_IO=$get_vectored_concurrent_io \
+                NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine \
+                ${cov_prefix} \
+                cargo nextest run $CARGO_FLAGS $CARGO_FEATURES  -E 'package(pageserver)'
            done
          done

@@ -349,7 +346,7 @@ jobs:
      contents: read
      statuses: write
    needs: [ build-neon ]
-    runs-on: ${{ fromJSON(format('["self-hosted", "{0}"]', inputs.arch == 'arm64' && 'large-arm64' || 'large-metal')) }}
+    runs-on: ${{ fromJSON(format('["self-hosted", "{0}"]', inputs.arch == 'arm64' && 'large-arm64' || 'large')) }}
    container:
      image: ${{ inputs.build-tools-image }}
      credentials:
@@ -395,7 +392,6 @@ jobs:
          BUILD_TAG: ${{ inputs.build-tag }}
          PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
          PAGESERVER_GET_VECTORED_CONCURRENT_IO: sidecar-task
-          PAGESERVER_VIRTUAL_FILE_IO_MODE: direct
          USE_LFC: ${{ matrix.lfc_state == 'with-lfc' && 'true' || 'false' }}

      # Temporary disable this step until we figure out why it's so flaky
--- a/.github/workflows/_create-release-pr.yml
+++ b/.github/workflows/_create-release-pr.yml
@@ -53,13 +53,10 @@ jobs:
            || inputs.component-name == 'Compute' && 'release-compute'
          }}
      run: |
-        now_date=$(date -u +'%Y-%m-%d')
-        now_time=$(date -u +'%H-%M-%Z')
-        {
-          echo "title=${COMPONENT_NAME} release ${now_date}"
-          echo "rc-branch=rc/${RELEASE_BRANCH}/${now_date}_${now_time}"
-          echo "release-branch=${RELEASE_BRANCH}"
-        } | tee -a ${GITHUB_OUTPUT}
+        today=$(date +'%Y-%m-%d')
+        echo "title=${COMPONENT_NAME} release ${today}" | tee -a ${GITHUB_OUTPUT}
+        echo "rc-branch=rc/${RELEASE_BRANCH}/${today}"  | tee -a ${GITHUB_OUTPUT}
+        echo "release-branch=${RELEASE_BRANCH}"         | tee -a ${GITHUB_OUTPUT}

    - name: Configure git
      run: |
--- a/.github/workflows/_meta.yml
+++ b/.github/workflows/_meta.yml
@@ -165,5 +165,5 @@ jobs:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          CURRENT_SHA: ${{ github.sha }}
        run: |
-          RELEASE_PR_RUN_ID=$(gh api "/repos/${GITHUB_REPOSITORY}/actions/runs?head_sha=$CURRENT_SHA" | jq '[.workflow_runs[] | select(.name == "Build and Test") | select(.head_branch | test("^rc/release.*$"; "s"))] | first | .id // ("Failed to find Build and Test run from  RC PR!" | halt_error(1))')
+          RELEASE_PR_RUN_ID=$(gh api "/repos/${GITHUB_REPOSITORY}/actions/runs?head_sha=$CURRENT_SHA" | jq '[.workflow_runs[] | select(.name == "Build and Test") | select(.head_branch | test("^rc/release(-(proxy|compute))?/[0-9]{4}-[0-9]{2}-[0-9]{2}$"; "s"))] | first | .id // ("Failed to find Build and Test run from  RC PR!" | halt_error(1))')
          echo "release-pr-run-id=$RELEASE_PR_RUN_ID" | tee -a $GITHUB_OUTPUT
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -284,7 +284,7 @@ jobs:
      statuses: write
      contents: write
      pull-requests: write
-    runs-on: [ self-hosted, unit-perf ]
+    runs-on: [ self-hosted, small-metal ]
    container:
      image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
      credentials:
@@ -323,8 +323,6 @@ jobs:
          PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
          TEST_RESULT_CONNSTR: "${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}"
          PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
-          PAGESERVER_GET_VECTORED_CONCURRENT_IO: sidecar-task
-          PAGESERVER_VIRTUAL_FILE_IO_MODE: direct
          SYNC_BETWEEN_TESTS: true
      # XXX: no coverage data handling here, since benchmarks are run on release builds,
      # while coverage is currently collected for the debug ones
@@ -1273,7 +1271,7 @@ jobs:
          exit 1

  deploy:
-    needs: [ check-permissions, push-neon-image-dev, push-compute-image-dev, push-neon-image-prod, push-compute-image-prod, meta, trigger-custom-extensions-build-and-wait ]
+    needs: [ check-permissions, push-neon-image-dev, push-compute-image-dev, push-neon-image-prod, push-compute-image-prod, meta, build-and-test-locally, trigger-custom-extensions-build-and-wait ]
    # `!failure() && !cancelled()` is required because the workflow depends on the job that can be skipped: `push-neon-image-prod` and `push-compute-image-prod`
    if: ${{ contains(fromJSON('["push-main", "storage-release", "proxy-release", "compute-release"]'), needs.meta.outputs.run-kind) && !failure() && !cancelled() }}
    permissions:
--- a/.github/workflows/fast-forward.yml
+++ b/.github/workflows/fast-forward.yml
@@ -27,17 +27,15 @@ jobs:
      - name: Fast forwarding
        uses: sequoia-pgp/fast-forward@ea7628bedcb0b0b96e94383ada458d812fca4979
        # See https://docs.github.com/en/graphql/reference/enums#mergestatestatus
-        if: ${{ contains(fromJSON('["clean", "unstable"]'), github.event.pull_request.mergeable_state) }}
+        if: ${{ github.event.pull_request.mergeable_state  == 'clean' }}
        with:
          merge: true
          comment: on-error
          github_token: ${{ secrets.CI_ACCESS_TOKEN }}

      - name: Comment if mergeable_state is not clean
-        if: ${{ !contains(fromJSON('["clean", "unstable"]'), github.event.pull_request.mergeable_state) }}
-        env:
-          GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
+        if: ${{ github.event.pull_request.mergeable_state  != 'clean' }}
        run: |
          gh pr comment ${{ github.event.pull_request.number }} \
            --repo "${GITHUB_REPOSITORY}" \
-            --body "Not trying to forward pull-request, because \`mergeable_state\` is \`${{ github.event.pull_request.mergeable_state }}\`, not \`clean\` or \`unstable\`."
+            --body "Not trying to forward pull-request, because \`mergeable_state\` is \`${{ github.event.pull_request.mergeable_state }}\`, not \`clean\`."
--- a/.github/workflows/pg-clients.yml
+++ b/.github/workflows/pg-clients.yml
@@ -30,7 +30,7 @@ permissions:
  statuses: write # require for posting a status update

 env:
-  DEFAULT_PG_VERSION: 17
+  DEFAULT_PG_VERSION: 16
  PLATFORM: neon-captest-new
  AWS_DEFAULT_REGION: eu-central-1

@@ -42,8 +42,6 @@ jobs:
      github-event-name: ${{ github.event_name }}

  build-build-tools-image:
-    permissions:
-      packages: write
    needs: [ check-permissions ]
    uses: ./.github/workflows/build-build-tools-image.yml
    secrets: inherit
--- a/.github/workflows/random-ops-test.yml
+++ b/.github/workflows/random-ops-test.yml
@@ -1,93 +0,0 @@
-name: Random Operations Test
-
-on:
-  schedule:
-    # * is a special character in YAML so you have to quote this string
-    #          ┌───────────── minute (0 - 59)
-    #          │  ┌───────────── hour (0 - 23)
-    #          │  │  ┌───────────── day of the month (1 - 31)
-    #          │  │  │ ┌───────────── month (1 - 12 or JAN-DEC)
-    #          │  │  │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
-    - cron:  '23 */2 * * *' # runs every 2 hours
-  workflow_dispatch:
-    inputs:
-      random_seed:
-        type: number
-        description: 'The random seed'
-        required: false
-        default: 0
-      num_operations:
-        type: number
-        description: "The number of operations to test"
-        default: 250
-
-defaults:
-  run:
-    shell: bash -euxo pipefail {0}
-
-permissions: {}
-
-env:
-  DEFAULT_PG_VERSION: 16
-  PLATFORM: neon-captest-new
-  AWS_DEFAULT_REGION: eu-central-1
-
-jobs:
-  run-random-rests:
-    env:
-      POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
-    runs-on: small
-    permissions:
-      id-token: write
-      statuses: write
-
-    strategy:
-      fail-fast: false
-      matrix:
-        pg-version: [16, 17]
-
-    container:
-      image: ghcr.io/neondatabase/build-tools:pinned-bookworm
-      credentials:
-        username: ${{ github.actor }}
-        password: ${{ secrets.GITHUB_TOKEN }}
-      options: --init
-    steps:
-      - name: Harden the runner (Audit all outbound calls)
-        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
-        with:
-          egress-policy: audit
-
-      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-
-      - name: Download Neon artifact
-        uses: ./.github/actions/download
-        with:
-          name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
-          path: /tmp/neon/
-          prefix: latest
-          aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
-
-      - name: Run tests
-        uses: ./.github/actions/run-python-test-set
-        with:
-          build_type: remote
-          test_selection: random_ops
-          run_in_parallel: false
-          extra_params: -m remote_cluster
-          pg_version: ${{ matrix.pg-version }}
-          aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
-        env:
-          NEON_API_KEY: ${{ secrets.NEON_STAGING_API_KEY }}
-          RANDOM_SEED: ${{ inputs.random_seed }}
-          NUM_OPERATIONS: ${{ inputs.num_operations }}
-
-      - name: Create Allure report
-        if: ${{ !cancelled() }}
-        id: create-allure-report
-        uses: ./.github/actions/allure-report-generate
-        with:
-          store-test-results-into-db: true
-          aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
-        env:
-          REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1416,7 +1416,6 @@ name = "control_plane"
 version = "0.1.0"
 dependencies = [
 "anyhow",
- "base64 0.13.1",
 "camino",
 "clap",
 "comfy-table",
@@ -1426,12 +1425,10 @@ dependencies = [
 "humantime",
 "humantime-serde",
 "hyper 0.14.30",
- "jsonwebtoken",
 "nix 0.27.1",
 "once_cell",
 "pageserver_api",
 "pageserver_client",
- "pem",
 "postgres_backend",
 "postgres_connection",
 "regex",
@@ -1440,8 +1437,6 @@ dependencies = [
 "scopeguard",
 "serde",
 "serde_json",
- "sha2",
- "spki 0.7.3",
 "storage_broker",
 "thiserror 1.0.69",
 "tokio",
@@ -2822,7 +2817,6 @@ dependencies = [
 "hyper 0.14.30",
 "itertools 0.10.5",
 "jemalloc_pprof",
- "jsonwebtoken",
 "metrics",
 "once_cell",
 "pprof",
@@ -4275,7 +4269,6 @@ dependencies = [
 "hyper 0.14.30",
 "indoc",
 "itertools 0.10.5",
- "jsonwebtoken",
 "md5",
 "metrics",
 "nix 0.27.1",
@@ -4285,7 +4278,6 @@ dependencies = [
 "pageserver_api",
 "pageserver_client",
 "pageserver_compaction",
- "pem",
 "pin-project-lite",
 "postgres-protocol",
 "postgres-types",
@@ -4353,7 +4345,6 @@ dependencies = [
 "humantime-serde",
 "itertools 0.10.5",
 "nix 0.27.1",
- "once_cell",
 "postgres_backend",
 "postgres_ffi",
 "rand 0.8.5",
@@ -5694,9 +5685,9 @@ dependencies = [

 [[package]]
 name = "ring"
-version = "0.17.14"
+version = "0.17.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7"
+checksum = "70ac5d832aa16abd7d1def883a8545280c20a60f523a370aa3a9617c2b8550ee"
 dependencies = [
 "cc",
 "cfg-if",
@@ -5997,12 +5988,10 @@ dependencies = [
 "humantime",
 "hyper 0.14.30",
 "itertools 0.10.5",
- "jsonwebtoken",
 "metrics",
 "once_cell",
 "pageserver_api",
 "parking_lot 0.12.1",
- "pem",
 "postgres-protocol",
 "postgres_backend",
 "postgres_ffi",
@@ -7883,7 +7872,6 @@ dependencies = [
 "metrics",
 "nix 0.27.1",
 "once_cell",
- "pem",
 "pin-project-lite",
 "postgres_connection",
 "pprof",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -141,7 +141,6 @@ parking_lot = "0.12"
 parquet = { version = "53", default-features = false, features = ["zstd"] }
 parquet_derive = "53"
 pbkdf2 = { version = "0.12.1", features = ["simple", "std"] }
-pem = "3.0.3"
 pin-project-lite = "0.2"
 pprof = { version = "0.14", features = ["criterion", "flamegraph", "frame-pointer", "prost-codec"] }
 procfs = "0.16"
@@ -175,7 +174,6 @@ signal-hook = "0.3"
 smallvec = "1.11"
 smol_str = { version = "0.2.0", features = ["serde"] }
 socket2 = "0.5"
-spki = "0.7.3"
 strum = "0.26"
 strum_macros = "0.26"
 "subtle"  = "2.5.0"
--- a/README.md
+++ b/README.md
@@ -270,7 +270,7 @@ By default, this runs both debug and release modes, and all supported postgres v
 testing locally, it is convenient to run just one set of permutations, like this:

 ```sh
-DEFAULT_PG_VERSION=17 BUILD_TYPE=release ./scripts/pytest
+DEFAULT_PG_VERSION=16 BUILD_TYPE=release ./scripts/pytest
 ```

 ## Flamegraphs
--- a/clippy.toml
+++ b/clippy.toml
@@ -12,5 +12,3 @@ disallowed-macros = [
    # cannot disallow this, because clippy finds used from tokio macros
    #"tokio::pin",
 ]
-
-allow-unwrap-in-tests = true
--- a/compute/patches/pg_anon.patch
+++ b/compute/patches/pg_anon.patch
@@ -0,0 +1,265 @@
+commit 00aa659afc9c7336ab81036edec3017168aabf40
+Author: Heikki Linnakangas <heikki@neon.tech>
+Date:   Tue Nov 12 16:59:19 2024 +0200
+
+    Temporarily disable test that depends on timezone
+
+diff --git a/tests/expected/generalization.out b/tests/expected/generalization.out
+index 23ef5fa..9e60deb 100644
+--- a/ext-src/pg_anon-src/tests/expected/generalization.out
+++ b/ext-src/pg_anon-src/tests/expected/generalization.out
+@@ -284,12 +284,9 @@ SELECT anon.generalize_tstzrange('19041107','century');
+  ["Tue Jan 01 00:00:00 1901 PST","Mon Jan 01 00:00:00 2001 PST")
+ (1 row)
+ 
+-SELECT anon.generalize_tstzrange('19041107','millennium');
+-                      generalize_tstzrange                       
+------------------------------------------------------------------
+- ["Thu Jan 01 00:00:00 1001 PST","Mon Jan 01 00:00:00 2001 PST")
+-(1 row)
+-
+-- temporarily disabled, see:
+-- https://gitlab.com/dalibo/postgresql_anonymizer/-/commit/199f0a392b37c59d92ae441fb8f037e094a11a52#note_2148017485
+--SELECT anon.generalize_tstzrange('19041107','millennium');
+ -- generalize_daterange
+ SELECT anon.generalize_daterange('19041107');
+   generalize_daterange   
+diff --git a/tests/sql/generalization.sql b/tests/sql/generalization.sql
+index b868344..b4fc977 100644
+--- a/ext-src/pg_anon-src/tests/sql/generalization.sql
+++ b/ext-src/pg_anon-src/tests/sql/generalization.sql
+@@ -61,7 +61,9 @@ SELECT anon.generalize_tstzrange('19041107','month');
+ SELECT anon.generalize_tstzrange('19041107','year');
+ SELECT anon.generalize_tstzrange('19041107','decade');
+ SELECT anon.generalize_tstzrange('19041107','century');
+-SELECT anon.generalize_tstzrange('19041107','millennium');
+-- temporarily disabled, see:
+-- https://gitlab.com/dalibo/postgresql_anonymizer/-/commit/199f0a392b37c59d92ae441fb8f037e094a11a52#note_2148017485
+--SELECT anon.generalize_tstzrange('19041107','millennium');
+ 
+ -- generalize_daterange
+ SELECT anon.generalize_daterange('19041107');
+
+commit 7dd414ee75f2875cffb1d6ba474df1f135a6fc6f
+Author: Alexey Masterov <alexeymasterov@neon.tech>
+Date:   Fri May 31 06:34:26 2024 +0000
+
+    These alternative expected files were added to consider the neon features
+
+diff --git a/ext-src/pg_anon-src/tests/expected/permissions_masked_role_1.out b/ext-src/pg_anon-src/tests/expected/permissions_masked_role_1.out
+new file mode 100644
+index 0000000..2539cfd
+--- /dev/null
+++ b/ext-src/pg_anon-src/tests/expected/permissions_masked_role_1.out
+@@ -0,0 +1,101 @@
+BEGIN;
+CREATE EXTENSION anon CASCADE;
+NOTICE:  installing required extension "pgcrypto"
+SELECT anon.init();
+ init 
+------
+ t
+(1 row)
+
+CREATE ROLE mallory_the_masked_user;
+SECURITY LABEL FOR anon ON ROLE mallory_the_masked_user IS 'MASKED';
+CREATE TABLE t1(i INT);
+ALTER TABLE t1 ADD COLUMN t TEXT;
+SECURITY LABEL FOR anon ON COLUMN t1.t
+IS 'MASKED WITH VALUE NULL';
+INSERT INTO t1 VALUES (1,'test');
+--
+-- We're checking the owner's permissions
+--
+-- see
+-- https://postgresql-anonymizer.readthedocs.io/en/latest/SECURITY/#permissions
+--
+SET ROLE mallory_the_masked_user;
+SELECT anon.pseudo_first_name(0) IS NOT NULL;
+ ?column? 
+----------
+ t
+(1 row)
+
+-- SHOULD FAIL
+DO $$
+BEGIN
+  PERFORM anon.init();
+  EXCEPTION WHEN insufficient_privilege
+  THEN RAISE NOTICE 'insufficient_privilege';
+END$$;
+NOTICE:  insufficient_privilege
+-- SHOULD FAIL
+DO $$
+BEGIN
+  PERFORM anon.anonymize_table('t1');
+  EXCEPTION WHEN insufficient_privilege
+  THEN RAISE NOTICE 'insufficient_privilege';
+END$$;
+NOTICE:  insufficient_privilege
+-- SHOULD FAIL
+SAVEPOINT fail_start_engine;
+SELECT anon.start_dynamic_masking();
+ERROR:  Only supersusers can start the dynamic masking engine.
+CONTEXT:  PL/pgSQL function anon.start_dynamic_masking(boolean) line 18 at RAISE
+ROLLBACK TO fail_start_engine;
+RESET ROLE;
+SELECT anon.start_dynamic_masking();
+ start_dynamic_masking 
+-----------------------
+ t
+(1 row)
+
+SET ROLE mallory_the_masked_user;
+SELECT * FROM mask.t1;
+ i | t 
+---+---
+ 1 | 
+(1 row)
+
+-- SHOULD FAIL
+DO $$
+BEGIN
+  SELECT * FROM public.t1;
+  EXCEPTION WHEN insufficient_privilege
+  THEN RAISE NOTICE 'insufficient_privilege';
+END$$;
+NOTICE:  insufficient_privilege
+-- SHOULD FAIL
+SAVEPOINT fail_stop_engine;
+SELECT anon.stop_dynamic_masking();
+ERROR:  Only supersusers can stop the dynamic masking engine.
+CONTEXT:  PL/pgSQL function anon.stop_dynamic_masking() line 18 at RAISE
+ROLLBACK TO fail_stop_engine;
+RESET ROLE;
+SELECT anon.stop_dynamic_masking();
+NOTICE:  The previous priviledges of 'mallory_the_masked_user' are not restored. You need to grant them manually.
+ stop_dynamic_masking 
+----------------------
+ t
+(1 row)
+
+SET ROLE mallory_the_masked_user;
+SELECT COUNT(*)=1 FROM anon.pg_masking_rules;
+ ?column? 
+----------
+ t
+(1 row)
+
+-- SHOULD FAIL
+SAVEPOINT fail_seclabel_on_role;
+SECURITY LABEL FOR anon ON ROLE mallory_the_masked_user IS NULL;
+ERROR:  permission denied
+DETAIL:  The current user must have the CREATEROLE attribute.
+ROLLBACK TO fail_seclabel_on_role;
+ROLLBACK;
+diff --git a/ext-src/pg_anon-src/tests/expected/permissions_owner_1.out b/ext-src/pg_anon-src/tests/expected/permissions_owner_1.out
+new file mode 100644
+index 0000000..8b090fe
+--- /dev/null
+++ b/ext-src/pg_anon-src/tests/expected/permissions_owner_1.out
+@@ -0,0 +1,104 @@
+BEGIN;
+CREATE EXTENSION anon CASCADE;
+NOTICE:  installing required extension "pgcrypto"
+SELECT anon.init();
+ init 
+------
+ t
+(1 row)
+
+CREATE ROLE oscar_the_owner;
+ALTER DATABASE :DBNAME OWNER TO oscar_the_owner;
+CREATE ROLE mallory_the_masked_user;
+SECURITY LABEL FOR anon ON ROLE mallory_the_masked_user IS 'MASKED';
+--
+-- We're checking the owner's permissions
+--
+-- see
+-- https://postgresql-anonymizer.readthedocs.io/en/latest/SECURITY/#permissions
+--
+SET ROLE oscar_the_owner;
+SELECT anon.pseudo_first_name(0) IS NOT NULL;
+ ?column? 
+----------
+ t
+(1 row)
+
+-- SHOULD FAIL
+DO $$
+BEGIN
+  PERFORM anon.init();
+  EXCEPTION WHEN insufficient_privilege
+  THEN RAISE NOTICE 'insufficient_privilege';
+END$$;
+NOTICE:  insufficient_privilege
+CREATE TABLE t1(i INT);
+ALTER TABLE t1 ADD COLUMN t TEXT;
+SECURITY LABEL FOR anon ON COLUMN t1.t
+IS 'MASKED WITH VALUE NULL';
+INSERT INTO t1 VALUES (1,'test');
+SELECT anon.anonymize_table('t1');
+ anonymize_table 
+-----------------
+ t
+(1 row)
+
+SELECT * FROM t1;
+ i | t 
+---+---
+ 1 | 
+(1 row)
+
+UPDATE t1 SET t='test' WHERE i=1;
+-- SHOULD FAIL
+SAVEPOINT fail_start_engine;
+SELECT anon.start_dynamic_masking();
+ start_dynamic_masking 
+-----------------------
+ t
+(1 row)
+
+ROLLBACK TO fail_start_engine;
+RESET ROLE;
+SELECT anon.start_dynamic_masking();
+ start_dynamic_masking 
+-----------------------
+ t
+(1 row)
+
+SET ROLE oscar_the_owner;
+SELECT * FROM t1;
+ i |  t   
+---+------
+ 1 | test
+(1 row)
+
+--SELECT * FROM mask.t1;
+-- SHOULD FAIL
+SAVEPOINT fail_stop_engine;
+SELECT anon.stop_dynamic_masking();
+ERROR:  permission denied for schema mask
+CONTEXT:  SQL statement "DROP VIEW mask.t1;"
+PL/pgSQL function anon.mask_drop_view(oid) line 3 at EXECUTE
+SQL statement "SELECT anon.mask_drop_view(oid)
+  FROM pg_catalog.pg_class
+  WHERE relnamespace=quote_ident(pg_catalog.current_setting('anon.sourceschema'))::REGNAMESPACE
+  AND relkind IN ('r','p','f')"
+PL/pgSQL function anon.stop_dynamic_masking() line 22 at PERFORM
+ROLLBACK TO fail_stop_engine;
+RESET ROLE;
+SELECT anon.stop_dynamic_masking();
+NOTICE:  The previous priviledges of 'mallory_the_masked_user' are not restored. You need to grant them manually.
+ stop_dynamic_masking 
+----------------------
+ t
+(1 row)
+
+SET ROLE oscar_the_owner;
+-- SHOULD FAIL
+SAVEPOINT fail_seclabel_on_role;
+SECURITY LABEL FOR anon ON ROLE mallory_the_masked_user IS NULL;
+ERROR:  permission denied
+DETAIL:  The current user must have the CREATEROLE attribute.
+ROLLBACK TO fail_seclabel_on_role;
+ROLLBACK;
--- a/compute/patches/pgvector.patch
+++ b/compute/patches/pgvector.patch
@@ -15,7 +15,7 @@ index 7a4b88c..56678af 100644
 HEADERS = src/halfvec.h src/sparsevec.h src/vector.h
 
 diff --git a/src/hnswbuild.c b/src/hnswbuild.c
-index b667478..1298aa1 100644
+index b667478..dc95d89 100644
 --- a/src/hnswbuild.c
 +++ b/src/hnswbuild.c
@@ -843,9 +843,17 @@ HnswParallelBuildMain(dsm_segment *seg, shm_toc *toc)
@@ -36,7 +36,7 @@ index b667478..1298aa1 100644
 	/* Close relations within worker */
 	index_close(indexRel, indexLockmode);
 	table_close(heapRel, heapLockmode);
-@@ -1100,13 +1108,25 @@ BuildIndex(Relation heap, Relation index, IndexInfo *indexInfo,
+@@ -1100,12 +1108,39 @@ BuildIndex(Relation heap, Relation index, IndexInfo *indexInfo,
 	SeedRandom(42);
 #endif
 
@@ -48,17 +48,32 @@ index b667478..1298aa1 100644
 
 	BuildGraph(buildstate, forkNum);
 
+-	if (RelationNeedsWAL(index) || forkNum == INIT_FORKNUM)
 +#ifdef NEON_SMGR
 +	smgr_finish_unlogged_build_phase_1(RelationGetSmgr(index));
 +#endif
 +
- 	if (RelationNeedsWAL(index) || forkNum == INIT_FORKNUM)
+	if (RelationNeedsWAL(index) || forkNum == INIT_FORKNUM) {
 		log_newpage_range(index, forkNum, 0, RelationGetNumberOfBlocksInFork(index, forkNum), true);
- 
+#ifdef NEON_SMGR
+		{
+#if PG_VERSION_NUM >= 160000
+			RelFileLocator rlocator = RelationGetSmgr(index)->smgr_rlocator.locator;
+#else
+			RelFileNode rlocator = RelationGetSmgr(index)->smgr_rnode.node;
+#endif
+			if (set_lwlsn_block_range_hook)
+				set_lwlsn_block_range_hook(XactLastRecEnd, rlocator,
+										   MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index));
+			if (set_lwlsn_relation_hook)
+				set_lwlsn_relation_hook(XactLastRecEnd, rlocator, MAIN_FORKNUM);
+		}
+#endif
+	}
+
 +#ifdef NEON_SMGR
 +	smgr_end_unlogged_build(RelationGetSmgr(index));
 +#endif
-+
+ 
 	FreeBuildState(buildstate);
 }
- 
--- a/compute/patches/rum.patch
+++ b/compute/patches/rum.patch
@@ -1,5 +1,5 @@
 diff --git a/src/ruminsert.c b/src/ruminsert.c
-index 255e616..1c6edb7 100644
+index 255e616..7a2240f 100644
 --- a/src/ruminsert.c
 +++ b/src/ruminsert.c
@@ -628,6 +628,10 @@ rumbuild(Relation heap, Relation index, struct IndexInfo *indexInfo)
@@ -24,12 +24,24 @@ index 255e616..1c6edb7 100644
 	/*
 	 * Write index to xlog
 	 */
-@@ -713,6 +721,10 @@ rumbuild(Relation heap, Relation index, struct IndexInfo *indexInfo)
+@@ -713,6 +721,22 @@ rumbuild(Relation heap, Relation index, struct IndexInfo *indexInfo)
 		UnlockReleaseBuffer(buffer);
 	}
 
 +#ifdef NEON_SMGR
-+	smgr_end_unlogged_build(index->rd_smgr);
+	{
+#if PG_VERSION_NUM >= 160000
+		RelFileLocator rlocator = RelationGetSmgr(index)->smgr_rlocator.locator;
+#else
+		RelFileNode rlocator = RelationGetSmgr(index)->smgr_rnode.node;
+#endif
+		if (set_lwlsn_block_range_hook)
+			set_lwlsn_block_range_hook(XactLastRecEnd, rlocator, MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index));
+		if (set_lwlsn_relation_hook)
+			set_lwlsn_relation_hook(XactLastRecEnd, rlocator, MAIN_FORKNUM);
+
+		smgr_end_unlogged_build(index->rd_smgr);
+	}
 +#endif
 +
 	/*
--- a/compute/vm-image-spec-bookworm.yaml
+++ b/compute/vm-image-spec-bookworm.yaml
@@ -22,7 +22,7 @@ commands:
  - name: local_proxy
    user: postgres
    sysvInitAction: respawn
-    shell: 'RUST_LOG="info,proxy::serverless::sql_over_http=warn" /usr/local/bin/local_proxy --config-path /etc/local_proxy/config.json --pid-path /etc/local_proxy/pid --http 0.0.0.0:10432'
+    shell: '/usr/local/bin/local_proxy --config-path /etc/local_proxy/config.json --pid-path /etc/local_proxy/pid --http 0.0.0.0:10432'
  - name: postgres-exporter
    user: nobody
    sysvInitAction: respawn
--- a/compute/vm-image-spec-bullseye.yaml
+++ b/compute/vm-image-spec-bullseye.yaml
@@ -22,7 +22,7 @@ commands:
  - name: local_proxy
    user: postgres
    sysvInitAction: respawn
-    shell: 'RUST_LOG="info,proxy::serverless::sql_over_http=warn" /usr/local/bin/local_proxy --config-path /etc/local_proxy/config.json --pid-path /etc/local_proxy/pid --http 0.0.0.0:10432'
+    shell: '/usr/local/bin/local_proxy --config-path /etc/local_proxy/config.json --pid-path /etc/local_proxy/pid --http 0.0.0.0:10432'
  - name: postgres-exporter
    user: nobody
    sysvInitAction: respawn
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -57,13 +57,24 @@ use tracing::{error, info};
 use url::Url;
 use utils::failpoint_support;

+// Compatibility hack: unless the control plane passes a value starting with
+// "http", fall back to the default extension storage proxy gateway URL.
+// Remove this once the control plane is updated to pass the gateway URL
+fn parse_remote_ext_config(arg: &str) -> Result<String> {
+    if !arg.starts_with("http") {
+        // Not a URL-like value: substitute the default gateway.
+        return Ok("http://pg-ext-s3-gateway".to_string());
+    }
+    Ok(arg.trim_end_matches('/').to_string()) // drop trailing slashes
+}
+
 #[derive(Parser)]
 #[command(rename_all = "kebab-case")]
 struct Cli {
    #[arg(short = 'b', long, default_value = "postgres", env = "POSTGRES_PATH")]
    pub pgbin: String,

-    #[arg(short = 'r', long)]
+    #[arg(short = 'r', long, value_parser = parse_remote_ext_config)]
    pub remote_ext_config: Option<String>,

    /// The port to bind the external listening HTTP server to. Clients running
@@ -105,7 +116,9 @@ struct Cli {
    #[arg(long)]
    pub set_disk_quota_for_fs: Option<String>,

-    #[arg(short = 'c', long)]
+    // TODO(tristan957): remove alias after compatibility tests are no longer
+    // an issue
+    #[arg(short = 'c', long, alias = "spec-path")]
    pub config: Option<OsString>,

    #[arg(short = 'i', long, group = "compute-id")]
@@ -126,7 +139,7 @@ fn main() -> Result<()> {

    let scenario = failpoint_support::init();

-    // For historical reasons, the main thread that processes the config and launches postgres
+    // For historical reasons, the main thread that processes the spec and launches postgres
    // is synchronous, but we always have this tokio runtime available and we "enter" it so
    // that you can use tokio::spawn() and tokio::runtime::Handle::current().block_on(...)
    // from all parts of compute_ctl.
@@ -142,7 +155,7 @@ fn main() -> Result<()> {

    let connstr = Url::parse(&cli.connstr).context("cannot parse connstr as a URL")?;

-    let config = get_config(&cli)?;
+    let cli_spec = get_config(&cli)?;

    let compute_node = ComputeNode::new(
        ComputeNodeParams {
@@ -163,7 +176,8 @@ fn main() -> Result<()> {
            #[cfg(target_os = "linux")]
            vm_monitor_addr: cli.vm_monitor_addr,
        },
-        config,
+        cli_spec.spec,
+        cli_spec.compute_ctl_config,
    )?;

    let exit_code = compute_node.run()?;
--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -11,7 +11,7 @@ use std::{env, fs};
 use anyhow::{Context, Result};
 use chrono::{DateTime, Utc};
 use compute_api::privilege::Privilege;
-use compute_api::responses::{ComputeConfig, ComputeCtlConfig, ComputeMetrics, ComputeStatus};
+use compute_api::responses::{ComputeCtlConfig, ComputeMetrics, ComputeStatus};
 use compute_api::spec::{
    ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, ExtVersion, PgIdent,
 };
@@ -303,7 +303,11 @@ struct StartVmMonitorResult {
 }

 impl ComputeNode {
-    pub fn new(params: ComputeNodeParams, config: ComputeConfig) -> Result<Self> {
+    pub fn new(
+        params: ComputeNodeParams,
+        cli_spec: Option<ComputeSpec>,
+        compute_ctl_config: ComputeCtlConfig,
+    ) -> Result<Self> {
        let connstr = params.connstr.as_str();
        let conn_conf = postgres::config::Config::from_str(connstr)
            .context("cannot build postgres config from connstr")?;
@@ -311,8 +315,8 @@ impl ComputeNode {
            .context("cannot build tokio postgres config from connstr")?;

        let mut new_state = ComputeState::new();
-        if let Some(spec) = config.spec {
-            let pspec = ParsedSpec::try_from(spec).map_err(|msg| anyhow::anyhow!(msg))?;
+        if let Some(cli_spec) = cli_spec {
+            let pspec = ParsedSpec::try_from(cli_spec).map_err(|msg| anyhow::anyhow!(msg))?;
            new_state.pspec = Some(pspec);
        }

@@ -323,7 +327,7 @@ impl ComputeNode {
            state: Mutex::new(new_state),
            state_changed: Condvar::new(),
            ext_download_progress: RwLock::new(HashMap::new()),
-            compute_ctl_config: config.compute_ctl_config,
+            compute_ctl_config,
        })
    }

@@ -641,26 +645,7 @@ impl ComputeNode {

                let log_directory_path = Path::new(&self.params.pgdata).join("log");
                let log_directory_path = log_directory_path.to_string_lossy().to_string();
-
-                // Add project_id,endpoint_id tag to identify the logs.
-                //
-                // These ids are passed from cplane,
-                // for backwards compatibility (old computes that don't have them),
-                // we set them to None.
-                // TODO: Clean up this code when all computes have them.
-                let tag: Option<String> = match (
-                    pspec.spec.project_id.as_deref(),
-                    pspec.spec.endpoint_id.as_deref(),
-                ) {
-                    (Some(project_id), Some(endpoint_id)) => {
-                        Some(format!("{project_id}/{endpoint_id}"))
-                    }
-                    (Some(project_id), None) => Some(format!("{project_id}/None")),
-                    (None, Some(endpoint_id)) => Some(format!("None,{endpoint_id}")),
-                    (None, None) => None,
-                };
-
-                configure_audit_rsyslog(log_directory_path.clone(), tag, &remote_endpoint)?;
+                configure_audit_rsyslog(log_directory_path.clone(), "hipaa", &remote_endpoint)?;

                // Launch a background task to clean up the audit logs
                launch_pgaudit_gc(log_directory_path);
--- a/compute_tools/src/http/extract/mod.rs
+++ b/compute_tools/src/http/extract/mod.rs
@@ -6,5 +6,4 @@ pub(crate) mod request_id;
 pub(crate) use json::Json;
 pub(crate) use path::Path;
 pub(crate) use query::Query;
-#[allow(unused)]
 pub(crate) use request_id::RequestId;
--- a/compute_tools/src/http/middleware/authorize.rs
+++ b/compute_tools/src/http/middleware/authorize.rs
@@ -1,7 +1,7 @@
-use std::collections::HashSet;
+use std::{collections::HashSet, net::SocketAddr};

 use anyhow::{Result, anyhow};
-use axum::{RequestExt, body::Body};
+use axum::{RequestExt, body::Body, extract::ConnectInfo};
 use axum_extra::{
    TypedHeader,
    headers::{Authorization, authorization::Bearer},
@@ -11,9 +11,9 @@ use futures::future::BoxFuture;
 use http::{Request, Response, StatusCode};
 use jsonwebtoken::{Algorithm, DecodingKey, TokenData, Validation, jwk::JwkSet};
 use tower_http::auth::AsyncAuthorizeRequest;
-use tracing::{debug, warn};
+use tracing::warn;

-use crate::http::JsonResponse;
+use crate::http::{JsonResponse, extract::RequestId};

 #[derive(Clone, Debug)]
 pub(in crate::http) struct Authorize {
@@ -52,6 +52,31 @@ impl AsyncAuthorizeRequest<Body> for Authorize {
        let validation = self.validation.clone();

        Box::pin(async move {
+            let request_id = request.extract_parts::<RequestId>().await.unwrap();
+
+            // TODO: Remove this stanza after teaching neon_local and the
+            // regression tests to use a JWT + JWKS.
+            //
+            // https://github.com/neondatabase/neon/issues/11316
+            if cfg!(feature = "testing") {
+                warn!(%request_id, "Skipping compute_ctl authorization check");
+
+                return Ok(request);
+            }
+
+            let connect_info = request
+                .extract_parts::<ConnectInfo<SocketAddr>>()
+                .await
+                .unwrap();
+
+            // Requests arriving over the loopback interface are let through
+            // without checking the authorization token
+            if connect_info.ip().is_loopback() {
+                warn!(%request_id, "Bypassed authorization because request is coming from the loopback interface");
+
+                return Ok(request);
+            }
+
            let TypedHeader(Authorization(bearer)) = request
                .extract_parts::<TypedHeader<Authorization<Bearer>>>()
                .await
@@ -67,7 +92,7 @@ impl AsyncAuthorizeRequest<Body> for Authorize {
            if data.claims.compute_id != compute_id {
                return Err(JsonResponse::error(
                    StatusCode::UNAUTHORIZED,
-                    "invalid compute ID in authorization token claims",
+                    "invalid claims in authorization token",
                ));
            }

@@ -87,16 +112,12 @@ impl Authorize {
        token: &str,
        validation: &Validation,
    ) -> Result<TokenData<ComputeClaims>> {
-        debug_assert!(!jwks.keys.is_empty());
-
-        debug!("verifying token {}", token);
-
        for jwk in jwks.keys.iter() {
            let decoding_key = match DecodingKey::from_jwk(jwk) {
                Ok(key) => key,
                Err(e) => {
                    warn!(
-                        "failed to construct decoding key from {}: {}",
+                        "Failed to construct decoding key from {}: {}",
                        jwk.common.key_id.as_ref().unwrap(),
                        e
                    );
@@ -109,7 +130,7 @@ impl Authorize {
                Ok(data) => return Ok(data),
                Err(e) => {
                    warn!(
-                        "failed to decode authorization token using {}: {}",
+                        "Failed to decode authorization token using {}: {}",
                        jwk.common.key_id.as_ref().unwrap(),
                        e
                    );
@@ -119,6 +140,6 @@ impl Authorize {
            }
        }

-        Err(anyhow!("failed to verify authorization token"))
+        Err(anyhow!("Failed to verify authorization token"))
    }
 }
--- a/compute_tools/src/rsyslog.rs
+++ b/compute_tools/src/rsyslog.rs
@@ -50,13 +50,13 @@ fn restart_rsyslog() -> Result<()> {

 pub fn configure_audit_rsyslog(
    log_directory: String,
-    tag: Option<String>,
+    tag: &str,
    remote_endpoint: &str,
 ) -> Result<()> {
    let config_content: String = format!(
        include_str!("config_template/compute_audit_rsyslog_template.conf"),
        log_directory = log_directory,
-        tag = tag.unwrap_or("".to_string()),
+        tag = tag,
        remote_endpoint = remote_endpoint
    );

--- a/control_plane/Cargo.toml
+++ b/control_plane/Cargo.toml
@@ -6,16 +6,13 @@ license.workspace = true

 [dependencies]
 anyhow.workspace = true
-base64.workspace = true
 camino.workspace = true
 clap.workspace = true
 comfy-table.workspace = true
 futures.workspace = true
 humantime.workspace = true
-jsonwebtoken.workspace = true
 nix.workspace = true
 once_cell.workspace = true
-pem.workspace = true
 humantime-serde.workspace = true
 hyper0.workspace = true
 regex.workspace = true
@@ -23,8 +20,6 @@ reqwest = { workspace = true, features = ["blocking", "json"] }
 scopeguard.workspace = true
 serde.workspace = true
 serde_json.workspace = true
-sha2.workspace = true
-spki.workspace = true
 thiserror.workspace = true
 toml.workspace = true
 toml_edit.workspace = true
--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -63,7 +63,7 @@ const DEFAULT_PAGESERVER_ID: NodeId = NodeId(1);
 const DEFAULT_BRANCH_NAME: &str = "main";
 project_git_version!(GIT_VERSION);

-const DEFAULT_PG_VERSION: u32 = 17;
+const DEFAULT_PG_VERSION: u32 = 16;

 const DEFAULT_PAGESERVER_CONTROL_PLANE_API: &str = "http://127.0.0.1:1234/upcall/v1/";

@@ -552,7 +552,6 @@ enum EndpointCmd {
    Start(EndpointStartCmdArgs),
    Reconfigure(EndpointReconfigureCmdArgs),
    Stop(EndpointStopCmdArgs),
-    GenerateJwt(EndpointGenerateJwtCmdArgs),
 }

 #[derive(clap::Args)]
@@ -700,13 +699,6 @@ struct EndpointStopCmdArgs {
    mode: String,
 }

-#[derive(clap::Args)]
-#[clap(about = "Generate a JWT for an endpoint")]
-struct EndpointGenerateJwtCmdArgs {
-    #[clap(help = "Postgres endpoint id")]
-    endpoint_id: String,
-}
-
 #[derive(clap::Subcommand)]
 #[clap(about = "Manage neon_local branch name mappings")]
 enum MappingsCmd {
@@ -1536,16 +1528,6 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
                .with_context(|| format!("postgres endpoint {endpoint_id} is not found"))?;
            endpoint.stop(&args.mode, args.destroy)?;
        }
-        EndpointCmd::GenerateJwt(args) => {
-            let endpoint_id = &args.endpoint_id;
-            let endpoint = cplane
-                .endpoints
-                .get(endpoint_id)
-                .with_context(|| format!("postgres endpoint {endpoint_id} is not found"))?;
-            let jwt = endpoint.generate_jwt()?;
-
-            print!("{jwt}");
-        }
    }

    Ok(())
--- a/control_plane/src/endpoint.rs
+++ b/control_plane/src/endpoint.rs
@@ -42,30 +42,22 @@ use std::path::PathBuf;
 use std::process::Command;
 use std::str::FromStr;
 use std::sync::Arc;
-use std::time::{Duration, Instant};
+use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};

 use anyhow::{Context, Result, anyhow, bail};
-use compute_api::requests::{ComputeClaims, ConfigurationRequest};
+use compute_api::requests::ConfigurationRequest;
 use compute_api::responses::{
-    ComputeConfig, ComputeCtlConfig, ComputeStatus, ComputeStatusResponse, TlsConfig,
+    ComputeConfig, ComputeCtlConfig, ComputeStatus, ComputeStatusResponse,
 };
 use compute_api::spec::{
    Cluster, ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, Database, PgIdent,
    RemoteExtSpec, Role,
 };
-use jsonwebtoken::jwk::{
-    AlgorithmParameters, CommonParameters, EllipticCurve, Jwk, JwkSet, KeyAlgorithm, KeyOperations,
-    OctetKeyPairParameters, OctetKeyPairType, PublicKeyUse,
-};
 use nix::sys::signal::{Signal, kill};
 use pageserver_api::shard::ShardStripeSize;
-use pem::Pem;
 use reqwest::header::CONTENT_TYPE;
 use safekeeper_api::membership::SafekeeperGeneration;
 use serde::{Deserialize, Serialize};
-use sha2::{Digest, Sha256};
-use spki::der::Decode;
-use spki::{SubjectPublicKeyInfo, SubjectPublicKeyInfoRef};
 use tracing::debug;
 use url::Host;
 use utils::id::{NodeId, TenantId, TimelineId};
@@ -90,7 +82,6 @@ pub struct EndpointConf {
    drop_subscriptions_before_start: bool,
    features: Vec<ComputeFeature>,
    cluster: Option<Cluster>,
-    compute_ctl_config: ComputeCtlConfig,
 }

 //
@@ -146,37 +137,6 @@ impl ComputeControlPlane {
            .unwrap_or(self.base_port)
    }

-    /// Create a JSON Web Key Set. This ideally matches the way we create a JWKS
-    /// from the production control plane.
-    fn create_jwks_from_pem(pem: &Pem) -> Result<JwkSet> {
-        let spki: SubjectPublicKeyInfoRef = SubjectPublicKeyInfo::from_der(pem.contents())?;
-        let public_key = spki.subject_public_key.raw_bytes();
-
-        let mut hasher = Sha256::new();
-        hasher.update(public_key);
-        let key_hash = hasher.finalize();
-
-        Ok(JwkSet {
-            keys: vec![Jwk {
-                common: CommonParameters {
-                    public_key_use: Some(PublicKeyUse::Signature),
-                    key_operations: Some(vec![KeyOperations::Verify]),
-                    key_algorithm: Some(KeyAlgorithm::EdDSA),
-                    key_id: Some(base64::encode_config(key_hash, base64::URL_SAFE_NO_PAD)),
-                    x509_url: None::<String>,
-                    x509_chain: None::<Vec<String>>,
-                    x509_sha1_fingerprint: None::<String>,
-                    x509_sha256_fingerprint: None::<String>,
-                },
-                algorithm: AlgorithmParameters::OctetKeyPair(OctetKeyPairParameters {
-                    key_type: OctetKeyPairType::OctetKeyPair,
-                    curve: EllipticCurve::Ed25519,
-                    x: base64::encode_config(public_key, base64::URL_SAFE_NO_PAD),
-                }),
-            }],
-        })
-    }
-
    #[allow(clippy::too_many_arguments)]
    pub fn new_endpoint(
        &mut self,
@@ -194,10 +154,6 @@ impl ComputeControlPlane {
        let pg_port = pg_port.unwrap_or_else(|| self.get_port());
        let external_http_port = external_http_port.unwrap_or_else(|| self.get_port() + 1);
        let internal_http_port = internal_http_port.unwrap_or_else(|| external_http_port + 1);
-        let compute_ctl_config = ComputeCtlConfig {
-            jwks: Self::create_jwks_from_pem(&self.env.read_public_key()?)?,
-            tls: None::<TlsConfig>,
-        };
        let ep = Arc::new(Endpoint {
            endpoint_id: endpoint_id.to_owned(),
            pg_address: SocketAddr::new(IpAddr::from(Ipv4Addr::LOCALHOST), pg_port),
@@ -225,7 +181,6 @@ impl ComputeControlPlane {
            reconfigure_concurrency: 1,
            features: vec![],
            cluster: None,
-            compute_ctl_config: compute_ctl_config.clone(),
        });

        ep.create_endpoint_dir()?;
@@ -245,7 +200,6 @@ impl ComputeControlPlane {
                reconfigure_concurrency: 1,
                features: vec![],
                cluster: None,
-                compute_ctl_config,
            })?,
        )?;
        std::fs::write(
@@ -288,6 +242,7 @@ impl ComputeControlPlane {

 ///////////////////////////////////////////////////////////////////////////////

+#[derive(Debug)]
 pub struct Endpoint {
    /// used as the directory name
    endpoint_id: String,
@@ -316,9 +271,6 @@ pub struct Endpoint {
    features: Vec<ComputeFeature>,
    // Cluster settings
    cluster: Option<Cluster>,
-
-    /// The compute_ctl config for the endpoint's compute.
-    compute_ctl_config: ComputeCtlConfig,
 }

 #[derive(PartialEq, Eq)]
@@ -381,7 +333,6 @@ impl Endpoint {
            drop_subscriptions_before_start: conf.drop_subscriptions_before_start,
            features: conf.features,
            cluster: conf.cluster,
-            compute_ctl_config: conf.compute_ctl_config,
        })
    }

@@ -629,13 +580,6 @@ impl Endpoint {
        Ok(safekeeper_connstrings)
    }

-    /// Generate a JWT with the correct claims.
-    pub fn generate_jwt(&self) -> Result<String> {
-        self.env.generate_auth_token(&ComputeClaims {
-            compute_id: self.endpoint_id.clone(),
-        })
-    }
-
    #[allow(clippy::too_many_arguments)]
    pub async fn start(
        &self,
@@ -762,10 +706,14 @@ impl Endpoint {

            ComputeConfig {
                spec: Some(spec),
-                compute_ctl_config: self.compute_ctl_config.clone(),
+                compute_ctl_config: ComputeCtlConfig::default(),
            }
        };

+        // TODO(tristan957): Stop writing spec.json once the compatibility tests
+        // no longer run a compute_ctl binary that lacks --config
+        let spec_path = self.endpoint_path().join("spec.json");
+        std::fs::write(spec_path, serde_json::to_string_pretty(&config.spec)?)?;
        let config_path = self.endpoint_path().join("config.json");
        std::fs::write(config_path, serde_json::to_string_pretty(&config)?)?;

@@ -775,6 +723,16 @@ impl Endpoint {
            .append(true)
            .open(self.endpoint_path().join("compute.log"))?;

+        // TODO(tristan957): Remove this --help probe once the compatibility
+        // tests no longer run a compute_ctl binary that lacks --config
+        let old_compute_ctl = {
+            let mut cmd = Command::new(self.env.neon_distrib_dir.join("compute_ctl"));
+            let help_output = cmd.arg("--help").output()?;
+            let help_output = String::from_utf8_lossy(&help_output.stdout);
+
+            !help_output.contains("--config")
+        };
+
        // Launch compute_ctl
        let conn_str = self.connstr("cloud_admin", "postgres");
        println!("Starting postgres node at '{}'", conn_str);
@@ -793,8 +751,19 @@ impl Endpoint {
        ])
        .args(["--pgdata", self.pgdata().to_str().unwrap()])
        .args(["--connstr", &conn_str])
-        .arg("--config")
-        .arg(self.endpoint_path().join("config.json").as_os_str())
+        // TODO(tristan957): Pass --config instead of --spec-path once the
+        // compatibility tests no longer run a compute_ctl that lacks --config
+        .args([
+            "--spec-path",
+            self.endpoint_path()
+                .join(if old_compute_ctl {
+                    "spec.json"
+                } else {
+                    "config.json"
+                })
+                .to_str()
+                .unwrap(),
+        ])
        .args([
            "--pgbin",
            self.env
@@ -805,7 +774,16 @@ impl Endpoint {
        ])
        // TODO: It would be nice if we generated compute IDs with the same
        // algorithm as the real control plane.
-        .args(["--compute-id", &self.endpoint_id])
+        .args([
+            "--compute-id",
+            &format!(
+                "compute-{}",
+                SystemTime::now()
+                    .duration_since(UNIX_EPOCH)
+                    .unwrap()
+                    .as_secs()
+            ),
+        ])
        .stdin(std::process::Stdio::null())
        .stderr(logfile.try_clone()?)
        .stdout(logfile);
@@ -903,7 +881,6 @@ impl Endpoint {
                    self.external_http_address.port()
                ),
            )
-            .bearer_auth(self.generate_jwt()?)
            .send()
            .await?;

@@ -980,7 +957,6 @@ impl Endpoint {
                self.external_http_address.port()
            ))
            .header(CONTENT_TYPE.as_str(), "application/json")
-            .bearer_auth(self.generate_jwt()?)
            .body(
                serde_json::to_string(&ConfigurationRequest {
                    spec,
--- a/control_plane/src/local_env.rs
+++ b/control_plane/src/local_env.rs
@@ -12,7 +12,6 @@ use std::{env, fs};

 use anyhow::{Context, bail};
 use clap::ValueEnum;
-use pem::Pem;
 use postgres_backend::AuthType;
 use reqwest::Url;
 use serde::{Deserialize, Serialize};
@@ -23,7 +22,7 @@ use crate::object_storage::{OBJECT_STORAGE_REMOTE_STORAGE_DIR, ObjectStorage};
 use crate::pageserver::{PAGESERVER_REMOTE_STORAGE_DIR, PageServerNode};
 use crate::safekeeper::SafekeeperNode;

-pub const DEFAULT_PG_VERSION: u32 = 17;
+pub const DEFAULT_PG_VERSION: u32 = 16;

 //
 // This data structures represents neon_local CLI config
@@ -57,7 +56,6 @@ pub struct LocalEnv {

    // used to issue tokens during e.g pg start
    pub private_key_path: PathBuf,
-    /// Path to environment's public key
    pub public_key_path: PathBuf,

    pub broker: NeonBroker,
@@ -760,11 +758,11 @@ impl LocalEnv {

    // this function is used only for testing purposes in CLI e g generate tokens during init
    pub fn generate_auth_token<S: Serialize>(&self, claims: &S) -> anyhow::Result<String> {
-        let key = self.read_private_key()?;
-        encode_from_key_file(claims, &key)
+        let private_key_path = self.get_private_key_path();
+        let key_data = fs::read(private_key_path)?;
+        encode_from_key_file(claims, &key_data)
    }

-    /// Get the path to the private key.
    pub fn get_private_key_path(&self) -> PathBuf {
        if self.private_key_path.is_absolute() {
            self.private_key_path.to_path_buf()
@@ -773,29 +771,6 @@ impl LocalEnv {
        }
    }

-    /// Get the path to the public key.
-    pub fn get_public_key_path(&self) -> PathBuf {
-        if self.public_key_path.is_absolute() {
-            self.public_key_path.to_path_buf()
-        } else {
-            self.base_data_dir.join(&self.public_key_path)
-        }
-    }
-
-    /// Read the contents of the private key file.
-    pub fn read_private_key(&self) -> anyhow::Result<Pem> {
-        let private_key_path = self.get_private_key_path();
-        let pem = pem::parse(fs::read(private_key_path)?)?;
-        Ok(pem)
-    }
-
-    /// Read the contents of the public key file.
-    pub fn read_public_key(&self) -> anyhow::Result<Pem> {
-        let public_key_path = self.get_public_key_path();
-        let pem = pem::parse(fs::read(public_key_path)?)?;
-        Ok(pem)
-    }
-
    /// Materialize the [`NeonLocalInitConf`] to disk. Called during [`neon_local init`].
    pub fn init(conf: NeonLocalInitConf, force: &InitForceMode) -> anyhow::Result<()> {
        let base_path = base_path();
@@ -981,7 +956,6 @@ fn generate_auth_keys(private_key_path: &Path, public_key_path: &Path) -> anyhow
            String::from_utf8_lossy(&keygen_output.stderr)
        );
    }
-
    // Extract the public key from the private key file
    //
    // openssl pkey -in auth_private_key.pem -pubout -out auth_public_key.pem
@@ -998,7 +972,6 @@ fn generate_auth_keys(private_key_path: &Path, public_key_path: &Path) -> anyhow
            String::from_utf8_lossy(&keygen_output.stderr)
        );
    }
-
    Ok(())
 }

--- a/control_plane/src/pageserver.rs
+++ b/control_plane/src/pageserver.rs
@@ -413,11 +413,6 @@ impl PageServerNode {
                .map(serde_json::from_str)
                .transpose()
                .context("Failed to parse 'compaction_algorithm' json")?,
-            compaction_shard_ancestor: settings
-                .remove("compaction_shard_ancestor")
-                .map(|x| x.parse::<bool>())
-                .transpose()
-                .context("Failed to parse 'compaction_shard_ancestor' as a bool")?,
            compaction_l0_first: settings
                .remove("compaction_l0_first")
                .map(|x| x.parse::<bool>())
--- a/control_plane/src/storage_controller.rs
+++ b/control_plane/src/storage_controller.rs
@@ -18,7 +18,6 @@ use pageserver_api::models::{
 };
 use pageserver_api::shard::TenantShardId;
 use pageserver_client::mgmt_api::ResponseErrorMessageExt;
-use pem::Pem;
 use postgres_backend::AuthType;
 use reqwest::{Certificate, Method};
 use serde::de::DeserializeOwned;
@@ -35,8 +34,8 @@ use crate::local_env::{LocalEnv, NeonStorageControllerConf};

 pub struct StorageController {
    env: LocalEnv,
-    private_key: Option<Pem>,
-    public_key: Option<Pem>,
+    private_key: Option<Vec<u8>>,
+    public_key: Option<String>,
    client: reqwest::Client,
    config: NeonStorageControllerConf,

@@ -117,9 +116,7 @@ impl StorageController {
            AuthType::Trust => (None, None),
            AuthType::NeonJWT => {
                let private_key_path = env.get_private_key_path();
-                let private_key =
-                    pem::parse(fs::read(private_key_path).expect("failed to read private key"))
-                        .expect("failed to parse PEM file");
+                let private_key = fs::read(private_key_path).expect("failed to read private key");

                // If pageserver auth is enabled, this implicitly enables auth for this service,
                // using the same credentials.
@@ -141,13 +138,9 @@ impl StorageController {
                        .expect("Empty key dir")
                        .expect("Error reading key dir");

-                    pem::parse(std::fs::read_to_string(dent.path()).expect("Can't read public key"))
-                        .expect("Failed to parse PEM file")
+                    std::fs::read_to_string(dent.path()).expect("Can't read public key")
                } else {
-                    pem::parse(
-                        std::fs::read_to_string(&public_key_path).expect("Can't read public key"),
-                    )
-                    .expect("Failed to parse PEM file")
+                    std::fs::read_to_string(&public_key_path).expect("Can't read public key")
                };
                (Some(private_key), Some(public_key))
            }
--- a/docker-compose/README.md
+++ b/docker-compose/README.md
@@ -1,3 +1,4 @@
+
 # Example docker compose configuration

 The configuration in this directory is used for testing Neon docker images: it is
@@ -7,13 +8,3 @@ you can experiment with a miniature Neon system, use `cargo neon` rather than co
 This configuration does not start the storage controller, because the controller
 needs a way to reconfigure running computes, and no such thing exists in this setup.

-## Generating the JWKS for a compute
-
-```shell
-openssl genpkey -algorithm Ed25519 -out private-key.pem
-openssl pkey -in private-key.pem -pubout -out public-key.pem
-openssl pkey -pubin -inform pem -in public-key.pem -pubout -outform der -out public-key.der
-key="$(xxd -plain -cols 32 -s -32 public-key.der)"
-key_id="$(printf '%s' "$key" | sha256sum | awk '{ print $1 }' | basenc --base64url --wrap=0)"
-x="$(printf '%s' "$key" | basenc --base64url --wrap=0)"
-```
--- a/docker-compose/compute_wrapper/private-key.pem
+++ b/docker-compose/compute_wrapper/private-key.pem
@@ -1,3 +0,0 @@
-----BEGIN PRIVATE KEY-----
-MC4CAQAwBQYDK2VwBCIEIOmnRbzt2AJ0d+S3aU1hiYOl/tXpvz1FmWBfwHYBgOma
-----END PRIVATE KEY-----
--- a/docker-compose/compute_wrapper/public-key.der
+++ b/docker-compose/compute_wrapper/public-key.der
--- a/docker-compose/compute_wrapper/public-key.pem
+++ b/docker-compose/compute_wrapper/public-key.pem
@@ -1,3 +0,0 @@
-----BEGIN PUBLIC KEY-----
-MCowBQYDK2VwAyEADY0al/U0bgB3+9fUGk+3PKWnsck9OyxN5DjHIN6Xep0=
-----END PUBLIC KEY-----
--- a/docker-compose/compute_wrapper/shell/compute.sh
+++ b/docker-compose/compute_wrapper/shell/compute.sh
@@ -81,9 +81,19 @@ sed -i "s/TIMELINE_ID/${timeline_id}/" ${CONFIG_FILE}

 cat ${CONFIG_FILE}

+# TODO(tristan957): Remove these workarounds for backwards compatibility after
+# the next compute release. That includes these next few lines and the
+# --spec-path in the compute_ctl invocation.
+if compute_ctl --help | grep --quiet -- '--config'; then
+  SPEC_PATH="$CONFIG_FILE"
+else
+  jq '.spec' < "$CONFIG_FILE" > /tmp/spec.json
+  SPEC_PATH=/tmp/spec.json
+fi
+
 echo "Start compute node"
 /usr/local/bin/compute_ctl --pgdata /var/db/postgres/compute \
     -C "postgresql://cloud_admin@localhost:55433/postgres"  \
     -b /usr/local/bin/postgres                              \
     --compute-id "compute-$RANDOM"                          \
-     --config "$CONFIG_FILE"
+     --spec-path "$SPEC_PATH"
--- a/docker-compose/compute_wrapper/var/db/postgres/configs/config.json
+++ b/docker-compose/compute_wrapper/var/db/postgres/configs/config.json
@@ -142,19 +142,7 @@
    },
    "compute_ctl_config": {
        "jwks": {
-            "keys": [
-                {
-                    "use": "sig",
-                    "key_ops": [
-                        "verify"
-                    ],
-                    "alg": "EdDSA",
-                    "kid": "ZGIxMzAzOGY0YWQwODk2ODU1MTk1NzMxMDFkYmUyOWU2NzZkOWNjNjMyMGRkZGJjOWY0MjdjYWVmNzE1MjUyOAo=",
-                    "kty": "OKP",
-                    "crv": "Ed25519",
-                    "x": "MGQ4ZDFhOTdmNTM0NmUwMDc3ZmJkN2Q0MWE0ZmI3M2NhNWE3YjFjOTNkM2IyYzRkZTQzOGM3MjBkZTk3N2E5ZAo="
-                }
-            ]
+            "keys": []
        }
    }
 }
--- a/docker-compose/ext-src/pg_jsonschema-src/Makefile
+++ b/docker-compose/ext-src/pg_jsonschema-src/Makefile
@@ -1,8 +0,0 @@
-EXTENSION = pg_jsonschema
-DATA = pg_jsonschema--1.0.sql
-REGRESS = jsonschema_valid_api  jsonschema_edge_cases
-REGRESS_OPTS = --load-extension=pg_jsonschema
-
-PG_CONFIG ?= pg_config
-PGXS := $(shell $(PG_CONFIG) --pgxs)
-include $(PGXS)
--- a/docker-compose/ext-src/pg_jsonschema-src/expected/jsonschema_edge_cases.out
+++ b/docker-compose/ext-src/pg_jsonschema-src/expected/jsonschema_edge_cases.out
@@ -1,87 +0,0 @@
-- Schema with enums, nulls, extra properties disallowed
-SELECT jsonschema_is_valid('{
-  "type": "object",
-  "properties": {
-    "status": { "type": "string", "enum": ["active", "inactive", "pending"] },
-    "email": { "type": ["string", "null"], "format": "email" }
-  },
-  "required": ["status"],
-  "additionalProperties": false
-}'::json);
- jsonschema_is_valid 
---------------------
- t
-(1 row)
-
-- Valid enum and null email
-SELECT jsonschema_validation_errors(
-  '{
-    "type": "object",
-    "properties": {
-      "status": { "type": "string", "enum": ["active", "inactive", "pending"] },
-      "email": { "type": ["string", "null"], "format": "email" }
-    },
-    "required": ["status"],
-    "additionalProperties": false
-  }'::json,
-  '{"status": "active", "email": null}'::json
-);
- jsonschema_validation_errors 
------------------------------
- {}
-(1 row)
-
-- Invalid enum value
-SELECT jsonschema_validation_errors(
-  '{
-    "type": "object",
-    "properties": {
-      "status": { "type": "string", "enum": ["active", "inactive", "pending"] },
-      "email": { "type": ["string", "null"], "format": "email" }
-    },
-    "required": ["status"],
-    "additionalProperties": false
-  }'::json,
-  '{"status": "disabled", "email": null}'::json
-);
-                     jsonschema_validation_errors                     
----------------------------------------------------------------------
- {"\"disabled\" is not one of [\"active\",\"inactive\",\"pending\"]"}
-(1 row)
-
-- Invalid email format (assuming format is validated)
-SELECT jsonschema_validation_errors(
-  '{
-    "type": "object",
-    "properties": {
-      "status": { "type": "string", "enum": ["active", "inactive", "pending"] },
-      "email": { "type": ["string", "null"], "format": "email" }
-    },
-    "required": ["status"],
-    "additionalProperties": false
-  }'::json,
-  '{"status": "active", "email": "not-an-email"}'::json
-);
-      jsonschema_validation_errors       
-----------------------------------------
- {"\"not-an-email\" is not a \"email\""}
-(1 row)
-
-- Extra property not allowed
-SELECT jsonschema_validation_errors(
-  '{
-    "type": "object",
-    "properties": {
-      "status": { "type": "string", "enum": ["active", "inactive", "pending"] },
-      "email": { "type": ["string", "null"], "format": "email" }
-    },
-    "required": ["status"],
-    "additionalProperties": false
-  }'::json,
-  '{"status": "active", "extra": "should not be here"}'::json
-);
-                    jsonschema_validation_errors                    
--------------------------------------------------------------------
- {"Additional properties are not allowed ('extra' was unexpected)"}
-(1 row)
-
--- a/docker-compose/ext-src/pg_jsonschema-src/expected/jsonschema_valid_api.out
+++ b/docker-compose/ext-src/pg_jsonschema-src/expected/jsonschema_valid_api.out
@@ -1,65 +0,0 @@
-- Define schema
-SELECT jsonschema_is_valid('{
-  "type": "object",
-  "properties": {
-    "username": { "type": "string" },
-    "age": { "type": "integer" }
-  },
-  "required": ["username"]
-}'::json);
- jsonschema_is_valid 
---------------------
- t
-(1 row)
-
-- Valid instance
-SELECT jsonschema_validation_errors(
-  '{
-    "type": "object",
-    "properties": {
-      "username": { "type": "string" },
-      "age": { "type": "integer" }
-    },
-    "required": ["username"]
-  }'::json,
-  '{"username": "alice", "age": 25}'::json
-);
- jsonschema_validation_errors 
------------------------------
- {}
-(1 row)
-
-- Invalid instance: missing required "username"
-SELECT jsonschema_validation_errors(
-  '{
-    "type": "object",
-    "properties": {
-      "username": { "type": "string" },
-      "age": { "type": "integer" }
-    },
-    "required": ["username"]
-  }'::json,
-  '{"age": 25}'::json
-);
-      jsonschema_validation_errors       
-----------------------------------------
- {"\"username\" is a required property"}
-(1 row)
-
-- Invalid instance: wrong type for "age"
-SELECT jsonschema_validation_errors(
-  '{
-    "type": "object",
-    "properties": {
-      "username": { "type": "string" },
-      "age": { "type": "integer" }
-    },
-    "required": ["username"]
-  }'::json,
-  '{"username": "bob", "age": "twenty"}'::json
-);
-       jsonschema_validation_errors        
-------------------------------------------
- {"\"twenty\" is not of type \"integer\""}
-(1 row)
-
--- a/docker-compose/ext-src/pg_jsonschema-src/sql/jsonschema_edge_cases.sql
+++ b/docker-compose/ext-src/pg_jsonschema-src/sql/jsonschema_edge_cases.sql
@@ -1,66 +0,0 @@
-- Schema with enums, nulls, extra properties disallowed
-SELECT jsonschema_is_valid('{
-  "type": "object",
-  "properties": {
-    "status": { "type": "string", "enum": ["active", "inactive", "pending"] },
-    "email": { "type": ["string", "null"], "format": "email" }
-  },
-  "required": ["status"],
-  "additionalProperties": false
-}'::json);
-
-- Valid enum and null email
-SELECT jsonschema_validation_errors(
-  '{
-    "type": "object",
-    "properties": {
-      "status": { "type": "string", "enum": ["active", "inactive", "pending"] },
-      "email": { "type": ["string", "null"], "format": "email" }
-    },
-    "required": ["status"],
-    "additionalProperties": false
-  }'::json,
-  '{"status": "active", "email": null}'::json
-);
-
-- Invalid enum value
-SELECT jsonschema_validation_errors(
-  '{
-    "type": "object",
-    "properties": {
-      "status": { "type": "string", "enum": ["active", "inactive", "pending"] },
-      "email": { "type": ["string", "null"], "format": "email" }
-    },
-    "required": ["status"],
-    "additionalProperties": false
-  }'::json,
-  '{"status": "disabled", "email": null}'::json
-);
-
-- Invalid email format (assuming format is validated)
-SELECT jsonschema_validation_errors(
-  '{
-    "type": "object",
-    "properties": {
-      "status": { "type": "string", "enum": ["active", "inactive", "pending"] },
-      "email": { "type": ["string", "null"], "format": "email" }
-    },
-    "required": ["status"],
-    "additionalProperties": false
-  }'::json,
-  '{"status": "active", "email": "not-an-email"}'::json
-);
-
-- Extra property not allowed
-SELECT jsonschema_validation_errors(
-  '{
-    "type": "object",
-    "properties": {
-      "status": { "type": "string", "enum": ["active", "inactive", "pending"] },
-      "email": { "type": ["string", "null"], "format": "email" }
-    },
-    "required": ["status"],
-    "additionalProperties": false
-  }'::json,
-  '{"status": "active", "extra": "should not be here"}'::json
-);
--- a/docker-compose/ext-src/pg_jsonschema-src/sql/jsonschema_valid_api.sql
+++ b/docker-compose/ext-src/pg_jsonschema-src/sql/jsonschema_valid_api.sql
@@ -1,48 +0,0 @@
-- Define schema
-SELECT jsonschema_is_valid('{
-  "type": "object",
-  "properties": {
-    "username": { "type": "string" },
-    "age": { "type": "integer" }
-  },
-  "required": ["username"]
-}'::json);
-
-- Valid instance
-SELECT jsonschema_validation_errors(
-  '{
-    "type": "object",
-    "properties": {
-      "username": { "type": "string" },
-      "age": { "type": "integer" }
-    },
-    "required": ["username"]
-  }'::json,
-  '{"username": "alice", "age": 25}'::json
-);
-
-- Invalid instance: missing required "username"
-SELECT jsonschema_validation_errors(
-  '{
-    "type": "object",
-    "properties": {
-      "username": { "type": "string" },
-      "age": { "type": "integer" }
-    },
-    "required": ["username"]
-  }'::json,
-  '{"age": 25}'::json
-);
-
-- Invalid instance: wrong type for "age"
-SELECT jsonschema_validation_errors(
-  '{
-    "type": "object",
-    "properties": {
-      "username": { "type": "string" },
-      "age": { "type": "integer" }
-    },
-    "required": ["username"]
-  }'::json,
-  '{"username": "bob", "age": "twenty"}'::json
-);
--- a/docker-compose/ext-src/pg_session_jwt-src/Makefile
+++ b/docker-compose/ext-src/pg_session_jwt-src/Makefile
@@ -1,9 +0,0 @@
-EXTENSION = pg_session_jwt
-
-REGRESS = basic_functions
-REGRESS_OPTS = --load-extension=$(EXTENSION)
-export PGOPTIONS = -c pg_session_jwt.jwk={"crv":"Ed25519","kty":"OKP","x":"R_Abz-63zJ00l-IraL5fQhwkhGVZCSooQFV5ntC3C7M"}
-
-PG_CONFIG ?= pg_config
-PGXS := $(shell $(PG_CONFIG) --pgxs)
-include $(PGXS)
--- a/docker-compose/ext-src/pg_session_jwt-src/expected/basic_functions.out
+++ b/docker-compose/ext-src/pg_session_jwt-src/expected/basic_functions.out
@@ -1,35 +0,0 @@
-- Basic functionality tests for pg_session_jwt
-- Test auth.init() function
-SELECT auth.init();
- init 
------
- 
-(1 row)
-
-- Test an invalid JWT
-SELECT auth.jwt_session_init('INVALID-JWT');
-ERROR:  invalid JWT encoding
-- Test creating a session with an expired JWT
-SELECT auth.jwt_session_init('eyJhbGciOiJFZERTQSJ9.eyJleHAiOjE3NDI1NjQ0MzIsImlhdCI6MTc0MjU2NDI1MiwianRpIjo0MjQyNDIsInN1YiI6InVzZXIxMjMifQ.A6FwKuaSduHB9O7Gz37g0uoD_U9qVS0JNtT7YABGVgB7HUD1AMFc9DeyhNntWBqncg8k5brv-hrNTuUh5JYMAw');
-ERROR:  Token used after it has expired
-- Test creating a session with a valid JWT
-SELECT auth.jwt_session_init('eyJhbGciOiJFZERTQSJ9.eyJleHAiOjQ4OTYxNjQyNTIsImlhdCI6MTc0MjU2NDI1MiwianRpIjo0MzQzNDMsInN1YiI6InVzZXIxMjMifQ.2TXVgjb6JSUq6_adlvp-m_SdOxZSyGS30RS9TLB0xu2N83dMSs2NybwE1NMU8Fb0tcAZR_ET7M2rSxbTrphfCg');
- jwt_session_init 
------------------
- 
-(1 row)
-
-- Test auth.session() function
-SELECT auth.session();
-                                 session                                 
-------------------------------------------------------------------------
- {"exp": 4896164252, "iat": 1742564252, "jti": 434343, "sub": "user123"}
-(1 row)
-
-- Test auth.user_id() function
-SELECT auth.user_id() AS user_id;
- user_id 
---------
- user123
-(1 row)
-
--- a/docker-compose/ext-src/pg_session_jwt-src/sql/basic_functions.sql
+++ b/docker-compose/ext-src/pg_session_jwt-src/sql/basic_functions.sql
@@ -1,19 +0,0 @@
-- Basic functionality tests for pg_session_jwt
-
-- Test auth.init() function
-SELECT auth.init();
-
-- Test an invalid JWT
-SELECT auth.jwt_session_init('INVALID-JWT');
-
-- Test creating a session with an expired JWT
-SELECT auth.jwt_session_init('eyJhbGciOiJFZERTQSJ9.eyJleHAiOjE3NDI1NjQ0MzIsImlhdCI6MTc0MjU2NDI1MiwianRpIjo0MjQyNDIsInN1YiI6InVzZXIxMjMifQ.A6FwKuaSduHB9O7Gz37g0uoD_U9qVS0JNtT7YABGVgB7HUD1AMFc9DeyhNntWBqncg8k5brv-hrNTuUh5JYMAw');
-
-- Test creating a session with a valid JWT
-SELECT auth.jwt_session_init('eyJhbGciOiJFZERTQSJ9.eyJleHAiOjQ4OTYxNjQyNTIsImlhdCI6MTc0MjU2NDI1MiwianRpIjo0MzQzNDMsInN1YiI6InVzZXIxMjMifQ.2TXVgjb6JSUq6_adlvp-m_SdOxZSyGS30RS9TLB0xu2N83dMSs2NybwE1NMU8Fb0tcAZR_ET7M2rSxbTrphfCg');
-
-- Test auth.session() function
-SELECT auth.session();
-
-- Test auth.user_id() function
-SELECT auth.user_id() AS user_id;
--- a/docs/rfcs/2025-04-14-storage-keys.md
+++ b/docs/rfcs/2025-04-14-storage-keys.md
@@ -0,0 +1,242 @@
+
+# Storage Encryption Key Management
+
+## Summary
+
+As a precursor to adding new encryption capabilities to Neon's storage services, this RFC proposes
+mechanisms for creating and storing fine-grained encryption keys for user data in Neon.  We aim
+to provide at least tenant granularity, but will use timeline granularity when it is simpler to do
+so.
+
+Out of scope:
+- We describe an abstract KMS interface, but not particular platform implementations (such as how
+  to authenticate with KMS).
+
+## Terminology
+
+_wrapped/unwrapped_: a wrapped encryption key is a key encrypted by another key.  For example, the key for
+encrypting a timeline's pageserver data might be wrapped by some "root" key for the tenant's user account, stored in a KMS system.
+
+_key hierarchy_: the relationships between keys which wrap each other. For example, a layer file key might
+be wrapped by a pageserver tenant key, which is wrapped by a tenant's root key.
+
+## Design Choices
+
+Storage: S3 will be the store of record for wrapped keys.
+
+Separate keys: Safekeeper and Pageserver will use independent keys.
+
+AES256: rather than building a generic system for keys, we will assume that all the keys
+we manage are AES256 keys -- this is the de-facto standard for enterprise data storage.
+
+Per-object keys: rather than encrypting data objects (layer files and segment files) with
+the tenant keys directly, they will be encrypted with separate keys.  This avoids cryptographic
+safety issues from re-using the same key for large quantities of potentially repetitive plaintext.
+
+S3 objects are self-contained: each encrypted file will have a metadata block in the file itself
+storing the KMS-wrapped key to decrypt itself.
+
+Key storage is optional at a per-tenant granularity: eventually this would be on by default, but:
+- Initially, only some environments will have a KMS set up.
+- Encryption has some overhead and it may be that some tenants don't want or need it.
+
+## Design
+
+### Summary of format changes
+
+- Pageserver layer files and safekeeper segment objects are split into blocks and each
+  block is encrypted by the layer key.
+- Pageserver layer files and safekeeper segment objects get new metadata fields to
+  store wrapped layer key and the KMS-wrapped timeline key.
+
+### Summary of API changes
+
+- Pageserver TenantConf API gets a new field for account ID
+- Pageserver TenantConf API gets a new field for encryption mode
+- Safekeeper timeline creation API gets a new field for account ID
+- Controller, pageserver & safekeeper get a new timeline-scoped `rotate_key` API
+
+### KMS interface
+
+Neon will interoperate with different KMS APIs on different platforms.  We will implement a generic interface,
+similar to how `remote_storage` wraps different object storage APIs:
+- `generate(accountId, keyType, alias) -> (wrapped key, plaintext key)`
+- `unwrap(accountId, ciphertext key) -> plaintext key`
+
+Hereafter, when we talk about generating or unwrapping a key, this means a call into the KMS API.
+
+The KMS deals with abstract "account IDs", which are not equal to tenant IDs and may not be
+1:1 with tenants.  The account ID will be provided as part of tenant configuration, along
+with a field to identify an encryption mode.
+
+
+### Pageserver Layer File Format
+
+Encryption blocks are the minimum unit of read. To read any part of the data within an encryption block,
+we must decrypt the whole block. All encryption blocks within a layer share the same layer key (is this safe?).
+
+Image layers: each image is one encryption block.
+
+Delta layers: for the first stage of the project, each delta is encrypted separately; in the future, we can batch
+several small deltas into a single encryption block.
+
+Indices: each B+ tree node is an encryption block.
+
+Layer format:
+
+```
+| Data Block | Data Block | Data Block | ... | Index Block | Index Block | Index Block | Metadata |
+Data block = encrypt(data, layer_key)
+Index block = encrypt(index, layer_key); the index maps a key to the offset of its data block inside the layer file.
+Metadata = wrap(layer_key, timeline_key), wrap_kms(tenant_key), and other metadata we want to store in the future
+```
+
+Note that we generate a random layer_key for each layer. We store the layer key wrapped by the current
+tenant key (described in later sections) and the KMS-wrapped tenant key in the layer.
+
+If data compression is enabled, the data is compressed first before being encrypted (is this safe?)
+
+This file format is used across both object storage and local storage. We do not decrypt when downloading
+the layer file to the disk. Decryption is done when reading the layer.
+
+### Layer File Format Migration
+
+We record the file format of each layer file in both the index_part and the layer file name (suffix v2?).
+The layer file format version will be passed into the layer readers. The re-keying operation (described below)
+will migrate all layer files automatically to v2.
+
+### Safekeeper Segment Format
+
+TBD
+
+### Pageserver Timeline Index
+
+We will add a `created_at` field for each layer file so that during re-keying (described in later sections)
+we can determine which layer files to rewrite. We also record the offset of the metadata block so that it is
+possible to obtain more information about the layer file without downloading the full layer file (i.e., the
+exact timeline key being used to encrypt the layer file).
+
+```
+# LayerFileMetadata
+{
+  "format": 2,
+  "created_at": "<time>",
+  "metadata_block_offset": u64,
+}
+```
+
+TODO: create an index for safekeeper so that it's faster to determine what files to re-key? Or we can scan all
+files.
+
+### Pageserver Key Cache
+
+We keep a hashmap from KMS-wrapped tenant key to plaintext key for each tenant so that we do not need to repeatedly
+unwrap the same key.
+
+### Key rotation
+
+Each tenant stores a tenant key in memory to encrypt all layer files generated across all timelines within
+its active period. When the key rotation API gets called, we rotate the timeline key in memory by calling the
+KMS API to generate a new key-pair, and all new layer files' layer keys will be encrypted using this key.
+
+### Re-keying
+
+While re-keying and key-rotation are sometimes used synonymously, we distinguish them:
+- Key rotation is generating a new key to use for new data
+- Re-keying is rewriting existing data so that old keys are no longer used at all
+
+Re-keying is a bulk data operation, and not fully defined in this RFC: it can be defined
+quite simply as "For object in objects, if object key version is < the rekeying horizon,
+then do a read/write cycle on the object using latest key".  This is a simple but potentially very
+expensive operation, so we discuss efficiency here.
+
+#### Pageserver re-key
+
+For pageservers, occasional rekeying may be implemented efficiently if one tolerates using
+the last few keys and doesn't insist on the latest, because pageservers periodically rewrite
+their data for GC-compaction anyway.  Thereby an API call to re-key any data with an overly old
+key would often be a no-op because all data was rewritten recently anyway.
+
+When object versioning is enabled in storage, re-keying is not fully accomplished by just
+re-writing live data: old versions would still contain user data encrypted with older keys.  To
+fully re-key, an extra step is needed to purge old objects.  Ideally, we should only purge
+old objects which were encrypted using old keys.  To this end, it would be useful to store
+the encryption key version as metadata on objects, so that a scrub of deleted object versions
+can efficiently select those objects that should be purged during re-key.
+
+Checks on object versions should not only be on deleted objects: because pageserver can emit
+"orphan" objects not referenced in the index under some circumstances, re-key must also
+check non-deleted objects.
+
+To summarize, the pageserver re-key operation is:
+- Iterate over index of layer files, select those with too-old key and rewrite them
+- Iterate over all versions in object storage, select those with a too-old key version
+  in their metadata and purge them (with a safety check that these are not referenced
+  by the latest index).
+
+It would be wise to combine the re-key procedure with an exhaustive read of a timeline's data,
+to ensure that when testing & rolling this feature out we are not rendering anything unreadable
+due to bugs in implementation.  Since we are deleting old versions in object storage, our
+time travel recovery tool will not be any help if we get something wrong in this process.
+
+#### Safekeeper re-key
+
+Re-keying a safekeeper timeline requires an exhaustive walk of segment objects, reading the
+metadata on each one and deciding whether it requires a rewrite.
+
+Safekeeper currently keeps historic objects forever, so re-keying this data will get
+more expensive as time goes on.  This would be a good time to add cleanup of old safekeeper
+segments, but doing so is beyond the scope of this RFC.
+
+### Enabling encryption for existing tenants
+
+To enable encryption for an existing tenant, we may simply call the key-rotation API (to generate a key),
+and then the re-key API (to rewrite existing data using this key).
+
+## Observability
+
+- To enable some external service to implement re-keying, we should publish metrics per-timeline
+  on the age of their latest encryption key.
+- Calls to KMS should be tracked with typical request rate/result/latency histograms to enable
+  detection of a slow KMS server and/or errors.
+
+## Alternatives considered
+
+### Use same tenant key for safekeeper and pageserver
+
+We could halve the number of keys in circulation by having the safekeeper and pageserver
+share a key rather than working independently.
+
+However, this would be substantially more complex to implement, as safekeepers and pageservers
+currently share no storage, so some new communication path would be needed.  There is minimal
+upside in sharing a key.
+
+### No KMS dependency
+
+We could choose to do all key management ourselves.  However, the industry standard approach
+to enabling users of cloud SaaS software to self-manage keys is to use the KMS as the intermediary
+between our system and the user's control of their key.  Although this RFC does not propose user-managed keys, we should design with this in mind.
+
+### Do all key generation/wrapping in KMS service
+
+We could avoid generating and wrapping/unwrapping object keys in our storage
+services by delegating all responsibility for key operations to the KMS.  However,
+KMS services have limited throughput and in some cases may charge per operation, so
+it is useful to avoid doing KMS operations per-object, and restrict them to per-timeline
+frequency.
+
+### Per-tenant instead of per-timeline pageserver keys
+
+For tenants with many timelines, we may reduce load on KMS service by
+using per-tenant instead of per-timeline keys, so that we may do operations
+such as creating a timeline without needing to do a KMS unwrap operation.
+
+However, per-timeline key management is much simpler to implement on the safekeeper,
+which currently has no concept of a tenant (other than as a namespace for timelines).
+It is also slightly simpler to implement on the pageserver, as it avoids implementing
+a tenant-scoped creation operation to initialize keys (instead, we may initialize keys
+during timeline creation).
+
+As a side benefit, per-timeline key management also enables implementing secure deletion in future
+at a per-timeline granularity.
+
--- a/libs/compute_api/src/responses.rs
+++ b/libs/compute_api/src/responses.rs
@@ -160,7 +160,7 @@ pub struct CatalogObjects {
    pub databases: Vec<Database>,
 }

-#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
+#[derive(Clone, Debug, Deserialize, Serialize)]
 pub struct ComputeCtlConfig {
    /// Set of JSON web keys that the compute can use to authenticate
    /// communication from the control plane.
@@ -179,7 +179,7 @@ impl Default for ComputeCtlConfig {
    }
 }

-#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
+#[derive(Clone, Debug, Deserialize, Serialize)]
 pub struct TlsConfig {
    pub key_path: String,
    pub cert_path: String,
--- a/libs/http-utils/Cargo.toml
+++ b/libs/http-utils/Cargo.toml
@@ -14,7 +14,6 @@ futures.workspace = true
 hyper0.workspace = true
 itertools.workspace = true
 jemalloc_pprof.workspace = true
-jsonwebtoken.workspace = true
 once_cell.workspace = true
 pprof.workspace = true
 regex.workspace = true
--- a/libs/http-utils/src/endpoint.rs
+++ b/libs/http-utils/src/endpoint.rs
@@ -8,7 +8,6 @@ use bytes::{Bytes, BytesMut};
 use hyper::header::{AUTHORIZATION, CONTENT_DISPOSITION, CONTENT_TYPE, HeaderName};
 use hyper::http::HeaderValue;
 use hyper::{Body, Method, Request, Response};
-use jsonwebtoken::TokenData;
 use metrics::{Encoder, IntCounter, TextEncoder, register_int_counter};
 use once_cell::sync::Lazy;
 use pprof::ProfilerGuardBuilder;
@@ -619,7 +618,7 @@ pub fn auth_middleware<B: hyper::body::HttpBody + Send + Sync + 'static>(
                    })?;
                    let token = parse_token(header_value)?;

-                    let data: TokenData<Claims> = auth.decode(token).map_err(|err| {
+                    let data = auth.decode(token).map_err(|err| {
                        warn!("Authentication error: {err}");
                        // Rely on From<AuthError> for ApiError impl
                        err
--- a/libs/pageserver_api/Cargo.toml
+++ b/libs/pageserver_api/Cargo.toml
@@ -35,7 +35,6 @@ nix = {workspace = true, optional = true}
 reqwest.workspace = true
 rand.workspace = true
 tracing-utils.workspace = true
-once_cell.workspace = true

 [dev-dependencies]
 bincode.workspace = true
--- a/libs/pageserver_api/src/config.rs
+++ b/libs/pageserver_api/src/config.rs
@@ -379,8 +379,6 @@ pub struct TenantConfigToml {
    /// size exceeds `compaction_upper_limit * checkpoint_distance`.
    pub compaction_upper_limit: usize,
    pub compaction_algorithm: crate::models::CompactionAlgorithmSettings,
-    /// If true, enable shard ancestor compaction (enabled by default).
-    pub compaction_shard_ancestor: bool,
    /// If true, compact down L0 across all tenant timelines before doing regular compaction. L0
    /// compaction must be responsive to avoid read amp during heavy ingestion. Defaults to true.
    pub compaction_l0_first: bool,
@@ -679,13 +677,12 @@ pub mod tenant_conf_defaults {

    pub const DEFAULT_COMPACTION_PERIOD: &str = "20 s";
    pub const DEFAULT_COMPACTION_THRESHOLD: usize = 10;
-    pub const DEFAULT_COMPACTION_SHARD_ANCESTOR: bool = true;

    // This value needs to be tuned to avoid OOM. We have 3/4*CPUs threads for L0 compaction, that's
-    // 3/4*8=6 on most of our pageservers. Compacting 10 layers requires a maximum of
-    // DEFAULT_CHECKPOINT_DISTANCE*10 memory, that's 2560MB. So with this config, we can get a maximum peak
-    // compaction usage of 15360MB.
-    pub const DEFAULT_COMPACTION_UPPER_LIMIT: usize = 10;
+    // 3/4*16=9 on most of our pageservers. Compacting 20 layers requires about 1 GB memory (could
+    // be reduced later by optimizing L0 hole calculation to avoid loading all keys into memory). So
+    // with this config, we can get a maximum peak compaction usage of 9 GB.
+    pub const DEFAULT_COMPACTION_UPPER_LIMIT: usize = 20;
    // Enable L0 compaction pass and semaphore by default. L0 compaction must be responsive to avoid
    // read amp.
    pub const DEFAULT_COMPACTION_L0_FIRST: bool = true;
@@ -702,11 +699,8 @@ pub mod tenant_conf_defaults {
    // Relevant: https://github.com/neondatabase/neon/issues/3394
    pub const DEFAULT_GC_PERIOD: &str = "1 hr";
    pub const DEFAULT_IMAGE_CREATION_THRESHOLD: usize = 3;
-    // Currently, any value other than 0 will trigger image layer creation preemption immediately with L0 backpressure
-    // without looking at the exact number of L0 layers.
-    // It was expected to have the following behavior:
-    // > If there are more than threshold * compaction_threshold (that is 3 * 10 in the default config) L0 layers, image
-    // > layer creation will end immediately. Set to 0 to disable.
+    // If there are more than threshold * compaction_threshold (that is 3 * 10 in the default config) L0 layers, image
+    // layer creation will end immediately. Set to 0 to disable.
    pub const DEFAULT_IMAGE_CREATION_PREEMPT_THRESHOLD: usize = 3;
    pub const DEFAULT_PITR_INTERVAL: &str = "7 days";
    pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "10 seconds";
@@ -740,7 +734,6 @@ impl Default for TenantConfigToml {
            compaction_algorithm: crate::models::CompactionAlgorithmSettings {
                kind: DEFAULT_COMPACTION_ALGORITHM,
            },
-            compaction_shard_ancestor: DEFAULT_COMPACTION_SHARD_ANCESTOR,
            compaction_l0_first: DEFAULT_COMPACTION_L0_FIRST,
            compaction_l0_semaphore: DEFAULT_COMPACTION_L0_SEMAPHORE,
            l0_flush_delay_threshold: None,
--- a/libs/pageserver_api/src/models.rs
+++ b/libs/pageserver_api/src/models.rs
@@ -526,8 +526,6 @@ pub struct TenantConfigPatch {
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub compaction_algorithm: FieldPatch<CompactionAlgorithmSettings>,
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
-    pub compaction_shard_ancestor: FieldPatch<bool>,
-    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub compaction_l0_first: FieldPatch<bool>,
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub compaction_l0_semaphore: FieldPatch<bool>,
@@ -617,9 +615,6 @@ pub struct TenantConfig {
    #[serde(skip_serializing_if = "Option::is_none")]
    pub compaction_algorithm: Option<CompactionAlgorithmSettings>,

-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub compaction_shard_ancestor: Option<bool>,
-
    #[serde(skip_serializing_if = "Option::is_none")]
    pub compaction_l0_first: Option<bool>,

@@ -729,7 +724,6 @@ impl TenantConfig {
            mut compaction_threshold,
            mut compaction_upper_limit,
            mut compaction_algorithm,
-            mut compaction_shard_ancestor,
            mut compaction_l0_first,
            mut compaction_l0_semaphore,
            mut l0_flush_delay_threshold,
@@ -778,9 +772,6 @@ impl TenantConfig {
            .compaction_upper_limit
            .apply(&mut compaction_upper_limit);
        patch.compaction_algorithm.apply(&mut compaction_algorithm);
-        patch
-            .compaction_shard_ancestor
-            .apply(&mut compaction_shard_ancestor);
        patch.compaction_l0_first.apply(&mut compaction_l0_first);
        patch
            .compaction_l0_semaphore
@@ -869,7 +860,6 @@ impl TenantConfig {
            compaction_threshold,
            compaction_upper_limit,
            compaction_algorithm,
-            compaction_shard_ancestor,
            compaction_l0_first,
            compaction_l0_semaphore,
            l0_flush_delay_threshold,
@@ -930,9 +920,6 @@ impl TenantConfig {
                .as_ref()
                .unwrap_or(&global_conf.compaction_algorithm)
                .clone(),
-            compaction_shard_ancestor: self
-                .compaction_shard_ancestor
-                .unwrap_or(global_conf.compaction_shard_ancestor),
            compaction_l0_first: self
                .compaction_l0_first
                .unwrap_or(global_conf.compaction_l0_first),
@@ -1817,34 +1804,8 @@ pub mod virtual_file {
    }

    impl IoMode {
-        pub fn preferred() -> Self {
-            // The default behavior when running Rust unit tests without any further
-            // flags is to use the newest behavior if available on the platform (Direct).
-            // The CI uses the following environment variable to unit tests for all
-            // different modes.
-            // NB: the Python regression & perf tests have their own defaults management
-            // that writes pageserver.toml; they do not use this variable.
-            if cfg!(test) {
-                use once_cell::sync::Lazy;
-                static CACHED: Lazy<IoMode> = Lazy::new(|| {
-                    utils::env::var_serde_json_string(
-                        "NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IO_MODE",
-                    )
-                    .unwrap_or({
-                        #[cfg(target_os = "linux")]
-                        {
-                            IoMode::Direct
-                        }
-                        #[cfg(not(target_os = "linux"))]
-                        {
-                            IoMode::Buffered
-                        }
-                    })
-                });
-                *CACHED
-            } else {
-                IoMode::Buffered
-            }
+        pub const fn preferred() -> Self {
+            Self::Buffered
        }
    }

--- a/libs/utils/Cargo.toml
+++ b/libs/utils/Cargo.toml
@@ -29,7 +29,6 @@ futures = { workspace = true }
 jsonwebtoken.workspace = true
 nix = { workspace = true, features = ["ioctl"] }
 once_cell.workspace = true
-pem.workspace = true
 pin-project-lite.workspace = true
 regex.workspace = true
 serde.workspace = true
--- a/libs/utils/src/auth.rs
+++ b/libs/utils/src/auth.rs
@@ -11,8 +11,7 @@ use camino::Utf8Path;
 use jsonwebtoken::{
    Algorithm, DecodingKey, EncodingKey, Header, TokenData, Validation, decode, encode,
 };
-use pem::Pem;
-use serde::{Deserialize, Serialize, de::DeserializeOwned};
+use serde::{Deserialize, Serialize};

 use crate::id::TenantId;

@@ -74,10 +73,7 @@ impl SwappableJwtAuth {
    pub fn swap(&self, jwt_auth: JwtAuth) {
        self.0.swap(Arc::new(jwt_auth));
    }
-    pub fn decode<D: DeserializeOwned>(
-        &self,
-        token: &str,
-    ) -> std::result::Result<TokenData<D>, AuthError> {
+    pub fn decode(&self, token: &str) -> std::result::Result<TokenData<Claims>, AuthError> {
        self.0.load().decode(token)
    }
 }
@@ -152,10 +148,7 @@ impl JwtAuth {
    /// The function tries the stored decoding keys in succession,
    /// and returns the first yielding a successful result.
    /// If there is no working decoding key, it returns the last error.
-    pub fn decode<D: DeserializeOwned>(
-        &self,
-        token: &str,
-    ) -> std::result::Result<TokenData<D>, AuthError> {
+    pub fn decode(&self, token: &str) -> std::result::Result<TokenData<Claims>, AuthError> {
        let mut res = None;
        for decoding_key in &self.decoding_keys {
            res = Some(decode(token, decoding_key, &self.validation));
@@ -180,8 +173,8 @@ impl std::fmt::Debug for JwtAuth {
 }

 // this function is used only for testing purposes in CLI e g generate tokens during init
-pub fn encode_from_key_file<S: Serialize>(claims: &S, pem: &Pem) -> Result<String> {
-    let key = EncodingKey::from_ed_der(pem.contents());
+pub fn encode_from_key_file<S: Serialize>(claims: &S, key_data: &[u8]) -> Result<String> {
+    let key = EncodingKey::from_ed_pem(key_data)?;
    Ok(encode(&Header::new(STORAGE_TOKEN_ALGORITHM), claims, &key)?)
 }

@@ -195,13 +188,13 @@ mod tests {
    //
    // openssl genpkey -algorithm ed25519 -out ed25519-priv.pem
    // openssl pkey -in ed25519-priv.pem -pubout -out ed25519-pub.pem
-    const TEST_PUB_KEY_ED25519: &str = r#"
+    const TEST_PUB_KEY_ED25519: &[u8] = br#"
 -----BEGIN PUBLIC KEY-----
 MCowBQYDK2VwAyEARYwaNBayR+eGI0iXB4s3QxE3Nl2g1iWbr6KtLWeVD/w=
 -----END PUBLIC KEY-----
 "#;

-    const TEST_PRIV_KEY_ED25519: &str = r#"
+    const TEST_PRIV_KEY_ED25519: &[u8] = br#"
 -----BEGIN PRIVATE KEY-----
 MC4CAQAwBQYDK2VwBCIEID/Drmc1AA6U/znNRWpF3zEGegOATQxfkdWxitcOMsIH
 -----END PRIVATE KEY-----
@@ -229,9 +222,9 @@ MC4CAQAwBQYDK2VwBCIEID/Drmc1AA6U/znNRWpF3zEGegOATQxfkdWxitcOMsIH

        // Check it can be validated with the public key
        let auth = JwtAuth::new(vec![
-            DecodingKey::from_ed_pem(TEST_PUB_KEY_ED25519.as_bytes()).unwrap(),
+            DecodingKey::from_ed_pem(TEST_PUB_KEY_ED25519).unwrap(),
        ]);
-        let claims_from_token: Claims = auth.decode(encoded_eddsa).unwrap().claims;
+        let claims_from_token = auth.decode(encoded_eddsa).unwrap().claims;
        assert_eq!(claims_from_token, expected_claims);
    }

@@ -242,14 +235,13 @@ MC4CAQAwBQYDK2VwBCIEID/Drmc1AA6U/znNRWpF3zEGegOATQxfkdWxitcOMsIH
            scope: Scope::Tenant,
        };

-        let pem = pem::parse(TEST_PRIV_KEY_ED25519).unwrap();
-        let encoded = encode_from_key_file(&claims, &pem).unwrap();
+        let encoded = encode_from_key_file(&claims, TEST_PRIV_KEY_ED25519).unwrap();

        // decode it back
        let auth = JwtAuth::new(vec![
-            DecodingKey::from_ed_pem(TEST_PUB_KEY_ED25519.as_bytes()).unwrap(),
+            DecodingKey::from_ed_pem(TEST_PUB_KEY_ED25519).unwrap(),
        ]);
-        let decoded: TokenData<Claims> = auth.decode(&encoded).unwrap();
+        let decoded = auth.decode(&encoded).unwrap();

        assert_eq!(decoded.claims, claims);
    }
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -10,8 +10,6 @@ default = []
 # which adds some runtime cost to run tests on outage conditions
 testing = ["fail/failpoints", "pageserver_api/testing", "wal_decoder/testing", "pageserver_client/testing"]

-fuzz-read-path = ["testing"]
-
 [dependencies]
 anyhow.workspace = true
 arc-swap.workspace = true
@@ -35,7 +33,6 @@ humantime.workspace = true
 humantime-serde.workspace = true
 hyper0.workspace = true
 itertools.workspace = true
-jsonwebtoken.workspace = true
 md5.workspace = true
 nix.workspace = true
 # hack to get the number of worker threads tokio uses
@@ -78,7 +75,6 @@ metrics.workspace = true
 pageserver_api.workspace = true
 pageserver_client.workspace = true # for ResponseErrorMessageExt TOOD refactor that
 pageserver_compaction.workspace = true
-pem.workspace = true
 postgres_connection.workspace = true
 postgres_ffi.workspace = true
 pq_proto.workspace = true
--- a/pageserver/benches/bench_ingest.rs
+++ b/pageserver/benches/bench_ingest.rs
@@ -126,7 +126,7 @@ async fn ingest(
            max_concurrency: NonZeroUsize::new(1).unwrap(),
        });
        let (_desc, path) = layer
-            .write_to_disk(&ctx, None, l0_flush_state.inner(), &gate, cancel.clone())
+            .write_to_disk(&ctx, None, l0_flush_state.inner())
            .await?
            .unwrap();
        tokio::fs::remove_file(path).await?;
--- a/pageserver/pagebench/src/cmd/getpage_latest_lsn.rs
+++ b/pageserver/pagebench/src/cmd/getpage_latest_lsn.rs
@@ -68,13 +68,6 @@ pub(crate) struct Args {
    targets: Option<Vec<TenantTimelineId>>,
 }

-/// State shared by all clients
-#[derive(Debug)]
-struct SharedState {
-    start_work_barrier: tokio::sync::Barrier,
-    live_stats: LiveStats,
-}
-
 #[derive(Debug, Default)]
 struct LiveStats {
    completed_requests: AtomicU64,
@@ -247,26 +240,24 @@ async fn main_impl(
        all_ranges
    };

+    let live_stats = Arc::new(LiveStats::default());
+
    let num_live_stats_dump = 1;
    let num_work_sender_tasks = args.num_clients.get() * timelines.len();
    let num_main_impl = 1;

-    let shared_state = Arc::new(SharedState {
-        start_work_barrier: tokio::sync::Barrier::new(
-            num_live_stats_dump + num_work_sender_tasks + num_main_impl,
-        ),
-        live_stats: LiveStats::default(),
-    });
-    let cancel = CancellationToken::new();
+    let start_work_barrier = Arc::new(tokio::sync::Barrier::new(
+        num_live_stats_dump + num_work_sender_tasks + num_main_impl,
+    ));

-    let ss = shared_state.clone();
    tokio::spawn({
+        let stats = Arc::clone(&live_stats);
+        let start_work_barrier = Arc::clone(&start_work_barrier);
        async move {
-            ss.start_work_barrier.wait().await;
+            start_work_barrier.wait().await;
            loop {
                let start = std::time::Instant::now();
                tokio::time::sleep(std::time::Duration::from_secs(1)).await;
-                let stats = &ss.live_stats;
                let completed_requests = stats.completed_requests.swap(0, Ordering::Relaxed);
                let missed = stats.missed.swap(0, Ordering::Relaxed);
                let elapsed = start.elapsed();
@@ -279,12 +270,14 @@ async fn main_impl(
        }
    });

+    let cancel = CancellationToken::new();
+
    let rps_period = args
        .per_client_rate
        .map(|rps_limit| Duration::from_secs_f64(1.0 / (rps_limit as f64)));
    let make_worker: &dyn Fn(WorkerId) -> Pin<Box<dyn Send + Future<Output = ()>>> = &|worker_id| {
-        let ss = shared_state.clone();
-        let cancel = cancel.clone();
+        let live_stats = live_stats.clone();
+        let start_work_barrier = start_work_barrier.clone();
        let ranges: Vec<KeyRange> = all_ranges
            .iter()
            .filter(|r| r.timeline == worker_id.timeline)
@@ -294,8 +287,85 @@ async fn main_impl(
            rand::distributions::weighted::WeightedIndex::new(ranges.iter().map(|v| v.len()))
                .unwrap();

+        let cancel = cancel.clone();
        Box::pin(async move {
-            client_libpq(args, worker_id, ss, cancel, rps_period, ranges, weights).await
+            let client =
+                pageserver_client::page_service::Client::new(args.page_service_connstring.clone())
+                    .await
+                    .unwrap();
+            let mut client = client
+                .pagestream(worker_id.timeline.tenant_id, worker_id.timeline.timeline_id)
+                .await
+                .unwrap();
+
+            start_work_barrier.wait().await;
+            let client_start = Instant::now();
+            let mut ticks_processed = 0;
+            let mut inflight = VecDeque::new();
+            while !cancel.is_cancelled() {
+                // Detect if a request took longer than the RPS rate
+                if let Some(period) = &rps_period {
+                    let periods_passed_until_now =
+                        usize::try_from(client_start.elapsed().as_micros() / period.as_micros())
+                            .unwrap();
+
+                    if periods_passed_until_now > ticks_processed {
+                        live_stats.missed((periods_passed_until_now - ticks_processed) as u64);
+                    }
+                    ticks_processed = periods_passed_until_now;
+                }
+
+                while inflight.len() < args.queue_depth.get() {
+                    let start = Instant::now();
+                    let req = {
+                        let mut rng = rand::thread_rng();
+                        let r = &ranges[weights.sample(&mut rng)];
+                        let key: i128 = rng.gen_range(r.start..r.end);
+                        let key = Key::from_i128(key);
+                        assert!(key.is_rel_block_key());
+                        let (rel_tag, block_no) = key
+                            .to_rel_block()
+                            .expect("we filter non-rel-block keys out above");
+                        PagestreamGetPageRequest {
+                            hdr: PagestreamRequest {
+                                reqid: 0,
+                                request_lsn: if rng.gen_bool(args.req_latest_probability) {
+                                    Lsn::MAX
+                                } else {
+                                    r.timeline_lsn
+                                },
+                                not_modified_since: r.timeline_lsn,
+                            },
+                            rel: rel_tag,
+                            blkno: block_no,
+                        }
+                    };
+                    client.getpage_send(req).await.unwrap();
+                    inflight.push_back(start);
+                }
+
+                let start = inflight.pop_front().unwrap();
+                client.getpage_recv().await.unwrap();
+                let end = Instant::now();
+                live_stats.request_done();
+                ticks_processed += 1;
+                STATS.with(|stats| {
+                    stats
+                        .borrow()
+                        .lock()
+                        .unwrap()
+                        .observe(end.duration_since(start))
+                        .unwrap();
+                });
+
+                if let Some(period) = &rps_period {
+                    let next_at = client_start
+                        + Duration::from_micros(
+                            (ticks_processed) as u64 * u64::try_from(period.as_micros()).unwrap(),
+                        );
+                    tokio::time::sleep_until(next_at.into()).await;
+                }
+            }
        })
    };

@@ -317,7 +387,7 @@ async fn main_impl(
    };

    info!("waiting for everything to become ready");
-    shared_state.start_work_barrier.wait().await;
+    start_work_barrier.wait().await;
    info!("work started");
    if let Some(runtime) = args.runtime {
        tokio::time::sleep(runtime.into()).await;
@@ -346,91 +416,3 @@ async fn main_impl(

    anyhow::Ok(())
 }
-
-async fn client_libpq(
-    args: &Args,
-    worker_id: WorkerId,
-    shared_state: Arc<SharedState>,
-    cancel: CancellationToken,
-    rps_period: Option<Duration>,
-    ranges: Vec<KeyRange>,
-    weights: rand::distributions::weighted::WeightedIndex<i128>,
-) {
-    let client = pageserver_client::page_service::Client::new(args.page_service_connstring.clone())
-        .await
-        .unwrap();
-    let mut client = client
-        .pagestream(worker_id.timeline.tenant_id, worker_id.timeline.timeline_id)
-        .await
-        .unwrap();
-
-    shared_state.start_work_barrier.wait().await;
-    let client_start = Instant::now();
-    let mut ticks_processed = 0;
-    let mut inflight = VecDeque::new();
-    while !cancel.is_cancelled() {
-        // Detect if a request took longer than the RPS rate
-        if let Some(period) = &rps_period {
-            let periods_passed_until_now =
-                usize::try_from(client_start.elapsed().as_micros() / period.as_micros()).unwrap();
-
-            if periods_passed_until_now > ticks_processed {
-                shared_state
-                    .live_stats
-                    .missed((periods_passed_until_now - ticks_processed) as u64);
-            }
-            ticks_processed = periods_passed_until_now;
-        }
-
-        while inflight.len() < args.queue_depth.get() {
-            let start = Instant::now();
-            let req = {
-                let mut rng = rand::thread_rng();
-                let r = &ranges[weights.sample(&mut rng)];
-                let key: i128 = rng.gen_range(r.start..r.end);
-                let key = Key::from_i128(key);
-                assert!(key.is_rel_block_key());
-                let (rel_tag, block_no) = key
-                    .to_rel_block()
-                    .expect("we filter non-rel-block keys out above");
-                PagestreamGetPageRequest {
-                    hdr: PagestreamRequest {
-                        reqid: 0,
-                        request_lsn: if rng.gen_bool(args.req_latest_probability) {
-                            Lsn::MAX
-                        } else {
-                            r.timeline_lsn
-                        },
-                        not_modified_since: r.timeline_lsn,
-                    },
-                    rel: rel_tag,
-                    blkno: block_no,
-                }
-            };
-            client.getpage_send(req).await.unwrap();
-            inflight.push_back(start);
-        }
-
-        let start = inflight.pop_front().unwrap();
-        client.getpage_recv().await.unwrap();
-        let end = Instant::now();
-        shared_state.live_stats.request_done();
-        ticks_processed += 1;
-        STATS.with(|stats| {
-            stats
-                .borrow()
-                .lock()
-                .unwrap()
-                .observe(end.duration_since(start))
-                .unwrap();
-        });
-
-        if let Some(period) = &rps_period {
-            let next_at = client_start
-                + Duration::from_micros(
-                    (ticks_processed) as u64 * u64::try_from(period.as_micros()).unwrap(),
-                );
-            tokio::time::sleep_until(next_at.into()).await;
-        }
-    }
-}
--- a/pageserver/src/bin/pageserver.rs
+++ b/pageserver/src/bin/pageserver.rs
@@ -416,18 +416,8 @@ fn start_pageserver(
    // The storage_broker::connect call needs to happen inside a tokio runtime thread.
    let broker_client = WALRECEIVER_RUNTIME
        .block_on(async {
-            let tls_config = storage_broker::ClientTlsConfig::new().ca_certificates(
-                conf.ssl_ca_certs
-                    .iter()
-                    .map(pem::encode)
-                    .map(storage_broker::Certificate::from_pem),
-            );
            // Note: we do not attempt connecting here (but validate endpoints sanity).
-            storage_broker::connect(
-                conf.broker_endpoint.clone(),
-                conf.broker_keepalive_interval,
-                tls_config,
-            )
+            storage_broker::connect(conf.broker_endpoint.clone(), conf.broker_keepalive_interval)
        })
        .with_context(|| {
            format!(
--- a/pageserver/src/config.rs
+++ b/pageserver/src/config.rs
@@ -17,10 +17,9 @@ use once_cell::sync::OnceCell;
 use pageserver_api::config::{DiskUsageEvictionTaskConfig, MaxVectoredReadBytes};
 use pageserver_api::models::ImageCompressionAlgorithm;
 use pageserver_api::shard::TenantShardId;
-use pem::Pem;
 use postgres_backend::AuthType;
 use remote_storage::{RemotePath, RemoteStorageConfig};
-use reqwest::Url;
+use reqwest::{Certificate, Url};
 use storage_broker::Uri;
 use utils::id::{NodeId, TimelineId};
 use utils::logging::{LogFormat, SecretString};
@@ -68,8 +67,8 @@ pub struct PageServerConf {
    /// Period to reload certificate and private key from files.
    /// Default: 60s.
    pub ssl_cert_reload_period: Duration,
-    /// Trusted root CA certificates to use in https APIs in PEM format.
-    pub ssl_ca_certs: Vec<Pem>,
+    /// Trusted root CA certificates to use in https APIs.
+    pub ssl_ca_certs: Vec<Certificate>,

    /// Current availability zone. Used for traffic metrics.
    pub availability_zone: Option<String>,
@@ -119,13 +118,13 @@ pub struct PageServerConf {
    /// A lower value implicitly deprioritizes loading such tenants, vs. other work in the system.
    pub concurrent_tenant_warmup: ConfigurableSemaphore,

-    /// Number of concurrent [`TenantShard::gather_size_inputs`](crate::tenant::TenantShard::gather_size_inputs) allowed.
+    /// Number of concurrent [`Tenant::gather_size_inputs`](crate::tenant::Tenant::gather_size_inputs) allowed.
    pub concurrent_tenant_size_logical_size_queries: ConfigurableSemaphore,
-    /// Limit of concurrent [`TenantShard::gather_size_inputs`] issued by module `eviction_task`.
+    /// Limit of concurrent [`Tenant::gather_size_inputs`] issued by module `eviction_task`.
    /// The number of permits is the same as `concurrent_tenant_size_logical_size_queries`.
    /// See the comment in `eviction_task` for details.
    ///
-    /// [`TenantShard::gather_size_inputs`]: crate::tenant::TenantShard::gather_size_inputs
+    /// [`Tenant::gather_size_inputs`]: crate::tenant::Tenant::gather_size_inputs
    pub eviction_task_immitated_concurrent_logical_size_queries: ConfigurableSemaphore,

    // How often to collect metrics and send them to the metrics endpoint.
@@ -498,10 +497,7 @@ impl PageServerConf {
            ssl_ca_certs: match ssl_ca_file {
                Some(ssl_ca_file) => {
                    let buf = std::fs::read(ssl_ca_file)?;
-                    pem::parse_many(&buf)?
-                        .into_iter()
-                        .filter(|pem| pem.tag() == "CERTIFICATE")
-                        .collect()
+                    Certificate::from_pem_bundle(&buf)?
                }
                None => Vec::new(),
            },
@@ -592,10 +588,10 @@ impl ConfigurableSemaphore {
    /// Initializse using a non-zero amount of permits.
    ///
    /// Require a non-zero initial permits, because using permits == 0 is a crude way to disable a
-    /// feature such as [`TenantShard::gather_size_inputs`]. Otherwise any semaphore using future will
+    /// feature such as [`Tenant::gather_size_inputs`]. Otherwise any semaphore using future will
    /// behave like [`futures::future::pending`], just waiting until new permits are added.
    ///
-    /// [`TenantShard::gather_size_inputs`]: crate::tenant::TenantShard::gather_size_inputs
+    /// [`Tenant::gather_size_inputs`]: crate::tenant::Tenant::gather_size_inputs
    pub fn new(initial_permits: NonZeroUsize) -> Self {
        ConfigurableSemaphore {
            initial_permits,
--- a/pageserver/src/consumption_metrics.rs
+++ b/pageserver/src/consumption_metrics.rs
@@ -24,7 +24,7 @@ use crate::task_mgr::{self, BACKGROUND_RUNTIME, TaskKind};
 use crate::tenant::mgr::TenantManager;
 use crate::tenant::size::CalculateSyntheticSizeError;
 use crate::tenant::tasks::BackgroundLoopKind;
-use crate::tenant::{LogicalSizeCalculationCause, TenantShard};
+use crate::tenant::{LogicalSizeCalculationCause, Tenant};

 mod disk_cache;
 mod metrics;
@@ -428,7 +428,7 @@ async fn calculate_synthetic_size_worker(
    }
 }

-async fn calculate_and_log(tenant: &TenantShard, cancel: &CancellationToken, ctx: &RequestContext) {
+async fn calculate_and_log(tenant: &Tenant, cancel: &CancellationToken, ctx: &RequestContext) {
    const CAUSE: LogicalSizeCalculationCause =
        LogicalSizeCalculationCause::ConsumptionMetricsSyntheticSize;

--- a/pageserver/src/consumption_metrics/metrics.rs
+++ b/pageserver/src/consumption_metrics/metrics.rs
@@ -175,9 +175,9 @@ impl MetricsKey {
        .absolute_values()
    }

-    /// [`TenantShard::remote_size`]
+    /// [`Tenant::remote_size`]
    ///
-    /// [`TenantShard::remote_size`]: crate::tenant::TenantShard::remote_size
+    /// [`Tenant::remote_size`]: crate::tenant::Tenant::remote_size
    const fn remote_storage_size(tenant_id: TenantId) -> AbsoluteValueFactory {
        MetricsKey {
            tenant_id,
@@ -199,9 +199,9 @@ impl MetricsKey {
        .absolute_values()
    }

-    /// [`TenantShard::cached_synthetic_size`] as refreshed by [`calculate_synthetic_size_worker`].
+    /// [`Tenant::cached_synthetic_size`] as refreshed by [`calculate_synthetic_size_worker`].
    ///
-    /// [`TenantShard::cached_synthetic_size`]: crate::tenant::TenantShard::cached_synthetic_size
+    /// [`Tenant::cached_synthetic_size`]: crate::tenant::Tenant::cached_synthetic_size
    /// [`calculate_synthetic_size_worker`]: super::calculate_synthetic_size_worker
    const fn synthetic_size(tenant_id: TenantId) -> AbsoluteValueFactory {
        MetricsKey {
@@ -254,7 +254,7 @@ pub(super) async fn collect_all_metrics(

 async fn collect<S>(tenants: S, cache: &Cache, ctx: &RequestContext) -> Vec<NewRawMetric>
 where
-    S: futures::stream::Stream<Item = (TenantId, Arc<crate::tenant::TenantShard>)>,
+    S: futures::stream::Stream<Item = (TenantId, Arc<crate::tenant::Tenant>)>,
 {
    let mut current_metrics: Vec<NewRawMetric> = Vec::new();

@@ -308,7 +308,7 @@ impl TenantSnapshot {
    ///
    /// `resident_size` is calculated of the timelines we had access to for other metrics, so we
    /// cannot just list timelines here.
-    fn collect(t: &Arc<crate::tenant::TenantShard>, resident_size: u64) -> Self {
+    fn collect(t: &Arc<crate::tenant::Tenant>, resident_size: u64) -> Self {
        TenantSnapshot {
            resident_size,
            remote_size: t.remote_size(),
--- a/pageserver/src/controller_upcall_client.rs
+++ b/pageserver/src/controller_upcall_client.rs
@@ -8,7 +8,6 @@ use pageserver_api::upcall_api::{
    ReAttachRequest, ReAttachResponse, ReAttachResponseTenant, ValidateRequest,
    ValidateRequestTenant, ValidateResponse,
 };
-use reqwest::Certificate;
 use serde::Serialize;
 use serde::de::DeserializeOwned;
 use tokio_util::sync::CancellationToken;
@@ -77,8 +76,8 @@ impl StorageControllerUpcallClient {
            client = client.default_headers(headers);
        }

-        for cert in &conf.ssl_ca_certs {
-            client = client.add_root_certificate(Certificate::from_der(cert.contents())?);
+        for ssl_ca_cert in &conf.ssl_ca_certs {
+            client = client.add_root_certificate(ssl_ca_cert.clone());
        }

        Ok(Some(Self {
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -1873,7 +1873,7 @@ async fn update_tenant_config_handler(
        &ShardParameters::default(),
    );

-    crate::tenant::TenantShard::persist_tenant_config(state.conf, &tenant_shard_id, &location_conf)
+    crate::tenant::Tenant::persist_tenant_config(state.conf, &tenant_shard_id, &location_conf)
        .await
        .map_err(|e| ApiError::InternalServerError(anyhow::anyhow!(e)))?;

@@ -1917,7 +1917,7 @@ async fn patch_tenant_config_handler(
        &ShardParameters::default(),
    );

-    crate::tenant::TenantShard::persist_tenant_config(state.conf, &tenant_shard_id, &location_conf)
+    crate::tenant::Tenant::persist_tenant_config(state.conf, &tenant_shard_id, &location_conf)
        .await
        .map_err(|e| ApiError::InternalServerError(anyhow::anyhow!(e)))?;

--- a/pageserver/src/lib.rs
+++ b/pageserver/src/lib.rs
@@ -49,7 +49,7 @@ use tracing::{info, info_span};
 /// backwards-compatible changes to the metadata format.
 pub const STORAGE_FORMAT_VERSION: u16 = 3;

-pub const DEFAULT_PG_VERSION: u32 = 17;
+pub const DEFAULT_PG_VERSION: u32 = 16;

 // Magic constants used to identify different kinds of files
 pub const IMAGE_FILE_MAGIC: u16 = 0x5A60;
--- a/pageserver/src/metrics.rs
+++ b/pageserver/src/metrics.rs
@@ -1086,7 +1086,7 @@ pub(crate) static TIMELINE_EPHEMERAL_BYTES: Lazy<UIntGauge> = Lazy::new(|| {
    .expect("Failed to register metric")
 });

-/// Metrics related to the lifecycle of a [`crate::tenant::TenantShard`] object: things
+/// Metrics related to the lifecycle of a [`crate::tenant::Tenant`] object: things
 /// like how long it took to load.
 ///
 /// Note that these are process-global metrics, _not_ per-tenant metrics.  Per-tenant
@@ -1714,28 +1714,6 @@ pub enum SmgrQueryType {
    Test,
 }

-#[derive(
-    Debug,
-    Clone,
-    Copy,
-    IntoStaticStr,
-    strum_macros::EnumCount,
-    strum_macros::EnumIter,
-    strum_macros::FromRepr,
-    enum_map::Enum,
-)]
-#[strum(serialize_all = "snake_case")]
-pub enum GetPageBatchBreakReason {
-    BatchFull,
-    NonBatchableRequest,
-    NonUniformLsn,
-    SamePageAtDifferentLsn,
-    NonUniformTimeline,
-    ExecutorSteal,
-    #[cfg(feature = "testing")]
-    NonUniformKey,
-}
-
 pub(crate) struct SmgrQueryTimePerTimeline {
    global_started: [IntCounter; SmgrQueryType::COUNT],
    global_latency: [Histogram; SmgrQueryType::COUNT],
@@ -1747,8 +1725,6 @@ pub(crate) struct SmgrQueryTimePerTimeline {
    per_timeline_flush_in_progress_micros: IntCounter,
    global_batch_wait_time: Histogram,
    per_timeline_batch_wait_time: Histogram,
-    global_batch_break_reason: [IntCounter; GetPageBatchBreakReason::COUNT],
-    per_timeline_batch_break_reason: GetPageBatchBreakReasonTimelineMetrics,
    throttling: Arc<tenant_throttling::Pagestream>,
 }

@@ -1882,49 +1858,6 @@ static PAGE_SERVICE_BATCH_SIZE_PER_TENANT_TIMELINE: Lazy<HistogramVec> = Lazy::n
    .expect("failed to define a metric")
 });

-static PAGE_SERVICE_BATCH_BREAK_REASON_GLOBAL: Lazy<IntCounterVec> = Lazy::new(|| {
-    register_int_counter_vec!(
-        // it's a counter, but, name is prepared to extend it to a histogram of queue depth
-        "pageserver_page_service_batch_break_reason_global",
-        "Reason for breaking batches of get page requests",
-        &["reason"],
-    )
-    .expect("failed to define a metric")
-});
-
-struct GetPageBatchBreakReasonTimelineMetrics {
-    map: EnumMap<GetPageBatchBreakReason, IntCounter>,
-}
-
-impl GetPageBatchBreakReasonTimelineMetrics {
-    fn new(tenant_id: &str, shard_slug: &str, timeline_id: &str) -> Self {
-        GetPageBatchBreakReasonTimelineMetrics {
-            map: EnumMap::from_array(std::array::from_fn(|reason_idx| {
-                let reason = GetPageBatchBreakReason::from_usize(reason_idx);
-                PAGE_SERVICE_BATCH_BREAK_REASON_PER_TENANT_TIMELINE.with_label_values(&[
-                    tenant_id,
-                    shard_slug,
-                    timeline_id,
-                    reason.into(),
-                ])
-            })),
-        }
-    }
-
-    fn inc(&self, reason: GetPageBatchBreakReason) {
-        self.map[reason].inc()
-    }
-}
-
-static PAGE_SERVICE_BATCH_BREAK_REASON_PER_TENANT_TIMELINE: Lazy<IntCounterVec> = Lazy::new(|| {
-    register_int_counter_vec!(
-        "pageserver_page_service_batch_break_reason",
-        "Reason for breaking batches of get page requests",
-        &["tenant_id", "shard_id", "timeline_id", "reason"],
-    )
-    .expect("failed to define a metric")
-});
-
 pub(crate) static PAGE_SERVICE_CONFIG_MAX_BATCH_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
    register_int_gauge_vec!(
        "pageserver_page_service_config_max_batch_size",
@@ -2052,15 +1985,6 @@ impl SmgrQueryTimePerTimeline {
            .get_metric_with_label_values(&[&tenant_id, &shard_slug, &timeline_id])
            .unwrap();

-        let global_batch_break_reason = std::array::from_fn(|i| {
-            let reason = GetPageBatchBreakReason::from_usize(i);
-            PAGE_SERVICE_BATCH_BREAK_REASON_GLOBAL
-                .get_metric_with_label_values(&[reason.into()])
-                .unwrap()
-        });
-        let per_timeline_batch_break_reason =
-            GetPageBatchBreakReasonTimelineMetrics::new(&tenant_id, &shard_slug, &timeline_id);
-
        let global_flush_in_progress_micros =
            PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS_GLOBAL.clone();
        let per_timeline_flush_in_progress_micros = PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS
@@ -2078,8 +2002,6 @@ impl SmgrQueryTimePerTimeline {
            per_timeline_flush_in_progress_micros,
            global_batch_wait_time,
            per_timeline_batch_wait_time,
-            global_batch_break_reason,
-            per_timeline_batch_break_reason,
            throttling: pagestream_throttle_metrics,
        }
    }
@@ -2108,16 +2030,9 @@ impl SmgrQueryTimePerTimeline {
    }

    /// TODO: do something about this? seems odd, we have a similar call on SmgrOpTimer
-    pub(crate) fn observe_getpage_batch_start(
-        &self,
-        batch_size: usize,
-        break_reason: GetPageBatchBreakReason,
-    ) {
+    pub(crate) fn observe_getpage_batch_start(&self, batch_size: usize) {
        self.global_batch_size.observe(batch_size as f64);
        self.per_timeline_batch_size.observe(batch_size as f64);
-
-        self.global_batch_break_reason[break_reason.into_usize()].inc();
-        self.per_timeline_batch_break_reason.inc(break_reason);
    }
 }

@@ -3483,15 +3398,6 @@ impl TimelineMetrics {
            shard_id,
            timeline_id,
        ]);
-
-        for reason in GetPageBatchBreakReason::iter() {
-            let _ = PAGE_SERVICE_BATCH_BREAK_REASON_PER_TENANT_TIMELINE.remove_label_values(&[
-                tenant_id,
-                shard_id,
-                timeline_id,
-                reason.into(),
-            ]);
-        }
    }
 }

@@ -4370,7 +4276,6 @@ pub fn preinitialize_metrics(
    [
        &BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT,
        &SMGR_QUERY_STARTED_GLOBAL,
-        &PAGE_SERVICE_BATCH_BREAK_REASON_GLOBAL,
    ]
    .into_iter()
    .for_each(|c| {
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -15,7 +15,6 @@ use async_compression::tokio::write::GzipEncoder;
 use bytes::Buf;
 use futures::FutureExt;
 use itertools::Itertools;
-use jsonwebtoken::TokenData;
 use once_cell::sync::OnceCell;
 use pageserver_api::config::{
    PageServicePipeliningConfig, PageServicePipeliningConfigPipelined,
@@ -59,8 +58,8 @@ use crate::context::{
    DownloadBehavior, PerfInstrumentFutureExt, RequestContext, RequestContextBuilder,
 };
 use crate::metrics::{
-    self, COMPUTE_COMMANDS_COUNTERS, ComputeCommandKind, GetPageBatchBreakReason, LIVE_CONNECTIONS,
-    SmgrOpTimer, TimelineMetrics,
+    self, COMPUTE_COMMANDS_COUNTERS, ComputeCommandKind, LIVE_CONNECTIONS, SmgrOpTimer,
+    TimelineMetrics,
 };
 use crate::pgdatadir_mapping::Version;
 use crate::span::{
@@ -76,7 +75,7 @@ use crate::tenant::timeline::{self, WaitLsnError};
 use crate::tenant::{GetTimelineError, PageReconstructError, Timeline};
 use crate::{basebackup, timed_after_cancellation};

-/// How long we may wait for a [`crate::tenant::mgr::TenantSlot::InProgress`]` and/or a [`crate::tenant::TenantShard`] which
+/// How long we may wait for a [`crate::tenant::mgr::TenantSlot::InProgress`] and/or a [`crate::tenant::Tenant`] which
 /// is not yet in state [`TenantState::Active`].
 ///
 /// NB: this is a different value than [`crate::http::routes::ACTIVE_TENANT_TIMEOUT`].
@@ -673,7 +672,6 @@ enum BatchedFeMessage {
        span: Span,
        shard: timeline::handle::WeakHandle<TenantManagerTypes>,
        pages: smallvec::SmallVec<[BatchedGetPageRequest; 1]>,
-        batch_break_reason: GetPageBatchBreakReason,
    },
    DbSize {
        span: Span,
@@ -726,119 +724,6 @@ impl BatchedFeMessage {
            BatchedFeMessage::RespondError { .. } => {}
        }
    }
-
-    fn should_break_batch(
-        &self,
-        other: &BatchedFeMessage,
-        max_batch_size: NonZeroUsize,
-        batching_strategy: PageServiceProtocolPipelinedBatchingStrategy,
-    ) -> Option<GetPageBatchBreakReason> {
-        match (self, other) {
-            (
-                BatchedFeMessage::GetPage {
-                    shard: accum_shard,
-                    pages: accum_pages,
-                    ..
-                },
-                BatchedFeMessage::GetPage {
-                    shard: this_shard,
-                    pages: this_pages,
-                    ..
-                },
-            ) => {
-                assert_eq!(this_pages.len(), 1);
-                if accum_pages.len() >= max_batch_size.get() {
-                    trace!(%max_batch_size, "stopping batching because of batch size");
-                    assert_eq!(accum_pages.len(), max_batch_size.get());
-
-                    return Some(GetPageBatchBreakReason::BatchFull);
-                }
-                if !accum_shard.is_same_handle_as(this_shard) {
-                    trace!("stopping batching because timeline object mismatch");
-                    // TODO: we _could_ batch & execute each shard seperately (and in parallel).
-                    // But the current logic for keeping responses in order does not support that.
-
-                    return Some(GetPageBatchBreakReason::NonUniformTimeline);
-                }
-
-                match batching_strategy {
-                    PageServiceProtocolPipelinedBatchingStrategy::UniformLsn => {
-                        if let Some(last_in_batch) = accum_pages.last() {
-                            if last_in_batch.effective_request_lsn
-                                != this_pages[0].effective_request_lsn
-                            {
-                                trace!(
-                                    accum_lsn = %last_in_batch.effective_request_lsn,
-                                    this_lsn = %this_pages[0].effective_request_lsn,
-                                    "stopping batching because LSN changed"
-                                );
-
-                                return Some(GetPageBatchBreakReason::NonUniformLsn);
-                            }
-                        }
-                    }
-                    PageServiceProtocolPipelinedBatchingStrategy::ScatteredLsn => {
-                        // The read path doesn't curently support serving the same page at different LSNs.
-                        // While technically possible, it's uncertain if the complexity is worth it.
-                        // Break the batch if such a case is encountered.
-                        let same_page_different_lsn = accum_pages.iter().any(|batched| {
-                            batched.req.rel == this_pages[0].req.rel
-                                && batched.req.blkno == this_pages[0].req.blkno
-                                && batched.effective_request_lsn
-                                    != this_pages[0].effective_request_lsn
-                        });
-
-                        if same_page_different_lsn {
-                            trace!(
-                                rel=%this_pages[0].req.rel,
-                                blkno=%this_pages[0].req.blkno,
-                                lsn=%this_pages[0].effective_request_lsn,
-                                "stopping batching because same page was requested at different LSNs"
-                            );
-
-                            return Some(GetPageBatchBreakReason::SamePageAtDifferentLsn);
-                        }
-                    }
-                }
-
-                None
-            }
-            #[cfg(feature = "testing")]
-            (
-                BatchedFeMessage::Test {
-                    shard: accum_shard,
-                    requests: accum_requests,
-                    ..
-                },
-                BatchedFeMessage::Test {
-                    shard: this_shard,
-                    requests: this_requests,
-                    ..
-                },
-            ) => {
-                assert!(this_requests.len() == 1);
-                if accum_requests.len() >= max_batch_size.get() {
-                    trace!(%max_batch_size, "stopping batching because of batch size");
-                    assert_eq!(accum_requests.len(), max_batch_size.get());
-                    return Some(GetPageBatchBreakReason::BatchFull);
-                }
-                if !accum_shard.is_same_handle_as(this_shard) {
-                    trace!("stopping batching because timeline object mismatch");
-                    // TODO: we _could_ batch & execute each shard seperately (and in parallel).
-                    // But the current logic for keeping responses in order does not support that.
-                    return Some(GetPageBatchBreakReason::NonUniformTimeline);
-                }
-                let this_batch_key = this_requests[0].req.batch_key;
-                let accum_batch_key = accum_requests[0].req.batch_key;
-                if this_requests[0].req.batch_key != accum_requests[0].req.batch_key {
-                    trace!(%accum_batch_key, %this_batch_key, "stopping batching because batch key changed");
-                    return Some(GetPageBatchBreakReason::NonUniformKey);
-                }
-                None
-            }
-            (_, _) => Some(GetPageBatchBreakReason::NonBatchableRequest),
-        }
-    }
 }

 impl PageServerHandler {
@@ -1162,10 +1047,6 @@ impl PageServerHandler {
                        effective_request_lsn,
                        ctx,
                    }],
-                    // The executor grabs the batch when it becomes idle.
-                    // Hence, [`GetPageBatchBreakReason::ExecutorSteal`] is the
-                    // default reason for breaking the batch.
-                    batch_break_reason: GetPageBatchBreakReason::ExecutorSteal,
                }
            }
            #[cfg(feature = "testing")]
@@ -1203,58 +1084,117 @@ impl PageServerHandler {
            Err(e) => return Err(Err(e)),
        };

-        let eligible_batch = match batch {
-            Ok(b) => b,
-            Err(_) => {
-                return Err(Ok(this_msg));
-            }
-        };
-
-        let batch_break =
-            eligible_batch.should_break_batch(&this_msg, max_batch_size, batching_strategy);
-
-        match batch_break {
-            Some(reason) => {
-                if let BatchedFeMessage::GetPage {
-                    batch_break_reason, ..
-                } = eligible_batch
-                {
-                    *batch_break_reason = reason;
+        match (&mut *batch, this_msg) {
+            // something batched already, let's see if we can add this message to the batch
+            (
+                Ok(BatchedFeMessage::GetPage {
+                    span: _,
+                    shard: accum_shard,
+                    pages: accum_pages,
+                }),
+                BatchedFeMessage::GetPage {
+                    span: _,
+                    shard: this_shard,
+                    pages: this_pages,
+                },
+            ) if (|| {
+                assert_eq!(this_pages.len(), 1);
+                if accum_pages.len() >= max_batch_size.get() {
+                    trace!(%max_batch_size, "stopping batching because of batch size");
+                    assert_eq!(accum_pages.len(), max_batch_size.get());
+                    return false;
+                }
+                if !accum_shard.is_same_handle_as(&this_shard) {
+                    trace!("stopping batching because timeline object mismatch");
+                    // TODO: we _could_ batch & execute each shard separately (and in parallel).
+                    // But the current logic for keeping responses in order does not support that.
+                    return false;
                }

-                Err(Ok(this_msg))
-            }
-            None => {
+                match batching_strategy {
+                    PageServiceProtocolPipelinedBatchingStrategy::UniformLsn => {
+                        if let Some(last_in_batch) = accum_pages.last() {
+                            if last_in_batch.effective_request_lsn
+                                != this_pages[0].effective_request_lsn
+                            {
+                                return false;
+                            }
+                        }
+                    }
+                    PageServiceProtocolPipelinedBatchingStrategy::ScatteredLsn => {
+                        // The read path doesn't currently support serving the same page at different LSNs.
+                        // While technically possible, it's uncertain if the complexity is worth it.
+                        // Break the batch if such a case is encountered.
+                        //
+                        // TODO(vlad): Include a metric for batch breaks with a reason label.
+                        let same_page_different_lsn = accum_pages.iter().any(|batched| {
+                            batched.req.rel == this_pages[0].req.rel
+                                && batched.req.blkno == this_pages[0].req.blkno
+                                && batched.effective_request_lsn
+                                    != this_pages[0].effective_request_lsn
+                        });
+
+                        if same_page_different_lsn {
+                            trace!(
+                                rel=%this_pages[0].req.rel,
+                                blkno=%this_pages[0].req.blkno,
+                                lsn=%this_pages[0].effective_request_lsn,
+                                "stopping batching because same page was requested at different LSNs"
+                            );
+                            return false;
+                        }
+                    }
+                }
+
+                true
+            })() =>
+            {
                // ok to batch
-                match (eligible_batch, this_msg) {
-                    (
-                        BatchedFeMessage::GetPage {
-                            pages: accum_pages, ..
-                        },
-                        BatchedFeMessage::GetPage {
-                            pages: this_pages, ..
-                        },
-                    ) => {
-                        accum_pages.extend(this_pages);
-                        Ok(())
-                    }
-                    #[cfg(feature = "testing")]
-                    (
-                        BatchedFeMessage::Test {
-                            requests: accum_requests,
-                            ..
-                        },
-                        BatchedFeMessage::Test {
-                            requests: this_requests,
-                            ..
-                        },
-                    ) => {
-                        accum_requests.extend(this_requests);
-                        Ok(())
-                    }
-                    // Shape guaranteed by [`BatchedFeMessage::should_break_batch`]
-                    _ => unreachable!(),
+                accum_pages.extend(this_pages);
+                Ok(())
+            }
+            #[cfg(feature = "testing")]
+            (
+                Ok(BatchedFeMessage::Test {
+                    shard: accum_shard,
+                    requests: accum_requests,
+                    ..
+                }),
+                BatchedFeMessage::Test {
+                    shard: this_shard,
+                    requests: this_requests,
+                    ..
+                },
+            ) if (|| {
+                assert!(this_requests.len() == 1);
+                if accum_requests.len() >= max_batch_size.get() {
+                    trace!(%max_batch_size, "stopping batching because of batch size");
+                    assert_eq!(accum_requests.len(), max_batch_size.get());
+                    return false;
                }
+                if !accum_shard.is_same_handle_as(&this_shard) {
+                    trace!("stopping batching because timeline object mismatch");
+                    // TODO: we _could_ batch & execute each shard separately (and in parallel).
+                    // But the current logic for keeping responses in order does not support that.
+                    return false;
+                }
+                let this_batch_key = this_requests[0].req.batch_key;
+                let accum_batch_key = accum_requests[0].req.batch_key;
+                if this_requests[0].req.batch_key != accum_requests[0].req.batch_key {
+                    trace!(%accum_batch_key, %this_batch_key, "stopping batching because batch key changed");
+                    return false;
+                }
+                true
+            })() =>
+            {
+                // ok to batch
+                accum_requests.extend(this_requests);
+                Ok(())
+            }
+            // something batched already but this message is unbatchable
+            (_, this_msg) => {
+                // by default, don't continue batching
+                Err(Ok(this_msg))
            }
        }
    }
@@ -1473,12 +1413,7 @@ impl PageServerHandler {
                    span,
                )
            }
-            BatchedFeMessage::GetPage {
-                span,
-                shard,
-                pages,
-                batch_break_reason,
-            } => {
+            BatchedFeMessage::GetPage { span, shard, pages } => {
                fail::fail_point!("ps::handle-pagerequest-message::getpage");
                let (shard, ctx) = upgrade_handle_and_set_context!(shard);
                (
@@ -1490,7 +1425,6 @@ impl PageServerHandler {
                                &shard,
                                pages,
                                io_concurrency,
-                                batch_break_reason,
                                &ctx,
                            )
                            .instrument(span.clone())
@@ -2179,14 +2113,13 @@ impl PageServerHandler {
        timeline: &Timeline,
        requests: smallvec::SmallVec<[BatchedGetPageRequest; 1]>,
        io_concurrency: IoConcurrency,
-        batch_break_reason: GetPageBatchBreakReason,
        ctx: &RequestContext,
    ) -> Vec<Result<(PagestreamBeMessage, SmgrOpTimer), BatchedPageStreamError>> {
        debug_assert_current_span_has_tenant_and_timeline_id();

        timeline
            .query_metrics
-            .observe_getpage_batch_start(requests.len(), batch_break_reason);
+            .observe_getpage_batch_start(requests.len());

        // If a page trace is running, submit an event for this request.
        if let Some(page_trace) = timeline.page_trace.load().as_ref() {
@@ -2838,7 +2771,7 @@ where
    ) -> Result<(), QueryError> {
        // this unwrap is never triggered, because check_auth_jwt only called when auth_type is NeonJWT
        // which requires auth to be present
-        let data: TokenData<Claims> = self
+        let data = self
            .auth
            .as_ref()
            .unwrap()
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -158,7 +158,7 @@ pub struct TenantSharedResources {
    pub l0_flush_global_state: L0FlushGlobalState,
 }

-/// A [`TenantShard`] is really an _attached_ tenant.  The configuration
+/// A [`Tenant`] is really an _attached_ tenant.  The configuration
 /// for an attached tenant is a subset of the [`LocationConf`], represented
 /// in this struct.
 #[derive(Clone)]
@@ -245,7 +245,7 @@ pub(crate) enum SpawnMode {
 ///
 /// Tenant consists of multiple timelines. Keep them in a hash table.
 ///
-pub struct TenantShard {
+pub struct Tenant {
    // Global pageserver config parameters
    pub conf: &'static PageServerConf,

@@ -267,7 +267,7 @@ pub struct TenantShard {
    shard_identity: ShardIdentity,

    /// The remote storage generation, used to protect S3 objects from split-brain.
-    /// Does not change over the lifetime of the [`TenantShard`] object.
+    /// Does not change over the lifetime of the [`Tenant`] object.
    ///
    /// This duplicates the generation stored in LocationConf, but that structure is mutable:
    /// this copy enforces the invariant that generatio doesn't change during a Tenant's lifetime.
@@ -309,7 +309,7 @@ pub struct TenantShard {
    // Access to global deletion queue for when this tenant wants to schedule a deletion
    deletion_queue_client: DeletionQueueClient,

-    /// Cached logical sizes updated updated on each [`TenantShard::gather_size_inputs`].
+    /// Cached logical sizes updated on each [`Tenant::gather_size_inputs`].
    cached_logical_sizes: tokio::sync::Mutex<HashMap<(TimelineId, Lsn), u64>>,
    cached_synthetic_tenant_size: Arc<AtomicU64>,

@@ -337,12 +337,12 @@ pub struct TenantShard {
    // Timelines' cancellation token.
    pub(crate) cancel: CancellationToken,

-    // Users of the TenantShard such as the page service must take this Gate to avoid
-    // trying to use a TenantShard which is shutting down.
+    // Users of the Tenant such as the page service must take this Gate to avoid
+    // trying to use a Tenant which is shutting down.
    pub(crate) gate: Gate,

    /// Throttle applied at the top of [`Timeline::get`].
-    /// All [`TenantShard::timelines`] of a given [`TenantShard`] instance share the same [`throttle::Throttle`] instance.
+    /// All [`Tenant::timelines`] of a given [`Tenant`] instance share the same [`throttle::Throttle`] instance.
    pub(crate) pagestream_throttle: Arc<throttle::Throttle>,

    pub(crate) pagestream_throttle_metrics: Arc<crate::metrics::tenant_throttling::Pagestream>,
@@ -362,7 +362,7 @@ pub struct TenantShard {

    l0_flush_global_state: L0FlushGlobalState,
 }
-impl std::fmt::Debug for TenantShard {
+impl std::fmt::Debug for Tenant {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{} ({})", self.tenant_shard_id, self.current_state())
    }
@@ -841,7 +841,7 @@ impl Debug for SetStoppingError {
    }
 }

-/// Arguments to [`TenantShard::create_timeline`].
+/// Arguments to [`Tenant::create_timeline`].
 ///
 /// Not usable as an idempotency key for timeline creation because if [`CreateTimelineParamsBranch::ancestor_start_lsn`]
 /// is `None`, the result of the timeline create call is not deterministic.
@@ -876,7 +876,7 @@ pub(crate) struct CreateTimelineParamsImportPgdata {
    pub(crate) idempotency_key: import_pgdata::index_part_format::IdempotencyKey,
 }

-/// What is used to determine idempotency of a [`TenantShard::create_timeline`] call in  [`TenantShard::start_creating_timeline`] in  [`TenantShard::start_creating_timeline`].
+/// What is used to determine idempotency of a [`Tenant::create_timeline`] call in [`Tenant::start_creating_timeline`].
 ///
 /// Each [`Timeline`] object holds [`Self`] as an immutable property in [`Timeline::create_idempotency`].
 ///
@@ -914,7 +914,7 @@ pub(crate) struct CreatingTimelineIdempotencyImportPgdata {
    idempotency_key: import_pgdata::index_part_format::IdempotencyKey,
 }

-/// What is returned by [`TenantShard::start_creating_timeline`].
+/// What is returned by [`Tenant::start_creating_timeline`].
 #[must_use]
 enum StartCreatingTimelineResult {
    CreateGuard(TimelineCreateGuard),
@@ -943,13 +943,13 @@ struct TimelineInitAndSyncNeedsSpawnImportPgdata {
    guard: TimelineCreateGuard,
 }

-/// What is returned by [`TenantShard::create_timeline`].
+/// What is returned by [`Tenant::create_timeline`].
 enum CreateTimelineResult {
    Created(Arc<Timeline>),
    Idempotent(Arc<Timeline>),
-    /// IMPORTANT: This [`Arc<Timeline>`] object is not in [`TenantShard::timelines`] when
+    /// IMPORTANT: This [`Arc<Timeline>`] object is not in [`Tenant::timelines`] when
    /// we return this result, nor will this concrete object ever be added there.
-    /// Cf method comment on [`TenantShard::create_timeline_import_pgdata`].
+    /// Cf method comment on [`Tenant::create_timeline_import_pgdata`].
    ImportSpawned(Arc<Timeline>),
 }

@@ -1082,7 +1082,7 @@ pub(crate) enum LoadConfigError {
    NotFound(Utf8PathBuf),
 }

-impl TenantShard {
+impl Tenant {
    /// Yet another helper for timeline initialization.
    ///
    /// - Initializes the Timeline struct and inserts it into the tenant's hash map
@@ -1303,7 +1303,7 @@ impl TenantShard {
        init_order: Option<InitializationOrder>,
        mode: SpawnMode,
        ctx: &RequestContext,
-    ) -> Result<Arc<TenantShard>, GlobalShutDown> {
+    ) -> Result<Arc<Tenant>, GlobalShutDown> {
        let wal_redo_manager =
            WalRedoManager::new(PostgresRedoManager::new(conf, tenant_shard_id))?;

@@ -1317,7 +1317,7 @@ impl TenantShard {
        let attach_mode = attached_conf.location.attach_mode;
        let generation = attached_conf.location.generation;

-        let tenant = Arc::new(TenantShard::new(
+        let tenant = Arc::new(Tenant::new(
            TenantState::Attaching,
            conf,
            attached_conf,
@@ -1334,7 +1334,7 @@ impl TenantShard {
        let attach_gate_guard = tenant
            .gate
            .enter()
-            .expect("We just created the TenantShard: nothing else can have shut it down yet");
+            .expect("We just created the Tenant: nothing else can have shut it down yet");

        // Do all the hard work in the background
        let tenant_clone = Arc::clone(&tenant);
@@ -1362,7 +1362,7 @@ impl TenantShard {
                    }
                }

-                fn make_broken_or_stopping(t: &TenantShard, err: anyhow::Error) {
+                fn make_broken_or_stopping(t: &Tenant, err: anyhow::Error) {
                    t.state.send_modify(|state| match state {
                        // TODO: the old code alluded to DeleteTenantFlow sometimes setting
                        // TenantState::Stopping before we get here, but this may be outdated.
@@ -1627,7 +1627,7 @@ impl TenantShard {
    /// No background tasks are started as part of this routine.
    ///
    async fn attach(
-        self: &Arc<TenantShard>,
+        self: &Arc<Tenant>,
        preload: Option<TenantPreload>,
        ctx: &RequestContext,
    ) -> anyhow::Result<()> {
@@ -1957,7 +1957,7 @@ impl TenantShard {
    }

    async fn load_timelines_metadata(
-        self: &Arc<TenantShard>,
+        self: &Arc<Tenant>,
        timeline_ids: HashSet<TimelineId>,
        remote_storage: &GenericRemoteStorage,
        heatmap: Option<(HeatMapTenant, std::time::Instant)>,
@@ -2028,7 +2028,7 @@ impl TenantShard {
    }

    fn load_timeline_metadata(
-        self: &Arc<TenantShard>,
+        self: &Arc<Tenant>,
        timeline_id: TimelineId,
        remote_storage: GenericRemoteStorage,
        previous_heatmap: Option<PreviousHeatmap>,
@@ -2429,14 +2429,14 @@ impl TenantShard {
    /// This is used by tests & import-from-basebackup.
    ///
    /// The returned [`UninitializedTimeline`] contains no data nor metadata and it is in
-    /// a state that will fail [`TenantShard::load_remote_timeline`] because `disk_consistent_lsn=Lsn(0)`.
+    /// a state that will fail [`Tenant::load_remote_timeline`] because `disk_consistent_lsn=Lsn(0)`.
    ///
    /// The caller is responsible for getting the timeline into a state that will be accepted
-    /// by [`TenantShard::load_remote_timeline`] / [`TenantShard::attach`].
+    /// by [`Tenant::load_remote_timeline`] / [`Tenant::attach`].
    /// Then they may call [`UninitializedTimeline::finish_creation`] to add the timeline
-    /// to the [`TenantShard::timelines`].
+    /// to the [`Tenant::timelines`].
    ///
-    /// Tests should use `TenantShard::create_test_timeline` to set up the minimum required metadata keys.
+    /// Tests should use `Tenant::create_test_timeline` to set up the minimum required metadata keys.
    pub(crate) async fn create_empty_timeline(
        self: &Arc<Self>,
        new_timeline_id: TimelineId,
@@ -2584,7 +2584,7 @@ impl TenantShard {
    /// the same timeline ID already exists, returns CreateTimelineError::AlreadyExists.
    #[allow(clippy::too_many_arguments)]
    pub(crate) async fn create_timeline(
-        self: &Arc<TenantShard>,
+        self: &Arc<Tenant>,
        params: CreateTimelineParams,
        broker_client: storage_broker::BrokerClientChannel,
        ctx: &RequestContext,
@@ -2751,13 +2751,13 @@ impl TenantShard {
        Ok(activated_timeline)
    }

-    /// The returned [`Arc<Timeline>`] is NOT in the [`TenantShard::timelines`] map until the import
+    /// The returned [`Arc<Timeline>`] is NOT in the [`Tenant::timelines`] map until the import
    /// completes in the background. A DIFFERENT [`Arc<Timeline>`] will be inserted into the
-    /// [`TenantShard::timelines`] map when the import completes.
+    /// [`Tenant::timelines`] map when the import completes.
    /// We only return an [`Arc<Timeline>`] here so the API handler can create a [`pageserver_api::models::TimelineInfo`]
    /// for the response.
    async fn create_timeline_import_pgdata(
-        self: &Arc<Self>,
+        self: &Arc<Tenant>,
        params: CreateTimelineParamsImportPgdata,
        activate: ActivateTimelineArgs,
        ctx: &RequestContext,
@@ -2854,7 +2854,7 @@ impl TenantShard {

    #[instrument(skip_all, fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug(), timeline_id=%timeline.timeline_id))]
    async fn create_timeline_import_pgdata_task(
-        self: Arc<TenantShard>,
+        self: Arc<Tenant>,
        timeline: Arc<Timeline>,
        index_part: import_pgdata::index_part_format::Root,
        activate: ActivateTimelineArgs,
@@ -2882,7 +2882,7 @@ impl TenantShard {
    }

    async fn create_timeline_import_pgdata_task_impl(
-        self: Arc<TenantShard>,
+        self: Arc<Tenant>,
        timeline: Arc<Timeline>,
        index_part: import_pgdata::index_part_format::Root,
        activate: ActivateTimelineArgs,
@@ -2899,10 +2899,10 @@ impl TenantShard {
        // Reload timeline from remote.
        // This proves that the remote state is attachable, and it reuses the code.
        //
-        // TODO: think about whether this is safe to do with concurrent TenantShard::shutdown.
+        // TODO: think about whether this is safe to do with concurrent Tenant::shutdown.
        // timeline_create_guard hols the tenant gate open, so, shutdown cannot _complete_ until we exit.
-        // But our activate() call might launch new background tasks after TenantShard::shutdown
-        // already went past shutting down the TenantShard::timelines, which this timeline here is no part of.
+        // But our activate() call might launch new background tasks after Tenant::shutdown
+        // already went past shutting down the Tenant::timelines, which this timeline here is no part of.
        // I think the same problem exists with the bootstrap & branch mgmt API tasks (tenant shutting
        // down while bootstrapping/branching + activating), but, the race condition is much more likely
        // to manifest because of the long runtime of this import task.
@@ -2917,7 +2917,7 @@ impl TenantShard {
        // };
        let timeline_id = timeline.timeline_id;

-        // load from object storage like TenantShard::attach does
+        // load from object storage like Tenant::attach does
        let resources = self.build_timeline_resources(timeline_id);
        let index_part = resources
            .remote_client
@@ -3938,7 +3938,7 @@ enum ActivateTimelineArgs {
    No,
 }

-impl TenantShard {
+impl Tenant {
    pub fn tenant_specific_overrides(&self) -> pageserver_api::models::TenantConfig {
        self.tenant_conf.load().tenant_conf.clone()
    }
@@ -4096,7 +4096,7 @@ impl TenantShard {
        update: F,
    ) -> anyhow::Result<pageserver_api::models::TenantConfig> {
        // Use read-copy-update in order to avoid overwriting the location config
-        // state if this races with [`TenantShard::set_new_location_config`]. Note that
+        // state if this races with [`Tenant::set_new_location_config`]. Note that
        // this race is not possible if both request types come from the storage
        // controller (as they should!) because an exclusive op lock is required
        // on the storage controller side.
@@ -4219,7 +4219,7 @@ impl TenantShard {
        Ok((timeline, timeline_ctx))
    }

-    /// [`TenantShard::shutdown`] must be called before dropping the returned [`TenantShard`] object
+    /// [`Tenant::shutdown`] must be called before dropping the returned [`Tenant`] object
    /// to ensure proper cleanup of background tasks and metrics.
    //
    // Allow too_many_arguments because a constructor's argument list naturally grows with the
@@ -4235,7 +4235,7 @@ impl TenantShard {
        remote_storage: GenericRemoteStorage,
        deletion_queue_client: DeletionQueueClient,
        l0_flush_global_state: L0FlushGlobalState,
-    ) -> TenantShard {
+    ) -> Tenant {
        debug_assert!(
            !attached_conf.location.generation.is_none() || conf.control_plane_api.is_none()
        );
@@ -4295,7 +4295,7 @@ impl TenantShard {
            }
        });

-        TenantShard {
+        Tenant {
            tenant_shard_id,
            shard_identity,
            generation: attached_conf.location.generation,
@@ -4330,7 +4330,7 @@ impl TenantShard {
            cancel: CancellationToken::default(),
            gate: Gate::default(),
            pagestream_throttle: Arc::new(throttle::Throttle::new(
-                TenantShard::get_pagestream_throttle_config(conf, &attached_conf.tenant_conf),
+                Tenant::get_pagestream_throttle_config(conf, &attached_conf.tenant_conf),
            )),
            pagestream_throttle_metrics: Arc::new(
                crate::metrics::tenant_throttling::Pagestream::new(&tenant_shard_id),
@@ -4466,11 +4466,11 @@ impl TenantShard {

        // Perform GC for each timeline.
        //
-        // Note that we don't hold the `TenantShard::gc_cs` lock here because we don't want to delay the
+        // Note that we don't hold the `Tenant::gc_cs` lock here because we don't want to delay the
        // branch creation task, which requires the GC lock. A GC iteration can run concurrently
        // with branch creation.
        //
-        // See comments in [`TenantShard::branch_timeline`] for more information about why branch
+        // See comments in [`Tenant::branch_timeline`] for more information about why branch
        // creation task can run concurrently with timeline's GC iteration.
        for timeline in gc_timelines {
            if cancel.is_cancelled() {
@@ -4500,7 +4500,7 @@ impl TenantShard {

    /// Refreshes the Timeline::gc_info for all timelines, returning the
    /// vector of timelines which have [`Timeline::get_last_record_lsn`] past
-    /// [`TenantShard::get_gc_horizon`].
+    /// [`Tenant::get_gc_horizon`].
    ///
    /// This is usually executed as part of periodic gc, but can now be triggered more often.
    pub(crate) async fn refresh_gc_info(
@@ -5499,7 +5499,7 @@ impl TenantShard {
            }
        }

-        // The flushes we did above were just writes, but the TenantShard might have had
+        // The flushes we did above were just writes, but the Tenant might have had
        // pending deletions as well from recent compaction/gc: we want to flush those
        // as well.  This requires flushing the global delete queue.  This is cheap
        // because it's typically a no-op.
@@ -5517,7 +5517,7 @@ impl TenantShard {

    /// How much local storage would this tenant like to have?  It can cope with
    /// less than this (via eviction and on-demand downloads), but this function enables
-    /// the TenantShard to advertise how much storage it would prefer to have to provide fast I/O
+    /// the Tenant to advertise how much storage it would prefer to have to provide fast I/O
    /// by keeping important things on local disk.
    ///
    /// This is a heuristic, not a guarantee: tenants that are long-idle will actually use less
@@ -5540,11 +5540,11 @@ impl TenantShard {
    /// manifest in `Self::remote_tenant_manifest`.
    ///
    /// TODO: instead of requiring callers to remember to call `maybe_upload_tenant_manifest` after
-    /// changing any `TenantShard` state that's included in the manifest, consider making the manifest
+    /// changing any `Tenant` state that's included in the manifest, consider making the manifest
    /// the authoritative source of data with an API that automatically uploads on changes. Revisit
    /// this when the manifest is more widely used and we have a better idea of the data model.
    pub(crate) async fn maybe_upload_tenant_manifest(&self) -> Result<(), TenantManifestError> {
-        // Multiple tasks may call this function concurrently after mutating the TenantShard runtime
+        // Multiple tasks may call this function concurrently after mutating the Tenant runtime
        // state, affecting the manifest generated by `build_tenant_manifest`. We use an async mutex
        // to serialize these callers. `eq_ignoring_version` acts as a slightly inefficient but
        // simple coalescing mechanism.
@@ -5812,7 +5812,7 @@ pub(crate) mod harness {
            info_span!("TenantHarness", tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug())
        }

-        pub(crate) async fn load(&self) -> (Arc<TenantShard>, RequestContext) {
+        pub(crate) async fn load(&self) -> (Arc<Tenant>, RequestContext) {
            let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error)
                .with_scope_unit_test();
            (
@@ -5827,10 +5827,10 @@ pub(crate) mod harness {
        pub(crate) async fn do_try_load(
            &self,
            ctx: &RequestContext,
-        ) -> anyhow::Result<Arc<TenantShard>> {
+        ) -> anyhow::Result<Arc<Tenant>> {
            let walredo_mgr = Arc::new(WalRedoManager::from(TestRedoManager));

-            let tenant = Arc::new(TenantShard::new(
+            let tenant = Arc::new(Tenant::new(
                TenantState::Attaching,
                self.conf,
                AttachedTenantConf::try_from(LocationConf::attached_single(
@@ -5933,20 +5933,12 @@ mod tests {
    use models::CompactLsnRange;
    use pageserver_api::key::{AUX_KEY_PREFIX, Key, NON_INHERITED_RANGE, RELATION_SIZE_PREFIX};
    use pageserver_api::keyspace::KeySpace;
-    #[cfg(feature = "testing")]
-    use pageserver_api::keyspace::KeySpaceRandomAccum;
    use pageserver_api::models::{CompactionAlgorithm, CompactionAlgorithmSettings};
    #[cfg(feature = "testing")]
    use pageserver_api::record::NeonWalRecord;
    use pageserver_api::value::Value;
    use pageserver_compaction::helpers::overlaps_with;
-    #[cfg(feature = "testing")]
-    use rand::SeedableRng;
-    #[cfg(feature = "testing")]
-    use rand::rngs::StdRng;
    use rand::{Rng, thread_rng};
-    #[cfg(feature = "testing")]
-    use std::ops::Range;
    use storage_layer::{IoConcurrency, PersistentLayerKey};
    use tests::storage_layer::ValuesReconstructState;
    use tests::timeline::{GetVectoredError, ShutdownMode};
@@ -5968,318 +5960,6 @@ mod tests {
    static TEST_KEY: Lazy<Key> =
        Lazy::new(|| Key::from_slice(&hex!("010000000033333333444444445500000001")));

-    #[cfg(feature = "testing")]
-    struct TestTimelineSpecification {
-        start_lsn: Lsn,
-        last_record_lsn: Lsn,
-
-        in_memory_layers_shape: Vec<(Range<Key>, Range<Lsn>)>,
-        delta_layers_shape: Vec<(Range<Key>, Range<Lsn>)>,
-        image_layers_shape: Vec<(Range<Key>, Lsn)>,
-
-        gap_chance: u8,
-        will_init_chance: u8,
-    }
-
-    #[cfg(feature = "testing")]
-    struct Storage {
-        storage: HashMap<(Key, Lsn), Value>,
-        start_lsn: Lsn,
-    }
-
-    #[cfg(feature = "testing")]
-    impl Storage {
-        fn get(&self, key: Key, lsn: Lsn) -> Bytes {
-            use bytes::BufMut;
-
-            let mut crnt_lsn = lsn;
-            let mut got_base = false;
-
-            let mut acc = Vec::new();
-
-            while crnt_lsn >= self.start_lsn {
-                if let Some(value) = self.storage.get(&(key, crnt_lsn)) {
-                    acc.push(value.clone());
-
-                    match value {
-                        Value::WalRecord(NeonWalRecord::Test { will_init, .. }) => {
-                            if *will_init {
-                                got_base = true;
-                                break;
-                            }
-                        }
-                        Value::Image(_) => {
-                            got_base = true;
-                            break;
-                        }
-                        _ => unreachable!(),
-                    }
-                }
-
-                crnt_lsn = crnt_lsn.checked_sub(1u64).unwrap();
-            }
-
-            assert!(
-                got_base,
-                "Input data was incorrect. No base image for {key}@{lsn}"
-            );
-
-            tracing::debug!("Wal redo depth for {key}@{lsn} is {}", acc.len());
-
-            let mut blob = BytesMut::new();
-            for value in acc.into_iter().rev() {
-                match value {
-                    Value::WalRecord(NeonWalRecord::Test { append, .. }) => {
-                        blob.extend_from_slice(append.as_bytes());
-                    }
-                    Value::Image(img) => {
-                        blob.put(img);
-                    }
-                    _ => unreachable!(),
-                }
-            }
-
-            blob.into()
-        }
-    }
-
-    #[cfg(feature = "testing")]
-    #[allow(clippy::too_many_arguments)]
-    async fn randomize_timeline(
-        tenant: &Arc<TenantShard>,
-        new_timeline_id: TimelineId,
-        pg_version: u32,
-        spec: TestTimelineSpecification,
-        random: &mut rand::rngs::StdRng,
-        ctx: &RequestContext,
-    ) -> anyhow::Result<(Arc<Timeline>, Storage, Vec<Lsn>)> {
-        let mut storage: HashMap<(Key, Lsn), Value> = HashMap::default();
-        let mut interesting_lsns = vec![spec.last_record_lsn];
-
-        for (key_range, lsn_range) in spec.in_memory_layers_shape.iter() {
-            let mut lsn = lsn_range.start;
-            while lsn < lsn_range.end {
-                let mut key = key_range.start;
-                while key < key_range.end {
-                    let gap = random.gen_range(1..=100) <= spec.gap_chance;
-                    let will_init = random.gen_range(1..=100) <= spec.will_init_chance;
-
-                    if gap {
-                        continue;
-                    }
-
-                    let record = if will_init {
-                        Value::WalRecord(NeonWalRecord::wal_init(format!("[wil_init {key}@{lsn}]")))
-                    } else {
-                        Value::WalRecord(NeonWalRecord::wal_append(format!("[delta {key}@{lsn}]")))
-                    };
-
-                    storage.insert((key, lsn), record);
-
-                    key = key.next();
-                }
-                lsn = Lsn(lsn.0 + 1);
-            }
-
-            // Stash some interesting LSN for future use
-            for offset in [0, 5, 100].iter() {
-                if *offset == 0 {
-                    interesting_lsns.push(lsn_range.start);
-                } else {
-                    let below = lsn_range.start.checked_sub(*offset);
-                    match below {
-                        Some(v) if v >= spec.start_lsn => {
-                            interesting_lsns.push(v);
-                        }
-                        _ => {}
-                    }
-
-                    let above = Lsn(lsn_range.start.0 + offset);
-                    interesting_lsns.push(above);
-                }
-            }
-        }
-
-        for (key_range, lsn_range) in spec.delta_layers_shape.iter() {
-            let mut lsn = lsn_range.start;
-            while lsn < lsn_range.end {
-                let mut key = key_range.start;
-                while key < key_range.end {
-                    let gap = random.gen_range(1..=100) <= spec.gap_chance;
-                    let will_init = random.gen_range(1..=100) <= spec.will_init_chance;
-
-                    if gap {
-                        continue;
-                    }
-
-                    let record = if will_init {
-                        Value::WalRecord(NeonWalRecord::wal_init(format!("[wil_init {key}@{lsn}]")))
-                    } else {
-                        Value::WalRecord(NeonWalRecord::wal_append(format!("[delta {key}@{lsn}]")))
-                    };
-
-                    storage.insert((key, lsn), record);
-
-                    key = key.next();
-                }
-                lsn = Lsn(lsn.0 + 1);
-            }
-
-            // Stash some interesting LSN for future use
-            for offset in [0, 5, 100].iter() {
-                if *offset == 0 {
-                    interesting_lsns.push(lsn_range.start);
-                } else {
-                    let below = lsn_range.start.checked_sub(*offset);
-                    match below {
-                        Some(v) if v >= spec.start_lsn => {
-                            interesting_lsns.push(v);
-                        }
-                        _ => {}
-                    }
-
-                    let above = Lsn(lsn_range.start.0 + offset);
-                    interesting_lsns.push(above);
-                }
-            }
-        }
-
-        for (key_range, lsn) in spec.image_layers_shape.iter() {
-            let mut key = key_range.start;
-            while key < key_range.end {
-                let blob = Bytes::from(format!("[image {key}@{lsn}]"));
-                let record = Value::Image(blob.clone());
-                storage.insert((key, *lsn), record);
-
-                key = key.next();
-            }
-
-            // Stash some interesting LSN for future use
-            for offset in [0, 5, 100].iter() {
-                if *offset == 0 {
-                    interesting_lsns.push(*lsn);
-                } else {
-                    let below = lsn.checked_sub(*offset);
-                    match below {
-                        Some(v) if v >= spec.start_lsn => {
-                            interesting_lsns.push(v);
-                        }
-                        _ => {}
-                    }
-
-                    let above = Lsn(lsn.0 + offset);
-                    interesting_lsns.push(above);
-                }
-            }
-        }
-
-        let in_memory_test_layers = {
-            let mut acc = Vec::new();
-
-            for (key_range, lsn_range) in spec.in_memory_layers_shape.iter() {
-                let mut data = Vec::new();
-
-                let mut lsn = lsn_range.start;
-                while lsn < lsn_range.end {
-                    let mut key = key_range.start;
-                    while key < key_range.end {
-                        if let Some(record) = storage.get(&(key, lsn)) {
-                            data.push((key, lsn, record.clone()));
-                        }
-
-                        key = key.next();
-                    }
-                    lsn = Lsn(lsn.0 + 1);
-                }
-
-                acc.push(InMemoryLayerTestDesc {
-                    data,
-                    lsn_range: lsn_range.clone(),
-                    is_open: false,
-                })
-            }
-
-            acc
-        };
-
-        let delta_test_layers = {
-            let mut acc = Vec::new();
-
-            for (key_range, lsn_range) in spec.delta_layers_shape.iter() {
-                let mut data = Vec::new();
-
-                let mut lsn = lsn_range.start;
-                while lsn < lsn_range.end {
-                    let mut key = key_range.start;
-                    while key < key_range.end {
-                        if let Some(record) = storage.get(&(key, lsn)) {
-                            data.push((key, lsn, record.clone()));
-                        }
-
-                        key = key.next();
-                    }
-                    lsn = Lsn(lsn.0 + 1);
-                }
-
-                acc.push(DeltaLayerTestDesc {
-                    data,
-                    lsn_range: lsn_range.clone(),
-                    key_range: key_range.clone(),
-                })
-            }
-
-            acc
-        };
-
-        let image_test_layers = {
-            let mut acc = Vec::new();
-
-            for (key_range, lsn) in spec.image_layers_shape.iter() {
-                let mut data = Vec::new();
-
-                let mut key = key_range.start;
-                while key < key_range.end {
-                    if let Some(record) = storage.get(&(key, *lsn)) {
-                        let blob = match record {
-                            Value::Image(blob) => blob.clone(),
-                            _ => unreachable!(),
-                        };
-
-                        data.push((key, blob));
-                    }
-
-                    key = key.next();
-                }
-
-                acc.push((*lsn, data));
-            }
-
-            acc
-        };
-
-        let tline = tenant
-            .create_test_timeline_with_layers(
-                new_timeline_id,
-                spec.start_lsn,
-                pg_version,
-                ctx,
-                in_memory_test_layers,
-                delta_test_layers,
-                image_test_layers,
-                spec.last_record_lsn,
-            )
-            .await?;
-
-        Ok((
-            tline,
-            Storage {
-                storage,
-                start_lsn: spec.start_lsn,
-            },
-            interesting_lsns,
-        ))
-    }
-
    #[tokio::test]
    async fn test_basic() -> anyhow::Result<()> {
        let (tenant, ctx) = TenantHarness::create("test_basic").await?.load().await;
@@ -6936,7 +6616,7 @@ mod tests {
    }

    async fn bulk_insert_compact_gc(
-        tenant: &TenantShard,
+        tenant: &Tenant,
        timeline: &Arc<Timeline>,
        ctx: &RequestContext,
        lsn: Lsn,
@@ -6948,7 +6628,7 @@ mod tests {
    }

    async fn bulk_insert_maybe_compact_gc(
-        tenant: &TenantShard,
+        tenant: &Tenant,
        timeline: &Arc<Timeline>,
        ctx: &RequestContext,
        mut lsn: Lsn,
@@ -7858,7 +7538,7 @@ mod tests {
            let (tline, _ctx) = tenant
                .create_empty_timeline(TIMELINE_ID, Lsn(0), DEFAULT_PG_VERSION, &ctx)
                .await?;
-            // Leave the timeline ID in [`TenantShard::timelines_creating`] to exclude attempting to create it again
+            // Leave the timeline ID in [`Tenant::timelines_creating`] to exclude attempting to create it again
            let raw_tline = tline.raw_timeline().unwrap();
            raw_tline
                .shutdown(super::timeline::ShutdownMode::Hard)
@@ -10863,214 +10543,6 @@ mod tests {
        Ok(())
    }

-    // A randomized read path test. Generates a layer map according to a deterministic
-    // specification. Fills the (key, LSN) space in random manner and then performs
-    // random scattered queries validating the results against in-memory storage.
-    //
-    // See this internal Notion page for a diagram of the layer map:
-    // https://www.notion.so/neondatabase/Read-Path-Unit-Testing-Fuzzing-1d1f189e0047806c8e5cd37781b0a350?pvs=4
-    //
-    // A fuzzing mode is also supported. In this mode, the test will use a random
-    // seed instead of a hardcoded one. Use it in conjunction with `cargo stress`
-    // to run multiple instances in parallel:
-    //
-    // $ RUST_BACKTRACE=1 RUST_LOG=INFO \
-    //   cargo stress --package=pageserver --features=testing,fuzz-read-path --release -- test_read_path
-    #[cfg(feature = "testing")]
-    #[tokio::test]
-    async fn test_read_path() -> anyhow::Result<()> {
-        use rand::seq::SliceRandom;
-
-        let seed = if cfg!(feature = "fuzz-read-path") {
-            let seed: u64 = thread_rng().r#gen();
-            seed
-        } else {
-            // Use a hard-coded seed when not in fuzzing mode.
-            // Note that with the current approach results are not reproducible
-            // accross platforms and Rust releases.
-            const SEED: u64 = 0;
-            SEED
-        };
-
-        let mut random = StdRng::seed_from_u64(seed);
-
-        let (queries, will_init_chance, gap_chance) = if cfg!(feature = "fuzz-read-path") {
-            const QUERIES: u64 = 5000;
-            let will_init_chance: u8 = random.gen_range(0..=10);
-            let gap_chance: u8 = random.gen_range(0..=50);
-
-            (QUERIES, will_init_chance, gap_chance)
-        } else {
-            const QUERIES: u64 = 1000;
-            const WILL_INIT_CHANCE: u8 = 1;
-            const GAP_CHANCE: u8 = 5;
-
-            (QUERIES, WILL_INIT_CHANCE, GAP_CHANCE)
-        };
-
-        let harness = TenantHarness::create("test_read_path").await?;
-        let (tenant, ctx) = harness.load().await;
-
-        tracing::info!("Using random seed: {seed}");
-        tracing::info!(%will_init_chance, %gap_chance, "Fill params");
-
-        // Define the layer map shape. Note that this part is not randomized.
-
-        const KEY_DIMENSION_SIZE: u32 = 99;
-        let start_key = Key::from_hex("110000000033333333444444445500000000").unwrap();
-        let end_key = start_key.add(KEY_DIMENSION_SIZE);
-        let total_key_range = start_key..end_key;
-        let total_key_range_size = end_key.to_i128() - start_key.to_i128();
-        let total_start_lsn = Lsn(104);
-        let last_record_lsn = Lsn(504);
-
-        assert!(total_key_range_size % 3 == 0);
-
-        let in_memory_layers_shape = vec![
-            (total_key_range.clone(), Lsn(304)..Lsn(400)),
-            (total_key_range.clone(), Lsn(400)..last_record_lsn),
-        ];
-
-        let delta_layers_shape = vec![
-            (
-                start_key..(start_key.add((total_key_range_size / 3) as u32)),
-                Lsn(200)..Lsn(304),
-            ),
-            (
-                (start_key.add((total_key_range_size / 3) as u32))
-                    ..(start_key.add((total_key_range_size * 2 / 3) as u32)),
-                Lsn(200)..Lsn(304),
-            ),
-            (
-                (start_key.add((total_key_range_size * 2 / 3) as u32))
-                    ..(start_key.add(total_key_range_size as u32)),
-                Lsn(200)..Lsn(304),
-            ),
-        ];
-
-        let image_layers_shape = vec![
-            (
-                start_key.add((total_key_range_size * 2 / 3 - 10) as u32)
-                    ..start_key.add((total_key_range_size * 2 / 3 + 10) as u32),
-                Lsn(456),
-            ),
-            (
-                start_key.add((total_key_range_size / 3 - 10) as u32)
-                    ..start_key.add((total_key_range_size / 3 + 10) as u32),
-                Lsn(256),
-            ),
-            (total_key_range.clone(), total_start_lsn),
-        ];
-
-        let specification = TestTimelineSpecification {
-            start_lsn: total_start_lsn,
-            last_record_lsn,
-            in_memory_layers_shape,
-            delta_layers_shape,
-            image_layers_shape,
-            gap_chance,
-            will_init_chance,
-        };
-
-        // Create and randomly fill in the layers according to the specification
-        let (tline, storage, interesting_lsns) = randomize_timeline(
-            &tenant,
-            TIMELINE_ID,
-            DEFAULT_PG_VERSION,
-            specification,
-            &mut random,
-            &ctx,
-        )
-        .await?;
-
-        // Now generate queries based on the interesting lsns that we've collected.
-        //
-        // While there's still room in the query, pick and interesting LSN and a random
-        // key. Then roll the dice to see if the next key should also be included in
-        // the query. When the roll fails, break the "batch" and pick another point in the
-        // (key, LSN) space.
-
-        const PICK_NEXT_CHANCE: u8 = 50;
-        for _ in 0..queries {
-            let query = {
-                let mut keyspaces_at_lsn: HashMap<Lsn, KeySpaceRandomAccum> = HashMap::default();
-                let mut used_keys: HashSet<Key> = HashSet::default();
-
-                while used_keys.len() < Timeline::MAX_GET_VECTORED_KEYS as usize {
-                    let selected_lsn = interesting_lsns.choose(&mut random).expect("not empty");
-                    let mut selected_key = start_key.add(random.gen_range(0..KEY_DIMENSION_SIZE));
-
-                    while used_keys.len() < Timeline::MAX_GET_VECTORED_KEYS as usize {
-                        if used_keys.contains(&selected_key)
-                            || selected_key >= start_key.add(KEY_DIMENSION_SIZE)
-                        {
-                            break;
-                        }
-
-                        keyspaces_at_lsn
-                            .entry(*selected_lsn)
-                            .or_default()
-                            .add_key(selected_key);
-                        used_keys.insert(selected_key);
-
-                        let pick_next = random.gen_range(0..=100) <= PICK_NEXT_CHANCE;
-                        if pick_next {
-                            selected_key = selected_key.next();
-                        } else {
-                            break;
-                        }
-                    }
-                }
-
-                VersionedKeySpaceQuery::scattered(
-                    keyspaces_at_lsn
-                        .into_iter()
-                        .map(|(lsn, acc)| (lsn, acc.to_keyspace()))
-                        .collect(),
-                )
-            };
-
-            // Run the query and validate the results
-
-            let results = tline
-                .get_vectored(query.clone(), IoConcurrency::Sequential, &ctx)
-                .await;
-
-            let blobs = match results {
-                Ok(ok) => ok,
-                Err(err) => {
-                    panic!("seed={seed} Error returned for query {query}: {err}");
-                }
-            };
-
-            for (key, key_res) in blobs.into_iter() {
-                match key_res {
-                    Ok(blob) => {
-                        let requested_at_lsn = query.map_key_to_lsn(&key);
-                        let expected = storage.get(key, requested_at_lsn);
-
-                        if blob != expected {
-                            tracing::error!(
-                                "seed={seed} Mismatch for {key}@{requested_at_lsn} from query: {query}"
-                            );
-                        }
-
-                        assert_eq!(blob, expected);
-                    }
-                    Err(err) => {
-                        let requested_at_lsn = query.map_key_to_lsn(&key);
-
-                        panic!(
-                            "seed={seed} Error returned for {key}@{requested_at_lsn} from query {query}: {err}"
-                        );
-                    }
-                }
-            }
-        }
-
-        Ok(())
-    }
-
    fn sort_layer_key(k1: &PersistentLayerKey, k2: &PersistentLayerKey) -> std::cmp::Ordering {
        (
            k1.is_delta,
--- a/pageserver/src/tenant/blob_io.rs
+++ b/pageserver/src/tenant/blob_io.rs
@@ -22,7 +22,6 @@ use bytes::{BufMut, BytesMut};
 use pageserver_api::models::ImageCompressionAlgorithm;
 use tokio::io::AsyncWriteExt;
 use tokio_epoll_uring::{BoundedBuf, IoBuf, Slice};
-use tokio_util::sync::CancellationToken;
 use tracing::warn;

 use crate::context::RequestContext;
@@ -37,63 +36,6 @@ pub struct CompressionInfo {
    pub compressed_size: Option<usize>,
 }

-/// A blob header, with header+data length and compression info.
-///
-/// TODO: use this more widely, and add an encode() method too.
-/// TODO: document the header format.
-#[derive(Clone, Copy, Default)]
-pub struct Header {
-    pub header_len: usize,
-    pub data_len: usize,
-    pub compression_bits: u8,
-}
-
-impl Header {
-    /// Decodes a header from a byte slice.
-    pub fn decode(bytes: &[u8]) -> Result<Self, std::io::Error> {
-        let Some(&first_header_byte) = bytes.first() else {
-            return Err(std::io::Error::new(
-                std::io::ErrorKind::InvalidData,
-                "zero-length blob header",
-            ));
-        };
-
-        // If the first bit is 0, this is just a 1-byte length prefix up to 128 bytes.
-        if first_header_byte < 0x80 {
-            return Ok(Self {
-                header_len: 1, // by definition
-                data_len: first_header_byte as usize,
-                compression_bits: BYTE_UNCOMPRESSED,
-            });
-        }
-
-        // Otherwise, this is a 4-byte header containing compression information and length.
-        const HEADER_LEN: usize = 4;
-        let mut header_buf: [u8; HEADER_LEN] = bytes[0..HEADER_LEN].try_into().map_err(|_| {
-            std::io::Error::new(
-                std::io::ErrorKind::InvalidData,
-                format!("blob header too short: {bytes:?}"),
-            )
-        })?;
-
-        // TODO: verify the compression bits and convert to an enum.
-        let compression_bits = header_buf[0] & LEN_COMPRESSION_BIT_MASK;
-        header_buf[0] &= !LEN_COMPRESSION_BIT_MASK;
-        let data_len = u32::from_be_bytes(header_buf) as usize;
-
-        Ok(Self {
-            header_len: HEADER_LEN,
-            data_len,
-            compression_bits,
-        })
-    }
-
-    /// Returns the total header+data length.
-    pub fn total_len(&self) -> usize {
-        self.header_len + self.data_len
-    }
-}
-
 impl BlockCursor<'_> {
    /// Read a blob into a new buffer.
    pub async fn read_blob(
@@ -227,13 +169,7 @@ pub struct BlobWriter<const BUFFERED: bool> {
 }

 impl<const BUFFERED: bool> BlobWriter<BUFFERED> {
-    pub fn new(
-        inner: VirtualFile,
-        start_offset: u64,
-        _gate: &utils::sync::gate::Gate,
-        _cancel: CancellationToken,
-        _ctx: &RequestContext,
-    ) -> Self {
+    pub fn new(inner: VirtualFile, start_offset: u64) -> Self {
        Self {
            inner,
            offset: start_offset,
@@ -446,34 +382,6 @@ impl<const BUFFERED: bool> BlobWriter<BUFFERED> {
        };
        (srcbuf, res.map(|_| (offset, compression_info)))
    }
-
-    /// Writes a raw blob containing both header and data, returning its offset.
-    pub(crate) async fn write_blob_raw<Buf: IoBuf + Send>(
-        &mut self,
-        raw_with_header: FullSlice<Buf>,
-        ctx: &RequestContext,
-    ) -> (FullSlice<Buf>, Result<u64, Error>) {
-        // Verify the header, to ensure we don't write invalid/corrupt data.
-        let header = match Header::decode(&raw_with_header) {
-            Ok(header) => header,
-            Err(err) => return (raw_with_header, Err(err)),
-        };
-        if raw_with_header.len() != header.total_len() {
-            let header_total_len = header.total_len();
-            let raw_len = raw_with_header.len();
-            return (
-                raw_with_header,
-                Err(std::io::Error::new(
-                    std::io::ErrorKind::InvalidData,
-                    format!("header length mismatch: {header_total_len} != {raw_len}"),
-                )),
-            );
-        }
-
-        let offset = self.offset;
-        let (raw_with_header, result) = self.write_all(raw_with_header, ctx).await;
-        (raw_with_header, result.map(|_| offset))
-    }
 }

 impl BlobWriter<true> {
@@ -524,14 +432,12 @@ pub(crate) mod tests {
    ) -> Result<(Utf8TempDir, Utf8PathBuf, Vec<u64>), Error> {
        let temp_dir = camino_tempfile::tempdir()?;
        let pathbuf = temp_dir.path().join("file");
-        let gate = utils::sync::gate::Gate::default();
-        let cancel = CancellationToken::new();

        // Write part (in block to drop the file)
        let mut offsets = Vec::new();
        {
            let file = VirtualFile::create(pathbuf.as_path(), ctx).await?;
-            let mut wtr = BlobWriter::<BUFFERED>::new(file, 0, &gate, cancel.clone(), ctx);
+            let mut wtr = BlobWriter::<BUFFERED>::new(file, 0);
            for blob in blobs.iter() {
                let (_, res) = if compression {
                    let res = wtr
--- a/pageserver/src/tenant/layer_map.rs
+++ b/pageserver/src/tenant/layer_map.rs
@@ -714,7 +714,7 @@ impl LayerMap {
        true
    }

-    pub fn iter_historic_layers(&self) -> impl ExactSizeIterator<Item = Arc<PersistentLayerDesc>> {
+    pub fn iter_historic_layers(&self) -> impl '_ + Iterator<Item = Arc<PersistentLayerDesc>> {
        self.historic.iter()
    }

--- a/pageserver/src/tenant/layer_map/historic_layer_coverage.rs
+++ b/pageserver/src/tenant/layer_map/historic_layer_coverage.rs
@@ -504,7 +504,7 @@ impl<Value: Clone> BufferedHistoricLayerCoverage<Value> {
    }

    /// Iterate all the layers
-    pub fn iter(&self) -> impl ExactSizeIterator<Item = Value> {
+    pub fn iter(&self) -> impl '_ + Iterator<Item = Value> {
        // NOTE we can actually perform this without rebuilding,
        //      but it's not necessary for now.
        if !self.buffer.is_empty() {
--- a/pageserver/src/tenant/metadata.rs
+++ b/pageserver/src/tenant/metadata.rs
@@ -564,9 +564,8 @@ mod tests {
            Lsn(0),
            Lsn(0),
            Lsn(0),
-            // Updating this version to 17 will cause the test to fail at the
-            // next assert_eq!().
-            16,
+            // Any version will do here, so use the default
+            crate::DEFAULT_PG_VERSION,
        );
        let expected_bytes = vec![
            /* TimelineMetadataHeader */
--- a/pageserver/src/tenant/mgr.rs
+++ b/pageserver/src/tenant/mgr.rs
@@ -52,9 +52,7 @@ use crate::tenant::config::{
 use crate::tenant::span::debug_assert_current_span_has_tenant_id;
 use crate::tenant::storage_layer::inmemory_layer;
 use crate::tenant::timeline::ShutdownMode;
-use crate::tenant::{
-    AttachedTenantConf, GcError, LoadConfigError, SpawnMode, TenantShard, TenantState,
-};
+use crate::tenant::{AttachedTenantConf, GcError, LoadConfigError, SpawnMode, Tenant, TenantState};
 use crate::virtual_file::MaybeFatalIo;
 use crate::{InitializationOrder, TEMP_FILE_SUFFIX};

@@ -69,7 +67,7 @@ use crate::{InitializationOrder, TEMP_FILE_SUFFIX};
 /// having a properly acquired generation (Secondary doesn't need a generation)
 #[derive(Clone)]
 pub(crate) enum TenantSlot {
-    Attached(Arc<TenantShard>),
+    Attached(Arc<Tenant>),
    Secondary(Arc<SecondaryTenant>),
    /// In this state, other administrative operations acting on the TenantId should
    /// block, or return a retry indicator equivalent to HTTP 503.
@@ -88,7 +86,7 @@ impl std::fmt::Debug for TenantSlot {

 impl TenantSlot {
    /// Return the `Tenant` in this slot if attached, else None
-    fn get_attached(&self) -> Option<&Arc<TenantShard>> {
+    fn get_attached(&self) -> Option<&Arc<Tenant>> {
        match self {
            Self::Attached(t) => Some(t),
            Self::Secondary(_) => None,
@@ -166,7 +164,7 @@ impl TenantStartupMode {
 /// Result type for looking up a TenantId to a specific shard
 pub(crate) enum ShardResolveResult {
    NotFound,
-    Found(Arc<TenantShard>),
+    Found(Arc<Tenant>),
    // Wait for this barrrier, then query again
    InProgress(utils::completion::Barrier),
 }
@@ -175,7 +173,7 @@ impl TenantsMap {
    /// Convenience function for typical usage, where we want to get a `Tenant` object, for
    /// working with attached tenants.  If the TenantId is in the map but in Secondary state,
    /// None is returned.
-    pub(crate) fn get(&self, tenant_shard_id: &TenantShardId) -> Option<&Arc<TenantShard>> {
+    pub(crate) fn get(&self, tenant_shard_id: &TenantShardId) -> Option<&Arc<Tenant>> {
        match self {
            TenantsMap::Initializing => None,
            TenantsMap::Open(m) | TenantsMap::ShuttingDown(m) => {
@@ -412,7 +410,7 @@ fn load_tenant_config(
        return None;
    }

-    Some(TenantShard::load_tenant_config(conf, &tenant_shard_id))
+    Some(Tenant::load_tenant_config(conf, &tenant_shard_id))
 }

 /// Initial stage of load: walk the local tenants directory, clean up any temp files,
@@ -608,8 +606,7 @@ pub async fn init_tenant_mgr(
        // Presence of a generation number implies attachment: attach the tenant
        // if it wasn't already, and apply the generation number.
        config_write_futs.push(async move {
-            let r =
-                TenantShard::persist_tenant_config(conf, &tenant_shard_id, &location_conf).await;
+            let r = Tenant::persist_tenant_config(conf, &tenant_shard_id, &location_conf).await;
            (tenant_shard_id, location_conf, r)
        });
    }
@@ -697,7 +694,7 @@ fn tenant_spawn(
    init_order: Option<InitializationOrder>,
    mode: SpawnMode,
    ctx: &RequestContext,
-) -> Result<Arc<TenantShard>, GlobalShutDown> {
+) -> Result<Arc<Tenant>, GlobalShutDown> {
    // All these conditions should have been satisfied by our caller: the tenant dir exists, is a well formed
    // path, and contains a configuration file.  Assertions that do synchronous I/O are limited to debug mode
    // to avoid impacting prod runtime performance.
@@ -709,7 +706,7 @@ fn tenant_spawn(
            .unwrap()
    );

-    TenantShard::spawn(
+    Tenant::spawn(
        conf,
        tenant_shard_id,
        resources,
@@ -886,12 +883,12 @@ impl TenantManager {
    /// Gets the attached tenant from the in-memory data, erroring if it's absent, in secondary mode, or currently
    /// undergoing a state change (i.e. slot is InProgress).
    ///
-    /// The return TenantShard is not guaranteed to be active: check its status after obtaing it, or
-    /// use [`TenantShard::wait_to_become_active`] before using it if you will do I/O on it.
+    /// The returned Tenant is not guaranteed to be active: check its status after obtaining it, or
+    /// use [`Tenant::wait_to_become_active`] before using it if you will do I/O on it.
    pub(crate) fn get_attached_tenant_shard(
        &self,
        tenant_shard_id: TenantShardId,
-    ) -> Result<Arc<TenantShard>, GetTenantError> {
+    ) -> Result<Arc<Tenant>, GetTenantError> {
        let locked = self.tenants.read().unwrap();

        let peek_slot = tenant_map_peek_slot(&locked, &tenant_shard_id, TenantSlotPeekMode::Read)?;
@@ -940,12 +937,12 @@ impl TenantManager {
        flush: Option<Duration>,
        mut spawn_mode: SpawnMode,
        ctx: &RequestContext,
-    ) -> Result<Option<Arc<TenantShard>>, UpsertLocationError> {
+    ) -> Result<Option<Arc<Tenant>>, UpsertLocationError> {
        debug_assert_current_span_has_tenant_id();
        info!("configuring tenant location to state {new_location_config:?}");

        enum FastPathModified {
-            Attached(Arc<TenantShard>),
+            Attached(Arc<Tenant>),
            Secondary(Arc<SecondaryTenant>),
        }

@@ -1002,13 +999,9 @@ impl TenantManager {
        // phase of writing config and/or waiting for flush, before returning.
        match fast_path_taken {
            Some(FastPathModified::Attached(tenant)) => {
-                TenantShard::persist_tenant_config(
-                    self.conf,
-                    &tenant_shard_id,
-                    &new_location_config,
-                )
-                .await
-                .fatal_err("write tenant shard config");
+                Tenant::persist_tenant_config(self.conf, &tenant_shard_id, &new_location_config)
+                    .await
+                    .fatal_err("write tenant shard config");

                // Transition to AttachedStale means we may well hold a valid generation
                // still, and have been requested to go stale as part of a migration.  If
@@ -1037,13 +1030,9 @@ impl TenantManager {
                return Ok(Some(tenant));
            }
            Some(FastPathModified::Secondary(_secondary_tenant)) => {
-                TenantShard::persist_tenant_config(
-                    self.conf,
-                    &tenant_shard_id,
-                    &new_location_config,
-                )
-                .await
-                .fatal_err("write tenant shard config");
+                Tenant::persist_tenant_config(self.conf, &tenant_shard_id, &new_location_config)
+                    .await
+                    .fatal_err("write tenant shard config");

                return Ok(None);
            }
@@ -1133,7 +1122,7 @@ impl TenantManager {
        // Before activating either secondary or attached mode, persist the
        // configuration, so that on restart we will re-attach (or re-start
        // secondary) on the tenant.
-        TenantShard::persist_tenant_config(self.conf, &tenant_shard_id, &new_location_config)
+        Tenant::persist_tenant_config(self.conf, &tenant_shard_id, &new_location_config)
            .await
            .fatal_err("write tenant shard config");

@@ -1273,7 +1262,7 @@ impl TenantManager {

        let tenant_path = self.conf.tenant_path(&tenant_shard_id);
        let timelines_path = self.conf.timelines_path(&tenant_shard_id);
-        let config = TenantShard::load_tenant_config(self.conf, &tenant_shard_id)?;
+        let config = Tenant::load_tenant_config(self.conf, &tenant_shard_id)?;

        if drop_cache {
            tracing::info!("Dropping local file cache");
@@ -1308,7 +1297,7 @@ impl TenantManager {
        Ok(())
    }

-    pub(crate) fn get_attached_active_tenant_shards(&self) -> Vec<Arc<TenantShard>> {
+    pub(crate) fn get_attached_active_tenant_shards(&self) -> Vec<Arc<Tenant>> {
        let locked = self.tenants.read().unwrap();
        match &*locked {
            TenantsMap::Initializing => Vec::new(),
@@ -1457,7 +1446,7 @@ impl TenantManager {
    #[instrument(skip_all, fields(tenant_id=%tenant.get_tenant_shard_id().tenant_id, shard_id=%tenant.get_tenant_shard_id().shard_slug(), new_shard_count=%new_shard_count.literal()))]
    pub(crate) async fn shard_split(
        &self,
-        tenant: Arc<TenantShard>,
+        tenant: Arc<Tenant>,
        new_shard_count: ShardCount,
        new_stripe_size: Option<ShardStripeSize>,
        ctx: &RequestContext,
@@ -1487,7 +1476,7 @@ impl TenantManager {

    pub(crate) async fn do_shard_split(
        &self,
-        tenant: Arc<TenantShard>,
+        tenant: Arc<Tenant>,
        new_shard_count: ShardCount,
        new_stripe_size: Option<ShardStripeSize>,
        ctx: &RequestContext,
@@ -1714,7 +1703,7 @@ impl TenantManager {
    /// For each resident layer in the parent shard, we will hard link it into all of the child shards.
    async fn shard_split_hardlink(
        &self,
-        parent_shard: &TenantShard,
+        parent_shard: &Tenant,
        child_shards: Vec<TenantShardId>,
    ) -> anyhow::Result<()> {
        debug_assert_current_span_has_tenant_id();
@@ -1999,7 +1988,7 @@ impl TenantManager {
            }

            let tenant_path = self.conf.tenant_path(&tenant_shard_id);
-            let config = TenantShard::load_tenant_config(self.conf, &tenant_shard_id)
+            let config = Tenant::load_tenant_config(self.conf, &tenant_shard_id)
                .map_err(|e| Error::DetachReparent(e.into()))?;

            let shard_identity = config.shard;
--- a/pageserver/src/tenant/remote_timeline_client.rs
+++ b/pageserver/src/tenant/remote_timeline_client.rs
@@ -133,7 +133,7 @@
 //! - Initiate upload queue with that [`IndexPart`].
 //! - Reschedule all lost operations by comparing the local filesystem state
 //!   and remote state as per [`IndexPart`]. This is done in
-//!   [`TenantShard::timeline_init_and_sync`].
+//!   [`Tenant::timeline_init_and_sync`].
 //!
 //! Note that if we crash during file deletion between the index update
 //! that removes the file from the list of files, and deleting the remote file,
@@ -171,7 +171,7 @@
 //! If no remote storage configuration is provided, the [`RemoteTimelineClient`] is
 //! not created and the uploads are skipped.
 //!
-//! [`TenantShard::timeline_init_and_sync`]: super::TenantShard::timeline_init_and_sync
+//! [`Tenant::timeline_init_and_sync`]: super::Tenant::timeline_init_and_sync
 //! [`Timeline::load_layer_map`]: super::Timeline::load_layer_map

 pub(crate) mod download;
@@ -2743,7 +2743,7 @@ mod tests {
    use crate::tenant::config::AttachmentMode;
    use crate::tenant::harness::{TIMELINE_ID, TenantHarness};
    use crate::tenant::storage_layer::layer::local_layer_path;
-    use crate::tenant::{TenantShard, Timeline};
+    use crate::tenant::{Tenant, Timeline};

    pub(super) fn dummy_contents(name: &str) -> Vec<u8> {
        format!("contents for {name}").into()
@@ -2796,7 +2796,7 @@ mod tests {

    struct TestSetup {
        harness: TenantHarness,
-        tenant: Arc<TenantShard>,
+        tenant: Arc<Tenant>,
        timeline: Arc<Timeline>,
        tenant_ctx: RequestContext,
    }
--- a/pageserver/src/tenant/remote_timeline_client/download.rs
+++ b/pageserver/src/tenant/remote_timeline_client/download.rs
@@ -452,7 +452,7 @@ async fn do_download_index_part(
 /// generation (normal case when migrating/restarting).  Only if both of these return 404 do we fall back
 /// to listing objects.
 ///
-/// * `my_generation`: the value of `[crate::tenant::TenantShard::generation]`
+/// * `my_generation`: the value of `[crate::tenant::Tenant::generation]`
 /// * `what`: for logging, what object are we downloading
 /// * `prefix`: when listing objects, use this prefix (i.e. the part of the object path before the generation)
 /// * `do_download`: a GET of the object in a particular generation, which should **retry indefinitely** unless
--- a/pageserver/src/tenant/secondary/heatmap_uploader.rs
+++ b/pageserver/src/tenant/secondary/heatmap_uploader.rs
@@ -21,7 +21,7 @@ use super::scheduler::{
 use super::{CommandRequest, SecondaryTenantError, UploadCommand};
 use crate::TEMP_FILE_SUFFIX;
 use crate::metrics::SECONDARY_MODE;
-use crate::tenant::TenantShard;
+use crate::tenant::Tenant;
 use crate::tenant::config::AttachmentMode;
 use crate::tenant::mgr::{GetTenantError, TenantManager};
 use crate::tenant::remote_timeline_client::remote_heatmap_path;
@@ -74,7 +74,7 @@ impl RunningJob for WriteInProgress {
 }

 struct UploadPending {
-    tenant: Arc<TenantShard>,
+    tenant: Arc<Tenant>,
    last_upload: Option<LastUploadState>,
    target_time: Option<Instant>,
    period: Option<Duration>,
@@ -106,7 +106,7 @@ impl scheduler::Completion for WriteComplete {
 struct UploaderTenantState {
    // This Weak only exists to enable culling idle instances of this type
    // when the Tenant has been deallocated.
-    tenant: Weak<TenantShard>,
+    tenant: Weak<Tenant>,

    /// Digest of the serialized heatmap that we last successfully uploaded
    last_upload_state: Option<LastUploadState>,
@@ -357,7 +357,7 @@ struct LastUploadState {
 /// of the object we would have uploaded.
 async fn upload_tenant_heatmap(
    remote_storage: GenericRemoteStorage,
-    tenant: &Arc<TenantShard>,
+    tenant: &Arc<Tenant>,
    last_upload: Option<LastUploadState>,
 ) -> Result<UploadHeatmapOutcome, UploadHeatmapError> {
    debug_assert_current_span_has_tenant_id();
--- a/pageserver/src/tenant/secondary/scheduler.rs
+++ b/pageserver/src/tenant/secondary/scheduler.rs
@@ -360,7 +360,7 @@ where

    /// Periodic execution phase: inspect all attached tenants and schedule any work they require.
    ///
-    /// The type in `tenants` should be a tenant-like structure, e.g. [`crate::tenant::TenantShard`] or [`crate::tenant::secondary::SecondaryTenant`]
+    /// The type in `tenants` should be a tenant-like structure, e.g. [`crate::tenant::Tenant`] or [`crate::tenant::secondary::SecondaryTenant`]
    ///
    /// This function resets the pending list: it is assumed that the caller may change their mind about
    /// which tenants need work between calls to schedule_iteration.
--- a/pageserver/src/tenant/size.rs
+++ b/pageserver/src/tenant/size.rs
@@ -12,7 +12,7 @@ use tracing::*;
 use utils::id::TimelineId;
 use utils::lsn::Lsn;

-use super::{GcError, LogicalSizeCalculationCause, TenantShard};
+use super::{GcError, LogicalSizeCalculationCause, Tenant};
 use crate::context::RequestContext;
 use crate::pgdatadir_mapping::CalculateLogicalSizeError;
 use crate::tenant::{MaybeOffloaded, Timeline};
@@ -156,7 +156,7 @@ pub struct TimelineInputs {
 ///   initdb_lsn  branchpoints*  next_pitr_cutoff  latest
 /// ```
 pub(super) async fn gather_inputs(
-    tenant: &TenantShard,
+    tenant: &Tenant,
    limit: &Arc<Semaphore>,
    max_retention_period: Option<u64>,
    logical_size_cache: &mut HashMap<(TimelineId, Lsn), u64>,
--- a/pageserver/src/tenant/storage_layer/batch_split_writer.rs
+++ b/pageserver/src/tenant/storage_layer/batch_split_writer.rs
@@ -5,7 +5,6 @@ use std::sync::Arc;
 use bytes::Bytes;
 use pageserver_api::key::{KEY_SIZE, Key};
 use pageserver_api::value::Value;
-use tokio_util::sync::CancellationToken;
 use utils::id::TimelineId;
 use utils::lsn::Lsn;
 use utils::shard::TenantShardId;
@@ -180,7 +179,7 @@ impl BatchLayerWriter {

 /// An image writer that takes images and produces multiple image layers.
 #[must_use]
-pub struct SplitImageLayerWriter<'a> {
+pub struct SplitImageLayerWriter {
    inner: ImageLayerWriter,
    target_layer_size: u64,
    lsn: Lsn,
@@ -189,12 +188,9 @@ pub struct SplitImageLayerWriter<'a> {
    tenant_shard_id: TenantShardId,
    batches: BatchLayerWriter,
    start_key: Key,
-    gate: &'a utils::sync::gate::Gate,
-    cancel: CancellationToken,
 }

-impl<'a> SplitImageLayerWriter<'a> {
-    #[allow(clippy::too_many_arguments)]
+impl SplitImageLayerWriter {
    pub async fn new(
        conf: &'static PageServerConf,
        timeline_id: TimelineId,
@@ -202,8 +198,6 @@ impl<'a> SplitImageLayerWriter<'a> {
        start_key: Key,
        lsn: Lsn,
        target_layer_size: u64,
-        gate: &'a utils::sync::gate::Gate,
-        cancel: CancellationToken,
        ctx: &RequestContext,
    ) -> anyhow::Result<Self> {
        Ok(Self {
@@ -214,8 +208,6 @@ impl<'a> SplitImageLayerWriter<'a> {
                tenant_shard_id,
                &(start_key..Key::MAX),
                lsn,
-                gate,
-                cancel.clone(),
                ctx,
            )
            .await?,
@@ -225,8 +217,6 @@ impl<'a> SplitImageLayerWriter<'a> {
            batches: BatchLayerWriter::new(conf).await?,
            lsn,
            start_key,
-            gate,
-            cancel,
        })
    }

@@ -249,8 +239,6 @@ impl<'a> SplitImageLayerWriter<'a> {
                self.tenant_shard_id,
                &(key..Key::MAX),
                self.lsn,
-                self.gate,
-                self.cancel.clone(),
                ctx,
            )
            .await?;
@@ -303,7 +291,7 @@ impl<'a> SplitImageLayerWriter<'a> {
 /// into a single file. This behavior might change in the future. For reference, the legacy compaction algorithm
 /// will split them into multiple files based on size.
 #[must_use]
-pub struct SplitDeltaLayerWriter<'a> {
+pub struct SplitDeltaLayerWriter {
    inner: Option<(Key, DeltaLayerWriter)>,
    target_layer_size: u64,
    conf: &'static PageServerConf,
@@ -312,19 +300,15 @@ pub struct SplitDeltaLayerWriter<'a> {
    lsn_range: Range<Lsn>,
    last_key_written: Key,
    batches: BatchLayerWriter,
-    gate: &'a utils::sync::gate::Gate,
-    cancel: CancellationToken,
 }

-impl<'a> SplitDeltaLayerWriter<'a> {
+impl SplitDeltaLayerWriter {
    pub async fn new(
        conf: &'static PageServerConf,
        timeline_id: TimelineId,
        tenant_shard_id: TenantShardId,
        lsn_range: Range<Lsn>,
        target_layer_size: u64,
-        gate: &'a utils::sync::gate::Gate,
-        cancel: CancellationToken,
    ) -> anyhow::Result<Self> {
        Ok(Self {
            target_layer_size,
@@ -335,8 +319,6 @@ impl<'a> SplitDeltaLayerWriter<'a> {
            lsn_range,
            last_key_written: Key::MIN,
            batches: BatchLayerWriter::new(conf).await?,
-            gate,
-            cancel,
        })
    }

@@ -362,8 +344,6 @@ impl<'a> SplitDeltaLayerWriter<'a> {
                    self.tenant_shard_id,
                    key,
                    self.lsn_range.clone(),
-                    self.gate,
-                    self.cancel.clone(),
                    ctx,
                )
                .await?,
@@ -382,8 +362,6 @@ impl<'a> SplitDeltaLayerWriter<'a> {
                    self.tenant_shard_id,
                    key,
                    self.lsn_range.clone(),
-                    self.gate,
-                    self.cancel.clone(),
                    ctx,
                )
                .await?;
@@ -491,8 +469,6 @@ mod tests {
            get_key(0),
            Lsn(0x18),
            4 * 1024 * 1024,
-            &tline.gate,
-            tline.cancel.clone(),
            &ctx,
        )
        .await
@@ -504,8 +480,6 @@ mod tests {
            tenant.tenant_shard_id,
            Lsn(0x18)..Lsn(0x20),
            4 * 1024 * 1024,
-            &tline.gate,
-            tline.cancel.clone(),
        )
        .await
        .unwrap();
@@ -572,8 +546,6 @@ mod tests {
            get_key(0),
            Lsn(0x18),
            4 * 1024 * 1024,
-            &tline.gate,
-            tline.cancel.clone(),
            &ctx,
        )
        .await
@@ -584,8 +556,6 @@ mod tests {
            tenant.tenant_shard_id,
            Lsn(0x18)..Lsn(0x20),
            4 * 1024 * 1024,
-            &tline.gate,
-            tline.cancel.clone(),
        )
        .await
        .unwrap();
@@ -673,8 +643,6 @@ mod tests {
            get_key(0),
            Lsn(0x18),
            4 * 1024,
-            &tline.gate,
-            tline.cancel.clone(),
            &ctx,
        )
        .await
@@ -686,8 +654,6 @@ mod tests {
            tenant.tenant_shard_id,
            Lsn(0x18)..Lsn(0x20),
            4 * 1024,
-            &tline.gate,
-            tline.cancel.clone(),
        )
        .await
        .unwrap();
@@ -764,8 +730,6 @@ mod tests {
            tenant.tenant_shard_id,
            Lsn(0x10)..Lsn(N as u64 * 16 + 0x10),
            4 * 1024 * 1024,
-            &tline.gate,
-            tline.cancel.clone(),
        )
        .await
        .unwrap();
--- a/pageserver/src/tenant/storage_layer/delta_layer.rs
+++ b/pageserver/src/tenant/storage_layer/delta_layer.rs
@@ -50,7 +50,6 @@ use rand::distributions::Alphanumeric;
 use serde::{Deserialize, Serialize};
 use tokio::sync::OnceCell;
 use tokio_epoll_uring::IoBuf;
-use tokio_util::sync::CancellationToken;
 use tracing::*;
 use utils::bin_ser::BeSer;
 use utils::id::{TenantId, TimelineId};
@@ -401,15 +400,12 @@ impl DeltaLayerWriterInner {
    ///
    /// Start building a new delta layer.
    ///
-    #[allow(clippy::too_many_arguments)]
    async fn new(
        conf: &'static PageServerConf,
        timeline_id: TimelineId,
        tenant_shard_id: TenantShardId,
        key_start: Key,
        lsn_range: Range<Lsn>,
-        gate: &utils::sync::gate::Gate,
-        cancel: CancellationToken,
        ctx: &RequestContext,
    ) -> anyhow::Result<Self> {
        // Create the file initially with a temporary filename. We don't know
@@ -424,7 +420,7 @@ impl DeltaLayerWriterInner {
        let mut file = VirtualFile::create(&path, ctx).await?;
        // make room for the header block
        file.seek(SeekFrom::Start(PAGE_SZ as u64)).await?;
-        let blob_writer = BlobWriter::new(file, PAGE_SZ as u64, gate, cancel, ctx);
+        let blob_writer = BlobWriter::new(file, PAGE_SZ as u64);

        // Initialize the b-tree index builder
        let block_buf = BlockBuf::new();
@@ -632,15 +628,12 @@ impl DeltaLayerWriter {
    ///
    /// Start building a new delta layer.
    ///
-    #[allow(clippy::too_many_arguments)]
    pub async fn new(
        conf: &'static PageServerConf,
        timeline_id: TimelineId,
        tenant_shard_id: TenantShardId,
        key_start: Key,
        lsn_range: Range<Lsn>,
-        gate: &utils::sync::gate::Gate,
-        cancel: CancellationToken,
        ctx: &RequestContext,
    ) -> anyhow::Result<Self> {
        Ok(Self {
@@ -651,8 +644,6 @@ impl DeltaLayerWriter {
                    tenant_shard_id,
                    key_start,
                    lsn_range,
-                    gate,
-                    cancel,
                    ctx,
                )
                .await?,
@@ -1620,7 +1611,7 @@ pub(crate) mod test {
    use crate::tenant::harness::{TIMELINE_ID, TenantHarness};
    use crate::tenant::storage_layer::{Layer, ResidentLayer};
    use crate::tenant::vectored_blob_io::StreamingVectoredReadPlanner;
-    use crate::tenant::{TenantShard, Timeline};
+    use crate::tenant::{Tenant, Timeline};

    /// Construct an index for a fictional delta layer and and then
    /// traverse in order to plan vectored reads for a query. Finally,
@@ -1894,8 +1885,6 @@ pub(crate) mod test {
            harness.tenant_shard_id,
            entries_meta.key_range.start,
            entries_meta.lsn_range.clone(),
-            &timeline.gate,
-            timeline.cancel.clone(),
            &ctx,
        )
        .await?;
@@ -2090,8 +2079,6 @@ pub(crate) mod test {
                tenant.tenant_shard_id,
                Key::MIN,
                Lsn(0x11)..truncate_at,
-                &branch.gate,
-                branch.cancel.clone(),
                ctx,
            )
            .await
@@ -2209,7 +2196,7 @@ pub(crate) mod test {
    }

    pub(crate) async fn produce_delta_layer(
-        tenant: &TenantShard,
+        tenant: &Tenant,
        tline: &Arc<Timeline>,
        mut deltas: Vec<(Key, Lsn, Value)>,
        ctx: &RequestContext,
@@ -2226,8 +2213,6 @@ pub(crate) mod test {
            tenant.tenant_shard_id,
            *key_start,
            (*lsn_min)..lsn_end,
-            &tline.gate,
-            tline.cancel.clone(),
            ctx,
        )
        .await?;
--- a/pageserver/src/tenant/storage_layer/image_layer.rs
+++ b/pageserver/src/tenant/storage_layer/image_layer.rs
@@ -48,7 +48,6 @@ use rand::distributions::Alphanumeric;
 use serde::{Deserialize, Serialize};
 use tokio::sync::OnceCell;
 use tokio_stream::StreamExt;
-use tokio_util::sync::CancellationToken;
 use tracing::*;
 use utils::bin_ser::BeSer;
 use utils::id::{TenantId, TimelineId};
@@ -559,12 +558,11 @@ impl ImageLayerInner {
            let view = BufView::new_slice(&blobs_buf.buf);

            for meta in blobs_buf.blobs.iter() {
-                // Just read the raw header+data and pass it through to the target layer, without
-                // decoding and recompressing it.
-                let raw = meta.raw_with_header(&view);
+                let img_buf = meta.read(&view).await?;
+
                key_count += 1;
                writer
-                    .put_image_raw(meta.meta.key, raw.into_bytes(), ctx)
+                    .put_image(meta.meta.key, img_buf.into_bytes(), ctx)
                    .await
                    .context(format!("Storing key {}", meta.meta.key))?;
            }
@@ -750,15 +748,12 @@ impl ImageLayerWriterInner {
    ///
    /// Start building a new image layer.
    ///
-    #[allow(clippy::too_many_arguments)]
    async fn new(
        conf: &'static PageServerConf,
        timeline_id: TimelineId,
        tenant_shard_id: TenantShardId,
        key_range: &Range<Key>,
        lsn: Lsn,
-        gate: &utils::sync::gate::Gate,
-        cancel: CancellationToken,
        ctx: &RequestContext,
    ) -> anyhow::Result<Self> {
        // Create the file initially with a temporary filename.
@@ -785,7 +780,7 @@ impl ImageLayerWriterInner {
        };
        // make room for the header block
        file.seek(SeekFrom::Start(PAGE_SZ as u64)).await?;
-        let blob_writer = BlobWriter::new(file, PAGE_SZ as u64, gate, cancel, ctx);
+        let blob_writer = BlobWriter::new(file, PAGE_SZ as u64);

        // Initialize the b-tree index builder
        let block_buf = BlockBuf::new();
@@ -854,41 +849,6 @@ impl ImageLayerWriterInner {
        Ok(())
    }

-    ///
-    /// Write the next image to the file, as a raw blob header and data.
-    ///
-    /// The page versions must be appended in blknum order.
-    ///
-    async fn put_image_raw(
-        &mut self,
-        key: Key,
-        raw_with_header: Bytes,
-        ctx: &RequestContext,
-    ) -> anyhow::Result<()> {
-        ensure!(self.key_range.contains(&key));
-
-        // NB: we don't update the (un)compressed metrics, since we can't determine them without
-        // decompressing the image. This seems okay.
-        self.num_keys += 1;
-
-        let (_, res) = self
-            .blob_writer
-            .write_blob_raw(raw_with_header.slice_len(), ctx)
-            .await;
-        let offset = res?;
-
-        let mut keybuf: [u8; KEY_SIZE] = [0u8; KEY_SIZE];
-        key.write_to_byte_slice(&mut keybuf);
-        self.tree.append(&keybuf, offset)?;
-
-        #[cfg(feature = "testing")]
-        {
-            self.last_written_key = key;
-        }
-
-        Ok(())
-    }
-
    ///
    /// Finish writing the image layer.
    ///
@@ -924,13 +884,7 @@ impl ImageLayerWriterInner {
        crate::metrics::COMPRESSION_IMAGE_INPUT_BYTES_CONSIDERED
            .inc_by(self.uncompressed_bytes_eligible);
        crate::metrics::COMPRESSION_IMAGE_INPUT_BYTES_CHOSEN.inc_by(self.uncompressed_bytes_chosen);
-
-        // NB: filter() may pass through raw pages from a different layer, without looking at
-        // whether these are compressed or not. We don't track metrics for these, so avoid
-        // increasing `COMPRESSION_IMAGE_OUTPUT_BYTES` in this case too.
-        if self.uncompressed_bytes > 0 {
-            crate::metrics::COMPRESSION_IMAGE_OUTPUT_BYTES.inc_by(compressed_size);
-        };
+        crate::metrics::COMPRESSION_IMAGE_OUTPUT_BYTES.inc_by(compressed_size);

        let mut file = self.blob_writer.into_inner();

@@ -1034,30 +988,18 @@ impl ImageLayerWriter {
    ///
    /// Start building a new image layer.
    ///
-    #[allow(clippy::too_many_arguments)]
    pub async fn new(
        conf: &'static PageServerConf,
        timeline_id: TimelineId,
        tenant_shard_id: TenantShardId,
        key_range: &Range<Key>,
        lsn: Lsn,
-        gate: &utils::sync::gate::Gate,
-        cancel: CancellationToken,
        ctx: &RequestContext,
    ) -> anyhow::Result<ImageLayerWriter> {
        Ok(Self {
            inner: Some(
-                ImageLayerWriterInner::new(
-                    conf,
-                    timeline_id,
-                    tenant_shard_id,
-                    key_range,
-                    lsn,
-                    gate,
-                    cancel,
-                    ctx,
-                )
-                .await?,
+                ImageLayerWriterInner::new(conf, timeline_id, tenant_shard_id, key_range, lsn, ctx)
+                    .await?,
            ),
        })
    }
@@ -1076,25 +1018,6 @@ impl ImageLayerWriter {
        self.inner.as_mut().unwrap().put_image(key, img, ctx).await
    }

-    ///
-    /// Write the next value to the file, as a raw header and data. This allows passing through a
-    /// raw, potentially compressed image from a different layer file without recompressing it.
-    ///
-    /// The page versions must be appended in blknum order.
-    ///
-    pub async fn put_image_raw(
-        &mut self,
-        key: Key,
-        raw_with_header: Bytes,
-        ctx: &RequestContext,
-    ) -> anyhow::Result<()> {
-        self.inner
-            .as_mut()
-            .unwrap()
-            .put_image_raw(key, raw_with_header, ctx)
-            .await
-    }
-
    /// Estimated size of the image layer.
    pub(crate) fn estimated_size(&self) -> u64 {
        let inner = self.inner.as_ref().unwrap();
@@ -1228,7 +1151,7 @@ mod test {
    use crate::tenant::harness::{TIMELINE_ID, TenantHarness};
    use crate::tenant::storage_layer::{Layer, ResidentLayer};
    use crate::tenant::vectored_blob_io::StreamingVectoredReadPlanner;
-    use crate::tenant::{TenantShard, Timeline};
+    use crate::tenant::{Tenant, Timeline};

    #[tokio::test]
    async fn image_layer_rewrite() {
@@ -1280,8 +1203,6 @@ mod test {
                harness.tenant_shard_id,
                &range,
                lsn,
-                &timeline.gate,
-                timeline.cancel.clone(),
                &ctx,
            )
            .await
@@ -1347,8 +1268,6 @@ mod test {
                harness.tenant_shard_id,
                &range,
                lsn,
-                &timeline.gate,
-                timeline.cancel.clone(),
                &ctx,
            )
            .await
@@ -1410,7 +1329,7 @@ mod test {
    }

    async fn produce_image_layer(
-        tenant: &TenantShard,
+        tenant: &Tenant,
        tline: &Arc<Timeline>,
        mut images: Vec<(Key, Bytes)>,
        lsn: Lsn,
@@ -1427,8 +1346,6 @@ mod test {
            tenant.tenant_shard_id,
            &key_range,
            lsn,
-            &tline.gate,
-            tline.cancel.clone(),
            ctx,
        )
        .await?;
--- a/pageserver/src/tenant/storage_layer/inmemory_layer.rs
+++ b/pageserver/src/tenant/storage_layer/inmemory_layer.rs
@@ -719,8 +719,6 @@ impl InMemoryLayer {
        ctx: &RequestContext,
        key_range: Option<Range<Key>>,
        l0_flush_global_state: &l0_flush::Inner,
-        gate: &utils::sync::gate::Gate,
-        cancel: CancellationToken,
    ) -> Result<Option<(PersistentLayerDesc, Utf8PathBuf)>> {
        // Grab the lock in read-mode. We hold it over the I/O, but because this
        // layer is not writeable anymore, no one should be trying to acquire the
@@ -761,8 +759,6 @@ impl InMemoryLayer {
            self.tenant_shard_id,
            Key::MIN,
            self.start_lsn..end_lsn,
-            gate,
-            cancel,
            ctx,
        )
        .await?;
--- a/pageserver/src/tenant/tasks.rs
+++ b/pageserver/src/tenant/tasks.rs
@@ -24,7 +24,7 @@ use crate::task_mgr::{self, BACKGROUND_RUNTIME, TOKIO_WORKER_THREADS, TaskKind};
 use crate::tenant::throttle::Stats;
 use crate::tenant::timeline::CompactionError;
 use crate::tenant::timeline::compaction::CompactionOutcome;
-use crate::tenant::{TenantShard, TenantState};
+use crate::tenant::{Tenant, TenantState};

 /// Semaphore limiting concurrent background tasks (across all tenants).
 ///
@@ -117,7 +117,7 @@ pub(crate) async fn acquire_concurrency_permit(
 }

 /// Start per tenant background loops: compaction, GC, and ingest housekeeping.
-pub fn start_background_loops(tenant: &Arc<TenantShard>, can_start: Option<&Barrier>) {
+pub fn start_background_loops(tenant: &Arc<Tenant>, can_start: Option<&Barrier>) {
    let tenant_shard_id = tenant.tenant_shard_id;

    task_mgr::spawn(
@@ -198,7 +198,7 @@ pub fn start_background_loops(tenant: &Arc<TenantShard>, can_start: Option<&Barr
 }

 /// Compaction task's main loop.
-async fn compaction_loop(tenant: Arc<TenantShard>, cancel: CancellationToken) {
+async fn compaction_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
    const BASE_BACKOFF_SECS: f64 = 1.0;
    const MAX_BACKOFF_SECS: f64 = 300.0;
    const RECHECK_CONFIG_INTERVAL: Duration = Duration::from_secs(10);
@@ -348,7 +348,7 @@ pub(crate) fn log_compaction_error(
 }

 /// GC task's main loop.
-async fn gc_loop(tenant: Arc<TenantShard>, cancel: CancellationToken) {
+async fn gc_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
    const MAX_BACKOFF_SECS: f64 = 300.0;
    let mut error_run = 0; // consecutive errors

@@ -432,7 +432,7 @@ async fn gc_loop(tenant: Arc<TenantShard>, cancel: CancellationToken) {
 }

 /// Tenant housekeeping's main loop.
-async fn tenant_housekeeping_loop(tenant: Arc<TenantShard>, cancel: CancellationToken) {
+async fn tenant_housekeeping_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
    let mut last_throttle_flag_reset_at = Instant::now();
    loop {
        if wait_for_active_tenant(&tenant, &cancel).await.is_break() {
@@ -483,7 +483,7 @@ async fn tenant_housekeeping_loop(tenant: Arc<TenantShard>, cancel: Cancellation

 /// Waits until the tenant becomes active, or returns `ControlFlow::Break()` to shut down.
 async fn wait_for_active_tenant(
-    tenant: &Arc<TenantShard>,
+    tenant: &Arc<Tenant>,
    cancel: &CancellationToken,
 ) -> ControlFlow<()> {
    if tenant.current_state() == TenantState::Active {
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -412,7 +412,7 @@ pub struct Timeline {
    /// Timeline deletion will acquire both compaction and gc locks in whatever order.
    gc_lock: tokio::sync::Mutex<()>,

-    /// Cloned from [`super::TenantShard::pagestream_throttle`] on construction.
+    /// Cloned from [`super::Tenant::pagestream_throttle`] on construction.
    pub(crate) pagestream_throttle: Arc<crate::tenant::throttle::Throttle>,

    /// Size estimator for aux file v2
@@ -2065,7 +2065,7 @@ impl Timeline {

    pub(crate) fn activate(
        self: &Arc<Self>,
-        parent: Arc<crate::tenant::TenantShard>,
+        parent: Arc<crate::tenant::Tenant>,
        broker_client: BrokerClientChannel,
        background_jobs_can_start: Option<&completion::Barrier>,
        ctx: &RequestContext,
@@ -2702,14 +2702,6 @@ impl Timeline {
            .clone()
    }

-    pub fn get_compaction_shard_ancestor(&self) -> bool {
-        let tenant_conf = self.tenant_conf.load();
-        tenant_conf
-            .tenant_conf
-            .compaction_shard_ancestor
-            .unwrap_or(self.conf.default_tenant_conf.compaction_shard_ancestor)
-    }
-
    fn get_eviction_policy(&self) -> EvictionPolicy {
        let tenant_conf = self.tenant_conf.load();
        tenant_conf
@@ -3325,7 +3317,7 @@ impl Timeline {
        //     (1) and (4)
        // TODO: this is basically a no-op now, should we remove it?
        self.remote_client.schedule_barrier()?;
-        // TenantShard::create_timeline will wait for these uploads to happen before returning, or
+        // Tenant::create_timeline will wait for these uploads to happen before returning, or
        // on retry.

        // Now that we have the full layer map, we may calculate the visibility of layers within it (a global scan)
@@ -4034,7 +4026,7 @@ impl VersionedKeySpaceQuery {
    /// Returns LSN for a specific key.
    ///
    /// Invariant: requested key must be part of [`Self::total_keyspace`]
-    pub(super) fn map_key_to_lsn(&self, key: &Key) -> Lsn {
+    fn map_key_to_lsn(&self, key: &Key) -> Lsn {
        match self {
            Self::Uniform { lsn, .. } => *lsn,
            Self::Scattered { keyspaces_at_lsn } => {
@@ -4994,13 +4986,7 @@ impl Timeline {
        let ctx = ctx.attached_child();
        let work = async move {
            let Some((desc, path)) = frozen_layer
-                .write_to_disk(
-                    &ctx,
-                    key_range,
-                    self_clone.l0_flush_global_state.inner(),
-                    &self_clone.gate,
-                    self_clone.cancel.clone(),
-                )
+                .write_to_disk(&ctx, key_range, self_clone.l0_flush_global_state.inner())
                .await?
            else {
                return Ok(None);
@@ -5540,8 +5526,6 @@ impl Timeline {
                self.tenant_shard_id,
                &img_range,
                lsn,
-                &self.gate,
-                self.cancel.clone(),
                ctx,
            )
            .await?;
@@ -5710,12 +5694,6 @@ impl Timeline {
            return;
        }

-        if self.cancel.is_cancelled() {
-            // We already requested stopping the tenant, so we cannot wait for the logical size
-            // calculation to complete given the task might have been already cancelled.
-            return;
-        }
-
        if let Some(await_bg_cancel) = self
            .current_logical_size
            .cancel_wait_for_background_loop_concurrency_limit_semaphore
@@ -5754,7 +5732,7 @@ impl Timeline {
    /// from our ancestor to be branches of this timeline.
    pub(crate) async fn prepare_to_detach_from_ancestor(
        self: &Arc<Timeline>,
-        tenant: &crate::tenant::TenantShard,
+        tenant: &crate::tenant::Tenant,
        options: detach_ancestor::Options,
        behavior: DetachBehavior,
        ctx: &RequestContext,
@@ -5773,7 +5751,7 @@ impl Timeline {
    /// resetting the tenant.
    pub(crate) async fn detach_from_ancestor_and_reparent(
        self: &Arc<Timeline>,
-        tenant: &crate::tenant::TenantShard,
+        tenant: &crate::tenant::Tenant,
        prepared: detach_ancestor::PreparedTimelineDetach,
        ancestor_timeline_id: TimelineId,
        ancestor_lsn: Lsn,
@@ -5797,7 +5775,7 @@ impl Timeline {
    /// The tenant must've been reset if ancestry was modified previously (in tenant manager).
    pub(crate) async fn complete_detaching_timeline_ancestor(
        self: &Arc<Timeline>,
-        tenant: &crate::tenant::TenantShard,
+        tenant: &crate::tenant::Tenant,
        attempt: detach_ancestor::Attempt,
        ctx: &RequestContext,
    ) -> Result<(), detach_ancestor::Error> {
@@ -6859,14 +6837,14 @@ impl Timeline {
    /// Persistently blocks gc for `Manual` reason.
    ///
    /// Returns true if no such block existed before, false otherwise.
-    pub(crate) async fn block_gc(&self, tenant: &super::TenantShard) -> anyhow::Result<bool> {
+    pub(crate) async fn block_gc(&self, tenant: &super::Tenant) -> anyhow::Result<bool> {
        use crate::tenant::remote_timeline_client::index::GcBlockingReason;
        assert_eq!(self.tenant_shard_id, tenant.tenant_shard_id);
        tenant.gc_block.insert(self, GcBlockingReason::Manual).await
    }

    /// Persistently unblocks gc for `Manual` reason.
-    pub(crate) async fn unblock_gc(&self, tenant: &super::TenantShard) -> anyhow::Result<()> {
+    pub(crate) async fn unblock_gc(&self, tenant: &super::Tenant) -> anyhow::Result<()> {
        use crate::tenant::remote_timeline_client::index::GcBlockingReason;
        assert_eq!(self.tenant_shard_id, tenant.tenant_shard_id);
        tenant.gc_block.remove(self, GcBlockingReason::Manual).await
@@ -6884,8 +6862,8 @@ impl Timeline {

    /// Force create an image layer and place it into the layer map.
    ///
-    /// DO NOT use this function directly. Use [`TenantShard::branch_timeline_test_with_layers`]
-    /// or [`TenantShard::create_test_timeline_with_layers`] to ensure all these layers are
+    /// DO NOT use this function directly. Use [`Tenant::branch_timeline_test_with_layers`]
+    /// or [`Tenant::create_test_timeline_with_layers`] to ensure all these layers are
    /// placed into the layer map in one run AND be validated.
    #[cfg(test)]
    pub(super) async fn force_create_image_layer(
@@ -6912,8 +6890,6 @@ impl Timeline {
            self.tenant_shard_id,
            &(min_key..end_key),
            lsn,
-            &self.gate,
-            self.cancel.clone(),
            ctx,
        )
        .await?;
@@ -6941,8 +6917,8 @@ impl Timeline {

    /// Force create a delta layer and place it into the layer map.
    ///
-    /// DO NOT use this function directly. Use [`TenantShard::branch_timeline_test_with_layers`]
-    /// or [`TenantShard::create_test_timeline_with_layers`] to ensure all these layers are
+    /// DO NOT use this function directly. Use [`Tenant::branch_timeline_test_with_layers`]
+    /// or [`Tenant::create_test_timeline_with_layers`] to ensure all these layers are
    /// placed into the layer map in one run AND be validated.
    #[cfg(test)]
    pub(super) async fn force_create_delta_layer(
@@ -6975,8 +6951,6 @@ impl Timeline {
            self.tenant_shard_id,
            deltas.key_range.start,
            deltas.lsn_range,
-            &self.gate,
-            self.cancel.clone(),
            ctx,
        )
        .await?;
--- a/pageserver/src/tenant/timeline/compaction.rs
+++ b/pageserver/src/tenant/timeline/compaction.rs
@@ -56,8 +56,7 @@ use crate::tenant::storage_layer::batch_split_writer::{
 use crate::tenant::storage_layer::filter_iterator::FilterIterator;
 use crate::tenant::storage_layer::merge_iterator::MergeIterator;
 use crate::tenant::storage_layer::{
-    AsLayerDesc, LayerVisibilityHint, PersistentLayerDesc, PersistentLayerKey,
-    ValueReconstructState,
+    AsLayerDesc, PersistentLayerDesc, PersistentLayerKey, ValueReconstructState,
 };
 use crate::tenant::tasks::log_compaction_error;
 use crate::tenant::timeline::{
@@ -70,14 +69,7 @@ use crate::virtual_file::{MaybeFatalIo, VirtualFile};
 /// Maximum number of deltas before generating an image layer in bottom-most compaction.
 const COMPACTION_DELTA_THRESHOLD: usize = 5;

-/// Ratio of shard-local pages below which we trigger shard ancestor layer rewrites. 0.3 means that
-/// <= 30% of layer pages must belong to the descendant shard to rewrite the layer.
-///
-/// We choose a value < 0.5 to avoid rewriting all visible layers every time we do a power-of-two
-/// shard split, which gets expensive for large tenants.
-const ANCESTOR_COMPACTION_REWRITE_THRESHOLD: f64 = 0.3;
-
-#[derive(Default, Debug, Clone, Copy, Hash, PartialEq, Eq, Serialize)]
+#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
 pub struct GcCompactionJobId(pub usize);

 impl std::fmt::Display for GcCompactionJobId {
@@ -105,50 +97,6 @@ pub enum GcCompactionQueueItem {
    Notify(GcCompactionJobId, Option<Lsn>),
 }

-/// Statistics for gc-compaction meta jobs, which contains several sub compaction jobs.
-#[derive(Debug, Clone, Serialize, Default)]
-pub struct GcCompactionMetaStatistics {
-    /// The total number of sub compaction jobs.
-    pub total_sub_compaction_jobs: usize,
-    /// The total number of sub compaction jobs that failed.
-    pub failed_sub_compaction_jobs: usize,
-    /// The total number of sub compaction jobs that succeeded.
-    pub succeeded_sub_compaction_jobs: usize,
-    /// The layer size before compaction.
-    pub before_compaction_layer_size: u64,
-    /// The layer size after compaction.
-    pub after_compaction_layer_size: u64,
-    /// The start time of the meta job.
-    pub start_time: Option<chrono::DateTime<chrono::Utc>>,
-    /// The end time of the meta job.
-    pub end_time: Option<chrono::DateTime<chrono::Utc>>,
-    /// The duration of the meta job.
-    pub duration_secs: f64,
-    /// The id of the meta job.
-    pub meta_job_id: GcCompactionJobId,
-    /// The LSN below which the layers are compacted, used to compute the statistics.
-    pub below_lsn: Lsn,
-    /// The retention ratio of the meta job (after_compaction_layer_size / before_compaction_layer_size)
-    pub retention_ratio: f64,
-}
-
-impl GcCompactionMetaStatistics {
-    fn finalize(&mut self) {
-        let end_time = chrono::Utc::now();
-        if let Some(start_time) = self.start_time {
-            if end_time > start_time {
-                let delta = end_time - start_time;
-                if let Ok(std_dur) = delta.to_std() {
-                    self.duration_secs = std_dur.as_secs_f64();
-                }
-            }
-        }
-        self.retention_ratio = self.after_compaction_layer_size as f64
-            / (self.before_compaction_layer_size as f64 + 1.0);
-        self.end_time = Some(end_time);
-    }
-}
-
 impl GcCompactionQueueItem {
    pub fn into_compact_info_resp(
        self,
@@ -186,7 +134,6 @@ struct GcCompactionQueueInner {
    queued: VecDeque<(GcCompactionJobId, GcCompactionQueueItem)>,
    guards: HashMap<GcCompactionJobId, GcCompactionGuardItems>,
    last_id: GcCompactionJobId,
-    meta_statistics: Option<GcCompactionMetaStatistics>,
 }

 impl GcCompactionQueueInner {
@@ -218,7 +165,6 @@ impl GcCompactionQueue {
                queued: VecDeque::new(),
                guards: HashMap::new(),
                last_id: GcCompactionJobId(0),
-                meta_statistics: None,
            }),
            consumer_lock: tokio::sync::Mutex::new(()),
        }
@@ -403,23 +349,6 @@ impl GcCompactionQueue {
        Ok(())
    }

-    async fn collect_layer_below_lsn(
-        &self,
-        timeline: &Arc<Timeline>,
-        lsn: Lsn,
-    ) -> Result<u64, CompactionError> {
-        let guard = timeline.layers.read().await;
-        let layer_map = guard.layer_map()?;
-        let layers = layer_map.iter_historic_layers().collect_vec();
-        let mut size = 0;
-        for layer in layers {
-            if layer.lsn_range.start <= lsn {
-                size += layer.file_size();
-            }
-        }
-        Ok(size)
-    }
-
    /// Notify the caller the job has finished and unblock GC.
    fn notify_and_unblock(&self, id: GcCompactionJobId) {
        info!("compaction job id={} finished", id);
@@ -429,16 +358,6 @@ impl GcCompactionQueue {
                let _ = tx.send(());
            }
        }
-        if let Some(ref meta_statistics) = guard.meta_statistics {
-            if meta_statistics.meta_job_id == id {
-                if let Ok(stats) = serde_json::to_string(&meta_statistics) {
-                    info!(
-                        "gc-compaction meta statistics for job id = {}: {}",
-                        id, stats
-                    );
-                }
-            }
-        }
    }

    fn clear_running_job(&self) {
@@ -478,11 +397,7 @@ impl GcCompactionQueue {
            let mut pending_tasks = Vec::new();
            // gc-compaction might pick more layers or fewer layers to compact. The L2 LSN does not need to be accurate.
            // And therefore, we simply assume the maximum LSN of all jobs is the expected L2 LSN.
-            let expected_l2_lsn = jobs
-                .iter()
-                .map(|job| job.compact_lsn_range.end)
-                .max()
-                .unwrap();
+            let expected_l2_lsn = jobs.iter().map(|job| job.compact_lsn_range.end).max();
            for job in jobs {
                // Unfortunately we need to convert the `GcCompactJob` back to `CompactionOptions`
                // until we do further refactors to allow directly call `compact_with_gc`.
@@ -507,13 +422,9 @@ impl GcCompactionQueue {
            if !auto {
                pending_tasks.push(GcCompactionQueueItem::Notify(id, None));
            } else {
-                pending_tasks.push(GcCompactionQueueItem::Notify(id, Some(expected_l2_lsn)));
+                pending_tasks.push(GcCompactionQueueItem::Notify(id, expected_l2_lsn));
            }

-            let layer_size = self
-                .collect_layer_below_lsn(timeline, expected_l2_lsn)
-                .await?;
-
            {
                let mut guard = self.inner.lock().unwrap();
                let mut tasks = Vec::new();
@@ -525,16 +436,7 @@ impl GcCompactionQueue {
                for item in tasks {
                    guard.queued.push_front(item);
                }
-                guard.meta_statistics = Some(GcCompactionMetaStatistics {
-                    meta_job_id: id,
-                    start_time: Some(chrono::Utc::now()),
-                    before_compaction_layer_size: layer_size,
-                    below_lsn: expected_l2_lsn,
-                    total_sub_compaction_jobs: jobs_len,
-                    ..Default::default()
-                });
            }
-
            info!(
                "scheduled enhanced gc bottom-most compaction with sub-compaction, split into {} jobs",
                jobs_len
@@ -663,10 +565,6 @@ impl GcCompactionQueue {
                    Err(err) => {
                        warn!(%err, "failed to run gc-compaction subcompaction job");
                        self.clear_running_job();
-                        let mut guard = self.inner.lock().unwrap();
-                        if let Some(ref mut meta_statistics) = guard.meta_statistics {
-                            meta_statistics.failed_sub_compaction_jobs += 1;
-                        }
                        return Err(err);
                    }
                };
@@ -676,34 +574,8 @@ impl GcCompactionQueue {
                    // we need to clean things up before returning from the function.
                    yield_for_l0 = true;
                }
-                {
-                    let mut guard = self.inner.lock().unwrap();
-                    if let Some(ref mut meta_statistics) = guard.meta_statistics {
-                        meta_statistics.succeeded_sub_compaction_jobs += 1;
-                    }
-                }
            }
            GcCompactionQueueItem::Notify(id, l2_lsn) => {
-                let below_lsn = {
-                    let mut guard = self.inner.lock().unwrap();
-                    if let Some(ref mut meta_statistics) = guard.meta_statistics {
-                        meta_statistics.below_lsn
-                    } else {
-                        Lsn::INVALID
-                    }
-                };
-                let layer_size = if below_lsn != Lsn::INVALID {
-                    self.collect_layer_below_lsn(timeline, below_lsn).await?
-                } else {
-                    0
-                };
-                {
-                    let mut guard = self.inner.lock().unwrap();
-                    if let Some(ref mut meta_statistics) = guard.meta_statistics {
-                        meta_statistics.after_compaction_layer_size = layer_size;
-                        meta_statistics.finalize();
-                    }
-                }
                self.notify_and_unblock(id);
                if let Some(l2_lsn) = l2_lsn {
                    let current_l2_lsn = timeline
@@ -877,8 +749,8 @@ impl KeyHistoryRetention {
    async fn pipe_to(
        self,
        key: Key,
-        delta_writer: &mut SplitDeltaLayerWriter<'_>,
-        mut image_writer: Option<&mut SplitImageLayerWriter<'_>>,
+        delta_writer: &mut SplitDeltaLayerWriter,
+        mut image_writer: Option<&mut SplitImageLayerWriter>,
        stat: &mut CompactionStatistics,
        ctx: &RequestContext,
    ) -> anyhow::Result<()> {
@@ -947,15 +819,7 @@ impl KeyHistoryRetention {
            base_img: &Option<(Lsn, &Bytes)>,
            history: &[(Lsn, &NeonWalRecord)],
            tline: &Arc<Timeline>,
-            skip_empty: bool,
        ) -> anyhow::Result<()> {
-            if base_img.is_none() && history.is_empty() {
-                if skip_empty {
-                    return Ok(());
-                }
-                anyhow::bail!("verification failed: key {} has no history at {}", key, lsn);
-            };
-
            let mut records = history
                .iter()
                .map(|(lsn, val)| (*lsn, (*val).clone()))
@@ -996,12 +860,17 @@ impl KeyHistoryRetention {
            if *retain_lsn >= min_lsn {
                // Only verify after the key appears in the full history for the first time.

+                if base_img.is_none() && history.is_empty() {
+                    anyhow::bail!(
+                        "verification failed: key {} has no history at {}",
+                        key,
+                        retain_lsn
+                    );
+                };
                // We don't modify history: in theory, we could replace the history with a single
                // image as in `generate_key_retention` to make redos at later LSNs faster. But we
                // want to verify everything as if they are read from the real layer map.
-                collect_and_verify(key, *retain_lsn, &base_img, &history, tline, false)
-                    .await
-                    .context("below horizon retain_lsn")?;
+                collect_and_verify(key, *retain_lsn, &base_img, &history, tline).await?;
            }
        }

@@ -1009,17 +878,13 @@ impl KeyHistoryRetention {
            match val {
                Value::Image(img) => {
                    // Above the GC horizon, we verify every time we see an image.
-                    collect_and_verify(key, *lsn, &base_img, &history, tline, true)
-                        .await
-                        .context("above horizon full image")?;
+                    collect_and_verify(key, *lsn, &base_img, &history, tline).await?;
                    base_img = Some((*lsn, img));
                    history.clear();
                }
                Value::WalRecord(rec) if val.will_init() => {
                    // Above the GC horizon, we verify every time we see an init record.
-                    collect_and_verify(key, *lsn, &base_img, &history, tline, true)
-                        .await
-                        .context("above horizon init record")?;
+                    collect_and_verify(key, *lsn, &base_img, &history, tline).await?;
                    base_img = None;
                    history.clear();
                    history.push((*lsn, rec));
@@ -1030,9 +895,7 @@ impl KeyHistoryRetention {
            }
        }
        // Ensure the latest record is readable.
-        collect_and_verify(key, max_lsn, &base_img, &history, tline, false)
-            .await
-            .context("latest record")?;
+        collect_and_verify(key, max_lsn, &base_img, &history, tline).await?;
        Ok(())
    }
 }
@@ -1359,7 +1222,8 @@ impl Timeline {
        let partition_count = self.partitioning.read().0.0.parts.len();

        // 4. Shard ancestor compaction
-        if self.get_compaction_shard_ancestor() && self.shard_identity.count >= ShardCount::new(2) {
+
+        if self.shard_identity.count >= ShardCount::new(2) {
            // Limit the number of layer rewrites to the number of partitions: this means its
            // runtime should be comparable to a full round of image layer creations, rather than
            // being potentially much longer.
@@ -1409,10 +1273,7 @@ impl Timeline {
        let pitr_cutoff = self.gc_info.read().unwrap().cutoffs.time;

        let layers = self.layers.read().await;
-        let layers_iter = layers.layer_map()?.iter_historic_layers();
-        let (layers_total, mut layers_checked) = (layers_iter.len(), 0);
-        for layer_desc in layers_iter {
-            layers_checked += 1;
+        for layer_desc in layers.layer_map()?.iter_historic_layers() {
            let layer = layers.get_from_desc(&layer_desc);
            if layer.metadata().shard.shard_count == self.shard_identity.count {
                // This layer does not belong to a historic ancestor, no need to re-image it.
@@ -1456,15 +1317,14 @@ impl Timeline {
                continue;
            }

-            // Only rewrite a layer if we can reclaim significant space.
+            // Don't bother re-writing a layer unless it will at least halve its size
            if layer_local_page_count != u32::MAX
-                && layer_local_page_count as f64 / layer_raw_page_count as f64
-                    <= ANCESTOR_COMPACTION_REWRITE_THRESHOLD
+                && layer_local_page_count > layer_raw_page_count / 2
            {
                debug!(%layer,
-                    "layer has a large share of local pages \
-                        ({layer_local_page_count}/{layer_raw_page_count} > \
-                        {ANCESTOR_COMPACTION_REWRITE_THRESHOLD}), not rewriting",
+                    "layer is already mostly local ({}/{}), not rewriting",
+                    layer_local_page_count,
+                    layer_raw_page_count
                );
            }

@@ -1476,19 +1336,12 @@ impl Timeline {
                continue;
            }

-            // We do not yet implement rewrite of delta layers.
            if layer_desc.is_delta() {
+                // We do not yet implement rewrite of delta layers
                debug!(%layer, "Skipping rewrite of delta layer");
                continue;
            }

-            // We don't bother rewriting layers that aren't visible, since these won't be needed by
-            // reads and will likely be garbage collected soon.
-            if layer.visibility() != LayerVisibilityHint::Visible {
-                debug!(%layer, "Skipping rewrite of invisible layer");
-                continue;
-            }
-
            // Only rewrite layers if their generations differ.  This guarantees:
            //  - that local rewrite is safe, as local layer paths will differ between existing layer and rewritten one
            //  - that the layer is persistent in remote storage, as we only see old-generation'd layer via loading from remote storage
@@ -1518,8 +1371,7 @@ impl Timeline {
        }

        info!(
-            "starting shard ancestor compaction, rewriting {} layers and dropping {} layers, \
-                checked {layers_checked}/{layers_total} layers \
+            "starting shard ancestor compaction, rewriting {} layers and dropping {} layers \
                (latest_gc_cutoff={} pitr_cutoff={})",
            layers_to_rewrite.len(),
            drop_layers.len(),
@@ -1542,8 +1394,6 @@ impl Timeline {
                self.tenant_shard_id,
                &layer.layer_desc().key_range,
                layer.layer_desc().image_layer_lsn(),
-                &self.gate,
-                self.cancel.clone(),
                ctx,
            )
            .await
@@ -2183,8 +2033,6 @@ impl Timeline {
                                debug!("Create new layer {}..{}", lsn_range.start, lsn_range.end);
                                lsn_range.clone()
                            },
-                            &self.gate,
-                            self.cancel.clone(),
                            ctx,
                        )
                        .await
@@ -3384,8 +3232,6 @@ impl Timeline {
                    job_desc.compaction_key_range.start,
                    lowest_retain_lsn,
                    self.get_compaction_target_size(),
-                    &self.gate,
-                    self.cancel.clone(),
                    ctx,
                )
                .await
@@ -3402,8 +3248,6 @@ impl Timeline {
            self.tenant_shard_id,
            lowest_retain_lsn..end_lsn,
            self.get_compaction_target_size(),
-            &self.gate,
-            self.cancel.clone(),
        )
        .await
        .context("failed to create delta layer writer")
@@ -3500,8 +3344,6 @@ impl Timeline {
                                self.tenant_shard_id,
                                desc.key_range.start,
                                desc.lsn_range.clone(),
-                                &self.gate,
-                                self.cancel.clone(),
                                ctx,
                            )
                            .await
@@ -3519,8 +3361,6 @@ impl Timeline {
                                self.tenant_shard_id,
                                job_desc.compaction_key_range.end,
                                desc.lsn_range.clone(),
-                                &self.gate,
-                                self.cancel.clone(),
                                ctx,
                            )
                            .await
@@ -4092,8 +3932,6 @@ impl CompactionJobExecutor for TimelineAdaptor {
            self.timeline.tenant_shard_id,
            key_range.start,
            lsn_range.clone(),
-            &self.timeline.gate,
-            self.timeline.cancel.clone(),
            ctx,
        )
        .await?;
@@ -4169,8 +4007,6 @@ impl TimelineAdaptor {
            self.timeline.tenant_shard_id,
            key_range,
            lsn,
-            &self.timeline.gate,
-            self.timeline.cancel.clone(),
            ctx,
        )
        .await?;
--- a/pageserver/src/tenant/timeline/delete.rs
+++ b/pageserver/src/tenant/timeline/delete.rs
@@ -18,8 +18,8 @@ use crate::tenant::remote_timeline_client::{
    PersistIndexPartWithDeletedFlagError, RemoteTimelineClient,
 };
 use crate::tenant::{
-    CreateTimelineCause, DeleteTimelineError, MaybeDeletedIndexPart, TenantManifestError,
-    TenantShard, Timeline, TimelineOrOffloaded,
+    CreateTimelineCause, DeleteTimelineError, MaybeDeletedIndexPart, Tenant, TenantManifestError,
+    Timeline, TimelineOrOffloaded,
 };
 use crate::virtual_file::MaybeFatalIo;

@@ -113,7 +113,7 @@ pub(super) async fn delete_local_timeline_directory(
 /// It is important that this gets called when DeletionGuard is being held.
 /// For more context see comments in [`make_timeline_delete_guard`]
 async fn remove_maybe_offloaded_timeline_from_tenant(
-    tenant: &TenantShard,
+    tenant: &Tenant,
    timeline: &TimelineOrOffloaded,
    _: &DeletionGuard, // using it as a witness
 ) -> anyhow::Result<()> {
@@ -192,7 +192,7 @@ impl DeleteTimelineFlow {
    // error out if some of the shutdown tasks have already been completed!
    #[instrument(skip_all)]
    pub async fn run(
-        tenant: &Arc<TenantShard>,
+        tenant: &Arc<Tenant>,
        timeline_id: TimelineId,
    ) -> Result<(), DeleteTimelineError> {
        super::debug_assert_current_span_has_tenant_and_timeline_id();
@@ -288,7 +288,7 @@ impl DeleteTimelineFlow {
    /// Shortcut to create Timeline in stopping state and spawn deletion task.
    #[instrument(skip_all, fields(%timeline_id))]
    pub(crate) async fn resume_deletion(
-        tenant: Arc<TenantShard>,
+        tenant: Arc<Tenant>,
        timeline_id: TimelineId,
        local_metadata: &TimelineMetadata,
        remote_client: RemoteTimelineClient,
@@ -338,7 +338,7 @@ impl DeleteTimelineFlow {
    fn schedule_background(
        guard: DeletionGuard,
        conf: &'static PageServerConf,
-        tenant: Arc<TenantShard>,
+        tenant: Arc<Tenant>,
        timeline: TimelineOrOffloaded,
        remote_client: Arc<RemoteTimelineClient>,
    ) {
@@ -381,7 +381,7 @@ impl DeleteTimelineFlow {
    async fn background(
        mut guard: DeletionGuard,
        conf: &PageServerConf,
-        tenant: &TenantShard,
+        tenant: &Tenant,
        timeline: &TimelineOrOffloaded,
        remote_client: Arc<RemoteTimelineClient>,
    ) -> Result<(), DeleteTimelineError> {
@@ -435,7 +435,7 @@ pub(super) enum TimelineDeleteGuardKind {
 }

 pub(super) fn make_timeline_delete_guard(
-    tenant: &TenantShard,
+    tenant: &Tenant,
    timeline_id: TimelineId,
    guard_kind: TimelineDeleteGuardKind,
 ) -> Result<(TimelineOrOffloaded, DeletionGuard), DeleteTimelineError> {
--- a/pageserver/src/tenant/timeline/detach_ancestor.rs
+++ b/pageserver/src/tenant/timeline/detach_ancestor.rs
@@ -23,7 +23,7 @@ use super::layer_manager::LayerManager;
 use super::{FlushLayerError, Timeline};
 use crate::context::{DownloadBehavior, RequestContext};
 use crate::task_mgr::TaskKind;
-use crate::tenant::TenantShard;
+use crate::tenant::Tenant;
 use crate::tenant::remote_timeline_client::index::GcBlockingReason::DetachAncestor;
 use crate::tenant::storage_layer::layer::local_layer_path;
 use crate::tenant::storage_layer::{
@@ -228,8 +228,6 @@ async fn generate_tombstone_image_layer(
            detached.tenant_shard_id,
            &key_range,
            image_lsn,
-            &detached.gate,
-            detached.cancel.clone(),
            ctx,
        )
        .await
@@ -265,7 +263,7 @@ async fn generate_tombstone_image_layer(
 /// See [`Timeline::prepare_to_detach_from_ancestor`]
 pub(super) async fn prepare(
    detached: &Arc<Timeline>,
-    tenant: &TenantShard,
+    tenant: &Tenant,
    behavior: DetachBehavior,
    options: Options,
    ctx: &RequestContext,
@@ -590,7 +588,7 @@ pub(super) async fn prepare(

 async fn start_new_attempt(
    detached: &Timeline,
-    tenant: &TenantShard,
+    tenant: &Tenant,
    ancestor_timeline_id: TimelineId,
    ancestor_lsn: Lsn,
 ) -> Result<Attempt, Error> {
@@ -611,7 +609,7 @@ async fn start_new_attempt(

 async fn continue_with_blocked_gc(
    detached: &Timeline,
-    tenant: &TenantShard,
+    tenant: &Tenant,
    ancestor_timeline_id: TimelineId,
    ancestor_lsn: Lsn,
 ) -> Result<Attempt, Error> {
@@ -622,7 +620,7 @@ async fn continue_with_blocked_gc(

 fn obtain_exclusive_attempt(
    detached: &Timeline,
-    tenant: &TenantShard,
+    tenant: &Tenant,
    ancestor_timeline_id: TimelineId,
    ancestor_lsn: Lsn,
 ) -> Result<Attempt, Error> {
@@ -655,7 +653,7 @@ fn obtain_exclusive_attempt(

 fn reparented_direct_children(
    detached: &Arc<Timeline>,
-    tenant: &TenantShard,
+    tenant: &Tenant,
 ) -> Result<HashSet<TimelineId>, Error> {
    let mut all_direct_children = tenant
        .timelines
@@ -778,8 +776,6 @@ async fn copy_lsn_prefix(
        target_timeline.tenant_shard_id,
        layer.layer_desc().key_range.start,
        layer.layer_desc().lsn_range.start..end_lsn,
-        &target_timeline.gate,
-        target_timeline.cancel.clone(),
        ctx,
    )
    .await
@@ -950,7 +946,7 @@ impl DetachingAndReparenting {
 /// See [`Timeline::detach_from_ancestor_and_reparent`].
 pub(super) async fn detach_and_reparent(
    detached: &Arc<Timeline>,
-    tenant: &TenantShard,
+    tenant: &Tenant,
    prepared: PreparedTimelineDetach,
    ancestor_timeline_id: TimelineId,
    ancestor_lsn: Lsn,
@@ -1184,7 +1180,7 @@ pub(super) async fn detach_and_reparent(

 pub(super) async fn complete(
    detached: &Arc<Timeline>,
-    tenant: &TenantShard,
+    tenant: &Tenant,
    mut attempt: Attempt,
    _ctx: &RequestContext,
 ) -> Result<(), Error> {
@@ -1258,7 +1254,7 @@ where
 }

 fn check_no_archived_children_of_ancestor(
-    tenant: &TenantShard,
+    tenant: &Tenant,
    detached: &Arc<Timeline>,
    ancestor: &Arc<Timeline>,
    ancestor_lsn: Lsn,
--- a/pageserver/src/tenant/timeline/eviction_task.rs
+++ b/pageserver/src/tenant/timeline/eviction_task.rs
@@ -33,7 +33,7 @@ use crate::tenant::size::CalculateSyntheticSizeError;
 use crate::tenant::storage_layer::LayerVisibilityHint;
 use crate::tenant::tasks::{BackgroundLoopKind, BackgroundLoopSemaphorePermit, sleep_random};
 use crate::tenant::timeline::EvictionError;
-use crate::tenant::{LogicalSizeCalculationCause, TenantShard};
+use crate::tenant::{LogicalSizeCalculationCause, Tenant};

 #[derive(Default)]
 pub struct EvictionTaskTimelineState {
@@ -48,7 +48,7 @@ pub struct EvictionTaskTenantState {
 impl Timeline {
    pub(super) fn launch_eviction_task(
        self: &Arc<Self>,
-        parent: Arc<TenantShard>,
+        parent: Arc<Tenant>,
        background_tasks_can_start: Option<&completion::Barrier>,
    ) {
        let self_clone = Arc::clone(self);
@@ -75,7 +75,7 @@ impl Timeline {
    }

    #[instrument(skip_all, fields(tenant_id = %self.tenant_shard_id.tenant_id, shard_id = %self.tenant_shard_id.shard_slug(), timeline_id = %self.timeline_id))]
-    async fn eviction_task(self: Arc<Self>, tenant: Arc<TenantShard>) {
+    async fn eviction_task(self: Arc<Self>, tenant: Arc<Tenant>) {
        // acquire the gate guard only once within a useful span
        let Ok(guard) = self.gate.enter() else {
            return;
@@ -118,7 +118,7 @@ impl Timeline {
    #[instrument(skip_all, fields(policy_kind = policy.discriminant_str()))]
    async fn eviction_iteration(
        self: &Arc<Self>,
-        tenant: &TenantShard,
+        tenant: &Tenant,
        policy: &EvictionPolicy,
        cancel: &CancellationToken,
        gate: &GateGuard,
@@ -175,7 +175,7 @@ impl Timeline {

    async fn eviction_iteration_threshold(
        self: &Arc<Self>,
-        tenant: &TenantShard,
+        tenant: &Tenant,
        p: &EvictionPolicyLayerAccessThreshold,
        cancel: &CancellationToken,
        gate: &GateGuard,
@@ -309,7 +309,7 @@ impl Timeline {
    /// disk usage based eviction task.
    async fn imitiate_only(
        self: &Arc<Self>,
-        tenant: &TenantShard,
+        tenant: &Tenant,
        p: &EvictionPolicyLayerAccessThreshold,
        cancel: &CancellationToken,
        gate: &GateGuard,
@@ -363,7 +363,7 @@ impl Timeline {
    #[instrument(skip_all)]
    async fn imitate_layer_accesses(
        &self,
-        tenant: &TenantShard,
+        tenant: &Tenant,
        p: &EvictionPolicyLayerAccessThreshold,
        cancel: &CancellationToken,
        gate: &GateGuard,
@@ -499,7 +499,7 @@ impl Timeline {
    #[instrument(skip_all)]
    async fn imitate_synthetic_size_calculation_worker(
        &self,
-        tenant: &TenantShard,
+        tenant: &Tenant,
        cancel: &CancellationToken,
        ctx: &RequestContext,
    ) {
--- a/pageserver/src/tenant/timeline/import_pgdata/flow.rs
+++ b/pageserver/src/tenant/timeline/import_pgdata/flow.rs
@@ -738,8 +738,6 @@ impl ChunkProcessingJob {
            self.timeline.tenant_shard_id,
            &self.range,
            self.pgdata_lsn,
-            &self.timeline.gate,
-            self.timeline.cancel.clone(),
            ctx,
        )
        .await?;
--- a/pageserver/src/tenant/timeline/import_pgdata/upcall_api.rs
+++ b/pageserver/src/tenant/timeline/import_pgdata/upcall_api.rs
@@ -1,6 +1,6 @@
 //! FIXME: most of this is copy-paste from mgmt_api.rs ; dedupe into a `reqwest_utils::Client` crate.
 use pageserver_client::mgmt_api::{Error, ResponseErrorMessageExt};
-use reqwest::{Certificate, Method};
+use reqwest::Method;
 use serde::{Deserialize, Serialize};
 use tokio_util::sync::CancellationToken;
 use tracing::error;
@@ -34,7 +34,7 @@ impl Client {
        };
        let mut http_client = reqwest::Client::builder();
        for cert in &conf.ssl_ca_certs {
-            http_client = http_client.add_root_certificate(Certificate::from_der(cert.contents())?);
+            http_client = http_client.add_root_certificate(cert.clone());
        }
        let http_client = http_client.build()?;

--- a/pageserver/src/tenant/timeline/offload.rs
+++ b/pageserver/src/tenant/timeline/offload.rs
@@ -8,7 +8,7 @@ use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
 use crate::tenant::remote_timeline_client::ShutdownIfArchivedError;
 use crate::tenant::timeline::delete::{TimelineDeleteGuardKind, make_timeline_delete_guard};
 use crate::tenant::{
-    DeleteTimelineError, OffloadedTimeline, TenantManifestError, TenantShard, TimelineOrOffloaded,
+    DeleteTimelineError, OffloadedTimeline, Tenant, TenantManifestError, TimelineOrOffloaded,
 };

 #[derive(thiserror::Error, Debug)]
@@ -33,7 +33,7 @@ impl From<TenantManifestError> for OffloadError {
 }

 pub(crate) async fn offload_timeline(
-    tenant: &TenantShard,
+    tenant: &Tenant,
    timeline: &Arc<Timeline>,
 ) -> Result<(), OffloadError> {
    debug_assert_current_span_has_tenant_and_timeline_id();
@@ -123,7 +123,7 @@ pub(crate) async fn offload_timeline(
 ///
 /// Returns the strong count of the timeline `Arc`
 fn remove_timeline_from_tenant(
-    tenant: &TenantShard,
+    tenant: &Tenant,
    timeline: &Timeline,
    _: &DeletionGuard, // using it as a witness
 ) -> usize {
--- a/pageserver/src/tenant/timeline/uninit.rs
+++ b/pageserver/src/tenant/timeline/uninit.rs
@@ -15,19 +15,17 @@ use super::Timeline;
 use crate::context::RequestContext;
 use crate::import_datadir;
 use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
-use crate::tenant::{
-    CreateTimelineError, CreateTimelineIdempotency, TenantShard, TimelineOrOffloaded,
-};
+use crate::tenant::{CreateTimelineError, CreateTimelineIdempotency, Tenant, TimelineOrOffloaded};

 /// A timeline with some of its files on disk, being initialized.
 /// This struct ensures the atomicity of the timeline init: it's either properly created and inserted into pageserver's memory, or
 /// its local files are removed.  If we crash while this class exists, then the timeline's local
-/// state is cleaned up during [`TenantShard::clean_up_timelines`], because the timeline's content isn't in remote storage.
+/// state is cleaned up during [`Tenant::clean_up_timelines`], because the timeline's content isn't in remote storage.
 ///
 /// The caller is responsible for proper timeline data filling before the final init.
 #[must_use]
 pub struct UninitializedTimeline<'t> {
-    pub(crate) owning_tenant: &'t TenantShard,
+    pub(crate) owning_tenant: &'t Tenant,
    timeline_id: TimelineId,
    raw_timeline: Option<(Arc<Timeline>, TimelineCreateGuard)>,
    /// Whether we spawned the inner Timeline's tasks such that we must later shut it down
@@ -37,7 +35,7 @@ pub struct UninitializedTimeline<'t> {

 impl<'t> UninitializedTimeline<'t> {
    pub(crate) fn new(
-        owning_tenant: &'t TenantShard,
+        owning_tenant: &'t Tenant,
        timeline_id: TimelineId,
        raw_timeline: Option<(Arc<Timeline>, TimelineCreateGuard)>,
    ) -> Self {
@@ -158,7 +156,7 @@ impl<'t> UninitializedTimeline<'t> {
    /// Prepares timeline data by loading it from the basebackup archive.
    pub(crate) async fn import_basebackup_from_tar(
        mut self,
-        tenant: Arc<TenantShard>,
+        tenant: Arc<Tenant>,
        copyin_read: &mut (impl tokio::io::AsyncRead + Send + Sync + Unpin),
        base_lsn: Lsn,
        broker_client: storage_broker::BrokerClientChannel,
@@ -229,17 +227,17 @@ pub(crate) fn cleanup_timeline_directory(create_guard: TimelineCreateGuard) {
            error!("Failed to clean up uninitialized timeline directory {timeline_path:?}: {e:?}")
        }
    }
-    // Having cleaned up, we can release this TimelineId in `[TenantShard::timelines_creating]` to allow other
+    // Having cleaned up, we can release this TimelineId in `[Tenant::timelines_creating]` to allow other
    // timeline creation attempts under this TimelineId to proceed
    drop(create_guard);
 }

 /// A guard for timeline creations in process: as long as this object exists, the timeline ID
-/// is kept in `[TenantShard::timelines_creating]` to exclude concurrent attempts to create the same timeline.
+/// is kept in `[Tenant::timelines_creating]` to exclude concurrent attempts to create the same timeline.
 #[must_use]
 pub(crate) struct TimelineCreateGuard {
    pub(crate) _tenant_gate_guard: GateGuard,
-    pub(crate) owning_tenant: Arc<TenantShard>,
+    pub(crate) owning_tenant: Arc<Tenant>,
    pub(crate) timeline_id: TimelineId,
    pub(crate) timeline_path: Utf8PathBuf,
    pub(crate) idempotency: CreateTimelineIdempotency,
@@ -265,7 +263,7 @@ pub(crate) enum TimelineExclusionError {

 impl TimelineCreateGuard {
    pub(crate) fn new(
-        owning_tenant: &Arc<TenantShard>,
+        owning_tenant: &Arc<Tenant>,
        timeline_id: TimelineId,
        timeline_path: Utf8PathBuf,
        idempotency: CreateTimelineIdempotency,
--- a/pageserver/src/tenant/vectored_blob_io.rs
+++ b/pageserver/src/tenant/vectored_blob_io.rs
@@ -26,7 +26,7 @@ use utils::lsn::Lsn;
 use utils::vec_map::VecMap;

 use crate::context::RequestContext;
-use crate::tenant::blob_io::{BYTE_UNCOMPRESSED, BYTE_ZSTD, Header};
+use crate::tenant::blob_io::{BYTE_UNCOMPRESSED, BYTE_ZSTD, LEN_COMPRESSION_BIT_MASK};
 use crate::virtual_file::{self, IoBufferMut, VirtualFile};

 /// Metadata bundled with the start and end offset of a blob.
@@ -111,20 +111,18 @@ impl From<Bytes> for BufView<'_> {
 pub struct VectoredBlob {
    /// Blob metadata.
    pub meta: BlobMeta,
-    /// Header start offset.
-    header_start: usize,
-    /// Data start offset.
-    data_start: usize,
+    /// Start offset.
+    start: usize,
    /// End offset.
    end: usize,
-    /// Compression used on the data, extracted from the header.
+    /// Compression used on the blob.
    compression_bits: u8,
 }

 impl VectoredBlob {
    /// Reads a decompressed view of the blob.
    pub(crate) async fn read<'a>(&self, buf: &BufView<'a>) -> Result<BufView<'a>, std::io::Error> {
-        let view = buf.view(self.data_start..self.end);
+        let view = buf.view(self.start..self.end);

        match self.compression_bits {
            BYTE_UNCOMPRESSED => Ok(view),
@@ -142,18 +140,13 @@ impl VectoredBlob {
                    std::io::ErrorKind::InvalidData,
                    format!(
                        "Failed to decompress blob for {}@{}, {}..{}: invalid compression byte {bits:x}",
-                        self.meta.key, self.meta.lsn, self.data_start, self.end
+                        self.meta.key, self.meta.lsn, self.start, self.end
                    ),
                );
                Err(error)
            }
        }
    }
-
-    /// Returns the raw blob including header.
-    pub(crate) fn raw_with_header<'a>(&self, buf: &BufView<'a>) -> BufView<'a> {
-        buf.view(self.header_start..self.end)
-    }
 }

 impl std::fmt::Display for VectoredBlob {
@@ -161,7 +154,7 @@ impl std::fmt::Display for VectoredBlob {
        write!(
            f,
            "{}@{}, {}..{}",
-            self.meta.key, self.meta.lsn, self.data_start, self.end
+            self.meta.key, self.meta.lsn, self.start, self.end
        )
    }
 }
@@ -500,28 +493,50 @@ impl<'a> VectoredBlobReader<'a> {

        let blobs_at = read.blobs_at.as_slice();

-        let mut blobs = Vec::with_capacity(blobs_at.len());
+        let start_offset = read.start;
+
+        let mut metas = Vec::with_capacity(blobs_at.len());
        // Blobs in `read` only provide their starting offset. The end offset
        // of a blob is implicit: the start of the next blob if one exists
        // or the end of the read.

-        for (blob_start, meta) in blobs_at.iter().copied() {
-            let header_start = (blob_start - read.start) as usize;
-            let header = Header::decode(&buf[header_start..])?;
-            let data_start = header_start + header.header_len;
-            let end = data_start + header.data_len;
-            let compression_bits = header.compression_bits;
+        for (blob_start, meta) in blobs_at {
+            let blob_start_in_buf = blob_start - start_offset;
+            let first_len_byte = buf[blob_start_in_buf as usize];

-            blobs.push(VectoredBlob {
-                header_start,
-                data_start,
+            // Each blob is prefixed by a header containing its size and compression information.
+            // Extract the size and skip that header to find the start of the data.
+            // The size can be 1 or 4 bytes. The most significant bit is 0 in the
+            // 1 byte case and 1 in the 4 byte case.
+            let (size_length, blob_size, compression_bits) = if first_len_byte < 0x80 {
+                (1, first_len_byte as u64, BYTE_UNCOMPRESSED)
+            } else {
+                let mut blob_size_buf = [0u8; 4];
+                let offset_in_buf = blob_start_in_buf as usize;
+
+                blob_size_buf.copy_from_slice(&buf[offset_in_buf..offset_in_buf + 4]);
+                blob_size_buf[0] &= !LEN_COMPRESSION_BIT_MASK;
+
+                let compression_bits = first_len_byte & LEN_COMPRESSION_BIT_MASK;
+                (
+                    4,
+                    u32::from_be_bytes(blob_size_buf) as u64,
+                    compression_bits,
+                )
+            };
+
+            let start = (blob_start_in_buf + size_length) as usize;
+            let end = start + blob_size as usize;
+
+            metas.push(VectoredBlob {
+                start,
                end,
-                meta,
+                meta: *meta,
                compression_bits,
            });
        }

-        Ok(VectoredBlobsBuf { buf, blobs })
+        Ok(VectoredBlobsBuf { buf, blobs: metas })
    }
 }

@@ -982,15 +997,6 @@ mod tests {
                &read_buf[..],
                "mismatch for idx={idx} at offset={offset}"
            );
-
-            // Check that raw_with_header returns a valid header.
-            let raw = read_blob.raw_with_header(&view);
-            let header = Header::decode(&raw)?;
-            if !compression || header.header_len == 1 {
-                assert_eq!(header.compression_bits, BYTE_UNCOMPRESSED);
-            }
-            assert_eq!(raw.len(), header.total_len());
-
            buf = result.buf;
        }
        Ok(())
--- a/pageserver/src/virtual_file.rs
+++ b/pageserver/src/virtual_file.rs
@@ -1366,8 +1366,7 @@ pub(crate) type IoBuffer = AlignedBuffer<ConstAlign<{ get_io_buffer_alignment()
 pub(crate) type IoPageSlice<'a> =
    AlignedSlice<'a, PAGE_SZ, ConstAlign<{ get_io_buffer_alignment() }>>;

-static IO_MODE: once_cell::sync::Lazy<AtomicU8> =
-    once_cell::sync::Lazy::new(|| AtomicU8::new(IoMode::preferred() as u8));
+static IO_MODE: AtomicU8 = AtomicU8::new(IoMode::preferred() as u8);

 pub(crate) fn set_io_mode(mode: IoMode) {
    IO_MODE.store(mode as u8, std::sync::atomic::Ordering::Relaxed);
--- a/pgxn/neon/communicator.c
+++ b/pgxn/neon/communicator.c
@@ -95,7 +95,7 @@ static uint32 local_request_counter;
 * Various settings related to prompt (fast) handling of PageStream responses
 * at any CHECK_FOR_INTERRUPTS point.
 */
-int				readahead_getpage_pull_timeout_ms = 50;
+int				readahead_getpage_pull_timeout_ms = 0;
 static int		PS_TIMEOUT_ID = 0;
 static bool		timeout_set = false;
 static bool		timeout_signaled = false;
--- a/pgxn/neon/libpagestore.c
+++ b/pgxn/neon/libpagestore.c
@@ -75,7 +75,7 @@ char	   *neon_auth_token;
 int			readahead_buffer_size = 128;
 int			flush_every_n_requests = 8;

-int         neon_protocol_version = 3;
+int         neon_protocol_version = 2;

 static int	neon_compute_mode = 0;
 static int	max_reconnect_attempts = 60;
@@ -1362,7 +1362,7 @@ pg_init_libpagestore(void)
 							   "",
 							   PGC_POSTMASTER,
 							   0,	/* no flags required */
-							   NULL, NULL, NULL);
+							   check_neon_id, NULL, NULL);
 	DefineCustomStringVariable("neon.branch_id",
 							   "Neon branch_id the server is running on",
 							   NULL,
@@ -1370,7 +1370,7 @@ pg_init_libpagestore(void)
 							   "",
 							   PGC_POSTMASTER,
 							   0,	/* no flags required */
-							   NULL, NULL, NULL);
+							   check_neon_id, NULL, NULL);
 	DefineCustomStringVariable("neon.endpoint_id",
 							   "Neon endpoint_id the server is running on",
 							   NULL,
@@ -1378,7 +1378,7 @@ pg_init_libpagestore(void)
 							   "",
 							   PGC_POSTMASTER,
 							   0,	/* no flags required */
-							   NULL, NULL, NULL);
+							   check_neon_id, NULL, NULL);

 	DefineCustomIntVariable("neon.stripe_size",
 							"sharding stripe size",
@@ -1432,7 +1432,7 @@ pg_init_libpagestore(void)
 							"PageStream connection when we have pages which "
 							"were read ahead but not yet received.",
 							&readahead_getpage_pull_timeout_ms,
-							50, 0, 5 * 60 * 1000,
+							0, 0, 5 * 60 * 1000,
 							PGC_USERSET,
 							GUC_UNIT_MS,
 							NULL, NULL, NULL);
@@ -1440,7 +1440,7 @@ pg_init_libpagestore(void)
 							"Version of compute<->page server protocol",
 							NULL,
 							&neon_protocol_version,
-							3,	/* use protocol version 3 */
+							2,	/* use protocol version 2 */
 							2,	/* min */
 							3,	/* max */
 							PGC_SU_BACKEND,
--- a/pgxn/neon/pagestore_smgr.c
+++ b/pgxn/neon/pagestore_smgr.c
@@ -2040,7 +2040,7 @@ neon_finish_unlogged_build_phase_1(SMgrRelation reln)
 /*
 * neon_end_unlogged_build() -- Finish an unlogged rel build.
 *
- * Call this after you have finished WAL-logging a relation that was
+ * Call this after you have finished WAL-logging a relation that was
 * first populated without WAL-logging.
 *
 * This removes the local copy of the rel, since it's now been fully
@@ -2059,35 +2059,14 @@ neon_end_unlogged_build(SMgrRelation reln)

 	if (unlogged_build_phase != UNLOGGED_BUILD_NOT_PERMANENT)
 	{
-		XLogRecPtr recptr;
-		BlockNumber nblocks;
-
 		Assert(unlogged_build_phase == UNLOGGED_BUILD_PHASE_2);
 		Assert(reln->smgr_relpersistence == RELPERSISTENCE_UNLOGGED);

-		/*
-		 * Update the last-written LSN cache.
-		 *
-		 * The relation is still on local disk so we can get the size by
-		 * calling mdnblocks() directly. For the LSN, GetXLogInsertRecPtr() is
-		 * very conservative. If we could assume that this function is called
-		 * from the same backend that WAL-logged the contents, we could use
-		 * XactLastRecEnd here. But better safe than sorry.
-		 */
-		nblocks = mdnblocks(reln, MAIN_FORKNUM);
-		recptr = GetXLogInsertRecPtr();
-
-		neon_set_lwlsn_block_range(recptr,
-								   InfoFromNInfoB(rinfob),
-								   MAIN_FORKNUM, 0, nblocks);
-		neon_set_lwlsn_relation(recptr,
-								InfoFromNInfoB(rinfob),
-								MAIN_FORKNUM);
-
 		/* Make the relation look permanent again */
 		reln->smgr_relpersistence = RELPERSISTENCE_PERMANENT;

 		/* Remove local copy */
+		rinfob = InfoBFromSMgrRel(reln);
 		for (int forknum = 0; forknum <= MAX_FORKNUM; forknum++)
 		{
 			neon_log(SmgrTrace, "forgetting cached relsize for %u/%u/%u.%u",
--- a/pgxn/neon/walproposer.c
+++ b/pgxn/neon/walproposer.c
@@ -2118,6 +2118,9 @@ HandleSafekeeperResponse(WalProposer *wp, Safekeeper *fromsk)
 	 */
 	if (wp->config->syncSafekeepers)
 	{
+		int			n_synced;
+
+		n_synced = 0;
 		for (int i = 0; i < wp->n_safekeepers; i++)
 		{
 			Safekeeper *sk = &wp->safekeeper[i];
@@ -2126,6 +2129,8 @@ HandleSafekeeperResponse(WalProposer *wp, Safekeeper *fromsk)
 			/* alive safekeeper which is not synced yet; wait for it */
 			if (sk->state != SS_OFFLINE && !synced)
 				return;
+			if (synced)
+				n_synced++;
 		}

 		if (newCommitLsn >= wp->propTermStartLsn)
--- a/pgxn/neon/walproposer_pg.c
+++ b/pgxn/neon/walproposer_pg.c
@@ -890,7 +890,7 @@ libpqwp_connect_start(char *conninfo)
 	 * palloc will exit on failure though, so there's not much we could do if
 	 * it *did* fail.
 	 */
-	conn = (WalProposerConn*)MemoryContextAllocZero(TopMemoryContext, sizeof(WalProposerConn));
+	conn = palloc(sizeof(WalProposerConn));
 	conn->pg_conn = pg_conn;
 	conn->is_nonblocking = false;	/* connections always start in blocking
 									 * mode */
--- a/proxy/src/auth/backend/jwt.rs
+++ b/proxy/src/auth/backend/jwt.rs
@@ -776,6 +776,7 @@ impl From<&jose_jwk::Key> for KeyType {
 }

 #[cfg(test)]
+#[expect(clippy::unwrap_used)]
 mod tests {
    use std::future::IntoFuture;
    use std::net::SocketAddr;
--- a/proxy/src/auth/credentials.rs
+++ b/proxy/src/auth/credentials.rs
@@ -253,6 +253,7 @@ fn project_name_valid(name: &str) -> bool {
 }

 #[cfg(test)]
+#[expect(clippy::unwrap_used)]
 mod tests {
    use ComputeUserInfoParseError::*;
    use serde_json::json;
--- a/proxy/src/binary/pg_sni_router.rs
+++ b/proxy/src/binary/pg_sni_router.rs
@@ -258,7 +258,7 @@ async fn ssl_handshake<S: AsyncRead + AsyncWrite + Unpin>(
                "unexpected startup packet, rejecting connection"
            );
            stream
-                .throw_error_str(ERR_INSECURE_CONNECTION, crate::error::ErrorKind::User, None)
+                .throw_error_str(ERR_INSECURE_CONNECTION, crate::error::ErrorKind::User)
                .await?
        }
    }
@@ -297,7 +297,7 @@ async fn handle_client(
    // Starting from here we only proxy the client's traffic.
    info!("performing the proxy pass...");

-    match copy_bidirectional_client_compute(&mut tls_stream, &mut client, |_, _| {}).await {
+    match copy_bidirectional_client_compute(&mut tls_stream, &mut client).await {
        Ok(_) => Ok(()),
        Err(ErrorSource::Client(err)) => Err(err).context("client"),
        Err(ErrorSource::Compute(err)) => Err(err).context("compute"),
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Alex Chi Z	7f27254392	layer file migration Signed-off-by: Alex Chi Z <chi@neon.tech>	2025-04-25 15:44:42 -04:00
Alex Chi Z	2f338daf17	rfc: new encryption Signed-off-by: Alex Chi Z <chi@neon.tech>	2025-04-25 15:40:31 -04:00
John Spray	75638230b2	comments	2025-04-15 09:31:09 +01:00
John Spray	1ad48b2eaf	docs/rfcs: add storage encryption key RFC	2025-04-14 13:09:00 +01:00