Add wait events without query to metric.

Add query to pg_wait_sampling metric
Add pg_wait_sampling metric for VMs.
2026-03-15 14:20:38 +00:00 · 2023-11-16 23:56:04 +01:00 · 2023-11-16 22:42:08 +01:00 · 2023-11-16 22:04:29 +01:00 · 2023-11-16 20:54:02 +00:00 · 2023-11-16 20:54:02 +00:00
272 changed files with 8141 additions and 17414 deletions
--- a/.cargo/config.toml
+++ b/.cargo/config.toml
@@ -1,3 +1,17 @@
+# The binaries are really slow, if you compile them in 'dev' mode with the defaults.
+# Enable some optimizations even in 'dev' mode, to make tests faster. The basic
+# optimizations enabled by "opt-level=1" don't affect debuggability too much.
+#
+# See https://www.reddit.com/r/rust/comments/gvrgca/this_is_a_neat_trick_for_getting_good_runtime/
+#
+[profile.dev.package."*"]
+# Set the default for dependencies in Development mode.
+opt-level = 3
+
+[profile.dev]
+# Turn on a small amount of optimization in Development mode.
+opt-level = 1
+
 [build]
 # This is only present for local builds, as it will be overridden
 # by the RUSTDOCFLAGS env var in CI.
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -199,10 +199,6 @@ jobs:
          #
          git config --global --add safe.directory ${{ github.workspace }}
          git config --global --add safe.directory ${GITHUB_WORKSPACE}
-          for r in 14 15 16; do
-            git config --global --add safe.directory "${{ github.workspace }}/vendor/postgres-v$r"
-            git config --global --add safe.directory "${GITHUB_WORKSPACE}/vendor/postgres-v$r"
-          done

      - name: Checkout
        uses: actions/checkout@v3
@@ -408,7 +404,7 @@ jobs:
        uses: ./.github/actions/save-coverage-data

  regress-tests:
-    needs: [ check-permissions, build-neon, tag ]
+    needs: [ check-permissions, build-neon ]
    runs-on: [ self-hosted, gen3, large ]
    container:
      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
@@ -440,7 +436,6 @@ jobs:
        env:
          TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
          CHECK_ONDISK_DATA_COMPATIBILITY: nonempty
-          BUILD_TAG: ${{ needs.tag.outputs.build-tag }}

      - name: Merge and upload coverage data
        if: matrix.build_type == 'debug' && matrix.pg_version == 'v14'
@@ -1101,10 +1096,6 @@ jobs:
          #
          git config --global --add safe.directory ${{ github.workspace }}
          git config --global --add safe.directory ${GITHUB_WORKSPACE}
-          for r in 14 15 16; do
-            git config --global --add safe.directory "${{ github.workspace }}/vendor/postgres-v$r"
-            git config --global --add safe.directory "${GITHUB_WORKSPACE}/vendor/postgres-v$r"
-          done

      - name: Checkout
        uses: actions/checkout@v3
--- a/.github/workflows/neon_extra_builds.yml
+++ b/.github/workflows/neon_extra_builds.yml
@@ -142,10 +142,6 @@ jobs:
          #
          git config --global --add safe.directory ${{ github.workspace }}
          git config --global --add safe.directory ${GITHUB_WORKSPACE}
-          for r in 14 15 16; do
-            git config --global --add safe.directory "${{ github.workspace }}/vendor/postgres-v$r"
-            git config --global --add safe.directory "${GITHUB_WORKSPACE}/vendor/postgres-v$r"
-          done

      - name: Checkout
        uses: actions/checkout@v4
@@ -242,20 +238,6 @@ jobs:
      options: --init

    steps:
-      - name: Fix git ownership
-        run: |
-          # Workaround for `fatal: detected dubious ownership in repository at ...`
-          #
-          # Use both ${{ github.workspace }} and ${GITHUB_WORKSPACE} because they're different on host and in containers
-          #   Ref https://github.com/actions/checkout/issues/785
-          #
-          git config --global --add safe.directory ${{ github.workspace }}
-          git config --global --add safe.directory ${GITHUB_WORKSPACE}
-          for r in 14 15 16; do
-            git config --global --add safe.directory "${{ github.workspace }}/vendor/postgres-v$r"
-            git config --global --add safe.directory "${GITHUB_WORKSPACE}/vendor/postgres-v$r"
-          done
-
      - name: Checkout
        uses: actions/checkout@v4
        with:
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -2,7 +2,7 @@ name: Create Release Branch

 on:
  schedule:
-    - cron: '0 6 * * 1'
+    - cron: '0 7 * * 5'
  workflow_dispatch:

 jobs:
--- a/.gitignore
+++ b/.gitignore
@@ -18,6 +18,3 @@ test_output/
 *.o
 *.so
 *.Po
-
-# pgindent typedef lists
-*.list
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -9,24 +9,6 @@ refactoring, additional comments, and so forth. Let's try to raise the
 bar, and clean things up as we go. Try to leave code in a better shape
 than it was before.

-## Pre-commit hook
-
-We have a sample pre-commit hook in `pre-commit.py`.
-To set it up, run:
-
-```bash
-ln -s ../../pre-commit.py .git/hooks/pre-commit
-```
-
-This will run following checks on staged files before each commit:
- `rustfmt`
- checks for python files, see [obligatory checks](/docs/sourcetree.md#obligatory-checks).
-
-There is also a separate script `./run_clippy.sh` that runs `cargo clippy` on the whole project
-and `./scripts/reformat` that runs all formatting tools to ensure the project is up to date.
-
-If you want to skip the hook, run `git commit` with `--no-verify` option.
-
 ## Submitting changes

 1. Get at least one +1 on your PR before you push.
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -5,7 +5,6 @@ members = [
    "control_plane",
    "pageserver",
    "pageserver/ctl",
-    "pageserver/client",
    "proxy",
    "safekeeper",
    "storage_broker",
@@ -38,19 +37,20 @@ license = "Apache-2.0"
 [workspace.dependencies]
 anyhow = { version = "1.0", features = ["backtrace"] }
 arc-swap = "1.6"
-async-compression = { version = "0.4.0", features = ["tokio", "gzip", "zstd"] }
-azure_core = "0.18"
-azure_identity = "0.18"
-azure_storage = "0.18"
-azure_storage_blobs = "0.18"
+async-compression = { version = "0.4.0", features = ["tokio", "gzip"] }
+azure_core = "0.16"
+azure_identity = "0.16"
+azure_storage = "0.16"
+azure_storage_blobs = "0.16"
 flate2 = "1.0.26"
 async-stream = "0.3"
 async-trait = "0.1"
-aws-config = { version = "1.0", default-features = false, features=["rustls"] }
-aws-sdk-s3 = "1.0"
-aws-smithy-async = { version = "1.0", default-features = false, features=["rt-tokio"] }
-aws-smithy-types = "1.0"
-aws-credential-types = "1.0"
+aws-config = { version = "0.56", default-features = false, features=["rustls"] }
+aws-sdk-s3 = "0.29"
+aws-smithy-http = "0.56"
+aws-smithy-async = { version = "0.56", default-features = false, features=["rt-tokio"] }
+aws-credential-types = "0.56"
+aws-types = "0.56"
 axum = { version = "0.6.20", features = ["ws"] }
 base64 = "0.13.0"
 bincode = "1.3"
@@ -89,7 +89,6 @@ humantime-serde = "1.1.1"
 hyper = "0.14"
 hyper-tungstenite = "0.11"
 inotify = "0.10.2"
-ipnet = "2.9.0"
 itertools = "0.10"
 jsonwebtoken = "8"
 libc = "0.2"
@@ -110,7 +109,7 @@ pin-project-lite = "0.2"
 prometheus = {version = "0.13", default_features=false, features = ["process"]} # removes protobuf dependency
 prost = "0.11"
 rand = "0.8"
-regex = "1.10.2"
+regex = "1.4"
 reqwest = { version = "0.11", default-features = false, features = ["rustls-tls"] }
 reqwest-tracing = { version = "0.4.0", features = ["opentelemetry_0_19"] }
 reqwest-middleware = "0.2.0"
@@ -123,17 +122,14 @@ rustls-pemfile = "1"
 rustls-split = "0.3"
 scopeguard = "1.1"
 sysinfo = "0.29.2"
-sd-notify = "0.4.1"
 sentry = { version = "0.31", default-features = false, features = ["backtrace", "contexts", "panic", "rustls", "reqwest" ] }
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1"
-serde_path_to_error = "0.1"
 serde_with = "2.0"
 serde_assert = "0.5.0"
 sha2 = "0.10.2"
 signal-hook = "0.3"
 smallvec = "1.11"
-smol_str = { version = "0.2.0", features = ["serde"] }
 socket2 = "0.5"
 strum = "0.24"
 strum_macros = "0.24"
@@ -150,7 +146,7 @@ tokio-postgres-rustls = "0.10.0"
 tokio-rustls = "0.24"
 tokio-stream = "0.1"
 tokio-tar = "0.3"
-tokio-util = { version = "0.7.10", features = ["io", "rt"] }
+tokio-util = { version = "0.7", features = ["io"] }
 toml = "0.7"
 toml_edit = "0.19"
 tonic = {version = "0.9", features = ["tls", "tls-roots"]}
@@ -169,11 +165,11 @@ env_logger = "0.10"
 log = "0.4"

 ## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed
-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
-postgres-native-tls = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
-postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
-postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
-tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
+postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="6ce32f791526e27533cab0232a6bb243b2c32584" }
+postgres-native-tls = { git = "https://github.com/neondatabase/rust-postgres.git", rev="6ce32f791526e27533cab0232a6bb243b2c32584" }
+postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", rev="6ce32f791526e27533cab0232a6bb243b2c32584" }
+postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", rev="6ce32f791526e27533cab0232a6bb243b2c32584" }
+tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="6ce32f791526e27533cab0232a6bb243b2c32584" }

 ## Other git libraries
 heapless = { default-features=false, features=[], git = "https://github.com/japaric/heapless.git", rev = "644653bf3b831c6bb4963be2de24804acf5e5001" } # upstream release pending
@@ -183,7 +179,6 @@ compute_api = { version = "0.1", path = "./libs/compute_api/" }
 consumption_metrics = { version = "0.1", path = "./libs/consumption_metrics/" }
 metrics = { version = "0.1", path = "./libs/metrics/" }
 pageserver_api = { version = "0.1", path = "./libs/pageserver_api/" }
-pageserver_client = { path = "./pageserver/client" }
 postgres_backend = { version = "0.1", path = "./libs/postgres_backend/" }
 postgres_connection = { version = "0.1", path = "./libs/postgres_connection/" }
 postgres_ffi = { version = "0.1", path = "./libs/postgres_ffi/" }
@@ -211,7 +206,7 @@ tonic-build = "0.9"

 # This is only needed for proxy's tests.
 # TODO: we should probably fork `tokio-postgres-rustls` instead.
-tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
+tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="6ce32f791526e27533cab0232a6bb243b2c32584" }

 ################# Binary contents sections

--- a/Dockerfile.compute-node
+++ b/Dockerfile.compute-node
@@ -393,9 +393,7 @@ RUN case "${PG_VERSION}" in \
        export TIMESCALEDB_CHECKSUM=6fca72a6ed0f6d32d2b3523951ede73dc5f9b0077b38450a029a5f411fdb8c73 \
        ;; \
      *) \
-        export TIMESCALEDB_VERSION=2.13.0 \
-        export TIMESCALEDB_CHECKSUM=584a351c7775f0e067eaa0e7277ea88cab9077cc4c455cbbf09a5d9723dce95d \
-        ;; \
+        echo "TimescaleDB not supported on this PostgreSQL version. See https://github.com/timescale/timescaledb/issues/5752" && exit 0;; \
    esac && \
    apt-get update && \
    apt-get install -y cmake && \
@@ -718,20 +716,20 @@ RUN wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.1.3.tar.gz -

 #########################################################################################
 #
-# Layer "wal2json-build"
-# Compile "wal2json" extension
+# Layer "pg-wait-sampling-pg-build"
+# Compile "pg_wait_sampling" extension
 #
 #########################################################################################
-
-FROM build-deps AS wal2json-pg-build
+FROM build-deps AS pg-wait-sampling-pg-build
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

 ENV PATH "/usr/local/pgsql/bin/:$PATH"
-RUN wget https://github.com/eulerto/wal2json/archive/refs/tags/wal2json_2_5.tar.gz && \
-    echo "b516653575541cf221b99cf3f8be9b6821f6dbcfc125675c85f35090f824f00e wal2json_2_5.tar.gz" | sha256sum --check && \
-    mkdir wal2json-src && cd wal2json-src && tar xvzf ../wal2json_2_5.tar.gz --strip-components=1 -C . && \
-    make -j $(getconf _NPROCESSORS_ONLN) && \
-    make -j $(getconf _NPROCESSORS_ONLN) install
+RUN wget https://github.com/postgrespro/pg_wait_sampling/archive/refs/tags/v1.1.5.tar.gz -O pg_wait_sampling.tar.gz && \
+    echo 'a03da6a413f5652ce470a3635ed6ebba528c74cb26aa4cfced8aff8a8441f81ec6dd657ff62cd6ce96a4e6ce02cad9f2519ae9525367ece60497aa20faafde5c  pg_wait_sampling.tar.gz' | sha512sum -c && \
+    mkdir pg_wait_sampling-src && cd pg_wait_sampling-src && tar xvzf ../pg_wait_sampling.tar.gz --strip-components=1 -C . && \
+    make USE_PGXS=1 -j $(getconf _NPROCESSORS_ONLN) && \
+    make USE_PGXS=1 -j $(getconf _NPROCESSORS_ONLN) install && \
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_wait_sampling.control

 #########################################################################################
 #
@@ -769,7 +767,7 @@ COPY --from=rdkit-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg-uuidv7-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg-roaringbitmap-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg-embedding-pg-build /usr/local/pgsql/ /usr/local/pgsql/
-COPY --from=wal2json-pg-build /usr/local/pgsql /usr/local/pgsql
+COPY --from=pg-wait-sampling-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY pgxn/ pgxn/

 RUN make -j $(getconf _NPROCESSORS_ONLN) \
--- a/38
+++ b/38
@@ -260,44 +260,6 @@ distclean:
 fmt:
 	./pre-commit.py --fix-inplace

-postgres-%-pg-bsd-indent: postgres-%
-	+@echo "Compiling pg_bsd_indent"
-	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/src/tools/pg_bsd_indent/
-
-# Create typedef list for the core. Note that generally it should be combined with
-# buildfarm one to cover platform specific stuff.
-# https://wiki.postgresql.org/wiki/Running_pgindent_on_non-core_code_or_development_code
-postgres-%-typedefs.list: postgres-%
-	$(ROOT_PROJECT_DIR)/vendor/postgres-$*/src/tools/find_typedef $(POSTGRES_INSTALL_DIR)/$*/bin > $@
-
-# Indent postgres. See src/tools/pgindent/README for details.
-.PHONY: postgres-%-pgindent
-postgres-%-pgindent: postgres-%-pg-bsd-indent postgres-%-typedefs.list
-	+@echo merge with buildfarm typedef to cover all platforms
-	+@echo note: I first tried to download from pgbuildfarm.org, but for unclear reason e.g. \
-		REL_16_STABLE list misses PGSemaphoreData
-	# wget -q -O - "http://www.pgbuildfarm.org/cgi-bin/typedefs.pl?branch=REL_16_STABLE" |\
-	# cat - postgres-$*-typedefs.list | sort | uniq > postgres-$*-typedefs-full.list
-	cat $(ROOT_PROJECT_DIR)/vendor/postgres-$*/src/tools/pgindent/typedefs.list |\
-		cat - postgres-$*-typedefs.list | sort | uniq > postgres-$*-typedefs-full.list
-	+@echo note: you might want to run it on selected files/dirs instead.
-	INDENT=$(POSTGRES_INSTALL_DIR)/build/$*/src/tools/pg_bsd_indent/pg_bsd_indent \
-		$(ROOT_PROJECT_DIR)/vendor/postgres-$*/src/tools/pgindent/pgindent --typedefs postgres-$*-typedefs-full.list \
-		$(ROOT_PROJECT_DIR)/vendor/postgres-$*/src/ \
-		--excludes $(ROOT_PROJECT_DIR)/vendor/postgres-$*/src/tools/pgindent/exclude_file_patterns
-	rm -f pg*.BAK
-
-# Indent pxgn/neon.
-.PHONY: pgindent
-neon-pgindent: postgres-v16-pg-bsd-indent neon-pg-ext-v16
-	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v16/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-		FIND_TYPEDEF=$(ROOT_PROJECT_DIR)/vendor/postgres-v16/src/tools/find_typedef \
-		INDENT=$(POSTGRES_INSTALL_DIR)/build/v16/src/tools/pg_bsd_indent/pg_bsd_indent \
-		PGINDENT_SCRIPT=$(ROOT_PROJECT_DIR)/vendor/postgres-v16/src/tools/pgindent/pgindent \
-		-C $(POSTGRES_INSTALL_DIR)/build/neon-v16 \
-		-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile pgindent
-
-
 .PHONY: setup-pre-commit-hook
 setup-pre-commit-hook:
 	ln -s -f $(ROOT_PROJECT_DIR)/pre-commit.py .git/hooks/pre-commit
--- a/README.md
+++ b/README.md
@@ -149,9 +149,6 @@ tenant 9ef87a5bf0d92544f6fafeeb3239695c successfully created on the pageserver
 Created an initial timeline 'de200bd42b49cc1814412c7e592dd6e9' at Lsn 0/16B5A50 for tenant: 9ef87a5bf0d92544f6fafeeb3239695c
 Setting tenant 9ef87a5bf0d92544f6fafeeb3239695c as a default one

-# create postgres compute node
-> cargo neon endpoint create main
-
 # start postgres compute node
 > cargo neon endpoint start main
 Starting new endpoint main (PostgreSQL v14) on timeline de200bd42b49cc1814412c7e592dd6e9 ...
@@ -188,11 +185,8 @@ Created timeline 'b3b863fa45fa9e57e615f9f2d944e601' at Lsn 0/16F9A00 for tenant:
 (L) main [de200bd42b49cc1814412c7e592dd6e9]
 (L) ┗━ @0/16F9A00: migration_check [b3b863fa45fa9e57e615f9f2d944e601]

-# create postgres on that branch
-> cargo neon endpoint create migration_check --branch-name migration_check
-
 # start postgres on that branch
-> cargo neon endpoint start migration_check
+> cargo neon endpoint start migration_check --branch-name migration_check
 Starting new endpoint migration_check (PostgreSQL v14) on timeline b3b863fa45fa9e57e615f9f2d944e601 ...
 Starting postgres at 'postgresql://cloud_admin@127.0.0.1:55434/postgres'

--- a/compute_tools/Cargo.toml
+++ b/compute_tools/Cargo.toml
@@ -38,4 +38,3 @@ toml_edit.workspace = true
 remote_storage = { version = "0.1", path = "../libs/remote_storage/" }
 vm_monitor = { version = "0.1", path = "../libs/vm_monitor/" }
 zstd = "0.12.4"
-bytes = "1.0"
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -31,7 +31,7 @@
 //!             -C 'postgresql://cloud_admin@localhost/postgres' \
 //!             -S /var/db/postgres/specs/current.json \
 //!             -b /usr/local/bin/postgres \
-//!             -r http://pg-ext-s3-gateway
+//!             -r {"bucket": "neon-dev-extensions-eu-central-1", "region": "eu-central-1"}
 //! ```
 //!
 use std::collections::HashMap;
@@ -51,7 +51,7 @@ use compute_api::responses::ComputeStatus;

 use compute_tools::compute::{ComputeNode, ComputeState, ParsedSpec};
 use compute_tools::configurator::launch_configurator;
-use compute_tools::extension_server::get_pg_version;
+use compute_tools::extension_server::{get_pg_version, init_remote_storage};
 use compute_tools::http::api::launch_http_server;
 use compute_tools::logger::*;
 use compute_tools::monitor::launch_monitor;
@@ -60,7 +60,7 @@ use compute_tools::spec::*;

 // this is an arbitrary build tag. Fine as a default / for testing purposes
 // in-case of not-set environment var
-const BUILD_TAG_DEFAULT: &str = "latest";
+const BUILD_TAG_DEFAULT: &str = "5670669815";

 fn main() -> Result<()> {
    init_tracing_and_logging(DEFAULT_LOG_LEVEL)?;
@@ -74,18 +74,10 @@ fn main() -> Result<()> {
    let pgbin_default = String::from("postgres");
    let pgbin = matches.get_one::<String>("pgbin").unwrap_or(&pgbin_default);

-    let ext_remote_storage = matches
-        .get_one::<String>("remote-ext-config")
-        // Compatibility hack: if the control plane specified any remote-ext-config
-        // use the default value for extension storage proxy gateway.
-        // Remove this once the control plane is updated to pass the gateway URL
-        .map(|conf| {
-            if conf.starts_with("http") {
-                conf.trim_end_matches('/')
-            } else {
-                "http://pg-ext-s3-gateway"
-            }
-        });
+    let remote_ext_config = matches.get_one::<String>("remote-ext-config");
+    let ext_remote_storage = remote_ext_config.map(|x| {
+        init_remote_storage(x).expect("cannot initialize remote extension storage from config")
+    });

    let http_port = *matches
        .get_one::<u16>("http-port")
@@ -206,7 +198,7 @@ fn main() -> Result<()> {
        live_config_allowed,
        state: Mutex::new(new_state),
        state_changed: Condvar::new(),
-        ext_remote_storage: ext_remote_storage.map(|s| s.to_string()),
+        ext_remote_storage,
        ext_download_progress: RwLock::new(HashMap::new()),
        build_tag,
    };
@@ -274,13 +266,7 @@ fn main() -> Result<()> {
            let mut state = compute.state.lock().unwrap();
            state.error = Some(format!("{:?}", err));
            state.status = ComputeStatus::Failed;
-            // Notify others that Postgres failed to start. In case of configuring the
-            // empty compute, it's likely that API handler is still waiting for compute
-            // state change. With this we will notify it that compute is in Failed state,
-            // so control plane will know about it earlier and record proper error instead
-            // of timeout.
-            compute.state_changed.notify_all();
-            drop(state); // unlock
+            drop(state);
            delay_exit = true;
            None
        }
@@ -493,6 +479,13 @@ fn cli() -> clap::Command {
                )
                .value_name("FILECACHE_CONNSTR"),
        )
+        .arg(
+            // DEPRECATED, NO LONGER DOES ANYTHING.
+            // See https://github.com/neondatabase/cloud/issues/7516
+            Arg::new("file-cache-on-disk")
+                .long("file-cache-on-disk")
+                .action(clap::ArgAction::SetTrue),
+        )
 }

 #[test]
--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -2,6 +2,7 @@ use std::collections::HashMap;
 use std::env;
 use std::fs;
 use std::io::BufRead;
+use std::io::Write;
 use std::os::unix::fs::PermissionsExt;
 use std::path::Path;
 use std::process::{Command, Stdio};
@@ -14,6 +15,7 @@ use chrono::{DateTime, Utc};
 use futures::future::join_all;
 use futures::stream::FuturesUnordered;
 use futures::StreamExt;
+use notify::event;
 use postgres::{Client, NoTls};
 use tokio;
 use tokio_postgres;
@@ -22,10 +24,10 @@ use utils::id::{TenantId, TimelineId};
 use utils::lsn::Lsn;

 use compute_api::responses::{ComputeMetrics, ComputeStatus};
-use compute_api::spec::{ComputeFeature, ComputeMode, ComputeSpec};
+use compute_api::spec::{ComputeMode, ComputeSpec};
 use utils::measured_stream::MeasuredReader;

-use remote_storage::{DownloadError, RemotePath};
+use remote_storage::{DownloadError, GenericRemoteStorage, RemotePath};

 use crate::checker::create_availability_check_data;
 use crate::pg_helpers::*;
@@ -59,8 +61,8 @@ pub struct ComputeNode {
    pub state: Mutex<ComputeState>,
    /// `Condvar` to allow notifying waiters about state changes.
    pub state_changed: Condvar,
-    /// the address of extension storage proxy gateway
-    pub ext_remote_storage: Option<String>,
+    /// The S3 bucket that we search for extensions in
+    pub ext_remote_storage: Option<GenericRemoteStorage>,
    // key: ext_archive_name, value: started download time, download_completed?
    pub ext_download_progress: RwLock<HashMap<String, (DateTime<Utc>, bool)>>,
    pub build_tag: String,
@@ -252,7 +254,7 @@ fn create_neon_superuser(spec: &ComputeSpec, client: &mut Client) -> Result<()>
                    IF NOT EXISTS (
                        SELECT FROM pg_catalog.pg_roles WHERE rolname = 'neon_superuser')
                    THEN
-                        CREATE ROLE neon_superuser CREATEDB CREATEROLE NOLOGIN REPLICATION BYPASSRLS IN ROLE pg_read_all_data, pg_write_all_data;
+                        CREATE ROLE neon_superuser CREATEDB CREATEROLE NOLOGIN REPLICATION IN ROLE pg_read_all_data, pg_write_all_data;
                        IF array_length(roles, 1) IS NOT NULL THEN
                            EXECUTE format('GRANT neon_superuser TO %s',
                                           array_to_string(ARRAY(SELECT quote_ident(x) FROM unnest(roles) as x), ', '));
@@ -277,17 +279,6 @@ fn create_neon_superuser(spec: &ComputeSpec, client: &mut Client) -> Result<()>
 }

 impl ComputeNode {
-    /// Check that compute node has corresponding feature enabled.
-    pub fn has_feature(&self, feature: ComputeFeature) -> bool {
-        let state = self.state.lock().unwrap();
-
-        if let Some(s) = state.pspec.as_ref() {
-            s.spec.features.contains(&feature)
-        } else {
-            false
-        }
-    }
-
    pub fn set_status(&self, status: ComputeStatus) {
        let mut state = self.state.lock().unwrap();
        state.status = status;
@@ -655,9 +646,30 @@ impl ComputeNode {
            } else {
                vec![]
            })
+            .stderr(Stdio::piped())
            .spawn()
            .expect("cannot start postgres process");

+        let stderr = pg.stderr.take().unwrap();
+        std::thread::spawn(move || {
+            let reader = std::io::BufReader::new(stderr);
+            let mut last_lines = vec![];
+            for line in reader.lines() {
+                if let Ok(line) = line {
+                    if line.starts_with("2023-") {
+                        // a line starting with "2023-" (presumably a log timestamp) begins a new entry: emit the lines buffered for the previous entry
+                        let combined = format!("PG:{}\n", last_lines.join("\u{200B}"));
+                        let res = std::io::stderr().lock().write_all(combined.as_bytes());
+                        if let Err(e) = res {
+                            error!("failed to write to stderr: {}", e);
+                        }
+                        last_lines.clear();
+                    }
+                    last_lines.push(line);
+                }
+            }
+        });
+
        wait_for_postgres(&mut pg, pgdata_path)?;

        Ok(pg)
@@ -709,7 +721,6 @@ impl ComputeNode {
        handle_role_deletions(spec, self.connstr.as_str(), &mut client)?;
        handle_grants(spec, &mut client, self.connstr.as_str())?;
        handle_extensions(spec, &mut client)?;
-        handle_extension_neon(&mut client)?;
        create_availability_check_data(&mut client)?;

        // 'Close' connection
@@ -739,12 +750,7 @@ impl ComputeNode {

        // Write new config
        let pgdata_path = Path::new(&self.pgdata);
-        let postgresql_conf_path = pgdata_path.join("postgresql.conf");
-        config::write_postgres_conf(&postgresql_conf_path, &spec, None)?;
-        // temporarily reset max_cluster_size in config
-        // to avoid the possibility of hitting the limit, while we are reconfiguring:
-        // creating new extensions, roles, etc...
-        config::compute_ctl_temp_override_create(pgdata_path, "neon.max_cluster_size=-1")?;
+        config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), &spec, None)?;
        self.pg_reload_conf()?;

        let mut client = Client::connect(self.connstr.as_str(), NoTls)?;
@@ -759,16 +765,11 @@ impl ComputeNode {
            handle_role_deletions(&spec, self.connstr.as_str(), &mut client)?;
            handle_grants(&spec, &mut client, self.connstr.as_str())?;
            handle_extensions(&spec, &mut client)?;
-            handle_extension_neon(&mut client)?;
        }

        // 'Close' connection
        drop(client);

-        // reset max_cluster_size in config back to original value and reload config
-        config::compute_ctl_temp_override_remove(pgdata_path)?;
-        self.pg_reload_conf()?;
-
        let unknown_op = "unknown".to_string();
        let op_id = spec.operation_uuid.as_ref().unwrap_or(&unknown_op);
        info!(
@@ -829,17 +830,7 @@ impl ComputeNode {

        let config_time = Utc::now();
        if pspec.spec.mode == ComputeMode::Primary && !pspec.spec.skip_pg_catalog_updates {
-            let pgdata_path = Path::new(&self.pgdata);
-            // temporarily reset max_cluster_size in config
-            // to avoid the possibility of hitting the limit, while we are applying config:
-            // creating new extensions, roles, etc...
-            config::compute_ctl_temp_override_create(pgdata_path, "neon.max_cluster_size=-1")?;
-            self.pg_reload_conf()?;
-
            self.apply_config(&compute_state)?;
-
-            config::compute_ctl_temp_override_remove(pgdata_path)?;
-            self.pg_reload_conf()?;
        }

        let startup_end_time = Utc::now();
@@ -987,12 +978,12 @@ LIMIT 100",
        real_ext_name: String,
        ext_path: RemotePath,
    ) -> Result<u64, DownloadError> {
-        let ext_remote_storage =
-            self.ext_remote_storage
-                .as_ref()
-                .ok_or(DownloadError::BadInput(anyhow::anyhow!(
-                    "Remote extensions storage is not configured",
-                )))?;
+        let remote_storage = self
+            .ext_remote_storage
+            .as_ref()
+            .ok_or(DownloadError::BadInput(anyhow::anyhow!(
+                "Remote extensions storage is not configured",
+            )))?;

        let ext_archive_name = ext_path.object_name().expect("bad path");

@@ -1048,7 +1039,7 @@ LIMIT 100",
        let download_size = extension_server::download_extension(
            &real_ext_name,
            &ext_path,
-            ext_remote_storage,
+            remote_storage,
            &self.pgbin,
        )
        .await
--- a/compute_tools/src/config.rs
+++ b/compute_tools/src/config.rs
@@ -93,25 +93,5 @@ pub fn write_postgres_conf(
        writeln!(file, "neon.extension_server_port={}", port)?;
    }

-    // This is essential to keep this line at the end of the file,
-    // because it is intended to override any settings above.
-    writeln!(file, "include_if_exists = 'compute_ctl_temp_override.conf'")?;
-
-    Ok(())
-}
-
-/// create file compute_ctl_temp_override.conf in pgdata_dir
-/// add provided options to this file
-pub fn compute_ctl_temp_override_create(pgdata_path: &Path, options: &str) -> Result<()> {
-    let path = pgdata_path.join("compute_ctl_temp_override.conf");
-    let mut file = File::create(path)?;
-    write!(file, "{}", options)?;
-    Ok(())
-}
-
-/// remove file compute_ctl_temp_override.conf in pgdata_dir
-pub fn compute_ctl_temp_override_remove(pgdata_path: &Path) -> Result<()> {
-    let path = pgdata_path.join("compute_ctl_temp_override.conf");
-    std::fs::remove_file(path)?;
    Ok(())
 }
--- a/compute_tools/src/extension_server.rs
+++ b/compute_tools/src/extension_server.rs
@@ -71,16 +71,18 @@ More specifically, here is an example ext_index.json
    }
 }
 */
+use anyhow::Context;
 use anyhow::{self, Result};
-use anyhow::{bail, Context};
-use bytes::Bytes;
 use compute_api::spec::RemoteExtSpec;
 use regex::Regex;
 use remote_storage::*;
-use reqwest::StatusCode;
+use serde_json;
+use std::io::Read;
+use std::num::NonZeroUsize;
 use std::path::Path;
 use std::str;
 use tar::Archive;
+use tokio::io::AsyncReadExt;
 use tracing::info;
 use tracing::log::warn;
 use zstd::stream::read::Decoder;
@@ -136,31 +138,23 @@ fn parse_pg_version(human_version: &str) -> &str {
 pub async fn download_extension(
    ext_name: &str,
    ext_path: &RemotePath,
-    ext_remote_storage: &str,
+    remote_storage: &GenericRemoteStorage,
    pgbin: &str,
 ) -> Result<u64> {
    info!("Download extension {:?} from {:?}", ext_name, ext_path);
-
-    // TODO add retry logic
-    let download_buffer =
-        match download_extension_tar(ext_remote_storage, &ext_path.to_string()).await {
-            Ok(buffer) => buffer,
-            Err(error_message) => {
-                return Err(anyhow::anyhow!(
-                    "error downloading extension {:?}: {:?}",
-                    ext_name,
-                    error_message
-                ));
-            }
-        };
-
+    let mut download = remote_storage.download(ext_path).await?;
+    let mut download_buffer = Vec::new();
+    download
+        .download_stream
+        .read_to_end(&mut download_buffer)
+        .await?;
    let download_size = download_buffer.len() as u64;
-    info!("Download size {:?}", download_size);
    // it's unclear whether it is more performant to decompress into memory or not
    // TODO: decompressing into memory can be avoided
-    let decoder = Decoder::new(download_buffer.as_ref())?;
-    let mut archive = Archive::new(decoder);
-
+    let mut decoder = Decoder::new(download_buffer.as_slice())?;
+    let mut decompress_buffer = Vec::new();
+    decoder.read_to_end(&mut decompress_buffer)?;
+    let mut archive = Archive::new(decompress_buffer.as_slice());
    let unzip_dest = pgbin
        .strip_suffix("/bin/postgres")
        .expect("bad pgbin")
@@ -228,32 +222,29 @@ pub fn create_control_files(remote_extensions: &RemoteExtSpec, pgbin: &str) {
    }
 }

-// Do request to extension storage proxy, i.e.
-// curl http://pg-ext-s3-gateway/latest/v15/extensions/anon.tar.zst
-// using HHTP GET
-// and return the response body as bytes
-//
-async fn download_extension_tar(ext_remote_storage: &str, ext_path: &str) -> Result<Bytes> {
-    let uri = format!("{}/{}", ext_remote_storage, ext_path);
-
-    info!("Download extension {:?} from uri {:?}", ext_path, uri);
-
-    let resp = reqwest::get(uri).await?;
-
-    match resp.status() {
-        StatusCode::OK => match resp.bytes().await {
-            Ok(resp) => {
-                info!("Download extension {:?} completed successfully", ext_path);
-                Ok(resp)
-            }
-            Err(e) => bail!("could not deserialize remote extension response: {}", e),
-        },
-        StatusCode::SERVICE_UNAVAILABLE => bail!("remote extension is temporarily unavailable"),
-        _ => bail!(
-            "unexpected remote extension response status code: {}",
-            resp.status()
-        ),
+// Builds an S3-backed `GenericRemoteStorage` from the JSON string `remote_ext_config` (`bucket`/`region` are `String` fields, `endpoint`/`prefix` are `Option`s).
+pub fn init_remote_storage(remote_ext_config: &str) -> anyhow::Result<GenericRemoteStorage> {
+    #[derive(Debug, serde::Deserialize)]
+    struct RemoteExtJson {
+        bucket: String,
+        region: String,
+        endpoint: Option<String>,
+        prefix: Option<String>,
    }
+    let remote_ext_json = serde_json::from_str::<RemoteExtJson>(remote_ext_config)?; // a JSON parse failure is returned to the caller
+
+    let config = S3Config {
+        bucket_name: remote_ext_json.bucket,
+        bucket_region: remote_ext_json.region,
+        prefix_in_bucket: remote_ext_json.prefix,
+        endpoint: remote_ext_json.endpoint,
+        concurrency_limit: NonZeroUsize::new(100).expect("100 != 0"),
+        max_keys_per_list_response: None,
+    };
+    let config = RemoteStorageConfig {
+        storage: RemoteStorageKind::AwsS3(config),
+    };
+    GenericRemoteStorage::from_config(&config)
 }

 #[cfg(test)]
--- a/compute_tools/src/http/api.rs
+++ b/compute_tools/src/http/api.rs
@@ -123,7 +123,7 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
            }
        }

-        // download extension files from remote extension storage on demand
+        // download extension files from S3 on demand
        (&Method::POST, route) if route.starts_with("/extension_server/") => {
            info!("serving {:?} POST request", route);
            info!("req.uri {:?}", req.uri());
@@ -227,7 +227,7 @@ async fn handle_configure_request(

        let parsed_spec = match ParsedSpec::try_from(spec) {
            Ok(ps) => ps,
-            Err(msg) => return Err((msg, StatusCode::BAD_REQUEST)),
+            Err(msg) => return Err((msg, StatusCode::PRECONDITION_FAILED)),
        };

        // XXX: wrap state update under lock in code blocks. Otherwise,
--- a/compute_tools/src/http/openapi_spec.yaml
+++ b/compute_tools/src/http/openapi_spec.yaml
@@ -156,17 +156,17 @@ paths:
                description: Error text or 'OK' if download succeeded.
                example: "OK"
        400:
-          description: Request is invalid.
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/GenericError"
+          description: Request is invalid.
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/GenericError"
        500:
-          description: Extension download request failed.
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/GenericError"
+          description: Extension download request failed.
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/GenericError"

 components:
  securitySchemes:
--- a/compute_tools/src/pg_helpers.rs
+++ b/compute_tools/src/pg_helpers.rs
@@ -193,11 +193,16 @@ impl Escaping for PgIdent {
 /// Build a list of existing Postgres roles
 pub fn get_existing_roles(xact: &mut Transaction<'_>) -> Result<Vec<Role>> {
    let postgres_roles = xact
-        .query("SELECT rolname, rolpassword FROM pg_catalog.pg_authid", &[])?
+        .query(
+            "SELECT rolname, rolpassword, rolreplication, rolbypassrls FROM pg_catalog.pg_authid",
+            &[],
+        )?
        .iter()
        .map(|row| Role {
            name: row.get("rolname"),
            encrypted_password: row.get("rolpassword"),
+            replication: Some(row.get("rolreplication")),
+            bypassrls: Some(row.get("rolbypassrls")),
            options: None,
        })
        .collect();
--- a/compute_tools/src/spec.rs
+++ b/compute_tools/src/spec.rs
@@ -118,6 +118,19 @@ pub fn get_spec_from_control_plane(
    spec
 }

+/// Applies the cluster specification `spec` to the data directory `pgdata_path`:
+/// - Serializes the cluster config into `postgresql.conf`, completely rewriting the file.
+/// - Updates `pg_hba.conf` to allow external connections; an error from either step is returned.
+pub fn handle_configuration(spec: &ComputeSpec, pgdata_path: &Path) -> Result<()> {
+    // `postgresql.conf` is no longer included in `basebackup`, so always
+    // write the full config into it, creating a new file.
+    config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), spec, None)?;
+
+    update_pg_hba(pgdata_path)?;
+
+    Ok(())
+}
+
 /// Check `pg_hba.conf` and update if needed to allow external connections.
 pub fn update_pg_hba(pgdata_path: &Path) -> Result<()> {
    // XXX: consider making it a part of spec.json
@@ -252,6 +265,8 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
        let action = if let Some(r) = pg_role {
            if (r.encrypted_password.is_none() && role.encrypted_password.is_some())
                || (r.encrypted_password.is_some() && role.encrypted_password.is_none())
+                || !r.bypassrls.unwrap_or(false)
+                || !r.replication.unwrap_or(false)
            {
                RoleAction::Update
            } else if let Some(pg_pwd) = &r.encrypted_password {
@@ -283,22 +298,14 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
        match action {
            RoleAction::None => {}
            RoleAction::Update => {
-                // This can be run on /every/ role! Not just ones created through the console.
-                // This means that if you add some funny ALTER here that adds a permission,
-                // this will get run even on user-created roles! This will result in different
-                // behavior before and after a spec gets reapplied. The below ALTER as it stands
-                // now only grants LOGIN and changes the password. Please do not allow this branch
-                // to do anything silly.
-                let mut query: String = format!("ALTER ROLE {} ", name.pg_quote());
+                let mut query: String =
+                    format!("ALTER ROLE {} BYPASSRLS REPLICATION", name.pg_quote());
                query.push_str(&role.to_pg_options());
                xact.execute(query.as_str(), &[])?;
            }
            RoleAction::Create => {
-                // This branch only runs when roles are created through the console, so it is
-                // safe to add more permissions here. BYPASSRLS and REPLICATION are inherited
-                // from neon_superuser.
                let mut query: String = format!(
-                    "CREATE ROLE {} INHERIT CREATEROLE CREATEDB BYPASSRLS REPLICATION IN ROLE neon_superuser",
+                    "CREATE ROLE {} CREATEROLE CREATEDB BYPASSRLS REPLICATION IN ROLE neon_superuser",
                    name.pg_quote()
                );
                info!("role create query: '{}'", &query);
@@ -663,37 +670,13 @@ pub fn handle_extensions(spec: &ComputeSpec, client: &mut Client) -> Result<()>
            info!("creating system extensions with query: {}", query);
            client.simple_query(query)?;
        }
+        if libs.contains("pg_wait_sampling") {
+            // Create extension only if this compute really needs it
+            let query = "CREATE EXTENSION IF NOT EXISTS pg_wait_sampling";
+            info!("creating system extensions with query: {}", query);
+            client.simple_query(query)?;
+        }
    }

    Ok(())
 }
-
-/// Run CREATE and ALTER EXTENSION neon UPDATE for postgres database
-#[instrument(skip_all)]
-pub fn handle_extension_neon(client: &mut Client) -> Result<()> {
-    info!("handle extension neon");
-
-    let mut query = "CREATE SCHEMA IF NOT EXISTS neon";
-    client.simple_query(query)?;
-
-    query = "CREATE EXTENSION IF NOT EXISTS neon WITH SCHEMA neon";
-    info!("create neon extension with query: {}", query);
-    client.simple_query(query)?;
-
-    query = "UPDATE pg_extension SET extrelocatable = true WHERE extname = 'neon'";
-    client.simple_query(query)?;
-
-    query = "ALTER EXTENSION neon SET SCHEMA neon";
-    info!("alter neon extension schema with query: {}", query);
-    client.simple_query(query)?;
-
-    // this will be a no-op if extension is already up to date,
-    // which may happen in two cases:
-    // - extension was just installed
-    // - extension was already installed and is up to date
-    let query = "ALTER EXTENSION neon UPDATE";
-    info!("update neon extension schema with query: {}", query);
-    client.simple_query(query)?;
-
-    Ok(())
-}
--- a/control_plane/Cargo.toml
+++ b/control_plane/Cargo.toml
@@ -6,11 +6,9 @@ license.workspace = true

 [dependencies]
 anyhow.workspace = true
-async-trait.workspace = true
 camino.workspace = true
 clap.workspace = true
 comfy-table.workspace = true
-futures.workspace = true
 git-version.workspace = true
 nix.workspace = true
 once_cell.workspace = true
@@ -26,11 +24,10 @@ tar.workspace = true
 thiserror.workspace = true
 toml.workspace = true
 tokio.workspace = true
-tokio-postgres.workspace = true
-tokio-util.workspace = true
 url.workspace = true
+# Note: Do not directly depend on pageserver or safekeeper; use pageserver_api or safekeeper_api
+# instead, so that recompile times are better.
 pageserver_api.workspace = true
-pageserver_client.workspace = true
 postgres_backend.workspace = true
 safekeeper_api.workspace = true
 postgres_connection.workspace = true
--- a/control_plane/src/attachment_service.rs
+++ b/control_plane/src/attachment_service.rs
@@ -9,7 +9,7 @@ pub struct AttachmentService {
    env: LocalEnv,
    listen: String,
    path: PathBuf,
-    client: reqwest::Client,
+    client: reqwest::blocking::Client,
 }

 const COMMAND: &str = "attachment_service";
@@ -53,7 +53,7 @@ impl AttachmentService {
            env: env.clone(),
            path,
            listen,
-            client: reqwest::ClientBuilder::new()
+            client: reqwest::blocking::ClientBuilder::new()
                .build()
                .expect("Failed to construct http client"),
        }
@@ -64,7 +64,7 @@ impl AttachmentService {
            .expect("non-Unicode path")
    }

-    pub async fn start(&self) -> anyhow::Result<Child> {
+    pub fn start(&self) -> anyhow::Result<Child> {
        let path_str = self.path.to_string_lossy();

        background_process::start_process(
@@ -73,11 +73,10 @@ impl AttachmentService {
            &self.env.attachment_service_bin(),
            ["-l", &self.listen, "-p", &path_str],
            [],
-            background_process::InitialPidFile::Create(self.pid_file()),
+            background_process::InitialPidFile::Create(&self.pid_file()),
            // TODO: a real status check
-            || async move { anyhow::Ok(true) },
+            || Ok(true),
        )
-        .await
    }

    pub fn stop(&self, immediate: bool) -> anyhow::Result<()> {
@@ -85,7 +84,7 @@ impl AttachmentService {
    }

    /// Call into the attach_hook API, for use before handing out attachments to pageservers
-    pub async fn attach_hook(
+    pub fn attach_hook(
        &self,
        tenant_id: TenantId,
        pageserver_id: NodeId,
@@ -105,16 +104,16 @@ impl AttachmentService {
            node_id: Some(pageserver_id),
        };

-        let response = self.client.post(url).json(&request).send().await?;
+        let response = self.client.post(url).json(&request).send()?;
        if response.status() != StatusCode::OK {
            return Err(anyhow!("Unexpected status {}", response.status()));
        }

-        let response = response.json::<AttachHookResponse>().await?;
+        let response = response.json::<AttachHookResponse>()?;
        Ok(response.gen)
    }

-    pub async fn inspect(&self, tenant_id: TenantId) -> anyhow::Result<Option<(u32, NodeId)>> {
+    pub fn inspect(&self, tenant_id: TenantId) -> anyhow::Result<Option<(u32, NodeId)>> {
        use hyper::StatusCode;

        let url = self
@@ -127,12 +126,12 @@ impl AttachmentService {

        let request = InspectRequest { tenant_id };

-        let response = self.client.post(url).json(&request).send().await?;
+        let response = self.client.post(url).json(&request).send()?;
        if response.status() != StatusCode::OK {
            return Err(anyhow!("Unexpected status {}", response.status()));
        }

-        let response = response.json::<InspectResponse>().await?;
+        let response = response.json::<InspectResponse>()?;
        Ok(response.attachment)
    }
 }
--- a/control_plane/src/background_process.rs
+++ b/control_plane/src/background_process.rs
@@ -44,15 +44,15 @@ const NOTICE_AFTER_RETRIES: u64 = 50;

 /// Argument to `start_process`, to indicate whether it should create pidfile or if the process creates
 /// it itself.
-pub enum InitialPidFile {
+pub enum InitialPidFile<'t> {
    /// Create a pidfile, to allow future CLI invocations to manipulate the process.
-    Create(Utf8PathBuf),
+    Create(&'t Utf8Path),
    /// The process will create the pidfile itself, need to wait for that event.
-    Expect(Utf8PathBuf),
+    Expect(&'t Utf8Path),
 }

 /// Start a background child process using the parameters given.
-pub async fn start_process<F, Fut, AI, A, EI>(
+pub fn start_process<F, AI, A, EI>(
    process_name: &str,
    datadir: &Path,
    command: &Path,
@@ -62,8 +62,7 @@ pub async fn start_process<F, Fut, AI, A, EI>(
    process_status_check: F,
 ) -> anyhow::Result<Child>
 where
-    F: Fn() -> Fut,
-    Fut: std::future::Future<Output = anyhow::Result<bool>>,
+    F: Fn() -> anyhow::Result<bool>,
    AI: IntoIterator<Item = A>,
    A: AsRef<OsStr>,
    // Not generic AsRef<OsStr>, otherwise empty `envs` prevents type inference
@@ -90,7 +89,7 @@ where
    let filled_cmd = fill_remote_storage_secrets_vars(fill_rust_env_vars(background_command));
    filled_cmd.envs(envs);

-    let pid_file_to_check = match &initial_pid_file {
+    let pid_file_to_check = match initial_pid_file {
        InitialPidFile::Create(path) => {
            pre_exec_create_pidfile(filled_cmd, path);
            path
@@ -108,7 +107,7 @@ where
    );

    for retries in 0..RETRIES {
-        match process_started(pid, pid_file_to_check, &process_status_check).await {
+        match process_started(pid, Some(pid_file_to_check), &process_status_check) {
            Ok(true) => {
                println!("\n{process_name} started, pid: {pid}");
                return Ok(spawned_process);
@@ -317,20 +316,22 @@ where
    cmd
 }

-async fn process_started<F, Fut>(
+fn process_started<F>(
    pid: Pid,
-    pid_file_to_check: &Utf8Path,
+    pid_file_to_check: Option<&Utf8Path>,
    status_check: &F,
 ) -> anyhow::Result<bool>
 where
-    F: Fn() -> Fut,
-    Fut: std::future::Future<Output = anyhow::Result<bool>>,
+    F: Fn() -> anyhow::Result<bool>,
 {
-    match status_check().await {
-        Ok(true) => match pid_file::read(pid_file_to_check)? {
-            PidFileRead::NotExist => Ok(false),
-            PidFileRead::LockedByOtherProcess(pid_in_file) => Ok(pid_in_file == pid),
-            PidFileRead::NotHeldByAnyProcess(_) => Ok(false),
+    match status_check() {
+        Ok(true) => match pid_file_to_check {
+            Some(pid_file_path) => match pid_file::read(pid_file_path)? {
+                PidFileRead::NotExist => Ok(false),
+                PidFileRead::LockedByOtherProcess(pid_in_file) => Ok(pid_in_file == pid),
+                PidFileRead::NotHeldByAnyProcess(_) => Ok(false),
+            },
+            None => Ok(true),
        },
        Ok(false) => Ok(false),
        Err(e) => anyhow::bail!("process failed to start: {e}"),
--- a/control_plane/src/bin/attachment_service.rs
+++ b/control_plane/src/bin/attachment_service.rs
@@ -9,7 +9,6 @@ use clap::Parser;
 use hex::FromHex;
 use hyper::StatusCode;
 use hyper::{Body, Request, Response};
-use pageserver_api::shard::TenantShardId;
 use serde::{Deserialize, Serialize};
 use std::path::{Path, PathBuf};
 use std::{collections::HashMap, sync::Arc};
@@ -174,8 +173,7 @@ async fn handle_re_attach(mut req: Request<Body>) -> Result<Response<Body>, ApiE
        if state.pageserver == Some(reattach_req.node_id) {
            state.generation += 1;
            response.tenants.push(ReAttachResponseTenant {
-                // TODO(sharding): make this shard-aware
-                id: TenantShardId::unsharded(*t),
+                id: *t,
                gen: state.generation,
            });
        }
@@ -198,15 +196,8 @@ async fn handle_validate(mut req: Request<Body>) -> Result<Response<Body>, ApiEr
    };

    for req_tenant in validate_req.tenants {
-        // TODO(sharding): make this shard-aware
-        if let Some(tenant_state) = locked.tenants.get(&req_tenant.id.tenant_id) {
+        if let Some(tenant_state) = locked.tenants.get(&req_tenant.id) {
            let valid = tenant_state.generation == req_tenant.gen;
-            tracing::info!(
-                "handle_validate: {}(gen {}): valid={valid} (latest {})",
-                req_tenant.id,
-                req_tenant.gen,
-                tenant_state.generation
-            );
            response.tenants.push(ValidateResponseTenant {
                id: req_tenant.id,
                valid,
@@ -256,13 +247,6 @@ async fn handle_attach_hook(mut req: Request<Body>) -> Result<Response<Body>, Ap
    tenant_state.pageserver = attach_req.node_id;
    let generation = tenant_state.generation;

-    tracing::info!(
-        "handle_attach_hook: tenant {} set generation {}, pageserver {}",
-        attach_req.tenant_id,
-        tenant_state.generation,
-        attach_req.node_id.unwrap_or(utils::id::NodeId(0xfffffff))
-    );
-
    locked.save().await.map_err(ApiError::InternalServerError)?;

    json_response(
@@ -302,7 +286,6 @@ async fn main() -> anyhow::Result<()> {
    logging::init(
        LogFormat::Plain,
        logging::TracingErrorLayerEnablement::Disabled,
-        logging::Output::Stdout,
    )?;

    let args = Cli::parse();
--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -120,20 +120,15 @@ fn main() -> Result<()> {
        let mut env = LocalEnv::load_config().context("Error loading config")?;
        let original_env = env.clone();

-        let rt = tokio::runtime::Builder::new_current_thread()
-            .enable_all()
-            .build()
-            .unwrap();
-
        let subcommand_result = match sub_name {
-            "tenant" => rt.block_on(handle_tenant(sub_args, &mut env)),
-            "timeline" => rt.block_on(handle_timeline(sub_args, &mut env)),
-            "start" => rt.block_on(handle_start_all(sub_args, &env)),
+            "tenant" => handle_tenant(sub_args, &mut env),
+            "timeline" => handle_timeline(sub_args, &mut env),
+            "start" => handle_start_all(sub_args, &env),
            "stop" => handle_stop_all(sub_args, &env),
-            "pageserver" => rt.block_on(handle_pageserver(sub_args, &env)),
-            "attachment_service" => rt.block_on(handle_attachment_service(sub_args, &env)),
-            "safekeeper" => rt.block_on(handle_safekeeper(sub_args, &env)),
-            "endpoint" => rt.block_on(handle_endpoint(sub_args, &env)),
+            "pageserver" => handle_pageserver(sub_args, &env),
+            "attachment_service" => handle_attachment_service(sub_args, &env),
+            "safekeeper" => handle_safekeeper(sub_args, &env),
+            "endpoint" => handle_endpoint(sub_args, &env),
            "mappings" => handle_mappings(sub_args, &mut env),
            "pg" => bail!("'pg' subcommand has been renamed to 'endpoint'"),
            _ => bail!("unexpected subcommand {sub_name}"),
@@ -173,7 +168,7 @@ fn print_timelines_tree(
                    info: t.clone(),
                    children: BTreeSet::new(),
                    name: timeline_name_mappings
-                        .remove(&TenantTimelineId::new(t.tenant_id.tenant_id, t.timeline_id)),
+                        .remove(&TenantTimelineId::new(t.tenant_id, t.timeline_id)),
                },
            )
        })
@@ -274,13 +269,12 @@ fn print_timeline(

 /// Returns a map of timeline IDs to timeline_id@lsn strings.
 /// Connects to the pageserver to query this information.
-async fn get_timeline_infos(
+fn get_timeline_infos(
    env: &local_env::LocalEnv,
    tenant_id: &TenantId,
 ) -> Result<HashMap<TimelineId, TimelineInfo>> {
    Ok(get_default_pageserver(env)
-        .timeline_list(tenant_id)
-        .await?
+        .timeline_list(tenant_id)?
        .into_iter()
        .map(|timeline_info| (timeline_info.timeline_id, timeline_info))
        .collect())
@@ -379,14 +373,11 @@ fn pageserver_config_overrides(init_match: &ArgMatches) -> Vec<&str> {
        .collect()
 }

-async fn handle_tenant(
-    tenant_match: &ArgMatches,
-    env: &mut local_env::LocalEnv,
-) -> anyhow::Result<()> {
+fn handle_tenant(tenant_match: &ArgMatches, env: &mut local_env::LocalEnv) -> anyhow::Result<()> {
    let pageserver = get_default_pageserver(env);
    match tenant_match.subcommand() {
        Some(("list", _)) => {
-            for t in pageserver.tenant_list().await? {
+            for t in pageserver.tenant_list()? {
                println!("{} {:?}", t.id, t.state);
            }
        }
@@ -403,16 +394,12 @@ async fn handle_tenant(
                // We must register the tenant with the attachment service, so
                // that when the pageserver restarts, it will be re-attached.
                let attachment_service = AttachmentService::from_env(env);
-                attachment_service
-                    .attach_hook(tenant_id, pageserver.conf.id)
-                    .await?
+                attachment_service.attach_hook(tenant_id, pageserver.conf.id)?
            } else {
                None
            };

-            pageserver
-                .tenant_create(tenant_id, generation, tenant_conf)
-                .await?;
+            pageserver.tenant_create(tenant_id, generation, tenant_conf)?;
            println!("tenant {tenant_id} successfully created on the pageserver");

            // Create an initial timeline for the new tenant
@@ -422,16 +409,13 @@ async fn handle_tenant(
                .copied()
                .context("Failed to parse postgres version from the argument string")?;

-            let timeline_info = pageserver
-                .timeline_create(
-                    tenant_id,
-                    new_timeline_id,
-                    None,
-                    None,
-                    Some(pg_version),
-                    None,
-                )
-                .await?;
+            let timeline_info = pageserver.timeline_create(
+                tenant_id,
+                new_timeline_id,
+                None,
+                None,
+                Some(pg_version),
+            )?;
            let new_timeline_id = timeline_info.timeline_id;
            let last_record_lsn = timeline_info.last_record_lsn;

@@ -465,7 +449,6 @@ async fn handle_tenant(

            pageserver
                .tenant_config(tenant_id, tenant_conf)
-                .await
                .with_context(|| format!("Tenant config failed for tenant with id {tenant_id}"))?;
            println!("tenant {tenant_id} successfully configured on the pageserver");
        }
@@ -474,7 +457,7 @@ async fn handle_tenant(
            let new_pageserver = get_pageserver(env, matches)?;
            let new_pageserver_id = new_pageserver.conf.id;

-            migrate_tenant(env, tenant_id, new_pageserver).await?;
+            migrate_tenant(env, tenant_id, new_pageserver)?;
            println!("tenant {tenant_id} migrated to {}", new_pageserver_id);
        }

@@ -484,13 +467,13 @@ async fn handle_tenant(
    Ok(())
 }

-async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -> Result<()> {
+fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -> Result<()> {
    let pageserver = get_default_pageserver(env);

    match timeline_match.subcommand() {
        Some(("list", list_match)) => {
            let tenant_id = get_tenant_id(list_match, env)?;
-            let timelines = pageserver.timeline_list(&tenant_id).await?;
+            let timelines = pageserver.timeline_list(&tenant_id)?;
            print_timelines_tree(timelines, env.timeline_name_mappings())?;
        }
        Some(("create", create_match)) => {
@@ -504,18 +487,8 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local
                .copied()
                .context("Failed to parse postgres version from the argument string")?;

-            let new_timeline_id_opt = parse_timeline_id(create_match)?;
-
-            let timeline_info = pageserver
-                .timeline_create(
-                    tenant_id,
-                    new_timeline_id_opt,
-                    None,
-                    None,
-                    Some(pg_version),
-                    None,
-                )
-                .await?;
+            let timeline_info =
+                pageserver.timeline_create(tenant_id, None, None, None, Some(pg_version))?;
            let new_timeline_id = timeline_info.timeline_id;

            let last_record_lsn = timeline_info.last_record_lsn;
@@ -560,9 +533,7 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local

            let mut cplane = ComputeControlPlane::load(env.clone())?;
            println!("Importing timeline into pageserver ...");
-            pageserver
-                .timeline_import(tenant_id, timeline_id, base, pg_wal, pg_version)
-                .await?;
+            pageserver.timeline_import(tenant_id, timeline_id, base, pg_wal, pg_version)?;
            env.register_branch_mapping(name.to_string(), tenant_id, timeline_id)?;

            println!("Creating endpoint for imported timeline ...");
@@ -598,16 +569,13 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local
                .map(|lsn_str| Lsn::from_str(lsn_str))
                .transpose()
                .context("Failed to parse ancestor start Lsn from the request")?;
-            let timeline_info = pageserver
-                .timeline_create(
-                    tenant_id,
-                    None,
-                    start_lsn,
-                    Some(ancestor_timeline_id),
-                    None,
-                    None,
-                )
-                .await?;
+            let timeline_info = pageserver.timeline_create(
+                tenant_id,
+                None,
+                start_lsn,
+                Some(ancestor_timeline_id),
+                None,
+            )?;
            let new_timeline_id = timeline_info.timeline_id;

            let last_record_lsn = timeline_info.last_record_lsn;
@@ -626,22 +594,22 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local
    Ok(())
 }

-async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
+fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
    let (sub_name, sub_args) = match ep_match.subcommand() {
        Some(ep_subcommand_data) => ep_subcommand_data,
        None => bail!("no endpoint subcommand provided"),
    };
    let mut cplane = ComputeControlPlane::load(env.clone())?;

+    // All subcommands take an optional --tenant-id option
+    let tenant_id = get_tenant_id(sub_args, env)?;
+
    match sub_name {
        "list" => {
-            let tenant_id = get_tenant_id(sub_args, env)?;
-            let timeline_infos = get_timeline_infos(env, &tenant_id)
-                .await
-                .unwrap_or_else(|e| {
-                    eprintln!("Failed to load timeline info: {}", e);
-                    HashMap::new()
-                });
+            let timeline_infos = get_timeline_infos(env, &tenant_id).unwrap_or_else(|e| {
+                eprintln!("Failed to load timeline info: {}", e);
+                HashMap::new()
+            });

            let timeline_name_mappings = env.timeline_name_mappings();

@@ -697,7 +665,6 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
            println!("{table}");
        }
        "create" => {
-            let tenant_id = get_tenant_id(sub_args, env)?;
            let branch_name = sub_args
                .get_one::<String>("branch-name")
                .map(|s| s.as_str())
@@ -742,18 +709,6 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
                (Some(_), true) => anyhow::bail!("cannot specify both lsn and hot-standby"),
            };

-            match (mode, hot_standby) {
-                (ComputeMode::Static(_), true) => {
-                    bail!("Cannot start a node in hot standby mode when it is already configured as a static replica")
-                }
-                (ComputeMode::Primary, true) => {
-                    bail!("Cannot start a node as a hot standby replica, it is already configured as primary node")
-                }
-                _ => {}
-            }
-
-            cplane.check_conflicting_endpoints(mode, tenant_id, timeline_id)?;
-
            cplane.new_endpoint(
                &endpoint_id,
                tenant_id,
@@ -766,6 +721,8 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
            )?;
        }
        "start" => {
+            let pg_port: Option<u16> = sub_args.get_one::<u16>("pg-port").copied();
+            let http_port: Option<u16> = sub_args.get_one::<u16>("http-port").copied();
            let endpoint_id = sub_args
                .get_one::<String>("endpoint_id")
                .ok_or_else(|| anyhow!("No endpoint ID was provided to start"))?;
@@ -794,30 +751,80 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
                    env.safekeepers.iter().map(|sk| sk.id).collect()
                };

-            let endpoint = cplane
-                .endpoints
-                .get(endpoint_id.as_str())
-                .ok_or_else(|| anyhow::anyhow!("endpoint {endpoint_id} not found"))?;
-
-            cplane.check_conflicting_endpoints(
-                endpoint.mode,
-                endpoint.tenant_id,
-                endpoint.timeline_id,
-            )?;
+            let endpoint = cplane.endpoints.get(endpoint_id.as_str());

            let ps_conf = env.get_pageserver_conf(pageserver_id)?;
            let auth_token = if matches!(ps_conf.pg_auth_type, AuthType::NeonJWT) {
-                let claims = Claims::new(Some(endpoint.tenant_id), Scope::Tenant);
+                let claims = Claims::new(Some(tenant_id), Scope::Tenant);

                Some(env.generate_auth_token(&claims)?)
            } else {
                None
            };

-            println!("Starting existing endpoint {endpoint_id}...");
-            endpoint
-                .start(&auth_token, safekeepers, remote_ext_config)
-                .await?;
+            let hot_standby = sub_args
+                .get_one::<bool>("hot-standby")
+                .copied()
+                .unwrap_or(false);
+
+            if let Some(endpoint) = endpoint {
+                match (&endpoint.mode, hot_standby) {
+                    (ComputeMode::Static(_), true) => {
+                        bail!("Cannot start a node in hot standby mode when it is already configured as a static replica")
+                    }
+                    (ComputeMode::Primary, true) => {
+                        bail!("Cannot start a node as a hot standby replica, it is already configured as primary node")
+                    }
+                    _ => {}
+                }
+                println!("Starting existing endpoint {endpoint_id}...");
+                endpoint.start(&auth_token, safekeepers, remote_ext_config)?;
+            } else {
+                let branch_name = sub_args
+                    .get_one::<String>("branch-name")
+                    .map(|s| s.as_str())
+                    .unwrap_or(DEFAULT_BRANCH_NAME);
+                let timeline_id = env
+                    .get_branch_timeline_id(branch_name, tenant_id)
+                    .ok_or_else(|| {
+                        anyhow!("Found no timeline id for branch name '{branch_name}'")
+                    })?;
+                let lsn = sub_args
+                    .get_one::<String>("lsn")
+                    .map(|lsn_str| Lsn::from_str(lsn_str))
+                    .transpose()
+                    .context("Failed to parse Lsn from the request")?;
+                let pg_version = sub_args
+                    .get_one::<u32>("pg-version")
+                    .copied()
+                    .context("Failed to `pg-version` from the argument string")?;
+
+                let mode = match (lsn, hot_standby) {
+                    (Some(lsn), false) => ComputeMode::Static(lsn),
+                    (None, true) => ComputeMode::Replica,
+                    (None, false) => ComputeMode::Primary,
+                    (Some(_), true) => anyhow::bail!("cannot specify both lsn and hot-standby"),
+                };
+
+                // NOTE: when used with a custom port this results in non-obvious behaviour:
+                // the port is remembered from the first start command, i.e.
+                //   start --port X
+                //   stop
+                //   start   <-- will also use port X, even without an explicit port argument
+                println!("Starting new endpoint {endpoint_id} (PostgreSQL v{pg_version}) on timeline {timeline_id} ...");
+
+                let ep = cplane.new_endpoint(
+                    endpoint_id,
+                    tenant_id,
+                    timeline_id,
+                    pg_port,
+                    http_port,
+                    pg_version,
+                    mode,
+                    pageserver_id,
+                )?;
+                ep.start(&auth_token, safekeepers, remote_ext_config)?;
+            }
        }
        "reconfigure" => {
            let endpoint_id = sub_args
@@ -835,7 +842,7 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
                } else {
                    None
                };
-            endpoint.reconfigure(pageserver_id).await?;
+            endpoint.reconfigure(pageserver_id)?;
        }
        "stop" => {
            let endpoint_id = sub_args
@@ -901,12 +908,11 @@ fn get_pageserver(env: &local_env::LocalEnv, args: &ArgMatches) -> Result<PageSe
    ))
 }

-async fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
+fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
    match sub_match.subcommand() {
        Some(("start", subcommand_args)) => {
            if let Err(e) = get_pageserver(env, subcommand_args)?
                .start(&pageserver_config_overrides(subcommand_args))
-                .await
            {
                eprintln!("pageserver start failed: {e}");
                exit(1);
@@ -933,10 +939,7 @@ async fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->
                exit(1);
            }

-            if let Err(e) = pageserver
-                .start(&pageserver_config_overrides(subcommand_args))
-                .await
-            {
+            if let Err(e) = pageserver.start(&pageserver_config_overrides(subcommand_args)) {
                eprintln!("pageserver start failed: {e}");
                exit(1);
            }
@@ -950,17 +953,14 @@ async fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->
                exit(1);
            }

-            if let Err(e) = pageserver
-                .start(&pageserver_config_overrides(subcommand_args))
-                .await
-            {
+            if let Err(e) = pageserver.start(&pageserver_config_overrides(subcommand_args)) {
                eprintln!("pageserver start failed: {e}");
                exit(1);
            }
        }

        Some(("status", subcommand_args)) => {
-            match get_pageserver(env, subcommand_args)?.check_status().await {
+            match get_pageserver(env, subcommand_args)?.check_status() {
                Ok(_) => println!("Page server is up and running"),
                Err(err) => {
                    eprintln!("Page server is not available: {}", err);
@@ -975,14 +975,11 @@ async fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->
    Ok(())
 }

-async fn handle_attachment_service(
-    sub_match: &ArgMatches,
-    env: &local_env::LocalEnv,
-) -> Result<()> {
+fn handle_attachment_service(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
    let svc = AttachmentService::from_env(env);
    match sub_match.subcommand() {
        Some(("start", _start_match)) => {
-            if let Err(e) = svc.start().await {
+            if let Err(e) = svc.start() {
                eprintln!("start failed: {e}");
                exit(1);
            }
@@ -1023,7 +1020,7 @@ fn safekeeper_extra_opts(init_match: &ArgMatches) -> Vec<String> {
        .collect()
 }

-async fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
+fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
    let (sub_name, sub_args) = match sub_match.subcommand() {
        Some(safekeeper_command_data) => safekeeper_command_data,
        None => bail!("no safekeeper subcommand provided"),
@@ -1041,7 +1038,7 @@ async fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->
        "start" => {
            let extra_opts = safekeeper_extra_opts(sub_args);

-            if let Err(e) = safekeeper.start(extra_opts).await {
+            if let Err(e) = safekeeper.start(extra_opts) {
                eprintln!("safekeeper start failed: {}", e);
                exit(1);
            }
@@ -1067,7 +1064,7 @@ async fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->
            }

            let extra_opts = safekeeper_extra_opts(sub_args);
-            if let Err(e) = safekeeper.start(extra_opts).await {
+            if let Err(e) = safekeeper.start(extra_opts) {
                eprintln!("safekeeper start failed: {}", e);
                exit(1);
            }
@@ -1080,15 +1077,15 @@ async fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->
    Ok(())
 }

-async fn handle_start_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> anyhow::Result<()> {
+fn handle_start_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> anyhow::Result<()> {
    // Endpoints are not started automatically

-    broker::start_broker_process(env).await?;
+    broker::start_broker_process(env)?;

    // Only start the attachment service if the pageserver is configured to need it
    if env.control_plane_api.is_some() {
        let attachment_service = AttachmentService::from_env(env);
-        if let Err(e) = attachment_service.start().await {
+        if let Err(e) = attachment_service.start() {
            eprintln!("attachment_service start failed: {:#}", e);
            try_stop_all(env, true);
            exit(1);
@@ -1097,10 +1094,7 @@ async fn handle_start_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->

    for ps_conf in &env.pageservers {
        let pageserver = PageServerNode::from_env(env, ps_conf);
-        if let Err(e) = pageserver
-            .start(&pageserver_config_overrides(sub_match))
-            .await
-        {
+        if let Err(e) = pageserver.start(&pageserver_config_overrides(sub_match)) {
            eprintln!("pageserver {} start failed: {:#}", ps_conf.id, e);
            try_stop_all(env, true);
            exit(1);
@@ -1109,7 +1103,7 @@ async fn handle_start_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->

    for node in env.safekeepers.iter() {
        let safekeeper = SafekeeperNode::from_env(env, node);
-        if let Err(e) = safekeeper.start(vec![]).await {
+        if let Err(e) = safekeeper.start(vec![]) {
            eprintln!("safekeeper {} start failed: {:#}", safekeeper.id, e);
            try_stop_all(env, false);
            exit(1);
@@ -1251,7 +1245,7 @@ fn cli() -> Command {
    let remote_ext_config_args = Arg::new("remote-ext-config")
        .long("remote-ext-config")
        .num_args(1)
-        .help("Configure the remote extensions storage proxy gateway to request for extensions.")
+        .help("Configure the S3 bucket that we search for extensions in.")
        .required(false);

    let lsn_arg = Arg::new("lsn")
@@ -1314,7 +1308,6 @@ fn cli() -> Command {
            .subcommand(Command::new("create")
                .about("Create a new blank timeline")
                .arg(tenant_id_arg.clone())
-                .arg(timeline_id_arg.clone())
                .arg(branch_name_arg.clone())
                .arg(pg_version_arg.clone())
            )
@@ -1436,7 +1429,15 @@ fn cli() -> Command {
                .subcommand(Command::new("start")
                    .about("Start postgres.\n If the endpoint doesn't exist yet, it is created.")
                    .arg(endpoint_id_arg.clone())
+                    .arg(tenant_id_arg.clone())
+                    .arg(branch_name_arg.clone())
+                    .arg(timeline_id_arg.clone())
+                    .arg(lsn_arg)
+                    .arg(pg_port_arg)
+                    .arg(http_port_arg)
                    .arg(endpoint_pageserver_id_arg.clone())
+                    .arg(pg_version_arg)
+                    .arg(hot_standby_arg)
                    .arg(safekeepers_arg)
                    .arg(remote_ext_config_args)
                )
@@ -1449,6 +1450,7 @@ fn cli() -> Command {
                .subcommand(
                    Command::new("stop")
                    .arg(endpoint_id_arg)
+                    .arg(tenant_id_arg.clone())
                    .arg(
                        Arg::new("destroy")
                            .help("Also delete data directory (now optional, should be default in future)")
--- a/control_plane/src/broker.rs
+++ b/control_plane/src/broker.rs
@@ -11,7 +11,7 @@ use camino::Utf8PathBuf;

 use crate::{background_process, local_env};

-pub async fn start_broker_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
+pub fn start_broker_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
    let broker = &env.broker;
    let listen_addr = &broker.listen_addr;

@@ -19,15 +19,15 @@ pub async fn start_broker_process(env: &local_env::LocalEnv) -> anyhow::Result<(

    let args = [format!("--listen-addr={listen_addr}")];

-    let client = reqwest::Client::new();
+    let client = reqwest::blocking::Client::new();
    background_process::start_process(
        "storage_broker",
        &env.base_data_dir,
        &env.storage_broker_bin(),
        args,
        [],
-        background_process::InitialPidFile::Create(storage_broker_pid_file_path(env)),
-        || async {
+        background_process::InitialPidFile::Create(&storage_broker_pid_file_path(env)),
+        || {
            let url = broker.client_url();
            let status_url = url.join("status").with_context(|| {
                format!("Failed to append /status path to broker endpoint {url}")
@@ -36,13 +36,12 @@ pub async fn start_broker_process(env: &local_env::LocalEnv) -> anyhow::Result<(
                .get(status_url)
                .build()
                .with_context(|| format!("Failed to construct request to broker endpoint {url}"))?;
-            match client.execute(request).await {
+            match client.execute(request) {
                Ok(resp) => Ok(resp.status().is_success()),
                Err(_) => Ok(false),
            }
        },
    )
-    .await
    .context("Failed to spawn storage_broker subprocess")?;
    Ok(())
 }
--- a/control_plane/src/endpoint.rs
+++ b/control_plane/src/endpoint.rs
@@ -45,7 +45,6 @@ use std::sync::Arc;
 use std::time::Duration;

 use anyhow::{anyhow, bail, Context, Result};
-use compute_api::spec::RemoteExtSpec;
 use serde::{Deserialize, Serialize};
 use utils::id::{NodeId, TenantId, TimelineId};

@@ -125,7 +124,6 @@ impl ComputeControlPlane {
        let http_port = http_port.unwrap_or_else(|| self.get_port() + 1);
        let pageserver =
            PageServerNode::from_env(&self.env, self.env.get_pageserver_conf(pageserver_id)?);
-
        let ep = Arc::new(Endpoint {
            endpoint_id: endpoint_id.to_owned(),
            pg_address: SocketAddr::new("127.0.0.1".parse().unwrap(), pg_port),
@@ -170,30 +168,6 @@ impl ComputeControlPlane {

        Ok(ep)
    }
-
-    pub fn check_conflicting_endpoints(
-        &self,
-        mode: ComputeMode,
-        tenant_id: TenantId,
-        timeline_id: TimelineId,
-    ) -> Result<()> {
-        if matches!(mode, ComputeMode::Primary) {
-            // this check is not complete, as you could have a concurrent attempt at
-            // creating another primary, both reading the state before checking it here,
-            // but it's better than nothing.
-            let mut duplicates = self.endpoints.iter().filter(|(_k, v)| {
-                v.tenant_id == tenant_id
-                    && v.timeline_id == timeline_id
-                    && v.mode == mode
-                    && v.status() != "stopped"
-            });
-
-            if let Some((key, _)) = duplicates.next() {
-                bail!("attempting to create a duplicate primary endpoint on tenant {tenant_id}, timeline {timeline_id}: endpoint {key:?} exists already. please don't do this, it is not supported.");
-            }
-        }
-        Ok(())
-    }
 }

 ///////////////////////////////////////////////////////////////////////////////
@@ -464,7 +438,7 @@ impl Endpoint {
        }
    }

-    pub async fn start(
+    pub fn start(
        &self,
        auth_token: &Option<String>,
        safekeepers: Vec<NodeId>,
@@ -502,24 +476,11 @@ impl Endpoint {
            }
        }

-        // check for file remote_extensions_spec.json
-        // if it is present, read it and pass to compute_ctl
-        let remote_extensions_spec_path = self.endpoint_path().join("remote_extensions_spec.json");
-        let remote_extensions_spec = std::fs::File::open(remote_extensions_spec_path);
-        let remote_extensions: Option<RemoteExtSpec>;
-
-        if let Ok(spec_file) = remote_extensions_spec {
-            remote_extensions = serde_json::from_reader(spec_file).ok();
-        } else {
-            remote_extensions = None;
-        };
-
        // Create spec file
        let spec = ComputeSpec {
            skip_pg_catalog_updates: self.skip_pg_catalog_updates,
            format_version: 1.0,
            operation_uuid: None,
-            features: vec![],
            cluster: Cluster {
                cluster_id: None, // project ID: not used
                name: None,       // project name: not used
@@ -536,7 +497,7 @@ impl Endpoint {
            pageserver_connstring: Some(pageserver_connstring),
            safekeeper_connstrings,
            storage_auth_token: auth_token.clone(),
-            remote_extensions,
+            remote_extensions: None,
        };
        let spec_path = self.endpoint_path().join("spec.json");
        std::fs::write(spec_path, serde_json::to_string_pretty(&spec)?)?;
@@ -587,7 +548,7 @@ impl Endpoint {
        const MAX_ATTEMPTS: u32 = 10 * 30; // Wait up to 30 s
        loop {
            attempt += 1;
-            match self.get_status().await {
+            match self.get_status() {
                Ok(state) => {
                    match state.status {
                        ComputeStatus::Init => {
@@ -629,8 +590,8 @@ impl Endpoint {
    }

    // Call the /status HTTP API
-    pub async fn get_status(&self) -> Result<ComputeState> {
-        let client = reqwest::Client::new();
+    pub fn get_status(&self) -> Result<ComputeState> {
+        let client = reqwest::blocking::Client::new();

        let response = client
            .request(
@@ -641,17 +602,16 @@ impl Endpoint {
                    self.http_address.port()
                ),
            )
-            .send()
-            .await?;
+            .send()?;

        // Interpret the response
        let status = response.status();
        if !(status.is_client_error() || status.is_server_error()) {
-            Ok(response.json().await?)
+            Ok(response.json()?)
        } else {
            // reqwest does not export its error construction utility functions, so let's craft the message ourselves
            let url = response.url().to_owned();
-            let msg = match response.text().await {
+            let msg = match response.text() {
                Ok(err_body) => format!("Error: {}", err_body),
                Err(_) => format!("Http error ({}) at {}.", status.as_u16(), url),
            };
@@ -659,7 +619,7 @@ impl Endpoint {
        }
    }

-    pub async fn reconfigure(&self, pageserver_id: Option<NodeId>) -> Result<()> {
+    pub fn reconfigure(&self, pageserver_id: Option<NodeId>) -> Result<()> {
        let mut spec: ComputeSpec = {
            let spec_path = self.endpoint_path().join("spec.json");
            let file = std::fs::File::open(spec_path)?;
@@ -688,7 +648,7 @@ impl Endpoint {
            spec.pageserver_connstring = Some(format!("postgresql://no_user@{host}:{port}"));
        }

-        let client = reqwest::Client::new();
+        let client = reqwest::blocking::Client::new();
        let response = client
            .post(format!(
                "http://{}:{}/configure",
@@ -699,15 +659,14 @@ impl Endpoint {
                "{{\"spec\":{}}}",
                serde_json::to_string_pretty(&spec)?
            ))
-            .send()
-            .await?;
+            .send()?;

        let status = response.status();
        if !(status.is_client_error() || status.is_server_error()) {
            Ok(())
        } else {
            let url = response.url().to_owned();
-            let msg = match response.text().await {
+            let msg = match response.text() {
                Ok(err_body) => format!("Error: {}", err_body),
                Err(_) => format!("Http error ({}) at {}.", status.as_u16(), url),
            };
--- a/control_plane/src/pageserver.rs
+++ b/control_plane/src/pageserver.rs
@@ -6,24 +6,27 @@
 //!
 use std::borrow::Cow;
 use std::collections::HashMap;
-
-use std::io;
-use std::io::Write;
+use std::fs::File;
+use std::io::{BufReader, Write};
 use std::num::NonZeroU64;
 use std::path::PathBuf;
 use std::process::{Child, Command};
-use std::time::Duration;
+use std::{io, result};

 use anyhow::{bail, Context};
 use camino::Utf8PathBuf;
-use futures::SinkExt;
-use pageserver_api::models::{self, LocationConfig, TenantInfo, TimelineInfo};
+use pageserver_api::models::{
+    self, LocationConfig, TenantInfo, TenantLocationConfigRequest, TimelineInfo,
+};
 use pageserver_api::shard::TenantShardId;
-use pageserver_client::mgmt_api;
 use postgres_backend::AuthType;
 use postgres_connection::{parse_host_port, PgConnectionConfig};
+use reqwest::blocking::{Client, RequestBuilder, Response};
+use reqwest::{IntoUrl, Method};
+use thiserror::Error;
 use utils::auth::{Claims, Scope};
 use utils::{
+    http::error::HttpErrorBody,
    id::{TenantId, TimelineId},
    lsn::Lsn,
 };
@@ -34,6 +37,45 @@ use crate::{background_process, local_env::LocalEnv};
 /// Directory within .neon which will be used by default for LocalFs remote storage.
 pub const PAGESERVER_REMOTE_STORAGE_DIR: &str = "local_fs_remote_storage/pageserver";

+#[derive(Error, Debug)]
+pub enum PageserverHttpError { // error type returned by the pageserver HTTP helpers in this file
+    #[error("Reqwest error: {0}")]
+    Transport(#[from] reqwest::Error), // a reqwest failure; `#[from]` lets `?` convert it
+
+    #[error("Error: {0}")]
+    Response(String), // any other failure as a message; displayed with an "Error: " prefix
+}
+
+impl From<anyhow::Error> for PageserverHttpError {
+    fn from(e: anyhow::Error) -> Self {
+        Self::Response(format!("{e:#}")) // `{:#}` keeps the full context chain, not just the outermost message
+    }
+}
+
+type Result<T> = result::Result<T, PageserverHttpError>; // shadows the prelude `Result` in this file; anyhow results are spelled `anyhow::Result`
+
+pub trait ResponseErrorMessageExt: Sized { // extension trait; implemented in this file for reqwest's blocking `Response`
+    fn error_from_body(self) -> Result<Self>; // takes `self` by value; the `Response` impl returns it unchanged unless the status is 4xx/5xx
+}
+
+impl ResponseErrorMessageExt for Response {
+    /// Returns `Ok(self)` unless the status is 4xx/5xx; then consumes the body into a `Response` error.
+    fn error_from_body(self) -> Result<Self> {
+        let status = self.status();
+        if !(status.is_client_error() || status.is_server_error()) {
+            return Ok(self);
+        }
+        // reqwest does not export its error construction utility functions, so let's craft the message ourselves
+        let url = self.url().to_owned();
+        Err(PageserverHttpError::Response(
+            match self.json::<HttpErrorBody>() {
+                Ok(err_body) => err_body.msg.to_string(), // no "Error: " here: Display of `Response` already adds it
+                Err(_) => format!("Http error ({}) at {}.", status.as_u16(), url),
+            },
+        ))
+    }
+}
+
 //
 // Control routines for pageserver.
 //
@@ -44,7 +86,8 @@ pub struct PageServerNode {
    pub pg_connection_config: PgConnectionConfig,
    pub conf: PageServerConf,
    pub env: LocalEnv,
-    pub http_client: mgmt_api::Client,
+    pub http_client: Client,
+    pub http_base_url: String,
 }

 impl PageServerNode {
@@ -56,19 +99,8 @@ impl PageServerNode {
            pg_connection_config: PgConnectionConfig::new_host_port(host, port),
            conf: conf.clone(),
            env: env.clone(),
-            http_client: mgmt_api::Client::new(
-                format!("http://{}", conf.listen_http_addr),
-                {
-                    match conf.http_auth_type {
-                        AuthType::Trust => None,
-                        AuthType::NeonJWT => Some(
-                            env.generate_auth_token(&Claims::new(None, Scope::PageServerApi))
-                                .unwrap(),
-                        ),
-                    }
-                }
-                .as_deref(),
-            ),
+            http_client: Client::new(),
+            http_base_url: format!("http://{}/v1", conf.listen_http_addr),
        }
    }

@@ -149,8 +181,8 @@ impl PageServerNode {
            .expect("non-Unicode path")
    }

-    pub async fn start(&self, config_overrides: &[&str]) -> anyhow::Result<Child> {
-        self.start_node(config_overrides, false).await
+    pub fn start(&self, config_overrides: &[&str]) -> anyhow::Result<Child> {
+        self.start_node(config_overrides, false) // `false` is `update_config`: do not pass `--update-config`
    }

    fn pageserver_init(&self, config_overrides: &[&str]) -> anyhow::Result<()> {
@@ -191,12 +223,7 @@ impl PageServerNode {
        Ok(())
    }

-    async fn start_node(
-        &self,
-        config_overrides: &[&str],
-        update_config: bool,
-    ) -> anyhow::Result<Child> {
-        // TODO: using a thread here because start_process() is not async but we need to call check_status()
+    fn start_node(&self, config_overrides: &[&str], update_config: bool) -> anyhow::Result<Child> {
        let datadir = self.repo_path();
        print!(
            "Starting pageserver node {} at '{}' in {:?}",
@@ -204,7 +231,7 @@ impl PageServerNode {
            self.pg_connection_config.raw_address(),
            datadir
        );
-        io::stdout().flush().context("flush stdout")?;
+        io::stdout().flush()?;

        let datadir_path_str = datadir.to_str().with_context(|| {
            format!(
@@ -216,23 +243,20 @@ impl PageServerNode {
        if update_config {
            args.push(Cow::Borrowed("--update-config"));
        }
+
        background_process::start_process(
            "pageserver",
            &datadir,
            &self.env.pageserver_bin(),
            args.iter().map(Cow::as_ref),
            self.pageserver_env_variables()?,
-            background_process::InitialPidFile::Expect(self.pid_file()),
-            || async {
-                let st = self.check_status().await;
-                match st {
-                    Ok(()) => Ok(true),
-                    Err(mgmt_api::Error::ReceiveBody(_)) => Ok(false),
-                    Err(e) => Err(anyhow::anyhow!("Failed to check node status: {e}")),
-                }
+            background_process::InitialPidFile::Expect(&self.pid_file()),
+            || match self.check_status() {
+                Ok(()) => Ok(true),
+                Err(PageserverHttpError::Transport(_)) => Ok(false),
+                Err(e) => Err(anyhow::anyhow!("Failed to check node status: {e}")),
            },
        )
-        .await
    }

    fn pageserver_basic_args<'a>(
@@ -278,12 +302,7 @@ impl PageServerNode {
        background_process::stop_process(immediate, "pageserver", &self.pid_file())
    }

-    pub async fn page_server_psql_client(
-        &self,
-    ) -> anyhow::Result<(
-        tokio_postgres::Client,
-        tokio_postgres::Connection<tokio_postgres::Socket, tokio_postgres::tls::NoTlsStream>,
-    )> {
+    pub fn page_server_psql_client(&self) -> anyhow::Result<postgres::Client> {
        let mut config = self.pg_connection_config.clone();
        if self.conf.pg_auth_type == AuthType::NeonJWT {
            let token = self
@@ -291,18 +310,36 @@ impl PageServerNode {
                .generate_auth_token(&Claims::new(None, Scope::PageServerApi))?;
            config = config.set_password(Some(token));
        }
-        Ok(config.connect_no_tls().await?)
+        Ok(config.connect_no_tls()?)
    }

-    pub async fn check_status(&self) -> mgmt_api::Result<()> {
-        self.http_client.status().await
+    /// Starts a request on `self.http_client`; adds a bearer token when HTTP auth is `NeonJWT`.
+    fn http_request<U: IntoUrl>(&self, method: Method, url: U) -> anyhow::Result<RequestBuilder> {
+        let request = self.http_client.request(method, url);
+        if self.conf.http_auth_type != AuthType::NeonJWT {
+            return Ok(request);
+        }
+        let claims = Claims::new(None, Scope::PageServerApi);
+        let token = self.env.generate_auth_token(&claims)?;
+        Ok(request.bearer_auth(token))
    }

-    pub async fn tenant_list(&self) -> mgmt_api::Result<Vec<TenantInfo>> {
-        self.http_client.list_tenants().await
+    /// GETs `{http_base_url}/status`; `Ok(())` unless the request fails or the status is 4xx/5xx.
+    pub fn check_status(&self) -> Result<()> {
+        let status_url = format!("{}/status", self.http_base_url);
+        let response = self.http_request(Method::GET, status_url)?.send()?;
+        response.error_from_body().map(|_| ())
    }

-    pub async fn tenant_create(
+    /// GETs `{http_base_url}/tenant` and deserializes the JSON body into a list of `TenantInfo`.
+    pub fn tenant_list(&self) -> Result<Vec<TenantInfo>> {
+        let url = format!("{}/tenant", self.http_base_url);
+        let response = self.http_request(Method::GET, url)?.send()?;
+        let tenants: Vec<TenantInfo> = response.error_from_body()?.json()?;
+        Ok(tenants)
+    }
+
+    pub fn tenant_create(
        &self,
        new_tenant_id: TenantId,
        generation: Option<u32>,
@@ -369,7 +406,6 @@ impl PageServerNode {
                .map(|x| x.parse::<bool>())
                .transpose()
                .context("Failed to parse 'gc_feedback' as bool")?,
-            heatmap_period: settings.remove("heatmap_period").map(|x| x.to_string()),
        };

        let request = models::TenantCreateRequest {
@@ -380,10 +416,23 @@ impl PageServerNode {
        if !settings.is_empty() {
            bail!("Unrecognized tenant settings: {settings:?}")
        }
-        Ok(self.http_client.tenant_create(&request).await?)
+        self.http_request(Method::POST, format!("{}/tenant", self.http_base_url))?
+            .json(&request)
+            .send()?
+            .error_from_body()?
+            .json::<Option<String>>()
+            .with_context(|| {
+                format!("Failed to parse tenant creation response for tenant id: {new_tenant_id:?}")
+            })?
+            .context("No tenant id was found in the tenant creation response")
+            .and_then(|tenant_id_string| {
+                tenant_id_string.parse().with_context(|| {
+                    format!("Failed to parse response string as tenant id: '{tenant_id_string}'")
+                })
+            })
    }

-    pub async fn tenant_config(
+    pub fn tenant_config(
        &self,
        tenant_id: TenantId,
        mut settings: HashMap<&str, &str>,
@@ -454,7 +503,6 @@ impl PageServerNode {
                    .map(|x| x.parse::<bool>())
                    .transpose()
                    .context("Failed to parse 'gc_feedback' as bool")?,
-                heatmap_period: settings.remove("heatmap_period").map(|x| x.to_string()),
            }
        };

@@ -462,48 +510,80 @@ impl PageServerNode {
            bail!("Unrecognized tenant settings: {settings:?}")
        }

-        self.http_client
-            .tenant_config(&models::TenantConfigRequest { tenant_id, config })
-            .await?;
+        self.http_request(Method::PUT, format!("{}/tenant/config", self.http_base_url))?
+            .json(&models::TenantConfigRequest { tenant_id, config })
+            .send()?
+            .error_from_body()?;

        Ok(())
    }

-    pub async fn location_config(
+    pub fn location_config(
        &self,
        tenant_id: TenantId,
        config: LocationConfig,
-        flush_ms: Option<Duration>,
    ) -> anyhow::Result<()> {
-        Ok(self
-            .http_client
-            .location_config(tenant_id, config, flush_ms)
-            .await?)
+        let req_body = TenantLocationConfigRequest { tenant_id, config }; // tenant_id goes into the body and the URL
+
+        self.http_request(
+            Method::PUT,
+            format!(
+                "{}/tenant/{}/location_config",
+                self.http_base_url, tenant_id
+            ),
+        )? // building the request is itself fallible in this impl
+        .json(&req_body) // request body is serialized as JSON
+        .send()? // no `.await`: the request is sent synchronously
+        .error_from_body()?; // only the error check is used; the response is then dropped
+
+        Ok(())
    }

-    pub async fn timeline_list(&self, tenant_id: &TenantId) -> anyhow::Result<Vec<TimelineInfo>> {
-        Ok(self.http_client.list_timelines(*tenant_id).await?)
+    pub fn timeline_list(&self, tenant_id: &TenantId) -> anyhow::Result<Vec<TimelineInfo>> {
+        let timeline_infos: Vec<TimelineInfo> = self
+            .http_request(
+                Method::GET,
+                format!("{}/tenant/{}/timeline", self.http_base_url, tenant_id), // per-tenant timeline collection URL
+            )?
+            .send()? // no `.await`: the request is sent synchronously
+            .error_from_body()? // NOTE(review): presumably maps 4xx/5xx replies to Err; helper not shown here, confirm
+            .json()?; // target type comes from the `let` annotation above
+
+        Ok(timeline_infos)
    }

-    pub async fn timeline_create(
+    pub fn timeline_create(
        &self,
        tenant_id: TenantId,
        new_timeline_id: Option<TimelineId>,
        ancestor_start_lsn: Option<Lsn>,
        ancestor_timeline_id: Option<TimelineId>,
        pg_version: Option<u32>,
-        existing_initdb_timeline_id: Option<TimelineId>,
    ) -> anyhow::Result<TimelineInfo> {
        // If timeline ID was not specified, generate one
        let new_timeline_id = new_timeline_id.unwrap_or(TimelineId::generate());
-        let req = models::TimelineCreateRequest {
+
+        self.http_request(
+            Method::POST,
+            format!("{}/tenant/{}/timeline", self.http_base_url, tenant_id), // same URL as the timeline listing, but POST
+        )?
+        .json(&models::TimelineCreateRequest { // request struct is built inline as the JSON body
            new_timeline_id,
            ancestor_start_lsn,
            ancestor_timeline_id,
            pg_version,
-            existing_initdb_timeline_id,
-        };
-        Ok(self.http_client.timeline_create(tenant_id, &req).await?)
+        })
+        .send()? // no `.await`: the request is sent synchronously
+        .error_from_body()? // NOTE(review): presumably maps 4xx/5xx replies to Err; helper not shown here, confirm
+        .json::<Option<TimelineInfo>>() // the body is decoded as an Option, so it may be `None`
+        .with_context(|| {
+            format!("Failed to parse timeline creation response for tenant id: {tenant_id}")
+        })? // first context: a decode failure becomes Err
+        .with_context(|| {
+            format!(
+                "No timeline id was found in the timeline creation response for tenant {tenant_id}"
+            )
+        }) // second context: a `None` body becomes Err, `Some(info)` becomes Ok(info)
    }

    /// Import a basebackup prepared using either:
@@ -515,7 +595,7 @@ impl PageServerNode {
    /// * `timeline_id` - id to assign to imported timeline
    /// * `base` - (start lsn of basebackup, path to `base.tar` file)
    /// * `pg_wal` - if there's any wal to import: (end lsn, path to `pg_wal.tar`)
-    pub async fn timeline_import(
+    pub fn timeline_import(
        &self,
        tenant_id: TenantId,
        timeline_id: TimelineId,
@@ -523,60 +603,36 @@ impl PageServerNode {
        pg_wal: Option<(Lsn, PathBuf)>,
        pg_version: u32,
    ) -> anyhow::Result<()> {
-        let (client, conn) = self.page_server_psql_client().await?;
-        // The connection object performs the actual communication with the database,
-        // so spawn it off to run on its own.
-        tokio::spawn(async move {
-            if let Err(e) = conn.await {
-                eprintln!("connection error: {}", e);
-            }
-        });
-        tokio::pin!(client);
+        let mut client = self.page_server_psql_client()?;

        // Init base reader
        let (start_lsn, base_tarfile_path) = base;
-        let base_tarfile = tokio::fs::File::open(base_tarfile_path).await?;
-        let base_tarfile = tokio_util::io::ReaderStream::new(base_tarfile);
+        let base_tarfile = File::open(base_tarfile_path)?;
+        let mut base_reader = BufReader::new(base_tarfile);

        // Init wal reader if necessary
        let (end_lsn, wal_reader) = if let Some((end_lsn, wal_tarfile_path)) = pg_wal {
-            let wal_tarfile = tokio::fs::File::open(wal_tarfile_path).await?;
-            let wal_reader = tokio_util::io::ReaderStream::new(wal_tarfile);
+            let wal_tarfile = File::open(wal_tarfile_path)?;
+            let wal_reader = BufReader::new(wal_tarfile);
            (end_lsn, Some(wal_reader))
        } else {
            (start_lsn, None)
        };

-        let copy_in = |reader, cmd| {
-            let client = &client;
-            async move {
-                let writer = client.copy_in(&cmd).await?;
-                let writer = std::pin::pin!(writer);
-                let mut writer = writer.sink_map_err(|e| {
-                    std::io::Error::new(std::io::ErrorKind::Other, format!("{e}"))
-                });
-                let mut reader = std::pin::pin!(reader);
-                writer.send_all(&mut reader).await?;
-                writer.into_inner().finish().await?;
-                anyhow::Ok(())
-            }
-        };
-
        // Import base
-        copy_in(
-            base_tarfile,
-            format!(
-                "import basebackup {tenant_id} {timeline_id} {start_lsn} {end_lsn} {pg_version}"
-            ),
-        )
-        .await?;
+        let import_cmd = format!(
+            "import basebackup {tenant_id} {timeline_id} {start_lsn} {end_lsn} {pg_version}"
+        );
+        let mut writer = client.copy_in(&import_cmd)?;
+        io::copy(&mut base_reader, &mut writer)?;
+        writer.finish()?;
+
        // Import wal if necessary
-        if let Some(wal_reader) = wal_reader {
-            copy_in(
-                wal_reader,
-                format!("import wal {tenant_id} {timeline_id} {start_lsn} {end_lsn}"),
-            )
-            .await?;
+        if let Some(mut wal_reader) = wal_reader {
+            let import_cmd = format!("import wal {tenant_id} {timeline_id} {start_lsn} {end_lsn}");
+            let mut writer = client.copy_in(&import_cmd)?;
+            io::copy(&mut wal_reader, &mut writer)?;
+            writer.finish()?;
        }

        Ok(())
--- a/control_plane/src/safekeeper.rs
+++ b/control_plane/src/safekeeper.rs
@@ -13,6 +13,7 @@ use std::{io, result};
 use anyhow::Context;
 use camino::Utf8PathBuf;
 use postgres_connection::PgConnectionConfig;
+use reqwest::blocking::{Client, RequestBuilder, Response};
 use reqwest::{IntoUrl, Method};
 use thiserror::Error;
 use utils::{http::error::HttpErrorBody, id::NodeId};
@@ -33,14 +34,12 @@ pub enum SafekeeperHttpError {

 type Result<T> = result::Result<T, SafekeeperHttpError>;

-#[async_trait::async_trait]
 pub trait ResponseErrorMessageExt: Sized {
-    async fn error_from_body(self) -> Result<Self>;
+    fn error_from_body(self) -> Result<Self>; // consumes the response; the `Response` impl in this file returns it unchanged unless the status is 4xx/5xx
 }

-#[async_trait::async_trait]
-impl ResponseErrorMessageExt for reqwest::Response {
-    async fn error_from_body(self) -> Result<Self> {
+impl ResponseErrorMessageExt for Response {
+    fn error_from_body(self) -> Result<Self> {
        let status = self.status();
        if !(status.is_client_error() || status.is_server_error()) {
            return Ok(self);
@@ -49,7 +48,7 @@ impl ResponseErrorMessageExt for reqwest::Response {
        // reqwest does not export its error construction utility functions, so let's craft the message ourselves
        let url = self.url().to_owned();
        Err(SafekeeperHttpError::Response(
-            match self.json::<HttpErrorBody>().await {
+            match self.json::<HttpErrorBody>() {
                Ok(err_body) => format!("Error: {}", err_body.msg),
                Err(_) => format!("Http error ({}) at {}.", status.as_u16(), url),
            },
@@ -70,7 +69,7 @@ pub struct SafekeeperNode {

    pub pg_connection_config: PgConnectionConfig,
    pub env: LocalEnv,
-    pub http_client: reqwest::Client,
+    pub http_client: Client,
    pub http_base_url: String,
 }

@@ -81,7 +80,7 @@ impl SafekeeperNode {
            conf: conf.clone(),
            pg_connection_config: Self::safekeeper_connection_config(conf.pg_port),
            env: env.clone(),
-            http_client: reqwest::Client::new(),
+            http_client: Client::new(),
            http_base_url: format!("http://127.0.0.1:{}/v1", conf.http_port),
        }
    }
@@ -104,7 +103,7 @@ impl SafekeeperNode {
            .expect("non-Unicode path")
    }

-    pub async fn start(&self, extra_opts: Vec<String>) -> anyhow::Result<Child> {
+    pub fn start(&self, extra_opts: Vec<String>) -> anyhow::Result<Child> {
        print!(
            "Starting safekeeper at '{}' in '{}'",
            self.pg_connection_config.raw_address(),
@@ -192,16 +191,13 @@ impl SafekeeperNode {
            &self.env.safekeeper_bin(),
            &args,
            [],
-            background_process::InitialPidFile::Expect(self.pid_file()),
-            || async {
-                match self.check_status().await {
-                    Ok(()) => Ok(true),
-                    Err(SafekeeperHttpError::Transport(_)) => Ok(false),
-                    Err(e) => Err(anyhow::anyhow!("Failed to check node status: {e}")),
-                }
+            background_process::InitialPidFile::Expect(&self.pid_file()),
+            || match self.check_status() {
+                Ok(()) => Ok(true),
+                Err(SafekeeperHttpError::Transport(_)) => Ok(false),
+                Err(e) => Err(anyhow::anyhow!("Failed to check node status: {e}")),
            },
        )
-        .await
    }

    ///
@@ -220,7 +216,7 @@ impl SafekeeperNode {
        )
    }

-    fn http_request<U: IntoUrl>(&self, method: Method, url: U) -> reqwest::RequestBuilder {
+    fn http_request<U: IntoUrl>(&self, method: Method, url: U) -> RequestBuilder {
        // TODO: authentication
        //if self.env.auth_type == AuthType::NeonJWT {
        //    builder = builder.bearer_auth(&self.env.safekeeper_auth_token)
@@ -228,12 +224,10 @@ impl SafekeeperNode {
        self.http_client.request(method, url)
    }

-    pub async fn check_status(&self) -> Result<()> {
+    pub fn check_status(&self) -> Result<()> { // probes "<http_base_url>/status"; `start` calls this from its readiness closure
        self.http_request(Method::GET, format!("{}/{}", self.http_base_url, "status"))
-            .send()
-            .await?
-            .error_from_body()
-            .await?;
+            .send()? // blocking send; NOTE(review): failures presumably convert to SafekeeperHttpError::Transport, confirm
+            .error_from_body()?; // a 4xx/5xx status becomes SafekeeperHttpError::Response
        Ok(())
    }
 }
--- a/control_plane/src/tenant_migration.rs
+++ b/control_plane/src/tenant_migration.rs
@@ -14,16 +14,17 @@ use pageserver_api::models::{
 use std::collections::HashMap;
 use std::time::Duration;
 use utils::{
+    generation::Generation,
    id::{TenantId, TimelineId},
    lsn::Lsn,
 };

 /// Given an attached pageserver, retrieve the LSN for all timelines
-async fn get_lsns(
+fn get_lsns(
    tenant_id: TenantId,
    pageserver: &PageServerNode,
 ) -> anyhow::Result<HashMap<TimelineId, Lsn>> {
-    let timelines = pageserver.timeline_list(&tenant_id).await?;
+    let timelines = pageserver.timeline_list(&tenant_id)?;
    Ok(timelines
        .into_iter()
        .map(|t| (t.timeline_id, t.last_record_lsn))
@@ -32,13 +33,13 @@ async fn get_lsns(

 /// Wait for the timeline LSNs on `pageserver` to catch up with or overtake
 /// `baseline`.
-async fn await_lsn(
+fn await_lsn(
    tenant_id: TenantId,
    pageserver: &PageServerNode,
    baseline: HashMap<TimelineId, Lsn>,
 ) -> anyhow::Result<()> {
    loop {
-        let latest = match get_lsns(tenant_id, pageserver).await {
+        let latest = match get_lsns(tenant_id, pageserver) {
            Ok(l) => l,
            Err(e) => {
                println!(
@@ -84,7 +85,7 @@ async fn await_lsn(
 ///  - Coordinate attach/secondary/detach on pageservers
 ///  - call into attachment_service for generations
 ///  - reconfigure compute endpoints to point to new attached pageserver
-pub async fn migrate_tenant(
+pub fn migrate_tenant(
    env: &LocalEnv,
    tenant_id: TenantId,
    dest_ps: PageServerNode,
@@ -92,60 +93,52 @@ pub async fn migrate_tenant(
    // Get a new generation
    let attachment_service = AttachmentService::from_env(env);

-    fn build_location_config(
-        mode: LocationConfigMode,
-        generation: Option<u32>,
-        secondary_conf: Option<LocationConfigSecondary>,
-    ) -> LocationConfig {
-        LocationConfig {
-            mode,
-            generation,
-            secondary_conf,
-            tenant_conf: TenantConfig::default(),
-            shard_number: 0,
-            shard_count: 0,
-            shard_stripe_size: 0,
-        }
-    }
-
-    let previous = attachment_service.inspect(tenant_id).await?;
+    let previous = attachment_service.inspect(tenant_id)?;
    let mut baseline_lsns = None;
    if let Some((generation, origin_ps_id)) = &previous {
        let origin_ps = PageServerNode::from_env(env, env.get_pageserver_conf(*origin_ps_id)?);

        if origin_ps_id == &dest_ps.conf.id {
            println!("🔁 Already attached to {origin_ps_id}, freshening...");
-            let gen = attachment_service
-                .attach_hook(tenant_id, dest_ps.conf.id)
-                .await?;
-            let dest_conf = build_location_config(LocationConfigMode::AttachedSingle, gen, None);
-            dest_ps.location_config(tenant_id, dest_conf, None).await?;
+            let gen = attachment_service.attach_hook(tenant_id, dest_ps.conf.id)?;
+            let dest_conf = LocationConfig {
+                mode: LocationConfigMode::AttachedSingle,
+                generation: gen.map(Generation::new),
+                secondary_conf: None,
+                tenant_conf: TenantConfig::default(),
+            };
+            dest_ps.location_config(tenant_id, dest_conf)?;
            println!("✅ Migration complete");
            return Ok(());
        }

        println!("🔁 Switching origin pageserver {origin_ps_id} to stale mode");

-        let stale_conf =
-            build_location_config(LocationConfigMode::AttachedStale, Some(*generation), None);
-        origin_ps
-            .location_config(tenant_id, stale_conf, Some(Duration::from_secs(10)))
-            .await?;
+        let stale_conf = LocationConfig {
+            mode: LocationConfigMode::AttachedStale,
+            generation: Some(Generation::new(*generation)),
+            secondary_conf: None,
+            tenant_conf: TenantConfig::default(),
+        };
+        origin_ps.location_config(tenant_id, stale_conf)?;

-        baseline_lsns = Some(get_lsns(tenant_id, &origin_ps).await?);
+        baseline_lsns = Some(get_lsns(tenant_id, &origin_ps)?);
    }

-    let gen = attachment_service
-        .attach_hook(tenant_id, dest_ps.conf.id)
-        .await?;
-    let dest_conf = build_location_config(LocationConfigMode::AttachedMulti, gen, None);
+    let gen = attachment_service.attach_hook(tenant_id, dest_ps.conf.id)?;
+    let dest_conf = LocationConfig {
+        mode: LocationConfigMode::AttachedMulti,
+        generation: gen.map(Generation::new),
+        secondary_conf: None,
+        tenant_conf: TenantConfig::default(),
+    };

    println!("🔁 Attaching to pageserver {}", dest_ps.conf.id);
-    dest_ps.location_config(tenant_id, dest_conf, None).await?;
+    dest_ps.location_config(tenant_id, dest_conf)?;

    if let Some(baseline) = baseline_lsns {
        println!("🕑 Waiting for LSN to catch up...");
-        await_lsn(tenant_id, &dest_ps, baseline).await?;
+        await_lsn(tenant_id, &dest_ps, baseline)?;
    }

    let cplane = ComputeControlPlane::load(env.clone())?;
@@ -155,7 +148,7 @@ pub async fn migrate_tenant(
                "🔁 Reconfiguring endpoint {} to use pageserver {}",
                endpoint_name, dest_ps.conf.id
            );
-            endpoint.reconfigure(Some(dest_ps.conf.id)).await?;
+            endpoint.reconfigure(Some(dest_ps.conf.id))?;
        }
    }

@@ -165,39 +158,43 @@ pub async fn migrate_tenant(
        }

        let other_ps = PageServerNode::from_env(env, other_ps_conf);
-        let other_ps_tenants = other_ps.tenant_list().await?;
+        let other_ps_tenants = other_ps.tenant_list()?;

        // Check if this tenant is attached
        let found = other_ps_tenants
            .into_iter()
            .map(|t| t.id)
-            .any(|i| i.tenant_id == tenant_id);
+            .any(|i| i == tenant_id);
        if !found {
            continue;
        }

        // Downgrade to a secondary location
-        let secondary_conf = build_location_config(
-            LocationConfigMode::Secondary,
-            None,
-            Some(LocationConfigSecondary { warm: true }),
-        );
+        let secondary_conf = LocationConfig {
+            mode: LocationConfigMode::Secondary,
+            generation: None,
+            secondary_conf: Some(LocationConfigSecondary { warm: true }),
+            tenant_conf: TenantConfig::default(),
+        };

        println!(
            "💤 Switching to secondary mode on pageserver {}",
            other_ps.conf.id
        );
-        other_ps
-            .location_config(tenant_id, secondary_conf, None)
-            .await?;
+        other_ps.location_config(tenant_id, secondary_conf)?;
    }

    println!(
        "🔁 Switching to AttachedSingle mode on pageserver {}",
        dest_ps.conf.id
    );
-    let dest_conf = build_location_config(LocationConfigMode::AttachedSingle, gen, None);
-    dest_ps.location_config(tenant_id, dest_conf, None).await?;
+    let dest_conf = LocationConfig {
+        mode: LocationConfigMode::AttachedSingle,
+        generation: gen.map(Generation::new),
+        secondary_conf: None,
+        tenant_conf: TenantConfig::default(),
+    };
+    dest_ps.location_config(tenant_id, dest_conf)?;

    println!("✅ Migration complete");

--- a/docs/rfcs/029-pageserver-wal-disaster-recovery.md
+++ b/docs/rfcs/029-pageserver-wal-disaster-recovery.md
@@ -1,205 +0,0 @@
-# Name
-
-Created on: 2023-09-08
-Author: Arpad Müller
-
-## Summary
-
-Enable the pageserver to recover from data corruption events by implementing
-a feature to re-apply historic WAL records in parallel to the already occurring
-WAL replay.
-
-The feature is outside of the user-visible backup and history story, and only
-serves as a second-level backup for the case that there is a bug in the
-pageservers that corrupted the served pages.
-
-The RFC proposes the addition of two new features:
-* recover a broken branch from WAL (downtime is allowed)
-* a test recovery system to recover random branches to make sure recovery works
-
-## Motivation
-
-The historic WAL is currently stored in S3 even after it has been replayed by
-the pageserver and thus been integrated into the pageserver's storage system.
-This is done to defend from data corruption failures inside the pageservers.
-
-However, application of this WAL in the disaster recovery setting is currently
-very manual and we want to automate this to make it easier.
-
-### Use cases
-
-There are various use cases for this feature, like:
-
-* The main motivation is replaying in the instance of pageservers corrupting
-  data.
-* We might want to, beyond the user-visible history features, through our
-  support channels and upon customer request, in select instances, recover
-  historic versions beyond the range of history that we officially support.
-* Running the recovery process in the background for random tenant timelines
-  to figure out if there was a corruption of data (we would compare with what
-  the pageserver stores for the "official" timeline).
-* Using the WAL to arrive at historic pages we can then back up to S3 so that
-  WAL itself can be discarded, or at least not used for future replays.
-  Again, this sounds a lot like what the pageserver is already doing, but the
-  point is to provide a fallback to the service provided by the pageserver.
-
-## Design
-
-### Design constraints
-
-The main design constraint is that the feature needs to be *simple* enough that
-the number of bugs are as low, and reliability as high as possible: the main
-goal of this endeavour is to achieve higher correctness than the pageserver.
-
-For the background process, we cannot afford a downtime of the timeline that is
-being cloned, as we don't want to restrict ourselves to offline tenants only.
-In the scenario where we want to recover from disasters or roll back to a
-historic lsn through support staff, downtimes are more affordable, and
-inevitable if the original had been subject to the corruption. Ideally, the
-two code paths would share code, so the solution would be designed for not
-requiring downtimes.
-
-### API endpoint changes
-
-This RFC proposes two API endpoint changes in the safekeeper and the
-pageserver.
-
-Remember, the pageserver timeline API creation endpoint is to this URL:
-
-```
-/v1/tenant/{tenant_id}/timeline/
-```
-
-Where `{tenant_id}` is the ID of the tenant the timeline is created for,
-and specified as part of the URL. The timeline ID is passed via the POST
-request body as the only required parameter `new_timeline_id`.
-
-This proposal adds one optional parameter called
-`existing_initdb_timeline_id` to the request's json body. If the parameter
-is not specified, behaviour should be as existing, so the pageserver runs
-initdb.
-If the parameter is specified, it is expected to point to a timeline ID.
-In fact that ID might match `new_timeline_id`, what's important is that
-S3 storage contains a matching initdb under the URL matching the given
-tenant and timeline.
-
-Having both `ancestor_timeline_id` and `existing_initdb_timeline_id`
-specified is illegal and will yield in an HTTP error. This feature is
-only meant for the "main" branch that doesn't have any ancestors
-of its own, as only here initdb is relevant.
-
-For the safekeeper, we propose the addition of the following copy endpoint:
-
-```
-/v1/tenant/{tenant_id}/timeline/{source_timeline_id}/copy
-```
-it is meant for POST requests with json, and the two URL parameters
-`tenant_id` and `source_timeline_id`. The json request body contains
-the two required parameters `target_timeline_id` and `until_lsn`.
-
-After invoking, the copy endpoint starts a copy process of the WAL from
-the source ID to the target ID. The lsn is updated according to the
-progress of the API call.
-
-### Higher level features
-
-We want the API changes to support the following higher level features:
-
-* recovery-after-corruption DR of the main timeline of a tenant. This
-  feature allows for downtime.
-* test DR of the main timeline into a special copy timeline. this feature
-  is meant to run against selected production tenants in the background,
-  without the user noticing, so it does not allow for downtime.
-
-The recovery-after-corruption DR only needs the pageserver changes.
-It works as follows:
-
-* delete the timeline from the pageservers via timeline deletion API
-* re-create it via timeline creation API (same ID as before) and set
-  `existing_initdb_timeline_id` to the same timeline ID
-
-The test DR requires also the copy primitive and works as follows:
-
-* copy the WAL of the timeline to a new place
-* create a new timeline for the tenant
-
-## Non Goals
-
-At the danger of being repetitive, the main goal of this feature is to be a
-backup method, so reliability is very important. This implies that other
-aspects like performance or space reduction are less important.
-
-### Corrupt WAL
-
-The process suggested by this RFC assumes that the WAL is free of corruption.
-In some instances, corruption can make it into WAL, like for example when
-higher level components like postgres or the application first read corrupt
-data, and then execute a write with data derived from that earlier read. That
-written data might then contain the corruption.
-
-Common use cases can hit this quite easily. For example, an application reads
-some counter, increments it, and then writes the new counter value to the
-database.
-On a lower level, the compute might put FPIs (Full Page Images) into the WAL,
-which have corrupt data for rows unrelated to the write operation at hand.
-
-Separating corrupt writes from non-corrupt ones is a hard problem in general,
-and if the application was involved in making the corrupt write, a recovery
-would also involve the application. Therefore, corruption that has made it into
-the WAL is outside of the scope of this feature. However, the WAL replay can be
-issued to right before the point in time where the corruption occured. Then the
-data loss is isolated to post-corruption writes only.
-
-## Impacted components (e.g. pageserver, safekeeper, console, etc)
-
-Most changes would happen to the pageservers.
-For the higher level features, maybe other components like the console would
-be involved.
-
-We need to make sure that the shadow timelines are not subject to the usual
-limits and billing we apply to existing timelines.
-
-## Proposed implementation
-
-The first problem to keep in mind is the reproducability of `initdb`.
-So an initial step would be to upload `initdb` snapshots to S3.
-
-After that, we'd have the endpoint spawn a background process which
-performs the replay of the WAL to that new timeline. This process should
-follow the existing workflows as closely as possible, just using the
-WAL records of a different timeline.
-
-The timeline created will be in a special state that solely looks for WAL
-entries of the timeline it is trying to copy. Once the target LSN is reached,
-it turns into a normal timeline that also accepts writes to its own
-timeline ID.
-
-### Scalability
-
-For now we want to run this entire process on a single node, and as
-it is by nature linear, it's hard to parallelize. However, for the
-verification workloads, we can easily start the WAL replay in parallel
-for different points in time. This is valuable especially for tenants
-with large WAL records.
-
-Compare this with the tricks to make addition circuits execute with
-lower latency by making them perform the addition for both possible
-values of the carry bit, and then, in a second step, taking the
-result for the carry bit that was actually obtained.
-
-The other scalability dimension to consider is the WAL length, which
-is a growing question as tenants accumulate changes. There are
-possible approaches to this, including creating snapshots of the
-page files and uploading them to S3, but if we do this for every single
-branch, we lose the cheap branching property.
-
-### Implementation by component
-
-The proposed changes for the various components of the neon architecture
-are written up in this notion page:
-
-https://www.notion.so/neondatabase/Pageserver-disaster-recovery-one-pager-4ecfb5df16ce4f6bbfc3817ed1a6cbb2
-
-### Unresolved questions
-
-none known (outside of the mentioned ones).
--- a/libs/compute_api/src/spec.rs
+++ b/libs/compute_api/src/spec.rs
@@ -26,13 +26,6 @@ pub struct ComputeSpec {
    // but we don't use it for anything. Serde will ignore missing fields when
    // deserializing it.
    pub operation_uuid: Option<String>,
-
-    /// Compute features to enable. These feature flags are provided, when we
-    /// know all the details about client's compute, so they cannot be used
-    /// to change `Empty` compute behavior.
-    #[serde(default)]
-    pub features: Vec<ComputeFeature>,
-
    /// Expected cluster state at the end of transition process.
    pub cluster: Cluster,
    pub delta_operations: Option<Vec<DeltaOp>>,
@@ -75,19 +68,6 @@ pub struct ComputeSpec {
    pub remote_extensions: Option<RemoteExtSpec>,
 }

-/// Feature flag to signal `compute_ctl` to enable certain experimental functionality.
-#[derive(Serialize, Clone, Copy, Debug, Deserialize, PartialEq, Eq)]
-#[serde(rename_all = "snake_case")]
-pub enum ComputeFeature {
-    // XXX: Add more feature flags here.
-
-    // This is a special feature flag that is used to represent unknown feature flags.
-    // Basically all unknown to enum flags are represented as this one. See unit test
-    // `parse_unknown_features()` for more details.
-    #[serde(other)]
-    UnknownFeature,
-}
-
 #[derive(Clone, Debug, Default, Deserialize, Serialize)]
 pub struct RemoteExtSpec {
    pub public_extensions: Option<Vec<String>>,
@@ -207,6 +187,8 @@ pub struct DeltaOp {
 pub struct Role {
    pub name: PgIdent,
    pub encrypted_password: Option<String>,
+    pub replication: Option<bool>, // NOTE(review): presumably the Postgres REPLICATION role attribute; meaning of `None` not shown here, confirm
+    pub bypassrls: Option<bool>, // NOTE(review): presumably the Postgres BYPASSRLS role attribute; meaning of `None` not shown here, confirm
    pub options: GenericOptions,
 }

@@ -247,10 +229,7 @@ mod tests {
    #[test]
    fn parse_spec_file() {
        let file = File::open("tests/cluster_spec.json").unwrap();
-        let spec: ComputeSpec = serde_json::from_reader(file).unwrap();
-
-        // Features list defaults to empty vector.
-        assert!(spec.features.is_empty());
+        let _spec: ComputeSpec = serde_json::from_reader(file).unwrap(); // only checks that the sample spec deserializes; the value is unused
    }

    #[test]
@@ -262,22 +241,4 @@ mod tests {
        ob.insert("unknown_field_123123123".into(), "hello".into());
        let _spec: ComputeSpec = serde_json::from_value(json).unwrap();
    }
-
-    #[test]
-    fn parse_unknown_features() {
-        // Test that unknown feature flags do not cause any errors.
-        let file = File::open("tests/cluster_spec.json").unwrap();
-        let mut json: serde_json::Value = serde_json::from_reader(file).unwrap();
-        let ob = json.as_object_mut().unwrap();
-
-        // Add unknown feature flags.
-        let features = vec!["foo_bar_feature", "baz_feature"];
-        ob.insert("features".into(), features.into());
-
-        let spec: ComputeSpec = serde_json::from_value(json).unwrap();
-
-        assert!(spec.features.len() == 2);
-        assert!(spec.features.contains(&ComputeFeature::UnknownFeature));
-        assert_eq!(spec.features, vec![ComputeFeature::UnknownFeature; 2]);
-    }
 }
--- a/libs/metrics/src/lib.rs
+++ b/libs/metrics/src/lib.rs
@@ -3,11 +3,8 @@
 //! Otherwise, we might not see all metrics registered via
 //! a default registry.
 #![deny(clippy::undocumented_unsafe_blocks)]
-
 use once_cell::sync::Lazy;
-use prometheus::core::{
-    Atomic, AtomicU64, Collector, GenericCounter, GenericCounterVec, GenericGauge, GenericGaugeVec,
-};
+use prometheus::core::{AtomicU64, Collector, GenericGauge, GenericGaugeVec};
 pub use prometheus::opts;
 pub use prometheus::register;
 pub use prometheus::Error;
@@ -135,137 +132,3 @@ fn get_rusage_stats() -> libc::rusage {
        rusage.assume_init()
    }
 }
-
-/// Create an [`IntCounterPairVec`] and registers to default registry.
-#[macro_export(local_inner_macros)]
-macro_rules! register_int_counter_pair_vec {
-    ($NAME1:expr, $HELP1:expr, $NAME2:expr, $HELP2:expr, $LABELS_NAMES:expr $(,)?) => {{
-        match (
-            $crate::register_int_counter_vec!($NAME1, $HELP1, $LABELS_NAMES),
-            $crate::register_int_counter_vec!($NAME2, $HELP2, $LABELS_NAMES),
-        ) {
-            (Ok(inc), Ok(dec)) => Ok($crate::IntCounterPairVec::new(inc, dec)),
-            (Err(e), _) | (_, Err(e)) => Err(e),
-        }
-    }};
-}
-/// Create an [`IntCounterPair`] and registers to default registry.
-#[macro_export(local_inner_macros)]
-macro_rules! register_int_counter_pair {
-    ($NAME1:expr, $HELP1:expr, $NAME2:expr, $HELP2:expr $(,)?) => {{
-        match (
-            $crate::register_int_counter!($NAME1, $HELP1),
-            $crate::register_int_counter!($NAME2, $HELP2),
-        ) {
-            (Ok(inc), Ok(dec)) => Ok($crate::IntCounterPair::new(inc, dec)),
-            (Err(e), _) | (_, Err(e)) => Err(e),
-        }
-    }};
-}
-
-/// A Pair of [`GenericCounterVec`]s. Like an [`GenericGaugeVec`] but will always observe changes
-pub struct GenericCounterPairVec<P: Atomic> {
-    inc: GenericCounterVec<P>,
-    dec: GenericCounterVec<P>,
-}
-
-/// A Pair of [`GenericCounter`]s. Like an [`GenericGauge`] but will always observe changes
-pub struct GenericCounterPair<P: Atomic> {
-    inc: GenericCounter<P>,
-    dec: GenericCounter<P>,
-}
-
-impl<P: Atomic> GenericCounterPairVec<P> {
-    pub fn new(inc: GenericCounterVec<P>, dec: GenericCounterVec<P>) -> Self {
-        Self { inc, dec }
-    }
-
-    /// `get_metric_with_label_values` returns the [`GenericCounterPair<P>`] for the given slice
-    /// of label values (same order as the VariableLabels in Desc). If that combination of
-    /// label values is accessed for the first time, a new [`GenericCounterPair<P>`] is created.
-    ///
-    /// An error is returned if the number of label values is not the same as the
-    /// number of VariableLabels in Desc.
-    pub fn get_metric_with_label_values(&self, vals: &[&str]) -> Result<GenericCounterPair<P>> {
-        Ok(GenericCounterPair {
-            inc: self.inc.get_metric_with_label_values(vals)?,
-            dec: self.dec.get_metric_with_label_values(vals)?,
-        })
-    }
-
-    /// `with_label_values` works as `get_metric_with_label_values`, but panics if an error
-    /// occurs.
-    pub fn with_label_values(&self, vals: &[&str]) -> GenericCounterPair<P> {
-        self.get_metric_with_label_values(vals).unwrap()
-    }
-}
-
-impl<P: Atomic> GenericCounterPair<P> {
-    pub fn new(inc: GenericCounter<P>, dec: GenericCounter<P>) -> Self {
-        Self { inc, dec }
-    }
-
-    /// Increment the gauge by 1, returning a guard that decrements by 1 on drop.
-    pub fn guard(&self) -> GenericCounterPairGuard<P> {
-        self.inc.inc();
-        GenericCounterPairGuard(self.dec.clone())
-    }
-
-    /// Increment the gauge by n, returning a guard that decrements by n on drop.
-    pub fn guard_by(&self, n: P::T) -> GenericCounterPairGuardBy<P> {
-        self.inc.inc_by(n);
-        GenericCounterPairGuardBy(self.dec.clone(), n)
-    }
-
-    /// Increase the gauge by 1.
-    #[inline]
-    pub fn inc(&self) {
-        self.inc.inc();
-    }
-
-    /// Decrease the gauge by 1.
-    #[inline]
-    pub fn dec(&self) {
-        self.dec.inc();
-    }
-
-    /// Add the given value to the gauge. (The value can be
-    /// negative, resulting in a decrement of the gauge.)
-    #[inline]
-    pub fn inc_by(&self, v: P::T) {
-        self.inc.inc_by(v);
-    }
-
-    /// Subtract the given value from the gauge. (The value can be
-    /// negative, resulting in an increment of the gauge.)
-    #[inline]
-    pub fn dec_by(&self, v: P::T) {
-        self.dec.inc_by(v);
-    }
-}
-
-/// Guard returned by [`GenericCounterPair::guard`]
-pub struct GenericCounterPairGuard<P: Atomic>(GenericCounter<P>);
-
-impl<P: Atomic> Drop for GenericCounterPairGuard<P> {
-    fn drop(&mut self) {
-        self.0.inc();
-    }
-}
-/// Guard returned by [`GenericCounterPair::guard_by`]
-pub struct GenericCounterPairGuardBy<P: Atomic>(GenericCounter<P>, P::T);
-
-impl<P: Atomic> Drop for GenericCounterPairGuardBy<P> {
-    fn drop(&mut self) {
-        self.0.inc_by(self.1);
-    }
-}
-
-/// A Pair of [`IntCounterVec`]s. Like an [`IntGaugeVec`] but will always observe changes
-pub type IntCounterPairVec = GenericCounterPairVec<AtomicU64>;
-
-/// A Pair of [`IntCounter`]s. Like an [`IntGauge`] but will always observe changes
-pub type IntCounterPair = GenericCounterPair<AtomicU64>;
-
-/// A guard for [`IntCounterPair`] that will decrement the gauge on drop
-pub type IntCounterPairGuard = GenericCounterPairGuard<AtomicU64>;
--- a/libs/pageserver_api/Cargo.toml
+++ b/libs/pageserver_api/Cargo.toml
@@ -18,10 +18,8 @@ enum-map.workspace = true
 strum.workspace = true
 strum_macros.workspace = true
 hex.workspace = true
-thiserror.workspace = true

 workspace_hack.workspace = true

 [dev-dependencies]
 bincode.workspace = true
-rand.workspace = true
--- a/libs/pageserver_api/src/control_api.rs
+++ b/libs/pageserver_api/src/control_api.rs
@@ -4,9 +4,7 @@
 //! See docs/rfcs/025-generation-numbers.md

 use serde::{Deserialize, Serialize};
-use utils::id::NodeId;
-
-use crate::shard::TenantShardId;
+use utils::id::{NodeId, TenantId};

 #[derive(Serialize, Deserialize)]
 pub struct ReAttachRequest {
@@ -15,7 +13,7 @@ pub struct ReAttachRequest {

 #[derive(Serialize, Deserialize)]
 pub struct ReAttachResponseTenant {
-    pub id: TenantShardId,
+    pub id: TenantId,
    pub gen: u32,
 }

@@ -26,7 +24,7 @@ pub struct ReAttachResponse {

 #[derive(Serialize, Deserialize)]
 pub struct ValidateRequestTenant {
-    pub id: TenantShardId,
+    pub id: TenantId,
    pub gen: u32,
 }

@@ -42,6 +40,6 @@ pub struct ValidateResponse {

 #[derive(Serialize, Deserialize)]
 pub struct ValidateResponseTenant {
-    pub id: TenantShardId,
+    pub id: TenantId,
    pub valid: bool,
 }
--- a/libs/pageserver_api/src/key.rs
+++ b/libs/pageserver_api/src/key.rs
@@ -140,41 +140,3 @@ impl Key {
        })
    }
 }
-
-pub fn is_rel_block_key(key: &Key) -> bool {
-    key.field1 == 0x00 && key.field4 != 0
-}
-
-impl std::str::FromStr for Key {
-    type Err = anyhow::Error;
-
-    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
-        Self::from_hex(s)
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use std::str::FromStr;
-
-    use crate::key::Key;
-
-    use rand::Rng;
-    use rand::SeedableRng;
-
-    #[test]
-    fn display_fromstr_bijection() {
-        let mut rng = rand::rngs::StdRng::seed_from_u64(42);
-
-        let key = Key {
-            field1: rng.gen(),
-            field2: rng.gen(),
-            field3: rng.gen(),
-            field4: rng.gen(),
-            field5: rng.gen(),
-            field6: rng.gen(),
-        };
-
-        assert_eq!(key, Key::from_str(&format!("{key}")).unwrap());
-    }
-}
--- a/libs/pageserver_api/src/lib.rs
+++ b/libs/pageserver_api/src/lib.rs
@@ -5,7 +5,6 @@ use const_format::formatcp;
 /// Public API types
 pub mod control_api;
 pub mod key;
-pub mod keyspace;
 pub mod models;
 pub mod reltag;
 pub mod shard;
--- a/libs/pageserver_api/src/models.rs
+++ b/libs/pageserver_api/src/models.rs
@@ -1,5 +1,3 @@
-pub mod partitioning;
-
 use std::{
    collections::HashMap,
    num::{NonZeroU64, NonZeroUsize},
@@ -12,6 +10,7 @@ use serde_with::serde_as;
 use strum_macros;
 use utils::{
    completion,
+    generation::Generation,
    history_buffer::HistoryBufferWithDropCounter,
    id::{NodeId, TenantId, TimelineId},
    lsn::Lsn,
@@ -181,8 +180,6 @@ pub struct TimelineCreateRequest {
    #[serde(default)]
    pub ancestor_timeline_id: Option<TimelineId>,
    #[serde(default)]
-    pub existing_initdb_timeline_id: Option<TimelineId>,
-    #[serde(default)]
    pub ancestor_start_lsn: Option<Lsn>,
    pub pg_version: Option<u32>,
 }
@@ -239,7 +236,6 @@ pub struct TenantConfig {
    pub min_resident_size_override: Option<u64>,
    pub evictions_low_residence_duration_metric_threshold: Option<String>,
    pub gc_feedback: Option<bool>,
-    pub heatmap_period: Option<String>,
 }

 /// A flattened analog of a `pagesever::tenant::LocationMode`, which
@@ -266,19 +262,10 @@ pub struct LocationConfig {
    pub mode: LocationConfigMode,
    /// If attaching, in what generation?
    #[serde(default)]
-    pub generation: Option<u32>,
+    pub generation: Option<Generation>,
    #[serde(default)]
    pub secondary_conf: Option<LocationConfigSecondary>,

-    // Shard parameters: if shard_count is nonzero, then other shard_* fields
-    // must be set accurately.
-    #[serde(default)]
-    pub shard_number: u8,
-    #[serde(default)]
-    pub shard_count: u8,
-    #[serde(default)]
-    pub shard_stripe_size: u32,
-
    // If requesting mode `Secondary`, configuration for that.
    // Custom storage configuration for the tenant, if any
    pub tenant_conf: TenantConfig,
@@ -319,14 +306,31 @@ impl std::ops::Deref for TenantConfigRequest {

 impl TenantConfigRequest {
    pub fn new(tenant_id: TenantId) -> TenantConfigRequest {
-        let config = TenantConfig::default();
+        let config = TenantConfig {
+            checkpoint_distance: None,
+            checkpoint_timeout: None,
+            compaction_target_size: None,
+            compaction_period: None,
+            compaction_threshold: None,
+            gc_horizon: None,
+            gc_period: None,
+            image_creation_threshold: None,
+            pitr_interval: None,
+            walreceiver_connect_timeout: None,
+            lagging_wal_timeout: None,
+            max_lsn_wal_lag: None,
+            trace_read_requests: None,
+            eviction_policy: None,
+            min_resident_size_override: None,
+            evictions_low_residence_duration_metric_threshold: None,
+            gc_feedback: None,
+        };
        TenantConfigRequest { tenant_id, config }
    }
 }

 #[derive(Debug, Deserialize)]
 pub struct TenantAttachRequest {
-    #[serde(default)]
    pub config: TenantAttachConfig,
    #[serde(default)]
    pub generation: Option<u32>,
@@ -334,7 +338,7 @@ pub struct TenantAttachRequest {

 /// Newtype to enforce deny_unknown_fields on TenantConfig for
 /// its usage inside `TenantAttachRequest`.
-#[derive(Debug, Serialize, Deserialize, Default)]
+#[derive(Debug, Serialize, Deserialize)]
 #[serde(deny_unknown_fields)]
 pub struct TenantAttachConfig {
    #[serde(flatten)]
@@ -360,7 +364,7 @@ pub enum TenantAttachmentStatus {

 #[derive(Serialize, Deserialize, Clone)]
 pub struct TenantInfo {
-    pub id: TenantShardId,
+    pub id: TenantId,
    // NB: intentionally not part of OpenAPI, we don't want to commit to a specific set of TenantState's
    pub state: TenantState,
    /// Sum of the size of all layer files.
@@ -372,7 +376,7 @@ pub struct TenantInfo {
 /// This represents the output of the "timeline_detail" and "timeline_list" API calls.
 #[derive(Debug, Serialize, Deserialize, Clone)]
 pub struct TimelineInfo {
-    pub tenant_id: TenantShardId,
+    pub tenant_id: TenantId,
    pub timeline_id: TimelineId,

    pub ancestor_timeline_id: Option<TimelineId>,
@@ -388,12 +392,7 @@ pub struct TimelineInfo {
    /// The LSN that we are advertizing to safekeepers
    pub remote_consistent_lsn_visible: Lsn,

-    /// The LSN from the start of the root timeline (never changes)
-    pub initdb_lsn: Lsn,
-
-    pub current_logical_size: u64,
-    pub current_logical_size_is_accurate: bool,
-
+    pub current_logical_size: Option<u64>, // is None when timeline is Unloaded
    /// Sum of the size of all layer files.
    /// If a layer is present in both local FS and S3, it counts only once.
    pub current_physical_size: Option<u64>, // is None when timeline is Unloaded
@@ -829,7 +828,7 @@ mod tests {
    fn test_tenantinfo_serde() {
        // Test serialization/deserialization of TenantInfo
        let original_active = TenantInfo {
-            id: TenantShardId::unsharded(TenantId::generate()),
+            id: TenantId::generate(),
            state: TenantState::Active,
            current_physical_size: Some(42),
            attachment_status: TenantAttachmentStatus::Attached,
@@ -846,7 +845,7 @@ mod tests {
        });

        let original_broken = TenantInfo {
-            id: TenantShardId::unsharded(TenantId::generate()),
+            id: TenantId::generate(),
            state: TenantState::Broken {
                reason: "reason".into(),
                backtrace: "backtrace info".into(),
--- a/libs/pageserver_api/src/models/partitioning.rs
+++ b/libs/pageserver_api/src/models/partitioning.rs
@@ -1,151 +0,0 @@
-use utils::lsn::Lsn;
-
-#[derive(Debug, PartialEq, Eq)]
-pub struct Partitioning {
-    pub keys: crate::keyspace::KeySpace,
-
-    pub at_lsn: Lsn,
-}
-
-impl serde::Serialize for Partitioning {
-    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
-    where
-        S: serde::Serializer,
-    {
-        pub struct KeySpace<'a>(&'a crate::keyspace::KeySpace);
-
-        impl<'a> serde::Serialize for KeySpace<'a> {
-            fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
-            where
-                S: serde::Serializer,
-            {
-                use serde::ser::SerializeSeq;
-                let mut seq = serializer.serialize_seq(Some(self.0.ranges.len()))?;
-                for kr in &self.0.ranges {
-                    seq.serialize_element(&KeyRange(kr))?;
-                }
-                seq.end()
-            }
-        }
-
-        use serde::ser::SerializeMap;
-        let mut map = serializer.serialize_map(Some(2))?;
-        map.serialize_key("keys")?;
-        map.serialize_value(&KeySpace(&self.keys))?;
-        map.serialize_key("at_lsn")?;
-        map.serialize_value(&WithDisplay(&self.at_lsn))?;
-        map.end()
-    }
-}
-
-pub struct WithDisplay<'a, T>(&'a T);
-
-impl<'a, T: std::fmt::Display> serde::Serialize for WithDisplay<'a, T> {
-    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
-    where
-        S: serde::Serializer,
-    {
-        serializer.collect_str(&self.0)
-    }
-}
-
-pub struct KeyRange<'a>(&'a std::ops::Range<crate::key::Key>);
-
-impl<'a> serde::Serialize for KeyRange<'a> {
-    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-    where
-        S: serde::Serializer,
-    {
-        use serde::ser::SerializeTuple;
-        let mut t = serializer.serialize_tuple(2)?;
-        t.serialize_element(&WithDisplay(&self.0.start))?;
-        t.serialize_element(&WithDisplay(&self.0.end))?;
-        t.end()
-    }
-}
-
-impl<'a> serde::Deserialize<'a> for Partitioning {
-    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
-    where
-        D: serde::Deserializer<'a>,
-    {
-        pub struct KeySpace(crate::keyspace::KeySpace);
-
-        impl<'de> serde::Deserialize<'de> for KeySpace {
-            fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
-            where
-                D: serde::Deserializer<'de>,
-            {
-                #[serde_with::serde_as]
-                #[derive(serde::Deserialize)]
-                #[serde(transparent)]
-                struct Key(#[serde_as(as = "serde_with::DisplayFromStr")] crate::key::Key);
-
-                #[serde_with::serde_as]
-                #[derive(serde::Deserialize)]
-                struct Range(Key, Key);
-
-                let ranges: Vec<Range> = serde::Deserialize::deserialize(deserializer)?;
-                Ok(Self(crate::keyspace::KeySpace {
-                    ranges: ranges
-                        .into_iter()
-                        .map(|Range(start, end)| (start.0..end.0))
-                        .collect(),
-                }))
-            }
-        }
-
-        #[serde_with::serde_as]
-        #[derive(serde::Deserialize)]
-        struct De {
-            keys: KeySpace,
-            #[serde_as(as = "serde_with::DisplayFromStr")]
-            at_lsn: Lsn,
-        }
-
-        let de: De = serde::Deserialize::deserialize(deserializer)?;
-        Ok(Self {
-            at_lsn: de.at_lsn,
-            keys: de.keys.0,
-        })
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_serialization_roundtrip() {
-        let reference = r#"
-        {
-            "keys": [
-              [
-                "000000000000000000000000000000000000",
-                "000000000000000000000000000000000001"
-              ],
-              [
-                "000000067F00000001000000000000000000",
-                "000000067F00000001000000000000000002"
-              ],
-              [
-                "030000000000000000000000000000000000",
-                "030000000000000000000000000000000003"
-              ]
-            ],
-            "at_lsn": "0/2240160"
-        }
-        "#;
-
-        let de: Partitioning = serde_json::from_str(reference).unwrap();
-
-        let ser = serde_json::to_string(&de).unwrap();
-
-        let ser_de: serde_json::Value = serde_json::from_str(&ser).unwrap();
-
-        assert_eq!(
-            ser_de,
-            serde_json::from_str::<'_, serde_json::Value>(reference).unwrap()
-        );
-    }
-}
--- a/libs/pageserver_api/src/shard.rs
+++ b/libs/pageserver_api/src/shard.rs
@@ -1,15 +1,13 @@
 use std::{ops::RangeInclusive, str::FromStr};

-use crate::key::{is_rel_block_key, Key};
 use hex::FromHex;
 use serde::{Deserialize, Serialize};
-use thiserror;
 use utils::id::TenantId;

-#[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Copy, Serialize, Deserialize, Debug, Hash)]
+#[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Copy, Serialize, Deserialize, Debug)]
 pub struct ShardNumber(pub u8);

-#[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Copy, Serialize, Deserialize, Debug, Hash)]
+#[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Copy, Serialize, Deserialize, Debug)]
 pub struct ShardCount(pub u8);

 impl ShardCount {
@@ -40,7 +38,7 @@ impl ShardNumber {
 /// Note that the binary encoding is _not_ backward compatible, because
 /// at the time sharding is introduced, there are no existing binary structures
 /// containing TenantId that we need to handle.
-#[derive(Eq, PartialEq, PartialOrd, Ord, Clone, Copy, Hash)]
+#[derive(Eq, PartialEq, PartialOrd, Ord, Clone, Copy)]
 pub struct TenantShardId {
    pub tenant_id: TenantId,
    pub shard_number: ShardNumber,
@@ -73,33 +71,19 @@ impl TenantShardId {
        )
    }

-    pub fn shard_slug(&self) -> impl std::fmt::Display + '_ {
-        ShardSlug(self)
-    }
-
-    /// Convenience for code that has special behavior on the 0th shard.
-    pub fn is_zero(&self) -> bool {
-        self.shard_number == ShardNumber(0)
-    }
-}
-
-/// Formatting helper
-struct ShardSlug<'a>(&'a TenantShardId);
-
-impl<'a> std::fmt::Display for ShardSlug<'a> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(
-            f,
-            "{:02x}{:02x}",
-            self.0.shard_number.0, self.0.shard_count.0
-        )
+    pub fn shard_slug(&self) -> String {
+        format!("{:02x}{:02x}", self.shard_number.0, self.shard_count.0)
    }
 }

 impl std::fmt::Display for TenantShardId {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        if self.shard_count != ShardCount(0) {
-            write!(f, "{}-{}", self.tenant_id, self.shard_slug())
+            write!(
+                f,
+                "{}-{:02x}{:02x}",
+                self.tenant_id, self.shard_number.0, self.shard_count.0
+            )
        } else {
            // Legacy case (shard_count == 0) -- format as just the tenant id.  Note that this
            // is distinct from the normal single shard case (shard count == 1).
@@ -155,89 +139,6 @@ impl From<[u8; 18]> for TenantShardId {
    }
 }

-/// For use within the context of a particular tenant, when we need to know which
-/// shard we're dealing with, but do not need to know the full ShardIdentity (because
-/// we won't be doing any page->shard mapping), and do not need to know the fully qualified
-/// TenantShardId.
-#[derive(Eq, PartialEq, PartialOrd, Ord, Clone, Copy)]
-pub struct ShardIndex {
-    pub shard_number: ShardNumber,
-    pub shard_count: ShardCount,
-}
-
-impl ShardIndex {
-    pub fn new(number: ShardNumber, count: ShardCount) -> Self {
-        Self {
-            shard_number: number,
-            shard_count: count,
-        }
-    }
-    pub fn unsharded() -> Self {
-        Self {
-            shard_number: ShardNumber(0),
-            shard_count: ShardCount(0),
-        }
-    }
-
-    pub fn is_unsharded(&self) -> bool {
-        self.shard_number == ShardNumber(0) && self.shard_count == ShardCount(0)
-    }
-
-    /// For use in constructing remote storage paths: concatenate this with a TenantId
-    /// to get a fully qualified TenantShardId.
-    ///
-    /// Backward compat: this function returns an empty string if Self::is_unsharded, such
-    /// that the legacy pre-sharding remote key format is preserved.
-    pub fn get_suffix(&self) -> String {
-        if self.is_unsharded() {
-            "".to_string()
-        } else {
-            format!("-{:02x}{:02x}", self.shard_number.0, self.shard_count.0)
-        }
-    }
-}
-
-impl std::fmt::Display for ShardIndex {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "{:02x}{:02x}", self.shard_number.0, self.shard_count.0)
-    }
-}
-
-impl std::fmt::Debug for ShardIndex {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        // Debug is the same as Display: the compact hex representation
-        write!(f, "{}", self)
-    }
-}
-
-impl std::str::FromStr for ShardIndex {
-    type Err = hex::FromHexError;
-
-    fn from_str(s: &str) -> Result<Self, Self::Err> {
-        // Expect format: 1 byte shard number, 1 byte shard count
-        if s.len() == 4 {
-            let bytes = s.as_bytes();
-            let mut shard_parts: [u8; 2] = [0u8; 2];
-            hex::decode_to_slice(bytes, &mut shard_parts)?;
-            Ok(Self {
-                shard_number: ShardNumber(shard_parts[0]),
-                shard_count: ShardCount(shard_parts[1]),
-            })
-        } else {
-            Err(hex::FromHexError::InvalidStringLength)
-        }
-    }
-}
-
-impl From<[u8; 2]> for ShardIndex {
-    fn from(b: [u8; 2]) -> Self {
-        Self {
-            shard_number: ShardNumber(b[0]),
-            shard_count: ShardCount(b[1]),
-        }
-    }
-}
-
 impl Serialize for TenantShardId {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
@@ -308,261 +209,6 @@ impl<'de> Deserialize<'de> for TenantShardId {
    }
 }

-/// Stripe size in number of pages
-#[derive(Clone, Copy, Serialize, Deserialize, Eq, PartialEq, Debug)]
-pub struct ShardStripeSize(pub u32);
-
-/// Layout version: for future upgrades where we might change how the key->shard mapping works
-#[derive(Clone, Copy, Serialize, Deserialize, Eq, PartialEq, Debug)]
-pub struct ShardLayout(u8);
-
-const LAYOUT_V1: ShardLayout = ShardLayout(1);
-/// ShardIdentity uses a magic layout value to indicate if it is unusable
-const LAYOUT_BROKEN: ShardLayout = ShardLayout(255);
-
-/// Default stripe size in pages: 256MiB divided by 8kiB page size.
-const DEFAULT_STRIPE_SIZE: ShardStripeSize = ShardStripeSize(256 * 1024 / 8);
-
-/// The ShardIdentity contains the information needed for one member of map
-/// to resolve a key to a shard, and then check whether that shard is ==self.
-#[derive(Clone, Copy, Serialize, Deserialize, Eq, PartialEq, Debug)]
-pub struct ShardIdentity {
-    pub number: ShardNumber,
-    pub count: ShardCount,
-    stripe_size: ShardStripeSize,
-    layout: ShardLayout,
-}
-
-#[derive(thiserror::Error, Debug, PartialEq, Eq)]
-pub enum ShardConfigError {
-    #[error("Invalid shard count")]
-    InvalidCount,
-    #[error("Invalid shard number")]
-    InvalidNumber,
-    #[error("Invalid stripe size")]
-    InvalidStripeSize,
-}
-
-impl ShardIdentity {
-    /// An identity with number=0 count=0 is a "none" identity, which represents legacy
-    /// tenants.  Modern single-shard tenants should not use this: they should
-    /// have number=0 count=1.
-    pub fn unsharded() -> Self {
-        Self {
-            number: ShardNumber(0),
-            count: ShardCount(0),
-            layout: LAYOUT_V1,
-            stripe_size: DEFAULT_STRIPE_SIZE,
-        }
-    }
-
-    /// A broken instance of this type is only used for `TenantState::Broken` tenants,
-    /// which are constructed in code paths that don't have access to proper configuration.
-    ///
-    /// A ShardIdentity in this state may not be used for anything, and should not be persisted.
-    /// Enforcement is via assertions, to avoid making our interface fallible for this
-    /// edge case: it is the Tenant's responsibility to avoid trying to do any I/O when in a broken
-    /// state, and by extension to avoid trying to do any page->shard resolution.
-    pub fn broken(number: ShardNumber, count: ShardCount) -> Self {
-        Self {
-            number,
-            count,
-            layout: LAYOUT_BROKEN,
-            stripe_size: DEFAULT_STRIPE_SIZE,
-        }
-    }
-
-    pub fn is_unsharded(&self) -> bool {
-        self.number == ShardNumber(0) && self.count == ShardCount(0)
-    }
-
-    /// Count must be nonzero, and number must be < count. To construct
-    /// the legacy case (count==0), use Self::unsharded instead.
-    pub fn new(
-        number: ShardNumber,
-        count: ShardCount,
-        stripe_size: ShardStripeSize,
-    ) -> Result<Self, ShardConfigError> {
-        if count.0 == 0 {
-            Err(ShardConfigError::InvalidCount)
-        } else if number.0 > count.0 - 1 {
-            Err(ShardConfigError::InvalidNumber)
-        } else if stripe_size.0 == 0 {
-            Err(ShardConfigError::InvalidStripeSize)
-        } else {
-            Ok(Self {
-                number,
-                count,
-                layout: LAYOUT_V1,
-                stripe_size,
-            })
-        }
-    }
-
-    fn is_broken(&self) -> bool {
-        self.layout == LAYOUT_BROKEN
-    }
-
-    pub fn get_shard_number(&self, key: &Key) -> ShardNumber {
-        assert!(!self.is_broken());
-        key_to_shard_number(self.count, self.stripe_size, key)
-    }
-
-    /// Return true if the key should be ingested by this shard
-    pub fn is_key_local(&self, key: &Key) -> bool {
-        assert!(!self.is_broken());
-        if self.count < ShardCount(2) || (key_is_shard0(key) && self.number == ShardNumber(0)) {
-            true
-        } else {
-            key_to_shard_number(self.count, self.stripe_size, key) == self.number
-        }
-    }
-
-    pub fn shard_slug(&self) -> String {
-        if self.count > ShardCount(0) {
-            format!("-{:02x}{:02x}", self.number.0, self.count.0)
-        } else {
-            String::new()
-        }
-    }
-
-    /// Convenience for checking if this identity is the 0th shard in a tenant,
-    /// for special cases on shard 0 such as ingesting relation sizes.
-    pub fn is_zero(&self) -> bool {
-        self.number == ShardNumber(0)
-    }
-}
-
-impl Serialize for ShardIndex {
-    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-    where
-        S: serde::Serializer,
-    {
-        if serializer.is_human_readable() {
-            serializer.collect_str(self)
-        } else {
-            // Binary encoding is not used in index_part.json, but is included in anticipation of
-            // switching various structures (e.g. inter-process communication, remote metadata) to more
-            // compact binary encodings in future.
-            let mut packed: [u8; 2] = [0; 2];
-            packed[0] = self.shard_number.0;
-            packed[1] = self.shard_count.0;
-            packed.serialize(serializer)
-        }
-    }
-}
-
-impl<'de> Deserialize<'de> for ShardIndex {
-    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
-    where
-        D: serde::Deserializer<'de>,
-    {
-        struct IdVisitor {
-            is_human_readable_deserializer: bool,
-        }
-
-        impl<'de> serde::de::Visitor<'de> for IdVisitor {
-            type Value = ShardIndex;
-
-            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
-                if self.is_human_readable_deserializer {
-                    formatter.write_str("value in form of hex string")
-                } else {
-                    formatter.write_str("value in form of integer array([u8; 2])")
-                }
-            }
-
-            fn visit_seq<A>(self, seq: A) -> Result<Self::Value, A::Error>
-            where
-                A: serde::de::SeqAccess<'de>,
-            {
-                let s = serde::de::value::SeqAccessDeserializer::new(seq);
-                let id: [u8; 2] = Deserialize::deserialize(s)?;
-                Ok(ShardIndex::from(id))
-            }
-
-            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
-            where
-                E: serde::de::Error,
-            {
-                ShardIndex::from_str(v).map_err(E::custom)
-            }
-        }
-
-        if deserializer.is_human_readable() {
-            deserializer.deserialize_str(IdVisitor {
-                is_human_readable_deserializer: true,
-            })
-        } else {
-            deserializer.deserialize_tuple(
-                2,
-                IdVisitor {
-                    is_human_readable_deserializer: false,
-                },
-            )
-        }
-    }
-}
-
-/// Whether this key is always held on shard 0 (e.g. shard 0 holds all SLRU keys
-/// in order to be able to serve basebackup requests without peer communication).
-fn key_is_shard0(key: &Key) -> bool {
-    // To decide what to shard out to shards >0, we apply a simple rule that only
-    // relation pages are distributed to shards other than shard zero. Everything else gets
-    // stored on shard 0.  This guarantees that shard 0 can independently serve basebackup
-    // requests, and any request other than those for particular blocks in relations.
-    //
-    // In this condition:
-    // - is_rel_block_key includes only relations, i.e. excludes SLRU data and
-    // all metadata.
-    // - field6 is set to -1 for relation size pages.
-    !(is_rel_block_key(key) && key.field6 != 0xffffffff)
-}
-
-/// Provide the same result as the function in postgres `hashfn.h` with the same name
-fn murmurhash32(mut h: u32) -> u32 {
-    h ^= h >> 16;
-    h = h.wrapping_mul(0x85ebca6b);
-    h ^= h >> 13;
-    h = h.wrapping_mul(0xc2b2ae35);
-    h ^= h >> 16;
-    h
-}
-
-/// Provide the same result as the function in postgres `hashfn.h` with the same name
-fn hash_combine(mut a: u32, mut b: u32) -> u32 {
-    b = b.wrapping_add(0x9e3779b9);
-    b = b.wrapping_add(a << 6);
-    b = b.wrapping_add(a >> 2);
-
-    a ^= b;
-    a
-}
-
-/// Where a Key is to be distributed across shards, select the shard.  This function
-/// does not account for keys that should be broadcast across shards.
-///
-/// The hashing in this function must exactly match what we do in postgres smgr
-/// code.  The resulting distribution of pages is intended to preserve locality within
-/// `stripe_size` ranges of contiguous block numbers in the same relation, while otherwise
-/// distributing data pseudo-randomly.
-///
-/// The mapping of key to shard is not stable across changes to ShardCount: this is intentional
-/// and will be handled at higher levels when shards are split.
-fn key_to_shard_number(count: ShardCount, stripe_size: ShardStripeSize, key: &Key) -> ShardNumber {
-    // Fast path for un-sharded tenants or broadcast keys
-    if count < ShardCount(2) || key_is_shard0(key) {
-        return ShardNumber(0);
-    }
-
-    // relNode
-    let mut hash = murmurhash32(key.field4);
-    // blockNum/stripe size
-    hash = hash_combine(hash, murmurhash32(key.field6 / stripe_size.0));
-
-    ShardNumber((hash % count.0 as u32) as u8)
-}
-
 #[cfg(test)]
 mod tests {
    use std::str::FromStr;
@@ -672,91 +318,4 @@ mod tests {

        Ok(())
    }
-
-    #[test]
-    fn shard_identity_validation() -> Result<(), ShardConfigError> {
-        // Happy cases
-        ShardIdentity::new(ShardNumber(0), ShardCount(1), DEFAULT_STRIPE_SIZE)?;
-        ShardIdentity::new(ShardNumber(0), ShardCount(1), ShardStripeSize(1))?;
-        ShardIdentity::new(ShardNumber(254), ShardCount(255), ShardStripeSize(1))?;
-
-        assert_eq!(
-            ShardIdentity::new(ShardNumber(0), ShardCount(0), DEFAULT_STRIPE_SIZE),
-            Err(ShardConfigError::InvalidCount)
-        );
-        assert_eq!(
-            ShardIdentity::new(ShardNumber(10), ShardCount(10), DEFAULT_STRIPE_SIZE),
-            Err(ShardConfigError::InvalidNumber)
-        );
-        assert_eq!(
-            ShardIdentity::new(ShardNumber(11), ShardCount(10), DEFAULT_STRIPE_SIZE),
-            Err(ShardConfigError::InvalidNumber)
-        );
-        assert_eq!(
-            ShardIdentity::new(ShardNumber(255), ShardCount(255), DEFAULT_STRIPE_SIZE),
-            Err(ShardConfigError::InvalidNumber)
-        );
-        assert_eq!(
-            ShardIdentity::new(ShardNumber(0), ShardCount(1), ShardStripeSize(0)),
-            Err(ShardConfigError::InvalidStripeSize)
-        );
-
-        Ok(())
-    }
-
-    #[test]
-    fn shard_index_human_encoding() -> Result<(), hex::FromHexError> {
-        let example = ShardIndex {
-            shard_number: ShardNumber(13),
-            shard_count: ShardCount(17),
-        };
-        let expected: String = "0d11".to_string();
-        let encoded = format!("{example}");
-        assert_eq!(&encoded, &expected);
-
-        let decoded = ShardIndex::from_str(&encoded)?;
-        assert_eq!(example, decoded);
-        Ok(())
-    }
-
-    #[test]
-    fn shard_index_binary_encoding() -> Result<(), hex::FromHexError> {
-        let example = ShardIndex {
-            shard_number: ShardNumber(13),
-            shard_count: ShardCount(17),
-        };
-        let expected: [u8; 2] = [0x0d, 0x11];
-
-        let encoded = bincode::serialize(&example).unwrap();
-        assert_eq!(Hex(&encoded), Hex(&expected));
-        let decoded = bincode::deserialize(&encoded).unwrap();
-        assert_eq!(example, decoded);
-
-        Ok(())
-    }
-
-    // These are only smoke tests to spot check that our implementation doesn't
-    // deviate from a few examples values: not aiming to validate the overall
-    // hashing algorithm.
-    #[test]
-    fn murmur_hash() {
-        assert_eq!(murmurhash32(0), 0);
-
-        assert_eq!(hash_combine(0xb1ff3b40, 0), 0xfb7923c9);
-    }
-
-    #[test]
-    fn shard_mapping() {
-        let key = Key {
-            field1: 0x00,
-            field2: 0x67f,
-            field3: 0x5,
-            field4: 0x400c,
-            field5: 0x00,
-            field6: 0x7d06,
-        };
-
-        let shard = key_to_shard_number(ShardCount(10), DEFAULT_STRIPE_SIZE, &key);
-        assert_eq!(shard, ShardNumber(8));
-    }
 }
--- a/libs/postgres_connection/src/lib.rs
+++ b/libs/postgres_connection/src/lib.rs
@@ -163,18 +163,8 @@ impl PgConnectionConfig {
    }

    /// Connect using postgres protocol with TLS disabled.
-    pub async fn connect_no_tls(
-        &self,
-    ) -> Result<
-        (
-            tokio_postgres::Client,
-            tokio_postgres::Connection<tokio_postgres::Socket, tokio_postgres::tls::NoTlsStream>,
-        ),
-        postgres::Error,
-    > {
-        self.to_tokio_postgres_config()
-            .connect(postgres::NoTls)
-            .await
+    pub fn connect_no_tls(&self) -> Result<postgres::Client, postgres::Error> {
+        postgres::Config::from(self.to_tokio_postgres_config()).connect(postgres::NoTls)
    }
 }

--- a/libs/pq_proto/src/lib.rs
+++ b/libs/pq_proto/src/lib.rs
@@ -289,10 +289,10 @@ impl FeStartupPacket {
        // We shouldn't advance `buf` as probably full message is not there yet,
        // so can't directly use Bytes::get_u32 etc.
        let len = (&buf[0..4]).read_u32::<BigEndian>().unwrap() as usize;
-        // The proposed replacement is `!(8..=MAX_STARTUP_PACKET_LENGTH).contains(&len)`
+        // The proposed replacement is `!(4..=MAX_STARTUP_PACKET_LENGTH).contains(&len)`
        // which is less readable
        #[allow(clippy::manual_range_contains)]
-        if len < 8 || len > MAX_STARTUP_PACKET_LENGTH {
+        if len < 4 || len > MAX_STARTUP_PACKET_LENGTH {
            return Err(ProtocolError::Protocol(format!(
                "invalid startup packet message length {}",
                len
@@ -975,10 +975,4 @@ mod tests {
        let params = make_params("foo\\ bar \\ \\\\ baz\\  lol");
        assert_eq!(split_options(&params), ["foo bar", " \\", "baz ", "lol"]);
    }
-
-    #[test]
-    fn parse_fe_startup_packet_regression() {
-        let data = [0, 0, 0, 7, 0, 0, 0, 0];
-        FeStartupPacket::parse(&mut BytesMut::from_iter(data)).unwrap_err();
-    }
 }
--- a/libs/remote_storage/Cargo.toml
+++ b/libs/remote_storage/Cargo.toml
@@ -9,18 +9,18 @@ anyhow.workspace = true
 async-trait.workspace = true
 once_cell.workspace = true
 aws-smithy-async.workspace = true
-aws-smithy-types.workspace = true
+aws-smithy-http.workspace = true
+aws-types.workspace = true
 aws-config.workspace = true
 aws-sdk-s3.workspace = true
 aws-credential-types.workspace = true
 bytes.workspace = true
 camino.workspace = true
 hyper = { workspace = true, features = ["stream"] }
-futures.workspace = true
 serde.workspace = true
 serde_json.workspace = true
 tokio = { workspace = true, features = ["sync", "fs", "io-util"] }
-tokio-util = { workspace = true, features = ["compat"] }
+tokio-util.workspace = true
 toml_edit.workspace = true
 tracing.workspace = true
 scopeguard.workspace = true
--- a/libs/remote_storage/src/azure_blob.rs
+++ b/libs/remote_storage/src/azure_blob.rs
@@ -1,24 +1,21 @@
 //! Azure Blob Storage wrapper

-use std::borrow::Cow;
 use std::collections::HashMap;
 use std::env;
 use std::num::NonZeroU32;
-use std::pin::Pin;
 use std::sync::Arc;
+use std::{borrow::Cow, io::Cursor};

 use super::REMOTE_STORAGE_PREFIX_SEPARATOR;
 use anyhow::Result;
 use azure_core::request_options::{MaxResults, Metadata, Range};
-use azure_core::RetryOptions;
 use azure_identity::DefaultAzureCredential;
 use azure_storage::StorageCredentials;
 use azure_storage_blobs::prelude::ClientBuilder;
 use azure_storage_blobs::{blob::operations::GetBlobBuilder, prelude::ContainerClient};
-use bytes::Bytes;
-use futures::stream::Stream;
 use futures_util::StreamExt;
 use http_types::StatusCode;
+use tokio::io::AsyncRead;
 use tracing::debug;

 use crate::s3_bucket::RequestKind;
@@ -52,8 +49,7 @@ impl AzureBlobStorage {
            StorageCredentials::token_credential(Arc::new(token_credential))
        };

-        // we have an outer retry
-        let builder = ClientBuilder::new(account, credentials).retry(RetryOptions::none());
+        let builder = ClientBuilder::new(account, credentials);

        let client = builder.container_client(azure_config.container_name.to_owned());

@@ -120,8 +116,7 @@ impl AzureBlobStorage {
        let mut metadata = HashMap::new();
        // TODO give proper streaming response instead of buffering into RAM
        // https://github.com/neondatabase/neon/issues/5563
-
-        let mut bufs = Vec::new();
+        let mut buf = Vec::new();
        while let Some(part) = response.next().await {
            let part = part.map_err(to_download_error)?;
            if let Some(blob_meta) = part.blob.metadata {
@@ -132,10 +127,10 @@ impl AzureBlobStorage {
                .collect()
                .await
                .map_err(|e| DownloadError::Other(e.into()))?;
-            bufs.push(data);
+            buf.extend_from_slice(&data.slice(..));
        }
        Ok(Download {
-            download_stream: Box::pin(futures::stream::iter(bufs.into_iter().map(Ok))),
+            download_stream: Box::pin(Cursor::new(buf)),
            metadata: Some(StorageMetadata(metadata)),
        })
    }
@@ -222,10 +217,9 @@ impl RemoteStorage for AzureBlobStorage {
        }
        Ok(res)
    }
-
    async fn upload(
        &self,
-        from: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
+        mut from: impl AsyncRead + Unpin + Send + Sync + 'static,
        data_size_bytes: usize,
        to: &RemotePath,
        metadata: Option<StorageMetadata>,
@@ -233,12 +227,13 @@ impl RemoteStorage for AzureBlobStorage {
        let _permit = self.permit(RequestKind::Put).await;
        let blob_client = self.client.blob_client(self.relative_path_to_name(to));

-        let from: Pin<Box<dyn Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static>> =
-            Box::pin(from);
-
-        let from = NonSeekableStream::new(from, data_size_bytes);
-
-        let body = azure_core::Body::SeekableStream(Box::new(from));
+        // TODO FIX THIS UGLY HACK and don't buffer the entire object
+        // into RAM here, but use the streaming interface. For that,
+        // we'd have to change the interface though...
+        // https://github.com/neondatabase/neon/issues/5563
+        let mut buf = Vec::with_capacity(data_size_bytes);
+        tokio::io::copy(&mut from, &mut buf).await?;
+        let body = azure_core::Body::Bytes(buf.into());

        let mut builder = blob_client.put_block_blob(body);

@@ -271,12 +266,17 @@ impl RemoteStorage for AzureBlobStorage {

        let mut builder = blob_client.get();

-        let range: Range = if let Some(end_exclusive) = end_exclusive {
-            (start_inclusive..end_exclusive).into()
+        if let Some(end_exclusive) = end_exclusive {
+            builder = builder.range(Range::new(start_inclusive, end_exclusive));
        } else {
-            (start_inclusive..).into()
-        };
-        builder = builder.range(range);
+            // Open ranges are not supported by the SDK so we work around
+            // by setting the upper limit extremely high (but still low enough
+            // to be representable by signed 64 bit integers).
+            // TODO remove workaround once the SDK adds open range support
+            // https://github.com/Azure/azure-sdk-for-rust/issues/1438
+            let end_exclusive = u64::MAX / 4;
+            builder = builder.range(Range::new(start_inclusive, end_exclusive));
+        }

        self.download_for_builder(builder).await
    }
@@ -312,153 +312,3 @@ impl RemoteStorage for AzureBlobStorage {
        Ok(())
    }
 }
-
-pin_project_lite::pin_project! {
-    /// Hack to work around not being able to stream once with azure sdk.
-    ///
-    /// Azure sdk clones streams around with the assumption that they are like
-    /// `Arc<tokio::fs::File>` (except not supporting tokio), however our streams are not like
-    /// that. For example for an `index_part.json` we just have a single chunk of [`Bytes`]
-    /// representing the whole serialized vec. It could be trivially cloneable and "semi-trivially"
-    /// seekable, but we can also just re-try the request easier.
-    #[project = NonSeekableStreamProj]
-    enum NonSeekableStream<S> {
-        /// A stream wrappers initial form.
-        ///
-        /// Mutex exists to allow moving when cloning. If the sdk changes to do less than 1
-        /// clone before first request, then this must be changed.
-        Initial {
-            inner: std::sync::Mutex<Option<tokio_util::compat::Compat<tokio_util::io::StreamReader<S, Bytes>>>>,
-            len: usize,
-        },
-        /// The actually readable variant, produced by cloning the Initial variant.
-        ///
-        /// The sdk currently always clones once, even without retry policy.
-        Actual {
-            #[pin]
-            inner: tokio_util::compat::Compat<tokio_util::io::StreamReader<S, Bytes>>,
-            len: usize,
-            read_any: bool,
-        },
-        /// Most likely unneeded, but left to make life easier, in case more clones are added.
-        Cloned {
-            len_was: usize,
-        }
-    }
-}
-
-impl<S> NonSeekableStream<S>
-where
-    S: Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
-{
-    fn new(inner: S, len: usize) -> NonSeekableStream<S> {
-        use tokio_util::compat::TokioAsyncReadCompatExt;
-
-        let inner = tokio_util::io::StreamReader::new(inner).compat();
-        let inner = Some(inner);
-        let inner = std::sync::Mutex::new(inner);
-        NonSeekableStream::Initial { inner, len }
-    }
-}
-
-impl<S> std::fmt::Debug for NonSeekableStream<S> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        match self {
-            Self::Initial { len, .. } => f.debug_struct("Initial").field("len", len).finish(),
-            Self::Actual { len, .. } => f.debug_struct("Actual").field("len", len).finish(),
-            Self::Cloned { len_was, .. } => f.debug_struct("Cloned").field("len", len_was).finish(),
-        }
-    }
-}
-
-impl<S> futures::io::AsyncRead for NonSeekableStream<S>
-where
-    S: Stream<Item = std::io::Result<Bytes>>,
-{
-    fn poll_read(
-        self: std::pin::Pin<&mut Self>,
-        cx: &mut std::task::Context<'_>,
-        buf: &mut [u8],
-    ) -> std::task::Poll<std::io::Result<usize>> {
-        match self.project() {
-            NonSeekableStreamProj::Actual {
-                inner, read_any, ..
-            } => {
-                *read_any = true;
-                inner.poll_read(cx, buf)
-            }
-            // NonSeekableStream::Initial does not support reading because it is just much easier
-            // to have the mutex in place where one does not poll the contents, or that's how it
-            // seemed originally. If there is a version upgrade which changes the cloning, then
-            // that support needs to be hacked in.
-            //
-            // including {self:?} into the message would be useful, but unsure how to unproject.
-            _ => std::task::Poll::Ready(Err(std::io::Error::new(
-                std::io::ErrorKind::Other,
-                "cloned or initial values cannot be read",
-            ))),
-        }
-    }
-}
-
-impl<S> Clone for NonSeekableStream<S> {
-    /// Weird clone implementation exists to support the sdk doing cloning before issuing the first
-    /// request, see type documentation.
-    fn clone(&self) -> Self {
-        use NonSeekableStream::*;
-
-        match self {
-            Initial { inner, len } => {
-                if let Some(inner) = inner.lock().unwrap().take() {
-                    Actual {
-                        inner,
-                        len: *len,
-                        read_any: false,
-                    }
-                } else {
-                    Self::Cloned { len_was: *len }
-                }
-            }
-            Actual { len, .. } => Cloned { len_was: *len },
-            Cloned { len_was } => Cloned { len_was: *len_was },
-        }
-    }
-}
-
-#[async_trait::async_trait]
-impl<S> azure_core::SeekableStream for NonSeekableStream<S>
-where
-    S: Stream<Item = std::io::Result<Bytes>> + Unpin + Send + Sync + 'static,
-{
-    async fn reset(&mut self) -> azure_core::error::Result<()> {
-        use NonSeekableStream::*;
-
-        let msg = match self {
-            Initial { inner, .. } => {
-                if inner.get_mut().unwrap().is_some() {
-                    return Ok(());
-                } else {
-                    "reset after first clone is not supported"
-                }
-            }
-            Actual { read_any, .. } if !*read_any => return Ok(()),
-            Actual { .. } => "reset after reading is not supported",
-            Cloned { .. } => "reset after second clone is not supported",
-        };
-        Err(azure_core::error::Error::new(
-            azure_core::error::ErrorKind::Io,
-            std::io::Error::new(std::io::ErrorKind::Other, msg),
-        ))
-    }
-
-    // Note: it is not documented if this should be the total or remaining length, total passes the
-    // tests.
-    fn len(&self) -> usize {
-        use NonSeekableStream::*;
-        match self {
-            Initial { len, .. } => *len,
-            Actual { len, .. } => *len,
-            Cloned { len_was, .. } => *len_was,
-        }
-    }
-}
--- a/libs/remote_storage/src/lib.rs
+++ b/libs/remote_storage/src/lib.rs
@@ -19,10 +19,8 @@ use std::{collections::HashMap, fmt::Debug, num::NonZeroUsize, pin::Pin, sync::A
 use anyhow::{bail, Context};
 use camino::{Utf8Path, Utf8PathBuf};

-use bytes::Bytes;
-use futures::stream::Stream;
 use serde::{Deserialize, Serialize};
-use tokio::sync::Semaphore;
+use tokio::{io, sync::Semaphore};
 use toml_edit::Item;
 use tracing::info;

@@ -181,7 +179,7 @@ pub trait RemoteStorage: Send + Sync + 'static {
    /// Streams the local file contents into remote into the remote storage entry.
    async fn upload(
        &self,
-        from: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
+        from: impl io::AsyncRead + Unpin + Send + Sync + 'static,
        // S3 PUT request requires the content length to be specified,
        // otherwise it starts to fail with the concurrent connection count increasing.
        data_size_bytes: usize,
@@ -208,7 +206,7 @@ pub trait RemoteStorage: Send + Sync + 'static {
 }

 pub struct Download {
-    pub download_stream: Pin<Box<dyn Stream<Item = std::io::Result<Bytes>> + Unpin + Send + Sync>>,
+    pub download_stream: Pin<Box<dyn io::AsyncRead + Unpin + Send + Sync>>,
    /// Extra key-value data, associated with the current remote file.
    pub metadata: Option<StorageMetadata>,
 }
@@ -302,7 +300,7 @@ impl GenericRemoteStorage {

    pub async fn upload(
        &self,
-        from: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
+        from: impl io::AsyncRead + Unpin + Send + Sync + 'static,
        data_size_bytes: usize,
        to: &RemotePath,
        metadata: Option<StorageMetadata>,
@@ -400,7 +398,7 @@ impl GenericRemoteStorage {
    /// this path is used for the remote object id conversion only.
    pub async fn upload_storage_object(
        &self,
-        from: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
+        from: impl tokio::io::AsyncRead + Unpin + Send + Sync + 'static,
        from_size_bytes: usize,
        to: &RemotePath,
    ) -> anyhow::Result<()> {
--- a/libs/remote_storage/src/local_fs.rs
+++ b/libs/remote_storage/src/local_fs.rs
@@ -7,14 +7,11 @@
 use std::{borrow::Cow, future::Future, io::ErrorKind, pin::Pin};

 use anyhow::{bail, ensure, Context};
-use bytes::Bytes;
 use camino::{Utf8Path, Utf8PathBuf};
-use futures::stream::Stream;
 use tokio::{
    fs,
    io::{self, AsyncReadExt, AsyncSeekExt, AsyncWriteExt},
 };
-use tokio_util::io::ReaderStream;
 use tracing::*;
 use utils::{crashsafe::path_with_suffix_extension, fs_ext::is_directory_empty};

@@ -102,35 +99,27 @@ impl LocalFs {
        };

        // If we were given a directory, we may use it as our starting point.
-        // Otherwise, we must go up to the first ancestor dir that exists.  This is because
+        // Otherwise, we must go up to the parent directory.  This is because
        // S3 object list prefixes can be arbitrary strings, but when reading
        // the local filesystem we need a directory to start calling read_dir on.
        let mut initial_dir = full_path.clone();
-        loop {
-            // Did we make it to the root?
-            if initial_dir.parent().is_none() {
-                anyhow::bail!("list_files: failed to find valid ancestor dir for {full_path}");
-            }
-
-            match fs::metadata(initial_dir.clone()).await {
-                Ok(meta) if meta.is_dir() => {
-                    // We found a directory, break
-                    break;
-                }
-                Ok(_meta) => {
+        match fs::metadata(full_path.clone()).await {
+            Ok(meta) => {
+                if !meta.is_dir() {
                    // It's not a directory: strip back to the parent
                    initial_dir.pop();
                }
-                Err(e) if e.kind() == ErrorKind::NotFound => {
-                    // It's not a file that exists: strip the prefix back to the parent directory
-                    initial_dir.pop();
-                }
-                Err(e) => {
-                    // Unexpected I/O error
-                    anyhow::bail!(e)
-                }
+            }
+            Err(e) if e.kind() == ErrorKind::NotFound => {
+                // It's not a file that exists: strip the prefix back to the parent directory
+                initial_dir.pop();
+            }
+            Err(e) => {
+                // Unexpected I/O error
+                anyhow::bail!(e)
            }
        }
+
        // Note that Utf8PathBuf starts_with only considers full path segments, but
        // object prefixes are arbitrary strings, so we need the strings for doing
        // starts_with later.
@@ -222,7 +211,7 @@ impl RemoteStorage for LocalFs {

    async fn upload(
        &self,
-        data: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync,
+        data: impl io::AsyncRead + Unpin + Send + Sync + 'static,
        data_size_bytes: usize,
        to: &RemotePath,
        metadata: Option<StorageMetadata>,
@@ -255,12 +244,9 @@ impl RemoteStorage for LocalFs {
        );

        let from_size_bytes = data_size_bytes as u64;
-        let data = tokio_util::io::StreamReader::new(data);
-        let data = std::pin::pin!(data);
        let mut buffer_to_read = data.take(from_size_bytes);

-        // alternatively we could just write the bytes to a file, but local_fs is a testing utility
-        let bytes_read = io::copy_buf(&mut buffer_to_read, &mut destination)
+        let bytes_read = io::copy(&mut buffer_to_read, &mut destination)
            .await
            .with_context(|| {
                format!(
@@ -314,7 +300,7 @@ impl RemoteStorage for LocalFs {
    async fn download(&self, from: &RemotePath) -> Result<Download, DownloadError> {
        let target_path = from.with_base(&self.storage_root);
        if file_exists(&target_path).map_err(DownloadError::BadInput)? {
-            let source = ReaderStream::new(
+            let source = io::BufReader::new(
                fs::OpenOptions::new()
                    .read(true)
                    .open(&target_path)
@@ -354,14 +340,16 @@ impl RemoteStorage for LocalFs {
        }
        let target_path = from.with_base(&self.storage_root);
        if file_exists(&target_path).map_err(DownloadError::BadInput)? {
-            let mut source = tokio::fs::OpenOptions::new()
-                .read(true)
-                .open(&target_path)
-                .await
-                .with_context(|| {
-                    format!("Failed to open source file {target_path:?} to use in the download")
-                })
-                .map_err(DownloadError::Other)?;
+            let mut source = io::BufReader::new(
+                fs::OpenOptions::new()
+                    .read(true)
+                    .open(&target_path)
+                    .await
+                    .with_context(|| {
+                        format!("Failed to open source file {target_path:?} to use in the download")
+                    })
+                    .map_err(DownloadError::Other)?,
+            );
            source
                .seek(io::SeekFrom::Start(start_inclusive))
                .await
@@ -375,13 +363,11 @@ impl RemoteStorage for LocalFs {
            Ok(match end_exclusive {
                Some(end_exclusive) => Download {
                    metadata,
-                    download_stream: Box::pin(ReaderStream::new(
-                        source.take(end_exclusive - start_inclusive),
-                    )),
+                    download_stream: Box::pin(source.take(end_exclusive - start_inclusive)),
                },
                None => Download {
                    metadata,
-                    download_stream: Box::pin(ReaderStream::new(source)),
+                    download_stream: Box::pin(source),
                },
            })
        } else {
@@ -481,9 +467,7 @@ fn file_exists(file_path: &Utf8Path) -> anyhow::Result<bool> {
 mod fs_tests {
    use super::*;

-    use bytes::Bytes;
    use camino_tempfile::tempdir;
-    use futures_util::Stream;
    use std::{collections::HashMap, io::Write};

    async fn read_and_assert_remote_file_contents(
@@ -493,7 +477,7 @@ mod fs_tests {
        remote_storage_path: &RemotePath,
        expected_metadata: Option<&StorageMetadata>,
    ) -> anyhow::Result<String> {
-        let download = storage
+        let mut download = storage
            .download(remote_storage_path)
            .await
            .map_err(|e| anyhow::anyhow!("Download failed: {e}"))?;
@@ -502,9 +486,13 @@ mod fs_tests {
            "Unexpected metadata returned for the downloaded file"
        );

-        let contents = aggregate(download.download_stream).await?;
-
-        String::from_utf8(contents).map_err(anyhow::Error::new)
+        let mut contents = String::new();
+        download
+            .download_stream
+            .read_to_string(&mut contents)
+            .await
+            .context("Failed to read remote file contents into string")?;
+        Ok(contents)
    }

    #[tokio::test]
@@ -533,26 +521,25 @@ mod fs_tests {
        let storage = create_storage()?;

        let id = RemotePath::new(Utf8Path::new("dummy"))?;
-        let content = Bytes::from_static(b"12345");
-        let content = move || futures::stream::once(futures::future::ready(Ok(content.clone())));
+        let content = std::io::Cursor::new(b"12345");

        // Check that you get an error if the size parameter doesn't match the actual
        // size of the stream.
        storage
-            .upload(content(), 0, &id, None)
+            .upload(Box::new(content.clone()), 0, &id, None)
            .await
            .expect_err("upload with zero size succeeded");
        storage
-            .upload(content(), 4, &id, None)
+            .upload(Box::new(content.clone()), 4, &id, None)
            .await
            .expect_err("upload with too short size succeeded");
        storage
-            .upload(content(), 6, &id, None)
+            .upload(Box::new(content.clone()), 6, &id, None)
            .await
            .expect_err("upload with too large size succeeded");

        // Correct size is 5, this should succeed.
-        storage.upload(content(), 5, &id, None).await?;
+        storage.upload(Box::new(content), 5, &id, None).await?;

        Ok(())
    }
@@ -600,7 +587,7 @@ mod fs_tests {
        let uploaded_bytes = dummy_contents(upload_name).into_bytes();
        let (first_part_local, second_part_local) = uploaded_bytes.split_at(3);

-        let first_part_download = storage
+        let mut first_part_download = storage
            .download_byte_range(&upload_target, 0, Some(first_part_local.len() as u64))
            .await?;
        assert!(
@@ -608,13 +595,21 @@ mod fs_tests {
            "No metadata should be returned for no metadata upload"
        );

-        let first_part_remote = aggregate(first_part_download.download_stream).await?;
+        let mut first_part_remote = io::BufWriter::new(std::io::Cursor::new(Vec::new()));
+        io::copy(
+            &mut first_part_download.download_stream,
+            &mut first_part_remote,
+        )
+        .await?;
+        first_part_remote.flush().await?;
+        let first_part_remote = first_part_remote.into_inner().into_inner();
        assert_eq!(
-            first_part_local, first_part_remote,
+            first_part_local,
+            first_part_remote.as_slice(),
            "First part bytes should be returned when requested"
        );

-        let second_part_download = storage
+        let mut second_part_download = storage
            .download_byte_range(
                &upload_target,
                first_part_local.len() as u64,
@@ -626,9 +621,17 @@ mod fs_tests {
            "No metadata should be returned for no metadata upload"
        );

-        let second_part_remote = aggregate(second_part_download.download_stream).await?;
+        let mut second_part_remote = io::BufWriter::new(std::io::Cursor::new(Vec::new()));
+        io::copy(
+            &mut second_part_download.download_stream,
+            &mut second_part_remote,
+        )
+        .await?;
+        second_part_remote.flush().await?;
+        let second_part_remote = second_part_remote.into_inner().into_inner();
        assert_eq!(
-            second_part_local, second_part_remote,
+            second_part_local,
+            second_part_remote.as_slice(),
            "Second part bytes should be returned when requested"
        );

@@ -718,10 +721,17 @@ mod fs_tests {
        let uploaded_bytes = dummy_contents(upload_name).into_bytes();
        let (first_part_local, _) = uploaded_bytes.split_at(3);

-        let partial_download_with_metadata = storage
+        let mut partial_download_with_metadata = storage
            .download_byte_range(&upload_target, 0, Some(first_part_local.len() as u64))
            .await?;
-        let first_part_remote = aggregate(partial_download_with_metadata.download_stream).await?;
+        let mut first_part_remote = io::BufWriter::new(std::io::Cursor::new(Vec::new()));
+        io::copy(
+            &mut partial_download_with_metadata.download_stream,
+            &mut first_part_remote,
+        )
+        .await?;
+        first_part_remote.flush().await?;
+        let first_part_remote = first_part_remote.into_inner().into_inner();
        assert_eq!(
            first_part_local,
            first_part_remote.as_slice(),
@@ -797,16 +807,16 @@ mod fs_tests {
                )
            })?;

-        let file = tokio_util::io::ReaderStream::new(file);
-
-        storage.upload(file, size, &relative_path, metadata).await?;
+        storage
+            .upload(Box::new(file), size, &relative_path, metadata)
+            .await?;
        Ok(relative_path)
    }

    async fn create_file_for_upload(
        path: &Utf8Path,
        contents: &str,
-    ) -> anyhow::Result<(fs::File, usize)> {
+    ) -> anyhow::Result<(io::BufReader<fs::File>, usize)> {
        std::fs::create_dir_all(path.parent().unwrap())?;
        let mut file_for_writing = std::fs::OpenOptions::new()
            .write(true)
@@ -816,7 +826,7 @@ mod fs_tests {
        drop(file_for_writing);
        let file_size = path.metadata()?.len() as usize;
        Ok((
-            fs::OpenOptions::new().read(true).open(&path).await?,
+            io::BufReader::new(fs::OpenOptions::new().read(true).open(&path).await?),
            file_size,
        ))
    }
@@ -830,16 +840,4 @@ mod fs_tests {
        files.sort_by(|a, b| a.0.cmp(&b.0));
        Ok(files)
    }
-
-    async fn aggregate(
-        stream: impl Stream<Item = std::io::Result<Bytes>>,
-    ) -> anyhow::Result<Vec<u8>> {
-        use futures::stream::StreamExt;
-        let mut out = Vec::new();
-        let mut stream = std::pin::pin!(stream);
-        while let Some(res) = stream.next().await {
-            out.extend_from_slice(&res?[..]);
-        }
-        Ok(out)
-    }
 }
--- a/libs/remote_storage/src/s3_bucket.rs
+++ b/libs/remote_storage/src/s3_bucket.rs
@@ -4,14 +4,9 @@
 //! allowing multiple api users to independently work with the same S3 bucket, if
 //! their bucket prefixes are both specified and different.

-use std::{
-    borrow::Cow,
-    pin::Pin,
-    sync::Arc,
-    task::{Context, Poll},
-};
+use std::{borrow::Cow, sync::Arc};

-use anyhow::Context as _;
+use anyhow::Context;
 use aws_config::{
    environment::credentials::EnvironmentVariableCredentialsProvider,
    imds::credentials::ImdsCredentialsProvider,
@@ -19,24 +14,23 @@ use aws_config::{
    provider_config::ProviderConfig,
    retry::{RetryConfigBuilder, RetryMode},
    web_identity_token::WebIdentityTokenCredentialsProvider,
-    BehaviorVersion,
 };
-use aws_credential_types::provider::SharedCredentialsProvider;
+use aws_credential_types::cache::CredentialsCache;
 use aws_sdk_s3::{
-    config::{AsyncSleep, Builder, IdentityCache, Region, SharedAsyncSleep},
+    config::{AsyncSleep, Config, Region, SharedAsyncSleep},
    error::SdkError,
    operation::get_object::GetObjectError,
+    primitives::ByteStream,
    types::{Delete, ObjectIdentifier},
    Client,
 };
 use aws_smithy_async::rt::sleep::TokioSleep;
-
-use aws_smithy_types::body::SdkBody;
-use aws_smithy_types::byte_stream::ByteStream;
-use bytes::Bytes;
-use futures::stream::Stream;
+use aws_smithy_http::body::SdkBody;
 use hyper::Body;
 use scopeguard::ScopeGuard;
+use tokio::io::{self, AsyncRead};
+use tokio_util::io::ReaderStream;
+use tracing::debug;

 use super::StorageMetadata;
 use crate::{
@@ -67,7 +61,7 @@ struct GetObjectRequest {
 impl S3Bucket {
    /// Creates the S3 storage, errors if incorrect AWS S3 configuration provided.
    pub fn new(aws_config: &S3Config) -> anyhow::Result<Self> {
-        tracing::debug!(
+        debug!(
            "Creating s3 remote storage for S3 bucket {}",
            aws_config.bucket_name
        );
@@ -84,6 +78,7 @@ impl S3Bucket {
            // needed to access remote extensions bucket
            .or_else("token", {
                let provider_conf = ProviderConfig::without_region().with_region(region.clone());
+
                WebIdentityTokenCredentialsProvider::builder()
                    .configure(&provider_conf)
                    .build()
@@ -103,20 +98,18 @@ impl S3Bucket {
            .set_max_attempts(Some(1))
            .set_mode(Some(RetryMode::Adaptive));

-        let mut config_builder = Builder::default()
-            .behavior_version(BehaviorVersion::v2023_11_09())
+        let mut config_builder = Config::builder()
            .region(region)
-            .identity_cache(IdentityCache::lazy().build())
-            .credentials_provider(SharedCredentialsProvider::new(credentials_provider))
-            .retry_config(retry_config.build())
-            .sleep_impl(SharedAsyncSleep::from(sleep_impl));
+            .credentials_cache(CredentialsCache::lazy())
+            .credentials_provider(credentials_provider)
+            .sleep_impl(SharedAsyncSleep::from(sleep_impl))
+            .retry_config(retry_config.build());

        if let Some(custom_endpoint) = aws_config.endpoint.clone() {
            config_builder = config_builder
                .endpoint_url(custom_endpoint)
                .force_path_style(true);
        }
-
        let client = Client::from_conf(config_builder.build());

        let prefix_in_bucket = aws_config.prefix_in_bucket.as_deref().map(|prefix| {
@@ -229,15 +222,12 @@ impl S3Bucket {
        match get_object {
            Ok(object_output) => {
                let metadata = object_output.metadata().cloned().map(StorageMetadata);
-
-                let body = object_output.body;
-                let body = ByteStreamAsStream::from(body);
-                let body = PermitCarrying::new(permit, body);
-                let body = TimedDownload::new(started_at, body);
-
                Ok(Download {
                    metadata,
-                    download_stream: Box::pin(body),
+                    download_stream: Box::pin(io::BufReader::new(TimedDownload::new(
+                        started_at,
+                        RatelimitedAsyncRead::new(permit, object_output.body.into_async_read()),
+                    ))),
                })
            }
            Err(SdkError::ServiceError(e)) if matches!(e.err(), GetObjectError::NoSuchKey(_)) => {
@@ -250,55 +240,29 @@ impl S3Bucket {
    }
 }

-pin_project_lite::pin_project! {
-    struct ByteStreamAsStream {
-        #[pin]
-        inner: aws_smithy_types::byte_stream::ByteStream
-    }
-}
-
-impl From<aws_smithy_types::byte_stream::ByteStream> for ByteStreamAsStream {
-    fn from(inner: aws_smithy_types::byte_stream::ByteStream) -> Self {
-        ByteStreamAsStream { inner }
-    }
-}
-
-impl Stream for ByteStreamAsStream {
-    type Item = std::io::Result<Bytes>;
-
-    fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
-        // this does the std::io::ErrorKind::Other conversion
-        self.project().inner.poll_next(cx).map_err(|x| x.into())
-    }
-
-    // cannot implement size_hint because inner.size_hint is remaining size in bytes, which makes
-    // sense and Stream::size_hint does not really
-}
-
 pin_project_lite::pin_project! {
    /// An `AsyncRead` adapter which carries a permit for the lifetime of the value.
-    struct PermitCarrying<S> {
+    struct RatelimitedAsyncRead<S> {
        permit: tokio::sync::OwnedSemaphorePermit,
        #[pin]
        inner: S,
    }
 }

-impl<S> PermitCarrying<S> {
+impl<S: AsyncRead> RatelimitedAsyncRead<S> {
    fn new(permit: tokio::sync::OwnedSemaphorePermit, inner: S) -> Self {
-        Self { permit, inner }
+        RatelimitedAsyncRead { permit, inner }
    }
 }

-impl<S: Stream<Item = std::io::Result<Bytes>>> Stream for PermitCarrying<S> {
-    type Item = <S as Stream>::Item;
-
-    fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
-        self.project().inner.poll_next(cx)
-    }
-
-    fn size_hint(&self) -> (usize, Option<usize>) {
-        self.inner.size_hint()
+impl<S: AsyncRead> AsyncRead for RatelimitedAsyncRead<S> {
+    fn poll_read(
+        self: std::pin::Pin<&mut Self>,
+        cx: &mut std::task::Context<'_>,
+        buf: &mut io::ReadBuf<'_>,
+    ) -> std::task::Poll<std::io::Result<()>> {
+        let this = self.project();
+        this.inner.poll_read(cx, buf)
    }
 }

@@ -318,7 +282,7 @@ pin_project_lite::pin_project! {
    }
 }

-impl<S> TimedDownload<S> {
+impl<S: AsyncRead> TimedDownload<S> {
    fn new(started_at: std::time::Instant, inner: S) -> Self {
        TimedDownload {
            started_at,
@@ -328,26 +292,25 @@ impl<S> TimedDownload<S> {
    }
 }

-impl<S: Stream<Item = std::io::Result<Bytes>>> Stream for TimedDownload<S> {
-    type Item = <S as Stream>::Item;
-
-    fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
-        use std::task::ready;
-
+impl<S: AsyncRead> AsyncRead for TimedDownload<S> {
+    fn poll_read(
+        self: std::pin::Pin<&mut Self>,
+        cx: &mut std::task::Context<'_>,
+        buf: &mut io::ReadBuf<'_>,
+    ) -> std::task::Poll<std::io::Result<()>> {
        let this = self.project();
+        let before = buf.filled().len();
+        let read = std::task::ready!(this.inner.poll_read(cx, buf));

-        let res = ready!(this.inner.poll_next(cx));
-        match &res {
-            Some(Ok(_)) => {}
-            Some(Err(_)) => *this.outcome = metrics::AttemptOutcome::Err,
-            None => *this.outcome = metrics::AttemptOutcome::Ok,
+        let read_eof = buf.filled().len() == before;
+
+        match read {
+            Ok(()) if read_eof => *this.outcome = AttemptOutcome::Ok,
+            Ok(()) => { /* still in progress */ }
+            Err(_) => *this.outcome = AttemptOutcome::Err,
        }

-        Poll::Ready(res)
-    }
-
-    fn size_hint(&self) -> (usize, Option<usize>) {
-        self.inner.size_hint()
+        std::task::Poll::Ready(read)
    }
 }

@@ -408,11 +371,11 @@ impl RemoteStorage for S3Bucket {

            let response = response?;

-            let keys = response.contents();
+            let keys = response.contents().unwrap_or_default();
            let empty = Vec::new();
            let prefixes = response.common_prefixes.as_ref().unwrap_or(&empty);

-            tracing::debug!("list: {} prefixes, {} keys", prefixes.len(), keys.len());
+            tracing::info!("list: {} prefixes, {} keys", prefixes.len(), keys.len());

            for object in keys {
                let object_path = object.key().expect("response does not contain a key");
@@ -437,7 +400,7 @@ impl RemoteStorage for S3Bucket {

    async fn upload(
        &self,
-        from: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
+        from: impl io::AsyncRead + Unpin + Send + Sync + 'static,
        from_size_bytes: usize,
        to: &RemotePath,
        metadata: Option<StorageMetadata>,
@@ -447,8 +410,8 @@ impl RemoteStorage for S3Bucket {

        let started_at = start_measuring_requests(kind);

-        let body = Body::wrap_stream(from);
-        let bytes_stream = ByteStream::new(SdkBody::from_body_0_4(body));
+        let body = Body::wrap_stream(ReaderStream::new(from));
+        let bytes_stream = ByteStream::new(SdkBody::from(body));

        let res = self
            .client
@@ -511,7 +474,7 @@ impl RemoteStorage for S3Bucket {
        for path in paths {
            let obj_id = ObjectIdentifier::builder()
                .set_key(Some(self.relative_path_to_s3_object(path)))
-                .build()?;
+                .build();
            delete_objects.push(obj_id);
        }

@@ -522,11 +485,7 @@ impl RemoteStorage for S3Bucket {
                .client
                .delete_objects()
                .bucket(self.bucket_name.clone())
-                .delete(
-                    Delete::builder()
-                        .set_objects(Some(chunk.to_vec()))
-                        .build()?,
-                )
+                .delete(Delete::builder().set_objects(Some(chunk.to_vec())).build())
                .send()
                .await;

--- a/libs/remote_storage/src/simulate_failures.rs
+++ b/libs/remote_storage/src/simulate_failures.rs
@@ -1,8 +1,6 @@
 //! This module provides a wrapper around a real RemoteStorage implementation that
 //! causes the first N attempts at each upload or download operatio to fail. For
 //! testing purposes.
-use bytes::Bytes;
-use futures::stream::Stream;
 use std::collections::hash_map::Entry;
 use std::collections::HashMap;
 use std::sync::Mutex;
@@ -110,7 +108,7 @@ impl RemoteStorage for UnreliableWrapper {

    async fn upload(
        &self,
-        data: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
+        data: impl tokio::io::AsyncRead + Unpin + Send + Sync + 'static,
        // S3 PUT request requires the content length to be specified,
        // otherwise it starts to fail with the concurrent connection count increasing.
        data_size_bytes: usize,
--- a/libs/remote_storage/tests/test_real_azure.rs
+++ b/libs/remote_storage/tests/test_real_azure.rs
@@ -7,9 +7,7 @@ use std::sync::Arc;
 use std::time::UNIX_EPOCH;

 use anyhow::Context;
-use bytes::Bytes;
 use camino::Utf8Path;
-use futures::stream::Stream;
 use once_cell::sync::OnceCell;
 use remote_storage::{
    AzureConfig, Download, GenericRemoteStorage, RemotePath, RemoteStorageConfig, RemoteStorageKind,
@@ -182,14 +180,23 @@ async fn azure_delete_objects_works(ctx: &mut MaybeEnabledAzure) -> anyhow::Resu
    let path3 = RemotePath::new(Utf8Path::new(format!("{}/path3", ctx.base_prefix).as_str()))
        .with_context(|| "RemotePath conversion")?;

-    let (data, len) = upload_stream("remote blob data1".as_bytes().into());
-    ctx.client.upload(data, len, &path1, None).await?;
+    let data1 = "remote blob data1".as_bytes();
+    let data1_len = data1.len();
+    let data2 = "remote blob data2".as_bytes();
+    let data2_len = data2.len();
+    let data3 = "remote blob data3".as_bytes();
+    let data3_len = data3.len();
+    ctx.client
+        .upload(std::io::Cursor::new(data1), data1_len, &path1, None)
+        .await?;

-    let (data, len) = upload_stream("remote blob data2".as_bytes().into());
-    ctx.client.upload(data, len, &path2, None).await?;
+    ctx.client
+        .upload(std::io::Cursor::new(data2), data2_len, &path2, None)
+        .await?;

-    let (data, len) = upload_stream("remote blob data3".as_bytes().into());
-    ctx.client.upload(data, len, &path3, None).await?;
+    ctx.client
+        .upload(std::io::Cursor::new(data3), data3_len, &path3, None)
+        .await?;

    ctx.client.delete_objects(&[path1, path2]).await?;

@@ -212,56 +219,53 @@ async fn azure_upload_download_works(ctx: &mut MaybeEnabledAzure) -> anyhow::Res
    let path = RemotePath::new(Utf8Path::new(format!("{}/file", ctx.base_prefix).as_str()))
        .with_context(|| "RemotePath conversion")?;

-    let orig = bytes::Bytes::from_static("remote blob data here".as_bytes());
+    let data = "remote blob data here".as_bytes();
+    let data_len = data.len() as u64;

-    let (data, len) = wrap_stream(orig.clone());
-
-    ctx.client.upload(data, len, &path, None).await?;
-
-    async fn download_and_compare(dl: Download) -> anyhow::Result<Vec<u8>> {
-        let mut buf = Vec::new();
-        tokio::io::copy_buf(
-            &mut tokio_util::io::StreamReader::new(dl.download_stream),
-            &mut buf,
-        )
+    ctx.client
+        .upload(std::io::Cursor::new(data), data.len(), &path, None)
        .await?;
+
+    async fn download_and_compare(mut dl: Download) -> anyhow::Result<Vec<u8>> {
+        let mut buf = Vec::new();
+        tokio::io::copy(&mut dl.download_stream, &mut buf).await?;
        Ok(buf)
    }
    // Normal download request
    let dl = ctx.client.download(&path).await?;
    let buf = download_and_compare(dl).await?;
-    assert_eq!(&buf, &orig);
+    assert_eq!(buf, data);

    // Full range (end specified)
    let dl = ctx
        .client
-        .download_byte_range(&path, 0, Some(len as u64))
+        .download_byte_range(&path, 0, Some(data_len))
        .await?;
    let buf = download_and_compare(dl).await?;
-    assert_eq!(&buf, &orig);
+    assert_eq!(buf, data);

    // partial range (end specified)
    let dl = ctx.client.download_byte_range(&path, 4, Some(10)).await?;
    let buf = download_and_compare(dl).await?;
-    assert_eq!(&buf, &orig[4..10]);
+    assert_eq!(buf, data[4..10]);

    // partial range (end beyond real end)
    let dl = ctx
        .client
-        .download_byte_range(&path, 8, Some(len as u64 * 100))
+        .download_byte_range(&path, 8, Some(data_len * 100))
        .await?;
    let buf = download_and_compare(dl).await?;
-    assert_eq!(&buf, &orig[8..]);
+    assert_eq!(buf, data[8..]);

    // Partial range (end unspecified)
    let dl = ctx.client.download_byte_range(&path, 4, None).await?;
    let buf = download_and_compare(dl).await?;
-    assert_eq!(&buf, &orig[4..]);
+    assert_eq!(buf, data[4..]);

    // Full range (end unspecified)
    let dl = ctx.client.download_byte_range(&path, 0, None).await?;
    let buf = download_and_compare(dl).await?;
-    assert_eq!(&buf, &orig);
+    assert_eq!(buf, data);

    debug!("Cleanup: deleting file at path {path:?}");
    ctx.client
@@ -277,7 +281,6 @@ fn ensure_logging_ready() {
        utils::logging::init(
            utils::logging::LogFormat::Test,
            utils::logging::TracingErrorLayerEnablement::Disabled,
-            utils::logging::Output::Stdout,
        )
        .expect("logging init failed");
    });
@@ -500,8 +503,11 @@ async fn upload_azure_data(
            let blob_path = blob_prefix.join(Utf8Path::new(&format!("blob_{i}")));
            debug!("Creating remote item {i} at path {blob_path:?}");

-            let (data, len) = upload_stream(format!("remote blob data {i}").into_bytes().into());
-            task_client.upload(data, len, &blob_path, None).await?;
+            let data = format!("remote blob data {i}").into_bytes();
+            let data_len = data.len();
+            task_client
+                .upload(std::io::Cursor::new(data), data_len, &blob_path, None)
+                .await?;

            Ok::<_, anyhow::Error>((blob_prefix, blob_path))
        });
@@ -582,8 +588,11 @@ async fn upload_simple_azure_data(
            .with_context(|| format!("{blob_path:?} to RemotePath conversion"))?;
            debug!("Creating remote item {i} at path {blob_path:?}");

-            let (data, len) = upload_stream(format!("remote blob data {i}").into_bytes().into());
-            task_client.upload(data, len, &blob_path, None).await?;
+            let data = format!("remote blob data {i}").into_bytes();
+            let data_len = data.len();
+            task_client
+                .upload(std::io::Cursor::new(data), data_len, &blob_path, None)
+                .await?;

            Ok::<_, anyhow::Error>(blob_path)
        });
@@ -612,32 +621,3 @@ async fn upload_simple_azure_data(
        ControlFlow::Continue(uploaded_blobs)
    }
 }
-
-// FIXME: copypasted from test_real_s3, can't remember how to share a module which is not compiled
-// to binary
-fn upload_stream(
-    content: std::borrow::Cow<'static, [u8]>,
-) -> (
-    impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
-    usize,
-) {
-    use std::borrow::Cow;
-
-    let content = match content {
-        Cow::Borrowed(x) => Bytes::from_static(x),
-        Cow::Owned(vec) => Bytes::from(vec),
-    };
-    wrap_stream(content)
-}
-
-fn wrap_stream(
-    content: bytes::Bytes,
-) -> (
-    impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
-    usize,
-) {
-    let len = content.len();
-    let content = futures::future::ready(Ok(content));
-
-    (futures::stream::once(content), len)
-}
--- a/libs/remote_storage/tests/test_real_s3.rs
+++ b/libs/remote_storage/tests/test_real_s3.rs
@@ -7,9 +7,7 @@ use std::sync::Arc;
 use std::time::UNIX_EPOCH;

 use anyhow::Context;
-use bytes::Bytes;
 use camino::Utf8Path;
-use futures::stream::Stream;
 use once_cell::sync::OnceCell;
 use remote_storage::{
    GenericRemoteStorage, RemotePath, RemoteStorageConfig, RemoteStorageKind, S3Config,
@@ -178,14 +176,23 @@ async fn s3_delete_objects_works(ctx: &mut MaybeEnabledS3) -> anyhow::Result<()>
    let path3 = RemotePath::new(Utf8Path::new(format!("{}/path3", ctx.base_prefix).as_str()))
        .with_context(|| "RemotePath conversion")?;

-    let (data, len) = upload_stream("remote blob data1".as_bytes().into());
-    ctx.client.upload(data, len, &path1, None).await?;
+    let data1 = "remote blob data1".as_bytes();
+    let data1_len = data1.len();
+    let data2 = "remote blob data2".as_bytes();
+    let data2_len = data2.len();
+    let data3 = "remote blob data3".as_bytes();
+    let data3_len = data3.len();
+    ctx.client
+        .upload(std::io::Cursor::new(data1), data1_len, &path1, None)
+        .await?;

-    let (data, len) = upload_stream("remote blob data2".as_bytes().into());
-    ctx.client.upload(data, len, &path2, None).await?;
+    ctx.client
+        .upload(std::io::Cursor::new(data2), data2_len, &path2, None)
+        .await?;

-    let (data, len) = upload_stream("remote blob data3".as_bytes().into());
-    ctx.client.upload(data, len, &path3, None).await?;
+    ctx.client
+        .upload(std::io::Cursor::new(data3), data3_len, &path3, None)
+        .await?;

    ctx.client.delete_objects(&[path1, path2]).await?;

@@ -203,7 +210,6 @@ fn ensure_logging_ready() {
        utils::logging::init(
            utils::logging::LogFormat::Test,
            utils::logging::TracingErrorLayerEnablement::Disabled,
-            utils::logging::Output::Stdout,
        )
        .expect("logging init failed");
    });
@@ -425,9 +431,11 @@ async fn upload_s3_data(
            let blob_path = blob_prefix.join(Utf8Path::new(&format!("blob_{i}")));
            debug!("Creating remote item {i} at path {blob_path:?}");

-            let (data, data_len) =
-                upload_stream(format!("remote blob data {i}").into_bytes().into());
-            task_client.upload(data, data_len, &blob_path, None).await?;
+            let data = format!("remote blob data {i}").into_bytes();
+            let data_len = data.len();
+            task_client
+                .upload(std::io::Cursor::new(data), data_len, &blob_path, None)
+                .await?;

            Ok::<_, anyhow::Error>((blob_prefix, blob_path))
        });
@@ -508,9 +516,11 @@ async fn upload_simple_s3_data(
            .with_context(|| format!("{blob_path:?} to RemotePath conversion"))?;
            debug!("Creating remote item {i} at path {blob_path:?}");

-            let (data, data_len) =
-                upload_stream(format!("remote blob data {i}").into_bytes().into());
-            task_client.upload(data, data_len, &blob_path, None).await?;
+            let data = format!("remote blob data {i}").into_bytes();
+            let data_len = data.len();
+            task_client
+                .upload(std::io::Cursor::new(data), data_len, &blob_path, None)
+                .await?;

            Ok::<_, anyhow::Error>(blob_path)
        });
@@ -539,30 +549,3 @@ async fn upload_simple_s3_data(
        ControlFlow::Continue(uploaded_blobs)
    }
 }
-
-fn upload_stream(
-    content: std::borrow::Cow<'static, [u8]>,
-) -> (
-    impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
-    usize,
-) {
-    use std::borrow::Cow;
-
-    let content = match content {
-        Cow::Borrowed(x) => Bytes::from_static(x),
-        Cow::Owned(vec) => Bytes::from(vec),
-    };
-    wrap_stream(content)
-}
-
-fn wrap_stream(
-    content: bytes::Bytes,
-) -> (
-    impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
-    usize,
-) {
-    let len = content.len();
-    let content = futures::future::ready(Ok(content));
-
-    (futures::stream::once(content), len)
-}
--- a/libs/utils/Cargo.toml
+++ b/libs/utils/Cargo.toml
@@ -50,8 +50,6 @@ const_format.workspace = true
 # why is it only here? no other crate should use it, streams are rarely needed.
 tokio-stream = { version = "0.1.14" }

-serde_path_to_error.workspace = true
-
 [dev-dependencies]
 byteorder.workspace = true
 bytes.workspace = true
--- a/libs/utils/scripts/restore_from_wal_initdb.sh
+++ b/libs/utils/scripts/restore_from_wal_initdb.sh
@@ -1,21 +0,0 @@
-#!/bin/bash
-
-# like restore_from_wal.sh, but takes existing initdb.tar.zst
-
-set -euxo pipefail
-
-PG_BIN=$1
-WAL_PATH=$2
-DATA_DIR=$3
-PORT=$4
-echo "port=$PORT" >> "$DATA_DIR"/postgresql.conf
-echo "shared_preload_libraries='\$libdir/neon_rmgr.so'" >> "$DATA_DIR"/postgresql.conf
-REDO_POS=0x$("$PG_BIN"/pg_controldata -D "$DATA_DIR" | grep -F "REDO location"| cut -c 42-)
-declare -i WAL_SIZE=$REDO_POS+114
-"$PG_BIN"/pg_ctl -D "$DATA_DIR" -l "$DATA_DIR/logfile.log" start
-"$PG_BIN"/pg_ctl -D "$DATA_DIR" -l "$DATA_DIR/logfile.log" stop -m immediate
-cp "$DATA_DIR"/pg_wal/000000010000000000000001 .
-cp "$WAL_PATH"/* "$DATA_DIR"/pg_wal/
-for partial in "$DATA_DIR"/pg_wal/*.partial ; do mv "$partial" "${partial%.partial}" ; done
-dd if=000000010000000000000001 of="$DATA_DIR"/pg_wal/000000010000000000000001 bs=$WAL_SIZE count=1 conv=notrunc
-rm -f 000000010000000000000001
--- a/libs/utils/src/completion.rs
+++ b/libs/utils/src/completion.rs
@@ -1,14 +1,16 @@
-use tokio_util::task::{task_tracker::TaskTrackerToken, TaskTracker};
+use std::sync::Arc;
+
+use tokio::sync::{mpsc, Mutex};

 /// While a reference is kept around, the associated [`Barrier::wait`] will wait.
 ///
 /// Can be cloned, moved and kept around in futures as "guard objects".
 #[derive(Clone)]
-pub struct Completion(TaskTrackerToken);
+pub struct Completion(mpsc::Sender<()>);

 /// Barrier will wait until all clones of [`Completion`] have been dropped.
 #[derive(Clone)]
-pub struct Barrier(TaskTracker);
+pub struct Barrier(Arc<Mutex<mpsc::Receiver<()>>>);

 impl Default for Barrier {
    fn default() -> Self {
@@ -19,7 +21,7 @@ impl Default for Barrier {

 impl Barrier {
    pub async fn wait(self) {
-        self.0.wait().await;
+        self.0.lock().await.recv().await;
    }

    pub async fn maybe_wait(barrier: Option<Barrier>) {
@@ -31,7 +33,8 @@ impl Barrier {

 impl PartialEq for Barrier {
    fn eq(&self, other: &Self) -> bool {
-        TaskTracker::ptr_eq(&self.0, &other.0)
+        // Arc::ptr_eq is reliable here: the pointee is a sized type, not a `dyn` trait object
+        Arc::ptr_eq(&self.0, &other.0)
    }
 }

@@ -39,10 +42,8 @@ impl Eq for Barrier {}

 /// Create new Guard and Barrier pair.
 pub fn channel() -> (Completion, Barrier) {
-    let tracker = TaskTracker::new();
-    // otherwise wait never exits
-    tracker.close();
-
-    let token = tracker.token();
-    (Completion(token), Barrier(tracker))
+    let (tx, rx) = mpsc::channel::<()>(1);
+    let rx = Mutex::new(rx);
+    let rx = Arc::new(rx);
+    (Completion(tx), Barrier(rx))
 }
--- a/libs/utils/src/generation.rs
+++ b/libs/utils/src/generation.rs
@@ -152,16 +152,3 @@ impl Debug for Generation {
        }
    }
 }
-
-#[cfg(test)]
-mod test {
-    use super::*;
-
-    #[test]
-    fn generation_gt() {
-        // Important that a None generation compares less than a valid one, during upgrades from
-        // pre-generation systems.
-        assert!(Generation::none() < Generation::new(0));
-        assert!(Generation::none() < Generation::new(1));
-    }
-}
--- a/libs/utils/src/http/json.rs
+++ b/libs/utils/src/http/json.rs
@@ -25,12 +25,8 @@ pub async fn json_request_or_empty_body<T: for<'de> Deserialize<'de>>(
    if body.remaining() == 0 {
        return Ok(None);
    }
-
-    let mut deser = serde_json::de::Deserializer::from_reader(body.reader());
-
-    serde_path_to_error::deserialize(&mut deser)
-        // intentionally stringify because the debug version is not helpful in python logs
-        .map_err(|e| anyhow::anyhow!("Failed to parse json request: {e}"))
+    serde_json::from_reader(body.reader())
+        .context("Failed to parse json request")
        .map(Some)
        .map_err(ApiError::BadRequest)
 }
--- a/libs/utils/src/logging.rs
+++ b/libs/utils/src/logging.rs
@@ -1,7 +1,6 @@
 use std::str::FromStr;

 use anyhow::Context;
-use metrics::{IntCounter, IntCounterVec};
 use once_cell::sync::Lazy;
 use strum_macros::{EnumString, EnumVariantNames};

@@ -25,48 +24,16 @@ impl LogFormat {
    }
 }

-struct TracingEventCountMetric {
-    error: IntCounter,
-    warn: IntCounter,
-    info: IntCounter,
-    debug: IntCounter,
-    trace: IntCounter,
-}
-
-static TRACING_EVENT_COUNT_METRIC: Lazy<TracingEventCountMetric> = Lazy::new(|| {
-    let vec = metrics::register_int_counter_vec!(
+static TRACING_EVENT_COUNT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
+    metrics::register_int_counter_vec!(
        "libmetrics_tracing_event_count",
        "Number of tracing events, by level",
        &["level"]
    )
-    .expect("failed to define metric");
-    TracingEventCountMetric::new(vec)
+    .expect("failed to define metric")
 });

-impl TracingEventCountMetric {
-    fn new(vec: IntCounterVec) -> Self {
-        Self {
-            error: vec.with_label_values(&["error"]),
-            warn: vec.with_label_values(&["warn"]),
-            info: vec.with_label_values(&["info"]),
-            debug: vec.with_label_values(&["debug"]),
-            trace: vec.with_label_values(&["trace"]),
-        }
-    }
-
-    fn inc_for_level(&self, level: tracing::Level) {
-        let counter = match level {
-            tracing::Level::ERROR => &self.error,
-            tracing::Level::WARN => &self.warn,
-            tracing::Level::INFO => &self.info,
-            tracing::Level::DEBUG => &self.debug,
-            tracing::Level::TRACE => &self.trace,
-        };
-        counter.inc();
-    }
-}
-
-struct TracingEventCountLayer(&'static TracingEventCountMetric);
+struct TracingEventCountLayer(&'static metrics::IntCounterVec);

 impl<S> tracing_subscriber::layer::Layer<S> for TracingEventCountLayer
 where
@@ -77,7 +44,15 @@ where
        event: &tracing::Event<'_>,
        _ctx: tracing_subscriber::layer::Context<'_, S>,
    ) {
-        self.0.inc_for_level(*event.metadata().level());
+        let level = event.metadata().level();
+        let level = match *level {
+            tracing::Level::ERROR => "error",
+            tracing::Level::WARN => "warn",
+            tracing::Level::INFO => "info",
+            tracing::Level::DEBUG => "debug",
+            tracing::Level::TRACE => "trace",
+        };
+        self.0.with_label_values(&[level]).inc();
    }
 }

@@ -91,17 +66,9 @@ pub enum TracingErrorLayerEnablement {
    EnableWithRustLogFilter,
 }

-/// Where the logging should output to.
-#[derive(Clone, Copy)]
-pub enum Output {
-    Stdout,
-    Stderr,
-}
-
 pub fn init(
    log_format: LogFormat,
    tracing_error_layer_enablement: TracingErrorLayerEnablement,
-    output: Output,
 ) -> anyhow::Result<()> {
    // We fall back to printing all spans at info-level or above if
    // the RUST_LOG environment variable is not set.
@@ -118,12 +85,7 @@ pub fn init(
        let log_layer = tracing_subscriber::fmt::layer()
            .with_target(false)
            .with_ansi(false)
-            .with_writer(move || -> Box<dyn std::io::Write> {
-                match output {
-                    Output::Stdout => Box::new(std::io::stdout()),
-                    Output::Stderr => Box::new(std::io::stderr()),
-                }
-            });
+            .with_writer(std::io::stdout);
        let log_layer = match log_format {
            LogFormat::Json => log_layer.json().boxed(),
            LogFormat::Plain => log_layer.boxed(),
@@ -131,9 +93,7 @@ pub fn init(
        };
        log_layer.with_filter(rust_log_env_filter())
    });
-    let r = r.with(
-        TracingEventCountLayer(&TRACING_EVENT_COUNT_METRIC).with_filter(rust_log_env_filter()),
-    );
+    let r = r.with(TracingEventCountLayer(&TRACING_EVENT_COUNT).with_filter(rust_log_env_filter()));
    match tracing_error_layer_enablement {
        TracingErrorLayerEnablement::EnableWithRustLogFilter => r
            .with(tracing_error::ErrorLayer::default().with_filter(rust_log_env_filter()))
@@ -284,14 +244,14 @@ impl std::fmt::Debug for SecretString {
 mod tests {
    use metrics::{core::Opts, IntCounterVec};

-    use crate::logging::{TracingEventCountLayer, TracingEventCountMetric};
+    use super::TracingEventCountLayer;

    #[test]
    fn tracing_event_count_metric() {
        let counter_vec =
            IntCounterVec::new(Opts::new("testmetric", "testhelp"), &["level"]).unwrap();
-        let metric = Box::leak(Box::new(TracingEventCountMetric::new(counter_vec.clone())));
-        let layer = TracingEventCountLayer(metric);
+        let counter_vec = Box::leak(Box::new(counter_vec)); // make it 'static
+        let layer = TracingEventCountLayer(counter_vec);
        use tracing_subscriber::prelude::*;

        tracing::subscriber::with_default(tracing_subscriber::registry().with(layer), || {
--- a/libs/utils/src/simple_rcu.rs
+++ b/libs/utils/src/simple_rcu.rs
@@ -1,10 +1,10 @@
 //!
 //! RCU stands for Read-Copy-Update. It's a synchronization mechanism somewhat
 //! similar to a lock, but it allows readers to "hold on" to an old value of RCU
-//! without blocking writers, and allows writing a new value without blocking
-//! readers. When you update the value, the new value is immediately visible
+//! without blocking writers, and allows writing a new value without blocking
+//! readers. When you update the value, the new value is immediately visible
 //! to new readers, but the update waits until all existing readers have
-//! finished, so that on return, no one sees the old value anymore.
+//! finished, so that no one sees the old value anymore.
 //!
 //! This implementation isn't wait-free; it uses an RwLock that is held for a
 //! short duration when the value is read or updated.
@@ -26,7 +26,6 @@
 //! Increment the value by one, and wait for old readers to finish:
 //!
 //! ```
-//! # async fn dox() {
 //! # let rcu = utils::simple_rcu::Rcu::new(1);
 //! let write_guard = rcu.lock_for_write();
 //!
@@ -37,17 +36,15 @@
 //!
 //! // Concurrent reads and writes are now possible again. Wait for all the readers
 //! // that still observe the old value to finish.
-//! waitlist.wait().await;
-//! # }
+//! waitlist.wait();
 //! ```
 //!
 #![warn(missing_docs)]

 use std::ops::Deref;
+use std::sync::mpsc::{sync_channel, Receiver, SyncSender};
 use std::sync::{Arc, Weak};
-use std::sync::{RwLock, RwLockWriteGuard};
-
-use tokio::sync::watch;
+use std::sync::{Mutex, RwLock, RwLockWriteGuard};

 ///
 /// Rcu allows multiple readers to read and hold onto a value without blocking
@@ -71,21 +68,22 @@ struct RcuCell<V> {
    value: V,

    /// A dummy channel. We never send anything to this channel. The point is
-    /// that when the RcuCell is dropped, any subscribed Receivers will be notified
+    /// that when the RcuCell is dropped, any cloned Senders will be notified
    /// that the channel is closed. Updaters can use this to wait out until the
    /// RcuCell has been dropped, i.e. until the old value is no longer in use.
    ///
-    /// We never send anything to this, we just need to hold onto it so that the
-    /// Receivers will be notified when it's dropped.
-    watch: watch::Sender<()>,
+    /// We never do anything with the receiver, we just need to hold onto it so
+    /// that the Senders will be notified when it's dropped. But because it's
+    /// not Sync, we need a Mutex on it.
+    watch: (SyncSender<()>, Mutex<Receiver<()>>),
 }

 impl<V> RcuCell<V> {
    fn new(value: V) -> Self {
-        let (watch_sender, _) = watch::channel(());
+        let (watch_sender, watch_receiver) = sync_channel(0);
        RcuCell {
            value,
-            watch: watch_sender,
+            watch: (watch_sender, Mutex::new(watch_receiver)),
        }
    }
 }
@@ -143,10 +141,10 @@ impl<V> Deref for RcuReadGuard<V> {
 ///
 /// Write guard returned by `write`
 ///
-/// NB: Holding this guard blocks all concurrent `read` and `write` calls, so it should only be
-/// held for a short duration!
+/// NB: Holding this guard blocks all concurrent `read` and `write` calls, so
+/// it should only be held for a short duration!
 ///
-/// Calling [`Self::store_and_unlock`] consumes the guard, making new reads and new writes possible
+/// Calling `store_and_unlock` consumes the guard, making new reads and new writes possible
 /// again.
 ///
 pub struct RcuWriteGuard<'a, V> {
@@ -181,7 +179,7 @@ impl<'a, V> RcuWriteGuard<'a, V> {
            // the watches for any that do.
            self.inner.old_cells.retain(|weak| {
                if let Some(cell) = weak.upgrade() {
-                    watches.push(cell.watch.subscribe());
+                    watches.push(cell.watch.0.clone());
                    true
                } else {
                    false
@@ -195,20 +193,20 @@ impl<'a, V> RcuWriteGuard<'a, V> {
 ///
 /// List of readers who can still see old values.
 ///
-pub struct RcuWaitList(Vec<watch::Receiver<()>>);
+pub struct RcuWaitList(Vec<SyncSender<()>>);

 impl RcuWaitList {
    ///
    /// Wait for old readers to finish.
    ///
-    pub async fn wait(mut self) {
+    pub fn wait(mut self) {
        // after all the old_cells are no longer in use, we're done
        for w in self.0.iter_mut() {
            // This will block until the Receiver is closed. That happens when
            // the RcuCell is dropped.
            #[allow(clippy::single_match)]
-            match w.changed().await {
-                Ok(_) => panic!("changed() unexpectedly succeeded on dummy channel"),
+            match w.send(()) {
+                Ok(_) => panic!("send() unexpectedly succeeded on dummy channel"),
                Err(_) => {
                    // closed, which means that the cell has been dropped, and
                    // its value is no longer in use
@@ -222,10 +220,11 @@ impl RcuWaitList {
 mod tests {
    use super::*;
    use std::sync::{Arc, Mutex};
+    use std::thread::{sleep, spawn};
    use std::time::Duration;

-    #[tokio::test]
-    async fn two_writers() {
+    #[test]
+    fn two_writers() {
        let rcu = Rcu::new(1);

        let read1 = rcu.read();
@@ -249,35 +248,33 @@ mod tests {
        assert_eq!(*read1, 1);

        let log = Arc::new(Mutex::new(Vec::new()));
-        // Wait for the old readers to finish in separate tasks.
+        // Wait for the old readers to finish in separate threads.
        let log_clone = Arc::clone(&log);
-        let task2 = tokio::spawn(async move {
-            wait2.wait().await;
+        let thread2 = spawn(move || {
+            wait2.wait();
            log_clone.lock().unwrap().push("wait2 done");
        });
        let log_clone = Arc::clone(&log);
-        let task3 = tokio::spawn(async move {
-            wait3.wait().await;
+        let thread3 = spawn(move || {
+            wait3.wait();
            log_clone.lock().unwrap().push("wait3 done");
        });

        // without this sleep the test can pass on accident if the writer is slow
-        tokio::time::sleep(Duration::from_millis(100)).await;
+        sleep(Duration::from_millis(500));

        // Release first reader. This allows first write to finish, but calling
-        // wait() on the 'task3' would still block.
+        // wait() on 'wait3' (running in thread3) would still block.
        log.lock().unwrap().push("dropping read1");
        drop(read1);
-        task2.await.unwrap();
+        thread2.join().unwrap();

-        assert!(!task3.is_finished());
-
-        tokio::time::sleep(Duration::from_millis(100)).await;
+        sleep(Duration::from_millis(500));

        // Release second reader, and finish second writer.
        log.lock().unwrap().push("dropping read2");
        drop(read2);
-        task3.await.unwrap();
+        thread3.join().unwrap();

        assert_eq!(
            log.lock().unwrap().as_slice(),
--- a/libs/utils/src/sync/gate.rs
+++ b/libs/utils/src/sync/gate.rs
@@ -30,32 +30,18 @@ async fn warn_if_stuck<Fut: std::future::Future>(

    let mut fut = std::pin::pin!(fut);

-    let mut warned = false;
-    let ret = loop {
+    loop {
        match tokio::time::timeout(warn_period, &mut fut).await {
-            Ok(ret) => break ret,
+            Ok(ret) => return ret,
            Err(_) => {
                tracing::warn!(
                    gate = name,
                    elapsed_ms = started.elapsed().as_millis(),
                    "still waiting, taking longer than expected..."
                );
-                warned = true;
            }
        }
-    };
-
-    // If we emitted a warning for slowness, also emit a message when we complete, so that
-    // someone debugging a shutdown can know for sure whether we have moved past this operation.
-    if warned {
-        tracing::info!(
-            gate = name,
-            elapsed_ms = started.elapsed().as_millis(),
-            "completed, after taking longer than expected"
-        )
    }
-
-    ret
 }

 #[derive(Debug)]
--- a/libs/utils/src/timeout.rs
+++ b/libs/utils/src/timeout.rs
@@ -2,11 +2,8 @@ use std::time::Duration;

 use tokio_util::sync::CancellationToken;

-#[derive(thiserror::Error, Debug)]
 pub enum TimeoutCancellableError {
-    #[error("Timed out")]
    Timeout,
-    #[error("Cancelled")]
    Cancelled,
 }

--- a/libs/walproposer/build.rs
+++ b/libs/walproposer/build.rs
@@ -1,6 +1,3 @@
-//! Links with walproposer, pgcommon, pgport and runs bindgen on walproposer.h
-//! to generate Rust bindings for it.
-
 use std::{env, path::PathBuf, process::Command};

 use anyhow::{anyhow, Context};
--- a/libs/walproposer/src/api_bindings.rs
+++ b/libs/walproposer/src/api_bindings.rs
@@ -1,6 +1,3 @@
-//! A C-Rust shim: defines implementation of C walproposer API, assuming wp
-//! callback_data stores Box to some Rust implementation.
-
 #![allow(dead_code)]

 use std::ffi::CStr;
--- a/libs/walproposer/src/walproposer.rs
+++ b/libs/walproposer/src/walproposer.rs
@@ -436,9 +436,9 @@ mod tests {
                event_mask: 0,
            }),
            expected_messages: vec![
-                // Greeting(ProposerGreeting { protocol_version: 2, pg_version: 160001, proposer_id: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], system_id: 0, timeline_id: 9e4c8f36063c6c6e93bc20d65a820f3d, tenant_id: 9e4c8f36063c6c6e93bc20d65a820f3d, tli: 1, wal_seg_size: 16777216 })
+                // Greeting(ProposerGreeting { protocol_version: 2, pg_version: 160000, proposer_id: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], system_id: 0, timeline_id: 9e4c8f36063c6c6e93bc20d65a820f3d, tenant_id: 9e4c8f36063c6c6e93bc20d65a820f3d, tli: 1, wal_seg_size: 16777216 })
                vec![
-                    103, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 113, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                    103, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 113, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 158, 76, 143, 54, 6, 60, 108, 110,
                    147, 188, 32, 214, 90, 130, 15, 61, 158, 76, 143, 54, 6, 60, 108, 110, 147,
                    188, 32, 214, 90, 130, 15, 61, 1, 0, 0, 0, 0, 0, 0, 1,
@@ -478,7 +478,7 @@ mod tests {
        // walproposer will panic when it finishes sync_safekeepers
        std::panic::catch_unwind(|| wp.start()).unwrap_err();
        // validate the resulting LSN
-        assert_eq!(receiver.try_recv(), Ok(1337));
+        assert_eq!(receiver.recv()?, 1337);
        Ok(())
        // drop() will free up resources here
    }
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -36,7 +36,6 @@ humantime.workspace = true
 humantime-serde.workspace = true
 hyper.workspace = true
 itertools.workspace = true
-md5.workspace = true
 nix.workspace = true
 # hack to get the number of worker threads tokio uses
 num_cpus = { version = "1.15" }
@@ -52,7 +51,6 @@ regex.workspace = true
 scopeguard.workspace = true
 serde.workspace = true
 serde_json = { workspace = true, features = ["raw_value"] }
-serde_path_to_error.workspace = true
 serde_with.workspace = true
 signal-hook.workspace = true
 smallvec = { workspace = true, features = ["write"] }
--- a/pageserver/benches/bench_layer_map.rs
+++ b/pageserver/benches/bench_layer_map.rs
@@ -3,7 +3,6 @@ use pageserver::repository::Key;
 use pageserver::tenant::layer_map::LayerMap;
 use pageserver::tenant::storage_layer::LayerFileName;
 use pageserver::tenant::storage_layer::PersistentLayerDesc;
-use pageserver_api::shard::TenantShardId;
 use rand::prelude::{SeedableRng, SliceRandom, StdRng};
 use std::cmp::{max, min};
 use std::fs::File;
@@ -212,7 +211,7 @@ fn bench_sequential(c: &mut Criterion) {
        let i32 = (i as u32) % 100;
        let zero = Key::from_hex("000000000000000000000000000000000000").unwrap();
        let layer = PersistentLayerDesc::new_img(
-            TenantShardId::unsharded(TenantId::generate()),
+            TenantId::generate(),
            TimelineId::generate(),
            zero.add(10 * i32)..zero.add(10 * i32 + 1),
            Lsn(i),
--- a/pageserver/client/Cargo.toml
+++ b/pageserver/client/Cargo.toml
@@ -1,14 +0,0 @@
-[package]
-name = "pageserver_client"
-version = "0.1.0"
-edition.workspace = true
-license.workspace = true
-
-[dependencies]
-pageserver_api.workspace = true
-thiserror.workspace = true
-async-trait.workspace = true
-reqwest.workspace = true
-utils.workspace = true
-serde.workspace = true
-workspace_hack = { version = "0.1", path = "../../workspace_hack" }
--- a/pageserver/client/src/lib.rs
+++ b/pageserver/client/src/lib.rs
@@ -1 +0,0 @@
-pub mod mgmt_api;
--- a/pageserver/client/src/mgmt_api.rs
+++ b/pageserver/client/src/mgmt_api.rs
@@ -1,188 +0,0 @@
-use pageserver_api::models::*;
-use reqwest::{IntoUrl, Method};
-use utils::{
-    http::error::HttpErrorBody,
-    id::{TenantId, TimelineId},
-};
-
-#[derive(Debug)]
-pub struct Client {
-    mgmt_api_endpoint: String,
-    authorization_header: Option<String>,
-    client: reqwest::Client,
-}
-
-#[derive(thiserror::Error, Debug)]
-pub enum Error {
-    #[error("receive body: {0}")]
-    ReceiveBody(reqwest::Error),
-
-    #[error("receive error body: {0}")]
-    ReceiveErrorBody(String),
-
-    #[error("pageserver API: {0}")]
-    ApiError(String),
-}
-
-pub type Result<T> = std::result::Result<T, Error>;
-
-#[async_trait::async_trait]
-pub trait ResponseErrorMessageExt: Sized {
-    async fn error_from_body(self) -> Result<Self>;
-}
-
-#[async_trait::async_trait]
-impl ResponseErrorMessageExt for reqwest::Response {
-    async fn error_from_body(mut self) -> Result<Self> {
-        let status = self.status();
-        if !(status.is_client_error() || status.is_server_error()) {
-            return Ok(self);
-        }
-
-        let url = self.url().to_owned();
-        Err(match self.json::<HttpErrorBody>().await {
-            Ok(HttpErrorBody { msg }) => Error::ApiError(msg),
-            Err(_) => {
-                Error::ReceiveErrorBody(format!("Http error ({}) at {}.", status.as_u16(), url))
-            }
-        })
-    }
-}
-
-impl Client {
-    pub fn new(mgmt_api_endpoint: String, jwt: Option<&str>) -> Self {
-        Self {
-            mgmt_api_endpoint,
-            authorization_header: jwt.map(|jwt| format!("Bearer {jwt}")),
-            client: reqwest::Client::new(),
-        }
-    }
-
-    pub async fn list_tenants(&self) -> Result<Vec<pageserver_api::models::TenantInfo>> {
-        let uri = format!("{}/v1/tenant", self.mgmt_api_endpoint);
-        let resp = self.get(&uri).await?;
-        resp.json().await.map_err(Error::ReceiveBody)
-    }
-
-    pub async fn list_timelines(
-        &self,
-        tenant_id: TenantId,
-    ) -> Result<Vec<pageserver_api::models::TimelineInfo>> {
-        let uri = format!("{}/v1/tenant/{tenant_id}/timeline", self.mgmt_api_endpoint);
-        self.get(&uri)
-            .await?
-            .json()
-            .await
-            .map_err(Error::ReceiveBody)
-    }
-
-    pub async fn timeline_info(
-        &self,
-        tenant_id: TenantId,
-        timeline_id: TimelineId,
-    ) -> Result<pageserver_api::models::TimelineInfo> {
-        let uri = format!(
-            "{}/v1/tenant/{tenant_id}/timeline/{timeline_id}",
-            self.mgmt_api_endpoint
-        );
-        self.get(&uri)
-            .await?
-            .json()
-            .await
-            .map_err(Error::ReceiveBody)
-    }
-
-    pub async fn keyspace(
-        &self,
-        tenant_id: TenantId,
-        timeline_id: TimelineId,
-    ) -> Result<pageserver_api::models::partitioning::Partitioning> {
-        let uri = format!(
-            "{}/v1/tenant/{tenant_id}/timeline/{timeline_id}/keyspace",
-            self.mgmt_api_endpoint
-        );
-        self.get(&uri)
-            .await?
-            .json()
-            .await
-            .map_err(Error::ReceiveBody)
-    }
-
-    async fn get<U: IntoUrl>(&self, uri: U) -> Result<reqwest::Response> {
-        self.request(Method::GET, uri, ()).await
-    }
-
-    async fn request<B: serde::Serialize, U: reqwest::IntoUrl>(
-        &self,
-        method: Method,
-        uri: U,
-        body: B,
-    ) -> Result<reqwest::Response> {
-        let req = self.client.request(method, uri);
-        let req = if let Some(value) = &self.authorization_header {
-            req.header(reqwest::header::AUTHORIZATION, value)
-        } else {
-            req
-        };
-        let res = req.json(&body).send().await.map_err(Error::ReceiveBody)?;
-        let response = res.error_from_body().await?;
-        Ok(response)
-    }
-
-    pub async fn status(&self) -> Result<()> {
-        let uri = format!("{}/v1/status", self.mgmt_api_endpoint);
-        self.get(&uri).await?;
-        Ok(())
-    }
-
-    pub async fn tenant_create(&self, req: &TenantCreateRequest) -> Result<TenantId> {
-        let uri = format!("{}/v1/tenant", self.mgmt_api_endpoint);
-        self.request(Method::POST, &uri, req)
-            .await?
-            .json()
-            .await
-            .map_err(Error::ReceiveBody)
-    }
-
-    pub async fn tenant_config(&self, req: &TenantConfigRequest) -> Result<()> {
-        let uri = format!("{}/v1/tenant/config", self.mgmt_api_endpoint);
-        self.request(Method::PUT, &uri, req).await?;
-        Ok(())
-    }
-
-    pub async fn location_config(
-        &self,
-        tenant_id: TenantId,
-        config: LocationConfig,
-        flush_ms: Option<std::time::Duration>,
-    ) -> Result<()> {
-        let req_body = TenantLocationConfigRequest { tenant_id, config };
-        let path = format!(
-            "{}/v1/tenant/{}/location_config",
-            self.mgmt_api_endpoint, tenant_id
-        );
-        let path = if let Some(flush_ms) = flush_ms {
-            format!("{}?flush_ms={}", path, flush_ms.as_millis())
-        } else {
-            path
-        };
-        self.request(Method::PUT, &path, &req_body).await?;
-        Ok(())
-    }
-
-    pub async fn timeline_create(
-        &self,
-        tenant_id: TenantId,
-        req: &TimelineCreateRequest,
-    ) -> Result<TimelineInfo> {
-        let uri = format!(
-            "{}/v1/tenant/{}/timeline",
-            self.mgmt_api_endpoint, tenant_id
-        );
-        self.request(Method::POST, &uri, req)
-            .await?
-            .json()
-            .await
-            .map_err(Error::ReceiveBody)
-    }
-}
--- a/pageserver/ctl/Cargo.toml
+++ b/pageserver/ctl/Cargo.toml
@@ -18,5 +18,3 @@ tokio.workspace = true
 utils.workspace = true
 svg_fmt.workspace = true
 workspace_hack.workspace = true
-serde.workspace = true
-serde_json.workspace = true
--- a/pageserver/ctl/src/index_part.rs
+++ b/pageserver/ctl/src/index_part.rs
@@ -1,38 +0,0 @@
-use std::collections::HashMap;
-
-use anyhow::Context;
-use camino::Utf8PathBuf;
-use pageserver::tenant::remote_timeline_client::index::IndexLayerMetadata;
-use pageserver::tenant::storage_layer::LayerFileName;
-use pageserver::tenant::{metadata::TimelineMetadata, IndexPart};
-use utils::lsn::Lsn;
-
-#[derive(clap::Subcommand)]
-pub(crate) enum IndexPartCmd {
-    Dump { path: Utf8PathBuf },
-}
-
-pub(crate) async fn main(cmd: &IndexPartCmd) -> anyhow::Result<()> {
-    match cmd {
-        IndexPartCmd::Dump { path } => {
-            let bytes = tokio::fs::read(path).await.context("read file")?;
-            let des: IndexPart = IndexPart::from_s3_bytes(&bytes).context("deserialize")?;
-            #[derive(serde::Serialize)]
-            struct Output<'a> {
-                layer_metadata: &'a HashMap<LayerFileName, IndexLayerMetadata>,
-                disk_consistent_lsn: Lsn,
-                timeline_metadata: &'a TimelineMetadata,
-            }
-
-            let output = Output {
-                layer_metadata: &des.layer_metadata,
-                disk_consistent_lsn: des.get_disk_consistent_lsn(),
-                timeline_metadata: &des.metadata,
-            };
-
-            let output = serde_json::to_string_pretty(&output).context("serialize output")?;
-            println!("{output}");
-            Ok(())
-        }
-    }
-}
--- a/pageserver/ctl/src/layers.rs
+++ b/pageserver/ctl/src/layers.rs
@@ -1,15 +1,13 @@
 use std::path::{Path, PathBuf};

 use anyhow::Result;
-use camino::{Utf8Path, Utf8PathBuf};
+use camino::Utf8Path;
 use clap::Subcommand;
 use pageserver::context::{DownloadBehavior, RequestContext};
 use pageserver::task_mgr::TaskKind;
 use pageserver::tenant::block_io::BlockCursor;
 use pageserver::tenant::disk_btree::DiskBtreeReader;
 use pageserver::tenant::storage_layer::delta_layer::{BlobRef, Summary};
-use pageserver::tenant::storage_layer::{delta_layer, image_layer};
-use pageserver::tenant::storage_layer::{DeltaLayer, ImageLayer};
 use pageserver::tenant::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME};
 use pageserver::{page_cache, virtual_file};
 use pageserver::{
@@ -22,7 +20,6 @@ use pageserver::{
 };
 use std::fs;
 use utils::bin_ser::BeSer;
-use utils::id::{TenantId, TimelineId};

 use crate::layer_map_analyzer::parse_filename;

@@ -48,13 +45,6 @@ pub(crate) enum LayerCmd {
        /// The id from list-layer command
        id: usize,
    },
-    RewriteSummary {
-        layer_file_path: Utf8PathBuf,
-        #[clap(long)]
-        new_tenant_id: Option<TenantId>,
-        #[clap(long)]
-        new_timeline_id: Option<TimelineId>,
-    },
 }

 async fn read_delta_file(path: impl AsRef<Path>, ctx: &RequestContext) -> Result<()> {
@@ -110,7 +100,6 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> {
                    println!("- timeline {}", timeline.file_name().to_string_lossy());
                }
            }
-            Ok(())
        }
        LayerCmd::ListLayer {
            path,
@@ -139,7 +128,6 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> {
                    idx += 1;
                }
            }
-            Ok(())
        }
        LayerCmd::DumpLayer {
            path,
@@ -180,63 +168,7 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> {
                    idx += 1;
                }
            }
-            Ok(())
-        }
-        LayerCmd::RewriteSummary {
-            layer_file_path,
-            new_tenant_id,
-            new_timeline_id,
-        } => {
-            pageserver::virtual_file::init(10);
-            pageserver::page_cache::init(100);
-
-            let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error);
-
-            macro_rules! rewrite_closure {
-                ($($summary_ty:tt)*) => {{
-                    |summary| $($summary_ty)* {
-                        tenant_id: new_tenant_id.unwrap_or(summary.tenant_id),
-                        timeline_id: new_timeline_id.unwrap_or(summary.timeline_id),
-                        ..summary
-                    }
-                }};
-            }
-
-            let res = ImageLayer::rewrite_summary(
-                layer_file_path,
-                rewrite_closure!(image_layer::Summary),
-                &ctx,
-            )
-            .await;
-            match res {
-                Ok(()) => {
-                    println!("Successfully rewrote summary of image layer {layer_file_path}");
-                    return Ok(());
-                }
-                Err(image_layer::RewriteSummaryError::MagicMismatch) => (), // fallthrough
-                Err(image_layer::RewriteSummaryError::Other(e)) => {
-                    return Err(e);
-                }
-            }
-
-            let res = DeltaLayer::rewrite_summary(
-                layer_file_path,
-                rewrite_closure!(delta_layer::Summary),
-                &ctx,
-            )
-            .await;
-            match res {
-                Ok(()) => {
-                    println!("Successfully rewrote summary of delta layer {layer_file_path}");
-                    return Ok(());
-                }
-                Err(delta_layer::RewriteSummaryError::MagicMismatch) => (), // fallthrough
-                Err(delta_layer::RewriteSummaryError::Other(e)) => {
-                    return Err(e);
-                }
-            }
-
-            anyhow::bail!("not an image or delta layer: {layer_file_path}");
        }
    }
+    Ok(())
 }
--- a/pageserver/ctl/src/main.rs
+++ b/pageserver/ctl/src/main.rs
@@ -5,13 +5,11 @@
 //! Separate, `metadata` subcommand allows to print and update pageserver's metadata file.

 mod draw_timeline_dir;
-mod index_part;
 mod layer_map_analyzer;
 mod layers;

 use camino::{Utf8Path, Utf8PathBuf};
 use clap::{Parser, Subcommand};
-use index_part::IndexPartCmd;
 use layers::LayerCmd;
 use pageserver::{
    context::{DownloadBehavior, RequestContext},
@@ -40,8 +38,6 @@ struct CliOpts {
 #[derive(Subcommand)]
 enum Commands {
    Metadata(MetadataCmd),
-    #[command(subcommand)]
-    IndexPart(IndexPartCmd),
    PrintLayerFile(PrintLayerFileCmd),
    DrawTimeline {},
    AnalyzeLayerMap(AnalyzeLayerMapCmd),
@@ -87,9 +83,6 @@ async fn main() -> anyhow::Result<()> {
        Commands::Metadata(cmd) => {
            handle_metadata(&cmd)?;
        }
-        Commands::IndexPart(cmd) => {
-            index_part::main(&cmd).await?;
-        }
        Commands::DrawTimeline {} => {
            draw_timeline_dir::main()?;
        }
--- a/pageserver/test_data/short_v14_redo.page
+++ b/pageserver/test_data/short_v14_redo.page
--- a/pageserver/src/bin/pageserver.rs
+++ b/pageserver/src/bin/pageserver.rs
@@ -14,7 +14,7 @@ use pageserver::control_plane_client::ControlPlaneClient;
 use pageserver::disk_usage_eviction_task::{self, launch_disk_usage_global_eviction_task};
 use pageserver::metrics::{STARTUP_DURATION, STARTUP_IS_LOADING};
 use pageserver::task_mgr::WALRECEIVER_RUNTIME;
-use pageserver::tenant::{secondary, TenantSharedResources};
+use pageserver::tenant::TenantSharedResources;
 use remote_storage::GenericRemoteStorage;
 use tokio::time::Instant;
 use tracing::*;
@@ -103,11 +103,7 @@ fn main() -> anyhow::Result<()> {
    } else {
        TracingErrorLayerEnablement::Disabled
    };
-    logging::init(
-        conf.log_format,
-        tracing_error_layer_enablement,
-        logging::Output::Stdout,
-    )?;
+    logging::init(conf.log_format, tracing_error_layer_enablement)?;

    // mind the order required here: 1. logging, 2. panic_hook, 3. sentry.
    // disarming this hook on pageserver, because we never tear down tracing.
@@ -402,11 +398,15 @@ fn start_pageserver(
    let (init_remote_done_tx, init_remote_done_rx) = utils::completion::channel();
    let (init_done_tx, init_done_rx) = utils::completion::channel();

+    let (init_logical_size_done_tx, init_logical_size_done_rx) = utils::completion::channel();
+
    let (background_jobs_can_start, background_jobs_barrier) = utils::completion::channel();

    let order = pageserver::InitializationOrder {
        initial_tenant_load_remote: Some(init_done_tx),
        initial_tenant_load: Some(init_remote_done_tx),
+        initial_logical_size_can_start: init_done_rx.clone(),
+        initial_logical_size_attempt: Some(init_logical_size_done_tx),
        background_jobs_can_start: background_jobs_barrier.clone(),
    };

@@ -425,6 +425,7 @@ fn start_pageserver(
    let tenant_manager = Arc::new(tenant_manager);

    BACKGROUND_RUNTIME.spawn({
+        let init_done_rx = init_done_rx;
        let shutdown_pageserver = shutdown_pageserver.clone();
        let drive_init = async move {
            // NOTE: unlike many futures in pageserver, this one is cancellation-safe
@@ -459,7 +460,7 @@ fn start_pageserver(
            });

            let WaitForPhaseResult {
-                timeout_remaining: _timeout,
+                timeout_remaining: timeout,
                skipped: init_load_skipped,
            } = wait_for_phase("initial_tenant_load", init_load_done, timeout).await;

@@ -467,6 +468,26 @@ fn start_pageserver(

            scopeguard::ScopeGuard::into_inner(guard);

+            let guard = scopeguard::guard_on_success((), |_| {
+                tracing::info!("Cancelled before initial logical sizes completed")
+            });
+
+            let logical_sizes_done = std::pin::pin!(async {
+                init_logical_size_done_rx.wait().await;
+                startup_checkpoint(
+                    started_startup_at,
+                    "initial_logical_sizes",
+                    "Initial logical sizes completed",
+                );
+            });
+
+            let WaitForPhaseResult {
+                timeout_remaining: _,
+                skipped: logical_sizes_skipped,
+            } = wait_for_phase("initial_logical_sizes", logical_sizes_done, timeout).await;
+
+            scopeguard::ScopeGuard::into_inner(guard);
+
            // allow background jobs to start: we either completed prior stages, or they reached timeout
            // and were skipped.  It is important that we do not let them block background jobs indefinitely,
            // because things like consumption metrics for billing are blocked by this barrier.
@@ -489,6 +510,9 @@ fn start_pageserver(
            if let Some(f) = init_load_skipped {
                f.await;
            }
+            if let Some(f) = logical_sizes_skipped {
+                f.await;
+            }
            scopeguard::ScopeGuard::into_inner(guard);

            startup_checkpoint(started_startup_at, "complete", "Startup complete");
@@ -504,17 +528,6 @@ fn start_pageserver(
        }
    });

-    let secondary_controller = if let Some(remote_storage) = &remote_storage {
-        secondary::spawn_tasks(
-            tenant_manager.clone(),
-            remote_storage.clone(),
-            background_jobs_barrier.clone(),
-            shutdown_pageserver.clone(),
-        )
-    } else {
-        secondary::null_controller()
-    };
-
    // shared state between the disk-usage backed eviction background task and the http endpoint
    // that allows triggering disk-usage based eviction manually. note that the http endpoint
    // is still accessible even if background task is not configured as long as remote storage has
@@ -544,7 +557,6 @@ fn start_pageserver(
                broker_client.clone(),
                disk_usage_eviction_state,
                deletion_queue.new_client(),
-                secondary_controller,
            )
            .context("Failed to initialize router state")?,
        );
@@ -571,6 +583,7 @@ fn start_pageserver(
    }

    if let Some(metric_collection_endpoint) = &conf.metric_collection_endpoint {
+        let background_jobs_barrier = background_jobs_barrier;
        let metrics_ctx = RequestContext::todo_child(
            TaskKind::MetricsCollection,
            // This task itself shouldn't download anything.
@@ -608,7 +621,6 @@ fn start_pageserver(
                    conf.synthetic_size_calculation_interval,
                    conf.id,
                    local_disk_storage,
-                    cancel,
                    metrics_ctx,
                )
                .instrument(info_span!("metrics_collection"))
--- a/pageserver/src/config.rs
+++ b/pageserver/src/config.rs
@@ -5,7 +5,6 @@
 //! See also `settings.md` for better description on every parameter.

 use anyhow::{anyhow, bail, ensure, Context, Result};
-use pageserver_api::shard::TenantShardId;
 use remote_storage::{RemotePath, RemoteStorageConfig};
 use serde::de::IntoDeserializer;
 use std::env;
@@ -26,7 +25,7 @@ use toml_edit::{Document, Item};
 use camino::{Utf8Path, Utf8PathBuf};
 use postgres_backend::AuthType;
 use utils::{
-    id::{NodeId, TimelineId},
+    id::{NodeId, TenantId, TimelineId},
    logging::LogFormat,
 };

@@ -41,8 +40,6 @@ use crate::{
    TIMELINE_DELETE_MARK_SUFFIX, TIMELINE_UNINIT_MARK_SUFFIX,
 };

-use self::defaults::DEFAULT_CONCURRENT_TENANT_WARMUP;
-
 pub mod defaults {
    use crate::tenant::config::defaults::*;
    use const_format::formatcp;
@@ -63,8 +60,6 @@ pub mod defaults {

    pub const DEFAULT_LOG_FORMAT: &str = "plain";

-    pub const DEFAULT_CONCURRENT_TENANT_WARMUP: usize = 8;
-
    pub const DEFAULT_CONCURRENT_TENANT_SIZE_LOGICAL_SIZE_QUERIES: usize =
        super::ConfigurableSemaphore::DEFAULT_INITIAL.get();

@@ -74,8 +69,6 @@ pub mod defaults {
    pub const DEFAULT_SYNTHETIC_SIZE_CALCULATION_INTERVAL: &str = "10 min";
    pub const DEFAULT_BACKGROUND_TASK_MAXIMUM_DELAY: &str = "10s";

-    pub const DEFAULT_HEATMAP_UPLOAD_CONCURRENCY: usize = 8;
-
    ///
    /// Default built-in configuration file.
    ///
@@ -98,7 +91,6 @@ pub mod defaults {
 #log_format = '{DEFAULT_LOG_FORMAT}'

 #concurrent_tenant_size_logical_size_queries = '{DEFAULT_CONCURRENT_TENANT_SIZE_LOGICAL_SIZE_QUERIES}'
-#concurrent_tenant_warmup = '{DEFAULT_CONCURRENT_TENANT_WARMUP}'

 #metric_collection_interval = '{DEFAULT_METRIC_COLLECTION_INTERVAL}'
 #cached_metric_collection_interval = '{DEFAULT_CACHED_METRIC_COLLECTION_INTERVAL}'
@@ -124,8 +116,6 @@ pub mod defaults {
 #evictions_low_residence_duration_metric_threshold = '{DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD}'
 #gc_feedback = false

-#heatmap_upload_concurrency = {DEFAULT_HEATMAP_UPLOAD_CONCURRENCY}
-
 [remote_storage]

 "#
@@ -185,11 +175,6 @@ pub struct PageServerConf {

    pub log_format: LogFormat,

-    /// Number of tenants which will be concurrently loaded from remote storage proactively on startup,
-    /// does not limit tenants loaded in response to client I/O.  A lower value implicitly deprioritizes
-    /// loading such tenants, vs. other work in the system.
-    pub concurrent_tenant_warmup: ConfigurableSemaphore,
-
    /// Number of concurrent [`Tenant::gather_size_inputs`](crate::tenant::Tenant::gather_size_inputs) allowed.
    pub concurrent_tenant_size_logical_size_queries: ConfigurableSemaphore,
    /// Limit of concurrent [`Tenant::gather_size_inputs`] issued by module `eviction_task`.
@@ -229,10 +214,6 @@ pub struct PageServerConf {
    /// If true, pageserver will make best-effort to operate without a control plane: only
    /// for use in major incidents.
    pub control_plane_emergency_mode: bool,
-
-    /// How many heatmap uploads may be done concurrency: lower values implicitly deprioritize
-    /// heatmap uploads vs. other remote storage operations.
-    pub heatmap_upload_concurrency: usize,
 }

 /// We do not want to store this in a PageServerConf because the latter may be logged
@@ -293,7 +274,6 @@ struct PageServerConfigBuilder {

    log_format: BuilderValue<LogFormat>,

-    concurrent_tenant_warmup: BuilderValue<NonZeroUsize>,
    concurrent_tenant_size_logical_size_queries: BuilderValue<NonZeroUsize>,

    metric_collection_interval: BuilderValue<Duration>,
@@ -312,8 +292,6 @@ struct PageServerConfigBuilder {
    control_plane_api: BuilderValue<Option<Url>>,
    control_plane_api_token: BuilderValue<Option<SecretString>>,
    control_plane_emergency_mode: BuilderValue<bool>,
-
-    heatmap_upload_concurrency: BuilderValue<usize>,
 }

 impl Default for PageServerConfigBuilder {
@@ -351,8 +329,6 @@ impl Default for PageServerConfigBuilder {
            .expect("cannot parse default keepalive interval")),
            log_format: Set(LogFormat::from_str(DEFAULT_LOG_FORMAT).unwrap()),

-            concurrent_tenant_warmup: Set(NonZeroUsize::new(DEFAULT_CONCURRENT_TENANT_WARMUP)
-                .expect("Invalid default constant")),
            concurrent_tenant_size_logical_size_queries: Set(
                ConfigurableSemaphore::DEFAULT_INITIAL,
            ),
@@ -384,8 +360,6 @@ impl Default for PageServerConfigBuilder {
            control_plane_api: Set(None),
            control_plane_api_token: Set(None),
            control_plane_emergency_mode: Set(false),
-
-            heatmap_upload_concurrency: Set(DEFAULT_HEATMAP_UPLOAD_CONCURRENCY),
        }
    }
 }
@@ -466,10 +440,6 @@ impl PageServerConfigBuilder {
        self.log_format = BuilderValue::Set(log_format)
    }

-    pub fn concurrent_tenant_warmup(&mut self, u: NonZeroUsize) {
-        self.concurrent_tenant_warmup = BuilderValue::Set(u);
-    }
-
    pub fn concurrent_tenant_size_logical_size_queries(&mut self, u: NonZeroUsize) {
        self.concurrent_tenant_size_logical_size_queries = BuilderValue::Set(u);
    }
@@ -530,14 +500,7 @@ impl PageServerConfigBuilder {
        self.control_plane_emergency_mode = BuilderValue::Set(enabled)
    }

-    pub fn heatmap_upload_concurrency(&mut self, value: usize) {
-        self.heatmap_upload_concurrency = BuilderValue::Set(value)
-    }
-
    pub fn build(self) -> anyhow::Result<PageServerConf> {
-        let concurrent_tenant_warmup = self
-            .concurrent_tenant_warmup
-            .ok_or(anyhow!("missing concurrent_tenant_warmup"))?;
        let concurrent_tenant_size_logical_size_queries = self
            .concurrent_tenant_size_logical_size_queries
            .ok_or(anyhow!(
@@ -590,7 +553,6 @@ impl PageServerConfigBuilder {
                .broker_keepalive_interval
                .ok_or(anyhow!("No broker keepalive interval provided"))?,
            log_format: self.log_format.ok_or(anyhow!("missing log_format"))?,
-            concurrent_tenant_warmup: ConfigurableSemaphore::new(concurrent_tenant_warmup),
            concurrent_tenant_size_logical_size_queries: ConfigurableSemaphore::new(
                concurrent_tenant_size_logical_size_queries,
            ),
@@ -632,10 +594,6 @@ impl PageServerConfigBuilder {
            control_plane_emergency_mode: self
                .control_plane_emergency_mode
                .ok_or(anyhow!("missing control_plane_emergency_mode"))?,
-
-            heatmap_upload_concurrency: self
-                .heatmap_upload_concurrency
-                .ok_or(anyhow!("missing heatmap_upload_concurrency"))?,
        })
    }
 }
@@ -670,13 +628,12 @@ impl PageServerConf {
        self.deletion_prefix().join(format!("header-{VERSION:02x}"))
    }

-    pub fn tenant_path(&self, tenant_shard_id: &TenantShardId) -> Utf8PathBuf {
-        self.tenants_path().join(tenant_shard_id.to_string())
+    pub fn tenant_path(&self, tenant_id: &TenantId) -> Utf8PathBuf {
+        self.tenants_path().join(tenant_id.to_string())
    }

-    pub fn tenant_ignore_mark_file_path(&self, tenant_shard_id: &TenantShardId) -> Utf8PathBuf {
-        self.tenant_path(tenant_shard_id)
-            .join(IGNORED_TENANT_FILE_NAME)
+    pub fn tenant_ignore_mark_file_path(&self, tenant_id: &TenantId) -> Utf8PathBuf {
+        self.tenant_path(tenant_id).join(IGNORED_TENANT_FILE_NAME)
    }

    /// Points to a place in pageserver's local directory,
@@ -684,53 +641,47 @@ impl PageServerConf {
    ///
    /// Legacy: superseded by tenant_location_config_path.  Eventually
    /// remove this function.
-    pub fn tenant_config_path(&self, tenant_shard_id: &TenantShardId) -> Utf8PathBuf {
-        self.tenant_path(tenant_shard_id).join(TENANT_CONFIG_NAME)
+    pub fn tenant_config_path(&self, tenant_id: &TenantId) -> Utf8PathBuf {
+        self.tenant_path(tenant_id).join(TENANT_CONFIG_NAME)
    }

-    pub fn tenant_location_config_path(&self, tenant_shard_id: &TenantShardId) -> Utf8PathBuf {
-        self.tenant_path(tenant_shard_id)
+    pub fn tenant_location_config_path(&self, tenant_id: &TenantId) -> Utf8PathBuf {
+        self.tenant_path(tenant_id)
            .join(TENANT_LOCATION_CONFIG_NAME)
    }

-    pub fn timelines_path(&self, tenant_shard_id: &TenantShardId) -> Utf8PathBuf {
-        self.tenant_path(tenant_shard_id)
-            .join(TIMELINES_SEGMENT_NAME)
+    pub fn timelines_path(&self, tenant_id: &TenantId) -> Utf8PathBuf {
+        self.tenant_path(tenant_id).join(TIMELINES_SEGMENT_NAME)
    }

-    pub fn timeline_path(
-        &self,
-        tenant_shard_id: &TenantShardId,
-        timeline_id: &TimelineId,
-    ) -> Utf8PathBuf {
-        self.timelines_path(tenant_shard_id)
-            .join(timeline_id.to_string())
+    pub fn timeline_path(&self, tenant_id: &TenantId, timeline_id: &TimelineId) -> Utf8PathBuf {
+        self.timelines_path(tenant_id).join(timeline_id.to_string())
    }

    pub fn timeline_uninit_mark_file_path(
        &self,
-        tenant_shard_id: TenantShardId,
+        tenant_id: TenantId,
        timeline_id: TimelineId,
    ) -> Utf8PathBuf {
        path_with_suffix_extension(
-            self.timeline_path(&tenant_shard_id, &timeline_id),
+            self.timeline_path(&tenant_id, &timeline_id),
            TIMELINE_UNINIT_MARK_SUFFIX,
        )
    }

    pub fn timeline_delete_mark_file_path(
        &self,
-        tenant_shard_id: TenantShardId,
+        tenant_id: TenantId,
        timeline_id: TimelineId,
    ) -> Utf8PathBuf {
        path_with_suffix_extension(
-            self.timeline_path(&tenant_shard_id, &timeline_id),
+            self.timeline_path(&tenant_id, &timeline_id),
            TIMELINE_DELETE_MARK_SUFFIX,
        )
    }

-    pub fn tenant_deleted_mark_file_path(&self, tenant_shard_id: &TenantShardId) -> Utf8PathBuf {
-        self.tenant_path(tenant_shard_id)
+    pub fn tenant_deleted_mark_file_path(&self, tenant_id: &TenantId) -> Utf8PathBuf {
+        self.tenant_path(tenant_id)
            .join(TENANT_DELETED_MARKER_FILE_NAME)
    }

@@ -740,24 +691,20 @@ impl PageServerConf {

    pub fn trace_path(
        &self,
-        tenant_shard_id: &TenantShardId,
+        tenant_id: &TenantId,
        timeline_id: &TimelineId,
        connection_id: &ConnectionId,
    ) -> Utf8PathBuf {
        self.traces_path()
-            .join(tenant_shard_id.to_string())
+            .join(tenant_id.to_string())
            .join(timeline_id.to_string())
            .join(connection_id.to_string())
    }

    /// Points to a place in pageserver's local directory,
    /// where certain timeline's metadata file should be located.
-    pub fn metadata_path(
-        &self,
-        tenant_shard_id: &TenantShardId,
-        timeline_id: &TimelineId,
-    ) -> Utf8PathBuf {
-        self.timeline_path(tenant_shard_id, timeline_id)
+    pub fn metadata_path(&self, tenant_id: &TenantId, timeline_id: &TimelineId) -> Utf8PathBuf {
+        self.timeline_path(tenant_id, timeline_id)
            .join(METADATA_FILE_NAME)
    }

@@ -820,7 +767,7 @@ impl PageServerConf {
                    builder.remote_storage_config(RemoteStorageConfig::from_toml(item)?)
                }
                "tenant_config" => {
-                    t_conf = TenantConfOpt::try_from(item.to_owned()).context(format!("failed to parse: '{key}'"))?;
+                    t_conf = Self::parse_toml_tenant_conf(item)?;
                }
                "id" => builder.id(NodeId(parse_toml_u64(key, item)?)),
                "broker_endpoint" => builder.broker_endpoint(parse_toml_string(key, item)?.parse().context("failed to parse broker endpoint")?),
@@ -828,11 +775,6 @@ impl PageServerConf {
                "log_format" => builder.log_format(
                    LogFormat::from_config(&parse_toml_string(key, item)?)?
                ),
-                "concurrent_tenant_warmup" => builder.concurrent_tenant_warmup({
-                    let input = parse_toml_string(key, item)?;
-                    let permits = input.parse::<usize>().context("expected a number of initial permits, not {s:?}")?;
-                    NonZeroUsize::new(permits).context("initial semaphore permits out of range: 0, use other configuration to disable a feature")?
-                }),
                "concurrent_tenant_size_logical_size_queries" => builder.concurrent_tenant_size_logical_size_queries({
                    let input = parse_toml_string(key, item)?;
                    let permits = input.parse::<usize>().context("expected a number of initial permits, not {s:?}")?;
@@ -874,9 +816,7 @@ impl PageServerConf {
                },
                "control_plane_emergency_mode" => {
                    builder.control_plane_emergency_mode(parse_toml_bool(key, item)?)
-                },
-                "heatmap_upload_concurrency" => {
-                    builder.heatmap_upload_concurrency(parse_toml_u64(key, item)? as usize)
+
                },
                _ => bail!("unrecognized pageserver option '{key}'"),
            }
@@ -901,10 +841,114 @@ impl PageServerConf {
        Ok(conf)
    }

+    /// Subroutine of parse_and_validate: parses the `[tenant_config]` section into a
+    /// `TenantConfOpt`. Only keys present in `item` are set; the rest keep their `Default` value.
+    pub fn parse_toml_tenant_conf(item: &toml_edit::Item) -> Result<TenantConfOpt> {
+        let mut t_conf: TenantConfOpt = Default::default();
+        if let Some(checkpoint_distance) = item.get("checkpoint_distance") {
+            t_conf.checkpoint_distance =
+                Some(parse_toml_u64("checkpoint_distance", checkpoint_distance)?);
+        }
+
+        if let Some(checkpoint_timeout) = item.get("checkpoint_timeout") {
+            t_conf.checkpoint_timeout = Some(parse_toml_duration(
+                "checkpoint_timeout",
+                checkpoint_timeout,
+            )?);
+        }
+
+        if let Some(compaction_target_size) = item.get("compaction_target_size") {
+            t_conf.compaction_target_size = Some(parse_toml_u64(
+                "compaction_target_size",
+                compaction_target_size,
+            )?);
+        }
+
+        if let Some(compaction_period) = item.get("compaction_period") {
+            t_conf.compaction_period =
+                Some(parse_toml_duration("compaction_period", compaction_period)?);
+        }
+
+        if let Some(compaction_threshold) = item.get("compaction_threshold") {
+            t_conf.compaction_threshold =
+                Some(parse_toml_u64("compaction_threshold", compaction_threshold)?.try_into()?); // `try_into` converts to the field's type; a failed conversion is returned as an error
+        }
+
+        if let Some(image_creation_threshold) = item.get("image_creation_threshold") {
+            t_conf.image_creation_threshold = Some(
+                parse_toml_u64("image_creation_threshold", image_creation_threshold)?.try_into()?, // same conversion-with-error as compaction_threshold
+            );
+        }
+
+        if let Some(gc_horizon) = item.get("gc_horizon") {
+            t_conf.gc_horizon = Some(parse_toml_u64("gc_horizon", gc_horizon)?);
+        }
+
+        if let Some(gc_period) = item.get("gc_period") {
+            t_conf.gc_period = Some(parse_toml_duration("gc_period", gc_period)?);
+        }
+
+        if let Some(pitr_interval) = item.get("pitr_interval") {
+            t_conf.pitr_interval = Some(parse_toml_duration("pitr_interval", pitr_interval)?);
+        }
+        if let Some(walreceiver_connect_timeout) = item.get("walreceiver_connect_timeout") {
+            t_conf.walreceiver_connect_timeout = Some(parse_toml_duration(
+                "walreceiver_connect_timeout",
+                walreceiver_connect_timeout,
+            )?);
+        }
+        if let Some(lagging_wal_timeout) = item.get("lagging_wal_timeout") {
+            t_conf.lagging_wal_timeout = Some(parse_toml_duration(
+                "lagging_wal_timeout",
+                lagging_wal_timeout,
+            )?);
+        }
+        if let Some(max_lsn_wal_lag) = item.get("max_lsn_wal_lag") {
+            t_conf.max_lsn_wal_lag =
+                Some(deserialize_from_item("max_lsn_wal_lag", max_lsn_wal_lag)?);
+        }
+        if let Some(trace_read_requests) = item.get("trace_read_requests") {
+            t_conf.trace_read_requests =
+                Some(trace_read_requests.as_bool().with_context(|| { // `as_bool` gives `None` for a non-boolean item, which becomes the error below
+                    "configure option trace_read_requests is not a bool".to_string()
+                })?);
+        }
+
+        if let Some(eviction_policy) = item.get("eviction_policy") {
+            t_conf.eviction_policy = Some(
+                deserialize_from_item("eviction_policy", eviction_policy)
+                    .context("parse eviction_policy")?,
+            );
+        }
+
+        if let Some(item) = item.get("min_resident_size_override") { // `item` shadows the section item inside this block only
+            t_conf.min_resident_size_override = Some(
+                deserialize_from_item("min_resident_size_override", item)
+                    .context("parse min_resident_size_override")?,
+            );
+        }
+
+        if let Some(item) = item.get("evictions_low_residence_duration_metric_threshold") { // shadows the section item, as above
+            t_conf.evictions_low_residence_duration_metric_threshold = Some(parse_toml_duration(
+                "evictions_low_residence_duration_metric_threshold",
+                item,
+            )?);
+        }
+
+        if let Some(gc_feedback) = item.get("gc_feedback") {
+            t_conf.gc_feedback = Some(
+                gc_feedback
+                    .as_bool()
+                    .with_context(|| "configure option gc_feedback is not a bool".to_string())?,
+            );
+        }
+
+        Ok(t_conf)
+    }
+
    #[cfg(test)]
    pub fn test_repo_dir(test_name: &str) -> Utf8PathBuf {
-        let test_output_dir = std::env::var("TEST_OUTPUT").unwrap_or("../tmp_check".into());
-        Utf8PathBuf::from(format!("{test_output_dir}/test_{test_name}"))
+        Utf8PathBuf::from(format!("../tmp_check/test_{test_name}"))
    }

    pub fn dummy_conf(repo_dir: Utf8PathBuf) -> Self {
@@ -930,10 +974,6 @@ impl PageServerConf {
            broker_endpoint: storage_broker::DEFAULT_ENDPOINT.parse().unwrap(),
            broker_keepalive_interval: Duration::from_secs(5000),
            log_format: LogFormat::from_str(defaults::DEFAULT_LOG_FORMAT).unwrap(),
-            concurrent_tenant_warmup: ConfigurableSemaphore::new(
-                NonZeroUsize::new(DEFAULT_CONCURRENT_TENANT_WARMUP)
-                    .expect("Invalid default constant"),
-            ),
            concurrent_tenant_size_logical_size_queries: ConfigurableSemaphore::default(),
            eviction_task_immitated_concurrent_logical_size_queries: ConfigurableSemaphore::default(
            ),
@@ -948,7 +988,6 @@ impl PageServerConf {
            control_plane_api: None,
            control_plane_api_token: None,
            control_plane_emergency_mode: false,
-            heatmap_upload_concurrency: defaults::DEFAULT_HEATMAP_UPLOAD_CONCURRENCY,
        }
    }
 }
@@ -1152,9 +1191,6 @@ background_task_maximum_delay = '334 s'
                    storage_broker::DEFAULT_KEEPALIVE_INTERVAL
                )?,
                log_format: LogFormat::from_str(defaults::DEFAULT_LOG_FORMAT).unwrap(),
-                concurrent_tenant_warmup: ConfigurableSemaphore::new(
-                    NonZeroUsize::new(DEFAULT_CONCURRENT_TENANT_WARMUP).unwrap()
-                ),
                concurrent_tenant_size_logical_size_queries: ConfigurableSemaphore::default(),
                eviction_task_immitated_concurrent_logical_size_queries:
                    ConfigurableSemaphore::default(),
@@ -1176,8 +1212,7 @@ background_task_maximum_delay = '334 s'
                )?,
                control_plane_api: None,
                control_plane_api_token: None,
-                control_plane_emergency_mode: false,
-                heatmap_upload_concurrency: defaults::DEFAULT_HEATMAP_UPLOAD_CONCURRENCY
+                control_plane_emergency_mode: false
            },
            "Correct defaults should be used when no config values are provided"
        );
@@ -1221,9 +1256,6 @@ background_task_maximum_delay = '334 s'
                broker_endpoint: storage_broker::DEFAULT_ENDPOINT.parse().unwrap(),
                broker_keepalive_interval: Duration::from_secs(5),
                log_format: LogFormat::Json,
-                concurrent_tenant_warmup: ConfigurableSemaphore::new(
-                    NonZeroUsize::new(DEFAULT_CONCURRENT_TENANT_WARMUP).unwrap()
-                ),
                concurrent_tenant_size_logical_size_queries: ConfigurableSemaphore::default(),
                eviction_task_immitated_concurrent_logical_size_queries:
                    ConfigurableSemaphore::default(),
@@ -1237,8 +1269,7 @@ background_task_maximum_delay = '334 s'
                background_task_maximum_delay: Duration::from_secs(334),
                control_plane_api: None,
                control_plane_api_token: None,
-                control_plane_emergency_mode: false,
-                heatmap_upload_concurrency: defaults::DEFAULT_HEATMAP_UPLOAD_CONCURRENCY
+                control_plane_emergency_mode: false
            },
            "Should be able to parse all basic config values correctly"
        );
@@ -1386,37 +1417,6 @@ trace_read_requests = {trace_read_requests}"#,
        Ok(())
    }

-    #[test]
-    fn parse_incorrect_tenant_config() -> anyhow::Result<()> {
-        let config_string = r#"
-            [tenant_config]
-            checkpoint_distance = -1 # supposed to be an u64
-        "#
-        .to_string();
-
-        let toml: Document = config_string.parse()?;
-        let item = toml.get("tenant_config").unwrap();
-        let error = TenantConfOpt::try_from(item.to_owned()).unwrap_err();
-
-        let expected_error_str = "checkpoint_distance: invalid value: integer `-1`, expected u64";
-        assert_eq!(error.to_string(), expected_error_str);
-
-        Ok(())
-    }
-
-    #[test]
-    fn parse_override_tenant_config() -> anyhow::Result<()> {
-        let config_string = r#"tenant_config={ min_resident_size_override =  400 }"#.to_string();
-
-        let toml: Document = config_string.parse()?;
-        let item = toml.get("tenant_config").unwrap();
-        let conf = TenantConfOpt::try_from(item.to_owned()).unwrap();
-
-        assert_eq!(conf.min_resident_size_override, Some(400));
-
-        Ok(())
-    }
-
    #[test]
    fn eviction_pageserver_config_parse() -> anyhow::Result<()> {
        let tempdir = tempdir()?;
--- a/pageserver/src/consumption_metrics.rs
+++ b/pageserver/src/consumption_metrics.rs
@@ -3,7 +3,7 @@
 use crate::context::{DownloadBehavior, RequestContext};
 use crate::task_mgr::{self, TaskKind, BACKGROUND_RUNTIME};
 use crate::tenant::tasks::BackgroundLoopKind;
-use crate::tenant::{mgr, LogicalSizeCalculationCause, PageReconstructError, Tenant};
+use crate::tenant::{mgr, LogicalSizeCalculationCause};
 use camino::Utf8PathBuf;
 use consumption_metrics::EventType;
 use pageserver_api::models::TenantState;
@@ -12,7 +12,6 @@ use std::collections::HashMap;
 use std::sync::Arc;
 use std::time::{Duration, SystemTime};
 use tokio::time::Instant;
-use tokio_util::sync::CancellationToken;
 use tracing::*;
 use utils::id::NodeId;

@@ -38,7 +37,6 @@ type RawMetric = (MetricsKey, (EventType, u64));
 type Cache = HashMap<MetricsKey, (EventType, u64)>;

 /// Main thread that serves metrics collection
-#[allow(clippy::too_many_arguments)]
 pub async fn collect_metrics(
    metric_collection_endpoint: &Url,
    metric_collection_interval: Duration,
@@ -46,7 +44,6 @@ pub async fn collect_metrics(
    synthetic_size_calculation_interval: Duration,
    node_id: NodeId,
    local_disk_storage: Utf8PathBuf,
-    cancel: CancellationToken,
    ctx: RequestContext,
 ) -> anyhow::Result<()> {
    if _cached_metric_collection_interval != Duration::ZERO {
@@ -66,13 +63,9 @@ pub async fn collect_metrics(
        "synthetic size calculation",
        false,
        async move {
-            calculate_synthetic_size_worker(
-                synthetic_size_calculation_interval,
-                &cancel,
-                &worker_ctx,
-            )
-            .instrument(info_span!("synthetic_size_worker"))
-            .await?;
+            calculate_synthetic_size_worker(synthetic_size_calculation_interval, &worker_ctx)
+                .instrument(info_span!("synthetic_size_worker"))
+                .await?;
            Ok(())
        },
    );
@@ -248,7 +241,6 @@ async fn reschedule(
 /// Caclculate synthetic size for each active tenant
 async fn calculate_synthetic_size_worker(
    synthetic_size_calculation_interval: Duration,
-    cancel: &CancellationToken,
    ctx: &RequestContext,
 ) -> anyhow::Result<()> {
    info!("starting calculate_synthetic_size_worker");
@@ -256,6 +248,8 @@ async fn calculate_synthetic_size_worker(
        info!("calculate_synthetic_size_worker stopped");
    };

+    let cause = LogicalSizeCalculationCause::ConsumptionMetricsSyntheticSize;
+
    loop {
        let started_at = Instant::now();

@@ -267,25 +261,21 @@ async fn calculate_synthetic_size_worker(
            }
        };

-        for (tenant_shard_id, tenant_state) in tenants {
+        for (tenant_id, tenant_state) in tenants {
            if tenant_state != TenantState::Active {
                continue;
            }

-            if !tenant_shard_id.is_zero() {
-                // We only send consumption metrics from shard 0, so don't waste time calculating
-                // synthetic size on other shards.
-                continue;
+            if let Ok(tenant) = mgr::get_tenant(tenant_id, true) {
+                // TODO should we use concurrent_background_tasks_rate_limit() here, like the other background tasks?
+                // We can put in some prioritization for consumption metrics.
+                // Same for the loop that fetches computed metrics.
+                // By using the same limiter, we centralize metrics collection for "start" and "finished" counters,
+                // which turns out is really handy to understand the system.
+                if let Err(e) = tenant.calculate_synthetic_size(cause, ctx).await {
+                    error!("failed to calculate synthetic size for tenant {tenant_id}: {e:#}");
+                }
            }
-
-            let Ok(tenant) = mgr::get_tenant(tenant_shard_id, true) else {
-                continue;
-            };
-
-            // there is never any reason to exit calculate_synthetic_size_worker following any
-            // return value -- we don't need to care about shutdown because no tenant is found when
-            // pageserver is shut down.
-            calculate_and_log(&tenant, cancel, ctx).await;
        }

        crate::tenant::tasks::warn_when_period_overrun(
@@ -296,7 +286,7 @@ async fn calculate_synthetic_size_worker(

        let res = tokio::time::timeout_at(
            started_at + synthetic_size_calculation_interval,
-            cancel.cancelled(),
+            task_mgr::shutdown_token().cancelled(),
        )
        .await;
        if res.is_ok() {
@@ -304,31 +294,3 @@ async fn calculate_synthetic_size_worker(
        }
    }
 }
-
-async fn calculate_and_log(tenant: &Tenant, cancel: &CancellationToken, ctx: &RequestContext) {
-    const CAUSE: LogicalSizeCalculationCause =
-        LogicalSizeCalculationCause::ConsumptionMetricsSyntheticSize;
-
-    // TODO should we use concurrent_background_tasks_rate_limit() here, like the other background tasks?
-    // We can put in some prioritization for consumption metrics.
-    // Same for the loop that fetches computed metrics.
-    // By using the same limiter, we centralize metrics collection for "start" and "finished" counters,
-    // which turns out is really handy to understand the system.
-    let Err(e) = tenant.calculate_synthetic_size(CAUSE, cancel, ctx).await else {
-        return;
-    };
-
-    // this error can be returned if timeline is shutting down, but it does not
-    // mean the synthetic size worker should terminate. we do not need any checks
-    // in this function because `mgr::get_tenant` will error out after shutdown has
-    // progressed to shutting down tenants.
-    let shutting_down = matches!(
-        e.downcast_ref::<PageReconstructError>(),
-        Some(PageReconstructError::Cancelled | PageReconstructError::AncestorStopping(_))
-    );
-
-    if !shutting_down {
-        let tenant_shard_id = tenant.tenant_shard_id();
-        error!("failed to calculate synthetic size for tenant {tenant_shard_id}: {e:#}");
-    }
-}
--- a/pageserver/src/consumption_metrics/metrics.rs
+++ b/pageserver/src/consumption_metrics/metrics.rs
@@ -1,4 +1,5 @@
-use crate::{context::RequestContext, tenant::timeline::logical_size::CurrentLogicalSize};
+use crate::context::RequestContext;
+use anyhow::Context;
 use chrono::{DateTime, Utc};
 use consumption_metrics::EventType;
 use futures::stream::StreamExt;
@@ -197,12 +198,12 @@ pub(super) async fn collect_all_metrics(
    };

    let tenants = futures::stream::iter(tenants).filter_map(|(id, state)| async move {
-        if state != TenantState::Active || !id.is_zero() {
+        if state != TenantState::Active {
            None
        } else {
            crate::tenant::mgr::get_tenant(id, true)
                .ok()
-                .map(|tenant| (id.tenant_id, tenant))
+                .map(|tenant| (id, tenant))
        }
    });

@@ -350,17 +351,14 @@ impl TimelineSnapshot {
            let last_record_lsn = t.get_last_record_lsn();

            let current_exact_logical_size = {
-                let span = tracing::info_span!("collect_metrics_iteration", tenant_id = %t.tenant_shard_id.tenant_id, timeline_id = %t.timeline_id);
-                let size = span.in_scope(|| {
-                    t.get_current_logical_size(
-                        crate::tenant::timeline::GetLogicalSizePriority::Background,
-                        ctx,
-                    )
-                });
-                match size {
+                let span = tracing::info_span!("collect_metrics_iteration", tenant_id = %t.tenant_id, timeline_id = %t.timeline_id);
+                let res = span
+                    .in_scope(|| t.get_current_logical_size(ctx))
+                    .context("get_current_logical_size");
+                match res? {
                    // Only send timeline logical size when it is fully calculated.
-                    CurrentLogicalSize::Exact(ref size) => Some(size.into()),
-                    CurrentLogicalSize::Approximate(_) => None,
+                    (size, is_exact) if is_exact => Some(size),
+                    (_, _) => None,
                }
            };

--- a/pageserver/src/control_plane_client.rs
+++ b/pageserver/src/control_plane_client.rs
@@ -1,15 +1,16 @@
 use std::collections::HashMap;

-use pageserver_api::{
-    control_api::{
-        ReAttachRequest, ReAttachResponse, ValidateRequest, ValidateRequestTenant, ValidateResponse,
-    },
-    shard::TenantShardId,
+use pageserver_api::control_api::{
+    ReAttachRequest, ReAttachResponse, ValidateRequest, ValidateRequestTenant, ValidateResponse,
 };
 use serde::{de::DeserializeOwned, Serialize};
 use tokio_util::sync::CancellationToken;
 use url::Url;
-use utils::{backoff, generation::Generation, id::NodeId};
+use utils::{
+    backoff,
+    generation::Generation,
+    id::{NodeId, TenantId},
+};

 use crate::config::PageServerConf;

@@ -30,11 +31,11 @@ pub enum RetryForeverError {

 #[async_trait::async_trait]
 pub trait ControlPlaneGenerationsApi {
-    async fn re_attach(&self) -> Result<HashMap<TenantShardId, Generation>, RetryForeverError>;
+    async fn re_attach(&self) -> Result<HashMap<TenantId, Generation>, RetryForeverError>;
    async fn validate(
        &self,
-        tenants: Vec<(TenantShardId, Generation)>,
-    ) -> Result<HashMap<TenantShardId, bool>, RetryForeverError>;
+        tenants: Vec<(TenantId, Generation)>,
+    ) -> Result<HashMap<TenantId, bool>, RetryForeverError>;
 }

 impl ControlPlaneClient {
@@ -126,7 +127,7 @@ impl ControlPlaneClient {
 #[async_trait::async_trait]
 impl ControlPlaneGenerationsApi for ControlPlaneClient {
    /// Block until we get a successful response, or error out if we are shut down
-    async fn re_attach(&self) -> Result<HashMap<TenantShardId, Generation>, RetryForeverError> {
+    async fn re_attach(&self) -> Result<HashMap<TenantId, Generation>, RetryForeverError> {
        let re_attach_path = self
            .base_url
            .join("re-attach")
@@ -153,8 +154,8 @@ impl ControlPlaneGenerationsApi for ControlPlaneClient {
    /// Block until we get a successful response, or error out if we are shut down
    async fn validate(
        &self,
-        tenants: Vec<(TenantShardId, Generation)>,
-    ) -> Result<HashMap<TenantShardId, bool>, RetryForeverError> {
+        tenants: Vec<(TenantId, Generation)>,
+    ) -> Result<HashMap<TenantId, bool>, RetryForeverError> {
        let re_attach_path = self
            .base_url
            .join("validate")
--- a/pageserver/src/deletion_queue.rs
+++ b/pageserver/src/deletion_queue.rs
@@ -10,12 +10,11 @@ use crate::control_plane_client::ControlPlaneGenerationsApi;
 use crate::metrics;
 use crate::tenant::remote_timeline_client::remote_layer_path;
 use crate::tenant::remote_timeline_client::remote_timeline_path;
-use crate::tenant::remote_timeline_client::LayerFileMetadata;
 use crate::virtual_file::MaybeFatalIo;
 use crate::virtual_file::VirtualFile;
 use anyhow::Context;
 use camino::Utf8PathBuf;
-use pageserver_api::shard::TenantShardId;
+use hex::FromHex;
 use remote_storage::{GenericRemoteStorage, RemotePath};
 use serde::Deserialize;
 use serde::Serialize;
@@ -26,7 +25,7 @@ use tracing::Instrument;
 use tracing::{self, debug, error};
 use utils::crashsafe::path_with_suffix_extension;
 use utils::generation::Generation;
-use utils::id::TimelineId;
+use utils::id::{TenantId, TimelineId};
 use utils::lsn::AtomicLsn;
 use utils::lsn::Lsn;

@@ -160,10 +159,11 @@ pub struct DeletionQueueClient {
    lsn_table: Arc<std::sync::RwLock<VisibleLsnUpdates>>,
 }

-#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]
+#[derive(Debug, Serialize, Deserialize)]
 struct TenantDeletionList {
    /// For each Timeline, a list of key fragments to append to the timeline remote path
    /// when reconstructing a full key
+    #[serde(serialize_with = "to_hex_map", deserialize_with = "from_hex_map")]
    timelines: HashMap<TimelineId, Vec<String>>,

    /// The generation in which this deletion was emitted: note that this may not be the
@@ -178,11 +178,43 @@ impl TenantDeletionList {
    }
 }

+/// Serializes a HashMap whose keys are hex-encodable (`AsRef<[u8]>`), writing each key as a hex string
+fn to_hex_map<S, V, I>(input: &HashMap<I, V>, serializer: S) -> Result<S::Ok, S::Error>
+where
+    S: serde::Serializer,
+    V: Serialize,
+    I: AsRef<[u8]>,
+{
+    let transformed = input.iter().map(|(k, v)| (hex::encode(k), v));
+
+    transformed
+        .collect::<HashMap<String, &V>>()
+        .serialize(serializer)
+}
+
+/// Deserializes a HashMap with hex-string keys, decoding each key via `FromHex`; a bad key is an error
+fn from_hex_map<'de, D, V, I>(deserializer: D) -> Result<HashMap<I, V>, D::Error>
+where
+    D: serde::de::Deserializer<'de>,
+    V: Deserialize<'de>,
+    I: FromHex + std::hash::Hash + Eq,
+{
+    let hex_map = HashMap::<String, V>::deserialize(deserializer)?;
+    hex_map
+        .into_iter()
+        .map(|(k, v)| {
+            I::from_hex(k)
+                .map(|k| (k, v))
+                .map_err(|_| serde::de::Error::custom("Invalid hex ID"))
+        })
+        .collect()
+}
+
 /// Files ending with this suffix will be ignored and erased
 /// during recovery as startup.
 const TEMP_SUFFIX: &str = "tmp";

-#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]
+#[derive(Debug, Serialize, Deserialize)]
 struct DeletionList {
    /// Serialization version, for future use
    version: u8,
@@ -194,7 +226,8 @@ struct DeletionList {
    /// nested HashMaps by TenantTimelineID.  Each Tenant only appears once
    /// with one unique generation ID: if someone tries to push a second generation
    /// ID for the same tenant, we will start a new DeletionList.
-    tenants: HashMap<TenantShardId, TenantDeletionList>,
+    #[serde(serialize_with = "to_hex_map", deserialize_with = "from_hex_map")]
+    tenants: HashMap<TenantId, TenantDeletionList>,

    /// Avoid having to walk `tenants` to calculate the number of keys in
    /// the nested deletion lists
@@ -266,7 +299,7 @@ impl DeletionList {
    /// deletion list.
    fn push(
        &mut self,
-        tenant: &TenantShardId,
+        tenant: &TenantId,
        timeline: &TimelineId,
        generation: Generation,
        objects: &mut Vec<RemotePath>,
@@ -358,7 +391,7 @@ struct TenantLsnState {

 #[derive(Default)]
 struct VisibleLsnUpdates {
-    tenants: HashMap<TenantShardId, TenantLsnState>,
+    tenants: HashMap<TenantId, TenantLsnState>,
 }

 impl VisibleLsnUpdates {
@@ -415,7 +448,7 @@ impl DeletionQueueClient {

    pub(crate) fn recover(
        &self,
-        attached_tenants: HashMap<TenantShardId, Generation>,
+        attached_tenants: HashMap<TenantId, Generation>,
    ) -> Result<(), DeletionQueueError> {
        self.do_push(
            &self.tx,
@@ -432,7 +465,7 @@ impl DeletionQueueClient {
    /// backend will later wake up and notice that the tenant's generation requires validation.
    pub(crate) async fn update_remote_consistent_lsn(
        &self,
-        tenant_shard_id: TenantShardId,
+        tenant_id: TenantId,
        timeline_id: TimelineId,
        current_generation: Generation,
        lsn: Lsn,
@@ -443,13 +476,10 @@ impl DeletionQueueClient {
            .write()
            .expect("Lock should never be poisoned");

-        let tenant_entry = locked
-            .tenants
-            .entry(tenant_shard_id)
-            .or_insert(TenantLsnState {
-                timelines: HashMap::new(),
-                generation: current_generation,
-            });
+        let tenant_entry = locked.tenants.entry(tenant_id).or_insert(TenantLsnState {
+            timelines: HashMap::new(),
+            generation: current_generation,
+        });

        if tenant_entry.generation != current_generation {
            // Generation might have changed if we were detached and then re-attached: in this case,
@@ -476,29 +506,27 @@ impl DeletionQueueClient {
    /// generations in `layers` are the generations in which those layers were written.
    pub(crate) async fn push_layers(
        &self,
-        tenant_shard_id: TenantShardId,
+        tenant_id: TenantId,
        timeline_id: TimelineId,
        current_generation: Generation,
-        layers: Vec<(LayerFileName, LayerFileMetadata)>,
+        layers: Vec<(LayerFileName, Generation)>,
    ) -> Result<(), DeletionQueueError> {
        if current_generation.is_none() {
            debug!("Enqueuing deletions in legacy mode, skipping queue");
-
            let mut layer_paths = Vec::new();
-            for (layer, meta) in layers {
+            for (layer, generation) in layers {
                layer_paths.push(remote_layer_path(
-                    &tenant_shard_id.tenant_id,
+                    &tenant_id,
                    &timeline_id,
-                    meta.shard,
                    &layer,
-                    meta.generation,
+                    generation,
                ));
            }
            self.push_immediate(layer_paths).await?;
            return self.flush_immediate().await;
        }

-        self.push_layers_sync(tenant_shard_id, timeline_id, current_generation, layers)
+        self.push_layers_sync(tenant_id, timeline_id, current_generation, layers)
    }

    /// When a Tenant has a generation, push_layers is always synchronous because
@@ -508,10 +536,10 @@ impl DeletionQueueClient {
    /// support (`<https://github.com/neondatabase/neon/issues/5395>`)
    pub(crate) fn push_layers_sync(
        &self,
-        tenant_shard_id: TenantShardId,
+        tenant_id: TenantId,
        timeline_id: TimelineId,
        current_generation: Generation,
-        layers: Vec<(LayerFileName, LayerFileMetadata)>,
+        layers: Vec<(LayerFileName, Generation)>,
    ) -> Result<(), DeletionQueueError> {
        metrics::DELETION_QUEUE
            .keys_submitted
@@ -519,7 +547,7 @@ impl DeletionQueueClient {
        self.do_push(
            &self.tx,
            ListWriterQueueMessage::Delete(DeletionOp {
-                tenant_shard_id,
+                tenant_id,
                timeline_id,
                layers,
                generation: current_generation,
@@ -722,7 +750,6 @@ impl DeletionQueue {
 mod test {
    use camino::Utf8Path;
    use hex_literal::hex;
-    use pageserver_api::shard::ShardIndex;
    use std::{io::ErrorKind, time::Duration};
    use tracing::info;

@@ -787,12 +814,12 @@ mod test {
        }

        fn set_latest_generation(&self, gen: Generation) {
-            let tenant_shard_id = self.harness.tenant_shard_id;
+            let tenant_id = self.harness.tenant_id;
            self.mock_control_plane
                .latest_generation
                .lock()
                .unwrap()
-                .insert(tenant_shard_id, gen);
+                .insert(tenant_id, gen);
        }

        /// Returns remote layer file name, suitable for use in assert_remote_files
@@ -801,8 +828,8 @@ mod test {
            file_name: LayerFileName,
            gen: Generation,
        ) -> anyhow::Result<String> {
-            let tenant_shard_id = self.harness.tenant_shard_id;
-            let relative_remote_path = remote_timeline_path(&tenant_shard_id, &TIMELINE_ID);
+            let tenant_id = self.harness.tenant_id;
+            let relative_remote_path = remote_timeline_path(&tenant_id, &TIMELINE_ID);
            let remote_timeline_path = self.remote_fs_dir.join(relative_remote_path.get_path());
            std::fs::create_dir_all(&remote_timeline_path)?;
            let remote_layer_file_name = format!("{}{}", file_name, gen.get_suffix());
@@ -820,7 +847,7 @@ mod test {

    #[derive(Debug, Clone)]
    struct MockControlPlane {
-        pub latest_generation: std::sync::Arc<std::sync::Mutex<HashMap<TenantShardId, Generation>>>,
+        pub latest_generation: std::sync::Arc<std::sync::Mutex<HashMap<TenantId, Generation>>>,
    }

    impl MockControlPlane {
@@ -834,20 +861,20 @@ mod test {
    #[async_trait::async_trait]
    impl ControlPlaneGenerationsApi for MockControlPlane {
        #[allow(clippy::diverging_sub_expression)] // False positive via async_trait
-        async fn re_attach(&self) -> Result<HashMap<TenantShardId, Generation>, RetryForeverError> {
+        async fn re_attach(&self) -> Result<HashMap<TenantId, Generation>, RetryForeverError> {
            unimplemented!()
        }
        async fn validate(
            &self,
-            tenants: Vec<(TenantShardId, Generation)>,
-        ) -> Result<HashMap<TenantShardId, bool>, RetryForeverError> {
+            tenants: Vec<(TenantId, Generation)>,
+        ) -> Result<HashMap<TenantId, bool>, RetryForeverError> {
            let mut result = HashMap::new();

            let latest_generation = self.latest_generation.lock().unwrap();

-            for (tenant_shard_id, generation) in tenants {
-                if let Some(latest) = latest_generation.get(&tenant_shard_id) {
-                    result.insert(tenant_shard_id, *latest == generation);
+            for (tenant_id, generation) in tenants {
+                if let Some(latest) = latest_generation.get(&tenant_id) {
+                    result.insert(tenant_id, *latest == generation);
                }
            }

@@ -951,10 +978,10 @@ mod test {
        client.recover(HashMap::new())?;

        let layer_file_name_1: LayerFileName = "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap();
-        let tenant_shard_id = ctx.harness.tenant_shard_id;
+        let tenant_id = ctx.harness.tenant_id;

        let content: Vec<u8> = "victim1 contents".into();
-        let relative_remote_path = remote_timeline_path(&tenant_shard_id, &TIMELINE_ID);
+        let relative_remote_path = remote_timeline_path(&tenant_id, &TIMELINE_ID);
        let remote_timeline_path = ctx.remote_fs_dir.join(relative_remote_path.get_path());
        let deletion_prefix = ctx.harness.conf.deletion_prefix();

@@ -962,8 +989,6 @@ mod test {
        // we delete, and the generation of the running Tenant.
        let layer_generation = Generation::new(0xdeadbeef);
        let now_generation = Generation::new(0xfeedbeef);
-        let layer_metadata =
-            LayerFileMetadata::new(0xf00, layer_generation, ShardIndex::unsharded());

        let remote_layer_file_name_1 =
            format!("{}{}", layer_file_name_1, layer_generation.get_suffix());
@@ -984,10 +1009,10 @@ mod test {
        info!("Pushing");
        client
            .push_layers(
-                tenant_shard_id,
+                tenant_id,
                TIMELINE_ID,
                now_generation,
-                [(layer_file_name_1.clone(), layer_metadata)].to_vec(),
+                [(layer_file_name_1.clone(), layer_generation)].to_vec(),
            )
            .await?;
        assert_remote_files(&[&remote_layer_file_name_1], &remote_timeline_path);
@@ -1026,13 +1051,11 @@ mod test {
        let stale_generation = latest_generation.previous();
        // Generation that our example layer file was written with
        let layer_generation = stale_generation.previous();
-        let layer_metadata =
-            LayerFileMetadata::new(0xf00, layer_generation, ShardIndex::unsharded());

        ctx.set_latest_generation(latest_generation);

-        let tenant_shard_id = ctx.harness.tenant_shard_id;
-        let relative_remote_path = remote_timeline_path(&tenant_shard_id, &TIMELINE_ID);
+        let tenant_id = ctx.harness.tenant_id;
+        let relative_remote_path = remote_timeline_path(&tenant_id, &TIMELINE_ID);
        let remote_timeline_path = ctx.remote_fs_dir.join(relative_remote_path.get_path());

        // Initial state: a remote layer exists
@@ -1042,10 +1065,10 @@ mod test {
        tracing::debug!("Pushing...");
        client
            .push_layers(
-                tenant_shard_id,
+                tenant_id,
                TIMELINE_ID,
                stale_generation,
-                [(EXAMPLE_LAYER_NAME.clone(), layer_metadata.clone())].to_vec(),
+                [(EXAMPLE_LAYER_NAME.clone(), layer_generation)].to_vec(),
            )
            .await?;

@@ -1057,10 +1080,10 @@ mod test {
        tracing::debug!("Pushing...");
        client
            .push_layers(
-                tenant_shard_id,
+                tenant_id,
                TIMELINE_ID,
                latest_generation,
-                [(EXAMPLE_LAYER_NAME.clone(), layer_metadata.clone())].to_vec(),
+                [(EXAMPLE_LAYER_NAME.clone(), layer_generation)].to_vec(),
            )
            .await?;

@@ -1079,16 +1102,14 @@ mod test {
        let client = ctx.deletion_queue.new_client();
        client.recover(HashMap::new())?;

-        let tenant_shard_id = ctx.harness.tenant_shard_id;
+        let tenant_id = ctx.harness.tenant_id;

-        let relative_remote_path = remote_timeline_path(&tenant_shard_id, &TIMELINE_ID);
+        let relative_remote_path = remote_timeline_path(&tenant_id, &TIMELINE_ID);
        let remote_timeline_path = ctx.remote_fs_dir.join(relative_remote_path.get_path());
        let deletion_prefix = ctx.harness.conf.deletion_prefix();

        let layer_generation = Generation::new(0xdeadbeef);
        let now_generation = Generation::new(0xfeedbeef);
-        let layer_metadata =
-            LayerFileMetadata::new(0xf00, layer_generation, ShardIndex::unsharded());

        // Inject a deletion in the generation before generation_now: after restart,
        // this deletion should _not_ get executed (only the immediately previous
@@ -1097,10 +1118,10 @@ mod test {
            ctx.write_remote_layer(EXAMPLE_LAYER_NAME, layer_generation)?;
        client
            .push_layers(
-                tenant_shard_id,
+                tenant_id,
                TIMELINE_ID,
                now_generation.previous(),
-                [(EXAMPLE_LAYER_NAME.clone(), layer_metadata.clone())].to_vec(),
+                [(EXAMPLE_LAYER_NAME.clone(), layer_generation)].to_vec(),
            )
            .await?;

@@ -1111,10 +1132,10 @@ mod test {
            ctx.write_remote_layer(EXAMPLE_LAYER_NAME_ALT, layer_generation)?;
        client
            .push_layers(
-                tenant_shard_id,
+                tenant_id,
                TIMELINE_ID,
                now_generation,
-                [(EXAMPLE_LAYER_NAME_ALT.clone(), layer_metadata.clone())].to_vec(),
+                [(EXAMPLE_LAYER_NAME_ALT.clone(), layer_generation)].to_vec(),
            )
            .await?;

@@ -1142,7 +1163,7 @@ mod test {
        drop(client);
        ctx.restart().await;
        let client = ctx.deletion_queue.new_client();
-        client.recover(HashMap::from([(tenant_shard_id, now_generation)]))?;
+        client.recover(HashMap::from([(tenant_id, now_generation)]))?;

        info!("Flush-executing");
        client.flush_execute().await?;
@@ -1204,13 +1225,12 @@ pub(crate) mod mock {
                match msg {
                    ListWriterQueueMessage::Delete(op) => {
                        let mut objects = op.objects;
-                        for (layer, meta) in op.layers {
+                        for (layer, generation) in op.layers {
                            objects.push(remote_layer_path(
-                                &op.tenant_shard_id.tenant_id,
+                                &op.tenant_id,
                                &op.timeline_id,
-                                meta.shard,
                                &layer,
-                                meta.generation,
+                                generation,
                            ));
                        }

@@ -1290,34 +1310,4 @@ pub(crate) mod mock {
            }
        }
    }
-
-    /// Test round-trip serialization/deserialization, and test stability of the format
-    /// vs. a static expected string for the serialized version.
-    #[test]
-    fn deletion_list_serialization() -> anyhow::Result<()> {
-        let tenant_id = "ad6c1a56f5680419d3a16ff55d97ec3c"
-            .to_string()
-            .parse::<TenantShardId>()?;
-        let timeline_id = "be322c834ed9e709e63b5c9698691910"
-            .to_string()
-            .parse::<TimelineId>()?;
-        let generation = Generation::new(123);
-
-        let object =
-            RemotePath::from_string(&format!("tenants/{tenant_id}/timelines/{timeline_id}/foo"))?;
-        let mut objects = [object].to_vec();
-
-        let mut example = DeletionList::new(1);
-        example.push(&tenant_id, &timeline_id, generation, &mut objects);
-
-        let encoded = serde_json::to_string(&example)?;
-
-        let expected = "{\"version\":1,\"sequence\":1,\"tenants\":{\"ad6c1a56f5680419d3a16ff55d97ec3c\":{\"timelines\":{\"be322c834ed9e709e63b5c9698691910\":[\"foo\"]},\"generation\":123}},\"size\":1}".to_string();
-        assert_eq!(encoded, expected);
-
-        let decoded = serde_json::from_str::<DeletionList>(&encoded)?;
-        assert_eq!(example, decoded);
-
-        Ok(())
-    }
 }
--- a/pageserver/src/deletion_queue/list_writer.rs
+++ b/pageserver/src/deletion_queue/list_writer.rs
@@ -19,7 +19,6 @@ use std::collections::HashMap;
 use std::fs::create_dir_all;
 use std::time::Duration;

-use pageserver_api::shard::TenantShardId;
 use regex::Regex;
 use remote_storage::RemotePath;
 use tokio_util::sync::CancellationToken;
@@ -27,13 +26,13 @@ use tracing::debug;
 use tracing::info;
 use tracing::warn;
 use utils::generation::Generation;
+use utils::id::TenantId;
 use utils::id::TimelineId;

 use crate::config::PageServerConf;
 use crate::deletion_queue::TEMP_SUFFIX;
 use crate::metrics;
 use crate::tenant::remote_timeline_client::remote_layer_path;
-use crate::tenant::remote_timeline_client::LayerFileMetadata;
 use crate::tenant::storage_layer::LayerFileName;
 use crate::virtual_file::on_fatal_io_error;
 use crate::virtual_file::MaybeFatalIo;
@@ -54,22 +53,22 @@ const FRONTEND_FLUSHING_TIMEOUT: Duration = Duration::from_millis(100);

 #[derive(Debug)]
 pub(super) struct DeletionOp {
-    pub(super) tenant_shard_id: TenantShardId,
+    pub(super) tenant_id: TenantId,
    pub(super) timeline_id: TimelineId,
    // `layers` and `objects` are both just lists of objects.  `layers` is used if you do not
    // have a config object handy to project it to a remote key, and need the consuming worker
    // to do it for you.
-    pub(super) layers: Vec<(LayerFileName, LayerFileMetadata)>,
+    pub(super) layers: Vec<(LayerFileName, Generation)>,
    pub(super) objects: Vec<RemotePath>,

-    /// The _current_ generation of the Tenant shard attachment in which we are enqueuing
+    /// The _current_ generation of the Tenant attachment in which we are enqueuing
    /// this deletion.
    pub(super) generation: Generation,
 }

 #[derive(Debug)]
 pub(super) struct RecoverOp {
-    pub(super) attached_tenants: HashMap<TenantShardId, Generation>,
+    pub(super) attached_tenants: HashMap<TenantId, Generation>,
 }

 #[derive(Debug)]
@@ -206,7 +205,7 @@ impl ListWriter {

    async fn recover(
        &mut self,
-        attached_tenants: HashMap<TenantShardId, Generation>,
+        attached_tenants: HashMap<TenantId, Generation>,
    ) -> Result<(), anyhow::Error> {
        debug!(
            "recovering with {} attached tenants",
@@ -309,21 +308,10 @@ impl ListWriter {
                // generation was issued to another node in the interval while we restarted,
                // then we may treat deletion lists from the previous generation as if they
                // belong to our currently attached generation, and proceed to validate & execute.
-                for (tenant_shard_id, tenant_list) in &mut deletion_list.tenants {
-                    if let Some(attached_gen) = attached_tenants.get(tenant_shard_id) {
+                for (tenant_id, tenant_list) in &mut deletion_list.tenants {
+                    if let Some(attached_gen) = attached_tenants.get(tenant_id) {
                        if attached_gen.previous() == tenant_list.generation {
-                            info!(
-                                seq=%s, tenant_id=%tenant_shard_id.tenant_id,
-                                shard_id=%tenant_shard_id.shard_slug(),
-                                old_gen=?tenant_list.generation, new_gen=?attached_gen,
-                                "Updating gen on recovered list");
                            tenant_list.generation = *attached_gen;
-                        } else {
-                            info!(
-                                seq=%s, tenant_id=%tenant_shard_id.tenant_id,
-                                shard_id=%tenant_shard_id.shard_slug(),
-                                old_gen=?tenant_list.generation, new_gen=?attached_gen,
-                                "Encountered stale generation on recovered list");
                        }
                    }
                }
@@ -399,26 +387,25 @@ impl ListWriter {
                    );

                    let mut layer_paths = Vec::new();
-                    for (layer, meta) in op.layers {
+                    for (layer, generation) in op.layers {
                        layer_paths.push(remote_layer_path(
-                            &op.tenant_shard_id.tenant_id,
+                            &op.tenant_id,
                            &op.timeline_id,
-                            meta.shard,
                            &layer,
-                            meta.generation,
+                            generation,
                        ));
                    }
                    layer_paths.extend(op.objects);

                    if !self.pending.push(
-                        &op.tenant_shard_id,
+                        &op.tenant_id,
                        &op.timeline_id,
                        op.generation,
                        &mut layer_paths,
                    ) {
                        self.flush().await;
                        let retry_succeeded = self.pending.push(
-                            &op.tenant_shard_id,
+                            &op.tenant_id,
                            &op.timeline_id,
                            op.generation,
                            &mut layer_paths,
--- a/pageserver/src/deletion_queue/validator.rs
+++ b/pageserver/src/deletion_queue/validator.rs
@@ -178,14 +178,7 @@ where
                .unwrap_or(false);

            if valid && *validated_generation == tenant_lsn_state.generation {
-                for (timeline_id, pending_lsn) in tenant_lsn_state.timelines {
-                    tracing::debug!(
-                        %tenant_id,
-                        %timeline_id,
-                        current = %pending_lsn.result_slot.load(),
-                        projected = %pending_lsn.projected,
-                        "advancing validated remote_consistent_lsn",
-                    );
+                for (_timeline_id, pending_lsn) in tenant_lsn_state.timelines {
                    pending_lsn.result_slot.store(pending_lsn.projected);
                }
            } else {
--- a/pageserver/src/disk_usage_eviction_task.rs
+++ b/pageserver/src/disk_usage_eviction_task.rs
@@ -42,6 +42,7 @@
 //   reading these fields. We use the Debug impl for semi-structured logging, though.

 use std::{
+    collections::HashMap,
    sync::Arc,
    time::{Duration, SystemTime},
 };
@@ -124,7 +125,7 @@ pub fn launch_disk_usage_global_eviction_task(
 async fn disk_usage_eviction_task(
    state: &State,
    task_config: &DiskUsageEvictionTaskConfig,
-    storage: &GenericRemoteStorage,
+    _storage: &GenericRemoteStorage,
    tenants_dir: &Utf8Path,
    cancel: CancellationToken,
 ) {
@@ -148,14 +149,8 @@ async fn disk_usage_eviction_task(
        let start = Instant::now();

        async {
-            let res = disk_usage_eviction_task_iteration(
-                state,
-                task_config,
-                storage,
-                tenants_dir,
-                &cancel,
-            )
-            .await;
+            let res =
+                disk_usage_eviction_task_iteration(state, task_config, tenants_dir, &cancel).await;

            match res {
                Ok(()) => {}
@@ -186,13 +181,12 @@ pub trait Usage: Clone + Copy + std::fmt::Debug {
 async fn disk_usage_eviction_task_iteration(
    state: &State,
    task_config: &DiskUsageEvictionTaskConfig,
-    storage: &GenericRemoteStorage,
    tenants_dir: &Utf8Path,
    cancel: &CancellationToken,
 ) -> anyhow::Result<()> {
    let usage_pre = filesystem_level_usage::get(tenants_dir, task_config)
        .context("get filesystem-level disk usage before evictions")?;
-    let res = disk_usage_eviction_task_iteration_impl(state, storage, usage_pre, cancel).await;
+    let res = disk_usage_eviction_task_iteration_impl(state, usage_pre, cancel).await;
    match res {
        Ok(outcome) => {
            debug!(?outcome, "disk_usage_eviction_iteration finished");
@@ -274,9 +268,8 @@ struct LayerCount {
    count: usize,
 }

-pub(crate) async fn disk_usage_eviction_task_iteration_impl<U: Usage>(
+pub async fn disk_usage_eviction_task_iteration_impl<U: Usage>(
    state: &State,
-    _storage: &GenericRemoteStorage,
    usage_pre: U,
    cancel: &CancellationToken,
 ) -> anyhow::Result<IterationOutcome<U>> {
@@ -317,7 +310,7 @@ pub(crate) async fn disk_usage_eviction_task_iteration_impl<U: Usage>(
                .unwrap()
                .as_micros(),
            partition,
-            desc.tenant_shard_id,
+            desc.tenant_id,
            desc.timeline_id,
            candidate.layer,
        );
@@ -328,16 +321,16 @@ pub(crate) async fn disk_usage_eviction_task_iteration_impl<U: Usage>(
    // Walk through the list of candidates, until we have accumulated enough layers to get
    // us back under the pressure threshold. 'usage_planned' is updated so that it tracks
    // how much disk space would be used after evicting all the layers up to the current
-    // point in the list.
+    // point in the list. The layers are collected in 'batched', grouped per timeline.
    //
    // If we get far enough in the list that we start to evict layers that are below
    // the tenant's min-resident-size threshold, print a warning, and memorize the disk
    // usage at that point, in 'usage_planned_min_resident_size_respecting'.
+    let mut batched: HashMap<_, Vec<_>> = HashMap::new();
    let mut warned = None;
    let mut usage_planned = usage_pre;
-    let mut evicted_amount = 0;
-
-    for (i, (partition, candidate)) in candidates.iter().enumerate() {
+    let mut max_batch_size = 0;
+    for (i, (partition, candidate)) in candidates.into_iter().enumerate() {
        if !usage_planned.has_pressure() {
            debug!(
                no_candidates_evicted = i,
@@ -346,13 +339,25 @@ pub(crate) async fn disk_usage_eviction_task_iteration_impl<U: Usage>(
            break;
        }

-        if partition == &MinResidentSizePartition::Below && warned.is_none() {
+        if partition == MinResidentSizePartition::Below && warned.is_none() {
            warn!(?usage_pre, ?usage_planned, candidate_no=i, "tenant_min_resident_size-respecting LRU would not relieve pressure, evicting more following global LRU policy");
            warned = Some(usage_planned);
        }

        usage_planned.add_available_bytes(candidate.layer.layer_desc().file_size);
-        evicted_amount += 1;
+
+        // FIXME: batching makes no sense anymore because of no layermap locking, should just spawn
+        // tasks to evict all seen layers until we have evicted enough
+
+        let batch = batched.entry(TimelineKey(candidate.timeline)).or_default();
+
+        // A semaphore is used later to limit eviction concurrency, and a batch's permit count
+        // must fit in a u32. It is unlikely that u32::MAX layers would need to be evicted, but
+        // degrade gracefully by not growing a batch beyond that size.
+        if batch.len() < u32::MAX as usize {
+            batch.push(candidate.layer);
+            max_batch_size = max_batch_size.max(batch.len());
+        }
    }

    let usage_planned = match warned {
@@ -367,79 +372,100 @@ pub(crate) async fn disk_usage_eviction_task_iteration_impl<U: Usage>(
    };
    debug!(?usage_planned, "usage planned");

-    // phase2: evict layers
+    // phase2: evict victims batched by timeline

    let mut js = tokio::task::JoinSet::new();
-    let limit = 1000;

-    let mut evicted = candidates.into_iter().take(evicted_amount).fuse();
-    let mut consumed_all = false;
+    // ratelimit to 1k files or any higher max batch size
+    let limit = Arc::new(tokio::sync::Semaphore::new(1000.max(max_batch_size)));

-    // After the evictions, `usage_assumed` is the post-eviction usage,
-    // according to internal accounting.
-    let mut usage_assumed = usage_pre;
-    let mut evictions_failed = LayerCount::default();
+    for (timeline, batch) in batched {
+        let tenant_id = timeline.tenant_id;
+        let timeline_id = timeline.timeline_id;
+        let batch_size =
+            u32::try_from(batch.len()).expect("batch size limited to u32::MAX during partitioning");

-    let evict_layers = async move {
-        loop {
-            let next = if js.len() >= limit || consumed_all {
-                js.join_next().await
-            } else if !js.is_empty() {
-                // opportunistically consume ready result, one per each new evicted
-                futures::future::FutureExt::now_or_never(js.join_next()).and_then(|x| x)
-            } else {
-                None
-            };
+        // `available_permits` returns the permits free right now, not the total; it equals the
+        // total only while no already-spawned batch holds permits (NOTE(review): confirm)
+        assert!(batch_size as usize <= limit.available_permits());

-            if let Some(next) = next {
-                match next {
-                    Ok(Ok(file_size)) => {
-                        usage_assumed.add_available_bytes(file_size);
+        debug!(%timeline_id, "evicting batch for timeline");
+
+        let evict = {
+            let limit = limit.clone();
+            let cancel = cancel.clone();
+            async move {
+                let mut evicted_bytes = 0;
+                let mut evictions_failed = LayerCount::default();
+
+                let Ok(_permit) = limit.acquire_many_owned(batch_size).await else {
+                    // semaphore closing means cancelled
+                    return (evicted_bytes, evictions_failed);
+                };
+
+                let results = timeline.evict_layers(&batch).await;
+
+                match results {
+                    Ok(results) => {
+                        assert_eq!(results.len(), batch.len());
+                        for (result, layer) in results.into_iter().zip(batch.iter()) {
+                            let file_size = layer.layer_desc().file_size;
+                            match result {
+                                Some(Ok(())) => {
+                                    evicted_bytes += file_size;
+                                }
+                                Some(Err(EvictionError::NotFound | EvictionError::Downloaded)) => {
+                                    evictions_failed.file_sizes += file_size;
+                                    evictions_failed.count += 1;
+                                }
+                                None => {
+                                    assert!(cancel.is_cancelled());
+                                }
+                            }
+                        }
                    }
-                    Ok(Err((file_size, EvictionError::NotFound | EvictionError::Downloaded))) => {
-                        evictions_failed.file_sizes += file_size;
-                        evictions_failed.count += 1;
+                    Err(e) => {
+                        warn!("failed to evict batch: {:#}", e);
                    }
-                    Err(je) if je.is_cancelled() => unreachable!("not used"),
-                    Err(je) if je.is_panic() => { /* already logged */ }
-                    Err(je) => tracing::error!("unknown JoinError: {je:?}"),
                }
+                (evicted_bytes, evictions_failed)
            }
-
-            if consumed_all && js.is_empty() {
-                break;
-            }
-
-            // calling again when consumed_all is fine as evicted is fused.
-            let Some((_partition, candidate)) = evicted.next() else {
-                consumed_all = true;
-                continue;
-            };
-
-            js.spawn(async move {
-                let rtc = candidate.timeline.remote_client.as_ref().expect(
-                    "holding the witness, all timelines must have a remote timeline client",
-                );
-                let file_size = candidate.layer.layer_desc().file_size;
-                candidate
-                    .layer
-                    .evict_and_wait(rtc)
-                    .await
-                    .map(|()| file_size)
-                    .map_err(|e| (file_size, e))
-            });
-
-            tokio::task::yield_now().await;
        }
+        .instrument(tracing::info_span!("evict_batch", %tenant_id, %timeline_id, batch_size));

+        js.spawn(evict);
+
+        // spawning multiple thousands of these is essentially blocking, so give the already
+        // spawned tasks a chance of making progress
+        tokio::task::yield_now().await;
+    }
+
+    let join_all = async move {
+        // After the evictions, `usage_assumed` is the post-eviction usage,
+        // according to internal accounting.
+        let mut usage_assumed = usage_pre;
+        let mut evictions_failed = LayerCount::default();
+
+        while let Some(res) = js.join_next().await {
+            match res {
+                Ok((evicted_bytes, failed)) => {
+                    usage_assumed.add_available_bytes(evicted_bytes);
+                    evictions_failed.file_sizes += failed.file_sizes;
+                    evictions_failed.count += failed.count;
+                }
+                Err(je) if je.is_cancelled() => unreachable!("not used"),
+                Err(je) if je.is_panic() => { /* already logged */ }
+                Err(je) => tracing::error!("unknown JoinError: {je:?}"),
+            }
+        }
        (usage_assumed, evictions_failed)
    };

    let (usage_assumed, evictions_failed) = tokio::select! {
-        tuple = evict_layers => { tuple },
+        tuple = join_all => { tuple },
        _ = cancel.cancelled() => {
-            // dropping joinset will abort all pending evict_and_waits and that is fine, our
-            // requests will still stand
+            // close the semaphore to stop any pending acquires
+            limit.close();
            return Ok(IterationOutcome::Cancelled);
        }
    };
@@ -546,7 +572,7 @@ async fn collect_eviction_candidates(
                continue;
            }
            let info = tl.get_local_layers_for_disk_usage_eviction().await;
-            debug!(tenant_id=%tl.tenant_shard_id.tenant_id, shard_id=%tl.tenant_shard_id.shard_slug(), timeline_id=%tl.timeline_id, "timeline resident layers count: {}", info.resident_layers.len());
+            debug!(tenant_id=%tl.tenant_id, timeline_id=%tl.timeline_id, "timeline resident layers count: {}", info.resident_layers.len());
            tenant_candidates.extend(
                info.resident_layers
                    .into_iter()
--- a/pageserver/src/http/mod.rs
+++ b/pageserver/src/http/mod.rs
@@ -1,2 +1,4 @@
 pub mod routes;
 pub use routes::make_router;
+
+pub use pageserver_api::models;
--- a/pageserver/src/http/openapi_spec.yml
+++ b/pageserver/src/http/openapi_spec.yml
@@ -84,6 +84,7 @@ paths:
        required: true
        schema:
          type: string
+          format: hex
    get:
      description: Get tenant status
      responses:
@@ -180,6 +181,7 @@ paths:
        required: true
        schema:
          type: string
+          format: hex
    get:
      description: Get timelines for tenant
      responses:
@@ -230,6 +232,7 @@ paths:
        required: true
        schema:
          type: string
+          format: hex
      - name: timeline_id
        in: path
        required: true
@@ -335,6 +338,7 @@ paths:
        required: true
        schema:
          type: string
+          format: hex
      - name: timeline_id
        in: path
        required: true
@@ -397,6 +401,7 @@ paths:
        required: true
        schema:
          type: string
+          format: hex
      - name: timeline_id
        in: path
        required: true
@@ -464,6 +469,7 @@ paths:
        required: true
        schema:
          type: string
+          format: hex
      - name: timeline_id
        in: path
        required: true
@@ -517,6 +523,7 @@ paths:
        required: true
        schema:
          type: string
+          format: hex
    post:
      description: |
        Schedules attach operation to happen in the background for the given tenant.
@@ -617,98 +624,6 @@ paths:
                $ref: "#/components/schemas/ServiceUnavailableError"


-  /v1/tenant/{tenant_id}/location_config:
-    parameters:
-      - name: tenant_id
-        in: path
-        required: true
-        schema:
-          type: string
-      - name: flush_ms
-        in: query
-        required: false
-        schema:
-          type: integer
-    put:
-      description: |
-        Configures a _tenant location_, that is how a particular pageserver handles
-        a particular tenant.  This includes _attached_ tenants, i.e. those ingesting WAL
-        and page service requests, and _secondary_ tenants, i.e. those which are just keeping
-        a warm cache in anticipation of transitioning to attached state in the future.
-
-        This is a declarative, idempotent API: there are not separate endpoints
-        for different tenant location configurations.  Rather, this single endpoint accepts
-        a description of the desired location configuration, and makes whatever changes
-        are required to reach that state.
-
-        In imperative terms, this API is used to attach and detach tenants, and
-        to transition tenants to and from secondary mode.
-
-        This is a synchronous API: there is no 202 response.  State transitions should always
-        be fast (milliseconds), with the exception of requests setting `flush_ms`, in which case
-        the caller controls the runtime of the request.
-
-        In some state transitions, it makes sense to flush dirty data to remote storage: this includes transitions
-        to AttachedStale and Detached.  Flushing is never necessary for correctness, but is an
-        important optimization when doing migrations.  The `flush_ms` parameter controls whether
-        flushing should be attempted, and how much time is allowed for flushing.  If the time limit expires,
-        the requested transition will continue without waiting for any outstanding data to flush.  Callers
-        should use a duration which is substantially less than their HTTP client's request
-        timeout.  It is safe to supply flush_ms irrespective of the request body: in state transitions
-        where flushing doesn't make sense, the server will ignore it.
-
-        It is safe to retry requests, but if one receives a 409 or 503 response, it is not
-        useful to retry aggressively: there is probably an existing request still ongoing.
-      requestBody:
-        required: false
-        content:
-          application/json:
-            schema:
-              $ref: "#/components/schemas/TenantLocationConfigRequest"
-      responses:
-        "200":
-          description: Tenant is now in requested state
-        "503":
-          description: Tenant's state cannot be changed right now.  Wait a few seconds and retry.
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/Error"
-        "401":
-          description: Unauthorized Error
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/UnauthorizedError"
-        "403":
-          description: Forbidden Error
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/ForbiddenError"
-        "409":
-          description: |
-            The tenant is already known to Pageserver in some way,
-            and hence this `/attach` call has been rejected.
-
-            Some examples of how this can happen:
-            - tenant was created on this pageserver
-            - tenant attachment was started by an earlier call to `/attach`.
-
-            Callers should poll the tenant status's `attachment_status` field,
-            like for status 202. See the longer description for `POST /attach`
-            for details.
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/ConflictError"
-        "500":
-          description: Generic operation error
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/Error"
-
  /v1/tenant/{tenant_id}/detach:
    parameters:
      - name: tenant_id
@@ -716,6 +631,7 @@ paths:
        required: true
        schema:
          type: string
+          format: hex
      - name: detach_ignored
        in: query
        required: false
@@ -775,6 +691,7 @@ paths:
        required: true
        schema:
          type: string
+          format: hex
    post:
      description: |
        Remove tenant data (including all corresponding timelines) from pageserver's memory.
@@ -823,6 +740,7 @@ paths:
        required: true
        schema:
          type: string
+          format: hex
    post:
      description: |
        Schedules an operation that attempts to load a tenant from the local disk and
@@ -879,6 +797,7 @@ paths:
        required: true
        schema:
          type: string
+          format: hex
    get:
      description: |
        Calculate tenant's synthetic size
@@ -921,6 +840,7 @@ paths:
        required: true
        schema:
          type: string
+          format: hex
      - name: inputs_only
        in: query
        required: false
@@ -990,10 +910,11 @@ paths:
        required: true
        schema:
          type: string
+          format: hex
    post:
      description: |
-        Create a timeline. Returns new timeline id on success.
-        Recreating the same timeline will succeed if the parameters match the existing timeline.
+        Create a timeline. Returns new timeline id on success.\
+        If no new timeline id is specified in parameters, it would be generated. It's an error to recreate the same timeline.
        If no pg_version is specified, assume DEFAULT_PG_VERSION hardcoded in the pageserver.
      requestBody:
        content:
@@ -1014,9 +935,6 @@ paths:
                  format: hex
                pg_version:
                  type: integer
-                existing_initdb_timeline_id:
-                  type: string
-                  format: hex
      responses:
        "201":
          description: TimelineInfo
@@ -1123,6 +1041,7 @@ paths:
            application/json:
              schema:
                type: string
+                format: hex
        "400":
          description: Malformed tenant create request
          content:
@@ -1219,6 +1138,7 @@ paths:
        required: true
        schema:
          type: string
+          format: hex
    get:
      description: |
        Returns tenant's config description: specific config overrides a tenant has
@@ -1324,6 +1244,7 @@ components:
          properties:
            new_tenant_id:
              type: string
+              format: hex
            generation:
              type: integer
              description: Attachment generation number.
@@ -1352,30 +1273,7 @@ components:
          properties:
            tenant_id:
              type: string
-    TenantLocationConfigRequest:
-      type: object
-      required:
-        - tenant_id
-      properties:
-        tenant_id:
-          type: string
-        mode:
-          type: string
-          enum: ["AttachedSingle", "AttachedMulti", "AttachedStale", "Secondary", "Detached"]
-          description: Mode of functionality that this pageserver will run in for this tenant.
-        generation:
-          type: integer
-          description: Attachment generation number, mandatory when `mode` is an attached state
-        secondary_conf:
-          $ref: '#/components/schemas/SecondaryConfig'
-        tenant_conf:
-          $ref: '#/components/schemas/TenantConfig'
-    SecondaryConfig:
-      type: object
-      properties:
-        warm:
-          type: boolean
-          description: Whether to poll remote storage for layers to download.  If false, secondary locations don't download anything.
+              format: hex
    TenantConfig:
      type: object
      properties:
@@ -1405,8 +1303,6 @@ components:
          type: integer
        trace_read_requests:
          type: boolean
-        heatmap_period:
-          type: integer
    TenantConfigResponse:
      type: object
      properties:
@@ -1429,6 +1325,7 @@ components:
          format: hex
        tenant_id:
          type: string
+          format: hex
        last_record_lsn:
          type: string
          format: hex
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
--- a/pageserver/src/import_datadir.rs
+++ b/pageserver/src/import_datadir.rs
@@ -2,27 +2,19 @@
 //! Import data and WAL from a PostgreSQL data directory and WAL segments into
 //! a neon Timeline.
 //!
-use std::io::SeekFrom;
 use std::path::{Path, PathBuf};

 use anyhow::{bail, ensure, Context, Result};
-use async_compression::tokio::bufread::ZstdDecoder;
-use async_compression::{tokio::write::ZstdEncoder, zstd::CParameter, Level};
 use bytes::Bytes;
 use camino::Utf8Path;
 use futures::StreamExt;
-use nix::NixPath;
-use tokio::fs::{File, OpenOptions};
-use tokio::io::{AsyncBufRead, AsyncRead, AsyncReadExt, AsyncSeekExt, AsyncWriteExt};
+use tokio::io::{AsyncRead, AsyncReadExt};
 use tokio_tar::Archive;
-use tokio_tar::Builder;
-use tokio_tar::HeaderMode;
 use tracing::*;
 use walkdir::WalkDir;

 use crate::context::RequestContext;
 use crate::pgdatadir_mapping::*;
-use crate::tenant::remote_timeline_client::INITDB_PATH;
 use crate::tenant::Timeline;
 use crate::walingest::WalIngest;
 use crate::walrecord::DecodedWALRecord;
@@ -41,9 +33,7 @@ use utils::lsn::Lsn;
 pub fn get_lsn_from_controlfile(path: &Utf8Path) -> Result<Lsn> {
    // Read control file to extract the LSN
    let controlfile_path = path.join("global").join("pg_control");
-    let controlfile_buf = std::fs::read(&controlfile_path)
-        .with_context(|| format!("reading controlfile: {controlfile_path}"))?;
-    let controlfile = ControlFileData::decode(&controlfile_buf)?;
+    let controlfile = ControlFileData::decode(&std::fs::read(controlfile_path)?)?;
    let lsn = controlfile.checkPoint;

    Ok(Lsn(lsn))
@@ -628,65 +618,3 @@ async fn read_all_bytes(reader: &mut (impl AsyncRead + Unpin)) -> Result<Bytes>
    reader.read_to_end(&mut buf).await?;
    Ok(Bytes::from(buf))
 }
-
-pub async fn create_tar_zst(pgdata_path: &Utf8Path, tmp_path: &Utf8Path) -> Result<(File, u64)> {
-    let file = OpenOptions::new()
-        .create(true)
-        .truncate(true)
-        .read(true)
-        .write(true)
-        .open(&tmp_path)
-        .await
-        .with_context(|| format!("tempfile creation {tmp_path}"))?;
-
-    let mut paths = Vec::new();
-    for entry in WalkDir::new(pgdata_path) {
-        let entry = entry?;
-        let metadata = entry.metadata().expect("error getting dir entry metadata");
-        // Also allow directories so that we also get empty directories
-        if !(metadata.is_file() || metadata.is_dir()) {
-            continue;
-        }
-        let path = entry.into_path();
-        paths.push(path);
-    }
-    // Do a sort to get a more consistent listing
-    paths.sort_unstable();
-    let zstd = ZstdEncoder::with_quality_and_params(
-        file,
-        Level::Default,
-        &[CParameter::enable_long_distance_matching(true)],
-    );
-    let mut builder = Builder::new(zstd);
-    // Use reproducible header mode
-    builder.mode(HeaderMode::Deterministic);
-    for path in paths {
-        let rel_path = path.strip_prefix(pgdata_path)?;
-        if rel_path.is_empty() {
-            // The top directory should not be compressed,
-            // the tar crate doesn't like that
-            continue;
-        }
-        builder.append_path_with_name(&path, rel_path).await?;
-    }
-    let mut zstd = builder.into_inner().await?;
-    zstd.shutdown().await?;
-    let mut compressed = zstd.into_inner();
-    let compressed_len = compressed.metadata().await?.len();
-    const INITDB_TAR_ZST_WARN_LIMIT: u64 = 2 * 1024 * 1024;
-    if compressed_len > INITDB_TAR_ZST_WARN_LIMIT {
-        warn!("compressed {INITDB_PATH} size of {compressed_len} is above limit {INITDB_TAR_ZST_WARN_LIMIT}.");
-    }
-    compressed.seek(SeekFrom::Start(0)).await?;
-    Ok((compressed, compressed_len))
-}
-
-pub async fn extract_tar_zst(
-    pgdata_path: &Utf8Path,
-    tar_zst: impl AsyncBufRead + Unpin,
-) -> Result<()> {
-    let tar = Box::pin(ZstdDecoder::new(tar_zst));
-    let mut archive = Archive::new(tar);
-    archive.unpack(pgdata_path).await?;
-    Ok(())
-}
--- a/libs/pageserver_api/src/keyspace.rs
+++ b/libs/pageserver_api/src/keyspace.rs
@@ -1,12 +1,11 @@
+use crate::repository::{key_range_size, singleton_range, Key};
 use postgres_ffi::BLCKSZ;
 use std::ops::Range;

-use crate::key::Key;
-
 ///
 /// Represents a set of Keys, in a compact form.
 ///
-#[derive(Clone, Debug, Default, PartialEq, Eq)]
+#[derive(Clone, Debug, Default)]
 pub struct KeySpace {
    /// Contiguous ranges of keys that belong to the key space. In key order,
    /// and with no overlap.
@@ -187,33 +186,6 @@ impl KeySpaceRandomAccum {
    }
 }

-pub fn key_range_size(key_range: &Range<Key>) -> u32 {
-    let start = key_range.start;
-    let end = key_range.end;
-
-    if end.field1 != start.field1
-        || end.field2 != start.field2
-        || end.field3 != start.field3
-        || end.field4 != start.field4
-    {
-        return u32::MAX;
-    }
-
-    let start = (start.field5 as u64) << 32 | start.field6 as u64;
-    let end = (end.field5 as u64) << 32 | end.field6 as u64;
-
-    let diff = end - start;
-    if diff > u32::MAX as u64 {
-        u32::MAX
-    } else {
-        diff as u32
-    }
-}
-
-pub fn singleton_range(key: Key) -> Range<Key> {
-    key..key.next()
-}
-
 #[cfg(test)]
 mod tests {
    use super::*;
--- a/pageserver/src/lib.rs
+++ b/pageserver/src/lib.rs
@@ -10,7 +10,7 @@ pub mod deletion_queue;
 pub mod disk_usage_eviction_task;
 pub mod http;
 pub mod import_datadir;
-pub use pageserver_api::keyspace;
+pub mod keyspace;
 pub mod metrics;
 pub mod page_cache;
 pub mod page_service;
@@ -186,6 +186,13 @@ pub struct InitializationOrder {
    /// Each initial tenant load task carries this until completion.
    pub initial_tenant_load: Option<utils::completion::Completion>,

+    /// Barrier for when we can start initial logical size calculations.
+    pub initial_logical_size_can_start: utils::completion::Barrier,
+
+    /// Each timeline owns a clone of this to be consumed on the initial logical size calculation
+    /// attempt. It is important to drop this once the attempt has completed.
+    pub initial_logical_size_attempt: Option<utils::completion::Completion>,
+
    /// Barrier for when we can start any background jobs.
    ///
    /// This can be broken up later on, but right now there is just one class of a background job.
@@ -205,7 +212,7 @@ async fn timed<Fut: std::future::Future>(
    match tokio::time::timeout(warn_at, &mut fut).await {
        Ok(ret) => {
            tracing::info!(
-                stage = name,
+                task = name,
                elapsed_ms = started.elapsed().as_millis(),
                "completed"
            );
@@ -213,7 +220,7 @@ async fn timed<Fut: std::future::Future>(
        }
        Err(_) => {
            tracing::info!(
-                stage = name,
+                task = name,
                elapsed_ms = started.elapsed().as_millis(),
                "still waiting, taking longer than expected..."
            );
@@ -222,7 +229,7 @@ async fn timed<Fut: std::future::Future>(

            // this has a global allowed_errors
            tracing::warn!(
-                stage = name,
+                task = name,
                elapsed_ms = started.elapsed().as_millis(),
                "completed, took longer than expected"
            );
--- a/pageserver/src/metrics.rs
+++ b/pageserver/src/metrics.rs
@@ -2,13 +2,11 @@ use enum_map::EnumMap;
 use metrics::metric_vec_duration::DurationResultObserver;
 use metrics::{
    register_counter_vec, register_gauge_vec, register_histogram, register_histogram_vec,
-    register_int_counter, register_int_counter_pair_vec, register_int_counter_vec,
-    register_int_gauge, register_int_gauge_vec, register_uint_gauge, register_uint_gauge_vec,
-    Counter, CounterVec, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPairVec,
-    IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec,
+    register_int_counter, register_int_counter_vec, register_int_gauge, register_int_gauge_vec,
+    register_uint_gauge, register_uint_gauge_vec, Counter, CounterVec, GaugeVec, Histogram,
+    HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec,
 };
 use once_cell::sync::Lazy;
-use pageserver_api::shard::TenantShardId;
 use strum::{EnumCount, IntoEnumIterator, VariantNames};
 use strum_macros::{EnumVariantNames, IntoStaticStr};
 use utils::id::{TenantId, TimelineId};
@@ -286,63 +284,6 @@ pub static PAGE_CACHE_SIZE: Lazy<PageCacheSizeMetrics> = Lazy::new(|| PageCacheS
    },
 });

-pub(crate) mod page_cache_eviction_metrics {
-    use std::num::NonZeroUsize;
-
-    use metrics::{register_int_counter_vec, IntCounter, IntCounterVec};
-    use once_cell::sync::Lazy;
-
-    #[derive(Clone, Copy)]
-    pub(crate) enum Outcome {
-        FoundSlotUnused { iters: NonZeroUsize },
-        FoundSlotEvicted { iters: NonZeroUsize },
-        ItersExceeded { iters: NonZeroUsize },
-    }
-
-    static ITERS_TOTAL_VEC: Lazy<IntCounterVec> = Lazy::new(|| {
-        register_int_counter_vec!(
-            "pageserver_page_cache_find_victim_iters_total",
-            "Counter for the number of iterations in the find_victim loop",
-            &["outcome"],
-        )
-        .expect("failed to define a metric")
-    });
-
-    static CALLS_VEC: Lazy<IntCounterVec> = Lazy::new(|| {
-        register_int_counter_vec!(
-            "pageserver_page_cache_find_victim_calls",
-            "Incremented at the end of each find_victim() call.\
-             Filter by outcome to get e.g., eviction rate.",
-            &["outcome"]
-        )
-        .unwrap()
-    });
-
-    pub(crate) fn observe(outcome: Outcome) {
-        macro_rules! dry {
-            ($label:literal, $iters:expr) => {{
-                static LABEL: &'static str = $label;
-                static ITERS_TOTAL: Lazy<IntCounter> =
-                    Lazy::new(|| ITERS_TOTAL_VEC.with_label_values(&[LABEL]));
-                static CALLS: Lazy<IntCounter> =
-                    Lazy::new(|| CALLS_VEC.with_label_values(&[LABEL]));
-                ITERS_TOTAL.inc_by(($iters.get()) as u64);
-                CALLS.inc();
-            }};
-        }
-        match outcome {
-            Outcome::FoundSlotUnused { iters } => dry!("found_empty", iters),
-            Outcome::FoundSlotEvicted { iters } => {
-                dry!("found_evicted", iters)
-            }
-            Outcome::ItersExceeded { iters } => {
-                dry!("err_iters_exceeded", iters);
-                super::page_cache_errors_inc(super::PageCacheErrorKind::EvictIterLimit);
-            }
-        }
-    }
-}
-
 pub(crate) static PAGE_CACHE_ACQUIRE_PINNED_SLOT_TIME: Lazy<Histogram> = Lazy::new(|| {
    register_histogram!(
        "pageserver_page_cache_acquire_pinned_slot_seconds",
@@ -352,6 +293,14 @@ pub(crate) static PAGE_CACHE_ACQUIRE_PINNED_SLOT_TIME: Lazy<Histogram> = Lazy::n
    .expect("failed to define a metric")
 });

+pub(crate) static PAGE_CACHE_FIND_VICTIMS_ITERS_TOTAL: Lazy<IntCounter> = Lazy::new(|| {
+    register_int_counter!(
+        "pageserver_page_cache_find_victim_iters_total",
+        "Counter for the number of iterations in the find_victim loop",
+    )
+    .expect("failed to define a metric")
+});
+
 static PAGE_CACHE_ERRORS: Lazy<IntCounterVec> = Lazy::new(|| {
    register_int_counter_vec!(
        "page_cache_errors_total",
@@ -453,129 +402,6 @@ static CURRENT_LOGICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
    .expect("failed to define current logical size metric")
 });

-pub(crate) mod initial_logical_size {
-    use metrics::{register_int_counter, register_int_counter_vec, IntCounter, IntCounterVec};
-    use once_cell::sync::Lazy;
-
-    pub(crate) struct StartCalculation(IntCounterVec);
-    pub(crate) static START_CALCULATION: Lazy<StartCalculation> = Lazy::new(|| {
-        StartCalculation(
-            register_int_counter_vec!(
-                "pageserver_initial_logical_size_start_calculation",
-                "Incremented each time we start an initial logical size calculation attempt. \
-                 The `circumstances` label provides some additional details.",
-                &["attempt", "circumstances"]
-            )
-            .unwrap(),
-        )
-    });
-
-    struct DropCalculation {
-        first: IntCounter,
-        retry: IntCounter,
-    }
-
-    static DROP_CALCULATION: Lazy<DropCalculation> = Lazy::new(|| {
-        let vec = register_int_counter_vec!(
-            "pageserver_initial_logical_size_drop_calculation",
-            "Incremented each time we abort a started size calculation attmpt.",
-            &["attempt"]
-        )
-        .unwrap();
-        DropCalculation {
-            first: vec.with_label_values(&["first"]),
-            retry: vec.with_label_values(&["retry"]),
-        }
-    });
-
-    pub(crate) struct Calculated {
-        pub(crate) births: IntCounter,
-        pub(crate) deaths: IntCounter,
-    }
-
-    pub(crate) static CALCULATED: Lazy<Calculated> = Lazy::new(|| Calculated {
-        births: register_int_counter!(
-            "pageserver_initial_logical_size_finish_calculation",
-            "Incremented every time we finish calculation of initial logical size.\
-             If everything is working well, this should happen at most once per Timeline object."
-        )
-        .unwrap(),
-        deaths: register_int_counter!(
-            "pageserver_initial_logical_size_drop_finished_calculation",
-            "Incremented when we drop a finished initial logical size calculation result.\
-             Mainly useful to turn pageserver_initial_logical_size_finish_calculation into a gauge."
-        )
-        .unwrap(),
-    });
-
-    pub(crate) struct OngoingCalculationGuard {
-        inc_drop_calculation: Option<IntCounter>,
-    }
-
-    #[derive(strum_macros::IntoStaticStr)]
-    pub(crate) enum StartCircumstances {
-        EmptyInitial,
-        SkippedConcurrencyLimiter,
-        AfterBackgroundTasksRateLimit,
-    }
-
-    impl StartCalculation {
-        pub(crate) fn first(&self, circumstances: StartCircumstances) -> OngoingCalculationGuard {
-            let circumstances_label: &'static str = circumstances.into();
-            self.0.with_label_values(&["first", circumstances_label]);
-            OngoingCalculationGuard {
-                inc_drop_calculation: Some(DROP_CALCULATION.first.clone()),
-            }
-        }
-        pub(crate) fn retry(&self, circumstances: StartCircumstances) -> OngoingCalculationGuard {
-            let circumstances_label: &'static str = circumstances.into();
-            self.0.with_label_values(&["retry", circumstances_label]);
-            OngoingCalculationGuard {
-                inc_drop_calculation: Some(DROP_CALCULATION.retry.clone()),
-            }
-        }
-    }
-
-    impl Drop for OngoingCalculationGuard {
-        fn drop(&mut self) {
-            if let Some(counter) = self.inc_drop_calculation.take() {
-                counter.inc();
-            }
-        }
-    }
-
-    impl OngoingCalculationGuard {
-        pub(crate) fn calculation_result_saved(mut self) -> FinishedCalculationGuard {
-            drop(self.inc_drop_calculation.take());
-            CALCULATED.births.inc();
-            FinishedCalculationGuard {
-                inc_on_drop: CALCULATED.deaths.clone(),
-            }
-        }
-    }
-
-    pub(crate) struct FinishedCalculationGuard {
-        inc_on_drop: IntCounter,
-    }
-
-    impl Drop for FinishedCalculationGuard {
-        fn drop(&mut self) {
-            self.inc_on_drop.inc();
-        }
-    }
-
-    // context: https://github.com/neondatabase/neon/issues/5963
-    pub(crate) static TIMELINES_WHERE_WALRECEIVER_GOT_APPROXIMATE_SIZE: Lazy<IntCounter> =
-        Lazy::new(|| {
-            register_int_counter!(
-                "pageserver_initial_logical_size_timelines_where_walreceiver_got_approximate_size",
-                "Counter for the following event: walreceiver calls\
-                 Timeline::get_current_logical_size() and it returns `Approximate` for the first time."
-            )
-            .unwrap()
-        });
-}
-
 pub(crate) static TENANT_STATE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|| {
    register_uint_gauge_vec!(
        "pageserver_tenant_states_count",
@@ -651,7 +477,7 @@ static EVICTIONS_WITH_LOW_RESIDENCE_DURATION: Lazy<IntCounterVec> = Lazy::new(||
        "pageserver_evictions_with_low_residence_duration",
        "If a layer is evicted that was resident for less than `low_threshold`, it is counted to this counter. \
         Residence duration is determined using the `residence_duration_data_source`.",
-        &["tenant_id", "shard_id", "timeline_id", "residence_duration_data_source", "low_threshold_secs"]
+        &["tenant_id", "timeline_id", "residence_duration_data_source", "low_threshold_secs"]
    )
    .expect("failed to define a metric")
 });
@@ -684,54 +510,14 @@ pub static STARTUP_IS_LOADING: Lazy<UIntGauge> = Lazy::new(|| {
    .expect("Failed to register pageserver_startup_is_loading")
 });

-/// Metrics related to the lifecycle of a [`crate::tenant::Tenant`] object: things
-/// like how long it took to load.
-///
-/// Note that these are process-global metrics, _not_ per-tenant metrics.  Per-tenant
-/// metrics are rather expensive, and usually fine grained stuff makes more sense
-/// at a timeline level than tenant level.
-pub(crate) struct TenantMetrics {
-    /// How long did tenants take to go from construction to active state?
-    pub(crate) activation: Histogram,
-    pub(crate) preload: Histogram,
-    pub(crate) attach: Histogram,
-
-    /// How many tenants are included in the initial startup of the pagesrever?
-    pub(crate) startup_scheduled: IntCounter,
-    pub(crate) startup_complete: IntCounter,
-}
-
-pub(crate) static TENANT: Lazy<TenantMetrics> = Lazy::new(|| {
-    TenantMetrics {
-    activation: register_histogram!(
+/// How long did tenants take to go from construction to active state?
+pub(crate) static TENANT_ACTIVATION: Lazy<Histogram> = Lazy::new(|| {
+    register_histogram!(
        "pageserver_tenant_activation_seconds",
        "Time taken by tenants to activate, in seconds",
        CRITICAL_OP_BUCKETS.into()
    )
-    .expect("Failed to register metric"),
-    preload: register_histogram!(
-        "pageserver_tenant_preload_seconds",
-        "Time taken by tenants to load remote metadata on startup/attach, in seconds",
-        CRITICAL_OP_BUCKETS.into()
-    )
-    .expect("Failed to register metric"),
-    attach: register_histogram!(
-        "pageserver_tenant_attach_seconds",
-        "Time taken by tenants to intialize, after remote metadata is already loaded",
-        CRITICAL_OP_BUCKETS.into()
-    )
-    .expect("Failed to register metric"),
-    startup_scheduled: register_int_counter!(
-        "pageserver_tenant_startup_scheduled",
-        "Number of tenants included in pageserver startup (doesn't count tenants attached later)"
-    ).expect("Failed to register metric"),
-    startup_complete: register_int_counter!(
-        "pageserver_tenant_startup_complete",
-        "Number of tenants that have completed warm-up, or activated on-demand during initial startup: \
-         should eventually reach `pageserver_tenant_startup_scheduled_total`.  Does not include broken \
-         tenants: such cases will lead to this metric never reaching the scheduled count."
-    ).expect("Failed to register metric"),
-}
+    .expect("Failed to register pageserver_tenant_activation_seconds metric")
 });

 /// Each `Timeline`'s  [`EVICTIONS_WITH_LOW_RESIDENCE_DURATION`] metric.
@@ -755,16 +541,10 @@ impl EvictionsWithLowResidenceDurationBuilder {
        }
    }

-    fn build(
-        &self,
-        tenant_id: &str,
-        shard_id: &str,
-        timeline_id: &str,
-    ) -> EvictionsWithLowResidenceDuration {
+    fn build(&self, tenant_id: &str, timeline_id: &str) -> EvictionsWithLowResidenceDuration {
        let counter = EVICTIONS_WITH_LOW_RESIDENCE_DURATION
            .get_metric_with_label_values(&[
                tenant_id,
-                shard_id,
                timeline_id,
                self.data_source,
                &EvictionsWithLowResidenceDuration::threshold_label_value(self.threshold),
@@ -795,24 +575,21 @@ impl EvictionsWithLowResidenceDuration {
    pub fn change_threshold(
        &mut self,
        tenant_id: &str,
-        shard_id: &str,
        timeline_id: &str,
        new_threshold: Duration,
    ) {
        if new_threshold == self.threshold {
            return;
        }
-        let mut with_new = EvictionsWithLowResidenceDurationBuilder::new(
-            self.data_source,
-            new_threshold,
-        )
-        .build(tenant_id, shard_id, timeline_id);
+        let mut with_new =
+            EvictionsWithLowResidenceDurationBuilder::new(self.data_source, new_threshold)
+                .build(tenant_id, timeline_id);
        std::mem::swap(self, &mut with_new);
-        with_new.remove(tenant_id, shard_id, timeline_id);
+        with_new.remove(tenant_id, timeline_id);
    }

    // This could be a `Drop` impl, but, we need the `tenant_id` and `timeline_id`.
-    fn remove(&mut self, tenant_id: &str, shard_id: &str, timeline_id: &str) {
+    fn remove(&mut self, tenant_id: &str, timeline_id: &str) {
        let Some(_counter) = self.counter.take() else {
            return;
        };
@@ -821,7 +598,6 @@ impl EvictionsWithLowResidenceDuration {

        let removed = EVICTIONS_WITH_LOW_RESIDENCE_DURATION.remove_label_values(&[
            tenant_id,
-            shard_id,
            timeline_id,
            self.data_source,
            &threshold,
@@ -862,7 +638,7 @@ const STORAGE_IO_TIME_BUCKETS: &[f64] = &[
 ///
 /// Operations:
 /// - open ([`std::fs::OpenOptions::open`])
-/// - close (dropping [`crate::virtual_file::VirtualFile`])
+/// - close (dropping [`std::fs::File`])
 /// - close-by-replace (close by replacement algorithm)
 /// - read (`read_at`)
 /// - write (`write_at`)
@@ -874,7 +650,6 @@ const STORAGE_IO_TIME_BUCKETS: &[f64] = &[
 )]
 pub(crate) enum StorageIoOperation {
    Open,
-    OpenAfterReplace,
    Close,
    CloseByReplace,
    Read,
@@ -888,7 +663,6 @@ impl StorageIoOperation {
    pub fn as_str(&self) -> &'static str {
        match self {
            StorageIoOperation::Open => "open",
-            StorageIoOperation::OpenAfterReplace => "open-after-replace",
            StorageIoOperation::Close => "close",
            StorageIoOperation::CloseByReplace => "close-by-replace",
            StorageIoOperation::Read => "read",
@@ -943,25 +717,6 @@ pub(crate) static STORAGE_IO_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
    .expect("failed to define a metric")
 });

-pub(crate) mod virtual_file_descriptor_cache {
-    use super::*;
-
-    pub(crate) static SIZE_MAX: Lazy<UIntGauge> = Lazy::new(|| {
-        register_uint_gauge!(
-            "pageserver_virtual_file_descriptor_cache_size_max",
-            "Maximum number of open file descriptors in the cache."
-        )
-        .unwrap()
-    });
-
-    // SIZE_CURRENT: derive it like so:
-    // ```
-    // sum (pageserver_io_operations_seconds_count{operation=~"^(open|open-after-replace)$")
-    // -ignoring(operation)
-    // sum(pageserver_io_operations_seconds_count{operation=~"^(close|close-by-replace)$"}
-    // ```
-}
-
 #[derive(Debug)]
 struct GlobalAndPerTimelineHistogram {
    global: Histogram,
@@ -1288,52 +1043,6 @@ pub(crate) static DELETION_QUEUE: Lazy<DeletionQueueMetrics> = Lazy::new(|| {
 }
 });

-pub(crate) struct WalIngestMetrics {
-    pub(crate) records_received: IntCounter,
-    pub(crate) records_committed: IntCounter,
-    pub(crate) records_filtered: IntCounter,
-}
-
-pub(crate) static WAL_INGEST: Lazy<WalIngestMetrics> = Lazy::new(|| WalIngestMetrics {
-    records_received: register_int_counter!(
-        "pageserver_wal_ingest_records_received",
-        "Number of WAL records received from safekeepers"
-    )
-    .expect("failed to define a metric"),
-    records_committed: register_int_counter!(
-        "pageserver_wal_ingest_records_committed",
-        "Number of WAL records which resulted in writes to pageserver storage"
-    )
-    .expect("failed to define a metric"),
-    records_filtered: register_int_counter!(
-        "pageserver_wal_ingest_records_filtered",
-        "Number of WAL records filtered out due to sharding"
-    )
-    .expect("failed to define a metric"),
-});
-pub(crate) struct SecondaryModeMetrics {
-    pub(crate) upload_heatmap: IntCounter,
-    pub(crate) upload_heatmap_errors: IntCounter,
-    pub(crate) upload_heatmap_duration: Histogram,
-}
-pub(crate) static SECONDARY_MODE: Lazy<SecondaryModeMetrics> = Lazy::new(|| SecondaryModeMetrics {
-    upload_heatmap: register_int_counter!(
-        "pageserver_secondary_upload_heatmap",
-        "Number of heatmaps written to remote storage by attached tenants"
-    )
-    .expect("failed to define a metric"),
-    upload_heatmap_errors: register_int_counter!(
-        "pageserver_secondary_upload_heatmap_errors",
-        "Failures writing heatmap to remote storage"
-    )
-    .expect("failed to define a metric"),
-    upload_heatmap_duration: register_histogram!(
-        "pageserver_secondary_upload_heatmap_duration",
-        "Time to build and upload a heatmap, including any waiting inside the S3 client"
-    )
-    .expect("failed to define a metric"),
-});
-
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 pub enum RemoteOpKind {
    Upload,
@@ -1384,16 +1093,25 @@ pub(crate) static TENANT_TASK_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
    .expect("Failed to register tenant_task_events metric")
 });

-pub(crate) static BACKGROUND_LOOP_SEMAPHORE_WAIT_GAUGE: Lazy<IntCounterPairVec> = Lazy::new(|| {
-    register_int_counter_pair_vec!(
-        "pageserver_background_loop_semaphore_wait_start_count",
-        "Counter for background loop concurrency-limiting semaphore acquire calls started",
-        "pageserver_background_loop_semaphore_wait_finish_count",
-        "Counter for background loop concurrency-limiting semaphore acquire calls finished",
-        &["task"],
-    )
-    .unwrap()
-});
+pub(crate) static BACKGROUND_LOOP_SEMAPHORE_WAIT_START_COUNT: Lazy<IntCounterVec> =
+    Lazy::new(|| {
+        register_int_counter_vec!(
+            "pageserver_background_loop_semaphore_wait_start_count",
+            "Counter for background loop concurrency-limiting semaphore acquire calls started",
+            &["task"],
+        )
+        .unwrap()
+    });
+
+pub(crate) static BACKGROUND_LOOP_SEMAPHORE_WAIT_FINISH_COUNT: Lazy<IntCounterVec> =
+    Lazy::new(|| {
+        register_int_counter_vec!(
+            "pageserver_background_loop_semaphore_wait_finish_count",
+            "Counter for background loop concurrency-limiting semaphore acquire calls finished",
+            &["task"],
+        )
+        .unwrap()
+    });

 pub(crate) static BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT: Lazy<IntCounterVec> = Lazy::new(|| {
    register_int_counter_vec!(
@@ -1534,20 +1252,9 @@ pub(crate) static WAL_REDO_RECORD_COUNTER: Lazy<IntCounter> = Lazy::new(|| {
    .unwrap()
 });

-pub(crate) static WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
-    register_histogram!(
-        "pageserver_wal_redo_process_launch_duration",
-        "Histogram of the duration of successful WalRedoProcess::launch calls",
-        redo_histogram_time_buckets!(),
-    )
-    .expect("failed to define a metric")
-});
-
 pub(crate) struct WalRedoProcessCounters {
    pub(crate) started: IntCounter,
    pub(crate) killed_by_cause: enum_map::EnumMap<WalRedoKillCause, IntCounter>,
-    pub(crate) active_stderr_logger_tasks_started: IntCounter,
-    pub(crate) active_stderr_logger_tasks_finished: IntCounter,
 }

 #[derive(Debug, enum_map::Enum, strum_macros::IntoStaticStr)]
@@ -1571,19 +1278,6 @@ impl Default for WalRedoProcessCounters {
            &["cause"],
        )
        .unwrap();
-
-        let active_stderr_logger_tasks_started = register_int_counter!(
-            "pageserver_walredo_stderr_logger_tasks_started_total",
-            "Number of active walredo stderr logger tasks that have started",
-        )
-        .unwrap();
-
-        let active_stderr_logger_tasks_finished = register_int_counter!(
-            "pageserver_walredo_stderr_logger_tasks_finished_total",
-            "Number of active walredo stderr logger tasks that have finished",
-        )
-        .unwrap();
-
        Self {
            started,
            killed_by_cause: EnumMap::from_array(std::array::from_fn(|i| {
@@ -1591,8 +1285,6 @@ impl Default for WalRedoProcessCounters {
                let cause_str: &'static str = cause.into();
                killed.with_label_values(&[cause_str])
            })),
-            active_stderr_logger_tasks_started,
-            active_stderr_logger_tasks_finished,
        }
    }
 }
@@ -1667,7 +1359,6 @@ impl StorageTimeMetrics {
 #[derive(Debug)]
 pub struct TimelineMetrics {
    tenant_id: String,
-    shard_id: String,
    timeline_id: String,
    pub flush_time_histo: StorageTimeMetrics,
    pub compact_time_histo: StorageTimeMetrics,
@@ -1688,12 +1379,11 @@ pub struct TimelineMetrics {

 impl TimelineMetrics {
    pub fn new(
-        tenant_shard_id: &TenantShardId,
+        tenant_id: &TenantId,
        timeline_id: &TimelineId,
        evictions_with_low_residence_duration_builder: EvictionsWithLowResidenceDurationBuilder,
    ) -> Self {
-        let tenant_id = tenant_shard_id.tenant_id.to_string();
-        let shard_id = format!("{}", tenant_shard_id.shard_slug());
+        let tenant_id = tenant_id.to_string();
        let timeline_id = timeline_id.to_string();
        let flush_time_histo =
            StorageTimeMetrics::new(StorageTimeOperation::LayerFlush, &tenant_id, &timeline_id);
@@ -1730,12 +1420,11 @@ impl TimelineMetrics {
        let evictions = EVICTIONS
            .get_metric_with_label_values(&[&tenant_id, &timeline_id])
            .unwrap();
-        let evictions_with_low_residence_duration = evictions_with_low_residence_duration_builder
-            .build(&tenant_id, &shard_id, &timeline_id);
+        let evictions_with_low_residence_duration =
+            evictions_with_low_residence_duration_builder.build(&tenant_id, &timeline_id);

        TimelineMetrics {
            tenant_id,
-            shard_id,
            timeline_id,
            flush_time_histo,
            compact_time_histo,
@@ -1781,7 +1470,6 @@ impl Drop for TimelineMetrics {
    fn drop(&mut self) {
        let tenant_id = &self.tenant_id;
        let timeline_id = &self.timeline_id;
-        let shard_id = &self.shard_id;
        let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, timeline_id]);
        {
            RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(self.resident_physical_size_get());
@@ -1795,7 +1483,7 @@ impl Drop for TimelineMetrics {
        self.evictions_with_low_residence_duration
            .write()
            .unwrap()
-            .remove(tenant_id, shard_id, timeline_id);
+            .remove(tenant_id, timeline_id);

        // The following metrics are born outside of the TimelineMetrics lifecycle but still
        // removed at the end of it. The idea is to have the metrics outlive the
@@ -1883,9 +1571,9 @@ pub struct RemoteTimelineClientMetrics {
 }

 impl RemoteTimelineClientMetrics {
-    pub fn new(tenant_shard_id: &TenantShardId, timeline_id: &TimelineId) -> Self {
+    pub fn new(tenant_id: &TenantId, timeline_id: &TimelineId) -> Self {
        RemoteTimelineClientMetrics {
-            tenant_id: tenant_shard_id.tenant_id.to_string(),
+            tenant_id: tenant_id.to_string(),
            timeline_id: timeline_id.to_string(),
            calls_unfinished_gauge: Mutex::new(HashMap::default()),
            bytes_started_counter: Mutex::new(HashMap::default()),
@@ -2253,14 +1941,9 @@ pub fn preinitialize_metrics() {
    // Deletion queue stats
    Lazy::force(&DELETION_QUEUE);

-    // Tenant stats
-    Lazy::force(&TENANT);
-
    // Tenant manager stats
    Lazy::force(&TENANT_MANAGER);

-    Lazy::force(&crate::tenant::storage_layer::layer::LAYER_IMPL_METRICS);
-
    // countervecs
    [&BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT]
        .into_iter()
@@ -2278,7 +1961,6 @@ pub fn preinitialize_metrics() {
        &WAL_REDO_TIME,
        &WAL_REDO_RECORDS_HISTOGRAM,
        &WAL_REDO_BYTES_HISTOGRAM,
-        &WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM,
    ]
    .into_iter()
    .for_each(|h| {
--- a/pageserver/src/page_cache.rs
+++ b/pageserver/src/page_cache.rs
@@ -28,7 +28,7 @@
 //! Page cache maps from a cache key to a buffer slot.
 //! The cache key uniquely identifies the piece of data that is being cached.
 //!
-//! The cache key for **materialized pages** is  [`TenantShardId`], [`TimelineId`], [`Key`], and [`Lsn`].
+//! The cache key for **materialized pages** is  [`TenantId`], [`TimelineId`], [`Key`], and [`Lsn`].
 //! Use [`PageCache::memorize_materialized_page`] and [`PageCache::lookup_materialized_page`] for fill & access.
 //!
 //! The cache key for **immutable file** pages is [`FileId`] and a block number.
@@ -83,15 +83,13 @@ use std::{

 use anyhow::Context;
 use once_cell::sync::OnceCell;
-use pageserver_api::shard::TenantShardId;
-use utils::{id::TimelineId, lsn::Lsn};
-
-use crate::{
-    context::RequestContext,
-    metrics::{page_cache_eviction_metrics, PageCacheSizeMetrics},
-    repository::Key,
+use utils::{
+    id::{TenantId, TimelineId},
+    lsn::Lsn,
 };

+use crate::{context::RequestContext, metrics::PageCacheSizeMetrics, repository::Key};
+
 static PAGE_CACHE: OnceCell<PageCache> = OnceCell::new();
 const TEST_PAGE_CACHE_SIZE: usize = 50;

@@ -152,13 +150,7 @@ enum CacheKey {

 #[derive(Debug, PartialEq, Eq, Hash, Clone)]
 struct MaterializedPageHashKey {
-    /// Why is this TenantShardId rather than TenantId?
-    ///
-    /// Usually, the materialized value of a page@lsn is identical on any shard in the same tenant.  However, this
-    /// this not the case for certain internally-generated pages (e.g. relation sizes).  In future, we may make this
-    /// key smaller by omitting the shard, if we ensure that reads to such pages always skip the cache, or are
-    /// special-cased in some other way.
-    tenant_shard_id: TenantShardId,
+    tenant_id: TenantId,
    timeline_id: TimelineId,
    key: Key,
 }
@@ -382,7 +374,7 @@ impl PageCache {
    /// returned page.
    pub async fn lookup_materialized_page(
        &self,
-        tenant_shard_id: TenantShardId,
+        tenant_id: TenantId,
        timeline_id: TimelineId,
        key: &Key,
        lsn: Lsn,
@@ -399,7 +391,7 @@ impl PageCache {

        let mut cache_key = CacheKey::MaterializedPage {
            hash_key: MaterializedPageHashKey {
-                tenant_shard_id,
+                tenant_id,
                timeline_id,
                key: *key,
            },
@@ -440,7 +432,7 @@ impl PageCache {
    ///
    pub async fn memorize_materialized_page(
        &self,
-        tenant_shard_id: TenantShardId,
+        tenant_id: TenantId,
        timeline_id: TimelineId,
        key: Key,
        lsn: Lsn,
@@ -448,7 +440,7 @@ impl PageCache {
    ) -> anyhow::Result<()> {
        let cache_key = CacheKey::MaterializedPage {
            hash_key: MaterializedPageHashKey {
-                tenant_shard_id,
+                tenant_id,
                timeline_id,
                key,
            },
@@ -905,10 +897,8 @@ impl PageCache {
                            // Note that just yielding to tokio during iteration without such
                            // priority boosting is likely counter-productive. We'd just give more opportunities
                            // for B to bump usage count, further starving A.
-                            page_cache_eviction_metrics::observe(
-                                page_cache_eviction_metrics::Outcome::ItersExceeded {
-                                    iters: iters.try_into().unwrap(),
-                                },
+                            crate::metrics::page_cache_errors_inc(
+                                crate::metrics::PageCacheErrorKind::EvictIterLimit,
                            );
                            anyhow::bail!("exceeded evict iter limit");
                        }
@@ -919,18 +909,8 @@ impl PageCache {
                    // remove mapping for old buffer
                    self.remove_mapping(old_key);
                    inner.key = None;
-                    page_cache_eviction_metrics::observe(
-                        page_cache_eviction_metrics::Outcome::FoundSlotEvicted {
-                            iters: iters.try_into().unwrap(),
-                        },
-                    );
-                } else {
-                    page_cache_eviction_metrics::observe(
-                        page_cache_eviction_metrics::Outcome::FoundSlotUnused {
-                            iters: iters.try_into().unwrap(),
-                        },
-                    );
                }
+                crate::metrics::PAGE_CACHE_FIND_VICTIMS_ITERS_TOTAL.inc_by(iters as u64);
                return Ok((slot_idx, inner));
            }
        }
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -53,23 +53,21 @@ use crate::context::{DownloadBehavior, RequestContext};
 use crate::import_datadir::import_wal_from_tar;
 use crate::metrics;
 use crate::metrics::LIVE_CONNECTIONS_COUNT;
-use crate::pgdatadir_mapping::rel_block_to_key;
 use crate::task_mgr;
 use crate::task_mgr::TaskKind;
 use crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id;
 use crate::tenant::mgr;
 use crate::tenant::mgr::get_active_tenant_with_timeout;
 use crate::tenant::mgr::GetActiveTenantError;
-use crate::tenant::mgr::ShardSelector;
 use crate::tenant::Timeline;
 use crate::trace::Tracer;

 use postgres_ffi::pg_constants::DEFAULTTABLESPACE_OID;
 use postgres_ffi::BLCKSZ;

-// How long we may wait for a [`TenantSlot::InProgress`]` and/or a [`Tenant`] which
+// How long we may block waiting for a [`TenantSlot::InProgress`] and/or a [`Tenant`] which
 // is not yet in state [`TenantState::Active`].
-const ACTIVE_TENANT_TIMEOUT: Duration = Duration::from_millis(30000);
+const ACTIVE_TENANT_TIMEOUT: Duration = Duration::from_millis(5000);

 /// Read the end of a tar archive.
 ///
@@ -401,25 +399,18 @@ impl PageServerHandler {
    {
        debug_assert_current_span_has_tenant_and_timeline_id();

-        // Note that since one connection may contain getpage requests that target different
-        // shards (e.g. during splitting when the compute is not yet aware of the split), the tenant
-        // that we look up here may not be the one that serves all the actual requests: we will double
-        // check the mapping of key->shard later before calling into Timeline for getpage requests.
+        // Make request tracer if needed
        let tenant = mgr::get_active_tenant_with_timeout(
            tenant_id,
-            ShardSelector::First,
            ACTIVE_TENANT_TIMEOUT,
            &task_mgr::shutdown_token(),
        )
        .await?;
-
-        // Make request tracer if needed
        let mut tracer = if tenant.get_trace_read_requests() {
            let connection_id = ConnectionId::generate();
-            let path =
-                tenant
-                    .conf
-                    .trace_path(&tenant.tenant_shard_id(), &timeline_id, &connection_id);
+            let path = tenant
+                .conf
+                .trace_path(&tenant_id, &timeline_id, &connection_id);
            Some(Tracer::new(path))
        } else {
            None
@@ -571,7 +562,6 @@ impl PageServerHandler {
        info!("creating new timeline");
        let tenant = get_active_tenant_with_timeout(
            tenant_id,
-            ShardSelector::Zero,
            ACTIVE_TENANT_TIMEOUT,
            &task_mgr::shutdown_token(),
        )
@@ -634,7 +624,7 @@ impl PageServerHandler {
        debug_assert_current_span_has_tenant_and_timeline_id();

        let timeline = self
-            .get_active_tenant_timeline(tenant_id, timeline_id, ShardSelector::Zero)
+            .get_active_tenant_timeline(tenant_id, timeline_id)
            .await?;
        let last_record_lsn = timeline.get_last_record_lsn();
        if last_record_lsn != start_lsn {
@@ -813,49 +803,9 @@ impl PageServerHandler {
        }
        */

-        let key = rel_block_to_key(req.rel, req.blkno);
-        let page = if timeline.get_shard_identity().is_key_local(&key) {
-            timeline
-                .get_rel_page_at_lsn(req.rel, req.blkno, lsn, req.latest, ctx)
-                .await?
-        } else {
-            // The Tenant shard we looked up at connection start does not hold this particular
-            // key: look for other shards in this tenant.  This scenario occurs if a pageserver
-            // has multiple shards for the same tenant.
-            //
-            // TODO: optimize this (https://github.com/neondatabase/neon/pull/6037)
-            let timeline = match self
-                .get_active_tenant_timeline(
-                    timeline.tenant_shard_id.tenant_id,
-                    timeline.timeline_id,
-                    ShardSelector::Page(key),
-                )
-                .await
-            {
-                Ok(t) => t,
-                Err(GetActiveTimelineError::Tenant(GetActiveTenantError::NotFound(_))) => {
-                    // We already know this tenant exists in general, because we resolved it at
-                    // start of connection.  Getting a NotFound here indicates that the shard containing
-                    // the requested page is not present on this node.
-
-                    // TODO: this should be some kind of structured error that the client will understand,
-                    // so that it can block until its config is updated: this error is expected in the case
-                    // that the Tenant's shards' placements are being updated and the client hasn't been
-                    // informed yet.
-                    //
-                    // https://github.com/neondatabase/neon/issues/6038
-                    return Err(anyhow::anyhow!("Request routed to wrong shard"));
-                }
-                Err(e) => return Err(e.into()),
-            };
-
-            // Take a GateGuard for the duration of this request.  If we were using our main Timeline object,
-            // the GateGuard was already held over the whole connection.
-            let _timeline_guard = timeline.gate.enter().map_err(|_| QueryError::Shutdown)?;
-            timeline
-                .get_rel_page_at_lsn(req.rel, req.blkno, lsn, req.latest, ctx)
-                .await?
-        };
+        let page = timeline
+            .get_rel_page_at_lsn(req.rel, req.blkno, lsn, req.latest, ctx)
+            .await?;

        Ok(PagestreamBeMessage::GetPage(PagestreamGetPageResponse {
            page,
@@ -884,7 +834,7 @@ impl PageServerHandler {

        // check that the timeline exists
        let timeline = self
-            .get_active_tenant_timeline(tenant_id, timeline_id, ShardSelector::Zero)
+            .get_active_tenant_timeline(tenant_id, timeline_id)
            .await?;
        let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
        if let Some(lsn) = lsn {
@@ -990,11 +940,9 @@ impl PageServerHandler {
        &self,
        tenant_id: TenantId,
        timeline_id: TimelineId,
-        selector: ShardSelector,
    ) -> Result<Arc<Timeline>, GetActiveTimelineError> {
        let tenant = get_active_tenant_with_timeout(
            tenant_id,
-            selector,
            ACTIVE_TENANT_TIMEOUT,
            &task_mgr::shutdown_token(),
        )
@@ -1168,7 +1116,7 @@ where

            self.check_permission(Some(tenant_id))?;
            let timeline = self
-                .get_active_tenant_timeline(tenant_id, timeline_id, ShardSelector::Zero)
+                .get_active_tenant_timeline(tenant_id, timeline_id)
                .await?;

            let end_of_timeline = timeline.get_last_record_rlsn();
@@ -1355,7 +1303,6 @@ where

            let tenant = get_active_tenant_with_timeout(
                tenant_id,
-                ShardSelector::Zero,
                ACTIVE_TENANT_TIMEOUT,
                &task_mgr::shutdown_token(),
            )
--- a/pageserver/src/pgdatadir_mapping.rs
+++ b/pageserver/src/pgdatadir_mapping.rs
@@ -13,7 +13,6 @@ use crate::repository::*;
 use crate::walrecord::NeonWalRecord;
 use anyhow::Context;
 use bytes::{Buf, Bytes};
-use pageserver_api::key::is_rel_block_key;
 use pageserver_api::reltag::{RelTag, SlruKind};
 use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
 use postgres_ffi::BLCKSZ;
@@ -22,7 +21,6 @@ use serde::{Deserialize, Serialize};
 use std::collections::{hash_map, HashMap, HashSet};
 use std::ops::ControlFlow;
 use std::ops::Range;
-use tokio_util::sync::CancellationToken;
 use tracing::{debug, trace, warn};
 use utils::bin_ser::DeserializeError;
 use utils::{bin_ser::BeSer, lsn::Lsn};
@@ -283,10 +281,6 @@ impl Timeline {
    }

    /// Get a list of all existing relations in given tablespace and database.
-    ///
-    /// # Cancel-Safety
-    ///
-    /// This method is cancellation-safe.
    pub async fn list_rels(
        &self,
        spcnode: Oid,
@@ -371,7 +365,6 @@ impl Timeline {
    pub async fn find_lsn_for_timestamp(
        &self,
        search_timestamp: TimestampTz,
-        cancel: &CancellationToken,
        ctx: &RequestContext,
    ) -> Result<LsnForTimestamp, PageReconstructError> {
        let gc_cutoff_lsn_guard = self.get_latest_gc_cutoff_lsn();
@@ -390,9 +383,6 @@ impl Timeline {
        let mut found_smaller = false;
        let mut found_larger = false;
        while low < high {
-            if cancel.is_cancelled() {
-                return Err(PageReconstructError::Cancelled);
-            }
            // cannot overflow, high and low are both smaller than u64::MAX / 2
            let mid = (high + low) / 2;

@@ -635,10 +625,6 @@ impl Timeline {
    ///
    /// Only relation blocks are counted currently. That excludes metadata,
    /// SLRUs, twophase files etc.
-    ///
-    /// # Cancel-Safety
-    ///
-    /// This method is cancellation-safe.
    pub async fn get_current_logical_size_non_incremental(
        &self,
        lsn: Lsn,
@@ -822,7 +808,10 @@ impl<'a> DatadirModification<'a> {
        self.put(DBDIR_KEY, Value::Image(buf.into()));

        // Create AuxFilesDirectory
-        self.init_aux_dir()?;
+        let buf = AuxFilesDirectory::ser(&AuxFilesDirectory {
+            files: HashMap::new(),
+        })?;
+        self.put(AUX_FILES_KEY, Value::Image(Bytes::from(buf)));

        let buf = TwoPhaseDirectory::ser(&TwoPhaseDirectory {
            xids: HashSet::new(),
@@ -930,7 +919,10 @@ impl<'a> DatadirModification<'a> {
            self.put(DBDIR_KEY, Value::Image(buf.into()));

            // Create AuxFilesDirectory as well
-            self.init_aux_dir()?;
+            let buf = AuxFilesDirectory::ser(&AuxFilesDirectory {
+                files: HashMap::new(),
+            })?;
+            self.put(AUX_FILES_KEY, Value::Image(Bytes::from(buf)));
        }
        if r.is_none() {
            // Create RelDirectory
@@ -1255,14 +1247,6 @@ impl<'a> DatadirModification<'a> {
        Ok(())
    }

-    pub fn init_aux_dir(&mut self) -> anyhow::Result<()> {
-        let buf = AuxFilesDirectory::ser(&AuxFilesDirectory {
-            files: HashMap::new(),
-        })?;
-        self.put(AUX_FILES_KEY, Value::Image(Bytes::from(buf)));
-        Ok(())
-    }
-
    pub async fn put_file(
        &mut self,
        path: &str,
@@ -1325,7 +1309,7 @@ impl<'a> DatadirModification<'a> {
        // Flush relation and  SLRU data blocks, keep metadata.
        let mut retained_pending_updates = HashMap::new();
        for (key, value) in self.pending_updates.drain() {
-            if is_rel_block_key(&key) || is_slru_block_key(key) {
+            if is_rel_block_key(key) || is_slru_block_key(key) {
                // This bails out on first error without modifying pending_updates.
                // That's Ok, cf this function's doc comment.
                writer.put(key, self.lsn, &value, ctx).await?;
@@ -1370,10 +1354,6 @@ impl<'a> DatadirModification<'a> {
        Ok(())
    }

-    pub(crate) fn is_empty(&self) -> bool {
-        self.pending_updates.is_empty() && self.pending_deletions.is_empty()
-    }
-
    // Internal helper functions to batch the modifications

    async fn get(&self, key: Key, ctx: &RequestContext) -> Result<Bytes, PageReconstructError> {
@@ -1585,7 +1565,7 @@ fn rel_dir_to_key(spcnode: Oid, dbnode: Oid) -> Key {
    }
 }

-pub(crate) fn rel_block_to_key(rel: RelTag, blknum: BlockNumber) -> Key {
+fn rel_block_to_key(rel: RelTag, blknum: BlockNumber) -> Key {
    Key {
        field1: 0x00,
        field2: rel.spcnode,
@@ -1769,13 +1749,6 @@ const AUX_FILES_KEY: Key = Key {
 // Reverse mappings for a few Keys.
 // These are needed by WAL redo manager.

-// AUX_FILES currently stores only data for logical replication (slots etc), and
-// we don't preserve these on a branch because safekeepers can't follow timeline
-// switch (and generally it likely should be optional), so ignore these.
-pub fn is_inherited_key(key: Key) -> bool {
-    key != AUX_FILES_KEY
-}
-
 pub fn key_to_rel_block(key: Key) -> anyhow::Result<(RelTag, BlockNumber)> {
    Ok(match key.field1 {
        0x00 => (
@@ -1791,6 +1764,10 @@ pub fn key_to_rel_block(key: Key) -> anyhow::Result<(RelTag, BlockNumber)> {
    })
 }

+fn is_rel_block_key(key: Key) -> bool {
+    key.field4 != 0 && key.field1 == 0x00 // 0x00 is the field1 value rel_block_to_key writes
+}
+
 pub fn is_rel_fsm_block_key(key: Key) -> bool {
    key.field1 == 0x00 && key.field4 != 0 && key.field5 == FSM_FORKNUM && key.field6 != 0xffffffff
 }
--- a/pageserver/src/repository.rs
+++ b/pageserver/src/repository.rs
@@ -2,11 +2,38 @@ use crate::walrecord::NeonWalRecord;
 use anyhow::Result;
 use bytes::Bytes;
 use serde::{Deserialize, Serialize};
-use std::ops::AddAssign;
+use std::ops::{AddAssign, Range};
 use std::time::Duration;

 pub use pageserver_api::key::{Key, KEY_SIZE};

+/// Distance between the endpoints of `key_range`, saturated to `u32::MAX`.
+///
+/// If the endpoints differ in any of `field1`..`field4` the answer is `u32::MAX`.
+pub fn key_range_size(key_range: &Range<Key>) -> u32 {
+    let (lo, hi) = (key_range.start, key_range.end);
+
+    let same_prefix = hi.field1 == lo.field1
+        && hi.field2 == lo.field2
+        && hi.field3 == lo.field3
+        && hi.field4 == lo.field4;
+    if !same_prefix {
+        return u32::MAX;
+    }
+
+    // Pack (field5, field6) into one 64-bit ordinal, field5 in the upper 32 bits.
+    let ordinal = |k: Key| (k.field5 as u64) << 32 | k.field6 as u64;
+
+    // NOTE(review): plain subtraction, so this presumes lo <= hi -- confirm with callers.
+    let distance = ordinal(hi) - ordinal(lo);
+    // Values that do not fit in 32 bits collapse to u32::MAX.
+    distance.min(u32::MAX as u64) as u32
+}
+
+pub fn singleton_range(key: Key) -> Range<Key> {
+    key..key.next() // half-open: `key` is included, `key.next()` is the exclusive end
+}
+
 /// A 'value' stored for a one Key.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 #[cfg_attr(test, derive(PartialEq))]
@@ -111,14 +138,6 @@ pub struct GcResult {

    #[serde(serialize_with = "serialize_duration_as_millis")]
    pub elapsed: Duration,
-
-    /// The layers which were garbage collected.
-    ///
-    /// Used in `/v1/tenant/:tenant_id/timeline/:timeline_id/do_gc` to wait for the layers to be
-    /// dropped in tests.
-    #[cfg(feature = "testing")]
-    #[serde(skip)]
-    pub(crate) doomed_layers: Vec<crate::tenant::storage_layer::Layer>,
 }

 // helper function for `GcResult`, serializing a `Duration` as an integer number of milliseconds
@@ -139,11 +158,5 @@ impl AddAssign for GcResult {
        self.layers_removed += other.layers_removed;

        self.elapsed += other.elapsed;
-
-        #[cfg(feature = "testing")]
-        {
-            let mut other = other;
-            self.doomed_layers.append(&mut other.doomed_layers);
-        }
    }
 }
--- a/pageserver/src/task_mgr.rs
+++ b/pageserver/src/task_mgr.rs
@@ -42,7 +42,6 @@ use std::sync::atomic::{AtomicU64, Ordering};
 use std::sync::{Arc, Mutex};

 use futures::FutureExt;
-use pageserver_api::shard::TenantShardId;
 use tokio::runtime::Runtime;
 use tokio::task::JoinHandle;
 use tokio::task_local;
@@ -52,7 +51,7 @@ use tracing::{debug, error, info, warn};

 use once_cell::sync::Lazy;

-use utils::id::TimelineId;
+use utils::id::{TenantId, TimelineId};

 use crate::shutdown_pageserver;

@@ -258,9 +257,6 @@ pub enum TaskKind {
    /// See [`crate::disk_usage_eviction_task`].
    DiskUsageEviction,

-    /// See [`crate::tenant::secondary`].
-    SecondaryUploads,
-
    // Initial logical size calculation
    InitialLogicalSizeCalculation,

@@ -321,7 +317,7 @@ struct PageServerTask {

    /// Tasks may optionally be launched for a particular tenant/timeline, enabling
    /// later cancelling tasks for that tenant/timeline in [`shutdown_tasks`]
-    tenant_shard_id: Option<TenantShardId>,
+    tenant_id: Option<TenantId>,
    timeline_id: Option<TimelineId>,

    mutable: Mutex<MutableTaskState>,
@@ -333,7 +329,7 @@ struct PageServerTask {
 pub fn spawn<F>(
    runtime: &tokio::runtime::Handle,
    kind: TaskKind,
-    tenant_shard_id: Option<TenantShardId>,
+    tenant_id: Option<TenantId>,
    timeline_id: Option<TimelineId>,
    name: &str,
    shutdown_process_on_error: bool,
@@ -349,7 +345,7 @@ where
        kind,
        name: name.to_string(),
        cancel: cancel.clone(),
-        tenant_shard_id,
+        tenant_id,
        timeline_id,
        mutable: Mutex::new(MutableTaskState { join_handle: None }),
    });
@@ -428,28 +424,28 @@ async fn task_finish(
            Ok(Err(err)) => {
                if shutdown_process_on_error {
                    error!(
-                        "Shutting down: task '{}' tenant_shard_id: {:?}, timeline_id: {:?} exited with error: {:?}",
-                        task_name, task.tenant_shard_id, task.timeline_id, err
+                        "Shutting down: task '{}' tenant_id: {:?}, timeline_id: {:?} exited with error: {:?}",
+                        task_name, task.tenant_id, task.timeline_id, err
                    );
                    shutdown_process = true;
                } else {
                    error!(
-                        "Task '{}' tenant_shard_id: {:?}, timeline_id: {:?} exited with error: {:?}",
-                        task_name, task.tenant_shard_id, task.timeline_id, err
+                        "Task '{}' tenant_id: {:?}, timeline_id: {:?} exited with error: {:?}",
+                        task_name, task.tenant_id, task.timeline_id, err
                    );
                }
            }
            Err(err) => {
                if shutdown_process_on_error {
                    error!(
-                        "Shutting down: task '{}' tenant_shard_id: {:?}, timeline_id: {:?} panicked: {:?}",
-                        task_name, task.tenant_shard_id, task.timeline_id, err
+                        "Shutting down: task '{}' tenant_id: {:?}, timeline_id: {:?} panicked: {:?}",
+                        task_name, task.tenant_id, task.timeline_id, err
                    );
                    shutdown_process = true;
                } else {
                    error!(
-                        "Task '{}' tenant_shard_id: {:?}, timeline_id: {:?} panicked: {:?}",
-                        task_name, task.tenant_shard_id, task.timeline_id, err
+                        "Task '{}' tenant_id: {:?}, timeline_id: {:?} panicked: {:?}",
+                        task_name, task.tenant_id, task.timeline_id, err
                    );
                }
            }
@@ -471,11 +467,11 @@ async fn task_finish(
 ///
 /// Or to shut down all tasks for given timeline:
 ///
-///   shutdown_tasks(None, Some(tenant_shard_id), Some(timeline_id))
+///   shutdown_tasks(None, Some(tenant_id), Some(timeline_id))
 ///
 pub async fn shutdown_tasks(
    kind: Option<TaskKind>,
-    tenant_shard_id: Option<TenantShardId>,
+    tenant_id: Option<TenantId>,
    timeline_id: Option<TimelineId>,
 ) {
    let mut victim_tasks = Vec::new();
@@ -484,35 +480,35 @@ pub async fn shutdown_tasks(
        let tasks = TASKS.lock().unwrap();
        for task in tasks.values() {
            if (kind.is_none() || Some(task.kind) == kind)
-                && (tenant_shard_id.is_none() || task.tenant_shard_id == tenant_shard_id)
+                && (tenant_id.is_none() || task.tenant_id == tenant_id)
                && (timeline_id.is_none() || task.timeline_id == timeline_id)
            {
                task.cancel.cancel();
                victim_tasks.push((
                    Arc::clone(task),
                    task.kind,
-                    task.tenant_shard_id,
+                    task.tenant_id,
                    task.timeline_id,
                ));
            }
        }
    }

-    let log_all = kind.is_none() && tenant_shard_id.is_none() && timeline_id.is_none();
+    let log_all = kind.is_none() && tenant_id.is_none() && timeline_id.is_none();

-    for (task, task_kind, tenant_shard_id, timeline_id) in victim_tasks {
+    for (task, task_kind, tenant_id, timeline_id) in victim_tasks {
        let join_handle = {
            let mut task_mut = task.mutable.lock().unwrap();
            task_mut.join_handle.take()
        };
        if let Some(mut join_handle) = join_handle {
            if log_all {
-                if tenant_shard_id.is_none() {
+                if tenant_id.is_none() {
                    // there are quite few of these
                    info!(name = task.name, kind = ?task_kind, "stopping global task");
                } else {
                    // warn to catch these in tests; there shouldn't be any
-                    warn!(name = task.name, tenant_shard_id = ?tenant_shard_id, timeline_id = ?timeline_id, kind = ?task_kind, "stopping left-over");
+                    warn!(name = task.name, tenant_id = ?tenant_id, timeline_id = ?timeline_id, kind = ?task_kind, "stopping left-over");
                }
            }
            if tokio::time::timeout(std::time::Duration::from_secs(1), &mut join_handle)
@@ -521,13 +517,12 @@ pub async fn shutdown_tasks(
            {
                // allow some time to elapse before logging to cut down the number of log
                // lines.
-                info!("waiting for task {} to shut down", task.name);
+                info!("waiting for {} to shut down", task.name);
                // we never handled this return value, but:
                // - we don't deschedule which would lead to is_cancelled
                // - panics are already logged (is_panicked)
                // - task errors are already logged in the wrapper
                let _ = join_handle.await;
-                info!("task {} completed", task.name);
            }
        } else {
            // Possibly one of:
@@ -561,14 +556,9 @@ pub async fn shutdown_watcher() {
 /// cancelled. It can however be moved to other tasks, such as `tokio::task::spawn_blocking` or
 /// `tokio::task::JoinSet::spawn`.
 pub fn shutdown_token() -> CancellationToken {
-    let res = SHUTDOWN_TOKEN.try_with(|t| t.clone());
-
-    if cfg!(test) {
-        // in tests this method is called from non-taskmgr spawned tasks, and that is all ok.
-        res.unwrap_or_default()
-    } else {
-        res.expect("shutdown_token() called in an unexpected task or thread")
-    }
+    SHUTDOWN_TOKEN
+        .try_with(|t| t.clone())
+        .expect("shutdown_token() called in an unexpected task or thread")
 }

 /// Has the current task been requested to shut down?
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
--- a/pageserver/src/tenant/config.rs
+++ b/pageserver/src/tenant/config.rs
@@ -8,12 +8,9 @@
 //! We cannot use global or default config instead, because wrong settings
 //! may lead to a data loss.
 //!
-use anyhow::bail;
+use anyhow::Context;
 use pageserver_api::models;
-use pageserver_api::shard::{ShardCount, ShardIdentity, ShardNumber, ShardStripeSize};
-use serde::de::IntoDeserializer;
 use serde::{Deserialize, Serialize};
-use serde_json::Value;
 use std::num::NonZeroU64;
 use std::time::Duration;
 use utils::generation::Generation;
@@ -91,14 +88,6 @@ pub(crate) struct LocationConf {
    /// The location-specific part of the configuration, describes the operating
    /// mode of this pageserver for this tenant.
    pub(crate) mode: LocationMode,
-
-    /// The detailed shard identity.  This structure is already scoped within
-    /// a TenantShardId, but we need the full ShardIdentity to enable calculating
-    /// key->shard mappings.
-    #[serde(default = "ShardIdentity::unsharded")]
-    #[serde(skip_serializing_if = "ShardIdentity::is_unsharded")]
-    pub(crate) shard: ShardIdentity,
-
    /// The pan-cluster tenant configuration, the same on all locations
    pub(crate) tenant_conf: TenantConfOpt,
 }
@@ -171,8 +160,6 @@ impl LocationConf {
                generation,
                attach_mode: AttachmentMode::Single,
            }),
-            // Legacy configuration loads are always from tenants created before sharding existed.
-            shard: ShardIdentity::unsharded(),
            tenant_conf,
        }
    }
@@ -200,7 +187,6 @@ impl LocationConf {

        fn get_generation(conf: &'_ models::LocationConfig) -> Result<Generation, anyhow::Error> {
            conf.generation
-                .map(Generation::new)
                .ok_or_else(|| anyhow::anyhow!("Generation must be set when attaching"))
        }

@@ -240,21 +226,7 @@ impl LocationConf {
            }
        };

-        let shard = if conf.shard_count == 0 {
-            ShardIdentity::unsharded()
-        } else {
-            ShardIdentity::new(
-                ShardNumber(conf.shard_number),
-                ShardCount(conf.shard_count),
-                ShardStripeSize(conf.shard_stripe_size),
-            )?
-        };
-
-        Ok(Self {
-            shard,
-            mode,
-            tenant_conf,
-        })
+        Ok(Self { mode, tenant_conf })
    }
 }

@@ -269,7 +241,6 @@ impl Default for LocationConf {
                attach_mode: AttachmentMode::Single,
            }),
            tenant_conf: TenantConfOpt::default(),
-            shard: ShardIdentity::unsharded(),
        }
    }
 }
@@ -334,11 +305,6 @@ pub struct TenantConf {
    #[serde(with = "humantime_serde")]
    pub evictions_low_residence_duration_metric_threshold: Duration,
    pub gc_feedback: bool,
-
-    /// If non-zero, the period between uploads of a heatmap from attached tenants.  This
-    /// may be disabled if a Tenant will not have secondary locations: only secondary
-    /// locations will use the heatmap uploaded by attached locations.
-    pub heatmap_period: Duration,
 }

 /// Same as TenantConf, but this struct preserves the information about
@@ -419,11 +385,6 @@ pub struct TenantConfOpt {
    #[serde(skip_serializing_if = "Option::is_none")]
    #[serde(default)]
    pub gc_feedback: Option<bool>,
-
-    #[serde(skip_serializing_if = "Option::is_none")]
-    #[serde(with = "humantime_serde")]
-    #[serde(default)]
-    pub heatmap_period: Option<Duration>,
 }

 #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
@@ -492,7 +453,6 @@ impl TenantConfOpt {
                .evictions_low_residence_duration_metric_threshold
                .unwrap_or(global_conf.evictions_low_residence_duration_metric_threshold),
            gc_feedback: self.gc_feedback.unwrap_or(global_conf.gc_feedback),
-            heatmap_period: self.heatmap_period.unwrap_or(global_conf.heatmap_period),
        }
    }
 }
@@ -530,54 +490,109 @@ impl Default for TenantConf {
            )
            .expect("cannot parse default evictions_low_residence_duration_metric_threshold"),
            gc_feedback: false,
-            heatmap_period: Duration::ZERO,
        }
    }
 }

+// Builds the closure that renders our standard bad-duration message,
+// "Cannot parse `<field_name>` duration <value>", with `value` Debug-quoted via `{:?}`.
+//
+// Pass the result straight to anyhow's `with_context`, which calls it only on error, e.g.:
+//
+//   let value = result.with_context(bad_duration("name", &value))?;
+fn bad_duration<'a>(field_name: &'static str, value: &'a str) -> impl 'a + Fn() -> String {
+    move || format!("Cannot parse `{field_name}` duration {value:?}")
+}
+
 impl TryFrom<&'_ models::TenantConfig> for TenantConfOpt {
    type Error = anyhow::Error;

    fn try_from(request_data: &'_ models::TenantConfig) -> Result<Self, Self::Error> {
-        // Convert the request_data to a JSON Value
-        let json_value: Value = serde_json::to_value(request_data)?;
+        let mut tenant_conf = TenantConfOpt::default();

-        // Create a Deserializer from the JSON Value
-        let deserializer = json_value.into_deserializer();
+        if let Some(gc_period) = &request_data.gc_period {
+            tenant_conf.gc_period = Some(
+                humantime::parse_duration(gc_period)
+                    .with_context(bad_duration("gc_period", gc_period))?,
+            );
+        }
+        tenant_conf.gc_horizon = request_data.gc_horizon;
+        tenant_conf.image_creation_threshold = request_data.image_creation_threshold;

-        // Use serde_path_to_error to deserialize the JSON Value into TenantConfOpt
-        let tenant_conf: TenantConfOpt = serde_path_to_error::deserialize(deserializer)?;
+        if let Some(pitr_interval) = &request_data.pitr_interval {
+            tenant_conf.pitr_interval = Some(
+                humantime::parse_duration(pitr_interval)
+                    .with_context(bad_duration("pitr_interval", pitr_interval))?,
+            );
+        }
+
+        if let Some(walreceiver_connect_timeout) = &request_data.walreceiver_connect_timeout {
+            tenant_conf.walreceiver_connect_timeout = Some(
+                humantime::parse_duration(walreceiver_connect_timeout).with_context(
+                    bad_duration("walreceiver_connect_timeout", walreceiver_connect_timeout),
+                )?,
+            );
+        }
+        if let Some(lagging_wal_timeout) = &request_data.lagging_wal_timeout {
+            tenant_conf.lagging_wal_timeout = Some(
+                humantime::parse_duration(lagging_wal_timeout)
+                    .with_context(bad_duration("lagging_wal_timeout", lagging_wal_timeout))?,
+            );
+        }
+        if let Some(max_lsn_wal_lag) = request_data.max_lsn_wal_lag {
+            tenant_conf.max_lsn_wal_lag = Some(max_lsn_wal_lag);
+        }
+        if let Some(trace_read_requests) = request_data.trace_read_requests {
+            tenant_conf.trace_read_requests = Some(trace_read_requests);
+        }
+
+        tenant_conf.checkpoint_distance = request_data.checkpoint_distance;
+        if let Some(checkpoint_timeout) = &request_data.checkpoint_timeout {
+            tenant_conf.checkpoint_timeout = Some(
+                humantime::parse_duration(checkpoint_timeout)
+                    .with_context(bad_duration("checkpoint_timeout", checkpoint_timeout))?,
+            );
+        }
+
+        tenant_conf.compaction_target_size = request_data.compaction_target_size;
+        tenant_conf.compaction_threshold = request_data.compaction_threshold;
+
+        if let Some(compaction_period) = &request_data.compaction_period {
+            tenant_conf.compaction_period = Some(
+                humantime::parse_duration(compaction_period)
+                    .with_context(bad_duration("compaction_period", compaction_period))?,
+            );
+        }
+
+        if let Some(eviction_policy) = &request_data.eviction_policy {
+            tenant_conf.eviction_policy = Some(
+                serde::Deserialize::deserialize(eviction_policy)
+                    .context("parse field `eviction_policy`")?,
+            );
+        }
+
+        tenant_conf.min_resident_size_override = request_data.min_resident_size_override;
+
+        if let Some(evictions_low_residence_duration_metric_threshold) =
+            &request_data.evictions_low_residence_duration_metric_threshold
+        {
+            tenant_conf.evictions_low_residence_duration_metric_threshold = Some(
+                humantime::parse_duration(evictions_low_residence_duration_metric_threshold)
+                    .with_context(bad_duration(
+                        "evictions_low_residence_duration_metric_threshold",
+                        evictions_low_residence_duration_metric_threshold,
+                    ))?,
+            );
+        }
+        tenant_conf.gc_feedback = request_data.gc_feedback;

        Ok(tenant_conf)
    }
 }

-impl TryFrom<toml_edit::Item> for TenantConfOpt {
-    type Error = anyhow::Error;
-
-    fn try_from(item: toml_edit::Item) -> Result<Self, Self::Error> {
-        match item {
-            toml_edit::Item::Value(value) => {
-                let d = value.into_deserializer();
-                return serde_path_to_error::deserialize(d)
-                    .map_err(|e| anyhow::anyhow!("{}: {}", e.path(), e.inner().message()));
-            }
-            toml_edit::Item::Table(table) => {
-                let deserializer = toml_edit::de::Deserializer::new(table.into());
-                return serde_path_to_error::deserialize(deserializer)
-                    .map_err(|e| anyhow::anyhow!("{}: {}", e.path(), e.inner().message()));
-            }
-            _ => {
-                bail!("expected non-inline table but found {item}")
-            }
-        }
-    }
-}
-
 #[cfg(test)]
 mod tests {
    use super::*;
-    use models::TenantConfig;

    #[test]
    fn de_serializing_pageserver_config_omits_empty_values() {
@@ -594,38 +609,4 @@ mod tests {
        assert_eq!(json_form, "{\"gc_horizon\":42}");
        assert_eq!(small_conf, serde_json::from_str(&json_form).unwrap());
    }
-
-    #[test]
-    fn test_try_from_models_tenant_config_err() {
-        let tenant_config = models::TenantConfig {
-            lagging_wal_timeout: Some("5a".to_string()),
-            ..TenantConfig::default()
-        };
-
-        let tenant_conf_opt = TenantConfOpt::try_from(&tenant_config);
-
-        assert!(
-            tenant_conf_opt.is_err(),
-            "Suceeded to convert TenantConfig to TenantConfOpt"
-        );
-
-        let expected_error_str =
-            "lagging_wal_timeout: invalid value: string \"5a\", expected a duration";
-        assert_eq!(tenant_conf_opt.unwrap_err().to_string(), expected_error_str);
-    }
-
-    #[test]
-    fn test_try_from_models_tenant_config_success() {
-        let tenant_config = models::TenantConfig {
-            lagging_wal_timeout: Some("5s".to_string()),
-            ..TenantConfig::default()
-        };
-
-        let tenant_conf_opt = TenantConfOpt::try_from(&tenant_config).unwrap();
-
-        assert_eq!(
-            tenant_conf_opt.lagging_wal_timeout,
-            Some(Duration::from_secs(5))
-        );
-    }
 }
--- a/pageserver/src/tenant/delete.rs
+++ b/pageserver/src/tenant/delete.rs
@@ -2,19 +2,22 @@ use std::sync::Arc;

 use anyhow::Context;
 use camino::{Utf8Path, Utf8PathBuf};
-use pageserver_api::{models::TenantState, shard::TenantShardId};
+use pageserver_api::models::TenantState;
 use remote_storage::{GenericRemoteStorage, RemotePath};
 use tokio::sync::OwnedMutexGuard;
 use tokio_util::sync::CancellationToken;
-use tracing::{error, instrument, Instrument, Span};
+use tracing::{error, instrument, warn, Instrument, Span};

-use utils::{backoff, completion, crashsafe, fs_ext, id::TimelineId};
+use utils::{
+    backoff, completion, crashsafe, fs_ext,
+    id::{TenantId, TimelineId},
+};

 use crate::{
    config::PageServerConf,
    context::RequestContext,
    task_mgr::{self, TaskKind},
-    tenant::mgr::{TenantSlot, TenantsMapRemoveResult},
+    InitializationOrder,
 };

 use super::{
@@ -56,10 +59,10 @@ type DeletionGuard = tokio::sync::OwnedMutexGuard<DeleteTenantFlow>;

 fn remote_tenant_delete_mark_path(
    conf: &PageServerConf,
-    tenant_shard_id: &TenantShardId,
+    tenant_id: &TenantId,
 ) -> anyhow::Result<RemotePath> {
    let tenant_remote_path = conf
-        .tenant_path(tenant_shard_id)
+        .tenant_path(tenant_id)
        .strip_prefix(&conf.workdir)
        .context("Failed to strip workdir prefix")
        .and_then(RemotePath::new)
@@ -70,25 +73,23 @@ fn remote_tenant_delete_mark_path(
 async fn create_remote_delete_mark(
    conf: &PageServerConf,
    remote_storage: &GenericRemoteStorage,
-    tenant_shard_id: &TenantShardId,
-    cancel: &CancellationToken,
+    tenant_id: &TenantId,
 ) -> Result<(), DeleteTenantError> {
-    let remote_mark_path = remote_tenant_delete_mark_path(conf, tenant_shard_id)?;
+    let remote_mark_path = remote_tenant_delete_mark_path(conf, tenant_id)?;

    let data: &[u8] = &[];
    backoff::retry(
        || async {
-            let data = bytes::Bytes::from_static(data);
-            let stream = futures::stream::once(futures::future::ready(Ok(data)));
            remote_storage
-                .upload(stream, 0, &remote_mark_path, None)
+                .upload(data, 0, &remote_mark_path, None)
                .await
        },
        |_e| false,
        FAILED_UPLOAD_WARN_THRESHOLD,
        FAILED_REMOTE_OP_RETRIES,
        "mark_upload",
-        backoff::Cancel::new(cancel.clone(), || anyhow::anyhow!("Cancelled")),
+        // TODO: use a cancellation token (https://github.com/neondatabase/neon/issues/5066)
+        backoff::Cancel::new(CancellationToken::new(), || unreachable!()),
    )
    .await
    .context("mark_upload")?;
@@ -98,9 +99,9 @@ async fn create_remote_delete_mark(

 async fn create_local_delete_mark(
    conf: &PageServerConf,
-    tenant_shard_id: &TenantShardId,
+    tenant_id: &TenantId,
 ) -> Result<(), DeleteTenantError> {
-    let marker_path = conf.tenant_deleted_mark_file_path(tenant_shard_id);
+    let marker_path = conf.tenant_deleted_mark_file_path(tenant_id);

    // Note: we're ok to replace existing file.
    let _ = std::fs::OpenOptions::new()
@@ -169,18 +170,18 @@ async fn ensure_timelines_dir_empty(timelines_path: &Utf8Path) -> Result<(), Del
 async fn remove_tenant_remote_delete_mark(
    conf: &PageServerConf,
    remote_storage: Option<&GenericRemoteStorage>,
-    tenant_shard_id: &TenantShardId,
-    cancel: &CancellationToken,
+    tenant_id: &TenantId,
 ) -> Result<(), DeleteTenantError> {
    if let Some(remote_storage) = remote_storage {
-        let path = remote_tenant_delete_mark_path(conf, tenant_shard_id)?;
+        let path = remote_tenant_delete_mark_path(conf, tenant_id)?;
        backoff::retry(
            || async { remote_storage.delete(&path).await },
            |_e| false,
            FAILED_UPLOAD_WARN_THRESHOLD,
            FAILED_REMOTE_OP_RETRIES,
            "remove_tenant_remote_delete_mark",
-            backoff::Cancel::new(cancel.clone(), || anyhow::anyhow!("Cancelled")),
+            // TODO: use a cancellation token (https://github.com/neondatabase/neon/issues/5066)
+            backoff::Cancel::new(CancellationToken::new(), || unreachable!()),
        )
        .await
        .context("remove_tenant_remote_delete_mark")?;
@@ -191,7 +192,7 @@ async fn remove_tenant_remote_delete_mark(
 // Cleanup fs traces: tenant config, timelines dir local delete mark, tenant dir
 async fn cleanup_remaining_fs_traces(
    conf: &PageServerConf,
-    tenant_shard_id: &TenantShardId,
+    tenant_id: &TenantId,
 ) -> Result<(), DeleteTenantError> {
    let rm = |p: Utf8PathBuf, is_dir: bool| async move {
        if is_dir {
@@ -203,8 +204,8 @@ async fn cleanup_remaining_fs_traces(
        .with_context(|| format!("failed to delete {p}"))
    };

-    rm(conf.tenant_config_path(tenant_shard_id), false).await?;
-    rm(conf.tenant_location_config_path(tenant_shard_id), false).await?;
+    rm(conf.tenant_config_path(tenant_id), false).await?;
+    rm(conf.tenant_location_config_path(tenant_id), false).await?;

    fail::fail_point!("tenant-delete-before-remove-timelines-dir", |_| {
        Err(anyhow::anyhow!(
@@ -212,7 +213,7 @@ async fn cleanup_remaining_fs_traces(
        ))?
    });

-    rm(conf.timelines_path(tenant_shard_id), true).await?;
+    rm(conf.timelines_path(tenant_id), true).await?;

    fail::fail_point!("tenant-delete-before-remove-deleted-mark", |_| {
        Err(anyhow::anyhow!(
@@ -226,14 +227,14 @@ async fn cleanup_remaining_fs_traces(
    // to be reordered later and thus missed if a crash occurs.
    // Note that we dont need to sync after mark file is removed
    // because we can tolerate the case when mark file reappears on startup.
-    let tenant_path = &conf.tenant_path(tenant_shard_id);
+    let tenant_path = &conf.tenant_path(tenant_id);
    if tenant_path.exists() {
-        crashsafe::fsync_async(&conf.tenant_path(tenant_shard_id))
+        crashsafe::fsync_async(&conf.tenant_path(tenant_id))
            .await
            .context("fsync_pre_mark_remove")?;
    }

-    rm(conf.tenant_deleted_mark_file_path(tenant_shard_id), false).await?;
+    rm(conf.tenant_deleted_mark_file_path(tenant_id), false).await?;

    fail::fail_point!("tenant-delete-before-remove-tenant-dir", |_| {
        Err(anyhow::anyhow!(
@@ -241,7 +242,7 @@ async fn cleanup_remaining_fs_traces(
        ))?
    });

-    rm(conf.tenant_path(tenant_shard_id), true).await?;
+    rm(conf.tenant_path(tenant_id), true).await?;

    Ok(())
 }
@@ -286,8 +287,6 @@ impl DeleteTenantFlow {
    ) -> Result<(), DeleteTenantError> {
        span::debug_assert_current_span_has_tenant_id();

-        pausable_failpoint!("tenant-delete-before-run");
-
        let mut guard = Self::prepare(&tenant).await?;

        if let Err(e) = Self::run_inner(&mut guard, conf, remote_storage.as_ref(), &tenant).await {
@@ -322,15 +321,9 @@ impl DeleteTenantFlow {
        // Though sounds scary, different mark name?
        // Detach currently uses remove_dir_all so in case of a crash we can end up in a weird state.
        if let Some(remote_storage) = &remote_storage {
-            create_remote_delete_mark(
-                conf,
-                remote_storage,
-                &tenant.tenant_shard_id,
-                // Can't use tenant.cancel, it's already shut down.  TODO: wire in an appropriate token
-                &CancellationToken::new(),
-            )
-            .await
-            .context("remote_mark")?
+            create_remote_delete_mark(conf, remote_storage, &tenant.tenant_id)
+                .await
+                .context("remote_mark")?
        }

        fail::fail_point!("tenant-delete-before-create-local-mark", |_| {
@@ -339,7 +332,7 @@ impl DeleteTenantFlow {
            ))?
        });

-        create_local_delete_mark(conf, &tenant.tenant_shard_id)
+        create_local_delete_mark(conf, &tenant.tenant_id)
            .await
            .context("local delete mark")?;

@@ -381,11 +374,9 @@ impl DeleteTenantFlow {
            return Ok(acquire(tenant));
        }

+        let tenant_id = tenant.tenant_id;
        // Check local mark first, if its there there is no need to go to s3 to check whether remote one exists.
-        if conf
-            .tenant_deleted_mark_file_path(&tenant.tenant_shard_id)
-            .exists()
-        {
+        if conf.tenant_deleted_mark_file_path(&tenant_id).exists() {
            Ok(acquire(tenant))
        } else {
            Ok(None)
@@ -397,6 +388,7 @@ impl DeleteTenantFlow {
        tenant: &Arc<Tenant>,
        preload: Option<TenantPreload>,
        tenants: &'static std::sync::RwLock<TenantsMap>,
+        init_order: Option<InitializationOrder>,
        ctx: &RequestContext,
    ) -> Result<(), DeleteTenantError> {
        let (_, progress) = completion::channel();
@@ -406,7 +398,10 @@ impl DeleteTenantFlow {
            .await
            .expect("cant be stopping or broken");

-        tenant.attach(preload, ctx).await.context("attach")?;
+        tenant
+            .attach(init_order, preload, ctx)
+            .await
+            .context("attach")?;

        Self::background(
            guard,
@@ -464,12 +459,12 @@ impl DeleteTenantFlow {
        tenants: &'static std::sync::RwLock<TenantsMap>,
        tenant: Arc<Tenant>,
    ) {
-        let tenant_shard_id = tenant.tenant_shard_id;
+        let tenant_id = tenant.tenant_id;

        task_mgr::spawn(
            task_mgr::BACKGROUND_RUNTIME.handle(),
            TaskKind::TimelineDeletionWorker,
-            Some(tenant_shard_id),
+            Some(tenant_id),
            None,
            "tenant_delete",
            false,
@@ -483,7 +478,7 @@ impl DeleteTenantFlow {
                Ok(())
            }
            .instrument({
-                let span = tracing::info_span!(parent: None, "delete_tenant", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug());
+                let span = tracing::info_span!(parent: None, "delete_tenant", tenant_id=%tenant_id);
                span.follows_from(Span::current());
                span
            }),
@@ -521,7 +516,7 @@ impl DeleteTenantFlow {
            }
        }

-        let timelines_path = conf.timelines_path(&tenant.tenant_shard_id);
+        let timelines_path = conf.timelines_path(&tenant.tenant_id);
        // May not exist if we fail in cleanup_remaining_fs_traces after removing it
        if timelines_path.exists() {
            // sanity check to guard against layout changes
@@ -530,14 +525,7 @@ impl DeleteTenantFlow {
                .context("timelines dir not empty")?;
        }

-        remove_tenant_remote_delete_mark(
-            conf,
-            remote_storage.as_ref(),
-            &tenant.tenant_shard_id,
-            // Can't use tenant.cancel, it's already shut down.  TODO: wire in an appropriate token
-            &CancellationToken::new(),
-        )
-        .await?;
+        remove_tenant_remote_delete_mark(conf, remote_storage.as_ref(), &tenant.tenant_id).await?;

        fail::fail_point!("tenant-delete-before-cleanup-remaining-fs-traces", |_| {
            Err(anyhow::anyhow!(
@@ -545,73 +533,21 @@ impl DeleteTenantFlow {
            ))?
        });

-        cleanup_remaining_fs_traces(conf, &tenant.tenant_shard_id)
+        cleanup_remaining_fs_traces(conf, &tenant.tenant_id)
            .await
            .context("cleanup_remaining_fs_traces")?;

        {
-            pausable_failpoint!("tenant-delete-before-map-remove");
+            let mut locked = tenants.write().unwrap();
+            if locked.remove(&tenant.tenant_id).is_none() {
+                warn!("Tenant got removed from tenants map during deletion");
+            };

-            // This block is simply removing the TenantSlot for this tenant.  It requires a loop because
-            // we might conflict with a TenantSlot::InProgress marker and need to wait for it.
-            //
-            // This complexity will go away when we simplify how deletion works:
-            // https://github.com/neondatabase/neon/issues/5080
-            loop {
-                // Under the TenantMap lock, try to remove the tenant.  We usually succeed, but if
-                // we encounter an InProgress marker, yield the barrier it contains and wait on it.
-                let barrier = {
-                    let mut locked = tenants.write().unwrap();
-                    let removed = locked.remove(tenant.tenant_shard_id);
-
-                    // FIXME: we should not be modifying this from outside of mgr.rs.
-                    // This will go away when we simplify deletion (https://github.com/neondatabase/neon/issues/5080)
-                    crate::metrics::TENANT_MANAGER
-                        .tenant_slots
-                        .set(locked.len() as u64);
-
-                    match removed {
-                        TenantsMapRemoveResult::Occupied(TenantSlot::Attached(tenant)) => {
-                            match tenant.current_state() {
-                                TenantState::Stopping { .. } | TenantState::Broken { .. } => {
-                                    // Expected: we put the tenant into stopping state before we start deleting it
-                                }
-                                state => {
-                                    // Unexpected state
-                                    tracing::warn!(
-                                        "Tenant in unexpected state {state} after deletion"
-                                    );
-                                }
-                            }
-                            break;
-                        }
-                        TenantsMapRemoveResult::Occupied(TenantSlot::Secondary) => {
-                            // This is unexpected: this secondary tenants should not have been created, and we
-                            // are not in a position to shut it down from here.
-                            tracing::warn!("Tenant transitioned to secondary mode while deleting!");
-                            break;
-                        }
-                        TenantsMapRemoveResult::Occupied(TenantSlot::InProgress(_)) => {
-                            unreachable!("TenantsMap::remove handles InProgress separately, should never return it here");
-                        }
-                        TenantsMapRemoveResult::Vacant => {
-                            tracing::warn!(
-                                "Tenant removed from TenantsMap before deletion completed"
-                            );
-                            break;
-                        }
-                        TenantsMapRemoveResult::InProgress(barrier) => {
-                            // An InProgress entry was found, we must wait on its barrier
-                            barrier
-                        }
-                    }
-                };
-
-                tracing::info!(
-                    "Waiting for competing operation to complete before deleting state for tenant"
-                );
-                barrier.wait().await;
-            }
+            // FIXME: we should not be modifying this from outside of mgr.rs.
+            // This will go away when we simplify deletion (https://github.com/neondatabase/neon/issues/5080)
+            crate::metrics::TENANT_MANAGER
+                .tenant_slots
+                .set(locked.len() as u64);
        }

        *guard = Self::Finished;
--- a/pageserver/src/tenant/ephemeral_file.rs
+++ b/pageserver/src/tenant/ephemeral_file.rs
@@ -7,19 +7,18 @@ use crate::page_cache::{self, PAGE_SZ};
 use crate::tenant::block_io::{BlockCursor, BlockLease, BlockReader};
 use crate::virtual_file::VirtualFile;
 use camino::Utf8PathBuf;
-use pageserver_api::shard::TenantShardId;
 use std::cmp::min;
 use std::fs::OpenOptions;
 use std::io::{self, ErrorKind};
 use std::ops::DerefMut;
 use std::sync::atomic::AtomicU64;
 use tracing::*;
-use utils::id::TimelineId;
+use utils::id::{TenantId, TimelineId};

 pub struct EphemeralFile {
    page_cache_file_id: page_cache::FileId,

-    _tenant_shard_id: TenantShardId,
+    _tenant_id: TenantId,
    _timeline_id: TimelineId,
    file: VirtualFile,
    len: u64,
@@ -32,7 +31,7 @@ pub struct EphemeralFile {
 impl EphemeralFile {
    pub async fn create(
        conf: &PageServerConf,
-        tenant_shard_id: TenantShardId,
+        tenant_id: TenantId,
        timeline_id: TimelineId,
    ) -> Result<EphemeralFile, io::Error> {
        static NEXT_FILENAME: AtomicU64 = AtomicU64::new(1);
@@ -40,7 +39,7 @@ impl EphemeralFile {
            NEXT_FILENAME.fetch_add(1, std::sync::atomic::Ordering::Relaxed);

        let filename = conf
-            .timeline_path(&tenant_shard_id, &timeline_id)
+            .timeline_path(&tenant_id, &timeline_id)
            .join(Utf8PathBuf::from(format!(
                "ephemeral-{filename_disambiguator}"
            )));
@@ -53,7 +52,7 @@ impl EphemeralFile {

        Ok(EphemeralFile {
            page_cache_file_id: page_cache::next_file_id(),
-            _tenant_shard_id: tenant_shard_id,
+            _tenant_id: tenant_id,
            _timeline_id: timeline_id,
            file,
            len: 0,
@@ -283,7 +282,7 @@ mod tests {
    ) -> Result<
        (
            &'static PageServerConf,
-            TenantShardId,
+            TenantId,
            TimelineId,
            RequestContext,
        ),
@@ -296,13 +295,13 @@ mod tests {
        // OK in a test.
        let conf: &'static PageServerConf = Box::leak(Box::new(conf));

-        let tenant_shard_id = TenantShardId::from_str("11000000000000000000000000000000").unwrap();
+        let tenant_id = TenantId::from_str("11000000000000000000000000000000").unwrap();
        let timeline_id = TimelineId::from_str("22000000000000000000000000000000").unwrap();
-        fs::create_dir_all(conf.timeline_path(&tenant_shard_id, &timeline_id))?;
+        fs::create_dir_all(conf.timeline_path(&tenant_id, &timeline_id))?;

        let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error);

-        Ok((conf, tenant_shard_id, timeline_id, ctx))
+        Ok((conf, tenant_id, timeline_id, ctx))
    }

    #[tokio::test]
--- a/pageserver/src/tenant/metadata.rs
+++ b/pageserver/src/tenant/metadata.rs
@@ -11,12 +11,15 @@
 use std::io::{self};

 use anyhow::{ensure, Context};
-use pageserver_api::shard::TenantShardId;
 use serde::{de::Error, Deserialize, Serialize, Serializer};
 use thiserror::Error;
 use utils::bin_ser::SerializeError;
 use utils::crashsafe::path_with_suffix_extension;
-use utils::{bin_ser::BeSer, id::TimelineId, lsn::Lsn};
+use utils::{
+    bin_ser::BeSer,
+    id::{TenantId, TimelineId},
+    lsn::Lsn,
+};

 use crate::config::PageServerConf;
 use crate::virtual_file::VirtualFile;
@@ -269,14 +272,14 @@ impl Serialize for TimelineMetadata {
 }

 /// Save timeline metadata to file
-#[tracing::instrument(skip_all, fields(%tenant_id=tenant_shard_id.tenant_id, %shard_id=tenant_shard_id.shard_slug(), %timeline_id))]
+#[tracing::instrument(skip_all, fields(%tenant_id, %timeline_id))]
 pub async fn save_metadata(
    conf: &'static PageServerConf,
-    tenant_shard_id: &TenantShardId,
+    tenant_id: &TenantId,
    timeline_id: &TimelineId,
    data: &TimelineMetadata,
 ) -> anyhow::Result<()> {
-    let path = conf.metadata_path(tenant_shard_id, timeline_id);
+    let path = conf.metadata_path(tenant_id, timeline_id);
    let temp_path = path_with_suffix_extension(&path, TEMP_FILE_SUFFIX);
    let metadata_bytes = data.to_bytes().context("serialize metadata")?;
    VirtualFile::crashsafe_overwrite(&path, &temp_path, &metadata_bytes)
@@ -296,10 +299,10 @@ pub enum LoadMetadataError {

 pub fn load_metadata(
    conf: &'static PageServerConf,
-    tenant_shard_id: &TenantShardId,
+    tenant_id: &TenantId,
    timeline_id: &TimelineId,
 ) -> Result<TimelineMetadata, LoadMetadataError> {
-    let metadata_path = conf.metadata_path(tenant_shard_id, timeline_id);
+    let metadata_path = conf.metadata_path(tenant_id, timeline_id);
    let metadata_bytes = std::fs::read(metadata_path)?;

    Ok(TimelineMetadata::from_bytes(&metadata_bytes)?)
--- a/pageserver/src/tenant/mgr.rs
+++ b/pageserver/src/tenant/mgr.rs
--- a/pageserver/src/tenant/remote_timeline_client.rs
+++ b/pageserver/src/tenant/remote_timeline_client.rs
@@ -180,7 +180,7 @@
 //! [`Tenant::timeline_init_and_sync`]: super::Tenant::timeline_init_and_sync
 //! [`Timeline::load_layer_map`]: super::Timeline::load_layer_map

-pub(crate) mod download;
+mod download;
 pub mod index;
 mod upload;

@@ -188,20 +188,15 @@ use anyhow::Context;
 use camino::Utf8Path;
 use chrono::{NaiveDateTime, Utc};

-pub(crate) use download::download_initdb_tar_zst;
-use pageserver_api::shard::{ShardIndex, TenantShardId};
 use scopeguard::ScopeGuard;
 use tokio_util::sync::CancellationToken;
-pub(crate) use upload::upload_initdb_dir;
 use utils::backoff::{
    self, exponential_backoff, DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS,
 };
-use utils::timeout::{timeout_cancellable, TimeoutCancellableError};

 use std::collections::{HashMap, VecDeque};
 use std::sync::atomic::{AtomicU32, Ordering};
 use std::sync::{Arc, Mutex};
-use std::time::Duration;

 use remote_storage::{DownloadError, GenericRemoteStorage, RemotePath};
 use std::ops::DerefMut;
@@ -254,11 +249,6 @@ pub(crate) const FAILED_REMOTE_OP_RETRIES: u32 = 10;
 // retries. Uploads and deletions are retried forever, though.
 pub(crate) const FAILED_UPLOAD_WARN_THRESHOLD: u32 = 3;

-pub(crate) const INITDB_PATH: &str = "initdb.tar.zst";
-
-/// Default buffer size when interfacing with [`tokio::fs::File`].
-pub(crate) const BUFFER_SIZE: usize = 32 * 1024;
-
 pub enum MaybeDeletedIndexPart {
    IndexPart(IndexPart),
    Deleted(IndexPart),
@@ -307,7 +297,7 @@ pub struct RemoteTimelineClient {

    runtime: tokio::runtime::Handle,

-    tenant_shard_id: TenantShardId,
+    tenant_id: TenantId,
    timeline_id: TimelineId,
    generation: Generation,

@@ -318,47 +308,6 @@ pub struct RemoteTimelineClient {
    storage_impl: GenericRemoteStorage,

    deletion_queue_client: DeletionQueueClient,
-
-    cancel: CancellationToken,
-}
-
-/// This timeout is intended to deal with hangs in lower layers, e.g. stuck TCP flows.  It is not
-/// intended to be snappy enough for prompt shutdown, as we have a CancellationToken for that.
-const UPLOAD_TIMEOUT: Duration = Duration::from_secs(120);
-const DOWNLOAD_TIMEOUT: Duration = Duration::from_secs(120);
-
-/// Wrapper for timeout_cancellable that flattens result and converts TimeoutCancellableError to anyhow.
-///
-/// This is a convenience for the various upload functions.  In future
-/// the anyhow::Error result should be replaced with a more structured type that
-/// enables callers to avoid handling shutdown as an error.
-async fn upload_cancellable<F>(cancel: &CancellationToken, future: F) -> anyhow::Result<()>
-where
-    F: std::future::Future<Output = anyhow::Result<()>>,
-{
-    match timeout_cancellable(UPLOAD_TIMEOUT, cancel, future).await {
-        Ok(Ok(())) => Ok(()),
-        Ok(Err(e)) => Err(e),
-        Err(TimeoutCancellableError::Timeout) => Err(anyhow::anyhow!("Timeout")),
-        Err(TimeoutCancellableError::Cancelled) => Err(anyhow::anyhow!("Shutting down")),
-    }
-}
-/// Wrapper for timeout_cancellable that flattens result and converts TimeoutCancellableError to DownloaDError.
-async fn download_cancellable<F, R>(
-    cancel: &CancellationToken,
-    future: F,
-) -> Result<R, DownloadError>
-where
-    F: std::future::Future<Output = Result<R, DownloadError>>,
-{
-    match timeout_cancellable(DOWNLOAD_TIMEOUT, cancel, future).await {
-        Ok(Ok(r)) => Ok(r),
-        Ok(Err(e)) => Err(e),
-        Err(TimeoutCancellableError::Timeout) => {
-            Err(DownloadError::Other(anyhow::anyhow!("Timed out")))
-        }
-        Err(TimeoutCancellableError::Cancelled) => Err(DownloadError::Cancelled),
-    }
 }

 impl RemoteTimelineClient {
@@ -372,7 +321,7 @@ impl RemoteTimelineClient {
        remote_storage: GenericRemoteStorage,
        deletion_queue_client: DeletionQueueClient,
        conf: &'static PageServerConf,
-        tenant_shard_id: TenantShardId,
+        tenant_id: TenantId,
        timeline_id: TimelineId,
        generation: Generation,
    ) -> RemoteTimelineClient {
@@ -384,17 +333,13 @@ impl RemoteTimelineClient {
            } else {
                BACKGROUND_RUNTIME.handle().clone()
            },
-            tenant_shard_id,
+            tenant_id,
            timeline_id,
            generation,
            storage_impl: remote_storage,
            deletion_queue_client,
            upload_queue: Mutex::new(UploadQueue::Uninitialized),
-            metrics: Arc::new(RemoteTimelineClientMetrics::new(
-                &tenant_shard_id,
-                &timeline_id,
-            )),
-            cancel: CancellationToken::new(),
+            metrics: Arc::new(RemoteTimelineClientMetrics::new(&tenant_id, &timeline_id)),
        }
    }

@@ -515,13 +460,13 @@ impl RemoteTimelineClient {

        let index_part = download::download_index_part(
            &self.storage_impl,
-            &self.tenant_shard_id,
+            &self.tenant_id,
            &self.timeline_id,
            self.generation,
            cancel,
        )
        .measure_remote_op(
-            self.tenant_shard_id.tenant_id,
+            self.tenant_id,
            self.timeline_id,
            RemoteOpFileKind::Index,
            RemoteOpKind::Download,
@@ -545,7 +490,6 @@ impl RemoteTimelineClient {
        &self,
        layer_file_name: &LayerFileName,
        layer_metadata: &LayerFileMetadata,
-        cancel: &CancellationToken,
    ) -> anyhow::Result<u64> {
        let downloaded_size = {
            let _unfinished_gauge_guard = self.metrics.call_begin(
@@ -558,14 +502,13 @@ impl RemoteTimelineClient {
            download::download_layer_file(
                self.conf,
                &self.storage_impl,
-                self.tenant_shard_id,
+                self.tenant_id,
                self.timeline_id,
                layer_file_name,
                layer_metadata,
-                cancel,
            )
            .measure_remote_op(
-                self.tenant_shard_id.tenant_id,
+                self.tenant_id,
                self.timeline_id,
                RemoteOpFileKind::Layer,
                RemoteOpKind::Download,
@@ -711,10 +654,10 @@ impl RemoteTimelineClient {
        let mut guard = self.upload_queue.lock().unwrap();
        let upload_queue = guard.initialized_mut()?;

-        let with_metadata =
+        let with_generations =
            self.schedule_unlinking_of_layers_from_index_part0(upload_queue, names.iter().cloned());

-        self.schedule_deletion_of_unlinked0(upload_queue, with_metadata);
+        self.schedule_deletion_of_unlinked0(upload_queue, with_generations);

        // Launch the tasks immediately, if possible
        self.launch_queued_tasks(upload_queue);
@@ -749,7 +692,7 @@ impl RemoteTimelineClient {
        self: &Arc<Self>,
        upload_queue: &mut UploadQueueInitialized,
        names: I,
-    ) -> Vec<(LayerFileName, LayerFileMetadata)>
+    ) -> Vec<(LayerFileName, Generation)>
    where
        I: IntoIterator<Item = LayerFileName>,
    {
@@ -757,17 +700,16 @@ impl RemoteTimelineClient {
        // so we don't need update it. Just serialize it.
        let metadata = upload_queue.latest_metadata.clone();

-        // Decorate our list of names with each name's metadata, dropping
-        // names that are unexpectedly missing from our metadata.  This metadata
-        // is later used when physically deleting layers, to construct key paths.
-        let with_metadata: Vec<_> = names
+        // Decorate our list of names with each name's generation, dropping
+        // names that are unexpectedly missing from our metadata.
+        let with_generations: Vec<_> = names
            .into_iter()
            .filter_map(|name| {
                let meta = upload_queue.latest_files.remove(&name);

                if let Some(meta) = meta {
                    upload_queue.latest_files_changes_since_metadata_upload_scheduled += 1;
-                    Some((name, meta))
+                    Some((name, meta.generation))
                } else {
                    // This can only happen if we forgot to to schedule the file upload
                    // before scheduling the delete. Log it because it is a rare/strange
@@ -780,10 +722,9 @@ impl RemoteTimelineClient {
            .collect();

        #[cfg(feature = "testing")]
-        for (name, metadata) in &with_metadata {
-            let gen = metadata.generation;
-            if let Some(unexpected) = upload_queue.dangling_files.insert(name.to_owned(), gen) {
-                if unexpected == gen {
+        for (name, gen) in &with_generations {
+            if let Some(unexpected) = upload_queue.dangling_files.insert(name.to_owned(), *gen) {
+                if &unexpected == gen {
                    tracing::error!("{name} was unlinked twice with same generation");
                } else {
                    tracing::error!("{name} was unlinked twice with different generations {gen:?} and {unexpected:?}");
@@ -798,14 +739,14 @@ impl RemoteTimelineClient {
            self.schedule_index_upload(upload_queue, metadata);
        }

-        with_metadata
+        with_generations
    }

    /// Schedules deletion for layer files which have previously been unlinked from the
    /// `index_part.json` with [`Self::schedule_gc_update`] or [`Self::schedule_compaction_update`].
    pub(crate) fn schedule_deletion_of_unlinked(
        self: &Arc<Self>,
-        layers: Vec<(LayerFileName, LayerFileMetadata)>,
+        layers: Vec<(LayerFileName, Generation)>,
    ) -> anyhow::Result<()> {
        let mut guard = self.upload_queue.lock().unwrap();
        let upload_queue = guard.initialized_mut()?;
@@ -818,22 +759,16 @@ impl RemoteTimelineClient {
    fn schedule_deletion_of_unlinked0(
        self: &Arc<Self>,
        upload_queue: &mut UploadQueueInitialized,
-        with_metadata: Vec<(LayerFileName, LayerFileMetadata)>,
+        with_generations: Vec<(LayerFileName, Generation)>,
    ) {
-        for (name, meta) in &with_metadata {
-            info!(
-                "scheduling deletion of layer {}{} (shard {})",
-                name,
-                meta.generation.get_suffix(),
-                meta.shard
-            );
+        for (name, gen) in &with_generations {
+            info!("scheduling deletion of layer {}{}", name, gen.get_suffix());
        }

        #[cfg(feature = "testing")]
-        for (name, meta) in &with_metadata {
-            let gen = meta.generation;
+        for (name, gen) in &with_generations {
            match upload_queue.dangling_files.remove(name) {
-                Some(same) if same == gen => { /* expected */ }
+                Some(same) if &same == gen => { /* expected */ }
                Some(other) => {
                    tracing::error!("{name} was unlinked with {other:?} but deleted with {gen:?}");
                }
@@ -845,7 +780,7 @@ impl RemoteTimelineClient {

        // schedule the actual deletions
        let op = UploadOp::Delete(Delete {
-            layers: with_metadata,
+            layers: with_generations,
        });
        self.calls_unfinished_metric_begin(&op);
        upload_queue.queued_operations.push_back(op);
@@ -874,29 +809,23 @@ impl RemoteTimelineClient {
        Ok(())
    }

+    ///
    /// Wait for all previously scheduled uploads/deletions to complete
-    pub(crate) async fn wait_completion(self: &Arc<Self>) -> anyhow::Result<()> {
+    ///
+    pub async fn wait_completion(self: &Arc<Self>) -> anyhow::Result<()> {
        let mut receiver = {
            let mut guard = self.upload_queue.lock().unwrap();
            let upload_queue = guard.initialized_mut()?;
-            self.schedule_barrier0(upload_queue)
+            self.schedule_barrier(upload_queue)
        };

        if receiver.changed().await.is_err() {
            anyhow::bail!("wait_completion aborted because upload queue was stopped");
        }
-
        Ok(())
    }

-    pub(crate) fn schedule_barrier(self: &Arc<Self>) -> anyhow::Result<()> {
-        let mut guard = self.upload_queue.lock().unwrap();
-        let upload_queue = guard.initialized_mut()?;
-        self.schedule_barrier0(upload_queue);
-        Ok(())
-    }
-
-    fn schedule_barrier0(
+    fn schedule_barrier(
        self: &Arc<Self>,
        upload_queue: &mut UploadQueueInitialized,
    ) -> tokio::sync::watch::Receiver<()> {
@@ -912,56 +841,6 @@ impl RemoteTimelineClient {
        receiver
    }

-    /// Wait for all previously scheduled operations to complete, and then stop.
-    ///
-    /// Not cancellation safe
-    pub(crate) async fn shutdown(self: &Arc<Self>) -> Result<(), StopError> {
-        // On cancellation the queue is left in ackward state of refusing new operations but
-        // proper stop is yet to be called. On cancel the original or some later task must call
-        // `stop` or `shutdown`.
-        let sg = scopeguard::guard((), |_| {
-            tracing::error!("RemoteTimelineClient::shutdown was cancelled; this should not happen, do not make this into an allowed_error")
-        });
-
-        let fut = {
-            let mut guard = self.upload_queue.lock().unwrap();
-            let upload_queue = match &mut *guard {
-                UploadQueue::Stopped(_) => return Ok(()),
-                UploadQueue::Uninitialized => return Err(StopError::QueueUninitialized),
-                UploadQueue::Initialized(ref mut init) => init,
-            };
-
-            // if the queue is already stuck due to a shutdown operation which was cancelled, then
-            // just don't add more of these as they would never complete.
-            //
-            // TODO: if launch_queued_tasks were to be refactored to accept a &mut UploadQueue
-            // in every place we would not have to jump through this hoop, and this method could be
-            // made cancellable.
-            if !upload_queue.shutting_down {
-                upload_queue.shutting_down = true;
-                upload_queue.queued_operations.push_back(UploadOp::Shutdown);
-                // this operation is not counted similar to Barrier
-
-                self.launch_queued_tasks(upload_queue);
-            }
-
-            upload_queue.shutdown_ready.clone().acquire_owned()
-        };
-
-        let res = fut.await;
-
-        scopeguard::ScopeGuard::into_inner(sg);
-
-        match res {
-            Ok(_permit) => unreachable!("shutdown_ready should not have been added permits"),
-            Err(_closed) => {
-                // expected
-            }
-        }
-
-        self.stop()
-    }
-
    /// Set the deleted_at field in the remote index file.
    ///
    /// This fails if the upload queue has not been `stop()`ed.
@@ -1013,11 +892,10 @@ impl RemoteTimelineClient {
            || {
                upload::upload_index_part(
                    &self.storage_impl,
-                    &self.tenant_shard_id,
+                    &self.tenant_id,
                    &self.timeline_id,
                    self.generation,
                    &index_part_with_deleted_at,
-                    &self.cancel,
                )
            },
            |_e| false,
@@ -1027,7 +905,8 @@ impl RemoteTimelineClient {
            // when executed as part of tenant deletion this happens in the background
            2,
            "persist_index_part_with_deleted_flag",
-            backoff::Cancel::new(self.cancel.clone(), || anyhow::anyhow!("Cancelled")),
+            // TODO: use a cancellation token (https://github.com/neondatabase/neon/issues/5066)
+            backoff::Cancel::new(CancellationToken::new(), || unreachable!()),
        )
        .await?;

@@ -1071,9 +950,8 @@ impl RemoteTimelineClient {
                .drain()
                .map(|(file_name, meta)| {
                    remote_layer_path(
-                        &self.tenant_shard_id.tenant_id,
+                        &self.tenant_id,
                        &self.timeline_id,
-                        meta.shard,
                        &file_name,
                        meta.generation,
                    )
@@ -1086,7 +964,7 @@ impl RemoteTimelineClient {

        // Do not delete index part yet, it is needed for possible retry. If we remove it first
        // and retry will arrive to different pageserver there wont be any traces of it on remote storage
-        let timeline_storage_path = remote_timeline_path(&self.tenant_shard_id, &self.timeline_id);
+        let timeline_storage_path = remote_timeline_path(&self.tenant_id, &self.timeline_id);

        // Execute all pending deletions, so that when we proceed to do a list_prefixes below, we aren't
        // taking the burden of listing all the layers that we already know we should delete.
@@ -1122,22 +1000,12 @@ impl RemoteTimelineClient {
            .unwrap_or(
                // No generation-suffixed indices, assume we are dealing with
                // a legacy index.
-                remote_index_path(&self.tenant_shard_id, &self.timeline_id, Generation::none()),
+                remote_index_path(&self.tenant_id, &self.timeline_id, Generation::none()),
            );

        let remaining_layers: Vec<RemotePath> = remaining
            .into_iter()
-            .filter(|p| {
-                if p == &latest_index {
-                    return false;
-                }
-                if let Some(name) = p.object_name() {
-                    if name == INITDB_PATH {
-                        return false;
-                    }
-                }
-                true
-            })
+            .filter(|p| p != &latest_index)
            .inspect(|path| {
                if let Some(name) = path.object_name() {
                    info!(%name, "deleting a file not referenced from index_part.json");
@@ -1203,9 +1071,7 @@ impl RemoteTimelineClient {
                    upload_queue.num_inprogress_deletions == upload_queue.inprogress_tasks.len()
                }

-                UploadOp::Barrier(_) | UploadOp::Shutdown => {
-                    upload_queue.inprogress_tasks.is_empty()
-                }
+                UploadOp::Barrier(_) => upload_queue.inprogress_tasks.is_empty(),
            };

            // If we cannot launch this task, don't look any further.
@@ -1218,13 +1084,6 @@ impl RemoteTimelineClient {
                break;
            }

-            if let UploadOp::Shutdown = next_op {
-                // leave the op in the queue but do not start more tasks; it will be dropped when
-                // the stop is called.
-                upload_queue.shutdown_ready.close();
-                break;
-            }
-
            // We can launch this task. Remove it from the queue first.
            let next_op = upload_queue.queued_operations.pop_front().unwrap();

@@ -1245,7 +1104,6 @@ impl RemoteTimelineClient {
                    sender.send_replace(());
                    continue;
                }
-                UploadOp::Shutdown => unreachable!("shutdown is intentionally never popped off"),
            };

            // Assign unique ID to this task
@@ -1264,12 +1122,12 @@ impl RemoteTimelineClient {

            // Spawn task to perform the task
            let self_rc = Arc::clone(self);
-            let tenant_shard_id = self.tenant_shard_id;
+            let tenant_id = self.tenant_id;
            let timeline_id = self.timeline_id;
            task_mgr::spawn(
                &self.runtime,
                TaskKind::RemoteUploadTask,
-                Some(self.tenant_shard_id),
+                Some(self.tenant_id),
                Some(self.timeline_id),
                "remote upload",
                false,
@@ -1277,7 +1135,7 @@ impl RemoteTimelineClient {
                    self_rc.perform_upload_task(task).await;
                    Ok(())
                }
-                .instrument(info_span!(parent: None, "remote_upload", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), %timeline_id, %upload_task_id)),
+                .instrument(info_span!(parent: None, "remote_upload", %tenant_id, %timeline_id, %upload_task_id)),
            );

            // Loop back to process next task
@@ -1327,10 +1185,9 @@ impl RemoteTimelineClient {
                        path,
                        layer_metadata,
                        self.generation,
-                        &self.cancel,
                    )
                    .measure_remote_op(
-                        self.tenant_shard_id.tenant_id,
+                        self.tenant_id,
                        self.timeline_id,
                        RemoteOpFileKind::Layer,
                        RemoteOpKind::Upload,
@@ -1350,14 +1207,13 @@ impl RemoteTimelineClient {

                    let res = upload::upload_index_part(
                        &self.storage_impl,
-                        &self.tenant_shard_id,
+                        &self.tenant_id,
                        &self.timeline_id,
                        self.generation,
                        index_part,
-                        &self.cancel,
                    )
                    .measure_remote_op(
-                        self.tenant_shard_id.tenant_id,
+                        self.tenant_id,
                        self.timeline_id,
                        RemoteOpFileKind::Index,
                        RemoteOpKind::Upload,
@@ -1373,22 +1229,20 @@ impl RemoteTimelineClient {
                    }
                    res
                }
-                UploadOp::Delete(delete) => {
-                    pausable_failpoint!("before-delete-layer-pausable");
-                    self.deletion_queue_client
-                        .push_layers(
-                            self.tenant_shard_id,
-                            self.timeline_id,
-                            self.generation,
-                            delete.layers.clone(),
-                        )
-                        .await
-                        .map_err(|e| anyhow::anyhow!(e))
-                }
-                unexpected @ UploadOp::Barrier(_) | unexpected @ UploadOp::Shutdown => {
+                UploadOp::Delete(delete) => self
+                    .deletion_queue_client
+                    .push_layers(
+                        self.tenant_id,
+                        self.timeline_id,
+                        self.generation,
+                        delete.layers.clone(),
+                    )
+                    .await
+                    .map_err(|e| anyhow::anyhow!(e)),
+                UploadOp::Barrier(_) => {
                    // unreachable. Barrier operations are handled synchronously in
                    // launch_queued_tasks
-                    warn!("unexpected {unexpected:?} operation in perform_upload_task");
+                    warn!("unexpected Barrier operation in perform_upload_task");
                    break;
                }
            };
@@ -1482,7 +1336,7 @@ impl RemoteTimelineClient {
                    upload_queue.num_inprogress_deletions -= 1;
                    None
                }
-                UploadOp::Barrier(..) | UploadOp::Shutdown => unreachable!(),
+                UploadOp::Barrier(_) => unreachable!(),
            };

            // Launch any queued tasks that were unblocked by this one.
@@ -1496,7 +1350,7 @@ impl RemoteTimelineClient {
            // data safety guarantees (see docs/rfcs/025-generation-numbers.md)
            self.deletion_queue_client
                .update_remote_consistent_lsn(
-                    self.tenant_shard_id,
+                    self.tenant_id,
                    self.timeline_id,
                    self.generation,
                    lsn,
@@ -1537,7 +1391,7 @@ impl RemoteTimelineClient {
                    reason: "should we track deletes? positive or negative sign?",
                },
            ),
-            UploadOp::Barrier(..) | UploadOp::Shutdown => {
+            UploadOp::Barrier(_) => {
                // we do not account these
                return None;
            }
@@ -1563,13 +1417,10 @@ impl RemoteTimelineClient {
    }

    /// Close the upload queue for new operations and cancel queued operations.
-    ///
-    /// Use [`RemoteTimelineClient::shutdown`] for graceful stop.
-    ///
    /// In-progress operations will still be running after this function returns.
    /// Use `task_mgr::shutdown_tasks(None, Some(self.tenant_id), Some(timeline_id))`
    /// to wait for them to complete, after calling this function.
-    pub(crate) fn stop(&self) -> Result<(), StopError> {
+    pub fn stop(&self) -> Result<(), StopError> {
        // Whichever *task* for this RemoteTimelineClient grabs the mutex first will transition the queue
        // into stopped state, thereby dropping all off the queued *ops* which haven't become *tasks* yet.
        // The other *tasks* will come here and observe an already shut down queue and hence simply wrap up their business.
@@ -1607,8 +1458,6 @@ impl RemoteTimelineClient {
                        queued_operations: VecDeque::default(),
                        #[cfg(feature = "testing")]
                        dangling_files: HashMap::default(),
-                        shutting_down: false,
-                        shutdown_ready: Arc::new(tokio::sync::Semaphore::new(0)),
                    };

                    let upload_queue = std::mem::replace(
@@ -1652,51 +1501,26 @@ impl RemoteTimelineClient {
            }
        }
    }
-
-    pub(crate) fn get_layers_metadata(
-        &self,
-        layers: Vec<LayerFileName>,
-    ) -> anyhow::Result<Vec<Option<LayerFileMetadata>>> {
-        let q = self.upload_queue.lock().unwrap();
-        let q = match &*q {
-            UploadQueue::Stopped(_) | UploadQueue::Uninitialized => {
-                anyhow::bail!("queue is in state {}", q.as_str())
-            }
-            UploadQueue::Initialized(inner) => inner,
-        };
-
-        let decorated = layers.into_iter().map(|l| q.latest_files.get(&l).cloned());
-
-        Ok(decorated.collect())
-    }
 }

-pub fn remote_timelines_path(tenant_shard_id: &TenantShardId) -> RemotePath {
-    let path = format!("tenants/{tenant_shard_id}/{TIMELINES_SEGMENT_NAME}");
+pub fn remote_timelines_path(tenant_id: &TenantId) -> RemotePath {
+    let path = format!("tenants/{tenant_id}/{TIMELINES_SEGMENT_NAME}");
    RemotePath::from_string(&path).expect("Failed to construct path")
 }

-pub fn remote_timeline_path(
-    tenant_shard_id: &TenantShardId,
-    timeline_id: &TimelineId,
-) -> RemotePath {
-    remote_timelines_path(tenant_shard_id).join(Utf8Path::new(&timeline_id.to_string()))
+pub fn remote_timeline_path(tenant_id: &TenantId, timeline_id: &TimelineId) -> RemotePath {
+    remote_timelines_path(tenant_id).join(Utf8Path::new(&timeline_id.to_string()))
 }

-/// Note that the shard component of a remote layer path is _not_ always the same
-/// as in the TenantShardId of the caller: tenants may reference layers from a different
-/// ShardIndex.  Use the ShardIndex from the layer's metadata.
 pub fn remote_layer_path(
    tenant_id: &TenantId,
    timeline_id: &TimelineId,
-    shard: ShardIndex,
    layer_file_name: &LayerFileName,
    generation: Generation,
 ) -> RemotePath {
    // Generation-aware key format
    let path = format!(
-        "tenants/{tenant_id}{0}/{TIMELINES_SEGMENT_NAME}/{timeline_id}/{1}{2}",
-        shard.get_suffix(),
+        "tenants/{tenant_id}/{TIMELINES_SEGMENT_NAME}/{timeline_id}/{0}{1}",
        layer_file_name.file_name(),
        generation.get_suffix()
    );
@@ -1704,33 +1528,19 @@ pub fn remote_layer_path(
    RemotePath::from_string(&path).expect("Failed to construct path")
 }

-pub fn remote_initdb_archive_path(tenant_id: &TenantId, timeline_id: &TimelineId) -> RemotePath {
-    RemotePath::from_string(&format!(
-        "tenants/{tenant_id}/{TIMELINES_SEGMENT_NAME}/{timeline_id}/{INITDB_PATH}"
-    ))
-    .expect("Failed to construct path")
-}
-
 pub fn remote_index_path(
-    tenant_shard_id: &TenantShardId,
+    tenant_id: &TenantId,
    timeline_id: &TimelineId,
    generation: Generation,
 ) -> RemotePath {
    RemotePath::from_string(&format!(
-        "tenants/{tenant_shard_id}/{TIMELINES_SEGMENT_NAME}/{timeline_id}/{0}{1}",
+        "tenants/{tenant_id}/{TIMELINES_SEGMENT_NAME}/{timeline_id}/{0}{1}",
        IndexPart::FILE_NAME,
        generation.get_suffix()
    ))
    .expect("Failed to construct path")
 }

-pub const HEATMAP_BASENAME: &str = "heatmap-v1.json";
-
-pub(crate) fn remote_heatmap_path(tenant_shard_id: &TenantShardId) -> RemotePath {
-    RemotePath::from_string(&format!("tenants/{tenant_shard_id}/{HEATMAP_BASENAME}"))
-        .expect("Failed to construct path")
-}
-
 /// Given the key of an index, parse out the generation part of the name
 pub fn parse_remote_index_path(path: RemotePath) -> Option<Generation> {
    let file_name = match path.get_path().file_name() {
@@ -1866,17 +1676,16 @@ mod tests {
            Arc::new(RemoteTimelineClient {
                conf: self.harness.conf,
                runtime: tokio::runtime::Handle::current(),
-                tenant_shard_id: self.harness.tenant_shard_id,
+                tenant_id: self.harness.tenant_id,
                timeline_id: TIMELINE_ID,
                generation,
                storage_impl: self.harness.remote_storage.clone(),
                deletion_queue_client: self.harness.deletion_queue.new_client(),
                upload_queue: Mutex::new(UploadQueue::Uninitialized),
                metrics: Arc::new(RemoteTimelineClientMetrics::new(
-                    &self.harness.tenant_shard_id,
+                    &self.harness.tenant_id,
                    &TIMELINE_ID,
                )),
-                cancel: CancellationToken::new(),
            })
        }

@@ -1950,7 +1759,6 @@ mod tests {
        println!("remote_timeline_dir: {remote_timeline_dir}");

        let generation = harness.generation;
-        let shard = harness.shard;

        // Create a couple of dummy files,  schedule upload for them

@@ -1967,7 +1775,7 @@ mod tests {
                harness.conf,
                &timeline,
                name,
-                LayerFileMetadata::new(contents.len() as u64, generation, shard),
+                LayerFileMetadata::new(contents.len() as u64, generation),
            )
        }).collect::<Vec<_>>();

@@ -2116,7 +1924,7 @@ mod tests {
            harness.conf,
            &timeline,
            layer_file_name_1.clone(),
-            LayerFileMetadata::new(content_1.len() as u64, harness.generation, harness.shard),
+            LayerFileMetadata::new(content_1.len() as u64, harness.generation),
        );

        #[derive(Debug, PartialEq, Clone, Copy)]
@@ -2202,12 +2010,7 @@ mod tests {
        std::fs::create_dir_all(remote_timeline_dir).expect("creating test dir should work");

        let index_path = test_state.harness.remote_fs_dir.join(
-            remote_index_path(
-                &test_state.harness.tenant_shard_id,
-                &TIMELINE_ID,
-                generation,
-            )
-            .get_path(),
+            remote_index_path(&test_state.harness.tenant_id, &TIMELINE_ID, generation).get_path(),
        );
        eprintln!("Writing {index_path}");
        std::fs::write(&index_path, index_part_bytes).unwrap();
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Arseny Sher	32d4e4914a	Add wait events without query to metric.	2023-11-16 23:56:04 +01:00
Arseny Sher	d4d577e7ff	Add query to pg_wait_sampling metric	2023-11-16 22:42:08 +01:00
Arseny Sher	f552aa05fa	Add pg_wait_sampling metric for vms.	2023-11-16 22:04:29 +01:00
Arthur Petukhovsky	779badb7c5	Join postgres multiline logs	2023-11-16 20:54:02 +00:00
Arseny Sher	e6eb548491	create extension pg_wait_sampling in compute_ctl	2023-11-16 20:54:02 +00:00
Arseny Sher	16e9eb2832	Try to enable a custom postgres_exporter query.	2023-11-16 20:54:02 +00:00
Arseny Sher	042686183b	Add pg_wait_sampling extension.	2023-11-16 20:54:02 +00:00