Compare commits


4 Commits

Author     | SHA1       | Message                                                               | Date
John Spray | 94e1d80a64 | scrubber: make scan-metadata enumerate relics & unreadable timelines | 2023-12-08 14:48:41 +00:00
John Spray | a28d91e8bc | scrubber: report on generation-ful-ness of indices                   | 2023-12-08 13:59:42 +00:00
John Spray | 041d610fbe | scrubber: handle initdb files                                         | 2023-12-08 13:49:40 +00:00
John Spray | ec03c29644 | scrubber: only trim prefix if it ends with /                          | 2023-12-08 13:49:40 +00:00
113 changed files with 2097 additions and 4226 deletions

View File

@@ -199,10 +199,6 @@ jobs:
#
git config --global --add safe.directory ${{ github.workspace }}
git config --global --add safe.directory ${GITHUB_WORKSPACE}
for r in 14 15 16; do
git config --global --add safe.directory "${{ github.workspace }}/vendor/postgres-v$r"
git config --global --add safe.directory "${GITHUB_WORKSPACE}/vendor/postgres-v$r"
done
- name: Checkout
uses: actions/checkout@v3
@@ -1101,10 +1097,6 @@ jobs:
#
git config --global --add safe.directory ${{ github.workspace }}
git config --global --add safe.directory ${GITHUB_WORKSPACE}
for r in 14 15 16; do
git config --global --add safe.directory "${{ github.workspace }}/vendor/postgres-v$r"
git config --global --add safe.directory "${GITHUB_WORKSPACE}/vendor/postgres-v$r"
done
- name: Checkout
uses: actions/checkout@v3

View File

@@ -142,10 +142,6 @@ jobs:
#
git config --global --add safe.directory ${{ github.workspace }}
git config --global --add safe.directory ${GITHUB_WORKSPACE}
for r in 14 15 16; do
git config --global --add safe.directory "${{ github.workspace }}/vendor/postgres-v$r"
git config --global --add safe.directory "${GITHUB_WORKSPACE}/vendor/postgres-v$r"
done
- name: Checkout
uses: actions/checkout@v4
@@ -242,20 +238,6 @@ jobs:
options: --init
steps:
- name: Fix git ownership
run: |
# Workaround for `fatal: detected dubious ownership in repository at ...`
#
# Use both ${{ github.workspace }} and ${GITHUB_WORKSPACE} because they're different on host and in containers
# Ref https://github.com/actions/checkout/issues/785
#
git config --global --add safe.directory ${{ github.workspace }}
git config --global --add safe.directory ${GITHUB_WORKSPACE}
for r in 14 15 16; do
git config --global --add safe.directory "${{ github.workspace }}/vendor/postgres-v$r"
git config --global --add safe.directory "${GITHUB_WORKSPACE}/vendor/postgres-v$r"
done
- name: Checkout
uses: actions/checkout@v4
with:

Cargo.lock (generated, 124 changed lines)
View File

@@ -44,12 +44,6 @@ dependencies = [
"memchr",
]
[[package]]
name = "allocator-api2"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5"
[[package]]
name = "android_system_properties"
version = "0.1.5"
@@ -184,7 +178,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "81953c529336010edd6d8e358f886d9581267795c61b19475b71314bffa46d35"
dependencies = [
"concurrent-queue",
"event-listener 2.5.3",
"event-listener",
"futures-core",
]
@@ -205,13 +199,11 @@ dependencies = [
[[package]]
name = "async-lock"
version = "3.2.0"
version = "2.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7125e42787d53db9dd54261812ef17e937c95a51e4d291373b670342fa44310c"
checksum = "287272293e9d8c41773cec55e365490fe034813a2f172f502d6ddcf75b2f582b"
dependencies = [
"event-listener 4.0.0",
"event-listener-strategy",
"pin-project-lite",
"event-listener",
]
[[package]]
@@ -694,9 +686,9 @@ dependencies = [
[[package]]
name = "azure_core"
version = "0.18.0"
version = "0.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a6218987c374650fdad0b476bfc675729762c28dfb35f58608a38a2b1ea337dd"
checksum = "8e29286b9edfdd6f2c7e9d970bb5b015df8621258acab9ecfcea09b2d7692467"
dependencies = [
"async-trait",
"base64 0.21.1",
@@ -704,10 +696,8 @@ dependencies = [
"dyn-clone",
"futures",
"getrandom 0.2.11",
"hmac",
"http-types",
"log",
"once_cell",
"paste",
"pin-project",
"quick-xml",
@@ -716,7 +706,6 @@ dependencies = [
"rustc_version",
"serde",
"serde_json",
"sha2",
"time",
"url",
"uuid",
@@ -724,9 +713,9 @@ dependencies = [
[[package]]
name = "azure_identity"
version = "0.18.1"
version = "0.16.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e1eacc4f7fb2a73d57c39139d0fc3aed78435606055779ddaef4b43cdf919a8"
checksum = "5b67b337346da8739e91ea1e9400a6ebc9bc54e0b2af1d23c9bcd565950588f9"
dependencies = [
"async-lock",
"async-trait",
@@ -736,6 +725,7 @@ dependencies = [
"oauth2",
"pin-project",
"serde",
"serde_json",
"time",
"tz-rs",
"url",
@@ -744,18 +734,21 @@ dependencies = [
[[package]]
name = "azure_storage"
version = "0.18.0"
version = "0.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ade8f2653e408de88b9eafec9f48c3c26b94026375e88adbd34523a7dd9795a1"
checksum = "bed0ccefde57930b2886fd4aed1f70ac469c197b8c2e94828290d71bcbdb5d97"
dependencies = [
"RustyXML",
"async-lock",
"async-trait",
"azure_core",
"bytes",
"futures",
"hmac",
"log",
"serde",
"serde_derive",
"serde_json",
"sha2",
"time",
"url",
"uuid",
@@ -763,14 +756,13 @@ dependencies = [
[[package]]
name = "azure_storage_blobs"
version = "0.18.0"
version = "0.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "025701c7cc5b523100f0f3b2b01723564ec5a86c03236521c06826337047e872"
checksum = "f91a52da2d192cfe43759f61e8bb31a5969f1722d5b85ac89627f356ad674ab4"
dependencies = [
"RustyXML",
"azure_core",
"azure_storage",
"azure_svc_blobstorage",
"bytes",
"futures",
"log",
@@ -782,22 +774,6 @@ dependencies = [
"uuid",
]
[[package]]
name = "azure_svc_blobstorage"
version = "0.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76051e5bb67cea1055abe5e530a0878feac7e0ab4cbbcb4a6adc953a58993389"
dependencies = [
"azure_core",
"bytes",
"futures",
"log",
"once_cell",
"serde",
"serde_json",
"time",
]
[[package]]
name = "backtrace"
version = "0.3.67"
@@ -914,7 +890,7 @@ checksum = "a246e68bb43f6cd9db24bea052a53e40405417c5fb372e3d1a8a7f770a564ef5"
dependencies = [
"memchr",
"once_cell",
"regex-automata 0.1.10",
"regex-automata",
"serde",
]
@@ -1704,27 +1680,6 @@ version = "2.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0"
[[package]]
name = "event-listener"
version = "4.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "770d968249b5d99410d61f5bf89057f3199a077a04d087092f58e7d10692baae"
dependencies = [
"concurrent-queue",
"parking",
"pin-project-lite",
]
[[package]]
name = "event-listener-strategy"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "958e4d70b6d5e81971bebec42271ec641e7ff4e170a6fa605f2b8a8b65cb97d3"
dependencies = [
"event-listener 4.0.0",
"pin-project-lite",
]
[[package]]
name = "fail"
version = "0.5.1"
@@ -2087,10 +2042,6 @@ name = "hashbrown"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a"
dependencies = [
"ahash",
"allocator-api2",
]
[[package]]
name = "hashlink"
@@ -2582,7 +2533,7 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558"
dependencies = [
"regex-automata 0.1.10",
"regex-automata",
]
[[package]]
@@ -2608,9 +2559,9 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
[[package]]
name = "memchr"
version = "2.6.4"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167"
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
[[package]]
name = "memoffset"
@@ -3103,7 +3054,6 @@ dependencies = [
"humantime-serde",
"hyper",
"itertools",
"md5",
"metrics",
"nix 0.26.2",
"num-traits",
@@ -3718,9 +3668,9 @@ dependencies = [
[[package]]
name = "quick-xml"
version = "0.31.0"
version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1004a344b30a54e2ee58d66a71b32d2db2feb0a31f9a2d302bf0536f15de2a33"
checksum = "eff6510e86862b57b210fd8cbe8ed3f0d7d600b9c2863cd4549a2e033c66e956"
dependencies = [
"memchr",
"serde",
@@ -3860,14 +3810,13 @@ dependencies = [
[[package]]
name = "regex"
version = "1.10.2"
version = "1.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343"
checksum = "d1a59b5d8e97dee33696bf13c5ba8ab85341c002922fba050069326b9c498974"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata 0.4.3",
"regex-syntax 0.8.2",
"regex-syntax 0.7.2",
]
[[package]]
@@ -3879,17 +3828,6 @@ dependencies = [
"regex-syntax 0.6.29",
]
[[package]]
name = "regex-automata"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax 0.8.2",
]
[[package]]
name = "regex-syntax"
version = "0.6.29"
@@ -3898,9 +3836,9 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
[[package]]
name = "regex-syntax"
version = "0.8.2"
version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f"
checksum = "436b050e76ed2903236f032a59761c1eb99e1b0aead2c257922771dab1fc8c78"
[[package]]
name = "relative-path"
@@ -5286,8 +5224,6 @@ dependencies = [
"futures-core",
"futures-io",
"futures-sink",
"futures-util",
"hashbrown 0.14.0",
"pin-project-lite",
"tokio",
"tracing",
@@ -5765,7 +5701,6 @@ dependencies = [
"serde",
"serde_assert",
"serde_json",
"serde_path_to_error",
"serde_with",
"signal-hook",
"strum",
@@ -6284,8 +6219,7 @@ dependencies = [
"prost",
"rand 0.8.5",
"regex",
"regex-automata 0.4.3",
"regex-syntax 0.8.2",
"regex-syntax 0.7.2",
"reqwest",
"ring 0.16.20",
"rustls",

View File

@@ -38,10 +38,10 @@ license = "Apache-2.0"
anyhow = { version = "1.0", features = ["backtrace"] }
arc-swap = "1.6"
async-compression = { version = "0.4.0", features = ["tokio", "gzip", "zstd"] }
azure_core = "0.18"
azure_identity = "0.18"
azure_storage = "0.18"
azure_storage_blobs = "0.18"
azure_core = "0.16"
azure_identity = "0.16"
azure_storage = "0.16"
azure_storage_blobs = "0.16"
flate2 = "1.0.26"
async-stream = "0.3"
async-trait = "0.1"
@@ -109,7 +109,7 @@ pin-project-lite = "0.2"
prometheus = {version = "0.13", default_features=false, features = ["process"]} # removes protobuf dependency
prost = "0.11"
rand = "0.8"
regex = "1.10.2"
regex = "1.4"
reqwest = { version = "0.11", default-features = false, features = ["rustls-tls"] }
reqwest-tracing = { version = "0.4.0", features = ["opentelemetry_0_19"] }
reqwest-middleware = "0.2.0"
@@ -149,7 +149,7 @@ tokio-postgres-rustls = "0.10.0"
tokio-rustls = "0.24"
tokio-stream = "0.1"
tokio-tar = "0.3"
tokio-util = { version = "0.7.10", features = ["io", "rt"] }
tokio-util = { version = "0.7", features = ["io"] }
toml = "0.7"
toml_edit = "0.19"
tonic = {version = "0.9", features = ["tls", "tls-roots"]}

View File

@@ -252,7 +252,7 @@ fn create_neon_superuser(spec: &ComputeSpec, client: &mut Client) -> Result<()>
IF NOT EXISTS (
SELECT FROM pg_catalog.pg_roles WHERE rolname = 'neon_superuser')
THEN
CREATE ROLE neon_superuser CREATEDB CREATEROLE NOLOGIN REPLICATION BYPASSRLS IN ROLE pg_read_all_data, pg_write_all_data;
CREATE ROLE neon_superuser CREATEDB CREATEROLE NOLOGIN REPLICATION IN ROLE pg_read_all_data, pg_write_all_data;
IF array_length(roles, 1) IS NOT NULL THEN
EXECUTE format('GRANT neon_superuser TO %s',
array_to_string(ARRAY(SELECT quote_ident(x) FROM unnest(roles) as x), ', '));

View File

@@ -193,11 +193,16 @@ impl Escaping for PgIdent {
/// Build a list of existing Postgres roles
pub fn get_existing_roles(xact: &mut Transaction<'_>) -> Result<Vec<Role>> {
let postgres_roles = xact
.query("SELECT rolname, rolpassword FROM pg_catalog.pg_authid", &[])?
.query(
"SELECT rolname, rolpassword, rolreplication, rolbypassrls FROM pg_catalog.pg_authid",
&[],
)?
.iter()
.map(|row| Role {
name: row.get("rolname"),
encrypted_password: row.get("rolpassword"),
replication: Some(row.get("rolreplication")),
bypassrls: Some(row.get("rolbypassrls")),
options: None,
})
.collect();

View File

@@ -252,6 +252,8 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
let action = if let Some(r) = pg_role {
if (r.encrypted_password.is_none() && role.encrypted_password.is_some())
|| (r.encrypted_password.is_some() && role.encrypted_password.is_none())
|| !r.bypassrls.unwrap_or(false)
|| !r.replication.unwrap_or(false)
{
RoleAction::Update
} else if let Some(pg_pwd) = &r.encrypted_password {
@@ -283,22 +285,14 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
match action {
RoleAction::None => {}
RoleAction::Update => {
// This can be run on /every/ role! Not just ones created through the console.
// This means that if you add some funny ALTER here that adds a permission,
// this will get run even on user-created roles! This will result in different
// behavior before and after a spec gets reapplied. The below ALTER as it stands
// now only grants LOGIN and changes the password. Please do not allow this branch
// to do anything silly.
let mut query: String = format!("ALTER ROLE {} ", name.pg_quote());
let mut query: String =
format!("ALTER ROLE {} BYPASSRLS REPLICATION", name.pg_quote());
query.push_str(&role.to_pg_options());
xact.execute(query.as_str(), &[])?;
}
RoleAction::Create => {
// This branch only runs when roles are created through the console, so it is
// safe to add more permissions here. BYPASSRLS and REPLICATION are inherited
// from neon_superuser.
let mut query: String = format!(
"CREATE ROLE {} INHERIT CREATEROLE CREATEDB IN ROLE neon_superuser",
"CREATE ROLE {} CREATEROLE CREATEDB BYPASSRLS REPLICATION IN ROLE neon_superuser",
name.pg_quote()
);
info!("role create query: '{}'", &query);
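
The hunk above builds role-management SQL as strings keyed on a per-role action. A minimal sketch of that shape, where `quote_ident` is only an illustrative stand-in for the crate's `pg_quote` helper and the query texts follow one side of the diff:

```rust
// Minimal sketch of the action -> SQL mapping. `quote_ident` is an
// illustrative stand-in for the real `pg_quote` helper.
enum RoleAction {
    None,
    Update,
    Create,
}

fn quote_ident(name: &str) -> String {
    // double any embedded quotes, then wrap in double quotes
    format!("\"{}\"", name.replace('"', "\"\""))
}

fn role_query(action: &RoleAction, name: &str) -> Option<String> {
    match action {
        RoleAction::None => None,
        RoleAction::Update => Some(format!(
            "ALTER ROLE {} BYPASSRLS REPLICATION",
            quote_ident(name)
        )),
        RoleAction::Create => Some(format!(
            "CREATE ROLE {} CREATEROLE CREATEDB BYPASSRLS REPLICATION IN ROLE neon_superuser",
            quote_ident(name)
        )),
    }
}

fn main() {
    let q = role_query(&RoleAction::Update, "my role").unwrap();
    assert_eq!(q, "ALTER ROLE \"my role\" BYPASSRLS REPLICATION");
    println!("{q}");
}
```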

View File

@@ -168,7 +168,7 @@ fn print_timelines_tree(
info: t.clone(),
children: BTreeSet::new(),
name: timeline_name_mappings
.remove(&TenantTimelineId::new(t.tenant_id.tenant_id, t.timeline_id)),
.remove(&TenantTimelineId::new(t.tenant_id, t.timeline_id)),
},
)
})

View File

@@ -407,7 +407,6 @@ impl PageServerNode {
.map(|x| x.parse::<bool>())
.transpose()
.context("Failed to parse 'gc_feedback' as bool")?,
heatmap_period: settings.remove("heatmap_period").map(|x| x.to_string()),
};
let request = models::TenantCreateRequest {
@@ -505,7 +504,6 @@ impl PageServerNode {
.map(|x| x.parse::<bool>())
.transpose()
.context("Failed to parse 'gc_feedback' as bool")?,
heatmap_period: settings.remove("heatmap_period").map(|x| x.to_string()),
}
};

View File

@@ -165,7 +165,7 @@ pub fn migrate_tenant(
let found = other_ps_tenants
.into_iter()
.map(|t| t.id)
.any(|i| i.tenant_id == tenant_id);
.any(|i| i == tenant_id);
if !found {
continue;
}

View File

@@ -207,6 +207,8 @@ pub struct DeltaOp {
pub struct Role {
pub name: PgIdent,
pub encrypted_password: Option<String>,
pub replication: Option<bool>,
pub bypassrls: Option<bool>,
pub options: GenericOptions,
}

View File

@@ -3,11 +3,8 @@
//! Otherwise, we might not see all metrics registered via
//! a default registry.
#![deny(clippy::undocumented_unsafe_blocks)]
use once_cell::sync::Lazy;
use prometheus::core::{
Atomic, AtomicU64, Collector, GenericCounter, GenericCounterVec, GenericGauge, GenericGaugeVec,
};
use prometheus::core::{AtomicU64, Collector, GenericGauge, GenericGaugeVec};
pub use prometheus::opts;
pub use prometheus::register;
pub use prometheus::Error;
@@ -135,137 +132,3 @@ fn get_rusage_stats() -> libc::rusage {
rusage.assume_init()
}
}
/// Create an [`IntCounterPairVec`] and registers to default registry.
#[macro_export(local_inner_macros)]
macro_rules! register_int_counter_pair_vec {
($NAME1:expr, $HELP1:expr, $NAME2:expr, $HELP2:expr, $LABELS_NAMES:expr $(,)?) => {{
match (
$crate::register_int_counter_vec!($NAME1, $HELP1, $LABELS_NAMES),
$crate::register_int_counter_vec!($NAME2, $HELP2, $LABELS_NAMES),
) {
(Ok(inc), Ok(dec)) => Ok($crate::IntCounterPairVec::new(inc, dec)),
(Err(e), _) | (_, Err(e)) => Err(e),
}
}};
}
/// Create an [`IntCounterPair`] and registers to default registry.
#[macro_export(local_inner_macros)]
macro_rules! register_int_counter_pair {
($NAME1:expr, $HELP1:expr, $NAME2:expr, $HELP2:expr $(,)?) => {{
match (
$crate::register_int_counter!($NAME1, $HELP1),
$crate::register_int_counter!($NAME2, $HELP2),
) {
(Ok(inc), Ok(dec)) => Ok($crate::IntCounterPair::new(inc, dec)),
(Err(e), _) | (_, Err(e)) => Err(e),
}
}};
}
/// A Pair of [`GenericCounterVec`]s. Like a [`GenericGaugeVec`] but will always observe changes
pub struct GenericCounterPairVec<P: Atomic> {
inc: GenericCounterVec<P>,
dec: GenericCounterVec<P>,
}
/// A Pair of [`GenericCounter`]s. Like a [`GenericGauge`] but will always observe changes
pub struct GenericCounterPair<P: Atomic> {
inc: GenericCounter<P>,
dec: GenericCounter<P>,
}
impl<P: Atomic> GenericCounterPairVec<P> {
pub fn new(inc: GenericCounterVec<P>, dec: GenericCounterVec<P>) -> Self {
Self { inc, dec }
}
/// `get_metric_with_label_values` returns the [`GenericCounterPair<P>`] for the given slice
/// of label values (same order as the VariableLabels in Desc). If that combination of
/// label values is accessed for the first time, a new [`GenericCounterPair<P>`] is created.
///
/// An error is returned if the number of label values is not the same as the
/// number of VariableLabels in Desc.
pub fn get_metric_with_label_values(&self, vals: &[&str]) -> Result<GenericCounterPair<P>> {
Ok(GenericCounterPair {
inc: self.inc.get_metric_with_label_values(vals)?,
dec: self.dec.get_metric_with_label_values(vals)?,
})
}
/// `with_label_values` works as `get_metric_with_label_values`, but panics if an error
/// occurs.
pub fn with_label_values(&self, vals: &[&str]) -> GenericCounterPair<P> {
self.get_metric_with_label_values(vals).unwrap()
}
}
impl<P: Atomic> GenericCounterPair<P> {
pub fn new(inc: GenericCounter<P>, dec: GenericCounter<P>) -> Self {
Self { inc, dec }
}
/// Increment the gauge by 1, returning a guard that decrements by 1 on drop.
pub fn guard(&self) -> GenericCounterPairGuard<P> {
self.inc.inc();
GenericCounterPairGuard(self.dec.clone())
}
/// Increment the gauge by n, returning a guard that decrements by n on drop.
pub fn guard_by(&self, n: P::T) -> GenericCounterPairGuardBy<P> {
self.inc.inc_by(n);
GenericCounterPairGuardBy(self.dec.clone(), n)
}
/// Increase the gauge by 1.
#[inline]
pub fn inc(&self) {
self.inc.inc();
}
/// Decrease the gauge by 1.
#[inline]
pub fn dec(&self) {
self.dec.inc();
}
/// Add the given value to the gauge. (The value can be
/// negative, resulting in a decrement of the gauge.)
#[inline]
pub fn inc_by(&self, v: P::T) {
self.inc.inc_by(v);
}
/// Subtract the given value from the gauge. (The value can be
/// negative, resulting in an increment of the gauge.)
#[inline]
pub fn dec_by(&self, v: P::T) {
self.dec.inc_by(v);
}
}
/// Guard returned by [`GenericCounterPair::guard`]
pub struct GenericCounterPairGuard<P: Atomic>(GenericCounter<P>);
impl<P: Atomic> Drop for GenericCounterPairGuard<P> {
fn drop(&mut self) {
self.0.inc();
}
}
/// Guard returned by [`GenericCounterPair::guard_by`]
pub struct GenericCounterPairGuardBy<P: Atomic>(GenericCounter<P>, P::T);
impl<P: Atomic> Drop for GenericCounterPairGuardBy<P> {
fn drop(&mut self) {
self.0.inc_by(self.1);
}
}
/// A Pair of [`IntCounterVec`]s. Like an [`IntGaugeVec`] but will always observe changes
pub type IntCounterPairVec = GenericCounterPairVec<AtomicU64>;
/// A Pair of [`IntCounter`]s. Like an [`IntGauge`] but will always observe changes
pub type IntCounterPair = GenericCounterPair<AtomicU64>;
/// A guard for [`IntCounterPair`] that will decrement the gauge on drop
pub type IntCounterPairGuard = GenericCounterPairGuard<AtomicU64>;
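
The block above pairs two monotonic counters to emulate a gauge while still recording every change. A small usage sketch of the same idea, assuming only the `prometheus` crate and a simplified guard type in place of the macros shown above:

```rust
// Two monotonic counters ("started"/"finished") emulate a gauge while still
// exposing every change; the guard bumps "finished" on drop.
use prometheus::IntCounter;

struct CounterPair {
    inc: IntCounter,
    dec: IntCounter,
}

struct PairGuard<'a>(&'a IntCounter);

impl Drop for PairGuard<'_> {
    fn drop(&mut self) {
        self.0.inc();
    }
}

impl CounterPair {
    fn guard(&self) -> PairGuard<'_> {
        self.inc.inc();
        PairGuard(&self.dec)
    }
}

fn main() {
    let pair = CounterPair {
        inc: IntCounter::new("tasks_started_total", "tasks started").unwrap(),
        dec: IntCounter::new("tasks_finished_total", "tasks finished").unwrap(),
    };
    {
        let _running = pair.guard(); // started += 1 here, finished += 1 on drop
    }
    assert_eq!(pair.inc.get(), 1);
    assert_eq!(pair.dec.get(), 1);
}
```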

View File

@@ -237,7 +237,6 @@ pub struct TenantConfig {
pub min_resident_size_override: Option<u64>,
pub evictions_low_residence_duration_metric_threshold: Option<String>,
pub gc_feedback: Option<bool>,
pub heatmap_period: Option<String>,
}
/// A flattened analog of a `pagesever::tenant::LocationMode`, which
@@ -358,7 +357,7 @@ pub enum TenantAttachmentStatus {
#[derive(Serialize, Deserialize, Clone)]
pub struct TenantInfo {
pub id: TenantShardId,
pub id: TenantId,
// NB: intentionally not part of OpenAPI, we don't want to commit to a specific set of TenantState's
pub state: TenantState,
/// Sum of the size of all layer files.
@@ -370,7 +369,7 @@ pub struct TenantInfo {
/// This represents the output of the "timeline_detail" and "timeline_list" API calls.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct TimelineInfo {
pub tenant_id: TenantShardId,
pub tenant_id: TenantId,
pub timeline_id: TimelineId,
pub ancestor_timeline_id: Option<TimelineId>,
@@ -386,9 +385,6 @@ pub struct TimelineInfo {
/// The LSN that we are advertizing to safekeepers
pub remote_consistent_lsn_visible: Lsn,
/// The LSN from the start of the root timeline (never changes)
pub initdb_lsn: Lsn,
pub current_logical_size: u64,
pub current_logical_size_is_accurate: bool,
@@ -827,7 +823,7 @@ mod tests {
fn test_tenantinfo_serde() {
// Test serialization/deserialization of TenantInfo
let original_active = TenantInfo {
id: TenantShardId::unsharded(TenantId::generate()),
id: TenantId::generate(),
state: TenantState::Active,
current_physical_size: Some(42),
attachment_status: TenantAttachmentStatus::Attached,
@@ -844,7 +840,7 @@ mod tests {
});
let original_broken = TenantInfo {
id: TenantShardId::unsharded(TenantId::generate()),
id: TenantId::generate(),
state: TenantState::Broken {
reason: "reason".into(),
backtrace: "backtrace info".into(),

View File

@@ -76,11 +76,6 @@ impl TenantShardId {
pub fn shard_slug(&self) -> impl std::fmt::Display + '_ {
ShardSlug(self)
}
/// Convenience for code that has special behavior on the 0th shard.
pub fn is_zero(&self) -> bool {
self.shard_number == ShardNumber(0)
}
}
/// Formatting helper

View File

@@ -271,12 +271,17 @@ impl RemoteStorage for AzureBlobStorage {
let mut builder = blob_client.get();
let range: Range = if let Some(end_exclusive) = end_exclusive {
(start_inclusive..end_exclusive).into()
if let Some(end_exclusive) = end_exclusive {
builder = builder.range(Range::new(start_inclusive, end_exclusive));
} else {
(start_inclusive..).into()
};
builder = builder.range(range);
// Open ranges are not supported by the SDK so we work around
// by setting the upper limit extremely high (but high enough
// to still be representable by signed 64 bit integers).
// TODO remove workaround once the SDK adds open range support
// https://github.com/Azure/azure-sdk-for-rust/issues/1438
let end_exclusive = u64::MAX / 4;
builder = builder.range(Range::new(start_inclusive, end_exclusive));
}
self.download_for_builder(builder).await
}
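
The comment in the hunk above describes working around the lack of open-ended ranges by substituting a very large upper bound. A minimal sketch of that substitution, kept free of Azure SDK types (the resulting pair would then be handed to the SDK's range builder):

```rust
// The SDK range is assumed to need both bounds, so an open-ended read is
// turned into a closed range with a huge (but i64-representable) end.
fn effective_range(start_inclusive: u64, end_exclusive: Option<u64>) -> (u64, u64) {
    const OPEN_RANGE_END: u64 = u64::MAX / 4;
    (start_inclusive, end_exclusive.unwrap_or(OPEN_RANGE_END))
}

fn main() {
    assert_eq!(effective_range(0, Some(1024)), (0, 1024));
    assert_eq!(effective_range(4096, None), (4096, u64::MAX / 4));
    println!("open-ended reads get an upper bound of {}", u64::MAX / 4);
}
```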

View File

@@ -50,8 +50,6 @@ const_format.workspace = true
# why is it only here? no other crate should use it, streams are rarely needed.
tokio-stream = { version = "0.1.14" }
serde_path_to_error.workspace = true
[dev-dependencies]
byteorder.workspace = true
bytes.workspace = true

View File

@@ -1,14 +1,16 @@
use tokio_util::task::{task_tracker::TaskTrackerToken, TaskTracker};
use std::sync::Arc;
use tokio::sync::{mpsc, Mutex};
/// While a reference is kept around, the associated [`Barrier::wait`] will wait.
///
/// Can be cloned, moved and kept around in futures as "guard objects".
#[derive(Clone)]
pub struct Completion(TaskTrackerToken);
pub struct Completion(mpsc::Sender<()>);
/// Barrier will wait until all clones of [`Completion`] have been dropped.
#[derive(Clone)]
pub struct Barrier(TaskTracker);
pub struct Barrier(Arc<Mutex<mpsc::Receiver<()>>>);
impl Default for Barrier {
fn default() -> Self {
@@ -19,7 +21,7 @@ impl Default for Barrier {
impl Barrier {
pub async fn wait(self) {
self.0.wait().await;
self.0.lock().await.recv().await;
}
pub async fn maybe_wait(barrier: Option<Barrier>) {
@@ -31,7 +33,8 @@ impl Barrier {
impl PartialEq for Barrier {
fn eq(&self, other: &Self) -> bool {
TaskTracker::ptr_eq(&self.0, &other.0)
// we don't use dyn so this is good
Arc::ptr_eq(&self.0, &other.0)
}
}
@@ -39,10 +42,8 @@ impl Eq for Barrier {}
/// Create new Guard and Barrier pair.
pub fn channel() -> (Completion, Barrier) {
let tracker = TaskTracker::new();
// otherwise wait never exits
tracker.close();
let token = tracker.token();
(Completion(token), Barrier(tracker))
let (tx, rx) = mpsc::channel::<()>(1);
let rx = Mutex::new(rx);
let rx = Arc::new(rx);
(Completion(tx), Barrier(rx))
}
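
One side of the diff above implements the completion/barrier pair on a bounded `mpsc` channel instead of `TaskTracker`. A self-contained sketch of that pattern, assuming only the `tokio` crate (with default timer and macro features):

```rust
// `Barrier::wait` returns once every `Completion` clone has been dropped,
// because the receiver yields `None` only after all senders are gone
// (nothing is ever actually sent on the channel).
use std::sync::Arc;
use tokio::sync::{mpsc, Mutex};

#[derive(Clone)]
struct Completion(mpsc::Sender<()>);

#[derive(Clone)]
struct Barrier(Arc<Mutex<mpsc::Receiver<()>>>);

impl Barrier {
    async fn wait(self) {
        // `recv` resolves with `None` when the last `Completion` is dropped.
        self.0.lock().await.recv().await;
    }
}

fn channel() -> (Completion, Barrier) {
    let (tx, rx) = mpsc::channel::<()>(1);
    (Completion(tx), Barrier(Arc::new(Mutex::new(rx))))
}

#[tokio::main]
async fn main() {
    let (done, barrier) = channel();
    tokio::spawn(async move {
        let _guard = done; // dropped when this task finishes
        tokio::time::sleep(std::time::Duration::from_millis(50)).await;
    });
    barrier.wait().await; // returns only after the spawned task drops `done`
}
```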

View File

@@ -25,12 +25,8 @@ pub async fn json_request_or_empty_body<T: for<'de> Deserialize<'de>>(
if body.remaining() == 0 {
return Ok(None);
}
let mut deser = serde_json::de::Deserializer::from_reader(body.reader());
serde_path_to_error::deserialize(&mut deser)
// intentionally stringify because the debug version is not helpful in python logs
.map_err(|e| anyhow::anyhow!("Failed to parse json request: {e}"))
serde_json::from_reader(body.reader())
.context("Failed to parse json request")
.map(Some)
.map_err(ApiError::BadRequest)
}
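
One side of the hunk above routes JSON parsing through `serde_path_to_error` so that a failure names the offending field. A small sketch of that wrapping, assuming `serde` (with the derive feature), `serde_json`, and `serde_path_to_error`; the request struct is hypothetical:

```rust
// Deserialize through `serde_path_to_error` so the error message carries the
// path of the offending field (here: `count`).
use serde::Deserialize;

#[derive(Deserialize, Debug)]
struct CreateRequest {
    name: String,
    count: u32,
}

fn main() {
    let body = r#"{ "name": "x", "count": "not-a-number" }"#;
    let mut deser = serde_json::Deserializer::from_str(body);
    let res: Result<CreateRequest, _> = serde_path_to_error::deserialize(&mut deser);
    // Stringified, as in the diff, because the Debug form is less readable in logs.
    let err = res.unwrap_err();
    println!("Failed to parse json request: {err}");
}
```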

View File

@@ -1,7 +1,6 @@
use std::str::FromStr;
use anyhow::Context;
use metrics::{IntCounter, IntCounterVec};
use once_cell::sync::Lazy;
use strum_macros::{EnumString, EnumVariantNames};
@@ -25,48 +24,16 @@ impl LogFormat {
}
}
struct TracingEventCountMetric {
error: IntCounter,
warn: IntCounter,
info: IntCounter,
debug: IntCounter,
trace: IntCounter,
}
static TRACING_EVENT_COUNT_METRIC: Lazy<TracingEventCountMetric> = Lazy::new(|| {
let vec = metrics::register_int_counter_vec!(
static TRACING_EVENT_COUNT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
metrics::register_int_counter_vec!(
"libmetrics_tracing_event_count",
"Number of tracing events, by level",
&["level"]
)
.expect("failed to define metric");
TracingEventCountMetric::new(vec)
.expect("failed to define metric")
});
impl TracingEventCountMetric {
fn new(vec: IntCounterVec) -> Self {
Self {
error: vec.with_label_values(&["error"]),
warn: vec.with_label_values(&["warn"]),
info: vec.with_label_values(&["info"]),
debug: vec.with_label_values(&["debug"]),
trace: vec.with_label_values(&["trace"]),
}
}
fn inc_for_level(&self, level: tracing::Level) {
let counter = match level {
tracing::Level::ERROR => &self.error,
tracing::Level::WARN => &self.warn,
tracing::Level::INFO => &self.info,
tracing::Level::DEBUG => &self.debug,
tracing::Level::TRACE => &self.trace,
};
counter.inc();
}
}
struct TracingEventCountLayer(&'static TracingEventCountMetric);
struct TracingEventCountLayer(&'static metrics::IntCounterVec);
impl<S> tracing_subscriber::layer::Layer<S> for TracingEventCountLayer
where
@@ -77,7 +44,15 @@ where
event: &tracing::Event<'_>,
_ctx: tracing_subscriber::layer::Context<'_, S>,
) {
self.0.inc_for_level(*event.metadata().level());
let level = event.metadata().level();
let level = match *level {
tracing::Level::ERROR => "error",
tracing::Level::WARN => "warn",
tracing::Level::INFO => "info",
tracing::Level::DEBUG => "debug",
tracing::Level::TRACE => "trace",
};
self.0.with_label_values(&[level]).inc();
}
}
@@ -131,9 +106,7 @@ pub fn init(
};
log_layer.with_filter(rust_log_env_filter())
});
let r = r.with(
TracingEventCountLayer(&TRACING_EVENT_COUNT_METRIC).with_filter(rust_log_env_filter()),
);
let r = r.with(TracingEventCountLayer(&TRACING_EVENT_COUNT).with_filter(rust_log_env_filter()));
match tracing_error_layer_enablement {
TracingErrorLayerEnablement::EnableWithRustLogFilter => r
.with(tracing_error::ErrorLayer::default().with_filter(rust_log_env_filter()))
@@ -284,14 +257,14 @@ impl std::fmt::Debug for SecretString {
mod tests {
use metrics::{core::Opts, IntCounterVec};
use crate::logging::{TracingEventCountLayer, TracingEventCountMetric};
use super::TracingEventCountLayer;
#[test]
fn tracing_event_count_metric() {
let counter_vec =
IntCounterVec::new(Opts::new("testmetric", "testhelp"), &["level"]).unwrap();
let metric = Box::leak(Box::new(TracingEventCountMetric::new(counter_vec.clone())));
let layer = TracingEventCountLayer(metric);
let counter_vec = Box::leak(Box::new(counter_vec)); // make it 'static
let layer = TracingEventCountLayer(counter_vec);
use tracing_subscriber::prelude::*;
tracing::subscriber::with_default(tracing_subscriber::registry().with(layer), || {
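
Both sides of the diff above count tracing events per level from a `tracing_subscriber` layer; they differ only in whether the counter is looked up by label string or pre-resolved per level. A self-contained sketch of such a layer, assuming the `tracing` and `tracing-subscriber` crates and using plain atomics in place of prometheus counters:

```rust
// A layer that counts events per level. Plain atomics stand in for the
// prometheus counters used in the real code.
use std::sync::atomic::{AtomicU64, Ordering};
use tracing_subscriber::layer::{Context, Layer, SubscriberExt};

#[derive(Default)]
struct EventCounts {
    error: AtomicU64,
    warn: AtomicU64,
    info: AtomicU64,
    debug: AtomicU64,
    trace: AtomicU64,
}

struct CountLayer(&'static EventCounts);

impl<S: tracing::Subscriber> Layer<S> for CountLayer {
    fn on_event(&self, event: &tracing::Event<'_>, _ctx: Context<'_, S>) {
        let counter = match *event.metadata().level() {
            tracing::Level::ERROR => &self.0.error,
            tracing::Level::WARN => &self.0.warn,
            tracing::Level::INFO => &self.0.info,
            tracing::Level::DEBUG => &self.0.debug,
            tracing::Level::TRACE => &self.0.trace,
        };
        counter.fetch_add(1, Ordering::Relaxed);
    }
}

fn main() {
    let counts: &'static EventCounts = Box::leak(Box::new(EventCounts::default()));
    let subscriber = tracing_subscriber::registry().with(CountLayer(counts));
    tracing::subscriber::with_default(subscriber, || {
        tracing::info!("hello");
        tracing::warn!("careful");
    });
    assert_eq!(counts.info.load(Ordering::Relaxed), 1);
    assert_eq!(counts.warn.load(Ordering::Relaxed), 1);
}
```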

View File

@@ -30,32 +30,18 @@ async fn warn_if_stuck<Fut: std::future::Future>(
let mut fut = std::pin::pin!(fut);
let mut warned = false;
let ret = loop {
loop {
match tokio::time::timeout(warn_period, &mut fut).await {
Ok(ret) => break ret,
Ok(ret) => return ret,
Err(_) => {
tracing::warn!(
gate = name,
elapsed_ms = started.elapsed().as_millis(),
"still waiting, taking longer than expected..."
);
warned = true;
}
}
};
// If we emitted a warning for slowness, also emit a message when we complete, so that
// someone debugging a shutdown can know for sure whether we have moved past this operation.
if warned {
tracing::info!(
gate = name,
elapsed_ms = started.elapsed().as_millis(),
"completed, after taking longer than expected"
)
}
ret
}
#[derive(Debug)]
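
The hunk above wraps a future so that a warning is logged for each period it is still pending (one side also logs a completion message once a warning has been emitted). A minimal sketch of the wrapper, assuming the `tokio` crate and plain `eprintln!` in place of `tracing`:

```rust
// Poll the future with a timeout; warn each period it is still pending.
use std::time::{Duration, Instant};

async fn warn_if_stuck<F: std::future::Future>(
    fut: F,
    name: &str,
    warn_period: Duration,
) -> F::Output {
    let started = Instant::now();
    let mut fut = std::pin::pin!(fut);
    loop {
        match tokio::time::timeout(warn_period, &mut fut).await {
            Ok(ret) => return ret,
            Err(_) => eprintln!(
                "gate {name}: still waiting after {}ms, taking longer than expected...",
                started.elapsed().as_millis()
            ),
        }
    }
}

#[tokio::main]
async fn main() {
    let slow = tokio::time::sleep(Duration::from_millis(250));
    warn_if_stuck(slow, "example-gate", Duration::from_millis(100)).await;
}
```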

View File

@@ -436,9 +436,9 @@ mod tests {
event_mask: 0,
}),
expected_messages: vec![
// Greeting(ProposerGreeting { protocol_version: 2, pg_version: 160001, proposer_id: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], system_id: 0, timeline_id: 9e4c8f36063c6c6e93bc20d65a820f3d, tenant_id: 9e4c8f36063c6c6e93bc20d65a820f3d, tli: 1, wal_seg_size: 16777216 })
// Greeting(ProposerGreeting { protocol_version: 2, pg_version: 160000, proposer_id: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], system_id: 0, timeline_id: 9e4c8f36063c6c6e93bc20d65a820f3d, tenant_id: 9e4c8f36063c6c6e93bc20d65a820f3d, tli: 1, wal_seg_size: 16777216 })
vec![
103, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 113, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
103, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 113, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 158, 76, 143, 54, 6, 60, 108, 110,
147, 188, 32, 214, 90, 130, 15, 61, 158, 76, 143, 54, 6, 60, 108, 110, 147,
188, 32, 214, 90, 130, 15, 61, 1, 0, 0, 0, 0, 0, 0, 1,
@@ -478,7 +478,7 @@ mod tests {
// walproposer will panic when it finishes sync_safekeepers
std::panic::catch_unwind(|| wp.start()).unwrap_err();
// validate the resulting LSN
assert_eq!(receiver.try_recv(), Ok(1337));
assert_eq!(receiver.recv()?, 1337);
Ok(())
// drop() will free up resources here
}

View File

@@ -36,7 +36,6 @@ humantime.workspace = true
humantime-serde.workspace = true
hyper.workspace = true
itertools.workspace = true
md5.workspace = true
nix.workspace = true
# hack to get the number of worker threads tokio uses
num_cpus = { version = "1.15" }

View File

@@ -14,7 +14,7 @@ use pageserver::control_plane_client::ControlPlaneClient;
use pageserver::disk_usage_eviction_task::{self, launch_disk_usage_global_eviction_task};
use pageserver::metrics::{STARTUP_DURATION, STARTUP_IS_LOADING};
use pageserver::task_mgr::WALRECEIVER_RUNTIME;
use pageserver::tenant::{secondary, TenantSharedResources};
use pageserver::tenant::TenantSharedResources;
use remote_storage::GenericRemoteStorage;
use tokio::time::Instant;
use tracing::*;
@@ -425,6 +425,7 @@ fn start_pageserver(
let tenant_manager = Arc::new(tenant_manager);
BACKGROUND_RUNTIME.spawn({
let init_done_rx = init_done_rx;
let shutdown_pageserver = shutdown_pageserver.clone();
let drive_init = async move {
// NOTE: unlike many futures in pageserver, this one is cancellation-safe
@@ -504,17 +505,6 @@ fn start_pageserver(
}
});
let secondary_controller = if let Some(remote_storage) = &remote_storage {
secondary::spawn_tasks(
tenant_manager.clone(),
remote_storage.clone(),
background_jobs_barrier.clone(),
shutdown_pageserver.clone(),
)
} else {
secondary::null_controller()
};
// shared state between the disk-usage backed eviction background task and the http endpoint
// that allows triggering disk-usage based eviction manually. note that the http endpoint
// is still accessible even if background task is not configured as long as remote storage has
@@ -544,7 +534,6 @@ fn start_pageserver(
broker_client.clone(),
disk_usage_eviction_state,
deletion_queue.new_client(),
secondary_controller,
)
.context("Failed to initialize router state")?,
);
@@ -571,6 +560,7 @@ fn start_pageserver(
}
if let Some(metric_collection_endpoint) = &conf.metric_collection_endpoint {
let background_jobs_barrier = background_jobs_barrier;
let metrics_ctx = RequestContext::todo_child(
TaskKind::MetricsCollection,
// This task itself shouldn't download anything.

View File

@@ -70,8 +70,6 @@ pub mod defaults {
pub const DEFAULT_SYNTHETIC_SIZE_CALCULATION_INTERVAL: &str = "10 min";
pub const DEFAULT_BACKGROUND_TASK_MAXIMUM_DELAY: &str = "10s";
pub const DEFAULT_HEATMAP_UPLOAD_CONCURRENCY: usize = 8;
///
/// Default built-in configuration file.
///
@@ -119,8 +117,6 @@ pub mod defaults {
#evictions_low_residence_duration_metric_threshold = '{DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD}'
#gc_feedback = false
#heatmap_upload_concurrency = {DEFAULT_HEATMAP_UPLOAD_CONCURRENCY}
[remote_storage]
"#
@@ -219,10 +215,6 @@ pub struct PageServerConf {
/// If true, pageserver will make best-effort to operate without a control plane: only
/// for use in major incidents.
pub control_plane_emergency_mode: bool,
/// How many heatmap uploads may be done concurrently: lower values implicitly deprioritize
/// heatmap uploads vs. other remote storage operations.
pub heatmap_upload_concurrency: usize,
}
/// We do not want to store this in a PageServerConf because the latter may be logged
@@ -301,8 +293,6 @@ struct PageServerConfigBuilder {
control_plane_api: BuilderValue<Option<Url>>,
control_plane_api_token: BuilderValue<Option<SecretString>>,
control_plane_emergency_mode: BuilderValue<bool>,
heatmap_upload_concurrency: BuilderValue<usize>,
}
impl Default for PageServerConfigBuilder {
@@ -371,8 +361,6 @@ impl Default for PageServerConfigBuilder {
control_plane_api: Set(None),
control_plane_api_token: Set(None),
control_plane_emergency_mode: Set(false),
heatmap_upload_concurrency: Set(DEFAULT_HEATMAP_UPLOAD_CONCURRENCY),
}
}
}
@@ -513,10 +501,6 @@ impl PageServerConfigBuilder {
self.control_plane_emergency_mode = BuilderValue::Set(enabled)
}
pub fn heatmap_upload_concurrency(&mut self, value: usize) {
self.heatmap_upload_concurrency = BuilderValue::Set(value)
}
pub fn build(self) -> anyhow::Result<PageServerConf> {
let concurrent_tenant_size_logical_size_queries = self
.concurrent_tenant_size_logical_size_queries
@@ -611,10 +595,6 @@ impl PageServerConfigBuilder {
control_plane_emergency_mode: self
.control_plane_emergency_mode
.ok_or(anyhow!("missing control_plane_emergency_mode"))?,
heatmap_upload_concurrency: self
.heatmap_upload_concurrency
.ok_or(anyhow!("missing heatmap_upload_concurrency"))?,
})
}
}
@@ -848,9 +828,7 @@ impl PageServerConf {
},
"control_plane_emergency_mode" => {
builder.control_plane_emergency_mode(parse_toml_bool(key, item)?)
},
"heatmap_upload_concurrency" => {
builder.heatmap_upload_concurrency(parse_toml_u64(key, item)? as usize)
},
_ => bail!("unrecognized pageserver option '{key}'"),
}
@@ -918,7 +896,6 @@ impl PageServerConf {
control_plane_api: None,
control_plane_api_token: None,
control_plane_emergency_mode: false,
heatmap_upload_concurrency: defaults::DEFAULT_HEATMAP_UPLOAD_CONCURRENCY,
}
}
}
@@ -1143,8 +1120,7 @@ background_task_maximum_delay = '334 s'
)?,
control_plane_api: None,
control_plane_api_token: None,
control_plane_emergency_mode: false,
heatmap_upload_concurrency: defaults::DEFAULT_HEATMAP_UPLOAD_CONCURRENCY
control_plane_emergency_mode: false
},
"Correct defaults should be used when no config values are provided"
);
@@ -1201,8 +1177,7 @@ background_task_maximum_delay = '334 s'
background_task_maximum_delay: Duration::from_secs(334),
control_plane_api: None,
control_plane_api_token: None,
control_plane_emergency_mode: false,
heatmap_upload_concurrency: defaults::DEFAULT_HEATMAP_UPLOAD_CONCURRENCY
control_plane_emergency_mode: false
},
"Should be able to parse all basic config values correctly"
);

View File

@@ -3,7 +3,7 @@
use crate::context::{DownloadBehavior, RequestContext};
use crate::task_mgr::{self, TaskKind, BACKGROUND_RUNTIME};
use crate::tenant::tasks::BackgroundLoopKind;
use crate::tenant::{mgr, LogicalSizeCalculationCause, PageReconstructError, Tenant};
use crate::tenant::{mgr, LogicalSizeCalculationCause, PageReconstructError};
use camino::Utf8PathBuf;
use consumption_metrics::EventType;
use pageserver_api::models::TenantState;
@@ -256,6 +256,8 @@ async fn calculate_synthetic_size_worker(
info!("calculate_synthetic_size_worker stopped");
};
let cause = LogicalSizeCalculationCause::ConsumptionMetricsSyntheticSize;
loop {
let started_at = Instant::now();
@@ -267,25 +269,26 @@ async fn calculate_synthetic_size_worker(
}
};
for (tenant_shard_id, tenant_state) in tenants {
for (tenant_id, tenant_state) in tenants {
if tenant_state != TenantState::Active {
continue;
}
if !tenant_shard_id.is_zero() {
// We only send consumption metrics from shard 0, so don't waste time calculating
// synthetic size on other shards.
continue;
if let Ok(tenant) = mgr::get_tenant(tenant_id, true) {
// TODO should we use concurrent_background_tasks_rate_limit() here, like the other background tasks?
// We can put in some prioritization for consumption metrics.
// Same for the loop that fetches computed metrics.
// By using the same limiter, we centralize metrics collection for "start" and "finished" counters,
// which turns out is really handy to understand the system.
if let Err(e) = tenant.calculate_synthetic_size(cause, cancel, ctx).await {
if let Some(PageReconstructError::Cancelled) =
e.downcast_ref::<PageReconstructError>()
{
return Ok(());
}
error!("failed to calculate synthetic size for tenant {tenant_id}: {e:#}");
}
}
let Ok(tenant) = mgr::get_tenant(tenant_shard_id, true) else {
continue;
};
// there is never any reason to exit calculate_synthetic_size_worker following any
// return value -- we don't need to care about shutdown because no tenant is found when
// pageserver is shut down.
calculate_and_log(&tenant, cancel, ctx).await;
}
crate::tenant::tasks::warn_when_period_overrun(
@@ -296,7 +299,7 @@ async fn calculate_synthetic_size_worker(
let res = tokio::time::timeout_at(
started_at + synthetic_size_calculation_interval,
cancel.cancelled(),
task_mgr::shutdown_token().cancelled(),
)
.await;
if res.is_ok() {
@@ -304,31 +307,3 @@ async fn calculate_synthetic_size_worker(
}
}
}
async fn calculate_and_log(tenant: &Tenant, cancel: &CancellationToken, ctx: &RequestContext) {
const CAUSE: LogicalSizeCalculationCause =
LogicalSizeCalculationCause::ConsumptionMetricsSyntheticSize;
// TODO should we use concurrent_background_tasks_rate_limit() here, like the other background tasks?
// We can put in some prioritization for consumption metrics.
// Same for the loop that fetches computed metrics.
// By using the same limiter, we centralize metrics collection for "start" and "finished" counters,
// which turns out is really handy to understand the system.
let Err(e) = tenant.calculate_synthetic_size(CAUSE, cancel, ctx).await else {
return;
};
// this error can be returned if timeline is shutting down, but it does not
// mean the synthetic size worker should terminate. we do not need any checks
// in this function because `mgr::get_tenant` will error out after shutdown has
// progressed to shutting down tenants.
let shutting_down = matches!(
e.downcast_ref::<PageReconstructError>(),
Some(PageReconstructError::Cancelled | PageReconstructError::AncestorStopping(_))
);
if !shutting_down {
let tenant_shard_id = tenant.tenant_shard_id();
error!("failed to calculate synthetic size for tenant {tenant_shard_id}: {e:#}");
}
}
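
One side of the hunk above downcasts an `anyhow::Error` to `PageReconstructError` and stays quiet when the failure only reflects shutdown. A sketch of that inspection pattern, assuming the `anyhow` and `thiserror` crates; `ReconstructError` is a hypothetical stand-in for the real error type:

```rust
// Downcast to a specific error type and only log when the failure is not a
// cancellation. `ReconstructError` is a hypothetical stand-in for the real
// `PageReconstructError`.
#[derive(Debug, thiserror::Error)]
enum ReconstructError {
    #[error("operation cancelled")]
    Cancelled,
    #[error("other failure: {0}")]
    Other(String),
}

fn log_unless_shutting_down(e: &anyhow::Error) {
    let shutting_down = matches!(
        e.downcast_ref::<ReconstructError>(),
        Some(ReconstructError::Cancelled)
    );
    if !shutting_down {
        eprintln!("failed to calculate synthetic size: {e:#}");
    }
}

fn main() {
    log_unless_shutting_down(&anyhow::Error::new(ReconstructError::Cancelled)); // stays silent
    log_unless_shutting_down(&anyhow::Error::new(ReconstructError::Other("disk error".into()))); // logs
}
```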

View File

@@ -2,6 +2,7 @@ use crate::{context::RequestContext, tenant::timeline::logical_size::CurrentLogi
use chrono::{DateTime, Utc};
use consumption_metrics::EventType;
use futures::stream::StreamExt;
use pageserver_api::shard::ShardNumber;
use std::{sync::Arc, time::SystemTime};
use utils::{
id::{TenantId, TimelineId},
@@ -197,12 +198,12 @@ pub(super) async fn collect_all_metrics(
};
let tenants = futures::stream::iter(tenants).filter_map(|(id, state)| async move {
if state != TenantState::Active || !id.is_zero() {
if state != TenantState::Active {
None
} else {
crate::tenant::mgr::get_tenant(id, true)
.ok()
.map(|tenant| (id.tenant_id, tenant))
.map(|tenant| (id, tenant))
}
});
@@ -228,6 +229,11 @@ where
while let Some((tenant_id, tenant)) = tenants.next().await {
let mut tenant_resident_size = 0;
// Sharded tenants report all consumption metrics from shard zero
if tenant.tenant_shard_id().shard_number != ShardNumber(0) {
continue;
}
for timeline in tenant.list_timelines() {
let timeline_id = timeline.timeline_id;

View File

@@ -42,6 +42,7 @@
// reading these fields. We use the Debug impl for semi-structured logging, though.
use std::{
collections::HashMap,
sync::Arc,
time::{Duration, SystemTime},
};
@@ -124,7 +125,7 @@ pub fn launch_disk_usage_global_eviction_task(
async fn disk_usage_eviction_task(
state: &State,
task_config: &DiskUsageEvictionTaskConfig,
storage: &GenericRemoteStorage,
_storage: &GenericRemoteStorage,
tenants_dir: &Utf8Path,
cancel: CancellationToken,
) {
@@ -148,14 +149,8 @@ async fn disk_usage_eviction_task(
let start = Instant::now();
async {
let res = disk_usage_eviction_task_iteration(
state,
task_config,
storage,
tenants_dir,
&cancel,
)
.await;
let res =
disk_usage_eviction_task_iteration(state, task_config, tenants_dir, &cancel).await;
match res {
Ok(()) => {}
@@ -186,13 +181,12 @@ pub trait Usage: Clone + Copy + std::fmt::Debug {
async fn disk_usage_eviction_task_iteration(
state: &State,
task_config: &DiskUsageEvictionTaskConfig,
storage: &GenericRemoteStorage,
tenants_dir: &Utf8Path,
cancel: &CancellationToken,
) -> anyhow::Result<()> {
let usage_pre = filesystem_level_usage::get(tenants_dir, task_config)
.context("get filesystem-level disk usage before evictions")?;
let res = disk_usage_eviction_task_iteration_impl(state, storage, usage_pre, cancel).await;
let res = disk_usage_eviction_task_iteration_impl(state, usage_pre, cancel).await;
match res {
Ok(outcome) => {
debug!(?outcome, "disk_usage_eviction_iteration finished");
@@ -274,9 +268,8 @@ struct LayerCount {
count: usize,
}
pub(crate) async fn disk_usage_eviction_task_iteration_impl<U: Usage>(
pub async fn disk_usage_eviction_task_iteration_impl<U: Usage>(
state: &State,
_storage: &GenericRemoteStorage,
usage_pre: U,
cancel: &CancellationToken,
) -> anyhow::Result<IterationOutcome<U>> {
@@ -328,16 +321,16 @@ pub(crate) async fn disk_usage_eviction_task_iteration_impl<U: Usage>(
// Walk through the list of candidates, until we have accumulated enough layers to get
// us back under the pressure threshold. 'usage_planned' is updated so that it tracks
// how much disk space would be used after evicting all the layers up to the current
// point in the list.
// point in the list. The layers are collected in 'batched', grouped per timeline.
//
// If we get far enough in the list that we start to evict layers that are below
// the tenant's min-resident-size threshold, print a warning, and memorize the disk
// usage at that point, in 'usage_planned_min_resident_size_respecting'.
let mut batched: HashMap<_, Vec<_>> = HashMap::new();
let mut warned = None;
let mut usage_planned = usage_pre;
let mut evicted_amount = 0;
for (i, (partition, candidate)) in candidates.iter().enumerate() {
let mut max_batch_size = 0;
for (i, (partition, candidate)) in candidates.into_iter().enumerate() {
if !usage_planned.has_pressure() {
debug!(
no_candidates_evicted = i,
@@ -346,13 +339,25 @@ pub(crate) async fn disk_usage_eviction_task_iteration_impl<U: Usage>(
break;
}
if partition == &MinResidentSizePartition::Below && warned.is_none() {
if partition == MinResidentSizePartition::Below && warned.is_none() {
warn!(?usage_pre, ?usage_planned, candidate_no=i, "tenant_min_resident_size-respecting LRU would not relieve pressure, evicting more following global LRU policy");
warned = Some(usage_planned);
}
usage_planned.add_available_bytes(candidate.layer.layer_desc().file_size);
evicted_amount += 1;
// FIXME: batching makes no sense anymore because of no layermap locking, should just spawn
// tasks to evict all seen layers until we have evicted enough
let batch = batched.entry(TimelineKey(candidate.timeline)).or_default();
// semaphore will later be used to limit eviction concurrency, and we can express at
// most u32 number of permits. unlikely we would have u32::MAX layers to be evicted,
// but fail gracefully by not making batches larger.
if batch.len() < u32::MAX as usize {
batch.push(candidate.layer);
max_batch_size = max_batch_size.max(batch.len());
}
}
let usage_planned = match warned {
@@ -367,79 +372,100 @@ pub(crate) async fn disk_usage_eviction_task_iteration_impl<U: Usage>(
};
debug!(?usage_planned, "usage planned");
// phase2: evict layers
// phase2: evict victims batched by timeline
let mut js = tokio::task::JoinSet::new();
let limit = 1000;
let mut evicted = candidates.into_iter().take(evicted_amount).fuse();
let mut consumed_all = false;
// ratelimit to 1k files or any higher max batch size
let limit = Arc::new(tokio::sync::Semaphore::new(1000.max(max_batch_size)));
// After the evictions, `usage_assumed` is the post-eviction usage,
// according to internal accounting.
let mut usage_assumed = usage_pre;
let mut evictions_failed = LayerCount::default();
for (timeline, batch) in batched {
let tenant_shard_id = timeline.tenant_shard_id;
let timeline_id = timeline.timeline_id;
let batch_size =
u32::try_from(batch.len()).expect("batch size limited to u32::MAX during partitioning");
let evict_layers = async move {
loop {
let next = if js.len() >= limit || consumed_all {
js.join_next().await
} else if !js.is_empty() {
// opportunistically consume ready result, one per each new evicted
futures::future::FutureExt::now_or_never(js.join_next()).and_then(|x| x)
} else {
None
};
// I dislike naming of `available_permits` but it means current total amount of permits
// because permits can be added
assert!(batch_size as usize <= limit.available_permits());
if let Some(next) = next {
match next {
Ok(Ok(file_size)) => {
usage_assumed.add_available_bytes(file_size);
debug!(%timeline_id, "evicting batch for timeline");
let evict = {
let limit = limit.clone();
let cancel = cancel.clone();
async move {
let mut evicted_bytes = 0;
let mut evictions_failed = LayerCount::default();
let Ok(_permit) = limit.acquire_many_owned(batch_size).await else {
// semaphore closing means cancelled
return (evicted_bytes, evictions_failed);
};
let results = timeline.evict_layers(&batch).await;
match results {
Ok(results) => {
assert_eq!(results.len(), batch.len());
for (result, layer) in results.into_iter().zip(batch.iter()) {
let file_size = layer.layer_desc().file_size;
match result {
Some(Ok(())) => {
evicted_bytes += file_size;
}
Some(Err(EvictionError::NotFound | EvictionError::Downloaded)) => {
evictions_failed.file_sizes += file_size;
evictions_failed.count += 1;
}
None => {
assert!(cancel.is_cancelled());
}
}
}
}
Ok(Err((file_size, EvictionError::NotFound | EvictionError::Downloaded))) => {
evictions_failed.file_sizes += file_size;
evictions_failed.count += 1;
Err(e) => {
warn!("failed to evict batch: {:#}", e);
}
Err(je) if je.is_cancelled() => unreachable!("not used"),
Err(je) if je.is_panic() => { /* already logged */ }
Err(je) => tracing::error!("unknown JoinError: {je:?}"),
}
(evicted_bytes, evictions_failed)
}
if consumed_all && js.is_empty() {
break;
}
// calling again when consumed_all is fine as evicted is fused.
let Some((_partition, candidate)) = evicted.next() else {
consumed_all = true;
continue;
};
js.spawn(async move {
let rtc = candidate.timeline.remote_client.as_ref().expect(
"holding the witness, all timelines must have a remote timeline client",
);
let file_size = candidate.layer.layer_desc().file_size;
candidate
.layer
.evict_and_wait(rtc)
.await
.map(|()| file_size)
.map_err(|e| (file_size, e))
});
tokio::task::yield_now().await;
}
.instrument(tracing::info_span!("evict_batch", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), %timeline_id, batch_size));
js.spawn(evict);
// spawning multiple thousands of these is essentially blocking, so give already spawned a
// chance of making progress
tokio::task::yield_now().await;
}
let join_all = async move {
// After the evictions, `usage_assumed` is the post-eviction usage,
// according to internal accounting.
let mut usage_assumed = usage_pre;
let mut evictions_failed = LayerCount::default();
while let Some(res) = js.join_next().await {
match res {
Ok((evicted_bytes, failed)) => {
usage_assumed.add_available_bytes(evicted_bytes);
evictions_failed.file_sizes += failed.file_sizes;
evictions_failed.count += failed.count;
}
Err(je) if je.is_cancelled() => unreachable!("not used"),
Err(je) if je.is_panic() => { /* already logged */ }
Err(je) => tracing::error!("unknown JoinError: {je:?}"),
}
}
(usage_assumed, evictions_failed)
};
let (usage_assumed, evictions_failed) = tokio::select! {
tuple = evict_layers => { tuple },
tuple = join_all => { tuple },
_ = cancel.cancelled() => {
// dropping joinset will abort all pending evict_and_waits and that is fine, our
// requests will still stand
// close the semaphore to stop any pending acquires
limit.close();
return Ok(IterationOutcome::Cancelled);
}
};
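
One side of the diff above groups evictions into per-timeline batches, has each batch acquire as many semaphore permits as it holds layers, and drains results through a `JoinSet`. A compact sketch of that concurrency shape, assuming only the `tokio` crate; the batch contents are stand-in numbers rather than real layers:

```rust
// Each batch acquires as many permits as it has items, so in-flight work
// stays bounded while whole batches run under the same limit.
use std::sync::Arc;
use tokio::sync::Semaphore;
use tokio::task::JoinSet;

#[tokio::main]
async fn main() {
    // Stand-ins for per-timeline layer batches (values play the role of file sizes).
    let batches: Vec<Vec<u64>> = vec![vec![100, 200, 300], vec![400, 500]];
    let max_batch = batches.iter().map(Vec::len).max().unwrap_or(0);
    // Never allow fewer permits than the largest batch, otherwise
    // `acquire_many` could never succeed for that batch.
    let limit = Arc::new(Semaphore::new(1000.max(max_batch)));

    let mut js = JoinSet::new();
    for batch in batches {
        let limit = limit.clone();
        js.spawn(async move {
            let permits = batch.len() as u32;
            let _permits = limit
                .acquire_many_owned(permits)
                .await
                .expect("semaphore closed");
            // ... evict the batch while holding the permits ...
            batch.iter().sum::<u64>() // pretend this is "bytes evicted"
        });
    }

    let mut evicted_bytes = 0;
    while let Some(res) = js.join_next().await {
        evicted_bytes += res.expect("eviction task panicked");
    }
    println!("evicted {evicted_bytes} bytes");
}
```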

View File

@@ -84,6 +84,7 @@ paths:
required: true
schema:
type: string
format: hex
get:
description: Get tenant status
responses:
@@ -180,6 +181,7 @@ paths:
required: true
schema:
type: string
format: hex
get:
description: Get timelines for tenant
responses:
@@ -230,6 +232,7 @@ paths:
required: true
schema:
type: string
format: hex
- name: timeline_id
in: path
required: true
@@ -335,6 +338,7 @@ paths:
required: true
schema:
type: string
format: hex
- name: timeline_id
in: path
required: true
@@ -397,6 +401,7 @@ paths:
required: true
schema:
type: string
format: hex
- name: timeline_id
in: path
required: true
@@ -464,6 +469,7 @@ paths:
required: true
schema:
type: string
format: hex
- name: timeline_id
in: path
required: true
@@ -517,6 +523,7 @@ paths:
required: true
schema:
type: string
format: hex
post:
description: |
Schedules attach operation to happen in the background for the given tenant.
@@ -624,6 +631,7 @@ paths:
required: true
schema:
type: string
format: hex
- name: flush_ms
in: query
required: false
@@ -716,6 +724,7 @@ paths:
required: true
schema:
type: string
format: hex
- name: detach_ignored
in: query
required: false
@@ -775,6 +784,7 @@ paths:
required: true
schema:
type: string
format: hex
post:
description: |
Remove tenant data (including all corresponding timelines) from pageserver's memory.
@@ -823,6 +833,7 @@ paths:
required: true
schema:
type: string
format: hex
post:
description: |
Schedules an operation that attempts to load a tenant from the local disk and
@@ -879,6 +890,7 @@ paths:
required: true
schema:
type: string
format: hex
get:
description: |
Calculate tenant's synthetic size
@@ -921,6 +933,7 @@ paths:
required: true
schema:
type: string
format: hex
- name: inputs_only
in: query
required: false
@@ -990,10 +1003,11 @@ paths:
required: true
schema:
type: string
format: hex
post:
description: |
Create a timeline. Returns new timeline id on success.
Recreating the same timeline will succeed if the parameters match the existing timeline.
Create a timeline. Returns new timeline id on success.\
If no new timeline id is specified in parameters, it would be generated. It's an error to recreate the same timeline.
If no pg_version is specified, assume DEFAULT_PG_VERSION hardcoded in the pageserver.
requestBody:
content:
@@ -1123,6 +1137,7 @@ paths:
application/json:
schema:
type: string
format: hex
"400":
description: Malformed tenant create request
content:
@@ -1219,6 +1234,7 @@ paths:
required: true
schema:
type: string
format: hex
get:
description: |
Returns tenant's config description: specific config overrides a tenant has
@@ -1324,6 +1340,7 @@ components:
properties:
new_tenant_id:
type: string
format: hex
generation:
type: integer
description: Attachment generation number.
@@ -1352,6 +1369,7 @@ components:
properties:
tenant_id:
type: string
format: hex
TenantLocationConfigRequest:
type: object
required:
@@ -1359,6 +1377,7 @@ components:
properties:
tenant_id:
type: string
format: hex
mode:
type: string
enum: ["AttachedSingle", "AttachedMulti", "AttachedStale", "Secondary", "Detached"]
@@ -1405,8 +1424,6 @@ components:
type: integer
trace_read_requests:
type: boolean
heatmap_period:
type: integer
TenantConfigResponse:
type: object
properties:
@@ -1429,6 +1446,7 @@ components:
format: hex
tenant_id:
type: string
format: hex
last_record_lsn:
type: string
format: hex

View File

@@ -42,7 +42,6 @@ use crate::tenant::mgr::{
GetTenantError, SetNewTenantConfigError, TenantManager, TenantMapError, TenantMapInsertError,
TenantSlotError, TenantSlotUpsertError, TenantStateError,
};
use crate::tenant::secondary::SecondaryController;
use crate::tenant::size::ModelInputs;
use crate::tenant::storage_layer::LayerAccessStatsReset;
use crate::tenant::timeline::CompactFlags;
@@ -76,11 +75,9 @@ pub struct State {
broker_client: storage_broker::BrokerClientChannel,
disk_usage_eviction_state: Arc<disk_usage_eviction_task::State>,
deletion_queue_client: DeletionQueueClient,
secondary_controller: SecondaryController,
}
impl State {
#[allow(clippy::too_many_arguments)]
pub fn new(
conf: &'static PageServerConf,
tenant_manager: Arc<TenantManager>,
@@ -89,7 +86,6 @@ impl State {
broker_client: storage_broker::BrokerClientChannel,
disk_usage_eviction_state: Arc<disk_usage_eviction_task::State>,
deletion_queue_client: DeletionQueueClient,
secondary_controller: SecondaryController,
) -> anyhow::Result<Self> {
let allowlist_routes = ["/v1/status", "/v1/doc", "/swagger.yml", "/metrics"]
.iter()
@@ -104,7 +100,6 @@ impl State {
broker_client,
disk_usage_eviction_state,
deletion_queue_client,
secondary_controller,
})
}
@@ -141,6 +136,11 @@ impl From<PageReconstructError> for ApiError {
fn from(pre: PageReconstructError) -> ApiError {
match pre {
PageReconstructError::Other(pre) => ApiError::InternalServerError(pre),
PageReconstructError::NeedsDownload(_, _) => {
// This shouldn't happen, because we use a RequestContext that requests to
// download any missing layer files on-demand.
ApiError::InternalServerError(anyhow::anyhow!("need to download remote layer file"))
}
PageReconstructError::Cancelled => {
ApiError::InternalServerError(anyhow::anyhow!("request was cancelled"))
}
@@ -319,7 +319,6 @@ async fn build_timeline_info_common(
ctx: &RequestContext,
) -> anyhow::Result<TimelineInfo> {
crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id();
let initdb_lsn = timeline.initdb_lsn;
let last_record_lsn = timeline.get_last_record_lsn();
let (wal_source_connstr, last_received_msg_lsn, last_received_msg_ts) = {
let guard = timeline.last_received_wal.lock().unwrap();
@@ -353,14 +352,14 @@ async fn build_timeline_info_common(
let walreceiver_status = timeline.walreceiver_status();
let info = TimelineInfo {
tenant_id: timeline.tenant_shard_id,
// TODO(sharding): add a shard_id field, or make tenant_id into a tenant_shard_id
tenant_id: timeline.tenant_shard_id.tenant_id,
timeline_id: timeline.timeline_id,
ancestor_timeline_id,
ancestor_lsn,
disk_consistent_lsn: timeline.get_disk_consistent_lsn(),
remote_consistent_lsn: remote_consistent_lsn_projected,
remote_consistent_lsn_visible,
initdb_lsn,
last_record_lsn,
prev_record_lsn: Some(timeline.get_prev_record_lsn()),
latest_gc_cutoff_lsn: *timeline.get_latest_gc_cutoff_lsn(),
@@ -453,7 +452,7 @@ async fn timeline_create_handler(
.map_err(ApiError::InternalServerError)?;
json_response(StatusCode::CREATED, timeline_info)
}
Err(tenant::CreateTimelineError::Conflict | tenant::CreateTimelineError::AlreadyCreating) => {
Err(tenant::CreateTimelineError::AlreadyExists) => {
json_response(StatusCode::CONFLICT, ())
}
Err(tenant::CreateTimelineError::AncestorLsn(err)) => {
@@ -481,15 +480,15 @@ async fn timeline_list_handler(
request: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
let include_non_incremental_logical_size: Option<bool> =
parse_query_param(&request, "include-non-incremental-logical-size")?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
check_permission(&request, Some(tenant_id))?;
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
let response_data = async {
let tenant = mgr::get_tenant(tenant_shard_id, true)?;
let tenant = mgr::get_tenant(tenant_id, true)?;
let timelines = tenant.list_timelines();
let mut response_data = Vec::with_capacity(timelines.len());
@@ -508,9 +507,7 @@ async fn timeline_list_handler(
}
Ok::<Vec<TimelineInfo>, ApiError>(response_data)
}
.instrument(info_span!("timeline_list",
tenant_id = %tenant_shard_id.tenant_id,
shard_id = %tenant_shard_id.shard_slug()))
.instrument(info_span!("timeline_list", %tenant_id))
.await?;
json_response(StatusCode::OK, response_data)
@@ -520,17 +517,17 @@ async fn timeline_detail_handler(
request: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
let include_non_incremental_logical_size: Option<bool> =
parse_query_param(&request, "include-non-incremental-logical-size")?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
check_permission(&request, Some(tenant_id))?;
// Logical size calculation needs downloading.
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
let timeline_info = async {
let tenant = mgr::get_tenant(tenant_shard_id, true)?;
let tenant = mgr::get_tenant(tenant_id, true)?;
let timeline = tenant
.get_timeline(timeline_id, false)
@@ -547,10 +544,7 @@ async fn timeline_detail_handler(
Ok::<_, ApiError>(timeline_info)
}
.instrument(info_span!("timeline_detail",
tenant_id = %tenant_shard_id.tenant_id,
shard_id = %tenant_shard_id.shard_slug(),
%timeline_id))
.instrument(info_span!("timeline_detail", %tenant_id, %timeline_id))
.await?;
json_response(StatusCode::OK, timeline_info)
@@ -560,15 +554,8 @@ async fn get_lsn_by_timestamp_handler(
request: Request<Body>,
cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
if !tenant_shard_id.is_zero() {
// Requires SLRU contents, which are only stored on shard zero
return Err(ApiError::BadRequest(anyhow!(
"Size calculations are only available on shard zero"
)));
}
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
check_permission(&request, Some(tenant_id))?;
let version: Option<u8> = parse_query_param(&request, "version")?;
@@ -580,7 +567,7 @@ async fn get_lsn_by_timestamp_handler(
let timestamp_pg = postgres_ffi::to_pg_timestamp(timestamp);
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
let result = timeline
.find_lsn_for_timestamp(timestamp_pg, &cancel, &ctx)
.await?;
@@ -615,15 +602,8 @@ async fn get_timestamp_of_lsn_handler(
request: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
if !tenant_shard_id.is_zero() {
// Requires SLRU contents, which are only stored on shard zero
return Err(ApiError::BadRequest(anyhow!(
"Size calculations are only available on shard zero"
)));
}
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
check_permission(&request, Some(tenant_id))?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
@@ -633,7 +613,7 @@ async fn get_timestamp_of_lsn_handler(
.map_err(ApiError::BadRequest)?;
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
let result = timeline.get_timestamp_for_lsn(lsn, &ctx).await?;
match result {
@@ -825,11 +805,11 @@ async fn tenant_status(
request: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
check_permission(&request, Some(tenant_id))?;
let tenant_info = async {
let tenant = mgr::get_tenant(tenant_shard_id, false)?;
let tenant = mgr::get_tenant(tenant_id, false)?;
// Calculate total physical size of all timelines
let mut current_physical_size = 0;
@@ -839,15 +819,13 @@ async fn tenant_status(
let state = tenant.current_state();
Result::<_, ApiError>::Ok(TenantInfo {
id: tenant_shard_id,
id: tenant_id,
state: state.clone(),
current_physical_size: Some(current_physical_size),
attachment_status: state.attachment_status(),
})
}
.instrument(info_span!("tenant_status_handler",
tenant_id = %tenant_shard_id.tenant_id,
shard_id = %tenant_shard_id.shard_slug()))
.instrument(info_span!("tenant_status_handler", %tenant_id))
.await?;
json_response(StatusCode::OK, tenant_info)
@@ -890,20 +868,14 @@ async fn tenant_size_handler(
request: Request<Body>,
cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
check_permission(&request, Some(tenant_id))?;
let inputs_only: Option<bool> = parse_query_param(&request, "inputs_only")?;
let retention_period: Option<u64> = parse_query_param(&request, "retention_period")?;
let headers = request.headers();
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
let tenant = mgr::get_tenant(tenant_shard_id, true)?;
if !tenant_shard_id.is_zero() {
return Err(ApiError::BadRequest(anyhow!(
"Size calculations are only available on shard zero"
)));
}
let tenant = mgr::get_tenant(tenant_id, true)?;
// this can be long operation
let inputs = tenant
@@ -955,7 +927,7 @@ async fn tenant_size_handler(
json_response(
StatusCode::OK,
TenantHistorySize {
id: tenant_shard_id.tenant_id,
id: tenant_id,
size: sizes.as_ref().map(|x| x.total_size),
segment_sizes: sizes.map(|x| x.segments),
inputs,
@@ -967,14 +939,14 @@ async fn layer_map_info_handler(
request: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
let reset: LayerAccessStatsReset =
parse_query_param(&request, "reset")?.unwrap_or(LayerAccessStatsReset::NoReset);
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
check_permission(&request, Some(tenant_id))?;
let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
let layer_map_info = timeline.layer_map_info(reset).await;
json_response(StatusCode::OK, layer_map_info)
@@ -984,12 +956,13 @@ async fn layer_download_handler(
request: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
check_permission(&request, Some(tenant_id))?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
let layer_file_name = get_request_param(&request, "layer_file_name")?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
check_permission(&request, Some(tenant_id))?;
let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
let downloaded = timeline
.download_layer(layer_file_name)
.await
@@ -1000,7 +973,7 @@ async fn layer_download_handler(
Some(false) => json_response(StatusCode::NOT_MODIFIED, ()),
None => json_response(
StatusCode::BAD_REQUEST,
format!("Layer {tenant_shard_id}/{timeline_id}/{layer_file_name} not found"),
format!("Layer {tenant_id}/{timeline_id}/{layer_file_name} not found"),
),
}
}
@@ -1009,12 +982,12 @@ async fn evict_timeline_layer_handler(
request: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
check_permission(&request, Some(tenant_id))?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
let layer_file_name = get_request_param(&request, "layer_file_name")?;
let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
let evicted = timeline
.evict_layer(layer_file_name)
.await
@@ -1025,7 +998,7 @@ async fn evict_timeline_layer_handler(
Some(false) => json_response(StatusCode::NOT_MODIFIED, ()),
None => json_response(
StatusCode::BAD_REQUEST,
format!("Layer {tenant_shard_id}/{timeline_id}/{layer_file_name} not found"),
format!("Layer {tenant_id}/{timeline_id}/{layer_file_name} not found"),
),
}
}
@@ -1157,10 +1130,10 @@ async fn get_tenant_config_handler(
request: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
check_permission(&request, Some(tenant_id))?;
let tenant = mgr::get_tenant(tenant_shard_id, false)?;
let tenant = mgr::get_tenant(tenant_id, false)?;
let response = HashMap::from([
(
@@ -1254,9 +1227,9 @@ async fn handle_tenant_break(
r: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let tenant_shard_id: TenantShardId = parse_request_param(&r, "tenant_shard_id")?;
let tenant_id: TenantId = parse_request_param(&r, "tenant_id")?;
let tenant = crate::tenant::mgr::get_tenant(tenant_shard_id, true)
let tenant = crate::tenant::mgr::get_tenant(tenant_id, true)
.map_err(|_| ApiError::Conflict(String::from("no active tenant found")))?;
tenant.set_broken("broken from test".to_owned()).await;
@@ -1297,15 +1270,14 @@ async fn timeline_gc_handler(
mut request: Request<Body>,
cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
check_permission(&request, Some(tenant_id))?;
let gc_req: TimelineGcRequest = json_request(&mut request).await?;
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
let wait_task_done =
mgr::immediate_gc(tenant_shard_id, timeline_id, gc_req, cancel, &ctx).await?;
let wait_task_done = mgr::immediate_gc(tenant_id, timeline_id, gc_req, cancel, &ctx).await?;
let gc_result = wait_task_done
.await
.context("wait for gc task")
@@ -1320,9 +1292,9 @@ async fn timeline_compact_handler(
request: Request<Body>,
cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
check_permission(&request, Some(tenant_id))?;
let mut flags = EnumSet::empty();
if Some(true) == parse_query_param::<_, bool>(&request, "force_repartition")? {
@@ -1330,14 +1302,14 @@ async fn timeline_compact_handler(
}
async {
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
timeline
.compact(&cancel, flags, &ctx)
.await
.map_err(|e| ApiError::InternalServerError(e.into()))?;
json_response(StatusCode::OK, ())
}
.instrument(info_span!("manual_compaction", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))
.instrument(info_span!("manual_compaction", %tenant_id, %timeline_id))
.await
}
@@ -1346,9 +1318,9 @@ async fn timeline_checkpoint_handler(
request: Request<Body>,
cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
check_permission(&request, Some(tenant_id))?;
let mut flags = EnumSet::empty();
if Some(true) == parse_query_param::<_, bool>(&request, "force_repartition")? {
@@ -1356,7 +1328,7 @@ async fn timeline_checkpoint_handler(
}
async {
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
timeline
.freeze_and_flush()
.await
@@ -1368,7 +1340,7 @@ async fn timeline_checkpoint_handler(
json_response(StatusCode::OK, ())
}
.instrument(info_span!("manual_checkpoint", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))
.instrument(info_span!("manual_checkpoint", %tenant_id, %timeline_id))
.await
}
@@ -1376,12 +1348,12 @@ async fn timeline_download_remote_layers_handler_post(
mut request: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
let body: DownloadRemoteLayersTaskSpawnRequest = json_request(&mut request).await?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
check_permission(&request, Some(tenant_id))?;
let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
match timeline.spawn_download_all_remote_layers(body).await {
Ok(st) => json_response(StatusCode::ACCEPTED, st),
Err(st) => json_response(StatusCode::CONFLICT, st),
@@ -1392,11 +1364,11 @@ async fn timeline_download_remote_layers_handler_get(
request: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
check_permission(&request, Some(tenant_id))?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
let info = timeline
.get_download_all_remote_layers_task_info()
.context("task never started since last pageserver process start")
@@ -1442,9 +1414,9 @@ async fn getpage_at_lsn_handler(
request: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
check_permission(&request, Some(tenant_id))?;
struct Key(crate::repository::Key);
@@ -1463,7 +1435,7 @@ async fn getpage_at_lsn_handler(
async {
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
let page = timeline.get(key.0, lsn, &ctx).await?;
@@ -1475,7 +1447,7 @@ async fn getpage_at_lsn_handler(
.unwrap(),
)
}
.instrument(info_span!("timeline_get", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))
.instrument(info_span!("timeline_get", %tenant_id, %timeline_id))
.await
}
@@ -1483,9 +1455,9 @@ async fn timeline_collect_keyspace(
request: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
check_permission(&request, Some(tenant_id))?;
struct Partitioning {
keys: crate::keyspace::KeySpace,
@@ -1554,7 +1526,7 @@ async fn timeline_collect_keyspace(
async {
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
let at_lsn = at_lsn.unwrap_or_else(|| timeline.get_last_record_lsn());
let keys = timeline
.collect_keyspace(at_lsn, &ctx)
@@ -1563,15 +1535,15 @@ async fn timeline_collect_keyspace(
json_response(StatusCode::OK, Partitioning { keys, at_lsn })
}
.instrument(info_span!("timeline_collect_keyspace", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))
.instrument(info_span!("timeline_collect_keyspace", %tenant_id, %timeline_id))
.await
}
async fn active_timeline_of_active_tenant(
tenant_shard_id: TenantShardId,
tenant_id: TenantId,
timeline_id: TimelineId,
) -> Result<Arc<Timeline>, ApiError> {
let tenant = mgr::get_tenant(tenant_shard_id, true)?;
let tenant = mgr::get_tenant(tenant_id, true)?;
tenant
.get_timeline(timeline_id, true)
.map_err(|e| ApiError::NotFound(e.into()))
@@ -1593,7 +1565,7 @@ async fn always_panic_handler(
async fn disk_usage_eviction_run(
mut r: Request<Body>,
cancel: CancellationToken,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
check_permission(&r, None)?;
@@ -1621,48 +1593,57 @@ async fn disk_usage_eviction_run(
}
}
let config = json_request::<Config>(&mut r).await?;
let config = json_request::<Config>(&mut r)
.await
.map_err(|_| ApiError::BadRequest(anyhow::anyhow!("invalid JSON body")))?;
let usage = Usage {
config,
freed_bytes: 0,
};
let (tx, rx) = tokio::sync::oneshot::channel();
let state = get_state(&r);
let Some(storage) = state.remote_storage.as_ref() else {
if state.remote_storage.as_ref().is_none() {
return Err(ApiError::InternalServerError(anyhow::anyhow!(
"remote storage not configured, cannot run eviction iteration"
)));
};
}
let state = state.disk_usage_eviction_state.clone();
let res = crate::disk_usage_eviction_task::disk_usage_eviction_task_iteration_impl(
&state, storage, usage, &cancel,
)
.await;
let cancel = CancellationToken::new();
let child_cancel = cancel.clone();
let _g = cancel.drop_guard();
info!(?res, "disk_usage_eviction_task_iteration_impl finished");
crate::task_mgr::spawn(
crate::task_mgr::BACKGROUND_RUNTIME.handle(),
TaskKind::DiskUsageEviction,
None,
None,
"ondemand disk usage eviction",
false,
async move {
let res = crate::disk_usage_eviction_task::disk_usage_eviction_task_iteration_impl(
&state,
usage,
&child_cancel,
)
.await;
let res = res.map_err(ApiError::InternalServerError)?;
info!(?res, "disk_usage_eviction_task_iteration_impl finished");
json_response(StatusCode::OK, res)
}
let _ = tx.send(res);
Ok(())
}
.in_current_span(),
);
async fn secondary_upload_handler(
request: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let state = get_state(&request);
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
state
.secondary_controller
.upload_tenant(tenant_shard_id)
.await
.map_err(ApiError::InternalServerError)?;
let response = rx.await.unwrap().map_err(ApiError::InternalServerError)?;
json_response(StatusCode::OK, ())
json_response(StatusCode::OK, response)
}
async fn handler_404(_: Request<Body>) -> Result<Response<Body>, ApiError> {
@@ -1839,25 +1820,23 @@ pub fn make_router(
})
.get("/v1/tenant", |r| api_handler(r, tenant_list_handler))
.post("/v1/tenant", |r| api_handler(r, tenant_create_handler))
.get("/v1/tenant/:tenant_shard_id", |r| {
api_handler(r, tenant_status)
})
.get("/v1/tenant/:tenant_id", |r| api_handler(r, tenant_status))
.delete("/v1/tenant/:tenant_shard_id", |r| {
api_handler(r, tenant_delete_handler)
})
.get("/v1/tenant/:tenant_shard_id/synthetic_size", |r| {
.get("/v1/tenant/:tenant_id/synthetic_size", |r| {
api_handler(r, tenant_size_handler)
})
.put("/v1/tenant/config", |r| {
api_handler(r, update_tenant_config_handler)
})
.get("/v1/tenant/:tenant_shard_id/config", |r| {
.get("/v1/tenant/:tenant_id/config", |r| {
api_handler(r, get_tenant_config_handler)
})
.put("/v1/tenant/:tenant_shard_id/location_config", |r| {
api_handler(r, put_tenant_location_config_handler)
})
.get("/v1/tenant/:tenant_shard_id/timeline", |r| {
.get("/v1/tenant/:tenant_id/timeline", |r| {
api_handler(r, timeline_list_handler)
})
.post("/v1/tenant/:tenant_shard_id/timeline", |r| {
@@ -1878,74 +1857,67 @@ pub fn make_router(
.post("/v1/tenant/:tenant_id/ignore", |r| {
api_handler(r, tenant_ignore_handler)
})
.get("/v1/tenant/:tenant_shard_id/timeline/:timeline_id", |r| {
.get("/v1/tenant/:tenant_id/timeline/:timeline_id", |r| {
api_handler(r, timeline_detail_handler)
})
.get(
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/get_lsn_by_timestamp",
"/v1/tenant/:tenant_id/timeline/:timeline_id/get_lsn_by_timestamp",
|r| api_handler(r, get_lsn_by_timestamp_handler),
)
.get(
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/get_timestamp_of_lsn",
"/v1/tenant/:tenant_id/timeline/:timeline_id/get_timestamp_of_lsn",
|r| api_handler(r, get_timestamp_of_lsn_handler),
)
.put("/v1/tenant/:tenant_id/timeline/:timeline_id/do_gc", |r| {
api_handler(r, timeline_gc_handler)
})
.put("/v1/tenant/:tenant_id/timeline/:timeline_id/compact", |r| {
testing_api_handler("run timeline compaction", r, timeline_compact_handler)
})
.put(
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/do_gc",
|r| api_handler(r, timeline_gc_handler),
)
.put(
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/compact",
|r| testing_api_handler("run timeline compaction", r, timeline_compact_handler),
)
.put(
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/checkpoint",
"/v1/tenant/:tenant_id/timeline/:timeline_id/checkpoint",
|r| testing_api_handler("run timeline checkpoint", r, timeline_checkpoint_handler),
)
.post(
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/download_remote_layers",
"/v1/tenant/:tenant_id/timeline/:timeline_id/download_remote_layers",
|r| api_handler(r, timeline_download_remote_layers_handler_post),
)
.get(
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/download_remote_layers",
"/v1/tenant/:tenant_id/timeline/:timeline_id/download_remote_layers",
|r| api_handler(r, timeline_download_remote_layers_handler_get),
)
.delete("/v1/tenant/:tenant_shard_id/timeline/:timeline_id", |r| {
api_handler(r, timeline_delete_handler)
})
.get("/v1/tenant/:tenant_id/timeline/:timeline_id/layer", |r| {
api_handler(r, layer_map_info_handler)
})
.get(
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/layer",
|r| api_handler(r, layer_map_info_handler),
)
.get(
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/layer/:layer_file_name",
"/v1/tenant/:tenant_id/timeline/:timeline_id/layer/:layer_file_name",
|r| api_handler(r, layer_download_handler),
)
.delete(
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/layer/:layer_file_name",
"/v1/tenant/:tenant_id/timeline/:timeline_id/layer/:layer_file_name",
|r| api_handler(r, evict_timeline_layer_handler),
)
.post("/v1/tenant/:tenant_shard_id/heatmap_upload", |r| {
api_handler(r, secondary_upload_handler)
})
.put("/v1/disk_usage_eviction/run", |r| {
api_handler(r, disk_usage_eviction_run)
})
.put("/v1/deletion_queue/flush", |r| {
api_handler(r, deletion_queue_flush)
})
.put("/v1/tenant/:tenant_shard_id/break", |r| {
.put("/v1/tenant/:tenant_id/break", |r| {
testing_api_handler("set tenant state to broken", r, handle_tenant_break)
})
.get("/v1/panic", |r| api_handler(r, always_panic_handler))
.post("/v1/tracing/event", |r| {
testing_api_handler("emit a tracing event", r, post_tracing_event_handler)
})
.get("/v1/tenant/:tenant_id/timeline/:timeline_id/getpage", |r| {
testing_api_handler("getpage@lsn", r, getpage_at_lsn_handler)
})
.get(
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/getpage",
|r| testing_api_handler("getpage@lsn", r, getpage_at_lsn_handler),
)
.get(
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/keyspace",
"/v1/tenant/:tenant_id/timeline/:timeline_id/keyspace",
|r| testing_api_handler("read out the keyspace", r, timeline_collect_keyspace),
)
.any(handler_404))
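A note on the reworked `disk_usage_eviction_run` handler above: one side of this diff runs the eviction iteration inline under a drop-guarded `CancellationToken`, while the other spawns it via `task_mgr::spawn` and passes the result back over a `tokio::sync::oneshot` channel. Below is a minimal sketch of that spawn-and-report-back pattern, assuming only the `tokio` crate; the task body, the `u64` "freed bytes" result, and the `String` error type are placeholders, not the pageserver's real types.

```
use tokio::sync::oneshot;

#[tokio::main]
async fn main() {
    // Handler side: create a oneshot channel and spawn the long-running
    // iteration onto a background task, keeping the receiver.
    let (tx, rx) = oneshot::channel::<Result<u64, String>>();

    tokio::spawn(async move {
        // Stand-in for disk_usage_eviction_task_iteration_impl: pretend we
        // freed some bytes, then report the outcome to whoever is waiting.
        let freed_bytes: u64 = 42;
        // If the receiver is gone (e.g. the HTTP request was dropped),
        // send() returns Err; the spawned task just ignores that.
        let _ = tx.send(Ok(freed_bytes));
    });

    // Back in the handler: await the result and map it to a response/error.
    match rx.await {
        Ok(Ok(freed)) => println!("eviction iteration finished, freed {freed} bytes"),
        Ok(Err(e)) => eprintln!("eviction iteration failed: {e}"),
        Err(_) => eprintln!("background task dropped the sender without reporting"),
    }
}
```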

View File

@@ -2,10 +2,9 @@ use enum_map::EnumMap;
use metrics::metric_vec_duration::DurationResultObserver;
use metrics::{
register_counter_vec, register_gauge_vec, register_histogram, register_histogram_vec,
register_int_counter, register_int_counter_pair_vec, register_int_counter_vec,
register_int_gauge, register_int_gauge_vec, register_uint_gauge, register_uint_gauge_vec,
Counter, CounterVec, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPairVec,
IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec,
register_int_counter, register_int_counter_vec, register_int_gauge, register_int_gauge_vec,
register_uint_gauge, register_uint_gauge_vec, Counter, CounterVec, GaugeVec, Histogram,
HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec,
};
use once_cell::sync::Lazy;
use pageserver_api::shard::TenantShardId;
@@ -286,63 +285,6 @@ pub static PAGE_CACHE_SIZE: Lazy<PageCacheSizeMetrics> = Lazy::new(|| PageCacheS
},
});
pub(crate) mod page_cache_eviction_metrics {
use std::num::NonZeroUsize;
use metrics::{register_int_counter_vec, IntCounter, IntCounterVec};
use once_cell::sync::Lazy;
#[derive(Clone, Copy)]
pub(crate) enum Outcome {
FoundSlotUnused { iters: NonZeroUsize },
FoundSlotEvicted { iters: NonZeroUsize },
ItersExceeded { iters: NonZeroUsize },
}
static ITERS_TOTAL_VEC: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
"pageserver_page_cache_find_victim_iters_total",
"Counter for the number of iterations in the find_victim loop",
&["outcome"],
)
.expect("failed to define a metric")
});
static CALLS_VEC: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
"pageserver_page_cache_find_victim_calls",
"Incremented at the end of each find_victim() call.\
Filter by outcome to get e.g., eviction rate.",
&["outcome"]
)
.unwrap()
});
pub(crate) fn observe(outcome: Outcome) {
macro_rules! dry {
($label:literal, $iters:expr) => {{
static LABEL: &'static str = $label;
static ITERS_TOTAL: Lazy<IntCounter> =
Lazy::new(|| ITERS_TOTAL_VEC.with_label_values(&[LABEL]));
static CALLS: Lazy<IntCounter> =
Lazy::new(|| CALLS_VEC.with_label_values(&[LABEL]));
ITERS_TOTAL.inc_by(($iters.get()) as u64);
CALLS.inc();
}};
}
match outcome {
Outcome::FoundSlotUnused { iters } => dry!("found_empty", iters),
Outcome::FoundSlotEvicted { iters } => {
dry!("found_evicted", iters)
}
Outcome::ItersExceeded { iters } => {
dry!("err_iters_exceeded", iters);
super::page_cache_errors_inc(super::PageCacheErrorKind::EvictIterLimit);
}
}
}
}
pub(crate) static PAGE_CACHE_ACQUIRE_PINNED_SLOT_TIME: Lazy<Histogram> = Lazy::new(|| {
register_histogram!(
"pageserver_page_cache_acquire_pinned_slot_seconds",
@@ -352,6 +294,14 @@ pub(crate) static PAGE_CACHE_ACQUIRE_PINNED_SLOT_TIME: Lazy<Histogram> = Lazy::n
.expect("failed to define a metric")
});
pub(crate) static PAGE_CACHE_FIND_VICTIMS_ITERS_TOTAL: Lazy<IntCounter> = Lazy::new(|| {
register_int_counter!(
"pageserver_page_cache_find_victim_iters_total",
"Counter for the number of iterations in the find_victim loop",
)
.expect("failed to define a metric")
});
static PAGE_CACHE_ERRORS: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
"page_cache_errors_total",
@@ -651,7 +601,7 @@ static EVICTIONS_WITH_LOW_RESIDENCE_DURATION: Lazy<IntCounterVec> = Lazy::new(||
"pageserver_evictions_with_low_residence_duration",
"If a layer is evicted that was resident for less than `low_threshold`, it is counted to this counter. \
Residence duration is determined using the `residence_duration_data_source`.",
&["tenant_id", "shard_id", "timeline_id", "residence_duration_data_source", "low_threshold_secs"]
&["tenant_id", "timeline_id", "residence_duration_data_source", "low_threshold_secs"]
)
.expect("failed to define a metric")
});
@@ -715,16 +665,10 @@ impl EvictionsWithLowResidenceDurationBuilder {
}
}
fn build(
&self,
tenant_id: &str,
shard_id: &str,
timeline_id: &str,
) -> EvictionsWithLowResidenceDuration {
fn build(&self, tenant_id: &str, timeline_id: &str) -> EvictionsWithLowResidenceDuration {
let counter = EVICTIONS_WITH_LOW_RESIDENCE_DURATION
.get_metric_with_label_values(&[
tenant_id,
shard_id,
timeline_id,
self.data_source,
&EvictionsWithLowResidenceDuration::threshold_label_value(self.threshold),
@@ -755,24 +699,21 @@ impl EvictionsWithLowResidenceDuration {
pub fn change_threshold(
&mut self,
tenant_id: &str,
shard_id: &str,
timeline_id: &str,
new_threshold: Duration,
) {
if new_threshold == self.threshold {
return;
}
let mut with_new = EvictionsWithLowResidenceDurationBuilder::new(
self.data_source,
new_threshold,
)
.build(tenant_id, shard_id, timeline_id);
let mut with_new =
EvictionsWithLowResidenceDurationBuilder::new(self.data_source, new_threshold)
.build(tenant_id, timeline_id);
std::mem::swap(self, &mut with_new);
with_new.remove(tenant_id, shard_id, timeline_id);
with_new.remove(tenant_id, timeline_id);
}
// This could be a `Drop` impl, but we need the `tenant_id` and `timeline_id`.
fn remove(&mut self, tenant_id: &str, shard_id: &str, timeline_id: &str) {
fn remove(&mut self, tenant_id: &str, timeline_id: &str) {
let Some(_counter) = self.counter.take() else {
return;
};
@@ -781,7 +722,6 @@ impl EvictionsWithLowResidenceDuration {
let removed = EVICTIONS_WITH_LOW_RESIDENCE_DURATION.remove_label_values(&[
tenant_id,
shard_id,
timeline_id,
self.data_source,
&threshold,
@@ -902,26 +842,6 @@ pub(crate) static STORAGE_IO_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
)
.expect("failed to define a metric")
});
pub(crate) mod virtual_file_descriptor_cache {
use super::*;
pub(crate) static SIZE_MAX: Lazy<UIntGauge> = Lazy::new(|| {
register_uint_gauge!(
"pageserver_virtual_file_descriptor_cache_size_max",
"Maximum number of open file descriptors in the cache."
)
.unwrap()
});
// SIZE_CURRENT: derive it like so:
// ```
// sum(pageserver_io_operations_seconds_count{operation=~"^(open|open-after-replace)$"})
// -ignoring(operation)
// sum(pageserver_io_operations_seconds_count{operation=~"^(close|close-by-replace)$"})
// ```
}
#[derive(Debug)]
struct GlobalAndPerTimelineHistogram {
global: Histogram,
@@ -1271,28 +1191,6 @@ pub(crate) static WAL_INGEST: Lazy<WalIngestMetrics> = Lazy::new(|| WalIngestMet
)
.expect("failed to define a metric"),
});
pub(crate) struct SecondaryModeMetrics {
pub(crate) upload_heatmap: IntCounter,
pub(crate) upload_heatmap_errors: IntCounter,
pub(crate) upload_heatmap_duration: Histogram,
}
pub(crate) static SECONDARY_MODE: Lazy<SecondaryModeMetrics> = Lazy::new(|| SecondaryModeMetrics {
upload_heatmap: register_int_counter!(
"pageserver_secondary_upload_heatmap",
"Number of heatmaps written to remote storage by attached tenants"
)
.expect("failed to define a metric"),
upload_heatmap_errors: register_int_counter!(
"pageserver_secondary_upload_heatmap_errors",
"Failures writing heatmap to remote storage"
)
.expect("failed to define a metric"),
upload_heatmap_duration: register_histogram!(
"pageserver_secondary_upload_heatmap_duration",
"Time to build and upload a heatmap, including any waiting inside the S3 client"
)
.expect("failed to define a metric"),
});
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum RemoteOpKind {
@@ -1344,16 +1242,25 @@ pub(crate) static TENANT_TASK_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
.expect("Failed to register tenant_task_events metric")
});
pub(crate) static BACKGROUND_LOOP_SEMAPHORE_WAIT_GAUGE: Lazy<IntCounterPairVec> = Lazy::new(|| {
register_int_counter_pair_vec!(
"pageserver_background_loop_semaphore_wait_start_count",
"Counter for background loop concurrency-limiting semaphore acquire calls started",
"pageserver_background_loop_semaphore_wait_finish_count",
"Counter for background loop concurrency-limiting semaphore acquire calls finished",
&["task"],
)
.unwrap()
});
pub(crate) static BACKGROUND_LOOP_SEMAPHORE_WAIT_START_COUNT: Lazy<IntCounterVec> =
Lazy::new(|| {
register_int_counter_vec!(
"pageserver_background_loop_semaphore_wait_start_count",
"Counter for background loop concurrency-limiting semaphore acquire calls started",
&["task"],
)
.unwrap()
});
pub(crate) static BACKGROUND_LOOP_SEMAPHORE_WAIT_FINISH_COUNT: Lazy<IntCounterVec> =
Lazy::new(|| {
register_int_counter_vec!(
"pageserver_background_loop_semaphore_wait_finish_count",
"Counter for background loop concurrency-limiting semaphore acquire calls finished",
&["task"],
)
.unwrap()
});
pub(crate) static BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
@@ -1627,7 +1534,6 @@ impl StorageTimeMetrics {
#[derive(Debug)]
pub struct TimelineMetrics {
tenant_id: String,
shard_id: String,
timeline_id: String,
pub flush_time_histo: StorageTimeMetrics,
pub compact_time_histo: StorageTimeMetrics,
@@ -1648,12 +1554,11 @@ pub struct TimelineMetrics {
impl TimelineMetrics {
pub fn new(
tenant_shard_id: &TenantShardId,
tenant_id: &TenantId,
timeline_id: &TimelineId,
evictions_with_low_residence_duration_builder: EvictionsWithLowResidenceDurationBuilder,
) -> Self {
let tenant_id = tenant_shard_id.tenant_id.to_string();
let shard_id = format!("{}", tenant_shard_id.shard_slug());
let tenant_id = tenant_id.to_string();
let timeline_id = timeline_id.to_string();
let flush_time_histo =
StorageTimeMetrics::new(StorageTimeOperation::LayerFlush, &tenant_id, &timeline_id);
@@ -1690,12 +1595,11 @@ impl TimelineMetrics {
let evictions = EVICTIONS
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
.unwrap();
let evictions_with_low_residence_duration = evictions_with_low_residence_duration_builder
.build(&tenant_id, &shard_id, &timeline_id);
let evictions_with_low_residence_duration =
evictions_with_low_residence_duration_builder.build(&tenant_id, &timeline_id);
TimelineMetrics {
tenant_id,
shard_id,
timeline_id,
flush_time_histo,
compact_time_histo,
@@ -1741,7 +1645,6 @@ impl Drop for TimelineMetrics {
fn drop(&mut self) {
let tenant_id = &self.tenant_id;
let timeline_id = &self.timeline_id;
let shard_id = &self.shard_id;
let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, timeline_id]);
{
RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(self.resident_physical_size_get());
@@ -1755,7 +1658,7 @@ impl Drop for TimelineMetrics {
self.evictions_with_low_residence_duration
.write()
.unwrap()
.remove(tenant_id, shard_id, timeline_id);
.remove(tenant_id, timeline_id);
// The following metrics are born outside of the TimelineMetrics lifecycle but still
// removed at the end of it. The idea is to have the metrics outlive the

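Two metrics idioms change in the hunks above: a paired counter (`register_int_counter_pair_vec!`) is traded for two separate `IntCounterVec`s, and the removed `page_cache_eviction_metrics` module caches per-label counter handles behind `Lazy` so the hot path does not repeat the label lookup on every call. A small sketch of that label-caching pattern, written against the upstream `prometheus` and `once_cell` crates rather than the pageserver's `metrics` wrapper; the metric names are made up for the example.

```
use once_cell::sync::Lazy;
use prometheus::{register_int_counter_vec, IntCounter, IntCounterVec};

// The labelled counter family, registered once with the default registry.
static FIND_VICTIM_ITERS: Lazy<IntCounterVec> = Lazy::new(|| {
    register_int_counter_vec!(
        "example_find_victim_iters_total",
        "Iterations spent in the find_victim loop, by outcome",
        &["outcome"]
    )
    .expect("failed to define a metric")
});

// with_label_values() does a map lookup under a lock, so a hot path caches
// the per-label handle once, as the dry! macro in the removed module does.
static FOUND_EVICTED_ITERS: Lazy<IntCounter> =
    Lazy::new(|| FIND_VICTIM_ITERS.with_label_values(&["found_evicted"]));

fn main() {
    FOUND_EVICTED_ITERS.inc_by(3);
    println!("found_evicted iters so far: {}", FOUND_EVICTED_ITERS.get());
}
```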
View File

@@ -28,7 +28,7 @@
//! Page cache maps from a cache key to a buffer slot.
//! The cache key uniquely identifies the piece of data that is being cached.
//!
//! The cache key for **materialized pages** is [`TenantShardId`], [`TimelineId`], [`Key`], and [`Lsn`].
//! The cache key for **materialized pages** is [`TenantId`], [`TimelineId`], [`Key`], and [`Lsn`].
//! Use [`PageCache::memorize_materialized_page`] and [`PageCache::lookup_materialized_page`] for fill & access.
//!
//! The cache key for **immutable file** pages is [`FileId`] and a block number.
@@ -83,15 +83,13 @@ use std::{
use anyhow::Context;
use once_cell::sync::OnceCell;
use pageserver_api::shard::TenantShardId;
use utils::{id::TimelineId, lsn::Lsn};
use crate::{
context::RequestContext,
metrics::{page_cache_eviction_metrics, PageCacheSizeMetrics},
repository::Key,
use utils::{
id::{TenantId, TimelineId},
lsn::Lsn,
};
use crate::{context::RequestContext, metrics::PageCacheSizeMetrics, repository::Key};
static PAGE_CACHE: OnceCell<PageCache> = OnceCell::new();
const TEST_PAGE_CACHE_SIZE: usize = 50;
@@ -152,13 +150,7 @@ enum CacheKey {
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
struct MaterializedPageHashKey {
/// Why is this TenantShardId rather than TenantId?
///
/// Usually, the materialized value of a page@lsn is identical on any shard in the same tenant. However, this
/// this not the case for certain internally-generated pages (e.g. relation sizes). In future, we may make this
/// key smaller by omitting the shard, if we ensure that reads to such pages always skip the cache, or are
/// special-cased in some other way.
tenant_shard_id: TenantShardId,
tenant_id: TenantId,
timeline_id: TimelineId,
key: Key,
}
@@ -382,7 +374,7 @@ impl PageCache {
/// returned page.
pub async fn lookup_materialized_page(
&self,
tenant_shard_id: TenantShardId,
tenant_id: TenantId,
timeline_id: TimelineId,
key: &Key,
lsn: Lsn,
@@ -399,7 +391,7 @@ impl PageCache {
let mut cache_key = CacheKey::MaterializedPage {
hash_key: MaterializedPageHashKey {
tenant_shard_id,
tenant_id,
timeline_id,
key: *key,
},
@@ -440,7 +432,7 @@ impl PageCache {
///
pub async fn memorize_materialized_page(
&self,
tenant_shard_id: TenantShardId,
tenant_id: TenantId,
timeline_id: TimelineId,
key: Key,
lsn: Lsn,
@@ -448,7 +440,7 @@ impl PageCache {
) -> anyhow::Result<()> {
let cache_key = CacheKey::MaterializedPage {
hash_key: MaterializedPageHashKey {
tenant_shard_id,
tenant_id,
timeline_id,
key,
},
@@ -905,10 +897,8 @@ impl PageCache {
// Note that just yielding to tokio during iteration without such
// priority boosting is likely counter-productive. We'd just give more opportunities
// for B to bump usage count, further starving A.
page_cache_eviction_metrics::observe(
page_cache_eviction_metrics::Outcome::ItersExceeded {
iters: iters.try_into().unwrap(),
},
crate::metrics::page_cache_errors_inc(
crate::metrics::PageCacheErrorKind::EvictIterLimit,
);
anyhow::bail!("exceeded evict iter limit");
}
@@ -919,18 +909,8 @@ impl PageCache {
// remove mapping for old buffer
self.remove_mapping(old_key);
inner.key = None;
page_cache_eviction_metrics::observe(
page_cache_eviction_metrics::Outcome::FoundSlotEvicted {
iters: iters.try_into().unwrap(),
},
);
} else {
page_cache_eviction_metrics::observe(
page_cache_eviction_metrics::Outcome::FoundSlotUnused {
iters: iters.try_into().unwrap(),
},
);
}
crate::metrics::PAGE_CACHE_FIND_VICTIMS_ITERS_TOTAL.inc_by(iters as u64);
return Ok((slot_idx, inner));
}
}
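For context on the `MaterializedPageHashKey` change above: the materialized-page half of the cache is keyed by (tenant, timeline, page key) plus an LSN, and a lookup wants the newest cached image at or below the requested LSN. The toy model below shows only that keying and lookup rule, with plain integers standing in for `TenantShardId`/`TenantId`, `TimelineId`, `Key`, and `Lsn`; the real cache works on fixed buffer slots with pinning and usage counts, none of which is modeled here.

```
use std::collections::HashMap;

// Stand-ins for the real id types.
type TenantKey = u64;
type TimelineKey = u64;
type PageKey = u64;
type Lsn = u64;

#[derive(PartialEq, Eq, Hash, Clone, Copy)]
struct MaterializedPageHashKey {
    tenant: TenantKey,
    timeline: TimelineKey,
    key: PageKey,
}

/// Toy materialized-page cache: each hash key maps to page images at
/// several LSNs, and lookup answers "newest image at or below this LSN".
#[derive(Default)]
struct MaterializedPageCache {
    // Versions are kept sorted by LSN, oldest first.
    pages: HashMap<MaterializedPageHashKey, Vec<(Lsn, Vec<u8>)>>,
}

impl MaterializedPageCache {
    fn memorize(&mut self, key: MaterializedPageHashKey, lsn: Lsn, img: Vec<u8>) {
        let versions = self.pages.entry(key).or_default();
        versions.push((lsn, img));
        versions.sort_by_key(|(l, _)| *l);
    }

    fn lookup(&self, key: &MaterializedPageHashKey, lsn: Lsn) -> Option<(Lsn, &[u8])> {
        let versions = self.pages.get(key)?;
        versions
            .iter()
            .rev()
            .find(|(l, _)| *l <= lsn)
            .map(|(l, img)| (*l, img.as_slice()))
    }
}

fn main() {
    let key = MaterializedPageHashKey { tenant: 1, timeline: 7, key: 42 };
    let mut cache = MaterializedPageCache::default();
    cache.memorize(key, 100, b"page@100".to_vec());
    cache.memorize(key, 200, b"page@200".to_vec());

    // A read at LSN 150 sees the image that was materialized at LSN 100.
    let (hit_lsn, img) = cache.lookup(&key, 150).unwrap();
    println!("hit at lsn {hit_lsn}: {}", String::from_utf8_lossy(img));
}
```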

View File

@@ -822,7 +822,10 @@ impl<'a> DatadirModification<'a> {
self.put(DBDIR_KEY, Value::Image(buf.into()));
// Create AuxFilesDirectory
self.init_aux_dir()?;
let buf = AuxFilesDirectory::ser(&AuxFilesDirectory {
files: HashMap::new(),
})?;
self.put(AUX_FILES_KEY, Value::Image(Bytes::from(buf)));
let buf = TwoPhaseDirectory::ser(&TwoPhaseDirectory {
xids: HashSet::new(),
@@ -930,7 +933,10 @@ impl<'a> DatadirModification<'a> {
self.put(DBDIR_KEY, Value::Image(buf.into()));
// Create AuxFilesDirectory as well
self.init_aux_dir()?;
let buf = AuxFilesDirectory::ser(&AuxFilesDirectory {
files: HashMap::new(),
})?;
self.put(AUX_FILES_KEY, Value::Image(Bytes::from(buf)));
}
if r.is_none() {
// Create RelDirectory
@@ -1255,14 +1261,6 @@ impl<'a> DatadirModification<'a> {
Ok(())
}
pub fn init_aux_dir(&mut self) -> anyhow::Result<()> {
let buf = AuxFilesDirectory::ser(&AuxFilesDirectory {
files: HashMap::new(),
})?;
self.put(AUX_FILES_KEY, Value::Image(Bytes::from(buf)));
Ok(())
}
pub async fn put_file(
&mut self,
path: &str,
@@ -1769,13 +1767,6 @@ const AUX_FILES_KEY: Key = Key {
// Reverse mappings for a few Keys.
// These are needed by WAL redo manager.
// AUX_FILES currently stores only data for logical replication (slots etc.), and
// we don't preserve it on a branch because safekeepers can't follow a timeline
// switch (and it should probably be optional in general), so these keys are not inherited.
pub fn is_inherited_key(key: Key) -> bool {
key != AUX_FILES_KEY
}
pub fn key_to_rel_block(key: Key) -> anyhow::Result<(RelTag, BlockNumber)> {
Ok(match key.field1 {
0x00 => (

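The `init_aux_dir` hunk above is about one idea: the aux-file directory lives in the key/value store as a single serialized image of an empty map, written once at timeline init and then read-modified-rewritten by later writers. A rough sketch of that initialize-then-update flow, assuming `serde` (with the derive feature) and `bincode` 1.x as stand-ins for the pageserver's own serialization helper and `Value::Image` storage; the struct and path names are illustrative only.

```
use std::collections::HashMap;

use serde::{Deserialize, Serialize};

// Stand-in for AuxFilesDirectory: a map from aux file path to contents,
// stored as one serialized image under a single key (AUX_FILES_KEY).
#[derive(Serialize, Deserialize, Default)]
struct AuxFilesDirectory {
    files: HashMap<String, Vec<u8>>,
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // init_aux_dir-style step: serialize an empty directory image...
    let empty = AuxFilesDirectory::default();
    let image: Vec<u8> = bincode::serialize(&empty)?;

    // ...which later writers deserialize, modify, and re-serialize.
    let mut dir: AuxFilesDirectory = bincode::deserialize(&image)?;
    dir.files
        .insert("pg_replslot/slot1/state".to_string(), b"...".to_vec());
    let updated = bincode::serialize(&dir)?;

    println!(
        "empty image: {} bytes, updated image: {} bytes",
        image.len(),
        updated.len()
    );
    Ok(())
}
```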
View File

@@ -42,7 +42,6 @@ use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::{Arc, Mutex};
use futures::FutureExt;
use pageserver_api::shard::TenantShardId;
use tokio::runtime::Runtime;
use tokio::task::JoinHandle;
use tokio::task_local;
@@ -52,7 +51,7 @@ use tracing::{debug, error, info, warn};
use once_cell::sync::Lazy;
use utils::id::TimelineId;
use utils::id::{TenantId, TimelineId};
use crate::shutdown_pageserver;
@@ -258,9 +257,6 @@ pub enum TaskKind {
/// See [`crate::disk_usage_eviction_task`].
DiskUsageEviction,
/// See [`crate::tenant::secondary`].
SecondaryUploads,
// Initial logical size calculation
InitialLogicalSizeCalculation,
@@ -321,7 +317,7 @@ struct PageServerTask {
/// Tasks may optionally be launched for a particular tenant/timeline, enabling
/// later cancelling tasks for that tenant/timeline in [`shutdown_tasks`]
tenant_shard_id: Option<TenantShardId>,
tenant_id: Option<TenantId>,
timeline_id: Option<TimelineId>,
mutable: Mutex<MutableTaskState>,
@@ -333,7 +329,7 @@ struct PageServerTask {
pub fn spawn<F>(
runtime: &tokio::runtime::Handle,
kind: TaskKind,
tenant_shard_id: Option<TenantShardId>,
tenant_id: Option<TenantId>,
timeline_id: Option<TimelineId>,
name: &str,
shutdown_process_on_error: bool,
@@ -349,7 +345,7 @@ where
kind,
name: name.to_string(),
cancel: cancel.clone(),
tenant_shard_id,
tenant_id,
timeline_id,
mutable: Mutex::new(MutableTaskState { join_handle: None }),
});
@@ -428,28 +424,28 @@ async fn task_finish(
Ok(Err(err)) => {
if shutdown_process_on_error {
error!(
"Shutting down: task '{}' tenant_shard_id: {:?}, timeline_id: {:?} exited with error: {:?}",
task_name, task.tenant_shard_id, task.timeline_id, err
"Shutting down: task '{}' tenant_id: {:?}, timeline_id: {:?} exited with error: {:?}",
task_name, task.tenant_id, task.timeline_id, err
);
shutdown_process = true;
} else {
error!(
"Task '{}' tenant_shard_id: {:?}, timeline_id: {:?} exited with error: {:?}",
task_name, task.tenant_shard_id, task.timeline_id, err
"Task '{}' tenant_id: {:?}, timeline_id: {:?} exited with error: {:?}",
task_name, task.tenant_id, task.timeline_id, err
);
}
}
Err(err) => {
if shutdown_process_on_error {
error!(
"Shutting down: task '{}' tenant_shard_id: {:?}, timeline_id: {:?} panicked: {:?}",
task_name, task.tenant_shard_id, task.timeline_id, err
"Shutting down: task '{}' tenant_id: {:?}, timeline_id: {:?} panicked: {:?}",
task_name, task.tenant_id, task.timeline_id, err
);
shutdown_process = true;
} else {
error!(
"Task '{}' tenant_shard_id: {:?}, timeline_id: {:?} panicked: {:?}",
task_name, task.tenant_shard_id, task.timeline_id, err
"Task '{}' tenant_id: {:?}, timeline_id: {:?} panicked: {:?}",
task_name, task.tenant_id, task.timeline_id, err
);
}
}
@@ -471,11 +467,11 @@ async fn task_finish(
///
/// Or to shut down all tasks for given timeline:
///
/// shutdown_tasks(None, Some(tenant_shard_id), Some(timeline_id))
/// shutdown_tasks(None, Some(tenant_id), Some(timeline_id))
///
pub async fn shutdown_tasks(
kind: Option<TaskKind>,
tenant_shard_id: Option<TenantShardId>,
tenant_id: Option<TenantId>,
timeline_id: Option<TimelineId>,
) {
let mut victim_tasks = Vec::new();
@@ -484,35 +480,35 @@ pub async fn shutdown_tasks(
let tasks = TASKS.lock().unwrap();
for task in tasks.values() {
if (kind.is_none() || Some(task.kind) == kind)
&& (tenant_shard_id.is_none() || task.tenant_shard_id == tenant_shard_id)
&& (tenant_id.is_none() || task.tenant_id == tenant_id)
&& (timeline_id.is_none() || task.timeline_id == timeline_id)
{
task.cancel.cancel();
victim_tasks.push((
Arc::clone(task),
task.kind,
task.tenant_shard_id,
task.tenant_id,
task.timeline_id,
));
}
}
}
let log_all = kind.is_none() && tenant_shard_id.is_none() && timeline_id.is_none();
let log_all = kind.is_none() && tenant_id.is_none() && timeline_id.is_none();
for (task, task_kind, tenant_shard_id, timeline_id) in victim_tasks {
for (task, task_kind, tenant_id, timeline_id) in victim_tasks {
let join_handle = {
let mut task_mut = task.mutable.lock().unwrap();
task_mut.join_handle.take()
};
if let Some(mut join_handle) = join_handle {
if log_all {
if tenant_shard_id.is_none() {
if tenant_id.is_none() {
// there are quite few of these
info!(name = task.name, kind = ?task_kind, "stopping global task");
} else {
// warn to catch these in tests; there shouldn't be any
warn!(name = task.name, tenant_shard_id = ?tenant_shard_id, timeline_id = ?timeline_id, kind = ?task_kind, "stopping left-over");
warn!(name = task.name, tenant_id = ?tenant_id, timeline_id = ?timeline_id, kind = ?task_kind, "stopping left-over");
}
}
if tokio::time::timeout(std::time::Duration::from_secs(1), &mut join_handle)
@@ -521,13 +517,12 @@ pub async fn shutdown_tasks(
{
// allow some time to elapse before logging to cut down the number of log
// lines.
info!("waiting for task {} to shut down", task.name);
info!("waiting for {} to shut down", task.name);
// we never handled this return value, but:
// - we don't deschedule which would lead to is_cancelled
// - panics are already logged (is_panicked)
// - task errors are already logged in the wrapper
let _ = join_handle.await;
info!("task {} completed", task.name);
}
} else {
// Possibly one of:
@@ -561,14 +556,9 @@ pub async fn shutdown_watcher() {
/// cancelled. It can however be moved to other tasks, such as `tokio::task::spawn_blocking` or
/// `tokio::task::JoinSet::spawn`.
pub fn shutdown_token() -> CancellationToken {
let res = SHUTDOWN_TOKEN.try_with(|t| t.clone());
if cfg!(test) {
// in tests this method is called from non-taskmgr spawned tasks, and that is all ok.
res.unwrap_or_default()
} else {
res.expect("shutdown_token() called in an unexpected task or thread")
}
SHUTDOWN_TOKEN
.try_with(|t| t.clone())
.expect("shutdown_token() called in an unexpected task or thread")
}
/// Has the current task been requested to shut down?
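The `shutdown_tasks` hunks above keep one selection rule regardless of whether tasks are tagged with a `tenant_id` or a `tenant_shard_id`: a `None` filter matches every task, a `Some` filter must match exactly, across kind, tenant, and timeline. The sketch below isolates just that rule over simplified task descriptors; the real function additionally cancels each victim's `CancellationToken` and awaits its join handle with a timeout.

```
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum TaskKind {
    Attach,
    Compaction,
    WalReceiver,
}

struct TaskEntry {
    name: &'static str,
    kind: TaskKind,
    tenant_id: Option<u64>,
    timeline_id: Option<u64>,
}

/// Mirrors the selection rule in shutdown_tasks: a `None` filter matches
/// everything, a `Some` filter must match exactly.
fn select_victims<'a>(
    tasks: &'a [TaskEntry],
    kind: Option<TaskKind>,
    tenant_id: Option<u64>,
    timeline_id: Option<u64>,
) -> Vec<&'a TaskEntry> {
    tasks
        .iter()
        .filter(|t| kind.is_none() || Some(t.kind) == kind)
        .filter(|t| tenant_id.is_none() || t.tenant_id == tenant_id)
        .filter(|t| timeline_id.is_none() || t.timeline_id == timeline_id)
        .collect()
}

fn main() {
    let tasks = [
        TaskEntry { name: "attach", kind: TaskKind::Attach, tenant_id: Some(1), timeline_id: None },
        TaskEntry { name: "compaction", kind: TaskKind::Compaction, tenant_id: Some(1), timeline_id: Some(7) },
        TaskEntry { name: "wal receiver", kind: TaskKind::WalReceiver, tenant_id: Some(2), timeline_id: Some(9) },
    ];

    // Like shutdown_tasks(None, Some(tenant_id), None): everything for tenant 1.
    for t in select_victims(&tasks, None, Some(1), None) {
        println!("would cancel {} ({:?})", t.name, t.kind);
    }
}
```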

View File

@@ -48,7 +48,6 @@ use self::mgr::GetActiveTenantError;
use self::mgr::GetTenantError;
use self::mgr::TenantsMap;
use self::remote_timeline_client::RemoteTimelineClient;
use self::timeline::uninit::TimelineExclusionError;
use self::timeline::uninit::TimelineUninitMark;
use self::timeline::uninit::UninitializedTimeline;
use self::timeline::EvictionTaskTenantState;
@@ -88,6 +87,7 @@ use std::process::Stdio;
use std::sync::atomic::AtomicU64;
use std::sync::atomic::Ordering;
use std::sync::Arc;
use std::sync::MutexGuard;
use std::sync::{Mutex, RwLock};
use std::time::{Duration, Instant};
@@ -144,7 +144,6 @@ pub mod storage_layer;
pub mod config;
pub mod delete;
pub mod mgr;
pub mod secondary;
pub mod tasks;
pub mod upload_queue;
@@ -249,12 +248,6 @@ pub struct Tenant {
generation: Generation,
timelines: Mutex<HashMap<TimelineId, Arc<Timeline>>>,
/// During timeline creation, we first insert the TimelineId into the
/// creating map, then into `timelines`, then remove it from the creating map.
/// **Lock order**: if acquiring both, acquire `timelines` before `timelines_creating`
timelines_creating: std::sync::Mutex<HashSet<TimelineId>>,
// This mutex prevents creation of new timelines during GC.
// Adding yet another mutex (in addition to `timelines`) is needed because holding
// `timelines` mutex during all GC iteration
@@ -413,10 +406,8 @@ impl Debug for SetStoppingError {
#[derive(thiserror::Error, Debug)]
pub enum CreateTimelineError {
#[error("creation of timeline with the given ID is in progress")]
AlreadyCreating,
#[error("timeline already exists with different parameters")]
Conflict,
#[error("a timeline with the given ID already exists")]
AlreadyExists,
#[error(transparent)]
AncestorLsn(anyhow::Error),
#[error("ancestor timeline is not active")]
@@ -617,7 +608,7 @@ impl Tenant {
task_mgr::spawn(
&tokio::runtime::Handle::current(),
TaskKind::Attach,
Some(tenant_shard_id),
Some(tenant_shard_id.tenant_id),
None,
"attach tenant",
false,
@@ -1466,7 +1457,7 @@ impl Tenant {
/// For tests, use `DatadirModification::init_empty_test_timeline` + `commit` to setup the
/// minimum amount of keys required to get a writable timeline.
/// (Without it, `put` might fail due to `repartition` failing.)
pub(crate) async fn create_empty_timeline(
pub async fn create_empty_timeline(
&self,
new_timeline_id: TimelineId,
initdb_lsn: Lsn,
@@ -1478,7 +1469,10 @@ impl Tenant {
"Cannot create empty timelines on inactive tenant"
);
let timeline_uninit_mark = self.create_timeline_uninit_mark(new_timeline_id)?;
let timeline_uninit_mark = {
let timelines = self.timelines.lock().unwrap();
self.create_timeline_uninit_mark(new_timeline_id, &timelines)?
};
let new_metadata = TimelineMetadata::new(
// Initialize disk_consistent LSN to 0, The caller must import some data to
// make it valid, before calling finish_creation()
@@ -1555,7 +1549,7 @@ impl Tenant {
/// If the caller specified the timeline ID to use (`new_timeline_id`), and timeline with
/// the same timeline ID already exists, returns CreateTimelineError::AlreadyExists.
#[allow(clippy::too_many_arguments)]
pub(crate) async fn create_timeline(
pub async fn create_timeline(
&self,
new_timeline_id: TimelineId,
ancestor_timeline_id: Option<TimelineId>,
@@ -1576,51 +1570,26 @@ impl Tenant {
.enter()
.map_err(|_| CreateTimelineError::ShuttingDown)?;
// Get exclusive access to the timeline ID: this ensures that it does not already exist,
// and that no other creation attempts will be allowed while we are working. The
// uninit_mark is a guard.
let uninit_mark = match self.create_timeline_uninit_mark(new_timeline_id) {
Ok(m) => m,
Err(TimelineExclusionError::AlreadyCreating) => {
// Creation is in progress, we cannot create it again, and we cannot
// check if this request matches the existing one, so caller must try
// again later.
return Err(CreateTimelineError::AlreadyCreating);
}
Err(TimelineExclusionError::Other(e)) => {
return Err(CreateTimelineError::Other(e));
}
Err(TimelineExclusionError::AlreadyExists(existing)) => {
debug!("timeline {new_timeline_id} already exists");
if let Ok(existing) = self.get_timeline(new_timeline_id, false) {
debug!("timeline {new_timeline_id} already exists");
// Idempotency: creating the same timeline twice is not an error, unless
// the second creation has different parameters.
if existing.get_ancestor_timeline_id() != ancestor_timeline_id
|| existing.pg_version != pg_version
|| (ancestor_start_lsn.is_some()
&& ancestor_start_lsn != Some(existing.get_ancestor_lsn()))
{
return Err(CreateTimelineError::Conflict);
}
if let Some(remote_client) = existing.remote_client.as_ref() {
// Wait for uploads to complete, so that when we return Ok, the timeline
// is known to be durable on remote storage. Just like we do at the end of
// this function, after we have created the timeline ourselves.
//
// We only really care that the initial version of `index_part.json` has
// been uploaded. That's enough to remember that the timeline
// exists. However, there is no function to wait specifically for that so
// we just wait for all in-progress uploads to finish.
remote_client
.wait_completion()
.await
.context("wait for timeline uploads to complete")?;
}
return Ok(existing);
if let Some(remote_client) = existing.remote_client.as_ref() {
// Wait for uploads to complete, so that when we return Ok, the timeline
// is known to be durable on remote storage. Just like we do at the end of
// this function, after we have created the timeline ourselves.
//
// We only really care that the initial version of `index_part.json` has
// been uploaded. That's enough to remember that the timeline
// exists. However, there is no function to wait specifically for that so
// we just wait for all in-progress uploads to finish.
remote_client
.wait_completion()
.await
.context("wait for timeline uploads to complete")?;
}
};
return Err(CreateTimelineError::AlreadyExists);
}
let loaded_timeline = match ancestor_timeline_id {
Some(ancestor_timeline_id) => {
@@ -1657,32 +1626,18 @@ impl Tenant {
ancestor_timeline.wait_lsn(*lsn, ctx).await?;
}
self.branch_timeline(
&ancestor_timeline,
new_timeline_id,
ancestor_start_lsn,
uninit_mark,
ctx,
)
.await?
self.branch_timeline(&ancestor_timeline, new_timeline_id, ancestor_start_lsn, ctx)
.await?
}
None => {
self.bootstrap_timeline(
new_timeline_id,
pg_version,
load_existing_initdb,
uninit_mark,
ctx,
)
.await?
self.bootstrap_timeline(new_timeline_id, pg_version, load_existing_initdb, ctx)
.await?
}
};
// At this point we have dropped our guard on [`Self::timelines_creating`], and
// the timeline is visible in [`Self::timelines`], but it is _not_ durable yet. We must
// not send a success to the caller until it is. The same applies to handling retries,
// see the handling of [`TimelineExclusionError::AlreadyExists`] above.
if let Some(remote_client) = loaded_timeline.remote_client.as_ref() {
// Wait for the upload of the 'index_part.json` file to finish, so that when we return
// Ok, the timeline is durable in remote storage.
let kind = ancestor_timeline_id
.map(|_| "branched")
.unwrap_or("bootstrapped");
@@ -1962,7 +1917,7 @@ impl Tenant {
//
// this will additionally shutdown and await all timeline tasks.
tracing::debug!("Waiting for tasks...");
task_mgr::shutdown_tasks(None, Some(self.tenant_shard_id), None).await;
task_mgr::shutdown_tasks(None, Some(self.tenant_shard_id.tenant_id), None).await;
// Wait for any in-flight operations to complete
self.gate.close().await;
@@ -2159,14 +2114,6 @@ impl Tenant {
.attach_mode
.clone()
}
pub(crate) fn get_tenant_shard_id(&self) -> &TenantShardId {
&self.tenant_shard_id
}
pub(crate) fn get_generation(&self) -> Generation {
self.generation
}
}
/// Given a Vec of timelines and their ancestors (timeline_id, ancestor_id),
@@ -2305,18 +2252,6 @@ impl Tenant {
.or(self.conf.default_tenant_conf.min_resident_size_override)
}
pub fn get_heatmap_period(&self) -> Option<Duration> {
let tenant_conf = self.tenant_conf.read().unwrap().tenant_conf;
let heatmap_period = tenant_conf
.heatmap_period
.unwrap_or(self.conf.default_tenant_conf.heatmap_period);
if heatmap_period.is_zero() {
None
} else {
Some(heatmap_period)
}
}
pub fn set_new_tenant_config(&self, new_tenant_conf: TenantConfOpt) {
self.tenant_conf.write().unwrap().tenant_conf = new_tenant_conf;
// Don't hold self.timelines.lock() during the notifies.
@@ -2466,7 +2401,6 @@ impl Tenant {
loading_started_at: Instant::now(),
tenant_conf: Arc::new(RwLock::new(attached_conf)),
timelines: Mutex::new(HashMap::new()),
timelines_creating: Mutex::new(HashSet::new()),
gc_cs: tokio::sync::Mutex::new(()),
walredo_mgr,
remote_storage,
@@ -2858,9 +2792,8 @@ impl Tenant {
start_lsn: Option<Lsn>,
ctx: &RequestContext,
) -> Result<Arc<Timeline>, CreateTimelineError> {
let uninit_mark = self.create_timeline_uninit_mark(dst_id).unwrap();
let tl = self
.branch_timeline_impl(src_timeline, dst_id, start_lsn, uninit_mark, ctx)
.branch_timeline_impl(src_timeline, dst_id, start_lsn, ctx)
.await?;
tl.set_state(TimelineState::Active);
Ok(tl)
@@ -2874,10 +2807,9 @@ impl Tenant {
src_timeline: &Arc<Timeline>,
dst_id: TimelineId,
start_lsn: Option<Lsn>,
timeline_uninit_mark: TimelineUninitMark<'_>,
ctx: &RequestContext,
) -> Result<Arc<Timeline>, CreateTimelineError> {
self.branch_timeline_impl(src_timeline, dst_id, start_lsn, timeline_uninit_mark, ctx)
self.branch_timeline_impl(src_timeline, dst_id, start_lsn, ctx)
.await
}
@@ -2886,14 +2818,13 @@ impl Tenant {
src_timeline: &Arc<Timeline>,
dst_id: TimelineId,
start_lsn: Option<Lsn>,
timeline_uninit_mark: TimelineUninitMark<'_>,
_ctx: &RequestContext,
) -> Result<Arc<Timeline>, CreateTimelineError> {
let src_id = src_timeline.timeline_id;
// We will validate our ancestor LSN in this function. Acquire the GC lock so that
// this check cannot race with GC, and the ancestor LSN is guaranteed to remain
// valid while we are creating the branch.
// First acquire the GC lock so that another task cannot advance the GC
// cutoff in 'gc_info', and make 'start_lsn' invalid, while we are
// creating the branch.
let _gc_cs = self.gc_cs.lock().await;
// If no start LSN is specified, we branch the new timeline from the source timeline's last record LSN
@@ -2903,6 +2834,13 @@ impl Tenant {
lsn
});
// Create a placeholder for the new branch. This will error
// out if the new timeline ID is already in use.
let timeline_uninit_mark = {
let timelines = self.timelines.lock().unwrap();
self.create_timeline_uninit_mark(dst_id, &timelines)?
};
// Ensure that `start_lsn` is valid, i.e. the LSN is within the PITR
// horizon on the source timeline
//
@@ -2994,38 +2932,21 @@ impl Tenant {
Ok(new_timeline)
}
/// For unit tests, make this visible so that other modules can directly create timelines
#[cfg(test)]
pub(crate) async fn bootstrap_timeline_test(
&self,
timeline_id: TimelineId,
pg_version: u32,
load_existing_initdb: Option<TimelineId>,
ctx: &RequestContext,
) -> anyhow::Result<Arc<Timeline>> {
let uninit_mark = self.create_timeline_uninit_mark(timeline_id).unwrap();
self.bootstrap_timeline(
timeline_id,
pg_version,
load_existing_initdb,
uninit_mark,
ctx,
)
.await
}
/// - run initdb to init temporary instance and get bootstrap data
/// - after initialization completes, tar up the temp dir and upload it to S3.
///
/// The caller is responsible for activating the returned timeline.
async fn bootstrap_timeline(
pub(crate) async fn bootstrap_timeline(
&self,
timeline_id: TimelineId,
pg_version: u32,
load_existing_initdb: Option<TimelineId>,
timeline_uninit_mark: TimelineUninitMark<'_>,
ctx: &RequestContext,
) -> anyhow::Result<Arc<Timeline>> {
let timeline_uninit_mark = {
let timelines = self.timelines.lock().unwrap();
self.create_timeline_uninit_mark(timeline_id, &timelines)?
};
// create a `tenant/{tenant_id}/timelines/basebackup-{timeline_id}.{TEMP_FILE_SUFFIX}/`
// temporary directory for basebackup files for the given timeline.
@@ -3106,9 +3027,8 @@ impl Tenant {
3,
u32::MAX,
"persist_initdb_tar_zst",
backoff::Cancel::new(self.cancel.clone(), || {
anyhow::anyhow!("initdb upload cancelled")
}),
// TODO: use a cancellation token (https://github.com/neondatabase/neon/issues/5066)
backoff::Cancel::new(CancellationToken::new(), || unreachable!()),
)
.await?;
@@ -3223,11 +3143,11 @@ impl Tenant {
/// at 'disk_consistent_lsn'. After any initial data has been imported, call
/// `finish_creation` to insert the Timeline into the timelines map and to remove the
/// uninit mark file.
async fn prepare_new_timeline<'a>(
&'a self,
async fn prepare_new_timeline(
&self,
new_timeline_id: TimelineId,
new_metadata: &TimelineMetadata,
uninit_mark: TimelineUninitMark<'a>,
uninit_mark: TimelineUninitMark,
start_lsn: Lsn,
ancestor: Option<Arc<Timeline>>,
) -> anyhow::Result<UninitializedTimeline> {
@@ -3300,38 +3220,23 @@ impl Tenant {
fn create_timeline_uninit_mark(
&self,
timeline_id: TimelineId,
) -> Result<TimelineUninitMark, TimelineExclusionError> {
timelines: &MutexGuard<HashMap<TimelineId, Arc<Timeline>>>,
) -> anyhow::Result<TimelineUninitMark> {
let tenant_shard_id = self.tenant_shard_id;
anyhow::ensure!(
timelines.get(&timeline_id).is_none(),
"Timeline {tenant_shard_id}/{timeline_id} already exists in pageserver's memory"
);
let timeline_path = self.conf.timeline_path(&tenant_shard_id, &timeline_id);
anyhow::ensure!(
!timeline_path.exists(),
"Timeline {timeline_path} already exists, cannot create its uninit mark file",
);
let uninit_mark_path = self
.conf
.timeline_uninit_mark_file_path(tenant_shard_id, timeline_id);
let timeline_path = self.conf.timeline_path(&tenant_shard_id, &timeline_id);
let uninit_mark = TimelineUninitMark::new(
self,
timeline_id,
uninit_mark_path.clone(),
timeline_path.clone(),
)?;
// At this stage, we have got exclusive access to in-memory state for this timeline ID
// for creation.
// A timeline directory should never exist on disk already:
// - a previous failed creation would have cleaned up after itself
// - a pageserver restart would clean up timeline directories that don't have valid remote state
//
// Therefore it is an unexpected internal error to encounter a timeline directory already existing here;
// this error may indicate a bug in cleanup on failed creations.
if timeline_path.exists() {
return Err(TimelineExclusionError::Other(anyhow::anyhow!(
"Timeline directory already exists! This is a bug."
)));
}
// Create the on-disk uninit mark _after_ the in-memory acquisition of the timeline ID: this guarantees
// that during process runtime, colliding creations will be caught in-memory without getting
// as far as failing to write a file.
fs::OpenOptions::new()
.write(true)
.create_new(true)
@@ -3345,6 +3250,8 @@ impl Tenant {
format!("Failed to crate uninit mark for timeline {tenant_shard_id}/{timeline_id}")
})?;
let uninit_mark = TimelineUninitMark::new(uninit_mark_path, timeline_path);
Ok(uninit_mark)
}
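
The uninit mark is claimed with `create_new(true)`, so a second concurrent creation attempt fails at the filesystem instead of silently proceeding. A standalone sketch of that exclusive-creation pattern (the path and helper name are illustrative, not the pageserver's API):

```rust
use std::fs::OpenOptions;
use std::io::ErrorKind;
use std::path::Path;

/// Try to claim a marker file; returns Ok(true) if we created it,
/// Ok(false) if someone else already holds it.
fn try_claim_marker(path: &Path) -> std::io::Result<bool> {
    match OpenOptions::new().write(true).create_new(true).open(path) {
        Ok(_file) => Ok(true),
        Err(e) if e.kind() == ErrorKind::AlreadyExists => Ok(false),
        Err(e) => Err(e),
    }
}

fn main() -> std::io::Result<()> {
    let marker = std::env::temp_dir().join("timeline-1234.uninit.tmp");
    assert!(try_claim_marker(&marker)?); // first claim succeeds
    assert!(!try_claim_marker(&marker)?); // second claim sees AlreadyExists
    std::fs::remove_file(&marker)?;
    Ok(())
}
```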
@@ -3787,7 +3694,6 @@ pub(crate) mod harness {
tenant_conf.evictions_low_residence_duration_metric_threshold,
),
gc_feedback: Some(tenant_conf.gc_feedback),
heatmap_period: Some(tenant_conf.heatmap_period),
}
}
}
@@ -4094,7 +4000,13 @@ mod tests {
.await
{
Ok(_) => panic!("duplicate timeline creation should fail"),
Err(e) => assert_eq!(e.to_string(), "Already exists".to_string()),
Err(e) => assert_eq!(
e.to_string(),
format!(
"Timeline {}/{} already exists in pageserver's memory",
tenant.tenant_shard_id, TIMELINE_ID
)
),
}
Ok(())

View File

@@ -334,11 +334,6 @@ pub struct TenantConf {
#[serde(with = "humantime_serde")]
pub evictions_low_residence_duration_metric_threshold: Duration,
pub gc_feedback: bool,
/// If non-zero, the period between uploads of a heatmap from attached tenants. This
/// may be disabled if a Tenant will not have secondary locations: only secondary
/// locations will use the heatmap uploaded by attached locations.
pub heatmap_period: Duration,
}
/// Same as TenantConf, but this struct preserves the information about
@@ -419,11 +414,6 @@ pub struct TenantConfOpt {
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(default)]
pub gc_feedback: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(with = "humantime_serde")]
#[serde(default)]
pub heatmap_period: Option<Duration>,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
@@ -492,7 +482,6 @@ impl TenantConfOpt {
.evictions_low_residence_duration_metric_threshold
.unwrap_or(global_conf.evictions_low_residence_duration_metric_threshold),
gc_feedback: self.gc_feedback.unwrap_or(global_conf.gc_feedback),
heatmap_period: self.heatmap_period.unwrap_or(global_conf.heatmap_period),
}
}
}
@@ -530,7 +519,6 @@ impl Default for TenantConf {
)
.expect("cannot parse default evictions_low_residence_duration_metric_threshold"),
gc_feedback: false,
heatmap_period: Duration::ZERO,
}
}
}

View File

@@ -463,7 +463,7 @@ impl DeleteTenantFlow {
task_mgr::spawn(
task_mgr::BACKGROUND_RUNTIME.handle(),
TaskKind::TimelineDeletionWorker,
Some(tenant_shard_id),
Some(tenant_shard_id.tenant_id),
None,
"tenant_delete",
false,
@@ -550,7 +550,7 @@ impl DeleteTenantFlow {
// we encounter an InProgress marker, yield the barrier it contains and wait on it.
let barrier = {
let mut locked = tenants.write().unwrap();
let removed = locked.remove(tenant.tenant_shard_id);
let removed = locked.remove(&tenant.tenant_shard_id.tenant_id);
// FIXME: we should not be modifying this from outside of mgr.rs.
// This will go away when we simplify deletion (https://github.com/neondatabase/neon/issues/5080)

View File

@@ -98,6 +98,33 @@ pub(crate) enum TenantsMap {
ShuttingDown(BTreeMap<TenantShardId, TenantSlot>),
}
/// Helper for mapping shard-unaware functions to a sharding-aware map
/// TODO(sharding): all users of this must be made shard-aware.
fn exactly_one_or_none<'a>(
map: &'a BTreeMap<TenantShardId, TenantSlot>,
tenant_id: &TenantId,
) -> Option<(&'a TenantShardId, &'a TenantSlot)> {
let mut slots = map.range(TenantShardId::tenant_range(*tenant_id));
// Retrieve the first two slots in the range: if both are populated, we must panic because the caller
// needs a shard-naive view of the world in which only one slot can exist for a TenantId at a time.
let slot_a = slots.next();
let slot_b = slots.next();
match (slot_a, slot_b) {
(None, None) => None,
(Some(slot), None) => {
// Exactly one matching slot
Some(slot)
}
(Some(_slot_a), Some(_slot_b)) => {
// Multiple shards for this tenant: cannot handle this yet.
// TODO(sharding): callers of get() should be shard-aware.
todo!("Attaching multiple shards in teh same tenant to the same pageserver")
}
(None, Some(_)) => unreachable!(),
}
}
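
The helper above relies on all shards of one tenant sorting contiguously in the `BTreeMap`, so a range query bounded by the tenant id yields zero, one, or many slots. A self-contained sketch of the same shape, with a simplified `(tenant, shard)` tuple standing in for `TenantShardId`:

```rust
use std::collections::BTreeMap;

// Simplified stand-ins for TenantId / TenantShardId: a shard key is
// (tenant, shard_number), and all shards of one tenant sort contiguously.
type TenantId = u32;
type ShardKey = (TenantId, u8);

/// Return the single entry for `tenant`, `None` if absent, and panic if the
/// map holds more than one shard for it (mirroring the shard-naive helper).
fn exactly_one_or_none<'a, V>(
    map: &'a BTreeMap<ShardKey, V>,
    tenant: TenantId,
) -> Option<(&'a ShardKey, &'a V)> {
    let mut range = map.range((tenant, u8::MIN)..=(tenant, u8::MAX));
    match (range.next(), range.next()) {
        (None, None) => None,
        (Some(entry), None) => Some(entry),
        (Some(_), Some(_)) => panic!("multiple shards for tenant {tenant}"),
        (None, Some(_)) => unreachable!(),
    }
}

fn main() {
    let mut map = BTreeMap::new();
    map.insert((7, 0), "slot for tenant 7");
    map.insert((9, 0), "slot for tenant 9");
    assert!(exactly_one_or_none(&map, 7).is_some());
    assert!(exactly_one_or_none(&map, 8).is_none());
}
```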
pub(crate) enum TenantsMapRemoveResult {
Occupied(TenantSlot),
Vacant,
@@ -120,11 +147,12 @@ impl TenantsMap {
/// Convenience function for typical usage, where we want to get a `Tenant` object, for
/// working with attached tenants. If the TenantId is in the map but in Secondary state,
/// None is returned.
pub(crate) fn get(&self, tenant_shard_id: &TenantShardId) -> Option<&Arc<Tenant>> {
pub(crate) fn get(&self, tenant_id: &TenantId) -> Option<&Arc<Tenant>> {
match self {
TenantsMap::Initializing => None,
TenantsMap::Open(m) | TenantsMap::ShuttingDown(m) => {
m.get(tenant_shard_id).and_then(|slot| slot.get_attached())
// TODO(sharding): callers of get() should be shard-aware.
exactly_one_or_none(m, tenant_id).and_then(|(_, slot)| slot.get_attached())
}
}
}
@@ -176,19 +204,25 @@ impl TenantsMap {
///
/// The normal way to remove a tenant is using a SlotGuard, which will gracefully remove the guarded
/// slot if the enclosed tenant is shutdown.
pub(crate) fn remove(&mut self, tenant_shard_id: TenantShardId) -> TenantsMapRemoveResult {
pub(crate) fn remove(&mut self, tenant_id: &TenantId) -> TenantsMapRemoveResult {
use std::collections::btree_map::Entry;
match self {
TenantsMap::Initializing => TenantsMapRemoveResult::Vacant,
TenantsMap::Open(m) | TenantsMap::ShuttingDown(m) => match m.entry(tenant_shard_id) {
Entry::Occupied(entry) => match entry.get() {
TenantSlot::InProgress(barrier) => {
TenantsMapRemoveResult::InProgress(barrier.clone())
}
_ => TenantsMapRemoveResult::Occupied(entry.remove()),
},
Entry::Vacant(_entry) => TenantsMapRemoveResult::Vacant,
},
TenantsMap::Open(m) | TenantsMap::ShuttingDown(m) => {
let key = exactly_one_or_none(m, tenant_id).map(|(k, _)| *k);
match key {
Some(key) => match m.entry(key) {
Entry::Occupied(entry) => match entry.get() {
TenantSlot::InProgress(barrier) => {
TenantsMapRemoveResult::InProgress(barrier.clone())
}
_ => TenantsMapRemoveResult::Occupied(entry.remove()),
},
Entry::Vacant(_entry) => TenantsMapRemoveResult::Vacant,
},
None => TenantsMapRemoveResult::Vacant,
}
}
}
}
@@ -788,16 +822,14 @@ pub(crate) async fn set_new_tenant_config(
new_tenant_conf: TenantConfOpt,
tenant_id: TenantId,
) -> Result<(), SetNewTenantConfigError> {
// Legacy API: does not support sharding
let tenant_shard_id = TenantShardId::unsharded(tenant_id);
info!("configuring tenant {tenant_id}");
let tenant = get_tenant(tenant_shard_id, true)?;
let tenant = get_tenant(tenant_id, true)?;
// This is a legacy API that only operates on attached tenants: the preferred
// API to use is the location_config/ endpoint, which lets the caller provide
// the full LocationConf.
let location_conf = LocationConf::attached_single(new_tenant_conf, tenant.generation);
let tenant_shard_id = TenantShardId::unsharded(tenant_id);
Tenant::persist_tenant_config(conf, &tenant_shard_id, &location_conf)
.await
@@ -807,12 +839,6 @@ pub(crate) async fn set_new_tenant_config(
}
impl TenantManager {
/// Convenience function so that anyone with a TenantManager can get at the global configuration, without
/// having to pass it around everywhere as a separate object.
pub(crate) fn get_conf(&self) -> &'static PageServerConf {
self.conf
}
/// Gets the attached tenant from the in-memory data, erroring if it's absent, in secondary mode, or does not fit the query.
/// `active_only = true` allows querying only tenants that are ready for operations, erroring on other kinds of tenants.
pub(crate) fn get_attached_tenant_shard(
@@ -1093,20 +1119,6 @@ impl TenantManager {
Ok(())
}
pub(crate) fn get_attached_active_tenant_shards(&self) -> Vec<Arc<Tenant>> {
let locked = self.tenants.read().unwrap();
match &*locked {
TenantsMap::Initializing => Vec::new(),
TenantsMap::Open(map) | TenantsMap::ShuttingDown(map) => map
.values()
.filter_map(|slot| {
slot.get_attached()
.and_then(|t| if t.is_active() { Some(t.clone()) } else { None })
})
.collect(),
}
}
}
#[derive(Debug, thiserror::Error)]
@@ -1131,11 +1143,14 @@ pub(crate) enum GetTenantError {
///
/// This method is cancel-safe.
pub(crate) fn get_tenant(
tenant_shard_id: TenantShardId,
tenant_id: TenantId,
active_only: bool,
) -> Result<Arc<Tenant>, GetTenantError> {
let locked = TENANTS.read().unwrap();
// TODO(sharding): make all callers of get_tenant shard-aware
let tenant_shard_id = TenantShardId::unsharded(tenant_id);
let peek_slot = tenant_map_peek_slot(&locked, &tenant_shard_id, TenantSlotPeekMode::Read)?;
match peek_slot {
@@ -1147,18 +1162,14 @@ pub(crate) fn get_tenant(
TenantState::Active => Ok(Arc::clone(tenant)),
_ => {
if active_only {
Err(GetTenantError::NotActive(tenant_shard_id.tenant_id))
Err(GetTenantError::NotActive(tenant_id))
} else {
Ok(Arc::clone(tenant))
}
}
},
Some(TenantSlot::InProgress(_)) => {
Err(GetTenantError::NotActive(tenant_shard_id.tenant_id))
}
None | Some(TenantSlot::Secondary) => {
Err(GetTenantError::NotFound(tenant_shard_id.tenant_id))
}
Some(TenantSlot::InProgress(_)) => Err(GetTenantError::NotActive(tenant_id)),
None | Some(TenantSlot::Secondary) => Err(GetTenantError::NotFound(tenant_id)),
}
}
@@ -1531,8 +1542,7 @@ pub(crate) enum TenantMapListError {
///
/// Get list of tenants, for the mgmt API
///
pub(crate) async fn list_tenants() -> Result<Vec<(TenantShardId, TenantState)>, TenantMapListError>
{
pub(crate) async fn list_tenants() -> Result<Vec<(TenantId, TenantState)>, TenantMapListError> {
let tenants = TENANTS.read().unwrap();
let m = match &*tenants {
TenantsMap::Initializing => return Err(TenantMapListError::Initializing),
@@ -1540,10 +1550,12 @@ pub(crate) async fn list_tenants() -> Result<Vec<(TenantShardId, TenantState)>,
};
Ok(m.iter()
.filter_map(|(id, tenant)| match tenant {
TenantSlot::Attached(tenant) => Some((*id, tenant.current_state())),
TenantSlot::Attached(tenant) => Some((id, tenant.current_state())),
TenantSlot::Secondary => None,
TenantSlot::InProgress(_) => None,
})
// TODO(sharding): make callers of this function shard-aware
.map(|(k, v)| (k.tenant_id, v))
.collect())
}
@@ -2077,20 +2089,22 @@ use {
};
pub(crate) async fn immediate_gc(
tenant_shard_id: TenantShardId,
tenant_id: TenantId,
timeline_id: TimelineId,
gc_req: TimelineGcRequest,
cancel: CancellationToken,
ctx: &RequestContext,
) -> Result<tokio::sync::oneshot::Receiver<Result<GcResult, anyhow::Error>>, ApiError> {
let guard = TENANTS.read().unwrap();
let tenant = guard
.get(&tenant_shard_id)
.get(&tenant_id)
.map(Arc::clone)
.with_context(|| format!("tenant {tenant_shard_id}"))
.with_context(|| format!("tenant {tenant_id}"))
.map_err(|e| ApiError::NotFound(e.into()))?;
// TODO(sharding): make callers of this function shard-aware
let tenant_shard_id = TenantShardId::unsharded(tenant_id);
let gc_horizon = gc_req.gc_horizon.unwrap_or_else(|| tenant.get_gc_horizon());
// Use tenant's pitr setting
let pitr = tenant.get_pitr_interval();
@@ -2102,9 +2116,9 @@ pub(crate) async fn immediate_gc(
task_mgr::spawn(
&tokio::runtime::Handle::current(),
TaskKind::GarbageCollector,
Some(tenant_shard_id),
Some(tenant_id),
Some(timeline_id),
&format!("timeline_gc_handler garbage collection run for tenant {tenant_shard_id} timeline {timeline_id}"),
&format!("timeline_gc_handler garbage collection run for tenant {tenant_id} timeline {timeline_id}"),
false,
async move {
fail::fail_point!("immediate_gc_task_pre");

View File

@@ -180,7 +180,7 @@
//! [`Tenant::timeline_init_and_sync`]: super::Tenant::timeline_init_and_sync
//! [`Timeline::load_layer_map`]: super::Timeline::load_layer_map
pub(crate) mod download;
mod download;
pub mod index;
mod upload;
@@ -1223,7 +1223,7 @@ impl RemoteTimelineClient {
task_mgr::spawn(
&self.runtime,
TaskKind::RemoteUploadTask,
Some(self.tenant_shard_id),
Some(self.tenant_shard_id.tenant_id),
Some(self.timeline_id),
"remote upload",
false,
@@ -1604,23 +1604,6 @@ impl RemoteTimelineClient {
}
}
}
pub(crate) fn get_layers_metadata(
&self,
layers: Vec<LayerFileName>,
) -> anyhow::Result<Vec<Option<LayerFileMetadata>>> {
let q = self.upload_queue.lock().unwrap();
let q = match &*q {
UploadQueue::Stopped(_) | UploadQueue::Uninitialized => {
anyhow::bail!("queue is in state {}", q.as_str())
}
UploadQueue::Initialized(inner) => inner,
};
let decorated = layers.into_iter().map(|l| q.latest_files.get(&l).cloned());
Ok(decorated.collect())
}
}
pub fn remote_timelines_path(tenant_shard_id: &TenantShardId) -> RemotePath {
@@ -1676,13 +1659,6 @@ pub fn remote_index_path(
.expect("Failed to construct path")
}
pub const HEATMAP_BASENAME: &str = "heatmap-v1.json";
pub(crate) fn remote_heatmap_path(tenant_shard_id: &TenantShardId) -> RemotePath {
RemotePath::from_string(&format!("tenants/{tenant_shard_id}/{HEATMAP_BASENAME}"))
.expect("Failed to construct path")
}
/// Given the key of an index, parse out the generation part of the name
pub fn parse_remote_index_path(path: RemotePath) -> Option<Generation> {
let file_name = match path.get_path().file_name() {

View File

@@ -4,9 +4,8 @@ use anyhow::{bail, Context};
use camino::Utf8Path;
use fail::fail_point;
use pageserver_api::shard::TenantShardId;
use std::io::{ErrorKind, SeekFrom};
use std::io::ErrorKind;
use tokio::fs::{self, File};
use tokio::io::AsyncSeekExt;
use super::Generation;
use crate::{
@@ -120,14 +119,11 @@ pub(crate) async fn upload_initdb_dir(
storage: &GenericRemoteStorage,
tenant_id: &TenantId,
timeline_id: &TimelineId,
mut initdb_tar_zst: File,
initdb_tar_zst: File,
size: u64,
) -> anyhow::Result<()> {
tracing::trace!("uploading initdb dir");
// We might have already read partway into the file in a prior retry attempt
initdb_tar_zst.seek(SeekFrom::Start(0)).await?;
let file = tokio_util::io::ReaderStream::with_capacity(initdb_tar_zst, super::BUFFER_SIZE);
let remote_path = remote_initdb_archive_path(tenant_id, timeline_id);

View File

@@ -1,104 +0,0 @@
pub mod heatmap;
mod heatmap_uploader;
use std::sync::Arc;
use crate::task_mgr::{self, TaskKind, BACKGROUND_RUNTIME};
use self::heatmap_uploader::heatmap_uploader_task;
use super::mgr::TenantManager;
use pageserver_api::shard::TenantShardId;
use remote_storage::GenericRemoteStorage;
use tokio_util::sync::CancellationToken;
use utils::completion::Barrier;
enum UploadCommand {
Upload(TenantShardId),
}
struct CommandRequest<T> {
payload: T,
response_tx: tokio::sync::oneshot::Sender<CommandResponse>,
}
struct CommandResponse {
result: anyhow::Result<()>,
}
/// The SecondaryController is a pseudo-rpc client for administrative control of secondary mode downloads,
/// and heatmap uploads. This is not a hot data path: it's primarily a hook for tests,
/// where we want to immediately upload/download for a particular tenant. In normal operation
/// uploads & downloads are autonomous and not driven by this interface.
pub struct SecondaryController {
upload_req_tx: tokio::sync::mpsc::Sender<CommandRequest<UploadCommand>>,
}
impl SecondaryController {
async fn dispatch<T>(
&self,
queue: &tokio::sync::mpsc::Sender<CommandRequest<T>>,
payload: T,
) -> anyhow::Result<()> {
let (response_tx, response_rx) = tokio::sync::oneshot::channel();
queue
.send(CommandRequest {
payload,
response_tx,
})
.await
.map_err(|_| anyhow::anyhow!("Receiver shut down"))?;
let response = response_rx
.await
.map_err(|_| anyhow::anyhow!("Request dropped"))?;
response.result
}
pub async fn upload_tenant(&self, tenant_shard_id: TenantShardId) -> anyhow::Result<()> {
self.dispatch(&self.upload_req_tx, UploadCommand::Upload(tenant_shard_id))
.await
}
}
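
The controller's `dispatch` is a pseudo-RPC: the command travels over an mpsc queue and the reply comes back on a per-request oneshot channel. A runnable sketch of that pattern, assuming the `tokio` crate with the sync/macros/rt features; the command and error types here are placeholders, not the real controller's:

```rust
use tokio::sync::{mpsc, oneshot};

// Hypothetical command/response shapes; the real controller carries tenant
// shard ids and an anyhow::Result.
struct CommandRequest {
    payload: String,
    response_tx: oneshot::Sender<Result<(), String>>,
}

/// Send one command and wait for the worker's reply (the pseudo-RPC pattern).
async fn dispatch(
    queue: &mpsc::Sender<CommandRequest>,
    payload: String,
) -> Result<(), String> {
    let (response_tx, response_rx) = oneshot::channel();
    queue
        .send(CommandRequest { payload, response_tx })
        .await
        .map_err(|_| "receiver shut down".to_string())?;
    response_rx.await.map_err(|_| "request dropped".to_string())?
}

#[tokio::main]
async fn main() {
    let (tx, mut rx) = mpsc::channel::<CommandRequest>(16);

    // Worker task: handle each command and answer on its oneshot channel.
    tokio::spawn(async move {
        while let Some(cmd) = rx.recv().await {
            println!("handling command: {}", cmd.payload);
            let _ = cmd.response_tx.send(Ok(()));
        }
    });

    dispatch(&tx, "upload tenant".into()).await.unwrap();
}
```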
pub fn spawn_tasks(
tenant_manager: Arc<TenantManager>,
remote_storage: GenericRemoteStorage,
background_jobs_can_start: Barrier,
cancel: CancellationToken,
) -> SecondaryController {
let (upload_req_tx, upload_req_rx) =
tokio::sync::mpsc::channel::<CommandRequest<UploadCommand>>(16);
task_mgr::spawn(
BACKGROUND_RUNTIME.handle(),
TaskKind::SecondaryUploads,
None,
None,
"heatmap uploads",
false,
async move {
heatmap_uploader_task(
tenant_manager,
remote_storage,
upload_req_rx,
background_jobs_can_start,
cancel,
)
.await
},
);
SecondaryController { upload_req_tx }
}
/// For running with remote storage disabled: a SecondaryController that is connected to nothing.
pub fn null_controller() -> SecondaryController {
let (upload_req_tx, _upload_req_rx) =
tokio::sync::mpsc::channel::<CommandRequest<UploadCommand>>(16);
SecondaryController { upload_req_tx }
}

View File

@@ -1,64 +0,0 @@
use std::time::SystemTime;
use crate::tenant::{
remote_timeline_client::index::IndexLayerMetadata, storage_layer::LayerFileName,
};
use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DisplayFromStr, TimestampSeconds};
use utils::{generation::Generation, id::TimelineId};
#[derive(Serialize, Deserialize)]
pub(super) struct HeatMapTenant {
/// Generation of the attached location that uploaded the heatmap: this is not required
/// for correctness, but acts as a hint to secondary locations in order to detect thrashing
/// in the unlikely event that two attached locations are both uploading conflicting heatmaps.
pub(super) generation: Generation,
pub(super) timelines: Vec<HeatMapTimeline>,
}
#[serde_as]
#[derive(Serialize, Deserialize)]
pub(crate) struct HeatMapTimeline {
#[serde_as(as = "DisplayFromStr")]
pub(super) timeline_id: TimelineId,
pub(super) layers: Vec<HeatMapLayer>,
}
#[serde_as]
#[derive(Serialize, Deserialize)]
pub(crate) struct HeatMapLayer {
pub(super) name: LayerFileName,
pub(super) metadata: IndexLayerMetadata,
#[serde_as(as = "TimestampSeconds<i64>")]
pub(super) access_time: SystemTime,
// TODO: an actual 'heat' score that would let secondary locations prioritize downloading
// the hottest layers, rather than trying to simply mirror whatever layers are on-disk on the primary.
}
impl HeatMapLayer {
pub(crate) fn new(
name: LayerFileName,
metadata: IndexLayerMetadata,
access_time: SystemTime,
) -> Self {
Self {
name,
metadata,
access_time,
}
}
}
impl HeatMapTimeline {
pub(crate) fn new(timeline_id: TimelineId, layers: Vec<HeatMapLayer>) -> Self {
Self {
timeline_id,
layers,
}
}
}

View File

@@ -1,582 +0,0 @@
use std::{
collections::HashMap,
sync::{Arc, Weak},
time::{Duration, Instant},
};
use crate::{
metrics::SECONDARY_MODE,
tenant::{
config::AttachmentMode, mgr::TenantManager, remote_timeline_client::remote_heatmap_path,
secondary::CommandResponse, span::debug_assert_current_span_has_tenant_id, Tenant,
},
};
use md5;
use pageserver_api::shard::TenantShardId;
use remote_storage::GenericRemoteStorage;
use tokio::task::JoinSet;
use tokio_util::sync::CancellationToken;
use tracing::instrument;
use utils::{backoff, completion::Barrier};
use super::{heatmap::HeatMapTenant, CommandRequest, UploadCommand};
/// Period between heatmap uploader walking Tenants to look for work to do.
/// If any tenants have a heatmap upload period lower than this, it will be adjusted
/// downward to match.
const DEFAULT_SCHEDULING_INTERVAL: Duration = Duration::from_millis(60000);
const MIN_SCHEDULING_INTERVAL: Duration = Duration::from_millis(1000);
struct WriteInProgress {
barrier: Barrier,
}
struct UploadPending {
tenant: Arc<Tenant>,
last_digest: Option<md5::Digest>,
}
struct WriteComplete {
tenant_shard_id: TenantShardId,
completed_at: Instant,
digest: Option<md5::Digest>,
next_upload: Option<Instant>,
}
/// The heatmap uploader keeps a little bit of per-tenant state, mainly to remember
/// when we last did a write. We only populate this after doing at least one
/// write for a tenant -- this avoids holding state for tenants that have
/// uploads disabled.
struct UploaderTenantState {
// This Weak only exists to enable culling idle instances of this type
// when the Tenant has been deallocated.
tenant: Weak<Tenant>,
/// Digest of the serialized heatmap that we last successfully uploaded
///
/// md5 is generally a bad hash. We use it because it's convenient for interop with AWS S3's ETag,
/// which is also an md5sum.
last_digest: Option<md5::Digest>,
/// When the last upload attempt completed (may have been successful or failed)
last_upload: Option<Instant>,
/// When should we next do an upload? None means never.
next_upload: Option<Instant>,
}
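
The `last_digest` field lets the uploader skip a write when the serialized heatmap has not changed since the last successful upload. A standalone sketch of that skip logic; it swaps the real code's md5 (kept there for S3 ETag interop) for a std hasher purely for illustration:

```rust
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

/// Stand-in digest; any stable digest works for the "skip if unchanged" check.
fn digest(bytes: &[u8]) -> u64 {
    let mut h = DefaultHasher::new();
    bytes.hash(&mut h);
    h.finish()
}

/// Returns `Some(new_digest)` if an upload should happen, `None` if the
/// serialized heatmap is unchanged since the last successful upload.
fn needs_upload(serialized: &[u8], last_digest: Option<u64>) -> Option<u64> {
    let d = digest(serialized);
    if Some(d) == last_digest {
        None
    } else {
        Some(d)
    }
}

fn main() {
    let heatmap = br#"{"timelines":[]}"#;
    let first = needs_upload(heatmap, None);
    assert!(first.is_some());
    // After a successful upload we remember the digest and skip next time.
    assert!(needs_upload(heatmap, first).is_none());
}
```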
/// This type is owned by a single task ([`heatmap_uploader_task`]) which runs an event
/// handling loop and mutates it as needed: there are no locks here, because that event loop
/// can hold &mut references to this type throughout.
struct HeatmapUploader {
tenant_manager: Arc<TenantManager>,
remote_storage: GenericRemoteStorage,
cancel: CancellationToken,
tenants: HashMap<TenantShardId, UploaderTenantState>,
/// Tenants with work to do, for which tasks should be spawned as soon as concurrency
/// limits permit it.
tenants_pending: std::collections::VecDeque<UploadPending>,
/// Tenants for which a task in `tasks` has been spawned.
tenants_uploading: HashMap<TenantShardId, WriteInProgress>,
tasks: JoinSet<()>,
/// Channel for our child tasks to send results to: we use a channel for results rather than
/// just getting task results via JoinSet because we need the channel's recv() "sleep until something
/// is available" semantic, rather than JoinSet::join_next()'s "sleep until next thing is available _or_ I'm empty"
/// behavior.
task_result_tx: tokio::sync::mpsc::UnboundedSender<WriteComplete>,
task_result_rx: tokio::sync::mpsc::UnboundedReceiver<WriteComplete>,
concurrent_uploads: usize,
scheduling_interval: Duration,
}
/// The uploader task runs a loop that periodically wakes up and schedules tasks for
/// tenants that require an upload, or handles any commands that have been sent into
/// `command_queue`. No I/O is done in this loop: that all happens in the tasks we
/// spawn.
///
/// Scheduling iterations are somewhat infrequent. However, each one will enqueue
/// all tenants that require an upload, and in between scheduling iterations we will
/// continue to spawn new tasks for pending tenants, as our concurrency limit permits.
///
/// While we take a CancellationToken here, it is subordinate to the CancellationTokens
/// of tenants: i.e. we expect all Tenants to have been shut down before we are shut down, otherwise
/// we might block waiting on a Tenant.
pub(super) async fn heatmap_uploader_task(
tenant_manager: Arc<TenantManager>,
remote_storage: GenericRemoteStorage,
mut command_queue: tokio::sync::mpsc::Receiver<CommandRequest<UploadCommand>>,
background_jobs_can_start: Barrier,
cancel: CancellationToken,
) -> anyhow::Result<()> {
let concurrent_uploads = tenant_manager.get_conf().heatmap_upload_concurrency;
let (result_tx, result_rx) = tokio::sync::mpsc::unbounded_channel();
let mut uploader = HeatmapUploader {
tenant_manager,
remote_storage,
cancel: cancel.clone(),
tasks: JoinSet::new(),
tenants: HashMap::new(),
tenants_pending: std::collections::VecDeque::new(),
tenants_uploading: HashMap::new(),
task_result_tx: result_tx,
task_result_rx: result_rx,
concurrent_uploads,
scheduling_interval: DEFAULT_SCHEDULING_INTERVAL,
};
tracing::info!("Waiting for background_jobs_can start...");
background_jobs_can_start.wait().await;
tracing::info!("background_jobs_can is ready, proceeding.");
while !cancel.is_cancelled() {
// Look for new work: this is relatively expensive because we have to go acquire the lock on
// the tenant manager to retrieve tenants, and then iterate over them to figure out which ones
// require an upload.
uploader.schedule_iteration().await?;
// Between scheduling iterations, we will:
// - Drain any complete tasks and spawn pending tasks
// - Handle incoming administrative commands
// - Check our cancellation token
let next_scheduling_iteration = Instant::now()
.checked_add(uploader.scheduling_interval)
.unwrap_or_else(|| {
tracing::warn!(
"Scheduling interval invalid ({}s), running immediately!",
uploader.scheduling_interval.as_secs_f64()
);
Instant::now()
});
loop {
tokio::select! {
_ = cancel.cancelled() => {
// We do not simply drop the JoinSet, in order to have an orderly shutdown without cancellation.
tracing::info!("Heatmap uploader joining tasks");
while let Some(_r) = uploader.tasks.join_next().await {};
tracing::info!("Heatmap uploader terminating");
break;
},
_ = tokio::time::sleep(next_scheduling_iteration.duration_since(Instant::now())) => {
tracing::debug!("heatmap_uploader_task: woke for scheduling interval");
break;},
cmd = command_queue.recv() => {
tracing::debug!("heatmap_uploader_task: woke for command queue");
let cmd = match cmd {
Some(c) => c,
None => {
// SecondaryController was destroyed, and this has raced with
// our CancellationToken
tracing::info!("Heatmap uploader terminating");
cancel.cancel();
break;
}
};
let CommandRequest{
response_tx,
payload
} = cmd;
uploader.handle_command(payload, response_tx);
},
_ = uploader.process_next_completion() => {
if !cancel.is_cancelled() {
uploader.spawn_pending();
}
}
}
}
}
Ok(())
}
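
The loop above has a characteristic two-level shape: an outer scheduling pass, then an inner select over cancellation, the next scheduling deadline, and the command queue. A compact, runnable sketch of that skeleton, assuming `tokio` and `tokio_util` as dependencies; the `String` commands are placeholders:

```rust
use std::time::Duration;
use tokio::sync::mpsc;
use tokio_util::sync::CancellationToken;

/// Skeleton of the uploader's event loop: no I/O here, just scheduling,
/// command handling, and shutdown.
async fn scheduler_loop(mut commands: mpsc::Receiver<String>, cancel: CancellationToken) {
    let scheduling_interval = Duration::from_secs(60);
    while !cancel.is_cancelled() {
        // (1) Scheduling pass: walk tenants and enqueue any pending work here.
        println!("scheduling pass");

        let next_pass = tokio::time::Instant::now() + scheduling_interval;
        loop {
            tokio::select! {
                _ = cancel.cancelled() => return,
                _ = tokio::time::sleep_until(next_pass) => break, // time for the next pass
                cmd = commands.recv() => match cmd {
                    Some(cmd) => println!("handling command: {cmd}"),
                    None => return, // the controller side was dropped
                },
            }
        }
    }
}

#[tokio::main]
async fn main() {
    let (tx, rx) = mpsc::channel(16);
    let cancel = CancellationToken::new();
    let task = tokio::spawn(scheduler_loop(rx, cancel.clone()));
    tx.send("upload tenant".into()).await.unwrap();
    cancel.cancel();
    task.await.unwrap();
}
```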
impl HeatmapUploader {
/// Periodic execution phase: inspect all attached tenants and schedule any work they require.
async fn schedule_iteration(&mut self) -> anyhow::Result<()> {
// Cull any entries in self.tenants whose Arc<Tenant> is gone
self.tenants
.retain(|_k, v| v.tenant.upgrade().is_some() && v.next_upload.is_some());
// The priority order of previously scheduled work may be invalidated by current state: drop
// all pending work (it will be re-scheduled if still needed)
self.tenants_pending.clear();
// Use a fixed 'now' throughout the following loop, for efficiency and fairness.
let now = Instant::now();
// While iterating over the potentially-long list of tenants, we will periodically yield
// to avoid blocking the executor.
const YIELD_ITERATIONS: usize = 1000;
// Iterate over tenants looking for work to do.
let tenants = self.tenant_manager.get_attached_active_tenant_shards();
for (i, tenant) in tenants.into_iter().enumerate() {
// Process is shutting down, drop out
if self.cancel.is_cancelled() {
return Ok(());
}
// Skip tenants that already have a write in flight
if self
.tenants_uploading
.contains_key(tenant.get_tenant_shard_id())
{
continue;
}
self.maybe_schedule_upload(&now, tenant);
if (i + 1) % YIELD_ITERATIONS == 0 {
tokio::task::yield_now().await;
}
}
// Spawn tasks for as many of our pending tenants as we can.
self.spawn_pending();
Ok(())
}
///
/// Cancellation: this method is cancel-safe.
async fn process_next_completion(&mut self) {
match self.task_result_rx.recv().await {
Some(r) => {
self.on_completion(r);
}
None => {
unreachable!("Result sender is stored on Self");
}
}
}
/// The 'maybe' refers to the tenant's state: whether it is configured
/// for heatmap uploads at all, and whether sufficient time has passed
/// since the last upload.
fn maybe_schedule_upload(&mut self, now: &Instant, tenant: Arc<Tenant>) {
match tenant.get_heatmap_period() {
None => {
// Heatmaps are disabled for this tenant
return;
}
Some(period) => {
// If any tenant has asked for uploads more frequent than our scheduling interval,
// reduce it to match so that we can keep up. This is mainly useful in testing, where
// we may set rather short intervals.
if period < self.scheduling_interval {
self.scheduling_interval = std::cmp::max(period, MIN_SCHEDULING_INTERVAL);
}
}
}
// Stale attachments do not upload anything: if we are in this state, there is probably some
// other attachment in mode Single or Multi running on another pageserver, and we don't
// want to thrash and overwrite their heatmap uploads.
if tenant.get_attach_mode() == AttachmentMode::Stale {
return;
}
// Create an entry in self.tenants if one doesn't already exist: this will later be updated
// with the completion time in on_completion.
let state = self
.tenants
.entry(*tenant.get_tenant_shard_id())
.or_insert_with(|| UploaderTenantState {
tenant: Arc::downgrade(&tenant),
last_upload: None,
next_upload: Some(Instant::now()),
last_digest: None,
});
// Decline to do the upload if insufficient time has passed
if state.next_upload.map(|nu| &nu > now).unwrap_or(false) {
return;
}
let last_digest = state.last_digest;
self.tenants_pending.push_back(UploadPending {
tenant,
last_digest,
})
}
fn spawn_pending(&mut self) {
while !self.tenants_pending.is_empty()
&& self.tenants_uploading.len() < self.concurrent_uploads
{
// unwrap: loop condition includes !is_empty()
let pending = self.tenants_pending.pop_front().unwrap();
self.spawn_upload(pending.tenant, pending.last_digest);
}
}
fn spawn_upload(&mut self, tenant: Arc<Tenant>, last_digest: Option<md5::Digest>) {
let remote_storage = self.remote_storage.clone();
let tenant_shard_id = *tenant.get_tenant_shard_id();
let (completion, barrier) = utils::completion::channel();
let result_tx = self.task_result_tx.clone();
self.tasks.spawn(async move {
// Guard for the barrier in [`WriteInProgress`]
let _completion = completion;
let started_at = Instant::now();
let digest = match upload_tenant_heatmap(remote_storage, &tenant, last_digest).await {
Ok(UploadHeatmapOutcome::Uploaded(digest)) => {
let duration = Instant::now().duration_since(started_at);
SECONDARY_MODE
.upload_heatmap_duration
.observe(duration.as_secs_f64());
SECONDARY_MODE.upload_heatmap.inc();
Some(digest)
}
Ok(UploadHeatmapOutcome::NoChange | UploadHeatmapOutcome::Skipped) => last_digest,
Err(UploadHeatmapError::Upload(e)) => {
tracing::warn!(
"Failed to upload heatmap for tenant {}: {e:#}",
tenant.get_tenant_shard_id(),
);
let duration = Instant::now().duration_since(started_at);
SECONDARY_MODE
.upload_heatmap_duration
.observe(duration.as_secs_f64());
SECONDARY_MODE.upload_heatmap_errors.inc();
last_digest
}
Err(UploadHeatmapError::Cancelled) => {
tracing::info!("Cancelled heatmap upload, shutting down");
last_digest
}
};
let now = Instant::now();
let next_upload = tenant
.get_heatmap_period()
.and_then(|period| now.checked_add(period));
result_tx
.send(WriteComplete {
tenant_shard_id: *tenant.get_tenant_shard_id(),
completed_at: now,
digest,
next_upload,
})
.ok();
});
self.tenants_uploading
.insert(tenant_shard_id, WriteInProgress { barrier });
}
#[instrument(skip_all, fields(tenant_id=%completion.tenant_shard_id.tenant_id, shard_id=%completion.tenant_shard_id.shard_slug()))]
fn on_completion(&mut self, completion: WriteComplete) {
tracing::debug!("Heatmap upload completed");
let WriteComplete {
tenant_shard_id,
completed_at,
digest,
next_upload,
} = completion;
self.tenants_uploading.remove(&tenant_shard_id);
use std::collections::hash_map::Entry;
match self.tenants.entry(tenant_shard_id) {
Entry::Vacant(_) => {
// Tenant state was dropped, nothing to update.
}
Entry::Occupied(mut entry) => {
entry.get_mut().last_upload = Some(completed_at);
entry.get_mut().last_digest = digest;
entry.get_mut().next_upload = next_upload
}
}
}
fn handle_command(
&mut self,
command: UploadCommand,
response_tx: tokio::sync::oneshot::Sender<CommandResponse>,
) {
match command {
UploadCommand::Upload(tenant_shard_id) => {
// If an upload was ongoing for this tenant, let it finish first.
let barrier = if let Some(writing_state) =
self.tenants_uploading.get(&tenant_shard_id)
{
tracing::info!(
tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(),
"Waiting for heatmap write to complete");
writing_state.barrier.clone()
} else {
// Spawn the upload then immediately wait for it. This will block processing of other commands and
// starting of other background work.
tracing::info!(
tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(),
"Starting heatmap write on command");
let tenant = match self
.tenant_manager
.get_attached_tenant_shard(tenant_shard_id, true)
{
Ok(t) => t,
Err(e) => {
// Drop result of send: we don't care if caller dropped their receiver
drop(response_tx.send(CommandResponse {
result: Err(e.into()),
}));
return;
}
};
self.spawn_upload(tenant, None);
let writing_state = self
.tenants_uploading
.get(&tenant_shard_id)
.expect("We just inserted this");
tracing::info!(
tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(),
"Waiting for heatmap upload to complete");
writing_state.barrier.clone()
};
// This task does no I/O: it only listens for a barrier's completion and then
// sends to the command response channel. It is therefore safe to spawn this without
// any gates/task_mgr hooks.
tokio::task::spawn(async move {
barrier.wait().await;
tracing::info!(
tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(),
"Heatmap upload complete");
// Drop result of send: we don't care if caller dropped their receiver
drop(response_tx.send(CommandResponse { result: Ok(()) }))
});
}
}
}
}
enum UploadHeatmapOutcome {
/// We successfully wrote to remote storage, with this digest.
Uploaded(md5::Digest),
/// We did not upload because the heatmap digest was unchanged since the last upload
NoChange,
/// We skipped the upload for some reason, such as tenant/timeline not ready
Skipped,
}
#[derive(thiserror::Error, Debug)]
enum UploadHeatmapError {
#[error("Cancelled")]
Cancelled,
#[error(transparent)]
Upload(#[from] anyhow::Error),
}
/// The inner upload operation. This will skip if `last_digest` is Some and matches the digest
/// of the object we would have uploaded.
#[instrument(skip_all, fields(tenant_id = %tenant.get_tenant_shard_id().tenant_id, shard_id = %tenant.get_tenant_shard_id().shard_slug()))]
async fn upload_tenant_heatmap(
remote_storage: GenericRemoteStorage,
tenant: &Arc<Tenant>,
last_digest: Option<md5::Digest>,
) -> Result<UploadHeatmapOutcome, UploadHeatmapError> {
debug_assert_current_span_has_tenant_id();
let generation = tenant.get_generation();
if generation.is_none() {
// We do not expect this: generations were implemented before heatmap uploads. However,
// handle it so that we don't have to make the generation in the heatmap an Option<>
// (Generation::none is not serializable)
tracing::warn!("Skipping heatmap upload for tenant with generation==None");
return Ok(UploadHeatmapOutcome::Skipped);
}
let mut heatmap = HeatMapTenant {
timelines: Vec::new(),
generation,
};
let timelines = tenant.timelines.lock().unwrap().clone();
let tenant_cancel = tenant.cancel.clone();
// Ensure that Tenant::shutdown waits for any upload in flight: this is needed because otherwise
// when we delete a tenant, we might race with an upload in flight and end up leaving a heatmap behind
// in remote storage.
let _guard = match tenant.gate.enter() {
Ok(g) => g,
Err(_) => {
tracing::info!("Skipping heatmap upload for tenant which is shutting down");
return Err(UploadHeatmapError::Cancelled);
}
};
for (timeline_id, timeline) in timelines {
let heatmap_timeline = timeline.generate_heatmap().await;
match heatmap_timeline {
None => {
tracing::debug!(
"Skipping heatmap upload because timeline {timeline_id} is not ready"
);
return Ok(UploadHeatmapOutcome::Skipped);
}
Some(heatmap_timeline) => {
heatmap.timelines.push(heatmap_timeline);
}
}
}
// Serialize the heatmap
let bytes = serde_json::to_vec(&heatmap).map_err(|e| anyhow::anyhow!(e))?;
let size = bytes.len();
// Drop out early if nothing changed since our last upload
let digest = md5::compute(&bytes);
if Some(digest) == last_digest {
return Ok(UploadHeatmapOutcome::NoChange);
}
let path = remote_heatmap_path(tenant.get_tenant_shard_id());
// Write the heatmap.
tracing::debug!("Uploading {size} byte heatmap to {path}");
if let Err(e) = backoff::retry(
|| async {
let bytes = futures::stream::once(futures::future::ready(Ok(bytes::Bytes::from(
bytes.clone(),
))));
remote_storage
.upload_storage_object(bytes, size, &path)
.await
},
|_| false,
3,
u32::MAX,
"Uploading heatmap",
backoff::Cancel::new(tenant_cancel.clone(), || anyhow::anyhow!("Shutting down")),
)
.await
{
if tenant_cancel.is_cancelled() {
return Err(UploadHeatmapError::Cancelled);
} else {
return Err(e.into());
}
}
tracing::info!("Successfully uploaded {size} byte heatmap to {path}");
Ok(UploadHeatmapOutcome::Uploaded(digest))
}
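
The upload above goes through the pageserver's backoff helper with a cancellation hook. As a rough standalone equivalent, a retry loop with capped exponential backoff that bails out on cancellation could look like the following (hypothetical helper, not the real `backoff::retry` signature; assumes `tokio` and `tokio_util`):

```rust
use std::time::Duration;
use tokio_util::sync::CancellationToken;

/// Retry `op` with capped exponential backoff, giving up early on cancellation.
/// Returns `None` if cancelled before a final result was produced.
async fn retry_with_backoff<T, E, F, Fut>(
    mut op: F,
    max_attempts: u32,
    cancel: &CancellationToken,
) -> Option<Result<T, E>>
where
    F: FnMut() -> Fut,
    Fut: std::future::Future<Output = Result<T, E>>,
{
    assert!(max_attempts >= 1, "need at least one attempt");
    let mut delay = Duration::from_millis(100);
    for attempt in 1..=max_attempts {
        match op().await {
            Ok(v) => return Some(Ok(v)),
            Err(e) if attempt == max_attempts => return Some(Err(e)),
            Err(_) => {
                tokio::select! {
                    _ = cancel.cancelled() => return None, // shutting down
                    _ = tokio::time::sleep(delay) => {}
                }
                delay = (delay * 2).min(Duration::from_secs(10));
            }
        }
    }
    unreachable!("the final attempt always returns above")
}

#[tokio::main]
async fn main() {
    let cancel = CancellationToken::new();
    let mut failures_left = 2u32;
    let res = retry_with_backoff(
        || {
            let fail = failures_left > 0;
            failures_left = failures_left.saturating_sub(1);
            async move { if fail { Err("transient") } else { Ok("uploaded") } }
        },
        5,
        &cancel,
    )
    .await;
    assert_eq!(res, Some(Ok("uploaded")));
}
```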

View File

@@ -457,8 +457,6 @@ struct LayerInner {
/// For loaded layers, this may be some other value if the tenant has undergone
/// a shard split since the layer was originally written.
shard: ShardIndex,
last_evicted_at: std::sync::Mutex<Option<std::time::Instant>>,
}
impl std::fmt::Display for LayerInner {
@@ -589,7 +587,6 @@ impl LayerInner {
consecutive_failures: AtomicUsize::new(0),
generation,
shard,
last_evicted_at: std::sync::Mutex::default(),
}
}
@@ -725,14 +722,6 @@ impl LayerInner {
permit
};
let since_last_eviction =
self.last_evicted_at.lock().unwrap().map(|ts| ts.elapsed());
if let Some(since_last_eviction) = since_last_eviction {
// FIXME: this will not always be recorded correctly until #6028 (the no
// download needed branch above)
LAYER_IMPL_METRICS.record_redownloaded_after(since_last_eviction);
}
let res = Arc::new(DownloadedLayer {
owner: Arc::downgrade(self),
kind: tokio::sync::OnceCell::default(),
@@ -848,7 +837,7 @@ impl LayerInner {
crate::task_mgr::spawn(
&tokio::runtime::Handle::current(),
crate::task_mgr::TaskKind::RemoteDownloadTask,
Some(self.desc.tenant_shard_id),
Some(self.desc.tenant_shard_id.tenant_id),
Some(self.desc.timeline_id),
&task_name,
false,
@@ -1128,8 +1117,6 @@ impl LayerInner {
// we are still holding the permit, so no new spawn_download_and_wait can happen
drop(self.status.send(Status::Evicted));
*self.last_evicted_at.lock().unwrap() = Some(std::time::Instant::now());
res
}
@@ -1434,7 +1421,6 @@ pub(crate) struct LayerImplMetrics {
rare_counters: enum_map::EnumMap<RareEvent, IntCounter>,
inits_cancelled: metrics::core::GenericCounter<metrics::core::AtomicU64>,
redownload_after: metrics::Histogram,
}
impl Default for LayerImplMetrics {
@@ -1510,26 +1496,6 @@ impl Default for LayerImplMetrics {
)
.unwrap();
let redownload_after = {
let minute = 60.0;
let hour = 60.0 * minute;
metrics::register_histogram!(
"pageserver_layer_redownloaded_after",
"Time between evicting and re-downloading.",
vec![
10.0,
30.0,
minute,
5.0 * minute,
15.0 * minute,
30.0 * minute,
hour,
12.0 * hour,
]
)
.unwrap()
};
Self {
started_evictions,
completed_evictions,
@@ -1541,7 +1507,6 @@ impl Default for LayerImplMetrics {
rare_counters,
inits_cancelled,
redownload_after,
}
}
}
@@ -1609,10 +1574,6 @@ impl LayerImplMetrics {
fn inc_init_cancelled(&self) {
self.inits_cancelled.inc()
}
fn record_redownloaded_after(&self, duration: std::time::Duration) {
self.redownload_after.observe(duration.as_secs_f64())
}
}
#[derive(enum_map::Enum)]

View File

@@ -54,18 +54,31 @@ impl BackgroundLoopKind {
}
}
/// Cancellation safe.
pub(crate) async fn concurrent_background_tasks_rate_limit_permit(
pub(crate) enum RateLimitError {
Cancelled,
}
pub(crate) async fn concurrent_background_tasks_rate_limit(
loop_kind: BackgroundLoopKind,
_ctx: &RequestContext,
) -> impl Drop {
let _guard = crate::metrics::BACKGROUND_LOOP_SEMAPHORE_WAIT_GAUGE
cancel: &CancellationToken,
) -> Result<impl Drop, RateLimitError> {
crate::metrics::BACKGROUND_LOOP_SEMAPHORE_WAIT_START_COUNT
.with_label_values(&[loop_kind.as_static_str()])
.guard();
match CONCURRENT_BACKGROUND_TASKS.acquire().await {
Ok(permit) => permit,
Err(_closed) => unreachable!("we never close the semaphore"),
.inc();
scopeguard::defer!(
crate::metrics::BACKGROUND_LOOP_SEMAPHORE_WAIT_FINISH_COUNT.with_label_values(&[loop_kind.as_static_str()]).inc();
);
tokio::select! {
permit = CONCURRENT_BACKGROUND_TASKS.acquire() => {
match permit {
Ok(permit) => Ok(permit),
Err(_closed) => unreachable!("we never close the semaphore"),
}
},
_ = cancel.cancelled() => {
Err(RateLimitError::Cancelled)
}
}
}
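
The permit function races a shared semaphore against a cancellation token, so background work is both capped in concurrency and quick to abort on shutdown. A runnable sketch of the same shape, assuming `tokio` and `tokio_util`; the names and the limit of 2 are illustrative:

```rust
use std::sync::Arc;
use tokio::sync::Semaphore;
use tokio_util::sync::CancellationToken;

#[derive(Debug)]
enum RateLimitError {
    Cancelled,
}

/// Acquire a slot from the shared background-task semaphore, or bail out if
/// shutdown is requested while we wait.
async fn background_task_permit(
    semaphore: Arc<Semaphore>,
    cancel: &CancellationToken,
) -> Result<tokio::sync::OwnedSemaphorePermit, RateLimitError> {
    tokio::select! {
        permit = semaphore.acquire_owned() => {
            Ok(permit.expect("semaphore is never closed"))
        }
        _ = cancel.cancelled() => Err(RateLimitError::Cancelled),
    }
}

#[tokio::main]
async fn main() {
    // Allow at most 2 concurrent "background tasks".
    let semaphore = Arc::new(Semaphore::new(2));
    let cancel = CancellationToken::new();

    let mut handles = Vec::new();
    for i in 0..4 {
        let semaphore = semaphore.clone();
        let cancel = cancel.clone();
        handles.push(tokio::spawn(async move {
            let _permit = background_task_permit(semaphore, &cancel).await.unwrap();
            println!("task {i} running under the concurrency limit");
            tokio::time::sleep(std::time::Duration::from_millis(10)).await;
        }));
    }
    for h in handles {
        h.await.unwrap();
    }
}
```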
@@ -74,13 +87,13 @@ pub fn start_background_loops(
tenant: &Arc<Tenant>,
background_jobs_can_start: Option<&completion::Barrier>,
) {
let tenant_shard_id = tenant.tenant_shard_id;
let tenant_id = tenant.tenant_shard_id.tenant_id;
task_mgr::spawn(
BACKGROUND_RUNTIME.handle(),
TaskKind::Compaction,
Some(tenant_shard_id),
Some(tenant_id),
None,
&format!("compactor for tenant {tenant_shard_id}"),
&format!("compactor for tenant {tenant_id}"),
false,
{
let tenant = Arc::clone(tenant);
@@ -92,7 +105,7 @@ pub fn start_background_loops(
_ = completion::Barrier::maybe_wait(background_jobs_can_start) => {}
};
compaction_loop(tenant, cancel)
.instrument(info_span!("compaction_loop", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug()))
.instrument(info_span!("compaction_loop", tenant_id = %tenant_id))
.await;
Ok(())
}
@@ -101,9 +114,9 @@ pub fn start_background_loops(
task_mgr::spawn(
BACKGROUND_RUNTIME.handle(),
TaskKind::GarbageCollector,
Some(tenant_shard_id),
Some(tenant_id),
None,
&format!("garbage collector for tenant {tenant_shard_id}"),
&format!("garbage collector for tenant {tenant_id}"),
false,
{
let tenant = Arc::clone(tenant);
@@ -115,7 +128,7 @@ pub fn start_background_loops(
_ = completion::Barrier::maybe_wait(background_jobs_can_start) => {}
};
gc_loop(tenant, cancel)
.instrument(info_span!("gc_loop", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug()))
.instrument(info_span!("gc_loop", tenant_id = %tenant_id))
.await;
Ok(())
}

View File

@@ -29,7 +29,7 @@ use tokio::{
};
use tokio_util::sync::CancellationToken;
use tracing::*;
use utils::sync::gate::Gate;
use utils::{id::TenantTimelineId, sync::gate::Gate};
use std::collections::{BinaryHeap, HashMap, HashSet};
use std::ops::{Deref, Range};
@@ -51,7 +51,7 @@ use crate::tenant::storage_layer::{
LayerAccessStatsReset, LayerFileName, ResidentLayer, ValueReconstructResult,
ValueReconstructState,
};
use crate::tenant::tasks::BackgroundLoopKind;
use crate::tenant::tasks::{BackgroundLoopKind, RateLimitError};
use crate::tenant::timeline::logical_size::CurrentLogicalSize;
use crate::tenant::{
layer_map::{LayerMap, SearchResult},
@@ -66,7 +66,7 @@ use crate::metrics::{
TimelineMetrics, MATERIALIZED_PAGE_CACHE_HIT, MATERIALIZED_PAGE_CACHE_HIT_DIRECT,
};
use crate::pgdatadir_mapping::LsnForTimestamp;
use crate::pgdatadir_mapping::{is_inherited_key, is_rel_fsm_block_key, is_rel_vm_block_key};
use crate::pgdatadir_mapping::{is_rel_fsm_block_key, is_rel_vm_block_key};
use crate::pgdatadir_mapping::{BlockNumber, CalculateLogicalSizeError};
use crate::tenant::config::{EvictionPolicy, TenantConfOpt};
use pageserver_api::reltag::RelTag;
@@ -77,7 +77,7 @@ use postgres_ffi::to_pg_timestamp;
use utils::{
completion,
generation::Generation,
id::TimelineId,
id::{TenantId, TimelineId},
lsn::{AtomicLsn, Lsn, RecordLsn},
seqwait::SeqWait,
simple_rcu::{Rcu, RcuReadGuard},
@@ -98,9 +98,8 @@ use self::logical_size::LogicalSize;
use self::walreceiver::{WalReceiver, WalReceiverConf};
use super::config::TenantConf;
use super::remote_timeline_client::index::{IndexLayerMetadata, IndexPart};
use super::remote_timeline_client::index::IndexPart;
use super::remote_timeline_client::RemoteTimelineClient;
use super::secondary::heatmap::{HeatMapLayer, HeatMapTimeline};
use super::{debug_assert_current_span_has_tenant_and_timeline_id, AttachedTenantConf};
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
@@ -378,6 +377,9 @@ pub enum PageReconstructError {
#[error(transparent)]
Other(#[from] anyhow::Error),
/// The operation would require downloading a layer that is missing locally.
NeedsDownload(TenantTimelineId, LayerFileName),
/// The operation was cancelled
Cancelled,
@@ -406,6 +408,14 @@ impl std::fmt::Debug for PageReconstructError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
match self {
Self::Other(err) => err.fmt(f),
Self::NeedsDownload(tenant_timeline_id, layer_file_name) => {
write!(
f,
"layer {}/{} needs download",
tenant_timeline_id,
layer_file_name.file_name()
)
}
Self::Cancelled => write!(f, "cancelled"),
Self::AncestorStopping(timeline_id) => {
write!(f, "ancestor timeline {timeline_id} is being stopped")
@@ -419,6 +429,14 @@ impl std::fmt::Display for PageReconstructError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
match self {
Self::Other(err) => err.fmt(f),
Self::NeedsDownload(tenant_timeline_id, layer_file_name) => {
write!(
f,
"layer {}/{} needs download",
tenant_timeline_id,
layer_file_name.file_name()
)
}
Self::Cancelled => write!(f, "cancelled"),
Self::AncestorStopping(timeline_id) => {
write!(f, "ancestor timeline {timeline_id} is being stopped")
@@ -446,12 +464,6 @@ pub(crate) enum CompactFlags {
ForceRepartition,
}
impl std::fmt::Debug for Timeline {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "Timeline<{}>", self.timeline_id)
}
}
/// Public interface functions
impl Timeline {
/// Get the LSN where this branch was created
@@ -715,27 +727,19 @@ impl Timeline {
flags: EnumSet<CompactFlags>,
ctx: &RequestContext,
) -> Result<(), CompactionError> {
// most likely the cancellation token is from a background task, but in tests it could be the
// request task as well.
let prepare = async move {
let guard = self.compaction_lock.lock().await;
let permit = super::tasks::concurrent_background_tasks_rate_limit_permit(
BackgroundLoopKind::Compaction,
ctx,
)
.await;
(guard, permit)
};
let _g = self.compaction_lock.lock().await;
// this wait probably never needs any "long time spent" logging, because we already nag if
// compaction task goes over its period (20s), which is quite often in production.
let (_guard, _permit) = tokio::select! {
tuple = prepare => { tuple },
_ = self.cancel.cancelled() => return Ok(()),
_ = cancel.cancelled() => return Ok(()),
let _permit = match super::tasks::concurrent_background_tasks_rate_limit(
BackgroundLoopKind::Compaction,
ctx,
cancel,
)
.await
{
Ok(permit) => permit,
Err(RateLimitError::Cancelled) => return Ok(()),
};
let last_record_lsn = self.get_last_record_lsn();
@@ -922,7 +926,7 @@ impl Timeline {
tracing::debug!("Waiting for WalReceiverManager...");
task_mgr::shutdown_tasks(
Some(TaskKind::WalReceiverManager),
Some(self.tenant_shard_id),
Some(self.tenant_shard_id.tenant_id),
Some(self.timeline_id),
)
.await;
@@ -973,7 +977,7 @@ impl Timeline {
// Shut down the layer flush task before the remote client, as one depends on the other
task_mgr::shutdown_tasks(
Some(TaskKind::LayerFlushTask),
Some(self.tenant_shard_id),
Some(self.tenant_shard_id.tenant_id),
Some(self.timeline_id),
)
.await;
@@ -991,7 +995,12 @@ impl Timeline {
tracing::debug!("Waiting for tasks...");
task_mgr::shutdown_tasks(None, Some(self.tenant_shard_id), Some(self.timeline_id)).await;
task_mgr::shutdown_tasks(
None,
Some(self.tenant_shard_id.tenant_id),
Some(self.timeline_id),
)
.await;
// Finally wait until any gate-holders are complete
self.gate.close().await;
@@ -1114,9 +1123,8 @@ impl Timeline {
Ok(Some(true))
}
/// Evict just one layer.
///
/// Returns `Ok(None)` in the case where the layer could not be found by its `layer_file_name`.
/// Like [`evict_layer_batch`](Self::evict_layer_batch), but for just one layer.
/// The additional `Ok(None)` case covers a layer that could not be found by its `layer_file_name`.
pub async fn evict_layer(&self, layer_file_name: &str) -> anyhow::Result<Option<bool>> {
let _gate = self
.gate
@@ -1127,17 +1135,109 @@ impl Timeline {
return Ok(None);
};
let rtc = self
let Some(local_layer) = local_layer.keep_resident().await? else {
return Ok(Some(false));
};
let local_layer: Layer = local_layer.into();
let remote_client = self
.remote_client
.as_ref()
.ok_or_else(|| anyhow::anyhow!("remote storage not configured; cannot evict"))?;
match local_layer.evict_and_wait(rtc).await {
Ok(()) => Ok(Some(true)),
Err(EvictionError::NotFound) => Ok(Some(false)),
Err(EvictionError::Downloaded) => Ok(Some(false)),
let results = self
.evict_layer_batch(remote_client, &[local_layer])
.await?;
assert_eq!(results.len(), 1);
let result: Option<Result<(), EvictionError>> = results.into_iter().next().unwrap();
match result {
None => anyhow::bail!("task_mgr shutdown requested"),
Some(Ok(())) => Ok(Some(true)),
Some(Err(e)) => Err(anyhow::Error::new(e)),
}
}
/// Evict a batch of layers.
pub(crate) async fn evict_layers(
&self,
layers_to_evict: &[Layer],
) -> anyhow::Result<Vec<Option<Result<(), EvictionError>>>> {
let _gate = self
.gate
.enter()
.map_err(|_| anyhow::anyhow!("Shutting down"))?;
let remote_client = self
.remote_client
.as_ref()
.context("timeline must have RemoteTimelineClient")?;
self.evict_layer_batch(remote_client, layers_to_evict).await
}
/// Evict multiple layers at once, continuing through errors.
///
/// The `remote_client` should be this timeline's `self.remote_client`.
/// We make the caller provide it so that they are responsible for handling the case
/// where someone wants to evict the layer but no remote storage is configured.
///
/// Returns either `Err()` or `Ok(results)` where `results.len() == layers_to_evict.len()`.
/// If `Err()` is returned, no eviction was attempted.
/// Each position of `Ok(results)` corresponds to the layer in `layers_to_evict`.
/// Meaning of each `result[i]`:
/// - `Some(Err(...))` if layer replacement failed for some reason
///   - replacement failed for an expectable reason (e.g., layer removed by GC before we grabbed all locks)
/// - `Some(Ok(()))` if everything went well.
/// - `None` if no eviction attempt was made for the layer because `cancel.is_cancelled() == true`.
async fn evict_layer_batch(
&self,
remote_client: &Arc<RemoteTimelineClient>,
layers_to_evict: &[Layer],
) -> anyhow::Result<Vec<Option<Result<(), EvictionError>>>> {
{
// to avoid racing with detach and delete_timeline
let state = self.current_state();
anyhow::ensure!(
state == TimelineState::Active,
"timeline is not active but {state:?}"
);
}
let mut results = Vec::with_capacity(layers_to_evict.len());
for _ in 0..layers_to_evict.len() {
results.push(None);
}
let mut js = tokio::task::JoinSet::new();
for (i, l) in layers_to_evict.iter().enumerate() {
js.spawn({
let l = l.to_owned();
let remote_client = remote_client.clone();
async move { (i, l.evict_and_wait(&remote_client).await) }
});
}
let join = async {
while let Some(next) = js.join_next().await {
match next {
Ok((i, res)) => results[i] = Some(res),
Err(je) if je.is_cancelled() => unreachable!("not used"),
Err(je) if je.is_panic() => { /* already logged */ }
Err(je) => tracing::error!("unknown JoinError: {je:?}"),
}
}
};
tokio::select! {
_ = self.cancel.cancelled() => {},
_ = join => {}
}
assert_eq!(results.len(), layers_to_evict.len());
Ok(results)
}
}
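
The batch eviction fans work out into a `JoinSet`, records each outcome by input index, and abandons collection on cancellation, leaving the remaining slots as `None`. A self-contained sketch of that fan-out/collect shape with placeholder per-item work (assumes `tokio` and `tokio_util`):

```rust
use tokio::task::JoinSet;
use tokio_util::sync::CancellationToken;

/// Run one async job per item, returning one slot per input: `None` means the
/// result was never collected (e.g. we were cancelled), `Some(res)` carries
/// the job's outcome.
async fn run_batch(
    items: Vec<String>,
    cancel: CancellationToken,
) -> Vec<Option<Result<(), String>>> {
    let mut results: Vec<Option<Result<(), String>>> = vec![None; items.len()];

    let mut js = JoinSet::new();
    for (i, item) in items.into_iter().enumerate() {
        js.spawn(async move {
            // Placeholder for the real per-item work (eviction, upload, ...).
            let outcome = if item.is_empty() {
                Err("empty item".to_string())
            } else {
                Ok(())
            };
            (i, outcome)
        });
    }

    let join = async {
        while let Some(next) = js.join_next().await {
            match next {
                Ok((i, res)) => results[i] = Some(res),
                Err(join_err) => eprintln!("task failed: {join_err}"),
            }
        }
    };
    tokio::select! {
        _ = cancel.cancelled() => {} // leave uncollected slots as None
        _ = join => {}
    }
    results
}

#[tokio::main]
async fn main() {
    let cancel = CancellationToken::new();
    let results = run_batch(vec!["layer-a".into(), "".into()], cancel).await;
    assert_eq!(results.len(), 2);
    println!("{results:?}");
}
```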
/// Number of times we will compute partition within a checkpoint distance.
@@ -1214,20 +1314,16 @@ impl Timeline {
&self.conf.default_tenant_conf,
);
// TODO(sharding): make evictions state shard aware
// (https://github.com/neondatabase/neon/issues/5953)
let tenant_id_str = self.tenant_shard_id.tenant_id.to_string();
let shard_id_str = format!("{}", self.tenant_shard_id.shard_slug());
let timeline_id_str = self.timeline_id.to_string();
self.metrics
.evictions_with_low_residence_duration
.write()
.unwrap()
.change_threshold(
&tenant_id_str,
&shard_id_str,
&timeline_id_str,
new_threshold,
);
.change_threshold(&tenant_id_str, &timeline_id_str, new_threshold);
}
}
@@ -1299,7 +1395,7 @@ impl Timeline {
ancestor_lsn: metadata.ancestor_lsn(),
metrics: TimelineMetrics::new(
&tenant_shard_id,
&tenant_shard_id.tenant_id,
&timeline_id,
crate::metrics::EvictionsWithLowResidenceDurationBuilder::new(
"mtime",
@@ -1400,7 +1496,7 @@ impl Timeline {
task_mgr::spawn(
task_mgr::BACKGROUND_RUNTIME.handle(),
task_mgr::TaskKind::LayerFlushTask,
Some(self.tenant_shard_id),
Some(self.tenant_shard_id.tenant_id),
Some(self.timeline_id),
"layer flush task",
false,
@@ -1751,7 +1847,7 @@ impl Timeline {
task_mgr::spawn(
task_mgr::BACKGROUND_RUNTIME.handle(),
task_mgr::TaskKind::InitialLogicalSizeCalculation,
Some(self.tenant_shard_id),
Some(self.tenant_shard_id.tenant_id),
Some(self.timeline_id),
"initial size calculation",
false,
@@ -1790,22 +1886,22 @@ impl Timeline {
let skip_concurrency_limiter = &skip_concurrency_limiter;
async move {
let cancel = task_mgr::shutdown_token();
let wait_for_permit = super::tasks::concurrent_background_tasks_rate_limit_permit(
let wait_for_permit = super::tasks::concurrent_background_tasks_rate_limit(
BackgroundLoopKind::InitialLogicalSizeCalculation,
background_ctx,
&cancel,
);
use crate::metrics::initial_logical_size::StartCircumstances;
let (_maybe_permit, circumstances) = tokio::select! {
permit = wait_for_permit => {
(Some(permit), StartCircumstances::AfterBackgroundTasksRateLimit)
res = wait_for_permit => {
match res {
Ok(permit) => (Some(permit), StartCircumstances::AfterBackgroundTasksRateLimit),
Err(RateLimitError::Cancelled) => {
return Err(BackgroundCalculationError::Cancelled);
}
}
}
_ = self_ref.cancel.cancelled() => {
return Err(BackgroundCalculationError::Cancelled);
}
_ = cancel.cancelled() => {
return Err(BackgroundCalculationError::Cancelled);
},
() = skip_concurrency_limiter.cancelled() => {
// Some action that is part of an end-user interaction requested the logical size
// => break out of the rate limit
@@ -1924,7 +2020,7 @@ impl Timeline {
task_mgr::spawn(
task_mgr::BACKGROUND_RUNTIME.handle(),
task_mgr::TaskKind::OndemandLogicalSizeCalculation,
Some(self.tenant_shard_id),
Some(self.tenant_shard_id.tenant_id),
Some(self.timeline_id),
"ondemand logical size calculation",
false,
@@ -2070,55 +2166,6 @@ impl Timeline {
None
}
/// The timeline heatmap is a hint to secondary locations from the primary location,
/// indicating which layers are currently on-disk on the primary.
///
/// None is returned if the Timeline is in a state where uploading a heatmap
/// doesn't make sense, such as shutting down or initializing. The caller
/// should treat this as a cue to simply skip doing any heatmap uploading
/// for this timeline.
pub(crate) async fn generate_heatmap(&self) -> Option<HeatMapTimeline> {
let eviction_info = self.get_local_layers_for_disk_usage_eviction().await;
let remote_client = match &self.remote_client {
Some(c) => c,
None => return None,
};
let layer_file_names = eviction_info
.resident_layers
.iter()
.map(|l| l.layer.layer_desc().filename())
.collect::<Vec<_>>();
let decorated = match remote_client.get_layers_metadata(layer_file_names) {
Ok(d) => d,
Err(_) => {
// Getting metadata only fails on Timeline in bad state.
return None;
}
};
let heatmap_layers = std::iter::zip(
eviction_info.resident_layers.into_iter(),
decorated.into_iter(),
)
.filter_map(|(layer, remote_info)| {
remote_info.map(|remote_info| {
HeatMapLayer::new(
layer.layer.layer_desc().filename(),
IndexLayerMetadata::from(remote_info),
layer.last_activity_ts,
)
})
});
Some(HeatMapTimeline::new(
self.timeline_id,
heatmap_layers.collect(),
))
}
}
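// Minimal sketch of the intended caller behaviour described in the doc comment above:
// `None` is a normal outcome and simply means "skip heatmap uploading for now". The
// `upload` closure stands in for whatever the secondary-location uploader does.
async fn maybe_upload_heatmap<F, Fut>(timeline: &Timeline, upload: F)
where
    F: FnOnce(HeatMapTimeline) -> Fut,
    Fut: std::future::Future<Output = ()>,
{
    match timeline.generate_heatmap().await {
        Some(heatmap) => upload(heatmap).await,
        // shutting down or initializing: skip quietly
        None => {}
    }
}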
type TraversalId = String;
@@ -2232,7 +2279,7 @@ impl Timeline {
}
// Recurse into ancestor if needed
if is_inherited_key(key) && Lsn(cont_lsn.0 - 1) <= timeline.ancestor_lsn {
if Lsn(cont_lsn.0 - 1) <= timeline.ancestor_lsn {
trace!(
"going into ancestor {}, cont_lsn is {}",
timeline.ancestor_lsn,
@@ -2414,7 +2461,13 @@ impl Timeline {
// FIXME: It's pointless to check the cache for things that are not 8kB pages.
// We should look at the key to determine if it's a cacheable object
let (lsn, read_guard) = cache
.lookup_materialized_page(self.tenant_shard_id, self.timeline_id, key, lsn, ctx)
.lookup_materialized_page(
self.tenant_shard_id.tenant_id,
self.timeline_id,
key,
lsn,
ctx,
)
.await?;
let img = Bytes::from(read_guard.to_vec());
Some((lsn, img))
@@ -3156,7 +3209,7 @@ impl DurationRecorder {
#[derive(Default)]
struct CompactLevel0Phase1StatsBuilder {
version: Option<u64>,
tenant_id: Option<TenantShardId>,
tenant_id: Option<TenantId>,
timeline_id: Option<TimelineId>,
read_lock_acquisition_micros: DurationRecorder,
read_lock_held_spawn_blocking_startup_micros: DurationRecorder,
@@ -3173,7 +3226,7 @@ struct CompactLevel0Phase1StatsBuilder {
#[derive(serde::Serialize)]
struct CompactLevel0Phase1Stats {
version: u64,
tenant_id: TenantShardId,
tenant_id: TenantId,
timeline_id: TimelineId,
read_lock_acquisition_micros: RecordedDuration,
read_lock_held_spawn_blocking_startup_micros: RecordedDuration,
@@ -3692,7 +3745,7 @@ impl Timeline {
let ctx = ctx.attached_child();
let mut stats = CompactLevel0Phase1StatsBuilder {
version: Some(2),
tenant_id: Some(self.tenant_shard_id),
tenant_id: Some(self.tenant_shard_id.tenant_id),
timeline_id: Some(self.timeline_id),
..Default::default()
};
@@ -3860,14 +3913,7 @@ impl Timeline {
/// within a layer file. We can only remove the whole file if it's fully
/// obsolete.
pub(super) async fn gc(&self) -> anyhow::Result<GcResult> {
// this is most likely the background task, but it might be the task spawned by
// immediate_gc
let cancel = crate::task_mgr::shutdown_token();
let _g = tokio::select! {
guard = self.gc_lock.lock() => guard,
_ = self.cancel.cancelled() => return Ok(GcResult::default()),
_ = cancel.cancelled() => return Ok(GcResult::default()),
};
let _g = self.gc_lock.lock().await;
let timer = self.metrics.garbage_collect_histo.start_timer();
fail_point!("before-timeline-gc");
@@ -4161,7 +4207,7 @@ impl Timeline {
let cache = page_cache::get();
if let Err(e) = cache
.memorize_materialized_page(
self.tenant_shard_id,
self.tenant_shard_id.tenant_id,
self.timeline_id,
key,
last_rec_lsn,
@@ -4205,7 +4251,7 @@ impl Timeline {
let task_id = task_mgr::spawn(
task_mgr::BACKGROUND_RUNTIME.handle(),
task_mgr::TaskKind::DownloadAllRemoteLayers,
Some(self.tenant_shard_id),
Some(self.tenant_shard_id.tenant_id),
Some(self.timeline_id),
"download all remote layers task",
false,
@@ -4566,7 +4612,7 @@ mod tests {
.await
.unwrap();
let rtc = timeline
let rc = timeline
.remote_client
.clone()
.expect("just configured this");
@@ -4579,12 +4625,16 @@ mod tests {
.expect("should had been resident")
.drop_eviction_guard();
let first = async { layer.evict_and_wait(&rtc).await };
let second = async { layer.evict_and_wait(&rtc).await };
let batch = [layer];
let first = async { timeline.evict_layer_batch(&rc, &batch).await.unwrap() };
let second = async { timeline.evict_layer_batch(&rc, &batch).await.unwrap() };
let (first, second) = tokio::join!(first, second);
let res = layer.keep_resident().await;
let (first, second) = (only_one(first), only_one(second));
let res = batch[0].keep_resident().await;
assert!(matches!(res, Ok(None)), "{res:?}");
match (first, second) {
@@ -4605,6 +4655,14 @@ mod tests {
RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error)
}
fn only_one<T>(mut input: Vec<Option<T>>) -> T {
assert_eq!(1, input.len());
input
.pop()
.expect("length just checked")
.expect("no cancellation")
}
async fn find_some_layer(timeline: &Timeline) -> Layer {
let layers = timeline.layers.read().await;
let desc = layers

View File

@@ -43,7 +43,7 @@ async fn stop_tasks(timeline: &Timeline) -> Result<(), DeleteTimelineError> {
// Shut down the layer flush task before the remote client, as one depends on the other
task_mgr::shutdown_tasks(
Some(TaskKind::LayerFlushTask),
Some(timeline.tenant_shard_id),
Some(timeline.tenant_shard_id.tenant_id),
Some(timeline.timeline_id),
)
.await;
@@ -71,7 +71,7 @@ async fn stop_tasks(timeline: &Timeline) -> Result<(), DeleteTimelineError> {
info!("waiting for timeline tasks to shutdown");
task_mgr::shutdown_tasks(
None,
Some(timeline.tenant_shard_id),
Some(timeline.tenant_shard_id.tenant_id),
Some(timeline.timeline_id),
)
.await;
@@ -528,7 +528,7 @@ impl DeleteTimelineFlow {
task_mgr::spawn(
task_mgr::BACKGROUND_RUNTIME.handle(),
TaskKind::TimelineDeletionWorker,
Some(tenant_shard_id),
Some(tenant_shard_id.tenant_id),
Some(timeline_id),
"timeline_delete",
false,

View File

@@ -30,7 +30,7 @@ use crate::{
task_mgr::{self, TaskKind, BACKGROUND_RUNTIME},
tenant::{
config::{EvictionPolicy, EvictionPolicyLayerAccessThreshold},
tasks::BackgroundLoopKind,
tasks::{BackgroundLoopKind, RateLimitError},
timeline::EvictionError,
LogicalSizeCalculationCause, Tenant,
},
@@ -60,7 +60,7 @@ impl Timeline {
task_mgr::spawn(
BACKGROUND_RUNTIME.handle(),
TaskKind::Eviction,
Some(self.tenant_shard_id),
Some(self.tenant_shard_id.tenant_id),
Some(self.timeline_id),
&format!(
"layer eviction for {}/{}",
@@ -158,15 +158,15 @@ impl Timeline {
) -> ControlFlow<()> {
let now = SystemTime::now();
let acquire_permit = crate::tenant::tasks::concurrent_background_tasks_rate_limit_permit(
let _permit = match crate::tenant::tasks::concurrent_background_tasks_rate_limit(
BackgroundLoopKind::Eviction,
ctx,
);
let _permit = tokio::select! {
permit = acquire_permit => permit,
_ = cancel.cancelled() => return ControlFlow::Break(()),
_ = self.cancel.cancelled() => return ControlFlow::Break(()),
cancel,
)
.await
{
Ok(permit) => permit,
Err(RateLimitError::Cancelled) => return ControlFlow::Break(()),
};
// If we evict layers but keep cached values derived from those layers, then
@@ -212,21 +212,11 @@ impl Timeline {
// Gather layers for eviction.
// NB: all the checks can be invalidated as soon as we release the layer map lock.
// We don't want to hold the layer map lock during eviction.
// So, we just need to deal with this.
let remote_client = match self.remote_client.as_ref() {
Some(c) => c,
None => {
error!("no remote storage configured, cannot evict layers");
return ControlFlow::Continue(());
}
};
let mut js = tokio::task::JoinSet::new();
{
let candidates: Vec<_> = {
let guard = self.layers.read().await;
let layers = guard.layer_map();
let mut candidates = Vec::new();
for hist_layer in layers.iter_historic_layers() {
let hist_layer = guard.get_from_desc(&hist_layer);
@@ -272,49 +262,54 @@ impl Timeline {
continue;
}
};
let layer = guard.drop_eviction_guard();
if no_activity_for > p.threshold {
let remote_client = remote_client.clone();
// this could cause a lot of allocations in some cases
js.spawn(async move { layer.evict_and_wait(&remote_client).await });
stats.candidates += 1;
candidates.push(guard.drop_eviction_guard())
}
}
candidates
};
stats.candidates = candidates.len();
let remote_client = match self.remote_client.as_ref() {
None => {
error!(
num_candidates = candidates.len(),
"no remote storage configured, cannot evict layers"
);
return ControlFlow::Continue(());
}
Some(c) => c,
};
let join_all = async move {
while let Some(next) = js.join_next().await {
match next {
Ok(Ok(())) => stats.evicted += 1,
Ok(Err(EvictionError::NotFound | EvictionError::Downloaded)) => {
stats.not_evictable += 1;
}
Err(je) if je.is_cancelled() => unreachable!("not used"),
Err(je) if je.is_panic() => {
/* already logged */
stats.errors += 1;
}
Err(je) => tracing::error!("unknown JoinError: {je:?}"),
}
let results = match self.evict_layer_batch(remote_client, &candidates).await {
Err(pre_err) => {
stats.errors += candidates.len();
error!("could not do any evictions: {pre_err:#}");
return ControlFlow::Continue(());
}
stats
Ok(results) => results,
};
tokio::select! {
stats = join_all => {
if stats.candidates == stats.not_evictable {
debug!(stats=?stats, "eviction iteration complete");
} else if stats.errors > 0 || stats.not_evictable > 0 {
warn!(stats=?stats, "eviction iteration complete");
} else {
info!(stats=?stats, "eviction iteration complete");
assert_eq!(results.len(), candidates.len());
for result in results {
match result {
None => {
stats.skipped_for_shutdown += 1;
}
Some(Ok(())) => {
stats.evicted += 1;
}
Some(Err(EvictionError::NotFound | EvictionError::Downloaded)) => {
stats.not_evictable += 1;
}
}
_ = cancel.cancelled() => {
// just drop the joinset to "abort"
}
}
if stats.candidates == stats.not_evictable {
debug!(stats=?stats, "eviction iteration complete");
} else if stats.errors > 0 || stats.not_evictable > 0 {
warn!(stats=?stats, "eviction iteration complete");
} else {
info!(stats=?stats, "eviction iteration complete");
}
ControlFlow::Continue(())
}
@@ -348,7 +343,7 @@ impl Timeline {
// Make one of the tenant's timelines draw the short straw and run the calculation.
// The others wait until the calculation is done so that they take into account the
// imitated accesses that the winner made.
let tenant = match crate::tenant::mgr::get_tenant(self.tenant_shard_id, true) {
let tenant = match crate::tenant::mgr::get_tenant(self.tenant_shard_id.tenant_id, true) {
Ok(t) => t,
Err(_) => {
return ControlFlow::Break(());

View File

@@ -19,14 +19,14 @@ use super::Timeline;
pub struct UninitializedTimeline<'t> {
pub(crate) owning_tenant: &'t Tenant,
timeline_id: TimelineId,
raw_timeline: Option<(Arc<Timeline>, TimelineUninitMark<'t>)>,
raw_timeline: Option<(Arc<Timeline>, TimelineUninitMark)>,
}
impl<'t> UninitializedTimeline<'t> {
pub(crate) fn new(
owning_tenant: &'t Tenant,
timeline_id: TimelineId,
raw_timeline: Option<(Arc<Timeline>, TimelineUninitMark<'t>)>,
raw_timeline: Option<(Arc<Timeline>, TimelineUninitMark)>,
) -> Self {
Self {
owning_tenant,
@@ -169,55 +169,18 @@ pub(crate) fn cleanup_timeline_directory(uninit_mark: TimelineUninitMark) {
///
/// XXX: it's important to create it near the timeline dir, not inside it to ensure timeline dir gets removed first.
#[must_use]
pub(crate) struct TimelineUninitMark<'t> {
owning_tenant: &'t Tenant,
timeline_id: TimelineId,
pub(crate) struct TimelineUninitMark {
uninit_mark_deleted: bool,
uninit_mark_path: Utf8PathBuf,
pub(crate) timeline_path: Utf8PathBuf,
}
/// Errors when acquiring exclusive access to a timeline ID for creation
#[derive(thiserror::Error, Debug)]
pub(crate) enum TimelineExclusionError {
#[error("Already exists")]
AlreadyExists(Arc<Timeline>),
#[error("Already creating")]
AlreadyCreating,
// e.g. I/O errors, or some failure deep in postgres initdb
#[error(transparent)]
Other(#[from] anyhow::Error),
}
impl<'t> TimelineUninitMark<'t> {
pub(crate) fn new(
owning_tenant: &'t Tenant,
timeline_id: TimelineId,
uninit_mark_path: Utf8PathBuf,
timeline_path: Utf8PathBuf,
) -> Result<Self, TimelineExclusionError> {
// Lock order: this is the only place we take both locks. During drop() we only
// lock creating_timelines
let timelines = owning_tenant.timelines.lock().unwrap();
let mut creating_timelines: std::sync::MutexGuard<
'_,
std::collections::HashSet<TimelineId>,
> = owning_tenant.timelines_creating.lock().unwrap();
if let Some(existing) = timelines.get(&timeline_id) {
Err(TimelineExclusionError::AlreadyExists(existing.clone()))
} else if creating_timelines.contains(&timeline_id) {
Err(TimelineExclusionError::AlreadyCreating)
} else {
creating_timelines.insert(timeline_id);
Ok(Self {
owning_tenant,
timeline_id,
uninit_mark_deleted: false,
uninit_mark_path,
timeline_path,
})
impl TimelineUninitMark {
pub(crate) fn new(uninit_mark_path: Utf8PathBuf, timeline_path: Utf8PathBuf) -> Self {
Self {
uninit_mark_deleted: false,
uninit_mark_path,
timeline_path,
}
}
@@ -244,7 +207,7 @@ impl<'t> TimelineUninitMark<'t> {
}
}
impl Drop for TimelineUninitMark<'_> {
impl Drop for TimelineUninitMark {
fn drop(&mut self) {
if !self.uninit_mark_deleted {
if self.timeline_path.exists() {
@@ -263,11 +226,5 @@ impl Drop for TimelineUninitMark<'_> {
}
}
}
self.owning_tenant
.timelines_creating
.lock()
.unwrap()
.remove(&self.timeline_id);
}
}

View File

@@ -30,7 +30,6 @@ use crate::tenant::timeline::walreceiver::connection_manager::{
connection_manager_loop_step, ConnectionManagerState,
};
use pageserver_api::shard::TenantShardId;
use std::future::Future;
use std::num::NonZeroU64;
use std::ops::ControlFlow;
@@ -42,7 +41,7 @@ use tokio::sync::watch;
use tokio_util::sync::CancellationToken;
use tracing::*;
use utils::id::TimelineId;
use utils::id::TenantTimelineId;
use self::connection_manager::ConnectionManagerStatus;
@@ -61,8 +60,7 @@ pub struct WalReceiverConf {
}
pub struct WalReceiver {
tenant_shard_id: TenantShardId,
timeline_id: TimelineId,
timeline: TenantTimelineId,
manager_status: Arc<std::sync::RwLock<Option<ConnectionManagerStatus>>>,
}
@@ -73,7 +71,7 @@ impl WalReceiver {
mut broker_client: BrokerClientChannel,
ctx: &RequestContext,
) -> Self {
let tenant_shard_id = timeline.tenant_shard_id;
let tenant_id = timeline.tenant_shard_id.tenant_id;
let timeline_id = timeline.timeline_id;
let walreceiver_ctx =
ctx.detached_child(TaskKind::WalReceiverManager, DownloadBehavior::Error);
@@ -83,9 +81,9 @@ impl WalReceiver {
task_mgr::spawn(
WALRECEIVER_RUNTIME.handle(),
TaskKind::WalReceiverManager,
Some(timeline.tenant_shard_id),
Some(tenant_id),
Some(timeline_id),
&format!("walreceiver for timeline {tenant_shard_id}/{timeline_id}"),
&format!("walreceiver for timeline {tenant_id}/{timeline_id}"),
false,
async move {
debug_assert_current_span_has_tenant_and_timeline_id();
@@ -119,12 +117,11 @@ impl WalReceiver {
*loop_status.write().unwrap() = None;
Ok(())
}
.instrument(info_span!(parent: None, "wal_connection_manager", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), timeline_id = %timeline_id))
.instrument(info_span!(parent: None, "wal_connection_manager", tenant_id = %tenant_id, timeline_id = %timeline_id))
);
Self {
tenant_shard_id,
timeline_id,
timeline: TenantTimelineId::new(tenant_id, timeline_id),
manager_status,
}
}
@@ -132,8 +129,8 @@ impl WalReceiver {
pub async fn stop(self) {
task_mgr::shutdown_tasks(
Some(TaskKind::WalReceiverManager),
Some(self.tenant_shard_id),
Some(self.timeline_id),
Some(self.timeline.tenant_id),
Some(self.timeline.timeline_id),
)
.await;
}

View File

@@ -163,7 +163,7 @@ pub(super) async fn handle_walreceiver_connection(
task_mgr::spawn(
WALRECEIVER_RUNTIME.handle(),
TaskKind::WalReceiverConnectionPoller,
Some(timeline.tenant_shard_id),
Some(timeline.tenant_shard_id.tenant_id),
Some(timeline.timeline_id),
"walreceiver connection",
false,

View File

@@ -654,7 +654,6 @@ pub fn init(num_slots: usize) {
if OPEN_FILES.set(OpenFiles::new(num_slots)).is_err() {
panic!("virtual_file::init called twice");
}
crate::metrics::virtual_file_descriptor_cache::SIZE_MAX.set(num_slots as u64);
}
const TEST_MAX_FILE_DESCRIPTORS: usize = 10;

View File

@@ -458,10 +458,8 @@ impl<'a> WalIngest<'a> {
&& decoded.xl_rmid == pg_constants::RM_XLOG_ID
&& (decoded.xl_info == pg_constants::XLOG_FPI
|| decoded.xl_info == pg_constants::XLOG_FPI_FOR_HINT)
// compression of WAL is not yet supported: fall back to storing the original WAL record
// compression of WAL is not yet supported: fall back to storing the original WAL record
&& !postgres_ffi::bkpimage_is_compressed(blk.bimg_info, self.timeline.pg_version)?
// do not materialize null pages because they will most likely soon be replaced with real data
&& blk.bimg_len != 0
{
// Extract page image from FPI record
let img_len = blk.bimg_len as usize;
@@ -2191,7 +2189,7 @@ mod tests {
.load()
.await;
let tline = tenant
.bootstrap_timeline_test(TIMELINE_ID, pg_version, None, &ctx)
.bootstrap_timeline(TIMELINE_ID, pg_version, None, &ctx)
.await
.unwrap();

View File

@@ -61,7 +61,6 @@ thiserror.workspace = true
tls-listener.workspace = true
tokio-postgres.workspace = true
tokio-rustls.workspace = true
tokio-util.workspace = true
tokio = { workspace = true, features = ["signal"] }
tracing-opentelemetry.workspace = true
tracing-subscriber.workspace = true
@@ -78,6 +77,7 @@ postgres-protocol.workspace = true
smol_str.workspace = true
workspace_hack.workspace = true
tokio-util.workspace = true
[dev-dependencies]
rcgen.workspace = true

View File

@@ -62,9 +62,6 @@ pub enum AuthErrorImpl {
Please add it to the allowed list in the Neon console."
)]
IpAddressNotAllowed,
#[error("Too many connections to this endpoint. Please try again later.")]
TooManyConnections,
}
#[derive(Debug, Error)]
@@ -83,10 +80,6 @@ impl AuthError {
pub fn ip_address_not_allowed() -> Self {
AuthErrorImpl::IpAddressNotAllowed.into()
}
pub fn too_many_connections() -> Self {
AuthErrorImpl::TooManyConnections.into()
}
}
impl<E: Into<AuthErrorImpl>> From<E> for AuthError {
@@ -109,7 +102,6 @@ impl UserFacingError for AuthError {
MissingEndpointName => self.to_string(),
Io(_) => "Internal error".to_string(),
IpAddressNotAllowed => self.to_string(),
TooManyConnections => self.to_string(),
}
}
}

View File

@@ -166,7 +166,7 @@ impl TryFrom<ClientCredentials> for ComputeUserInfo {
/// All authentication flows will emit an AuthenticationOk message if successful.
async fn auth_quirks(
api: &impl console::Api,
extra: &ConsoleReqExtra,
extra: &ConsoleReqExtra<'_>,
creds: ClientCredentials,
client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,
allow_cleartext: bool,
@@ -235,7 +235,7 @@ async fn auth_quirks(
/// only if authentication was successful.
async fn auth_and_wake_compute(
api: &impl console::Api,
extra: &ConsoleReqExtra,
extra: &ConsoleReqExtra<'_>,
creds: ClientCredentials,
client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,
allow_cleartext: bool,
@@ -314,7 +314,7 @@ impl<'a> BackendType<'a, ClientCredentials> {
#[tracing::instrument(fields(allow_cleartext = allow_cleartext), skip_all)]
pub async fn authenticate(
self,
extra: &ConsoleReqExtra,
extra: &ConsoleReqExtra<'_>,
client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,
allow_cleartext: bool,
config: &'static AuthenticationConfig,
@@ -387,7 +387,7 @@ impl<'a> BackendType<'a, ClientCredentials> {
impl BackendType<'_, ComputeUserInfo> {
pub async fn get_allowed_ips(
&self,
extra: &ConsoleReqExtra,
extra: &ConsoleReqExtra<'_>,
) -> Result<Arc<Vec<String>>, GetAuthInfoError> {
use BackendType::*;
match self {
@@ -404,7 +404,7 @@ impl BackendType<'_, ComputeUserInfo> {
/// The link auth flow doesn't support this, so we return [`None`] in that case.
pub async fn wake_compute(
&self,
extra: &ConsoleReqExtra,
extra: &ConsoleReqExtra<'_>,
) -> Result<Option<CachedNodeInfo>, console::errors::WakeComputeError> {
use BackendType::*;

View File

@@ -3,7 +3,7 @@
use crate::{
auth::password_hack::parse_endpoint_param,
error::UserFacingError,
proxy::{neon_options_str, NUM_CONNECTION_ACCEPTED_BY_SNI},
proxy::{neon_options, NUM_CONNECTION_ACCEPTED_BY_SNI},
};
use itertools::Itertools;
use pq_proto::StartupMessageParams;
@@ -140,7 +140,7 @@ impl ClientCredentials {
let cache_key = format!(
"{}{}",
project.as_deref().unwrap_or(""),
neon_options_str(params)
neon_options(params).unwrap_or("".to_string())
)
.into();
@@ -406,7 +406,10 @@ mod tests {
let peer_addr = IpAddr::from([127, 0, 0, 1]);
let creds = ClientCredentials::parse(&options, sni, common_names, peer_addr)?;
assert_eq!(creds.project.as_deref(), Some("project"));
assert_eq!(creds.cache_key, "projectendpoint_type:read_write lsn:0/2");
assert_eq!(
creds.cache_key,
"projectneon_endpoint_type:read_write neon_lsn:0/2"
);
Ok(())
}

View File

@@ -8,7 +8,6 @@ use std::{net::SocketAddr, sync::Arc};
use futures::future::Either;
use itertools::Itertools;
use proxy::config::TlsServerEndPoint;
use proxy::proxy::run_until_cancelled;
use tokio::net::TcpListener;
use anyhow::{anyhow, bail, ensure, Context};
@@ -21,7 +20,7 @@ use tokio::io::{AsyncRead, AsyncWrite};
use tokio_util::sync::CancellationToken;
use utils::{project_git_version, sentry_init::init_sentry};
use tracing::{error, info, Instrument};
use tracing::{error, info, warn, Instrument};
project_git_version!(GIT_VERSION);
@@ -152,39 +151,63 @@ async fn task_main(
// will be inherited by all accepted client sockets.
socket2::SockRef::from(&listener).set_keepalive(true)?;
let connections = tokio_util::task::task_tracker::TaskTracker::new();
let mut connections = tokio::task::JoinSet::new();
while let Some(accept_result) =
run_until_cancelled(listener.accept(), &cancellation_token).await
{
let (socket, peer_addr) = accept_result?;
loop {
tokio::select! {
accept_result = listener.accept() => {
let (socket, peer_addr) = accept_result?;
let session_id = uuid::Uuid::new_v4();
let tls_config = Arc::clone(&tls_config);
let dest_suffix = Arc::clone(&dest_suffix);
let session_id = uuid::Uuid::new_v4();
let tls_config = Arc::clone(&tls_config);
let dest_suffix = Arc::clone(&dest_suffix);
connections.spawn(
async move {
socket
.set_nodelay(true)
.context("failed to set socket option")?;
connections.spawn(
async move {
socket
.set_nodelay(true)
.context("failed to set socket option")?;
info!(%peer_addr, "serving");
handle_client(dest_suffix, tls_config, tls_server_end_point, socket).await
info!(%peer_addr, "serving");
handle_client(dest_suffix, tls_config, tls_server_end_point, socket).await
}
.unwrap_or_else(|e| {
// Acknowledge that the task has finished with an error.
error!("per-client task finished with an error: {e:#}");
})
.instrument(tracing::info_span!("handle_client", ?session_id))
);
}
.unwrap_or_else(|e| {
// Acknowledge that the task has finished with an error.
error!("per-client task finished with an error: {e:#}");
})
.instrument(tracing::info_span!("handle_client", ?session_id)),
);
// Don't modify this unless you read https://docs.rs/tokio/latest/tokio/macro.select.html carefully.
// If this future completes and the pattern doesn't match, this branch is disabled for this call to `select!`.
// This only counts for this loop and it will be enabled again on next `select!`.
//
// Prior code had this as `Some(Err(e))` which _looks_ equivalent to the current setup, but it's not.
// When `connections.join_next()` returned `Some(Ok(()))` (which we expect), it would disable the join_next and it would
// not get called again, even if there are more connections to remove.
Some(res) = connections.join_next() => {
if let Err(e) = res {
if !e.is_panic() && !e.is_cancelled() {
warn!("unexpected error from joined connection task: {e:?}");
}
}
}
_ = cancellation_token.cancelled() => {
drop(listener);
break;
}
}
}
connections.close();
drop(listener);
connections.wait().await;
// Drain connections
info!("waiting for all client connections to finish");
while let Some(res) = connections.join_next().await {
if let Err(e) = res {
if !e.is_panic() && !e.is_cancelled() {
warn!("unexpected error from joined connection task: {e:?}");
}
}
}
info!("all client connections have finished");
Ok(())
}
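// Standalone sketch of the `select!` precondition pattern the comment above warns
// about (assumes tokio + tokio_util, as already used in this file): the pattern
// `Some(res) = set.join_next()` fails to match only when the set is empty, whereas a
// stricter pattern like `Some(Err(e)) = set.join_next()` also fails to match (and is
// disabled for that `select!` call) whenever a task finishes successfully.
async fn reap_until_cancelled(
    mut set: tokio::task::JoinSet<()>,
    token: tokio_util::sync::CancellationToken,
) {
    loop {
        tokio::select! {
            Some(res) = set.join_next() => {
                if let Err(e) = res {
                    if !e.is_panic() && !e.is_cancelled() {
                        tracing::warn!("unexpected error from joined task: {e:?}");
                    }
                }
            }
            _ = token.cancelled() => break,
        }
    }
}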

View File

@@ -7,8 +7,6 @@ use proxy::console;
use proxy::console::provider::AllowedIpsCache;
use proxy::console::provider::NodeInfoCache;
use proxy::http;
use proxy::rate_limiter::EndpointRateLimiter;
use proxy::rate_limiter::RateBucketInfo;
use proxy::rate_limiter::RateLimiterConfig;
use proxy::usage_metrics;
@@ -16,7 +14,6 @@ use anyhow::bail;
use proxy::config::{self, ProxyConfig};
use proxy::serverless;
use std::pin::pin;
use std::sync::Arc;
use std::{borrow::Cow, net::SocketAddr};
use tokio::net::TcpListener;
use tokio::task::JoinSet;
@@ -115,12 +112,6 @@ struct ProxyCliArgs {
/// Timeout for the rate limiter. If it doesn't manage to acquire a permit within this time, it will return an error.
#[clap(long, default_value = "15s", value_parser = humantime::parse_duration)]
rate_limiter_timeout: tokio::time::Duration,
/// Endpoint rate limiter max number of requests per second.
///
/// Provided in the form '<Requests Per Second>@<Bucket Duration Size>'.
/// Can be given multiple times for different bucket sizes.
#[clap(long, default_values_t = RateBucketInfo::DEFAULT_SET)]
endpoint_rps_limit: Vec<RateBucketInfo>,
/// Initial limit for dynamic rate limiter. Makes sense only if `rate_limit_algorithm` is *not* `None`.
#[clap(long, default_value_t = 100)]
initial_limit: usize,
@@ -163,8 +154,6 @@ async fn main() -> anyhow::Result<()> {
let proxy_listener = TcpListener::bind(proxy_address).await?;
let cancellation_token = CancellationToken::new();
let endpoint_rate_limiter = Arc::new(EndpointRateLimiter::new(&config.endpoint_rps_limit));
// client facing tasks. these will exit on error or on cancellation
// cancellation returns Ok(())
let mut client_tasks = JoinSet::new();
@@ -172,7 +161,6 @@ async fn main() -> anyhow::Result<()> {
config,
proxy_listener,
cancellation_token.clone(),
endpoint_rate_limiter.clone(),
));
// TODO: rename the argument to something like serverless.
@@ -186,7 +174,6 @@ async fn main() -> anyhow::Result<()> {
config,
serverless_listener,
cancellation_token.clone(),
endpoint_rate_limiter.clone(),
));
}
@@ -321,10 +308,6 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
let authentication_config = AuthenticationConfig {
scram_protocol_timeout: args.scram_protocol_timeout,
};
let mut endpoint_rps_limit = args.endpoint_rps_limit.clone();
RateBucketInfo::validate(&mut endpoint_rps_limit)?;
let config = Box::leak(Box::new(ProxyConfig {
tls_config,
auth_backend,
@@ -334,35 +317,7 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
authentication_config,
require_client_ip: args.require_client_ip,
disable_ip_check_for_http: args.disable_ip_check_for_http,
endpoint_rps_limit,
}));
Ok(config)
}
#[cfg(test)]
mod tests {
use std::time::Duration;
use clap::Parser;
use proxy::rate_limiter::RateBucketInfo;
#[test]
fn parse_endpoint_rps_limit() {
let config = super::ProxyCliArgs::parse_from([
"proxy",
"--endpoint-rps-limit",
"100@1s",
"--endpoint-rps-limit",
"20@30s",
]);
assert_eq!(
config.endpoint_rps_limit,
vec![
RateBucketInfo::new(100, Duration::from_secs(1)),
RateBucketInfo::new(20, Duration::from_secs(30)),
]
);
}
}

View File

@@ -1,13 +1,9 @@
use crate::{
auth::parse_endpoint_param,
cancellation::CancelClosure,
console::errors::WakeComputeError,
error::UserFacingError,
proxy::{neon_option, NUM_DB_CONNECTIONS_GAUGE},
auth::parse_endpoint_param, cancellation::CancelClosure, console::errors::WakeComputeError,
error::UserFacingError, proxy::is_neon_param,
};
use futures::{FutureExt, TryFutureExt};
use itertools::Itertools;
use metrics::IntCounterPairGuard;
use pq_proto::StartupMessageParams;
use std::{io, net::SocketAddr, time::Duration};
use thiserror::Error;
@@ -227,8 +223,6 @@ pub struct PostgresConnection {
pub params: std::collections::HashMap<String, String>,
/// Query cancellation token.
pub cancel_closure: CancelClosure,
_guage: IntCounterPairGuard,
}
impl ConnCfg {
@@ -237,7 +231,6 @@ impl ConnCfg {
&self,
allow_self_signed_compute: bool,
timeout: Duration,
proto: &'static str,
) -> Result<PostgresConnection, ConnectionError> {
let (socket_addr, stream, host) = self.connect_raw(timeout).await?;
@@ -271,7 +264,6 @@ impl ConnCfg {
stream,
params,
cancel_closure,
_guage: NUM_DB_CONNECTIONS_GAUGE.with_label_values(&[proto]).guard(),
};
Ok(connection)
@@ -283,7 +275,7 @@ fn filtered_options(params: &StartupMessageParams) -> Option<String> {
#[allow(unstable_name_collisions)]
let options: String = params
.options_raw()?
.filter(|opt| parse_endpoint_param(opt).is_none() && neon_option(opt).is_none())
.filter(|opt| parse_endpoint_param(opt).is_none() && !is_neon_param(opt))
.intersperse(" ") // TODO: use impl from std once it's stabilized
.collect();

View File

@@ -1,4 +1,4 @@
use crate::{auth, rate_limiter::RateBucketInfo};
use crate::auth;
use anyhow::{bail, ensure, Context, Ok};
use rustls::{sign, Certificate, PrivateKey};
use sha2::{Digest, Sha256};
@@ -20,7 +20,6 @@ pub struct ProxyConfig {
pub authentication_config: AuthenticationConfig,
pub require_client_ip: bool,
pub disable_ip_check_for_http: bool,
pub endpoint_rps_limit: Vec<RateBucketInfo>,
}
#[derive(Debug)]
@@ -33,8 +32,6 @@ pub struct TlsConfig {
pub config: Arc<rustls::ServerConfig>,
pub common_names: Option<HashSet<String>>,
pub cert_resolver: Arc<CertResolver>,
pub handshake_timeout: Duration,
pub max_handshaking: usize,
}
pub struct HttpConfig {
@@ -100,8 +97,6 @@ pub fn configure_tls(
config,
common_names: Some(common_names),
cert_resolver,
handshake_timeout: tls_listener::DEFAULT_HANDSHAKE_TIMEOUT,
max_handshaking: tls_listener::DEFAULT_MAX_HANDSHAKES,
})
}

View File

@@ -196,23 +196,12 @@ pub mod errors {
}
/// Extra query params we'd like to pass to the console.
pub struct ConsoleReqExtra {
pub struct ConsoleReqExtra<'a> {
/// A unique identifier for a connection.
pub session_id: uuid::Uuid,
/// Name of client application, if set.
pub application_name: String,
pub options: Vec<(String, String)>,
}
impl ConsoleReqExtra {
// https://swagger.io/docs/specification/serialization/ DeepObject format
// paramName[prop1]=value1&paramName[prop2]=value2&....
pub fn options_as_deep_object(&self) -> Vec<(String, String)> {
self.options
.iter()
.map(|(k, v)| (format!("options[{}]", k), v.to_string()))
.collect()
}
pub application_name: Option<&'a str>,
pub options: Option<&'a str>,
}
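// Minimal sketch of the DeepObject encoding removed above, using hypothetical option
// values: each `(k, v)` pair becomes `("options[k]", v)` before being handed to the
// request builder's `.query(..)`.
#[test]
fn options_deep_object_sketch() {
    let options = vec![
        ("lsn".to_string(), "0/2".to_string()),
        ("endpoint_type".to_string(), "read_write".to_string()),
    ];
    let deep: Vec<(String, String)> = options
        .iter()
        .map(|(k, v)| (format!("options[{k}]"), v.to_string()))
        .collect();
    assert_eq!(deep[0].0, "options[lsn]");
    assert_eq!(deep[1].0, "options[endpoint_type]");
}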
/// Auth secret which is managed by the cloud.
@@ -259,20 +248,20 @@ pub trait Api {
/// Get the client's auth secret for authentication.
async fn get_auth_info(
&self,
extra: &ConsoleReqExtra,
extra: &ConsoleReqExtra<'_>,
creds: &ComputeUserInfo,
) -> Result<AuthInfo, errors::GetAuthInfoError>;
async fn get_allowed_ips(
&self,
extra: &ConsoleReqExtra,
extra: &ConsoleReqExtra<'_>,
creds: &ComputeUserInfo,
) -> Result<Arc<Vec<String>>, errors::GetAuthInfoError>;
/// Wake up the compute node and return the corresponding connection info.
async fn wake_compute(
&self,
extra: &ConsoleReqExtra,
extra: &ConsoleReqExtra<'_>,
creds: &ComputeUserInfo,
) -> Result<CachedNodeInfo, errors::WakeComputeError>;
}

View File

@@ -144,7 +144,7 @@ impl super::Api for Api {
#[tracing::instrument(skip_all)]
async fn get_auth_info(
&self,
_extra: &ConsoleReqExtra,
_extra: &ConsoleReqExtra<'_>,
creds: &ComputeUserInfo,
) -> Result<AuthInfo, GetAuthInfoError> {
self.do_get_auth_info(creds).await
@@ -152,7 +152,7 @@ impl super::Api for Api {
async fn get_allowed_ips(
&self,
_extra: &ConsoleReqExtra,
_extra: &ConsoleReqExtra<'_>,
creds: &ComputeUserInfo,
) -> Result<Arc<Vec<String>>, GetAuthInfoError> {
Ok(Arc::new(self.do_get_auth_info(creds).await?.allowed_ips))
@@ -161,7 +161,7 @@ impl super::Api for Api {
#[tracing::instrument(skip_all)]
async fn wake_compute(
&self,
_extra: &ConsoleReqExtra,
_extra: &ConsoleReqExtra<'_>,
_creds: &ComputeUserInfo,
) -> Result<CachedNodeInfo, WakeComputeError> {
self.do_wake_compute()

View File

@@ -48,7 +48,7 @@ impl Api {
async fn do_get_auth_info(
&self,
extra: &ConsoleReqExtra,
extra: &ConsoleReqExtra<'_>,
creds: &ComputeUserInfo,
) -> Result<AuthInfo, GetAuthInfoError> {
let request_id = uuid::Uuid::new_v4().to_string();
@@ -60,9 +60,9 @@ impl Api {
.header("Authorization", format!("Bearer {}", &self.jwt))
.query(&[("session_id", extra.session_id)])
.query(&[
("application_name", extra.application_name.as_str()),
("project", creds.endpoint.as_str()),
("role", creds.inner.user.as_str()),
("application_name", extra.application_name),
("project", Some(&creds.endpoint)),
("role", Some(&creds.inner.user)),
])
.build()?;
@@ -101,28 +101,23 @@ impl Api {
async fn do_wake_compute(
&self,
extra: &ConsoleReqExtra,
extra: &ConsoleReqExtra<'_>,
creds: &ComputeUserInfo,
) -> Result<NodeInfo, WakeComputeError> {
let request_id = uuid::Uuid::new_v4().to_string();
async {
let mut request_builder = self
let request = self
.endpoint
.get("proxy_wake_compute")
.header("X-Request-ID", &request_id)
.header("Authorization", format!("Bearer {}", &self.jwt))
.query(&[("session_id", extra.session_id)])
.query(&[
("application_name", extra.application_name.as_str()),
("project", creds.endpoint.as_str()),
]);
request_builder = if extra.options.is_empty() {
request_builder
} else {
request_builder.query(&extra.options_as_deep_object())
};
let request = request_builder.build()?;
("application_name", extra.application_name),
("project", Some(&creds.endpoint)),
("options", extra.options),
])
.build()?;
info!(url = request.url().as_str(), "sending http request");
let start = Instant::now();
@@ -161,7 +156,7 @@ impl super::Api for Api {
#[tracing::instrument(skip_all)]
async fn get_auth_info(
&self,
extra: &ConsoleReqExtra,
extra: &ConsoleReqExtra<'_>,
creds: &ComputeUserInfo,
) -> Result<AuthInfo, GetAuthInfoError> {
self.do_get_auth_info(extra, creds).await
@@ -169,7 +164,7 @@ impl super::Api for Api {
async fn get_allowed_ips(
&self,
extra: &ConsoleReqExtra,
extra: &ConsoleReqExtra<'_>,
creds: &ComputeUserInfo,
) -> Result<Arc<Vec<String>>, GetAuthInfoError> {
let key: &str = &creds.endpoint;
@@ -192,7 +187,7 @@ impl super::Api for Api {
#[tracing::instrument(skip_all)]
async fn wake_compute(
&self,
extra: &ConsoleReqExtra,
extra: &ConsoleReqExtra<'_>,
creds: &ComputeUserInfo,
) -> Result<CachedNodeInfo, WakeComputeError> {
let key: &str = &creds.inner.cache_key;

View File

@@ -9,7 +9,6 @@ use crate::{
console::{self, errors::WakeComputeError, messages::MetricsAuxInfo, Api},
http::StatusCode,
protocol2::WithClientIp,
rate_limiter::EndpointRateLimiter,
stream::{PqStream, Stream},
usage_metrics::{Ids, USAGE_METRICS},
};
@@ -17,10 +16,7 @@ use anyhow::{bail, Context};
use async_trait::async_trait;
use futures::TryFutureExt;
use itertools::Itertools;
use metrics::{
exponential_buckets, register_int_counter_pair_vec, register_int_counter_vec,
IntCounterPairVec, IntCounterVec,
};
use metrics::{exponential_buckets, register_int_counter_vec, IntCounterVec};
use once_cell::sync::{Lazy, OnceCell};
use pq_proto::{BeMessage as Be, FeStartupPacket, StartupMessageParams};
use prometheus::{
@@ -28,7 +24,7 @@ use prometheus::{
IntGaugeVec,
};
use regex::Regex;
use std::{error::Error, io, net::IpAddr, ops::ControlFlow, sync::Arc};
use std::{error::Error, io, net::IpAddr, ops::ControlFlow, sync::Arc, time::Instant};
use tokio::{
io::{AsyncRead, AsyncWrite, AsyncWriteExt},
time,
@@ -47,10 +43,17 @@ const RETRY_WAIT_EXPONENT_BASE: f64 = std::f64::consts::SQRT_2;
const ERR_INSECURE_CONNECTION: &str = "connection is insecure (try using `sslmode=require`)";
const ERR_PROTO_VIOLATION: &str = "protocol violation";
pub static NUM_DB_CONNECTIONS_GAUGE: Lazy<IntCounterPairVec> = Lazy::new(|| {
register_int_counter_pair_vec!(
pub static NUM_DB_CONNECTIONS_OPENED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
"proxy_opened_db_connections_total",
"Number of opened connections to a database.",
&["protocol"],
)
.unwrap()
});
pub static NUM_DB_CONNECTIONS_CLOSED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
"proxy_closed_db_connections_total",
"Number of closed connections to a database.",
&["protocol"],
@@ -58,10 +61,17 @@ pub static NUM_DB_CONNECTIONS_GAUGE: Lazy<IntCounterPairVec> = Lazy::new(|| {
.unwrap()
});
pub static NUM_CLIENT_CONNECTION_GAUGE: Lazy<IntCounterPairVec> = Lazy::new(|| {
register_int_counter_pair_vec!(
pub static NUM_CLIENT_CONNECTION_OPENED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
"proxy_opened_client_connections_total",
"Number of opened connections from a client.",
&["protocol"],
)
.unwrap()
});
pub static NUM_CLIENT_CONNECTION_CLOSED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
"proxy_closed_client_connections_total",
"Number of closed connections from a client.",
&["protocol"],
@@ -69,10 +79,17 @@ pub static NUM_CLIENT_CONNECTION_GAUGE: Lazy<IntCounterPairVec> = Lazy::new(|| {
.unwrap()
});
pub static NUM_CONNECTION_REQUESTS_GAUGE: Lazy<IntCounterPairVec> = Lazy::new(|| {
register_int_counter_pair_vec!(
pub static NUM_CONNECTIONS_ACCEPTED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
"proxy_accepted_connections_total",
"Number of client connections accepted.",
&["protocol"],
)
.unwrap()
});
pub static NUM_CONNECTIONS_CLOSED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
"proxy_closed_connections_total",
"Number of client connections closed.",
&["protocol"],
@@ -154,7 +171,7 @@ pub static ALLOWED_IPS_NUMBER: Lazy<Histogram> = Lazy::new(|| {
pub struct LatencyTimer {
// time since the stopwatch was started
start: Option<time::Instant>,
start: Option<Instant>,
// accumulated time on the stopwatch
accumulated: std::time::Duration,
// label data
@@ -171,7 +188,7 @@ pub struct LatencyTimerPause<'a> {
impl LatencyTimer {
pub fn new(protocol: &'static str) -> Self {
Self {
start: Some(time::Instant::now()),
start: Some(Instant::now()),
accumulated: std::time::Duration::ZERO,
protocol,
cache_miss: false,
@@ -205,7 +222,7 @@ impl LatencyTimer {
impl Drop for LatencyTimerPause<'_> {
fn drop(&mut self) {
// start the stopwatch again
self.timer.start = Some(time::Instant::now());
self.timer.start = Some(Instant::now());
}
}
@@ -260,26 +277,10 @@ static NUM_BYTES_PROXIED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
.unwrap()
});
pub async fn run_until_cancelled<F: std::future::Future>(
f: F,
cancellation_token: &CancellationToken,
) -> Option<F::Output> {
match futures::future::select(
std::pin::pin!(f),
std::pin::pin!(cancellation_token.cancelled()),
)
.await
{
futures::future::Either::Left((f, _)) => Some(f),
futures::future::Either::Right(((), _)) => None,
}
}
pub async fn task_main(
config: &'static ProxyConfig,
listener: tokio::net::TcpListener,
cancellation_token: CancellationToken,
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
) -> anyhow::Result<()> {
scopeguard::defer! {
info!("proxy has shut down");
@@ -289,65 +290,71 @@ pub async fn task_main(
// will be inherited by all accepted client sockets.
socket2::SockRef::from(&listener).set_keepalive(true)?;
let connections = tokio_util::task::task_tracker::TaskTracker::new();
let mut connections = tokio::task::JoinSet::new();
let cancel_map = Arc::new(CancelMap::default());
while let Some(accept_result) =
run_until_cancelled(listener.accept(), &cancellation_token).await
{
let (socket, peer_addr) = accept_result?;
loop {
tokio::select! {
accept_result = listener.accept() => {
let (socket, peer_addr) = accept_result?;
let session_id = uuid::Uuid::new_v4();
let cancel_map = Arc::clone(&cancel_map);
let endpoint_rate_limiter = endpoint_rate_limiter.clone();
let session_id = uuid::Uuid::new_v4();
let cancel_map = Arc::clone(&cancel_map);
connections.spawn(
async move {
info!("accepted postgres client connection");
connections.spawn(
async move {
info!("accepted postgres client connection");
let mut socket = WithClientIp::new(socket);
let mut peer_addr = peer_addr;
if let Some(ip) = socket.wait_for_addr().await? {
peer_addr = ip;
tracing::Span::current().record("peer_addr", &tracing::field::display(ip));
} else if config.require_client_ip {
bail!("missing required client IP");
}
let mut socket = WithClientIp::new(socket);
let mut peer_addr = peer_addr;
if let Some(ip) = socket.wait_for_addr().await? {
peer_addr = ip;
tracing::Span::current().record("peer_addr", &tracing::field::display(ip));
} else if config.require_client_ip {
bail!("missing required client IP");
}
socket
.inner
.set_nodelay(true)
.context("failed to set socket option")?;
socket
.inner
.set_nodelay(true)
.context("failed to set socket option")?;
handle_client(
config,
&cancel_map,
session_id,
socket,
ClientMode::Tcp,
peer_addr.ip(),
endpoint_rate_limiter,
)
.await
handle_client(config, &cancel_map, session_id, socket, ClientMode::Tcp, peer_addr.ip()).await
}
.instrument(info_span!("handle_client", ?session_id, peer_addr = tracing::field::Empty))
.unwrap_or_else(move |e| {
// Acknowledge that the task has finished with an error.
error!(?session_id, "per-client task finished with an error: {e:#}");
}),
);
}
.instrument(info_span!(
"handle_client",
?session_id,
peer_addr = tracing::field::Empty
))
.unwrap_or_else(move |e| {
// Acknowledge that the task has finished with an error.
error!(?session_id, "per-client task finished with an error: {e:#}");
}),
);
// Don't modify this unless you read https://docs.rs/tokio/latest/tokio/macro.select.html carefully.
// If this future completes and the pattern doesn't match, this branch is disabled for this call to `select!`.
// This only counts for this loop and it will be enabled again on next `select!`.
//
// Prior code had this as `Some(Err(e))` which _looks_ equivalent to the current setup, but it's not.
// When `connections.join_next()` returned `Some(Ok(()))` (which we expect), it would disable the join_next and it would
// not get called again, even if there are more connections to remove.
Some(res) = connections.join_next() => {
if let Err(e) = res {
if !e.is_panic() && !e.is_cancelled() {
warn!("unexpected error from joined connection task: {e:?}");
}
}
}
_ = cancellation_token.cancelled() => {
drop(listener);
break;
}
}
}
connections.close();
drop(listener);
// Drain connections
connections.wait().await;
while let Some(res) = connections.join_next().await {
if let Err(e) = res {
if !e.is_panic() && !e.is_cancelled() {
warn!("unexpected error from joined connection task: {e:?}");
}
}
}
Ok(())
}
@@ -402,7 +409,6 @@ pub async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
stream: S,
mode: ClientMode,
peer_addr: IpAddr,
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
) -> anyhow::Result<()> {
info!(
protocol = mode.protocol_label(),
@@ -410,12 +416,16 @@ pub async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
);
let proto = mode.protocol_label();
let _client_gauge = NUM_CLIENT_CONNECTION_GAUGE
NUM_CLIENT_CONNECTION_OPENED_COUNTER
.with_label_values(&[proto])
.guard();
let _request_gauge = NUM_CONNECTION_REQUESTS_GAUGE
.inc();
NUM_CONNECTIONS_ACCEPTED_COUNTER
.with_label_values(&[proto])
.guard();
.inc();
scopeguard::defer! {
NUM_CLIENT_CONNECTION_CLOSED_COUNTER.with_label_values(&[proto]).inc();
NUM_CONNECTIONS_CLOSED_COUNTER.with_label_values(&[proto]).inc();
}
let tls = config.tls_config.as_ref();
@@ -447,7 +457,6 @@ pub async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
&params,
session_id,
mode.allow_self_signed_compute(config),
endpoint_rate_limiter,
);
cancel_map
.with_session(|session| client.connect_to_db(session, mode, &config.authentication_config))
@@ -467,14 +476,9 @@ async fn handshake<S: AsyncRead + AsyncWrite + Unpin>(
// Client may try upgrading to each protocol only once
let (mut tried_ssl, mut tried_gss) = (false, false);
let handshake_timeout = tls
.map(|tls| tls.handshake_timeout)
.unwrap_or(tls_listener::DEFAULT_HANDSHAKE_TIMEOUT);
let deadline = time::Instant::now() + handshake_timeout;
let mut stream = PqStream::new(Stream::from_raw(stream));
loop {
let msg = tokio::time::timeout_at(deadline, stream.read_startup_packet()).await??;
let msg = stream.read_startup_packet().await?;
info!("received {msg:?}");
use FeStartupPacket::*;
@@ -500,9 +504,7 @@ async fn handshake<S: AsyncRead + AsyncWrite + Unpin>(
if !read_buf.is_empty() {
bail!("data is sent before server replied with EncryptionResponse");
}
let tls_stream =
tokio::time::timeout_at(deadline, raw.upgrade(tls.to_server_config()))
.await??;
let tls_stream = raw.upgrade(tls.to_server_config()).await?;
let (_, tls_server_end_point) = tls
.cert_resolver
@@ -569,13 +571,12 @@ pub fn invalidate_cache(node_info: console::CachedNodeInfo) -> compute::ConnCfg
async fn connect_to_compute_once(
node_info: &console::CachedNodeInfo,
timeout: time::Duration,
proto: &'static str,
) -> Result<PostgresConnection, compute::ConnectionError> {
let allow_self_signed_compute = node_info.allow_self_signed_compute;
node_info
.config
.connect(allow_self_signed_compute, timeout, proto)
.connect(allow_self_signed_compute, timeout)
.await
}
@@ -596,7 +597,6 @@ pub trait ConnectMechanism {
pub struct TcpMechanism<'a> {
/// KV-dictionary with PostgreSQL connection params.
pub params: &'a StartupMessageParams,
pub proto: &'static str,
}
#[async_trait]
@@ -610,7 +610,7 @@ impl ConnectMechanism for TcpMechanism<'_> {
node_info: &console::CachedNodeInfo,
timeout: time::Duration,
) -> Result<PostgresConnection, Self::Error> {
connect_to_compute_once(node_info, timeout, self.proto).await
connect_to_compute_once(node_info, timeout).await
}
fn update_connect_config(&self, config: &mut compute::ConnCfg) {
@@ -665,7 +665,7 @@ fn report_error(e: &WakeComputeError, retry: bool) {
pub async fn connect_to_compute<M: ConnectMechanism>(
mechanism: &M,
mut node_info: console::CachedNodeInfo,
extra: &console::ConsoleReqExtra,
extra: &console::ConsoleReqExtra<'_>,
creds: &auth::BackendType<'_, auth::backend::ComputeUserInfo>,
mut latency_timer: LatencyTimer,
) -> Result<M::Connection, M::Error>
@@ -922,8 +922,6 @@ struct Client<'a, S> {
session_id: uuid::Uuid,
/// Allow self-signed certificates (for testing).
allow_self_signed_compute: bool,
/// Rate limiter for endpoints
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
}
impl<'a, S> Client<'a, S> {
@@ -934,7 +932,6 @@ impl<'a, S> Client<'a, S> {
params: &'a StartupMessageParams,
session_id: uuid::Uuid,
allow_self_signed_compute: bool,
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
) -> Self {
Self {
stream,
@@ -942,7 +939,6 @@ impl<'a, S> Client<'a, S> {
params,
session_id,
allow_self_signed_compute,
endpoint_rate_limiter,
}
}
}
@@ -964,29 +960,17 @@ impl<S: AsyncRead + AsyncWrite + Unpin> Client<'_, S> {
params,
session_id,
allow_self_signed_compute,
endpoint_rate_limiter,
} = self;
// check rate limit
if let Some(ep) = creds.get_endpoint() {
if !endpoint_rate_limiter.check(ep) {
return stream
.throw_error(auth::AuthError::too_many_connections())
.await;
}
}
let console_options = neon_options(params);
let proto = mode.protocol_label();
let extra = console::ConsoleReqExtra {
session_id, // aka this connection's id
application_name: format!(
"{}/{}",
params.get("application_name").unwrap_or_default(),
proto
),
options: neon_options(params),
application_name: params.get("application_name"),
options: console_options.as_deref(),
};
let mut latency_timer = LatencyTimer::new(proto);
let mut latency_timer = LatencyTimer::new(mode.protocol_label());
let user = creds.get_user().to_owned();
let auth_result = match creds
@@ -1015,7 +999,7 @@ impl<S: AsyncRead + AsyncWrite + Unpin> Client<'_, S> {
let aux = node_info.aux.clone();
let mut node = connect_to_compute(
&TcpMechanism { params, proto },
&TcpMechanism { params },
node_info,
&extra,
&creds,
@@ -1024,6 +1008,14 @@ impl<S: AsyncRead + AsyncWrite + Unpin> Client<'_, S> {
.or_else(|e| stream.throw_error(e))
.await?;
let proto = mode.protocol_label();
NUM_DB_CONNECTIONS_OPENED_COUNTER
.with_label_values(&[proto])
.inc();
scopeguard::defer! {
NUM_DB_CONNECTIONS_CLOSED_COUNTER.with_label_values(&[proto]).inc();
}
prepare_client_connection(&node, session, &mut stream).await?;
// Before proxy passing, forward to compute whatever data is left in the
// PqStream input buffer. Normally there is none, but our serverless npm
@@ -1035,29 +1027,26 @@ impl<S: AsyncRead + AsyncWrite + Unpin> Client<'_, S> {
}
}
pub fn neon_options(params: &StartupMessageParams) -> Vec<(String, String)> {
pub fn neon_options(params: &StartupMessageParams) -> Option<String> {
#[allow(unstable_name_collisions)]
match params.options_raw() {
Some(options) => options.filter_map(neon_option).collect(),
None => vec![],
}
}
pub fn neon_options_str(params: &StartupMessageParams) -> String {
#[allow(unstable_name_collisions)]
neon_options(params)
.iter()
.map(|(k, v)| format!("{}:{}", k, v))
let options: String = params
.options_raw()?
.filter(|opt| is_neon_param(opt))
.sorted() // we sort it to use as cache key
.intersperse(" ".to_owned())
.collect()
.intersperse(" ") // TODO: use impl from std once it's stabilized
.collect();
// Don't even bother with empty options.
if options.is_empty() {
return None;
}
Some(options)
}
pub fn neon_option(bytes: &str) -> Option<(String, String)> {
pub fn is_neon_param(bytes: &str) -> bool {
static RE: OnceCell<Regex> = OnceCell::new();
let re = RE.get_or_init(|| Regex::new(r"^neon_(\w+):(.+)").unwrap());
RE.get_or_init(|| Regex::new(r"^neon_\w+:").unwrap());
let cap = re.captures(bytes)?;
let (_, [k, v]) = cap.extract();
Some((k.to_owned(), v.to_owned()))
RE.get().unwrap().is_match(bytes)
}
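// Quick illustration of the behaviour change above, with assumed inputs: the new
// regex only checks for the `neon_<key>:` prefix and no longer captures the value,
// so callers that need the key/value now keep the whole option string.
#[test]
fn is_neon_param_sketch() {
    assert!(is_neon_param("neon_lsn:0/2"));
    assert!(is_neon_param("neon_endpoint_type:read_write"));
    assert!(!is_neon_param("application_name:psql"));
    assert!(!is_neon_param("neon_lsn")); // no trailing colon, not matched
}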

View File

@@ -85,8 +85,6 @@ fn generate_tls_config<'a>(
config,
common_names,
cert_resolver: Arc::new(cert_resolver),
handshake_timeout: tls_listener::DEFAULT_HANDSHAKE_TIMEOUT,
max_handshaking: tls_listener::DEFAULT_MAX_HANDSHAKES,
}
};
@@ -486,14 +484,14 @@ fn helper_create_connect_info(
mechanism: &TestConnectMechanism,
) -> (
CachedNodeInfo,
console::ConsoleReqExtra,
console::ConsoleReqExtra<'static>,
auth::BackendType<'_, ComputeUserInfo>,
) {
let cache = helper_create_cached_node_info();
let extra = console::ConsoleReqExtra {
session_id: uuid::Uuid::new_v4(),
application_name: "TEST".into(),
options: vec![],
application_name: Some("TEST"),
options: None,
};
let creds = auth::BackendType::Test(mechanism);
(cache, extra, creds)

View File

@@ -4,4 +4,3 @@ mod limiter;
pub use aimd::Aimd;
pub use limit_algorithm::{AimdConfig, Fixed, RateLimitAlgorithm, RateLimiterConfig};
pub use limiter::Limiter;
pub use limiter::{EndpointRateLimiter, RateBucketInfo};

View File

@@ -33,6 +33,39 @@ impl Aimd {
min_utilisation_threshold: config.aimd_min_utilisation_threshold,
}
}
pub fn decrease_factor(self, factor: f32) -> Self {
assert!((0.5..1.0).contains(&factor));
Self {
decrease_factor: factor,
..self
}
}
pub fn increase_by(self, increase: usize) -> Self {
assert!(increase > 0);
Self {
increase_by: increase,
..self
}
}
pub fn with_max_limit(self, max: usize) -> Self {
assert!(max > 0);
Self {
max_limit: max,
..self
}
}
/// A threshold below which the limit won't be increased. 0.5 = 50%.
pub fn with_min_utilisation_threshold(self, min_util: f32) -> Self {
assert!(min_util > 0. && min_util < 1.);
Self {
min_utilisation_threshold: min_util,
..self
}
}
}
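// Hypothetical usage of the builder-style setters added above, starting from some
// already-constructed `aimd: Aimd`; the chosen values respect the asserted ranges.
fn tune(aimd: Aimd) -> Aimd {
    aimd.decrease_factor(0.75)
        .increase_by(10)
        .with_max_limit(500)
        .with_min_utilisation_threshold(0.5)
}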
#[async_trait]

View File

@@ -1,19 +1,13 @@
use std::{
collections::hash_map::RandomState,
hash::BuildHasher,
sync::{
atomic::{AtomicUsize, Ordering},
Arc, Mutex,
Arc,
},
time::Duration,
};
use anyhow::bail;
use dashmap::DashMap;
use itertools::Itertools;
use rand::{rngs::StdRng, Rng, SeedableRng};
use smol_str::SmolStr;
use tokio::sync::{Mutex as AsyncMutex, Semaphore, SemaphorePermit};
use tokio::time::{timeout, Duration, Instant};
use tokio::time::{timeout, Instant};
use tracing::info;
use super::{
@@ -21,180 +15,6 @@ use super::{
RateLimiterConfig,
};
// Simple per-endpoint rate limiter.
//
// Check that number of connections to the endpoint is below `max_rps` rps.
// Purposefully ignore user name and database name as clients can reconnect
// with different names, so we'll end up sending some http requests to
// the control plane.
//
// We may also save quite a lot of CPU (I think) by bailing out right after we
// see SNI, before doing the TLS handshake. User-side error messages in that case
// do not look very nice (`SSL SYSCALL error: Undefined error: 0`), so for now
// I went with a more expensive way that yields user-friendlier error messages.
pub struct EndpointRateLimiter<Rand = StdRng, Hasher = RandomState> {
map: DashMap<SmolStr, Vec<RateBucket>, Hasher>,
info: &'static [RateBucketInfo],
access_count: AtomicUsize,
rand: Mutex<Rand>,
}
#[derive(Clone, Copy)]
struct RateBucket {
start: Instant,
count: u32,
}
impl RateBucket {
fn should_allow_request(&mut self, info: &RateBucketInfo, now: Instant) -> bool {
if now - self.start < info.interval {
self.count < info.max_rpi
} else {
// bucket expired, reset
self.count = 0;
self.start = now;
true
}
}
fn inc(&mut self) {
self.count += 1;
}
}
#[derive(Clone, Copy, PartialEq)]
pub struct RateBucketInfo {
pub interval: Duration,
// requests per interval
pub max_rpi: u32,
}
impl std::fmt::Display for RateBucketInfo {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let rps = self.max_rpi * 1000 / self.interval.as_millis() as u32;
write!(f, "{rps}@{}", humantime::format_duration(self.interval))
}
}
impl std::fmt::Debug for RateBucketInfo {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{self}")
}
}
impl std::str::FromStr for RateBucketInfo {
type Err = anyhow::Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let Some((max_rps, interval)) = s.split_once('@') else {
bail!("invalid rate info")
};
let max_rps = max_rps.parse()?;
let interval = humantime::parse_duration(interval)?;
Ok(Self::new(max_rps, interval))
}
}
impl RateBucketInfo {
pub const DEFAULT_SET: [Self; 3] = [
Self::new(300, Duration::from_secs(1)),
Self::new(200, Duration::from_secs(60)),
Self::new(100, Duration::from_secs(600)),
];
pub fn validate(info: &mut [Self]) -> anyhow::Result<()> {
info.sort_unstable_by_key(|info| info.interval);
let invalid = info
.iter()
.tuple_windows()
.find(|(a, b)| a.max_rpi > b.max_rpi);
if let Some((a, b)) = invalid {
bail!(
"invalid endpoint RPS limits. {b} allows fewer requests per bucket than {a} ({} vs {})",
b.max_rpi,
a.max_rpi,
);
}
Ok(())
}
pub const fn new(max_rps: u32, interval: Duration) -> Self {
Self {
interval,
max_rpi: max_rps * interval.as_millis() as u32 / 1000,
}
}
}
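// Worked example of the rps -> requests-per-interval conversion in `new`, using the
// values from DEFAULT_SET above:
//   300 rps @ 1s   -> max_rpi = 300 * 1000   / 1000 =   300
//   200 rps @ 60s  -> max_rpi = 200 * 60000  / 1000 = 12000
//   100 rps @ 600s -> max_rpi = 100 * 600000 / 1000 = 60000
// `validate` sorts by interval and then requires max_rpi to be non-decreasing as the
// interval grows, which DEFAULT_SET satisfies (300 <= 12000 <= 60000).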
impl EndpointRateLimiter {
pub fn new(info: &'static [RateBucketInfo]) -> Self {
Self::new_with_rand_and_hasher(info, StdRng::from_entropy(), RandomState::new())
}
}
impl<R: Rng, S: BuildHasher + Clone> EndpointRateLimiter<R, S> {
fn new_with_rand_and_hasher(info: &'static [RateBucketInfo], rand: R, hasher: S) -> Self {
info!(buckets = ?info, "endpoint rate limiter");
Self {
info,
map: DashMap::with_hasher_and_shard_amount(hasher, 64),
access_count: AtomicUsize::new(1), // start from 1 to avoid GC on the first request
rand: Mutex::new(rand),
}
}
/// Check that number of connections to the endpoint is below `max_rps` rps.
pub fn check(&self, endpoint: SmolStr) -> bool {
// do a partial GC every 2k requests. This cleans up ~ 1/64th of the map.
// worst case memory usage is about:
// = 2 * 2048 * 64 * (48B + 72B)
// = 30MB
if self.access_count.fetch_add(1, Ordering::AcqRel) % 2048 == 0 {
self.do_gc();
}
let now = Instant::now();
let mut entry = self.map.entry(endpoint).or_insert_with(|| {
vec![
RateBucket {
start: now,
count: 0,
};
self.info.len()
]
});
let should_allow_request = entry
.iter_mut()
.zip(self.info)
.all(|(bucket, info)| bucket.should_allow_request(info, now));
if should_allow_request {
// only increment the bucket counts if the request will actually be accepted
entry.iter_mut().for_each(RateBucket::inc);
}
should_allow_request
}
/// Clean the map. Simple strategy: remove all entries in a random shard.
/// At worst, we'll double the effective max_rps during the cleanup.
/// But that way deletion does not acquire a mutex on each entry access.
pub fn do_gc(&self) {
info!(
"cleaning up endpoint rate limiter, current size = {}",
self.map.len()
);
let n = self.map.shards().len();
// this lock is ok as the periodic cycle of do_gc makes this very unlikely to collide
// (impossible, in fact, unless we have 2048 threads)
let shard = self.rand.lock().unwrap().gen_range(0..n);
self.map.shards()[shard].write().clear();
}
}
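For readers skimming the diff, here is a minimal, self-contained sketch of the two-phase check that `EndpointRateLimiter::check` performs above: every bucket must admit the request before any counter is incremented, so a rejected request never consumes budget in the shorter buckets. The names and types below are simplified stand-ins, not the crate's API (no DashMap sharding or GC).

use std::time::{Duration, Instant};

#[derive(Clone, Copy)]
struct Bucket { start: Instant, count: u32 }

// Simplified stand-in for the check performed by `EndpointRateLimiter::check`:
// all buckets must allow the request before any of them is incremented.
fn check(buckets: &mut [Bucket], limits: &[(Duration, u32)], now: Instant) -> bool {
    let allowed = buckets.iter_mut().zip(limits).all(|(b, &(interval, max))| {
        if now - b.start < interval {
            b.count < max
        } else {
            // bucket expired: reset and allow
            b.count = 0;
            b.start = now;
            true
        }
    });
    if allowed {
        buckets.iter_mut().for_each(|b| b.count += 1);
    }
    allowed
}

fn main() {
    let now = Instant::now();
    let limits = [(Duration::from_secs(1), 2), (Duration::from_secs(60), 3)];
    let mut buckets = [Bucket { start: now, count: 0 }; 2];
    assert!(check(&mut buckets, &limits, now));
    assert!(check(&mut buckets, &limits, now));
    // a third request within the same second is rejected by the 2-per-second bucket
    assert!(!check(&mut buckets, &limits, now));
}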
/// Limits the number of concurrent jobs.
///
/// Concurrency is limited through the use of [Token]s. Acquire a token to run a job, and release the
@@ -233,6 +53,7 @@ pub struct Token<'t> {
#[derive(Debug, Clone, Copy)]
pub struct LimiterState {
limit: usize,
available: usize,
in_flight: usize,
}
@@ -410,7 +231,11 @@ impl Limiter {
pub fn state(&self) -> LimiterState {
let limit = self.limits.load(Ordering::Relaxed);
let in_flight = self.in_flight.load(Ordering::Relaxed);
LimiterState { limit, in_flight }
LimiterState {
limit,
available: limit.saturating_sub(in_flight),
in_flight,
}
}
}
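A side note on the `available` field introduced above: `saturating_sub` is presumably used because `limit` and `in_flight` are read as two separate atomics, so `in_flight` can momentarily exceed `limit`; the clamped form avoids underflow. A trivial sketch, assuming nothing beyond the standard library and a hypothetical helper name:

// Hypothetical helper mirroring the `available` computation in `state()`.
fn available(limit: usize, in_flight: usize) -> usize {
    limit.saturating_sub(in_flight)
}

fn main() {
    assert_eq!(available(10, 3), 7);
    // e.g. the limit was lowered while jobs were still running
    assert_eq!(available(2, 5), 0);
}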
@@ -423,6 +248,13 @@ impl<'t> Token<'t> {
}
}
#[cfg(test)]
pub fn set_latency(&mut self, latency: Duration) {
use std::ops::Sub;
self.start = Instant::now().sub(latency);
}
pub fn forget(&mut self) {
if let Some(permit) = self.permit.take() {
permit.forget();
@@ -441,6 +273,10 @@ impl LimiterState {
pub fn limit(&self) -> usize {
self.limit
}
/// The amount of concurrency available to use.
pub fn available(&self) -> usize {
self.available
}
/// The number of jobs in flight.
pub fn in_flight(&self) -> usize {
self.in_flight
@@ -488,16 +324,12 @@ impl reqwest_middleware::Middleware for Limiter {
#[cfg(test)]
mod tests {
use std::{hash::BuildHasherDefault, pin::pin, task::Context, time::Duration};
use std::{pin::pin, task::Context, time::Duration};
use futures::{task::noop_waker_ref, Future};
use rand::SeedableRng;
use rustc_hash::FxHasher;
use smol_str::SmolStr;
use tokio::time;
use super::{EndpointRateLimiter, Limiter, Outcome};
use crate::rate_limiter::{RateBucketInfo, RateLimitAlgorithm};
use super::{Limiter, Outcome};
use crate::rate_limiter::RateLimitAlgorithm;
#[tokio::test]
async fn it_works() {
@@ -606,105 +438,4 @@ mod tests {
limiter.release(token1, None).await;
limiter.release(token2, None).await;
}
#[test]
fn rate_bucket_rpi() {
let rate_bucket = RateBucketInfo::new(50, Duration::from_secs(5));
assert_eq!(rate_bucket.max_rpi, 50 * 5);
let rate_bucket = RateBucketInfo::new(50, Duration::from_millis(500));
assert_eq!(rate_bucket.max_rpi, 50 / 2);
}
#[test]
fn rate_bucket_parse() {
let rate_bucket: RateBucketInfo = "100@10s".parse().unwrap();
assert_eq!(rate_bucket.interval, Duration::from_secs(10));
assert_eq!(rate_bucket.max_rpi, 100 * 10);
assert_eq!(rate_bucket.to_string(), "100@10s");
let rate_bucket: RateBucketInfo = "100@1m".parse().unwrap();
assert_eq!(rate_bucket.interval, Duration::from_secs(60));
assert_eq!(rate_bucket.max_rpi, 100 * 60);
assert_eq!(rate_bucket.to_string(), "100@1m");
}
#[test]
fn default_rate_buckets() {
let mut defaults = RateBucketInfo::DEFAULT_SET;
RateBucketInfo::validate(&mut defaults[..]).unwrap();
}
#[test]
#[should_panic = "invalid endpoint RPS limits. 10@10s allows fewer requests per bucket than 300@1s (100 vs 300)"]
fn rate_buckets_validate() {
let mut rates: Vec<RateBucketInfo> = ["300@1s", "10@10s"]
.into_iter()
.map(|s| s.parse().unwrap())
.collect();
RateBucketInfo::validate(&mut rates).unwrap();
}
#[tokio::test]
async fn test_rate_limits() {
let mut rates: Vec<RateBucketInfo> = ["100@1s", "20@30s"]
.into_iter()
.map(|s| s.parse().unwrap())
.collect();
RateBucketInfo::validate(&mut rates).unwrap();
let limiter = EndpointRateLimiter::new(Vec::leak(rates));
let endpoint = SmolStr::from("ep-my-endpoint-1234");
time::pause();
for _ in 0..100 {
assert!(limiter.check(endpoint.clone()));
}
// more connections fail
assert!(!limiter.check(endpoint.clone()));
// fail even after 500ms as it's in the same bucket
time::advance(time::Duration::from_millis(500)).await;
assert!(!limiter.check(endpoint.clone()));
// after a full 1s, 100 requests are allowed again
time::advance(time::Duration::from_millis(500)).await;
for _ in 1..6 {
for _ in 0..100 {
assert!(limiter.check(endpoint.clone()));
}
time::advance(time::Duration::from_millis(1000)).await;
}
// more connections after 600 will exceed the 20rps@30s limit
assert!(!limiter.check(endpoint.clone()));
// will still fail before the 30 second limit
time::advance(time::Duration::from_millis(30_000 - 6_000 - 1)).await;
assert!(!limiter.check(endpoint.clone()));
// after the full 30 seconds, 100 requests are allowed again
time::advance(time::Duration::from_millis(1)).await;
for _ in 0..100 {
assert!(limiter.check(endpoint.clone()));
}
}
#[tokio::test]
async fn test_rate_limits_gc() {
// fixed seeded random/hasher to ensure that the test is not flaky
let rand = rand::rngs::StdRng::from_seed([1; 32]);
let hasher = BuildHasherDefault::<FxHasher>::default();
let limiter = EndpointRateLimiter::new_with_rand_and_hasher(
&RateBucketInfo::DEFAULT_SET,
rand,
hasher,
);
for i in 0..1_000_000 {
limiter.check(format!("{i}").into());
}
assert!(limiter.map.len() < 150_000);
}
}

View File

@@ -8,14 +8,11 @@ mod websocket;
use anyhow::bail;
use hyper::StatusCode;
use metrics::IntCounterPairGuard;
pub use reqwest_middleware::{ClientWithMiddleware, Error};
pub use reqwest_retry::{policies::ExponentialBackoff, RetryTransientMiddleware};
use tokio_util::task::TaskTracker;
use crate::protocol2::{ProxyProtocolAccept, WithClientIp};
use crate::proxy::NUM_CLIENT_CONNECTION_GAUGE;
use crate::rate_limiter::EndpointRateLimiter;
use crate::proxy::{NUM_CLIENT_CONNECTION_CLOSED_COUNTER, NUM_CLIENT_CONNECTION_OPENED_COUNTER};
use crate::{cancellation::CancelMap, config::ProxyConfig};
use futures::StreamExt;
use hyper::{
@@ -29,6 +26,7 @@ use hyper::{
use std::net::IpAddr;
use std::task::Poll;
use std::{future::ready, sync::Arc};
use tls_listener::TlsListener;
use tokio::net::TcpListener;
use tokio_util::sync::CancellationToken;
use tracing::{error, info, info_span, warn, Instrument};
@@ -38,7 +36,6 @@ pub async fn task_main(
config: &'static ProxyConfig,
ws_listener: TcpListener,
cancellation_token: CancellationToken,
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
) -> anyhow::Result<()> {
scopeguard::defer! {
info!("websocket server has shut down");
@@ -58,15 +55,14 @@ pub async fn task_main(
}
});
// let tls_config = config.tls_config.as_ref().map(|cfg| cfg.to_server_config());
let tls_config = match config.tls_config.as_ref() {
Some(config) => config,
let tls_config = config.tls_config.as_ref().map(|cfg| cfg.to_server_config());
let tls_acceptor: tokio_rustls::TlsAcceptor = match tls_config {
Some(config) => config.into(),
None => {
warn!("TLS config is missing, WebSocket Secure server will not be started");
return Ok(());
}
};
let tls_acceptor: tokio_rustls::TlsAcceptor = tls_config.to_server_config().into();
let mut addr_incoming = AddrIncoming::from_listener(ws_listener)?;
let _ = addr_incoming.set_nodelay(true);
@@ -74,20 +70,14 @@ pub async fn task_main(
incoming: addr_incoming,
};
let ws_connections = tokio_util::task::task_tracker::TaskTracker::new();
ws_connections.close(); // allows `ws_connections.wait()` to complete
let tls_listener = tls_listener::builder(tls_acceptor)
.handshake_timeout(tls_config.handshake_timeout)
.listen(addr_incoming)
.filter(|conn| {
if let Err(err) = conn {
error!("failed to accept TLS connection for websockets: {err:?}");
ready(false)
} else {
ready(true)
}
});
let tls_listener = TlsListener::new(tls_acceptor, addr_incoming).filter(|conn| {
if let Err(err) = conn {
error!("failed to accept TLS connection for websockets: {err:?}");
ready(false)
} else {
ready(true)
}
});
let make_svc = hyper::service::make_service_fn(
|stream: &tokio_rustls::server::TlsStream<WithClientIp<AddrStream>>| {
@@ -96,8 +86,6 @@ pub async fn task_main(
let remote_addr = io.inner.remote_addr();
let sni_name = tls.server_name().map(|s| s.to_string());
let conn_pool = conn_pool.clone();
let ws_connections = ws_connections.clone();
let endpoint_rate_limiter = endpoint_rate_limiter.clone();
async move {
let peer_addr = match client_addr {
@@ -109,8 +97,6 @@ pub async fn task_main(
move |req: Request<Body>| {
let sni_name = sni_name.clone();
let conn_pool = conn_pool.clone();
let ws_connections = ws_connections.clone();
let endpoint_rate_limiter = endpoint_rate_limiter.clone();
async move {
let cancel_map = Arc::new(CancelMap::default());
@@ -120,12 +106,10 @@ pub async fn task_main(
req,
config,
conn_pool,
ws_connections,
cancel_map,
session_id,
sni_name,
peer_addr.ip(),
endpoint_rate_limiter,
)
.instrument(info_span!(
"serverless",
@@ -145,25 +129,27 @@ pub async fn task_main(
.with_graceful_shutdown(cancellation_token.cancelled())
.await?;
// await websocket connections
ws_connections.wait().await;
Ok(())
}
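The hunk above uses the `TaskTracker` graceful-shutdown pattern: closing the tracker up front lets `wait()` resolve as soon as every task spawned through it has finished. A minimal sketch of the same pattern, assuming the `tokio` and `tokio-util` (with its `rt` feature) crates:

use tokio_util::task::TaskTracker;

// Sketch of the pattern used in `task_main` above, not the proxy's actual code.
#[tokio::main]
async fn main() {
    let tracker = TaskTracker::new();
    tracker.close(); // `wait()` can now complete once tracked tasks are done

    for i in 0..3 {
        tracker.spawn(async move {
            // stand-in for a websocket connection handler
            println!("connection {i} served");
        });
    }

    // graceful shutdown: wait for all in-flight tasks
    tracker.wait().await;
}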
struct MetricService<S> {
inner: S,
_gauge: IntCounterPairGuard,
}
impl<S> MetricService<S> {
fn new(inner: S) -> MetricService<S> {
MetricService {
inner,
_gauge: NUM_CLIENT_CONNECTION_GAUGE
.with_label_values(&["http"])
.guard(),
}
NUM_CLIENT_CONNECTION_OPENED_COUNTER
.with_label_values(&["http"])
.inc();
MetricService { inner }
}
}
impl<S> Drop for MetricService<S> {
fn drop(&mut self) {
NUM_CLIENT_CONNECTION_CLOSED_COUNTER
.with_label_values(&["http"])
.inc();
}
}
@@ -184,17 +170,14 @@ where
}
}
#[allow(clippy::too_many_arguments)]
async fn request_handler(
mut request: Request<Body>,
config: &'static ProxyConfig,
conn_pool: Arc<conn_pool::GlobalConnPool>,
ws_connections: TaskTracker,
cancel_map: Arc<CancelMap>,
session_id: uuid::Uuid,
sni_hostname: Option<String>,
peer_addr: IpAddr,
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
) -> Result<Response<Body>, ApiError> {
let host = request
.headers()
@@ -210,7 +193,7 @@ async fn request_handler(
let (response, websocket) = hyper_tungstenite::upgrade(&mut request, None)
.map_err(|e| ApiError::BadRequest(e.into()))?;
ws_connections.spawn(
tokio::spawn(
async move {
if let Err(e) = websocket::serve_websocket(
websocket,
@@ -219,7 +202,6 @@ async fn request_handler(
session_id,
host,
peer_addr,
endpoint_rate_limiter,
)
.await
{
@@ -247,7 +229,7 @@ async fn request_handler(
.header("Access-Control-Allow-Origin", "*")
.header(
"Access-Control-Allow-Headers",
"Neon-Connection-String, Neon-Raw-Text-Output, Neon-Array-Mode, Neon-Pool-Opt-In, Neon-Batch-Read-Only, Neon-Batch-Isolation-Level",
"Neon-Connection-String, Neon-Raw-Text-Output, Neon-Array-Mode, Neon-Pool-Opt-In",
)
.header("Access-Control-Max-Age", "86400" /* 24 hours */)
.status(StatusCode::OK) // 204 is also valid, but see: https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods/OPTIONS#status_code

View File

@@ -24,7 +24,10 @@ use tokio_postgres::{AsyncMessage, ReadyForQueryStatus};
use crate::{
auth::{self, backend::ComputeUserInfo, check_peer_addr_is_in_list},
console,
proxy::{neon_options, LatencyTimer, NUM_DB_CONNECTIONS_GAUGE},
proxy::{
neon_options, LatencyTimer, NUM_DB_CONNECTIONS_CLOSED_COUNTER,
NUM_DB_CONNECTIONS_OPENED_COUNTER,
},
usage_metrics::{Ids, MetricCounter, USAGE_METRICS},
};
use crate::{compute, config};
@@ -34,7 +37,7 @@ use crate::proxy::ConnectMechanism;
use tracing::{error, warn, Span};
use tracing::{info, info_span, Instrument};
pub const APP_NAME: &str = "/sql_over_http";
pub const APP_NAME: &str = "sql_over_http";
const MAX_CONNS_PER_ENDPOINT: usize = 20;
#[derive(Debug, Clone)]
@@ -429,8 +432,8 @@ async fn connect_to_compute(
let extra = console::ConsoleReqExtra {
session_id: uuid::Uuid::new_v4(),
application_name: APP_NAME.to_string(),
options: console_options,
application_name: Some(APP_NAME),
options: console_options.as_deref(),
};
// TODO(anna): this is a bit of a hack; consider using the console notification listener.
if !config.disable_ip_check_for_http {
@@ -474,11 +477,6 @@ async fn connect_to_compute_once(
.connect_timeout(timeout)
.connect(tokio_postgres::NoTls)
.await?;
let conn_gauge = NUM_DB_CONNECTIONS_GAUGE
.with_label_values(&["http"])
.guard();
tracing::Span::current().record("pid", &tracing::field::display(client.get_process_id()));
let (tx, mut rx) = tokio::sync::watch::channel(session);
@@ -494,7 +492,10 @@ async fn connect_to_compute_once(
tokio::spawn(
async move {
let _conn_gauge = conn_gauge;
NUM_DB_CONNECTIONS_OPENED_COUNTER.with_label_values(&["http"]).inc();
scopeguard::defer! {
NUM_DB_CONNECTIONS_CLOSED_COUNTER.with_label_values(&["http"]).inc();
}
poll_fn(move |cx| {
if matches!(rx.has_changed(), Ok(true)) {
session = *rx.borrow_and_update();

View File

@@ -29,7 +29,7 @@ use utils::http::error::ApiError;
use utils::http::json::json_response;
use crate::config::HttpConfig;
use crate::proxy::NUM_CONNECTION_REQUESTS_GAUGE;
use crate::proxy::{NUM_CONNECTIONS_ACCEPTED_COUNTER, NUM_CONNECTIONS_CLOSED_COUNTER};
use super::conn_pool::ConnInfo;
use super::conn_pool::GlobalConnPool;
@@ -303,9 +303,12 @@ async fn handle_inner(
session_id: uuid::Uuid,
peer_addr: IpAddr,
) -> anyhow::Result<Response<Body>> {
let _request_gauge = NUM_CONNECTION_REQUESTS_GAUGE
NUM_CONNECTIONS_ACCEPTED_COUNTER
.with_label_values(&["http"])
.guard();
.inc();
scopeguard::defer! {
NUM_CONNECTIONS_CLOSED_COUNTER.with_label_values(&["http"]).inc();
}
//
// Determine the destination and connection params

View File

@@ -3,7 +3,6 @@ use crate::{
config::ProxyConfig,
error::io_error,
proxy::{handle_client, ClientMode},
rate_limiter::EndpointRateLimiter,
};
use bytes::{Buf, Bytes};
use futures::{Sink, Stream};
@@ -14,7 +13,6 @@ use pin_project_lite::pin_project;
use std::{
net::IpAddr,
pin::Pin,
sync::Arc,
task::{ready, Context, Poll},
};
use tokio::io::{self, AsyncBufRead, AsyncRead, AsyncWrite, ReadBuf};
@@ -27,15 +25,15 @@ use sync_wrapper::SyncWrapper;
pin_project! {
/// This is a wrapper around a [`WebSocketStream`] that
/// implements [`AsyncRead`] and [`AsyncWrite`].
pub struct WebSocketRw<S = Upgraded> {
pub struct WebSocketRw {
#[pin]
stream: SyncWrapper<WebSocketStream<S>>,
stream: SyncWrapper<WebSocketStream<Upgraded>>,
bytes: Bytes,
}
}
impl<S> WebSocketRw<S> {
pub fn new(stream: WebSocketStream<S>) -> Self {
impl WebSocketRw {
pub fn new(stream: WebSocketStream<Upgraded>) -> Self {
Self {
stream: stream.into(),
bytes: Bytes::new(),
@@ -43,7 +41,7 @@ impl<S> WebSocketRw<S> {
}
}
impl<S: AsyncRead + AsyncWrite + Unpin> AsyncWrite for WebSocketRw<S> {
impl AsyncWrite for WebSocketRw {
fn poll_write(
self: Pin<&mut Self>,
cx: &mut Context<'_>,
@@ -69,7 +67,7 @@ impl<S: AsyncRead + AsyncWrite + Unpin> AsyncWrite for WebSocketRw<S> {
}
}
impl<S: AsyncRead + AsyncWrite + Unpin> AsyncRead for WebSocketRw<S> {
impl AsyncRead for WebSocketRw {
fn poll_read(
mut self: Pin<&mut Self>,
cx: &mut Context<'_>,
@@ -86,7 +84,7 @@ impl<S: AsyncRead + AsyncWrite + Unpin> AsyncRead for WebSocketRw<S> {
}
}
impl<S: AsyncRead + AsyncWrite + Unpin> AsyncBufRead for WebSocketRw<S> {
impl AsyncBufRead for WebSocketRw {
fn poll_fill_buf(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<io::Result<&[u8]>> {
// Please refer to poll_fill_buf's documentation.
const EOF: Poll<io::Result<&[u8]>> = Poll::Ready(Ok(&[]));
@@ -136,7 +134,6 @@ pub async fn serve_websocket(
session_id: uuid::Uuid,
hostname: Option<String>,
peer_addr: IpAddr,
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
) -> anyhow::Result<()> {
let websocket = websocket.await?;
handle_client(
@@ -146,65 +143,7 @@ pub async fn serve_websocket(
WebSocketRw::new(websocket),
ClientMode::Websockets { hostname },
peer_addr,
endpoint_rate_limiter,
)
.await?;
Ok(())
}
#[cfg(test)]
mod tests {
use std::pin::pin;
use futures::{SinkExt, StreamExt};
use hyper_tungstenite::{
tungstenite::{protocol::Role, Message},
WebSocketStream,
};
use tokio::{
io::{duplex, AsyncReadExt, AsyncWriteExt},
task::JoinSet,
};
use super::WebSocketRw;
#[tokio::test]
async fn websocket_stream_wrapper_happy_path() {
let (stream1, stream2) = duplex(1024);
let mut js = JoinSet::new();
js.spawn(async move {
let mut client = WebSocketStream::from_raw_socket(stream1, Role::Client, None).await;
client
.send(Message::Binary(b"hello world".to_vec()))
.await
.unwrap();
let message = client.next().await.unwrap().unwrap();
assert_eq!(message, Message::Binary(b"websockets are cool".to_vec()));
client.close(None).await.unwrap();
});
js.spawn(async move {
let mut rw = pin!(WebSocketRw::new(
WebSocketStream::from_raw_socket(stream2, Role::Server, None).await
));
let mut buf = vec![0; 1024];
let n = rw.read(&mut buf).await.unwrap();
assert_eq!(&buf[..n], b"hello world");
rw.write_all(b"websockets are cool").await.unwrap();
rw.flush().await.unwrap();
let n = rw.read_to_end(&mut buf).await.unwrap();
assert_eq!(n, 0);
});
js.join_next().await.unwrap().unwrap();
js.join_next().await.unwrap().unwrap();
}
}

View File

@@ -142,8 +142,6 @@ pub(crate) async fn branch_cleanup_and_check_errors(
.collect();
if !orphan_layers.is_empty() {
// An orphan layer is not an error: it's arguably not even a warning, but it is helpful to report
// these as a hint that there is something worth cleaning up here.
result.warnings.push(format!(
"index_part.json does not contain layers from S3: {:?}",
orphan_layers

View File

@@ -87,7 +87,7 @@ impl S3Target {
new_self.prefix_in_bucket = format!("/{}/", new_segment);
} else {
if new_self.prefix_in_bucket.ends_with('/') {
new_self.prefix_in_bucket.pop();
let _ = new_self.prefix_in_bucket.pop();
}
new_self.prefix_in_bucket =
[&new_self.prefix_in_bucket, new_segment, ""].join(&new_self.delimiter);
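A standalone sketch of the join step in this hunk, assuming the delimiter is "/" as is typical for S3 listings: a trailing delimiter is trimmed first so the join does not produce a doubled "//", and the trailing empty element keeps the result delimiter-terminated. The function name below is hypothetical.

// Hypothetical helper mirroring the trim-then-join step above.
fn join_segment(mut prefix: String, segment: &str, delimiter: &str) -> String {
    if prefix.ends_with(delimiter) {
        let _ = prefix.pop();
    }
    [prefix.as_str(), segment, ""].join(delimiter)
}

fn main() {
    assert_eq!(join_segment("tenants/".to_string(), "abc", "/"), "tenants/abc/");
    assert_eq!(join_segment("tenants".to_string(), "abc", "/"), "tenants/abc/");
}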

View File

@@ -57,7 +57,7 @@ async fn main() -> anyhow::Result<()> {
));
match cli.command {
Command::ScanMetadata { json } => match scan_metadata(bucket_config.clone()).await {
Command::ScanMetadata { json } => match scan_metadata(bucket_config).await {
Err(e) => {
tracing::error!("Failed: {e}");
Err(e)
@@ -70,17 +70,6 @@ async fn main() -> anyhow::Result<()> {
}
if summary.is_fatal() {
Err(anyhow::anyhow!("Fatal scrub errors detected"))
} else if summary.is_empty() {
// Strictly speaking an empty bucket is a valid bucket, but if someone ran the
// scrubber they were likely expecting to scan something, and if we see no timelines
// at all then it's likely due to a configuration issue such as a bad prefix
Err(anyhow::anyhow!(
"No timelines found in bucket {} prefix {}",
bucket_config.bucket,
bucket_config
.prefix_in_bucket
.unwrap_or("<none>".to_string())
))
} else {
Ok(())
}

View File

@@ -20,6 +20,14 @@ pub struct MetadataSummary {
with_warnings: HashSet<TenantTimelineId>,
with_garbage: HashSet<TenantTimelineId>,
indices_by_version: HashMap<usize, usize>,
indices_with_generation: usize,
indices_without_generation: usize,
/// Timelines whose metadata and/or object keys couldn't even be parsed: extremely damaged
invalid_count: usize,
/// Timelines with just an initdb archive, left behind after deletion.
relic_count: usize,
layer_count: MinMaxHisto,
timeline_size_bytes: MinMaxHisto,
@@ -39,6 +47,8 @@ impl MinMaxHisto {
fn new() -> Self {
Self {
histo: histogram::Histogram::builder()
// Accommodate tenant sizes up to 32TiB
.maximum_value(32 * 1024 * 1024 * 1024 * 1024)
.build()
.expect("Bad histogram params"),
min: u64::MAX,
@@ -90,6 +100,10 @@ impl MetadataSummary {
with_warnings: HashSet::new(),
with_garbage: HashSet::new(),
indices_by_version: HashMap::new(),
indices_with_generation: 0,
indices_without_generation: 0,
invalid_count: 0,
relic_count: 0,
layer_count: MinMaxHisto::new(),
timeline_size_bytes: MinMaxHisto::new(),
layer_size_bytes: MinMaxHisto::new(),
@@ -111,24 +125,35 @@ impl MetadataSummary {
fn update_data(&mut self, data: &S3TimelineBlobData) {
self.count += 1;
if let BlobDataParseResult::Parsed {
index_part,
index_part_generation: _,
s3_layers: _,
} = &data.blob_data
{
*self
.indices_by_version
.entry(index_part.get_version())
.or_insert(0) += 1;
match &data.blob_data {
BlobDataParseResult::Parsed {
index_part,
index_part_generation,
s3_layers: _,
} => {
*self
.indices_by_version
.entry(index_part.get_version())
.or_insert(0) += 1;
if let Err(e) = self.update_histograms(index_part) {
// Value out of range? Warn that the results are untrustworthy
tracing::warn!(
"Error updating histograms, summary stats may be wrong: {}",
e
);
// These statistics exist to track the transition to generations. By early 2024 there should be zero
// generation-less timelines in the field and this check can be removed.
if index_part_generation.is_none() {
self.indices_without_generation += 1;
} else {
self.indices_with_generation += 1;
}
if let Err(e) = self.update_histograms(index_part) {
// Value out of range? Warn that the results are untrustworthy
tracing::warn!(
"Error updating histograms, summary stats may be wrong: {}",
e
);
}
}
BlobDataParseResult::Incorrect(_) => self.invalid_count += 1,
BlobDataParseResult::Relic => self.relic_count += 1,
}
}
@@ -156,7 +181,10 @@ impl MetadataSummary {
With errors: {1}
With warnings: {2}
With garbage: {3}
Invalid: {9}
Relics: {10}
Index versions: {version_summary}
Indices with/without generations: {7}/{8}
Timeline size bytes: {4}
Layer size bytes: {5}
Timeline layer count: {6}
@@ -168,16 +196,16 @@ Timeline layer count: {6}
self.timeline_size_bytes.oneline(),
self.layer_size_bytes.oneline(),
self.layer_count.oneline(),
self.indices_with_generation,
self.indices_without_generation,
self.invalid_count,
self.relic_count
)
}
pub fn is_fatal(&self) -> bool {
!self.with_errors.is_empty()
}
pub fn is_empty(&self) -> bool {
self.count == 0
}
}
/// Scan the pageserver metadata in an S3 bucket, reporting errors and statistics.

View File

@@ -11,7 +11,7 @@ use tracing::{debug, info, info_span, Instrument};
use crate::auth::check_permission;
use crate::json_ctrl::{handle_json_ctrl, AppendLogicalMessage};
use crate::metrics::{TrafficMetrics, PG_QUERIES_GAUGE};
use crate::metrics::{TrafficMetrics, PG_QUERIES_FINISHED, PG_QUERIES_RECEIVED};
use crate::safekeeper::Term;
use crate::timeline::TimelineError;
use crate::wal_service::ConnectionId;
@@ -210,7 +210,10 @@ impl<IO: AsyncRead + AsyncWrite + Unpin + Send> postgres_backend::Handler<IO>
let cmd = parse_cmd(query_string)?;
let cmd_str = cmd_to_string(&cmd);
let _guard = PG_QUERIES_GAUGE.with_label_values(&[cmd_str]).guard();
PG_QUERIES_RECEIVED.with_label_values(&[cmd_str]).inc();
scopeguard::defer! {
PG_QUERIES_FINISHED.with_label_values(&[cmd_str]).inc();
}
info!("got query {:?}", query_string);

View File

@@ -11,8 +11,7 @@ use futures::Future;
use metrics::{
core::{AtomicU64, Collector, Desc, GenericCounter, GenericGaugeVec, Opts},
proto::MetricFamily,
register_int_counter, register_int_counter_pair_vec, register_int_counter_vec, Gauge,
IntCounter, IntCounterPairVec, IntCounterVec, IntGaugeVec,
register_int_counter, register_int_counter_vec, Gauge, IntCounter, IntCounterVec, IntGaugeVec,
};
use once_cell::sync::Lazy;
@@ -90,10 +89,16 @@ pub static BROKER_PULLED_UPDATES: Lazy<IntCounterVec> = Lazy::new(|| {
)
.expect("Failed to register safekeeper_broker_pulled_updates_total counter")
});
pub static PG_QUERIES_GAUGE: Lazy<IntCounterPairVec> = Lazy::new(|| {
register_int_counter_pair_vec!(
pub static PG_QUERIES_RECEIVED: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
"safekeeper_pg_queries_received_total",
"Number of queries received through pg protocol",
&["query"]
)
.expect("Failed to register safekeeper_pg_queries_received_total counter")
});
pub static PG_QUERIES_FINISHED: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
"safekeeper_pg_queries_finished_total",
"Number of queries finished through pg protocol",
&["query"]

View File

@@ -28,7 +28,6 @@ import jwt
import psycopg2
import pytest
import requests
import toml
from _pytest.config import Config
from _pytest.config.argparsing import Parser
from _pytest.fixtures import FixtureRequest
@@ -57,7 +56,6 @@ from fixtures.remote_storage import (
RemoteStorageKind,
RemoteStorageUser,
S3Storage,
default_remote_storage,
remote_storage_to_toml_inline_table,
)
from fixtures.types import Lsn, TenantId, TimelineId
@@ -437,7 +435,7 @@ class NeonEnvBuilder:
# Pageserver remote storage
self.pageserver_remote_storage = pageserver_remote_storage
# Safekeepers remote storage
self.safekeepers_remote_storage: Optional[RemoteStorage] = None
self.sk_remote_storage: Optional[RemoteStorage] = None
self.broker = broker
self.run_id = run_id
@@ -470,7 +468,7 @@ class NeonEnvBuilder:
# Cannot create more than one environment from one builder
assert self.env is None, "environment already initialized"
if default_remote_storage_if_missing and self.pageserver_remote_storage is None:
self.enable_pageserver_remote_storage(default_remote_storage())
self.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
self.env = NeonEnv(self)
return self.env
@@ -507,66 +505,6 @@ class NeonEnvBuilder:
return env
def from_repo_dir(
self,
repo_dir: Path,
neon_binpath: Optional[Path] = None,
pg_distrib_dir: Optional[Path] = None,
) -> NeonEnv:
"""
A simple method to import data into the current NeonEnvBuilder from a snapshot of a repo dir.
"""
# Setting custom `neon_binpath` and `pg_distrib_dir` is useful for compatibility tests
self.neon_binpath = neon_binpath or self.neon_binpath
self.pg_distrib_dir = pg_distrib_dir or self.pg_distrib_dir
# Get the initial tenant and timeline from the snapshot config
snapshot_config_toml = repo_dir / "config"
with snapshot_config_toml.open("r") as f:
snapshot_config = toml.load(f)
self.initial_tenant = TenantId(snapshot_config["default_tenant_id"])
self.initial_timeline = TimelineId(
dict(snapshot_config["branch_name_mappings"][DEFAULT_BRANCH_NAME])[
str(self.initial_tenant)
]
)
self.env = self.init_configs()
for ps_dir in repo_dir.glob("pageserver_*"):
tenants_from_dir = ps_dir / "tenants"
tenants_to_dir = self.repo_dir / ps_dir.name / "tenants"
log.info(f"Copying pageserver tenants directory {tenants_from_dir} to {tenants_to_dir}")
shutil.copytree(tenants_from_dir, tenants_to_dir)
for sk_from_dir in (repo_dir / "safekeepers").glob("sk*"):
sk_to_dir = self.repo_dir / "safekeepers" / sk_from_dir.name
log.info(f"Copying safekeeper directory {sk_from_dir} to {sk_to_dir}")
sk_to_dir.rmdir()
shutil.copytree(sk_from_dir, sk_to_dir, ignore=shutil.ignore_patterns("*.log", "*.pid"))
shutil.rmtree(self.repo_dir / "local_fs_remote_storage", ignore_errors=True)
shutil.copytree(
repo_dir / "local_fs_remote_storage", self.repo_dir / "local_fs_remote_storage"
)
if (attachments_json := Path(repo_dir / "attachments.json")).exists():
shutil.copyfile(attachments_json, self.repo_dir / attachments_json.name)
# Update the config with info about tenants and timelines
with (self.repo_dir / "config").open("r") as f:
config = toml.load(f)
config["default_tenant_id"] = snapshot_config["default_tenant_id"]
config["branch_name_mappings"] = snapshot_config["branch_name_mappings"]
with (self.repo_dir / "config").open("w") as f:
toml.dump(config, f)
return self.env
def enable_scrub_on_exit(self):
"""
Call this if you would like the fixture to automatically run
@@ -595,11 +533,9 @@ class NeonEnvBuilder:
self.pageserver_remote_storage = ret
def enable_safekeeper_remote_storage(self, kind: RemoteStorageKind):
assert (
self.safekeepers_remote_storage is None
), "safekeepers_remote_storage already configured"
assert self.sk_remote_storage is None, "sk_remote_storage already configured"
self.safekeepers_remote_storage = self._configure_and_create_remote_storage(
self.sk_remote_storage = self._configure_and_create_remote_storage(
kind, RemoteStorageUser.SAFEKEEPER
)
@@ -652,7 +588,7 @@ class NeonEnvBuilder:
directory_to_clean.rmdir()
def cleanup_remote_storage(self):
for x in [self.pageserver_remote_storage, self.safekeepers_remote_storage]:
for x in [self.pageserver_remote_storage, self.sk_remote_storage]:
if isinstance(x, S3Storage):
x.do_cleanup()
@@ -756,7 +692,7 @@ class NeonEnv:
self.pageservers: List[NeonPageserver] = []
self.broker = config.broker
self.pageserver_remote_storage = config.pageserver_remote_storage
self.safekeepers_remote_storage = config.safekeepers_remote_storage
self.safekeepers_remote_storage = config.sk_remote_storage
self.pg_version = config.pg_version
# Binary path for pageserver, safekeeper, etc
self.neon_binpath = config.neon_binpath
@@ -781,17 +717,25 @@ class NeonEnv:
self.attachment_service = None
# Create a config file corresponding to the options
cfg: Dict[str, Any] = {
"default_tenant_id": str(self.initial_tenant),
"broker": {
"listen_addr": self.broker.listen_addr(),
},
"pageservers": [],
"safekeepers": [],
}
toml = textwrap.dedent(
f"""
default_tenant_id = '{config.initial_tenant}'
"""
)
if self.control_plane_api is not None:
cfg["control_plane_api"] = self.control_plane_api
toml += textwrap.dedent(
f"""
control_plane_api = '{self.control_plane_api}'
"""
)
toml += textwrap.dedent(
f"""
[broker]
listen_addr = '{self.broker.listen_addr()}'
"""
)
# Create config for pageserver
http_auth_type = "NeonJWT" if config.auth_enabled else "Trust"
@@ -804,24 +748,26 @@ class NeonEnv:
http=self.port_distributor.get_port(),
)
ps_cfg: Dict[str, Any] = {
"id": ps_id,
"listen_pg_addr": f"localhost:{pageserver_port.pg}",
"listen_http_addr": f"localhost:{pageserver_port.http}",
"pg_auth_type": pg_auth_type,
"http_auth_type": http_auth_type,
}
toml += textwrap.dedent(
f"""
[[pageservers]]
id={ps_id}
listen_pg_addr = 'localhost:{pageserver_port.pg}'
listen_http_addr = 'localhost:{pageserver_port.http}'
pg_auth_type = '{pg_auth_type}'
http_auth_type = '{http_auth_type}'
"""
)
# Create a corresponding NeonPageserver object
self.pageservers.append(
NeonPageserver(
self,
ps_id,
port=pageserver_port,
config_override=self.pageserver_config_override,
config_override=config.pageserver_config_override,
)
)
cfg["pageservers"].append(ps_cfg)
# Create config and a Safekeeper object for each safekeeper
for i in range(1, config.num_safekeepers + 1):
port = SafekeeperPort(
@@ -830,22 +776,32 @@ class NeonEnv:
http=self.port_distributor.get_port(),
)
id = config.safekeepers_id_start + i # assign ids sequentially
sk_cfg: Dict[str, Any] = {
"id": id,
"pg_port": port.pg,
"pg_tenant_only_port": port.pg_tenant_only,
"http_port": port.http,
"sync": config.safekeepers_enable_fsync,
}
toml += textwrap.dedent(
f"""
[[safekeepers]]
id = {id}
pg_port = {port.pg}
pg_tenant_only_port = {port.pg_tenant_only}
http_port = {port.http}
sync = {'true' if config.safekeepers_enable_fsync else 'false'}"""
)
if config.auth_enabled:
sk_cfg["auth_enabled"] = True
if self.safekeepers_remote_storage is not None:
sk_cfg["remote_storage"] = self.safekeepers_remote_storage.to_toml_inline_table()
self.safekeepers.append(Safekeeper(env=self, id=id, port=port))
cfg["safekeepers"].append(sk_cfg)
toml += textwrap.dedent(
"""
auth_enabled = true
"""
)
if config.sk_remote_storage is not None:
toml += textwrap.dedent(
f"""
remote_storage = "{remote_storage_to_toml_inline_table(config.sk_remote_storage)}"
"""
)
safekeeper = Safekeeper(env=self, id=id, port=port)
self.safekeepers.append(safekeeper)
log.info(f"Config: {cfg}")
self.neon_cli.init(cfg)
log.info(f"Config: {toml}")
self.neon_cli.init(toml)
def start(self):
# Start up broker, pageserver and all safekeepers
@@ -1331,10 +1287,10 @@ class NeonCli(AbstractNeonCli):
def init(
self,
config: Dict[str, Any],
config_toml: str,
) -> "subprocess.CompletedProcess[str]":
with tempfile.NamedTemporaryFile(mode="w+") as tmp:
tmp.write(toml.dumps(config))
tmp.write(config_toml)
tmp.flush()
cmd = ["init", f"--config={tmp.name}", "--pg-version", self.env.pg_version]
@@ -1772,16 +1728,11 @@ class NeonPageserver(PgProtocol):
@property
def workdir(self) -> Path:
return self.env.repo_dir / f"pageserver_{self.id}"
return Path(os.path.join(self.env.repo_dir, f"pageserver_{self.id}"))
def assert_no_errors(self):
logfile = self.workdir / "pageserver.log"
if not logfile.exists():
log.warning(f"Skipping log check: {logfile} does not exist")
return
with logfile.open("r") as f:
errors = scan_pageserver_log_for_errors(f, self.allowed_errors)
logfile = open(os.path.join(self.workdir, "pageserver.log"), "r")
errors = scan_pageserver_log_for_errors(logfile, self.allowed_errors)
for _lineno, error in errors:
log.info(f"not allowed error: {error.strip()}")
@@ -1805,10 +1756,7 @@ class NeonPageserver(PgProtocol):
def log_contains(self, pattern: str) -> Optional[str]:
"""Check that the pageserver log contains a line that matches the given regex"""
logfile = self.workdir / "pageserver.log"
if not logfile.exists():
log.warning(f"Skipping log check: {logfile} does not exist")
return None
logfile = open(os.path.join(self.workdir, "pageserver.log"), "r")
contains_re = re.compile(pattern)
@@ -1817,11 +1765,14 @@ class NeonPageserver(PgProtocol):
# no guarantee it is already present in the log file. This hasn't
# been a problem in practice; our python tests are not fast enough
# to hit that race condition.
with logfile.open("r") as f:
for line in f:
if contains_re.search(line):
# found it!
return line
while True:
line = logfile.readline()
if not line:
break
if contains_re.search(line):
# found it!
return line
return None
@@ -1844,38 +1795,16 @@ class NeonPageserver(PgProtocol):
client = self.http_client()
return client.tenant_detach(tenant_id)
def tenant_location_configure(self, tenant_id: TenantId, config: dict[str, Any], **kwargs):
# This API is only for use when generations are enabled
assert self.env.attachment_service is not None
if config["mode"].startswith("Attached") and "generation" not in config:
config["generation"] = self.env.attachment_service.attach_hook_issue(tenant_id, self.id)
client = self.http_client()
return client.tenant_location_conf(tenant_id, config, **kwargs)
def read_tenant_location_conf(self, tenant_id: TenantId) -> dict[str, Any]:
path = self.tenant_dir(tenant_id) / "config-v1"
log.info(f"Reading location conf from {path}")
bytes = open(path, "r").read()
try:
decoded: dict[str, Any] = toml.loads(bytes)
return decoded
except:
log.error(f"Failed to decode LocationConf, raw content ({len(bytes)} bytes): {bytes}")
raise
def tenant_create(
self,
tenant_id: TenantId,
conf: Optional[Dict[str, Any]] = None,
auth_token: Optional[str] = None,
generation: Optional[int] = None,
) -> TenantId:
if generation is None:
generation = self.maybe_get_generation(tenant_id)
client = self.http_client(auth_token=auth_token)
return client.tenant_create(tenant_id, conf, generation=generation)
return client.tenant_create(
tenant_id, conf, generation=self.maybe_get_generation(tenant_id)
)
def tenant_load(self, tenant_id: TenantId):
client = self.http_client()
@@ -2799,7 +2728,6 @@ class EndpointFactory:
lsn: Optional[Lsn] = None,
hot_standby: bool = False,
config_lines: Optional[List[str]] = None,
pageserver_id: Optional[int] = None,
) -> Endpoint:
ep = Endpoint(
self.env,
@@ -2819,7 +2747,6 @@ class EndpointFactory:
lsn=lsn,
hot_standby=hot_standby,
config_lines=config_lines,
pageserver_id=pageserver_id,
)
def stop_all(self) -> "EndpointFactory":
@@ -2945,7 +2872,7 @@ class Safekeeper:
tli_dir = self.timeline_dir(tenant_id, timeline_id)
segments = []
for _, _, filenames in os.walk(tli_dir):
segments.extend([f for f in filenames if not f.startswith("safekeeper.control")])
segments.extend([f for f in filenames if f != "safekeeper.control"])
segments.sort()
return segments
@@ -3166,7 +3093,7 @@ def pytest_addoption(parser: Parser):
SMALL_DB_FILE_NAME_REGEX: re.Pattern = re.compile( # type: ignore[type-arg]
r"config|config-v1|heatmap-v1|metadata|.+\.(?:toml|pid|json|sql)"
r"config|metadata|.+\.(?:toml|pid|json|sql)"
)
@@ -3427,6 +3354,8 @@ def parse_project_git_version_output(s: str) -> str:
The information is generated by utils::project_git_version!
"""
import re
res = re.search(r"git(-env)?:([0-9a-fA-F]{8,40})(-\S+)?", s)
if res and (commit := res.group(2)):
return commit

View File

@@ -150,7 +150,7 @@ class PageserverHttpClient(requests.Session):
# (this may change in future if we do fault injection of a kind that causes
# requests TCP flows to stick)
read=False,
backoff_factor=0.2,
backoff_factor=0,
status_forcelist=[503],
allowed_methods=None,
remove_headers_on_redirect=[],
@@ -277,23 +277,6 @@ class PageserverHttpClient(requests.Session):
res = self.post(f"http://localhost:{self.port}/v1/tenant/{tenant_id}/reset", params=params)
self.verbose_error(res)
def tenant_location_conf(
self, tenant_id: TenantId, location_conf=dict[str, Any], flush_ms=None
):
body = location_conf.copy()
body["tenant_id"] = str(tenant_id)
params = {}
if flush_ms is not None:
params["flush_ms"] = str(flush_ms)
res = self.put(
f"http://localhost:{self.port}/v1/tenant/{tenant_id}/location_config",
json=body,
params=params,
)
self.verbose_error(res)
def tenant_delete(self, tenant_id: TenantId):
res = self.delete(f"http://localhost:{self.port}/v1/tenant/{tenant_id}")
self.verbose_error(res)
@@ -322,10 +305,6 @@ class PageserverHttpClient(requests.Session):
self.verbose_error(res)
return TenantConfig.from_json(res.json())
def tenant_heatmap_upload(self, tenant_id: TenantId):
res = self.post(f"http://localhost:{self.port}/v1/tenant/{tenant_id}/heatmap_upload")
self.verbose_error(res)
def set_tenant_config(self, tenant_id: TenantId, config: dict[str, Any]):
assert "tenant_id" not in config.keys()
res = self.put(

View File

@@ -9,14 +9,12 @@ from pathlib import Path
from typing import Any, Dict, List, Optional, Union
import boto3
import toml
from mypy_boto3_s3 import S3Client
from fixtures.log_helper import log
from fixtures.types import TenantId, TimelineId
TIMELINE_INDEX_PART_FILE_NAME = "index_part.json"
TENANT_HEATMAP_FILE_NAME = "heatmap-v1.json"
@enum.unique
@@ -134,18 +132,8 @@ class LocalFsStorage:
with self.index_path(tenant_id, timeline_id).open("r") as f:
return json.load(f)
def heatmap_path(self, tenant_id: TenantId) -> Path:
return self.tenant_path(tenant_id) / TENANT_HEATMAP_FILE_NAME
def heatmap_content(self, tenant_id):
with self.heatmap_path(tenant_id).open("r") as f:
return json.load(f)
def to_toml_inline_table(self) -> str:
rv = {
"local_path": str(self.root),
}
return toml.TomlEncoder().dump_inline_table(rv)
return f"local_path='{self.root}'"
def cleanup(self):
# no cleanup is done here, because there's NeonEnvBuilder.cleanup_local_storage which will remove everything, including localfs files
@@ -186,18 +174,18 @@ class S3Storage:
)
def to_toml_inline_table(self) -> str:
rv = {
"bucket_name": self.bucket_name,
"bucket_region": self.bucket_region,
}
s = [
f"bucket_name='{self.bucket_name}'",
f"bucket_region='{self.bucket_region}'",
]
if self.prefix_in_bucket is not None:
rv["prefix_in_bucket"] = self.prefix_in_bucket
s.append(f"prefix_in_bucket='{self.prefix_in_bucket}'")
if self.endpoint is not None:
rv["endpoint"] = self.endpoint
s.append(f"endpoint='{self.endpoint}'")
return toml.TomlEncoder().dump_inline_table(rv)
return ",".join(s)
def do_cleanup(self):
if not self.cleanup:
@@ -384,16 +372,9 @@ def s3_storage() -> RemoteStorageKind:
return RemoteStorageKind.MOCK_S3
def default_remote_storage() -> RemoteStorageKind:
"""
The remote storage kind used in tests that do not specify a preference
"""
return RemoteStorageKind.LOCAL_FS
# serialize as toml inline table
def remote_storage_to_toml_inline_table(remote_storage: RemoteStorage) -> str:
if not isinstance(remote_storage, (LocalFsStorage, S3Storage)):
raise Exception("invalid remote storage type")
return remote_storage.to_toml_inline_table()
return f"{{{remote_storage.to_toml_inline_table()}}}"

View File

@@ -1,148 +0,0 @@
from typing import Optional
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
Endpoint,
NeonEnv,
last_flush_lsn_upload,
wait_for_last_flush_lsn,
)
from fixtures.pageserver.utils import wait_for_last_record_lsn, wait_for_upload
from fixtures.types import TenantId, TimelineId
class Workload:
"""
This is not a general purpose load generator: it exists for storage tests that need to inject some
high level types of storage work via the postgres interface:
- layer writes (`write_rows`)
- work for compaction (`churn_rows`)
- reads, checking we get the right data (`validate`)
"""
def __init__(self, env: NeonEnv, tenant_id: TenantId, timeline_id: TimelineId):
self.env = env
self.tenant_id = tenant_id
self.timeline_id = timeline_id
self.table = "foo"
self.expect_rows = 0
self.churn_cursor = 0
self._endpoint: Optional[Endpoint] = None
def endpoint(self, pageserver_id: int) -> Endpoint:
if self._endpoint is None:
self._endpoint = self.env.endpoints.create(
"main",
tenant_id=self.tenant_id,
pageserver_id=pageserver_id,
endpoint_id="ep-workload",
)
self._endpoint.start(pageserver_id=pageserver_id)
else:
self._endpoint.reconfigure(pageserver_id=pageserver_id)
connstring = self._endpoint.safe_psql(
"SELECT setting FROM pg_settings WHERE name='neon.pageserver_connstring'"
)
log.info(f"Workload.endpoint: connstr={connstring}")
return self._endpoint
def __del__(self):
if self._endpoint is not None:
self._endpoint.stop()
def init(self, pageserver_id: int):
endpoint = self.endpoint(pageserver_id)
endpoint.safe_psql(f"CREATE TABLE {self.table} (id INTEGER PRIMARY KEY, val text);")
endpoint.safe_psql("CREATE EXTENSION IF NOT EXISTS neon_test_utils;")
last_flush_lsn_upload(
self.env, endpoint, self.tenant_id, self.timeline_id, pageserver_id=pageserver_id
)
def write_rows(self, n, pageserver_id):
endpoint = self.endpoint(pageserver_id)
start = self.expect_rows
end = start + n - 1
self.expect_rows += n
dummy_value = "blah"
endpoint.safe_psql(
f"""
INSERT INTO {self.table} (id, val)
SELECT g, '{dummy_value}'
FROM generate_series({start}, {end}) g
"""
)
return last_flush_lsn_upload(
self.env, endpoint, self.tenant_id, self.timeline_id, pageserver_id=pageserver_id
)
def churn_rows(self, n, pageserver_id, upload=True):
assert self.expect_rows >= n
max_iters = 10
endpoint = self.endpoint(pageserver_id)
todo = n
i = 0
while todo > 0:
i += 1
if i > max_iters:
raise RuntimeError("oops")
start = self.churn_cursor % self.expect_rows
n_iter = min((self.expect_rows - start), todo)
todo -= n_iter
end = start + n_iter - 1
log.info(
f"start,end = {start},{end}, cursor={self.churn_cursor}, expect_rows={self.expect_rows}"
)
assert end < self.expect_rows
self.churn_cursor += n_iter
dummy_value = "blah"
endpoint.safe_psql_many(
[
f"""
INSERT INTO {self.table} (id, val)
SELECT g, '{dummy_value}'
FROM generate_series({start}, {end}) g
ON CONFLICT (id) DO UPDATE
SET val = EXCLUDED.val
""",
f"VACUUM {self.table}",
]
)
last_flush_lsn = wait_for_last_flush_lsn(
self.env, endpoint, self.tenant_id, self.timeline_id, pageserver_id=pageserver_id
)
ps_http = self.env.get_pageserver(pageserver_id).http_client()
wait_for_last_record_lsn(ps_http, self.tenant_id, self.timeline_id, last_flush_lsn)
if upload:
# force a checkpoint to trigger upload
ps_http.timeline_checkpoint(self.tenant_id, self.timeline_id)
wait_for_upload(ps_http, self.tenant_id, self.timeline_id, last_flush_lsn)
log.info(f"Churn: waiting for remote LSN {last_flush_lsn}")
else:
log.info(f"Churn: not waiting for upload, disk LSN {last_flush_lsn}")
def validate(self, pageserver_id):
endpoint = self.endpoint(pageserver_id)
result = endpoint.safe_psql_many(
[
"select clear_buffer_cache()",
f"""
SELECT COUNT(*) FROM {self.table}
""",
]
)
log.info(f"validate({self.expect_rows}): {result}")
assert result == [[("",)], [(self.expect_rows,)]]

View File

@@ -55,20 +55,9 @@ def measure_recovery_time(env: NeonCompare):
# Delete the Tenant in the pageserver: this will drop local and remote layers, such that
# when we "create" the Tenant again, we will replay the WAL from the beginning.
#
# This is a "weird" thing to do, and can confuse the attachment service as we're re-using
# the same tenant ID for a tenant that is logically different from the pageserver's point
# of view, but the same as far as the safekeeper/WAL is concerned. To work around that,
# we will explicitly create the tenant in the same generation that it was previously
# attached in.
assert env.env.attachment_service is not None
attach_status = env.env.attachment_service.inspect(tenant_id=env.tenant)
assert attach_status is not None
(attach_gen, _) = attach_status
client.tenant_delete(env.tenant)
wait_tenant_status_404(client, env.tenant, iterations=60, interval=0.5)
env.env.pageserver.tenant_create(tenant_id=env.tenant, generation=attach_gen)
env.env.pageserver.tenant_create(tenant_id=env.tenant)
# Measure recovery time
with env.record_duration("wal_recovery"):

View File

@@ -163,7 +163,6 @@ def test_fully_custom_config(positive_env: NeonEnv):
"gc_feedback": True,
"gc_horizon": 23 * (1024 * 1024),
"gc_period": "2h 13m",
"heatmap_period": "10m",
"image_creation_threshold": 7,
"pitr_interval": "1m",
"lagging_wal_timeout": "23m",

View File

@@ -92,9 +92,8 @@ def test_compute_auth_to_pageserver(neon_env_builder: NeonEnvBuilder):
def test_pageserver_multiple_keys(neon_env_builder: NeonEnvBuilder):
neon_env_builder.auth_enabled = True
env = neon_env_builder.init_start()
env.pageserver.allowed_errors.extend(
[".*Authentication error: InvalidSignature.*", ".*Unauthorized: malformed jwt token.*"]
)
env.pageserver.allowed_errors.append(".*Authentication error: InvalidSignature.*")
env.pageserver.allowed_errors.append(".*Unauthorized: malformed jwt token.*")
pageserver_token_old = env.auth_keys.generate_pageserver_token()
pageserver_http_client_old = env.pageserver.http_client(pageserver_token_old)
@@ -146,9 +145,9 @@ def test_pageserver_multiple_keys(neon_env_builder: NeonEnvBuilder):
def test_pageserver_key_reload(neon_env_builder: NeonEnvBuilder):
neon_env_builder.auth_enabled = True
env = neon_env_builder.init_start()
env.pageserver.allowed_errors.extend(
[".*Authentication error: InvalidSignature.*", ".*Unauthorized: malformed jwt token.*"]
)
env.pageserver.allowed_errors.append(".*Authentication error: InvalidSignature.*")
env.pageserver.allowed_errors.append(".*Unauthorized: malformed jwt token.*")
pageserver_token_old = env.auth_keys.generate_pageserver_token()
pageserver_http_client_old = env.pageserver.http_client(pageserver_token_old)

View File

@@ -14,9 +14,8 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}"
env = neon_env_builder.init_start()
env.pageserver.allowed_errors.extend(
[".*invalid branch start lsn.*", ".*invalid start lsn .* for ancestor timeline.*"]
)
env.pageserver.allowed_errors.append(".*invalid branch start lsn.*")
env.pageserver.allowed_errors.append(".*invalid start lsn .* for ancestor timeline.*")
# Branch at the point where only 100 rows were inserted
branch_behind_timeline_id = env.neon_cli.create_branch("test_branch_behind")

View File

@@ -1,7 +1,8 @@
import random
import threading
import time
from typing import List
from queue import SimpleQueue
from typing import Any, Dict, List, Union
import pytest
from fixtures.log_helper import log
@@ -147,11 +148,11 @@ def test_cannot_create_endpoint_on_non_uploaded_timeline(neon_env_builder: NeonE
env = neon_env_builder.init_configs()
env.start()
env.pageserver.allowed_errors.extend(
[
".*request{method=POST path=/v1/tenant/.*/timeline request_id=.*}: request was dropped before completing.*",
".*page_service_conn_main.*: query handler for 'basebackup .* is not active, state: Loading",
]
env.pageserver.allowed_errors.append(
".*request{method=POST path=/v1/tenant/.*/timeline request_id=.*}: request was dropped before completing.*"
)
env.pageserver.allowed_errors.append(
".*page_service_conn_main.*: query handler for 'basebackup .* is not active, state: Loading"
)
ps_http = env.pageserver.http_client()
@@ -238,6 +239,92 @@ def test_cannot_branch_from_non_uploaded_branch(neon_env_builder: NeonEnvBuilder
t.join()
def test_competing_branchings_from_loading_race_to_ok_or_err(neon_env_builder: NeonEnvBuilder):
"""
If 'activate only after upload' behaviour is in use, then retried creation requests can end up competing.
"""
env = neon_env_builder.init_configs()
env.start()
env.pageserver.allowed_errors.append(
".*request{method=POST path=/v1/tenant/.*/timeline request_id=.*}: request was dropped before completing.*"
)
env.pageserver.allowed_errors.append(
".*Error processing HTTP request: InternalServerError\\(Timeline .*/.* already exists in pageserver's memory"
)
ps_http = env.pageserver.http_client()
# pause all uploads
ps_http.configure_failpoints(("before-upload-index-pausable", "pause"))
env.pageserver.tenant_create(env.initial_tenant)
def start_creating_timeline():
ps_http.timeline_create(
env.pg_version, env.initial_tenant, env.initial_timeline, timeout=60
)
create_root = threading.Thread(target=start_creating_timeline)
branch_id = TimelineId.generate()
queue: SimpleQueue[Union[Dict[Any, Any], Exception]] = SimpleQueue()
barrier = threading.Barrier(3)
def try_branch():
barrier.wait()
barrier.wait()
try:
ret = ps_http.timeline_create(
env.pg_version,
env.initial_tenant,
branch_id,
ancestor_timeline_id=env.initial_timeline,
timeout=5,
)
queue.put(ret)
except Exception as e:
queue.put(e)
threads = [threading.Thread(target=try_branch) for _ in range(2)]
try:
create_root.start()
for t in threads:
t.start()
wait_until_paused(env, "before-upload-index-pausable")
barrier.wait()
ps_http.configure_failpoints(("before-upload-index-pausable", "off"))
barrier.wait()
# now both requests race to branch; only one can win because they take gc_cs, Tenant::timelines or marker files
first = queue.get()
second = queue.get()
log.info(first)
log.info(second)
(succeeded, failed) = (first, second) if isinstance(second, Exception) else (second, first)
assert isinstance(failed, Exception)
assert isinstance(succeeded, Dict)
# there are multiple valid status codes:
# - Timeline x/y already exists
# - whatever 409 response says, but that is a subclass of PageserverApiException
assert isinstance(failed, PageserverApiException)
assert succeeded["state"] == "Active"
finally:
# we might still have the failpoint active
env.pageserver.stop(immediate=True)
for t in threads:
t.join()
create_root.join()
def test_non_uploaded_root_timeline_is_deleted_after_restart(neon_env_builder: NeonEnvBuilder):
"""
Check that a timeline is deleted locally on subsequent restart if it was never successfully uploaded during creation.

View File

@@ -1,25 +1,30 @@
import copy
import os
import shutil
import subprocess
import tempfile
from pathlib import Path
from typing import List, Optional
from typing import Any, List, Optional
import pytest
import toml
import toml # TODO: replace with tomllib for Python >= 3.11
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
NeonEnv,
NeonCli,
NeonEnvBuilder,
PgBin,
)
from fixtures.pageserver.http import PageserverHttpClient
from fixtures.pageserver.utils import (
timeline_delete_wait_completed,
wait_for_last_record_lsn,
wait_for_upload,
)
from fixtures.pg_version import PgVersion
from fixtures.remote_storage import RemoteStorageKind
from fixtures.port_distributor import PortDistributor
from fixtures.remote_storage import LocalFsStorage, RemoteStorageKind, RemoteStorageUser
from fixtures.types import Lsn
from pytest import FixtureRequest
#
# A test suite that helps to prevent unintentionally breaking backward or forward compatibility between Neon releases.
@@ -32,8 +37,8 @@ from fixtures.types import Lsn
# If the breakage is intentional, the test can be xfailed by setting ALLOW_FORWARD_COMPATIBILITY_BREAKAGE=true.
#
# The file contains a couple of helper functions:
# - prepare_snapshot copies the snapshot, cleans it up and makes it ready for the current version of Neon (replaces paths and ports in config files).
# - check_neon_works performs the test itself, feel free to add more checks there.
# - dump_differs compares two SQL dumps and writes the diff to a file.
#
#
# How to run `test_backward_compatibility` locally:
@@ -41,7 +46,6 @@ from fixtures.types import Lsn
# export DEFAULT_PG_VERSION=15
# export BUILD_TYPE=release
# export CHECK_ONDISK_DATA_COMPATIBILITY=true
# export COMPATIBILITY_SNAPSHOT_DIR=test_output/compatibility_snapshot_pgv${DEFAULT_PG_VERSION}
#
# # Build previous version of binaries and create a data snapshot:
# rm -rf pg_install target
@@ -55,7 +59,8 @@ from fixtures.types import Lsn
# CARGO_BUILD_FLAGS="--features=testing" make -s -j`nproc`
#
# # Run backward compatibility test
# ./scripts/pytest -k test_backward_compatibility
# COMPATIBILITY_SNAPSHOT_DIR=test_output/compatibility_snapshot_pgv${DEFAULT_PG_VERSION} \
# ./scripts/pytest -k test_backward_compatibility
#
#
# How to run `test_forward_compatibility` locally:
@@ -63,8 +68,6 @@ from fixtures.types import Lsn
# export DEFAULT_PG_VERSION=15
# export BUILD_TYPE=release
# export CHECK_ONDISK_DATA_COMPATIBILITY=true
# export COMPATIBILITY_NEON_BIN=neon_previous/target/${BUILD_TYPE}
# export COMPATIBILITY_POSTGRES_DISTRIB_DIR=neon_previous/pg_install
#
# # Build previous version of binaries and store them somewhere:
# rm -rf pg_install target
@@ -81,7 +84,9 @@ from fixtures.types import Lsn
# ./scripts/pytest -k test_create_snapshot
#
# # Run forward compatibility test
# ./scripts/pytest -k test_forward_compatibility
# COMPATIBILITY_NEON_BIN=neon_previous/target/${BUILD_TYPE} \
# COMPATIBILITY_POSTGRES_DISTRIB_DIR=neon_previous/pg_install \
# ./scripts/pytest -k test_forward_compatibility
#
check_ondisk_data_compatibility_if_enabled = pytest.mark.skipif(
@@ -150,9 +155,13 @@ def test_create_snapshot(
@pytest.mark.xdist_group("compatibility")
@pytest.mark.order(after="test_create_snapshot")
def test_backward_compatibility(
neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
port_distributor: PortDistributor,
test_output_dir: Path,
neon_binpath: Path,
pg_distrib_dir: Path,
pg_version: PgVersion,
request: FixtureRequest,
):
"""
Test that the new binaries can read old data
@@ -168,15 +177,23 @@ def test_backward_compatibility(
)
try:
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.from_repo_dir(compatibility_snapshot_dir / "repo")
neon_env_builder.start()
# Copy the snapshot to current directory, and prepare for the test
prepare_snapshot(
from_dir=compatibility_snapshot_dir,
to_dir=test_output_dir / "compatibility_snapshot",
port_distributor=port_distributor,
)
check_neon_works(
env,
test_output_dir=test_output_dir,
sql_dump_path=compatibility_snapshot_dir / "dump.sql",
repo_dir=env.repo_dir,
test_output_dir / "compatibility_snapshot" / "repo",
neon_binpath,
neon_binpath,
pg_distrib_dir,
pg_version,
port_distributor,
test_output_dir,
pg_bin,
request,
)
except Exception:
if breaking_changes_allowed:
@@ -195,10 +212,12 @@ def test_backward_compatibility(
@pytest.mark.xdist_group("compatibility")
@pytest.mark.order(after="test_create_snapshot")
def test_forward_compatibility(
neon_env_builder: NeonEnvBuilder,
test_output_dir: Path,
top_output_dir: Path,
port_distributor: PortDistributor,
pg_version: PgVersion,
request: FixtureRequest,
neon_binpath: Path,
):
"""
Test that the old binaries can read new data
@@ -225,19 +244,24 @@ def test_forward_compatibility(
)
try:
neon_env_builder.num_safekeepers = 3
env = neon_env_builder.from_repo_dir(
compatibility_snapshot_dir / "repo",
neon_binpath=compatibility_neon_bin,
# Copy the snapshot to current directory, and prepare for the test
prepare_snapshot(
from_dir=compatibility_snapshot_dir,
to_dir=test_output_dir / "compatibility_snapshot",
port_distributor=port_distributor,
pg_distrib_dir=compatibility_postgres_distrib_dir,
)
neon_env_builder.start()
check_neon_works(
env,
test_output_dir=test_output_dir,
sql_dump_path=compatibility_snapshot_dir / "dump.sql",
repo_dir=env.repo_dir,
test_output_dir / "compatibility_snapshot" / "repo",
compatibility_neon_bin,
neon_binpath,
compatibility_postgres_distrib_dir,
pg_version,
port_distributor,
test_output_dir,
PgBin(test_output_dir, compatibility_postgres_distrib_dir, pg_version),
request,
)
except Exception:
if breaking_changes_allowed:
@@ -252,45 +276,193 @@ def test_forward_compatibility(
), "Breaking changes are allowed by ALLOW_FORWARD_COMPATIBILITY_BREAKAGE, but the test has passed without any breakage"
def check_neon_works(env: NeonEnv, test_output_dir: Path, sql_dump_path: Path, repo_dir: Path):
ep = env.endpoints.create_start("main")
pg_bin = PgBin(test_output_dir, env.pg_distrib_dir, env.pg_version)
def prepare_snapshot(
from_dir: Path,
to_dir: Path,
port_distributor: PortDistributor,
pg_distrib_dir: Optional[Path] = None,
):
assert from_dir.exists(), f"Snapshot '{from_dir}' doesn't exist"
assert (from_dir / "repo").exists(), f"Snapshot '{from_dir}' doesn't contain a repo directory"
assert (from_dir / "dump.sql").exists(), f"Snapshot '{from_dir}' doesn't contain a dump.sql"
connstr = ep.connstr()
log.info(f"Copying snapshot from {from_dir} to {to_dir}")
shutil.copytree(from_dir, to_dir)
repo_dir = to_dir / "repo"
snapshot_config_toml = repo_dir / "config"
snapshot_config = toml.load(snapshot_config_toml)
# Remove old logs to avoid confusion in test artifacts
for logfile in repo_dir.glob("**/*.log"):
logfile.unlink()
# Remove old computes in 'endpoints'. Old versions of the control plane used a directory
# called "pgdatadirs". Delete it, too.
if (repo_dir / "endpoints").exists():
shutil.rmtree(repo_dir / "endpoints")
if (repo_dir / "pgdatadirs").exists():
shutil.rmtree(repo_dir / "pgdatadirs")
os.mkdir(repo_dir / "endpoints")
# Update paths and ports in config files
legacy_pageserver_toml = repo_dir / "pageserver.toml"
legacy_bundle = os.path.exists(legacy_pageserver_toml)
path_to_config: dict[Path, dict[Any, Any]] = {}
if legacy_bundle:
os.mkdir(repo_dir / "pageserver_1")
path_to_config[repo_dir / "pageserver_1" / "pageserver.toml"] = toml.load(
legacy_pageserver_toml
)
os.remove(legacy_pageserver_toml)
os.rename(repo_dir / "tenants", repo_dir / "pageserver_1" / "tenants")
else:
for ps_conf in snapshot_config["pageservers"]:
config_path = repo_dir / f"pageserver_{ps_conf['id']}" / "pageserver.toml"
path_to_config[config_path] = toml.load(config_path)
# For each pageserver config, edit it and rewrite
for config_path, pageserver_config in path_to_config.items():
pageserver_config["remote_storage"]["local_path"] = str(
LocalFsStorage.component_path(repo_dir, RemoteStorageUser.PAGESERVER)
)
for param in ("listen_http_addr", "listen_pg_addr", "broker_endpoint"):
pageserver_config[param] = port_distributor.replace_with_new_port(
pageserver_config[param]
)
# We don't use authentication in compatibility tests
# so just remove authentication related settings.
pageserver_config.pop("pg_auth_type", None)
pageserver_config.pop("http_auth_type", None)
if pg_distrib_dir:
pageserver_config["pg_distrib_dir"] = str(pg_distrib_dir)
with config_path.open("w") as f:
toml.dump(pageserver_config, f)
# neon_local config doesn't have to be backward compatible. If we're using a dump from before
# it supported multiple pageservers, fix it up.
if "pageservers" not in snapshot_config:
snapshot_config["pageservers"] = [snapshot_config["pageserver"]]
del snapshot_config["pageserver"]
for param in ("listen_http_addr", "listen_pg_addr"):
for pageserver in snapshot_config["pageservers"]:
pageserver[param] = port_distributor.replace_with_new_port(pageserver[param])
snapshot_config["broker"]["listen_addr"] = port_distributor.replace_with_new_port(
snapshot_config["broker"]["listen_addr"]
)
for sk in snapshot_config["safekeepers"]:
for param in ("http_port", "pg_port", "pg_tenant_only_port"):
sk[param] = port_distributor.replace_with_new_port(sk[param])
if pg_distrib_dir:
snapshot_config["pg_distrib_dir"] = str(pg_distrib_dir)
with snapshot_config_toml.open("w") as f:
toml.dump(snapshot_config, f)
# Ensure that snapshot doesn't contain references to the original path
rv = subprocess.run(
[
"grep",
"--recursive",
"--binary-file=without-match",
"--files-with-matches",
"test_create_snapshot/repo",
str(repo_dir),
],
capture_output=True,
text=True,
)
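# grep exits non-zero when no file matches, so a non-zero return code below
# means nothing in the copied snapshot still references the original path.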
assert (
rv.returncode != 0
), f"there're files referencing `test_create_snapshot/repo`, this path should be replaced with {repo_dir}:\n{rv.stdout}"
def check_neon_works(
repo_dir: Path,
neon_target_binpath: Path,
neon_current_binpath: Path,
pg_distrib_dir: Path,
pg_version: PgVersion,
port_distributor: PortDistributor,
test_output_dir: Path,
pg_bin: PgBin,
request: FixtureRequest,
):
snapshot_config_toml = repo_dir / "config"
snapshot_config = toml.load(snapshot_config_toml)
snapshot_config["neon_distrib_dir"] = str(neon_target_binpath)
snapshot_config["postgres_distrib_dir"] = str(pg_distrib_dir)
with (snapshot_config_toml).open("w") as f:
toml.dump(snapshot_config, f)
# TODO: replace with NeonEnvBuilder / NeonEnv
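# type() with three arguments creates an anonymous class on the fly; the class
# object itself is then used as a plain attribute bag carrying the fields that
# NeonCli reads from a NeonEnv.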
config: Any = type("NeonEnvStub", (object,), {})
config.rust_log_override = None
config.repo_dir = repo_dir
config.pg_version = pg_version
config.initial_tenant = snapshot_config["default_tenant_id"]
config.pg_distrib_dir = pg_distrib_dir
config.remote_storage = None
config.sk_remote_storage = None
# Use the "target" binaries to launch the storage nodes
config_target = config
config_target.neon_binpath = neon_target_binpath
# We are using maybe-old binaries for neon services, but want to use current
# binaries for test utilities like neon_local
config_target.neon_local_binpath = neon_current_binpath
cli_target = NeonCli(config_target)
# And the current binaries to launch computes
snapshot_config["neon_distrib_dir"] = str(neon_current_binpath)
with (snapshot_config_toml).open("w") as f:
toml.dump(snapshot_config, f)
config_current = copy.copy(config)
config_current.neon_binpath = neon_current_binpath
cli_current = NeonCli(config_current)
cli_target.raw_cli(["start"])
request.addfinalizer(lambda: cli_target.raw_cli(["stop"]))
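# addfinalizer registers a teardown callback with pytest; finalizers run in
# reverse registration order when the test finishes, so the services started
# above are stopped even if a later assertion fails.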
pg_port = port_distributor.get_port()
http_port = port_distributor.get_port()
cli_current.endpoint_create(
branch_name="main", pg_port=pg_port, http_port=http_port, endpoint_id="ep-main"
)
cli_current.endpoint_start("ep-main")
request.addfinalizer(lambda: cli_current.endpoint_stop("ep-main"))
connstr = f"host=127.0.0.1 port={pg_port} user=cloud_admin dbname=postgres"
pg_bin.run_capture(
["pg_dumpall", f"--dbname={connstr}", f"--file={test_output_dir / 'dump.sql'}"]
)
initial_dump_differs = dump_differs(
sql_dump_path,
repo_dir.parent / "dump.sql",
test_output_dir / "dump.sql",
test_output_dir / "dump.filediff",
)
# Check that project can be recovered from WAL
# loosely based on https://www.notion.so/neondatabase/Storage-Recovery-from-WAL-d92c0aac0ebf40df892b938045d7d720
pageserver_http = env.pageserver.http_client()
tenant_id = env.initial_tenant
timeline_id = env.initial_timeline
pg_version = env.pg_version
# Delete all files from local_fs_remote_storage except initdb.tar.zst,
# which is required for `timeline_create` with `existing_initdb_timeline_id`.
#
# TODO: switch to Path.walk() in Python 3.12
# for dirpath, _dirnames, filenames in (repo_dir / "local_fs_remote_storage").walk():
for dirpath, _dirnames, filenames in os.walk(repo_dir / "local_fs_remote_storage"):
for filename in filenames:
if filename != "initdb.tar.zst":
(Path(dirpath) / filename).unlink()
timeline_delete_wait_completed(pageserver_http, tenant_id, timeline_id)
pageserver_http.timeline_create(
pg_version=pg_version,
tenant_id=tenant_id,
new_timeline_id=timeline_id,
existing_initdb_timeline_id=timeline_id,
tenant_id = snapshot_config["default_tenant_id"]
timeline_id = dict(snapshot_config["branch_name_mappings"]["main"])[tenant_id]
pageserver_port = snapshot_config["pageservers"][0]["listen_http_addr"].split(":")[-1]
pageserver_http = PageserverHttpClient(
port=pageserver_port,
is_testing_enabled_or_skip=lambda: True, # TODO: check if testing really enabled
)
shutil.rmtree(repo_dir / "local_fs_remote_storage")
timeline_delete_wait_completed(pageserver_http, tenant_id, timeline_id)
pageserver_http.timeline_create(pg_version, tenant_id, timeline_id)
pg_bin.run_capture(
["pg_dumpall", f"--dbname={connstr}", f"--file={test_output_dir / 'dump-from-wal.sql'}"]
)
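# The dumps produced in this test are compared with dump_differs (see the next
# hunk). A minimal sketch of that comparison pattern, assuming a textual `diff`
# binary on PATH; the real dump_differs additionally decides whether a produced
# diff is allowed (see its docstring below).
import subprocess
from pathlib import Path

def dumps_differ_sketch(first: Path, second: Path, output: Path) -> bool:
    """Diff two SQL dumps, write the diff to `output`, return True if they differ."""
    with output.open("w") as stdout:
        res = subprocess.run(
            ["diff", "--unified", str(first), str(second)],
            stdout=stdout,
            check=False,
        )
    return res.returncode != 0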
@@ -322,11 +494,6 @@ def dump_differs(
Returns True if the dumps differ and produced diff is not allowed, False otherwise (in most cases we want it to return False).
"""
if not first.exists():
raise FileNotFoundError(f"{first} doesn't exist")
if not second.exists():
raise FileNotFoundError(f"{second} doesn't exist")
with output.open("w") as stdout:
res = subprocess.run(
[

test_runner/regress/test_config.py Normal file → Executable file
View File

View File

@@ -35,11 +35,6 @@ def test_gc_cutoff(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
pageserver_http.configure_failpoints(("after-timeline-gc-removed-layers", "exit"))
# Because this test does a rapid series of restarts of the same node, it's possible that
# we are restarted again before we can clean up deletion lists from the previous generation,
# resulting in a subsequent startup logging a warning.
env.pageserver.allowed_errors.append(".*Dropping stale deletions for tenant.*")
for _ in range(5):
with pytest.raises(subprocess.SubprocessError):
pg_bin.run_capture(["pgbench", "-P1", "-N", "-c5", "-T500", "-Mprepared", connstr])

View File

@@ -99,13 +99,12 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build
]
)
env.pageserver.allowed_errors.extend(
[
# FIXME: we should clean up pageserver to not print this
".*exited with error: unexpected message type: CopyData.*",
# FIXME: Is this expected?
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*",
]
# FIXME: we should clean up pageserver to not print this
env.pageserver.allowed_errors.append(".*exited with error: unexpected message type: CopyData.*")
# FIXME: Is this expected?
env.pageserver.allowed_errors.append(
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
)
def import_tar(base, wal):

View File

@@ -236,30 +236,3 @@ def test_wal_page_boundary_start(neon_simple_env: NeonEnv, vanilla_pg):
assert vanilla_pg.safe_psql(
"select sum(somedata) from replication_example"
) == endpoint.safe_psql("select sum(somedata) from replication_example")
#
# Check that slots are not inherited in a branch
#
def test_slots_and_branching(neon_simple_env: NeonEnv):
env = neon_simple_env
tenant, timeline = env.neon_cli.create_tenant()
env.pageserver.http_client()
main_branch = env.endpoints.create_start("main", tenant_id=tenant)
main_cur = main_branch.connect().cursor()
# Create table and insert some data
main_cur.execute("select pg_create_logical_replication_slot('my_slot', 'pgoutput')")
wait_for_last_flush_lsn(env, main_branch, tenant, timeline)
# Create branch ws.
env.neon_cli.create_branch("ws", "main", tenant_id=tenant)
ws_branch = env.endpoints.create_start("ws", tenant_id=tenant)
log.info("postgres is running on 'ws' branch")
# Check that we can create slot with the same name
ws_cur = ws_branch.connect().cursor()
ws_cur.execute("select pg_create_logical_replication_slot('my_slot', 'pgoutput')")

View File

@@ -5,6 +5,7 @@ import time
from collections import defaultdict
from typing import Any, DefaultDict, Dict, Tuple
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
NeonEnvBuilder,
@@ -18,7 +19,7 @@ from fixtures.pageserver.utils import (
wait_for_upload,
wait_for_upload_queue_empty,
)
from fixtures.remote_storage import RemoteStorageKind
from fixtures.remote_storage import RemoteStorageKind, available_remote_storages
from fixtures.types import Lsn
from fixtures.utils import query_scalar, wait_until
@@ -44,7 +45,13 @@ def get_num_downloaded_layers(client: PageserverHttpClient):
# If you have a large relation, check that the pageserver downloads parts of it as
# required by queries.
#
def test_ondemand_download_large_rel(neon_env_builder: NeonEnvBuilder):
@pytest.mark.parametrize("remote_storage_kind", available_remote_storages())
def test_ondemand_download_large_rel(
neon_env_builder: NeonEnvBuilder,
remote_storage_kind: RemoteStorageKind,
):
neon_env_builder.enable_pageserver_remote_storage(remote_storage_kind)
# thinking about using a shared environment? the test assumes that global
# metrics are for single tenant.
env = neon_env_builder.init_start(
@@ -138,7 +145,13 @@ def test_ondemand_download_large_rel(neon_env_builder: NeonEnvBuilder):
# If you have a relation with a long history of updates, the pageserver downloads the layer
# files containing the history as needed by timetravel queries.
#
def test_ondemand_download_timetravel(neon_env_builder: NeonEnvBuilder):
@pytest.mark.parametrize("remote_storage_kind", available_remote_storages())
def test_ondemand_download_timetravel(
neon_env_builder: NeonEnvBuilder,
remote_storage_kind: RemoteStorageKind,
):
neon_env_builder.enable_pageserver_remote_storage(remote_storage_kind)
# thinking about using a shared environment? the test assumes that global
# metrics are for single tenant.
@@ -216,7 +229,8 @@ def test_ondemand_download_timetravel(neon_env_builder: NeonEnvBuilder):
assert filled_current_physical == filled_size, "we don't yet do layer eviction"
# Wait until generated image layers are uploaded to S3
wait_for_upload_queue_empty(pageserver_http, env.initial_tenant, timeline_id)
if remote_storage_kind is not None:
wait_for_upload_queue_empty(pageserver_http, env.initial_tenant, timeline_id)
env.pageserver.stop()

View File

@@ -23,20 +23,14 @@ from fixtures.neon_fixtures import (
PgBin,
S3Scrubber,
last_flush_lsn_upload,
wait_for_last_flush_lsn,
)
from fixtures.pageserver.http import PageserverApiException
from fixtures.pageserver.utils import (
assert_tenant_state,
list_prefix,
wait_for_last_record_lsn,
wait_for_upload,
)
from fixtures.pageserver.utils import list_prefix
from fixtures.remote_storage import (
RemoteStorageKind,
)
from fixtures.types import TenantId, TimelineId
from fixtures.utils import print_gc_result, wait_until
from fixtures.workload import Workload
# A tenant configuration that is convenient for generating uploads and deletions
# without a large amount of postgres traffic.
@@ -99,10 +93,7 @@ def generate_uploads_and_deletions(
)
assert tenant_id is not None
assert timeline_id is not None
# We are waiting for uploads as well as local flush, in order to avoid leaving the system
# in a state where there are "future layers" in remote storage that will generate deletions
# after a restart.
last_flush_lsn_upload(env, endpoint, tenant_id, timeline_id)
wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
ps_http.timeline_checkpoint(tenant_id, timeline_id)
# Compaction should generate some GC-eligible layers
@@ -569,91 +560,3 @@ def test_eviction_across_generations(neon_env_builder: NeonEnvBuilder):
read_all(env, tenant_id, timeline_id)
evict_all_layers(env, tenant_id, timeline_id)
read_all(env, tenant_id, timeline_id)
def test_multi_attach(
neon_env_builder: NeonEnvBuilder,
pg_bin: PgBin,
):
neon_env_builder.enable_generations = True
neon_env_builder.num_pageservers = 3
neon_env_builder.enable_pageserver_remote_storage(
remote_storage_kind=RemoteStorageKind.MOCK_S3,
)
env = neon_env_builder.init_start(initial_tenant_conf=TENANT_CONF)
pageservers = env.pageservers
http_clients = list([p.http_client() for p in pageservers])
tenant_id = env.initial_tenant
timeline_id = env.initial_timeline
# We will intentionally create situations where stale deletions happen from non-latest-generation
# nodes when the tenant is multiply-attached
for ps in env.pageservers:
ps.allowed_errors.extend(
[".*Dropped remote consistent LSN updates.*", ".*Dropping stale deletions.*"]
)
# Initially, the tenant will be attached to the first pageserver (first is default in our test harness)
wait_until(10, 0.2, lambda: assert_tenant_state(http_clients[0], tenant_id, "Active"))
_detail = http_clients[0].timeline_detail(tenant_id, timeline_id)
with pytest.raises(PageserverApiException):
http_clients[1].timeline_detail(tenant_id, timeline_id)
with pytest.raises(PageserverApiException):
http_clients[2].timeline_detail(tenant_id, timeline_id)
workload = Workload(env, tenant_id, timeline_id)
workload.init(pageservers[0].id)
workload.write_rows(1000, pageservers[0].id)
# Attach the tenant to the other two pageservers
pageservers[1].tenant_attach(env.initial_tenant)
pageservers[2].tenant_attach(env.initial_tenant)
wait_until(10, 0.2, lambda: assert_tenant_state(http_clients[1], tenant_id, "Active"))
wait_until(10, 0.2, lambda: assert_tenant_state(http_clients[2], tenant_id, "Active"))
# Now they all have it attached
_details = list([c.timeline_detail(tenant_id, timeline_id) for c in http_clients])
_detail = http_clients[1].timeline_detail(tenant_id, timeline_id)
_detail = http_clients[2].timeline_detail(tenant_id, timeline_id)
# The endpoint can use any pageserver to service its reads
for pageserver in pageservers:
workload.validate(pageserver.id)
# If we write some more data, all the nodes can see it, including stale ones
wrote_lsn = workload.write_rows(1000, pageservers[0].id)
for ps_http in http_clients:
wait_for_last_record_lsn(ps_http, tenant_id, timeline_id, wrote_lsn)
# ...and indeed endpoints can see it via any of the pageservers
for pageserver in pageservers:
workload.validate(pageserver.id)
# Prompt all the pageservers, including stale ones, to upload ingested layers to remote storage
for ps_http in http_clients:
ps_http.timeline_checkpoint(tenant_id, timeline_id)
wait_for_upload(ps_http, tenant_id, timeline_id, wrote_lsn)
# Now, the contents of remote storage will be a set of layers from each pageserver, but with unique
# generation numbers
# TODO: validate remote storage contents
# Stop all pageservers
for ps in pageservers:
ps.stop()
# Returning to a normal healthy state: all pageservers will start, but only the one most
# recently attached via the control plane will re-attach on startup
for ps in pageservers:
ps.start()
with pytest.raises(PageserverApiException):
_detail = http_clients[0].timeline_detail(tenant_id, timeline_id)
with pytest.raises(PageserverApiException):
_detail = http_clients[1].timeline_detail(tenant_id, timeline_id)
_detail = http_clients[2].timeline_detail(tenant_id, timeline_id)
# All data we wrote while multi-attached remains readable
workload.validate(pageservers[2].id)

View File

@@ -64,13 +64,13 @@ def test_metric_collection(
# spin up neon, after http server is ready
env = neon_env_builder.init_start(initial_tenant_conf={"pitr_interval": "0 sec"})
# httpserver is shut down before pageserver during passing run
env.pageserver.allowed_errors.extend(
[
".*metrics endpoint refused the sent metrics*",
# we have a fast rate of calculation, these can happen at shutdown
".*synthetic_size_worker:calculate_synthetic_size.*:gather_size_inputs.*: failed to calculate logical size at .*: cancelled.*",
".*synthetic_size_worker: failed to calculate synthetic size for tenant .*: failed to calculate some logical_sizes",
]
env.pageserver.allowed_errors.append(".*metrics endpoint refused the sent metrics*")
# we have a fast rate of calculation, these can happen at shutdown
env.pageserver.allowed_errors.append(
".*synthetic_size_worker:calculate_synthetic_size.*:gather_size_inputs.*: failed to calculate logical size at .*: cancelled.*"
)
env.pageserver.allowed_errors.append(
".*synthetic_size_worker: failed to calculate synthetic size for tenant .*: failed to calculate some logical_sizes"
)
tenant_id = env.initial_tenant
@@ -212,13 +212,13 @@ def test_metric_collection_cleans_up_tempfile(
pageserver_http = env.pageserver.http_client()
# httpserver is shut down before pageserver during passing run
env.pageserver.allowed_errors.extend(
[
".*metrics endpoint refused the sent metrics*",
# we have a fast rate of calculation, these can happen at shutdown
".*synthetic_size_worker:calculate_synthetic_size.*:gather_size_inputs.*: failed to calculate logical size at .*: cancelled.*",
".*synthetic_size_worker: failed to calculate synthetic size for tenant .*: failed to calculate some logical_sizes",
]
env.pageserver.allowed_errors.append(".*metrics endpoint refused the sent metrics*")
# we have a fast rate of calculation, these can happen at shutdown
env.pageserver.allowed_errors.append(
".*synthetic_size_worker:calculate_synthetic_size.*:gather_size_inputs.*: failed to calculate logical size at .*: cancelled.*"
)
env.pageserver.allowed_errors.append(
".*synthetic_size_worker: failed to calculate synthetic size for tenant .*: failed to calculate some logical_sizes"
)
tenant_id = env.initial_tenant

View File

@@ -1,375 +0,0 @@
import random
from typing import Any, Dict, Optional
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnvBuilder, NeonPageserver
from fixtures.remote_storage import LocalFsStorage, RemoteStorageKind
from fixtures.types import TenantId, TimelineId
from fixtures.utils import wait_until
from fixtures.workload import Workload
# A tenant configuration that is convenient for generating uploads and deletions
# without a large amount of postgres traffic.
TENANT_CONF = {
# small checkpointing and compaction targets to ensure we generate many upload operations
"checkpoint_distance": f"{128 * 1024}",
"compaction_target_size": f"{128 * 1024}",
"compaction_threshold": "1",
# no PITR horizon, we specify the horizon when we request on-demand GC
"pitr_interval": "0s",
# disable background compaction and GC. We invoke it manually when we want it to happen.
"gc_period": "0s",
"compaction_period": "0s",
# create image layers eagerly, so that GC can remove some layers
"image_creation_threshold": "1",
}
def evict_random_layers(
rng: random.Random, pageserver: NeonPageserver, tenant_id: TenantId, timeline_id: TimelineId
):
"""
Evict 50% of the layers on a pageserver
"""
timeline_path = pageserver.timeline_dir(tenant_id, timeline_id)
initial_local_layers = sorted(
list(filter(lambda path: path.name != "metadata", timeline_path.glob("*")))
)
client = pageserver.http_client()
for layer in initial_local_layers:
if "ephemeral" in layer.name or "temp_download" in layer.name:
continue
if rng.choice([True, False]):
log.info(f"Evicting layer {tenant_id}/{timeline_id} {layer.name}")
client.evict_layer(tenant_id=tenant_id, timeline_id=timeline_id, layer_name=layer.name)
@pytest.mark.parametrize("seed", [1, 2, 3])
def test_location_conf_churn(neon_env_builder: NeonEnvBuilder, seed: int):
"""
Issue many location configuration changes, ensure that tenants
remain readable & we don't get any unexpected errors. We should
have no ERROR in the log, and no 500s in the API.
The location_config API is intentionally designed so that all destination
states are valid, so that we may test it in this way: the API should always
work as long as the tenant exists.
"""
neon_env_builder.enable_generations = True
neon_env_builder.num_pageservers = 3
neon_env_builder.enable_pageserver_remote_storage(
remote_storage_kind=RemoteStorageKind.MOCK_S3,
)
env = neon_env_builder.init_start(initial_tenant_conf=TENANT_CONF)
assert env.attachment_service is not None
pageservers = env.pageservers
list([p.http_client() for p in pageservers])
tenant_id = env.initial_tenant
timeline_id = env.initial_timeline
# We will make no effort to avoid stale attachments
for ps in env.pageservers:
ps.allowed_errors.extend(
[
".*Dropped remote consistent LSN updates.*",
".*Dropping stale deletions.*",
# page_service_conn_main{peer_addr=[::1]:41176}: query handler for 'pagestream 3b19aec5038c796f64b430b30a555121 d07776761d44050b8aab511df1657d83' failed: Tenant 3b19aec5038c796f64b430b30a555121 not found
".*query handler.*Tenant.*not found.*",
# page_service_conn_main{peer_addr=[::1]:45552}: query handler for 'pagestream 414ede7ad50f775a8e7d9ba0e43b9efc a43884be16f44b3626482b6981b2c745' failed: Tenant 414ede7ad50f775a8e7d9ba0e43b9efc is not active
".*query handler.*Tenant.*not active.*",
]
)
# these can happen if we shut down at just the right time; to be fixed as part of #5172.
message = ".*duplicated L1 layer layer=.*"
ps.allowed_errors.append(message)
workload = Workload(env, tenant_id, timeline_id)
workload.init(env.pageservers[0].id)
workload.write_rows(256, env.pageservers[0].id)
# We use a fixed seed to make the test reproducible: we want a randomly
# chosen order, but not to change the order every time we run the test.
rng = random.Random(seed)
initial_generation = 1
last_state = {
env.pageservers[0].id: ("AttachedSingle", initial_generation),
env.pageservers[1].id: ("Detached", None),
env.pageservers[2].id: ("Detached", None),
}
latest_attached = env.pageservers[0].id
for _i in range(0, 64):
# Pick a pageserver
pageserver = rng.choice(env.pageservers)
# Pick a pseudorandom state
modes = [
"AttachedSingle",
"AttachedMulti",
"AttachedStale",
"Secondary",
"Detached",
"_Evictions",
"_Restart",
]
mode = rng.choice(modes)
last_state_ps = last_state[pageserver.id]
if mode == "_Evictions":
if last_state_ps[0].startswith("Attached"):
log.info(f"Action: evictions on pageserver {pageserver.id}")
evict_random_layers(rng, pageserver, tenant_id, timeline_id)
else:
log.info(
f"Action: skipping evictions on pageserver {pageserver.id}, is not attached"
)
elif mode == "_Restart":
log.info(f"Action: restarting pageserver {pageserver.id}")
pageserver.stop()
pageserver.start()
if last_state_ps[0].startswith("Attached") and latest_attached == pageserver.id:
log.info("Entering postgres...")
workload.churn_rows(rng.randint(128, 256), pageserver.id)
workload.validate(pageserver.id)
elif last_state_ps[0].startswith("Attached"):
# The `attachment_service` will only re-attach on startup when a pageserver was the
# holder of the latest generation: otherwise the pageserver will revert to detached
# state if it was running attached with a stale generation
last_state[pageserver.id] = ("Detached", None)
else:
secondary_conf: Optional[Dict[str, Any]] = None
if mode == "Secondary":
secondary_conf = {"warm": rng.choice([True, False])}
location_conf: Dict[str, Any] = {
"mode": mode,
"secondary_conf": secondary_conf,
"tenant_conf": {},
}
log.info(f"Action: Configuring pageserver {pageserver.id} to {location_conf}")
# Select a generation number
if mode.startswith("Attached"):
if last_state_ps[1] is not None:
if rng.choice([True, False]):
# Move between attached states, staying in the same generation
generation = last_state_ps[1]
else:
# Switch generations, while also jumping between attached states
generation = env.attachment_service.attach_hook_issue(
tenant_id, pageserver.id
)
latest_attached = pageserver.id
else:
generation = env.attachment_service.attach_hook_issue(tenant_id, pageserver.id)
latest_attached = pageserver.id
else:
generation = None
location_conf["generation"] = generation
pageserver.tenant_location_configure(tenant_id, location_conf)
last_state[pageserver.id] = (mode, generation)
if mode.startswith("Attached"):
# This is a basic test: we are validating that the endpoint works properly _between_
# configuration changes. A stronger test would be to validate that clients see
# no errors while we are making the changes.
workload.churn_rows(
rng.randint(128, 256), pageserver.id, upload=mode != "AttachedStale"
)
workload.validate(pageserver.id)
# Attach all pageservers
for ps in env.pageservers:
location_conf = {"mode": "AttachedMulti", "secondary_conf": None, "tenant_conf": {}}
ps.tenant_location_configure(tenant_id, location_conf)
# Confirm that all are readable
for ps in env.pageservers:
workload.validate(ps.id)
# Detach all pageservers
for ps in env.pageservers:
location_conf = {"mode": "Detached", "secondary_conf": None, "tenant_conf": {}}
ps.tenant_location_configure(tenant_id, location_conf)
# Confirm that all local disk state was removed on detach
# TODO
def test_live_migration(neon_env_builder: NeonEnvBuilder):
"""
Test the sequence of location states that are used in a live migration.
"""
neon_env_builder.enable_generations = True
neon_env_builder.num_pageservers = 2
neon_env_builder.enable_pageserver_remote_storage(
remote_storage_kind=RemoteStorageKind.MOCK_S3,
)
env = neon_env_builder.init_start(initial_tenant_conf=TENANT_CONF)
assert env.attachment_service is not None
tenant_id = env.initial_tenant
timeline_id = env.initial_timeline
pageserver_a = env.pageservers[0]
pageserver_b = env.pageservers[1]
initial_generation = 1
workload = Workload(env, tenant_id, timeline_id)
workload.init(env.pageservers[0].id)
workload.write_rows(256, env.pageservers[0].id)
# Make the destination a secondary location
pageserver_b.tenant_location_configure(
tenant_id,
{
"mode": "Secondary",
"secondary_conf": {"warm": True},
"tenant_conf": {},
},
)
workload.churn_rows(64, pageserver_a.id, upload=False)
# Set origin attachment to stale
log.info("Setting origin to AttachedStale")
pageserver_a.tenant_location_configure(
tenant_id,
{
"mode": "AttachedStale",
"secondary_conf": None,
"tenant_conf": {},
"generation": initial_generation,
},
flush_ms=5000,
)
migrated_generation = env.attachment_service.attach_hook_issue(tenant_id, pageserver_b.id)
log.info(f"Acquired generation {migrated_generation} for destination pageserver")
assert migrated_generation == initial_generation + 1
# Writes and reads still work in AttachedStale.
workload.validate(pageserver_a.id)
# TODO: call into secondary mode API hooks to do an upload/download sync
# Generate some more dirty writes: we expect the origin to ingest WAL
# while in AttachedStale
workload.churn_rows(64, pageserver_a.id, upload=False)
workload.validate(pageserver_a.id)
# Attach the destination
log.info("Setting destination to AttachedMulti")
pageserver_b.tenant_location_configure(
tenant_id,
{
"mode": "AttachedMulti",
"secondary_conf": None,
"tenant_conf": {},
"generation": migrated_generation,
},
)
# Wait for destination LSN to catch up with origin
origin_lsn = pageserver_a.http_client().timeline_detail(tenant_id, timeline_id)[
"last_record_lsn"
]
def caught_up():
destination_lsn = pageserver_b.http_client().timeline_detail(tenant_id, timeline_id)[
"last_record_lsn"
]
log.info(
f"Waiting for LSN to catch up: origin {origin_lsn} vs destination {destination_lsn}"
)
assert destination_lsn >= origin_lsn
wait_until(100, 0.1, caught_up)
# The destination should accept writes
workload.churn_rows(64, pageserver_b.id)
# Dual attached: both are readable.
workload.validate(pageserver_a.id)
workload.validate(pageserver_b.id)
# Revert the origin to secondary
log.info("Setting origin to Secondary")
pageserver_a.tenant_location_configure(
tenant_id,
{
"mode": "Secondary",
"secondary_conf": {"warm": True},
"tenant_conf": {},
},
)
workload.churn_rows(64, pageserver_b.id)
# Put the destination into final state
pageserver_b.tenant_location_configure(
tenant_id,
{
"mode": "AttachedSingle",
"secondary_conf": None,
"tenant_conf": {},
"generation": migrated_generation,
},
)
workload.churn_rows(64, pageserver_b.id)
workload.validate(pageserver_b.id)
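# The caught_up() check above is polled via fixtures.utils.wait_until. A
# minimal sketch of such a polling helper, assuming the
# (number_of_iterations, interval, func) signature used throughout this file;
# the real implementation may differ in details such as logging.
import time
from typing import Callable, Optional

def wait_until_sketch(number_of_iterations: int, interval: float, func: Callable[[], None]) -> None:
    """Retry `func` until it stops raising, sleeping `interval` seconds between attempts."""
    last_exc: Optional[Exception] = None
    for _ in range(number_of_iterations):
        try:
            func()
            return
        except Exception as e:
            last_exc = e
            time.sleep(interval)
    raise RuntimeError(f"timed out waiting for condition: {last_exc}")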
def test_heatmap_uploads(neon_env_builder: NeonEnvBuilder):
"""
Test that heatmap uploads reflect the layers present for the tenant's timelines.
"""
env = neon_env_builder.init_start() # initial_tenant_conf=TENANT_CONF)
assert isinstance(env.pageserver_remote_storage, LocalFsStorage)
tenant_id = env.initial_tenant
timeline_id = env.initial_timeline
# Write some data so that we have some layers
workload = Workload(env, tenant_id, timeline_id)
workload.init(env.pageservers[0].id)
# Write some layers and upload a heatmap
workload.write_rows(256, env.pageservers[0].id)
env.pageserver.http_client().tenant_heatmap_upload(tenant_id)
def validate_heatmap(heatmap):
assert len(heatmap["timelines"]) == 1
assert heatmap["timelines"][0]["timeline_id"] == str(timeline_id)
assert len(heatmap["timelines"][0]["layers"]) > 0
layers = heatmap["timelines"][0]["layers"]
# Each layer appears at most once
assert len(set(layer["name"] for layer in layers)) == len(layers)
# Download and inspect the heatmap that the pageserver uploaded
heatmap_first = env.pageserver_remote_storage.heatmap_content(tenant_id)
log.info(f"Read back heatmap: {heatmap_first}")
validate_heatmap(heatmap_first)
# Do some more I/O to generate more layers
workload.churn_rows(64, env.pageservers[0].id)
env.pageserver.http_client().tenant_heatmap_upload(tenant_id)
# Ensure that another heatmap upload includes the new layers
heatmap_second = env.pageserver_remote_storage.heatmap_content(tenant_id)
log.info(f"Read back heatmap: {heatmap_second}")
assert heatmap_second != heatmap_first
validate_heatmap(heatmap_second)
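# For orientation, a toy payload shaped like what validate_heatmap() above
# expects. The field names are inferred from those assertions only; the real
# heatmap produced by the pageserver carries more metadata per layer.
_example_heatmap = {
    "timelines": [
        {
            "timeline_id": "<timeline id as hex string>",
            "layers": [
                {"name": "<layer file name>"},
            ],
        }
    ],
}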

View File

@@ -73,20 +73,19 @@ def test_remote_storage_backup_and_restore(
##### First start, insert data and upload it to the remote storage
env = neon_env_builder.init_start()
env.pageserver.allowed_errors.extend(
[
# FIXME: Is this expected?
".*marking .* as locally complete, while it doesnt exist in remote index.*",
".*No timelines to attach received.*",
".*Failed to get local tenant state.*",
# FIXME retry downloads without throwing errors
".*failed to load remote timeline.*",
# we have a bunch of pytest.raises for these below
".*tenant .*? already exists, state:.*",
".*tenant directory already exists.*",
".*simulated failure of remote operation.*",
]
# FIXME: Is this expected?
env.pageserver.allowed_errors.append(
".*marking .* as locally complete, while it doesnt exist in remote index.*"
)
env.pageserver.allowed_errors.append(".*No timelines to attach received.*")
env.pageserver.allowed_errors.append(".*Failed to get local tenant state.*")
# FIXME retry downloads without throwing errors
env.pageserver.allowed_errors.append(".*failed to load remote timeline.*")
# we have a bunch of pytest.raises for these below
env.pageserver.allowed_errors.append(".*tenant .*? already exists, state:.*")
env.pageserver.allowed_errors.append(".*tenant directory already exists.*")
env.pageserver.allowed_errors.append(".*simulated failure of remote operation.*")
pageserver_http = env.pageserver.http_client()
endpoint = env.endpoints.create_start("main")

View File

@@ -314,10 +314,6 @@ def test_creating_tenant_conf_after_attach(neon_env_builder: NeonEnvBuilder):
assert not config_path.exists(), "detach did not remove config file"
# The re-attach's increment of the generation number may invalidate deletion queue
# updates in flight from the previous attachment.
env.pageserver.allowed_errors.append(".*Dropped remote consistent LSN updates.*")
env.pageserver.tenant_attach(tenant_id)
wait_until(
number_of_iterations=5,

View File

@@ -23,18 +23,23 @@ from fixtures.pageserver.utils import (
wait_until_tenant_active,
wait_until_tenant_state,
)
from fixtures.remote_storage import RemoteStorageKind, available_s3_storages, s3_storage
from fixtures.remote_storage import (
RemoteStorageKind,
available_remote_storages,
available_s3_storages,
)
from fixtures.types import TenantId
from fixtures.utils import run_pg_bench_small, wait_until
@pytest.mark.parametrize("remote_storage_kind", available_remote_storages())
def test_tenant_delete_smoke(
neon_env_builder: NeonEnvBuilder,
remote_storage_kind: RemoteStorageKind,
pg_bin: PgBin,
):
neon_env_builder.pageserver_config_override = "test_remote_failures=1"
remote_storage_kind = s3_storage()
neon_env_builder.enable_pageserver_remote_storage(remote_storage_kind)
env = neon_env_builder.init_start()
@@ -73,15 +78,16 @@ def test_tenant_delete_smoke(
run_pg_bench_small(pg_bin, endpoint.connstr())
wait_for_last_flush_lsn(env, endpoint, tenant=tenant_id, timeline=timeline_id)
assert_prefix_not_empty(
neon_env_builder,
prefix="/".join(
(
"tenants",
str(tenant_id),
)
),
)
if remote_storage_kind in available_s3_storages():
assert_prefix_not_empty(
neon_env_builder,
prefix="/".join(
(
"tenants",
str(tenant_id),
)
),
)
parent = timeline
@@ -94,15 +100,16 @@ def test_tenant_delete_smoke(
tenant_path = env.pageserver.tenant_dir(tenant_id)
assert not tenant_path.exists()
assert_prefix_empty(
neon_env_builder,
prefix="/".join(
(
"tenants",
str(tenant_id),
)
),
)
if remote_storage_kind in available_s3_storages():
assert_prefix_empty(
neon_env_builder,
prefix="/".join(
(
"tenants",
str(tenant_id),
)
),
)
# Deletion updates the tenant count: the one default tenant remains
assert ps_http.get_metric_value("pageserver_tenant_manager_slots") == 1
@@ -142,7 +149,9 @@ FAILPOINTS_BEFORE_BACKGROUND = [
def combinations():
result = []
remotes = available_s3_storages()
remotes = [RemoteStorageKind.MOCK_S3]
if os.getenv("ENABLE_REAL_S3_REMOTE_STORAGE"):
remotes.append(RemoteStorageKind.REAL_S3)
for remote_storage_kind in remotes:
for delete_failpoint in FAILPOINTS:
@@ -156,8 +165,8 @@ def combinations():
return result
@pytest.mark.parametrize("check", list(Check))
@pytest.mark.parametrize("remote_storage_kind, failpoint, simulate_failures", combinations())
@pytest.mark.parametrize("check", list(Check))
def test_delete_tenant_exercise_crash_safety_failpoints(
neon_env_builder: NeonEnvBuilder,
remote_storage_kind: RemoteStorageKind,
@@ -205,15 +214,16 @@ def test_delete_tenant_exercise_crash_safety_failpoints(
run_pg_bench_small(pg_bin, endpoint.connstr())
last_flush_lsn_upload(env, endpoint, tenant_id, timeline_id)
assert_prefix_not_empty(
neon_env_builder,
prefix="/".join(
(
"tenants",
str(tenant_id),
)
),
)
if remote_storage_kind in available_s3_storages():
assert_prefix_not_empty(
neon_env_builder,
prefix="/".join(
(
"tenants",
str(tenant_id),
)
),
)
ps_http.configure_failpoints((failpoint, "return"))
@@ -266,23 +276,25 @@ def test_delete_tenant_exercise_crash_safety_failpoints(
assert not tenant_dir.exists()
# Check remote is empty
assert_prefix_empty(
neon_env_builder,
prefix="/".join(
(
"tenants",
str(tenant_id),
)
),
allowed_postfix="initdb.tar.zst",
)
if remote_storage_kind in available_s3_storages():
assert_prefix_empty(
neon_env_builder,
prefix="/".join(
(
"tenants",
str(tenant_id),
)
),
allowed_postfix="initdb.tar.zst",
)
@pytest.mark.parametrize("remote_storage_kind", available_remote_storages())
def test_tenant_delete_is_resumed_on_attach(
neon_env_builder: NeonEnvBuilder,
remote_storage_kind: RemoteStorageKind,
pg_bin: PgBin,
):
remote_storage_kind = s3_storage()
neon_env_builder.enable_pageserver_remote_storage(remote_storage_kind)
env = neon_env_builder.init_start(initial_tenant_conf=MANY_SMALL_LAYERS_TENANT_CONFIG)
@@ -302,15 +314,16 @@ def test_tenant_delete_is_resumed_on_attach(
wait_for_last_flush_lsn(env, endpoint, tenant=tenant_id, timeline=timeline_id)
# sanity check, data should be there
assert_prefix_not_empty(
neon_env_builder,
prefix="/".join(
(
"tenants",
str(tenant_id),
)
),
)
if remote_storage_kind in available_s3_storages():
assert_prefix_not_empty(
neon_env_builder,
prefix="/".join(
(
"tenants",
str(tenant_id),
)
),
)
# failpoint before we remove index_part from s3
failpoint = "timeline-delete-before-index-delete"
@@ -341,15 +354,16 @@ def test_tenant_delete_is_resumed_on_attach(
iterations=iterations,
)
assert_prefix_not_empty(
neon_env_builder,
prefix="/".join(
(
"tenants",
str(tenant_id),
)
),
)
if remote_storage_kind in available_s3_storages():
assert_prefix_not_empty(
neon_env_builder,
prefix="/".join(
(
"tenants",
str(tenant_id),
)
),
)
reason = tenant_info["state"]["data"]["reason"]
# failpoint may not be the only error in the stack
@@ -375,16 +389,17 @@ def test_tenant_delete_is_resumed_on_attach(
tenant_path = env.pageserver.tenant_dir(tenant_id)
assert not tenant_path.exists()
ps_http.deletion_queue_flush(execute=True)
assert_prefix_empty(
neon_env_builder,
prefix="/".join(
(
"tenants",
str(tenant_id),
)
),
)
if remote_storage_kind in available_s3_storages():
ps_http.deletion_queue_flush(execute=True)
assert_prefix_empty(
neon_env_builder,
prefix="/".join(
(
"tenants",
str(tenant_id),
)
),
)
def test_long_timeline_create_cancelled_by_tenant_delete(neon_env_builder: NeonEnvBuilder):
@@ -395,13 +410,13 @@ def test_long_timeline_create_cancelled_by_tenant_delete(neon_env_builder: NeonE
env.start()
pageserver_http = env.pageserver.http_client()
env.pageserver.allowed_errors.extend(
[
# happens with the cancellation bailing flushing loop earlier, leaving disk_consistent_lsn at zero
".*Timeline got dropped without initializing, cleaning its files",
# the response hit_pausable_failpoint_and_later_fail
f".*Error processing HTTP request: InternalServerError\\(new timeline {env.initial_tenant}/{env.initial_timeline} has invalid disk_consistent_lsn",
]
# happens with the cancellation bailing flushing loop earlier, leaving disk_consistent_lsn at zero
env.pageserver.allowed_errors.append(
".*Timeline got dropped without initializing, cleaning its files"
)
# the response hit_pausable_failpoint_and_later_fail
env.pageserver.allowed_errors.append(
f".*Error processing HTTP request: InternalServerError\\(new timeline {env.initial_tenant}/{env.initial_timeline} has invalid disk_consistent_lsn"
)
env.pageserver.tenant_create(env.initial_tenant)

View File

@@ -21,6 +21,7 @@ from fixtures.pageserver.utils import (
)
from fixtures.remote_storage import (
RemoteStorageKind,
available_remote_storages,
)
from fixtures.types import Lsn, TenantId, TimelineId
from fixtures.utils import query_scalar, wait_until
@@ -58,11 +59,16 @@ class ReattachMode(str, enum.Enum):
# Basic detach and re-attach test
@pytest.mark.parametrize("remote_storage_kind", available_remote_storages())
@pytest.mark.parametrize(
"mode",
[ReattachMode.REATTACH_EXPLICIT, ReattachMode.REATTACH_RESET, ReattachMode.REATTACH_RESET_DROP],
)
def test_tenant_reattach(neon_env_builder: NeonEnvBuilder, mode: str):
def test_tenant_reattach(
neon_env_builder: NeonEnvBuilder, remote_storage_kind: RemoteStorageKind, mode: str
):
neon_env_builder.enable_pageserver_remote_storage(remote_storage_kind)
# Exercise retry code path by making all uploads and downloads fail for the
# first time. The retries print INFO-messages to the log; we will check
# that they are present after the test.
@@ -181,13 +187,16 @@ num_rows = 100000
#
# I don't know what's causing that...
@pytest.mark.skip(reason="fixme")
@pytest.mark.parametrize("remote_storage_kind", available_remote_storages())
def test_tenant_reattach_while_busy(
neon_env_builder: NeonEnvBuilder,
remote_storage_kind: RemoteStorageKind,
):
updates_started = 0
updates_finished = 0
updates_to_perform = 0
neon_env_builder.enable_pageserver_remote_storage(remote_storage_kind)
env = neon_env_builder.init_start()
# Run random UPDATEs on test table. On failure, try again.
@@ -307,14 +316,10 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
bogus_timeline_id = TimelineId.generate()
pageserver_http.timeline_gc(tenant_id, bogus_timeline_id, 0)
env.pageserver.allowed_errors.extend(
[
# the error will be printed to the log too
".*gc target timeline does not exist.*",
# Timelines get stopped during detach, ignore the gc calls that error, witnessing that
".*InternalServerError\\(timeline is Stopping.*",
]
)
# the error will be printed to the log too
env.pageserver.allowed_errors.append(".*gc target timeline does not exist.*")
# Timelines get stopped during detach, ignore the gc calls that error, witnessing that
env.pageserver.allowed_errors.append(".*InternalServerError\\(timeline is Stopping.*")
# Detach while running manual GC.
# It should wait for manual GC to finish because it runs in a task associated with the tenant.
@@ -434,9 +439,13 @@ def test_tenant_detach_regular_tenant(neon_simple_env: NeonEnv):
should not be present in pageserver's memory"
@pytest.mark.parametrize("remote_storage_kind", available_remote_storages())
def test_detach_while_attaching(
neon_env_builder: NeonEnvBuilder,
remote_storage_kind: RemoteStorageKind,
):
neon_env_builder.enable_pageserver_remote_storage(remote_storage_kind)
##### First start, insert secret data and upload it to the remote storage
env = neon_env_builder.init_start()
pageserver_http = env.pageserver.http_client()
