diff --git a/Cargo.lock b/Cargo.lock
index 1364c9d84f..1b6b423444 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -184,7 +184,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "81953c529336010edd6d8e358f886d9581267795c61b19475b71314bffa46d35"
 dependencies = [
  "concurrent-queue",
- "event-listener",
+ "event-listener 2.5.3",
  "futures-core",
 ]
 
@@ -205,11 +205,13 @@ dependencies = [
 
 [[package]]
 name = "async-lock"
-version = "2.8.0"
+version = "3.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "287272293e9d8c41773cec55e365490fe034813a2f172f502d6ddcf75b2f582b"
+checksum = "7125e42787d53db9dd54261812ef17e937c95a51e4d291373b670342fa44310c"
 dependencies = [
- "event-listener",
+ "event-listener 4.0.0",
+ "event-listener-strategy",
+ "pin-project-lite",
 ]
 
 [[package]]
@@ -692,9 +694,9 @@ dependencies = [
 
 [[package]]
 name = "azure_core"
-version = "0.16.0"
+version = "0.18.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8e29286b9edfdd6f2c7e9d970bb5b015df8621258acab9ecfcea09b2d7692467"
+checksum = "a6218987c374650fdad0b476bfc675729762c28dfb35f58608a38a2b1ea337dd"
 dependencies = [
  "async-trait",
  "base64 0.21.1",
@@ -702,8 +704,10 @@ dependencies = [
  "dyn-clone",
  "futures",
  "getrandom 0.2.11",
+ "hmac",
  "http-types",
  "log",
+ "once_cell",
  "paste",
  "pin-project",
  "quick-xml",
@@ -712,6 +716,7 @@ dependencies = [
  "rustc_version",
  "serde",
  "serde_json",
+ "sha2",
  "time",
  "url",
  "uuid",
@@ -719,9 +724,9 @@ dependencies = [
 
 [[package]]
 name = "azure_identity"
-version = "0.16.2"
+version = "0.18.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5b67b337346da8739e91ea1e9400a6ebc9bc54e0b2af1d23c9bcd565950588f9"
+checksum = "9e1eacc4f7fb2a73d57c39139d0fc3aed78435606055779ddaef4b43cdf919a8"
 dependencies = [
  "async-lock",
  "async-trait",
@@ -731,7 +736,6 @@ dependencies = [
  "oauth2",
  "pin-project",
  "serde",
- "serde_json",
  "time",
  "tz-rs",
  "url",
@@ -740,21 +744,18 @@ dependencies = [
 
 [[package]]
 name = "azure_storage"
-version = "0.16.0"
+version = "0.18.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bed0ccefde57930b2886fd4aed1f70ac469c197b8c2e94828290d71bcbdb5d97"
+checksum = "ade8f2653e408de88b9eafec9f48c3c26b94026375e88adbd34523a7dd9795a1"
 dependencies = [
  "RustyXML",
+ "async-lock",
  "async-trait",
  "azure_core",
  "bytes",
- "futures",
- "hmac",
  "log",
  "serde",
  "serde_derive",
- "serde_json",
- "sha2",
  "time",
  "url",
  "uuid",
@@ -762,13 +763,14 @@ dependencies = [
 
 [[package]]
 name = "azure_storage_blobs"
-version = "0.16.0"
+version = "0.18.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f91a52da2d192cfe43759f61e8bb31a5969f1722d5b85ac89627f356ad674ab4"
+checksum = "025701c7cc5b523100f0f3b2b01723564ec5a86c03236521c06826337047e872"
 dependencies = [
  "RustyXML",
  "azure_core",
  "azure_storage",
+ "azure_svc_blobstorage",
  "bytes",
  "futures",
  "log",
@@ -780,6 +782,22 @@ dependencies = [
  "uuid",
 ]
 
+[[package]]
+name = "azure_svc_blobstorage"
+version = "0.18.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "76051e5bb67cea1055abe5e530a0878feac7e0ab4cbbcb4a6adc953a58993389"
+dependencies = [
+ "azure_core",
+ "bytes",
+ "futures",
+ "log",
+ "once_cell",
+ "serde",
+ "serde_json",
+ "time",
+]
+
 [[package]]
 name = "backtrace"
 version = "0.3.67"
@@ -1686,6 +1704,27 @@ version = "2.5.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0"
 
+[[package]]
+name = "event-listener"
+version = "4.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "770d968249b5d99410d61f5bf89057f3199a077a04d087092f58e7d10692baae"
+dependencies = [
+ "concurrent-queue",
+ "parking",
+ "pin-project-lite",
+]
+
+[[package]]
+name = "event-listener-strategy"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "958e4d70b6d5e81971bebec42271ec641e7ff4e170a6fa605f2b8a8b65cb97d3"
+dependencies = [
+ "event-listener 4.0.0",
+ "pin-project-lite",
+]
+
 [[package]]
 name = "fail"
 version = "0.5.1"
@@ -3678,9 +3717,9 @@ dependencies = [
 
 [[package]]
 name = "quick-xml"
-version = "0.30.0"
+version = "0.31.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "eff6510e86862b57b210fd8cbe8ed3f0d7d600b9c2863cd4549a2e033c66e956"
+checksum = "1004a344b30a54e2ee58d66a71b32d2db2feb0a31f9a2d302bf0536f15de2a33"
 dependencies = [
  "memchr",
  "serde",
diff --git a/Cargo.toml b/Cargo.toml
index 33f56e084f..496a9d7839 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -38,10 +38,10 @@ license = "Apache-2.0"
 anyhow = { version = "1.0", features = ["backtrace"] }
 arc-swap = "1.6"
 async-compression = { version = "0.4.0", features = ["tokio", "gzip", "zstd"] }
-azure_core = "0.16"
-azure_identity = "0.16"
-azure_storage = "0.16"
-azure_storage_blobs = "0.16"
+azure_core = "0.18"
+azure_identity = "0.18"
+azure_storage = "0.18"
+azure_storage_blobs = "0.18"
 flate2 = "1.0.26"
 async-stream = "0.3"
 async-trait = "0.1"
diff --git a/control_plane/src/bin/neon_local.rs b/control_plane/src/bin/neon_local.rs
index 8d53a6a658..6f0b929ac6 100644
--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -168,7 +168,7 @@ fn print_timelines_tree(
                     info: t.clone(),
                     children: BTreeSet::new(),
                     name: timeline_name_mappings
-                        .remove(&TenantTimelineId::new(t.tenant_id, t.timeline_id)),
+                        .remove(&TenantTimelineId::new(t.tenant_id.tenant_id, t.timeline_id)),
                 },
             )
         })
diff --git a/control_plane/src/tenant_migration.rs b/control_plane/src/tenant_migration.rs
index c0c44e279f..fbb0358158 100644
--- a/control_plane/src/tenant_migration.rs
+++ b/control_plane/src/tenant_migration.rs
@@ -165,7 +165,7 @@ pub fn migrate_tenant(
         let found = other_ps_tenants
             .into_iter()
             .map(|t| t.id)
-            .any(|i| i == tenant_id);
+            .any(|i| i.tenant_id == tenant_id);
         if !found {
             continue;
         }
diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs
index 495a58e865..a3029e67a5 100644
--- a/libs/pageserver_api/src/models.rs
+++ b/libs/pageserver_api/src/models.rs
@@ -357,7 +357,7 @@ pub enum TenantAttachmentStatus {
 
 #[derive(Serialize, Deserialize, Clone)]
 pub struct TenantInfo {
-    pub id: TenantId,
+    pub id: TenantShardId,
     // NB: intentionally not part of OpenAPI, we don't want to commit to a specific set of TenantState's
     pub state: TenantState,
     /// Sum of the size of all layer files.
@@ -369,7 +369,7 @@ pub struct TenantInfo {
 /// This represents the output of the "timeline_detail" and "timeline_list" API calls.
 #[derive(Debug, Serialize, Deserialize, Clone)]
 pub struct TimelineInfo {
-    pub tenant_id: TenantId,
+    pub tenant_id: TenantShardId,
     pub timeline_id: TimelineId,
 
     pub ancestor_timeline_id: Option<TimelineId>,
@@ -385,6 +385,9 @@ pub struct TimelineInfo {
     /// The LSN that we are advertizing to safekeepers
     pub remote_consistent_lsn_visible: Lsn,
 
+    /// The LSN from the start of the root timeline (never changes)
+    pub initdb_lsn: Lsn,
+
     pub current_logical_size: u64,
     pub current_logical_size_is_accurate: bool,
 
@@ -823,7 +826,7 @@ mod tests {
     fn test_tenantinfo_serde() {
         // Test serialization/deserialization of TenantInfo
         let original_active = TenantInfo {
-            id: TenantId::generate(),
+            id: TenantShardId::unsharded(TenantId::generate()),
             state: TenantState::Active,
             current_physical_size: Some(42),
             attachment_status: TenantAttachmentStatus::Attached,
@@ -840,7 +843,7 @@ mod tests {
         });
 
         let original_broken = TenantInfo {
-            id: TenantId::generate(),
+            id: TenantShardId::unsharded(TenantId::generate()),
             state: TenantState::Broken {
                 reason: "reason".into(),
                 backtrace: "backtrace info".into(),
diff --git a/libs/pageserver_api/src/shard.rs b/libs/pageserver_api/src/shard.rs
index 9e83e0eee2..052fbd1402 100644
--- a/libs/pageserver_api/src/shard.rs
+++ b/libs/pageserver_api/src/shard.rs
@@ -76,6 +76,11 @@ impl TenantShardId {
     pub fn shard_slug(&self) -> impl std::fmt::Display + '_ {
         ShardSlug(self)
     }
+
+    /// True if this is shard number 0; convenience for code with special behavior on the 0th shard.
+    pub fn is_zero(&self) -> bool {
+        self.shard_number == ShardNumber(0)
+    }
 }
 
 /// Formatting helper
diff --git a/libs/remote_storage/src/azure_blob.rs b/libs/remote_storage/src/azure_blob.rs
index e559d00ded..548bde02f6 100644
--- a/libs/remote_storage/src/azure_blob.rs
+++ b/libs/remote_storage/src/azure_blob.rs
@@ -271,17 +271,12 @@ impl RemoteStorage for AzureBlobStorage {
 
         let mut builder = blob_client.get();
 
-        if let Some(end_exclusive) = end_exclusive {
-            builder = builder.range(Range::new(start_inclusive, end_exclusive));
+        let range: Range = if let Some(end_exclusive) = end_exclusive {
+            (start_inclusive..end_exclusive).into()
         } else {
-            // Open ranges are not supported by the SDK so we work around
-            // by setting the upper limit extremely high (but high enough
-            // to still be representable by signed 64 bit integers).
-            // TODO remove workaround once the SDK adds open range support
-            // https://github.com/Azure/azure-sdk-for-rust/issues/1438
-            let end_exclusive = u64::MAX / 4;
-            builder = builder.range(Range::new(start_inclusive, end_exclusive));
-        }
+            (start_inclusive..).into()
+        };
+        builder = builder.range(range);
 
         self.download_for_builder(builder).await
     }
diff --git a/libs/utils/src/sync/gate.rs b/libs/utils/src/sync/gate.rs
index 9aad0af22d..31c76d2f74 100644
--- a/libs/utils/src/sync/gate.rs
+++ b/libs/utils/src/sync/gate.rs
@@ -30,18 +30,32 @@ async fn warn_if_stuck<Fut: std::future::Future>(
 
     let mut fut = std::pin::pin!(fut);
 
-    loop {
+    let mut warned = false;
+    let ret = loop {
         match tokio::time::timeout(warn_period, &mut fut).await {
-            Ok(ret) => return ret,
+            Ok(ret) => break ret,
             Err(_) => {
                 tracing::warn!(
                     gate = name,
                     elapsed_ms = started.elapsed().as_millis(),
                     "still waiting, taking longer than expected..."
                 );
+                warned = true;
             }
         }
+    };
+
+    // If we emitted a warning for slowness, also emit a message when we complete, so that
+    // someone debugging a shutdown can know for sure whether we have moved past this operation.
+    if warned {
+        tracing::info!(
+            gate = name,
+            elapsed_ms = started.elapsed().as_millis(),
+            "completed, after taking longer than expected"
+        )
     }
+
+    ret
 }
 
 #[derive(Debug)]
diff --git a/pageserver/src/consumption_metrics.rs b/pageserver/src/consumption_metrics.rs
index bfd7897b49..8f2b88d191 100644
--- a/pageserver/src/consumption_metrics.rs
+++ b/pageserver/src/consumption_metrics.rs
@@ -269,12 +269,18 @@ async fn calculate_synthetic_size_worker(
             }
         };
 
-        for (tenant_id, tenant_state) in tenants {
+        for (tenant_shard_id, tenant_state) in tenants {
             if tenant_state != TenantState::Active {
                 continue;
             }
 
-            if let Ok(tenant) = mgr::get_tenant(tenant_id, true) {
+            if !tenant_shard_id.is_zero() {
+                // We only send consumption metrics from shard 0, so don't waste time calculating
+                // synthetic size on other shards.
+                continue;
+            }
+
+            if let Ok(tenant) = mgr::get_tenant(tenant_shard_id, true) {
                 // TODO should we use concurrent_background_tasks_rate_limit() here, like the other background tasks?
                 // We can put in some prioritization for consumption metrics.
                 // Same for the loop that fetches computed metrics.
@@ -291,7 +297,9 @@ async fn calculate_synthetic_size_worker(
                     );
 
                     if !is_cancelled {
-                        error!("failed to calculate synthetic size for tenant {tenant_id}: {e:#}");
+                        error!(
+                            "failed to calculate synthetic size for tenant {tenant_shard_id}: {e:#}"
+                        );
                     }
                 }
             }
diff --git a/pageserver/src/consumption_metrics/metrics.rs b/pageserver/src/consumption_metrics/metrics.rs
index 918e45ea9e..0b827816bc 100644
--- a/pageserver/src/consumption_metrics/metrics.rs
+++ b/pageserver/src/consumption_metrics/metrics.rs
@@ -2,7 +2,6 @@ use crate::{context::RequestContext, tenant::timeline::logical_size::CurrentLogi
 use chrono::{DateTime, Utc};
 use consumption_metrics::EventType;
 use futures::stream::StreamExt;
-use pageserver_api::shard::ShardNumber;
 use std::{sync::Arc, time::SystemTime};
 use utils::{
     id::{TenantId, TimelineId},
@@ -198,12 +197,12 @@ pub(super) async fn collect_all_metrics(
     };
 
     let tenants = futures::stream::iter(tenants).filter_map(|(id, state)| async move {
-        if state != TenantState::Active {
+        if state != TenantState::Active || !id.is_zero() {
             None
         } else {
             crate::tenant::mgr::get_tenant(id, true)
                 .ok()
-                .map(|tenant| (id, tenant))
+                .map(|tenant| (id.tenant_id, tenant))
         }
     });
 
@@ -229,11 +228,6 @@ where
     while let Some((tenant_id, tenant)) = tenants.next().await {
         let mut tenant_resident_size = 0;
 
-        // Sharded tenants report all consumption metrics from shard zero
-        if tenant.tenant_shard_id().shard_number != ShardNumber(0) {
-            continue;
-        }
-
         for timeline in tenant.list_timelines() {
             let timeline_id = timeline.timeline_id;
 
diff --git a/pageserver/src/http/openapi_spec.yml b/pageserver/src/http/openapi_spec.yml
index 237109abfe..9422ccb2fd 100644
--- a/pageserver/src/http/openapi_spec.yml
+++ b/pageserver/src/http/openapi_spec.yml
@@ -84,7 +84,6 @@ paths:
         required: true
         schema:
           type: string
-          format: hex
     get:
       description: Get tenant status
       responses:
@@ -181,7 +180,6 @@ paths:
         required: true
         schema:
           type: string
-          format: hex
     get:
       description: Get timelines for tenant
       responses:
@@ -232,7 +230,6 @@ paths:
         required: true
         schema:
           type: string
-          format: hex
       - name: timeline_id
         in: path
         required: true
@@ -338,7 +335,6 @@ paths:
         required: true
         schema:
           type: string
-          format: hex
       - name: timeline_id
         in: path
         required: true
@@ -401,7 +397,6 @@ paths:
         required: true
         schema:
           type: string
-          format: hex
       - name: timeline_id
         in: path
         required: true
@@ -469,7 +464,6 @@ paths:
         required: true
         schema:
           type: string
-          format: hex
       - name: timeline_id
         in: path
         required: true
@@ -523,7 +517,6 @@ paths:
         required: true
         schema:
           type: string
-          format: hex
     post:
       description: |
         Schedules attach operation to happen in the background for the given tenant.
@@ -631,7 +624,6 @@ paths:
         required: true
         schema:
           type: string
-          format: hex
       - name: flush_ms
         in: query
         required: false
@@ -724,7 +716,6 @@ paths:
         required: true
         schema:
           type: string
-          format: hex
       - name: detach_ignored
         in: query
         required: false
@@ -784,7 +775,6 @@ paths:
         required: true
         schema:
           type: string
-          format: hex
     post:
       description: |
         Remove tenant data (including all corresponding timelines) from pageserver's memory.
@@ -833,7 +823,6 @@ paths:
         required: true
         schema:
           type: string
-          format: hex
     post:
       description: |
         Schedules an operation that attempts to load a tenant from the local disk and
@@ -890,7 +879,6 @@ paths:
         required: true
         schema:
           type: string
-          format: hex
     get:
       description: |
         Calculate tenant's synthetic size
@@ -933,7 +921,6 @@ paths:
         required: true
         schema:
           type: string
-          format: hex
       - name: inputs_only
         in: query
         required: false
@@ -1003,7 +990,6 @@ paths:
         required: true
         schema:
           type: string
-          format: hex
     post:
       description: |
         Create a timeline. Returns new timeline id on success.\
@@ -1137,7 +1123,6 @@ paths:
             application/json:
               schema:
                 type: string
-                format: hex
         "400":
           description: Malformed tenant create request
           content:
@@ -1234,7 +1219,6 @@ paths:
         required: true
         schema:
           type: string
-          format: hex
     get:
       description: |
         Returns tenant's config description: specific config overrides a tenant has
@@ -1340,7 +1324,6 @@ components:
           properties:
             new_tenant_id:
               type: string
-              format: hex
             generation:
               type: integer
               description: Attachment generation number.
@@ -1369,7 +1352,6 @@ components:
           properties:
             tenant_id:
               type: string
-              format: hex
     TenantLocationConfigRequest:
       type: object
       required:
@@ -1377,7 +1359,6 @@ components:
       properties:
         tenant_id:
           type: string
-          format: hex
         mode:
           type: string
           enum: ["AttachedSingle", "AttachedMulti", "AttachedStale", "Secondary", "Detached"]
@@ -1446,7 +1427,6 @@ components:
           format: hex
         tenant_id:
           type: string
-          format: hex
         last_record_lsn:
           type: string
           format: hex
diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs
index 9e41d912c2..fee50460a5 100644
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -319,6 +319,7 @@ async fn build_timeline_info_common(
     ctx: &RequestContext,
 ) -> anyhow::Result<TimelineInfo> {
     crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id();
+    let initdb_lsn = timeline.initdb_lsn;
     let last_record_lsn = timeline.get_last_record_lsn();
     let (wal_source_connstr, last_received_msg_lsn, last_received_msg_ts) = {
         let guard = timeline.last_received_wal.lock().unwrap();
@@ -352,14 +353,14 @@ async fn build_timeline_info_common(
     let walreceiver_status = timeline.walreceiver_status();
 
     let info = TimelineInfo {
-        // TODO(sharding): add a shard_id field, or make tenant_id into a tenant_shard_id
-        tenant_id: timeline.tenant_shard_id.tenant_id,
+        tenant_id: timeline.tenant_shard_id,
         timeline_id: timeline.timeline_id,
         ancestor_timeline_id,
         ancestor_lsn,
         disk_consistent_lsn: timeline.get_disk_consistent_lsn(),
         remote_consistent_lsn: remote_consistent_lsn_projected,
         remote_consistent_lsn_visible,
+        initdb_lsn,
         last_record_lsn,
         prev_record_lsn: Some(timeline.get_prev_record_lsn()),
         latest_gc_cutoff_lsn: *timeline.get_latest_gc_cutoff_lsn(),
@@ -480,15 +481,15 @@ async fn timeline_list_handler(
     request: Request<Body>,
     _cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
+    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
     let include_non_incremental_logical_size: Option<bool> =
         parse_query_param(&request, "include-non-incremental-logical-size")?;
-    check_permission(&request, Some(tenant_id))?;
+    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
 
     let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
 
     let response_data = async {
-        let tenant = mgr::get_tenant(tenant_id, true)?;
+        let tenant = mgr::get_tenant(tenant_shard_id, true)?;
         let timelines = tenant.list_timelines();
 
         let mut response_data = Vec::with_capacity(timelines.len());
@@ -507,7 +508,9 @@ async fn timeline_list_handler(
         }
         Ok::<Vec<TimelineInfo>, ApiError>(response_data)
     }
-    .instrument(info_span!("timeline_list", %tenant_id))
+    .instrument(info_span!("timeline_list",
+                tenant_id = %tenant_shard_id.tenant_id,
+                shard_id = %tenant_shard_id.shard_slug()))
     .await?;
 
     json_response(StatusCode::OK, response_data)
@@ -517,17 +520,17 @@ async fn timeline_detail_handler(
     request: Request<Body>,
     _cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
+    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
     let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
     let include_non_incremental_logical_size: Option<bool> =
         parse_query_param(&request, "include-non-incremental-logical-size")?;
-    check_permission(&request, Some(tenant_id))?;
+    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
 
     // Logical size calculation needs downloading.
     let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
 
     let timeline_info = async {
-        let tenant = mgr::get_tenant(tenant_id, true)?;
+        let tenant = mgr::get_tenant(tenant_shard_id, true)?;
 
         let timeline = tenant
             .get_timeline(timeline_id, false)
@@ -544,7 +547,10 @@ async fn timeline_detail_handler(
 
         Ok::<_, ApiError>(timeline_info)
     }
-    .instrument(info_span!("timeline_detail", %tenant_id, %timeline_id))
+    .instrument(info_span!("timeline_detail",
+                tenant_id = %tenant_shard_id.tenant_id,
+                shard_id = %tenant_shard_id.shard_slug(),
+                %timeline_id))
     .await?;
 
     json_response(StatusCode::OK, timeline_info)
@@ -554,8 +560,15 @@ async fn get_lsn_by_timestamp_handler(
     request: Request<Body>,
     cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
-    check_permission(&request, Some(tenant_id))?;
+    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
+    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
+
+    if !tenant_shard_id.is_zero() {
+        // Reject early on other shards: the lookup requires SLRU contents, which are only stored on shard zero
+        return Err(ApiError::BadRequest(anyhow!(
+            "Timestamp to LSN lookup is only available on shard zero"
+        )));
+    }
 
     let version: Option<u8> = parse_query_param(&request, "version")?;
 
@@ -567,7 +580,7 @@ async fn get_lsn_by_timestamp_handler(
     let timestamp_pg = postgres_ffi::to_pg_timestamp(timestamp);
 
     let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
-    let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
+    let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
     let result = timeline
         .find_lsn_for_timestamp(timestamp_pg, &cancel, &ctx)
         .await?;
@@ -602,8 +615,15 @@ async fn get_timestamp_of_lsn_handler(
     request: Request<Body>,
     _cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
-    check_permission(&request, Some(tenant_id))?;
+    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
+    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
+
+    if !tenant_shard_id.is_zero() {
+        // Reject early on other shards: the lookup requires SLRU contents, which are only stored on shard zero
+        return Err(ApiError::BadRequest(anyhow!(
+            "LSN to timestamp lookup is only available on shard zero"
+        )));
+    }
 
     let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
 
@@ -613,7 +633,7 @@ async fn get_timestamp_of_lsn_handler(
         .map_err(ApiError::BadRequest)?;
 
     let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
-    let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
+    let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
     let result = timeline.get_timestamp_for_lsn(lsn, &ctx).await?;
 
     match result {
@@ -805,11 +825,11 @@ async fn tenant_status(
     request: Request<Body>,
     _cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
-    check_permission(&request, Some(tenant_id))?;
+    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
+    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
 
     let tenant_info = async {
-        let tenant = mgr::get_tenant(tenant_id, false)?;
+        let tenant = mgr::get_tenant(tenant_shard_id, false)?;
 
         // Calculate total physical size of all timelines
         let mut current_physical_size = 0;
@@ -819,13 +839,15 @@ async fn tenant_status(
 
         let state = tenant.current_state();
         Result::<_, ApiError>::Ok(TenantInfo {
-            id: tenant_id,
+            id: tenant_shard_id,
             state: state.clone(),
             current_physical_size: Some(current_physical_size),
             attachment_status: state.attachment_status(),
         })
     }
-    .instrument(info_span!("tenant_status_handler", %tenant_id))
+    .instrument(info_span!("tenant_status_handler",
+                tenant_id = %tenant_shard_id.tenant_id,
+                shard_id = %tenant_shard_id.shard_slug()))
     .await?;
 
     json_response(StatusCode::OK, tenant_info)
@@ -868,14 +890,20 @@ async fn tenant_size_handler(
     request: Request<Body>,
     cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
-    check_permission(&request, Some(tenant_id))?;
+    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
+    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
     let inputs_only: Option<bool> = parse_query_param(&request, "inputs_only")?;
     let retention_period: Option<u64> = parse_query_param(&request, "retention_period")?;
     let headers = request.headers();
 
     let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
-    let tenant = mgr::get_tenant(tenant_id, true)?;
+    let tenant = mgr::get_tenant(tenant_shard_id, true)?;
+
+    if !tenant_shard_id.is_zero() {
+        return Err(ApiError::BadRequest(anyhow!(
+            "Size calculations are only available on shard zero"
+        )));
+    }
 
     // this can be long operation
     let inputs = tenant
@@ -927,7 +955,7 @@ async fn tenant_size_handler(
     json_response(
         StatusCode::OK,
         TenantHistorySize {
-            id: tenant_id,
+            id: tenant_shard_id.tenant_id,
             size: sizes.as_ref().map(|x| x.total_size),
             segment_sizes: sizes.map(|x| x.segments),
             inputs,
@@ -939,14 +967,14 @@ async fn layer_map_info_handler(
     request: Request<Body>,
     _cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
+    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
     let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
     let reset: LayerAccessStatsReset =
         parse_query_param(&request, "reset")?.unwrap_or(LayerAccessStatsReset::NoReset);
 
-    check_permission(&request, Some(tenant_id))?;
+    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
 
-    let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
+    let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
     let layer_map_info = timeline.layer_map_info(reset).await;
 
     json_response(StatusCode::OK, layer_map_info)
@@ -956,13 +984,12 @@ async fn layer_download_handler(
     request: Request<Body>,
     _cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
-    check_permission(&request, Some(tenant_id))?;
+    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
     let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
     let layer_file_name = get_request_param(&request, "layer_file_name")?;
-    check_permission(&request, Some(tenant_id))?;
+    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
 
-    let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
+    let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
     let downloaded = timeline
         .download_layer(layer_file_name)
         .await
@@ -973,7 +1000,7 @@ async fn layer_download_handler(
         Some(false) => json_response(StatusCode::NOT_MODIFIED, ()),
         None => json_response(
             StatusCode::BAD_REQUEST,
-            format!("Layer {tenant_id}/{timeline_id}/{layer_file_name} not found"),
+            format!("Layer {tenant_shard_id}/{timeline_id}/{layer_file_name} not found"),
         ),
     }
 }
@@ -982,12 +1009,12 @@ async fn evict_timeline_layer_handler(
     request: Request<Body>,
     _cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
-    check_permission(&request, Some(tenant_id))?;
+    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
+    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
     let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
     let layer_file_name = get_request_param(&request, "layer_file_name")?;
 
-    let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
+    let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
     let evicted = timeline
         .evict_layer(layer_file_name)
         .await
@@ -998,7 +1025,7 @@ async fn evict_timeline_layer_handler(
         Some(false) => json_response(StatusCode::NOT_MODIFIED, ()),
         None => json_response(
             StatusCode::BAD_REQUEST,
-            format!("Layer {tenant_id}/{timeline_id}/{layer_file_name} not found"),
+            format!("Layer {tenant_shard_id}/{timeline_id}/{layer_file_name} not found"),
         ),
     }
 }
@@ -1130,10 +1157,10 @@ async fn get_tenant_config_handler(
     request: Request<Body>,
     _cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
-    check_permission(&request, Some(tenant_id))?;
+    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
+    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
 
-    let tenant = mgr::get_tenant(tenant_id, false)?;
+    let tenant = mgr::get_tenant(tenant_shard_id, false)?;
 
     let response = HashMap::from([
         (
@@ -1227,9 +1254,9 @@ async fn handle_tenant_break(
     r: Request<Body>,
     _cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&r, "tenant_id")?;
+    let tenant_shard_id: TenantShardId = parse_request_param(&r, "tenant_shard_id")?;
 
-    let tenant = crate::tenant::mgr::get_tenant(tenant_id, true)
+    let tenant = crate::tenant::mgr::get_tenant(tenant_shard_id, true)
         .map_err(|_| ApiError::Conflict(String::from("no active tenant found")))?;
 
     tenant.set_broken("broken from test".to_owned()).await;
@@ -1270,14 +1297,15 @@ async fn timeline_gc_handler(
     mut request: Request<Body>,
     cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
+    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
     let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
-    check_permission(&request, Some(tenant_id))?;
+    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
 
     let gc_req: TimelineGcRequest = json_request(&mut request).await?;
 
     let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
-    let wait_task_done = mgr::immediate_gc(tenant_id, timeline_id, gc_req, cancel, &ctx).await?;
+    let wait_task_done =
+        mgr::immediate_gc(tenant_shard_id, timeline_id, gc_req, cancel, &ctx).await?;
     let gc_result = wait_task_done
         .await
         .context("wait for gc task")
@@ -1292,9 +1320,9 @@ async fn timeline_compact_handler(
     request: Request<Body>,
     cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
+    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
     let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
-    check_permission(&request, Some(tenant_id))?;
+    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
 
     let mut flags = EnumSet::empty();
     if Some(true) == parse_query_param::<_, bool>(&request, "force_repartition")? {
@@ -1302,14 +1330,14 @@ async fn timeline_compact_handler(
     }
     async {
         let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
-        let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
+        let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
         timeline
             .compact(&cancel, flags, &ctx)
             .await
             .map_err(|e| ApiError::InternalServerError(e.into()))?;
         json_response(StatusCode::OK, ())
     }
-    .instrument(info_span!("manual_compaction", %tenant_id, %timeline_id))
+    .instrument(info_span!("manual_compaction", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))
     .await
 }
 
@@ -1318,9 +1346,9 @@ async fn timeline_checkpoint_handler(
     request: Request<Body>,
     cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
+    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
     let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
-    check_permission(&request, Some(tenant_id))?;
+    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
 
     let mut flags = EnumSet::empty();
     if Some(true) == parse_query_param::<_, bool>(&request, "force_repartition")? {
@@ -1328,7 +1356,7 @@ async fn timeline_checkpoint_handler(
     }
     async {
         let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
-        let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
+        let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
         timeline
             .freeze_and_flush()
             .await
@@ -1340,7 +1368,7 @@ async fn timeline_checkpoint_handler(
 
         json_response(StatusCode::OK, ())
     }
-    .instrument(info_span!("manual_checkpoint", %tenant_id, %timeline_id))
+    .instrument(info_span!("manual_checkpoint", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))
     .await
 }
 
@@ -1348,12 +1376,12 @@ async fn timeline_download_remote_layers_handler_post(
     mut request: Request<Body>,
     _cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
+    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
     let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
     let body: DownloadRemoteLayersTaskSpawnRequest = json_request(&mut request).await?;
-    check_permission(&request, Some(tenant_id))?;
+    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
 
-    let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
+    let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
     match timeline.spawn_download_all_remote_layers(body).await {
         Ok(st) => json_response(StatusCode::ACCEPTED, st),
         Err(st) => json_response(StatusCode::CONFLICT, st),
@@ -1364,11 +1392,11 @@ async fn timeline_download_remote_layers_handler_get(
     request: Request<Body>,
     _cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
-    check_permission(&request, Some(tenant_id))?;
+    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
+    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
     let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
 
-    let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
+    let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
     let info = timeline
         .get_download_all_remote_layers_task_info()
         .context("task never started since last pageserver process start")
@@ -1414,9 +1442,9 @@ async fn getpage_at_lsn_handler(
     request: Request<Body>,
     _cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
+    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
     let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
-    check_permission(&request, Some(tenant_id))?;
+    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
 
     struct Key(crate::repository::Key);
 
@@ -1435,7 +1463,7 @@ async fn getpage_at_lsn_handler(
 
     async {
         let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
-        let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
+        let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
 
         let page = timeline.get(key.0, lsn, &ctx).await?;
 
@@ -1447,7 +1475,7 @@ async fn getpage_at_lsn_handler(
                 .unwrap(),
         )
     }
-    .instrument(info_span!("timeline_get", %tenant_id, %timeline_id))
+    .instrument(info_span!("timeline_get", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))
     .await
 }
 
@@ -1455,9 +1483,9 @@ async fn timeline_collect_keyspace(
     request: Request<Body>,
     _cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
+    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
     let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
-    check_permission(&request, Some(tenant_id))?;
+    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
 
     struct Partitioning {
         keys: crate::keyspace::KeySpace,
@@ -1526,7 +1554,7 @@ async fn timeline_collect_keyspace(
 
     async {
         let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
-        let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
+        let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
         let at_lsn = at_lsn.unwrap_or_else(|| timeline.get_last_record_lsn());
         let keys = timeline
             .collect_keyspace(at_lsn, &ctx)
@@ -1535,15 +1563,15 @@ async fn timeline_collect_keyspace(
 
         json_response(StatusCode::OK, Partitioning { keys, at_lsn })
     }
-    .instrument(info_span!("timeline_collect_keyspace", %tenant_id, %timeline_id))
+    .instrument(info_span!("timeline_collect_keyspace", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))
     .await
 }
 
 async fn active_timeline_of_active_tenant(
-    tenant_id: TenantId,
+    tenant_shard_id: TenantShardId,
     timeline_id: TimelineId,
 ) -> Result<Arc<Timeline>, ApiError> {
-    let tenant = mgr::get_tenant(tenant_id, true)?;
+    let tenant = mgr::get_tenant(tenant_shard_id, true)?;
     tenant
         .get_timeline(timeline_id, true)
         .map_err(|e| ApiError::NotFound(e.into()))
@@ -1820,23 +1848,25 @@ pub fn make_router(
         })
         .get("/v1/tenant", |r| api_handler(r, tenant_list_handler))
         .post("/v1/tenant", |r| api_handler(r, tenant_create_handler))
-        .get("/v1/tenant/:tenant_id", |r| api_handler(r, tenant_status))
+        .get("/v1/tenant/:tenant_shard_id", |r| {
+            api_handler(r, tenant_status)
+        })
         .delete("/v1/tenant/:tenant_shard_id", |r| {
             api_handler(r, tenant_delete_handler)
         })
-        .get("/v1/tenant/:tenant_id/synthetic_size", |r| {
+        .get("/v1/tenant/:tenant_shard_id/synthetic_size", |r| {
             api_handler(r, tenant_size_handler)
         })
         .put("/v1/tenant/config", |r| {
             api_handler(r, update_tenant_config_handler)
         })
-        .get("/v1/tenant/:tenant_id/config", |r| {
+        .get("/v1/tenant/:tenant_shard_id/config", |r| {
             api_handler(r, get_tenant_config_handler)
         })
         .put("/v1/tenant/:tenant_shard_id/location_config", |r| {
             api_handler(r, put_tenant_location_config_handler)
         })
-        .get("/v1/tenant/:tenant_id/timeline", |r| {
+        .get("/v1/tenant/:tenant_shard_id/timeline", |r| {
             api_handler(r, timeline_list_handler)
         })
         .post("/v1/tenant/:tenant_shard_id/timeline", |r| {
@@ -1857,47 +1887,50 @@ pub fn make_router(
         .post("/v1/tenant/:tenant_id/ignore", |r| {
             api_handler(r, tenant_ignore_handler)
         })
-        .get("/v1/tenant/:tenant_id/timeline/:timeline_id", |r| {
+        .get("/v1/tenant/:tenant_shard_id/timeline/:timeline_id", |r| {
             api_handler(r, timeline_detail_handler)
         })
         .get(
-            "/v1/tenant/:tenant_id/timeline/:timeline_id/get_lsn_by_timestamp",
+            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/get_lsn_by_timestamp",
             |r| api_handler(r, get_lsn_by_timestamp_handler),
         )
         .get(
-            "/v1/tenant/:tenant_id/timeline/:timeline_id/get_timestamp_of_lsn",
+            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/get_timestamp_of_lsn",
             |r| api_handler(r, get_timestamp_of_lsn_handler),
         )
-        .put("/v1/tenant/:tenant_id/timeline/:timeline_id/do_gc", |r| {
-            api_handler(r, timeline_gc_handler)
-        })
-        .put("/v1/tenant/:tenant_id/timeline/:timeline_id/compact", |r| {
-            testing_api_handler("run timeline compaction", r, timeline_compact_handler)
-        })
         .put(
-            "/v1/tenant/:tenant_id/timeline/:timeline_id/checkpoint",
+            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/do_gc",
+            |r| api_handler(r, timeline_gc_handler),
+        )
+        .put(
+            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/compact",
+            |r| testing_api_handler("run timeline compaction", r, timeline_compact_handler),
+        )
+        .put(
+            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/checkpoint",
             |r| testing_api_handler("run timeline checkpoint", r, timeline_checkpoint_handler),
         )
         .post(
-            "/v1/tenant/:tenant_id/timeline/:timeline_id/download_remote_layers",
+            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/download_remote_layers",
             |r| api_handler(r, timeline_download_remote_layers_handler_post),
         )
         .get(
-            "/v1/tenant/:tenant_id/timeline/:timeline_id/download_remote_layers",
+            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/download_remote_layers",
             |r| api_handler(r, timeline_download_remote_layers_handler_get),
         )
         .delete("/v1/tenant/:tenant_shard_id/timeline/:timeline_id", |r| {
             api_handler(r, timeline_delete_handler)
         })
-        .get("/v1/tenant/:tenant_id/timeline/:timeline_id/layer", |r| {
-            api_handler(r, layer_map_info_handler)
-        })
         .get(
-            "/v1/tenant/:tenant_id/timeline/:timeline_id/layer/:layer_file_name",
+            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/layer",
+            |r| api_handler(r, layer_map_info_handler),
+        )
+        .get(
+            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/layer/:layer_file_name",
             |r| api_handler(r, layer_download_handler),
         )
         .delete(
-            "/v1/tenant/:tenant_id/timeline/:timeline_id/layer/:layer_file_name",
+            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/layer/:layer_file_name",
             |r| api_handler(r, evict_timeline_layer_handler),
         )
         .put("/v1/disk_usage_eviction/run", |r| {
@@ -1906,18 +1939,19 @@ pub fn make_router(
         .put("/v1/deletion_queue/flush", |r| {
             api_handler(r, deletion_queue_flush)
         })
-        .put("/v1/tenant/:tenant_id/break", |r| {
+        .put("/v1/tenant/:tenant_shard_id/break", |r| {
             testing_api_handler("set tenant state to broken", r, handle_tenant_break)
         })
         .get("/v1/panic", |r| api_handler(r, always_panic_handler))
         .post("/v1/tracing/event", |r| {
             testing_api_handler("emit a tracing event", r, post_tracing_event_handler)
         })
-        .get("/v1/tenant/:tenant_id/timeline/:timeline_id/getpage", |r| {
-            testing_api_handler("getpage@lsn", r, getpage_at_lsn_handler)
-        })
         .get(
-            "/v1/tenant/:tenant_id/timeline/:timeline_id/keyspace",
+            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/getpage",
+            |r| testing_api_handler("getpage@lsn", r, getpage_at_lsn_handler),
+        )
+        .get(
+            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/keyspace",
             |r| testing_api_handler("read out the keyspace", r, timeline_collect_keyspace),
         )
         .any(handler_404))
diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs
index 67d798c1d4..7cc0333ee5 100644
--- a/pageserver/src/metrics.rs
+++ b/pageserver/src/metrics.rs
@@ -650,7 +650,7 @@ static EVICTIONS_WITH_LOW_RESIDENCE_DURATION: Lazy<IntCounterVec> = Lazy::new(||
         "pageserver_evictions_with_low_residence_duration",
         "If a layer is evicted that was resident for less than `low_threshold`, it is counted to this counter. \
          Residence duration is determined using the `residence_duration_data_source`.",
-        &["tenant_id", "timeline_id", "residence_duration_data_source", "low_threshold_secs"]
+        &["tenant_id", "shard_id", "timeline_id", "residence_duration_data_source", "low_threshold_secs"]
     )
     .expect("failed to define a metric")
 });
@@ -714,10 +714,16 @@ impl EvictionsWithLowResidenceDurationBuilder {
         }
     }
 
-    fn build(&self, tenant_id: &str, timeline_id: &str) -> EvictionsWithLowResidenceDuration {
+    fn build(
+        &self,
+        tenant_id: &str,
+        shard_id: &str,
+        timeline_id: &str,
+    ) -> EvictionsWithLowResidenceDuration {
         let counter = EVICTIONS_WITH_LOW_RESIDENCE_DURATION
             .get_metric_with_label_values(&[
                 tenant_id,
+                shard_id,
                 timeline_id,
                 self.data_source,
                 &EvictionsWithLowResidenceDuration::threshold_label_value(self.threshold),
@@ -748,21 +754,24 @@ impl EvictionsWithLowResidenceDuration {
     pub fn change_threshold(
         &mut self,
         tenant_id: &str,
+        shard_id: &str,
         timeline_id: &str,
         new_threshold: Duration,
     ) {
         if new_threshold == self.threshold {
             return;
         }
-        let mut with_new =
-            EvictionsWithLowResidenceDurationBuilder::new(self.data_source, new_threshold)
-                .build(tenant_id, timeline_id);
+        let mut with_new = EvictionsWithLowResidenceDurationBuilder::new(
+            self.data_source,
+            new_threshold,
+        )
+        .build(tenant_id, shard_id, timeline_id);
         std::mem::swap(self, &mut with_new);
-        with_new.remove(tenant_id, timeline_id);
+        with_new.remove(tenant_id, shard_id, timeline_id);
     }
 
     // This could be a `Drop` impl, but, we need the `tenant_id` and `timeline_id`.
-    fn remove(&mut self, tenant_id: &str, timeline_id: &str) {
+    fn remove(&mut self, tenant_id: &str, shard_id: &str, timeline_id: &str) {
         let Some(_counter) = self.counter.take() else {
             return;
         };
@@ -771,6 +780,7 @@ impl EvictionsWithLowResidenceDuration {
 
         let removed = EVICTIONS_WITH_LOW_RESIDENCE_DURATION.remove_label_values(&[
             tenant_id,
+            shard_id,
             timeline_id,
             self.data_source,
             &threshold,
@@ -1603,6 +1613,7 @@ impl StorageTimeMetrics {
 #[derive(Debug)]
 pub struct TimelineMetrics {
     tenant_id: String,
+    shard_id: String,
     timeline_id: String,
     pub flush_time_histo: StorageTimeMetrics,
     pub compact_time_histo: StorageTimeMetrics,
@@ -1623,11 +1634,12 @@ pub struct TimelineMetrics {
 
 impl TimelineMetrics {
     pub fn new(
-        tenant_id: &TenantId,
+        tenant_shard_id: &TenantShardId,
         timeline_id: &TimelineId,
         evictions_with_low_residence_duration_builder: EvictionsWithLowResidenceDurationBuilder,
     ) -> Self {
-        let tenant_id = tenant_id.to_string();
+        let tenant_id = tenant_shard_id.tenant_id.to_string();
+        let shard_id = format!("{}", tenant_shard_id.shard_slug());
         let timeline_id = timeline_id.to_string();
         let flush_time_histo =
             StorageTimeMetrics::new(StorageTimeOperation::LayerFlush, &tenant_id, &timeline_id);
@@ -1664,11 +1676,12 @@ impl TimelineMetrics {
         let evictions = EVICTIONS
             .get_metric_with_label_values(&[&tenant_id, &timeline_id])
             .unwrap();
-        let evictions_with_low_residence_duration =
-            evictions_with_low_residence_duration_builder.build(&tenant_id, &timeline_id);
+        let evictions_with_low_residence_duration = evictions_with_low_residence_duration_builder
+            .build(&tenant_id, &shard_id, &timeline_id);
 
         TimelineMetrics {
             tenant_id,
+            shard_id,
             timeline_id,
             flush_time_histo,
             compact_time_histo,
@@ -1714,6 +1727,7 @@ impl Drop for TimelineMetrics {
     fn drop(&mut self) {
         let tenant_id = &self.tenant_id;
         let timeline_id = &self.timeline_id;
+        let shard_id = &self.shard_id;
         let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, timeline_id]);
         {
             RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(self.resident_physical_size_get());
@@ -1727,7 +1741,7 @@ impl Drop for TimelineMetrics {
         self.evictions_with_low_residence_duration
             .write()
             .unwrap()
-            .remove(tenant_id, timeline_id);
+            .remove(tenant_id, shard_id, timeline_id);
 
         // The following metrics are born outside of the TimelineMetrics lifecycle but still
         // removed at the end of it. The idea is to have the metrics outlive the
diff --git a/pageserver/src/page_cache.rs b/pageserver/src/page_cache.rs
index dbd85d2dcf..c3c98af406 100644
--- a/pageserver/src/page_cache.rs
+++ b/pageserver/src/page_cache.rs
@@ -28,7 +28,7 @@
 //! Page cache maps from a cache key to a buffer slot.
 //! The cache key uniquely identifies the piece of data that is being cached.
 //!
-//! The cache key for **materialized pages** is  [`TenantId`], [`TimelineId`], [`Key`], and [`Lsn`].
+//! The cache key for **materialized pages** is  [`TenantShardId`], [`TimelineId`], [`Key`], and [`Lsn`].
 //! Use [`PageCache::memorize_materialized_page`] and [`PageCache::lookup_materialized_page`] for fill & access.
 //!
 //! The cache key for **immutable file** pages is [`FileId`] and a block number.
@@ -83,10 +83,8 @@ use std::{
 
 use anyhow::Context;
 use once_cell::sync::OnceCell;
-use utils::{
-    id::{TenantId, TimelineId},
-    lsn::Lsn,
-};
+use pageserver_api::shard::TenantShardId;
+use utils::{id::TimelineId, lsn::Lsn};
 
 use crate::{
     context::RequestContext,
@@ -154,7 +152,13 @@ enum CacheKey {
 
 #[derive(Debug, PartialEq, Eq, Hash, Clone)]
 struct MaterializedPageHashKey {
-    tenant_id: TenantId,
+    /// Why is this TenantShardId rather than TenantId?
+    ///
+    /// Usually, the materialized value of a page@lsn is identical on any shard in the same tenant.  However, this
+    /// is not the case for certain internally-generated pages (e.g. relation sizes).  In future, we may make this
+    /// key smaller by omitting the shard, if we ensure that reads to such pages always skip the cache, or are
+    /// special-cased in some other way.
+    tenant_shard_id: TenantShardId,
     timeline_id: TimelineId,
     key: Key,
 }
@@ -378,7 +382,7 @@ impl PageCache {
     /// returned page.
     pub async fn lookup_materialized_page(
         &self,
-        tenant_id: TenantId,
+        tenant_shard_id: TenantShardId,
         timeline_id: TimelineId,
         key: &Key,
         lsn: Lsn,
@@ -395,7 +399,7 @@ impl PageCache {
 
         let mut cache_key = CacheKey::MaterializedPage {
             hash_key: MaterializedPageHashKey {
-                tenant_id,
+                tenant_shard_id,
                 timeline_id,
                 key: *key,
             },
@@ -436,7 +440,7 @@ impl PageCache {
     ///
     pub async fn memorize_materialized_page(
         &self,
-        tenant_id: TenantId,
+        tenant_shard_id: TenantShardId,
         timeline_id: TimelineId,
         key: Key,
         lsn: Lsn,
@@ -444,7 +448,7 @@ impl PageCache {
     ) -> anyhow::Result<()> {
         let cache_key = CacheKey::MaterializedPage {
             hash_key: MaterializedPageHashKey {
-                tenant_id,
+                tenant_shard_id,
                 timeline_id,
                 key,
             },
diff --git a/pageserver/src/pgdatadir_mapping.rs b/pageserver/src/pgdatadir_mapping.rs
index c653f0b7ea..b81037ae47 100644
--- a/pageserver/src/pgdatadir_mapping.rs
+++ b/pageserver/src/pgdatadir_mapping.rs
@@ -822,10 +822,7 @@ impl<'a> DatadirModification<'a> {
         self.put(DBDIR_KEY, Value::Image(buf.into()));
 
         // Create AuxFilesDirectory
-        let buf = AuxFilesDirectory::ser(&AuxFilesDirectory {
-            files: HashMap::new(),
-        })?;
-        self.put(AUX_FILES_KEY, Value::Image(Bytes::from(buf)));
+        self.init_aux_dir()?;
 
         let buf = TwoPhaseDirectory::ser(&TwoPhaseDirectory {
             xids: HashSet::new(),
@@ -933,10 +930,7 @@ impl<'a> DatadirModification<'a> {
             self.put(DBDIR_KEY, Value::Image(buf.into()));
 
             // Create AuxFilesDirectory as well
-            let buf = AuxFilesDirectory::ser(&AuxFilesDirectory {
-                files: HashMap::new(),
-            })?;
-            self.put(AUX_FILES_KEY, Value::Image(Bytes::from(buf)));
+            self.init_aux_dir()?;
         }
         if r.is_none() {
             // Create RelDirectory
@@ -1261,6 +1255,14 @@ impl<'a> DatadirModification<'a> {
         Ok(())
     }
 
+    pub fn init_aux_dir(&mut self) -> anyhow::Result<()> {
+        let buf = AuxFilesDirectory::ser(&AuxFilesDirectory {
+            files: HashMap::new(),
+        })?;
+        self.put(AUX_FILES_KEY, Value::Image(Bytes::from(buf)));
+        Ok(())
+    }
+
     pub async fn put_file(
         &mut self,
         path: &str,
@@ -1767,6 +1769,13 @@ const AUX_FILES_KEY: Key = Key {
 // Reverse mappings for a few Keys.
 // These are needed by WAL redo manager.
 
+// AUX_FILES currently stores only data for logical replication (slots etc), and
+// we don't preserve these on a branch because safekeepers can't follow timeline
+// switch (and generally it likely should be optional), so ignore these.
+pub fn is_inherited_key(key: Key) -> bool {
+    key != AUX_FILES_KEY
+}
+
 pub fn key_to_rel_block(key: Key) -> anyhow::Result<(RelTag, BlockNumber)> {
     Ok(match key.field1 {
         0x00 => (
diff --git a/pageserver/src/task_mgr.rs b/pageserver/src/task_mgr.rs
index 4270b6edb0..8747d9ad50 100644
--- a/pageserver/src/task_mgr.rs
+++ b/pageserver/src/task_mgr.rs
@@ -42,6 +42,7 @@ use std::sync::atomic::{AtomicU64, Ordering};
 use std::sync::{Arc, Mutex};
 
 use futures::FutureExt;
+use pageserver_api::shard::TenantShardId;
 use tokio::runtime::Runtime;
 use tokio::task::JoinHandle;
 use tokio::task_local;
@@ -51,7 +52,7 @@ use tracing::{debug, error, info, warn};
 
 use once_cell::sync::Lazy;
 
-use utils::id::{TenantId, TimelineId};
+use utils::id::TimelineId;
 
 use crate::shutdown_pageserver;
 
@@ -317,7 +318,7 @@ struct PageServerTask {
 
     /// Tasks may optionally be launched for a particular tenant/timeline, enabling
     /// later cancelling tasks for that tenant/timeline in [`shutdown_tasks`]
-    tenant_id: Option<TenantId>,
+    tenant_shard_id: Option<TenantShardId>,
     timeline_id: Option<TimelineId>,
 
     mutable: Mutex<MutableTaskState>,
@@ -329,7 +330,7 @@ struct PageServerTask {
 pub fn spawn<F>(
     runtime: &tokio::runtime::Handle,
     kind: TaskKind,
-    tenant_id: Option<TenantId>,
+    tenant_shard_id: Option<TenantShardId>,
     timeline_id: Option<TimelineId>,
     name: &str,
     shutdown_process_on_error: bool,
@@ -345,7 +346,7 @@ where
         kind,
         name: name.to_string(),
         cancel: cancel.clone(),
-        tenant_id,
+        tenant_shard_id,
         timeline_id,
         mutable: Mutex::new(MutableTaskState { join_handle: None }),
     });
@@ -424,28 +425,28 @@ async fn task_finish(
             Ok(Err(err)) => {
                 if shutdown_process_on_error {
                     error!(
-                        "Shutting down: task '{}' tenant_id: {:?}, timeline_id: {:?} exited with error: {:?}",
-                        task_name, task.tenant_id, task.timeline_id, err
+                        "Shutting down: task '{}' tenant_shard_id: {:?}, timeline_id: {:?} exited with error: {:?}",
+                        task_name, task.tenant_shard_id, task.timeline_id, err
                     );
                     shutdown_process = true;
                 } else {
                     error!(
-                        "Task '{}' tenant_id: {:?}, timeline_id: {:?} exited with error: {:?}",
-                        task_name, task.tenant_id, task.timeline_id, err
+                        "Task '{}' tenant_shard_id: {:?}, timeline_id: {:?} exited with error: {:?}",
+                        task_name, task.tenant_shard_id, task.timeline_id, err
                     );
                 }
             }
             Err(err) => {
                 if shutdown_process_on_error {
                     error!(
-                        "Shutting down: task '{}' tenant_id: {:?}, timeline_id: {:?} panicked: {:?}",
-                        task_name, task.tenant_id, task.timeline_id, err
+                        "Shutting down: task '{}' tenant_shard_id: {:?}, timeline_id: {:?} panicked: {:?}",
+                        task_name, task.tenant_shard_id, task.timeline_id, err
                     );
                     shutdown_process = true;
                 } else {
                     error!(
-                        "Task '{}' tenant_id: {:?}, timeline_id: {:?} panicked: {:?}",
-                        task_name, task.tenant_id, task.timeline_id, err
+                        "Task '{}' tenant_shard_id: {:?}, timeline_id: {:?} panicked: {:?}",
+                        task_name, task.tenant_shard_id, task.timeline_id, err
                     );
                 }
             }
@@ -467,11 +468,11 @@ async fn task_finish(
 ///
 /// Or to shut down all tasks for given timeline:
 ///
-///   shutdown_tasks(None, Some(tenant_id), Some(timeline_id))
+///   shutdown_tasks(None, Some(tenant_shard_id), Some(timeline_id))
 ///
 pub async fn shutdown_tasks(
     kind: Option<TaskKind>,
-    tenant_id: Option<TenantId>,
+    tenant_shard_id: Option<TenantShardId>,
     timeline_id: Option<TimelineId>,
 ) {
     let mut victim_tasks = Vec::new();
@@ -480,35 +481,35 @@ pub async fn shutdown_tasks(
         let tasks = TASKS.lock().unwrap();
         for task in tasks.values() {
             if (kind.is_none() || Some(task.kind) == kind)
-                && (tenant_id.is_none() || task.tenant_id == tenant_id)
+                && (tenant_shard_id.is_none() || task.tenant_shard_id == tenant_shard_id)
                 && (timeline_id.is_none() || task.timeline_id == timeline_id)
             {
                 task.cancel.cancel();
                 victim_tasks.push((
                     Arc::clone(task),
                     task.kind,
-                    task.tenant_id,
+                    task.tenant_shard_id,
                     task.timeline_id,
                 ));
             }
         }
     }
 
-    let log_all = kind.is_none() && tenant_id.is_none() && timeline_id.is_none();
+    let log_all = kind.is_none() && tenant_shard_id.is_none() && timeline_id.is_none();
 
-    for (task, task_kind, tenant_id, timeline_id) in victim_tasks {
+    for (task, task_kind, tenant_shard_id, timeline_id) in victim_tasks {
         let join_handle = {
             let mut task_mut = task.mutable.lock().unwrap();
             task_mut.join_handle.take()
         };
         if let Some(mut join_handle) = join_handle {
             if log_all {
-                if tenant_id.is_none() {
+                if tenant_shard_id.is_none() {
                     // there are quite few of these
                     info!(name = task.name, kind = ?task_kind, "stopping global task");
                 } else {
                     // warn to catch these in tests; there shouldn't be any
-                    warn!(name = task.name, tenant_id = ?tenant_id, timeline_id = ?timeline_id, kind = ?task_kind, "stopping left-over");
+                    warn!(name = task.name, tenant_shard_id = ?tenant_shard_id, timeline_id = ?timeline_id, kind = ?task_kind, "stopping left-over");
                 }
             }
             if tokio::time::timeout(std::time::Duration::from_secs(1), &mut join_handle)
@@ -517,12 +518,13 @@ pub async fn shutdown_tasks(
             {
                 // allow some time to elapse before logging to cut down the number of log
                 // lines.
-                info!("waiting for {} to shut down", task.name);
+                info!("waiting for task {} to shut down", task.name);
                 // we never handled this return value, but:
                 // - we don't deschedule which would lead to is_cancelled
                 // - panics are already logged (is_panicked)
                 // - task errors are already logged in the wrapper
                 let _ = join_handle.await;
+                info!("task {} completed", task.name);
             }
         } else {
             // Possibly one of:
diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs
index 48f71d7747..a8e8b4cbfa 100644
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -608,7 +608,7 @@ impl Tenant {
         task_mgr::spawn(
             &tokio::runtime::Handle::current(),
             TaskKind::Attach,
-            Some(tenant_shard_id.tenant_id),
+            Some(tenant_shard_id),
             None,
             "attach tenant",
             false,
@@ -1917,7 +1917,7 @@ impl Tenant {
         //
         // this will additionally shutdown and await all timeline tasks.
         tracing::debug!("Waiting for tasks...");
-        task_mgr::shutdown_tasks(None, Some(self.tenant_shard_id.tenant_id), None).await;
+        task_mgr::shutdown_tasks(None, Some(self.tenant_shard_id), None).await;
 
         // Wait for any in-flight operations to complete
         self.gate.close().await;
diff --git a/pageserver/src/tenant/delete.rs b/pageserver/src/tenant/delete.rs
index b8d6d0a321..acd311ace6 100644
--- a/pageserver/src/tenant/delete.rs
+++ b/pageserver/src/tenant/delete.rs
@@ -463,7 +463,7 @@ impl DeleteTenantFlow {
         task_mgr::spawn(
             task_mgr::BACKGROUND_RUNTIME.handle(),
             TaskKind::TimelineDeletionWorker,
-            Some(tenant_shard_id.tenant_id),
+            Some(tenant_shard_id),
             None,
             "tenant_delete",
             false,
@@ -550,7 +550,7 @@ impl DeleteTenantFlow {
                 // we encounter an InProgress marker, yield the barrier it contains and wait on it.
                 let barrier = {
                     let mut locked = tenants.write().unwrap();
-                    let removed = locked.remove(&tenant.tenant_shard_id.tenant_id);
+                    let removed = locked.remove(tenant.tenant_shard_id);
 
                     // FIXME: we should not be modifying this from outside of mgr.rs.
                     // This will go away when we simplify deletion (https://github.com/neondatabase/neon/issues/5080)
diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs
index 8466fe7fca..4d7bd4259f 100644
--- a/pageserver/src/tenant/mgr.rs
+++ b/pageserver/src/tenant/mgr.rs
@@ -98,33 +98,6 @@ pub(crate) enum TenantsMap {
     ShuttingDown(BTreeMap<TenantShardId, TenantSlot>),
 }
 
-/// Helper for mapping shard-unaware functions to a sharding-aware map
-/// TODO(sharding): all users of this must be made shard-aware.
-fn exactly_one_or_none<'a>(
-    map: &'a BTreeMap<TenantShardId, TenantSlot>,
-    tenant_id: &TenantId,
-) -> Option<(&'a TenantShardId, &'a TenantSlot)> {
-    let mut slots = map.range(TenantShardId::tenant_range(*tenant_id));
-
-    // Retrieve the first two slots in the range: if both are populated, we must panic because the caller
-    // needs a shard-naive view of the world in which only one slot can exist for a TenantId at a time.
-    let slot_a = slots.next();
-    let slot_b = slots.next();
-    match (slot_a, slot_b) {
-        (None, None) => None,
-        (Some(slot), None) => {
-            // Exactly one matching slot
-            Some(slot)
-        }
-        (Some(_slot_a), Some(_slot_b)) => {
-            // Multiple shards for this tenant: cannot handle this yet.
-            // TODO(sharding): callers of get() should be shard-aware.
-            todo!("Attaching multiple shards in teh same tenant to the same pageserver")
-        }
-        (None, Some(_)) => unreachable!(),
-    }
-}
-
 pub(crate) enum TenantsMapRemoveResult {
     Occupied(TenantSlot),
     Vacant,
@@ -147,12 +120,11 @@ impl TenantsMap {
     /// Convenience function for typical usage, where we want to get a `Tenant` object, for
     /// working with attached tenants.  If the TenantId is in the map but in Secondary state,
     /// None is returned.
-    pub(crate) fn get(&self, tenant_id: &TenantId) -> Option<&Arc<Tenant>> {
+    pub(crate) fn get(&self, tenant_shard_id: &TenantShardId) -> Option<&Arc<Tenant>> {
         match self {
             TenantsMap::Initializing => None,
             TenantsMap::Open(m) | TenantsMap::ShuttingDown(m) => {
-                // TODO(sharding): callers of get() should be shard-aware.
-                exactly_one_or_none(m, tenant_id).and_then(|(_, slot)| slot.get_attached())
+                m.get(tenant_shard_id).and_then(|slot| slot.get_attached())
             }
         }
     }
@@ -204,25 +176,19 @@ impl TenantsMap {
     ///
     /// The normal way to remove a tenant is using a SlotGuard, which will gracefully remove the guarded
     /// slot if the enclosed tenant is shutdown.
-    pub(crate) fn remove(&mut self, tenant_id: &TenantId) -> TenantsMapRemoveResult {
+    pub(crate) fn remove(&mut self, tenant_shard_id: TenantShardId) -> TenantsMapRemoveResult {
         use std::collections::btree_map::Entry;
         match self {
             TenantsMap::Initializing => TenantsMapRemoveResult::Vacant,
-            TenantsMap::Open(m) | TenantsMap::ShuttingDown(m) => {
-                let key = exactly_one_or_none(m, tenant_id).map(|(k, _)| *k);
-                match key {
-                    Some(key) => match m.entry(key) {
-                        Entry::Occupied(entry) => match entry.get() {
-                            TenantSlot::InProgress(barrier) => {
-                                TenantsMapRemoveResult::InProgress(barrier.clone())
-                            }
-                            _ => TenantsMapRemoveResult::Occupied(entry.remove()),
-                        },
-                        Entry::Vacant(_entry) => TenantsMapRemoveResult::Vacant,
-                    },
-                    None => TenantsMapRemoveResult::Vacant,
-                }
-            }
+            TenantsMap::Open(m) | TenantsMap::ShuttingDown(m) => match m.entry(tenant_shard_id) {
+                Entry::Occupied(entry) => match entry.get() {
+                    TenantSlot::InProgress(barrier) => {
+                        TenantsMapRemoveResult::InProgress(barrier.clone())
+                    }
+                    _ => TenantsMapRemoveResult::Occupied(entry.remove()),
+                },
+                Entry::Vacant(_entry) => TenantsMapRemoveResult::Vacant,
+            },
         }
     }
 
@@ -822,14 +788,16 @@ pub(crate) async fn set_new_tenant_config(
     new_tenant_conf: TenantConfOpt,
     tenant_id: TenantId,
 ) -> Result<(), SetNewTenantConfigError> {
+    // Legacy API: does not support sharding
+    let tenant_shard_id = TenantShardId::unsharded(tenant_id);
+
     info!("configuring tenant {tenant_id}");
-    let tenant = get_tenant(tenant_id, true)?;
+    let tenant = get_tenant(tenant_shard_id, true)?;
 
     // This is a legacy API that only operates on attached tenants: the preferred
     // API to use is the location_config/ endpoint, which lets the caller provide
     // the full LocationConf.
     let location_conf = LocationConf::attached_single(new_tenant_conf, tenant.generation);
-    let tenant_shard_id = TenantShardId::unsharded(tenant_id);
 
     Tenant::persist_tenant_config(conf, &tenant_shard_id, &location_conf)
         .await
@@ -1143,14 +1111,11 @@ pub(crate) enum GetTenantError {
 ///
 /// This method is cancel-safe.
 pub(crate) fn get_tenant(
-    tenant_id: TenantId,
+    tenant_shard_id: TenantShardId,
     active_only: bool,
 ) -> Result<Arc<Tenant>, GetTenantError> {
     let locked = TENANTS.read().unwrap();
 
-    // TODO(sharding): make all callers of get_tenant shard-aware
-    let tenant_shard_id = TenantShardId::unsharded(tenant_id);
-
     let peek_slot = tenant_map_peek_slot(&locked, &tenant_shard_id, TenantSlotPeekMode::Read)?;
 
     match peek_slot {
@@ -1162,14 +1127,18 @@ pub(crate) fn get_tenant(
             TenantState::Active => Ok(Arc::clone(tenant)),
             _ => {
                 if active_only {
-                    Err(GetTenantError::NotActive(tenant_id))
+                    Err(GetTenantError::NotActive(tenant_shard_id.tenant_id))
                 } else {
                     Ok(Arc::clone(tenant))
                 }
             }
         },
-        Some(TenantSlot::InProgress(_)) => Err(GetTenantError::NotActive(tenant_id)),
-        None | Some(TenantSlot::Secondary) => Err(GetTenantError::NotFound(tenant_id)),
+        Some(TenantSlot::InProgress(_)) => {
+            Err(GetTenantError::NotActive(tenant_shard_id.tenant_id))
+        }
+        None | Some(TenantSlot::Secondary) => {
+            Err(GetTenantError::NotFound(tenant_shard_id.tenant_id))
+        }
     }
 }
 
@@ -1542,7 +1511,8 @@ pub(crate) enum TenantMapListError {
 ///
 /// Get list of tenants, for the mgmt API
 ///
-pub(crate) async fn list_tenants() -> Result<Vec<(TenantId, TenantState)>, TenantMapListError> {
+pub(crate) async fn list_tenants() -> Result<Vec<(TenantShardId, TenantState)>, TenantMapListError>
+{
     let tenants = TENANTS.read().unwrap();
     let m = match &*tenants {
         TenantsMap::Initializing => return Err(TenantMapListError::Initializing),
@@ -1550,12 +1520,10 @@ pub(crate) async fn list_tenants() -> Result<Vec<(TenantId, TenantState)>, Tenan
     };
     Ok(m.iter()
         .filter_map(|(id, tenant)| match tenant {
-            TenantSlot::Attached(tenant) => Some((id, tenant.current_state())),
+            TenantSlot::Attached(tenant) => Some((*id, tenant.current_state())),
             TenantSlot::Secondary => None,
             TenantSlot::InProgress(_) => None,
         })
-        // TODO(sharding): make callers of this function shard-aware
-        .map(|(k, v)| (k.tenant_id, v))
         .collect())
 }
 
@@ -2089,21 +2057,19 @@ use {
 };
 
 pub(crate) async fn immediate_gc(
-    tenant_id: TenantId,
+    tenant_shard_id: TenantShardId,
     timeline_id: TimelineId,
     gc_req: TimelineGcRequest,
     cancel: CancellationToken,
     ctx: &RequestContext,
 ) -> Result<tokio::sync::oneshot::Receiver<Result<GcResult, anyhow::Error>>, ApiError> {
     let guard = TENANTS.read().unwrap();
-    let tenant = guard
-        .get(&tenant_id)
-        .map(Arc::clone)
-        .with_context(|| format!("tenant {tenant_id}"))
-        .map_err(|e| ApiError::NotFound(e.into()))?;
 
-    // TODO(sharding): make callers of this function shard-aware
-    let tenant_shard_id = TenantShardId::unsharded(tenant_id);
+    let tenant = guard
+        .get(&tenant_shard_id)
+        .map(Arc::clone)
+        .with_context(|| format!("tenant {tenant_shard_id}"))
+        .map_err(|e| ApiError::NotFound(e.into()))?;
 
     let gc_horizon = gc_req.gc_horizon.unwrap_or_else(|| tenant.get_gc_horizon());
     // Use tenant's pitr setting
@@ -2116,9 +2082,9 @@ pub(crate) async fn immediate_gc(
     task_mgr::spawn(
         &tokio::runtime::Handle::current(),
         TaskKind::GarbageCollector,
-        Some(tenant_id),
+        Some(tenant_shard_id),
         Some(timeline_id),
-        &format!("timeline_gc_handler garbage collection run for tenant {tenant_id} timeline {timeline_id}"),
+        &format!("timeline_gc_handler garbage collection run for tenant {tenant_shard_id} timeline {timeline_id}"),
         false,
         async move {
             fail::fail_point!("immediate_gc_task_pre");
diff --git a/pageserver/src/tenant/remote_timeline_client.rs b/pageserver/src/tenant/remote_timeline_client.rs
index 03600cf5ae..3765ff6e7a 100644
--- a/pageserver/src/tenant/remote_timeline_client.rs
+++ b/pageserver/src/tenant/remote_timeline_client.rs
@@ -1223,7 +1223,7 @@ impl RemoteTimelineClient {
             task_mgr::spawn(
                 &self.runtime,
                 TaskKind::RemoteUploadTask,
-                Some(self.tenant_shard_id.tenant_id),
+                Some(self.tenant_shard_id),
                 Some(self.timeline_id),
                 "remote upload",
                 false,
diff --git a/pageserver/src/tenant/storage_layer/layer.rs b/pageserver/src/tenant/storage_layer/layer.rs
index 126d4d5563..112128ead8 100644
--- a/pageserver/src/tenant/storage_layer/layer.rs
+++ b/pageserver/src/tenant/storage_layer/layer.rs
@@ -837,7 +837,7 @@ impl LayerInner {
         crate::task_mgr::spawn(
             &tokio::runtime::Handle::current(),
             crate::task_mgr::TaskKind::RemoteDownloadTask,
-            Some(self.desc.tenant_shard_id.tenant_id),
+            Some(self.desc.tenant_shard_id),
             Some(self.desc.timeline_id),
             &task_name,
             false,
diff --git a/pageserver/src/tenant/tasks.rs b/pageserver/src/tenant/tasks.rs
index bc404c41a0..dc23030218 100644
--- a/pageserver/src/tenant/tasks.rs
+++ b/pageserver/src/tenant/tasks.rs
@@ -87,13 +87,13 @@ pub fn start_background_loops(
     tenant: &Arc<Tenant>,
     background_jobs_can_start: Option<&completion::Barrier>,
 ) {
-    let tenant_id = tenant.tenant_shard_id.tenant_id;
+    let tenant_shard_id = tenant.tenant_shard_id;
     task_mgr::spawn(
         BACKGROUND_RUNTIME.handle(),
         TaskKind::Compaction,
-        Some(tenant_id),
+        Some(tenant_shard_id),
         None,
-        &format!("compactor for tenant {tenant_id}"),
+        &format!("compactor for tenant {tenant_shard_id}"),
         false,
         {
             let tenant = Arc::clone(tenant);
@@ -105,7 +105,7 @@ pub fn start_background_loops(
                     _ = completion::Barrier::maybe_wait(background_jobs_can_start) => {}
                 };
                 compaction_loop(tenant, cancel)
-                    .instrument(info_span!("compaction_loop", tenant_id = %tenant_id))
+                    .instrument(info_span!("compaction_loop", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug()))
                     .await;
                 Ok(())
             }
@@ -114,9 +114,9 @@ pub fn start_background_loops(
     task_mgr::spawn(
         BACKGROUND_RUNTIME.handle(),
         TaskKind::GarbageCollector,
-        Some(tenant_id),
+        Some(tenant_shard_id),
         None,
-        &format!("garbage collector for tenant {tenant_id}"),
+        &format!("garbage collector for tenant {tenant_shard_id}"),
         false,
         {
             let tenant = Arc::clone(tenant);
@@ -128,7 +128,7 @@ pub fn start_background_loops(
                     _ = completion::Barrier::maybe_wait(background_jobs_can_start) => {}
                 };
                 gc_loop(tenant, cancel)
-                    .instrument(info_span!("gc_loop", tenant_id = %tenant_id))
+                    .instrument(info_span!("gc_loop", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug()))
                     .await;
                 Ok(())
             }
diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs
index 551b66b77d..81dbc04793 100644
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -66,7 +66,7 @@ use crate::metrics::{
     TimelineMetrics, MATERIALIZED_PAGE_CACHE_HIT, MATERIALIZED_PAGE_CACHE_HIT_DIRECT,
 };
 use crate::pgdatadir_mapping::LsnForTimestamp;
-use crate::pgdatadir_mapping::{is_rel_fsm_block_key, is_rel_vm_block_key};
+use crate::pgdatadir_mapping::{is_inherited_key, is_rel_fsm_block_key, is_rel_vm_block_key};
 use crate::pgdatadir_mapping::{BlockNumber, CalculateLogicalSizeError};
 use crate::tenant::config::{EvictionPolicy, TenantConfOpt};
 use pageserver_api::reltag::RelTag;
@@ -77,7 +77,7 @@ use postgres_ffi::to_pg_timestamp;
 use utils::{
     completion,
     generation::Generation,
-    id::{TenantId, TimelineId},
+    id::TimelineId,
     lsn::{AtomicLsn, Lsn, RecordLsn},
     seqwait::SeqWait,
     simple_rcu::{Rcu, RcuReadGuard},
@@ -926,7 +926,7 @@ impl Timeline {
         tracing::debug!("Waiting for WalReceiverManager...");
         task_mgr::shutdown_tasks(
             Some(TaskKind::WalReceiverManager),
-            Some(self.tenant_shard_id.tenant_id),
+            Some(self.tenant_shard_id),
             Some(self.timeline_id),
         )
         .await;
@@ -977,7 +977,7 @@ impl Timeline {
         // Shut down the layer flush task before the remote client, as one depends on the other
         task_mgr::shutdown_tasks(
             Some(TaskKind::LayerFlushTask),
-            Some(self.tenant_shard_id.tenant_id),
+            Some(self.tenant_shard_id),
             Some(self.timeline_id),
         )
         .await;
@@ -995,12 +995,7 @@ impl Timeline {
 
         tracing::debug!("Waiting for tasks...");
 
-        task_mgr::shutdown_tasks(
-            None,
-            Some(self.tenant_shard_id.tenant_id),
-            Some(self.timeline_id),
-        )
-        .await;
+        task_mgr::shutdown_tasks(None, Some(self.tenant_shard_id), Some(self.timeline_id)).await;
 
         // Finally wait until any gate-holders are complete
         self.gate.close().await;
@@ -1314,16 +1309,20 @@ impl Timeline {
                 &self.conf.default_tenant_conf,
             );
 
-            // TODO(sharding): make evictions state shard aware
-            // (https://github.com/neondatabase/neon/issues/5953)
             let tenant_id_str = self.tenant_shard_id.tenant_id.to_string();
+            let shard_id_str = format!("{}", self.tenant_shard_id.shard_slug());
 
             let timeline_id_str = self.timeline_id.to_string();
             self.metrics
                 .evictions_with_low_residence_duration
                 .write()
                 .unwrap()
-                .change_threshold(&tenant_id_str, &timeline_id_str, new_threshold);
+                .change_threshold(
+                    &tenant_id_str,
+                    &shard_id_str,
+                    &timeline_id_str,
+                    new_threshold,
+                );
         }
     }
 
@@ -1395,7 +1394,7 @@ impl Timeline {
                 ancestor_lsn: metadata.ancestor_lsn(),
 
                 metrics: TimelineMetrics::new(
-                    &tenant_shard_id.tenant_id,
+                    &tenant_shard_id,
                     &timeline_id,
                     crate::metrics::EvictionsWithLowResidenceDurationBuilder::new(
                         "mtime",
@@ -1496,7 +1495,7 @@ impl Timeline {
         task_mgr::spawn(
             task_mgr::BACKGROUND_RUNTIME.handle(),
             task_mgr::TaskKind::LayerFlushTask,
-            Some(self.tenant_shard_id.tenant_id),
+            Some(self.tenant_shard_id),
             Some(self.timeline_id),
             "layer flush task",
             false,
@@ -1847,7 +1846,7 @@ impl Timeline {
         task_mgr::spawn(
             task_mgr::BACKGROUND_RUNTIME.handle(),
             task_mgr::TaskKind::InitialLogicalSizeCalculation,
-            Some(self.tenant_shard_id.tenant_id),
+            Some(self.tenant_shard_id),
             Some(self.timeline_id),
             "initial size calculation",
             false,
@@ -2020,7 +2019,7 @@ impl Timeline {
         task_mgr::spawn(
             task_mgr::BACKGROUND_RUNTIME.handle(),
             task_mgr::TaskKind::OndemandLogicalSizeCalculation,
-            Some(self.tenant_shard_id.tenant_id),
+            Some(self.tenant_shard_id),
             Some(self.timeline_id),
             "ondemand logical size calculation",
             false,
@@ -2279,7 +2278,7 @@ impl Timeline {
             }
 
             // Recurse into ancestor if needed
-            if Lsn(cont_lsn.0 - 1) <= timeline.ancestor_lsn {
+            if is_inherited_key(key) && Lsn(cont_lsn.0 - 1) <= timeline.ancestor_lsn {
                 trace!(
                     "going into ancestor {}, cont_lsn is {}",
                     timeline.ancestor_lsn,
@@ -2461,13 +2460,7 @@ impl Timeline {
         // FIXME: It's pointless to check the cache for things that are not 8kB pages.
         // We should look at the key to determine if it's a cacheable object
         let (lsn, read_guard) = cache
-            .lookup_materialized_page(
-                self.tenant_shard_id.tenant_id,
-                self.timeline_id,
-                key,
-                lsn,
-                ctx,
-            )
+            .lookup_materialized_page(self.tenant_shard_id, self.timeline_id, key, lsn, ctx)
             .await?;
         let img = Bytes::from(read_guard.to_vec());
         Some((lsn, img))
@@ -3209,7 +3202,7 @@ impl DurationRecorder {
 #[derive(Default)]
 struct CompactLevel0Phase1StatsBuilder {
     version: Option<u64>,
-    tenant_id: Option<TenantId>,
+    tenant_id: Option<TenantShardId>,
     timeline_id: Option<TimelineId>,
     read_lock_acquisition_micros: DurationRecorder,
     read_lock_held_spawn_blocking_startup_micros: DurationRecorder,
@@ -3226,7 +3219,7 @@ struct CompactLevel0Phase1StatsBuilder {
 #[derive(serde::Serialize)]
 struct CompactLevel0Phase1Stats {
     version: u64,
-    tenant_id: TenantId,
+    tenant_id: TenantShardId,
     timeline_id: TimelineId,
     read_lock_acquisition_micros: RecordedDuration,
     read_lock_held_spawn_blocking_startup_micros: RecordedDuration,
@@ -3745,7 +3738,7 @@ impl Timeline {
             let ctx = ctx.attached_child();
             let mut stats = CompactLevel0Phase1StatsBuilder {
                 version: Some(2),
-                tenant_id: Some(self.tenant_shard_id.tenant_id),
+                tenant_id: Some(self.tenant_shard_id),
                 timeline_id: Some(self.timeline_id),
                 ..Default::default()
             };
@@ -4207,7 +4200,7 @@ impl Timeline {
                     let cache = page_cache::get();
                     if let Err(e) = cache
                         .memorize_materialized_page(
-                            self.tenant_shard_id.tenant_id,
+                            self.tenant_shard_id,
                             self.timeline_id,
                             key,
                             last_rec_lsn,
@@ -4251,7 +4244,7 @@ impl Timeline {
         let task_id = task_mgr::spawn(
             task_mgr::BACKGROUND_RUNTIME.handle(),
             task_mgr::TaskKind::DownloadAllRemoteLayers,
-            Some(self.tenant_shard_id.tenant_id),
+            Some(self.tenant_shard_id),
             Some(self.timeline_id),
             "download all remote layers task",
             false,
diff --git a/pageserver/src/tenant/timeline/delete.rs b/pageserver/src/tenant/timeline/delete.rs
index 2a103a7ff4..be873181d9 100644
--- a/pageserver/src/tenant/timeline/delete.rs
+++ b/pageserver/src/tenant/timeline/delete.rs
@@ -43,7 +43,7 @@ async fn stop_tasks(timeline: &Timeline) -> Result<(), DeleteTimelineError> {
     // Shut down the layer flush task before the remote client, as one depends on the other
     task_mgr::shutdown_tasks(
         Some(TaskKind::LayerFlushTask),
-        Some(timeline.tenant_shard_id.tenant_id),
+        Some(timeline.tenant_shard_id),
         Some(timeline.timeline_id),
     )
     .await;
@@ -71,7 +71,7 @@ async fn stop_tasks(timeline: &Timeline) -> Result<(), DeleteTimelineError> {
     info!("waiting for timeline tasks to shutdown");
     task_mgr::shutdown_tasks(
         None,
-        Some(timeline.tenant_shard_id.tenant_id),
+        Some(timeline.tenant_shard_id),
         Some(timeline.timeline_id),
     )
     .await;
@@ -528,7 +528,7 @@ impl DeleteTimelineFlow {
         task_mgr::spawn(
             task_mgr::BACKGROUND_RUNTIME.handle(),
             TaskKind::TimelineDeletionWorker,
-            Some(tenant_shard_id.tenant_id),
+            Some(tenant_shard_id),
             Some(timeline_id),
             "timeline_delete",
             false,
diff --git a/pageserver/src/tenant/timeline/eviction_task.rs b/pageserver/src/tenant/timeline/eviction_task.rs
index 3fe4bc0f83..020c5a9e9f 100644
--- a/pageserver/src/tenant/timeline/eviction_task.rs
+++ b/pageserver/src/tenant/timeline/eviction_task.rs
@@ -60,7 +60,7 @@ impl Timeline {
         task_mgr::spawn(
             BACKGROUND_RUNTIME.handle(),
             TaskKind::Eviction,
-            Some(self.tenant_shard_id.tenant_id),
+            Some(self.tenant_shard_id),
             Some(self.timeline_id),
             &format!(
                 "layer eviction for {}/{}",
@@ -343,7 +343,7 @@ impl Timeline {
         // Make one of the tenant's timelines draw the short straw and run the calculation.
         // The others wait until the calculation is done so that they take into account the
         // imitated accesses that the winner made.
-        let tenant = match crate::tenant::mgr::get_tenant(self.tenant_shard_id.tenant_id, true) {
+        let tenant = match crate::tenant::mgr::get_tenant(self.tenant_shard_id, true) {
             Ok(t) => t,
             Err(_) => {
                 return ControlFlow::Break(());
diff --git a/pageserver/src/tenant/timeline/walreceiver.rs b/pageserver/src/tenant/timeline/walreceiver.rs
index 04ff8602d6..e32265afb5 100644
--- a/pageserver/src/tenant/timeline/walreceiver.rs
+++ b/pageserver/src/tenant/timeline/walreceiver.rs
@@ -30,6 +30,7 @@ use crate::tenant::timeline::walreceiver::connection_manager::{
     connection_manager_loop_step, ConnectionManagerState,
 };
 
+use pageserver_api::shard::TenantShardId;
 use std::future::Future;
 use std::num::NonZeroU64;
 use std::ops::ControlFlow;
@@ -41,7 +42,7 @@ use tokio::sync::watch;
 use tokio_util::sync::CancellationToken;
 use tracing::*;
 
-use utils::id::TenantTimelineId;
+use utils::id::TimelineId;
 
 use self::connection_manager::ConnectionManagerStatus;
 
@@ -60,7 +61,8 @@ pub struct WalReceiverConf {
 }
 
 pub struct WalReceiver {
-    timeline: TenantTimelineId,
+    tenant_shard_id: TenantShardId,
+    timeline_id: TimelineId,
     manager_status: Arc<std::sync::RwLock<Option<ConnectionManagerStatus>>>,
 }
 
@@ -71,7 +73,7 @@ impl WalReceiver {
         mut broker_client: BrokerClientChannel,
         ctx: &RequestContext,
     ) -> Self {
-        let tenant_id = timeline.tenant_shard_id.tenant_id;
+        let tenant_shard_id = timeline.tenant_shard_id;
         let timeline_id = timeline.timeline_id;
         let walreceiver_ctx =
             ctx.detached_child(TaskKind::WalReceiverManager, DownloadBehavior::Error);
@@ -81,9 +83,9 @@ impl WalReceiver {
         task_mgr::spawn(
             WALRECEIVER_RUNTIME.handle(),
             TaskKind::WalReceiverManager,
-            Some(tenant_id),
+            Some(timeline.tenant_shard_id),
             Some(timeline_id),
-            &format!("walreceiver for timeline {tenant_id}/{timeline_id}"),
+            &format!("walreceiver for timeline {tenant_shard_id}/{timeline_id}"),
             false,
             async move {
                 debug_assert_current_span_has_tenant_and_timeline_id();
@@ -117,11 +119,12 @@ impl WalReceiver {
                 *loop_status.write().unwrap() = None;
                 Ok(())
             }
-            .instrument(info_span!(parent: None, "wal_connection_manager", tenant_id = %tenant_id, timeline_id = %timeline_id))
+            .instrument(info_span!(parent: None, "wal_connection_manager", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), timeline_id = %timeline_id))
         );
 
         Self {
-            timeline: TenantTimelineId::new(tenant_id, timeline_id),
+            tenant_shard_id,
+            timeline_id,
             manager_status,
         }
     }
@@ -129,8 +132,8 @@ impl WalReceiver {
     pub async fn stop(self) {
         task_mgr::shutdown_tasks(
             Some(TaskKind::WalReceiverManager),
-            Some(self.timeline.tenant_id),
-            Some(self.timeline.timeline_id),
+            Some(self.tenant_shard_id),
+            Some(self.timeline_id),
         )
         .await;
     }
diff --git a/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs b/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs
index 3bcb7ff891..61ab236322 100644
--- a/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs
+++ b/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs
@@ -163,7 +163,7 @@ pub(super) async fn handle_walreceiver_connection(
     task_mgr::spawn(
         WALRECEIVER_RUNTIME.handle(),
         TaskKind::WalReceiverConnectionPoller,
-        Some(timeline.tenant_shard_id.tenant_id),
+        Some(timeline.tenant_shard_id),
         Some(timeline.timeline_id),
         "walreceiver connection",
         false,
diff --git a/pageserver/src/walingest.rs b/pageserver/src/walingest.rs
index 75b29a2fed..738216afa5 100644
--- a/pageserver/src/walingest.rs
+++ b/pageserver/src/walingest.rs
@@ -458,8 +458,10 @@ impl<'a> WalIngest<'a> {
             && decoded.xl_rmid == pg_constants::RM_XLOG_ID
             && (decoded.xl_info == pg_constants::XLOG_FPI
                 || decoded.xl_info == pg_constants::XLOG_FPI_FOR_HINT)
-        // compression of WAL is not yet supported: fall back to storing the original WAL record
+            // compression of WAL is not yet supported: fall back to storing the original WAL record
             && !postgres_ffi::bkpimage_is_compressed(blk.bimg_info, self.timeline.pg_version)?
+            // do not materialize null pages because they will most likely soon be replaced with real data
+            && blk.bimg_len != 0
         {
             // Extract page image from FPI record
             let img_len = blk.bimg_len as usize;
diff --git a/proxy/src/auth.rs b/proxy/src/auth.rs
index 7d79d34045..eadb9abd43 100644
--- a/proxy/src/auth.rs
+++ b/proxy/src/auth.rs
@@ -62,6 +62,9 @@ pub enum AuthErrorImpl {
         Please add it to the allowed list in the Neon console."
     )]
     IpAddressNotAllowed,
+
+    #[error("Too many connections to this endpoint. Please try again later.")]
+    TooManyConnections,
 }
 
 #[derive(Debug, Error)]
@@ -80,6 +83,10 @@ impl AuthError {
     pub fn ip_address_not_allowed() -> Self {
         AuthErrorImpl::IpAddressNotAllowed.into()
     }
+
+    pub fn too_many_connections() -> Self {
+        AuthErrorImpl::TooManyConnections.into()
+    }
 }
 
 impl<E: Into<AuthErrorImpl>> From<E> for AuthError {
@@ -102,6 +109,7 @@ impl UserFacingError for AuthError {
             MissingEndpointName => self.to_string(),
             Io(_) => "Internal error".to_string(),
             IpAddressNotAllowed => self.to_string(),
+            TooManyConnections => self.to_string(),
         }
     }
 }
diff --git a/proxy/src/auth/backend.rs b/proxy/src/auth/backend.rs
index 649b3f40f2..ba054b53eb 100644
--- a/proxy/src/auth/backend.rs
+++ b/proxy/src/auth/backend.rs
@@ -166,7 +166,7 @@ impl TryFrom<ClientCredentials> for ComputeUserInfo {
 /// All authentication flows will emit an AuthenticationOk message if successful.
 async fn auth_quirks(
     api: &impl console::Api,
-    extra: &ConsoleReqExtra<'_>,
+    extra: &ConsoleReqExtra,
     creds: ClientCredentials,
     client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,
     allow_cleartext: bool,
@@ -235,7 +235,7 @@ async fn auth_quirks(
 /// only if authentication was successfuly.
 async fn auth_and_wake_compute(
     api: &impl console::Api,
-    extra: &ConsoleReqExtra<'_>,
+    extra: &ConsoleReqExtra,
     creds: ClientCredentials,
     client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,
     allow_cleartext: bool,
@@ -314,7 +314,7 @@ impl<'a> BackendType<'a, ClientCredentials> {
     #[tracing::instrument(fields(allow_cleartext = allow_cleartext), skip_all)]
     pub async fn authenticate(
         self,
-        extra: &ConsoleReqExtra<'_>,
+        extra: &ConsoleReqExtra,
         client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,
         allow_cleartext: bool,
         config: &'static AuthenticationConfig,
@@ -387,7 +387,7 @@ impl<'a> BackendType<'a, ClientCredentials> {
 impl BackendType<'_, ComputeUserInfo> {
     pub async fn get_allowed_ips(
         &self,
-        extra: &ConsoleReqExtra<'_>,
+        extra: &ConsoleReqExtra,
     ) -> Result<Arc<Vec<String>>, GetAuthInfoError> {
         use BackendType::*;
         match self {
@@ -404,7 +404,7 @@ impl BackendType<'_, ComputeUserInfo> {
     /// The link auth flow doesn't support this, so we return [`None`] in that case.
     pub async fn wake_compute(
         &self,
-        extra: &ConsoleReqExtra<'_>,
+        extra: &ConsoleReqExtra,
     ) -> Result<Option<CachedNodeInfo>, console::errors::WakeComputeError> {
         use BackendType::*;
 
diff --git a/proxy/src/bin/proxy.rs b/proxy/src/bin/proxy.rs
index fc1c44809a..1fa2d5599f 100644
--- a/proxy/src/bin/proxy.rs
+++ b/proxy/src/bin/proxy.rs
@@ -112,6 +112,9 @@ struct ProxyCliArgs {
     /// Timeout for rate limiter. If it didn't manage to aquire a permit in this time, it will return an error.
     #[clap(long, default_value = "15s", value_parser = humantime::parse_duration)]
     rate_limiter_timeout: tokio::time::Duration,
+    /// Endpoint rate limiter max number of requests per second.
+    #[clap(long, default_value_t = 300)]
+    endpoint_rps_limit: u32,
     /// Initial limit for dynamic rate limiter. Makes sense only if `rate_limit_algorithm` is *not* `None`.
     #[clap(long, default_value_t = 100)]
     initial_limit: usize,
@@ -317,6 +320,7 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
         authentication_config,
         require_client_ip: args.require_client_ip,
         disable_ip_check_for_http: args.disable_ip_check_for_http,
+        endpoint_rps_limit: args.endpoint_rps_limit,
     }));
 
     Ok(config)
diff --git a/proxy/src/config.rs b/proxy/src/config.rs
index 182d71f9be..dea446eb22 100644
--- a/proxy/src/config.rs
+++ b/proxy/src/config.rs
@@ -20,6 +20,7 @@ pub struct ProxyConfig {
     pub authentication_config: AuthenticationConfig,
     pub require_client_ip: bool,
     pub disable_ip_check_for_http: bool,
+    pub endpoint_rps_limit: u32,
 }
 
 #[derive(Debug)]
diff --git a/proxy/src/console/provider.rs b/proxy/src/console/provider.rs
index b0a73fd03d..deab966d9e 100644
--- a/proxy/src/console/provider.rs
+++ b/proxy/src/console/provider.rs
@@ -196,15 +196,15 @@ pub mod errors {
 }
 
 /// Extra query params we'd like to pass to the console.
-pub struct ConsoleReqExtra<'a> {
+pub struct ConsoleReqExtra {
     /// A unique identifier for a connection.
     pub session_id: uuid::Uuid,
     /// Name of client application, if set.
-    pub application_name: Option<&'a str>,
+    pub application_name: String,
     pub options: Vec<(String, String)>,
 }
 
-impl<'a> ConsoleReqExtra<'a> {
+impl ConsoleReqExtra {
     // https://swagger.io/docs/specification/serialization/ DeepObject format
     // paramName[prop1]=value1&paramName[prop2]=value2&....
     pub fn options_as_deep_object(&self) -> Vec<(String, String)> {
@@ -259,20 +259,20 @@ pub trait Api {
     /// Get the client's auth secret for authentication.
     async fn get_auth_info(
         &self,
-        extra: &ConsoleReqExtra<'_>,
+        extra: &ConsoleReqExtra,
         creds: &ComputeUserInfo,
     ) -> Result<AuthInfo, errors::GetAuthInfoError>;
 
     async fn get_allowed_ips(
         &self,
-        extra: &ConsoleReqExtra<'_>,
+        extra: &ConsoleReqExtra,
         creds: &ComputeUserInfo,
     ) -> Result<Arc<Vec<String>>, errors::GetAuthInfoError>;
 
     /// Wake up the compute node and return the corresponding connection info.
     async fn wake_compute(
         &self,
-        extra: &ConsoleReqExtra<'_>,
+        extra: &ConsoleReqExtra,
         creds: &ComputeUserInfo,
     ) -> Result<CachedNodeInfo, errors::WakeComputeError>;
 }
diff --git a/proxy/src/console/provider/mock.rs b/proxy/src/console/provider/mock.rs
index 8aad8c06bc..c464b4daf2 100644
--- a/proxy/src/console/provider/mock.rs
+++ b/proxy/src/console/provider/mock.rs
@@ -144,7 +144,7 @@ impl super::Api for Api {
     #[tracing::instrument(skip_all)]
     async fn get_auth_info(
         &self,
-        _extra: &ConsoleReqExtra<'_>,
+        _extra: &ConsoleReqExtra,
         creds: &ComputeUserInfo,
     ) -> Result<AuthInfo, GetAuthInfoError> {
         self.do_get_auth_info(creds).await
@@ -152,7 +152,7 @@ impl super::Api for Api {
 
     async fn get_allowed_ips(
         &self,
-        _extra: &ConsoleReqExtra<'_>,
+        _extra: &ConsoleReqExtra,
         creds: &ComputeUserInfo,
     ) -> Result<Arc<Vec<String>>, GetAuthInfoError> {
         Ok(Arc::new(self.do_get_auth_info(creds).await?.allowed_ips))
@@ -161,7 +161,7 @@ impl super::Api for Api {
     #[tracing::instrument(skip_all)]
     async fn wake_compute(
         &self,
-        _extra: &ConsoleReqExtra<'_>,
+        _extra: &ConsoleReqExtra,
         _creds: &ComputeUserInfo,
     ) -> Result<CachedNodeInfo, WakeComputeError> {
         self.do_wake_compute()
diff --git a/proxy/src/console/provider/neon.rs b/proxy/src/console/provider/neon.rs
index f8c3ee5b58..192252a0df 100644
--- a/proxy/src/console/provider/neon.rs
+++ b/proxy/src/console/provider/neon.rs
@@ -48,7 +48,7 @@ impl Api {
 
     async fn do_get_auth_info(
         &self,
-        extra: &ConsoleReqExtra<'_>,
+        extra: &ConsoleReqExtra,
         creds: &ComputeUserInfo,
     ) -> Result<AuthInfo, GetAuthInfoError> {
         let request_id = uuid::Uuid::new_v4().to_string();
@@ -60,9 +60,9 @@ impl Api {
                 .header("Authorization", format!("Bearer {}", &self.jwt))
                 .query(&[("session_id", extra.session_id)])
                 .query(&[
-                    ("application_name", extra.application_name),
-                    ("project", Some(&creds.endpoint)),
-                    ("role", Some(&creds.inner.user)),
+                    ("application_name", extra.application_name.as_str()),
+                    ("project", creds.endpoint.as_str()),
+                    ("role", creds.inner.user.as_str()),
                 ])
                 .build()?;
 
@@ -101,7 +101,7 @@ impl Api {
 
     async fn do_wake_compute(
         &self,
-        extra: &ConsoleReqExtra<'_>,
+        extra: &ConsoleReqExtra,
         creds: &ComputeUserInfo,
     ) -> Result<NodeInfo, WakeComputeError> {
         let request_id = uuid::Uuid::new_v4().to_string();
@@ -113,8 +113,8 @@ impl Api {
                 .header("Authorization", format!("Bearer {}", &self.jwt))
                 .query(&[("session_id", extra.session_id)])
                 .query(&[
-                    ("application_name", extra.application_name),
-                    ("project", Some(&creds.endpoint)),
+                    ("application_name", extra.application_name.as_str()),
+                    ("project", creds.endpoint.as_str()),
                 ]);
 
             request_builder = if extra.options.is_empty() {
@@ -161,7 +161,7 @@ impl super::Api for Api {
     #[tracing::instrument(skip_all)]
     async fn get_auth_info(
         &self,
-        extra: &ConsoleReqExtra<'_>,
+        extra: &ConsoleReqExtra,
         creds: &ComputeUserInfo,
     ) -> Result<AuthInfo, GetAuthInfoError> {
         self.do_get_auth_info(extra, creds).await
@@ -169,7 +169,7 @@ impl super::Api for Api {
 
     async fn get_allowed_ips(
         &self,
-        extra: &ConsoleReqExtra<'_>,
+        extra: &ConsoleReqExtra,
         creds: &ComputeUserInfo,
     ) -> Result<Arc<Vec<String>>, GetAuthInfoError> {
         let key: &str = &creds.endpoint;
@@ -192,7 +192,7 @@ impl super::Api for Api {
     #[tracing::instrument(skip_all)]
     async fn wake_compute(
         &self,
-        extra: &ConsoleReqExtra<'_>,
+        extra: &ConsoleReqExtra,
         creds: &ComputeUserInfo,
     ) -> Result<CachedNodeInfo, WakeComputeError> {
         let key: &str = &creds.inner.cache_key;
diff --git a/proxy/src/proxy.rs b/proxy/src/proxy.rs
index 018f774c7e..ae8b294841 100644
--- a/proxy/src/proxy.rs
+++ b/proxy/src/proxy.rs
@@ -9,6 +9,7 @@ use crate::{
     console::{self, errors::WakeComputeError, messages::MetricsAuxInfo, Api},
     http::StatusCode,
     protocol2::WithClientIp,
+    rate_limiter::EndpointRateLimiter,
     stream::{PqStream, Stream},
     usage_metrics::{Ids, USAGE_METRICS},
 };
@@ -307,6 +308,7 @@ pub async fn task_main(
 
     let connections = tokio_util::task::task_tracker::TaskTracker::new();
     let cancel_map = Arc::new(CancelMap::default());
+    let endpoint_rate_limiter = Arc::new(EndpointRateLimiter::new(config.endpoint_rps_limit));
 
     while let Some(accept_result) =
         run_until_cancelled(listener.accept(), &cancellation_token).await
@@ -315,6 +317,8 @@ pub async fn task_main(
 
         let session_id = uuid::Uuid::new_v4();
         let cancel_map = Arc::clone(&cancel_map);
+        let endpoint_rate_limiter = endpoint_rate_limiter.clone();
+
         connections.spawn(
             async move {
                 info!("accepted postgres client connection");
@@ -340,6 +344,7 @@ pub async fn task_main(
                     socket,
                     ClientMode::Tcp,
                     peer_addr.ip(),
+                    endpoint_rate_limiter,
                 )
                 .await
             }
@@ -415,6 +420,7 @@ pub async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
     stream: S,
     mode: ClientMode,
     peer_addr: IpAddr,
+    endpoint_rate_limiter: Arc<EndpointRateLimiter>,
 ) -> anyhow::Result<()> {
     info!(
         protocol = mode.protocol_label(),
@@ -463,6 +469,7 @@ pub async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
         &params,
         session_id,
         mode.allow_self_signed_compute(config),
+        endpoint_rate_limiter,
     );
     cancel_map
         .with_session(|session| client.connect_to_db(session, mode, &config.authentication_config))
@@ -671,7 +678,7 @@ fn report_error(e: &WakeComputeError, retry: bool) {
 pub async fn connect_to_compute<M: ConnectMechanism>(
     mechanism: &M,
     mut node_info: console::CachedNodeInfo,
-    extra: &console::ConsoleReqExtra<'_>,
+    extra: &console::ConsoleReqExtra,
     creds: &auth::BackendType<'_, auth::backend::ComputeUserInfo>,
     mut latency_timer: LatencyTimer,
 ) -> Result<M::Connection, M::Error>
@@ -928,6 +935,8 @@ struct Client<'a, S> {
     session_id: uuid::Uuid,
     /// Allow self-signed certificates (for testing).
     allow_self_signed_compute: bool,
+    /// Rate limiter for endpoints
+    endpoint_rate_limiter: Arc<EndpointRateLimiter>,
 }
 
 impl<'a, S> Client<'a, S> {
@@ -938,6 +947,7 @@ impl<'a, S> Client<'a, S> {
         params: &'a StartupMessageParams,
         session_id: uuid::Uuid,
         allow_self_signed_compute: bool,
+        endpoint_rate_limiter: Arc<EndpointRateLimiter>,
     ) -> Self {
         Self {
             stream,
@@ -945,6 +955,7 @@ impl<'a, S> Client<'a, S> {
             params,
             session_id,
             allow_self_signed_compute,
+            endpoint_rate_limiter,
         }
     }
 }
@@ -966,15 +977,29 @@ impl<S: AsyncRead + AsyncWrite + Unpin> Client<'_, S> {
             params,
             session_id,
             allow_self_signed_compute,
+            endpoint_rate_limiter,
         } = self;
 
+        // check rate limit
+        if let Some(ep) = creds.get_endpoint() {
+            if !endpoint_rate_limiter.check(ep) {
+                return stream
+                    .throw_error(auth::AuthError::too_many_connections())
+                    .await;
+            }
+        }
+
+        let proto = mode.protocol_label();
         let extra = console::ConsoleReqExtra {
             session_id, // aka this connection's id
-            application_name: params.get("application_name"),
+            application_name: format!(
+                "{}/{}",
+                params.get("application_name").unwrap_or_default(),
+                proto
+            ),
             options: neon_options(params),
         };
-
-        let mut latency_timer = LatencyTimer::new(mode.protocol_label());
+        let mut latency_timer = LatencyTimer::new(proto);
 
         let user = creds.get_user().to_owned();
         let auth_result = match creds
@@ -1012,7 +1037,6 @@ impl<S: AsyncRead + AsyncWrite + Unpin> Client<'_, S> {
         .or_else(|e| stream.throw_error(e))
         .await?;
 
-        let proto = mode.protocol_label();
         NUM_DB_CONNECTIONS_OPENED_COUNTER
             .with_label_values(&[proto])
             .inc();
diff --git a/proxy/src/proxy/tests.rs b/proxy/src/proxy/tests.rs
index 31c3ad1055..4691abbfb9 100644
--- a/proxy/src/proxy/tests.rs
+++ b/proxy/src/proxy/tests.rs
@@ -484,13 +484,13 @@ fn helper_create_connect_info(
     mechanism: &TestConnectMechanism,
 ) -> (
     CachedNodeInfo,
-    console::ConsoleReqExtra<'static>,
+    console::ConsoleReqExtra,
     auth::BackendType<'_, ComputeUserInfo>,
 ) {
     let cache = helper_create_cached_node_info();
     let extra = console::ConsoleReqExtra {
         session_id: uuid::Uuid::new_v4(),
-        application_name: Some("TEST"),
+        application_name: "TEST".into(),
         options: vec![],
     };
     let creds = auth::BackendType::Test(mechanism);
diff --git a/proxy/src/rate_limiter.rs b/proxy/src/rate_limiter.rs
index 5622c44a68..f40b8dbd1c 100644
--- a/proxy/src/rate_limiter.rs
+++ b/proxy/src/rate_limiter.rs
@@ -3,4 +3,5 @@ mod limit_algorithm;
 mod limiter;
 pub use aimd::Aimd;
 pub use limit_algorithm::{AimdConfig, Fixed, RateLimitAlgorithm, RateLimiterConfig};
+pub use limiter::EndpointRateLimiter;
 pub use limiter::Limiter;
diff --git a/proxy/src/rate_limiter/limiter.rs b/proxy/src/rate_limiter/limiter.rs
index 3a9fed3919..9d28bb67b3 100644
--- a/proxy/src/rate_limiter/limiter.rs
+++ b/proxy/src/rate_limiter/limiter.rs
@@ -6,6 +6,9 @@ use std::{
     time::Duration,
 };
 
+use dashmap::DashMap;
+use parking_lot::Mutex;
+use smol_str::SmolStr;
 use tokio::sync::{Mutex as AsyncMutex, Semaphore, SemaphorePermit};
 use tokio::time::{timeout, Instant};
 use tracing::info;
@@ -15,6 +18,74 @@ use super::{
     RateLimiterConfig,
 };
 
+/// Simple per-endpoint rate limiter.
+///
+/// Check that number of connections to the endpoint is below `max_rps` rps. Purposefully
+/// ignore user name and database name as clients can reconnect with different names, so
+/// we'll end up sending some http requests to the control plane.
+//
+// We also may save quite a lot of CPU (I think) by bailing out right after we
+// saw SNI, before doing TLS handshake. User-side error messages in that case
+// do not look very nice (`SSL SYSCALL error: Undefined error: 0`), so for now
+// I went with a more expensive way that yields user-friendlier error messages.
+// TODO: add a better bucketing here, e.g. not more than 300 requests per second,
+//       and not more than 1000 requests per 10 seconds, etc. Short bursts of reconnects
+//       are normal during redeployments, so we should not block them.
+pub struct EndpointRateLimiter {
+    map: DashMap<SmolStr, Arc<Mutex<(Instant, u32)>>>, // endpoint -> (window start, count)
+    max_rps: u32,
+    access_count: AtomicUsize,
+}
+
+impl EndpointRateLimiter {
+    /// Create a limiter that admits up to `max_rps` connections per endpoint per second.
+    pub fn new(max_rps: u32) -> Self {
+        Self {
+            map: DashMap::new(),
+            max_rps,
+            access_count: AtomicUsize::new(1), // start from 1 to avoid GC on the first request
+        }
+    }
+
+    /// Check that number of connections to the endpoint is below `max_rps` rps; returns `false`
+    /// when the limit is hit. Windows use the monotonic [`Instant`] clock, since a time-of-day
+    /// clock wraps at midnight and would then keep rejecting the endpoint.
+    pub fn check(&self, endpoint: SmolStr) -> bool {
+        // do GC every 100k requests (worst case memory usage is about 10MB)
+        if self.access_count.fetch_add(1, Ordering::AcqRel) % 100_000 == 0 {
+            self.do_gc();
+        }
+
+        let now = Instant::now();
+        let entry = self
+            .map
+            .entry(endpoint)
+            .or_insert_with(|| Arc::new(Mutex::new((now, 0))));
+        let mut entry = entry.lock();
+        let (last_time, count) = *entry;
+        // `duration_since` yields zero if another thread already stored a later window start.
+        if now.duration_since(last_time) >= Duration::from_secs(1) {
+            *entry = (now, 1);
+        } else if count >= self.max_rps {
+            return false;
+        } else {
+            *entry = (last_time, count + 1);
+        }
+        true
+    }
+
+    /// Clean the map. Simple strategy: remove all entries. At worst, we'll
+    /// double the effective max_rps during the cleanup. But that way deletion
+    /// does not acquire mutex on each entry access.
+    pub fn do_gc(&self) {
+        info!(
+            "cleaning up endpoint rate limiter, current size = {}",
+            self.map.len()
+        );
+        self.map.clear();
+    }
+}
+
 /// Limits the number of concurrent jobs.
 ///
 /// Concurrency is limited through the use of [Token]s. Acquire a token to run a job, and release the
diff --git a/proxy/src/serverless.rs b/proxy/src/serverless.rs
index cd496ff01e..92d6e2d851 100644
--- a/proxy/src/serverless.rs
+++ b/proxy/src/serverless.rs
@@ -14,6 +14,7 @@ use tokio_util::task::TaskTracker;
 
 use crate::protocol2::{ProxyProtocolAccept, WithClientIp};
 use crate::proxy::{NUM_CLIENT_CONNECTION_CLOSED_COUNTER, NUM_CLIENT_CONNECTION_OPENED_COUNTER};
+use crate::rate_limiter::EndpointRateLimiter;
 use crate::{cancellation::CancelMap, config::ProxyConfig};
 use futures::StreamExt;
 use hyper::{
@@ -43,6 +44,7 @@ pub async fn task_main(
     }
 
     let conn_pool = conn_pool::GlobalConnPool::new(config);
+    let endpoint_rate_limiter = Arc::new(EndpointRateLimiter::new(config.endpoint_rps_limit));
 
     // shutdown the connection pool
     tokio::spawn({
@@ -91,6 +93,7 @@ pub async fn task_main(
             let sni_name = tls.server_name().map(|s| s.to_string());
             let conn_pool = conn_pool.clone();
             let ws_connections = ws_connections.clone();
+            let endpoint_rate_limiter = endpoint_rate_limiter.clone();
 
             async move {
                 let peer_addr = match client_addr {
@@ -103,6 +106,7 @@ pub async fn task_main(
                         let sni_name = sni_name.clone();
                         let conn_pool = conn_pool.clone();
                         let ws_connections = ws_connections.clone();
+                        let endpoint_rate_limiter = endpoint_rate_limiter.clone();
 
                         async move {
                             let cancel_map = Arc::new(CancelMap::default());
@@ -117,6 +121,7 @@ pub async fn task_main(
                                 session_id,
                                 sni_name,
                                 peer_addr.ip(),
+                                endpoint_rate_limiter,
                             )
                             .instrument(info_span!(
                                 "serverless",
@@ -190,6 +195,7 @@ async fn request_handler(
     session_id: uuid::Uuid,
     sni_hostname: Option<String>,
     peer_addr: IpAddr,
+    endpoint_rate_limiter: Arc<EndpointRateLimiter>,
 ) -> Result<Response<Body>, ApiError> {
     let host = request
         .headers()
@@ -214,6 +220,7 @@ async fn request_handler(
                     session_id,
                     host,
                     peer_addr,
+                    endpoint_rate_limiter,
                 )
                 .await
                 {
diff --git a/proxy/src/serverless/conn_pool.rs b/proxy/src/serverless/conn_pool.rs
index 734df11368..4f3b31b9be 100644
--- a/proxy/src/serverless/conn_pool.rs
+++ b/proxy/src/serverless/conn_pool.rs
@@ -37,7 +37,7 @@ use crate::proxy::ConnectMechanism;
 use tracing::{error, warn, Span};
 use tracing::{info, info_span, Instrument};
 
-pub const APP_NAME: &str = "sql_over_http";
+pub const APP_NAME: &str = "/sql_over_http";
 const MAX_CONNS_PER_ENDPOINT: usize = 20;
 
 #[derive(Debug, Clone)]
@@ -432,7 +432,7 @@ async fn connect_to_compute(
 
     let extra = console::ConsoleReqExtra {
         session_id: uuid::Uuid::new_v4(),
-        application_name: Some(APP_NAME),
+        application_name: APP_NAME.to_string(),
         options: console_options,
     };
     // TODO(anna): this is a bit hacky way, consider using console notification listener.
diff --git a/proxy/src/serverless/websocket.rs b/proxy/src/serverless/websocket.rs
index 199b03550d..cd6184cdee 100644
--- a/proxy/src/serverless/websocket.rs
+++ b/proxy/src/serverless/websocket.rs
@@ -3,6 +3,7 @@ use crate::{
     config::ProxyConfig,
     error::io_error,
     proxy::{handle_client, ClientMode},
+    rate_limiter::EndpointRateLimiter,
 };
 use bytes::{Buf, Bytes};
 use futures::{Sink, Stream};
@@ -13,6 +14,7 @@ use pin_project_lite::pin_project;
 use std::{
     net::IpAddr,
     pin::Pin,
+    sync::Arc,
     task::{ready, Context, Poll},
 };
 use tokio::io::{self, AsyncBufRead, AsyncRead, AsyncWrite, ReadBuf};
@@ -134,6 +136,7 @@ pub async fn serve_websocket(
     session_id: uuid::Uuid,
     hostname: Option<String>,
     peer_addr: IpAddr,
+    endpoint_rate_limiter: Arc<EndpointRateLimiter>,
 ) -> anyhow::Result<()> {
     let websocket = websocket.await?;
     handle_client(
@@ -143,6 +146,7 @@ pub async fn serve_websocket(
         WebSocketRw::new(websocket),
         ClientMode::Websockets { hostname },
         peer_addr,
+        endpoint_rate_limiter,
     )
     .await?;
     Ok(())
diff --git a/s3_scrubber/src/checks.rs b/s3_scrubber/src/checks.rs
index 510a128663..a15a908212 100644
--- a/s3_scrubber/src/checks.rs
+++ b/s3_scrubber/src/checks.rs
@@ -142,7 +142,9 @@ pub(crate) async fn branch_cleanup_and_check_errors(
                         .collect();
 
                     if !orphan_layers.is_empty() {
-                        result.errors.push(format!(
+                        // An orphan layer is not an error: it's arguably not even a warning, but it is helpful to report
+                        // these as a hint that there is something worth cleaning up here.
+                        result.warnings.push(format!(
                             "index_part.json does not contain layers from S3: {:?}",
                             orphan_layers
                                 .iter()
@@ -170,6 +172,7 @@ pub(crate) async fn branch_cleanup_and_check_errors(
                         ));
                     }
                 }
+                BlobDataParseResult::Relic => {}
                 BlobDataParseResult::Incorrect(parse_errors) => result.errors.extend(
                     parse_errors
                         .into_iter()
@@ -215,6 +218,8 @@ pub(crate) enum BlobDataParseResult {
         index_part_generation: Generation,
         s3_layers: HashSet<(LayerFileName, Generation)>,
     },
+    /// The remains of a deleted Timeline (i.e. an initdb archive only)
+    Relic,
     Incorrect(Vec<String>),
 }
 
@@ -245,6 +250,7 @@ pub(crate) async fn list_timeline_blobs(
     timeline_dir_target.delimiter = String::new();
 
     let mut index_parts: Vec<ObjectIdentifier> = Vec::new();
+    let mut initdb_archive: bool = false;
 
     let stream = stream_listing(s3_client, &timeline_dir_target);
     pin_mut!(stream);
@@ -258,6 +264,10 @@ pub(crate) async fn list_timeline_blobs(
                 tracing::info!("Index key {key}");
                 index_parts.push(obj)
             }
+            Some("initdb.tar.zst") => {
+                tracing::info!("initdb archive {key}");
+                initdb_archive = true;
+            }
             Some(maybe_layer_name) => match parse_layer_object_name(maybe_layer_name) {
                 Ok((new_layer, gen)) => {
                     tracing::info!("Parsed layer key: {} {:?}", new_layer, gen);
@@ -279,6 +289,16 @@ pub(crate) async fn list_timeline_blobs(
         }
     }
 
+    if index_parts.is_empty() && s3_layers.is_empty() && initdb_archive {
+        tracing::info!(
+            "Timeline is empty apart from initdb archive: expected post-deletion state."
+        );
+        return Ok(S3TimelineBlobData {
+            blob_data: BlobDataParseResult::Relic,
+            keys_to_remove: Vec::new(),
+        });
+    }
+
     // Choose the index_part with the highest generation
     let (index_part_object, index_part_generation) = match index_parts
         .iter()
diff --git a/s3_scrubber/src/lib.rs b/s3_scrubber/src/lib.rs
index e5465952fb..6607db21e6 100644
--- a/s3_scrubber/src/lib.rs
+++ b/s3_scrubber/src/lib.rs
@@ -86,7 +86,9 @@ impl S3Target {
         if new_self.prefix_in_bucket.is_empty() {
             new_self.prefix_in_bucket = format!("/{}/", new_segment);
         } else {
-            let _ = new_self.prefix_in_bucket.pop();
+            if new_self.prefix_in_bucket.ends_with('/') {
+                new_self.prefix_in_bucket.pop();
+            }
             new_self.prefix_in_bucket =
                 [&new_self.prefix_in_bucket, new_segment, ""].join(&new_self.delimiter);
         }
diff --git a/s3_scrubber/src/main.rs b/s3_scrubber/src/main.rs
index 1f0ceebdaf..ef020edc2a 100644
--- a/s3_scrubber/src/main.rs
+++ b/s3_scrubber/src/main.rs
@@ -57,7 +57,7 @@ async fn main() -> anyhow::Result<()> {
     ));
 
     match cli.command {
-        Command::ScanMetadata { json } => match scan_metadata(bucket_config).await {
+        Command::ScanMetadata { json } => match scan_metadata(bucket_config.clone()).await {
             Err(e) => {
                 tracing::error!("Failed: {e}");
                 Err(e)
@@ -70,6 +70,17 @@ async fn main() -> anyhow::Result<()> {
                 }
                 if summary.is_fatal() {
                     Err(anyhow::anyhow!("Fatal scrub errors detected"))
+                } else if summary.is_empty() {
+                    // Strictly speaking an empty bucket is a valid bucket, but if someone ran the
+                    // scrubber they were likely expecting to scan something, and if we see no timelines
+                    // at all then it's likely due to some configuration issues like a bad prefix
+                    Err(anyhow::anyhow!(
+                        "No timelines found in bucket {} prefix {}",
+                        bucket_config.bucket,
+                        bucket_config
+                            .prefix_in_bucket
+                            .unwrap_or("<none>".to_string())
+                    ))
                 } else {
                     Ok(())
                 }
diff --git a/s3_scrubber/src/scan_metadata.rs b/s3_scrubber/src/scan_metadata.rs
index ad82db1e76..228f8d6763 100644
--- a/s3_scrubber/src/scan_metadata.rs
+++ b/s3_scrubber/src/scan_metadata.rs
@@ -174,6 +174,10 @@ Timeline layer count: {6}
     pub fn is_fatal(&self) -> bool {
         !self.with_errors.is_empty()
     }
+
+    pub fn is_empty(&self) -> bool {
+        self.count == 0
+    }
 }
 
 /// Scan the pageserver metadata in an S3 bucket, reporting errors and statistics.
diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py
index 9e0beeb4d1..4b23650960 100644
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -28,6 +28,7 @@ import jwt
 import psycopg2
 import pytest
 import requests
+import toml
 from _pytest.config import Config
 from _pytest.config.argparsing import Parser
 from _pytest.fixtures import FixtureRequest
@@ -436,7 +437,7 @@ class NeonEnvBuilder:
         # Pageserver remote storage
         self.pageserver_remote_storage = pageserver_remote_storage
         # Safekeepers remote storage
-        self.sk_remote_storage: Optional[RemoteStorage] = None
+        self.safekeepers_remote_storage: Optional[RemoteStorage] = None
 
         self.broker = broker
         self.run_id = run_id
@@ -506,6 +507,66 @@ class NeonEnvBuilder:
 
         return env
 
+    def from_repo_dir(
+        self,
+        repo_dir: Path,
+        neon_binpath: Optional[Path] = None,
+        pg_distrib_dir: Optional[Path] = None,
+    ) -> NeonEnv:
+        """
+        A simple method to import data into the current NeonEnvBuilder from a snapshot of a repo dir.
+        """
+
+        # Setting custom `neon_binpath` and `pg_distrib_dir` is useful for compatibility tests
+        self.neon_binpath = neon_binpath or self.neon_binpath
+        self.pg_distrib_dir = pg_distrib_dir or self.pg_distrib_dir
+
+        # Get the initial tenant and timeline from the snapshot config
+        snapshot_config_toml = repo_dir / "config"
+        with snapshot_config_toml.open("r") as f:
+            snapshot_config = toml.load(f)
+
+        self.initial_tenant = TenantId(snapshot_config["default_tenant_id"])
+        self.initial_timeline = TimelineId(
+            dict(snapshot_config["branch_name_mappings"][DEFAULT_BRANCH_NAME])[
+                str(self.initial_tenant)
+            ]
+        )
+        self.env = self.init_configs()
+
+        for ps_dir in repo_dir.glob("pageserver_*"):
+            tenants_from_dir = ps_dir / "tenants"
+            tenants_to_dir = self.repo_dir / ps_dir.name / "tenants"
+
+            log.info(f"Copying pageserver tenants directory {tenants_from_dir} to {tenants_to_dir}")
+            shutil.copytree(tenants_from_dir, tenants_to_dir)
+
+        for sk_from_dir in (repo_dir / "safekeepers").glob("sk*"):
+            sk_to_dir = self.repo_dir / "safekeepers" / sk_from_dir.name
+            log.info(f"Copying safekeeper directory {sk_from_dir} to {sk_to_dir}")
+            sk_to_dir.rmdir()
+            shutil.copytree(sk_from_dir, sk_to_dir, ignore=shutil.ignore_patterns("*.log", "*.pid"))
+
+        shutil.rmtree(self.repo_dir / "local_fs_remote_storage", ignore_errors=True)
+        shutil.copytree(
+            repo_dir / "local_fs_remote_storage", self.repo_dir / "local_fs_remote_storage"
+        )
+
+        if (attachments_json := Path(repo_dir / "attachments.json")).exists():
+            shutil.copyfile(attachments_json, self.repo_dir / attachments_json.name)
+
+        # Update the config with info about tenants and timelines
+        with (self.repo_dir / "config").open("r") as f:
+            config = toml.load(f)
+
+        config["default_tenant_id"] = snapshot_config["default_tenant_id"]
+        config["branch_name_mappings"] = snapshot_config["branch_name_mappings"]
+
+        with (self.repo_dir / "config").open("w") as f:
+            toml.dump(config, f)
+
+        return self.env
+
     def enable_scrub_on_exit(self):
         """
         Call this if you would like the fixture to automatically run
@@ -534,9 +595,11 @@ class NeonEnvBuilder:
         self.pageserver_remote_storage = ret
 
     def enable_safekeeper_remote_storage(self, kind: RemoteStorageKind):
-        assert self.sk_remote_storage is None, "sk_remote_storage already configured"
+        assert (
+            self.safekeepers_remote_storage is None
+        ), "safekeepers_remote_storage already configured"
 
-        self.sk_remote_storage = self._configure_and_create_remote_storage(
+        self.safekeepers_remote_storage = self._configure_and_create_remote_storage(
             kind, RemoteStorageUser.SAFEKEEPER
         )
 
@@ -589,7 +652,7 @@ class NeonEnvBuilder:
                 directory_to_clean.rmdir()
 
     def cleanup_remote_storage(self):
-        for x in [self.pageserver_remote_storage, self.sk_remote_storage]:
+        for x in [self.pageserver_remote_storage, self.safekeepers_remote_storage]:
             if isinstance(x, S3Storage):
                 x.do_cleanup()
 
@@ -693,7 +756,7 @@ class NeonEnv:
         self.pageservers: List[NeonPageserver] = []
         self.broker = config.broker
         self.pageserver_remote_storage = config.pageserver_remote_storage
-        self.safekeepers_remote_storage = config.sk_remote_storage
+        self.safekeepers_remote_storage = config.safekeepers_remote_storage
         self.pg_version = config.pg_version
         # Binary path for pageserver, safekeeper, etc
         self.neon_binpath = config.neon_binpath
@@ -718,25 +781,17 @@ class NeonEnv:
             self.attachment_service = None
 
         # Create a config file corresponding to the options
-        toml = textwrap.dedent(
-            f"""
-            default_tenant_id = '{config.initial_tenant}'
-        """
-        )
+        cfg: Dict[str, Any] = {
+            "default_tenant_id": str(self.initial_tenant),
+            "broker": {
+                "listen_addr": self.broker.listen_addr(),
+            },
+            "pageservers": [],
+            "safekeepers": [],
+        }
 
         if self.control_plane_api is not None:
-            toml += textwrap.dedent(
-                f"""
-                control_plane_api = '{self.control_plane_api}'
-            """
-            )
-
-        toml += textwrap.dedent(
-            f"""
-            [broker]
-            listen_addr = '{self.broker.listen_addr()}'
-        """
-        )
+            cfg["control_plane_api"] = self.control_plane_api
 
         # Create config for pageserver
         http_auth_type = "NeonJWT" if config.auth_enabled else "Trust"
@@ -749,26 +804,24 @@ class NeonEnv:
                 http=self.port_distributor.get_port(),
             )
 
-            toml += textwrap.dedent(
-                f"""
-                [[pageservers]]
-                id={ps_id}
-                listen_pg_addr = 'localhost:{pageserver_port.pg}'
-                listen_http_addr = 'localhost:{pageserver_port.http}'
-                pg_auth_type = '{pg_auth_type}'
-                http_auth_type = '{http_auth_type}'
-            """
-            )
-
+            ps_cfg: Dict[str, Any] = {
+                "id": ps_id,
+                "listen_pg_addr": f"localhost:{pageserver_port.pg}",
+                "listen_http_addr": f"localhost:{pageserver_port.http}",
+                "pg_auth_type": pg_auth_type,
+                "http_auth_type": http_auth_type,
+            }
             # Create a corresponding NeonPageserver object
             self.pageservers.append(
                 NeonPageserver(
                     self,
                     ps_id,
                     port=pageserver_port,
-                    config_override=config.pageserver_config_override,
+                    config_override=self.pageserver_config_override,
                 )
             )
+            cfg["pageservers"].append(ps_cfg)
+
         # Create config and a Safekeeper object for each safekeeper
         for i in range(1, config.num_safekeepers + 1):
             port = SafekeeperPort(
@@ -777,32 +830,22 @@ class NeonEnv:
                 http=self.port_distributor.get_port(),
             )
             id = config.safekeepers_id_start + i  # assign ids sequentially
-            toml += textwrap.dedent(
-                f"""
-                [[safekeepers]]
-                id = {id}
-                pg_port = {port.pg}
-                pg_tenant_only_port = {port.pg_tenant_only}
-                http_port = {port.http}
-                sync = {'true' if config.safekeepers_enable_fsync else 'false'}"""
-            )
+            sk_cfg: Dict[str, Any] = {
+                "id": id,
+                "pg_port": port.pg,
+                "pg_tenant_only_port": port.pg_tenant_only,
+                "http_port": port.http,
+                "sync": config.safekeepers_enable_fsync,
+            }
             if config.auth_enabled:
-                toml += textwrap.dedent(
-                    """
-                auth_enabled = true
-                """
-                )
-            if config.sk_remote_storage is not None:
-                toml += textwrap.dedent(
-                    f"""
-                remote_storage = "{remote_storage_to_toml_inline_table(config.sk_remote_storage)}"
-                """
-                )
-            safekeeper = Safekeeper(env=self, id=id, port=port)
-            self.safekeepers.append(safekeeper)
+                sk_cfg["auth_enabled"] = True
+            if self.safekeepers_remote_storage is not None:
+                sk_cfg["remote_storage"] = self.safekeepers_remote_storage.to_toml_inline_table()
+            self.safekeepers.append(Safekeeper(env=self, id=id, port=port))
+            cfg["safekeepers"].append(sk_cfg)
 
-        log.info(f"Config: {toml}")
-        self.neon_cli.init(toml)
+        log.info(f"Config: {cfg}")
+        self.neon_cli.init(cfg)
 
     def start(self):
         # Start up broker, pageserver and all safekeepers
@@ -1288,10 +1331,10 @@ class NeonCli(AbstractNeonCli):
 
     def init(
         self,
-        config_toml: str,
+        config: Dict[str, Any],
     ) -> "subprocess.CompletedProcess[str]":
         with tempfile.NamedTemporaryFile(mode="w+") as tmp:
-            tmp.write(config_toml)
+            tmp.write(toml.dumps(config))
             tmp.flush()
 
             cmd = ["init", f"--config={tmp.name}", "--pg-version", self.env.pg_version]
@@ -1729,11 +1772,16 @@ class NeonPageserver(PgProtocol):
 
     @property
     def workdir(self) -> Path:
-        return Path(os.path.join(self.env.repo_dir, f"pageserver_{self.id}"))
+        return self.env.repo_dir / f"pageserver_{self.id}"
 
     def assert_no_errors(self):
-        logfile = open(os.path.join(self.workdir, "pageserver.log"), "r")
-        errors = scan_pageserver_log_for_errors(logfile, self.allowed_errors)
+        logfile = self.workdir / "pageserver.log"
+        if not logfile.exists():
+            log.warning(f"Skipping log check: {logfile} does not exist")
+            return
+
+        with logfile.open("r") as f:
+            errors = scan_pageserver_log_for_errors(f, self.allowed_errors)
 
         for _lineno, error in errors:
             log.info(f"not allowed error: {error.strip()}")
@@ -1757,7 +1805,10 @@ class NeonPageserver(PgProtocol):
 
     def log_contains(self, pattern: str) -> Optional[str]:
         """Check that the pageserver log contains a line that matches the given regex"""
-        logfile = open(os.path.join(self.workdir, "pageserver.log"), "r")
+        logfile = self.workdir / "pageserver.log"
+        if not logfile.exists():
+            log.warning(f"Skipping log check: {logfile} does not exist")
+            return None
 
         contains_re = re.compile(pattern)
 
@@ -1766,14 +1817,11 @@ class NeonPageserver(PgProtocol):
         # no guarantee it is already present in the log file. This hasn't
         # been a problem in practice, our python tests are not fast enough
         # to hit that race condition.
-        while True:
-            line = logfile.readline()
-            if not line:
-                break
-
-            if contains_re.search(line):
-                # found it!
-                return line
+        with logfile.open("r") as f:
+            for line in f:
+                if contains_re.search(line):
+                    # found it!
+                    return line
 
         return None
 
@@ -1796,6 +1844,27 @@ class NeonPageserver(PgProtocol):
         client = self.http_client()
         return client.tenant_detach(tenant_id)
 
+    def tenant_location_configure(self, tenant_id: TenantId, config: dict[str, Any], **kwargs):
+        # This API is only for use when generations are enabled
+        assert self.env.attachment_service is not None
+        # Attached modes without a generation get one from attach_hook_issue (stored in `config`)
+        if config["mode"].startswith("Attached") and "generation" not in config:
+            config["generation"] = self.env.attachment_service.attach_hook_issue(tenant_id, self.id)
+        # Forward to this pageserver's HTTP client; **kwargs are passed through unchanged
+        client = self.http_client()
+        return client.tenant_location_conf(tenant_id, config, **kwargs)
+
+    def read_tenant_location_conf(self, tenant_id: TenantId) -> dict[str, Any]:
+        """Read the tenant's on-disk `config-v1` file and return it decoded from TOML."""
+        path = self.tenant_dir(tenant_id) / "config-v1"
+        log.info(f"Reading location conf from {path}")
+        raw = path.read_text()  # closes the file; avoids shadowing the `bytes` builtin
+        try:
+            return toml.loads(raw)
+        except Exception:
+            log.error(f"Failed to decode LocationConf, raw content ({len(raw)} bytes): {raw}")
+            raise
+
     def tenant_create(
         self,
         tenant_id: TenantId,
@@ -2729,6 +2798,7 @@ class EndpointFactory:
         lsn: Optional[Lsn] = None,
         hot_standby: bool = False,
         config_lines: Optional[List[str]] = None,
+        pageserver_id: Optional[int] = None,
     ) -> Endpoint:
         ep = Endpoint(
             self.env,
@@ -2748,6 +2818,7 @@ class EndpointFactory:
             lsn=lsn,
             hot_standby=hot_standby,
             config_lines=config_lines,
+            pageserver_id=pageserver_id,
         )
 
     def stop_all(self) -> "EndpointFactory":
@@ -3094,7 +3165,7 @@ def pytest_addoption(parser: Parser):
 
 
 SMALL_DB_FILE_NAME_REGEX: re.Pattern = re.compile(  # type: ignore[type-arg]
-    r"config|metadata|.+\.(?:toml|pid|json|sql)"
+    r"config|config-v1|heatmap-v1|metadata|.+\.(?:toml|pid|json|sql)"
 )
 
 
@@ -3355,8 +3426,6 @@ def parse_project_git_version_output(s: str) -> str:
 
     The information is generated by utils::project_git_version!
     """
-    import re
-
     res = re.search(r"git(-env)?:([0-9a-fA-F]{8,40})(-\S+)?", s)
     if res and (commit := res.group(2)):
         return commit
diff --git a/test_runner/fixtures/pageserver/http.py b/test_runner/fixtures/pageserver/http.py
index 3e75bac424..b46ddf5527 100644
--- a/test_runner/fixtures/pageserver/http.py
+++ b/test_runner/fixtures/pageserver/http.py
@@ -150,7 +150,7 @@ class PageserverHttpClient(requests.Session):
                 # (this may change in future if we do fault injection of a kind that causes
                 #  requests TCP flows to stick)
                 read=False,
-                backoff_factor=0,
+                backoff_factor=0.2,
                 status_forcelist=[503],
                 allowed_methods=None,
                 remove_headers_on_redirect=[],
@@ -277,6 +277,23 @@ class PageserverHttpClient(requests.Session):
         res = self.post(f"http://localhost:{self.port}/v1/tenant/{tenant_id}/reset", params=params)
         self.verbose_error(res)
 
+    def tenant_location_conf(
+        self, tenant_id: TenantId, location_conf: dict[str, Any], flush_ms=None
+    ):
+        """PUT `location_conf` (plus the tenant id) to the tenant's location_config endpoint."""
+        body = location_conf.copy()  # shallow copy: the caller's dict is left untouched
+        body["tenant_id"] = str(tenant_id)
+        params = {}  # `flush_ms` becomes a query parameter only when the caller supplies it
+        if flush_ms is not None:
+            params["flush_ms"] = str(flush_ms)
+
+        res = self.put(
+            f"http://localhost:{self.port}/v1/tenant/{tenant_id}/location_config",
+            json=body,
+            params=params,
+        )
+        self.verbose_error(res)
+
     def tenant_delete(self, tenant_id: TenantId):
         res = self.delete(f"http://localhost:{self.port}/v1/tenant/{tenant_id}")
         self.verbose_error(res)
diff --git a/test_runner/fixtures/remote_storage.py b/test_runner/fixtures/remote_storage.py
index 565e5fa7f8..824531bea4 100644
--- a/test_runner/fixtures/remote_storage.py
+++ b/test_runner/fixtures/remote_storage.py
@@ -9,6 +9,7 @@ from pathlib import Path
 from typing import Any, Dict, List, Optional, Union
 
 import boto3
+import toml
 from mypy_boto3_s3 import S3Client
 
 from fixtures.log_helper import log
@@ -133,7 +134,10 @@ class LocalFsStorage:
             return json.load(f)
 
     def to_toml_inline_table(self) -> str:
-        return f"local_path='{self.root}'"
+        rv = {
+            "local_path": str(self.root),
+        }
+        return toml.TomlEncoder().dump_inline_table(rv)
 
     def cleanup(self):
         # no cleanup is done here, because there's NeonEnvBuilder.cleanup_local_storage which will remove everything, including localfs files
@@ -174,18 +178,18 @@ class S3Storage:
         )
 
     def to_toml_inline_table(self) -> str:
-        s = [
-            f"bucket_name='{self.bucket_name}'",
-            f"bucket_region='{self.bucket_region}'",
-        ]
+        rv = {
+            "bucket_name": self.bucket_name,
+            "bucket_region": self.bucket_region,
+        }
 
         if self.prefix_in_bucket is not None:
-            s.append(f"prefix_in_bucket='{self.prefix_in_bucket}'")
+            rv["prefix_in_bucket"] = self.prefix_in_bucket
 
         if self.endpoint is not None:
-            s.append(f"endpoint='{self.endpoint}'")
+            rv["endpoint"] = self.endpoint
 
-        return ",".join(s)
+        return toml.TomlEncoder().dump_inline_table(rv)
 
     def do_cleanup(self):
         if not self.cleanup:
@@ -384,4 +388,4 @@ def remote_storage_to_toml_inline_table(remote_storage: RemoteStorage) -> str:
     if not isinstance(remote_storage, (LocalFsStorage, S3Storage)):
         raise Exception("invalid remote storage type")
 
-    return f"{{{remote_storage.to_toml_inline_table()}}}"
+    return remote_storage.to_toml_inline_table()
diff --git a/test_runner/fixtures/workload.py b/test_runner/fixtures/workload.py
new file mode 100644
index 0000000000..241531437c
--- /dev/null
+++ b/test_runner/fixtures/workload.py
@@ -0,0 +1,148 @@
+from typing import Optional
+
+from fixtures.log_helper import log
+from fixtures.neon_fixtures import (
+    Endpoint,
+    NeonEnv,
+    last_flush_lsn_upload,
+    wait_for_last_flush_lsn,
+)
+from fixtures.pageserver.utils import wait_for_last_record_lsn, wait_for_upload
+from fixtures.types import TenantId, TimelineId
+
+
+class Workload:
+    """
+    This is not a general purpose load generator: it exists for storage tests that need to inject some
+    high level types of storage work via the postgres interface:
+    - layer writes (`write_rows`)
+    - work for compaction (`churn_rows`)
+    - reads, checking we get the right data (`validate`)
+    """
+
+    def __init__(self, env: NeonEnv, tenant_id: TenantId, timeline_id: TimelineId):
+        self.env = env
+        self.tenant_id = tenant_id
+        self.timeline_id = timeline_id
+        self.table = "foo"
+        # expect_rows counts rows inserted by write_rows; churn_cursor counts rows churned so far
+        self.expect_rows = 0
+        self.churn_cursor = 0
+        # Created on the first endpoint() call and reused afterwards
+        self._endpoint: Optional[Endpoint] = None
+
+    def endpoint(self, pageserver_id: int) -> Endpoint:
+        """Create and start the endpoint on first use, otherwise reconfigure it; return it."""
+        if self._endpoint is None:
+            self._endpoint = self.env.endpoints.create(
+                "main",
+                tenant_id=self.tenant_id,
+                pageserver_id=pageserver_id,
+                endpoint_id="ep-workload",
+            )
+            self._endpoint.start(pageserver_id=pageserver_id)
+        else:
+            self._endpoint.reconfigure(pageserver_id=pageserver_id)
+
+        connstring = self._endpoint.safe_psql(
+            "SELECT setting FROM pg_settings WHERE name='neon.pageserver_connstring'"
+        )
+        log.info(f"Workload.endpoint: connstr={connstring}")
+        return self._endpoint
+
+    def __del__(self):
+        if self._endpoint is not None:
+            self._endpoint.stop()  # stop the endpoint that endpoint() started
+
+    def init(self, pageserver_id: int):
+        """Create the table and the neon_test_utils extension, then call last_flush_lsn_upload."""
+        endpoint = self.endpoint(pageserver_id)
+        endpoint.safe_psql(f"CREATE TABLE {self.table} (id INTEGER PRIMARY KEY, val text);")
+        endpoint.safe_psql("CREATE EXTENSION IF NOT EXISTS neon_test_utils;")
+        last_flush_lsn_upload(
+            self.env, endpoint, self.tenant_id, self.timeline_id, pageserver_id=pageserver_id
+        )
+
+    def write_rows(self, n: int, pageserver_id: int):
+        """Insert `n` rows with the next unused ids; return last_flush_lsn_upload's result."""
+        endpoint = self.endpoint(pageserver_id)
+        start = self.expect_rows
+        end = start + n - 1
+        self.expect_rows += n
+        dummy_value = "blah"
+        endpoint.safe_psql(
+            f"""
+            INSERT INTO {self.table} (id, val)
+            SELECT g, '{dummy_value}'
+            FROM generate_series({start}, {end}) g
+            """
+        )
+        return last_flush_lsn_upload(
+            self.env, endpoint, self.tenant_id, self.timeline_id, pageserver_id=pageserver_id
+        )
+
+    def churn_rows(self, n: int, pageserver_id: int, upload: bool = True):
+        """Rewrite `n` existing rows (upsert + VACUUM), wrapping the cursor around the table."""
+        assert self.expect_rows >= n
+        max_iters = 10
+        endpoint = self.endpoint(pageserver_id)
+        todo = n
+        i = 0
+        while todo > 0:
+            i += 1
+            if i > max_iters:
+                raise RuntimeError(f"churn_rows: {todo}/{n} rows left after {max_iters} iters")
+            start = self.churn_cursor % self.expect_rows
+            n_iter = min((self.expect_rows - start), todo)
+            todo -= n_iter
+
+            end = start + n_iter - 1
+
+            log.info(
+                f"start,end = {start},{end}, cursor={self.churn_cursor}, expect_rows={self.expect_rows}"
+            )
+
+            assert end < self.expect_rows
+
+            self.churn_cursor += n_iter
+            dummy_value = "blah"
+            endpoint.safe_psql_many(
+                [
+                    f"""
+                INSERT INTO {self.table} (id, val)
+                SELECT g, '{dummy_value}'
+                FROM generate_series({start}, {end}) g
+                ON CONFLICT (id) DO UPDATE
+                SET val = EXCLUDED.val
+                """,
+                    f"VACUUM {self.table}",
+                ]
+            )
+
+        last_flush_lsn = wait_for_last_flush_lsn(
+            self.env, endpoint, self.tenant_id, self.timeline_id, pageserver_id=pageserver_id
+        )
+        ps_http = self.env.get_pageserver(pageserver_id).http_client()
+        wait_for_last_record_lsn(ps_http, self.tenant_id, self.timeline_id, last_flush_lsn)
+
+        if upload:
+            # force a checkpoint to trigger upload
+            ps_http.timeline_checkpoint(self.tenant_id, self.timeline_id)
+            log.info(f"Churn: waiting for remote LSN {last_flush_lsn}")
+            wait_for_upload(ps_http, self.tenant_id, self.timeline_id, last_flush_lsn)
+        else:
+            log.info(f"Churn: not waiting for upload, disk LSN {last_flush_lsn}")
+
+    def validate(self, pageserver_id: int):
+        """Clear the buffer cache, then assert the table's row count equals `expect_rows`."""
+        endpoint = self.endpoint(pageserver_id)
+        result = endpoint.safe_psql_many(
+            [
+                "select clear_buffer_cache()",
+                f"""
+            SELECT COUNT(*) FROM {self.table}
+            """,
+            ]
+        )
+        log.info(f"validate({self.expect_rows}): {result}")
+        assert result == [[("",)], [(self.expect_rows,)]]
diff --git a/test_runner/regress/test_auth.py b/test_runner/regress/test_auth.py
index 7487106c44..bd87ff3efd 100644
--- a/test_runner/regress/test_auth.py
+++ b/test_runner/regress/test_auth.py
@@ -92,8 +92,9 @@ def test_compute_auth_to_pageserver(neon_env_builder: NeonEnvBuilder):
 def test_pageserver_multiple_keys(neon_env_builder: NeonEnvBuilder):
     neon_env_builder.auth_enabled = True
     env = neon_env_builder.init_start()
-    env.pageserver.allowed_errors.append(".*Authentication error: InvalidSignature.*")
-    env.pageserver.allowed_errors.append(".*Unauthorized: malformed jwt token.*")
+    env.pageserver.allowed_errors.extend(
+        [".*Authentication error: InvalidSignature.*", ".*Unauthorized: malformed jwt token.*"]
+    )
 
     pageserver_token_old = env.auth_keys.generate_pageserver_token()
     pageserver_http_client_old = env.pageserver.http_client(pageserver_token_old)
@@ -145,9 +146,9 @@ def test_pageserver_multiple_keys(neon_env_builder: NeonEnvBuilder):
 def test_pageserver_key_reload(neon_env_builder: NeonEnvBuilder):
     neon_env_builder.auth_enabled = True
     env = neon_env_builder.init_start()
-    env.pageserver.allowed_errors.append(".*Authentication error: InvalidSignature.*")
-    env.pageserver.allowed_errors.append(".*Unauthorized: malformed jwt token.*")
-
+    env.pageserver.allowed_errors.extend(
+        [".*Authentication error: InvalidSignature.*", ".*Unauthorized: malformed jwt token.*"]
+    )
     pageserver_token_old = env.auth_keys.generate_pageserver_token()
     pageserver_http_client_old = env.pageserver.http_client(pageserver_token_old)
 
diff --git a/test_runner/regress/test_branch_behind.py b/test_runner/regress/test_branch_behind.py
index a19b2862f8..9879254897 100644
--- a/test_runner/regress/test_branch_behind.py
+++ b/test_runner/regress/test_branch_behind.py
@@ -14,8 +14,9 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
     neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}"
     env = neon_env_builder.init_start()
 
-    env.pageserver.allowed_errors.append(".*invalid branch start lsn.*")
-    env.pageserver.allowed_errors.append(".*invalid start lsn .* for ancestor timeline.*")
+    env.pageserver.allowed_errors.extend(
+        [".*invalid branch start lsn.*", ".*invalid start lsn .* for ancestor timeline.*"]
+    )
 
     # Branch at the point where only 100 rows were inserted
     branch_behind_timeline_id = env.neon_cli.create_branch("test_branch_behind")
diff --git a/test_runner/regress/test_branching.py b/test_runner/regress/test_branching.py
index a908dd713a..82ca985d01 100644
--- a/test_runner/regress/test_branching.py
+++ b/test_runner/regress/test_branching.py
@@ -148,11 +148,11 @@ def test_cannot_create_endpoint_on_non_uploaded_timeline(neon_env_builder: NeonE
     env = neon_env_builder.init_configs()
     env.start()
 
-    env.pageserver.allowed_errors.append(
-        ".*request{method=POST path=/v1/tenant/.*/timeline request_id=.*}: request was dropped before completing.*"
-    )
-    env.pageserver.allowed_errors.append(
-        ".*page_service_conn_main.*: query handler for 'basebackup .* is not active, state: Loading"
+    env.pageserver.allowed_errors.extend(
+        [
+            ".*request{method=POST path=/v1/tenant/.*/timeline request_id=.*}: request was dropped before completing.*",
+            ".*page_service_conn_main.*: query handler for 'basebackup .* is not active, state: Loading",
+        ]
     )
     ps_http = env.pageserver.http_client()
 
@@ -247,11 +247,11 @@ def test_competing_branchings_from_loading_race_to_ok_or_err(neon_env_builder: N
     env = neon_env_builder.init_configs()
     env.start()
 
-    env.pageserver.allowed_errors.append(
-        ".*request{method=POST path=/v1/tenant/.*/timeline request_id=.*}: request was dropped before completing.*"
-    )
-    env.pageserver.allowed_errors.append(
-        ".*Error processing HTTP request: InternalServerError\\(Timeline .*/.* already exists in pageserver's memory"
+    env.pageserver.allowed_errors.extend(
+        [
+            ".*request{method=POST path=/v1/tenant/.*/timeline request_id=.*}: request was dropped before completing.*",
+            ".*Error processing HTTP request: InternalServerError\\(Timeline .*/.* already exists in pageserver's memory",
+        ]
     )
     ps_http = env.pageserver.http_client()
 
diff --git a/test_runner/regress/test_compatibility.py b/test_runner/regress/test_compatibility.py
index f3c6af4427..3f5de100fd 100644
--- a/test_runner/regress/test_compatibility.py
+++ b/test_runner/regress/test_compatibility.py
@@ -1,30 +1,25 @@
-import copy
 import os
 import shutil
 import subprocess
 import tempfile
 from pathlib import Path
-from typing import Any, List, Optional
+from typing import List, Optional
 
 import pytest
-import toml  # TODO: replace with tomllib for Python >= 3.11
-from fixtures.log_helper import log
+import toml
 from fixtures.neon_fixtures import (
-    NeonCli,
+    NeonEnv,
     NeonEnvBuilder,
     PgBin,
 )
-from fixtures.pageserver.http import PageserverHttpClient
 from fixtures.pageserver.utils import (
     timeline_delete_wait_completed,
     wait_for_last_record_lsn,
     wait_for_upload,
 )
 from fixtures.pg_version import PgVersion
-from fixtures.port_distributor import PortDistributor
-from fixtures.remote_storage import LocalFsStorage, RemoteStorageKind, RemoteStorageUser
+from fixtures.remote_storage import RemoteStorageKind
 from fixtures.types import Lsn
-from pytest import FixtureRequest
 
 #
 # A test suite that help to prevent unintentionally breaking backward or forward compatibility between Neon releases.
@@ -37,8 +32,8 @@ from pytest import FixtureRequest
 #   If the breakage is intentional, the test can be xfaild with setting ALLOW_FORWARD_COMPATIBILITY_BREAKAGE=true.
 #
 # The file contains a couple of helper functions:
-# - prepare_snapshot copies the snapshot, cleans it up and makes it ready for the current version of Neon (replaces paths and ports in config files).
 # - check_neon_works performs the test itself, feel free to add more checks there.
+# - dump_differs compares two SQL dumps and writes the diff to a file.
 #
 #
 # How to run `test_backward_compatibility` locally:
@@ -46,6 +41,7 @@ from pytest import FixtureRequest
 #    export DEFAULT_PG_VERSION=15
 #    export BUILD_TYPE=release
 #    export CHECK_ONDISK_DATA_COMPATIBILITY=true
+#    export COMPATIBILITY_SNAPSHOT_DIR=test_output/compatibility_snapshot_pgv${DEFAULT_PG_VERSION}
 #
 #    # Build previous version of binaries and create a data snapshot:
 #    rm -rf pg_install target
@@ -59,8 +55,7 @@ from pytest import FixtureRequest
 #    CARGO_BUILD_FLAGS="--features=testing" make -s -j`nproc`
 #
 #    # Run backward compatibility test
-#    COMPATIBILITY_SNAPSHOT_DIR=test_output/compatibility_snapshot_pgv${DEFAULT_PG_VERSION} \
-#       ./scripts/pytest -k test_backward_compatibility
+#    ./scripts/pytest -k test_backward_compatibility
 #
 #
 # How to run `test_forward_compatibility` locally:
@@ -68,6 +63,8 @@ from pytest import FixtureRequest
 #    export DEFAULT_PG_VERSION=15
 #    export BUILD_TYPE=release
 #    export CHECK_ONDISK_DATA_COMPATIBILITY=true
+#    export COMPATIBILITY_NEON_BIN=neon_previous/target/${BUILD_TYPE}
+#    export COMPATIBILITY_POSTGRES_DISTRIB_DIR=neon_previous/pg_install
 #
 #    # Build previous version of binaries and store them somewhere:
 #    rm -rf pg_install target
@@ -84,9 +81,7 @@ from pytest import FixtureRequest
 #    ./scripts/pytest -k test_create_snapshot
 #
 #    # Run forward compatibility test
-#    COMPATIBILITY_NEON_BIN=neon_previous/target/${BUILD_TYPE} \
-#    COMPATIBILITY_POSTGRES_DISTRIB_DIR=neon_previous/pg_install \
-#       ./scripts/pytest -k test_forward_compatibility
+#    ./scripts/pytest -k test_forward_compatibility
 #
 
 check_ondisk_data_compatibility_if_enabled = pytest.mark.skipif(
@@ -155,13 +150,9 @@ def test_create_snapshot(
 @pytest.mark.xdist_group("compatibility")
 @pytest.mark.order(after="test_create_snapshot")
 def test_backward_compatibility(
-    pg_bin: PgBin,
-    port_distributor: PortDistributor,
+    neon_env_builder: NeonEnvBuilder,
     test_output_dir: Path,
-    neon_binpath: Path,
-    pg_distrib_dir: Path,
     pg_version: PgVersion,
-    request: FixtureRequest,
 ):
     """
     Test that the new binaries can read old data
@@ -177,23 +168,15 @@ def test_backward_compatibility(
     )
 
     try:
-        # Copy the snapshot to current directory, and prepare for the test
-        prepare_snapshot(
-            from_dir=compatibility_snapshot_dir,
-            to_dir=test_output_dir / "compatibility_snapshot",
-            port_distributor=port_distributor,
-        )
+        neon_env_builder.num_safekeepers = 3
+        env = neon_env_builder.from_repo_dir(compatibility_snapshot_dir / "repo")
+        neon_env_builder.start()
 
         check_neon_works(
-            test_output_dir / "compatibility_snapshot" / "repo",
-            neon_binpath,
-            neon_binpath,
-            pg_distrib_dir,
-            pg_version,
-            port_distributor,
-            test_output_dir,
-            pg_bin,
-            request,
+            env,
+            test_output_dir=test_output_dir,
+            sql_dump_path=compatibility_snapshot_dir / "dump.sql",
+            repo_dir=env.repo_dir,
         )
     except Exception:
         if breaking_changes_allowed:
@@ -212,12 +195,10 @@ def test_backward_compatibility(
 @pytest.mark.xdist_group("compatibility")
 @pytest.mark.order(after="test_create_snapshot")
 def test_forward_compatibility(
+    neon_env_builder: NeonEnvBuilder,
     test_output_dir: Path,
     top_output_dir: Path,
-    port_distributor: PortDistributor,
     pg_version: PgVersion,
-    request: FixtureRequest,
-    neon_binpath: Path,
 ):
     """
     Test that the old binaries can read new data
@@ -244,24 +225,19 @@ def test_forward_compatibility(
     )
 
     try:
-        # Copy the snapshot to current directory, and prepare for the test
-        prepare_snapshot(
-            from_dir=compatibility_snapshot_dir,
-            to_dir=test_output_dir / "compatibility_snapshot",
-            port_distributor=port_distributor,
+        neon_env_builder.num_safekeepers = 3
+        env = neon_env_builder.from_repo_dir(
+            compatibility_snapshot_dir / "repo",
+            neon_binpath=compatibility_neon_bin,
             pg_distrib_dir=compatibility_postgres_distrib_dir,
         )
+        neon_env_builder.start()
 
         check_neon_works(
-            test_output_dir / "compatibility_snapshot" / "repo",
-            compatibility_neon_bin,
-            neon_binpath,
-            compatibility_postgres_distrib_dir,
-            pg_version,
-            port_distributor,
-            test_output_dir,
-            PgBin(test_output_dir, compatibility_postgres_distrib_dir, pg_version),
-            request,
+            env,
+            test_output_dir=test_output_dir,
+            sql_dump_path=compatibility_snapshot_dir / "dump.sql",
+            repo_dir=env.repo_dir,
         )
     except Exception:
         if breaking_changes_allowed:
@@ -276,189 +252,26 @@ def test_forward_compatibility(
     ), "Breaking changes are allowed by ALLOW_FORWARD_COMPATIBILITY_BREAKAGE, but the test has passed without any breakage"
 
 
-def prepare_snapshot(
-    from_dir: Path,
-    to_dir: Path,
-    port_distributor: PortDistributor,
-    pg_distrib_dir: Optional[Path] = None,
-):
-    assert from_dir.exists(), f"Snapshot '{from_dir}' doesn't exist"
-    assert (from_dir / "repo").exists(), f"Snapshot '{from_dir}' doesn't contain a repo directory"
-    assert (from_dir / "dump.sql").exists(), f"Snapshot '{from_dir}' doesn't contain a dump.sql"
+def check_neon_works(env: NeonEnv, test_output_dir: Path, sql_dump_path: Path, repo_dir: Path):
+    ep = env.endpoints.create_start("main")
+    pg_bin = PgBin(test_output_dir, env.pg_distrib_dir, env.pg_version)
 
-    log.info(f"Copying snapshot from {from_dir} to {to_dir}")
-    shutil.copytree(from_dir, to_dir)
-
-    repo_dir = to_dir / "repo"
-
-    snapshot_config_toml = repo_dir / "config"
-    snapshot_config = toml.load(snapshot_config_toml)
-
-    # Remove old logs to avoid confusion in test artifacts
-    for logfile in repo_dir.glob("**/*.log"):
-        logfile.unlink()
-
-    # Remove old computes in 'endpoints'. Old versions of the control plane used a directory
-    # called "pgdatadirs". Delete it, too.
-    if (repo_dir / "endpoints").exists():
-        shutil.rmtree(repo_dir / "endpoints")
-    if (repo_dir / "pgdatadirs").exists():
-        shutil.rmtree(repo_dir / "pgdatadirs")
-    os.mkdir(repo_dir / "endpoints")
-
-    # Update paths and ports in config files
-    legacy_pageserver_toml = repo_dir / "pageserver.toml"
-    legacy_bundle = os.path.exists(legacy_pageserver_toml)
-
-    path_to_config: dict[Path, dict[Any, Any]] = {}
-    if legacy_bundle:
-        os.mkdir(repo_dir / "pageserver_1")
-        path_to_config[repo_dir / "pageserver_1" / "pageserver.toml"] = toml.load(
-            legacy_pageserver_toml
-        )
-        os.remove(legacy_pageserver_toml)
-        os.rename(repo_dir / "tenants", repo_dir / "pageserver_1" / "tenants")
-    else:
-        for ps_conf in snapshot_config["pageservers"]:
-            config_path = repo_dir / f"pageserver_{ps_conf['id']}" / "pageserver.toml"
-            path_to_config[config_path] = toml.load(config_path)
-
-    # For each pageserver config, edit it and rewrite
-    for config_path, pageserver_config in path_to_config.items():
-        pageserver_config["remote_storage"]["local_path"] = str(
-            LocalFsStorage.component_path(repo_dir, RemoteStorageUser.PAGESERVER)
-        )
-
-        for param in ("listen_http_addr", "listen_pg_addr", "broker_endpoint"):
-            pageserver_config[param] = port_distributor.replace_with_new_port(
-                pageserver_config[param]
-            )
-
-        # We don't use authentication in compatibility tests
-        # so just remove authentication related settings.
-        pageserver_config.pop("pg_auth_type", None)
-        pageserver_config.pop("http_auth_type", None)
-
-        if pg_distrib_dir:
-            pageserver_config["pg_distrib_dir"] = str(pg_distrib_dir)
-
-        with config_path.open("w") as f:
-            toml.dump(pageserver_config, f)
-
-    # neon_local config doesn't have to be backward compatible.  If we're using a dump from before
-    # it supported multiple pageservers, fix it up.
-    if "pageservers" not in snapshot_config:
-        snapshot_config["pageservers"] = [snapshot_config["pageserver"]]
-        del snapshot_config["pageserver"]
-
-    for param in ("listen_http_addr", "listen_pg_addr"):
-        for pageserver in snapshot_config["pageservers"]:
-            pageserver[param] = port_distributor.replace_with_new_port(pageserver[param])
-    snapshot_config["broker"]["listen_addr"] = port_distributor.replace_with_new_port(
-        snapshot_config["broker"]["listen_addr"]
-    )
-    for sk in snapshot_config["safekeepers"]:
-        for param in ("http_port", "pg_port", "pg_tenant_only_port"):
-            sk[param] = port_distributor.replace_with_new_port(sk[param])
-
-    if pg_distrib_dir:
-        snapshot_config["pg_distrib_dir"] = str(pg_distrib_dir)
-
-    with snapshot_config_toml.open("w") as f:
-        toml.dump(snapshot_config, f)
-
-    # Ensure that snapshot doesn't contain references to the original path
-    rv = subprocess.run(
-        [
-            "grep",
-            "--recursive",
-            "--binary-file=without-match",
-            "--files-with-matches",
-            "test_create_snapshot/repo",
-            str(repo_dir),
-        ],
-        capture_output=True,
-        text=True,
-    )
-    assert (
-        rv.returncode != 0
-    ), f"there're files referencing `test_create_snapshot/repo`, this path should be replaced with {repo_dir}:\n{rv.stdout}"
-
-
-def check_neon_works(
-    repo_dir: Path,
-    neon_target_binpath: Path,
-    neon_current_binpath: Path,
-    pg_distrib_dir: Path,
-    pg_version: PgVersion,
-    port_distributor: PortDistributor,
-    test_output_dir: Path,
-    pg_bin: PgBin,
-    request: FixtureRequest,
-):
-    snapshot_config_toml = repo_dir / "config"
-    snapshot_config = toml.load(snapshot_config_toml)
-    snapshot_config["neon_distrib_dir"] = str(neon_target_binpath)
-    snapshot_config["postgres_distrib_dir"] = str(pg_distrib_dir)
-    with (snapshot_config_toml).open("w") as f:
-        toml.dump(snapshot_config, f)
-
-    # TODO: replace with NeonEnvBuilder / NeonEnv
-    config: Any = type("NeonEnvStub", (object,), {})
-    config.rust_log_override = None
-    config.repo_dir = repo_dir
-    config.pg_version = pg_version
-    config.initial_tenant = snapshot_config["default_tenant_id"]
-    config.pg_distrib_dir = pg_distrib_dir
-    config.remote_storage = None
-    config.sk_remote_storage = None
-
-    # Use the "target" binaries to launch the storage nodes
-    config_target = config
-    config_target.neon_binpath = neon_target_binpath
-    # We are using maybe-old binaries for neon services, but want to use current
-    # binaries for test utilities like neon_local
-    config_target.neon_local_binpath = neon_current_binpath
-    cli_target = NeonCli(config_target)
-
-    # And the current binaries to launch computes
-    snapshot_config["neon_distrib_dir"] = str(neon_current_binpath)
-    with (snapshot_config_toml).open("w") as f:
-        toml.dump(snapshot_config, f)
-    config_current = copy.copy(config)
-    config_current.neon_binpath = neon_current_binpath
-    cli_current = NeonCli(config_current)
-
-    cli_target.raw_cli(["start"])
-    request.addfinalizer(lambda: cli_target.raw_cli(["stop"]))
-
-    pg_port = port_distributor.get_port()
-    http_port = port_distributor.get_port()
-    cli_current.endpoint_create(
-        branch_name="main", pg_port=pg_port, http_port=http_port, endpoint_id="ep-main"
-    )
-    cli_current.endpoint_start("ep-main")
-    request.addfinalizer(lambda: cli_current.endpoint_stop("ep-main"))
-
-    connstr = f"host=127.0.0.1 port={pg_port} user=cloud_admin dbname=postgres"
+    connstr = ep.connstr()
     pg_bin.run_capture(
         ["pg_dumpall", f"--dbname={connstr}", f"--file={test_output_dir / 'dump.sql'}"]
     )
     initial_dump_differs = dump_differs(
-        repo_dir.parent / "dump.sql",
+        sql_dump_path,
         test_output_dir / "dump.sql",
         test_output_dir / "dump.filediff",
     )
 
     # Check that project can be recovered from WAL
     # loosely based on https://www.notion.so/neondatabase/Storage-Recovery-from-WAL-d92c0aac0ebf40df892b938045d7d720
-    tenant_id = snapshot_config["default_tenant_id"]
-    timeline_id = dict(snapshot_config["branch_name_mappings"]["main"])[tenant_id]
-    pageserver_port = snapshot_config["pageservers"][0]["listen_http_addr"].split(":")[-1]
-    pageserver_http = PageserverHttpClient(
-        port=pageserver_port,
-        is_testing_enabled_or_skip=lambda: True,  # TODO: check if testing really enabled
-    )
+    pageserver_http = env.pageserver.http_client()
+    tenant_id = env.initial_tenant
+    timeline_id = env.initial_timeline
+    pg_version = env.pg_version
 
     shutil.rmtree(repo_dir / "local_fs_remote_storage")
     timeline_delete_wait_completed(pageserver_http, tenant_id, timeline_id)
@@ -494,6 +307,11 @@ def dump_differs(
     Returns True if the dumps differ and produced diff is not allowed, False otherwise (in most cases we want it to return False).
     """
 
+    if not first.exists():
+        raise FileNotFoundError(f"{first} doesn't exist")
+    if not second.exists():
+        raise FileNotFoundError(f"{second} doesn't exist")
+
     with output.open("w") as stdout:
         res = subprocess.run(
             [
diff --git a/test_runner/regress/test_config.py b/test_runner/regress/test_config.py
old mode 100755
new mode 100644
diff --git a/test_runner/regress/test_import.py b/test_runner/regress/test_import.py
index 920e8d0b72..faedf5d944 100644
--- a/test_runner/regress/test_import.py
+++ b/test_runner/regress/test_import.py
@@ -99,12 +99,13 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build
         ]
     )
 
-    # FIXME: we should clean up pageserver to not print this
-    env.pageserver.allowed_errors.append(".*exited with error: unexpected message type: CopyData.*")
-
-    # FIXME: Is this expected?
-    env.pageserver.allowed_errors.append(
-        ".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
+    env.pageserver.allowed_errors.extend(
+        [
+            # FIXME: we should clean up pageserver to not print this
+            ".*exited with error: unexpected message type: CopyData.*",
+            # FIXME: Is this expected?
+            ".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*",
+        ]
     )
 
     def import_tar(base, wal):
diff --git a/test_runner/regress/test_logical_replication.py b/test_runner/regress/test_logical_replication.py
index d2d8d71e3f..51e358e60d 100644
--- a/test_runner/regress/test_logical_replication.py
+++ b/test_runner/regress/test_logical_replication.py
@@ -236,3 +236,30 @@ def test_wal_page_boundary_start(neon_simple_env: NeonEnv, vanilla_pg):
     assert vanilla_pg.safe_psql(
         "select sum(somedata) from replication_example"
     ) == endpoint.safe_psql("select sum(somedata) from replication_example")
+
+
+#
+# Check that replication slots are not inherited by a branch
+#
+def test_slots_and_branching(neon_simple_env: NeonEnv):
+    env = neon_simple_env
+
+    tenant, timeline = env.neon_cli.create_tenant()
+    env.pageserver.http_client()
+
+    main_branch = env.endpoints.create_start("main", tenant_id=tenant)
+    main_cur = main_branch.connect().cursor()
+
+    # Create a logical replication slot on the main branch
+    main_cur.execute("select pg_create_logical_replication_slot('my_slot', 'pgoutput')")
+
+    wait_for_last_flush_lsn(env, main_branch, tenant, timeline)
+
+    # Create branch ws.
+    env.neon_cli.create_branch("ws", "main", tenant_id=tenant)
+    ws_branch = env.endpoints.create_start("ws", tenant_id=tenant)
+    log.info("postgres is running on 'ws' branch")
+
+    # Check that we can create slot with the same name
+    ws_cur = ws_branch.connect().cursor()
+    ws_cur.execute("select pg_create_logical_replication_slot('my_slot', 'pgoutput')")
diff --git a/test_runner/regress/test_pageserver_generations.py b/test_runner/regress/test_pageserver_generations.py
index 66cc286aba..4488be31c5 100644
--- a/test_runner/regress/test_pageserver_generations.py
+++ b/test_runner/regress/test_pageserver_generations.py
@@ -23,14 +23,20 @@ from fixtures.neon_fixtures import (
     PgBin,
     S3Scrubber,
     last_flush_lsn_upload,
-    wait_for_last_flush_lsn,
 )
-from fixtures.pageserver.utils import list_prefix
+from fixtures.pageserver.http import PageserverApiException
+from fixtures.pageserver.utils import (
+    assert_tenant_state,
+    list_prefix,
+    wait_for_last_record_lsn,
+    wait_for_upload,
+)
 from fixtures.remote_storage import (
     RemoteStorageKind,
 )
 from fixtures.types import TenantId, TimelineId
 from fixtures.utils import print_gc_result, wait_until
+from fixtures.workload import Workload
 
 # A tenant configuration that is convenient for generating uploads and deletions
 # without a large amount of postgres traffic.
@@ -93,7 +99,10 @@ def generate_uploads_and_deletions(
             )
             assert tenant_id is not None
             assert timeline_id is not None
-            wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
+            # We are waiting for uploads as well as local flush, in order to avoid leaving the system
+            # in a state where there are "future layers" in remote storage that will generate deletions
+            # after a restart.
+            last_flush_lsn_upload(env, endpoint, tenant_id, timeline_id)
             ps_http.timeline_checkpoint(tenant_id, timeline_id)
 
         # Compaction should generate some GC-elegible layers
@@ -560,3 +569,91 @@ def test_eviction_across_generations(neon_env_builder: NeonEnvBuilder):
     read_all(env, tenant_id, timeline_id)
     evict_all_layers(env, tenant_id, timeline_id)
     read_all(env, tenant_id, timeline_id)
+
+
+def test_multi_attach(
+    neon_env_builder: NeonEnvBuilder,
+    pg_bin: PgBin,
+):
+    neon_env_builder.enable_generations = True
+    neon_env_builder.num_pageservers = 3
+    neon_env_builder.enable_pageserver_remote_storage(
+        remote_storage_kind=RemoteStorageKind.MOCK_S3,
+    )
+    env = neon_env_builder.init_start(initial_tenant_conf=TENANT_CONF)
+
+    pageservers = env.pageservers
+    http_clients = list([p.http_client() for p in pageservers])
+    tenant_id = env.initial_tenant
+    timeline_id = env.initial_timeline
+
+    # We will intentionally create situations where stale deletions happen from non-latest-generation
+    # nodes when the tenant is multiply-attached
+    for ps in env.pageservers:
+        ps.allowed_errors.extend(
+            [".*Dropped remote consistent LSN updates.*", ".*Dropping stale deletions.*"]
+        )
+
+    # Initially, the tenant will be attached to the first pageserver (first is default in our test harness)
+    wait_until(10, 0.2, lambda: assert_tenant_state(http_clients[0], tenant_id, "Active"))
+    _detail = http_clients[0].timeline_detail(tenant_id, timeline_id)
+    with pytest.raises(PageserverApiException):
+        http_clients[1].timeline_detail(tenant_id, timeline_id)
+    with pytest.raises(PageserverApiException):
+        http_clients[2].timeline_detail(tenant_id, timeline_id)
+
+    workload = Workload(env, tenant_id, timeline_id)
+    workload.init(pageservers[0].id)
+    workload.write_rows(1000, pageservers[0].id)
+
+    # Attach the tenant to the other two pageservers
+    pageservers[1].tenant_attach(env.initial_tenant)
+    pageservers[2].tenant_attach(env.initial_tenant)
+
+    wait_until(10, 0.2, lambda: assert_tenant_state(http_clients[1], tenant_id, "Active"))
+    wait_until(10, 0.2, lambda: assert_tenant_state(http_clients[2], tenant_id, "Active"))
+
+    # Now they all have it attached
+    _details = list([c.timeline_detail(tenant_id, timeline_id) for c in http_clients])
+    _detail = http_clients[1].timeline_detail(tenant_id, timeline_id)
+    _detail = http_clients[2].timeline_detail(tenant_id, timeline_id)
+
+    # The endpoint can use any pageserver to service its reads
+    for pageserver in pageservers:
+        workload.validate(pageserver.id)
+
+    # If we write some more data, all the nodes can see it, including stale ones
+    wrote_lsn = workload.write_rows(1000, pageservers[0].id)
+    for ps_http in http_clients:
+        wait_for_last_record_lsn(ps_http, tenant_id, timeline_id, wrote_lsn)
+
+    # ...and indeed endpoints can see it via any of the pageservers
+    for pageserver in pageservers:
+        workload.validate(pageserver.id)
+
+    # Prompt all the pageservers, including stale ones, to upload ingested layers to remote storage
+    for ps_http in http_clients:
+        ps_http.timeline_checkpoint(tenant_id, timeline_id)
+        wait_for_upload(ps_http, tenant_id, timeline_id, wrote_lsn)
+
+    # Now, the contents of remote storage will be a set of layers from each pageserver, but with unique
+    # generation numbers
+    # TODO: validate remote storage contents
+
+    # Stop all pageservers
+    for ps in pageservers:
+        ps.stop()
+
+    # Returning to a normal healthy state: all pageservers will start, but only the one most
+    # recently attached via the control plane will re-attach on startup
+    for ps in pageservers:
+        ps.start()
+
+    with pytest.raises(PageserverApiException):
+        _detail = http_clients[0].timeline_detail(tenant_id, timeline_id)
+    with pytest.raises(PageserverApiException):
+        _detail = http_clients[1].timeline_detail(tenant_id, timeline_id)
+    _detail = http_clients[2].timeline_detail(tenant_id, timeline_id)
+
+    # All data we wrote while multi-attached remains readable
+    workload.validate(pageservers[2].id)
diff --git a/test_runner/regress/test_pageserver_metric_collection.py b/test_runner/regress/test_pageserver_metric_collection.py
index b76dbbee03..042961baa5 100644
--- a/test_runner/regress/test_pageserver_metric_collection.py
+++ b/test_runner/regress/test_pageserver_metric_collection.py
@@ -64,13 +64,13 @@ def test_metric_collection(
     # spin up neon,  after http server is ready
     env = neon_env_builder.init_start(initial_tenant_conf={"pitr_interval": "0 sec"})
     # httpserver is shut down before pageserver during passing run
-    env.pageserver.allowed_errors.append(".*metrics endpoint refused the sent metrics*")
-    # we have a fast rate of calculation, these can happen at shutdown
-    env.pageserver.allowed_errors.append(
-        ".*synthetic_size_worker:calculate_synthetic_size.*:gather_size_inputs.*: failed to calculate logical size at .*: cancelled.*"
-    )
-    env.pageserver.allowed_errors.append(
-        ".*synthetic_size_worker: failed to calculate synthetic size for tenant .*: failed to calculate some logical_sizes"
+    env.pageserver.allowed_errors.extend(
+        [
+            ".*metrics endpoint refused the sent metrics*",
+            # we have a fast rate of calculation, these can happen at shutdown
+            ".*synthetic_size_worker:calculate_synthetic_size.*:gather_size_inputs.*: failed to calculate logical size at .*: cancelled.*",
+            ".*synthetic_size_worker: failed to calculate synthetic size for tenant .*: failed to calculate some logical_sizes",
+        ]
     )
 
     tenant_id = env.initial_tenant
@@ -212,13 +212,13 @@ def test_metric_collection_cleans_up_tempfile(
     pageserver_http = env.pageserver.http_client()
 
     # httpserver is shut down before pageserver during passing run
-    env.pageserver.allowed_errors.append(".*metrics endpoint refused the sent metrics*")
-    # we have a fast rate of calculation, these can happen at shutdown
-    env.pageserver.allowed_errors.append(
-        ".*synthetic_size_worker:calculate_synthetic_size.*:gather_size_inputs.*: failed to calculate logical size at .*: cancelled.*"
-    )
-    env.pageserver.allowed_errors.append(
-        ".*synthetic_size_worker: failed to calculate synthetic size for tenant .*: failed to calculate some logical_sizes"
+    env.pageserver.allowed_errors.extend(
+        [
+            ".*metrics endpoint refused the sent metrics*",
+            # we have a fast rate of calculation, these can happen at shutdown
+            ".*synthetic_size_worker:calculate_synthetic_size.*:gather_size_inputs.*: failed to calculate logical size at .*: cancelled.*",
+            ".*synthetic_size_worker: failed to calculate synthetic size for tenant .*: failed to calculate some logical_sizes",
+        ]
     )
 
     tenant_id = env.initial_tenant
diff --git a/test_runner/regress/test_pageserver_secondary.py b/test_runner/regress/test_pageserver_secondary.py
new file mode 100644
index 0000000000..b14b7f1328
--- /dev/null
+++ b/test_runner/regress/test_pageserver_secondary.py
@@ -0,0 +1,332 @@
+import random
+from typing import Any, Dict, Optional
+
+import pytest
+from fixtures.log_helper import log
+from fixtures.neon_fixtures import NeonEnvBuilder, NeonPageserver
+from fixtures.remote_storage import RemoteStorageKind
+from fixtures.types import TenantId, TimelineId
+from fixtures.utils import wait_until
+from fixtures.workload import Workload
+
+# A tenant configuration that is convenient for generating uploads and deletions
+# without a large amount of postgres traffic.
+TENANT_CONF = {
+    # small checkpointing and compaction targets to ensure we generate many upload operations
+    "checkpoint_distance": f"{128 * 1024}",
+    "compaction_target_size": f"{128 * 1024}",
+    "compaction_threshold": "1",
+    # no PITR horizon, we specify the horizon when we request on-demand GC
+    "pitr_interval": "0s",
+    # disable background compaction and GC. We invoke it manually when we want it to happen.
+    "gc_period": "0s",
+    "compaction_period": "0s",
+    # create image layers eagerly, so that GC can remove some layers
+    "image_creation_threshold": "1",
+}
+
+
+def evict_random_layers(
+    rng: random.Random, pageserver: NeonPageserver, tenant_id: TenantId, timeline_id: TimelineId
+):
+    """
+    Evict roughly half of the layers on a pageserver, each chosen by a coin flip
+    """
+    timeline_path = pageserver.timeline_dir(tenant_id, timeline_id)
+    initial_local_layers = sorted(
+        list(filter(lambda path: path.name != "metadata", timeline_path.glob("*")))
+    )
+    client = pageserver.http_client()
+    for layer in initial_local_layers:
+        if "ephemeral" in layer.name or "temp_download" in layer.name:
+            continue
+
+        if rng.choice([True, False]):
+            log.info(f"Evicting layer {tenant_id}/{timeline_id} {layer.name}")
+            client.evict_layer(tenant_id=tenant_id, timeline_id=timeline_id, layer_name=layer.name)
+
+
+@pytest.mark.parametrize("seed", [1, 2, 3])
+def test_location_conf_churn(neon_env_builder: NeonEnvBuilder, seed: int):
+    """
+    Issue many location configuration changes, ensure that tenants
+    remain readable & we don't get any unexpected errors.  We should
+    have no ERROR in the log, and no 500s in the API.
+
+    The location_config API is intentionally designed so that all destination
+    states are valid, so that we may test it in this way: the API should always
+    work as long as the tenant exists.
+    """
+    neon_env_builder.enable_generations = True
+    neon_env_builder.num_pageservers = 3
+    neon_env_builder.enable_pageserver_remote_storage(
+        remote_storage_kind=RemoteStorageKind.MOCK_S3,
+    )
+    env = neon_env_builder.init_start(initial_tenant_conf=TENANT_CONF)
+    assert env.attachment_service is not None
+
+    pageservers = env.pageservers
+    list([p.http_client() for p in pageservers])
+    tenant_id = env.initial_tenant
+    timeline_id = env.initial_timeline
+
+    # We will make no effort to avoid stale attachments
+    for ps in env.pageservers:
+        ps.allowed_errors.extend(
+            [
+                ".*Dropped remote consistent LSN updates.*",
+                ".*Dropping stale deletions.*",
+                # page_service_conn_main{peer_addr=[::1]:41176}: query handler for 'pagestream 3b19aec5038c796f64b430b30a555121 d07776761d44050b8aab511df1657d83' failed: Tenant 3b19aec5038c796f64b430b30a555121 not found
+                ".*query handler.*Tenant.*not found.*",
+                # page_service_conn_main{peer_addr=[::1]:45552}: query handler for 'pagestream 414ede7ad50f775a8e7d9ba0e43b9efc a43884be16f44b3626482b6981b2c745' failed: Tenant 414ede7ad50f775a8e7d9ba0e43b9efc is not active
+                ".*query handler.*Tenant.*not active.*",
+            ]
+        )
+
+        # these can happen, if we shutdown at a good time. to be fixed as part of #5172.
+        message = ".*duplicated L1 layer layer=.*"
+        ps.allowed_errors.append(message)
+
+    workload = Workload(env, tenant_id, timeline_id)
+    workload.init(env.pageservers[0].id)
+    workload.write_rows(256, env.pageservers[0].id)
+
+    # We use a fixed seed to make the test reproducible: we want a randomly
+    # chosen order, but not to change the order every time we run the test.
+    rng = random.Random(seed)
+
+    initial_generation = 1
+    last_state = {
+        env.pageservers[0].id: ("AttachedSingle", initial_generation),
+        env.pageservers[1].id: ("Detached", None),
+        env.pageservers[2].id: ("Detached", None),
+    }
+
+    latest_attached = env.pageservers[0].id
+
+    for _i in range(0, 64):
+        # Pick a pageserver
+        pageserver = rng.choice(env.pageservers)
+
+        # Pick a pseudorandom state
+        modes = [
+            "AttachedSingle",
+            "AttachedMulti",
+            "AttachedStale",
+            "Secondary",
+            "Detached",
+            "_Evictions",
+            "_Restart",
+        ]
+
+        mode = rng.choice(modes)
+
+        last_state_ps = last_state[pageserver.id]
+        if mode == "_Evictions":
+            if last_state_ps[0].startswith("Attached"):
+                log.info(f"Action: evictions on pageserver {pageserver.id}")
+                evict_random_layers(rng, pageserver, tenant_id, timeline_id)
+            else:
+                log.info(
+                    f"Action: skipping evictions on pageserver {pageserver.id}, is not attached"
+                )
+        elif mode == "_Restart":
+            log.info(f"Action: restarting pageserver {pageserver.id}")
+            pageserver.stop()
+            pageserver.start()
+            if last_state_ps[0].startswith("Attached") and latest_attached == pageserver.id:
+                log.info("Entering postgres...")
+                workload.churn_rows(rng.randint(128, 256), pageserver.id)
+                workload.validate(pageserver.id)
+            elif last_state_ps[0].startswith("Attached"):
+                # The `attachment_service` will only re-attach on startup when a pageserver was the
+                # holder of the latest generation: otherwise the pageserver will revert to detached
+                # state if it was running attached with a stale generation
+                last_state[pageserver.id] = ("Detached", None)
+        else:
+            secondary_conf: Optional[Dict[str, Any]] = None
+            if mode == "Secondary":
+                secondary_conf = {"warm": rng.choice([True, False])}
+
+            location_conf: Dict[str, Any] = {
+                "mode": mode,
+                "secondary_conf": secondary_conf,
+                "tenant_conf": {},
+            }
+
+            log.info(f"Action: Configuring pageserver {pageserver.id} to {location_conf}")
+
+            # Select a generation number
+            if mode.startswith("Attached"):
+                if last_state_ps[1] is not None:
+                    if rng.choice([True, False]):
+                        # Move between attached states, staying in the same generation
+                        generation = last_state_ps[1]
+                    else:
+                        # Switch generations, while also jumping between attached states
+                        generation = env.attachment_service.attach_hook_issue(
+                            tenant_id, pageserver.id
+                        )
+                        latest_attached = pageserver.id
+                else:
+                    generation = env.attachment_service.attach_hook_issue(tenant_id, pageserver.id)
+                    latest_attached = pageserver.id
+            else:
+                generation = None
+
+            location_conf["generation"] = generation
+
+            pageserver.tenant_location_configure(tenant_id, location_conf)
+            last_state[pageserver.id] = (mode, generation)
+
+            if mode.startswith("Attached"):
+                # This is a basic test: we are validating that the endpoint works properly _between_
+                # configuration changes.  A stronger test would be to validate that clients see
+                # no errors while we are making the changes.
+                workload.churn_rows(
+                    rng.randint(128, 256), pageserver.id, upload=mode != "AttachedStale"
+                )
+                workload.validate(pageserver.id)
+
+    # Attach all pageservers
+    for ps in env.pageservers:
+        location_conf = {"mode": "AttachedMulti", "secondary_conf": None, "tenant_conf": {}}
+        ps.tenant_location_configure(tenant_id, location_conf)
+
+    # Confirm that all are readable
+    for ps in env.pageservers:
+        workload.validate(ps.id)
+
+    # Detach all pageservers
+    for ps in env.pageservers:
+        location_conf = {"mode": "Detached", "secondary_conf": None, "tenant_conf": {}}
+        ps.tenant_location_configure(tenant_id, location_conf)
+
+    # Confirm that all local disk state was removed on detach
+    # TODO
+
+
+def test_live_migration(neon_env_builder: NeonEnvBuilder):
+    """
+    Test the sequence of location states that are used in a live migration.
+    """
+    neon_env_builder.enable_generations = True
+    neon_env_builder.num_pageservers = 2
+    neon_env_builder.enable_pageserver_remote_storage(
+        remote_storage_kind=RemoteStorageKind.MOCK_S3,
+    )
+    env = neon_env_builder.init_start(initial_tenant_conf=TENANT_CONF)
+    assert env.attachment_service is not None
+
+    tenant_id = env.initial_tenant
+    timeline_id = env.initial_timeline
+
+    pageserver_a = env.pageservers[0]
+    pageserver_b = env.pageservers[1]
+
+    initial_generation = 1
+
+    workload = Workload(env, tenant_id, timeline_id)
+    workload.init(env.pageservers[0].id)
+    workload.write_rows(256, env.pageservers[0].id)
+
+    # Make the destination a secondary location
+    pageserver_b.tenant_location_configure(
+        tenant_id,
+        {
+            "mode": "Secondary",
+            "secondary_conf": {"warm": True},
+            "tenant_conf": {},
+        },
+    )
+
+    workload.churn_rows(64, pageserver_a.id, upload=False)
+
+    # Set origin attachment to stale
+    log.info("Setting origin to AttachedStale")
+    pageserver_a.tenant_location_configure(
+        tenant_id,
+        {
+            "mode": "AttachedStale",
+            "secondary_conf": None,
+            "tenant_conf": {},
+            "generation": initial_generation,
+        },
+        flush_ms=5000,
+    )
+
+    migrated_generation = env.attachment_service.attach_hook_issue(tenant_id, pageserver_b.id)
+    log.info(f"Acquired generation {migrated_generation} for destination pageserver")
+    assert migrated_generation == initial_generation + 1
+
+    # Writes and reads still work in AttachedStale.
+    workload.validate(pageserver_a.id)
+
+    # TODO: call into secondary mode API hooks to do an upload/download sync
+
+    # Generate some more dirty writes: we expect the origin to ingest WAL
+    # in AttachedStale
+    workload.churn_rows(64, pageserver_a.id, upload=False)
+    workload.validate(pageserver_a.id)
+
+    # Attach the destination
+    log.info("Setting destination to AttachedMulti")
+    pageserver_b.tenant_location_configure(
+        tenant_id,
+        {
+            "mode": "AttachedMulti",
+            "secondary_conf": None,
+            "tenant_conf": {},
+            "generation": migrated_generation,
+        },
+    )
+
+    # Wait for destination LSN to catch up with origin
+    origin_lsn = pageserver_a.http_client().timeline_detail(tenant_id, timeline_id)[
+        "last_record_lsn"
+    ]
+
+    def caught_up():
+        destination_lsn = pageserver_b.http_client().timeline_detail(tenant_id, timeline_id)[
+            "last_record_lsn"
+        ]
+        log.info(
+            f"Waiting for LSN to catch up: origin {origin_lsn} vs destination {destination_lsn}"
+        )
+        assert destination_lsn >= origin_lsn
+
+    wait_until(100, 0.1, caught_up)
+
+    # The destination should accept writes
+    workload.churn_rows(64, pageserver_b.id)
+
+    # Dual attached: both are readable.
+    workload.validate(pageserver_a.id)
+    workload.validate(pageserver_b.id)
+
+    # Revert the origin to secondary
+    log.info("Setting origin to Secondary")
+    pageserver_a.tenant_location_configure(
+        tenant_id,
+        {
+            "mode": "Secondary",
+            "secondary_conf": {"warm": True},
+            "tenant_conf": {},
+        },
+    )
+
+    workload.churn_rows(64, pageserver_b.id)
+
+    # Put the destination into final state
+    pageserver_b.tenant_location_configure(
+        tenant_id,
+        {
+            "mode": "AttachedSingle",
+            "secondary_conf": None,
+            "tenant_conf": {},
+            "generation": migrated_generation,
+        },
+    )
+
+    workload.churn_rows(64, pageserver_b.id)
+    workload.validate(pageserver_b.id)
diff --git a/test_runner/regress/test_remote_storage.py b/test_runner/regress/test_remote_storage.py
index 0a5046e219..3004d69f50 100644
--- a/test_runner/regress/test_remote_storage.py
+++ b/test_runner/regress/test_remote_storage.py
@@ -73,19 +73,20 @@ def test_remote_storage_backup_and_restore(
     ##### First start, insert data and upload it to the remote storage
     env = neon_env_builder.init_start()
 
-    # FIXME: Is this expected?
-    env.pageserver.allowed_errors.append(
-        ".*marking .* as locally complete, while it doesnt exist in remote index.*"
+    env.pageserver.allowed_errors.extend(
+        [
+            # FIXME: Is this expected?
+            ".*marking .* as locally complete, while it doesnt exist in remote index.*",
+            ".*No timelines to attach received.*",
+            ".*Failed to get local tenant state.*",
+            # FIXME retry downloads without throwing errors
+            ".*failed to load remote timeline.*",
+            # we have a bunch of pytest.raises for these below
+            ".*tenant .*? already exists, state:.*",
+            ".*tenant directory already exists.*",
+            ".*simulated failure of remote operation.*",
+        ]
     )
-    env.pageserver.allowed_errors.append(".*No timelines to attach received.*")
-
-    env.pageserver.allowed_errors.append(".*Failed to get local tenant state.*")
-    # FIXME retry downloads without throwing errors
-    env.pageserver.allowed_errors.append(".*failed to load remote timeline.*")
-    # we have a bunch of pytest.raises for these below
-    env.pageserver.allowed_errors.append(".*tenant .*? already exists, state:.*")
-    env.pageserver.allowed_errors.append(".*tenant directory already exists.*")
-    env.pageserver.allowed_errors.append(".*simulated failure of remote operation.*")
 
     pageserver_http = env.pageserver.http_client()
     endpoint = env.endpoints.create_start("main")
diff --git a/test_runner/regress/test_tenant_delete.py b/test_runner/regress/test_tenant_delete.py
index 48f5682371..fece876459 100644
--- a/test_runner/regress/test_tenant_delete.py
+++ b/test_runner/regress/test_tenant_delete.py
@@ -395,13 +395,13 @@ def test_long_timeline_create_cancelled_by_tenant_delete(neon_env_builder: NeonE
     env.start()
     pageserver_http = env.pageserver.http_client()
 
-    # happens with the cancellation bailing flushing loop earlier, leaving disk_consistent_lsn at zero
-    env.pageserver.allowed_errors.append(
-        ".*Timeline got dropped without initializing, cleaning its files"
-    )
-    # the response hit_pausable_failpoint_and_later_fail
-    env.pageserver.allowed_errors.append(
-        f".*Error processing HTTP request: InternalServerError\\(new timeline {env.initial_tenant}/{env.initial_timeline} has invalid disk_consistent_lsn"
+    env.pageserver.allowed_errors.extend(
+        [
+            # happens when the cancellation makes the flush loop bail out earlier, leaving disk_consistent_lsn at zero
+            ".*Timeline got dropped without initializing, cleaning its files",
+            # the response hit_pausable_failpoint_and_later_fail
+            f".*Error processing HTTP request: InternalServerError\\(new timeline {env.initial_tenant}/{env.initial_timeline} has invalid disk_consistent_lsn",
+        ]
     )
 
     env.pageserver.tenant_create(env.initial_tenant)
diff --git a/test_runner/regress/test_tenant_detach.py b/test_runner/regress/test_tenant_detach.py
index 5b63bd6161..0dcbb23ad4 100644
--- a/test_runner/regress/test_tenant_detach.py
+++ b/test_runner/regress/test_tenant_detach.py
@@ -307,10 +307,14 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
         bogus_timeline_id = TimelineId.generate()
         pageserver_http.timeline_gc(tenant_id, bogus_timeline_id, 0)
 
-    # the error will be printed to the log too
-    env.pageserver.allowed_errors.append(".*gc target timeline does not exist.*")
-    # Timelines get stopped during detach, ignore the gc calls that error, witnessing that
-    env.pageserver.allowed_errors.append(".*InternalServerError\\(timeline is Stopping.*")
+    env.pageserver.allowed_errors.extend(
+        [
+            # the error will be printed to the log too
+            ".*gc target timeline does not exist.*",
+            # Timelines get stopped during detach; ignore the gc calls that fail because they witness that
+            ".*InternalServerError\\(timeline is Stopping.*",
+        ]
+    )
 
     # Detach while running manual GC.
     # It should wait for manual GC to finish because it runs in a task associated with the tenant.
diff --git a/test_runner/regress/test_tenant_relocation.py b/test_runner/regress/test_tenant_relocation.py
index feacdcc802..dcd7232b1b 100644
--- a/test_runner/regress/test_tenant_relocation.py
+++ b/test_runner/regress/test_tenant_relocation.py
@@ -216,16 +216,17 @@ def test_tenant_relocation(
 
     tenant_id = TenantId("74ee8b079a0e437eb0afea7d26a07209")
 
-    # FIXME: Is this expected?
-    env.pageservers[0].allowed_errors.append(
-        ".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
+    env.pageservers[0].allowed_errors.extend(
+        [
+            # FIXME: Is this expected?
+            ".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*",
+            # Needed for detach polling on the original pageserver
+            f".*NotFound: tenant {tenant_id}.*",
+            # We will dual-attach in this test, so stale generations are expected
+            ".*Dropped remote consistent LSN updates.*",
+        ]
     )
 
-    # Needed for detach polling on the original pageserver
-    env.pageservers[0].allowed_errors.append(f".*NotFound: tenant {tenant_id}.*")
-    # We will dual-attach in this test, so stale generations are expected
-    env.pageservers[0].allowed_errors.append(".*Dropped remote consistent LSN updates.*")
-
     assert isinstance(env.pageserver_remote_storage, LocalFsStorage)
 
     # we use two branches to check that they are both relocated
diff --git a/test_runner/regress/test_tenants_with_remote_storage.py b/test_runner/regress/test_tenants_with_remote_storage.py
index b7b4e2be0b..07fb6dc5ca 100644
--- a/test_runner/regress/test_tenants_with_remote_storage.py
+++ b/test_runner/regress/test_tenants_with_remote_storage.py
@@ -117,10 +117,12 @@ def test_tenants_attached_after_download(neon_env_builder: NeonEnvBuilder):
     ##### First start, insert secret data and upload it to the remote storage
     env = neon_env_builder.init_start()
 
-    # FIXME: Are these expected?
-    env.pageserver.allowed_errors.append(".*No timelines to attach received.*")
-    env.pageserver.allowed_errors.append(
-        ".*marking .* as locally complete, while it doesnt exist in remote index.*"
+    env.pageserver.allowed_errors.extend(
+        [
+            # FIXME: Are these expected?
+            ".*No timelines to attach received.*",
+            ".*marking .* as locally complete, while it doesnt exist in remote index.*",
+        ]
     )
 
     pageserver_http = env.pageserver.http_client()
@@ -218,13 +220,14 @@ def test_tenant_redownloads_truncated_file_on_startup(
 
     assert isinstance(env.pageserver_remote_storage, LocalFsStorage)
 
-    env.pageserver.allowed_errors.append(".*removing local file .* because .*")
-
-    # FIXME: Are these expected?
-    env.pageserver.allowed_errors.append(
-        ".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
+    env.pageserver.allowed_errors.extend(
+        [
+            ".*removing local file .* because .*",
+            # FIXME: Are these expected?
+            ".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*",
+            ".*No timelines to attach received.*",
+        ]
     )
-    env.pageserver.allowed_errors.append(".*No timelines to attach received.*")
 
     pageserver_http = env.pageserver.http_client()
     endpoint = env.endpoints.create_start("main")
diff --git a/test_runner/regress/test_threshold_based_eviction.py b/test_runner/regress/test_threshold_based_eviction.py
index 27d5cce5f2..5f72cfd747 100644
--- a/test_runner/regress/test_threshold_based_eviction.py
+++ b/test_runner/regress/test_threshold_based_eviction.py
@@ -36,12 +36,13 @@ def test_threshold_based_eviction(
         ".*metrics_collection:.* upload consumption_metrics (still failed|failed, will retry).*"
     )
     env = neon_env_builder.init_start()
-    env.pageserver.allowed_errors.append(metrics_refused_log_line)
-
-    # these can happen whenever we run consumption metrics collection
-    env.pageserver.allowed_errors.append(r".*failed to calculate logical size at \S+: cancelled")
-    env.pageserver.allowed_errors.append(
-        r".*failed to calculate synthetic size for tenant \S+: failed to calculate some logical_sizes"
+    env.pageserver.allowed_errors.extend(
+        [
+            metrics_refused_log_line,
+            # these can happen whenever we run consumption metrics collection
+            r".*failed to calculate logical size at \S+: cancelled",
+            r".*failed to calculate synthetic size for tenant \S+: failed to calculate some logical_sizes",
+        ]
     )
 
     tenant_id, timeline_id = env.initial_tenant, env.initial_timeline
diff --git a/test_runner/regress/test_timeline_delete.py b/test_runner/regress/test_timeline_delete.py
index 17113a6bc5..c6d578a7a2 100644
--- a/test_runner/regress/test_timeline_delete.py
+++ b/test_runner/regress/test_timeline_delete.py
@@ -39,10 +39,14 @@ from urllib3.util.retry import Retry
 def test_timeline_delete(neon_simple_env: NeonEnv):
     env = neon_simple_env
 
-    env.pageserver.allowed_errors.append(".*Timeline .* was not found.*")
-    env.pageserver.allowed_errors.append(".*timeline not found.*")
-    env.pageserver.allowed_errors.append(".*Cannot delete timeline which has child timelines.*")
-    env.pageserver.allowed_errors.append(".*Precondition failed: Requested tenant is missing.*")
+    env.pageserver.allowed_errors.extend(
+        [
+            ".*Timeline .* was not found.*",
+            ".*timeline not found.*",
+            ".*Cannot delete timeline which has child timelines.*",
+            ".*Precondition failed: Requested tenant is missing.*",
+        ]
+    )
 
     ps_http = env.pageserver.http_client()
 
@@ -198,22 +202,22 @@ def test_delete_timeline_exercise_crash_safety_failpoints(
             ),
         )
 
-    env.pageserver.allowed_errors.append(f".*{timeline_id}.*failpoint: {failpoint}")
-    # It appears when we stopped flush loop during deletion and then pageserver is stopped
-    env.pageserver.allowed_errors.append(
-        ".*shutdown_all_tenants:shutdown.*tenant_id.*shutdown.*timeline_id.*: failed to freeze and flush: cannot flush frozen layers when flush_loop is not running, state is Exited",
+    env.pageserver.allowed_errors.extend(
+        [
+            f".*{timeline_id}.*failpoint: {failpoint}",
+            # This appears when the flush loop was stopped during deletion and the pageserver is then stopped
+            ".*shutdown_all_tenants:shutdown.*tenant_id.*shutdown.*timeline_id.*: failed to freeze and flush: cannot flush frozen layers when flush_loop is not running, state is Exited",
+            # This happens when we fail before scheduling the background operation.
+            # The timeline is left in stopping state and the retry tries to stop it again.
+            ".*Ignoring new state, equal to the existing one: Stopping",
+            # This happens when we retry delete requests for broken timelines
+            ".*Ignoring state update Stopping for broken timeline",
+            # This happens when timeline remains are cleaned up during loading
+            ".*Timeline dir entry become invalid.*",
+            # In one of the branches we poll for tenant to become active. Polls can generate this log message:
+            f".*Tenant {env.initial_tenant} is not active.*",
+        ]
     )
-    # This happens when we fail before scheduling background operation.
-    # Timeline is left in stopping state and retry tries to stop it again.
-    env.pageserver.allowed_errors.append(
-        ".*Ignoring new state, equal to the existing one: Stopping"
-    )
-    # This happens when we retry delete requests for broken timelines
-    env.pageserver.allowed_errors.append(".*Ignoring state update Stopping for broken timeline")
-    # This happens when timeline remains are cleaned up during loading
-    env.pageserver.allowed_errors.append(".*Timeline dir entry become invalid.*")
-    # In one of the branches we poll for tenant to become active. Polls can generate this log message:
-    env.pageserver.allowed_errors.append(f".*Tenant {env.initial_tenant} is not active*")
 
     ps_http.configure_failpoints((failpoint, "return"))
 
@@ -398,13 +402,13 @@ def test_timeline_delete_fail_before_local_delete(neon_env_builder: NeonEnvBuild
 
     env = neon_env_builder.init_start()
 
-    env.pageserver.allowed_errors.append(".*failpoint: timeline-delete-before-rm")
-    env.pageserver.allowed_errors.append(
-        ".*Ignoring new state, equal to the existing one: Stopping"
-    )
-    # this happens, because the stuck timeline is visible to shutdown
-    env.pageserver.allowed_errors.append(
-        ".*shutdown_all_tenants:shutdown.*tenant_id.*shutdown.*timeline_id.*: failed to freeze and flush: cannot flush frozen layers when flush_loop is not running, state is Exited",
+    env.pageserver.allowed_errors.extend(
+        [
+            ".*failpoint: timeline-delete-before-rm",
+            ".*Ignoring new state, equal to the existing one: Stopping",
+            # this happens because the stuck timeline is visible to shutdown
+            ".*shutdown_all_tenants:shutdown.*tenant_id.*shutdown.*timeline_id.*: failed to freeze and flush: cannot flush frozen layers when flush_loop is not running, state is Exited",
+        ]
     )
 
     ps_http = env.pageserver.http_client()
@@ -551,10 +555,12 @@ def test_concurrent_timeline_delete_stuck_on(
         with pytest.raises(PageserverApiException, match=error_msg_re) as second_call_err:
             ps_http.timeline_delete(env.initial_tenant, child_timeline_id)
         assert second_call_err.value.status_code == 409
-        env.pageserver.allowed_errors.append(f".*{child_timeline_id}.*{error_msg_re}.*")
-        # the second call will try to transition the timeline into Stopping state as well
-        env.pageserver.allowed_errors.append(
-            f".*{child_timeline_id}.*Ignoring new state, equal to the existing one: Stopping"
+        env.pageserver.allowed_errors.extend(
+            [
+                f".*{child_timeline_id}.*{error_msg_re}.*",
+                # the second call will try to transition the timeline into Stopping state as well
+                f".*{child_timeline_id}.*Ignoring new state, equal to the existing one: Stopping",
+            ]
         )
         log.info("second call failed as expected")