diff --git a/.dockerignore b/.dockerignore index 8b378b5dab..f7a6232ba1 100644 --- a/.dockerignore +++ b/.dockerignore @@ -22,6 +22,7 @@ !s3_scrubber/ !safekeeper/ !storage_broker/ +!storage_controller/ !trace/ !vendor/postgres-*/ !workspace_hack/ diff --git a/CODEOWNERS b/CODEOWNERS index 9a23e8c958..af2fa6088e 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -1,5 +1,5 @@ /compute_tools/ @neondatabase/control-plane @neondatabase/compute -/control_plane/attachment_service @neondatabase/storage +/storage_controller @neondatabase/storage /libs/pageserver_api/ @neondatabase/storage /libs/postgres_ffi/ @neondatabase/compute @neondatabase/safekeepers /libs/remote_storage/ @neondatabase/storage diff --git a/Cargo.lock b/Cargo.lock index 5b57b417fd..1050b6017a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -270,45 +270,6 @@ dependencies = [ "critical-section", ] -[[package]] -name = "attachment_service" -version = "0.1.0" -dependencies = [ - "anyhow", - "aws-config", - "bytes", - "camino", - "clap", - "control_plane", - "diesel", - "diesel_migrations", - "fail", - "futures", - "git-version", - "hex", - "humantime", - "hyper", - "itertools", - "lasso", - "measured", - "metrics", - "once_cell", - "pageserver_api", - "pageserver_client", - "postgres_connection", - "r2d2", - "reqwest", - "routerify", - "serde", - "serde_json", - "thiserror", - "tokio", - "tokio-util", - "tracing", - "utils", - "workspace_hack", -] - [[package]] name = "autocfg" version = "1.1.0" @@ -2235,9 +2196,9 @@ dependencies = [ [[package]] name = "h2" -version = "0.3.24" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb2c4422095b67ee78da96fbb51a4cc413b3b25883c7717ff7ca1ab31022c9c9" +checksum = "81fe527a889e1532da5c525686d96d4c2e74cdd345badf8dfef9f6b39dd5f5e8" dependencies = [ "bytes", "fnv", @@ -3436,9 +3397,9 @@ dependencies = [ [[package]] name = "ordered-multimap" -version = "0.7.1" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4d6a8c22fc714f0c2373e6091bf6f5e9b37b1bc0b1184874b7e0a4e303d318f" +checksum = "49203cdcae0030493bad186b28da2fa25645fa276a51b6fec8010d281e02ef79" dependencies = [ "dlv-list", "hashbrown 0.14.0", @@ -5623,6 +5584,45 @@ dependencies = [ "workspace_hack", ] +[[package]] +name = "storage_controller" +version = "0.1.0" +dependencies = [ + "anyhow", + "aws-config", + "bytes", + "camino", + "clap", + "control_plane", + "diesel", + "diesel_migrations", + "fail", + "futures", + "git-version", + "hex", + "humantime", + "hyper", + "itertools", + "lasso", + "measured", + "metrics", + "once_cell", + "pageserver_api", + "pageserver_client", + "postgres_connection", + "r2d2", + "reqwest", + "routerify", + "serde", + "serde_json", + "thiserror", + "tokio", + "tokio-util", + "tracing", + "utils", + "workspace_hack", +] + [[package]] name = "storcon_cli" version = "0.1.0" @@ -5799,23 +5799,23 @@ dependencies = [ [[package]] name = "test-context" -version = "0.1.4" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "055831a02a4f5aa28fede67f2902014273eb8c21b958ac5ebbd59b71ef30dbc3" +checksum = "6676ab8513edfd2601a108621103fdb45cac9098305ca25ec93f7023b06b05d9" dependencies = [ - "async-trait", "futures", "test-context-macros", ] [[package]] name = "test-context-macros" -version = "0.1.4" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8901a55b0a7a06ebc4a674dcca925170da8e613fa3b163a1df804ed10afb154d" +checksum = 
"78ea17a2dc368aeca6f554343ced1b1e31f76d63683fa8016e5844bd7a5144a1" dependencies = [ + "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.52", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 2d49581f9f..0057a617f6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,6 @@ resolver = "2" members = [ "compute_tools", "control_plane", - "control_plane/attachment_service", "control_plane/storcon_cli", "pageserver", "pageserver/compaction", @@ -13,6 +12,7 @@ members = [ "proxy", "safekeeper", "storage_broker", + "storage_controller", "s3_scrubber", "workspace_hack", "trace", @@ -159,7 +159,7 @@ svg_fmt = "0.4.1" sync_wrapper = "0.1.2" tar = "0.4" task-local-extensions = "0.1.4" -test-context = "0.1" +test-context = "0.3" thiserror = "1.0" tikv-jemallocator = "0.5" tikv-jemalloc-ctl = "0.5" diff --git a/control_plane/src/background_process.rs b/control_plane/src/background_process.rs index 2fced7d778..94666f2870 100644 --- a/control_plane/src/background_process.rs +++ b/control_plane/src/background_process.rs @@ -86,7 +86,10 @@ where .stdout(process_log_file) .stderr(same_file_for_stderr) .args(args); - let filled_cmd = fill_remote_storage_secrets_vars(fill_rust_env_vars(background_command)); + + let filled_cmd = fill_env_vars_prefixed_neon(fill_remote_storage_secrets_vars( + fill_rust_env_vars(background_command), + )); filled_cmd.envs(envs); let pid_file_to_check = match &initial_pid_file { @@ -268,6 +271,15 @@ fn fill_remote_storage_secrets_vars(mut cmd: &mut Command) -> &mut Command { cmd } +fn fill_env_vars_prefixed_neon(mut cmd: &mut Command) -> &mut Command { + for (var, val) in std::env::vars() { + if var.starts_with("NEON_PAGESERVER_") { + cmd = cmd.env(var, val); + } + } + cmd +} + /// Add a `pre_exec` to the cmd that, inbetween fork() and exec(), /// 1. Claims a pidfile with a fcntl lock on it and /// 2. Sets up the pidfile's file descriptor so that it (and the lock) diff --git a/control_plane/storcon_cli/src/main.rs b/control_plane/storcon_cli/src/main.rs index f72bc9a2a9..2edd09eac1 100644 --- a/control_plane/storcon_cli/src/main.rs +++ b/control_plane/storcon_cli/src/main.rs @@ -223,7 +223,7 @@ impl Client { } } - /// Simple HTTP request wrapper for calling into attachment service + /// Simple HTTP request wrapper for calling into storage controller async fn dispatch( &self, method: hyper::Method, diff --git a/diesel.toml b/diesel.toml index 30ed4444d7..558c54a1e1 100644 --- a/diesel.toml +++ b/diesel.toml @@ -2,8 +2,8 @@ # see https://diesel.rs/guides/configuring-diesel-cli [print_schema] -file = "control_plane/attachment_service/src/schema.rs" +file = "storage_controller/src/schema.rs" custom_type_derives = ["diesel::query_builder::QueryId"] [migrations_directory] -dir = "control_plane/attachment_service/migrations" +dir = "storage_controller/migrations" diff --git a/docs/sourcetree.md b/docs/sourcetree.md index 12fa80349e..3732bfdab2 100644 --- a/docs/sourcetree.md +++ b/docs/sourcetree.md @@ -7,6 +7,11 @@ Below you will find a brief overview of each subdir in the source tree in alphab Neon storage broker, providing messaging between safekeepers and pageservers. [storage_broker.md](./storage_broker.md) +`storage_controller`: + +Neon storage controller, manages a cluster of pageservers and exposes an API that enables +managing a many-sharded tenant as a single entity. + `/control_plane`: Local control plane. 
diff --git a/libs/pageserver_api/src/controller_api.rs b/libs/pageserver_api/src/controller_api.rs index be24d452b6..1278f17ad2 100644 --- a/libs/pageserver_api/src/controller_api.rs +++ b/libs/pageserver_api/src/controller_api.rs @@ -2,7 +2,7 @@ use std::str::FromStr; /// Request/response types for the storage controller /// API (`/control/v1` prefix). Implemented by the server -/// in [`attachment_service::http`] +/// in [`storage_controller::http`] use serde::{Deserialize, Serialize}; use utils::id::{NodeId, TenantId}; diff --git a/libs/remote_storage/tests/test_real_azure.rs b/libs/remote_storage/tests/test_real_azure.rs index 6adddf52a9..6aa02868e6 100644 --- a/libs/remote_storage/tests/test_real_azure.rs +++ b/libs/remote_storage/tests/test_real_azure.rs @@ -57,7 +57,6 @@ enum MaybeEnabledStorage { Disabled, } -#[async_trait::async_trait] impl AsyncTestContext for MaybeEnabledStorage { async fn setup() -> Self { ensure_logging_ready(); @@ -86,7 +85,6 @@ struct AzureWithTestBlobs { remote_blobs: HashSet, } -#[async_trait::async_trait] impl AsyncTestContext for MaybeEnabledStorageWithTestBlobs { async fn setup() -> Self { ensure_logging_ready(); @@ -148,7 +146,6 @@ struct AzureWithSimpleTestBlobs { remote_blobs: HashSet, } -#[async_trait::async_trait] impl AsyncTestContext for MaybeEnabledStorageWithSimpleTestBlobs { async fn setup() -> Self { ensure_logging_ready(); diff --git a/libs/remote_storage/tests/test_real_s3.rs b/libs/remote_storage/tests/test_real_s3.rs index bc5e40e70f..c5d5216f00 100644 --- a/libs/remote_storage/tests/test_real_s3.rs +++ b/libs/remote_storage/tests/test_real_s3.rs @@ -219,7 +219,6 @@ enum MaybeEnabledStorage { Disabled, } -#[async_trait::async_trait] impl AsyncTestContext for MaybeEnabledStorage { async fn setup() -> Self { ensure_logging_ready(); @@ -248,7 +247,6 @@ struct S3WithTestBlobs { remote_blobs: HashSet, } -#[async_trait::async_trait] impl AsyncTestContext for MaybeEnabledStorageWithTestBlobs { async fn setup() -> Self { ensure_logging_ready(); @@ -310,7 +308,6 @@ struct S3WithSimpleTestBlobs { remote_blobs: HashSet, } -#[async_trait::async_trait] impl AsyncTestContext for MaybeEnabledStorageWithSimpleTestBlobs { async fn setup() -> Self { ensure_logging_ready(); diff --git a/libs/utils/src/env.rs b/libs/utils/src/env.rs new file mode 100644 index 0000000000..b3e326bfd0 --- /dev/null +++ b/libs/utils/src/env.rs @@ -0,0 +1,21 @@ +//! Wrapper around `std::env::var` for parsing environment variables. + +use std::{fmt::Display, str::FromStr}; + +pub fn var(varname: &str) -> Option +where + V: FromStr, + E: Display, +{ + match std::env::var(varname) { + Ok(s) => Some( + s.parse() + .map_err(|e| format!("failed to parse env var {varname}: {e:#}")) + .unwrap(), + ), + Err(std::env::VarError::NotPresent) => None, + Err(std::env::VarError::NotUnicode(_)) => { + panic!("env var {varname} is not unicode") + } + } +} diff --git a/libs/utils/src/lib.rs b/libs/utils/src/lib.rs index 336ee87570..19d3e978dc 100644 --- a/libs/utils/src/lib.rs +++ b/libs/utils/src/lib.rs @@ -89,6 +89,8 @@ pub mod yielding_loop; pub mod zstd; +pub mod env; + pub mod poison; /// This is a shortcut to embed git sha into binaries and avoid copying the same build script to all packages diff --git a/pageserver/benches/bench_walredo.rs b/pageserver/benches/bench_walredo.rs index e1a6518e04..ccb4f50eb4 100644 --- a/pageserver/benches/bench_walredo.rs +++ b/pageserver/benches/bench_walredo.rs @@ -27,25 +27,10 @@ //! //! # Reference Numbers //! -//! 2024-03-20 on i3en.3xlarge +//! 
2024-04-04 on i3en.3xlarge //! //! ```text -//! short/1 time: [29.804 µs 29.959 µs 30.142 µs] -//! short/2 time: [33.847 µs 34.020 µs 34.223 µs] -//! short/4 time: [44.016 µs 44.259 µs 44.538 µs] -//! short/8 time: [77.512 µs 78.146 µs 78.796 µs] -//! short/16 time: [139.74 µs 141.37 µs 143.03 µs] -//! short/32 time: [269.00 µs 271.98 µs 275.04 µs] -//! short/64 time: [526.08 µs 528.47 µs 531.24 µs] -//! short/128 time: [1.0206 ms 1.0264 ms 1.0329 ms] -//! medium/1 time: [113.87 µs 114.41 µs 115.07 µs] -//! medium/2 time: [157.36 µs 158.05 µs 158.93 µs] -//! medium/4 time: [325.91 µs 328.01 µs 330.61 µs] -//! medium/8 time: [656.48 µs 663.55 µs 671.50 µs] -//! medium/16 time: [1.2956 ms 1.3069 ms 1.3203 ms] -//! medium/32 time: [2.5294 ms 2.5510 ms 2.5768 ms] -//! medium/64 time: [4.8728 ms 4.9262 ms 4.9862 ms] -//! medium/128 time: [8.9833 ms 9.1374 ms 9.2848 ms] +//! TODO //! ``` use bytes::{Buf, Bytes}; diff --git a/pageserver/client/src/mgmt_api.rs b/pageserver/client/src/mgmt_api.rs index ab55d2b0a3..3c9982ffb8 100644 --- a/pageserver/client/src/mgmt_api.rs +++ b/pageserver/client/src/mgmt_api.rs @@ -128,12 +128,12 @@ impl Client { pub async fn timeline_info( &self, - tenant_id: TenantId, + tenant_shard_id: TenantShardId, timeline_id: TimelineId, force_await_logical_size: ForceAwaitLogicalSize, ) -> Result { let uri = format!( - "{}/v1/tenant/{tenant_id}/timeline/{timeline_id}", + "{}/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}", self.mgmt_api_endpoint ); @@ -151,11 +151,11 @@ impl Client { pub async fn keyspace( &self, - tenant_id: TenantId, + tenant_shard_id: TenantShardId, timeline_id: TimelineId, ) -> Result { let uri = format!( - "{}/v1/tenant/{tenant_id}/timeline/{timeline_id}/keyspace", + "{}/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/keyspace", self.mgmt_api_endpoint ); self.get(&uri) diff --git a/pageserver/pagebench/src/cmd/basebackup.rs b/pageserver/pagebench/src/cmd/basebackup.rs index 55844be041..3ae6d99aa7 100644 --- a/pageserver/pagebench/src/cmd/basebackup.rs +++ b/pageserver/pagebench/src/cmd/basebackup.rs @@ -1,4 +1,5 @@ use anyhow::Context; +use pageserver_api::shard::TenantShardId; use pageserver_client::mgmt_api::ForceAwaitLogicalSize; use pageserver_client::page_service::BasebackupRequest; @@ -95,7 +96,7 @@ async fn main_impl( let timeline = *timeline; let info = mgmt_api_client .timeline_info( - timeline.tenant_id, + TenantShardId::unsharded(timeline.tenant_id), timeline.timeline_id, ForceAwaitLogicalSize::No, ) diff --git a/pageserver/pagebench/src/cmd/getpage_latest_lsn.rs b/pageserver/pagebench/src/cmd/getpage_latest_lsn.rs index 2838511a77..c3d8e61a2c 100644 --- a/pageserver/pagebench/src/cmd/getpage_latest_lsn.rs +++ b/pageserver/pagebench/src/cmd/getpage_latest_lsn.rs @@ -4,6 +4,7 @@ use pageserver_api::key::{is_rel_block_key, key_to_rel_block, Key}; use pageserver_api::keyspace::KeySpaceAccum; use pageserver_api::models::PagestreamGetPageRequest; +use pageserver_api::shard::TenantShardId; use tokio_util::sync::CancellationToken; use utils::id::TenantTimelineId; use utils::lsn::Lsn; @@ -173,7 +174,10 @@ async fn main_impl( let timeline = *timeline; async move { let partitioning = mgmt_api_client - .keyspace(timeline.tenant_id, timeline.timeline_id) + .keyspace( + TenantShardId::unsharded(timeline.tenant_id), + timeline.timeline_id, + ) .await?; let lsn = partitioning.at_lsn; let start = Instant::now(); diff --git a/pageserver/pagebench/src/cmd/trigger_initial_size_calculation.rs 
b/pageserver/pagebench/src/cmd/trigger_initial_size_calculation.rs index 98938d780a..f07beeecfd 100644 --- a/pageserver/pagebench/src/cmd/trigger_initial_size_calculation.rs +++ b/pageserver/pagebench/src/cmd/trigger_initial_size_calculation.rs @@ -1,6 +1,7 @@ use std::sync::Arc; use humantime::Duration; +use pageserver_api::shard::TenantShardId; use tokio::task::JoinSet; use utils::id::TenantTimelineId; @@ -59,7 +60,11 @@ async fn main_impl(args: Args) -> anyhow::Result<()> { let mgmt_api_client = Arc::clone(&mgmt_api_client); js.spawn(async move { let info = mgmt_api_client - .timeline_info(tl.tenant_id, tl.timeline_id, ForceAwaitLogicalSize::Yes) + .timeline_info( + TenantShardId::unsharded(tl.tenant_id), + tl.timeline_id, + ForceAwaitLogicalSize::Yes, + ) .await .unwrap(); @@ -74,7 +79,11 @@ async fn main_impl(args: Args) -> anyhow::Result<()> { while !info.current_logical_size_is_accurate { ticker.tick().await; info = mgmt_api_client - .timeline_info(tl.tenant_id, tl.timeline_id, ForceAwaitLogicalSize::Yes) + .timeline_info( + TenantShardId::unsharded(tl.tenant_id), + tl.timeline_id, + ForceAwaitLogicalSize::Yes, + ) .await .unwrap(); } diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs index c80230d4d7..0903b206ff 100644 --- a/pageserver/src/bin/pageserver.rs +++ b/pageserver/src/bin/pageserver.rs @@ -18,6 +18,7 @@ use pageserver::metrics::{STARTUP_DURATION, STARTUP_IS_LOADING}; use pageserver::task_mgr::WALRECEIVER_RUNTIME; use pageserver::tenant::{secondary, TenantSharedResources}; use remote_storage::GenericRemoteStorage; +use tokio::signal::unix::SignalKind; use tokio::time::Instant; use tracing::*; @@ -671,42 +672,37 @@ fn start_pageserver( let mut shutdown_pageserver = Some(shutdown_pageserver.drop_guard()); // All started up! Now just sit and wait for shutdown signal. - { - use signal_hook::consts::*; - let signal_handler = BACKGROUND_RUNTIME.spawn_blocking(move || { - let mut signals = - signal_hook::iterator::Signals::new([SIGINT, SIGTERM, SIGQUIT]).unwrap(); - return signals - .forever() - .next() - .expect("forever() never returns None unless explicitly closed"); - }); - let signal = BACKGROUND_RUNTIME - .block_on(signal_handler) - .expect("join error"); - match signal { - SIGQUIT => { - info!("Got signal {signal}. Terminating in immediate shutdown mode",); - std::process::exit(111); - } - SIGINT | SIGTERM => { - info!("Got signal {signal}. Terminating gracefully in fast shutdown mode",); - // This cancels the `shutdown_pageserver` cancellation tree. - // Right now that tree doesn't reach very far, and `task_mgr` is used instead. - // The plan is to change that over time. - shutdown_pageserver.take(); - let bg_remote_storage = remote_storage.clone(); - let bg_deletion_queue = deletion_queue.clone(); - BACKGROUND_RUNTIME.block_on(pageserver::shutdown_pageserver( - &tenant_manager, - bg_remote_storage.map(|_| bg_deletion_queue), - 0, - )); - unreachable!() - } - _ => unreachable!(), - } + { + BACKGROUND_RUNTIME.block_on(async move { + let mut sigint = tokio::signal::unix::signal(SignalKind::interrupt()).unwrap(); + let mut sigterm = tokio::signal::unix::signal(SignalKind::terminate()).unwrap(); + let mut sigquit = tokio::signal::unix::signal(SignalKind::quit()).unwrap(); + let signal = tokio::select! { + _ = sigquit.recv() => { + info!("Got signal SIGQUIT. Terminating in immediate shutdown mode",); + std::process::exit(111); + } + _ = sigint.recv() => { "SIGINT" }, + _ = sigterm.recv() => { "SIGTERM" }, + }; + + info!("Got signal {signal}. 
Terminating gracefully in fast shutdown mode",); + + // This cancels the `shutdown_pageserver` cancellation tree. + // Right now that tree doesn't reach very far, and `task_mgr` is used instead. + // The plan is to change that over time. + shutdown_pageserver.take(); + let bg_remote_storage = remote_storage.clone(); + let bg_deletion_queue = deletion_queue.clone(); + pageserver::shutdown_pageserver( + &tenant_manager, + bg_remote_storage.map(|_| bg_deletion_queue), + 0, + ) + .await; + unreachable!() + }) } } diff --git a/pageserver/src/control_plane_client.rs b/pageserver/src/control_plane_client.rs index 42c800822b..f0ed46ce23 100644 --- a/pageserver/src/control_plane_client.rs +++ b/pageserver/src/control_plane_client.rs @@ -12,7 +12,7 @@ use pageserver_api::{ use serde::{de::DeserializeOwned, Serialize}; use tokio_util::sync::CancellationToken; use url::Url; -use utils::{backoff, generation::Generation, id::NodeId}; +use utils::{backoff, failpoint_support, generation::Generation, id::NodeId}; use crate::{ config::{NodeMetadata, PageServerConf}, @@ -210,7 +210,10 @@ impl ControlPlaneGenerationsApi for ControlPlaneClient { .collect(), }; - fail::fail_point!("control-plane-client-validate"); + failpoint_support::sleep_millis_async!("control-plane-client-validate-sleep", &self.cancel); + if self.cancel.is_cancelled() { + return Err(RetryForeverError::ShuttingDown); + } let response: ValidateResponse = self.retry_http_forever(&re_attach_path, request).await?; diff --git a/pageserver/src/http/openapi_spec.yml b/pageserver/src/http/openapi_spec.yml index bb477f89c5..2713309824 100644 --- a/pageserver/src/http/openapi_spec.yml +++ b/pageserver/src/http/openapi_spec.yml @@ -1629,7 +1629,7 @@ components: type: integer format: int64 minimum: 0 - description: The amount of disk space currently utilized by layer files. + description: The amount of disk space currently used. 
free_space_bytes: type: integer format: int64 diff --git a/pageserver/src/import_datadir.rs b/pageserver/src/import_datadir.rs index 343dec2ca1..ed409d3130 100644 --- a/pageserver/src/import_datadir.rs +++ b/pageserver/src/import_datadir.rs @@ -8,6 +8,7 @@ use anyhow::{bail, ensure, Context, Result}; use bytes::Bytes; use camino::Utf8Path; use futures::StreamExt; +use pageserver_api::key::rel_block_to_key; use tokio::io::{AsyncRead, AsyncReadExt}; use tokio_tar::Archive; use tracing::*; @@ -170,7 +171,10 @@ async fn import_rel( let r = reader.read_exact(&mut buf).await; match r { Ok(_) => { - modification.put_rel_page_image(rel, blknum, Bytes::copy_from_slice(&buf))?; + let key = rel_block_to_key(rel, blknum); + if modification.tline.get_shard_identity().is_key_local(&key) { + modification.put_rel_page_image(rel, blknum, Bytes::copy_from_slice(&buf))?; + } } // TODO: UnexpectedEof is expected diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index 67847f5da8..e037339fb7 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -2114,6 +2114,7 @@ pub(crate) fn remove_tenant_metrics(tenant_shard_id: &TenantShardId) { use futures::Future; use pin_project_lite::pin_project; use std::collections::HashMap; +use std::num::NonZeroUsize; use std::pin::Pin; use std::sync::{Arc, Mutex}; use std::task::{Context, Poll}; @@ -2683,6 +2684,26 @@ pub(crate) mod disk_usage_based_eviction { pub(crate) static METRICS: Lazy = Lazy::new(Metrics::default); } +static TOKIO_EXECUTOR_THREAD_COUNT: Lazy = Lazy::new(|| { + register_uint_gauge_vec!( + "pageserver_tokio_executor_thread_configured_count", + "Total number of configued tokio executor threads in the process. + The `setup` label denotes whether we're running with multiple runtimes or a single runtime.", + &["setup"], + ) + .unwrap() +}); + +pub(crate) fn set_tokio_runtime_setup(setup: &str, num_threads: NonZeroUsize) { + static SERIALIZE: std::sync::Mutex<()> = std::sync::Mutex::new(()); + let _guard = SERIALIZE.lock().unwrap(); + TOKIO_EXECUTOR_THREAD_COUNT.reset(); + TOKIO_EXECUTOR_THREAD_COUNT + .get_metric_with_label_values(&[setup]) + .unwrap() + .set(u64::try_from(num_threads.get()).unwrap()); +} + pub fn preinitialize_metrics() { // Python tests need these and on some we do alerting. // diff --git a/pageserver/src/task_mgr.rs b/pageserver/src/task_mgr.rs index 0cc5611a12..9a1e354ecf 100644 --- a/pageserver/src/task_mgr.rs +++ b/pageserver/src/task_mgr.rs @@ -33,13 +33,14 @@ use std::collections::HashMap; use std::fmt; use std::future::Future; +use std::num::NonZeroUsize; use std::panic::AssertUnwindSafe; +use std::str::FromStr; use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::{Arc, Mutex}; use futures::FutureExt; use pageserver_api::shard::TenantShardId; -use tokio::runtime::Runtime; use tokio::task::JoinHandle; use tokio::task_local; use tokio_util::sync::CancellationToken; @@ -48,8 +49,11 @@ use tracing::{debug, error, info, warn}; use once_cell::sync::Lazy; +use utils::env; use utils::id::TimelineId; +use crate::metrics::set_tokio_runtime_setup; + // // There are four runtimes: // @@ -98,52 +102,119 @@ use utils::id::TimelineId; // other operations, if the upload tasks e.g. get blocked on locks. It shouldn't // happen, but still. 
// -pub static COMPUTE_REQUEST_RUNTIME: Lazy = Lazy::new(|| { - tokio::runtime::Builder::new_multi_thread() - .thread_name("compute request worker") - .enable_all() - .build() - .expect("Failed to create compute request runtime") -}); -pub static MGMT_REQUEST_RUNTIME: Lazy = Lazy::new(|| { - tokio::runtime::Builder::new_multi_thread() - .thread_name("mgmt request worker") - .enable_all() - .build() - .expect("Failed to create mgmt request runtime") -}); - -pub static WALRECEIVER_RUNTIME: Lazy = Lazy::new(|| { - tokio::runtime::Builder::new_multi_thread() - .thread_name("walreceiver worker") - .enable_all() - .build() - .expect("Failed to create walreceiver runtime") -}); - -pub static BACKGROUND_RUNTIME: Lazy = Lazy::new(|| { - tokio::runtime::Builder::new_multi_thread() - .thread_name("background op worker") - // if you change the number of worker threads please change the constant below - .enable_all() - .build() - .expect("Failed to create background op runtime") -}); - -pub(crate) static BACKGROUND_RUNTIME_WORKER_THREADS: Lazy = Lazy::new(|| { - // force init and thus panics - let _ = BACKGROUND_RUNTIME.handle(); +pub(crate) static TOKIO_WORKER_THREADS: Lazy = Lazy::new(|| { // replicates tokio-1.28.1::loom::sys::num_cpus which is not available publicly // tokio would had already panicked for parsing errors or NotUnicode // // this will be wrong if any of the runtimes gets their worker threads configured to something // else, but that has not been needed in a long time. - std::env::var("TOKIO_WORKER_THREADS") - .map(|s| s.parse::().unwrap()) - .unwrap_or_else(|_e| usize::max(2, num_cpus::get())) + NonZeroUsize::new( + std::env::var("TOKIO_WORKER_THREADS") + .map(|s| s.parse::().unwrap()) + .unwrap_or_else(|_e| usize::max(2, num_cpus::get())), + ) + .expect("the max() ensures that this is not zero") }); +enum TokioRuntimeMode { + SingleThreaded, + MultiThreaded { num_workers: NonZeroUsize }, +} + +impl FromStr for TokioRuntimeMode { + type Err = String; + + fn from_str(s: &str) -> Result { + match s { + "current_thread" => Ok(TokioRuntimeMode::SingleThreaded), + s => match s.strip_prefix("multi_thread:") { + Some("default") => Ok(TokioRuntimeMode::MultiThreaded { + num_workers: *TOKIO_WORKER_THREADS, + }), + Some(suffix) => { + let num_workers = suffix.parse::().map_err(|e| { + format!( + "invalid number of multi-threaded runtime workers ({suffix:?}): {e}", + ) + })?; + Ok(TokioRuntimeMode::MultiThreaded { num_workers }) + } + None => Err(format!("invalid runtime config: {s:?}")), + }, + } + } +} + +static ONE_RUNTIME: Lazy> = Lazy::new(|| { + let thread_name = "pageserver-tokio"; + let Some(mode) = env::var("NEON_PAGESERVER_USE_ONE_RUNTIME") else { + // If the env var is not set, leave this static as None. 
+ set_tokio_runtime_setup( + "multiple-runtimes", + NUM_MULTIPLE_RUNTIMES + .checked_mul(*TOKIO_WORKER_THREADS) + .unwrap(), + ); + return None; + }; + Some(match mode { + TokioRuntimeMode::SingleThreaded => { + set_tokio_runtime_setup("one-runtime-single-threaded", NonZeroUsize::new(1).unwrap()); + tokio::runtime::Builder::new_current_thread() + .thread_name(thread_name) + .enable_all() + .build() + .expect("failed to create one single runtime") + } + TokioRuntimeMode::MultiThreaded { num_workers } => { + set_tokio_runtime_setup("one-runtime-multi-threaded", num_workers); + tokio::runtime::Builder::new_multi_thread() + .thread_name(thread_name) + .enable_all() + .worker_threads(num_workers.get()) + .build() + .expect("failed to create one multi-threaded runtime") + } + }) +}); + +/// Declare a lazy static variable named `$varname` that will resolve +/// to a tokio runtime handle. If the env var `NEON_PAGESERVER_USE_ONE_RUNTIME` +/// is set, this will resolve to `ONE_RUNTIME`. Otherwise, the macro invocation +/// declares a separate runtime and the lazy static variable `$varname` +/// will resolve to that separate runtime. +/// +/// The result is is that `$varname.spawn()` will use `ONE_RUNTIME` if +/// `NEON_PAGESERVER_USE_ONE_RUNTIME` is set, and will use the separate runtime +/// otherwise. +macro_rules! pageserver_runtime { + ($varname:ident, $name:literal) => { + pub static $varname: Lazy<&'static tokio::runtime::Runtime> = Lazy::new(|| { + if let Some(runtime) = &*ONE_RUNTIME { + return runtime; + } + static RUNTIME: Lazy = Lazy::new(|| { + tokio::runtime::Builder::new_multi_thread() + .thread_name($name) + .worker_threads(TOKIO_WORKER_THREADS.get()) + .enable_all() + .build() + .expect(std::concat!("Failed to create runtime ", $name)) + }); + &*RUNTIME + }); + }; +} + +pageserver_runtime!(COMPUTE_REQUEST_RUNTIME, "compute request worker"); +pageserver_runtime!(MGMT_REQUEST_RUNTIME, "mgmt request worker"); +pageserver_runtime!(WALRECEIVER_RUNTIME, "walreceiver worker"); +pageserver_runtime!(BACKGROUND_RUNTIME, "background op worker"); +// Bump this number when adding a new pageserver_runtime! +// SAFETY: it's obviously correct +const NUM_MULTIPLE_RUNTIMES: NonZeroUsize = unsafe { NonZeroUsize::new_unchecked(4) }; + #[derive(Debug, Clone, Copy)] pub struct PageserverTaskId(u64); diff --git a/pageserver/src/tenant/remote_timeline_client.rs b/pageserver/src/tenant/remote_timeline_client.rs index 13fcd1a5e8..3879135f26 100644 --- a/pageserver/src/tenant/remote_timeline_client.rs +++ b/pageserver/src/tenant/remote_timeline_client.rs @@ -200,6 +200,7 @@ use utils::backoff::{ use std::collections::{HashMap, VecDeque}; use std::sync::atomic::{AtomicU32, Ordering}; use std::sync::{Arc, Mutex}; +use std::time::Duration; use remote_storage::{DownloadError, GenericRemoteStorage, RemotePath, TimeoutOrCancel}; use std::ops::DerefMut; @@ -207,7 +208,7 @@ use tracing::{debug, error, info, instrument, warn}; use tracing::{info_span, Instrument}; use utils::lsn::Lsn; -use crate::deletion_queue::DeletionQueueClient; +use crate::deletion_queue::{DeletionQueueClient, DeletionQueueError}; use crate::metrics::{ MeasureRemoteOp, RemoteOpFileKind, RemoteOpKind, RemoteTimelineClientMetrics, RemoteTimelineClientMetricsCallTrackSize, REMOTE_ONDEMAND_DOWNLOADED_BYTES, @@ -261,6 +262,10 @@ pub(crate) const INITDB_PRESERVED_PATH: &str = "initdb-preserved.tar.zst"; /// Default buffer size when interfacing with [`tokio::fs::File`]. 
pub(crate) const BUFFER_SIZE: usize = 32 * 1024; +/// Doing non-essential flushes of deletion queue is subject to this timeout, after +/// which we warn and skip. +const DELETION_QUEUE_FLUSH_TIMEOUT: Duration = Duration::from_secs(10); + pub enum MaybeDeletedIndexPart { IndexPart(IndexPart), Deleted(IndexPart), @@ -588,14 +593,14 @@ impl RemoteTimelineClient { upload_queue: &mut UploadQueueInitialized, metadata: TimelineMetadata, ) { + let disk_consistent_lsn = upload_queue.latest_metadata.disk_consistent_lsn(); + info!( - "scheduling metadata upload with {} files ({} changed)", + "scheduling metadata upload up to consistent LSN {disk_consistent_lsn} with {} files ({} changed)", upload_queue.latest_files.len(), upload_queue.latest_files_changes_since_metadata_upload_scheduled, ); - let disk_consistent_lsn = upload_queue.latest_metadata.disk_consistent_lsn(); - let index_part = IndexPart::new( upload_queue.latest_files.clone(), disk_consistent_lsn, @@ -1050,6 +1055,26 @@ impl RemoteTimelineClient { Ok(()) } + async fn flush_deletion_queue(&self) -> Result<(), DeletionQueueError> { + match tokio::time::timeout( + DELETION_QUEUE_FLUSH_TIMEOUT, + self.deletion_queue_client.flush_immediate(), + ) + .await + { + Ok(result) => result, + Err(_timeout) => { + // Flushing remote deletions is not mandatory: we flush here to make the system easier to test, and + // to ensure that _usually_ objects are really gone after a DELETE is acked. However, in case of deletion + // queue issues (https://github.com/neondatabase/neon/issues/6440), we don't want to wait indefinitely here. + tracing::warn!( + "Timed out waiting for deletion queue flush, acking deletion anyway" + ); + Ok(()) + } + } + } + /// Prerequisites: UploadQueue should be in stopped state and deleted_at should be successfuly set. /// The function deletes layer files one by one, then lists the prefix to see if we leaked something /// deletes leaked files if any and proceeds with deletion of index file at the end. @@ -1099,7 +1124,7 @@ impl RemoteTimelineClient { // Execute all pending deletions, so that when we proceed to do a list_prefixes below, we aren't // taking the burden of listing all the layers that we already know we should delete. - self.deletion_queue_client.flush_immediate().await?; + self.flush_deletion_queue().await?; let cancel = shutdown_token(); @@ -1173,7 +1198,7 @@ impl RemoteTimelineClient { // Timeline deletion is rare and we have probably emitted a reasonably number of objects: wait // for a flush to a persistent deletion list so that we may be sure deletion will occur. 
- self.deletion_queue_client.flush_immediate().await?; + self.flush_deletion_queue().await?; fail::fail_point!("timeline-delete-after-index-delete", |_| { Err(anyhow::anyhow!( diff --git a/pageserver/src/tenant/tasks.rs b/pageserver/src/tenant/tasks.rs index e4f5f75132..74ed677ffe 100644 --- a/pageserver/src/tenant/tasks.rs +++ b/pageserver/src/tenant/tasks.rs @@ -18,7 +18,7 @@ use utils::{backoff, completion}; static CONCURRENT_BACKGROUND_TASKS: once_cell::sync::Lazy = once_cell::sync::Lazy::new(|| { - let total_threads = *task_mgr::BACKGROUND_RUNTIME_WORKER_THREADS; + let total_threads = task_mgr::TOKIO_WORKER_THREADS.get(); let permits = usize::max( 1, // while a lot of the work is done on spawn_blocking, we still do @@ -72,6 +72,7 @@ pub(crate) async fn concurrent_background_tasks_rate_limit_permit( loop_kind == BackgroundLoopKind::InitialLogicalSizeCalculation ); + // TODO: assert that we run on BACKGROUND_RUNTIME; requires tokio_unstable Handle::id(); match CONCURRENT_BACKGROUND_TASKS.acquire().await { Ok(permit) => permit, Err(_closed) => unreachable!("we never close the semaphore"), diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index c5eda44b7d..d3c8c5f66c 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -282,10 +282,12 @@ pub struct Timeline { pub(super) flush_loop_state: Mutex, /// layer_flush_start_tx can be used to wake up the layer-flushing task. - /// The value is a counter, incremented every time a new flush cycle is requested. - /// The flush cycle counter is sent back on the layer_flush_done channel when - /// the flush finishes. You can use that to wait for the flush to finish. - layer_flush_start_tx: tokio::sync::watch::Sender, + /// - The u64 value is a counter, incremented every time a new flush cycle is requested. + /// The flush cycle counter is sent back on the layer_flush_done channel when + /// the flush finishes. You can use that to wait for the flush to finish. + /// - The LSN is updated to max() of its current value and the latest disk_consistent_lsn + /// read by whoever sends an update + layer_flush_start_tx: tokio::sync::watch::Sender<(u64, Lsn)>, /// to be notified when layer flushing has finished, subscribe to the layer_flush_done channel layer_flush_done_tx: tokio::sync::watch::Sender<(u64, Result<(), FlushLayerError>)>, @@ -1169,8 +1171,8 @@ impl Timeline { /// Flush to disk all data that was written with the put_* functions #[instrument(skip(self), fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug(), timeline_id=%self.timeline_id))] pub(crate) async fn freeze_and_flush(&self) -> anyhow::Result<()> { - self.freeze_inmem_layer(false).await; - self.flush_frozen_layers_and_wait().await + let to_lsn = self.freeze_inmem_layer(false).await; + self.flush_frozen_layers_and_wait(to_lsn).await } /// If there is no writer, and conditions for rolling the latest layer are met, then freeze it. @@ -1190,7 +1192,39 @@ impl Timeline { }; let Some(open_layer) = &layers_guard.layer_map().open_layer else { - // No open layer, no work to do. + // If there is no open layer, we have no layer freezing to do. However, we might need to generate + // some updates to disk_consistent_lsn and remote_consistent_lsn, in case we ingested some WAL regions + // that didn't result in writes to this shard. + + // Must not hold the layers lock while waiting for a flush. 
+ drop(layers_guard); + + let last_record_lsn = self.get_last_record_lsn(); + let disk_consistent_lsn = self.get_disk_consistent_lsn(); + if last_record_lsn > disk_consistent_lsn { + // We have no open layer, but disk_consistent_lsn is behind the last record: this indicates + // we are a sharded tenant and have skipped some WAL + let last_freeze_ts = *self.last_freeze_ts.read().unwrap(); + if last_freeze_ts.elapsed() >= self.get_checkpoint_timeout() { + // This should be somewhat rare, so we log it at INFO level. + // + // We checked for checkpoint timeout so that a shard without any + // data ingested (yet) doesn't write a remote index as soon as it + // sees its LSN advance: we only do this if we've been layer-less + // for some time. + tracing::info!( + "Advancing disk_consistent_lsn past WAL ingest gap {} -> {}", + disk_consistent_lsn, + last_record_lsn + ); + + // The flush loop will update remote consistent LSN as well as disk consistent LSN. + self.flush_frozen_layers_and_wait(last_record_lsn) + .await + .ok(); + } + } + return; }; @@ -1769,7 +1803,7 @@ impl Timeline { let disk_consistent_lsn = metadata.disk_consistent_lsn(); let (state, _) = watch::channel(state); - let (layer_flush_start_tx, _) = tokio::sync::watch::channel(0); + let (layer_flush_start_tx, _) = tokio::sync::watch::channel((0, disk_consistent_lsn)); let (layer_flush_done_tx, _) = tokio::sync::watch::channel((0, Ok(()))); let evictions_low_residence_duration_metric_threshold = { @@ -3174,7 +3208,9 @@ impl Timeline { self.last_record_lsn.advance(new_lsn); } - async fn freeze_inmem_layer(&self, write_lock_held: bool) { + /// Whether there was a layer to freeze or not, return the value of get_last_record_lsn + /// before we attempted the freeze: this guarantees that ingested data is frozen up to this lsn (inclusive). + async fn freeze_inmem_layer(&self, write_lock_held: bool) -> Lsn { // Freeze the current open in-memory layer. It will be written to disk on next // iteration. @@ -3184,7 +3220,9 @@ impl Timeline { Some(self.write_lock.lock().await) }; - self.freeze_inmem_layer_at(self.get_last_record_lsn()).await; + let to_lsn = self.get_last_record_lsn(); + self.freeze_inmem_layer_at(to_lsn).await; + to_lsn } async fn freeze_inmem_layer_at(&self, at: Lsn) { @@ -3197,7 +3235,7 @@ impl Timeline { /// Layer flusher task's main loop. 
async fn flush_loop( self: &Arc, - mut layer_flush_start_rx: tokio::sync::watch::Receiver, + mut layer_flush_start_rx: tokio::sync::watch::Receiver<(u64, Lsn)>, ctx: &RequestContext, ) { info!("started flush loop"); @@ -3210,7 +3248,11 @@ impl Timeline { _ = layer_flush_start_rx.changed() => {} } trace!("waking up"); - let flush_counter = *layer_flush_start_rx.borrow(); + let (flush_counter, frozen_to_lsn) = *layer_flush_start_rx.borrow(); + + // The highest LSN to which we flushed in the loop over frozen layers + let mut flushed_to_lsn = Lsn(0); + let result = loop { if self.cancel.is_cancelled() { info!("dropping out of flush loop for timeline shutdown"); @@ -3231,7 +3273,9 @@ impl Timeline { break Ok(()); }; match self.flush_frozen_layer(layer_to_flush, ctx).await { - Ok(()) => {} + Ok(this_layer_to_lsn) => { + flushed_to_lsn = std::cmp::max(flushed_to_lsn, this_layer_to_lsn); + } Err(FlushLayerError::Cancelled) => { info!("dropping out of flush loop for timeline shutdown"); return; @@ -3240,11 +3284,36 @@ impl Timeline { FlushLayerError::Other(_) | FlushLayerError::CreateImageLayersError(_), ) => { error!("could not flush frozen layer: {err:?}"); - break err; + break err.map(|_| ()); } } timer.stop_and_record(); }; + + // Unsharded tenants should never advance their LSN beyond the end of the + // highest layer they write: such gaps between layer data and the frozen LSN + // are only legal on sharded tenants. + debug_assert!( + self.shard_identity.count.count() > 1 + || flushed_to_lsn >= frozen_to_lsn + || !flushed_to_lsn.is_valid() + ); + + if flushed_to_lsn < frozen_to_lsn && self.shard_identity.count.count() > 1 { + // If our layer flushes didn't carry disk_consistent_lsn up to the `to_lsn` advertised + // to us via layer_flush_start_rx, then advance it here. + // + // This path is only taken for tenants with multiple shards: single sharded tenants should + // never encounter a gap in the wal. + let old_disk_consistent_lsn = self.disk_consistent_lsn.load(); + tracing::debug!("Advancing disk_consistent_lsn across layer gap {old_disk_consistent_lsn}->{frozen_to_lsn}"); + if self.set_disk_consistent_lsn(frozen_to_lsn) { + if let Err(e) = self.schedule_uploads(frozen_to_lsn, vec![]) { + tracing::warn!("Failed to schedule metadata upload after updating disk_consistent_lsn: {e}"); + } + } + } + // Notify any listeners that we're done let _ = self .layer_flush_done_tx @@ -3252,7 +3321,13 @@ impl Timeline { } } - async fn flush_frozen_layers_and_wait(&self) -> anyhow::Result<()> { + /// Request the flush loop to write out all frozen layers up to `to_lsn` as Delta L0 files to disk. + /// The caller is responsible for the freezing, e.g., [`Self::freeze_inmem_layer`]. + /// + /// `last_record_lsn` may be higher than the highest LSN of a frozen layer: if this is the case, + /// it means no data will be written between the top of the highest frozen layer and to_lsn, + /// e.g. because this tenant shard has ingested up to to_lsn and not written any data locally for that part of the WAL. + async fn flush_frozen_layers_and_wait(&self, last_record_lsn: Lsn) -> anyhow::Result<()> { let mut rx = self.layer_flush_done_tx.subscribe(); // Increment the flush cycle counter and wake up the flush task. 
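(Aside, not part of the patch: the doc comments above describe the new flush-request protocol, where `layer_flush_start_tx` carries a `(counter, Lsn)` pair, requesters bump the counter and raise the target LSN with `max()`, and the flush loop reads both values atomically when it wakes. Below is a standalone sketch of that pattern, with a `u64` stand-in for `Lsn` and illustrative names, not the pageserver's actual types.)

```rust
use tokio::sync::watch;

type Lsn = u64; // stand-in for utils::lsn::Lsn

#[tokio::main]
async fn main() {
    let (start_tx, mut start_rx) = watch::channel((0u64, 0u64));

    // Requester side: ask for a flush up to `to_lsn`; concurrent requests can
    // only raise the target LSN, never lower it.
    let request_flush = |to_lsn: Lsn| {
        start_tx.send_modify(|(counter, lsn)| {
            *counter += 1;
            *lsn = (*lsn).max(to_lsn);
        });
    };
    request_flush(100);
    request_flush(80); // does not lower the target: max() keeps 100

    // Flush-loop side: wake up on a change and read both values atomically.
    start_rx.changed().await.unwrap();
    let (flush_counter, frozen_to_lsn) = *start_rx.borrow();
    assert_eq!((flush_counter, frozen_to_lsn), (2, 100));
}
```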
@@ -3266,9 +3341,10 @@ impl Timeline { anyhow::bail!("cannot flush frozen layers when flush_loop is not running, state is {flush_loop_state:?}") } - self.layer_flush_start_tx.send_modify(|counter| { + self.layer_flush_start_tx.send_modify(|(counter, lsn)| { my_flush_request = *counter + 1; *counter = my_flush_request; + *lsn = std::cmp::max(last_record_lsn, *lsn); }); loop { @@ -3305,16 +3381,22 @@ impl Timeline { } fn flush_frozen_layers(&self) { - self.layer_flush_start_tx.send_modify(|val| *val += 1); + self.layer_flush_start_tx.send_modify(|(counter, lsn)| { + *counter += 1; + + *lsn = std::cmp::max(*lsn, Lsn(self.last_freeze_at.load().0 - 1)); + }); } /// Flush one frozen in-memory layer to disk, as a new delta layer. + /// + /// Return value is the last lsn (inclusive) of the layer that was frozen. #[instrument(skip_all, fields(layer=%frozen_layer))] async fn flush_frozen_layer( self: &Arc, frozen_layer: Arc, ctx: &RequestContext, - ) -> Result<(), FlushLayerError> { + ) -> Result { debug_assert_current_span_has_tenant_and_timeline_id(); // As a special case, when we have just imported an image into the repository, @@ -3389,7 +3471,6 @@ impl Timeline { } let disk_consistent_lsn = Lsn(lsn_range.end.0 - 1); - let old_disk_consistent_lsn = self.disk_consistent_lsn.load(); // The new on-disk layers are now in the layer map. We can remove the // in-memory layer from the map now. The flushed layer is stored in @@ -3403,10 +3484,7 @@ impl Timeline { guard.finish_flush_l0_layer(delta_layer_to_add.as_ref(), &frozen_layer, &self.metrics); - if disk_consistent_lsn != old_disk_consistent_lsn { - assert!(disk_consistent_lsn > old_disk_consistent_lsn); - self.disk_consistent_lsn.store(disk_consistent_lsn); - + if self.set_disk_consistent_lsn(disk_consistent_lsn) { // Schedule remote uploads that will reflect our new disk_consistent_lsn self.schedule_uploads(disk_consistent_lsn, layers_to_upload)?; } @@ -3423,7 +3501,22 @@ impl Timeline { // This failpoint is used by another test case `test_pageserver_recovery`. fail_point!("flush-frozen-exit"); - Ok(()) + Ok(Lsn(lsn_range.end.0 - 1)) + } + + /// Return true if the value changed + /// + /// This function must only be used from the layer flush task, and may not be called concurrently. + fn set_disk_consistent_lsn(&self, new_value: Lsn) -> bool { + // We do a simple load/store cycle: that's why this function isn't safe for concurrent use. + let old_value = self.disk_consistent_lsn.load(); + if new_value != old_value { + assert!(new_value >= old_value); + self.disk_consistent_lsn.store(new_value); + true + } else { + false + } } /// Update metadata file diff --git a/pageserver/src/tenant/timeline/layer_manager.rs b/pageserver/src/tenant/timeline/layer_manager.rs index d54dc1642c..64edcc5e40 100644 --- a/pageserver/src/tenant/timeline/layer_manager.rs +++ b/pageserver/src/tenant/timeline/layer_manager.rs @@ -120,9 +120,10 @@ impl LayerManager { /// Called from `freeze_inmem_layer`, returns true if successfully frozen. 
pub(crate) async fn try_freeze_in_memory_layer( &mut self, - Lsn(last_record_lsn): Lsn, + lsn: Lsn, last_freeze_at: &AtomicLsn, ) { + let Lsn(last_record_lsn) = lsn; let end_lsn = Lsn(last_record_lsn + 1); if let Some(open_layer) = &self.layer_map.open_layer { @@ -135,8 +136,11 @@ impl LayerManager { self.layer_map.frozen_layers.push_back(open_layer_rc); self.layer_map.open_layer = None; self.layer_map.next_open_layer_at = Some(end_lsn); - last_freeze_at.store(end_lsn); } + + // Even if there was no layer to freeze, advance last_freeze_at to last_record_lsn+1: this + // accounts for regions in the LSN range where we might have ingested no data due to sharding. + last_freeze_at.store(end_lsn); } /// Add image layers to the layer map, called from `create_image_layers`. diff --git a/pageserver/src/utilization.rs b/pageserver/src/utilization.rs index 830c9897ca..5eccf185ac 100644 --- a/pageserver/src/utilization.rs +++ b/pageserver/src/utilization.rs @@ -15,11 +15,23 @@ pub(crate) fn regenerate(tenants_path: &Path) -> anyhow::Result 0 { + statvfs.fragment_size() + } else { + statvfs.block_size() + }; #[cfg_attr(not(target_os = "macos"), allow(clippy::unnecessary_cast))] let free = statvfs.blocks_available() as u64 * blocksz; - let used = crate::metrics::RESIDENT_PHYSICAL_SIZE_GLOBAL.get(); + + #[cfg_attr(not(target_os = "macos"), allow(clippy::unnecessary_cast))] + let used = statvfs + .blocks() + // use blocks_free instead of available here to match df in case someone compares + .saturating_sub(statvfs.blocks_free()) as u64 + * blocksz; + let captured_at = std::time::SystemTime::now(); let doc = PageserverUtilization { diff --git a/pageserver/src/walredo.rs b/pageserver/src/walredo.rs index c050bd9340..0ffe4a5213 100644 --- a/pageserver/src/walredo.rs +++ b/pageserver/src/walredo.rs @@ -37,11 +37,12 @@ use bytes::{Bytes, BytesMut}; use pageserver_api::key::key_to_rel_block; use pageserver_api::models::WalRedoManagerStatus; use pageserver_api::shard::TenantShardId; -use std::sync::{Arc, RwLock}; +use std::sync::Arc; use std::time::Duration; use std::time::Instant; use tracing::*; use utils::lsn::Lsn; +use utils::sync::heavier_once_cell; /// /// This is the real implementation that uses a Postgres process to @@ -54,7 +55,19 @@ pub struct PostgresRedoManager { tenant_shard_id: TenantShardId, conf: &'static PageServerConf, last_redo_at: std::sync::Mutex>, - redo_process: RwLock>>, + /// The current [`process::Process`] that is used by new redo requests. + /// We use [`heavier_once_cell`] for coalescing the spawning, but the redo + /// requests don't use the [`heavier_once_cell::Guard`] to keep ahold of the + /// their process object; we use [`Arc::clone`] for that. + /// This is primarily because earlier implementations that didn't use [`heavier_once_cell`] + /// had that behavior; it's probably unnecessary. + /// The only merit of it is that if one walredo process encounters an error, + /// it can take it out of rotation (= using [`heavier_once_cell::Guard::take_and_deinit`]. + /// and retry redo, thereby starting the new process, while other redo tasks might + /// still be using the old redo process. But, those other tasks will most likely + /// encounter an error as well, and errors are an unexpected condition anyway. + /// So, probably we could get rid of the `Arc` in the future. + redo_process: heavier_once_cell::OnceCell>, } /// @@ -135,7 +148,7 @@ impl PostgresRedoManager { chrono::Utc::now().checked_sub_signed(chrono::Duration::from_std(age).ok()?) 
}) }, - pid: self.redo_process.read().unwrap().as_ref().map(|p| p.id()), + pid: self.redo_process.get().map(|p| p.id()), }) } } @@ -153,7 +166,7 @@ impl PostgresRedoManager { tenant_shard_id, conf, last_redo_at: std::sync::Mutex::default(), - redo_process: RwLock::new(None), + redo_process: heavier_once_cell::OnceCell::default(), } } @@ -165,8 +178,7 @@ impl PostgresRedoManager { if let Some(last_redo_at) = *g { if last_redo_at.elapsed() >= idle_timeout { drop(g); - let mut guard = self.redo_process.write().unwrap(); - *guard = None; + drop(self.redo_process.get().map(|guard| guard.take_and_deinit())); } } } @@ -194,40 +206,24 @@ impl PostgresRedoManager { const MAX_RETRY_ATTEMPTS: u32 = 1; let mut n_attempts = 0u32; loop { - // launch the WAL redo process on first use - let proc: Arc = { - let proc_guard = self.redo_process.read().unwrap(); - match &*proc_guard { - None => { - // "upgrade" to write lock to launch the process - drop(proc_guard); - let mut proc_guard = self.redo_process.write().unwrap(); - match &*proc_guard { - None => { - let start = Instant::now(); - let proc = Arc::new( - process::Process::launch( - self.conf, - self.tenant_shard_id, - pg_version, - ) - .context("launch walredo process")?, - ); - let duration = start.elapsed(); - WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM - .observe(duration.as_secs_f64()); - info!( - duration_ms = duration.as_millis(), - pid = proc.id(), - "launched walredo process" - ); - *proc_guard = Some(Arc::clone(&proc)); - proc - } - Some(proc) => Arc::clone(proc), - } - } - Some(proc) => Arc::clone(proc), + let proc: Arc = match self.redo_process.get_or_init_detached().await { + Ok(guard) => Arc::clone(&guard), + Err(permit) => { + // don't hold poison_guard, the launch code can bail + let start = Instant::now(); + let proc = Arc::new( + process::Process::launch(self.conf, self.tenant_shard_id, pg_version) + .context("launch walredo process")?, + ); + let duration = start.elapsed(); + WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM.observe(duration.as_secs_f64()); + info!( + duration_ms = duration.as_millis(), + pid = proc.id(), + "launched walredo process" + ); + self.redo_process.set(Arc::clone(&proc), permit); + proc } }; @@ -276,34 +272,34 @@ impl PostgresRedoManager { n_attempts, e, ); - // Avoid concurrent callers hitting the same issue. - // We can't prevent it from happening because we want to enable parallelism. - { - let mut guard = self.redo_process.write().unwrap(); - match &*guard { - Some(current_field_value) => { - if Arc::ptr_eq(current_field_value, &proc) { - // We're the first to observe an error from `proc`, it's our job to take it out of rotation. - *guard = None; - } - } - None => { - // Another thread was faster to observe the error, and already took the process out of rotation. - } - } - } + // Avoid concurrent callers hitting the same issue by taking `proc` out of the rotation. + // Note that there may be other tasks concurrent with us that also hold `proc`. + // We have to deal with that here. + // Also read the doc comment on field `self.redo_process`. + // // NB: there may still be other concurrent threads using `proc`. // The last one will send SIGKILL when the underlying Arc reaches refcount 0. - // NB: it's important to drop(proc) after drop(guard). Otherwise we'd keep - // holding the lock while waiting for the process to exit. - // NB: the drop impl blocks the current threads with a wait() system call for - // the child process. We dropped the `guard` above so that other threads aren't - // affected. 
But, it's good that the current thread _does_ block to wait. - // If we instead deferred the waiting into the background / to tokio, it could - // happen that if walredo always fails immediately, we spawn processes faster + // + // NB: the drop impl blocks the dropping thread with a wait() system call for + // the child process. In some ways the blocking is actually good: if we + // deferred the waiting into the background / to tokio if we used `tokio::process`, + // it could happen that if walredo always fails immediately, we spawn processes faster // than we can SIGKILL & `wait` for them to exit. By doing it the way we do here, // we limit this risk of run-away to at most $num_runtimes * $num_executor_threads. // This probably needs revisiting at some later point. + match self.redo_process.get() { + None => (), + Some(guard) => { + if Arc::ptr_eq(&proc, &*guard) { + // We're the first to observe an error from `proc`, it's our job to take it out of rotation. + guard.take_and_deinit(); + } else { + // Another task already spawned another redo process (further up in this method) + // and put it into `redo_process`. Do nothing, our view of the world is behind. + } + } + } + // The last task that does this `drop()` of `proc` will do a blocking `wait()` syscall. drop(proc); } else if n_attempts != 0 { info!(n_attempts, "retried walredo succeeded"); diff --git a/pgxn/neon/libpagestore.c b/pgxn/neon/libpagestore.c index 1bc8a2e87c..2276b4e807 100644 --- a/pgxn/neon/libpagestore.c +++ b/pgxn/neon/libpagestore.c @@ -495,16 +495,17 @@ retry: static void pageserver_disconnect(shardno_t shard_no) { - if (page_servers[shard_no].conn) - { - /* - * If the connection to any pageserver is lost, we throw away the - * whole prefetch queue, even for other pageservers. It should not - * cause big problems, because connection loss is supposed to be a - * rare event. - */ - prefetch_on_ps_disconnect(); - } + /* + * If the connection to any pageserver is lost, we throw away the + * whole prefetch queue, even for other pageservers. It should not + * cause big problems, because connection loss is supposed to be a + * rare event. + * + * Prefetch state should be reset even if page_servers[shard_no].conn == NULL, + * because prefetch request may be registered before connection is established. 
+ */ + prefetch_on_ps_disconnect(); + pageserver_disconnect_shard(shard_no); } diff --git a/pgxn/neon/pagestore_smgr.c b/pgxn/neon/pagestore_smgr.c index b33cfab2bb..57a16e00ca 100644 --- a/pgxn/neon/pagestore_smgr.c +++ b/pgxn/neon/pagestore_smgr.c @@ -641,13 +641,12 @@ prefetch_on_ps_disconnect(void) static inline void prefetch_set_unused(uint64 ring_index) { - PrefetchRequest *slot = GetPrfSlot(ring_index); + PrefetchRequest *slot; if (ring_index < MyPState->ring_last) return; /* Should already be unused */ - Assert(MyPState->ring_unused > ring_index); - + slot = GetPrfSlot(ring_index); if (slot->status == PRFS_UNUSED) return; @@ -806,7 +805,8 @@ Retry: { if (*force_lsn > slot->effective_request_lsn) { - prefetch_wait_for(ring_index); + if (!prefetch_wait_for(ring_index)) + goto Retry; prefetch_set_unused(ring_index); entry = NULL; } @@ -821,7 +821,8 @@ Retry: { if (*force_lsn != slot->effective_request_lsn) { - prefetch_wait_for(ring_index); + if (!prefetch_wait_for(ring_index)) + goto Retry; prefetch_set_unused(ring_index); entry = NULL; } @@ -887,7 +888,8 @@ Retry: { case PRFS_REQUESTED: Assert(MyPState->ring_receive == cleanup_index); - prefetch_wait_for(cleanup_index); + if (!prefetch_wait_for(cleanup_index)) + goto Retry; prefetch_set_unused(cleanup_index); break; case PRFS_RECEIVED: @@ -2140,6 +2142,7 @@ neon_read_at_lsn(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, /* * Try to find prefetched page in the list of received pages. */ + Retry: entry = prfh_lookup(MyPState->prf_hash, (PrefetchRequest *) &buftag); if (entry != NULL) @@ -2161,7 +2164,8 @@ neon_read_at_lsn(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, */ if (slot->status == PRFS_REQUESTED) { - prefetch_wait_for(slot->my_ring_index); + if (!prefetch_wait_for(slot->my_ring_index)) + goto Retry; } /* drop caches */ prefetch_set_unused(slot->my_ring_index); diff --git a/proxy/src/auth/backend/link.rs b/proxy/src/auth/backend/link.rs index 7db76f3d9e..415a4b7d85 100644 --- a/proxy/src/auth/backend/link.rs +++ b/proxy/src/auth/backend/link.rs @@ -102,8 +102,7 @@ pub(super) async fn authenticate( ctx.set_user(db_info.user.into()); ctx.set_project(db_info.aux.clone()); - let cold_start_info = db_info.aux.cold_start_info.clone().unwrap_or_default(); - info!(?cold_start_info, "woken up a compute node"); + info!("woken up a compute node"); // Backwards compatibility. pg_sni_proxy uses "--" in domain names // while direct connections do not. 
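(Aside, not part of the patch: the `walredo.rs` change above replaces the `RwLock<Option<Arc<Process>>>` with `heavier_once_cell` and, on error, takes the failed process out of rotation only if it is still the current one, checked via `Arc::ptr_eq`. A condensed, self-contained sketch of that idea, using a plain `Mutex<Option<_>>` and a dummy `Process` type as stand-ins for the real API:)

```rust
use std::sync::{Arc, Mutex};

struct Process; // stand-in for walredo::process::Process

struct RedoManager {
    current: Mutex<Option<Arc<Process>>>,
}

impl RedoManager {
    fn get_or_launch(&self) -> Arc<Process> {
        let mut slot = self.current.lock().unwrap();
        slot.get_or_insert_with(|| Arc::new(Process)).clone()
    }

    /// Called when a redo request using `failed` hit an error.
    fn take_out_of_rotation(&self, failed: &Arc<Process>) {
        let mut slot = self.current.lock().unwrap();
        if let Some(current) = &*slot {
            if Arc::ptr_eq(current, failed) {
                // First to observe the failure of this exact process:
                // clear the slot so the next request launches a fresh one.
                *slot = None;
            }
            // Otherwise another task already replaced it; our view is stale.
        }
    }
}

fn main() {
    let mgr = RedoManager { current: Mutex::new(None) };
    let proc1 = mgr.get_or_launch();
    mgr.take_out_of_rotation(&proc1); // proc1 leaves the rotation
    let proc2 = mgr.get_or_launch();  // a fresh process takes its place
    assert!(!Arc::ptr_eq(&proc1, &proc2));
    mgr.take_out_of_rotation(&proc1); // stale handle: proc2 stays in place
    assert!(Arc::ptr_eq(&proc2, &mgr.get_or_launch()));
}
```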
Once we migrate to pg_sni_proxy diff --git a/proxy/src/bin/pg_sni_router.rs b/proxy/src/bin/pg_sni_router.rs index 385f7820cb..c28814b1c8 100644 --- a/proxy/src/bin/pg_sni_router.rs +++ b/proxy/src/bin/pg_sni_router.rs @@ -10,6 +10,7 @@ use itertools::Itertools; use proxy::config::TlsServerEndPoint; use proxy::context::RequestMonitoring; use proxy::proxy::run_until_cancelled; +use proxy::{BranchId, EndpointId, ProjectId}; use rustls::pki_types::PrivateKeyDer; use tokio::net::TcpListener; @@ -269,7 +270,12 @@ async fn handle_client( let client = tokio::net::TcpStream::connect(destination).await?; - let metrics_aux: MetricsAuxInfo = Default::default(); + let metrics_aux: MetricsAuxInfo = MetricsAuxInfo { + endpoint_id: (&EndpointId::from("")).into(), + project_id: (&ProjectId::from("")).into(), + branch_id: (&BranchId::from("")).into(), + cold_start_info: proxy::console::messages::ColdStartInfo::Unknown, + }; // doesn't yet matter as pg-sni-router doesn't report analytics logs ctx.set_success(); diff --git a/proxy/src/cache/project_info.rs b/proxy/src/cache/project_info.rs index 5a3660520b..d8a1d261ce 100644 --- a/proxy/src/cache/project_info.rs +++ b/proxy/src/cache/project_info.rs @@ -16,7 +16,7 @@ use crate::{ config::ProjectInfoCacheOptions, console::AuthSecret, intern::{EndpointIdInt, ProjectIdInt, RoleNameInt}, - EndpointId, ProjectId, RoleName, + EndpointId, RoleName, }; use super::{Cache, Cached}; @@ -214,14 +214,11 @@ impl ProjectInfoCacheImpl { } pub fn insert_role_secret( &self, - project_id: &ProjectId, - endpoint_id: &EndpointId, - role_name: &RoleName, + project_id: ProjectIdInt, + endpoint_id: EndpointIdInt, + role_name: RoleNameInt, secret: Option, ) { - let project_id = ProjectIdInt::from(project_id); - let endpoint_id = EndpointIdInt::from(endpoint_id); - let role_name = RoleNameInt::from(role_name); if self.cache.len() >= self.config.size { // If there are too many entries, wait until the next gc cycle. return; @@ -234,12 +231,10 @@ impl ProjectInfoCacheImpl { } pub fn insert_allowed_ips( &self, - project_id: &ProjectId, - endpoint_id: &EndpointId, + project_id: ProjectIdInt, + endpoint_id: EndpointIdInt, allowed_ips: Arc>, ) { - let project_id = ProjectIdInt::from(project_id); - let endpoint_id = EndpointIdInt::from(endpoint_id); if self.cache.len() >= self.config.size { // If there are too many entries, wait until the next gc cycle. 
return; @@ -358,7 +353,7 @@ impl Cache for ProjectInfoCacheImpl { #[cfg(test)] mod tests { use super::*; - use crate::scram::ServerSecret; + use crate::{scram::ServerSecret, ProjectId}; #[tokio::test] async fn test_project_info_cache_settings() { @@ -369,8 +364,8 @@ mod tests { ttl: Duration::from_secs(1), gc_interval: Duration::from_secs(600), }); - let project_id = "project".into(); - let endpoint_id = "endpoint".into(); + let project_id: ProjectId = "project".into(); + let endpoint_id: EndpointId = "endpoint".into(); let user1: RoleName = "user1".into(); let user2: RoleName = "user2".into(); let secret1 = Some(AuthSecret::Scram(ServerSecret::mock([1; 32]))); @@ -379,9 +374,23 @@ mod tests { "127.0.0.1".parse().unwrap(), "127.0.0.2".parse().unwrap(), ]); - cache.insert_role_secret(&project_id, &endpoint_id, &user1, secret1.clone()); - cache.insert_role_secret(&project_id, &endpoint_id, &user2, secret2.clone()); - cache.insert_allowed_ips(&project_id, &endpoint_id, allowed_ips.clone()); + cache.insert_role_secret( + (&project_id).into(), + (&endpoint_id).into(), + (&user1).into(), + secret1.clone(), + ); + cache.insert_role_secret( + (&project_id).into(), + (&endpoint_id).into(), + (&user2).into(), + secret2.clone(), + ); + cache.insert_allowed_ips( + (&project_id).into(), + (&endpoint_id).into(), + allowed_ips.clone(), + ); let cached = cache.get_role_secret(&endpoint_id, &user1).unwrap(); assert!(cached.cached()); @@ -393,7 +402,12 @@ mod tests { // Shouldn't add more than 2 roles. let user3: RoleName = "user3".into(); let secret3 = Some(AuthSecret::Scram(ServerSecret::mock([3; 32]))); - cache.insert_role_secret(&project_id, &endpoint_id, &user3, secret3.clone()); + cache.insert_role_secret( + (&project_id).into(), + (&endpoint_id).into(), + (&user3).into(), + secret3.clone(), + ); assert!(cache.get_role_secret(&endpoint_id, &user3).is_none()); let cached = cache.get_allowed_ips(&endpoint_id).unwrap(); @@ -421,8 +435,8 @@ mod tests { cache.clone().disable_ttl(); tokio::time::advance(Duration::from_secs(2)).await; - let project_id = "project".into(); - let endpoint_id = "endpoint".into(); + let project_id: ProjectId = "project".into(); + let endpoint_id: EndpointId = "endpoint".into(); let user1: RoleName = "user1".into(); let user2: RoleName = "user2".into(); let secret1 = Some(AuthSecret::Scram(ServerSecret::mock([1; 32]))); @@ -431,9 +445,23 @@ mod tests { "127.0.0.1".parse().unwrap(), "127.0.0.2".parse().unwrap(), ]); - cache.insert_role_secret(&project_id, &endpoint_id, &user1, secret1.clone()); - cache.insert_role_secret(&project_id, &endpoint_id, &user2, secret2.clone()); - cache.insert_allowed_ips(&project_id, &endpoint_id, allowed_ips.clone()); + cache.insert_role_secret( + (&project_id).into(), + (&endpoint_id).into(), + (&user1).into(), + secret1.clone(), + ); + cache.insert_role_secret( + (&project_id).into(), + (&endpoint_id).into(), + (&user2).into(), + secret2.clone(), + ); + cache.insert_allowed_ips( + (&project_id).into(), + (&endpoint_id).into(), + allowed_ips.clone(), + ); tokio::time::advance(Duration::from_secs(2)).await; // Nothing should be invalidated. 
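The cache API change above replaces the `&ProjectId`/`&EndpointId`/`&RoleName` arguments with the interned `ProjectIdInt`/`EndpointIdInt`/`RoleNameInt` handles, which is why the call sites now convert with `(&x).into()`. The sketch below is not the proxy's actual `intern` module; it is a minimal standalone illustration of the general idea, using one hypothetical `EndpointIdInt` type, of why a small `Copy` handle can be passed and stored by value instead of cloning string-backed newtypes.

```rust
use std::collections::HashMap;
use std::sync::{Mutex, OnceLock};

/// A tiny, `Copy` handle standing in for an interned endpoint name.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
struct EndpointIdInt(u32);

#[derive(Default)]
struct Interner {
    by_name: HashMap<String, u32>,
    names: Vec<String>,
}

impl Interner {
    fn intern(&mut self, s: &str) -> u32 {
        if let Some(&id) = self.by_name.get(s) {
            return id;
        }
        let id = self.names.len() as u32;
        self.names.push(s.to_owned());
        self.by_name.insert(s.to_owned(), id);
        id
    }
}

fn global() -> &'static Mutex<Interner> {
    static GLOBAL: OnceLock<Mutex<Interner>> = OnceLock::new();
    GLOBAL.get_or_init(|| Mutex::new(Interner::default()))
}

impl From<&str> for EndpointIdInt {
    fn from(s: &str) -> Self {
        EndpointIdInt(global().lock().unwrap().intern(s))
    }
}

/// Recover the original string, mirroring the `as_str()`-style accessors used elsewhere.
fn resolve(id: EndpointIdInt) -> String {
    global().lock().unwrap().names[id.0 as usize].clone()
}

fn main() {
    // The same name always maps to the same 4-byte handle, so caches and metric
    // label sets can key on and store the handle by value.
    let a = EndpointIdInt::from("ep-cool-darkness-123");
    let b = EndpointIdInt::from("ep-cool-darkness-123");
    assert_eq!(a, b);
    assert_eq!(resolve(a), "ep-cool-darkness-123");
}
```

Because the handle is `Copy` and only four bytes, the cache insertion methods can take the identifiers by value and store them without any reference counting or per-insert string clones.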
@@ -470,8 +498,8 @@ mod tests { gc_interval: Duration::from_secs(600), })); - let project_id = "project".into(); - let endpoint_id = "endpoint".into(); + let project_id: ProjectId = "project".into(); + let endpoint_id: EndpointId = "endpoint".into(); let user1: RoleName = "user1".into(); let user2: RoleName = "user2".into(); let secret1 = Some(AuthSecret::Scram(ServerSecret::mock([1; 32]))); @@ -480,10 +508,20 @@ mod tests { "127.0.0.1".parse().unwrap(), "127.0.0.2".parse().unwrap(), ]); - cache.insert_role_secret(&project_id, &endpoint_id, &user1, secret1.clone()); + cache.insert_role_secret( + (&project_id).into(), + (&endpoint_id).into(), + (&user1).into(), + secret1.clone(), + ); cache.clone().disable_ttl(); tokio::time::advance(Duration::from_millis(100)).await; - cache.insert_role_secret(&project_id, &endpoint_id, &user2, secret2.clone()); + cache.insert_role_secret( + (&project_id).into(), + (&endpoint_id).into(), + (&user2).into(), + secret2.clone(), + ); // Added before ttl was disabled + ttl should be still cached. let cached = cache.get_role_secret(&endpoint_id, &user1).unwrap(); @@ -497,7 +535,11 @@ mod tests { assert!(cache.get_role_secret(&endpoint_id, &user2).is_none()); // Added after ttl was disabled + ttl should not be cached. - cache.insert_allowed_ips(&project_id, &endpoint_id, allowed_ips.clone()); + cache.insert_allowed_ips( + (&project_id).into(), + (&endpoint_id).into(), + allowed_ips.clone(), + ); let cached = cache.get_allowed_ips(&endpoint_id).unwrap(); assert!(!cached.cached()); diff --git a/proxy/src/compute.rs b/proxy/src/compute.rs index 65153babcb..ee33b97fbd 100644 --- a/proxy/src/compute.rs +++ b/proxy/src/compute.rs @@ -276,6 +276,7 @@ impl ConnCfg { let stream = connection.stream.into_inner(); info!( + cold_start_info = ctx.cold_start_info.as_str(), "connected to compute node at {host} ({socket_addr}) sslmode={:?}", self.0.get_ssl_mode() ); diff --git a/proxy/src/console/messages.rs b/proxy/src/console/messages.rs index 102076f2c6..45161f5ac8 100644 --- a/proxy/src/console/messages.rs +++ b/proxy/src/console/messages.rs @@ -3,7 +3,7 @@ use std::fmt; use crate::auth::IpPattern; -use crate::{BranchId, EndpointId, ProjectId}; +use crate::intern::{BranchIdInt, EndpointIdInt, ProjectIdInt}; /// Generic error response with human-readable description. /// Note that we can't always present it to user as is. @@ -18,7 +18,7 @@ pub struct ConsoleError { pub struct GetRoleSecret { pub role_secret: Box, pub allowed_ips: Option>, - pub project_id: Option, + pub project_id: Option, } // Manually implement debug to omit sensitive info. @@ -93,22 +93,47 @@ impl fmt::Debug for DatabaseInfo { /// Various labels for prometheus metrics. /// Also known as `ProxyMetricsAuxInfo` in the console. 
-#[derive(Debug, Deserialize, Clone, Default)] +#[derive(Debug, Deserialize, Clone)] pub struct MetricsAuxInfo { - pub endpoint_id: EndpointId, - pub project_id: ProjectId, - pub branch_id: BranchId, - pub cold_start_info: Option, + pub endpoint_id: EndpointIdInt, + pub project_id: ProjectIdInt, + pub branch_id: BranchIdInt, + #[serde(default)] + pub cold_start_info: ColdStartInfo, } -#[derive(Debug, Default, Serialize, Deserialize, Clone)] +#[derive(Debug, Default, Serialize, Deserialize, Clone, Copy)] #[serde(rename_all = "snake_case")] pub enum ColdStartInfo { #[default] - Unknown = 0, - Warm = 1, - PoolHit = 2, - PoolMiss = 3, + Unknown, + /// Compute was already running + Warm, + #[serde(rename = "pool_hit")] + /// Compute was not running but there was an available VM + VmPoolHit, + #[serde(rename = "pool_miss")] + /// Compute was not running and there were no VMs available + VmPoolMiss, + + // not provided by control plane + /// Connection available from HTTP pool + HttpPoolHit, + /// Cached connection info + WarmCached, +} + +impl ColdStartInfo { + pub fn as_str(&self) -> &'static str { + match self { + ColdStartInfo::Unknown => "unknown", + ColdStartInfo::Warm => "warm", + ColdStartInfo::VmPoolHit => "pool_hit", + ColdStartInfo::VmPoolMiss => "pool_miss", + ColdStartInfo::HttpPoolHit => "http_pool_hit", + ColdStartInfo::WarmCached => "warm_cached", + } + } } #[cfg(test)] diff --git a/proxy/src/console/provider.rs b/proxy/src/console/provider.rs index 69bfd6b045..f7d621fb12 100644 --- a/proxy/src/console/provider.rs +++ b/proxy/src/console/provider.rs @@ -12,7 +12,8 @@ use crate::{ compute, config::{CacheOptions, ProjectInfoCacheOptions}, context::RequestMonitoring, - scram, EndpointCacheKey, ProjectId, + intern::ProjectIdInt, + scram, EndpointCacheKey, }; use dashmap::DashMap; use std::{sync::Arc, time::Duration}; @@ -271,7 +272,7 @@ pub struct AuthInfo { /// List of IP addresses allowed for the autorization. pub allowed_ips: Vec, /// Project ID. This is used for cache invalidation. - pub project_id: Option, + pub project_id: Option, } /// Info for establishing a connection to a compute node. 
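For the `ColdStartInfo` rework above, the interaction between `#[serde(rename_all = "snake_case")]`, the explicit `#[serde(rename = "pool_hit")]`/`"pool_miss")]` overrides, and `#[serde(default)]` on `MetricsAuxInfo::cold_start_info` is what keeps the wire format compatible with the control plane. A trimmed-down sketch of that behaviour follows; the hypothetical `endpoint_id` is shown as a plain `String` rather than the interned newtype, and it assumes the `serde` (with derive) and `serde_json` crates.

```rust
use serde::Deserialize;

#[derive(Debug, Default, Deserialize, Clone, Copy, PartialEq)]
#[serde(rename_all = "snake_case")]
enum ColdStartInfo {
    #[default]
    Unknown,
    Warm,
    #[serde(rename = "pool_hit")]
    VmPoolHit,
    #[serde(rename = "pool_miss")]
    VmPoolMiss,
    // Not sent by the control plane; set locally by the proxy.
    HttpPoolHit,
    WarmCached,
}

#[derive(Debug, Deserialize)]
struct MetricsAuxInfo {
    endpoint_id: String, // an interned newtype in the real code
    #[serde(default)]
    cold_start_info: ColdStartInfo,
}

fn main() {
    // The control plane keeps sending the old "pool_hit"/"pool_miss" strings,
    // which now map onto the renamed Vm* variants.
    let with_field: MetricsAuxInfo =
        serde_json::from_str(r#"{"endpoint_id": "ep", "cold_start_info": "pool_hit"}"#).unwrap();
    assert_eq!(with_field.endpoint_id, "ep");
    assert_eq!(with_field.cold_start_info, ColdStartInfo::VmPoolHit);

    // Older responses may omit the field entirely; `#[serde(default)]` falls
    // back to `Unknown` instead of failing deserialization.
    let without_field: MetricsAuxInfo =
        serde_json::from_str(r#"{"endpoint_id": "ep"}"#).unwrap();
    assert_eq!(without_field.cold_start_info, ColdStartInfo::Unknown);
}
```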
diff --git a/proxy/src/console/provider/mock.rs b/proxy/src/console/provider/mock.rs index b759c81373..cfe491f2aa 100644 --- a/proxy/src/console/provider/mock.rs +++ b/proxy/src/console/provider/mock.rs @@ -4,10 +4,16 @@ use super::{ errors::{ApiError, GetAuthInfoError, WakeComputeError}, AuthInfo, AuthSecret, CachedNodeInfo, NodeInfo, }; -use crate::console::provider::{CachedAllowedIps, CachedRoleSecret}; use crate::context::RequestMonitoring; use crate::{auth::backend::ComputeUserInfo, compute, error::io_error, scram, url::ApiUrl}; use crate::{auth::IpPattern, cache::Cached}; +use crate::{ + console::{ + messages::MetricsAuxInfo, + provider::{CachedAllowedIps, CachedRoleSecret}, + }, + BranchId, EndpointId, ProjectId, +}; use futures::TryFutureExt; use std::{str::FromStr, sync::Arc}; use thiserror::Error; @@ -114,7 +120,12 @@ impl Api { let node = NodeInfo { config, - aux: Default::default(), + aux: MetricsAuxInfo { + endpoint_id: (&EndpointId::from("endpoint")).into(), + project_id: (&ProjectId::from("project")).into(), + branch_id: (&BranchId::from("branch")).into(), + cold_start_info: crate::console::messages::ColdStartInfo::Warm, + }, allow_self_signed_compute: false, }; diff --git a/proxy/src/console/provider/neon.rs b/proxy/src/console/provider/neon.rs index 289b0c08f7..1a3e2ca795 100644 --- a/proxy/src/console/provider/neon.rs +++ b/proxy/src/console/provider/neon.rs @@ -181,15 +181,16 @@ impl super::Api for Api { } let auth_info = self.do_get_auth_info(ctx, user_info).await?; if let Some(project_id) = auth_info.project_id { + let ep_int = ep.into(); self.caches.project_info.insert_role_secret( - &project_id, - ep, - user, + project_id, + ep_int, + user.into(), auth_info.secret.clone(), ); self.caches.project_info.insert_allowed_ips( - &project_id, - ep, + project_id, + ep_int, Arc::new(auth_info.allowed_ips), ); ctx.set_project_id(project_id); @@ -217,15 +218,16 @@ impl super::Api for Api { let allowed_ips = Arc::new(auth_info.allowed_ips); let user = &user_info.user; if let Some(project_id) = auth_info.project_id { + let ep_int = ep.into(); self.caches.project_info.insert_role_secret( - &project_id, - ep, - user, + project_id, + ep_int, + user.into(), auth_info.secret.clone(), ); self.caches .project_info - .insert_allowed_ips(&project_id, ep, allowed_ips.clone()); + .insert_allowed_ips(project_id, ep_int, allowed_ips.clone()); ctx.set_project_id(project_id); } Ok(( @@ -248,8 +250,7 @@ impl super::Api for Api { // which means that we might cache it to reduce the load and latency. 
if let Some(cached) = self.caches.node_info.get(&key) { info!(key = &*key, "found cached compute node info"); - info!("cold_start_info=warm"); - ctx.set_cold_start_info(ColdStartInfo::Warm); + ctx.set_project(cached.aux.clone()); return Ok(cached); } @@ -260,17 +261,21 @@ impl super::Api for Api { if permit.should_check_cache() { if let Some(cached) = self.caches.node_info.get(&key) { info!(key = &*key, "found cached compute node info"); - info!("cold_start_info=warm"); - ctx.set_cold_start_info(ColdStartInfo::Warm); + ctx.set_project(cached.aux.clone()); return Ok(cached); } } - let node = self.do_wake_compute(ctx, user_info).await?; + let mut node = self.do_wake_compute(ctx, user_info).await?; ctx.set_project(node.aux.clone()); - let cold_start_info = node.aux.cold_start_info.clone().unwrap_or_default(); - info!(?cold_start_info, "woken up a compute node"); - let (_, cached) = self.caches.node_info.insert(key.clone(), node); + let cold_start_info = node.aux.cold_start_info; + info!("woken up a compute node"); + + // store the cached node as 'warm' + node.aux.cold_start_info = ColdStartInfo::WarmCached; + let (_, mut cached) = self.caches.node_info.insert(key.clone(), node); + cached.aux.cold_start_info = cold_start_info; + info!(key = &*key, "created a cache entry for compute node info"); Ok(cached) diff --git a/proxy/src/context.rs b/proxy/src/context.rs index 7ca830cdb4..fec95f4722 100644 --- a/proxy/src/context.rs +++ b/proxy/src/context.rs @@ -11,8 +11,9 @@ use uuid::Uuid; use crate::{ console::messages::{ColdStartInfo, MetricsAuxInfo}, error::ErrorKind, + intern::{BranchIdInt, ProjectIdInt}, metrics::{LatencyTimer, ENDPOINT_ERRORS_BY_KIND, ERROR_BY_KIND}, - BranchId, DbName, EndpointId, ProjectId, RoleName, + DbName, EndpointId, RoleName, }; use self::parquet::RequestData; @@ -34,8 +35,8 @@ pub struct RequestMonitoring { pub span: Span, // filled in as they are discovered - project: Option, - branch: Option, + project: Option, + branch: Option, endpoint_id: Option, dbname: Option, user: Option, @@ -43,7 +44,7 @@ pub struct RequestMonitoring { error_kind: Option, pub(crate) auth_method: Option, success: bool, - cold_start_info: Option, + pub(crate) cold_start_info: ColdStartInfo, // extra // This sender is here to keep the request monitoring channel open while requests are taking place. 
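The `wake_compute` change above stores the node info in the cache tagged as `WarmCached` while handing the current caller back the cold-start value actually reported by the control plane, so only later cache hits are labelled as cached. Below is a minimal standalone sketch of that pattern, with a plain `HashMap` standing in for the real TTL'd node-info cache and invented field names; it is illustrative only, not the proxy's implementation.

```rust
use std::collections::HashMap;

#[derive(Clone, Copy, Debug, PartialEq)]
enum ColdStartInfo {
    Warm,
    WarmCached,
}

#[derive(Clone, Debug)]
struct NodeInfo {
    address: String,
    cold_start_info: ColdStartInfo,
}

struct Cache {
    nodes: HashMap<String, NodeInfo>,
}

impl Cache {
    fn wake_compute(&mut self, key: &str) -> NodeInfo {
        if let Some(cached) = self.nodes.get(key) {
            // Cache hit: the caller sees that this info came out of the cache.
            return cached.clone();
        }

        // Cache miss: pretend we asked the control plane.
        let mut node = NodeInfo {
            address: "compute-host:5432".to_owned(),
            cold_start_info: ColdStartInfo::Warm, // whatever the control plane reported
        };
        let reported = node.cold_start_info;

        // Store the entry as WarmCached so *future* hits are labelled correctly...
        node.cold_start_info = ColdStartInfo::WarmCached;
        self.nodes.insert(key.to_owned(), node.clone());

        // ...but keep the original label for the request that actually did the wake.
        node.cold_start_info = reported;
        node
    }
}

fn main() {
    let mut cache = Cache { nodes: HashMap::new() };
    let first = cache.wake_compute("ep");
    println!("connecting to {} ({:?})", first.address, first.cold_start_info);
    assert_eq!(first.cold_start_info, ColdStartInfo::Warm);
    assert_eq!(cache.wake_compute("ep").cold_start_info, ColdStartInfo::WarmCached);
}
```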
@@ -92,7 +93,7 @@ impl RequestMonitoring { error_kind: None, auth_method: None, success: false, - cold_start_info: None, + cold_start_info: ColdStartInfo::Unknown, sender: LOG_CHAN.get().and_then(|tx| tx.upgrade()), latency_timer: LatencyTimer::new(protocol), @@ -113,26 +114,31 @@ impl RequestMonitoring { } pub fn set_cold_start_info(&mut self, info: ColdStartInfo) { - self.cold_start_info = Some(info); + self.cold_start_info = info; + self.latency_timer.cold_start_info(info); } pub fn set_project(&mut self, x: MetricsAuxInfo) { - self.set_endpoint_id(x.endpoint_id); + if self.endpoint_id.is_none() { + self.set_endpoint_id(x.endpoint_id.as_str().into()) + } self.branch = Some(x.branch_id); self.project = Some(x.project_id); - self.cold_start_info = x.cold_start_info; + self.set_cold_start_info(x.cold_start_info); } - pub fn set_project_id(&mut self, project_id: ProjectId) { + pub fn set_project_id(&mut self, project_id: ProjectIdInt) { self.project = Some(project_id); } pub fn set_endpoint_id(&mut self, endpoint_id: EndpointId) { - self.span.record("ep", display(&endpoint_id)); - crate::metrics::CONNECTING_ENDPOINTS - .with_label_values(&[self.protocol]) - .measure(&endpoint_id); - self.endpoint_id = Some(endpoint_id); + if self.endpoint_id.is_none() { + self.span.record("ep", display(&endpoint_id)); + crate::metrics::CONNECTING_ENDPOINTS + .with_label_values(&[self.protocol]) + .measure(&endpoint_id); + self.endpoint_id = Some(endpoint_id); + } } pub fn set_application(&mut self, app: Option) { diff --git a/proxy/src/context/parquet.rs b/proxy/src/context/parquet.rs index 04e5695255..eb77409429 100644 --- a/proxy/src/context/parquet.rs +++ b/proxy/src/context/parquet.rs @@ -87,7 +87,7 @@ pub struct RequestData { /// Or if we make it to proxy_pass success: bool, /// Indicates if the cplane started the new compute node for this request. 
- cold_start_info: Option<&'static str>, + cold_start_info: &'static str, /// Tracks time from session start (HTTP request/libpq TCP handshake) /// Through to success/failure duration_us: u64, @@ -115,12 +115,7 @@ impl From<&RequestMonitoring> for RequestData { region: value.region, error: value.error_kind.as_ref().map(|e| e.to_metric_label()), success: value.success, - cold_start_info: value.cold_start_info.as_ref().map(|x| match x { - crate::console::messages::ColdStartInfo::Unknown => "unknown", - crate::console::messages::ColdStartInfo::Warm => "warm", - crate::console::messages::ColdStartInfo::PoolHit => "pool_hit", - crate::console::messages::ColdStartInfo::PoolMiss => "pool_miss", - }), + cold_start_info: value.cold_start_info.as_str(), duration_us: SystemTime::from(value.first_packet) .elapsed() .unwrap_or_default() @@ -454,7 +449,7 @@ mod tests { region: "us-east-1", error: None, success: rng.gen(), - cold_start_info: Some("no"), + cold_start_info: "no", duration_us: rng.gen_range(0..30_000_000), } } @@ -524,15 +519,15 @@ mod tests { assert_eq!( file_stats, [ - (1314406, 3, 6000), - (1314399, 3, 6000), - (1314459, 3, 6000), - (1314416, 3, 6000), - (1314546, 3, 6000), - (1314388, 3, 6000), - (1314180, 3, 6000), - (1314416, 3, 6000), - (438359, 1, 2000) + (1314385, 3, 6000), + (1314378, 3, 6000), + (1314438, 3, 6000), + (1314395, 3, 6000), + (1314525, 3, 6000), + (1314367, 3, 6000), + (1314159, 3, 6000), + (1314395, 3, 6000), + (438352, 1, 2000) ] ); @@ -562,11 +557,11 @@ mod tests { assert_eq!( file_stats, [ - (1220668, 5, 10000), - (1226818, 5, 10000), - (1228612, 5, 10000), - (1227974, 5, 10000), - (1219252, 5, 10000) + (1220633, 5, 10000), + (1226783, 5, 10000), + (1228577, 5, 10000), + (1227939, 5, 10000), + (1219217, 5, 10000) ] ); @@ -598,11 +593,11 @@ mod tests { assert_eq!( file_stats, [ - (1206315, 5, 10000), - (1206046, 5, 10000), - (1206339, 5, 10000), - (1206327, 5, 10000), - (1206582, 5, 10000) + (1206280, 5, 10000), + (1206011, 5, 10000), + (1206304, 5, 10000), + (1206292, 5, 10000), + (1206547, 5, 10000) ] ); @@ -627,15 +622,15 @@ mod tests { assert_eq!( file_stats, [ - (1314406, 3, 6000), - (1314399, 3, 6000), - (1314459, 3, 6000), - (1314416, 3, 6000), - (1314546, 3, 6000), - (1314388, 3, 6000), - (1314180, 3, 6000), - (1314416, 3, 6000), - (438359, 1, 2000) + (1314385, 3, 6000), + (1314378, 3, 6000), + (1314438, 3, 6000), + (1314395, 3, 6000), + (1314525, 3, 6000), + (1314367, 3, 6000), + (1314159, 3, 6000), + (1314395, 3, 6000), + (438352, 1, 2000) ] ); @@ -672,7 +667,7 @@ mod tests { // files are smaller than the size threshold, but they took too long to fill so were flushed early assert_eq!( file_stats, - [(658837, 2, 3001), (658551, 2, 3000), (658347, 2, 2999)] + [(658823, 2, 3001), (658537, 2, 3000), (658333, 2, 2999)] ); tmpdir.close().unwrap(); diff --git a/proxy/src/metrics.rs b/proxy/src/metrics.rs index 9da1fdc02f..59ee899c08 100644 --- a/proxy/src/metrics.rs +++ b/proxy/src/metrics.rs @@ -12,6 +12,8 @@ use metrics::{ use once_cell::sync::Lazy; use tokio::time::{self, Instant}; +use crate::console::messages::ColdStartInfo; + pub static NUM_DB_CONNECTIONS_GAUGE: Lazy = Lazy::new(|| { register_int_counter_pair_vec!( "proxy_opened_db_connections_total", @@ -50,8 +52,8 @@ pub static COMPUTE_CONNECTION_LATENCY: Lazy = Lazy::new(|| { "proxy_compute_connection_latency_seconds", "Time it took for proxy to establish a connection to the compute endpoint", // http/ws/tcp, true/false, true/false, success/failure, client/client_and_cplane - // 3 * 2 * 2 * 2 * 2 = 48 
counters - &["protocol", "cache_miss", "pool_miss", "outcome", "excluded"], + // 3 * 6 * 2 * 2 = 72 counters + &["protocol", "cold_start_info", "outcome", "excluded"], // largest bucket = 2^16 * 0.5ms = 32s exponential_buckets(0.0005, 2.0, 16).unwrap(), ) @@ -183,6 +185,20 @@ struct Accumulated { compute: time::Duration, } +enum Outcome { + Success, + Failed, +} + +impl Outcome { + fn as_str(&self) -> &'static str { + match self { + Outcome::Success => "success", + Outcome::Failed => "failed", + } + } +} + pub struct LatencyTimer { // time since the stopwatch was started start: time::Instant, @@ -192,9 +208,8 @@ pub struct LatencyTimer { accumulated: Accumulated, // label data protocol: &'static str, - cache_miss: bool, - pool_miss: bool, - outcome: &'static str, + cold_start_info: ColdStartInfo, + outcome: Outcome, } pub struct LatencyTimerPause<'a> { @@ -210,11 +225,9 @@ impl LatencyTimer { stop: None, accumulated: Accumulated::default(), protocol, - cache_miss: false, - // by default we don't do pooling - pool_miss: true, + cold_start_info: ColdStartInfo::Unknown, // assume failed unless otherwise specified - outcome: "failed", + outcome: Outcome::Failed, } } @@ -226,12 +239,8 @@ impl LatencyTimer { } } - pub fn cache_miss(&mut self) { - self.cache_miss = true; - } - - pub fn pool_hit(&mut self) { - self.pool_miss = false; + pub fn cold_start_info(&mut self, cold_start_info: ColdStartInfo) { + self.cold_start_info = cold_start_info; } pub fn success(&mut self) { @@ -239,7 +248,7 @@ impl LatencyTimer { self.stop = Some(time::Instant::now()); // success - self.outcome = "success"; + self.outcome = Outcome::Success; } } @@ -264,9 +273,8 @@ impl Drop for LatencyTimer { COMPUTE_CONNECTION_LATENCY .with_label_values(&[ self.protocol, - bool_to_str(self.cache_miss), - bool_to_str(self.pool_miss), - self.outcome, + self.cold_start_info.as_str(), + self.outcome.as_str(), "client", ]) .observe((duration.saturating_sub(self.accumulated.client)).as_secs_f64()); @@ -275,9 +283,8 @@ impl Drop for LatencyTimer { COMPUTE_CONNECTION_LATENCY .with_label_values(&[ self.protocol, - bool_to_str(self.cache_miss), - bool_to_str(self.pool_miss), - self.outcome, + self.cold_start_info.as_str(), + self.outcome.as_str(), "client_and_cplane", ]) .observe((duration.saturating_sub(accumulated_total)).as_secs_f64()); diff --git a/proxy/src/proxy/connect_compute.rs b/proxy/src/proxy/connect_compute.rs index c76e2ff6d9..4c0d68ce0b 100644 --- a/proxy/src/proxy/connect_compute.rs +++ b/proxy/src/proxy/connect_compute.rs @@ -87,7 +87,6 @@ impl ConnectMechanism for TcpMechanism<'_> { } /// Try to connect to the compute node, retrying if necessary. -/// This function might update `node_info`, so we take it by `&mut`. 
#[tracing::instrument(skip_all)] pub async fn connect_to_compute( ctx: &mut RequestMonitoring, @@ -132,7 +131,6 @@ where } else { // if we failed to connect, it's likely that the compute node was suspended, wake a new compute node info!("compute node's state has likely changed; requesting a wake-up"); - ctx.latency_timer.cache_miss(); let old_node_info = invalidate_cache(node_info); let mut node_info = wake_compute(&mut num_retries, ctx, user_info).await?; node_info.reuse_settings(old_node_info); diff --git a/proxy/src/proxy/passthrough.rs b/proxy/src/proxy/passthrough.rs index cf53c6e673..c81a1a8292 100644 --- a/proxy/src/proxy/passthrough.rs +++ b/proxy/src/proxy/passthrough.rs @@ -19,8 +19,8 @@ pub async fn proxy_pass( aux: MetricsAuxInfo, ) -> anyhow::Result<()> { let usage = USAGE_METRICS.register(Ids { - endpoint_id: aux.endpoint_id.clone(), - branch_id: aux.branch_id.clone(), + endpoint_id: aux.endpoint_id, + branch_id: aux.branch_id, }); let m_sent = NUM_BYTES_PROXIED_COUNTER.with_label_values(&["tx"]); diff --git a/proxy/src/proxy/tests.rs b/proxy/src/proxy/tests.rs index a4051447c1..71d85e106d 100644 --- a/proxy/src/proxy/tests.rs +++ b/proxy/src/proxy/tests.rs @@ -12,11 +12,12 @@ use crate::auth::backend::{ }; use crate::config::CertResolver; use crate::console::caches::NodeInfoCache; +use crate::console::messages::MetricsAuxInfo; use crate::console::provider::{CachedAllowedIps, CachedRoleSecret, ConsoleBackend}; use crate::console::{self, CachedNodeInfo, NodeInfo}; use crate::error::ErrorKind; use crate::proxy::retry::{retry_after, NUM_RETRIES_CONNECT}; -use crate::{http, sasl, scram}; +use crate::{http, sasl, scram, BranchId, EndpointId, ProjectId}; use anyhow::{bail, Context}; use async_trait::async_trait; use rstest::rstest; @@ -512,7 +513,12 @@ impl TestBackend for TestConnectMechanism { fn helper_create_cached_node_info(cache: &'static NodeInfoCache) -> CachedNodeInfo { let node = NodeInfo { config: compute::ConnCfg::new(), - aux: Default::default(), + aux: MetricsAuxInfo { + endpoint_id: (&EndpointId::from("endpoint")).into(), + project_id: (&ProjectId::from("project")).into(), + branch_id: (&BranchId::from("branch")).into(), + cold_start_info: crate::console::messages::ColdStartInfo::Warm, + }, allow_self_signed_compute: false, }; let (_, node) = cache.insert("key".into(), node); diff --git a/proxy/src/serverless/backend.rs b/proxy/src/serverless/backend.rs index f10779d7ba..8aa5ad4e8a 100644 --- a/proxy/src/serverless/backend.rs +++ b/proxy/src/serverless/backend.rs @@ -9,7 +9,6 @@ use crate::{ config::ProxyConfig, console::{ errors::{GetAuthInfoError, WakeComputeError}, - messages::ColdStartInfo, CachedNodeInfo, }, context::RequestMonitoring, @@ -57,7 +56,10 @@ impl PoolingBackend { let auth_outcome = crate::auth::validate_password_and_exchange(&conn_info.password, secret).await?; let res = match auth_outcome { - crate::sasl::Outcome::Success(key) => Ok(key), + crate::sasl::Outcome::Success(key) => { + info!("user successfully authenticated"); + Ok(key) + } crate::sasl::Outcome::Failure(reason) => { info!("auth backend failed with an error: {reason}"); Err(AuthError::auth_failed(&*conn_info.user_info.user)) @@ -89,8 +91,6 @@ impl PoolingBackend { }; if let Some(client) = maybe_client { - info!("cold_start_info=warm"); - ctx.set_cold_start_info(ColdStartInfo::Warm); return Ok(client); } let conn_id = uuid::Uuid::new_v4(); diff --git a/proxy/src/serverless/conn_pool.rs b/proxy/src/serverless/conn_pool.rs index c7e8eaef76..35311facb8 100644 --- 
a/proxy/src/serverless/conn_pool.rs +++ b/proxy/src/serverless/conn_pool.rs @@ -17,7 +17,7 @@ use tokio::time::Instant; use tokio_postgres::tls::NoTlsStream; use tokio_postgres::{AsyncMessage, ReadyForQueryStatus, Socket}; -use crate::console::messages::MetricsAuxInfo; +use crate::console::messages::{ColdStartInfo, MetricsAuxInfo}; use crate::metrics::{ENDPOINT_POOLS, GC_LATENCY, NUM_OPEN_CLIENTS_IN_HTTP_POOL}; use crate::usage_metrics::{Ids, MetricCounter, USAGE_METRICS}; use crate::{ @@ -383,9 +383,12 @@ impl GlobalConnPool { "pid", &tracing::field::display(client.inner.get_process_id()), ); - info!("pool: reusing connection '{conn_info}'"); + info!( + cold_start_info = ColdStartInfo::HttpPoolHit.as_str(), + "pool: reusing connection '{conn_info}'" + ); client.session.send(ctx.session_id)?; - ctx.latency_timer.pool_hit(); + ctx.set_cold_start_info(ColdStartInfo::HttpPoolHit); ctx.latency_timer.success(); return Ok(Some(Client::new(client, conn_info.clone(), endpoint_pool))); } @@ -454,8 +457,9 @@ pub fn poll_client( let (tx, mut rx) = tokio::sync::watch::channel(session_id); let span = info_span!(parent: None, "connection", %conn_id); + let cold_start_info = ctx.cold_start_info; span.in_scope(|| { - info!(%conn_info, %session_id, "new connection"); + info!(cold_start_info = cold_start_info.as_str(), %conn_info, %session_id, "new connection"); }); let pool = match conn_info.endpoint_cache_key() { Some(endpoint) => Arc::downgrade(&global_pool.get_or_create_endpoint_pool(&endpoint)), @@ -565,8 +569,8 @@ impl Client { pub fn metrics(&self) -> Arc { let aux = &self.inner.as_ref().unwrap().aux; USAGE_METRICS.register(Ids { - endpoint_id: aux.endpoint_id.clone(), - branch_id: aux.branch_id.clone(), + endpoint_id: aux.endpoint_id, + branch_id: aux.branch_id, }) } } @@ -666,6 +670,8 @@ impl Drop for Client { mod tests { use std::{mem, sync::atomic::AtomicBool}; + use crate::{BranchId, EndpointId, ProjectId}; + use super::*; struct MockClient(Arc); @@ -691,7 +697,12 @@ mod tests { ClientInner { inner: client, session: tokio::sync::watch::Sender::new(uuid::Uuid::new_v4()), - aux: Default::default(), + aux: MetricsAuxInfo { + endpoint_id: (&EndpointId::from("endpoint")).into(), + project_id: (&ProjectId::from("project")).into(), + branch_id: (&BranchId::from("branch")).into(), + cold_start_info: crate::console::messages::ColdStartInfo::Warm, + }, conn_id: uuid::Uuid::new_v4(), } } diff --git a/proxy/src/usage_metrics.rs b/proxy/src/usage_metrics.rs index 2ad0883fb0..5ffbf95c07 100644 --- a/proxy/src/usage_metrics.rs +++ b/proxy/src/usage_metrics.rs @@ -3,7 +3,8 @@ use crate::{ config::{MetricBackupCollectionConfig, MetricCollectionConfig}, context::parquet::{FAILED_UPLOAD_MAX_RETRIES, FAILED_UPLOAD_WARN_THRESHOLD}, - http, BranchId, EndpointId, + http, + intern::{BranchIdInt, EndpointIdInt}, }; use anyhow::Context; use async_compression::tokio::write::GzipEncoder; @@ -43,8 +44,8 @@ const DEFAULT_HTTP_REPORTING_TIMEOUT: Duration = Duration::from_secs(60); /// because we enrich the event with project_id in the control-plane endpoint. 
#[derive(Eq, Hash, PartialEq, Serialize, Deserialize, Debug, Clone)] pub struct Ids { - pub endpoint_id: EndpointId, - pub branch_id: BranchId, + pub endpoint_id: EndpointIdInt, + pub branch_id: BranchIdInt, } pub trait MetricCounterRecorder { @@ -461,7 +462,7 @@ async fn upload_events_chunk( || async { let stream = futures::stream::once(futures::future::ready(Ok(compressed_data.clone()))); storage - .upload(stream, data.len(), remote_path, None, cancel) + .upload(stream, compressed_data.len(), remote_path, None, cancel) .await }, TimeoutOrCancel::caused_by_cancel, @@ -494,7 +495,7 @@ mod tests { use url::Url; use super::*; - use crate::{http, rate_limiter::RateLimiterConfig}; + use crate::{http, rate_limiter::RateLimiterConfig, BranchId, EndpointId}; #[tokio::test] async fn metrics() { @@ -536,8 +537,8 @@ mod tests { // register a new counter let counter = metrics.register(Ids { - endpoint_id: "e1".into(), - branch_id: "b1".into(), + endpoint_id: (&EndpointId::from("e1")).into(), + branch_id: (&BranchId::from("b1")).into(), }); // the counter should be observed despite 0 egress diff --git a/safekeeper/src/wal_backup_partial.rs b/safekeeper/src/wal_backup_partial.rs index a535c814ea..200096ac5c 100644 --- a/safekeeper/src/wal_backup_partial.rs +++ b/safekeeper/src/wal_backup_partial.rs @@ -337,6 +337,17 @@ pub async fn main_task(tli: Arc, conf: SafeKeeperConf) { } } + // if we don't have any data and zero LSNs, wait for something + while flush_lsn_rx.borrow().lsn == Lsn(0) { + tokio::select! { + _ = cancellation_rx.changed() => { + info!("timeline canceled"); + return; + } + _ = flush_lsn_rx.changed() => {} + } + } + // fixing the segno and waiting some time to prevent reuploading the same segment too often let pending_segno = backup.segno(flush_lsn_rx.borrow().lsn); let timeout = tokio::time::sleep(await_duration); diff --git a/control_plane/attachment_service/Cargo.toml b/storage_controller/Cargo.toml similarity index 83% rename from control_plane/attachment_service/Cargo.toml rename to storage_controller/Cargo.toml index 595b091df4..165cafaf4e 100644 --- a/control_plane/attachment_service/Cargo.toml +++ b/storage_controller/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "attachment_service" +name = "storage_controller" version = "0.1.0" edition.workspace = true license.workspace = true @@ -45,8 +45,8 @@ diesel = { version = "2.1.4", features = ["serde_json", "postgres", "r2d2"] } diesel_migrations = { version = "2.1.0" } r2d2 = { version = "0.8.10" } -utils = { path = "../../libs/utils/" } -metrics = { path = "../../libs/metrics/" } -control_plane = { path = ".." 
} -workspace_hack = { version = "0.1", path = "../../workspace_hack" } +utils = { path = "../libs/utils/" } +metrics = { path = "../libs/metrics/" } +control_plane = { path = "../control_plane" } +workspace_hack = { version = "0.1", path = "../workspace_hack" } diff --git a/control_plane/attachment_service/migrations/.keep b/storage_controller/migrations/.keep similarity index 100% rename from control_plane/attachment_service/migrations/.keep rename to storage_controller/migrations/.keep diff --git a/control_plane/attachment_service/migrations/00000000000000_diesel_initial_setup/down.sql b/storage_controller/migrations/00000000000000_diesel_initial_setup/down.sql similarity index 100% rename from control_plane/attachment_service/migrations/00000000000000_diesel_initial_setup/down.sql rename to storage_controller/migrations/00000000000000_diesel_initial_setup/down.sql diff --git a/control_plane/attachment_service/migrations/00000000000000_diesel_initial_setup/up.sql b/storage_controller/migrations/00000000000000_diesel_initial_setup/up.sql similarity index 100% rename from control_plane/attachment_service/migrations/00000000000000_diesel_initial_setup/up.sql rename to storage_controller/migrations/00000000000000_diesel_initial_setup/up.sql diff --git a/control_plane/attachment_service/migrations/2024-01-07-211257_create_tenant_shards/down.sql b/storage_controller/migrations/2024-01-07-211257_create_tenant_shards/down.sql similarity index 100% rename from control_plane/attachment_service/migrations/2024-01-07-211257_create_tenant_shards/down.sql rename to storage_controller/migrations/2024-01-07-211257_create_tenant_shards/down.sql diff --git a/control_plane/attachment_service/migrations/2024-01-07-211257_create_tenant_shards/up.sql b/storage_controller/migrations/2024-01-07-211257_create_tenant_shards/up.sql similarity index 100% rename from control_plane/attachment_service/migrations/2024-01-07-211257_create_tenant_shards/up.sql rename to storage_controller/migrations/2024-01-07-211257_create_tenant_shards/up.sql diff --git a/control_plane/attachment_service/migrations/2024-01-07-212945_create_nodes/down.sql b/storage_controller/migrations/2024-01-07-212945_create_nodes/down.sql similarity index 100% rename from control_plane/attachment_service/migrations/2024-01-07-212945_create_nodes/down.sql rename to storage_controller/migrations/2024-01-07-212945_create_nodes/down.sql diff --git a/control_plane/attachment_service/migrations/2024-01-07-212945_create_nodes/up.sql b/storage_controller/migrations/2024-01-07-212945_create_nodes/up.sql similarity index 100% rename from control_plane/attachment_service/migrations/2024-01-07-212945_create_nodes/up.sql rename to storage_controller/migrations/2024-01-07-212945_create_nodes/up.sql diff --git a/control_plane/attachment_service/migrations/2024-02-29-094122_generations_null/down.sql b/storage_controller/migrations/2024-02-29-094122_generations_null/down.sql similarity index 100% rename from control_plane/attachment_service/migrations/2024-02-29-094122_generations_null/down.sql rename to storage_controller/migrations/2024-02-29-094122_generations_null/down.sql diff --git a/control_plane/attachment_service/migrations/2024-02-29-094122_generations_null/up.sql b/storage_controller/migrations/2024-02-29-094122_generations_null/up.sql similarity index 100% rename from control_plane/attachment_service/migrations/2024-02-29-094122_generations_null/up.sql rename to storage_controller/migrations/2024-02-29-094122_generations_null/up.sql diff --git 
a/control_plane/attachment_service/migrations/2024-03-18-184429_rename_policy/down.sql b/storage_controller/migrations/2024-03-18-184429_rename_policy/down.sql similarity index 100% rename from control_plane/attachment_service/migrations/2024-03-18-184429_rename_policy/down.sql rename to storage_controller/migrations/2024-03-18-184429_rename_policy/down.sql diff --git a/control_plane/attachment_service/migrations/2024-03-18-184429_rename_policy/up.sql b/storage_controller/migrations/2024-03-18-184429_rename_policy/up.sql similarity index 100% rename from control_plane/attachment_service/migrations/2024-03-18-184429_rename_policy/up.sql rename to storage_controller/migrations/2024-03-18-184429_rename_policy/up.sql diff --git a/control_plane/attachment_service/migrations/2024-03-27-133204_tenant_policies/down.sql b/storage_controller/migrations/2024-03-27-133204_tenant_policies/down.sql similarity index 100% rename from control_plane/attachment_service/migrations/2024-03-27-133204_tenant_policies/down.sql rename to storage_controller/migrations/2024-03-27-133204_tenant_policies/down.sql diff --git a/control_plane/attachment_service/migrations/2024-03-27-133204_tenant_policies/up.sql b/storage_controller/migrations/2024-03-27-133204_tenant_policies/up.sql similarity index 100% rename from control_plane/attachment_service/migrations/2024-03-27-133204_tenant_policies/up.sql rename to storage_controller/migrations/2024-03-27-133204_tenant_policies/up.sql diff --git a/control_plane/attachment_service/src/auth.rs b/storage_controller/src/auth.rs similarity index 100% rename from control_plane/attachment_service/src/auth.rs rename to storage_controller/src/auth.rs diff --git a/control_plane/attachment_service/src/compute_hook.rs b/storage_controller/src/compute_hook.rs similarity index 61% rename from control_plane/attachment_service/src/compute_hook.rs rename to storage_controller/src/compute_hook.rs index 1a8dc6b86d..eb0c4472e4 100644 --- a/control_plane/attachment_service/src/compute_hook.rs +++ b/storage_controller/src/compute_hook.rs @@ -1,3 +1,4 @@ +use std::sync::Arc; use std::{collections::HashMap, time::Duration}; use control_plane::endpoint::{ComputeControlPlane, EndpointStatus}; @@ -18,14 +19,26 @@ const SLOWDOWN_DELAY: Duration = Duration::from_secs(5); pub(crate) const API_CONCURRENCY: usize = 32; +struct UnshardedComputeHookTenant { + // Which node is this tenant attached to + node_id: NodeId, + + // Must hold this lock to send a notification. + send_lock: Arc>>, +} struct ShardedComputeHookTenant { stripe_size: ShardStripeSize, shard_count: ShardCount, shards: Vec<(ShardNumber, NodeId)>, + + // Must hold this lock to send a notification. The contents represent + // the last successfully sent notification, and are used to coalesce multiple + // updates by only sending when there is a change since our last successful send.
+ send_lock: Arc>>, } enum ComputeHookTenant { - Unsharded(NodeId), + Unsharded(UnshardedComputeHookTenant), Sharded(ShardedComputeHookTenant), } @@ -37,9 +50,20 @@ impl ComputeHookTenant { shards: vec![(tenant_shard_id.shard_number, node_id)], stripe_size, shard_count: tenant_shard_id.shard_count, + send_lock: Arc::default(), }) } else { - Self::Unsharded(node_id) + Self::Unsharded(UnshardedComputeHookTenant { + node_id, + send_lock: Arc::default(), + }) + } + } + + fn get_send_lock(&self) -> &Arc>> { + match self { + Self::Unsharded(unsharded_tenant) => &unsharded_tenant.send_lock, + Self::Sharded(sharded_tenant) => &sharded_tenant.send_lock, } } @@ -52,8 +76,8 @@ impl ComputeHookTenant { node_id: NodeId, ) { match self { - Self::Unsharded(existing_node_id) if tenant_shard_id.shard_count.count() == 1 => { - *existing_node_id = node_id + Self::Unsharded(unsharded_tenant) if tenant_shard_id.shard_count.count() == 1 => { + unsharded_tenant.node_id = node_id } Self::Sharded(sharded_tenant) if sharded_tenant.stripe_size == stripe_size @@ -80,14 +104,14 @@ impl ComputeHookTenant { } } -#[derive(Serialize, Deserialize, Debug)] +#[derive(Serialize, Deserialize, Debug, Eq, PartialEq)] struct ComputeHookNotifyRequestShard { node_id: NodeId, shard_number: ShardNumber, } /// Request body that we send to the control plane to notify it of where a tenant is attached -#[derive(Serialize, Deserialize, Debug)] +#[derive(Serialize, Deserialize, Debug, Eq, PartialEq)] struct ComputeHookNotifyRequest { tenant_id: TenantId, stripe_size: Option, @@ -120,14 +144,44 @@ pub(crate) enum NotifyError { Fatal(StatusCode), } +enum MaybeSendResult { + // Please send this request while holding the lock, and if you succeed then write + // the request into the lock. + Transmit( + ( + ComputeHookNotifyRequest, + tokio::sync::OwnedMutexGuard>, + ), + ), + // Something requires sending, but you must wait for a current sender then call again + AwaitLock(Arc>>), + // Nothing requires sending + Noop, +} + impl ComputeHookTenant { - fn maybe_reconfigure(&self, tenant_id: TenantId) -> Option { - match self { - Self::Unsharded(node_id) => Some(ComputeHookNotifyRequest { + fn maybe_send( + &self, + tenant_id: TenantId, + lock: Option>>, + ) -> MaybeSendResult { + let locked = match lock { + Some(already_locked) => already_locked, + None => { + // Lock order: this _must_ be only a try_lock, because we are called inside of the [`ComputeHook::state`] lock. 
+ let Ok(locked) = self.get_send_lock().clone().try_lock_owned() else { + return MaybeSendResult::AwaitLock(self.get_send_lock().clone()); + }; + locked + } + }; + + let request = match self { + Self::Unsharded(unsharded_tenant) => Some(ComputeHookNotifyRequest { tenant_id, shards: vec![ComputeHookNotifyRequestShard { shard_number: ShardNumber(0), - node_id: *node_id, + node_id: unsharded_tenant.node_id, }], stripe_size: None, }), @@ -151,12 +205,25 @@ impl ComputeHookTenant { // Sharded tenant doesn't yet have information for all its shards tracing::info!( - "ComputeHookTenant::maybe_reconfigure: not enough shards ({}/{})", + "ComputeHookTenant::maybe_send: not enough shards ({}/{})", sharded_tenant.shards.len(), sharded_tenant.shard_count.count() ); None } + }; + + match request { + None => { + // Not yet ready to emit a notification + tracing::info!("Tenant isn't yet ready to emit a notification"); + MaybeSendResult::Noop + } + Some(request) if Some(&request) == locked.as_ref() => { + // No change from the last value successfully sent + MaybeSendResult::Noop + } + Some(request) => MaybeSendResult::Transmit((request, locked)), } } } @@ -166,8 +233,15 @@ impl ComputeHookTenant { /// the compute connection string. pub(super) struct ComputeHook { config: Config, - state: tokio::sync::Mutex>, + state: std::sync::Mutex>, authorization_header: Option, + + // Concurrency limiter, so that we do not overload the cloud control plane when updating + // large numbers of tenants (e.g. when failing over after a node failure) + api_concurrency: tokio::sync::Semaphore, + + // This lock is only used in testing environments, to serialize calls into neon_local + neon_local_lock: tokio::sync::Mutex<()>, } impl ComputeHook { @@ -181,14 +255,20 @@ impl ComputeHook { state: Default::default(), config, authorization_header, + neon_local_lock: Default::default(), + api_concurrency: tokio::sync::Semaphore::new(API_CONCURRENCY), } } /// For test environments: use neon_local's LocalEnv to update compute async fn do_notify_local( &self, - reconfigure_request: ComputeHookNotifyRequest, + reconfigure_request: &ComputeHookNotifyRequest, ) -> anyhow::Result<()> { + // neon_local updates are not safe to call concurrently, use a lock to serialize + // all calls to this function + let _locked = self.neon_local_lock.lock().await; + let env = match LocalEnv::load_config() { Ok(e) => e, Err(e) => { @@ -205,7 +285,7 @@ impl ComputeHook { } = reconfigure_request; let compute_pageservers = shards - .into_iter() + .iter() .map(|shard| { let ps_conf = env .get_pageserver_conf(shard.node_id) @@ -217,10 +297,10 @@ impl ComputeHook { .collect::>(); for (endpoint_name, endpoint) in &cplane.endpoints { - if endpoint.tenant_id == tenant_id && endpoint.status() == EndpointStatus::Running { + if endpoint.tenant_id == *tenant_id && endpoint.status() == EndpointStatus::Running { tracing::info!("Reconfiguring endpoint {}", endpoint_name,); endpoint - .reconfigure(compute_pageservers.clone(), stripe_size) + .reconfigure(compute_pageservers.clone(), *stripe_size) .await?; } } @@ -298,12 +378,23 @@ impl ComputeHook { async fn do_notify( &self, url: &String, - reconfigure_request: ComputeHookNotifyRequest, + reconfigure_request: &ComputeHookNotifyRequest, cancel: &CancellationToken, ) -> Result<(), NotifyError> { let client = reqwest::Client::new(); + + // We hold these semaphore units across all retries, rather than only across each + // HTTP request: this is to preserve fairness and avoid a situation where a retry might + // time out waiting for
a semaphore. + let _units = self + .api_concurrency + .acquire() + .await + // Interpret closed semaphore as shutdown + .map_err(|_| NotifyError::ShuttingDown)?; + backoff::retry( - || self.do_notify_iteration(&client, url, &reconfigure_request, cancel), + || self.do_notify_iteration(&client, url, reconfigure_request, cancel), |e| { matches!( e, @@ -343,42 +434,70 @@ impl ComputeHook { stripe_size: ShardStripeSize, cancel: &CancellationToken, ) -> Result<(), NotifyError> { - let mut locked = self.state.lock().await; + let maybe_send_result = { + let mut state_locked = self.state.lock().unwrap(); - use std::collections::hash_map::Entry; - let tenant = match locked.entry(tenant_shard_id.tenant_id) { - Entry::Vacant(e) => e.insert(ComputeHookTenant::new( - tenant_shard_id, - stripe_size, - node_id, - )), - Entry::Occupied(e) => { - let tenant = e.into_mut(); - tenant.update(tenant_shard_id, stripe_size, node_id); - tenant + use std::collections::hash_map::Entry; + let tenant = match state_locked.entry(tenant_shard_id.tenant_id) { + Entry::Vacant(e) => e.insert(ComputeHookTenant::new( + tenant_shard_id, + stripe_size, + node_id, + )), + Entry::Occupied(e) => { + let tenant = e.into_mut(); + tenant.update(tenant_shard_id, stripe_size, node_id); + tenant + } + }; + tenant.maybe_send(tenant_shard_id.tenant_id, None) + }; + + // Process result: we may get an update to send, or we may have to wait for a lock + // before trying again. + let (request, mut send_lock_guard) = match maybe_send_result { + MaybeSendResult::Noop => { + return Ok(()); } + MaybeSendResult::AwaitLock(send_lock) => { + let send_locked = send_lock.lock_owned().await; + + // Lock order: maybe_send is called within the `[Self::state]` lock, and takes the send lock, but here + // we have acquired the send lock and take `[Self::state]` lock. This is safe because maybe_send only uses + // try_lock. + let state_locked = self.state.lock().unwrap(); + let Some(tenant) = state_locked.get(&tenant_shard_id.tenant_id) else { + return Ok(()); + }; + match tenant.maybe_send(tenant_shard_id.tenant_id, Some(send_locked)) { + MaybeSendResult::AwaitLock(_) => { + unreachable!("We supplied lock guard") + } + MaybeSendResult::Noop => { + return Ok(()); + } + MaybeSendResult::Transmit((request, lock)) => (request, lock), + } + } + MaybeSendResult::Transmit((request, lock)) => (request, lock), }; - let reconfigure_request = tenant.maybe_reconfigure(tenant_shard_id.tenant_id); - let Some(reconfigure_request) = reconfigure_request else { - // The tenant doesn't yet have pageservers for all its shards: we won't notify anything - // until it does. - tracing::info!("Tenant isn't yet ready to emit a notification"); - return Ok(()); - }; - - if let Some(notify_url) = &self.config.compute_hook_url { - self.do_notify(notify_url, reconfigure_request, cancel) - .await + let result = if let Some(notify_url) = &self.config.compute_hook_url { + self.do_notify(notify_url, &request, cancel).await } else { - self.do_notify_local(reconfigure_request) - .await - .map_err(|e| { - // This path is for testing only, so munge the error into our prod-style error type. - tracing::error!("Local notification hook failed: {e}"); - NotifyError::Fatal(StatusCode::INTERNAL_SERVER_ERROR) - }) + self.do_notify_local(&request).await.map_err(|e| { + // This path is for testing only, so munge the error into our prod-style error type. 
+ tracing::error!("Local notification hook failed: {e}"); + NotifyError::Fatal(StatusCode::INTERNAL_SERVER_ERROR) + }) + }; + + if result.is_ok() { + // Before dropping the send lock, stash the request we just sent so that + // subsequent callers can avoid redundantly re-sending the same thing. + *send_lock_guard = Some(request); } + result } } @@ -402,21 +521,22 @@ pub(crate) mod tests { NodeId(1), ); - // An unsharded tenant is always ready to emit a notification - assert!(tenant_state.maybe_reconfigure(tenant_id).is_some()); - assert_eq!( - tenant_state - .maybe_reconfigure(tenant_id) - .unwrap() - .shards - .len(), - 1 - ); - assert!(tenant_state - .maybe_reconfigure(tenant_id) - .unwrap() - .stripe_size - .is_none()); + // An unsharded tenant is always ready to emit a notification, but won't + // send the same one twice + let send_result = tenant_state.maybe_send(tenant_id, None); + let MaybeSendResult::Transmit((request, mut guard)) = send_result else { + anyhow::bail!("Wrong send result"); + }; + assert_eq!(request.shards.len(), 1); + assert!(request.stripe_size.is_none()); + + // Simulate successful send + *guard = Some(request); + drop(guard); + + // Try asking again: this should be a no-op + let send_result = tenant_state.maybe_send(tenant_id, None); + assert!(matches!(send_result, MaybeSendResult::Noop)); // Writing the first shard of a multi-sharded situation (i.e. in a split) // resets the tenant state and puts it in an non-notifying state (need to @@ -430,7 +550,10 @@ pub(crate) mod tests { ShardStripeSize(32768), NodeId(1), ); - assert!(tenant_state.maybe_reconfigure(tenant_id).is_none()); + assert!(matches!( + tenant_state.maybe_send(tenant_id, None), + MaybeSendResult::Noop + )); // Writing the second shard makes it ready to notify tenant_state.update( @@ -443,22 +566,16 @@ pub(crate) mod tests { NodeId(1), ); - assert!(tenant_state.maybe_reconfigure(tenant_id).is_some()); - assert_eq!( - tenant_state - .maybe_reconfigure(tenant_id) - .unwrap() - .shards - .len(), - 2 - ); - assert_eq!( - tenant_state - .maybe_reconfigure(tenant_id) - .unwrap() - .stripe_size, - Some(ShardStripeSize(32768)) - ); + let send_result = tenant_state.maybe_send(tenant_id, None); + let MaybeSendResult::Transmit((request, mut guard)) = send_result else { + anyhow::bail!("Wrong send result"); + }; + assert_eq!(request.shards.len(), 2); + assert_eq!(request.stripe_size, Some(ShardStripeSize(32768))); + + // Simulate successful send + *guard = Some(request); + drop(guard); Ok(()) } diff --git a/control_plane/attachment_service/src/heartbeater.rs b/storage_controller/src/heartbeater.rs similarity index 100% rename from control_plane/attachment_service/src/heartbeater.rs rename to storage_controller/src/heartbeater.rs diff --git a/control_plane/attachment_service/src/http.rs b/storage_controller/src/http.rs similarity index 98% rename from control_plane/attachment_service/src/http.rs rename to storage_controller/src/http.rs index 03883f0ca2..c59bcaa174 100644 --- a/control_plane/attachment_service/src/http.rs +++ b/storage_controller/src/http.rs @@ -602,9 +602,17 @@ where .await } +/// Check if the required scope is held in the request's token, or if the request has +/// a token with 'admin' scope then always permit it. 
fn check_permissions(request: &Request, required_scope: Scope) -> Result<(), ApiError> { check_permission_with(request, |claims| { - crate::auth::check_permission(claims, required_scope) + match crate::auth::check_permission(claims, required_scope) { + Err(e) => match crate::auth::check_permission(claims, Scope::Admin) { + Ok(()) => Ok(()), + Err(_) => Err(e), + }, + Ok(()) => Ok(()), + } }) } diff --git a/control_plane/attachment_service/src/id_lock_map.rs b/storage_controller/src/id_lock_map.rs similarity index 100% rename from control_plane/attachment_service/src/id_lock_map.rs rename to storage_controller/src/id_lock_map.rs diff --git a/control_plane/attachment_service/src/lib.rs b/storage_controller/src/lib.rs similarity index 98% rename from control_plane/attachment_service/src/lib.rs rename to storage_controller/src/lib.rs index 8bcd5c0ac4..2ea490a14b 100644 --- a/control_plane/attachment_service/src/lib.rs +++ b/storage_controller/src/lib.rs @@ -14,7 +14,7 @@ mod reconciler; mod scheduler; mod schema; pub mod service; -mod tenant_state; +mod tenant_shard; #[derive(Ord, PartialOrd, Eq, PartialEq, Copy, Clone, Serialize)] struct Sequence(u64); diff --git a/control_plane/attachment_service/src/main.rs b/storage_controller/src/main.rs similarity index 96% rename from control_plane/attachment_service/src/main.rs rename to storage_controller/src/main.rs index bd8d7f5c59..3c03d6efe8 100644 --- a/control_plane/attachment_service/src/main.rs +++ b/storage_controller/src/main.rs @@ -1,18 +1,19 @@ use anyhow::{anyhow, Context}; -use attachment_service::http::make_router; -use attachment_service::metrics::preinitialize_metrics; -use attachment_service::persistence::Persistence; -use attachment_service::service::{Config, Service, MAX_UNAVAILABLE_INTERVAL_DEFAULT}; use camino::Utf8PathBuf; use clap::Parser; use diesel::Connection; use metrics::launch_timestamp::LaunchTimestamp; use std::sync::Arc; +use storage_controller::http::make_router; +use storage_controller::metrics::preinitialize_metrics; +use storage_controller::persistence::Persistence; +use storage_controller::service::{Config, Service, MAX_UNAVAILABLE_INTERVAL_DEFAULT}; use tokio::signal::unix::SignalKind; use tokio_util::sync::CancellationToken; use utils::auth::{JwtAuth, SwappableJwtAuth}; use utils::logging::{self, LogFormat}; +use utils::sentry_init::init_sentry; use utils::{project_build_tag, project_git_version, tcp_listener}; project_git_version!(GIT_VERSION); @@ -50,7 +51,7 @@ struct Cli { #[arg(short, long)] path: Option, - /// URL to connect to postgres, like postgresql://localhost:1234/attachment_service + /// URL to connect to postgres, like postgresql://localhost:1234/storage_controller #[arg(long)] database_url: Option, @@ -158,6 +159,8 @@ fn main() -> anyhow::Result<()> { std::process::exit(1); })); + let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]); + tokio::runtime::Builder::new_current_thread() // We use spawn_blocking for database operations, so require approximately // as many blocking threads as we will open database connections. 
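Stepping back to the `ComputeHookTenant::maybe_send` rework earlier in this patch: the per-tenant send lock both serializes concurrent notifications and remembers the last request that was successfully delivered, so identical updates coalesce into no-ops. The following is a standalone sketch of that scheme under simplified assumptions (the request is reduced to a `String`, `maybe_send` is a free function rather than a method, and the names are illustrative); it assumes the `tokio` crate with the `rt` and `macros` features.

```rust
use std::sync::Arc;
use tokio::sync::{Mutex, OwnedMutexGuard};

type Request = String;

enum MaybeSend {
    // Send this request while holding the guard, then write it back on success.
    Transmit(Request, OwnedMutexGuard<Option<Request>>),
    // Someone else is currently sending; await the lock and re-evaluate.
    AwaitLock(Arc<Mutex<Option<Request>>>),
    // Nothing new to send.
    Noop,
}

fn maybe_send(desired: Request, send_lock: &Arc<Mutex<Option<Request>>>) -> MaybeSend {
    // Only try_lock here: the caller may already hold another (synchronous) lock.
    let Ok(guard) = send_lock.clone().try_lock_owned() else {
        return MaybeSend::AwaitLock(send_lock.clone());
    };
    if Some(&desired) == guard.as_ref() {
        // Identical to the last successful send: coalesce it away.
        return MaybeSend::Noop;
    }
    MaybeSend::Transmit(desired, guard)
}

#[tokio::main]
async fn main() {
    let send_lock: Arc<Mutex<Option<Request>>> = Arc::default();

    match maybe_send("tenant -> node 1".to_owned(), &send_lock) {
        MaybeSend::Transmit(req, mut guard) => {
            // ... perform the HTTP notification here ...
            *guard = Some(req); // remember what we sent, so duplicates become Noop
        }
        _ => unreachable!(),
    }

    // Re-sending the same placement is now a no-op.
    assert!(matches!(
        maybe_send("tenant -> node 1".to_owned(), &send_lock),
        MaybeSend::Noop
    ));
}
```

Keeping the comparison against the value stored inside the guard, rather than a separate flag, means a failed send leaves the previous value in place and a later retry naturally re-transmits.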
diff --git a/control_plane/attachment_service/src/metrics.rs b/storage_controller/src/metrics.rs similarity index 100% rename from control_plane/attachment_service/src/metrics.rs rename to storage_controller/src/metrics.rs diff --git a/control_plane/attachment_service/src/node.rs b/storage_controller/src/node.rs similarity index 100% rename from control_plane/attachment_service/src/node.rs rename to storage_controller/src/node.rs diff --git a/control_plane/attachment_service/src/pageserver_client.rs b/storage_controller/src/pageserver_client.rs similarity index 100% rename from control_plane/attachment_service/src/pageserver_client.rs rename to storage_controller/src/pageserver_client.rs diff --git a/control_plane/attachment_service/src/persistence.rs b/storage_controller/src/persistence.rs similarity index 99% rename from control_plane/attachment_service/src/persistence.rs rename to storage_controller/src/persistence.rs index d60392bdbc..55fbfd10bc 100644 --- a/control_plane/attachment_service/src/persistence.rs +++ b/storage_controller/src/persistence.rs @@ -696,7 +696,7 @@ impl Persistence { } } -/// Parts of [`crate::tenant_state::TenantState`] that are stored durably +/// Parts of [`crate::tenant_shard::TenantShard`] that are stored durably #[derive(Queryable, Selectable, Insertable, Serialize, Deserialize, Clone, Eq, PartialEq)] #[diesel(table_name = crate::schema::tenant_shards)] pub(crate) struct TenantShardPersistence { diff --git a/control_plane/attachment_service/src/persistence/split_state.rs b/storage_controller/src/persistence/split_state.rs similarity index 100% rename from control_plane/attachment_service/src/persistence/split_state.rs rename to storage_controller/src/persistence/split_state.rs diff --git a/control_plane/attachment_service/src/reconciler.rs b/storage_controller/src/reconciler.rs similarity index 99% rename from control_plane/attachment_service/src/reconciler.rs rename to storage_controller/src/reconciler.rs index 72eb8faccb..49cfaad569 100644 --- a/control_plane/attachment_service/src/reconciler.rs +++ b/storage_controller/src/reconciler.rs @@ -18,14 +18,14 @@ use utils::sync::gate::GateGuard; use crate::compute_hook::{ComputeHook, NotifyError}; use crate::node::Node; -use crate::tenant_state::{IntentState, ObservedState, ObservedStateLocation}; +use crate::tenant_shard::{IntentState, ObservedState, ObservedStateLocation}; const DEFAULT_HEATMAP_PERIOD: &str = "60s"; /// Object with the lifetime of the background reconcile task that is created /// for tenants which have a difference between their intent and observed states. pub(super) struct Reconciler { - /// See [`crate::tenant_state::TenantState`] for the meanings of these fields: they are a snapshot + /// See [`crate::tenant_shard::TenantShard`] for the meanings of these fields: they are a snapshot /// of a tenant's state from when we spawned a reconcile task. pub(super) tenant_shard_id: TenantShardId, pub(crate) shard: ShardIdentity, @@ -48,11 +48,11 @@ pub(super) struct Reconciler { /// To avoid stalling if the cloud control plane is unavailable, we may proceed /// past failures in [`ComputeHook::notify`], but we _must_ remember that we failed - /// so that we can set [`crate::tenant_state::TenantState::pending_compute_notification`] to ensure a later retry. + /// so that we can set [`crate::tenant_shard::TenantShard::pending_compute_notification`] to ensure a later retry. 
pub(crate) compute_notify_failure: bool, /// A means to abort background reconciliation: it is essential to - /// call this when something changes in the original TenantState that + /// call this when something changes in the original TenantShard that /// will make this reconciliation impossible or unnecessary, for /// example when a pageserver node goes offline, or the PlacementPolicy for /// the tenant is changed. @@ -66,7 +66,7 @@ pub(super) struct Reconciler { pub(crate) persistence: Arc, } -/// This is a snapshot of [`crate::tenant_state::IntentState`], but it does not do any +/// This is a snapshot of [`crate::tenant_shard::IntentState`], but it does not do any /// reference counting for Scheduler. The IntentState is what the scheduler works with, /// and the TargetState is just the instruction for a particular Reconciler run. #[derive(Debug)] diff --git a/control_plane/attachment_service/src/scheduler.rs b/storage_controller/src/scheduler.rs similarity index 98% rename from control_plane/attachment_service/src/scheduler.rs rename to storage_controller/src/scheduler.rs index 782189d11f..862ac0cbfe 100644 --- a/control_plane/attachment_service/src/scheduler.rs +++ b/storage_controller/src/scheduler.rs @@ -1,4 +1,4 @@ -use crate::{node::Node, tenant_state::TenantState}; +use crate::{node::Node, tenant_shard::TenantShard}; use pageserver_api::controller_api::UtilizationScore; use serde::Serialize; use std::collections::HashMap; @@ -27,7 +27,7 @@ pub enum MaySchedule { #[derive(Serialize)] struct SchedulerNode { - /// How many shards are currently scheduled on this node, via their [`crate::tenant_state::IntentState`]. + /// How many shards are currently scheduled on this node, via their [`crate::tenant_shard::IntentState`]. shard_count: usize, /// Whether this node is currently elegible to have new shards scheduled (this is derived @@ -84,7 +84,7 @@ impl std::ops::Add for AffinityScore { } } -// For carrying state between multiple calls to [`TenantState::schedule`], e.g. when calling +// For carrying state between multiple calls to [`TenantShard::schedule`], e.g. when calling // it for many shards in the same tenant. 
#[derive(Debug, Default)] pub(crate) struct ScheduleContext { @@ -147,7 +147,7 @@ impl Scheduler { pub(crate) fn consistency_check<'a>( &self, nodes: impl Iterator, - shards: impl Iterator, + shards: impl Iterator, ) -> anyhow::Result<()> { let mut expect_nodes: HashMap = HashMap::new(); for node in nodes { @@ -398,7 +398,7 @@ pub(crate) mod test_utils { mod tests { use super::*; - use crate::tenant_state::IntentState; + use crate::tenant_shard::IntentState; #[test] fn scheduler_basic() -> anyhow::Result<()> { let nodes = test_utils::make_test_nodes(2); diff --git a/control_plane/attachment_service/src/schema.rs b/storage_controller/src/schema.rs similarity index 100% rename from control_plane/attachment_service/src/schema.rs rename to storage_controller/src/schema.rs diff --git a/control_plane/attachment_service/src/service.rs b/storage_controller/src/service.rs similarity index 97% rename from control_plane/attachment_service/src/service.rs rename to storage_controller/src/service.rs index 0b67e30b96..010558b797 100644 --- a/control_plane/attachment_service/src/service.rs +++ b/storage_controller/src/service.rs @@ -66,9 +66,9 @@ use crate::{ persistence::{split_state::SplitState, DatabaseError, Persistence, TenantShardPersistence}, reconciler::attached_location_conf, scheduler::Scheduler, - tenant_state::{ + tenant_shard::{ IntentState, ObservedState, ObservedStateLocation, ReconcileResult, ReconcileWaitError, - ReconcilerWaiter, TenantState, + ReconcilerWaiter, TenantShard, }, }; @@ -92,7 +92,7 @@ pub const MAX_UNAVAILABLE_INTERVAL_DEFAULT: Duration = Duration::from_secs(30); // Top level state available to all HTTP handlers struct ServiceState { - tenants: BTreeMap, + tenants: BTreeMap, nodes: Arc>, @@ -102,7 +102,7 @@ struct ServiceState { impl ServiceState { fn new( nodes: HashMap, - tenants: BTreeMap, + tenants: BTreeMap, scheduler: Scheduler, ) -> Self { Self { @@ -116,7 +116,7 @@ impl ServiceState { &mut self, ) -> ( &mut Arc>, - &mut BTreeMap, + &mut BTreeMap, &mut Scheduler, ) { (&mut self.nodes, &mut self.tenants, &mut self.scheduler) @@ -335,11 +335,11 @@ impl Service { for (tenant_shard_id, shard_observations) in observed { for (node_id, observed_loc) in shard_observations { - let Some(tenant_state) = tenants.get_mut(&tenant_shard_id) else { + let Some(tenant_shard) = tenants.get_mut(&tenant_shard_id) else { cleanup.push((tenant_shard_id, node_id)); continue; }; - tenant_state + tenant_shard .observed .locations .insert(node_id, ObservedStateLocation { conf: observed_loc }); @@ -348,14 +348,14 @@ impl Service { // Populate each tenant's intent state let mut schedule_context = ScheduleContext::default(); - for (tenant_shard_id, tenant_state) in tenants.iter_mut() { + for (tenant_shard_id, tenant_shard) in tenants.iter_mut() { if tenant_shard_id.shard_number == ShardNumber(0) { // Reset scheduling context each time we advance to the next Tenant schedule_context = ScheduleContext::default(); } - tenant_state.intent_from_observed(scheduler); - if let Err(e) = tenant_state.schedule(scheduler, &mut schedule_context) { + tenant_shard.intent_from_observed(scheduler); + if let Err(e) = tenant_shard.schedule(scheduler, &mut schedule_context) { // Non-fatal error: we are unable to properly schedule the tenant, perhaps because // not enough pageservers are available. The tenant may well still be available // to clients. @@ -364,11 +364,11 @@ impl Service { // If we're both intending and observed to be attached at a particular node, we will // emit a compute notification for this. 
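The comment above states that a compute notification is emitted only when intent and observed state agree on where a shard is attached. A small self-contained sketch of that agreement check, using made-up types rather than the real TenantShard::stably_attached:

use std::collections::HashMap;

#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
struct DemoNodeId(u64);

struct DemoShard {
    // Where we want the shard attached.
    intent_attached: Option<DemoNodeId>,
    // What we last observed per pageserver: true means "attached there".
    observed_attached: HashMap<DemoNodeId, bool>,
}

impl DemoShard {
    // Some(node) only if intent and observation agree that `node` holds the attachment.
    fn stably_attached(&self) -> Option<DemoNodeId> {
        let node = self.intent_attached?;
        match self.observed_attached.get(&node).copied() {
            Some(true) => Some(node),
            _ => None,
        }
    }
}

fn main() {
    let shard = DemoShard {
        intent_attached: Some(DemoNodeId(1)),
        observed_attached: HashMap::from([(DemoNodeId(1), true)]),
    };
    assert_eq!(shard.stably_attached(), Some(DemoNodeId(1)));
}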
In the case where our observed state does not // yet match our intent, we will eventually reconcile, and that will emit a compute notification. - if let Some(attached_at) = tenant_state.stably_attached() { + if let Some(attached_at) = tenant_shard.stably_attached() { compute_notifications.push(( *tenant_shard_id, attached_at, - tenant_state.shard.stripe_size, + tenant_shard.shard.stripe_size, )); } } @@ -743,7 +743,7 @@ impl Service { /// Apply the contents of a [`ReconcileResult`] to our in-memory state: if the reconciliation /// was successful, this will update the observed state of the tenant such that subsequent - /// calls to [`TenantState::maybe_reconcile`] will do nothing. + /// calls to [`TenantShard::maybe_reconcile`] will do nothing. #[instrument(skip_all, fields( tenant_id=%result.tenant_shard_id.tenant_id, shard_id=%result.tenant_shard_id.shard_slug(), sequence=%result.sequence @@ -761,10 +761,10 @@ impl Service { tenant.generation = std::cmp::max(tenant.generation, result.generation); // If the reconciler signals that it failed to notify compute, set this state on - // the shard so that a future [`TenantState::maybe_reconcile`] will try again. + // the shard so that a future [`TenantShard::maybe_reconcile`] will try again. tenant.pending_compute_notification = result.pending_compute_notification; - // Let the TenantState know it is idle. + // Let the TenantShard know it is idle. tenant.reconcile_complete(result.sequence); match result.result { @@ -979,7 +979,7 @@ impl Service { if let Some(generation_pageserver) = tsp.generation_pageserver { intent.set_attached(&mut scheduler, Some(NodeId(generation_pageserver as u64))); } - let new_tenant = TenantState::from_persistent(tsp, intent)?; + let new_tenant = TenantShard::from_persistent(tsp, intent)?; tenants.insert(tenant_shard_id, new_tenant); } @@ -1126,7 +1126,7 @@ impl Service { let mut locked = self.inner.write().unwrap(); locked.tenants.insert( attach_req.tenant_shard_id, - TenantState::new( + TenantShard::new( attach_req.tenant_shard_id, ShardIdentity::unsharded(), PlacementPolicy::Attached(0), @@ -1178,32 +1178,32 @@ impl Service { let mut locked = self.inner.write().unwrap(); let (_nodes, tenants, scheduler) = locked.parts_mut(); - let tenant_state = tenants + let tenant_shard = tenants .get_mut(&attach_req.tenant_shard_id) .expect("Checked for existence above"); if let Some(new_generation) = new_generation { - tenant_state.generation = Some(new_generation); - tenant_state.policy = PlacementPolicy::Attached(0); + tenant_shard.generation = Some(new_generation); + tenant_shard.policy = PlacementPolicy::Attached(0); } else { // This is a detach notification. We must update placement policy to avoid re-attaching // during background scheduling/reconciliation, or during storage controller restart. 
assert!(attach_req.node_id.is_none()); - tenant_state.policy = PlacementPolicy::Detached; + tenant_shard.policy = PlacementPolicy::Detached; } if let Some(attaching_pageserver) = attach_req.node_id.as_ref() { tracing::info!( tenant_id = %attach_req.tenant_shard_id, ps_id = %attaching_pageserver, - generation = ?tenant_state.generation, + generation = ?tenant_shard.generation, "issuing", ); - } else if let Some(ps_id) = tenant_state.intent.get_attached() { + } else if let Some(ps_id) = tenant_shard.intent.get_attached() { tracing::info!( tenant_id = %attach_req.tenant_shard_id, %ps_id, - generation = ?tenant_state.generation, + generation = ?tenant_shard.generation, "dropping", ); } else { @@ -1211,14 +1211,14 @@ impl Service { tenant_id = %attach_req.tenant_shard_id, "no-op: tenant already has no pageserver"); } - tenant_state + tenant_shard .intent .set_attached(scheduler, attach_req.node_id); tracing::info!( "attach_hook: tenant {} set generation {:?}, pageserver {}", attach_req.tenant_shard_id, - tenant_state.generation, + tenant_shard.generation, // TODO: this is an odd number of 0xf's attach_req.node_id.unwrap_or(utils::id::NodeId(0xfffffff)) ); @@ -1230,36 +1230,36 @@ impl Service { #[cfg(feature = "testing")] { if let Some(node_id) = attach_req.node_id { - tenant_state.observed.locations = HashMap::from([( + tenant_shard.observed.locations = HashMap::from([( node_id, ObservedStateLocation { conf: Some(attached_location_conf( - tenant_state.generation.unwrap(), - &tenant_state.shard, - &tenant_state.config, + tenant_shard.generation.unwrap(), + &tenant_shard.shard, + &tenant_shard.config, false, )), }, )]); } else { - tenant_state.observed.locations.clear(); + tenant_shard.observed.locations.clear(); } } Ok(AttachHookResponse { gen: attach_req .node_id - .map(|_| tenant_state.generation.expect("Test hook, not used on tenants that are mid-onboarding with a NULL generation").into().unwrap()), + .map(|_| tenant_shard.generation.expect("Test hook, not used on tenants that are mid-onboarding with a NULL generation").into().unwrap()), }) } pub(crate) fn inspect(&self, inspect_req: InspectRequest) -> InspectResponse { let locked = self.inner.read().unwrap(); - let tenant_state = locked.tenants.get(&inspect_req.tenant_shard_id); + let tenant_shard = locked.tenants.get(&inspect_req.tenant_shard_id); InspectResponse { - attachment: tenant_state.and_then(|s| { + attachment: tenant_shard.and_then(|s| { s.intent .get_attached() .map(|ps| (s.generation.expect("Test hook, not used on tenants that are mid-onboarding with a NULL generation").into().unwrap(), ps)) @@ -1321,11 +1321,11 @@ impl Service { let mut locked = self.inner.write().unwrap(); for (tenant_shard_id, observed_loc) in configs.tenant_shards { - let Some(tenant_state) = locked.tenants.get_mut(&tenant_shard_id) else { + let Some(tenant_shard) = locked.tenants.get_mut(&tenant_shard_id) else { cleanup.push(tenant_shard_id); continue; }; - tenant_state + tenant_shard .observed .locations .insert(node.get_id(), ObservedStateLocation { conf: observed_loc }); @@ -1496,13 +1496,13 @@ impl Service { }; for req_tenant in validate_req.tenants { - if let Some(tenant_state) = locked.tenants.get(&req_tenant.id) { - let valid = tenant_state.generation == Some(Generation::new(req_tenant.gen)); + if let Some(tenant_shard) = locked.tenants.get(&req_tenant.id) { + let valid = tenant_shard.generation == Some(Generation::new(req_tenant.gen)); tracing::info!( "handle_validate: {}(gen {}): valid={valid} (latest {:?})", req_tenant.id, req_tenant.gen, - 
tenant_state.generation + tenant_shard.generation ); response.tenants.push(ValidateResponseTenant { id: req_tenant.id, @@ -1688,7 +1688,7 @@ impl Service { continue; } Entry::Vacant(entry) => { - let state = entry.insert(TenantState::new( + let state = entry.insert(TenantShard::new( tenant_shard_id, ShardIdentity::from_params( tenant_shard_id.shard_number, @@ -1763,6 +1763,9 @@ impl Service { /// Part of [`Self::tenant_location_config`]: dissect an incoming location config request, /// and transform it into either a tenant creation of a series of shard updates. + /// + /// If the incoming request makes no changes, a [`TenantCreateOrUpdate::Update`] result will + /// still be returned. fn tenant_location_config_prepare( &self, tenant_id: TenantId, @@ -1810,17 +1813,12 @@ impl Service { _ => None, }; - if shard.policy != placement_policy - || shard.config != req.config.tenant_conf - || set_generation.is_some() - { - updates.push(ShardUpdate { - tenant_shard_id: *shard_id, - placement_policy: placement_policy.clone(), - tenant_config: req.config.tenant_conf.clone(), - generation: set_generation, - }); - } + updates.push(ShardUpdate { + tenant_shard_id: *shard_id, + placement_policy: placement_policy.clone(), + tenant_config: req.config.tenant_conf.clone(), + generation: set_generation, + }); } if create { @@ -1849,6 +1847,7 @@ impl Service { }, ) } else { + assert!(!updates.is_empty()); TenantCreateOrUpdate::Update(updates) } } @@ -2739,7 +2738,7 @@ impl Service { /// Returns None if the input iterator of shards does not include a shard with number=0 fn tenant_describe_impl<'a>( &self, - shards: impl Iterator, + shards: impl Iterator, ) -> Option { let mut shard_zero = None; let mut describe_shards = Vec::new(); @@ -3039,7 +3038,7 @@ impl Service { }, ); - let mut child_state = TenantState::new(child, child_shard, policy.clone()); + let mut child_state = TenantShard::new(child, child_shard, policy.clone()); child_state.intent = IntentState::single(scheduler, Some(pageserver)); child_state.observed = ObservedState { locations: child_observed, @@ -3047,7 +3046,7 @@ impl Service { child_state.generation = Some(generation); child_state.config = config.clone(); - // The child's TenantState::splitting is intentionally left at the default value of Idle, + // The child's TenantShard::splitting is intentionally left at the default value of Idle, // as at this point in the split process we have succeeded and this part is infallible: // we will never need to do any special recovery from this state. @@ -3596,8 +3595,8 @@ impl Service { Ok(()) } - /// For debug/support: a full JSON dump of TenantStates. Returns a response so that - /// we don't have to make TenantState clonable in the return path. + /// For debug/support: a full JSON dump of TenantShards. Returns a response so that + /// we don't have to make TenantShard clonable in the return path. pub(crate) fn tenants_dump(&self) -> Result, ApiError> { let serialized = { let locked = self.inner.read().unwrap(); @@ -3701,7 +3700,7 @@ impl Service { } /// For debug/support: a JSON dump of the [`Scheduler`]. Returns a response so that - /// we don't have to make TenantState clonable in the return path. + /// we don't have to make TenantShard clonable in the return path. 
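Both dump endpoints described above avoid requiring Clone by serializing while the lock is held and returning the rendered text. A toy sketch of that shape, assuming the serde (with derive) and serde_json crates; the types are placeholders, not the real ServiceState:

use std::collections::BTreeMap;
use std::sync::RwLock;

#[derive(serde::Serialize)]
struct DemoShard {
    generation: u32,
}

fn dump(state: &RwLock<BTreeMap<String, DemoShard>>) -> Result<String, serde_json::Error> {
    // Hold the read lock only long enough to serialize; DemoShard never needs Clone
    // because nothing is moved or copied out of the map.
    let locked = state.read().unwrap();
    serde_json::to_string_pretty(&*locked)
}

fn main() {
    let state = RwLock::new(BTreeMap::from([(
        "tenant-0000".to_string(),
        DemoShard { generation: 3 },
    )]));
    println!("{}", dump(&state).unwrap());
}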
pub(crate) fn scheduler_dump(&self) -> Result, ApiError> { let serialized = { let locked = self.inner.read().unwrap(); @@ -3918,8 +3917,8 @@ impl Service { tracing::info!("Node {} transition to offline", node_id); let mut tenants_affected: usize = 0; - for (tenant_shard_id, tenant_state) in tenants { - if let Some(observed_loc) = tenant_state.observed.locations.get_mut(&node_id) { + for (tenant_shard_id, tenant_shard) in tenants { + if let Some(observed_loc) = tenant_shard.observed.locations.get_mut(&node_id) { // When a node goes offline, we set its observed configuration to None, indicating unknown: we will // not assume our knowledge of the node's configuration is accurate until it comes back online observed_loc.conf = None; @@ -3932,24 +3931,24 @@ impl Service { continue; } - if tenant_state.intent.demote_attached(node_id) { - tenant_state.sequence = tenant_state.sequence.next(); + if tenant_shard.intent.demote_attached(node_id) { + tenant_shard.sequence = tenant_shard.sequence.next(); // TODO: populate a ScheduleContext including all shards in the same tenant_id (only matters // for tenants without secondary locations: if they have a secondary location, then this // schedule() call is just promoting an existing secondary) let mut schedule_context = ScheduleContext::default(); - match tenant_state.schedule(scheduler, &mut schedule_context) { + match tenant_shard.schedule(scheduler, &mut schedule_context) { Err(e) => { // It is possible that some tenants will become unschedulable when too many pageservers // go offline: in this case there isn't much we can do other than make the issue observable. - // TODO: give TenantState a scheduling error attribute to be queried later. + // TODO: give TenantShard a scheduling error attribute to be queried later. tracing::warn!(%tenant_shard_id, "Scheduling error when marking pageserver {} offline: {e}", node_id); } Ok(()) => { if self - .maybe_reconcile_shard(tenant_state, &new_nodes) + .maybe_reconcile_shard(tenant_shard, &new_nodes) .is_some() { tenants_affected += 1; @@ -3968,10 +3967,10 @@ impl Service { tracing::info!("Node {} transition to active", node_id); // When a node comes back online, we must reconcile any tenant that has a None observed // location on the node. - for tenant_state in locked.tenants.values_mut() { - if let Some(observed_loc) = tenant_state.observed.locations.get_mut(&node_id) { + for tenant_shard in locked.tenants.values_mut() { + if let Some(observed_loc) = tenant_shard.observed.locations.get_mut(&node_id) { if observed_loc.conf.is_none() { - self.maybe_reconcile_shard(tenant_state, &new_nodes); + self.maybe_reconcile_shard(tenant_shard, &new_nodes); } } } @@ -4054,11 +4053,11 @@ impl Service { Ok(()) } - /// Convenience wrapper around [`TenantState::maybe_reconcile`] that provides + /// Convenience wrapper around [`TenantShard::maybe_reconcile`] that provides /// all the references to parts of Self that are needed fn maybe_reconcile_shard( &self, - shard: &mut TenantState, + shard: &mut TenantShard, nodes: &Arc>, ) -> Option { shard.maybe_reconcile( @@ -4124,7 +4123,7 @@ impl Service { let mut reconciles_spawned = 0; - let mut tenant_shards: Vec<&TenantState> = Vec::new(); + let mut tenant_shards: Vec<&TenantShard> = Vec::new(); // Limit on how many shards' optmizations each call to this function will execute. 
Combined // with the frequency of background calls, this acts as an implicit rate limit that runs a small @@ -4255,7 +4254,7 @@ impl Service { pub async fn shutdown(&self) { // Note that this already stops processing any results from reconciles: so - // we do not expect that our [`TenantState`] objects will reach a neat + // we do not expect that our [`TenantShard`] objects will reach a neat // final state. self.cancel.cancel(); diff --git a/control_plane/attachment_service/src/tenant_state.rs b/storage_controller/src/tenant_shard.rs similarity index 96% rename from control_plane/attachment_service/src/tenant_state.rs rename to storage_controller/src/tenant_shard.rs index 6717b8e178..58b8ef8d5d 100644 --- a/control_plane/attachment_service/src/tenant_state.rs +++ b/storage_controller/src/tenant_shard.rs @@ -50,7 +50,7 @@ where /// This struct implement Serialize for debugging purposes, but is _not_ persisted /// itself: see [`crate::persistence`] for the subset of tenant shard state that is persisted. #[derive(Serialize)] -pub(crate) struct TenantState { +pub(crate) struct TenantShard { pub(crate) tenant_shard_id: TenantShardId, pub(crate) shard: ShardIdentity, @@ -354,7 +354,7 @@ pub(crate) struct ReconcilerHandle { } /// When a reconcile task completes, it sends this result object -/// to be applied to the primary TenantState. +/// to be applied to the primary TenantShard. pub(crate) struct ReconcileResult { pub(crate) sequence: Sequence, /// On errors, `observed` should be treated as an incompleted description @@ -367,7 +367,7 @@ pub(crate) struct ReconcileResult { pub(crate) generation: Option, pub(crate) observed: ObservedState, - /// Set [`TenantState::pending_compute_notification`] from this flag + /// Set [`TenantShard::pending_compute_notification`] from this flag pub(crate) pending_compute_notification: bool, } @@ -379,7 +379,7 @@ impl ObservedState { } } -impl TenantState { +impl TenantShard { pub(crate) fn new( tenant_shard_id: TenantShardId, shard: ShardIdentity, @@ -1143,7 +1143,7 @@ pub(crate) mod tests { use super::*; - fn make_test_tenant_shard(policy: PlacementPolicy) -> TenantState { + fn make_test_tenant_shard(policy: PlacementPolicy) -> TenantShard { let tenant_id = TenantId::generate(); let shard_number = ShardNumber(0); let shard_count = ShardCount::new(1); @@ -1153,7 +1153,7 @@ pub(crate) mod tests { shard_number, shard_count, }; - TenantState::new( + TenantShard::new( tenant_shard_id, ShardIdentity::new( shard_number, @@ -1165,7 +1165,7 @@ pub(crate) mod tests { ) } - fn make_test_tenant(policy: PlacementPolicy, shard_count: ShardCount) -> Vec { + fn make_test_tenant(policy: PlacementPolicy, shard_count: ShardCount) -> Vec { let tenant_id = TenantId::generate(); (0..shard_count.count()) @@ -1177,7 +1177,7 @@ pub(crate) mod tests { shard_number, shard_count, }; - TenantState::new( + TenantShard::new( tenant_shard_id, ShardIdentity::new( shard_number, @@ -1202,24 +1202,24 @@ pub(crate) mod tests { let mut scheduler = Scheduler::new(nodes.values()); let mut context = ScheduleContext::default(); - let mut tenant_state = make_test_tenant_shard(PlacementPolicy::Attached(1)); - tenant_state + let mut tenant_shard = make_test_tenant_shard(PlacementPolicy::Attached(1)); + tenant_shard .schedule(&mut scheduler, &mut context) .expect("we have enough nodes, scheduling should work"); // Expect to initially be schedule on to different nodes - assert_eq!(tenant_state.intent.secondary.len(), 1); - assert!(tenant_state.intent.attached.is_some()); + 
assert_eq!(tenant_shard.intent.secondary.len(), 1); + assert!(tenant_shard.intent.attached.is_some()); - let attached_node_id = tenant_state.intent.attached.unwrap(); - let secondary_node_id = *tenant_state.intent.secondary.iter().last().unwrap(); + let attached_node_id = tenant_shard.intent.attached.unwrap(); + let secondary_node_id = *tenant_shard.intent.secondary.iter().last().unwrap(); assert_ne!(attached_node_id, secondary_node_id); // Notifying the attached node is offline should demote it to a secondary - let changed = tenant_state.intent.demote_attached(attached_node_id); + let changed = tenant_shard.intent.demote_attached(attached_node_id); assert!(changed); - assert!(tenant_state.intent.attached.is_none()); - assert_eq!(tenant_state.intent.secondary.len(), 2); + assert!(tenant_shard.intent.attached.is_none()); + assert_eq!(tenant_shard.intent.secondary.len(), 2); // Update the scheduler state to indicate the node is offline nodes @@ -1229,18 +1229,18 @@ pub(crate) mod tests { scheduler.node_upsert(nodes.get(&attached_node_id).unwrap()); // Scheduling the node should promote the still-available secondary node to attached - tenant_state + tenant_shard .schedule(&mut scheduler, &mut context) .expect("active nodes are available"); - assert_eq!(tenant_state.intent.attached.unwrap(), secondary_node_id); + assert_eq!(tenant_shard.intent.attached.unwrap(), secondary_node_id); // The original attached node should have been retained as a secondary assert_eq!( - *tenant_state.intent.secondary.iter().last().unwrap(), + *tenant_shard.intent.secondary.iter().last().unwrap(), attached_node_id ); - tenant_state.intent.clear(&mut scheduler); + tenant_shard.intent.clear(&mut scheduler); Ok(()) } @@ -1250,48 +1250,48 @@ pub(crate) mod tests { let nodes = make_test_nodes(3); let mut scheduler = Scheduler::new(nodes.values()); - let mut tenant_state = make_test_tenant_shard(PlacementPolicy::Attached(1)); + let mut tenant_shard = make_test_tenant_shard(PlacementPolicy::Attached(1)); - tenant_state.observed.locations.insert( + tenant_shard.observed.locations.insert( NodeId(3), ObservedStateLocation { conf: Some(LocationConfig { mode: LocationConfigMode::AttachedMulti, generation: Some(2), secondary_conf: None, - shard_number: tenant_state.shard.number.0, - shard_count: tenant_state.shard.count.literal(), - shard_stripe_size: tenant_state.shard.stripe_size.0, + shard_number: tenant_shard.shard.number.0, + shard_count: tenant_shard.shard.count.literal(), + shard_stripe_size: tenant_shard.shard.stripe_size.0, tenant_conf: TenantConfig::default(), }), }, ); - tenant_state.observed.locations.insert( + tenant_shard.observed.locations.insert( NodeId(2), ObservedStateLocation { conf: Some(LocationConfig { mode: LocationConfigMode::AttachedStale, generation: Some(1), secondary_conf: None, - shard_number: tenant_state.shard.number.0, - shard_count: tenant_state.shard.count.literal(), - shard_stripe_size: tenant_state.shard.stripe_size.0, + shard_number: tenant_shard.shard.number.0, + shard_count: tenant_shard.shard.count.literal(), + shard_stripe_size: tenant_shard.shard.stripe_size.0, tenant_conf: TenantConfig::default(), }), }, ); - tenant_state.intent_from_observed(&mut scheduler); + tenant_shard.intent_from_observed(&mut scheduler); // The highest generationed attached location gets used as attached - assert_eq!(tenant_state.intent.attached, Some(NodeId(3))); + assert_eq!(tenant_shard.intent.attached, Some(NodeId(3))); // Other locations get used as secondary - assert_eq!(tenant_state.intent.secondary, 
vec![NodeId(2)]); + assert_eq!(tenant_shard.intent.secondary, vec![NodeId(2)]); - scheduler.consistency_check(nodes.values(), [&tenant_state].into_iter())?; + scheduler.consistency_check(nodes.values(), [&tenant_shard].into_iter())?; - tenant_state.intent.clear(&mut scheduler); + tenant_shard.intent.clear(&mut scheduler); Ok(()) } @@ -1300,23 +1300,23 @@ pub(crate) mod tests { let nodes = make_test_nodes(3); let mut scheduler = Scheduler::new(nodes.values()); - let mut tenant_state = make_test_tenant_shard(PlacementPolicy::Attached(1)); + let mut tenant_shard = make_test_tenant_shard(PlacementPolicy::Attached(1)); // In pause mode, schedule() shouldn't do anything - tenant_state.scheduling_policy = ShardSchedulingPolicy::Pause; - assert!(tenant_state + tenant_shard.scheduling_policy = ShardSchedulingPolicy::Pause; + assert!(tenant_shard .schedule(&mut scheduler, &mut ScheduleContext::default()) .is_ok()); - assert!(tenant_state.intent.all_pageservers().is_empty()); + assert!(tenant_shard.intent.all_pageservers().is_empty()); // In active mode, schedule() works - tenant_state.scheduling_policy = ShardSchedulingPolicy::Active; - assert!(tenant_state + tenant_shard.scheduling_policy = ShardSchedulingPolicy::Active; + assert!(tenant_shard .schedule(&mut scheduler, &mut ScheduleContext::default()) .is_ok()); - assert!(!tenant_state.intent.all_pageservers().is_empty()); + assert!(!tenant_shard.intent.all_pageservers().is_empty()); - tenant_state.intent.clear(&mut scheduler); + tenant_shard.intent.clear(&mut scheduler); Ok(()) } @@ -1429,7 +1429,7 @@ pub(crate) mod tests { fn optimize_til_idle( nodes: &HashMap, scheduler: &mut Scheduler, - shards: &mut [TenantState], + shards: &mut [TenantShard], ) { let mut loop_n = 0; loop { diff --git a/test_runner/fixtures/pageserver/http.py b/test_runner/fixtures/pageserver/http.py index d3bf46b2e8..b899b0dac8 100644 --- a/test_runner/fixtures/pageserver/http.py +++ b/test_runner/fixtures/pageserver/http.py @@ -308,6 +308,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter): params=params, ) self.verbose_error(res) + return res.json() def tenant_list_locations(self): res = self.get( diff --git a/test_runner/fixtures/workload.py b/test_runner/fixtures/workload.py index ab8717de54..c44628ce06 100644 --- a/test_runner/fixtures/workload.py +++ b/test_runner/fixtures/workload.py @@ -81,9 +81,13 @@ class Workload: return self._endpoint - def __del__(self): + def stop(self): if self._endpoint is not None: self._endpoint.stop() + self._endpoint = None + + def __del__(self): + self.stop() def init(self, pageserver_id: Optional[int] = None): endpoint = self.endpoint(pageserver_id) diff --git a/test_runner/regress/test_pageserver_generations.py b/test_runner/regress/test_pageserver_generations.py index 4767f2edb1..67f68a62af 100644 --- a/test_runner/regress/test_pageserver_generations.py +++ b/test_runner/regress/test_pageserver_generations.py @@ -22,6 +22,7 @@ from fixtures.neon_fixtures import ( NeonPageserver, PgBin, S3Scrubber, + flush_ep_to_pageserver, last_flush_lsn_upload, ) from fixtures.pageserver.http import PageserverApiException @@ -30,6 +31,7 @@ from fixtures.pageserver.utils import ( list_prefix, wait_for_last_record_lsn, wait_for_upload, + wait_for_upload_queue_empty, ) from fixtures.remote_storage import ( RemoteStorageKind, @@ -111,7 +113,6 @@ def generate_uploads_and_deletions( last_flush_lsn_upload( env, endpoint, tenant_id, timeline_id, pageserver_id=pageserver.id ) - ps_http.timeline_checkpoint(tenant_id, timeline_id) # Compaction 
should generate some GC-elegible layers for i in range(0, 2): @@ -121,6 +122,17 @@ def generate_uploads_and_deletions( print_gc_result(gc_result) assert gc_result["layers_removed"] > 0 + # Stop endpoint and flush all data to pageserver, then checkpoint it: this + # ensures that the pageserver is in a fully idle state: there will be no more + # background ingest, no more uploads pending, and therefore no non-determinism + # in subsequent actions like pageserver restarts. + final_lsn = flush_ep_to_pageserver(env, endpoint, tenant_id, timeline_id, pageserver.id) + ps_http.timeline_checkpoint(tenant_id, timeline_id) + # Finish uploads + wait_for_upload(ps_http, tenant_id, timeline_id, final_lsn) + # Finish all remote writes (including deletions) + wait_for_upload_queue_empty(ps_http, tenant_id, timeline_id) + def read_all( env: NeonEnv, tenant_id: Optional[TenantId] = None, timeline_id: Optional[TimelineId] = None @@ -385,9 +397,8 @@ def test_deletion_queue_recovery( if validate_before == ValidateBefore.NO_VALIDATE: failpoints.append( # Prevent deletion lists from being validated, we will test that they are - # dropped properly during recovery. 'pause' is okay here because we kill - # the pageserver with immediate=true - ("control-plane-client-validate", "pause") + # dropped properly during recovery. This is such a long sleep as to be equivalent to "never" + ("control-plane-client-validate", "return(3600000)") ) ps_http.configure_failpoints(failpoints) diff --git a/test_runner/regress/test_sharding.py b/test_runner/regress/test_sharding.py index 2699654f80..bfaab9125f 100644 --- a/test_runner/regress/test_sharding.py +++ b/test_runner/regress/test_sharding.py @@ -10,9 +10,13 @@ from fixtures.log_helper import log from fixtures.neon_fixtures import ( NeonEnv, NeonEnvBuilder, + S3Scrubber, StorageControllerApiException, + last_flush_lsn_upload, tenant_get_shards, + wait_for_last_flush_lsn, ) +from fixtures.pageserver.utils import assert_prefix_empty, assert_prefix_not_empty from fixtures.remote_storage import s3_storage from fixtures.types import Lsn, TenantId, TenantShardId, TimelineId from fixtures.utils import wait_until @@ -67,6 +71,15 @@ def test_sharding_smoke( log.info(f"sizes = {sizes}") return sizes + # The imported initdb for timeline creation should + # not be fully imported on every shard. 
We use a 1MB strripe size so expect + # pretty good distribution: no one shard should have more than half the data + sizes = get_sizes() + physical_initdb_total = sum(sizes.values()) + expect_initdb_size = 20 * 1024 * 1024 + assert physical_initdb_total > expect_initdb_size + assert all(s < expect_initdb_size // 2 for s in sizes.values()) + # Test that timeline creation works on a sharded tenant timeline_b = env.neon_cli.create_branch("branch_b", tenant_id=tenant_id) @@ -99,6 +112,38 @@ def test_sharding_smoke( env.storage_controller.consistency_check() + # Validate that deleting a sharded tenant removes all files in the prefix + + # Before deleting, stop the client and check we have some objects to delete + workload.stop() + assert_prefix_not_empty( + neon_env_builder.pageserver_remote_storage, + prefix="/".join( + ( + "tenants", + str(tenant_id), + ) + ), + ) + + # Check the scrubber isn't confused by sharded content, then disable + # it during teardown because we'll have deleted by then + S3Scrubber(neon_env_builder).scan_metadata() + neon_env_builder.scrub_on_exit = False + + env.storage_controller.pageserver_api().tenant_delete(tenant_id) + assert_prefix_empty( + neon_env_builder.pageserver_remote_storage, + prefix="/".join( + ( + "tenants", + str(tenant_id), + ) + ), + ) + + env.storage_controller.consistency_check() + def test_sharding_split_unsharded( neon_env_builder: NeonEnvBuilder, @@ -466,13 +511,11 @@ def test_sharding_split_stripe_size( os.getenv("BUILD_TYPE") == "debug", reason="Avoid running bulkier ingest tests in debug mode", ) -def test_sharding_ingest( +def test_sharding_ingest_layer_sizes( neon_env_builder: NeonEnvBuilder, ): """ - Check behaviors related to ingest: - - That we generate properly sized layers - - TODO: that updates to remote_consistent_lsn are made correctly via safekeepers + Check that when ingesting data to a sharded tenant, we properly respect layer size limts. """ # Set a small stripe size and checkpoint distance, so that we can exercise rolling logic @@ -503,6 +546,7 @@ def test_sharding_ingest( workload.write_rows(4096, upload=False) workload.write_rows(4096, upload=False) workload.write_rows(4096, upload=False) + workload.validate() small_layer_count = 0 @@ -515,7 +559,9 @@ def test_sharding_ingest( shard_id = shard["shard_id"] layer_map = pageserver.http_client().layer_map_info(shard_id, timeline_id) - for layer in layer_map.historic_layers: + historic_layers = sorted(layer_map.historic_layers, key=lambda layer: layer.lsn_start) + + for layer in historic_layers: assert layer.layer_file_size is not None if layer.layer_file_size < expect_layer_size // 2: classification = "Small" @@ -552,6 +598,93 @@ def test_sharding_ingest( assert huge_layer_count <= shard_count +def test_sharding_ingest_gaps( + neon_env_builder: NeonEnvBuilder, +): + """ + Check ingest behavior when the incoming data results in some shards having gaps where + no data is ingested: they should advance their disk_consistent_lsn and remote_consistent_lsn + even if they aren't writing out layers. + """ + + # Set a small stripe size and checkpoint distance, so that we can exercise rolling logic + # without writing a lot of data. 
+ expect_layer_size = 131072 + checkpoint_interval_secs = 5 + TENANT_CONF = { + # small checkpointing and compaction targets to ensure we generate many upload operations + "checkpoint_distance": f"{expect_layer_size}", + "compaction_target_size": f"{expect_layer_size}", + # Set a short checkpoint interval as we will wait for uploads to happen + "checkpoint_timeout": f"{checkpoint_interval_secs}s", + # Background checkpointing is done from compaction loop, so set that interval short too + "compaction_period": "1s", + } + shard_count = 4 + neon_env_builder.num_pageservers = shard_count + env = neon_env_builder.init_start( + initial_tenant_conf=TENANT_CONF, + initial_tenant_shard_count=shard_count, + initial_tenant_shard_stripe_size=128, + ) + tenant_id = env.initial_tenant + timeline_id = env.initial_timeline + + # Just a few writes: we aim to produce a situation where some shards are skipping + # ingesting some records and thereby won't have layer files that advance their + # consistent LSNs, to exercise the code paths that explicitly handle this case by + # advancing consistent LSNs in the background if there is no open layer. + workload = Workload(env, tenant_id, timeline_id) + workload.init() + workload.write_rows(128, upload=False) + workload.churn_rows(128, upload=False) + + # Checkpoint, so that we won't get a background checkpoint happening during the next step + workload.endpoint().safe_psql("checkpoint") + # Freeze + flush, so that subsequent writes will start from a position of no open layers + last_flush_lsn_upload(env, workload.endpoint(), tenant_id, timeline_id) + + # This write is tiny: at least some of the shards should find they don't have any + # data to ingest. This will exercise how they handle that. + workload.churn_rows(1, upload=False) + + # The LSN that has reached pageservers, but may not have been flushed to historic layers yet + expect_lsn = wait_for_last_flush_lsn(env, workload.endpoint(), tenant_id, timeline_id) + + # Don't leave the endpoint running, we don't want it writing in the background + workload.stop() + + log.info(f"Waiting for shards' consistent LSNs to reach {expect_lsn}") + + shards = tenant_get_shards(env, tenant_id, None) + + def assert_all_disk_consistent(): + """ + Assert that all the shards' disk_consistent_lsns have reached expect_lsn + """ + for tenant_shard_id, pageserver in shards: + timeline_detail = pageserver.http_client().timeline_detail(tenant_shard_id, timeline_id) + log.info(f"{tenant_shard_id} (ps {pageserver.id}) detail: {timeline_detail}") + assert Lsn(timeline_detail["disk_consistent_lsn"]) >= expect_lsn + + # We set a short checkpoint timeout: expect things to get frozen+flushed within that + wait_until(checkpoint_interval_secs * 3, 1, assert_all_disk_consistent) + + def assert_all_remote_consistent(): + """ + Assert that all the shards' remote_consistent_lsns have reached expect_lsn + """ + for tenant_shard_id, pageserver in shards: + timeline_detail = pageserver.http_client().timeline_detail(tenant_shard_id, timeline_id) + log.info(f"{tenant_shard_id} (ps {pageserver.id}) detail: {timeline_detail}") + assert Lsn(timeline_detail["remote_consistent_lsn"]) >= expect_lsn + + # We set a short checkpoint timeout: expect things to get frozen+flushed within that + wait_until(checkpoint_interval_secs * 3, 1, assert_all_remote_consistent) + + workload.validate() + + class Failure: pageserver_id: Optional[int] @@ -795,6 +928,8 @@ def test_sharding_split_failures( ".*Reconcile error: receive body: error sending request for url.*", # Node 
offline cases will fail inside reconciler when detaching secondaries ".*Reconcile error on shard.*: receive body: error sending request for url.*", + # While parent shard's client is stopped during split, flush loop updating LSNs will emit this warning + ".*Failed to schedule metadata upload after updating disk_consistent_lsn.*", ] ) diff --git a/test_runner/regress/test_sharding_service.py b/test_runner/regress/test_storage_controller.py similarity index 95% rename from test_runner/regress/test_sharding_service.py rename to test_runner/regress/test_storage_controller.py index 7df0b58596..840f354142 100644 --- a/test_runner/regress/test_sharding_service.py +++ b/test_runner/regress/test_storage_controller.py @@ -42,11 +42,11 @@ def get_node_shard_counts(env: NeonEnv, tenant_ids): return counts -def test_sharding_service_smoke( +def test_storage_controller_smoke( neon_env_builder: NeonEnvBuilder, ): """ - Test the basic lifecycle of a sharding service: + Test the basic lifecycle of a storage controller: - Restarting - Restarting a pageserver - Creating and deleting tenants and timelines @@ -204,7 +204,7 @@ def test_node_status_after_restart( env.storage_controller.consistency_check() -def test_sharding_service_passthrough( +def test_storage_controller_passthrough( neon_env_builder: NeonEnvBuilder, ): """ @@ -231,7 +231,7 @@ def test_sharding_service_passthrough( env.storage_controller.consistency_check() -def test_sharding_service_restart(neon_env_builder: NeonEnvBuilder): +def test_storage_controller_restart(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() tenant_a = env.initial_tenant tenant_b = TenantId.generate() @@ -266,7 +266,7 @@ def test_sharding_service_restart(neon_env_builder: NeonEnvBuilder): @pytest.mark.parametrize("warm_up", [True, False]) -def test_sharding_service_onboarding(neon_env_builder: NeonEnvBuilder, warm_up: bool): +def test_storage_controller_onboarding(neon_env_builder: NeonEnvBuilder, warm_up: bool): """ We onboard tenants to the sharding service by treating it as a 'virtual pageserver' which provides the /location_config API. 
This is similar to creating a tenant, @@ -303,7 +303,7 @@ def test_sharding_service_onboarding(neon_env_builder: NeonEnvBuilder, warm_up: origin_ps.http_client().tenant_create(tenant_id, generation=generation) # As if doing a live migration, first configure origin into stale mode - origin_ps.http_client().tenant_location_conf( + r = origin_ps.http_client().tenant_location_conf( tenant_id, { "mode": "AttachedStale", @@ -312,6 +312,7 @@ def test_sharding_service_onboarding(neon_env_builder: NeonEnvBuilder, warm_up: "generation": generation, }, ) + assert len(r["shards"]) == 1 if warm_up: origin_ps.http_client().tenant_heatmap_upload(tenant_id) @@ -332,7 +333,7 @@ def test_sharding_service_onboarding(neon_env_builder: NeonEnvBuilder, warm_up: # Call into storage controller to onboard the tenant generation += 1 - virtual_ps_http.tenant_location_conf( + r = virtual_ps_http.tenant_location_conf( tenant_id, { "mode": "AttachedMulti", @@ -341,6 +342,7 @@ def test_sharding_service_onboarding(neon_env_builder: NeonEnvBuilder, warm_up: "generation": generation, }, ) + assert len(r["shards"]) == 1 # As if doing a live migration, detach the original pageserver origin_ps.http_client().tenant_location_conf( @@ -357,7 +359,7 @@ def test_sharding_service_onboarding(neon_env_builder: NeonEnvBuilder, warm_up: # set it to AttachedSingle: this is a no-op, but we test it because the # cloud control plane may call this for symmetry with live migration to # an individual pageserver - virtual_ps_http.tenant_location_conf( + r = virtual_ps_http.tenant_location_conf( tenant_id, { "mode": "AttachedSingle", @@ -366,6 +368,7 @@ def test_sharding_service_onboarding(neon_env_builder: NeonEnvBuilder, warm_up: "generation": generation, }, ) + assert len(r["shards"]) == 1 # We should see the tenant is now attached to the pageserver managed # by the sharding service @@ -396,7 +399,7 @@ def test_sharding_service_onboarding(neon_env_builder: NeonEnvBuilder, warm_up: # The generation has moved on since we onboarded assert generation != dest_tenant_before_conf_change["generation"] - virtual_ps_http.tenant_location_conf( + r = virtual_ps_http.tenant_location_conf( tenant_id, { "mode": "AttachedSingle", @@ -406,6 +409,7 @@ def test_sharding_service_onboarding(neon_env_builder: NeonEnvBuilder, warm_up: "generation": generation, }, ) + assert len(r["shards"]) == 1 dest_tenant_after_conf_change = dest_ps.http_client().tenant_status(tenant_id) assert ( dest_tenant_after_conf_change["generation"] == dest_tenant_before_conf_change["generation"] @@ -416,7 +420,7 @@ def test_sharding_service_onboarding(neon_env_builder: NeonEnvBuilder, warm_up: env.storage_controller.consistency_check() -def test_sharding_service_compute_hook( +def test_storage_controller_compute_hook( httpserver: HTTPServer, neon_env_builder: NeonEnvBuilder, httpserver_listen_address, @@ -529,7 +533,7 @@ def test_sharding_service_compute_hook( env.storage_controller.consistency_check() -def test_sharding_service_debug_apis(neon_env_builder: NeonEnvBuilder): +def test_storage_controller_debug_apis(neon_env_builder: NeonEnvBuilder): """ Verify that occasional-use debug APIs work as expected. This is a lightweight test that just hits the endpoints to check that they don't bitrot. 
@@ -590,7 +594,7 @@ def test_sharding_service_debug_apis(neon_env_builder: NeonEnvBuilder): env.storage_controller.consistency_check() -def test_sharding_service_s3_time_travel_recovery( +def test_storage_controller_s3_time_travel_recovery( neon_env_builder: NeonEnvBuilder, pg_bin: PgBin, ): @@ -700,7 +704,7 @@ def test_sharding_service_s3_time_travel_recovery( env.storage_controller.consistency_check() -def test_sharding_service_auth(neon_env_builder: NeonEnvBuilder): +def test_storage_controller_auth(neon_env_builder: NeonEnvBuilder): neon_env_builder.auth_enabled = True env = neon_env_builder.init_start() svc = env.storage_controller @@ -724,13 +728,18 @@ def test_sharding_service_auth(neon_env_builder: NeonEnvBuilder): StorageControllerApiException, match="Forbidden: JWT authentication error", ): - svc.request("POST", f"{api}/v1/tenant", json=body, headers=svc.headers(TokenScope.ADMIN)) + svc.request( + "POST", f"{api}/v1/tenant", json=body, headers=svc.headers(TokenScope.SAFEKEEPER_DATA) + ) # Token with correct scope svc.request( "POST", f"{api}/v1/tenant", json=body, headers=svc.headers(TokenScope.PAGE_SERVER_API) ) + # Token with admin scope should also be permitted + svc.request("POST", f"{api}/v1/tenant", json=body, headers=svc.headers(TokenScope.ADMIN)) + # No token with pytest.raises( StorageControllerApiException, @@ -764,7 +773,7 @@ def test_sharding_service_auth(neon_env_builder: NeonEnvBuilder): ) -def test_sharding_service_tenant_conf(neon_env_builder: NeonEnvBuilder): +def test_storage_controller_tenant_conf(neon_env_builder: NeonEnvBuilder): """ Validate the pageserver-compatible API endpoints for setting and getting tenant conf, without supplying the whole LocationConf. @@ -867,7 +876,7 @@ def build_node_to_tenants_map(env: NeonEnv) -> dict[int, list[TenantId]]: PageserverFailpoint(pageserver_id=1, failpoint="get-utilization-http-handler"), ], ) -def test_sharding_service_heartbeats( +def test_storage_controller_heartbeats( neon_env_builder: NeonEnvBuilder, pg_bin: PgBin, failure: Failure ): neon_env_builder.num_pageservers = 2 @@ -977,7 +986,7 @@ def test_sharding_service_heartbeats( wait_until(10, 1, storage_controller_consistent) -def test_sharding_service_re_attach(neon_env_builder: NeonEnvBuilder): +def test_storage_controller_re_attach(neon_env_builder: NeonEnvBuilder): """ Exercise the behavior of the /re-attach endpoint on pageserver startup when pageservers have a mixture of attached and secondary locations @@ -1178,7 +1187,14 @@ def test_storcon_cli(neon_env_builder: NeonEnvBuilder): storcon_cli(["node-configure", "--node-id", "1", "--scheduling", "pause"]) assert "Pause" in storcon_cli(["nodes"])[3] - # Make a node offline + # We will simulate a node death and then marking it offline + env.pageservers[0].stop(immediate=True) + # Sleep to make it unlikely that the controller's heartbeater will race handling + # a /utilization response internally, such that it marks the node back online. IRL + # there would always be a longer delay than this before a node failing and a human + # intervening. + time.sleep(2) + storcon_cli(["node-configure", "--node-id", "1", "--availability", "offline"]) assert "Offline" in storcon_cli(["nodes"])[3] @@ -1187,7 +1203,10 @@ def test_storcon_cli(neon_env_builder: NeonEnvBuilder): assert len(tenant_lines) == 5 assert str(env.initial_tenant) in tenant_lines[3] - env.storage_controller.allowed_errors.append(".*Scheduling is disabled by policy.*") + # Setting scheduling policies intentionally result in warnings, they're for rare use. 
+ env.storage_controller.allowed_errors.extend( + [".*Skipping reconcile for policy.*", ".*Scheduling is disabled by policy.*"] + ) # Describe a tenant tenant_lines = storcon_cli(["tenant-describe", "--tenant-id", str(env.initial_tenant)]) diff --git a/test_runner/regress/test_tenant_size.py b/test_runner/regress/test_tenant_size.py index 025cc930d7..4c8fd4b0e5 100644 --- a/test_runner/regress/test_tenant_size.py +++ b/test_runner/regress/test_tenant_size.py @@ -20,9 +20,10 @@ from fixtures.pg_version import PgVersion from fixtures.types import Lsn, TenantId, TimelineId -@pytest.mark.xfail -def test_empty_tenant_size(neon_simple_env: NeonEnv, test_output_dir: Path): - env = neon_simple_env +def test_empty_tenant_size(neon_env_builder: NeonEnvBuilder): + env = neon_env_builder.init_configs() + env.start() + (tenant_id, _) = env.neon_cli.create_tenant() http_client = env.pageserver.http_client() initial_size = http_client.tenant_size(tenant_id) @@ -35,66 +36,25 @@ def test_empty_tenant_size(neon_simple_env: NeonEnv, test_output_dir: Path): branch_name, main_timeline_id = env.neon_cli.list_timelines(tenant_id)[0] assert branch_name == main_branch_name - with env.endpoints.create_start( + endpoint = env.endpoints.create_start( main_branch_name, tenant_id=tenant_id, config_lines=["autovacuum=off", "checkpoint_timeout=10min"], - ) as endpoint: - with endpoint.cursor() as cur: - cur.execute("SELECT 1") - row = cur.fetchone() - assert row is not None - assert row[0] == 1 - size = http_client.tenant_size(tenant_id) - # we've disabled the autovacuum and checkpoint - # so background processes should not change the size. - # If this test will flake we should probably loosen the check - assert ( - size == initial_size - ), f"starting idle compute should not change the tenant size (Currently {size}, expected {initial_size})" + ) - # the size should be the same, until we increase the size over the - # gc_horizon - size, inputs = http_client.tenant_size_and_modelinputs(tenant_id) - assert ( - size == initial_size - ), f"tenant_size should not be affected by shutdown of compute (Currently {size}, expected {initial_size})" + with endpoint.cursor() as cur: + cur.execute("SELECT 1") + row = cur.fetchone() + assert row is not None + assert row[0] == 1 - expected_inputs = { - "segments": [ - { - "segment": {"parent": None, "lsn": 23694408, "size": 25362432, "needed": True}, - "timeline_id": f"{main_timeline_id}", - "kind": "BranchStart", - }, - { - "segment": {"parent": 0, "lsn": 23694528, "size": None, "needed": True}, - "timeline_id": f"{main_timeline_id}", - "kind": "BranchEnd", - }, - ], - "timeline_inputs": [ - { - "timeline_id": f"{main_timeline_id}", - "ancestor_id": None, - "ancestor_lsn": "0/0", - "last_record": "0/1698CC0", - "latest_gc_cutoff": "0/1698C48", - "horizon_cutoff": "0/0", - "pitr_cutoff": "0/0", - "next_gc_cutoff": "0/0", - "retention_param_cutoff": None, - } - ], - } - expected_inputs = mask_model_inputs(expected_inputs) - actual_inputs = mask_model_inputs(inputs) + # The transaction above will make the compute generate a checkpoint. + # In turn, the pageserver persists the checkpoint. This should only be + # one key with a size of a couple hundred bytes. 
+ wait_for_last_flush_lsn(env, endpoint, tenant_id, main_timeline_id) + size = http_client.tenant_size(tenant_id) - assert expected_inputs == actual_inputs - - size_debug_file = open(test_output_dir / "size_debug.html", "w") - size_debug = http_client.tenant_size_debug(tenant_id) - size_debug_file.write(size_debug) + assert size >= initial_size and size - initial_size < 1024 def test_branched_empty_timeline_size(neon_simple_env: NeonEnv, test_output_dir: Path): @@ -190,7 +150,6 @@ def test_branched_from_many_empty_parents_size(neon_simple_env: NeonEnv, test_ou size_debug_file.write(size_debug) -@pytest.mark.skip("This should work, but is left out because assumed covered by other tests") def test_branch_point_within_horizon(neon_simple_env: NeonEnv, test_output_dir: Path): """ gc_horizon = 15 @@ -233,7 +192,6 @@ def test_branch_point_within_horizon(neon_simple_env: NeonEnv, test_output_dir: size_debug_file.write(size_debug) -@pytest.mark.skip("This should work, but is left out because assumed covered by other tests") def test_parent_within_horizon(neon_simple_env: NeonEnv, test_output_dir: Path): """ gc_horizon = 5 @@ -282,7 +240,6 @@ def test_parent_within_horizon(neon_simple_env: NeonEnv, test_output_dir: Path): size_debug_file.write(size_debug) -@pytest.mark.skip("This should work, but is left out because assumed covered by other tests") def test_only_heads_within_horizon(neon_simple_env: NeonEnv, test_output_dir: Path): """ gc_horizon = small