Compare commits


7 Commits

Author          SHA1        Message                                             Date
Conrad Ludgate  081e794878  remove hyper-tungstenite                            2024-01-11 18:01:11 +00:00
Conrad Ludgate  07ccaa7575  consistency with ws1                                2024-01-11 17:46:15 +00:00
Conrad Ludgate  a318213e72  maybe works with ws over http2?                     2024-01-11 15:28:03 +00:00
Conrad Ludgate  520171f17a  ws over http2                                       2024-01-11 15:28:03 +00:00
Conrad Ludgate  85e17bc550  support http2                                       2024-01-11 15:28:03 +00:00
Conrad Ludgate  76fe42aae0  ruffff                                              2024-01-11 15:28:03 +00:00
Conrad Ludgate  4d37f89189  proxy http: remove need for exact endpoint match    2024-01-11 15:28:03 +00:00
138 changed files with 3311 additions and 8571 deletions

View File

@@ -1,2 +1,2 @@
[profile.default]
slow-timeout = { period = "20s", terminate-after = 3 }
slow-timeout = "1m"

View File

@@ -1131,7 +1131,7 @@ jobs:
# TODO: move deployPreprodRegion to release (`"$GITHUB_REF_NAME" == "release"` block), once Staging support different compute tag prefixes for different regions
gh workflow --repo neondatabase/aws run deploy-dev.yml --ref main -f branch=main -f dockerTag=${{needs.tag.outputs.build-tag}} -f deployPreprodRegion=true
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
gh workflow --repo neondatabase/aws run deploy-prod.yml --ref main -f branch=main -f dockerTag=${{needs.tag.outputs.build-tag}}
gh workflow --repo neondatabase/aws run deploy-prod.yml --ref main -f branch=main -f dockerTag=${{needs.tag.outputs.build-tag}} -f disclamerAcknowledged=true
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
exit 1

Cargo.lock (generated)
View File

@@ -270,32 +270,6 @@ dependencies = [
"critical-section",
]
[[package]]
name = "attachment_service"
version = "0.1.0"
dependencies = [
"anyhow",
"camino",
"clap",
"control_plane",
"futures",
"git-version",
"hyper",
"metrics",
"pageserver_api",
"pageserver_client",
"postgres_backend",
"postgres_connection",
"serde",
"serde_json",
"thiserror",
"tokio",
"tokio-util",
"tracing",
"utils",
"workspace_hack",
]
[[package]]
name = "autocfg"
version = "1.1.0"
@@ -1774,12 +1748,6 @@ dependencies = [
"termcolor",
]
[[package]]
name = "equivalent"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
[[package]]
name = "errno"
version = "0.3.1"
@@ -2138,9 +2106,9 @@ dependencies = [
[[package]]
name = "h2"
version = "0.3.24"
version = "0.3.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bb2c4422095b67ee78da96fbb51a4cc413b3b25883c7717ff7ca1ab31022c9c9"
checksum = "d357c7ae988e7d2182f7d7871d0b963962420b0678b0997ce7de72001aeab782"
dependencies = [
"bytes",
"fnv",
@@ -2148,7 +2116,7 @@ dependencies = [
"futures-sink",
"futures-util",
"http",
"indexmap 2.0.1",
"indexmap",
"slab",
"tokio",
"tokio-util",
@@ -2421,19 +2389,6 @@ dependencies = [
"tokio-native-tls",
]
[[package]]
name = "hyper-tungstenite"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7cc7dcb1ab67cd336f468a12491765672e61a3b6b148634dbfe2fe8acd3fe7d9"
dependencies = [
"hyper",
"pin-project-lite",
"tokio",
"tokio-tungstenite",
"tungstenite",
]
[[package]]
name = "iana-time-zone"
version = "0.1.56"
@@ -2484,16 +2439,6 @@ dependencies = [
"serde",
]
[[package]]
name = "indexmap"
version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ad227c3af19d4914570ad36d30409928b75967c298feb9ea1969db3a610bb14e"
dependencies = [
"equivalent",
"hashbrown 0.14.0",
]
[[package]]
name = "infer"
version = "0.2.3"
@@ -3172,7 +3117,7 @@ dependencies = [
"fnv",
"futures-channel",
"futures-util",
"indexmap 1.9.3",
"indexmap",
"once_cell",
"pin-project-lite",
"thiserror",
@@ -3253,19 +3198,18 @@ name = "pagebench"
version = "0.1.0"
dependencies = [
"anyhow",
"camino",
"clap",
"futures",
"hdrhistogram",
"humantime",
"humantime-serde",
"pageserver",
"pageserver_api",
"pageserver_client",
"rand 0.8.5",
"serde",
"serde_json",
"tokio",
"tokio-util",
"tracing",
"utils",
"workspace_hack",
@@ -3382,7 +3326,6 @@ dependencies = [
"const_format",
"enum-map",
"hex",
"humantime-serde",
"postgres_ffi",
"rand 0.8.5",
"serde",
@@ -3567,7 +3510,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4dd7d28ee937e54fe3080c91faa1c3a46c06de6252988a7f4592ba2310ef22a4"
dependencies = [
"fixedbitset",
"indexmap 1.9.3",
"indexmap",
]
[[package]]
@@ -3937,7 +3880,6 @@ dependencies = [
"hostname",
"humantime",
"hyper",
"hyper-tungstenite",
"ipnet",
"itertools",
"md5",
@@ -3983,15 +3925,16 @@ dependencies = [
"tokio-postgres",
"tokio-postgres-rustls",
"tokio-rustls",
"tokio-tungstenite",
"tokio-util",
"tracing",
"tracing-opentelemetry",
"tracing-subscriber",
"tracing-utils",
"tungstenite",
"url",
"utils",
"uuid",
"walkdir",
"webpki-roots 0.25.2",
"workspace_hack",
"x509-parser",
@@ -4973,7 +4916,7 @@ dependencies = [
"base64 0.13.1",
"chrono",
"hex",
"indexmap 1.9.3",
"indexmap",
"serde",
"serde_json",
"serde_with_macros",
@@ -5674,7 +5617,7 @@ version = "0.19.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2380d56e8670370eee6566b0bfd4265f65b3f432e8c6d85623f728d4fa31f739"
dependencies = [
"indexmap 1.9.3",
"indexmap",
"serde",
"serde_spanned",
"toml_datetime",
@@ -5766,7 +5709,7 @@ checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c"
dependencies = [
"futures-core",
"futures-util",
"indexmap 1.9.3",
"indexmap",
"pin-project",
"pin-project-lite",
"rand 0.8.5",
@@ -6637,11 +6580,9 @@ dependencies = [
"futures-sink",
"futures-util",
"getrandom 0.2.11",
"hashbrown 0.14.0",
"hex",
"hmac",
"hyper",
"indexmap 1.9.3",
"itertools",
"libc",
"log",

View File

@@ -3,7 +3,6 @@ resolver = "2"
members = [
"compute_tools",
"control_plane",
"control_plane/attachment_service",
"pageserver",
"pageserver/ctl",
"pageserver/client",
@@ -90,7 +89,6 @@ http-types = { version = "2", default-features = false }
humantime = "2.1"
humantime-serde = "1.1.1"
hyper = "0.14"
hyper-tungstenite = "0.11"
inotify = "0.10.2"
ipnet = "2.9.0"
itertools = "0.10"
@@ -157,6 +155,7 @@ tokio-rustls = "0.24"
tokio-stream = "0.1"
tokio-tar = "0.3"
tokio-util = { version = "0.7.10", features = ["io", "rt"] }
tokio-tungstenite = "0.20"
toml = "0.7"
toml_edit = "0.19"
tonic = {version = "0.9", features = ["tls", "tls-roots"]}
@@ -164,6 +163,7 @@ tracing = "0.1"
tracing-error = "0.2.0"
tracing-opentelemetry = "0.19.0"
tracing-subscriber = { version = "0.3", default_features = false, features = ["smallvec", "fmt", "tracing-log", "std", "env-filter", "json"] }
tungstenite = "0.20"
url = "2.2"
uuid = { version = "1.6.1", features = ["v4", "v7", "serde"] }
walkdir = "2.3.2"

View File

@@ -883,10 +883,8 @@ FROM debian:bullseye-slim
RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
echo "postgres:test_console_pass" | chpasswd && \
mkdir /var/db/postgres/compute && mkdir /var/db/postgres/specs && \
mkdir /var/db/postgres/pgbouncer && \
chown -R postgres:postgres /var/db/postgres && \
chmod 0750 /var/db/postgres/compute && \
chmod 0750 /var/db/postgres/pgbouncer && \
echo '/usr/local/lib' >> /etc/ld.so.conf && /sbin/ldconfig && \
# create folder for file cache
mkdir -p -m 777 /neon/cache

View File

@@ -32,6 +32,8 @@
//! -S /var/db/postgres/specs/current.json \
//! -b /usr/local/bin/postgres \
//! -r http://pg-ext-s3-gateway \
//! --pgbouncer-connstr 'host=localhost port=6432 dbname=pgbouncer user=cloud_admin sslmode=disable'
//! --pgbouncer-ini-path /etc/pgbouncer.ini \
//! ```
//!
use std::collections::HashMap;
@@ -110,6 +112,9 @@ fn main() -> Result<()> {
let spec_json = matches.get_one::<String>("spec");
let spec_path = matches.get_one::<String>("spec-path");
let pgbouncer_connstr = matches.get_one::<String>("pgbouncer-connstr");
let pgbouncer_ini_path = matches.get_one::<String>("pgbouncer-ini-path");
// Extract OpenTelemetry context for the startup actions from the
// TRACEPARENT and TRACESTATE env variables, and attach it to the current
// tracing context.
@@ -220,13 +225,15 @@ fn main() -> Result<()> {
ext_remote_storage: ext_remote_storage.map(|s| s.to_string()),
ext_download_progress: RwLock::new(HashMap::new()),
build_tag,
pgbouncer_connstr: pgbouncer_connstr.map(|s| s.to_string()),
pgbouncer_ini_path: pgbouncer_ini_path.map(|s| s.to_string()),
};
let compute = Arc::new(compute_node);
// If this is a pooled VM, prewarm before starting HTTP server and becoming
// available for binding. Prewarming helps Postgres start quicker later,
// available for binding. Prewarming helps postgres start quicker later,
// because QEMU will already have it's memory allocated from the host, and
// the necessary binaries will already be cached.
// the necessary binaries will alreaady be cached.
if !spec_set {
compute.prewarm_postgres()?;
}
@@ -269,11 +276,6 @@ fn main() -> Result<()> {
state.status = ComputeStatus::Init;
compute.state_changed.notify_all();
info!(
"running compute with features: {:?}",
state.pspec.as_ref().unwrap().spec.features
);
drop(state);
// Launch remaining service threads
@@ -286,7 +288,7 @@ fn main() -> Result<()> {
let pg = match compute.start_compute(extension_server_port) {
Ok(pg) => Some(pg),
Err(err) => {
error!("could not start the compute node: {:#}", err);
error!("could not start the compute node: {:?}", err);
let mut state = compute.state.lock().unwrap();
state.error = Some(format!("{:?}", err));
state.status = ComputeStatus::Failed;
@@ -516,6 +518,23 @@ fn cli() -> clap::Command {
)
.value_name("FILECACHE_CONNSTR"),
)
.arg(
Arg::new("pgbouncer-connstr")
.long("pgbouncer-connstr")
.default_value(
"host=localhost port=6432 dbname=pgbouncer user=cloud_admin sslmode=disable",
)
.value_name("PGBOUNCER_CONNSTR"),
)
.arg(
Arg::new("pgbouncer-ini-path")
.long("pgbouncer-ini-path")
// Note: this doesn't match current path for pgbouncer.ini.
// Until we fix it, we need to pass the path explicitly
// or this will be effectively no-op.
.default_value("/etc/pgbouncer.ini")
.value_name("PGBOUNCER_INI_PATH"),
)
}
/// When compute_ctl is killed, send also termination signal to sync-safekeepers

View File

@@ -20,7 +20,7 @@ use futures::StreamExt;
use postgres::{Client, NoTls};
use tokio;
use tokio_postgres;
use tracing::{debug, error, info, instrument, warn};
use tracing::{error, info, instrument, warn};
use utils::id::{TenantId, TimelineId};
use utils::lsn::Lsn;
@@ -71,6 +71,10 @@ pub struct ComputeNode {
// key: ext_archive_name, value: started download time, download_completed?
pub ext_download_progress: RwLock<HashMap<String, (DateTime<Utc>, bool)>>,
pub build_tag: String,
// connection string to pgbouncer to change settings
pub pgbouncer_connstr: Option<String>,
// path to pgbouncer.ini to change settings
pub pgbouncer_ini_path: Option<String>,
}
// store some metrics about download size that might impact startup time
@@ -276,7 +280,7 @@ fn create_neon_superuser(spec: &ComputeSpec, client: &mut Client) -> Result<()>
$$;"#,
roles_decl, database_decl,
);
info!("Neon superuser created: {}", inlinify(&query));
info!("Neon superuser created:\n{}", inlinify(&query));
client
.simple_query(&query)
.map_err(|e| anyhow::anyhow!(e).context(query))?;
@@ -765,8 +769,8 @@ impl ComputeNode {
pub fn reconfigure(&self) -> Result<()> {
let spec = self.state.lock().unwrap().pspec.clone().unwrap().spec;
if let Some(ref pgbouncer_settings) = spec.pgbouncer_settings {
info!("tuning pgbouncer");
if let Some(connstr) = &self.pgbouncer_connstr {
info!("tuning pgbouncer with connstr: {:?}", connstr);
let rt = tokio::runtime::Builder::new_current_thread()
.enable_all()
@@ -775,9 +779,15 @@ impl ComputeNode {
// Spawn a thread to do the tuning,
// so that we don't block the main thread that starts Postgres.
let pgbouncer_settings = pgbouncer_settings.clone();
let pgbouncer_settings = spec.pgbouncer_settings.clone();
let connstr_clone = connstr.clone();
let pgbouncer_ini_path = self.pgbouncer_ini_path.clone();
let _handle = thread::spawn(move || {
let res = rt.block_on(tune_pgbouncer(pgbouncer_settings));
let res = rt.block_on(tune_pgbouncer(
pgbouncer_settings,
&connstr_clone,
pgbouncer_ini_path,
));
if let Err(err) = res {
error!("error while tuning pgbouncer: {err:?}");
}
@@ -842,8 +852,8 @@ impl ComputeNode {
);
// tune pgbouncer
if let Some(pgbouncer_settings) = &pspec.spec.pgbouncer_settings {
info!("tuning pgbouncer");
if let Some(connstr) = &self.pgbouncer_connstr {
info!("tuning pgbouncer with connstr: {:?}", connstr);
let rt = tokio::runtime::Builder::new_current_thread()
.enable_all()
@@ -852,9 +862,15 @@ impl ComputeNode {
// Spawn a thread to do the tuning,
// so that we don't block the main thread that starts Postgres.
let pgbouncer_settings = pgbouncer_settings.clone();
let pgbouncer_settings = pspec.spec.pgbouncer_settings.clone();
let connstr_clone = connstr.clone();
let pgbouncer_ini_path = self.pgbouncer_ini_path.clone();
let _handle = thread::spawn(move || {
let res = rt.block_on(tune_pgbouncer(pgbouncer_settings));
let res = rt.block_on(tune_pgbouncer(
pgbouncer_settings,
&connstr_clone,
pgbouncer_ini_path,
));
if let Err(err) = res {
error!("error while tuning pgbouncer: {err:?}");
}
@@ -948,16 +964,6 @@ impl ComputeNode {
Ok(pg_process)
}
/// Update the `last_active` in the shared state, but ensure that it's a more recent one.
pub fn update_last_active(&self, last_active: Option<DateTime<Utc>>) {
let mut state = self.state.lock().unwrap();
// NB: `Some(<DateTime>)` is always greater than `None`.
if last_active > state.last_active {
state.last_active = last_active;
debug!("set the last compute activity time to: {:?}", last_active);
}
}
// Look for core dumps and collect backtraces.
//
// EKS worker nodes have following core dump settings:
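
Both pgbouncer-tuning call sites in this file reuse the same concurrency pattern: build a current-thread Tokio runtime, move it into a plain `std::thread`, and `block_on` the async tuning there so the thread starting Postgres is never blocked. A reduced sketch of that pattern, with an illustrative helper name:

```rust
use std::future::Future;
use std::thread;

// Run an async job on its own OS thread with a dedicated current-thread
// runtime, mirroring how `reconfigure`/`start_compute` launch pgbouncer tuning.
fn spawn_async_side_task<F, Fut>(make_future: F) -> thread::JoinHandle<()>
where
    F: FnOnce() -> Fut + Send + 'static,
    Fut: Future<Output = ()>,
{
    thread::spawn(move || {
        let rt = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()
            .expect("failed to build tokio runtime");
        // The future is created inside the new thread, so it does not need
        // to be Send; only the closure that builds it does.
        rt.block_on(make_future());
    })
}
```

The diff builds the runtime before `thread::spawn` and moves it in, which works equally well since `Runtime` is `Send`.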

View File

@@ -3,118 +3,88 @@ use std::{thread, time::Duration};
use chrono::{DateTime, Utc};
use postgres::{Client, NoTls};
use tracing::{debug, error, info, warn};
use tracing::{debug, info, warn};
use crate::compute::ComputeNode;
use compute_api::responses::ComputeStatus;
use compute_api::spec::ComputeFeature;
const MONITOR_CHECK_INTERVAL: Duration = Duration::from_millis(500);
// Spin in a loop and figure out the last activity time in the Postgres.
// Then update it in the shared state. This function never errors out.
// NB: the only expected panic is at `Mutex` unwrap(), all other errors
// should be handled gracefully.
// XXX: the only expected panic is at `RwLock` unwrap().
fn watch_compute_activity(compute: &ComputeNode) {
// Suppose that `connstr` doesn't change
let connstr = compute.connstr.as_str();
// During startup and configuration we connect to every Postgres database,
// but we don't want to count this as some user activity. So wait until
// the compute fully started before monitoring activity.
wait_for_postgres_start(compute);
// Define `client` outside of the loop to reuse existing connection if it's active.
let mut client = Client::connect(connstr, NoTls);
let mut sleep = false;
let mut prev_active_time: Option<f64> = None;
let mut prev_sessions: Option<i64> = None;
if compute.has_feature(ComputeFeature::ActivityMonitorExperimental) {
info!("starting experimental activity monitor for {}", connstr);
} else {
info!("starting activity monitor for {}", connstr);
}
info!("watching Postgres activity at {}", connstr);
loop {
// We use `continue` a lot, so it's more convenient to sleep at the top of the loop.
// But skip the first sleep, so we can connect to Postgres immediately.
if sleep {
// Should be outside of the mutex lock to allow others to read while we sleep.
thread::sleep(MONITOR_CHECK_INTERVAL);
} else {
sleep = true;
}
// Should be outside of the write lock to allow others to read while we sleep.
thread::sleep(MONITOR_CHECK_INTERVAL);
match &mut client {
Ok(cli) => {
if cli.is_closed() {
info!("connection to Postgres is closed, trying to reconnect");
info!("connection to postgres closed, trying to reconnect");
// Connection is closed, reconnect and try again.
client = Client::connect(connstr, NoTls);
continue;
}
// This is a new logic, only enable if the feature flag is set.
// TODO: remove this once we are sure that it works OR drop it altogether.
if compute.has_feature(ComputeFeature::ActivityMonitorExperimental) {
// First, check if the total active time or sessions across all databases has changed.
// If it did, it means that user executed some queries. In theory, it can even go down if
// some databases were dropped, but it's still a user activity.
match get_database_stats(cli) {
Ok((active_time, sessions)) => {
let mut detected_activity = false;
// Get all running client backends except ourself, use RFC3339 DateTime format.
let backends = cli
.query(
"SELECT state, to_char(state_change, 'YYYY-MM-DD\"T\"HH24:MI:SS.US\"Z\"') AS state_change
FROM pg_stat_activity
WHERE backend_type = 'client backend'
AND pid != pg_backend_pid()
AND usename != 'cloud_admin';", // XXX: find a better way to filter other monitors?
&[],
);
let mut last_active = compute.state.lock().unwrap().last_active;
prev_active_time = match prev_active_time {
Some(prev_active_time) => {
if active_time != prev_active_time {
detected_activity = true;
}
Some(active_time)
}
None => Some(active_time),
};
prev_sessions = match prev_sessions {
Some(prev_sessions) => {
if sessions != prev_sessions {
detected_activity = true;
}
Some(sessions)
}
None => Some(sessions),
};
if let Ok(backs) = backends {
let mut idle_backs: Vec<DateTime<Utc>> = vec![];
if detected_activity {
// Update the last active time and continue, we don't need to
// check backends state change.
compute.update_last_active(Some(Utc::now()));
continue;
for b in backs.into_iter() {
let state: String = match b.try_get("state") {
Ok(state) => state,
Err(_) => continue,
};
if state == "idle" {
let change: String = match b.try_get("state_change") {
Ok(state_change) => state_change,
Err(_) => continue,
};
let change = DateTime::parse_from_rfc3339(&change);
match change {
Ok(t) => idle_backs.push(t.with_timezone(&Utc)),
Err(e) => {
info!("cannot parse backend state_change DateTime: {}", e);
continue;
}
}
} else {
// Found non-idle backend, so the last activity is NOW.
// Save it and exit the for loop. Also clear the idle backend
// `state_change` timestamps array as it doesn't matter now.
last_active = Some(Utc::now());
idle_backs.clear();
break;
}
Err(e) => {
error!("could not get database statistics: {}", e);
continue;
}
}
// Get idle backend `state_change` with the max timestamp.
if let Some(last) = idle_backs.iter().max() {
last_active = Some(*last);
}
}
// Second, if database statistics is the same, check all backends state change,
// maybe there is some with more recent activity. `get_backends_state_change()`
// can return None or stale timestamp, so it's `compute.update_last_active()`
// responsibility to check if the new timestamp is more recent than the current one.
// This helps us to discover new sessions, that did nothing yet.
match get_backends_state_change(cli) {
Ok(last_active) => {
compute.update_last_active(last_active);
}
Err(e) => {
error!("could not get backends state change: {}", e);
}
}
// Finally, if there are existing (logical) walsenders, do not suspend.
// If there are existing (logical) walsenders, do not suspend.
//
// walproposer doesn't currently show up in pg_stat_replication,
// but protect if it will be
@@ -123,12 +93,11 @@ fn watch_compute_activity(compute: &ComputeNode) {
Ok(r) => match r.try_get::<&str, i64>("count") {
Ok(num_ws) => {
if num_ws > 0 {
compute.update_last_active(Some(Utc::now()));
continue;
last_active = Some(Utc::now());
}
}
Err(e) => {
warn!("failed to parse walsenders count: {:?}", e);
warn!("failed to parse ws count: {:?}", e);
continue;
}
},
@@ -137,31 +106,17 @@ fn watch_compute_activity(compute: &ComputeNode) {
continue;
}
}
//
// Do not suspend compute if autovacuum is running
//
let autovacuum_count_query = "select count(*) from pg_stat_activity where backend_type = 'autovacuum worker'";
match cli.query_one(autovacuum_count_query, &[]) {
Ok(r) => match r.try_get::<&str, i64>("count") {
Ok(num_workers) => {
if num_workers > 0 {
compute.update_last_active(Some(Utc::now()));
continue;
}
}
Err(e) => {
warn!("failed to parse autovacuum workers count: {:?}", e);
continue;
}
},
Err(e) => {
warn!("failed to get list of autovacuum workers: {:?}", e);
continue;
}
// Update the last activity in the shared state if we got a more recent one.
let mut state = compute.state.lock().unwrap();
// NB: `Some(<DateTime>)` is always greater than `None`.
if last_active > state.last_active {
state.last_active = last_active;
debug!("set the last compute activity time to: {:?}", last_active);
}
}
Err(e) => {
debug!("could not connect to Postgres: {}, retrying", e);
debug!("cannot connect to postgres: {}, retrying", e);
// Establish a new connection and try again.
client = Client::connect(connstr, NoTls);
@@ -170,124 +125,12 @@ fn watch_compute_activity(compute: &ComputeNode) {
}
}
// Hang on condition variable waiting until the compute status is `Running`.
fn wait_for_postgres_start(compute: &ComputeNode) {
let mut state = compute.state.lock().unwrap();
while state.status != ComputeStatus::Running {
info!("compute is not running, waiting before monitoring activity");
state = compute.state_changed.wait(state).unwrap();
if state.status == ComputeStatus::Running {
break;
}
}
}
// Figure out the total active time and sessions across all non-system databases.
// Returned tuple is `(active_time, sessions)`.
// It can return `0.0` active time or `0` sessions, which means no user databases exist OR
// it was a start with skipped `pg_catalog` updates and user didn't do any queries
// (or open any sessions) yet.
fn get_database_stats(cli: &mut Client) -> anyhow::Result<(f64, i64)> {
// Filter out `postgres` database as `compute_ctl` and other monitoring tools
// like `postgres_exporter` use it to query Postgres statistics.
// Use explicit 8 bytes type casts to match Rust types.
let stats = cli.query_one(
"SELECT coalesce(sum(active_time), 0.0)::float8 AS total_active_time,
coalesce(sum(sessions), 0)::bigint AS total_sessions
FROM pg_stat_database
WHERE datname NOT IN (
'postgres',
'template0',
'template1'
);",
&[],
);
let stats = match stats {
Ok(stats) => stats,
Err(e) => {
return Err(anyhow::anyhow!("could not query active_time: {}", e));
}
};
let active_time: f64 = match stats.try_get("total_active_time") {
Ok(active_time) => active_time,
Err(e) => return Err(anyhow::anyhow!("could not get total_active_time: {}", e)),
};
let sessions: i64 = match stats.try_get("total_sessions") {
Ok(sessions) => sessions,
Err(e) => return Err(anyhow::anyhow!("could not get total_sessions: {}", e)),
};
Ok((active_time, sessions))
}
// Figure out the most recent state change time across all client backends.
// If there is currently active backend, timestamp will be `Utc::now()`.
// It can return `None`, which means no client backends exist or we were
// unable to parse the timestamp.
fn get_backends_state_change(cli: &mut Client) -> anyhow::Result<Option<DateTime<Utc>>> {
let mut last_active: Option<DateTime<Utc>> = None;
// Get all running client backends except ourself, use RFC3339 DateTime format.
let backends = cli.query(
"SELECT state, to_char(state_change, 'YYYY-MM-DD\"T\"HH24:MI:SS.US\"Z\"') AS state_change
FROM pg_stat_activity
WHERE backend_type = 'client backend'
AND pid != pg_backend_pid()
AND usename != 'cloud_admin';", // XXX: find a better way to filter other monitors?
&[],
);
match backends {
Ok(backs) => {
let mut idle_backs: Vec<DateTime<Utc>> = vec![];
for b in backs.into_iter() {
let state: String = match b.try_get("state") {
Ok(state) => state,
Err(_) => continue,
};
if state == "idle" {
let change: String = match b.try_get("state_change") {
Ok(state_change) => state_change,
Err(_) => continue,
};
let change = DateTime::parse_from_rfc3339(&change);
match change {
Ok(t) => idle_backs.push(t.with_timezone(&Utc)),
Err(e) => {
info!("cannot parse backend state_change DateTime: {}", e);
continue;
}
}
} else {
// Found non-idle backend, so the last activity is NOW.
// Return immediately, no need to check other backends.
return Ok(Some(Utc::now()));
}
}
// Get idle backend `state_change` with the max timestamp.
if let Some(last) = idle_backs.iter().max() {
last_active = Some(*last);
}
}
Err(e) => {
return Err(anyhow::anyhow!("could not query backends: {}", e));
}
}
Ok(last_active)
}
/// Launch a separate compute monitor thread and return its `JoinHandle`.
pub fn launch_monitor(compute: &Arc<ComputeNode>) -> thread::JoinHandle<()> {
let compute = Arc::clone(compute);
pub fn launch_monitor(state: &Arc<ComputeNode>) -> thread::JoinHandle<()> {
let state = Arc::clone(state);
thread::Builder::new()
.name("compute-monitor".into())
.spawn(move || watch_compute_activity(&compute))
.spawn(move || watch_compute_activity(&state))
.expect("cannot launch compute monitor thread")
}
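
Both the removed `update_last_active` helper and the inlined replacement above rely on the same detail: `Option` derives its ordering from the payload and ranks `None` below any `Some`, so the `last_active > state.last_active` comparison handles the not-yet-set case for free. A tiny self-contained illustration, assuming `chrono`:

```rust
use chrono::{DateTime, Utc};

fn main() {
    let mut last_active: Option<DateTime<Utc>> = None;
    let observed = Some(Utc::now());

    // `Option` orders `None` below any `Some`, so a freshly observed
    // timestamp overwrites both an unset value and any older timestamp,
    // which is exactly the comparison the monitor performs.
    if observed > last_active {
        last_active = observed;
    }
    assert_eq!(last_active, observed);
}
```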

View File

@@ -366,7 +366,7 @@ pub fn create_pgdata(pgdata: &str) -> Result<()> {
}
/// Update pgbouncer.ini with provided options
fn update_pgbouncer_ini(
pub fn update_pgbouncer_ini(
pgbouncer_config: HashMap<String, String>,
pgbouncer_ini_path: &str,
) -> Result<()> {
@@ -375,10 +375,6 @@ fn update_pgbouncer_ini(
for (option_name, value) in pgbouncer_config.iter() {
section.insert(option_name, value);
debug!(
"Updating pgbouncer.ini with new values {}={}",
option_name, value
);
}
conf.write_to_file(pgbouncer_ini_path)?;
@@ -388,79 +384,48 @@ fn update_pgbouncer_ini(
/// Tune pgbouncer.
/// 1. Apply new config using pgbouncer admin console
/// 2. Add new values to pgbouncer.ini to preserve them after restart
pub async fn tune_pgbouncer(pgbouncer_config: HashMap<String, String>) -> Result<()> {
let pgbouncer_connstr = if std::env::var_os("AUTOSCALING").is_some() {
// for VMs use pgbouncer specific way to connect to
// pgbouncer admin console without password
// when pgbouncer is running under the same user.
"host=/tmp port=6432 dbname=pgbouncer user=pgbouncer".to_string()
} else {
// for k8s use normal connection string with password
// to connect to pgbouncer admin console
let mut pgbouncer_connstr =
"host=localhost port=6432 dbname=pgbouncer user=postgres sslmode=disable".to_string();
if let Ok(pass) = std::env::var("PGBOUNCER_PASSWORD") {
pgbouncer_connstr.push_str(format!(" password={}", pass).as_str());
}
pgbouncer_connstr
};
info!(
"Connecting to pgbouncer with connection string: {}",
pgbouncer_connstr
);
// connect to pgbouncer, retrying several times
// because pgbouncer may not be ready yet
let mut retries = 3;
let client = loop {
match tokio_postgres::connect(&pgbouncer_connstr, NoTls).await {
Ok((client, connection)) => {
tokio::spawn(async move {
if let Err(e) = connection.await {
eprintln!("connection error: {}", e);
}
});
break client;
pub async fn tune_pgbouncer(
pgbouncer_settings: Option<HashMap<String, String>>,
pgbouncer_connstr: &str,
pgbouncer_ini_path: Option<String>,
) -> Result<()> {
if let Some(pgbouncer_config) = pgbouncer_settings {
// Apply new config
let connect_result = tokio_postgres::connect(pgbouncer_connstr, NoTls).await;
let (client, connection) = connect_result.unwrap();
tokio::spawn(async move {
if let Err(e) = connection.await {
eprintln!("connection error: {}", e);
}
Err(e) => {
if retries == 0 {
return Err(e.into());
}
error!("Failed to connect to pgbouncer: pgbouncer_connstr {}", e);
retries -= 1;
tokio::time::sleep(Duration::from_secs(1)).await;
}
}
};
});
// Apply new config
for (option_name, value) in pgbouncer_config.iter() {
let query = format!("SET {}={}", option_name, value);
// keep this log line for debugging purposes
info!("Applying pgbouncer setting change: {}", query);
if let Err(err) = client.simple_query(&query).await {
// Don't fail on error, just print it into log
error!(
"Failed to apply pgbouncer setting change: {}, {}",
query, err
for (option_name, value) in pgbouncer_config.iter() {
info!(
"Applying pgbouncer setting change: {} = {}",
option_name, value
);
};
}
let query = format!("SET {} = {}", option_name, value);
// save values to pgbouncer.ini
// so that they are preserved after pgbouncer restart
let pgbouncer_ini_path = if std::env::var_os("AUTOSCALING").is_some() {
// in VMs we use /etc/pgbouncer.ini
"/etc/pgbouncer.ini".to_string()
} else {
// in pods we use /var/db/postgres/pgbouncer/pgbouncer.ini
// this is a shared volume between pgbouncer and postgres containers
// FIXME: fix permissions for this file
"/var/db/postgres/pgbouncer/pgbouncer.ini".to_string()
};
update_pgbouncer_ini(pgbouncer_config, &pgbouncer_ini_path)?;
let result = client.simple_query(&query).await;
info!("Applying pgbouncer setting change: {}", query);
info!("pgbouncer setting change result: {:?}", result);
if let Err(err) = result {
// Don't fail on error, just print it into log
error!(
"Failed to apply pgbouncer setting change: {}, {}",
query, err
);
};
}
// save values to pgbouncer.ini
// so that they are preserved after pgbouncer restart
if let Some(pgbouncer_ini_path) = pgbouncer_ini_path {
update_pgbouncer_ini(pgbouncer_config, &pgbouncer_ini_path)?;
}
}
Ok(())
}
@@ -491,7 +456,7 @@ pub fn handle_postgres_logs(stderr: std::process::ChildStderr) -> JoinHandle<()>
/// - no new lines were written for the last second
async fn handle_postgres_logs_async(stderr: tokio::process::ChildStderr) -> Result<()> {
let mut lines = tokio::io::BufReader::new(stderr).lines();
let timeout_duration = Duration::from_millis(100);
let timeout_duration = Duration::from_secs(1);
let ts_regex =
regex::Regex::new(r"^\d+-\d{2}-\d{2} \d{2}:\d{2}:\d{2}").expect("regex is valid");
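
For orientation, the admin-console interaction at the heart of both versions of `tune_pgbouncer` boils down to: connect with `tokio_postgres`, drive the connection on a background task, issue `SET name=value` per option, and log failures instead of aborting. A stripped-down sketch, with an illustrative function name and logging:

```rust
use std::collections::HashMap;
use tokio_postgres::NoTls;

async fn apply_pgbouncer_settings(
    connstr: &str,
    settings: &HashMap<String, String>,
) -> Result<(), tokio_postgres::Error> {
    let (client, connection) = tokio_postgres::connect(connstr, NoTls).await?;
    // The connection object drives the socket; run it in the background.
    tokio::spawn(async move {
        if let Err(e) = connection.await {
            eprintln!("connection error: {}", e);
        }
    });
    for (name, value) in settings {
        let query = format!("SET {}={}", name, value);
        // Mirror the diff: a failed SET is logged, not fatal.
        if let Err(err) = client.simple_query(&query).await {
            eprintln!("failed to apply {}: {}", query, err);
        }
    }
    Ok(())
}
```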

View File

@@ -190,20 +190,18 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
// Print a list of existing Postgres roles (only in debug mode)
if span_enabled!(Level::INFO) {
let mut vec = Vec::new();
info!("postgres roles:");
for r in &existing_roles {
vec.push(format!(
"{}:{}",
info!(
" - {}:{}",
r.name,
if r.encrypted_password.is_some() {
"[FILTERED]"
} else {
"(null)"
}
));
);
}
info!("postgres roles (total {}): {:?}", vec.len(), vec);
}
// Process delta operations first
@@ -241,10 +239,7 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
// Refresh Postgres roles info to handle possible roles renaming
let existing_roles: Vec<Role> = get_existing_roles(&mut xact)?;
info!(
"handling cluster spec roles (total {})",
spec.cluster.roles.len()
);
info!("cluster spec roles:");
for role in &spec.cluster.roles {
let name = &role.name;
// XXX: with a limited number of roles it is fine, but consider making it a HashMap
@@ -307,7 +302,7 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
"CREATE ROLE {} INHERIT CREATEROLE CREATEDB BYPASSRLS REPLICATION IN ROLE neon_superuser",
name.pg_quote()
);
info!("running role create query: '{}'", &query);
info!("role create query: '{}'", &query);
query.push_str(&role.to_pg_options());
xact.execute(query.as_str(), &[])?;
}
@@ -324,7 +319,7 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
RoleAction::Create => " -> create",
RoleAction::Update => " -> update",
};
info!(" - {}:{}{}", name, pwd, action_str);
info!(" - {}:{}{}", name, pwd, action_str);
}
}
@@ -433,11 +428,10 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
// Print a list of existing Postgres databases (only in debug mode)
if span_enabled!(Level::INFO) {
let mut vec = Vec::new();
info!("postgres databases:");
for (dbname, db) in &existing_dbs {
vec.push(format!("{}:{}", dbname, db.owner));
info!(" {}:{}", dbname, db.owner);
}
info!("postgres databases (total {}): {:?}", vec.len(), vec);
}
// Process delta operations first
@@ -509,10 +503,7 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
// Refresh Postgres databases info to handle possible renames
let existing_dbs = get_existing_dbs(client)?;
info!(
"handling cluster spec databases (total {})",
spec.cluster.databases.len()
);
info!("cluster spec databases:");
for db in &spec.cluster.databases {
let name = &db.name;
let pg_db = existing_dbs.get(name);
@@ -571,7 +562,7 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
DatabaseAction::Create => " -> create",
DatabaseAction::Update => " -> update",
};
info!(" - {}:{}{}", db.name, db.owner, action_str);
info!(" - {}:{}{}", db.name, db.owner, action_str);
}
}

View File

@@ -1,32 +0,0 @@
[package]
name = "attachment_service"
version = "0.1.0"
edition.workspace = true
license.workspace = true
[dependencies]
anyhow.workspace = true
camino.workspace = true
clap.workspace = true
futures.workspace = true
git-version.workspace = true
hyper.workspace = true
pageserver_api.workspace = true
pageserver_client.workspace = true
postgres_connection.workspace = true
serde.workspace = true
serde_json.workspace = true
thiserror.workspace = true
tokio.workspace = true
tokio-util.workspace = true
tracing.workspace = true
# TODO: remove this after DB persistence is added, it is only used for
# a parsing function when loading pageservers from neon_local LocalEnv
postgres_backend.workspace = true
utils = { path = "../../libs/utils/" }
metrics = { path = "../../libs/metrics/" }
control_plane = { path = ".." }
workspace_hack = { version = "0.1", path = "../../workspace_hack" }

View File

@@ -1,116 +0,0 @@
use std::collections::HashMap;
use control_plane::endpoint::ComputeControlPlane;
use control_plane::local_env::LocalEnv;
use pageserver_api::shard::{ShardCount, ShardIndex, TenantShardId};
use postgres_connection::parse_host_port;
use utils::id::{NodeId, TenantId};
pub(super) struct ComputeHookTenant {
shards: Vec<(ShardIndex, NodeId)>,
}
impl ComputeHookTenant {
pub(super) async fn maybe_reconfigure(&mut self, tenant_id: TenantId) -> anyhow::Result<()> {
// Find the highest shard count and drop any shards that aren't
// for that shard count.
let shard_count = self.shards.iter().map(|(k, _v)| k.shard_count).max();
let Some(shard_count) = shard_count else {
// No shards, nothing to do.
tracing::info!("ComputeHookTenant::maybe_reconfigure: no shards");
return Ok(());
};
self.shards.retain(|(k, _v)| k.shard_count == shard_count);
self.shards
.sort_by_key(|(shard, _node_id)| shard.shard_number);
if self.shards.len() == shard_count.0 as usize || shard_count == ShardCount(0) {
// We have pageservers for all the shards: proceed to reconfigure compute
let env = match LocalEnv::load_config() {
Ok(e) => e,
Err(e) => {
tracing::warn!(
"Couldn't load neon_local config, skipping compute update ({e})"
);
return Ok(());
}
};
let cplane = ComputeControlPlane::load(env.clone())
.expect("Error loading compute control plane");
let compute_pageservers = self
.shards
.iter()
.map(|(_shard, node_id)| {
let ps_conf = env
.get_pageserver_conf(*node_id)
.expect("Unknown pageserver");
let (pg_host, pg_port) = parse_host_port(&ps_conf.listen_pg_addr)
.expect("Unable to parse listen_pg_addr");
(pg_host, pg_port.unwrap_or(5432))
})
.collect::<Vec<_>>();
for (endpoint_name, endpoint) in &cplane.endpoints {
if endpoint.tenant_id == tenant_id && endpoint.status() == "running" {
tracing::info!("🔁 Reconfiguring endpoint {}", endpoint_name,);
endpoint.reconfigure(compute_pageservers.clone()).await?;
}
}
} else {
tracing::info!(
"ComputeHookTenant::maybe_reconfigure: not enough shards ({}/{})",
self.shards.len(),
shard_count.0
);
}
Ok(())
}
}
/// The compute hook is a destination for notifications about changes to tenant:pageserver
/// mapping. It aggregates updates for the shards in a tenant, and when appropriate reconfigures
/// the compute connection string.
pub(super) struct ComputeHook {
state: tokio::sync::Mutex<HashMap<TenantId, ComputeHookTenant>>,
}
impl ComputeHook {
pub(super) fn new() -> Self {
Self {
state: Default::default(),
}
}
pub(super) async fn notify(
&self,
tenant_shard_id: TenantShardId,
node_id: NodeId,
) -> anyhow::Result<()> {
tracing::info!("ComputeHook::notify: {}->{}", tenant_shard_id, node_id);
let mut locked = self.state.lock().await;
let entry = locked
.entry(tenant_shard_id.tenant_id)
.or_insert_with(|| ComputeHookTenant { shards: Vec::new() });
let shard_index = ShardIndex {
shard_count: tenant_shard_id.shard_count,
shard_number: tenant_shard_id.shard_number,
};
let mut set = false;
for (existing_shard, existing_node) in &mut entry.shards {
if *existing_shard == shard_index {
*existing_node = node_id;
set = true;
}
}
if !set {
entry.shards.push((shard_index, node_id));
}
entry.maybe_reconfigure(tenant_shard_id.tenant_id).await
}
}

View File

@@ -1,218 +0,0 @@
use crate::reconciler::ReconcileError;
use crate::service::Service;
use hyper::{Body, Request, Response};
use hyper::{StatusCode, Uri};
use pageserver_api::models::{TenantCreateRequest, TimelineCreateRequest};
use pageserver_api::shard::TenantShardId;
use std::sync::Arc;
use utils::auth::SwappableJwtAuth;
use utils::http::endpoint::{auth_middleware, request_span};
use utils::http::request::parse_request_param;
use utils::id::TenantId;
use utils::{
http::{
endpoint::{self},
error::ApiError,
json::{json_request, json_response},
RequestExt, RouterBuilder,
},
id::NodeId,
};
use pageserver_api::control_api::{ReAttachRequest, ValidateRequest};
use control_plane::attachment_service::{
AttachHookRequest, InspectRequest, NodeConfigureRequest, NodeRegisterRequest,
TenantShardMigrateRequest,
};
/// State available to HTTP request handlers
#[derive(Clone)]
pub struct HttpState {
service: Arc<crate::service::Service>,
auth: Option<Arc<SwappableJwtAuth>>,
allowlist_routes: Vec<Uri>,
}
impl HttpState {
pub fn new(service: Arc<crate::service::Service>, auth: Option<Arc<SwappableJwtAuth>>) -> Self {
let allowlist_routes = ["/status"]
.iter()
.map(|v| v.parse().unwrap())
.collect::<Vec<_>>();
Self {
service,
auth,
allowlist_routes,
}
}
}
#[inline(always)]
fn get_state(request: &Request<Body>) -> &HttpState {
request
.data::<Arc<HttpState>>()
.expect("unknown state type")
.as_ref()
}
/// Pageserver calls into this on startup, to learn which tenants it should attach
async fn handle_re_attach(mut req: Request<Body>) -> Result<Response<Body>, ApiError> {
let reattach_req = json_request::<ReAttachRequest>(&mut req).await?;
let state = get_state(&req);
json_response(
StatusCode::OK,
state
.service
.re_attach(reattach_req)
.await
.map_err(ApiError::InternalServerError)?,
)
}
/// Pageserver calls into this before doing deletions, to confirm that it still
/// holds the latest generation for the tenants with deletions enqueued
async fn handle_validate(mut req: Request<Body>) -> Result<Response<Body>, ApiError> {
let validate_req = json_request::<ValidateRequest>(&mut req).await?;
let state = get_state(&req);
json_response(StatusCode::OK, state.service.validate(validate_req))
}
/// Call into this before attaching a tenant to a pageserver, to acquire a generation number
/// (in the real control plane this is unnecessary, because the same program is managing
/// generation numbers and doing attachments).
async fn handle_attach_hook(mut req: Request<Body>) -> Result<Response<Body>, ApiError> {
let attach_req = json_request::<AttachHookRequest>(&mut req).await?;
let state = get_state(&req);
json_response(
StatusCode::OK,
state
.service
.attach_hook(attach_req)
.await
.map_err(ApiError::InternalServerError)?,
)
}
async fn handle_inspect(mut req: Request<Body>) -> Result<Response<Body>, ApiError> {
let inspect_req = json_request::<InspectRequest>(&mut req).await?;
let state = get_state(&req);
json_response(StatusCode::OK, state.service.inspect(inspect_req))
}
async fn handle_tenant_create(mut req: Request<Body>) -> Result<Response<Body>, ApiError> {
let create_req = json_request::<TenantCreateRequest>(&mut req).await?;
let state = get_state(&req);
json_response(
StatusCode::OK,
state.service.tenant_create(create_req).await?,
)
}
async fn handle_tenant_timeline_create(mut req: Request<Body>) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
let create_req = json_request::<TimelineCreateRequest>(&mut req).await?;
let state = get_state(&req);
json_response(
StatusCode::OK,
state
.service
.tenant_timeline_create(tenant_id, create_req)
.await?,
)
}
async fn handle_tenant_locate(req: Request<Body>) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
let state = get_state(&req);
json_response(StatusCode::OK, state.service.tenant_locate(tenant_id)?)
}
async fn handle_node_register(mut req: Request<Body>) -> Result<Response<Body>, ApiError> {
let register_req = json_request::<NodeRegisterRequest>(&mut req).await?;
let state = get_state(&req);
state.service.node_register(register_req).await?;
json_response(StatusCode::OK, ())
}
async fn handle_node_configure(mut req: Request<Body>) -> Result<Response<Body>, ApiError> {
let node_id: NodeId = parse_request_param(&req, "node_id")?;
let config_req = json_request::<NodeConfigureRequest>(&mut req).await?;
if node_id != config_req.node_id {
return Err(ApiError::BadRequest(anyhow::anyhow!(
"Path and body node_id differ"
)));
}
let state = get_state(&req);
json_response(StatusCode::OK, state.service.node_configure(config_req)?)
}
async fn handle_tenant_shard_migrate(mut req: Request<Body>) -> Result<Response<Body>, ApiError> {
let tenant_shard_id: TenantShardId = parse_request_param(&req, "tenant_shard_id")?;
let migrate_req = json_request::<TenantShardMigrateRequest>(&mut req).await?;
let state = get_state(&req);
json_response(
StatusCode::OK,
state
.service
.tenant_shard_migrate(tenant_shard_id, migrate_req)
.await?,
)
}
/// Status endpoint is just used for checking that our HTTP listener is up
async fn handle_status(_req: Request<Body>) -> Result<Response<Body>, ApiError> {
json_response(StatusCode::OK, ())
}
impl From<ReconcileError> for ApiError {
fn from(value: ReconcileError) -> Self {
ApiError::Conflict(format!("Reconciliation error: {}", value))
}
}
pub fn make_router(
service: Arc<Service>,
auth: Option<Arc<SwappableJwtAuth>>,
) -> RouterBuilder<hyper::Body, ApiError> {
let mut router = endpoint::make_router();
if auth.is_some() {
router = router.middleware(auth_middleware(|request| {
let state = get_state(request);
if state.allowlist_routes.contains(request.uri()) {
None
} else {
state.auth.as_deref()
}
}))
}
router
.data(Arc::new(HttpState::new(service, auth)))
.get("/status", |r| request_span(r, handle_status))
.post("/re-attach", |r| request_span(r, handle_re_attach))
.post("/validate", |r| request_span(r, handle_validate))
.post("/attach-hook", |r| request_span(r, handle_attach_hook))
.post("/inspect", |r| request_span(r, handle_inspect))
.post("/node", |r| request_span(r, handle_node_register))
.put("/node/:node_id/config", |r| {
request_span(r, handle_node_configure)
})
.post("/tenant", |r| request_span(r, handle_tenant_create))
.post("/tenant/:tenant_id/timeline", |r| {
request_span(r, handle_tenant_timeline_create)
})
.get("/tenant/:tenant_id/locate", |r| {
request_span(r, handle_tenant_locate)
})
.put("/tenant/:tenant_shard_id/migrate", |r| {
request_span(r, handle_tenant_shard_migrate)
})
}

View File

@@ -1,57 +0,0 @@
use serde::{Deserialize, Serialize};
use utils::seqwait::MonotonicCounter;
mod compute_hook;
pub mod http;
mod node;
pub mod persistence;
mod reconciler;
mod scheduler;
pub mod service;
mod tenant_state;
#[derive(Clone, Serialize, Deserialize)]
enum PlacementPolicy {
/// Cheapest way to attach a tenant: just one pageserver, no secondary
Single,
/// Production-ready way to attach a tenant: one attached pageserver and
/// some number of secondaries.
Double(usize),
}
#[derive(Ord, PartialOrd, Eq, PartialEq, Copy, Clone)]
struct Sequence(u64);
impl Sequence {
fn initial() -> Self {
Self(0)
}
}
impl std::fmt::Display for Sequence {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "{}", self.0)
}
}
impl MonotonicCounter<Sequence> for Sequence {
fn cnt_advance(&mut self, v: Sequence) {
assert!(*self <= v);
*self = v;
}
fn cnt_value(&self) -> Sequence {
*self
}
}
impl Sequence {
fn next(&self) -> Sequence {
Sequence(self.0 + 1)
}
}
impl Default for PlacementPolicy {
fn default() -> Self {
PlacementPolicy::Double(1)
}
}

View File

@@ -1,100 +0,0 @@
/// The attachment service mimics the aspects of the control plane API
/// that are required for a pageserver to operate.
///
/// This enables running & testing pageservers without a full-blown
/// deployment of the Neon cloud platform.
///
use anyhow::anyhow;
use attachment_service::http::make_router;
use attachment_service::persistence::Persistence;
use attachment_service::service::{Config, Service};
use camino::Utf8PathBuf;
use clap::Parser;
use metrics::launch_timestamp::LaunchTimestamp;
use std::sync::Arc;
use utils::auth::{JwtAuth, SwappableJwtAuth};
use utils::logging::{self, LogFormat};
use utils::signals::{ShutdownSignals, Signal};
use utils::{project_build_tag, project_git_version, tcp_listener};
project_git_version!(GIT_VERSION);
project_build_tag!(BUILD_TAG);
#[derive(Parser)]
#[command(author, version, about, long_about = None)]
#[command(arg_required_else_help(true))]
struct Cli {
/// Host and port to listen on, like `127.0.0.1:1234`
#[arg(short, long)]
listen: std::net::SocketAddr,
/// Path to public key for JWT authentication of clients
#[arg(long)]
public_key: Option<camino::Utf8PathBuf>,
/// Token for authenticating this service with the pageservers it controls
#[arg(short, long)]
jwt_token: Option<String>,
/// Path to the .json file to store state (will be created if it doesn't exist)
#[arg(short, long)]
path: Utf8PathBuf,
}
#[tokio::main]
async fn main() -> anyhow::Result<()> {
let launch_ts = Box::leak(Box::new(LaunchTimestamp::generate()));
logging::init(
LogFormat::Plain,
logging::TracingErrorLayerEnablement::Disabled,
logging::Output::Stdout,
)?;
let args = Cli::parse();
tracing::info!(
"version: {}, launch_timestamp: {}, build_tag {}, state at {}, listening on {}",
GIT_VERSION,
launch_ts.to_string(),
BUILD_TAG,
args.path,
args.listen
);
let config = Config {
jwt_token: args.jwt_token,
};
let persistence = Arc::new(Persistence::new(&args.path).await);
let service = Service::spawn(config, persistence).await?;
let http_listener = tcp_listener::bind(args.listen)?;
let auth = if let Some(public_key_path) = &args.public_key {
let jwt_auth = JwtAuth::from_key_path(public_key_path)?;
Some(Arc::new(SwappableJwtAuth::new(jwt_auth)))
} else {
None
};
let router = make_router(service, auth)
.build()
.map_err(|err| anyhow!(err))?;
let service = utils::http::RouterService::new(router).unwrap();
let server = hyper::Server::from_tcp(http_listener)?.serve(service);
tracing::info!("Serving on {0}", args.listen);
tokio::task::spawn(server);
ShutdownSignals::handle(|signal| match signal {
Signal::Interrupt | Signal::Terminate | Signal::Quit => {
tracing::info!("Got {}. Terminating", signal.name());
// We're just a test helper: no graceful shutdown.
std::process::exit(0);
}
})?;
Ok(())
}

View File

@@ -1,37 +0,0 @@
use control_plane::attachment_service::{NodeAvailability, NodeSchedulingPolicy};
use utils::id::NodeId;
#[derive(Clone)]
pub(crate) struct Node {
pub(crate) id: NodeId,
pub(crate) availability: NodeAvailability,
pub(crate) scheduling: NodeSchedulingPolicy,
pub(crate) listen_http_addr: String,
pub(crate) listen_http_port: u16,
pub(crate) listen_pg_addr: String,
pub(crate) listen_pg_port: u16,
}
impl Node {
pub(crate) fn base_url(&self) -> String {
format!("http://{}:{}", self.listen_http_addr, self.listen_http_port)
}
/// Is this node elegible to have work scheduled onto it?
pub(crate) fn may_schedule(&self) -> bool {
match self.availability {
NodeAvailability::Active => {}
NodeAvailability::Offline => return false,
}
match self.scheduling {
NodeSchedulingPolicy::Active => true,
NodeSchedulingPolicy::Draining => false,
NodeSchedulingPolicy::Filling => true,
NodeSchedulingPolicy::Pause => false,
}
}
}

View File

@@ -1,272 +0,0 @@
use std::{collections::HashMap, str::FromStr};
use camino::{Utf8Path, Utf8PathBuf};
use control_plane::{
attachment_service::{NodeAvailability, NodeSchedulingPolicy},
local_env::LocalEnv,
};
use pageserver_api::{
models::TenantConfig,
shard::{ShardCount, ShardNumber, TenantShardId},
};
use postgres_connection::parse_host_port;
use serde::{Deserialize, Serialize};
use utils::{
generation::Generation,
id::{NodeId, TenantId},
};
use crate::{node::Node, PlacementPolicy};
/// Placeholder for storage. This will be replaced with a database client.
pub struct Persistence {
state: std::sync::Mutex<PersistentState>,
}
// Top level state available to all HTTP handlers
#[derive(Serialize, Deserialize)]
struct PersistentState {
tenants: HashMap<TenantShardId, TenantShardPersistence>,
#[serde(skip)]
path: Utf8PathBuf,
}
/// A convenience for serializing the state inside a sync lock, and then
/// writing it to disk outside of the lock. This will go away when switching
/// to a database backend.
struct PendingWrite {
bytes: Vec<u8>,
path: Utf8PathBuf,
}
impl PendingWrite {
async fn commit(&self) -> anyhow::Result<()> {
tokio::fs::write(&self.path, &self.bytes).await?;
Ok(())
}
}
impl PersistentState {
fn save(&self) -> PendingWrite {
PendingWrite {
bytes: serde_json::to_vec(self).expect("Serialization error"),
path: self.path.clone(),
}
}
async fn load(path: &Utf8Path) -> anyhow::Result<Self> {
let bytes = tokio::fs::read(path).await?;
let mut decoded = serde_json::from_slice::<Self>(&bytes)?;
decoded.path = path.to_owned();
for (tenant_id, tenant) in &mut decoded.tenants {
// Backward compat: an old attachments.json from before PR #6251, replace
// empty strings with proper defaults.
if tenant.tenant_id.is_empty() {
tenant.tenant_id = format!("{}", tenant_id);
tenant.config = serde_json::to_string(&TenantConfig::default())?;
tenant.placement_policy = serde_json::to_string(&PlacementPolicy::default())?;
}
}
Ok(decoded)
}
async fn load_or_new(path: &Utf8Path) -> Self {
match Self::load(path).await {
Ok(s) => {
tracing::info!("Loaded state file at {}", path);
s
}
Err(e)
if e.downcast_ref::<std::io::Error>()
.map(|e| e.kind() == std::io::ErrorKind::NotFound)
.unwrap_or(false) =>
{
tracing::info!("Will create state file at {}", path);
Self {
tenants: HashMap::new(),
path: path.to_owned(),
}
}
Err(e) => {
panic!("Failed to load state from '{}': {e:#} (maybe your .neon/ dir was written by an older version?)", path)
}
}
}
}
impl Persistence {
pub async fn new(path: &Utf8Path) -> Self {
let state = PersistentState::load_or_new(path).await;
Self {
state: std::sync::Mutex::new(state),
}
}
/// When registering a node, persist it so that on next start we will be able to
/// iterate over known nodes to synchronize their tenant shard states with our observed state.
pub(crate) async fn insert_node(&self, _node: &Node) -> anyhow::Result<()> {
// TODO: node persitence will come with database backend
Ok(())
}
/// At startup, we populate the service's list of nodes, and use this list to call into
/// each node to do an initial reconciliation of the state of the world with our in-memory
/// observed state.
pub(crate) async fn list_nodes(&self) -> anyhow::Result<Vec<Node>> {
let env = LocalEnv::load_config()?;
// TODO: node persitence will come with database backend
// XXX hack: enable test_backward_compatibility to work by populating our list of
// nodes from LocalEnv when it is not present in persistent storage. Otherwise at
// first startup in the compat test, we may have shards but no nodes.
let mut result = Vec::new();
tracing::info!(
"Loaded {} pageserver nodes from LocalEnv",
env.pageservers.len()
);
for ps_conf in env.pageservers {
let (pg_host, pg_port) =
parse_host_port(&ps_conf.listen_pg_addr).expect("Unable to parse listen_pg_addr");
let (http_host, http_port) = parse_host_port(&ps_conf.listen_http_addr)
.expect("Unable to parse listen_http_addr");
result.push(Node {
id: ps_conf.id,
listen_pg_addr: pg_host.to_string(),
listen_pg_port: pg_port.unwrap_or(5432),
listen_http_addr: http_host.to_string(),
listen_http_port: http_port.unwrap_or(80),
availability: NodeAvailability::Active,
scheduling: NodeSchedulingPolicy::Active,
});
}
Ok(result)
}
/// At startup, we populate our map of tenant shards from persistent storage.
pub(crate) async fn list_tenant_shards(&self) -> anyhow::Result<Vec<TenantShardPersistence>> {
let locked = self.state.lock().unwrap();
Ok(locked.tenants.values().cloned().collect())
}
/// Tenants must be persisted before we schedule them for the first time. This enables us
/// to correctly retain generation monotonicity, and the externally provided placement policy & config.
pub(crate) async fn insert_tenant_shards(
&self,
shards: Vec<TenantShardPersistence>,
) -> anyhow::Result<()> {
let write = {
let mut locked = self.state.lock().unwrap();
for shard in shards {
let tenant_shard_id = TenantShardId {
tenant_id: TenantId::from_str(shard.tenant_id.as_str())?,
shard_number: ShardNumber(shard.shard_number as u8),
shard_count: ShardCount(shard.shard_count as u8),
};
locked.tenants.insert(tenant_shard_id, shard);
}
locked.save()
};
write.commit().await?;
Ok(())
}
/// Reconciler calls this immediately before attaching to a new pageserver, to acquire a unique, monotonically
/// advancing generation number. We also store the NodeId for which the generation was issued, so that in
/// [`Self::re_attach`] we can do a bulk UPDATE on the generations for that node.
pub(crate) async fn increment_generation(
&self,
tenant_shard_id: TenantShardId,
node_id: Option<NodeId>,
) -> anyhow::Result<Generation> {
let (write, gen) = {
let mut locked = self.state.lock().unwrap();
let Some(shard) = locked.tenants.get_mut(&tenant_shard_id) else {
anyhow::bail!("Tried to increment generation of unknown shard");
};
// If we're called with a None pageserver, we need only update the generation
// record to disassociate it with this pageserver, not actually increment the number, as
// the increment is guaranteed to happen the next time this tenant is attached.
if node_id.is_some() {
shard.generation += 1;
}
shard.generation_pageserver = node_id;
let gen = Generation::new(shard.generation);
(locked.save(), gen)
};
write.commit().await?;
Ok(gen)
}
pub(crate) async fn re_attach(
&self,
node_id: NodeId,
) -> anyhow::Result<HashMap<TenantShardId, Generation>> {
let (write, result) = {
let mut result = HashMap::new();
let mut locked = self.state.lock().unwrap();
for (tenant_shard_id, shard) in locked.tenants.iter_mut() {
if shard.generation_pageserver == Some(node_id) {
shard.generation += 1;
result.insert(*tenant_shard_id, Generation::new(shard.generation));
}
}
(locked.save(), result)
};
write.commit().await?;
Ok(result)
}
// TODO: when we start shard splitting, we must durably mark the tenant so that
// on restart, we know that we must go through recovery (list shards that exist
// and pick up where we left off and/or revert to parent shards).
#[allow(dead_code)]
pub(crate) async fn begin_shard_split(&self, _tenant_id: TenantId) -> anyhow::Result<()> {
todo!();
}
// TODO: when we finish shard splitting, we must atomically clean up the old shards
// and insert the new shards, and clear the splitting marker.
#[allow(dead_code)]
pub(crate) async fn complete_shard_split(&self, _tenant_id: TenantId) -> anyhow::Result<()> {
todo!();
}
}
/// Parts of [`crate::tenant_state::TenantState`] that are stored durably
#[derive(Serialize, Deserialize, Clone)]
pub(crate) struct TenantShardPersistence {
#[serde(default)]
pub(crate) tenant_id: String,
#[serde(default)]
pub(crate) shard_number: i32,
#[serde(default)]
pub(crate) shard_count: i32,
#[serde(default)]
pub(crate) shard_stripe_size: i32,
// Currently attached pageserver
#[serde(rename = "pageserver")]
pub(crate) generation_pageserver: Option<NodeId>,
// Latest generation number: next time we attach, increment this
// and use the incremented number when attaching
pub(crate) generation: u32,
#[serde(default)]
pub(crate) placement_policy: String,
#[serde(default)]
pub(crate) config: String,
}
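
The removed `PendingWrite` type exists so the JSON snapshot is serialized while the `std::sync::Mutex` is held, but the disk write happens after the guard is dropped, keeping a blocking lock away from async I/O. A reduced sketch of the same pattern, with illustrative types rather than the service's own:

```rust
use std::path::{Path, PathBuf};
use std::sync::Mutex;

struct Pending {
    bytes: Vec<u8>,
    path: PathBuf,
}

async fn mutate_and_persist(
    state: &Mutex<Vec<String>>,
    path: &Path,
    item: String,
) -> std::io::Result<()> {
    // Serialize inside the short-lived sync lock...
    let pending = {
        let mut locked = state.lock().unwrap();
        locked.push(item);
        Pending {
            bytes: serde_json::to_vec(&*locked).expect("serialization error"),
            path: path.to_owned(),
        }
    };
    // ...and perform the slow async write after the guard is released.
    tokio::fs::write(&pending.path, &pending.bytes).await
}
```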

View File

@@ -1,495 +0,0 @@
use crate::persistence::Persistence;
use crate::service;
use control_plane::attachment_service::NodeAvailability;
use pageserver_api::models::{
LocationConfig, LocationConfigMode, LocationConfigSecondary, TenantConfig,
};
use pageserver_api::shard::{ShardIdentity, TenantShardId};
use pageserver_client::mgmt_api;
use std::collections::HashMap;
use std::sync::Arc;
use std::time::Duration;
use tokio_util::sync::CancellationToken;
use utils::generation::Generation;
use utils::id::{NodeId, TimelineId};
use utils::lsn::Lsn;
use crate::compute_hook::ComputeHook;
use crate::node::Node;
use crate::tenant_state::{IntentState, ObservedState, ObservedStateLocation};
/// Object with the lifetime of the background reconcile task that is created
/// for tenants which have a difference between their intent and observed states.
pub(super) struct Reconciler {
/// See [`crate::tenant_state::TenantState`] for the meanings of these fields: they are a snapshot
/// of a tenant's state from when we spawned a reconcile task.
pub(super) tenant_shard_id: TenantShardId,
pub(crate) shard: ShardIdentity,
pub(crate) generation: Generation,
pub(crate) intent: IntentState,
pub(crate) config: TenantConfig,
pub(crate) observed: ObservedState,
pub(crate) service_config: service::Config,
/// A snapshot of the pageservers as they were when we were asked
/// to reconcile.
pub(crate) pageservers: Arc<HashMap<NodeId, Node>>,
/// A hook to notify the running postgres instances when we change the location
/// of a tenant
pub(crate) compute_hook: Arc<ComputeHook>,
/// A means to abort background reconciliation: it is essential to
/// call this when something changes in the original TenantState that
/// will make this reconciliation impossible or unnecessary, for
/// example when a pageserver node goes offline, or the PlacementPolicy for
/// the tenant is changed.
pub(crate) cancel: CancellationToken,
/// Access to persistent storage for updating generation numbers
pub(crate) persistence: Arc<Persistence>,
}
#[derive(thiserror::Error, Debug)]
pub enum ReconcileError {
#[error(transparent)]
Other(#[from] anyhow::Error),
}
impl Reconciler {
async fn location_config(
&mut self,
node_id: NodeId,
config: LocationConfig,
flush_ms: Option<Duration>,
) -> anyhow::Result<()> {
let node = self
.pageservers
.get(&node_id)
.expect("Pageserver may not be removed while referenced");
self.observed
.locations
.insert(node.id, ObservedStateLocation { conf: None });
tracing::info!("location_config({}) calling: {:?}", node_id, config);
let client =
mgmt_api::Client::new(node.base_url(), self.service_config.jwt_token.as_deref());
client
.location_config(self.tenant_shard_id, config.clone(), flush_ms)
.await?;
tracing::info!("location_config({}) complete: {:?}", node_id, config);
self.observed
.locations
.insert(node.id, ObservedStateLocation { conf: Some(config) });
Ok(())
}
async fn maybe_live_migrate(&mut self) -> Result<(), ReconcileError> {
let destination = if let Some(node_id) = self.intent.attached {
match self.observed.locations.get(&node_id) {
Some(conf) => {
// We will do a live migration only if the intended destination is not
// currently in an attached state.
match &conf.conf {
Some(conf) if conf.mode == LocationConfigMode::Secondary => {
// Fall through to do a live migration
node_id
}
None | Some(_) => {
// Attached or uncertain: don't do a live migration, proceed
// with a general-case reconciliation
tracing::info!("maybe_live_migrate: destination is None or attached");
return Ok(());
}
}
}
None => {
// Our destination is not attached: maybe live migrate if some other
// node is currently attached. Fall through.
node_id
}
}
} else {
// No intent to be attached
tracing::info!("maybe_live_migrate: no attached intent");
return Ok(());
};
let mut origin = None;
for (node_id, state) in &self.observed.locations {
if let Some(observed_conf) = &state.conf {
if observed_conf.mode == LocationConfigMode::AttachedSingle {
let node = self
.pageservers
.get(node_id)
.expect("Nodes may not be removed while referenced");
// We will only attempt live migration if the origin is not offline: this
// avoids trying to do it while reconciling after responding to an HA failover.
if !matches!(node.availability, NodeAvailability::Offline) {
origin = Some(*node_id);
break;
}
}
}
}
let Some(origin) = origin else {
tracing::info!("maybe_live_migrate: no origin found");
return Ok(());
};
// We have an origin and a destination: proceed to do the live migration
tracing::info!("Live migrating {}->{}", origin, destination);
self.live_migrate(origin, destination).await?;
Ok(())
}
async fn get_lsns(
&self,
tenant_shard_id: TenantShardId,
node_id: &NodeId,
) -> anyhow::Result<HashMap<TimelineId, Lsn>> {
let node = self
.pageservers
.get(node_id)
.expect("Pageserver may not be removed while referenced");
let client =
mgmt_api::Client::new(node.base_url(), self.service_config.jwt_token.as_deref());
let timelines = client.timeline_list(&tenant_shard_id).await?;
Ok(timelines
.into_iter()
.map(|t| (t.timeline_id, t.last_record_lsn))
.collect())
}
async fn secondary_download(&self, tenant_shard_id: TenantShardId, node_id: &NodeId) {
let node = self
.pageservers
.get(node_id)
.expect("Pageserver may not be removed while referenced");
let client =
mgmt_api::Client::new(node.base_url(), self.service_config.jwt_token.as_deref());
match client.tenant_secondary_download(tenant_shard_id).await {
Ok(()) => {}
Err(_) => {
tracing::info!(" (skipping, destination wasn't in secondary mode)")
}
}
}
async fn await_lsn(
&self,
tenant_shard_id: TenantShardId,
pageserver_id: &NodeId,
baseline: HashMap<TimelineId, Lsn>,
) -> anyhow::Result<()> {
loop {
let latest = match self.get_lsns(tenant_shard_id, pageserver_id).await {
Ok(l) => l,
Err(e) => {
println!(
"🕑 Can't get LSNs on pageserver {} yet, waiting ({e})",
pageserver_id
);
std::thread::sleep(Duration::from_millis(500));
continue;
}
};
let mut any_behind: bool = false;
for (timeline_id, baseline_lsn) in &baseline {
match latest.get(timeline_id) {
Some(latest_lsn) => {
println!("🕑 LSN origin {baseline_lsn} vs destination {latest_lsn}");
if latest_lsn < baseline_lsn {
any_behind = true;
}
}
None => {
// Expected timeline isn't yet visible on migration destination.
// (IRL we would have to account for timeline deletion, but this
// is just a test helper)
any_behind = true;
}
}
}
if !any_behind {
println!("✅ LSN caught up. Proceeding...");
break;
} else {
std::thread::sleep(Duration::from_millis(500));
}
}
Ok(())
}
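/// Summary of the migration sequence implemented below:
///  1. Put the origin into AttachedStale (with a flush timeout) so it stops advancing.
///  2. Record the origin's per-timeline LSNs as a baseline.
///  3. If the destination already holds a Secondary location, trigger a layer download to warm it.
///  4. Increment the generation and attach the destination in AttachedMulti mode.
///  5. Wait for the destination's LSNs to catch up with the baseline.
///  6. Notify compute to switch over to the destination.
///  7. Downgrade the origin to Secondary, then promote the destination to AttachedSingle.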
pub async fn live_migrate(
&mut self,
origin_ps_id: NodeId,
dest_ps_id: NodeId,
) -> anyhow::Result<()> {
// `maybe_live_migrate` is responsible for the sanity of inputs
assert!(origin_ps_id != dest_ps_id);
fn build_location_config(
shard: &ShardIdentity,
config: &TenantConfig,
mode: LocationConfigMode,
generation: Option<Generation>,
secondary_conf: Option<LocationConfigSecondary>,
) -> LocationConfig {
LocationConfig {
mode,
generation: generation.map(|g| g.into().unwrap()),
secondary_conf,
tenant_conf: config.clone(),
shard_number: shard.number.0,
shard_count: shard.count.0,
shard_stripe_size: shard.stripe_size.0,
}
}
tracing::info!(
"🔁 Switching origin pageserver {} to stale mode",
origin_ps_id
);
// FIXME: it is incorrect to use self.generation here; we should use the generation
// from the ObservedState of the origin pageserver (it might be older than self.generation)
let stale_conf = build_location_config(
&self.shard,
&self.config,
LocationConfigMode::AttachedStale,
Some(self.generation),
None,
);
self.location_config(origin_ps_id, stale_conf, Some(Duration::from_secs(10)))
.await?;
let baseline_lsns = Some(self.get_lsns(self.tenant_shard_id, &origin_ps_id).await?);
// If we are migrating to a destination that has a secondary location, warm it up first
if let Some(destination_conf) = self.observed.locations.get(&dest_ps_id) {
if let Some(destination_conf) = &destination_conf.conf {
if destination_conf.mode == LocationConfigMode::Secondary {
tracing::info!(
"🔁 Downloading latest layers to destination pageserver {}",
dest_ps_id,
);
self.secondary_download(self.tenant_shard_id, &dest_ps_id)
.await;
}
}
}
// Increment generation before attaching to new pageserver
self.generation = self
.persistence
.increment_generation(self.tenant_shard_id, Some(dest_ps_id))
.await?;
let dest_conf = build_location_config(
&self.shard,
&self.config,
LocationConfigMode::AttachedMulti,
Some(self.generation),
None,
);
tracing::info!("🔁 Attaching to pageserver {}", dest_ps_id);
self.location_config(dest_ps_id, dest_conf, None).await?;
if let Some(baseline) = baseline_lsns {
tracing::info!("🕑 Waiting for LSN to catch up...");
self.await_lsn(self.tenant_shard_id, &dest_ps_id, baseline)
.await?;
}
tracing::info!("🔁 Notifying compute to use pageserver {}", dest_ps_id);
self.compute_hook
.notify(self.tenant_shard_id, dest_ps_id)
.await?;
// Downgrade the origin to secondary. If the tenant's policy is PlacementPolicy::Single, then
// this location will be deleted in the general case reconciliation that runs after this.
let origin_secondary_conf = build_location_config(
&self.shard,
&self.config,
LocationConfigMode::Secondary,
None,
Some(LocationConfigSecondary { warm: true }),
);
self.location_config(origin_ps_id, origin_secondary_conf.clone(), None)
.await?;
// TODO: we should also be setting the ObservedState on earlier API calls, in case we fail
// partway through. In fact, all location conf API calls should be in a wrapper that sets
// the observed state to None, then runs, then sets it to what we wrote.
self.observed.locations.insert(
origin_ps_id,
ObservedStateLocation {
conf: Some(origin_secondary_conf),
},
);
println!(
"🔁 Switching to AttachedSingle mode on pageserver {}",
dest_ps_id
);
let dest_final_conf = build_location_config(
&self.shard,
&self.config,
LocationConfigMode::AttachedSingle,
Some(self.generation),
None,
);
self.location_config(dest_ps_id, dest_final_conf.clone(), None)
.await?;
self.observed.locations.insert(
dest_ps_id,
ObservedStateLocation {
conf: Some(dest_final_conf),
},
);
println!("✅ Migration complete");
Ok(())
}
/// Reconciling a tenant makes API calls to pageservers until the observed state
/// matches the intended state.
///
/// First we apply special case handling (e.g. for live migrations), and then a
/// general case reconciliation where we walk through the intent by pageserver
/// and call out to the pageserver to apply the desired state.
pub(crate) async fn reconcile(&mut self) -> Result<(), ReconcileError> {
// TODO: if any of self.observed is None, call to remote pageservers
// to learn correct state.
// Special case: live migration
self.maybe_live_migrate().await?;
// If the attached pageserver is not attached, do so now.
if let Some(node_id) = self.intent.attached {
let mut wanted_conf =
attached_location_conf(self.generation, &self.shard, &self.config);
match self.observed.locations.get(&node_id) {
Some(conf) if conf.conf.as_ref() == Some(&wanted_conf) => {
// Nothing to do
tracing::info!("Observed configuration already correct.")
}
_ => {
// In all cases other than a matching observed configuration, we will
// reconcile this location. This includes locations with different configurations, as well
// as locations with unknown (None) observed state.
self.generation = self
.persistence
.increment_generation(self.tenant_shard_id, Some(node_id))
.await?;
wanted_conf.generation = self.generation.into();
tracing::info!("Observed configuration requires update.");
self.location_config(node_id, wanted_conf, None).await?;
if let Err(e) = self
.compute_hook
.notify(self.tenant_shard_id, node_id)
.await
{
tracing::warn!(
"Failed to notify compute of newly attached pageserver {node_id}: {e}"
);
}
}
}
}
// Configure secondary locations: if these were previously attached this
// implicitly downgrades them from attached to secondary.
let mut changes = Vec::new();
for node_id in &self.intent.secondary {
let wanted_conf = secondary_location_conf(&self.shard, &self.config);
match self.observed.locations.get(node_id) {
Some(conf) if conf.conf.as_ref() == Some(&wanted_conf) => {
// Nothing to do
tracing::info!(%node_id, "Observed configuration already correct.")
}
_ => {
// In all cases other than a matching observed configuration, we will
// reconcile this location.
tracing::info!(%node_id, "Observed configuration requires update.");
changes.push((*node_id, wanted_conf))
}
}
}
// Detach any extraneous pageservers that are no longer referenced
// by our intent.
let all_pageservers = self.intent.all_pageservers();
for node_id in self.observed.locations.keys() {
if all_pageservers.contains(node_id) {
// We are only detaching pageservers that aren't used at all.
continue;
}
changes.push((
*node_id,
LocationConfig {
mode: LocationConfigMode::Detached,
generation: None,
secondary_conf: None,
shard_number: self.shard.number.0,
shard_count: self.shard.count.0,
shard_stripe_size: self.shard.stripe_size.0,
tenant_conf: self.config.clone(),
},
));
}
for (node_id, conf) in changes {
self.location_config(node_id, conf, None).await?;
}
Ok(())
}
}
pub(crate) fn attached_location_conf(
generation: Generation,
shard: &ShardIdentity,
config: &TenantConfig,
) -> LocationConfig {
LocationConfig {
mode: LocationConfigMode::AttachedSingle,
generation: generation.into(),
secondary_conf: None,
shard_number: shard.number.0,
shard_count: shard.count.0,
shard_stripe_size: shard.stripe_size.0,
tenant_conf: config.clone(),
}
}
pub(crate) fn secondary_location_conf(
shard: &ShardIdentity,
config: &TenantConfig,
) -> LocationConfig {
LocationConfig {
mode: LocationConfigMode::Secondary,
generation: None,
secondary_conf: Some(LocationConfigSecondary { warm: true }),
shard_number: shard.number.0,
shard_count: shard.count.0,
shard_stripe_size: shard.stripe_size.0,
tenant_conf: config.clone(),
}
}
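To make the difference between the two helpers above concrete, a minimal sketch (assuming a ShardIdentity, Generation and TenantConfig are already at hand):

fn example_location_configs(
    shard: &ShardIdentity,
    generation: Generation,
    config: &TenantConfig,
) -> (LocationConfig, LocationConfig) {
    // The attached node carries the current generation and runs in AttachedSingle mode...
    let attached = attached_location_conf(generation, shard, config);
    // ...while a secondary is generation-less and requests warm layer downloads.
    let secondary = secondary_location_conf(shard, config);
    (attached, secondary)
}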

View File

@@ -1,89 +0,0 @@
use pageserver_api::shard::TenantShardId;
use std::collections::{BTreeMap, HashMap};
use utils::{http::error::ApiError, id::NodeId};
use crate::{node::Node, tenant_state::TenantState};
/// Scenarios in which we cannot find a suitable location for a tenant shard
#[derive(thiserror::Error, Debug)]
pub enum ScheduleError {
#[error("No pageservers found")]
NoPageservers,
#[error("No pageserver found matching constraint")]
ImpossibleConstraint,
}
impl From<ScheduleError> for ApiError {
fn from(value: ScheduleError) -> Self {
ApiError::Conflict(format!("Scheduling error: {}", value))
}
}
pub(crate) struct Scheduler {
tenant_counts: HashMap<NodeId, usize>,
}
impl Scheduler {
pub(crate) fn new(
tenants: &BTreeMap<TenantShardId, TenantState>,
nodes: &HashMap<NodeId, Node>,
) -> Self {
let mut tenant_counts = HashMap::new();
for node_id in nodes.keys() {
tenant_counts.insert(*node_id, 0);
}
for tenant in tenants.values() {
if let Some(ps) = tenant.intent.attached {
let entry = tenant_counts.entry(ps).or_insert(0);
*entry += 1;
}
}
for (node_id, node) in nodes {
if !node.may_schedule() {
tenant_counts.remove(node_id);
}
}
Self { tenant_counts }
}
pub(crate) fn schedule_shard(
&mut self,
hard_exclude: &[NodeId],
) -> Result<NodeId, ScheduleError> {
if self.tenant_counts.is_empty() {
return Err(ScheduleError::NoPageservers);
}
let mut tenant_counts: Vec<(NodeId, usize)> = self
.tenant_counts
.iter()
.filter_map(|(k, v)| {
if hard_exclude.contains(k) {
None
} else {
Some((*k, *v))
}
})
.collect();
// Sort by tenant count. Nodes with the same tenant count are sorted by ID.
tenant_counts.sort_by_key(|i| (i.1, i.0));
if tenant_counts.is_empty() {
// After applying constraints, no pageservers were left
return Err(ScheduleError::ImpossibleConstraint);
}
for (node_id, count) in &tenant_counts {
tracing::info!("tenant_counts[{node_id}]={count}");
}
let node_id = tenant_counts.first().unwrap().0;
tracing::info!("scheduler selected node {node_id}");
*self.tenant_counts.get_mut(&node_id).unwrap() += 1;
Ok(node_id)
}
}
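The heart of schedule_shard is its ordering; a standalone illustration with hypothetical counts (only utils::id::NodeId is assumed):

fn selection_rule_example() {
    use utils::id::NodeId;
    // (node, number of tenants currently attached), as schedule_shard sees them.
    let mut candidates = vec![(NodeId(3), 2usize), (NodeId(1), 2), (NodeId(2), 5)];
    // hard_exclude drops nodes this tenant already uses, e.g. node 2.
    candidates.retain(|(id, _)| *id != NodeId(2));
    // Least-loaded first; ties are broken by the lower node ID, as in the code above.
    candidates.sort_by_key(|&(id, count)| (count, id));
    assert!(candidates.first().unwrap().0 == NodeId(1));
}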

File diff suppressed because it is too large

View File

@@ -1,455 +0,0 @@
use std::{collections::HashMap, sync::Arc, time::Duration};
use control_plane::attachment_service::NodeAvailability;
use pageserver_api::{
models::{LocationConfig, LocationConfigMode, TenantConfig},
shard::{ShardIdentity, TenantShardId},
};
use tokio::task::JoinHandle;
use tokio_util::sync::CancellationToken;
use utils::{
generation::Generation,
id::NodeId,
seqwait::{SeqWait, SeqWaitError},
};
use crate::{
compute_hook::ComputeHook,
node::Node,
persistence::Persistence,
reconciler::{attached_location_conf, secondary_location_conf, ReconcileError, Reconciler},
scheduler::{ScheduleError, Scheduler},
service, PlacementPolicy, Sequence,
};
pub(crate) struct TenantState {
pub(crate) tenant_shard_id: TenantShardId,
pub(crate) shard: ShardIdentity,
// Runtime only: sequence used to coordinate updates to this object while
// background reconcilers may be running. A reconciler runs to a particular
// sequence.
pub(crate) sequence: Sequence,
// Latest generation number: next time we attach, increment this
// and use the incremented number when attaching
pub(crate) generation: Generation,
// High level description of how the tenant should be set up. Provided
// externally.
pub(crate) policy: PlacementPolicy,
// Low level description of exactly which pageservers should fulfil
// which role. Generated by `Self::schedule`.
pub(crate) intent: IntentState,
// Low level description of how the tenant is configured on pageservers:
// if this does not match `Self::intent` then the tenant needs reconciliation
// with `Self::reconcile`.
pub(crate) observed: ObservedState,
// Tenant configuration, passed through opaquely to the pageserver. Identical
// for all shards in a tenant.
pub(crate) config: TenantConfig,
/// If a reconcile task is currently in flight, it may be joined here (it is
/// only safe to join if either the result has been received or the reconciler's
/// cancellation token has been fired)
pub(crate) reconciler: Option<ReconcilerHandle>,
/// Optionally wait for reconciliation to complete up to a particular
/// sequence number.
pub(crate) waiter: std::sync::Arc<SeqWait<Sequence, Sequence>>,
/// Indicates sequence number for which we have encountered an error reconciling. If
/// this advances ahead of [`Self::waiter`] then a reconciliation error has occurred,
/// and callers should stop waiting for `waiter` and propagate the error.
pub(crate) error_waiter: std::sync::Arc<SeqWait<Sequence, Sequence>>,
/// The most recent error from a reconcile on this tenant
/// TODO: generalize to an array of recent events
/// TODO: use an ArcSwap instead of mutex for faster reads?
pub(crate) last_error: std::sync::Arc<std::sync::Mutex<String>>,
}
#[derive(Default, Clone, Debug)]
pub(crate) struct IntentState {
pub(crate) attached: Option<NodeId>,
pub(crate) secondary: Vec<NodeId>,
}
#[derive(Default, Clone)]
pub(crate) struct ObservedState {
pub(crate) locations: HashMap<NodeId, ObservedStateLocation>,
}
/// Our latest knowledge of how this tenant is configured in the outside world.
///
/// Meaning:
/// * No instance of this type exists for a node: we are certain that we have nothing configured on that
/// node for this shard.
/// * Instance exists with conf==None: we *might* have some state on that node, but we don't know
/// what it is (e.g. we failed partway through configuring it)
/// * Instance exists with conf==Some: this tells us what we last successfully configured on this node,
/// and that configuration will still be present unless something external interfered.
#[derive(Clone)]
pub(crate) struct ObservedStateLocation {
/// If None, it means we do not know the status of this shard's location on this node, but
/// we know that we might have some state on this node.
pub(crate) conf: Option<LocationConfig>,
}
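The three cases described in the comment above are easiest to read as a match; a hypothetical helper (not part of the diff) spelling them out:

fn describe_observed(observed: &ObservedState, node_id: &NodeId) -> &'static str {
    match observed.locations.get(node_id) {
        // No entry: we are certain nothing is configured on this node for the shard.
        None => "nothing configured on this node",
        // Entry with conf == None: something may exist, but we don't know what.
        Some(ObservedStateLocation { conf: None }) => "unknown state, reconcile before trusting it",
        // Entry with conf == Some: the last configuration we successfully applied.
        Some(ObservedStateLocation { conf: Some(_) }) => "last successfully applied configuration",
    }
}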
pub(crate) struct ReconcilerWaiter {
// For observability purposes, remember the ID of the shard we're
// waiting for.
pub(crate) tenant_shard_id: TenantShardId,
seq_wait: std::sync::Arc<SeqWait<Sequence, Sequence>>,
error_seq_wait: std::sync::Arc<SeqWait<Sequence, Sequence>>,
error: std::sync::Arc<std::sync::Mutex<String>>,
seq: Sequence,
}
#[derive(thiserror::Error, Debug)]
pub enum ReconcileWaitError {
#[error("Timeout waiting for shard {0}")]
Timeout(TenantShardId),
#[error("shutting down")]
Shutdown,
#[error("Reconcile error on shard {0}: {1}")]
Failed(TenantShardId, String),
}
impl ReconcilerWaiter {
pub(crate) async fn wait_timeout(&self, timeout: Duration) -> Result<(), ReconcileWaitError> {
tokio::select! {
result = self.seq_wait.wait_for_timeout(self.seq, timeout)=> {
result.map_err(|e| match e {
SeqWaitError::Timeout => ReconcileWaitError::Timeout(self.tenant_shard_id),
SeqWaitError::Shutdown => ReconcileWaitError::Shutdown
})?;
},
result = self.error_seq_wait.wait_for(self.seq) => {
result.map_err(|e| match e {
SeqWaitError::Shutdown => ReconcileWaitError::Shutdown,
SeqWaitError::Timeout => unreachable!()
})?;
return Err(ReconcileWaitError::Failed(self.tenant_shard_id, self.error.lock().unwrap().clone()))
}
}
Ok(())
}
}
/// Having spawned a reconciler task, the tenant shard's state will carry enough
/// information to optionally cancel & await it later.
pub(crate) struct ReconcilerHandle {
sequence: Sequence,
handle: JoinHandle<()>,
cancel: CancellationToken,
}
/// When a reconcile task completes, it sends this result object
/// to be applied to the primary TenantState.
pub(crate) struct ReconcileResult {
pub(crate) sequence: Sequence,
/// On errors, `observed` should be treated as an incomplete description
/// of state (i.e. any nodes present in the result should override nodes
/// present in the parent tenant state, but any unmentioned nodes should
/// not be removed from parent tenant state)
pub(crate) result: Result<(), ReconcileError>,
pub(crate) tenant_shard_id: TenantShardId,
pub(crate) generation: Generation,
pub(crate) observed: ObservedState,
}
impl IntentState {
pub(crate) fn new() -> Self {
Self {
attached: None,
secondary: vec![],
}
}
pub(crate) fn all_pageservers(&self) -> Vec<NodeId> {
let mut result = Vec::new();
if let Some(p) = self.attached {
result.push(p)
}
result.extend(self.secondary.iter().copied());
result
}
/// When a node goes offline, we update intents to avoid using it
/// as their attached pageserver.
///
/// Returns true if a change was made
pub(crate) fn notify_offline(&mut self, node_id: NodeId) -> bool {
if self.attached == Some(node_id) {
self.attached = None;
self.secondary.push(node_id);
true
} else {
false
}
}
}
impl ObservedState {
pub(crate) fn new() -> Self {
Self {
locations: HashMap::new(),
}
}
}
impl TenantState {
pub(crate) fn new(
tenant_shard_id: TenantShardId,
shard: ShardIdentity,
policy: PlacementPolicy,
) -> Self {
Self {
tenant_shard_id,
policy,
intent: IntentState::default(),
generation: Generation::new(0),
shard,
observed: ObservedState::default(),
config: TenantConfig::default(),
reconciler: None,
sequence: Sequence(1),
waiter: Arc::new(SeqWait::new(Sequence(0))),
error_waiter: Arc::new(SeqWait::new(Sequence(0))),
last_error: Arc::default(),
}
}
/// For use on startup when learning state from pageservers: generate my [`IntentState`] from my
/// [`ObservedState`], even if it violates my [`PlacementPolicy`]. Call [`Self::schedule`] next,
/// to get an intent state that complies with placement policy. The overall goal is to do scheduling
/// in a way that makes use of any configured locations that already exist in the outside world.
pub(crate) fn intent_from_observed(&mut self) {
// Choose an attached location by filtering observed locations, and then sorting to get the highest
// generation
let mut attached_locs = self
.observed
.locations
.iter()
.filter_map(|(node_id, l)| {
if let Some(conf) = &l.conf {
if conf.mode == LocationConfigMode::AttachedMulti
|| conf.mode == LocationConfigMode::AttachedSingle
|| conf.mode == LocationConfigMode::AttachedStale
{
Some((node_id, conf.generation))
} else {
None
}
} else {
None
}
})
.collect::<Vec<_>>();
attached_locs.sort_by_key(|i| i.1);
if let Some((node_id, _gen)) = attached_locs.into_iter().last() {
self.intent.attached = Some(*node_id);
}
// All remaining observed locations generate secondary intents. This includes None
// observations, as these may well have some local content on disk that is usable (this
// is an edge case that might occur if we restarted during a migration or other change)
self.observed.locations.keys().for_each(|node_id| {
if Some(*node_id) != self.intent.attached {
self.intent.secondary.push(*node_id);
}
});
}
pub(crate) fn schedule(&mut self, scheduler: &mut Scheduler) -> Result<(), ScheduleError> {
// TODO: before scheduling new nodes, check if any existing content in
// self.intent refers to pageservers that are offline, and pick other
// pageservers if so.
// Build the set of pageservers already in use by this tenant, to avoid scheduling
// more work on the same pageservers we're already using.
let mut used_pageservers = self.intent.all_pageservers();
let mut modified = false;
use PlacementPolicy::*;
match self.policy {
Single => {
// Should have exactly one attached, and zero secondaries
if self.intent.attached.is_none() {
let node_id = scheduler.schedule_shard(&used_pageservers)?;
self.intent.attached = Some(node_id);
used_pageservers.push(node_id);
modified = true;
}
if !self.intent.secondary.is_empty() {
self.intent.secondary.clear();
modified = true;
}
}
Double(secondary_count) => {
// Should have exactly one attached, and N secondaries
if self.intent.attached.is_none() {
let node_id = scheduler.schedule_shard(&used_pageservers)?;
self.intent.attached = Some(node_id);
used_pageservers.push(node_id);
modified = true;
}
while self.intent.secondary.len() < secondary_count {
let node_id = scheduler.schedule_shard(&used_pageservers)?;
self.intent.secondary.push(node_id);
used_pageservers.push(node_id);
modified = true;
}
}
}
if modified {
self.sequence.0 += 1;
}
Ok(())
}
fn dirty(&self) -> bool {
if let Some(node_id) = self.intent.attached {
let wanted_conf = attached_location_conf(self.generation, &self.shard, &self.config);
match self.observed.locations.get(&node_id) {
Some(conf) if conf.conf.as_ref() == Some(&wanted_conf) => {}
Some(_) | None => {
return true;
}
}
}
for node_id in &self.intent.secondary {
let wanted_conf = secondary_location_conf(&self.shard, &self.config);
match self.observed.locations.get(node_id) {
Some(conf) if conf.conf.as_ref() == Some(&wanted_conf) => {}
Some(_) | None => {
return true;
}
}
}
false
}
pub(crate) fn maybe_reconcile(
&mut self,
result_tx: tokio::sync::mpsc::UnboundedSender<ReconcileResult>,
pageservers: &Arc<HashMap<NodeId, Node>>,
compute_hook: &Arc<ComputeHook>,
service_config: &service::Config,
persistence: &Arc<Persistence>,
) -> Option<ReconcilerWaiter> {
// If there are any ambiguous observed states, and the nodes they refer to are available,
// we should reconcile to clean them up.
let mut dirty_observed = false;
for (node_id, observed_loc) in &self.observed.locations {
let node = pageservers
.get(node_id)
.expect("Nodes may not be removed while referenced");
if observed_loc.conf.is_none()
&& !matches!(node.availability, NodeAvailability::Offline)
{
dirty_observed = true;
break;
}
}
if !self.dirty() && !dirty_observed {
tracing::info!("Not dirty, no reconciliation needed.");
return None;
}
// Reconcile already in flight for the current sequence?
if let Some(handle) = &self.reconciler {
if handle.sequence == self.sequence {
return Some(ReconcilerWaiter {
tenant_shard_id: self.tenant_shard_id,
seq_wait: self.waiter.clone(),
error_seq_wait: self.error_waiter.clone(),
error: self.last_error.clone(),
seq: self.sequence,
});
}
}
// Reconcile in flight for a stale sequence? Our sequence's task will wait for it before
// doing our sequence's work.
let old_handle = self.reconciler.take();
let cancel = CancellationToken::new();
let mut reconciler = Reconciler {
tenant_shard_id: self.tenant_shard_id,
shard: self.shard,
generation: self.generation,
intent: self.intent.clone(),
config: self.config.clone(),
observed: self.observed.clone(),
pageservers: pageservers.clone(),
compute_hook: compute_hook.clone(),
service_config: service_config.clone(),
cancel: cancel.clone(),
persistence: persistence.clone(),
};
let reconcile_seq = self.sequence;
tracing::info!("Spawning Reconciler for sequence {}", self.sequence);
let join_handle = tokio::task::spawn(async move {
// Wait for any previous reconcile task to complete before we start
if let Some(old_handle) = old_handle {
old_handle.cancel.cancel();
if let Err(e) = old_handle.handle.await {
// We can't do much with this other than log it: the task is done, so
// we may proceed with our work.
tracing::error!("Unexpected join error waiting for reconcile task: {e}");
}
}
// Early check for cancellation before doing any work
// TODO: wrap all remote API operations in cancellation check
// as well.
if reconciler.cancel.is_cancelled() {
return;
}
let result = reconciler.reconcile().await;
result_tx
.send(ReconcileResult {
sequence: reconcile_seq,
result,
tenant_shard_id: reconciler.tenant_shard_id,
generation: reconciler.generation,
observed: reconciler.observed,
})
.ok();
});
self.reconciler = Some(ReconcilerHandle {
sequence: self.sequence,
handle: join_handle,
cancel,
});
Some(ReconcilerWaiter {
tenant_shard_id: self.tenant_shard_id,
seq_wait: self.waiter.clone(),
error_seq_wait: self.error_waiter.clone(),
error: self.last_error.clone(),
seq: self.sequence,
})
}
}
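A sketch of how a caller inside this crate is expected to consume the waiter returned by maybe_reconcile (the 30-second timeout is an arbitrary illustrative value):

async fn await_reconcile(waiter: Option<ReconcilerWaiter>) -> Result<(), ReconcileWaitError> {
    // A shard that was already clean returns None from maybe_reconcile: nothing to wait for.
    if let Some(waiter) = waiter {
        waiter.wait_timeout(std::time::Duration::from_secs(30)).await?;
    }
    Ok(())
}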

View File

@@ -1,27 +1,14 @@
use crate::{background_process, local_env::LocalEnv};
use anyhow::anyhow;
use camino::Utf8PathBuf;
use hyper::Method;
use pageserver_api::{
models::{ShardParameters, TenantCreateRequest, TimelineCreateRequest, TimelineInfo},
shard::TenantShardId,
};
use pageserver_client::mgmt_api::ResponseErrorMessageExt;
use postgres_backend::AuthType;
use postgres_connection::parse_host_port;
use serde::{de::DeserializeOwned, Deserialize, Serialize};
use std::{path::PathBuf, process::Child, str::FromStr};
use tracing::instrument;
use utils::{
auth::{Claims, Scope},
id::{NodeId, TenantId},
};
use serde::{Deserialize, Serialize};
use std::{path::PathBuf, process::Child};
use utils::id::{NodeId, TenantId};
pub struct AttachmentService {
env: LocalEnv,
listen: String,
path: PathBuf,
jwt_token: Option<String>,
public_key_path: Option<Utf8PathBuf>,
client: reqwest::Client,
}
@@ -29,7 +16,7 @@ const COMMAND: &str = "attachment_service";
#[derive(Serialize, Deserialize)]
pub struct AttachHookRequest {
pub tenant_shard_id: TenantShardId,
pub tenant_id: TenantId,
pub node_id: Option<NodeId>,
}
@@ -40,7 +27,7 @@ pub struct AttachHookResponse {
#[derive(Serialize, Deserialize)]
pub struct InspectRequest {
pub tenant_shard_id: TenantShardId,
pub tenant_id: TenantId,
}
#[derive(Serialize, Deserialize)]
@@ -48,125 +35,6 @@ pub struct InspectResponse {
pub attachment: Option<(u32, NodeId)>,
}
#[derive(Serialize, Deserialize)]
pub struct TenantCreateResponseShard {
pub node_id: NodeId,
pub generation: u32,
}
#[derive(Serialize, Deserialize)]
pub struct TenantCreateResponse {
pub shards: Vec<TenantCreateResponseShard>,
}
#[derive(Serialize, Deserialize)]
pub struct NodeRegisterRequest {
pub node_id: NodeId,
pub listen_pg_addr: String,
pub listen_pg_port: u16,
pub listen_http_addr: String,
pub listen_http_port: u16,
}
#[derive(Serialize, Deserialize)]
pub struct NodeConfigureRequest {
pub node_id: NodeId,
pub availability: Option<NodeAvailability>,
pub scheduling: Option<NodeSchedulingPolicy>,
}
#[derive(Serialize, Deserialize, Debug)]
pub struct TenantLocateResponseShard {
pub shard_id: TenantShardId,
pub node_id: NodeId,
pub listen_pg_addr: String,
pub listen_pg_port: u16,
pub listen_http_addr: String,
pub listen_http_port: u16,
}
#[derive(Serialize, Deserialize)]
pub struct TenantLocateResponse {
pub shards: Vec<TenantLocateResponseShard>,
pub shard_params: ShardParameters,
}
/// Explicitly migrating a particular shard is a low-level operation.
/// TODO: higher level "Reschedule tenant" operation where the request
/// specifies some constraints, e.g. asking it to get off particular node(s)
#[derive(Serialize, Deserialize, Debug)]
pub struct TenantShardMigrateRequest {
pub tenant_shard_id: TenantShardId,
pub node_id: NodeId,
}
#[derive(Serialize, Deserialize, Clone, Copy)]
pub enum NodeAvailability {
// Normal, happy state
Active,
// Offline: Tenants shouldn't try to attach here, but they may assume that their
// secondary locations on this node still exist. Newly added nodes are in this
// state until we successfully contact them.
Offline,
}
impl FromStr for NodeAvailability {
type Err = anyhow::Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
"active" => Ok(Self::Active),
"offline" => Ok(Self::Offline),
_ => Err(anyhow::anyhow!("Unknown availability state '{s}'")),
}
}
}
/// FIXME: this is a duplicate of the type in the attachment_service crate, because the
/// type needs to be defined with diesel traits there.
#[derive(Serialize, Deserialize, Clone, Copy)]
pub enum NodeSchedulingPolicy {
Active,
Filling,
Pause,
Draining,
}
impl FromStr for NodeSchedulingPolicy {
type Err = anyhow::Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
"active" => Ok(Self::Active),
"filling" => Ok(Self::Filling),
"pause" => Ok(Self::Pause),
"draining" => Ok(Self::Draining),
_ => Err(anyhow::anyhow!("Unknown scheduling state '{s}'")),
}
}
}
impl From<NodeSchedulingPolicy> for String {
fn from(value: NodeSchedulingPolicy) -> String {
use NodeSchedulingPolicy::*;
match value {
Active => "active",
Filling => "filling",
Pause => "pause",
Draining => "draining",
}
.to_string()
}
}
#[derive(Serialize, Deserialize, Debug)]
pub struct TenantShardMigrateResponse {}
impl AttachmentService {
pub fn from_env(env: &LocalEnv) -> Self {
let path = env.base_data_dir.join("attachments.json");
@@ -181,34 +49,10 @@ impl AttachmentService {
listen_url.port().unwrap()
);
// Assume all pageservers have symmetric auth configuration: this service
// expects to use one JWT token to talk to all of them.
let ps_conf = env
.pageservers
.first()
.expect("Config is validated to contain at least one pageserver");
let (jwt_token, public_key_path) = match ps_conf.http_auth_type {
AuthType::Trust => (None, None),
AuthType::NeonJWT => {
let jwt_token = env
.generate_auth_token(&Claims::new(None, Scope::PageServerApi))
.unwrap();
// If pageserver auth is enabled, this implicitly enables auth for this service,
// using the same credentials.
let public_key_path =
camino::Utf8PathBuf::try_from(env.base_data_dir.join("auth_public_key.pem"))
.unwrap();
(Some(jwt_token), Some(public_key_path))
}
};
Self {
env: env.clone(),
path,
listen,
jwt_token,
public_key_path,
client: reqwest::ClientBuilder::new()
.build()
.expect("Failed to construct http client"),
@@ -223,199 +67,72 @@ impl AttachmentService {
pub async fn start(&self) -> anyhow::Result<Child> {
let path_str = self.path.to_string_lossy();
let mut args = vec!["-l", &self.listen, "-p", &path_str]
.into_iter()
.map(|s| s.to_string())
.collect::<Vec<_>>();
if let Some(jwt_token) = &self.jwt_token {
args.push(format!("--jwt-token={jwt_token}"));
}
if let Some(public_key_path) = &self.public_key_path {
args.push(format!("--public-key={public_key_path}"));
}
let result = background_process::start_process(
background_process::start_process(
COMMAND,
&self.env.base_data_dir,
&self.env.attachment_service_bin(),
args,
[(
"NEON_REPO_DIR".to_string(),
self.env.base_data_dir.to_string_lossy().to_string(),
)],
["-l", &self.listen, "-p", &path_str],
[],
background_process::InitialPidFile::Create(self.pid_file()),
|| async {
match self.status().await {
Ok(_) => Ok(true),
Err(_) => Ok(false),
}
},
// TODO: a real status check
|| async move { anyhow::Ok(true) },
)
.await;
for ps_conf in &self.env.pageservers {
let (pg_host, pg_port) =
parse_host_port(&ps_conf.listen_pg_addr).expect("Unable to parse listen_pg_addr");
let (http_host, http_port) = parse_host_port(&ps_conf.listen_http_addr)
.expect("Unable to parse listen_http_addr");
self.node_register(NodeRegisterRequest {
node_id: ps_conf.id,
listen_pg_addr: pg_host.to_string(),
listen_pg_port: pg_port.unwrap_or(5432),
listen_http_addr: http_host.to_string(),
listen_http_port: http_port.unwrap_or(80),
})
.await?;
}
result
.await
}
pub fn stop(&self, immediate: bool) -> anyhow::Result<()> {
background_process::stop_process(immediate, COMMAND, &self.pid_file())
}
/// Simple HTTP request wrapper for calling into attachment service
async fn dispatch<RQ, RS>(
/// Call into the attach_hook API, for use before handing out attachments to pageservers
pub async fn attach_hook(
&self,
method: hyper::Method,
path: String,
body: Option<RQ>,
) -> anyhow::Result<RS>
where
RQ: Serialize + Sized,
RS: DeserializeOwned + Sized,
{
tenant_id: TenantId,
pageserver_id: NodeId,
) -> anyhow::Result<Option<u32>> {
use hyper::StatusCode;
let url = self
.env
.control_plane_api
.clone()
.unwrap()
.join(&path)
.join("attach-hook")
.unwrap();
let mut builder = self.client.request(method, url);
if let Some(body) = body {
builder = builder.json(&body)
}
if let Some(jwt_token) = &self.jwt_token {
builder = builder.header(
reqwest::header::AUTHORIZATION,
format!("Bearer {jwt_token}"),
);
}
let response = builder.send().await?;
let response = response.error_from_body().await?;
Ok(response
.json()
.await
.map_err(pageserver_client::mgmt_api::Error::ReceiveBody)?)
}
/// Call into the attach_hook API, for use before handing out attachments to pageservers
#[instrument(skip(self))]
pub async fn attach_hook(
&self,
tenant_shard_id: TenantShardId,
pageserver_id: NodeId,
) -> anyhow::Result<Option<u32>> {
let request = AttachHookRequest {
tenant_shard_id,
tenant_id,
node_id: Some(pageserver_id),
};
let response = self
.dispatch::<_, AttachHookResponse>(
Method::POST,
"attach-hook".to_string(),
Some(request),
)
.await?;
let response = self.client.post(url).json(&request).send().await?;
if response.status() != StatusCode::OK {
return Err(anyhow!("Unexpected status {}", response.status()));
}
let response = response.json::<AttachHookResponse>().await?;
Ok(response.gen)
}
#[instrument(skip(self))]
pub async fn inspect(
&self,
tenant_shard_id: TenantShardId,
) -> anyhow::Result<Option<(u32, NodeId)>> {
let request = InspectRequest { tenant_shard_id };
pub async fn inspect(&self, tenant_id: TenantId) -> anyhow::Result<Option<(u32, NodeId)>> {
use hyper::StatusCode;
let response = self
.dispatch::<_, InspectResponse>(Method::POST, "inspect".to_string(), Some(request))
.await?;
let url = self
.env
.control_plane_api
.clone()
.unwrap()
.join("inspect")
.unwrap();
let request = InspectRequest { tenant_id };
let response = self.client.post(url).json(&request).send().await?;
if response.status() != StatusCode::OK {
return Err(anyhow!("Unexpected status {}", response.status()));
}
let response = response.json::<InspectResponse>().await?;
Ok(response.attachment)
}
#[instrument(skip(self))]
pub async fn tenant_create(
&self,
req: TenantCreateRequest,
) -> anyhow::Result<TenantCreateResponse> {
self.dispatch(Method::POST, "tenant".to_string(), Some(req))
.await
}
#[instrument(skip(self))]
pub async fn tenant_locate(&self, tenant_id: TenantId) -> anyhow::Result<TenantLocateResponse> {
self.dispatch::<(), _>(Method::GET, format!("tenant/{tenant_id}/locate"), None)
.await
}
#[instrument(skip(self))]
pub async fn tenant_migrate(
&self,
tenant_shard_id: TenantShardId,
node_id: NodeId,
) -> anyhow::Result<TenantShardMigrateResponse> {
self.dispatch(
Method::PUT,
format!("tenant/{tenant_shard_id}/migrate"),
Some(TenantShardMigrateRequest {
tenant_shard_id,
node_id,
}),
)
.await
}
#[instrument(skip_all, fields(node_id=%req.node_id))]
pub async fn node_register(&self, req: NodeRegisterRequest) -> anyhow::Result<()> {
self.dispatch::<_, ()>(Method::POST, "node".to_string(), Some(req))
.await
}
#[instrument(skip_all, fields(node_id=%req.node_id))]
pub async fn node_configure(&self, req: NodeConfigureRequest) -> anyhow::Result<()> {
self.dispatch::<_, ()>(
Method::PUT,
format!("node/{}/config", req.node_id),
Some(req),
)
.await
}
#[instrument(skip(self))]
pub async fn status(&self) -> anyhow::Result<()> {
self.dispatch::<(), ()>(Method::GET, "status".to_string(), None)
.await
}
#[instrument(skip_all, fields(%tenant_id, timeline_id=%req.new_timeline_id))]
pub async fn tenant_timeline_create(
&self,
tenant_id: TenantId,
req: TimelineCreateRequest,
) -> anyhow::Result<TimelineInfo> {
self.dispatch(
Method::POST,
format!("tenant/{tenant_id}/timeline"),
Some(req),
)
.await
}
}

View File

@@ -0,0 +1,337 @@
/// The attachment service mimics the aspects of the control plane API
/// that are required for a pageserver to operate.
///
/// This enables running & testing pageservers without a full-blown
/// deployment of the Neon cloud platform.
///
use anyhow::anyhow;
use clap::Parser;
use hex::FromHex;
use hyper::StatusCode;
use hyper::{Body, Request, Response};
use pageserver_api::shard::TenantShardId;
use serde::{Deserialize, Serialize};
use std::path::{Path, PathBuf};
use std::{collections::HashMap, sync::Arc};
use utils::http::endpoint::request_span;
use utils::logging::{self, LogFormat};
use utils::signals::{ShutdownSignals, Signal};
use utils::{
http::{
endpoint::{self},
error::ApiError,
json::{json_request, json_response},
RequestExt, RouterBuilder,
},
id::{NodeId, TenantId},
tcp_listener,
};
use pageserver_api::control_api::{
ReAttachRequest, ReAttachResponse, ReAttachResponseTenant, ValidateRequest, ValidateResponse,
ValidateResponseTenant,
};
use control_plane::attachment_service::{
AttachHookRequest, AttachHookResponse, InspectRequest, InspectResponse,
};
#[derive(Parser)]
#[command(author, version, about, long_about = None)]
#[command(arg_required_else_help(true))]
struct Cli {
/// Host and port to listen on, like `127.0.0.1:1234`
#[arg(short, long)]
listen: std::net::SocketAddr,
/// Path to the .json file to store state (will be created if it doesn't exist)
#[arg(short, long)]
path: PathBuf,
}
// The persistent state of each Tenant
#[derive(Serialize, Deserialize, Clone)]
struct TenantState {
// Currently attached pageserver
pageserver: Option<NodeId>,
// Latest generation number: next time we attach, increment this
// and use the incremented number when attaching
generation: u32,
}
fn to_hex_map<S, V>(input: &HashMap<TenantId, V>, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
V: Clone + Serialize,
{
let transformed = input.iter().map(|(k, v)| (hex::encode(k), v.clone()));
transformed
.collect::<HashMap<String, V>>()
.serialize(serializer)
}
fn from_hex_map<'de, D, V>(deserializer: D) -> Result<HashMap<TenantId, V>, D::Error>
where
D: serde::de::Deserializer<'de>,
V: Deserialize<'de>,
{
let hex_map = HashMap::<String, V>::deserialize(deserializer)?;
hex_map
.into_iter()
.map(|(k, v)| {
TenantId::from_hex(k)
.map(|k| (k, v))
.map_err(serde::de::Error::custom)
})
.collect()
}
// Top level state available to all HTTP handlers
#[derive(Serialize, Deserialize)]
struct PersistentState {
#[serde(serialize_with = "to_hex_map", deserialize_with = "from_hex_map")]
tenants: HashMap<TenantId, TenantState>,
#[serde(skip)]
path: PathBuf,
}
impl PersistentState {
async fn save(&self) -> anyhow::Result<()> {
let bytes = serde_json::to_vec(self)?;
tokio::fs::write(&self.path, &bytes).await?;
Ok(())
}
async fn load(path: &Path) -> anyhow::Result<Self> {
let bytes = tokio::fs::read(path).await?;
let mut decoded = serde_json::from_slice::<Self>(&bytes)?;
decoded.path = path.to_owned();
Ok(decoded)
}
async fn load_or_new(path: &Path) -> Self {
match Self::load(path).await {
Ok(s) => {
tracing::info!("Loaded state file at {}", path.display());
s
}
Err(e)
if e.downcast_ref::<std::io::Error>()
.map(|e| e.kind() == std::io::ErrorKind::NotFound)
.unwrap_or(false) =>
{
tracing::info!("Will create state file at {}", path.display());
Self {
tenants: HashMap::new(),
path: path.to_owned(),
}
}
Err(e) => {
panic!("Failed to load state from '{}': {e:#} (maybe your .neon/ dir was written by an older version?)", path.display())
}
}
}
}
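A hypothetical round-trip check for the save/load pair and the hex-keyed map above (not part of the diff; assumes tokio's test macro is available, and uses a throwaway path under the system temp directory):

#[cfg(test)]
mod persistence_shape {
    use super::*;

    #[tokio::test]
    async fn save_and_load_round_trip() -> anyhow::Result<()> {
        // Tenant IDs are serialized as hex string keys, so attachments.json looks roughly like
        // {"tenants":{"<hex tenant id>":{"pageserver":1,"generation":3}}}.
        let path = std::env::temp_dir().join("attachment_service_roundtrip_test.json");
        let mut state = PersistentState {
            tenants: HashMap::new(),
            path: path.clone(),
        };
        state.tenants.insert(
            TenantId::generate(),
            TenantState { pageserver: Some(NodeId(1)), generation: 3 },
        );
        state.save().await?;
        let loaded = PersistentState::load(&path).await?;
        assert_eq!(loaded.tenants.len(), 1);
        Ok(())
    }
}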
/// State available to HTTP request handlers
#[derive(Clone)]
struct State {
inner: Arc<tokio::sync::RwLock<PersistentState>>,
}
impl State {
fn new(persistent_state: PersistentState) -> State {
Self {
inner: Arc::new(tokio::sync::RwLock::new(persistent_state)),
}
}
}
#[inline(always)]
fn get_state(request: &Request<Body>) -> &State {
request
.data::<Arc<State>>()
.expect("unknown state type")
.as_ref()
}
/// Pageserver calls into this on startup, to learn which tenants it should attach
async fn handle_re_attach(mut req: Request<Body>) -> Result<Response<Body>, ApiError> {
let reattach_req = json_request::<ReAttachRequest>(&mut req).await?;
let state = get_state(&req).inner.clone();
let mut locked = state.write().await;
let mut response = ReAttachResponse {
tenants: Vec::new(),
};
for (t, state) in &mut locked.tenants {
if state.pageserver == Some(reattach_req.node_id) {
state.generation += 1;
response.tenants.push(ReAttachResponseTenant {
// TODO(sharding): make this shard-aware
id: TenantShardId::unsharded(*t),
gen: state.generation,
});
}
}
locked.save().await.map_err(ApiError::InternalServerError)?;
json_response(StatusCode::OK, response)
}
/// Pageserver calls into this before doing deletions, to confirm that it still
/// holds the latest generation for the tenants with deletions enqueued
async fn handle_validate(mut req: Request<Body>) -> Result<Response<Body>, ApiError> {
let validate_req = json_request::<ValidateRequest>(&mut req).await?;
let locked = get_state(&req).inner.read().await;
let mut response = ValidateResponse {
tenants: Vec::new(),
};
for req_tenant in validate_req.tenants {
// TODO(sharding): make this shard-aware
if let Some(tenant_state) = locked.tenants.get(&req_tenant.id.tenant_id) {
let valid = tenant_state.generation == req_tenant.gen;
tracing::info!(
"handle_validate: {}(gen {}): valid={valid} (latest {})",
req_tenant.id,
req_tenant.gen,
tenant_state.generation
);
response.tenants.push(ValidateResponseTenant {
id: req_tenant.id,
valid,
});
}
}
json_response(StatusCode::OK, response)
}
/// Call into this before attaching a tenant to a pageserver, to acquire a generation number
/// (in the real control plane this is unnecessary, because the same program is managing
/// generation numbers and doing attachments).
async fn handle_attach_hook(mut req: Request<Body>) -> Result<Response<Body>, ApiError> {
let attach_req = json_request::<AttachHookRequest>(&mut req).await?;
let state = get_state(&req).inner.clone();
let mut locked = state.write().await;
let tenant_state = locked
.tenants
.entry(attach_req.tenant_id)
.or_insert_with(|| TenantState {
pageserver: attach_req.node_id,
generation: 0,
});
if let Some(attaching_pageserver) = attach_req.node_id.as_ref() {
tenant_state.generation += 1;
tracing::info!(
tenant_id = %attach_req.tenant_id,
ps_id = %attaching_pageserver,
generation = %tenant_state.generation,
"issuing",
);
} else if let Some(ps_id) = tenant_state.pageserver {
tracing::info!(
tenant_id = %attach_req.tenant_id,
%ps_id,
generation = %tenant_state.generation,
"dropping",
);
} else {
tracing::info!(
tenant_id = %attach_req.tenant_id,
"no-op: tenant already has no pageserver");
}
tenant_state.pageserver = attach_req.node_id;
let generation = tenant_state.generation;
tracing::info!(
"handle_attach_hook: tenant {} set generation {}, pageserver {}",
attach_req.tenant_id,
tenant_state.generation,
attach_req.node_id.unwrap_or(utils::id::NodeId(0xfffffff))
);
locked.save().await.map_err(ApiError::InternalServerError)?;
json_response(
StatusCode::OK,
AttachHookResponse {
gen: attach_req.node_id.map(|_| generation),
},
)
}
async fn handle_inspect(mut req: Request<Body>) -> Result<Response<Body>, ApiError> {
let inspect_req = json_request::<InspectRequest>(&mut req).await?;
let state = get_state(&req).inner.clone();
let locked = state.write().await;
let tenant_state = locked.tenants.get(&inspect_req.tenant_id);
json_response(
StatusCode::OK,
InspectResponse {
attachment: tenant_state.and_then(|s| s.pageserver.map(|ps| (s.generation, ps))),
},
)
}
fn make_router(persistent_state: PersistentState) -> RouterBuilder<hyper::Body, ApiError> {
endpoint::make_router()
.data(Arc::new(State::new(persistent_state)))
.post("/re-attach", |r| request_span(r, handle_re_attach))
.post("/validate", |r| request_span(r, handle_validate))
.post("/attach-hook", |r| request_span(r, handle_attach_hook))
.post("/inspect", |r| request_span(r, handle_inspect))
}
#[tokio::main]
async fn main() -> anyhow::Result<()> {
logging::init(
LogFormat::Plain,
logging::TracingErrorLayerEnablement::Disabled,
logging::Output::Stdout,
)?;
let args = Cli::parse();
tracing::info!(
"Starting, state at {}, listening on {}",
args.path.to_string_lossy(),
args.listen
);
let persistent_state = PersistentState::load_or_new(&args.path).await;
let http_listener = tcp_listener::bind(args.listen)?;
let router = make_router(persistent_state)
.build()
.map_err(|err| anyhow!(err))?;
let service = utils::http::RouterService::new(router).unwrap();
let server = hyper::Server::from_tcp(http_listener)?.serve(service);
tracing::info!("Serving on {0}", args.listen);
tokio::task::spawn(server);
ShutdownSignals::handle(|signal| match signal {
Signal::Interrupt | Signal::Terminate | Signal::Quit => {
tracing::info!("Got {}. Terminating", signal.name());
// We're just a test helper: no graceful shutdown.
std::process::exit(0);
}
})?;
Ok(())
}
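For orientation, a sketch of a client-side call against the /attach-hook route registered above, mirroring what the control_plane client does (assumes the reqwest crate with its json feature and the url crate, plus the same imports as this file):

async fn attach_hook_example(
    base: &url::Url,
    tenant_id: TenantId,
    node_id: NodeId,
) -> anyhow::Result<Option<u32>> {
    let req = AttachHookRequest { tenant_id, node_id: Some(node_id) };
    let resp: AttachHookResponse = reqwest::Client::new()
        .post(base.join("attach-hook")?)
        .json(&req)
        .send()
        .await?
        .error_for_status()?
        .json()
        .await?;
    // Some(generation) when an attachment was issued, None when it was dropped.
    Ok(resp.gen)
}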

View File

@@ -6,26 +6,21 @@
//! rely on `neon_local` to set up the environment for each test.
//!
use anyhow::{anyhow, bail, Context, Result};
use clap::{value_parser, Arg, ArgAction, ArgMatches, Command, ValueEnum};
use clap::{value_parser, Arg, ArgAction, ArgMatches, Command};
use compute_api::spec::ComputeMode;
use control_plane::attachment_service::{
AttachmentService, NodeAvailability, NodeConfigureRequest, NodeSchedulingPolicy,
};
use control_plane::attachment_service::AttachmentService;
use control_plane::endpoint::ComputeControlPlane;
use control_plane::local_env::{InitForceMode, LocalEnv};
use control_plane::local_env::LocalEnv;
use control_plane::pageserver::{PageServerNode, PAGESERVER_REMOTE_STORAGE_DIR};
use control_plane::safekeeper::SafekeeperNode;
use control_plane::tenant_migration::migrate_tenant;
use control_plane::{broker, local_env};
use pageserver_api::models::{
ShardParameters, TenantCreateRequest, TimelineCreateRequest, TimelineInfo,
};
use pageserver_api::shard::{ShardCount, ShardStripeSize, TenantShardId};
use pageserver_api::models::TimelineInfo;
use pageserver_api::{
DEFAULT_HTTP_LISTEN_PORT as DEFAULT_PAGESERVER_HTTP_PORT,
DEFAULT_PG_LISTEN_PORT as DEFAULT_PAGESERVER_PG_PORT,
};
use postgres_backend::AuthType;
use postgres_connection::parse_host_port;
use safekeeper_api::{
DEFAULT_HTTP_LISTEN_PORT as DEFAULT_SAFEKEEPER_HTTP_PORT,
DEFAULT_PG_LISTEN_PORT as DEFAULT_SAFEKEEPER_PG_PORT,
@@ -35,7 +30,6 @@ use std::path::PathBuf;
use std::process::exit;
use std::str::FromStr;
use storage_broker::DEFAULT_LISTEN_ADDR as DEFAULT_BROKER_ADDR;
use url::Host;
use utils::{
auth::{Claims, Scope},
id::{NodeId, TenantId, TenantTimelineId, TimelineId},
@@ -282,10 +276,10 @@ fn print_timeline(
/// Connects to the pageserver to query this information.
async fn get_timeline_infos(
env: &local_env::LocalEnv,
tenant_shard_id: &TenantShardId,
tenant_id: &TenantId,
) -> Result<HashMap<TimelineId, TimelineInfo>> {
Ok(get_default_pageserver(env)
.timeline_list(tenant_shard_id)
.timeline_list(tenant_id)
.await?
.into_iter()
.map(|timeline_info| (timeline_info.timeline_id, timeline_info))
@@ -303,20 +297,6 @@ fn get_tenant_id(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> anyhow::R
}
}
// Helper function to parse the --tenant-id option, for commands that accept a shard suffix
fn get_tenant_shard_id(
sub_match: &ArgMatches,
env: &local_env::LocalEnv,
) -> anyhow::Result<TenantShardId> {
if let Some(tenant_id_from_arguments) = parse_tenant_shard_id(sub_match).transpose() {
tenant_id_from_arguments
} else if let Some(default_id) = env.default_tenant_id {
Ok(TenantShardId::unsharded(default_id))
} else {
anyhow::bail!("No tenant shard id. Use --tenant-id, or set a default tenant");
}
}
fn parse_tenant_id(sub_match: &ArgMatches) -> anyhow::Result<Option<TenantId>> {
sub_match
.get_one::<String>("tenant-id")
@@ -325,14 +305,6 @@ fn parse_tenant_id(sub_match: &ArgMatches) -> anyhow::Result<Option<TenantId>> {
.context("Failed to parse tenant id from the argument string")
}
fn parse_tenant_shard_id(sub_match: &ArgMatches) -> anyhow::Result<Option<TenantShardId>> {
sub_match
.get_one::<String>("tenant-id")
.map(|id_str| TenantShardId::from_str(id_str))
.transpose()
.context("Failed to parse tenant shard id from the argument string")
}
fn parse_timeline_id(sub_match: &ArgMatches) -> anyhow::Result<Option<TimelineId>> {
sub_match
.get_one::<String>("timeline-id")
@@ -366,7 +338,7 @@ fn handle_init(init_match: &ArgMatches) -> anyhow::Result<LocalEnv> {
let mut env =
LocalEnv::parse_config(&toml_file).context("Failed to create neon configuration")?;
let force = init_match.get_one("force").expect("we set a default value");
let force = init_match.get_flag("force");
env.init(pg_version, force)
.context("Failed to initialize neon repository")?;
@@ -421,68 +393,47 @@ async fn handle_tenant(
Some(("create", create_match)) => {
let tenant_conf: HashMap<_, _> = create_match
.get_many::<String>("config")
.map(|vals: clap::parser::ValuesRef<'_, String>| {
vals.flat_map(|c| c.split_once(':')).collect()
})
.map(|vals| vals.flat_map(|c| c.split_once(':')).collect())
.unwrap_or_default();
let shard_count: u8 = create_match
.get_one::<u8>("shard-count")
.cloned()
.unwrap_or(0);
let shard_stripe_size: Option<u32> =
create_match.get_one::<u32>("shard-stripe-size").cloned();
let tenant_conf = PageServerNode::parse_config(tenant_conf)?;
// If tenant ID was not specified, generate one
let tenant_id = parse_tenant_id(create_match)?.unwrap_or_else(TenantId::generate);
// We must register the tenant with the attachment service, so
// that when the pageserver restarts, it will be re-attached.
let attachment_service = AttachmentService::from_env(env);
attachment_service
.tenant_create(TenantCreateRequest {
// Note that ::unsharded here isn't actually because the tenant is unsharded; it's because the
// attachment service expects a shard-naive tenant_id in this attribute, and the TenantCreateRequest
// type is used both in the attachment service (for creating tenants) and in the pageserver (for creating shards)
new_tenant_id: TenantShardId::unsharded(tenant_id),
generation: None,
shard_parameters: ShardParameters {
count: ShardCount(shard_count),
stripe_size: shard_stripe_size
.map(ShardStripeSize)
.unwrap_or(ShardParameters::DEFAULT_STRIPE_SIZE),
},
config: tenant_conf,
})
let generation = if env.control_plane_api.is_some() {
// We must register the tenant with the attachment service, so
// that when the pageserver restarts, it will be re-attached.
let attachment_service = AttachmentService::from_env(env);
attachment_service
.attach_hook(tenant_id, pageserver.conf.id)
.await?
} else {
None
};
pageserver
.tenant_create(tenant_id, generation, tenant_conf)
.await?;
println!("tenant {tenant_id} successfully created on the pageserver");
// Create an initial timeline for the new tenant
let new_timeline_id =
parse_timeline_id(create_match)?.unwrap_or(TimelineId::generate());
let new_timeline_id = parse_timeline_id(create_match)?;
let pg_version = create_match
.get_one::<u32>("pg-version")
.copied()
.context("Failed to parse postgres version from the argument string")?;
// FIXME: passing None for ancestor_start_lsn is not kosher in a sharded world: we can't have
// different shards picking different start lsns. Maybe we have to teach the attachment service
// to let shard 0 branch first and then propagate the chosen LSN to other shards.
attachment_service
.tenant_timeline_create(
let timeline_info = pageserver
.timeline_create(
tenant_id,
TimelineCreateRequest {
new_timeline_id,
ancestor_timeline_id: None,
ancestor_start_lsn: None,
existing_initdb_timeline_id: None,
pg_version: Some(pg_version),
},
new_timeline_id,
None,
None,
Some(pg_version),
None,
)
.await?;
let new_timeline_id = timeline_info.timeline_id;
let last_record_lsn = timeline_info.last_record_lsn;
env.register_branch_mapping(
DEFAULT_BRANCH_NAME.to_string(),
@@ -490,7 +441,9 @@ async fn handle_tenant(
new_timeline_id,
)?;
println!("Created an initial timeline '{new_timeline_id}' for tenant: {tenant_id}",);
println!(
"Created an initial timeline '{new_timeline_id}' at Lsn {last_record_lsn} for tenant: {tenant_id}",
);
if create_match.get_flag("set-default") {
println!("Setting tenant {tenant_id} as a default one");
@@ -517,64 +470,14 @@ async fn handle_tenant(
println!("tenant {tenant_id} successfully configured on the pageserver");
}
Some(("migrate", matches)) => {
let tenant_shard_id = get_tenant_shard_id(matches, env)?;
let tenant_id = get_tenant_id(matches, env)?;
let new_pageserver = get_pageserver(env, matches)?;
let new_pageserver_id = new_pageserver.conf.id;
let attachment_service = AttachmentService::from_env(env);
attachment_service
.tenant_migrate(tenant_shard_id, new_pageserver_id)
.await?;
println!("tenant {tenant_shard_id} migrated to {}", new_pageserver_id);
migrate_tenant(env, tenant_id, new_pageserver).await?;
println!("tenant {tenant_id} migrated to {}", new_pageserver_id);
}
Some(("status", matches)) => {
let tenant_id = get_tenant_id(matches, env)?;
let mut shard_table = comfy_table::Table::new();
shard_table.set_header(["Shard", "Pageserver", "Physical Size"]);
let mut tenant_synthetic_size = None;
let attachment_service = AttachmentService::from_env(env);
for shard in attachment_service.tenant_locate(tenant_id).await?.shards {
let pageserver =
PageServerNode::from_env(env, env.get_pageserver_conf(shard.node_id)?);
let size = pageserver
.http_client
.tenant_details(shard.shard_id)
.await?
.tenant_info
.current_physical_size
.unwrap();
shard_table.add_row([
format!("{}", shard.shard_id.shard_slug()),
format!("{}", shard.node_id.0),
format!("{} MiB", size / (1024 * 1024)),
]);
if shard.shard_id.is_zero() {
tenant_synthetic_size =
Some(pageserver.tenant_synthetic_size(shard.shard_id).await?);
}
}
let Some(synthetic_size) = tenant_synthetic_size else {
bail!("Shard 0 not found")
};
let mut tenant_table = comfy_table::Table::new();
tenant_table.add_row(["Tenant ID".to_string(), tenant_id.to_string()]);
tenant_table.add_row([
"Synthetic size".to_string(),
format!("{} MiB", synthetic_size.size.unwrap_or(0) / (1024 * 1024)),
]);
println!("{tenant_table}");
println!("{shard_table}");
}
Some((sub_name, _)) => bail!("Unexpected tenant subcommand '{}'", sub_name),
None => bail!("no tenant subcommand provided"),
}
@@ -586,10 +489,8 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local
match timeline_match.subcommand() {
Some(("list", list_match)) => {
// TODO(sharding): this command shouldn't have to specify a shard ID: we should ask the attachment service
// where shard 0 is attached, and query there.
let tenant_shard_id = get_tenant_shard_id(list_match, env)?;
let timelines = pageserver.timeline_list(&tenant_shard_id).await?;
let tenant_id = get_tenant_id(list_match, env)?;
let timelines = pageserver.timeline_list(&tenant_id).await?;
print_timelines_tree(timelines, env.timeline_name_mappings())?;
}
Some(("create", create_match)) => {
@@ -604,19 +505,18 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local
.context("Failed to parse postgres version from the argument string")?;
let new_timeline_id_opt = parse_timeline_id(create_match)?;
let new_timeline_id = new_timeline_id_opt.unwrap_or(TimelineId::generate());
let attachment_service = AttachmentService::from_env(env);
let create_req = TimelineCreateRequest {
new_timeline_id,
ancestor_timeline_id: None,
existing_initdb_timeline_id: None,
ancestor_start_lsn: None,
pg_version: Some(pg_version),
};
let timeline_info = attachment_service
.tenant_timeline_create(tenant_id, create_req)
let timeline_info = pageserver
.timeline_create(
tenant_id,
new_timeline_id_opt,
None,
None,
Some(pg_version),
None,
)
.await?;
let new_timeline_id = timeline_info.timeline_id;
let last_record_lsn = timeline_info.last_record_lsn;
env.register_branch_mapping(new_branch_name.to_string(), tenant_id, new_timeline_id)?;
@@ -674,6 +574,7 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local
None,
pg_version,
ComputeMode::Primary,
DEFAULT_PAGESERVER_ID,
)?;
println!("Done");
}
@@ -697,18 +598,17 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local
.map(|lsn_str| Lsn::from_str(lsn_str))
.transpose()
.context("Failed to parse ancestor start Lsn from the request")?;
let new_timeline_id = TimelineId::generate();
let attachment_service = AttachmentService::from_env(env);
let create_req = TimelineCreateRequest {
new_timeline_id,
ancestor_timeline_id: Some(ancestor_timeline_id),
existing_initdb_timeline_id: None,
ancestor_start_lsn: start_lsn,
pg_version: None,
};
let timeline_info = attachment_service
.tenant_timeline_create(tenant_id, create_req)
let timeline_info = pageserver
.timeline_create(
tenant_id,
None,
start_lsn,
Some(ancestor_timeline_id),
None,
None,
)
.await?;
let new_timeline_id = timeline_info.timeline_id;
let last_record_lsn = timeline_info.last_record_lsn;
@@ -735,10 +635,8 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
match sub_name {
"list" => {
// TODO(sharding): this command shouldn't have to specify a shard ID: we should ask the attachment service
// where shard 0 is attached, and query there.
let tenant_shard_id = get_tenant_shard_id(sub_args, env)?;
let timeline_infos = get_timeline_infos(env, &tenant_shard_id)
let tenant_id = get_tenant_id(sub_args, env)?;
let timeline_infos = get_timeline_infos(env, &tenant_id)
.await
.unwrap_or_else(|e| {
eprintln!("Failed to load timeline info: {}", e);
@@ -763,7 +661,7 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
for (endpoint_id, endpoint) in cplane
.endpoints
.iter()
.filter(|(_, endpoint)| endpoint.tenant_id == tenant_shard_id.tenant_id)
.filter(|(_, endpoint)| endpoint.tenant_id == tenant_id)
{
let lsn_str = match endpoint.mode {
ComputeMode::Static(lsn) => {
@@ -782,10 +680,7 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
};
let branch_name = timeline_name_mappings
.get(&TenantTimelineId::new(
tenant_shard_id.tenant_id,
endpoint.timeline_id,
))
.get(&TenantTimelineId::new(tenant_id, endpoint.timeline_id))
.map(|name| name.as_str())
.unwrap_or("?");
@@ -833,6 +728,13 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
.copied()
.unwrap_or(false);
let pageserver_id =
if let Some(id_str) = sub_args.get_one::<String>("endpoint-pageserver-id") {
NodeId(id_str.parse().context("while parsing pageserver id")?)
} else {
DEFAULT_PAGESERVER_ID
};
let mode = match (lsn, hot_standby) {
(Some(lsn), false) => ComputeMode::Static(lsn),
(None, true) => ComputeMode::Replica,
@@ -860,6 +762,7 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
http_port,
pg_version,
mode,
pageserver_id,
)?;
}
"start" => {
@@ -869,11 +772,9 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
let pageserver_id =
if let Some(id_str) = sub_args.get_one::<String>("endpoint-pageserver-id") {
Some(NodeId(
id_str.parse().context("while parsing pageserver id")?,
))
NodeId(id_str.parse().context("while parsing pageserver id")?)
} else {
None
DEFAULT_PAGESERVER_ID
};
let remote_ext_config = sub_args.get_one::<String>("remote-ext-config");
@@ -904,38 +805,7 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
endpoint.timeline_id,
)?;
let (pageservers, stripe_size) = if let Some(pageserver_id) = pageserver_id {
let conf = env.get_pageserver_conf(pageserver_id).unwrap();
let parsed = parse_host_port(&conf.listen_pg_addr).expect("Bad config");
(
vec![(parsed.0, parsed.1.unwrap_or(5432))],
// If caller is telling us what pageserver to use, this is not a tenant which is
// fully managed by the attachment service, and therefore not sharded.
ShardParameters::DEFAULT_STRIPE_SIZE,
)
} else {
// Look up the currently attached location of the tenant, and its striping metadata,
// to pass these on to postgres.
let attachment_service = AttachmentService::from_env(env);
let locate_result = attachment_service.tenant_locate(endpoint.tenant_id).await?;
let pageservers = locate_result
.shards
.into_iter()
.map(|shard| {
(
Host::parse(&shard.listen_pg_addr)
.expect("Attachment service reported bad hostname"),
shard.listen_pg_port,
)
})
.collect::<Vec<_>>();
let stripe_size = locate_result.shard_params.stripe_size;
(pageservers, stripe_size)
};
assert!(!pageservers.is_empty());
let ps_conf = env.get_pageserver_conf(DEFAULT_PAGESERVER_ID)?;
let ps_conf = env.get_pageserver_conf(pageserver_id)?;
let auth_token = if matches!(ps_conf.pg_auth_type, AuthType::NeonJWT) {
let claims = Claims::new(Some(endpoint.tenant_id), Scope::Tenant);
@@ -946,13 +816,7 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
println!("Starting existing endpoint {endpoint_id}...");
endpoint
.start(
&auth_token,
safekeepers,
pageservers,
remote_ext_config,
stripe_size.0 as usize,
)
.start(&auth_token, safekeepers, remote_ext_config)
.await?;
}
"reconfigure" => {
@@ -963,31 +827,15 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
.endpoints
.get(endpoint_id.as_str())
.with_context(|| format!("postgres endpoint {endpoint_id} is not found"))?;
let pageservers =
let pageserver_id =
if let Some(id_str) = sub_args.get_one::<String>("endpoint-pageserver-id") {
let ps_id = NodeId(id_str.parse().context("while parsing pageserver id")?);
let pageserver = PageServerNode::from_env(env, env.get_pageserver_conf(ps_id)?);
vec![(
pageserver.pg_connection_config.host().clone(),
pageserver.pg_connection_config.port(),
)]
Some(NodeId(
id_str.parse().context("while parsing pageserver id")?,
))
} else {
let attachment_service = AttachmentService::from_env(env);
attachment_service
.tenant_locate(endpoint.tenant_id)
.await?
.shards
.into_iter()
.map(|shard| {
(
Host::parse(&shard.listen_pg_addr)
.expect("Attachment service reported malformed host"),
shard.listen_pg_port,
)
})
.collect::<Vec<_>>()
None
};
endpoint.reconfigure(pageservers).await?;
endpoint.reconfigure(pageserver_id).await?;
}
"stop" => {
let endpoint_id = sub_args
@@ -1111,21 +959,6 @@ async fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->
}
}
Some(("set-state", subcommand_args)) => {
let pageserver = get_pageserver(env, subcommand_args)?;
let scheduling = subcommand_args.get_one("scheduling");
let availability = subcommand_args.get_one("availability");
let attachment_service = AttachmentService::from_env(env);
attachment_service
.node_configure(NodeConfigureRequest {
node_id: pageserver.conf.id,
scheduling: scheduling.cloned(),
availability: availability.cloned(),
})
.await?;
}
Some(("status", subcommand_args)) => {
match get_pageserver(env, subcommand_args)?.check_status().await {
Ok(_) => println!("Page server is up and running"),
@@ -1433,15 +1266,9 @@ fn cli() -> Command {
.required(false);
let force_arg = Arg::new("force")
.value_parser(value_parser!(InitForceMode))
.value_parser(value_parser!(bool))
.long("force")
.default_value(
InitForceMode::MustNotExist
.to_possible_value()
.unwrap()
.get_name()
.to_owned(),
)
.action(ArgAction::SetTrue)
.help("Force initialization even if the repository is not empty")
.required(false);
@@ -1525,8 +1352,6 @@ fn cli() -> Command {
.arg(pg_version_arg.clone())
.arg(Arg::new("set-default").long("set-default").action(ArgAction::SetTrue).required(false)
.help("Use this tenant in future CLI commands where tenant_id is needed, but not specified"))
.arg(Arg::new("shard-count").value_parser(value_parser!(u8)).long("shard-count").action(ArgAction::Set).help("Number of shards in the new tenant (default 1)"))
.arg(Arg::new("shard-stripe-size").value_parser(value_parser!(u32)).long("shard-stripe-size").action(ArgAction::Set).help("Sharding stripe size in pages"))
)
.subcommand(Command::new("set-default").arg(tenant_id_arg.clone().required(true))
.about("Set a particular tenant as default in future CLI commands where tenant_id is needed, but not specified"))
@@ -1537,9 +1362,6 @@ fn cli() -> Command {
.about("Migrate a tenant from one pageserver to another")
.arg(tenant_id_arg.clone())
.arg(pageserver_id_arg.clone()))
.subcommand(Command::new("status")
.about("Human readable summary of the tenant's shards and attachment locations")
.arg(tenant_id_arg.clone()))
)
.subcommand(
Command::new("pageserver")
@@ -1559,12 +1381,6 @@ fn cli() -> Command {
.about("Restart local pageserver")
.arg(pageserver_config_args.clone())
)
.subcommand(Command::new("set-state")
.arg(Arg::new("availability").value_parser(value_parser!(NodeAvailability)).long("availability").action(ArgAction::Set).help("Availability state: offline,active"))
.arg(Arg::new("scheduling").value_parser(value_parser!(NodeSchedulingPolicy)).long("scheduling").action(ArgAction::Set).help("Scheduling state: draining,pause,filling,active"))
.about("Set scheduling or availability state of pageserver node")
.arg(pageserver_config_args.clone())
)
)
.subcommand(
Command::new("attachment_service")

View File

@@ -49,11 +49,10 @@ use compute_api::spec::RemoteExtSpec;
use nix::sys::signal::kill;
use nix::sys::signal::Signal;
use serde::{Deserialize, Serialize};
use url::Host;
use utils::id::{NodeId, TenantId, TimelineId};
use crate::attachment_service::AttachmentService;
use crate::local_env::LocalEnv;
use crate::pageserver::PageServerNode;
use crate::postgresql_conf::PostgresConf;
use compute_api::responses::{ComputeState, ComputeStatus};
@@ -70,6 +69,7 @@ pub struct EndpointConf {
http_port: u16,
pg_version: u32,
skip_pg_catalog_updates: bool,
pageserver_id: NodeId,
}
//
@@ -121,14 +121,19 @@ impl ComputeControlPlane {
http_port: Option<u16>,
pg_version: u32,
mode: ComputeMode,
pageserver_id: NodeId,
) -> Result<Arc<Endpoint>> {
let pg_port = pg_port.unwrap_or_else(|| self.get_port());
let http_port = http_port.unwrap_or_else(|| self.get_port() + 1);
let pageserver =
PageServerNode::from_env(&self.env, self.env.get_pageserver_conf(pageserver_id)?);
let ep = Arc::new(Endpoint {
endpoint_id: endpoint_id.to_owned(),
pg_address: SocketAddr::new("127.0.0.1".parse().unwrap(), pg_port),
http_address: SocketAddr::new("127.0.0.1".parse().unwrap(), http_port),
env: self.env.clone(),
pageserver,
timeline_id,
mode,
tenant_id,
@@ -154,6 +159,7 @@ impl ComputeControlPlane {
pg_port,
pg_version,
skip_pg_catalog_updates: true,
pageserver_id,
})?,
)?;
std::fs::write(
@@ -212,6 +218,7 @@ pub struct Endpoint {
// These are not part of the endpoint as such, but the environment
// the endpoint runs in.
pub env: LocalEnv,
pageserver: PageServerNode,
// Optimizations
skip_pg_catalog_updates: bool,
@@ -234,11 +241,15 @@ impl Endpoint {
let conf: EndpointConf =
serde_json::from_slice(&std::fs::read(entry.path().join("endpoint.json"))?)?;
let pageserver =
PageServerNode::from_env(env, env.get_pageserver_conf(conf.pageserver_id)?);
Ok(Endpoint {
pg_address: SocketAddr::new("127.0.0.1".parse().unwrap(), conf.pg_port),
http_address: SocketAddr::new("127.0.0.1".parse().unwrap(), conf.http_port),
endpoint_id,
env: env.clone(),
pageserver,
timeline_id: conf.timeline_id,
mode: conf.mode,
tenant_id: conf.tenant_id,
@@ -458,21 +469,11 @@ impl Endpoint {
}
}
fn build_pageserver_connstr(pageservers: &[(Host, u16)]) -> String {
pageservers
.iter()
.map(|(host, port)| format!("postgresql://no_user@{host}:{port}"))
.collect::<Vec<_>>()
.join(",")
}
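// Illustrative only, with hypothetical hosts and ports: for a two-shard tenant the helper
// above yields something like
//   postgresql://no_user@ps1.local:6400,postgresql://no_user@ps2.local:6401
// i.e. one no_user connection string per shard, comma-separated, which is what ends up in
// the compute spec as `pageserver_connstring`.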
pub async fn start(
&self,
auth_token: &Option<String>,
safekeepers: Vec<NodeId>,
pageservers: Vec<(Host, u16)>,
remote_ext_config: Option<&String>,
shard_stripe_size: usize,
) -> Result<()> {
if self.status() == "running" {
anyhow::bail!("The endpoint is already running");
@@ -486,9 +487,13 @@ impl Endpoint {
std::fs::remove_dir_all(self.pgdata())?;
}
let pageserver_connstring = Self::build_pageserver_connstr(&pageservers);
assert!(!pageserver_connstring.is_empty());
let pageserver_connstring = {
let config = &self.pageserver.pg_connection_config;
let (host, port) = (config.host(), config.port());
// NOTE: avoid spaces in the connection string, because it is less error-prone if we forward it somewhere.
format!("postgresql://no_user@{host}:{port}")
};
let mut safekeeper_connstrings = Vec::new();
if self.mode == ComputeMode::Primary {
for sk_id in safekeepers {
@@ -538,7 +543,6 @@ impl Endpoint {
storage_auth_token: auth_token.clone(),
remote_extensions,
pgbouncer_settings: None,
shard_stripe_size: Some(shard_stripe_size),
};
let spec_path = self.endpoint_path().join("spec.json");
std::fs::write(spec_path, serde_json::to_string_pretty(&spec)?)?;
@@ -661,7 +665,7 @@ impl Endpoint {
}
}
pub async fn reconfigure(&self, mut pageservers: Vec<(Host, u16)>) -> Result<()> {
pub async fn reconfigure(&self, pageserver_id: Option<NodeId>) -> Result<()> {
let mut spec: ComputeSpec = {
let spec_path = self.endpoint_path().join("spec.json");
let file = std::fs::File::open(spec_path)?;
@@ -671,26 +675,24 @@ impl Endpoint {
let postgresql_conf = self.read_postgresql_conf()?;
spec.cluster.postgresql_conf = Some(postgresql_conf);
// If we weren't given explicit pageservers, query the attachment service
if pageservers.is_empty() {
let attachment_service = AttachmentService::from_env(&self.env);
let locate_result = attachment_service.tenant_locate(self.tenant_id).await?;
pageservers = locate_result
.shards
.into_iter()
.map(|shard| {
(
Host::parse(&shard.listen_pg_addr)
.expect("Attachment service reported bad hostname"),
shard.listen_pg_port,
)
})
.collect::<Vec<_>>();
}
if let Some(pageserver_id) = pageserver_id {
let endpoint_config_path = self.endpoint_path().join("endpoint.json");
let mut endpoint_conf: EndpointConf = {
let file = std::fs::File::open(&endpoint_config_path)?;
serde_json::from_reader(file)?
};
endpoint_conf.pageserver_id = pageserver_id;
std::fs::write(
endpoint_config_path,
serde_json::to_string_pretty(&endpoint_conf)?,
)?;
let pageserver_connstr = Self::build_pageserver_connstr(&pageservers);
assert!(!pageserver_connstr.is_empty());
spec.pageserver_connstring = Some(pageserver_connstr);
let pageserver =
PageServerNode::from_env(&self.env, self.env.get_pageserver_conf(pageserver_id)?);
let ps_http_conf = &pageserver.pg_connection_config;
let (host, port) = (ps_http_conf.host(), ps_http_conf.port());
spec.pageserver_connstring = Some(format!("postgresql://no_user@{host}:{port}"));
}
let client = reqwest::Client::new();
let response = client

View File

@@ -14,3 +14,4 @@ pub mod local_env;
pub mod pageserver;
pub mod postgresql_conf;
pub mod safekeeper;
pub mod tenant_migration;

View File

@@ -5,7 +5,6 @@
use anyhow::{bail, ensure, Context};
use clap::ValueEnum;
use postgres_backend::AuthType;
use reqwest::Url;
use serde::{Deserialize, Serialize};
@@ -163,31 +162,6 @@ impl Default for SafekeeperConf {
}
}
#[derive(Clone, Copy)]
pub enum InitForceMode {
MustNotExist,
EmptyDirOk,
RemoveAllContents,
}
impl ValueEnum for InitForceMode {
fn value_variants<'a>() -> &'a [Self] {
&[
Self::MustNotExist,
Self::EmptyDirOk,
Self::RemoveAllContents,
]
}
fn to_possible_value(&self) -> Option<clap::builder::PossibleValue> {
Some(clap::builder::PossibleValue::new(match self {
InitForceMode::MustNotExist => "must-not-exist",
InitForceMode::EmptyDirOk => "empty-dir-ok",
InitForceMode::RemoveAllContents => "remove-all-contents",
}))
}
}
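// For reference, these are the strings clap accepts for the flag built from this enum, so an
// invocation would pass (illustratively) `--force must-not-exist`, `--force empty-dir-ok` or
// `--force remove-all-contents`; the flag itself is declared as `force_arg` in cli().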
impl SafekeeperConf {
/// Compute is served by a port on which only tenant-scoped tokens are allowed, if
/// it is configured.
@@ -251,13 +225,7 @@ impl LocalEnv {
if let Some(conf) = self.pageservers.iter().find(|node| node.id == id) {
Ok(conf)
} else {
let have_ids = self
.pageservers
.iter()
.map(|node| format!("{}:{}", node.id, node.listen_http_addr))
.collect::<Vec<_>>();
let joined = have_ids.join(",");
bail!("could not find pageserver {id}, have ids {joined}")
bail!("could not find pageserver {id}")
}
}
@@ -416,7 +384,7 @@ impl LocalEnv {
//
// Initialize a new Neon repository
//
pub fn init(&mut self, pg_version: u32, force: &InitForceMode) -> anyhow::Result<()> {
pub fn init(&mut self, pg_version: u32, force: bool) -> anyhow::Result<()> {
// check if config already exists
let base_path = &self.base_data_dir;
ensure!(
@@ -425,34 +393,25 @@ impl LocalEnv {
);
if base_path.exists() {
match force {
InitForceMode::MustNotExist => {
bail!(
"directory '{}' already exists. Perhaps already initialized?",
base_path.display()
);
}
InitForceMode::EmptyDirOk => {
if let Some(res) = std::fs::read_dir(base_path)?.next() {
res.context("check if directory is empty")?;
anyhow::bail!("directory not empty: {base_path:?}");
}
}
InitForceMode::RemoveAllContents => {
println!("removing all contents of '{}'", base_path.display());
// instead of directly calling `remove_dir_all`, we keep the original dir but remove
// all contents inside. This helps if the developer symlinks another directory (e.g.,
// S3 local SSD) to the `.neon` base directory.
for entry in std::fs::read_dir(base_path)? {
let entry = entry?;
let path = entry.path();
if path.is_dir() {
fs::remove_dir_all(&path)?;
} else {
fs::remove_file(&path)?;
}
if force {
println!("removing all contents of '{}'", base_path.display());
// instead of directly calling `remove_dir_all`, we keep the original dir but remove
// all contents inside. This helps if the developer symlinks another directory (e.g.,
// S3 local SSD) to the `.neon` base directory.
for entry in std::fs::read_dir(base_path)? {
let entry = entry?;
let path = entry.path();
if path.is_dir() {
fs::remove_dir_all(&path)?;
} else {
fs::remove_file(&path)?;
}
}
} else {
bail!(
"directory '{}' already exists. Perhaps already initialized? (Hint: use --force to remove all contents)",
base_path.display()
);
}
}

View File

@@ -17,9 +17,7 @@ use std::time::Duration;
use anyhow::{bail, Context};
use camino::Utf8PathBuf;
use futures::SinkExt;
use pageserver_api::models::{
self, LocationConfig, ShardParameters, TenantHistorySize, TenantInfo, TimelineInfo,
};
use pageserver_api::models::{self, LocationConfig, TenantInfo, TimelineInfo};
use pageserver_api::shard::TenantShardId;
use pageserver_client::mgmt_api;
use postgres_backend::AuthType;
@@ -108,16 +106,6 @@ impl PageServerNode {
"control_plane_api='{}'",
control_plane_api.as_str()
));
// Attachment service uses the same auth as pageserver: if JWT is enabled
// for us, we will also need it to talk to them.
if matches!(self.conf.http_auth_type, AuthType::NeonJWT) {
let jwt_token = self
.env
.generate_auth_token(&Claims::new(None, Scope::PageServerApi))
.unwrap();
overrides.push(format!("control_plane_api_token='{}'", jwt_token));
}
}
if !cli_overrides
@@ -313,8 +301,16 @@ impl PageServerNode {
pub async fn tenant_list(&self) -> mgmt_api::Result<Vec<TenantInfo>> {
self.http_client.list_tenants().await
}
pub fn parse_config(mut settings: HashMap<&str, &str>) -> anyhow::Result<models::TenantConfig> {
let result = models::TenantConfig {
pub async fn tenant_create(
&self,
new_tenant_id: TenantId,
generation: Option<u32>,
settings: HashMap<&str, &str>,
) -> anyhow::Result<TenantId> {
let mut settings = settings.clone();
let config = models::TenantConfig {
checkpoint_distance: settings
.remove("checkpoint_distance")
.map(|x| x.parse::<u64>())
@@ -375,26 +371,11 @@ impl PageServerNode {
.context("Failed to parse 'gc_feedback' as bool")?,
heatmap_period: settings.remove("heatmap_period").map(|x| x.to_string()),
};
if !settings.is_empty() {
bail!("Unrecognized tenant settings: {settings:?}")
} else {
Ok(result)
}
}
pub async fn tenant_create(
&self,
new_tenant_id: TenantId,
generation: Option<u32>,
settings: HashMap<&str, &str>,
) -> anyhow::Result<TenantId> {
let config = Self::parse_config(settings.clone())?;
let request = models::TenantCreateRequest {
new_tenant_id: TenantShardId::unsharded(new_tenant_id),
generation,
config,
shard_parameters: ShardParameters::default(),
};
if !settings.is_empty() {
bail!("Unrecognized tenant settings: {settings:?}")
@@ -490,21 +471,18 @@ impl PageServerNode {
pub async fn location_config(
&self,
tenant_shard_id: TenantShardId,
tenant_id: TenantId,
config: LocationConfig,
flush_ms: Option<Duration>,
) -> anyhow::Result<()> {
Ok(self
.http_client
.location_config(tenant_shard_id, config, flush_ms)
.location_config(tenant_id, config, flush_ms)
.await?)
}
pub async fn timeline_list(
&self,
tenant_shard_id: &TenantShardId,
) -> anyhow::Result<Vec<TimelineInfo>> {
Ok(self.http_client.list_timelines(*tenant_shard_id).await?)
pub async fn timeline_list(&self, tenant_id: &TenantId) -> anyhow::Result<Vec<TimelineInfo>> {
Ok(self.http_client.list_timelines(*tenant_id).await?)
}
pub async fn tenant_secondary_download(&self, tenant_id: &TenantShardId) -> anyhow::Result<()> {
@@ -516,13 +494,15 @@ impl PageServerNode {
pub async fn timeline_create(
&self,
tenant_shard_id: TenantShardId,
new_timeline_id: TimelineId,
tenant_id: TenantId,
new_timeline_id: Option<TimelineId>,
ancestor_start_lsn: Option<Lsn>,
ancestor_timeline_id: Option<TimelineId>,
pg_version: Option<u32>,
existing_initdb_timeline_id: Option<TimelineId>,
) -> anyhow::Result<TimelineInfo> {
// If timeline ID was not specified, generate one
let new_timeline_id = new_timeline_id.unwrap_or(TimelineId::generate());
let req = models::TimelineCreateRequest {
new_timeline_id,
ancestor_start_lsn,
@@ -530,10 +510,7 @@ impl PageServerNode {
pg_version,
existing_initdb_timeline_id,
};
Ok(self
.http_client
.timeline_create(tenant_shard_id, &req)
.await?)
Ok(self.http_client.timeline_create(tenant_id, &req).await?)
}
/// Import a basebackup prepared using either:
@@ -611,14 +588,4 @@ impl PageServerNode {
Ok(())
}
pub async fn tenant_synthetic_size(
&self,
tenant_shard_id: TenantShardId,
) -> anyhow::Result<TenantHistorySize> {
Ok(self
.http_client
.tenant_synthetic_size(tenant_shard_id)
.await?)
}
}

View File

@@ -0,0 +1,220 @@
//!
//! Functionality for migrating tenants across pageservers: unlike most of neon_local, this code
//! isn't scoped to a particular physical service, as it needs to update compute endpoints to
//! point to the new pageserver.
//!
use crate::local_env::LocalEnv;
use crate::{
attachment_service::AttachmentService, endpoint::ComputeControlPlane,
pageserver::PageServerNode,
};
use pageserver_api::models::{
LocationConfig, LocationConfigMode, LocationConfigSecondary, TenantConfig,
};
use pageserver_api::shard::TenantShardId;
use std::collections::HashMap;
use std::time::Duration;
use utils::{
id::{TenantId, TimelineId},
lsn::Lsn,
};
/// Given an attached pageserver, retrieve the LSN for all timelines
async fn get_lsns(
tenant_id: TenantId,
pageserver: &PageServerNode,
) -> anyhow::Result<HashMap<TimelineId, Lsn>> {
let timelines = pageserver.timeline_list(&tenant_id).await?;
Ok(timelines
.into_iter()
.map(|t| (t.timeline_id, t.last_record_lsn))
.collect())
}
/// Wait for the timeline LSNs on `pageserver` to catch up with or overtake
/// `baseline`.
async fn await_lsn(
tenant_id: TenantId,
pageserver: &PageServerNode,
baseline: HashMap<TimelineId, Lsn>,
) -> anyhow::Result<()> {
loop {
let latest = match get_lsns(tenant_id, pageserver).await {
Ok(l) => l,
Err(_e) => {
println!(
"🕑 Waiting for pageserver {} to activate...",
pageserver.conf.id
);
std::thread::sleep(Duration::from_millis(500));
continue;
}
};
let mut any_behind: bool = false;
for (timeline_id, baseline_lsn) in &baseline {
match latest.get(timeline_id) {
Some(latest_lsn) => {
println!("🕑 LSN origin {baseline_lsn} vs destination {latest_lsn}");
if latest_lsn < baseline_lsn {
any_behind = true;
}
}
None => {
// Expected timeline isn't yet visible on migration destination.
// (IRL we would have to account for timeline deletion, but this
// is just a test helper)
any_behind = true;
}
}
}
if !any_behind {
println!("✅ LSN caught up. Proceeding...");
break;
} else {
std::thread::sleep(Duration::from_millis(500));
}
}
Ok(())
}
/// This function spans multiple services to demonstrate live migration of a tenant
/// between pageservers:
/// - Coordinate attach/secondary/detach on pageservers
/// - Call into attachment_service for generations
/// - Reconfigure compute endpoints to point to the new attached pageserver
pub async fn migrate_tenant(
env: &LocalEnv,
tenant_id: TenantId,
dest_ps: PageServerNode,
) -> anyhow::Result<()> {
println!("🤔 Checking existing status...");
let attachment_service = AttachmentService::from_env(env);
fn build_location_config(
mode: LocationConfigMode,
generation: Option<u32>,
secondary_conf: Option<LocationConfigSecondary>,
) -> LocationConfig {
LocationConfig {
mode,
generation,
secondary_conf,
tenant_conf: TenantConfig::default(),
shard_number: 0,
shard_count: 0,
shard_stripe_size: 0,
}
}
let previous = attachment_service.inspect(tenant_id).await?;
let mut baseline_lsns = None;
if let Some((generation, origin_ps_id)) = &previous {
let origin_ps = PageServerNode::from_env(env, env.get_pageserver_conf(*origin_ps_id)?);
if origin_ps_id == &dest_ps.conf.id {
println!("🔁 Already attached to {origin_ps_id}, freshening...");
let gen = attachment_service
.attach_hook(tenant_id, dest_ps.conf.id)
.await?;
let dest_conf = build_location_config(LocationConfigMode::AttachedSingle, gen, None);
dest_ps.location_config(tenant_id, dest_conf, None).await?;
println!("✅ Migration complete");
return Ok(());
}
println!("🔁 Switching origin pageserver {origin_ps_id} to stale mode");
let stale_conf =
build_location_config(LocationConfigMode::AttachedStale, Some(*generation), None);
origin_ps
.location_config(tenant_id, stale_conf, Some(Duration::from_secs(10)))
.await?;
baseline_lsns = Some(get_lsns(tenant_id, &origin_ps).await?);
}
println!(
"🔁 Downloading latest layers to destination pageserver {}",
dest_ps.conf.id
);
match dest_ps
.tenant_secondary_download(&TenantShardId::unsharded(tenant_id))
.await
{
Ok(()) => {}
Err(_) => {
println!(" (skipping, destination wasn't in secondary mode)")
}
}
let gen = attachment_service
.attach_hook(tenant_id, dest_ps.conf.id)
.await?;
let dest_conf = build_location_config(LocationConfigMode::AttachedMulti, gen, None);
println!("🔁 Attaching to pageserver {}", dest_ps.conf.id);
dest_ps.location_config(tenant_id, dest_conf, None).await?;
if let Some(baseline) = baseline_lsns {
println!("🕑 Waiting for LSN to catch up...");
await_lsn(tenant_id, &dest_ps, baseline).await?;
}
let cplane = ComputeControlPlane::load(env.clone())?;
for (endpoint_name, endpoint) in &cplane.endpoints {
if endpoint.tenant_id == tenant_id {
println!(
"🔁 Reconfiguring endpoint {} to use pageserver {}",
endpoint_name, dest_ps.conf.id
);
endpoint.reconfigure(Some(dest_ps.conf.id)).await?;
}
}
for other_ps_conf in &env.pageservers {
if other_ps_conf.id == dest_ps.conf.id {
continue;
}
let other_ps = PageServerNode::from_env(env, other_ps_conf);
let other_ps_tenants = other_ps.tenant_list().await?;
// Check if this tenant is attached
let found = other_ps_tenants
.into_iter()
.map(|t| t.id)
.any(|i| i.tenant_id == tenant_id);
if !found {
continue;
}
// Downgrade to a secondary location
let secondary_conf = build_location_config(
LocationConfigMode::Secondary,
None,
Some(LocationConfigSecondary { warm: true }),
);
println!(
"💤 Switching to secondary mode on pageserver {}",
other_ps.conf.id
);
other_ps
.location_config(tenant_id, secondary_conf, None)
.await?;
}
println!(
"🔁 Switching to AttachedSingle mode on pageserver {}",
dest_ps.conf.id
);
let dest_conf = build_location_config(LocationConfigMode::AttachedSingle, gen, None);
dest_ps.location_config(tenant_id, dest_conf, None).await?;
println!("✅ Migration complete");
Ok(())
}
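// Sketch of the location_config sequence driven above (a summary, not an extra code path):
// origin goes AttachedSingle -> AttachedStale; the destination warms up via a secondary
// download and becomes AttachedMulti while LSNs catch up and endpoints are reconfigured;
// any other pageserver still attached drops to Secondary; finally the destination switches
// to AttachedSingle.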

View File

@@ -75,10 +75,6 @@ pub struct ComputeSpec {
pub remote_extensions: Option<RemoteExtSpec>,
pub pgbouncer_settings: Option<HashMap<String, String>>,
// Stripe size for pageserver sharding, in pages
#[serde(default)]
pub shard_stripe_size: Option<usize>,
}
/// Feature flag to signal `compute_ctl` to enable certain experimental functionality.
@@ -86,13 +82,10 @@ pub struct ComputeSpec {
#[serde(rename_all = "snake_case")]
pub enum ComputeFeature {
// XXX: Add more feature flags here.
/// Enable the experimental activity monitor logic, which uses `pg_stat_database` to
/// track short-lived connections as user activity.
ActivityMonitorExperimental,
/// This is a special feature flag that is used to represent unknown feature flags.
/// Basically, all flags unknown to this enum are represented as this one. See the unit test
/// `parse_unknown_features()` for more details.
// This is a special feature flag that is used to represent unknown feature flags.
// Basically, all flags unknown to this enum are represented as this one. See the unit test
// `parse_unknown_features()` for more details.
#[serde(other)]
UnknownFeature,
}
@@ -289,23 +282,4 @@ mod tests {
assert!(spec.features.contains(&ComputeFeature::UnknownFeature));
assert_eq!(spec.features, vec![ComputeFeature::UnknownFeature; 2]);
}
#[test]
fn parse_known_features() {
// Test that we can properly parse known feature flags.
let file = File::open("tests/cluster_spec.json").unwrap();
let mut json: serde_json::Value = serde_json::from_reader(file).unwrap();
let ob = json.as_object_mut().unwrap();
// Add known feature flags.
let features = vec!["activity_monitor_experimental"];
ob.insert("features".into(), features.into());
let spec: ComputeSpec = serde_json::from_value(json).unwrap();
assert_eq!(
spec.features,
vec![ComputeFeature::ActivityMonitorExperimental]
);
}
}

View File

@@ -19,7 +19,6 @@ strum.workspace = true
strum_macros.workspace = true
hex.workspace = true
thiserror.workspace = true
humantime-serde.workspace = true
workspace_hack.workspace = true

View File

@@ -3,8 +3,6 @@ use byteorder::{ByteOrder, BE};
use serde::{Deserialize, Serialize};
use std::fmt;
use crate::reltag::{BlockNumber, RelTag};
/// Key used in the Repository kv-store.
///
/// The Repository treats this as an opaque struct, but see the code in pgdatadir_mapping.rs
@@ -148,22 +146,6 @@ pub fn is_rel_block_key(key: &Key) -> bool {
key.field1 == 0x00 && key.field4 != 0 && key.field6 != 0xffffffff
}
/// Guaranteed to return `Ok()` if [`is_rel_block_key`] returns `true` for `key`.
pub fn key_to_rel_block(key: Key) -> anyhow::Result<(RelTag, BlockNumber)> {
Ok(match key.field1 {
0x00 => (
RelTag {
spcnode: key.field2,
dbnode: key.field3,
relnode: key.field4,
forknum: key.field5,
},
key.field6,
),
_ => anyhow::bail!("unexpected value kind 0x{:02x}", key.field1),
})
}
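// Reading of the mapping above: a rel-block key has field1 == 0x00, in which case
// (field2, field3, field4, field5) become the RelTag's (spcnode, dbnode, relnode, forknum)
// and field6 is the block number; any other field1 value is rejected with an error.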
impl std::str::FromStr for Key {
type Err = anyhow::Error;

View File

@@ -4,7 +4,7 @@ use std::{
collections::HashMap,
io::{BufRead, Read},
num::{NonZeroU64, NonZeroUsize},
time::{Duration, SystemTime},
time::SystemTime,
};
use byteorder::{BigEndian, ReadBytesExt};
@@ -18,10 +18,7 @@ use utils::{
lsn::Lsn,
};
use crate::{
reltag::RelTag,
shard::{ShardCount, ShardStripeSize, TenantShardId},
};
use crate::{reltag::RelTag, shard::TenantShardId};
use anyhow::bail;
use bytes::{Buf, BufMut, Bytes, BytesMut};
@@ -191,31 +188,6 @@ pub struct TimelineCreateRequest {
pub pg_version: Option<u32>,
}
/// Parameters that apply to all shards in a tenant. Used during tenant creation.
#[derive(Serialize, Deserialize, Debug)]
#[serde(deny_unknown_fields)]
pub struct ShardParameters {
pub count: ShardCount,
pub stripe_size: ShardStripeSize,
}
impl ShardParameters {
pub const DEFAULT_STRIPE_SIZE: ShardStripeSize = ShardStripeSize(256 * 1024 / 8);
pub fn is_unsharded(&self) -> bool {
self.count == ShardCount(0)
}
}
impl Default for ShardParameters {
fn default() -> Self {
Self {
count: ShardCount(0),
stripe_size: Self::DEFAULT_STRIPE_SIZE,
}
}
}
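// Arithmetic note: DEFAULT_STRIPE_SIZE = 256 * 1024 / 8 = 32768 pages. Assuming the usual
// 8 KiB Postgres page size, that is 32768 * 8 KiB = 256 MiB of key space per stripe before
// striping moves on to the next shard.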
#[derive(Serialize, Deserialize, Debug)]
#[serde(deny_unknown_fields)]
pub struct TenantCreateRequest {
@@ -223,12 +195,6 @@ pub struct TenantCreateRequest {
#[serde(default)]
#[serde(skip_serializing_if = "Option::is_none")]
pub generation: Option<u32>,
// If omitted, create a single shard with TenantShardId::unsharded()
#[serde(default)]
#[serde(skip_serializing_if = "ShardParameters::is_unsharded")]
pub shard_parameters: ShardParameters,
#[serde(flatten)]
pub config: TenantConfig, // as we have a flattened field, we should reject all unknown fields in it
}
@@ -251,7 +217,7 @@ impl std::ops::Deref for TenantCreateRequest {
/// An alternative representation of `pageserver::tenant::TenantConf` with
/// simpler types.
#[derive(Serialize, Deserialize, Debug, Default, Clone, Eq, PartialEq)]
#[derive(Serialize, Deserialize, Debug, Default)]
pub struct TenantConfig {
pub checkpoint_distance: Option<u64>,
pub checkpoint_timeout: Option<String>,
@@ -266,41 +232,21 @@ pub struct TenantConfig {
pub lagging_wal_timeout: Option<String>,
pub max_lsn_wal_lag: Option<NonZeroU64>,
pub trace_read_requests: Option<bool>,
pub eviction_policy: Option<EvictionPolicy>,
// We defer the parsing of the eviction_policy field to the request handler.
// Otherwise we'd have to move the types for eviction policy into this package.
// We might do that once the eviction feature has stabilized.
// For now, this field is not even documented in the openapi_spec.yml.
pub eviction_policy: Option<serde_json::Value>,
pub min_resident_size_override: Option<u64>,
pub evictions_low_residence_duration_metric_threshold: Option<String>,
pub gc_feedback: Option<bool>,
pub heatmap_period: Option<String>,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "kind")]
pub enum EvictionPolicy {
NoEviction,
LayerAccessThreshold(EvictionPolicyLayerAccessThreshold),
}
impl EvictionPolicy {
pub fn discriminant_str(&self) -> &'static str {
match self {
EvictionPolicy::NoEviction => "NoEviction",
EvictionPolicy::LayerAccessThreshold(_) => "LayerAccessThreshold",
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub struct EvictionPolicyLayerAccessThreshold {
#[serde(with = "humantime_serde")]
pub period: Duration,
#[serde(with = "humantime_serde")]
pub threshold: Duration,
}
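// A sketch of the serialized form, assuming humantime-style duration strings (the enum is
// internally tagged by `kind`, so the threshold fields sit next to the tag):
//   {"kind": "NoEviction"}
//   {"kind": "LayerAccessThreshold", "period": "20m", "threshold": "20m"}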
/// A flattened analog of a `pageserver::tenant::LocationMode`, which
/// lists out all possible states (and the virtual "Detached" state)
/// in a flat form rather than using rust-style enums.
#[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq)]
#[derive(Serialize, Deserialize, Debug)]
pub enum LocationConfigMode {
AttachedSingle,
AttachedMulti,
@@ -309,21 +255,19 @@ pub enum LocationConfigMode {
Detached,
}
#[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq)]
#[derive(Serialize, Deserialize, Debug)]
pub struct LocationConfigSecondary {
pub warm: bool,
}
/// An alternative representation of `pageserver::tenant::LocationConf`,
/// for use in external-facing APIs.
#[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq)]
#[derive(Serialize, Deserialize, Debug)]
pub struct LocationConfig {
pub mode: LocationConfigMode,
/// If attaching, in what generation?
#[serde(default)]
pub generation: Option<u32>,
// If requesting mode `Secondary`, configuration for that.
#[serde(default)]
pub secondary_conf: Option<LocationConfigSecondary>,
@@ -336,17 +280,11 @@ pub struct LocationConfig {
#[serde(default)]
pub shard_stripe_size: u32,
// This configuration only affects attached mode, but should be provided irrespective
// of the mode, as a secondary location might transition on startup if the response
// to the `/re-attach` control plane API requests it.
// If requesting mode `Secondary`, configuration for that.
// Custom storage configuration for the tenant, if any
pub tenant_conf: TenantConfig,
}
#[derive(Serialize, Deserialize)]
pub struct LocationConfigListResponse {
pub tenant_shards: Vec<(TenantShardId, Option<LocationConfig>)>,
}
#[derive(Serialize, Deserialize)]
#[serde(transparent)]
pub struct TenantCreateResponse(pub TenantId);
@@ -359,7 +297,7 @@ pub struct StatusResponse {
#[derive(Serialize, Deserialize, Debug)]
#[serde(deny_unknown_fields)]
pub struct TenantLocationConfigRequest {
pub tenant_id: TenantShardId,
pub tenant_id: TenantId,
#[serde(flatten)]
pub config: LocationConfig, // as we have a flattened field, we should reject all unknown fields in it
}
@@ -430,8 +368,6 @@ pub struct TenantInfo {
/// If a layer is present in both local FS and S3, it counts only once.
pub current_physical_size: Option<u64>, // physical size is only included in `tenant_status` endpoint
pub attachment_status: TenantAttachmentStatus,
#[serde(skip_serializing_if = "Option::is_none")]
pub generation: Option<u32>,
}
#[derive(Serialize, Deserialize, Clone)]
@@ -722,17 +658,6 @@ pub struct PagestreamDbSizeResponse {
pub db_size: i64,
}
// This is a cut-down version of TenantHistorySize from the pageserver crate, omitting fields
// that require pageserver-internal types. It is sufficient to get the total size.
#[derive(Serialize, Deserialize, Debug)]
pub struct TenantHistorySize {
pub id: TenantId,
/// Size is a mixture of WAL and logical size, so the unit is bytes.
///
/// Will be none if `?inputs_only=true` was given.
pub size: Option<u64>,
}
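// Example of the JSON shape this maps to (values are illustrative):
//   {"id": "<tenant id>", "size": 123456789}
// with "size" being null when the request was made with `?inputs_only=true`.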
impl PagestreamFeMessage {
pub fn serialize(&self) -> Bytes {
let mut bytes = BytesMut::new();
@@ -985,7 +910,6 @@ mod tests {
state: TenantState::Active,
current_physical_size: Some(42),
attachment_status: TenantAttachmentStatus::Attached,
generation: None,
};
let expected_active = json!({
"id": original_active.id.to_string(),
@@ -1006,7 +930,6 @@ mod tests {
},
current_physical_size: Some(42),
attachment_status: TenantAttachmentStatus::Attached,
generation: None,
};
let expected_broken = json!({
"id": original_broken.id.to_string(),

View File

@@ -32,9 +32,6 @@ pub struct RelTag {
pub relnode: Oid,
}
/// Block number within a relation or SLRU. This matches PostgreSQL's BlockNumber type.
pub type BlockNumber = u32;
impl PartialOrd for RelTag {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))

View File

@@ -1,9 +1,6 @@
use std::{ops::RangeInclusive, str::FromStr};
use crate::{
key::{is_rel_block_key, Key},
models::ShardParameters,
};
use crate::key::{is_rel_block_key, Key};
use hex::FromHex;
use serde::{Deserialize, Serialize};
use thiserror;
@@ -88,12 +85,6 @@ impl TenantShardId {
pub fn is_unsharded(&self) -> bool {
self.shard_number == ShardNumber(0) && self.shard_count == ShardCount(0)
}
pub fn to_index(&self) -> ShardIndex {
ShardIndex {
shard_number: self.shard_number,
shard_count: self.shard_count,
}
}
}
/// Formatting helper
@@ -342,7 +333,7 @@ const DEFAULT_STRIPE_SIZE: ShardStripeSize = ShardStripeSize(256 * 1024 / 8);
pub struct ShardIdentity {
pub number: ShardNumber,
pub count: ShardCount,
pub stripe_size: ShardStripeSize,
stripe_size: ShardStripeSize,
layout: ShardLayout,
}
@@ -412,17 +403,6 @@ impl ShardIdentity {
}
}
/// For use when creating ShardIdentity instances for new shards, where a creation request
/// specifies the ShardParameters that apply to all shards.
pub fn from_params(number: ShardNumber, params: &ShardParameters) -> Self {
Self {
number,
count: params.count,
layout: LAYOUT_V1,
stripe_size: params.stripe_size,
}
}
fn is_broken(&self) -> bool {
self.layout == LAYOUT_BROKEN
}

View File

@@ -5,9 +5,7 @@ use std::collections::HashMap;
use std::env;
use std::num::NonZeroU32;
use std::pin::Pin;
use std::str::FromStr;
use std::sync::Arc;
use std::time::Duration;
use super::REMOTE_STORAGE_PREFIX_SEPARATOR;
use anyhow::Result;
@@ -15,14 +13,12 @@ use azure_core::request_options::{MaxResults, Metadata, Range};
use azure_core::RetryOptions;
use azure_identity::DefaultAzureCredential;
use azure_storage::StorageCredentials;
use azure_storage_blobs::blob::CopyStatus;
use azure_storage_blobs::prelude::ClientBuilder;
use azure_storage_blobs::{blob::operations::GetBlobBuilder, prelude::ContainerClient};
use bytes::Bytes;
use futures::stream::Stream;
use futures_util::StreamExt;
use http_types::{StatusCode, Url};
use tokio::time::Instant;
use http_types::StatusCode;
use tracing::debug;
use crate::s3_bucket::RequestKind;
@@ -327,49 +323,10 @@ impl RemoteStorage for AzureBlobStorage {
Ok(())
}
async fn copy(&self, from: &RemotePath, to: &RemotePath) -> anyhow::Result<()> {
let _permit = self.permit(RequestKind::Copy).await;
let blob_client = self.client.blob_client(self.relative_path_to_name(to));
let source_url = format!(
"{}/{}",
self.client.url()?,
self.relative_path_to_name(from)
);
let builder = blob_client.copy(Url::from_str(&source_url)?);
let result = builder.into_future().await?;
let mut copy_status = result.copy_status;
let start_time = Instant::now();
const MAX_WAIT_TIME: Duration = Duration::from_secs(60);
loop {
match copy_status {
CopyStatus::Aborted => {
anyhow::bail!("Received abort for copy from {from} to {to}.");
}
CopyStatus::Failed => {
anyhow::bail!("Received failure response for copy from {from} to {to}.");
}
CopyStatus::Success => return Ok(()),
CopyStatus::Pending => (),
}
// The copy is taking longer. Wait a second and then retry.
// TODO: estimate the remaining time based on copy_progress and adjust the wait accordingly
tokio::time::sleep(Duration::from_millis(1000)).await;
let properties = blob_client.get_properties().into_future().await?;
let Some(status) = properties.blob.properties.copy_status else {
tracing::warn!("copy_status for copy is None!, from={from}, to={to}");
return Ok(());
};
if start_time.elapsed() > MAX_WAIT_TIME {
anyhow::bail!("Copy from from {from} to {to} took longer than limit MAX_WAIT_TIME={}s. copy_pogress={:?}.",
MAX_WAIT_TIME.as_secs_f32(),
properties.blob.properties.copy_progress,
);
}
copy_status = status;
}
async fn copy(&self, _from: &RemotePath, _to: &RemotePath) -> anyhow::Result<()> {
Err(anyhow::anyhow!(
"copy for azure blob storage is not implemented"
))
}
}

View File

@@ -1,288 +0,0 @@
use anyhow::Context;
use camino::Utf8Path;
use remote_storage::RemotePath;
use std::collections::HashSet;
use std::sync::Arc;
use test_context::test_context;
use tracing::debug;
use crate::common::{download_to_vec, upload_stream, wrap_stream};
use super::{
MaybeEnabledStorage, MaybeEnabledStorageWithSimpleTestBlobs, MaybeEnabledStorageWithTestBlobs,
};
/// Tests that the S3 client can list all prefixes, even if the response comes paginated and requires multiple S3 queries.
/// Uses real S3 and requires [`ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME`] and related S3 cred env vars specified.
/// See the client creation in [`create_s3_client`] for details on the required env vars.
/// If real S3 tests are disabled, the test passes, skipping any real test run: currently, there's no way to mark a test as ignored at runtime with the
/// default test framework, see https://github.com/rust-lang/rust/issues/68007 for details.
///
/// First, the test creates a set of S3 objects with keys `/${random_prefix_part}/${base_prefix_str}/sub_prefix_${i}/blob_${i}` in [`upload_remote_data`]
/// where
/// * `random_prefix_part` is set for the entire S3 client during the S3 client creation in [`create_s3_client`], to avoid interference between multiple test runs
/// * `base_prefix_str` is a common prefix to use in the client requests: we want to ensure that the client is able to list nested prefixes inside the bucket
///
/// Then, the test verifies that the client returns the correct prefixes when queried:
/// * with no prefix, it lists everything after its `${random_prefix_part}/` — that should be `${base_prefix_str}` value only
/// * with `${base_prefix_str}/` prefix, it lists every `sub_prefix_${i}`
///
/// With the real S3 enabled and `#[cfg(test)]` Rust configuration used, the S3 client test adds a `max-keys` param to limit the response keys.
/// This way, we are able to test the pagination implicitly, by ensuring all results are returned from the remote storage while avoiding uploading too many blobs to S3,
/// since the current default AWS S3 pagination limit is 1000.
/// (see https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html#API_ListObjectsV2_RequestSyntax)
///
/// Lastly, the test attempts to clean up and remove all uploaded S3 files.
/// If any errors appear during the clean up, they get logged, but the test is not failed or stopped until clean up is finished.
#[test_context(MaybeEnabledStorageWithTestBlobs)]
#[tokio::test]
async fn pagination_should_work(ctx: &mut MaybeEnabledStorageWithTestBlobs) -> anyhow::Result<()> {
let ctx = match ctx {
MaybeEnabledStorageWithTestBlobs::Enabled(ctx) => ctx,
MaybeEnabledStorageWithTestBlobs::Disabled => return Ok(()),
MaybeEnabledStorageWithTestBlobs::UploadsFailed(e, _) => {
anyhow::bail!("S3 init failed: {e:?}")
}
};
let test_client = Arc::clone(&ctx.enabled.client);
let expected_remote_prefixes = ctx.remote_prefixes.clone();
let base_prefix = RemotePath::new(Utf8Path::new(ctx.enabled.base_prefix))
.context("common_prefix construction")?;
let root_remote_prefixes = test_client
.list_prefixes(None)
.await
.context("client list root prefixes failure")?
.into_iter()
.collect::<HashSet<_>>();
assert_eq!(
root_remote_prefixes, HashSet::from([base_prefix.clone()]),
"remote storage root prefixes list mismatches with the uploads. Returned prefixes: {root_remote_prefixes:?}"
);
let nested_remote_prefixes = test_client
.list_prefixes(Some(&base_prefix))
.await
.context("client list nested prefixes failure")?
.into_iter()
.collect::<HashSet<_>>();
let remote_only_prefixes = nested_remote_prefixes
.difference(&expected_remote_prefixes)
.collect::<HashSet<_>>();
let missing_uploaded_prefixes = expected_remote_prefixes
.difference(&nested_remote_prefixes)
.collect::<HashSet<_>>();
assert_eq!(
remote_only_prefixes.len() + missing_uploaded_prefixes.len(), 0,
"remote storage nested prefixes list mismatches with the uploads. Remote only prefixes: {remote_only_prefixes:?}, missing uploaded prefixes: {missing_uploaded_prefixes:?}",
);
Ok(())
}
/// Tests that the S3 client can list all files in a folder, even if the response comes paginated and requires multiple S3 queries.
/// Uses real S3 and requires [`ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME`] and related S3 cred env vars specified. The test skips the real code path and passes if the env vars are not set.
/// See `s3_pagination_should_work` for more information.
///
/// First, the test creates a set of S3 objects with keys `random_prefix/folder{j}/blob_{i}.txt` in [`upload_remote_data`].
/// Then it performs the following queries:
/// 1. `list_files(None)`. This should return all files `random_prefix/folder{j}/blob_{i}.txt`
/// 2. `list_files("folder1")`. This should return all files `random_prefix/folder1/blob_{i}.txt`
#[test_context(MaybeEnabledStorageWithSimpleTestBlobs)]
#[tokio::test]
async fn list_files_works(ctx: &mut MaybeEnabledStorageWithSimpleTestBlobs) -> anyhow::Result<()> {
let ctx = match ctx {
MaybeEnabledStorageWithSimpleTestBlobs::Enabled(ctx) => ctx,
MaybeEnabledStorageWithSimpleTestBlobs::Disabled => return Ok(()),
MaybeEnabledStorageWithSimpleTestBlobs::UploadsFailed(e, _) => {
anyhow::bail!("S3 init failed: {e:?}")
}
};
let test_client = Arc::clone(&ctx.enabled.client);
let base_prefix =
RemotePath::new(Utf8Path::new("folder1")).context("common_prefix construction")?;
let root_files = test_client
.list_files(None)
.await
.context("client list root files failure")?
.into_iter()
.collect::<HashSet<_>>();
assert_eq!(
root_files,
ctx.remote_blobs.clone(),
"remote storage list_files on root mismatches with the uploads."
);
let nested_remote_files = test_client
.list_files(Some(&base_prefix))
.await
.context("client list nested files failure")?
.into_iter()
.collect::<HashSet<_>>();
let trim_remote_blobs: HashSet<_> = ctx
.remote_blobs
.iter()
.map(|x| x.get_path())
.filter(|x| x.starts_with("folder1"))
.map(|x| RemotePath::new(x).expect("must be valid path"))
.collect();
assert_eq!(
nested_remote_files, trim_remote_blobs,
"remote storage list_files on subdirrectory mismatches with the uploads."
);
Ok(())
}
#[test_context(MaybeEnabledStorage)]
#[tokio::test]
async fn delete_non_exising_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {
let ctx = match ctx {
MaybeEnabledStorage::Enabled(ctx) => ctx,
MaybeEnabledStorage::Disabled => return Ok(()),
};
let path = RemotePath::new(Utf8Path::new(
format!("{}/for_sure_there_is_nothing_there_really", ctx.base_prefix).as_str(),
))
.with_context(|| "RemotePath conversion")?;
ctx.client.delete(&path).await.expect("should succeed");
Ok(())
}
#[test_context(MaybeEnabledStorage)]
#[tokio::test]
async fn delete_objects_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {
let ctx = match ctx {
MaybeEnabledStorage::Enabled(ctx) => ctx,
MaybeEnabledStorage::Disabled => return Ok(()),
};
let path1 = RemotePath::new(Utf8Path::new(format!("{}/path1", ctx.base_prefix).as_str()))
.with_context(|| "RemotePath conversion")?;
let path2 = RemotePath::new(Utf8Path::new(format!("{}/path2", ctx.base_prefix).as_str()))
.with_context(|| "RemotePath conversion")?;
let path3 = RemotePath::new(Utf8Path::new(format!("{}/path3", ctx.base_prefix).as_str()))
.with_context(|| "RemotePath conversion")?;
let (data, len) = upload_stream("remote blob data1".as_bytes().into());
ctx.client.upload(data, len, &path1, None).await?;
let (data, len) = upload_stream("remote blob data2".as_bytes().into());
ctx.client.upload(data, len, &path2, None).await?;
let (data, len) = upload_stream("remote blob data3".as_bytes().into());
ctx.client.upload(data, len, &path3, None).await?;
ctx.client.delete_objects(&[path1, path2]).await?;
let prefixes = ctx.client.list_prefixes(None).await?;
assert_eq!(prefixes.len(), 1);
ctx.client.delete_objects(&[path3]).await?;
Ok(())
}
#[test_context(MaybeEnabledStorage)]
#[tokio::test]
async fn upload_download_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {
let MaybeEnabledStorage::Enabled(ctx) = ctx else {
return Ok(());
};
let path = RemotePath::new(Utf8Path::new(format!("{}/file", ctx.base_prefix).as_str()))
.with_context(|| "RemotePath conversion")?;
let orig = bytes::Bytes::from_static("remote blob data here".as_bytes());
let (data, len) = wrap_stream(orig.clone());
ctx.client.upload(data, len, &path, None).await?;
// Normal download request
let dl = ctx.client.download(&path).await?;
let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig);
// Full range (end specified)
let dl = ctx
.client
.download_byte_range(&path, 0, Some(len as u64))
.await?;
let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig);
// partial range (end specified)
let dl = ctx.client.download_byte_range(&path, 4, Some(10)).await?;
let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig[4..10]);
// partial range (end beyond real end)
let dl = ctx
.client
.download_byte_range(&path, 8, Some(len as u64 * 100))
.await?;
let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig[8..]);
// Partial range (end unspecified)
let dl = ctx.client.download_byte_range(&path, 4, None).await?;
let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig[4..]);
// Full range (end unspecified)
let dl = ctx.client.download_byte_range(&path, 0, None).await?;
let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig);
debug!("Cleanup: deleting file at path {path:?}");
ctx.client
.delete(&path)
.await
.with_context(|| format!("{path:?} removal"))?;
Ok(())
}
#[test_context(MaybeEnabledStorage)]
#[tokio::test]
async fn copy_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {
let MaybeEnabledStorage::Enabled(ctx) = ctx else {
return Ok(());
};
let path = RemotePath::new(Utf8Path::new(
format!("{}/file_to_copy", ctx.base_prefix).as_str(),
))
.with_context(|| "RemotePath conversion")?;
let path_dest = RemotePath::new(Utf8Path::new(
format!("{}/file_dest", ctx.base_prefix).as_str(),
))
.with_context(|| "RemotePath conversion")?;
let orig = bytes::Bytes::from_static("remote blob data content".as_bytes());
let (data, len) = wrap_stream(orig.clone());
ctx.client.upload(data, len, &path, None).await?;
// Normal download request
ctx.client.copy_object(&path, &path_dest).await?;
let dl = ctx.client.download(&path_dest).await?;
let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig);
debug!("Cleanup: deleting file at path {path:?}");
ctx.client
.delete_objects(&[path.clone(), path_dest.clone()])
.await
.with_context(|| format!("{path:?} removal"))?;
Ok(())
}

View File

@@ -6,23 +6,263 @@ use std::sync::Arc;
use std::time::UNIX_EPOCH;
use anyhow::Context;
use camino::Utf8Path;
use remote_storage::{
AzureConfig, GenericRemoteStorage, RemotePath, RemoteStorageConfig, RemoteStorageKind,
};
use test_context::AsyncTestContext;
use tracing::info;
use test_context::{test_context, AsyncTestContext};
use tracing::{debug, info};
mod common;
#[path = "common/tests.rs"]
mod tests_azure;
use common::{cleanup, ensure_logging_ready, upload_remote_data, upload_simple_remote_data};
use common::{
cleanup, download_to_vec, ensure_logging_ready, upload_remote_data, upload_simple_remote_data,
upload_stream, wrap_stream,
};
const ENABLE_REAL_AZURE_REMOTE_STORAGE_ENV_VAR_NAME: &str = "ENABLE_REAL_AZURE_REMOTE_STORAGE";
const BASE_PREFIX: &str = "test";
/// Tests that the Azure client can list all prefixes, even if the response comes paginated and requires multiple HTTP queries.
/// Uses real Azure and requires [`ENABLE_REAL_AZURE_REMOTE_STORAGE_ENV_VAR_NAME`] and related Azure cred env vars specified.
/// See the client creation in [`create_azure_client`] for details on the required env vars.
/// If real Azure tests are disabled, the test passes, skipping any real test run: currently, there's no way to mark a test as ignored at runtime with the
/// default test framework, see https://github.com/rust-lang/rust/issues/68007 for details.
///
/// First, the test creates a set of Azure blobs with keys `/${random_prefix_part}/${base_prefix_str}/sub_prefix_${i}/blob_${i}` in [`upload_remote_data`]
/// where
/// * `random_prefix_part` is set for the entire Azure client during the Azure client creation in [`create_azure_client`], to avoid interference between multiple test runs
/// * `base_prefix_str` is a common prefix to use in the client requests: we want to ensure that the client is able to list nested prefixes inside the bucket
///
/// Then, the test verifies that the client returns the correct prefixes when queried:
/// * with no prefix, it lists everything after its `${random_prefix_part}/` — that should be `${base_prefix_str}` value only
/// * with `${base_prefix_str}/` prefix, it lists every `sub_prefix_${i}`
///
/// With the real Azure enabled and `#[cfg(test)]` Rust configuration used, the Azure client test adds a `max-keys` param to limit the response keys.
/// This way, we are able to test the pagination implicitly, by ensuring all results are returned from the remote storage while avoiding uploading too many blobs to Azure.
///
/// Lastly, the test attempts to clean up and remove all uploaded Azure files.
/// If any errors appear during the clean up, they get logged, but the test is not failed or stopped until clean up is finished.
#[test_context(MaybeEnabledAzureWithTestBlobs)]
#[tokio::test]
async fn azure_pagination_should_work(
ctx: &mut MaybeEnabledAzureWithTestBlobs,
) -> anyhow::Result<()> {
let ctx = match ctx {
MaybeEnabledAzureWithTestBlobs::Enabled(ctx) => ctx,
MaybeEnabledAzureWithTestBlobs::Disabled => return Ok(()),
MaybeEnabledAzureWithTestBlobs::UploadsFailed(e, _) => {
anyhow::bail!("Azure init failed: {e:?}")
}
};
let test_client = Arc::clone(&ctx.enabled.client);
let expected_remote_prefixes = ctx.remote_prefixes.clone();
let base_prefix = RemotePath::new(Utf8Path::new(ctx.enabled.base_prefix))
.context("common_prefix construction")?;
let root_remote_prefixes = test_client
.list_prefixes(None)
.await
.context("client list root prefixes failure")?
.into_iter()
.collect::<HashSet<_>>();
assert_eq!(
root_remote_prefixes, HashSet::from([base_prefix.clone()]),
"remote storage root prefixes list mismatches with the uploads. Returned prefixes: {root_remote_prefixes:?}"
);
let nested_remote_prefixes = test_client
.list_prefixes(Some(&base_prefix))
.await
.context("client list nested prefixes failure")?
.into_iter()
.collect::<HashSet<_>>();
let remote_only_prefixes = nested_remote_prefixes
.difference(&expected_remote_prefixes)
.collect::<HashSet<_>>();
let missing_uploaded_prefixes = expected_remote_prefixes
.difference(&nested_remote_prefixes)
.collect::<HashSet<_>>();
assert_eq!(
remote_only_prefixes.len() + missing_uploaded_prefixes.len(), 0,
"remote storage nested prefixes list mismatches with the uploads. Remote only prefixes: {remote_only_prefixes:?}, missing uploaded prefixes: {missing_uploaded_prefixes:?}",
);
Ok(())
}
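// Illustrative sketch (not part of this changeset): the nested prefixes that the
// pagination test above expects from `list_prefixes(Some(&base_prefix))` could be
// enumerated like this, assuming the `sub_prefix_${i}` layout from the doc comment.
// The helper name and the `count` parameter are hypothetical.
fn expected_sub_prefixes(
    base_prefix: &str,
    count: usize,
) -> anyhow::Result<std::collections::HashSet<remote_storage::RemotePath>> {
    use anyhow::Context as _;
    (0..count)
        .map(|i| {
            remote_storage::RemotePath::new(camino::Utf8Path::new(
                format!("{base_prefix}/sub_prefix_{i}").as_str(),
            ))
            .with_context(|| "RemotePath conversion")
        })
        .collect()
}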
/// Tests that the Azure client can list all files in a folder, even if the response comes paginated and requires multiple Azure queries.
/// Uses real Azure and requires [`ENABLE_REAL_AZURE_REMOTE_STORAGE_ENV_VAR_NAME`] and related Azure cred env vars to be specified. The test skips the real code and passes if the env vars are not set.
/// See `azure_pagination_should_work` for more information.
///
/// First, the test creates a set of Azure objects with keys `random_prefix/folder{j}/blob_{i}.txt` in [`upload_remote_data`].
/// Then it performs the following queries:
/// 1. `list_files(None)`. This should return all files `random_prefix/folder{j}/blob_{i}.txt`
/// 2. `list_files("folder1")`. This should return all files `random_prefix/folder1/blob_{i}.txt`
#[test_context(MaybeEnabledAzureWithSimpleTestBlobs)]
#[tokio::test]
async fn azure_list_files_works(
ctx: &mut MaybeEnabledAzureWithSimpleTestBlobs,
) -> anyhow::Result<()> {
let ctx = match ctx {
MaybeEnabledAzureWithSimpleTestBlobs::Enabled(ctx) => ctx,
MaybeEnabledAzureWithSimpleTestBlobs::Disabled => return Ok(()),
MaybeEnabledAzureWithSimpleTestBlobs::UploadsFailed(e, _) => {
anyhow::bail!("Azure init failed: {e:?}")
}
};
let test_client = Arc::clone(&ctx.enabled.client);
let base_prefix =
RemotePath::new(Utf8Path::new("folder1")).context("common_prefix construction")?;
let root_files = test_client
.list_files(None)
.await
.context("client list root files failure")?
.into_iter()
.collect::<HashSet<_>>();
assert_eq!(
root_files,
ctx.remote_blobs.clone(),
"remote storage list_files on root mismatches with the uploads."
);
let nested_remote_files = test_client
.list_files(Some(&base_prefix))
.await
.context("client list nested files failure")?
.into_iter()
.collect::<HashSet<_>>();
let trim_remote_blobs: HashSet<_> = ctx
.remote_blobs
.iter()
.map(|x| x.get_path())
.filter(|x| x.starts_with("folder1"))
.map(|x| RemotePath::new(x).expect("must be valid path"))
.collect();
assert_eq!(
nested_remote_files, trim_remote_blobs,
"remote storage list_files on subdirrectory mismatches with the uploads."
);
Ok(())
}
#[test_context(MaybeEnabledAzure)]
#[tokio::test]
async fn azure_delete_non_exising_works(ctx: &mut MaybeEnabledAzure) -> anyhow::Result<()> {
let ctx = match ctx {
MaybeEnabledAzure::Enabled(ctx) => ctx,
MaybeEnabledAzure::Disabled => return Ok(()),
};
let path = RemotePath::new(Utf8Path::new(
format!("{}/for_sure_there_is_nothing_there_really", ctx.base_prefix).as_str(),
))
.with_context(|| "RemotePath conversion")?;
ctx.client.delete(&path).await.expect("should succeed");
Ok(())
}
#[test_context(MaybeEnabledAzure)]
#[tokio::test]
async fn azure_delete_objects_works(ctx: &mut MaybeEnabledAzure) -> anyhow::Result<()> {
let ctx = match ctx {
MaybeEnabledAzure::Enabled(ctx) => ctx,
MaybeEnabledAzure::Disabled => return Ok(()),
};
let path1 = RemotePath::new(Utf8Path::new(format!("{}/path1", ctx.base_prefix).as_str()))
.with_context(|| "RemotePath conversion")?;
let path2 = RemotePath::new(Utf8Path::new(format!("{}/path2", ctx.base_prefix).as_str()))
.with_context(|| "RemotePath conversion")?;
let path3 = RemotePath::new(Utf8Path::new(format!("{}/path3", ctx.base_prefix).as_str()))
.with_context(|| "RemotePath conversion")?;
let (data, len) = upload_stream("remote blob data1".as_bytes().into());
ctx.client.upload(data, len, &path1, None).await?;
let (data, len) = upload_stream("remote blob data2".as_bytes().into());
ctx.client.upload(data, len, &path2, None).await?;
let (data, len) = upload_stream("remote blob data3".as_bytes().into());
ctx.client.upload(data, len, &path3, None).await?;
ctx.client.delete_objects(&[path1, path2]).await?;
let prefixes = ctx.client.list_prefixes(None).await?;
assert_eq!(prefixes.len(), 1);
ctx.client.delete_objects(&[path3]).await?;
Ok(())
}
#[test_context(MaybeEnabledAzure)]
#[tokio::test]
async fn azure_upload_download_works(ctx: &mut MaybeEnabledAzure) -> anyhow::Result<()> {
let MaybeEnabledAzure::Enabled(ctx) = ctx else {
return Ok(());
};
let path = RemotePath::new(Utf8Path::new(format!("{}/file", ctx.base_prefix).as_str()))
.with_context(|| "RemotePath conversion")?;
let orig = bytes::Bytes::from_static("remote blob data here".as_bytes());
let (data, len) = wrap_stream(orig.clone());
ctx.client.upload(data, len, &path, None).await?;
// Normal download request
let dl = ctx.client.download(&path).await?;
let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig);
// Full range (end specified)
let dl = ctx
.client
.download_byte_range(&path, 0, Some(len as u64))
.await?;
let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig);
// partial range (end specified)
let dl = ctx.client.download_byte_range(&path, 4, Some(10)).await?;
let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig[4..10]);
// partial range (end beyond real end)
let dl = ctx
.client
.download_byte_range(&path, 8, Some(len as u64 * 100))
.await?;
let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig[8..]);
// Partial range (end unspecified)
let dl = ctx.client.download_byte_range(&path, 4, None).await?;
let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig[4..]);
// Full range (end unspecified)
let dl = ctx.client.download_byte_range(&path, 0, None).await?;
let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig);
debug!("Cleanup: deleting file at path {path:?}");
ctx.client
.delete(&path)
.await
.with_context(|| format!("{path:?} removal"))?;
Ok(())
}
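// Illustrative sketch (not an API of the crate): the byte-range assertions above
// encode these semantics: start is inclusive, end is exclusive, an end past the
// blob length is clamped, and `None` means "to the end". The helper below only
// restates that expectation.
fn expected_range(orig: &[u8], start: u64, end: Option<u64>) -> &[u8] {
    let len = orig.len() as u64;
    let start = start.min(len) as usize;
    let end = end.map_or(len, |e| e.min(len)) as usize;
    &orig[start..end]
}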
struct EnabledAzure {
client: Arc<GenericRemoteStorage>,
base_prefix: &'static str,
@@ -41,13 +281,13 @@ impl EnabledAzure {
}
}
enum MaybeEnabledStorage {
enum MaybeEnabledAzure {
Enabled(EnabledAzure),
Disabled,
}
#[async_trait::async_trait]
impl AsyncTestContext for MaybeEnabledStorage {
impl AsyncTestContext for MaybeEnabledAzure {
async fn setup() -> Self {
ensure_logging_ready();
@@ -63,7 +303,7 @@ impl AsyncTestContext for MaybeEnabledStorage {
}
}
enum MaybeEnabledStorageWithTestBlobs {
enum MaybeEnabledAzureWithTestBlobs {
Enabled(AzureWithTestBlobs),
Disabled,
UploadsFailed(anyhow::Error, AzureWithTestBlobs),
@@ -76,7 +316,7 @@ struct AzureWithTestBlobs {
}
#[async_trait::async_trait]
impl AsyncTestContext for MaybeEnabledStorageWithTestBlobs {
impl AsyncTestContext for MaybeEnabledAzureWithTestBlobs {
async fn setup() -> Self {
ensure_logging_ready();
if env::var(ENABLE_REAL_AZURE_REMOTE_STORAGE_ENV_VAR_NAME).is_err() {
@@ -127,7 +367,7 @@ impl AsyncTestContext for MaybeEnabledStorageWithTestBlobs {
// However, they are not identical. The list_prefixes function is concerned with listing prefixes,
// whereas the list_files function is concerned with listing files.
// See `RemoteStorage::list_files` documentation for more details
enum MaybeEnabledStorageWithSimpleTestBlobs {
enum MaybeEnabledAzureWithSimpleTestBlobs {
Enabled(AzureWithSimpleTestBlobs),
Disabled,
UploadsFailed(anyhow::Error, AzureWithSimpleTestBlobs),
@@ -138,7 +378,7 @@ struct AzureWithSimpleTestBlobs {
}
#[async_trait::async_trait]
impl AsyncTestContext for MaybeEnabledStorageWithSimpleTestBlobs {
impl AsyncTestContext for MaybeEnabledAzureWithSimpleTestBlobs {
async fn setup() -> Self {
ensure_logging_ready();
if env::var(ENABLE_REAL_AZURE_REMOTE_STORAGE_ENV_VAR_NAME).is_err() {

View File

@@ -6,23 +6,259 @@ use std::sync::Arc;
use std::time::UNIX_EPOCH;
use anyhow::Context;
use camino::Utf8Path;
use remote_storage::{
GenericRemoteStorage, RemotePath, RemoteStorageConfig, RemoteStorageKind, S3Config,
};
use test_context::AsyncTestContext;
use tracing::info;
use test_context::{test_context, AsyncTestContext};
use tracing::{debug, info};
mod common;
#[path = "common/tests.rs"]
mod tests_s3;
use common::{cleanup, ensure_logging_ready, upload_remote_data, upload_simple_remote_data};
use common::{
cleanup, download_to_vec, ensure_logging_ready, upload_remote_data, upload_simple_remote_data,
upload_stream, wrap_stream,
};
const ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME: &str = "ENABLE_REAL_S3_REMOTE_STORAGE";
const BASE_PREFIX: &str = "test";
/// Tests that the S3 client can list all prefixes, even if the response comes paginated and requires multiple S3 queries.
/// Uses real S3 and requires [`ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME`] and related S3 cred env vars specified.
/// See the client creation in [`create_s3_client`] for details on the required env vars.
/// If real S3 tests are disabled, the test passes, skipping any real test run: currently, there's no way to mark a test as ignored at runtime with the
/// default test framework; see https://github.com/rust-lang/rust/issues/68007 for details.
///
/// First, the test creates a set of S3 objects with keys `/${random_prefix_part}/${base_prefix_str}/sub_prefix_${i}/blob_${i}` in [`upload_remote_data`]
/// where
/// * `random_prefix_part` is set for the entire S3 client during the S3 client creation in [`create_s3_client`], to avoid interference between multiple test runs
/// * `base_prefix_str` is a common prefix to use in the client requests: we would want to ensure that the client is able to list nested prefixes inside the bucket
///
/// Then, verifies that the client does return correct prefixes when queried:
/// * with no prefix, it lists everything after its `${random_prefix_part}/` — that should be `${base_prefix_str}` value only
/// * with `${base_prefix_str}/` prefix, it lists every `sub_prefix_${i}`
///
/// With the real S3 enabled and `#[cfg(test)]` Rust configuration used, the S3 client test adds a `max-keys` param to limit the response keys.
/// This way, we are able to test the pagination implicitly, by ensuring all results are returned from the remote storage, while avoiding uploading too many blobs to S3,
/// since the current default AWS S3 pagination limit is 1000.
/// (see https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html#API_ListObjectsV2_RequestSyntax)
///
/// Lastly, the test attempts to clean up and remove all uploaded S3 files.
/// If any errors appear during the clean up, they get logged, but the test is not failed or stopped until clean up is finished.
#[test_context(MaybeEnabledS3WithTestBlobs)]
#[tokio::test]
async fn s3_pagination_should_work(ctx: &mut MaybeEnabledS3WithTestBlobs) -> anyhow::Result<()> {
let ctx = match ctx {
MaybeEnabledS3WithTestBlobs::Enabled(ctx) => ctx,
MaybeEnabledS3WithTestBlobs::Disabled => return Ok(()),
MaybeEnabledS3WithTestBlobs::UploadsFailed(e, _) => anyhow::bail!("S3 init failed: {e:?}"),
};
let test_client = Arc::clone(&ctx.enabled.client);
let expected_remote_prefixes = ctx.remote_prefixes.clone();
let base_prefix = RemotePath::new(Utf8Path::new(ctx.enabled.base_prefix))
.context("common_prefix construction")?;
let root_remote_prefixes = test_client
.list_prefixes(None)
.await
.context("client list root prefixes failure")?
.into_iter()
.collect::<HashSet<_>>();
assert_eq!(
root_remote_prefixes, HashSet::from([base_prefix.clone()]),
"remote storage root prefixes list mismatches with the uploads. Returned prefixes: {root_remote_prefixes:?}"
);
let nested_remote_prefixes = test_client
.list_prefixes(Some(&base_prefix))
.await
.context("client list nested prefixes failure")?
.into_iter()
.collect::<HashSet<_>>();
let remote_only_prefixes = nested_remote_prefixes
.difference(&expected_remote_prefixes)
.collect::<HashSet<_>>();
let missing_uploaded_prefixes = expected_remote_prefixes
.difference(&nested_remote_prefixes)
.collect::<HashSet<_>>();
assert_eq!(
remote_only_prefixes.len() + missing_uploaded_prefixes.len(), 0,
"remote storage nested prefixes list mismatches with the uploads. Remote only prefixes: {remote_only_prefixes:?}, missing uploaded prefixes: {missing_uploaded_prefixes:?}",
);
Ok(())
}
/// Tests that the S3 client can list all files in a folder, even if the response comes paginated and requires multiple S3 queries.
/// Uses real S3 and requires [`ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME`] and related S3 cred env vars to be specified. The test skips the real code and passes if the env vars are not set.
/// See `s3_pagination_should_work` for more information.
///
/// First, the test creates a set of S3 objects with keys `random_prefix/folder{j}/blob_{i}.txt` in [`upload_remote_data`].
/// Then it performs the following queries:
/// 1. `list_files(None)`. This should return all files `random_prefix/folder{j}/blob_{i}.txt`
/// 2. `list_files("folder1")`. This should return all files `random_prefix/folder1/blob_{i}.txt`
#[test_context(MaybeEnabledS3WithSimpleTestBlobs)]
#[tokio::test]
async fn s3_list_files_works(ctx: &mut MaybeEnabledS3WithSimpleTestBlobs) -> anyhow::Result<()> {
let ctx = match ctx {
MaybeEnabledS3WithSimpleTestBlobs::Enabled(ctx) => ctx,
MaybeEnabledS3WithSimpleTestBlobs::Disabled => return Ok(()),
MaybeEnabledS3WithSimpleTestBlobs::UploadsFailed(e, _) => {
anyhow::bail!("S3 init failed: {e:?}")
}
};
let test_client = Arc::clone(&ctx.enabled.client);
let base_prefix =
RemotePath::new(Utf8Path::new("folder1")).context("common_prefix construction")?;
let root_files = test_client
.list_files(None)
.await
.context("client list root files failure")?
.into_iter()
.collect::<HashSet<_>>();
assert_eq!(
root_files,
ctx.remote_blobs.clone(),
"remote storage list_files on root mismatches with the uploads."
);
let nested_remote_files = test_client
.list_files(Some(&base_prefix))
.await
.context("client list nested files failure")?
.into_iter()
.collect::<HashSet<_>>();
let trim_remote_blobs: HashSet<_> = ctx
.remote_blobs
.iter()
.map(|x| x.get_path())
.filter(|x| x.starts_with("folder1"))
.map(|x| RemotePath::new(x).expect("must be valid path"))
.collect();
assert_eq!(
nested_remote_files, trim_remote_blobs,
"remote storage list_files on subdirrectory mismatches with the uploads."
);
Ok(())
}
#[test_context(MaybeEnabledS3)]
#[tokio::test]
async fn s3_delete_non_exising_works(ctx: &mut MaybeEnabledS3) -> anyhow::Result<()> {
let ctx = match ctx {
MaybeEnabledS3::Enabled(ctx) => ctx,
MaybeEnabledS3::Disabled => return Ok(()),
};
let path = RemotePath::new(Utf8Path::new(
format!("{}/for_sure_there_is_nothing_there_really", ctx.base_prefix).as_str(),
))
.with_context(|| "RemotePath conversion")?;
ctx.client.delete(&path).await.expect("should succeed");
Ok(())
}
#[test_context(MaybeEnabledS3)]
#[tokio::test]
async fn s3_delete_objects_works(ctx: &mut MaybeEnabledS3) -> anyhow::Result<()> {
let ctx = match ctx {
MaybeEnabledS3::Enabled(ctx) => ctx,
MaybeEnabledS3::Disabled => return Ok(()),
};
let path1 = RemotePath::new(Utf8Path::new(format!("{}/path1", ctx.base_prefix).as_str()))
.with_context(|| "RemotePath conversion")?;
let path2 = RemotePath::new(Utf8Path::new(format!("{}/path2", ctx.base_prefix).as_str()))
.with_context(|| "RemotePath conversion")?;
let path3 = RemotePath::new(Utf8Path::new(format!("{}/path3", ctx.base_prefix).as_str()))
.with_context(|| "RemotePath conversion")?;
let (data, len) = upload_stream("remote blob data1".as_bytes().into());
ctx.client.upload(data, len, &path1, None).await?;
let (data, len) = upload_stream("remote blob data2".as_bytes().into());
ctx.client.upload(data, len, &path2, None).await?;
let (data, len) = upload_stream("remote blob data3".as_bytes().into());
ctx.client.upload(data, len, &path3, None).await?;
ctx.client.delete_objects(&[path1, path2]).await?;
let prefixes = ctx.client.list_prefixes(None).await?;
assert_eq!(prefixes.len(), 1);
ctx.client.delete_objects(&[path3]).await?;
Ok(())
}
#[test_context(MaybeEnabledS3)]
#[tokio::test]
async fn s3_upload_download_works(ctx: &mut MaybeEnabledS3) -> anyhow::Result<()> {
let MaybeEnabledS3::Enabled(ctx) = ctx else {
return Ok(());
};
let path = RemotePath::new(Utf8Path::new(format!("{}/file", ctx.base_prefix).as_str()))
.with_context(|| "RemotePath conversion")?;
let orig = bytes::Bytes::from_static("remote blob data here".as_bytes());
let (data, len) = wrap_stream(orig.clone());
ctx.client.upload(data, len, &path, None).await?;
// Normal download request
let dl = ctx.client.download(&path).await?;
let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig);
// Full range (end specified)
let dl = ctx
.client
.download_byte_range(&path, 0, Some(len as u64))
.await?;
let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig);
// partial range (end specified)
let dl = ctx.client.download_byte_range(&path, 4, Some(10)).await?;
let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig[4..10]);
// partial range (end beyond real end)
let dl = ctx
.client
.download_byte_range(&path, 8, Some(len as u64 * 100))
.await?;
let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig[8..]);
// Partial range (end unspecified)
let dl = ctx.client.download_byte_range(&path, 4, None).await?;
let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig[4..]);
// Full range (end unspecified)
let dl = ctx.client.download_byte_range(&path, 0, None).await?;
let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig);
debug!("Cleanup: deleting file at path {path:?}");
ctx.client
.delete(&path)
.await
.with_context(|| format!("{path:?} removal"))?;
Ok(())
}
struct EnabledS3 {
client: Arc<GenericRemoteStorage>,
base_prefix: &'static str,
@@ -41,13 +277,13 @@ impl EnabledS3 {
}
}
enum MaybeEnabledStorage {
enum MaybeEnabledS3 {
Enabled(EnabledS3),
Disabled,
}
#[async_trait::async_trait]
impl AsyncTestContext for MaybeEnabledStorage {
impl AsyncTestContext for MaybeEnabledS3 {
async fn setup() -> Self {
ensure_logging_ready();
@@ -63,7 +299,7 @@ impl AsyncTestContext for MaybeEnabledStorage {
}
}
enum MaybeEnabledStorageWithTestBlobs {
enum MaybeEnabledS3WithTestBlobs {
Enabled(S3WithTestBlobs),
Disabled,
UploadsFailed(anyhow::Error, S3WithTestBlobs),
@@ -76,7 +312,7 @@ struct S3WithTestBlobs {
}
#[async_trait::async_trait]
impl AsyncTestContext for MaybeEnabledStorageWithTestBlobs {
impl AsyncTestContext for MaybeEnabledS3WithTestBlobs {
async fn setup() -> Self {
ensure_logging_ready();
if env::var(ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME).is_err() {
@@ -127,7 +363,7 @@ impl AsyncTestContext for MaybeEnabledStorageWithTestBlobs {
// However, they are not identical. The list_prefixes function is concerned with listing prefixes,
// whereas the list_files function is concerned with listing files.
// See `RemoteStorage::list_files` documentation for more details
enum MaybeEnabledStorageWithSimpleTestBlobs {
enum MaybeEnabledS3WithSimpleTestBlobs {
Enabled(S3WithSimpleTestBlobs),
Disabled,
UploadsFailed(anyhow::Error, S3WithSimpleTestBlobs),
@@ -138,7 +374,7 @@ struct S3WithSimpleTestBlobs {
}
#[async_trait::async_trait]
impl AsyncTestContext for MaybeEnabledStorageWithSimpleTestBlobs {
impl AsyncTestContext for MaybeEnabledS3WithSimpleTestBlobs {
async fn setup() -> Self {
ensure_logging_ready();
if env::var(ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME).is_err() {

View File

@@ -1,4 +1,3 @@
use std::num::ParseIntError;
use std::{fmt, str::FromStr};
use anyhow::Context;
@@ -375,13 +374,6 @@ impl fmt::Display for NodeId {
}
}
impl FromStr for NodeId {
type Err = ParseIntError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(NodeId(u64::from_str(s)?))
}
}
#[cfg(test)]
mod tests {
use serde_assert::{Deserializer, Serializer, Token, Tokens};

View File

@@ -1,5 +1,5 @@
use pageserver_api::{models::*, shard::TenantShardId};
use reqwest::{IntoUrl, Method, StatusCode};
use reqwest::{IntoUrl, Method};
use utils::{
http::error::HttpErrorBody,
id::{TenantId, TimelineId},
@@ -22,14 +22,14 @@ pub enum Error {
#[error("receive error body: {0}")]
ReceiveErrorBody(String),
#[error("pageserver API: {1}")]
ApiError(StatusCode, String),
#[error("pageserver API: {0}")]
ApiError(String),
}
pub type Result<T> = std::result::Result<T, Error>;
pub trait ResponseErrorMessageExt: Sized {
fn error_from_body(self) -> impl std::future::Future<Output = Result<Self>> + Send;
pub(crate) trait ResponseErrorMessageExt: Sized {
async fn error_from_body(self) -> Result<Self>;
}
impl ResponseErrorMessageExt for reqwest::Response {
@@ -41,7 +41,7 @@ impl ResponseErrorMessageExt for reqwest::Response {
let url = self.url().to_owned();
Err(match self.json::<HttpErrorBody>().await {
Ok(HttpErrorBody { msg }) => Error::ApiError(status, msg),
Ok(HttpErrorBody { msg }) => Error::ApiError(msg),
Err(_) => {
Error::ReceiveErrorBody(format!("Http error ({}) at {}.", status.as_u16(), url))
}
@@ -49,11 +49,6 @@ impl ResponseErrorMessageExt for reqwest::Response {
}
}
pub enum ForceAwaitLogicalSize {
Yes,
No,
}
impl Client {
pub fn new(mgmt_api_endpoint: String, jwt: Option<&str>) -> Self {
Self {
@@ -71,9 +66,9 @@ impl Client {
pub async fn tenant_details(
&self,
tenant_shard_id: TenantShardId,
tenant_id: TenantId,
) -> Result<pageserver_api::models::TenantDetails> {
let uri = format!("{}/v1/tenant/{tenant_shard_id}", self.mgmt_api_endpoint);
let uri = format!("{}/v1/tenant/{tenant_id}", self.mgmt_api_endpoint);
self.get(uri)
.await?
.json()
@@ -83,12 +78,9 @@ impl Client {
pub async fn list_timelines(
&self,
tenant_shard_id: TenantShardId,
tenant_id: TenantId,
) -> Result<Vec<pageserver_api::models::TimelineInfo>> {
let uri = format!(
"{}/v1/tenant/{tenant_shard_id}/timeline",
self.mgmt_api_endpoint
);
let uri = format!("{}/v1/tenant/{tenant_id}/timeline", self.mgmt_api_endpoint);
self.get(&uri)
.await?
.json()
@@ -100,18 +92,11 @@ impl Client {
&self,
tenant_id: TenantId,
timeline_id: TimelineId,
force_await_logical_size: ForceAwaitLogicalSize,
) -> Result<pageserver_api::models::TimelineInfo> {
let uri = format!(
"{}/v1/tenant/{tenant_id}/timeline/{timeline_id}",
self.mgmt_api_endpoint
);
let uri = match force_await_logical_size {
ForceAwaitLogicalSize::Yes => format!("{}?force-await-logical-size={}", uri, true),
ForceAwaitLogicalSize::No => uri,
};
self.get(&uri)
.await?
.json()
@@ -182,23 +167,23 @@ impl Client {
"{}/v1/tenant/{}/secondary/download",
self.mgmt_api_endpoint, tenant_id
);
self.request(Method::POST, &uri, ()).await?;
Ok(())
self.request(Method::POST, &uri, ())
.await?
.error_for_status()
.map(|_| ())
.map_err(|e| Error::ApiError(format!("{}", e)))
}
pub async fn location_config(
&self,
tenant_shard_id: TenantShardId,
tenant_id: TenantId,
config: LocationConfig,
flush_ms: Option<std::time::Duration>,
) -> Result<()> {
let req_body = TenantLocationConfigRequest {
tenant_id: tenant_shard_id,
config,
};
let req_body = TenantLocationConfigRequest { tenant_id, config };
let path = format!(
"{}/v1/tenant/{}/location_config",
self.mgmt_api_endpoint, tenant_shard_id
self.mgmt_api_endpoint, tenant_id
);
let path = if let Some(flush_ms) = flush_ms {
format!("{}?flush_ms={}", path, flush_ms.as_millis())
@@ -209,23 +194,14 @@ impl Client {
Ok(())
}
pub async fn list_location_config(&self) -> Result<LocationConfigListResponse> {
let path = format!("{}/v1/location_config", self.mgmt_api_endpoint);
self.request(Method::GET, &path, ())
.await?
.json()
.await
.map_err(Error::ReceiveBody)
}
pub async fn timeline_create(
&self,
tenant_shard_id: TenantShardId,
tenant_id: TenantId,
req: &TimelineCreateRequest,
) -> Result<TimelineInfo> {
let uri = format!(
"{}/v1/tenant/{}/timeline",
self.mgmt_api_endpoint, tenant_shard_id
self.mgmt_api_endpoint, tenant_id
);
self.request(Method::POST, &uri, req)
.await?
@@ -233,46 +209,4 @@ impl Client {
.await
.map_err(Error::ReceiveBody)
}
pub async fn tenant_reset(&self, tenant_shard_id: TenantShardId) -> Result<()> {
let uri = format!(
"{}/v1/tenant/{}/reset",
self.mgmt_api_endpoint, tenant_shard_id
);
self.request(Method::POST, &uri, ())
.await?
.json()
.await
.map_err(Error::ReceiveBody)
}
pub async fn timeline_list(
&self,
tenant_shard_id: &TenantShardId,
) -> Result<Vec<TimelineInfo>> {
let uri = format!(
"{}/v1/tenant/{}/timeline",
self.mgmt_api_endpoint, tenant_shard_id
);
self.get(&uri)
.await?
.json()
.await
.map_err(Error::ReceiveBody)
}
pub async fn tenant_synthetic_size(
&self,
tenant_shard_id: TenantShardId,
) -> Result<TenantHistorySize> {
let uri = format!(
"{}/v1/tenant/{}/synthetic_size",
self.mgmt_api_endpoint, tenant_shard_id
);
self.get(&uri)
.await?
.json()
.await
.map_err(Error::ReceiveBody)
}
}
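// Illustrative usage sketch of the tenant-scoped client calls on one side of
// this hunk; the endpoint matches the default used elsewhere in this changeset,
// and the absent JWT is an assumption.
async fn sketch_list_timelines(
    tenant_id: utils::id::TenantId,
) -> Result<Vec<pageserver_api::models::TimelineInfo>> {
    let client = Client::new("http://localhost:9898".to_string(), None);
    client.list_timelines(tenant_id).await
}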

View File

@@ -2,7 +2,6 @@
use std::sync::Arc;
use pageserver_api::shard::TenantShardId;
use tokio::task::JoinSet;
use utils::id::{TenantId, TenantTimelineId};
@@ -32,10 +31,7 @@ pub async fn get_pageserver_tenant_timelines_unsharded(
async move {
(
tenant_id,
mgmt_api_client
.tenant_details(TenantShardId::unsharded(tenant_id))
.await
.unwrap(),
mgmt_api_client.tenant_details(tenant_id).await.unwrap(),
)
}
});

View File

@@ -108,32 +108,9 @@ pub struct RelTagBlockNo {
}
impl PagestreamClient {
pub async fn shutdown(self) {
let Self {
copy_both,
cancel_on_client_drop: cancel_conn_task,
conn_task,
} = self;
// The `copy_both` contains internal channel sender, the receiver of which is polled by `conn_task`.
// When `conn_task` observes the sender has been dropped, it sends a `FeMessage::CopyFail` into the connection.
// (see https://github.com/neondatabase/rust-postgres/blob/2005bf79573b8add5cf205b52a2b208e356cc8b0/tokio-postgres/src/copy_both.rs#L56).
//
// If we drop(copy_both) first, but then immediately drop the `cancel_on_client_drop`,
// the CopyFail message only makes it to the socket sometimes (i.e., it's a race).
//
// Further, the pageserver makes a lot of noise when it receives CopyFail.
// Computes don't send it in practice, they just hard-close the connection.
//
// So, let's behave like the computes and suppress the CopyFail as follows:
// kill the socket first, then drop copy_both.
//
// See also: https://www.postgresql.org/docs/current/protocol-flow.html#PROTOCOL-COPY
//
// NB: page_service doesn't have a use case to exit the `pagestream` mode currently.
// => https://github.com/neondatabase/neon/issues/6390
let _ = cancel_conn_task.unwrap();
conn_task.await.unwrap();
drop(copy_both);
pub async fn shutdown(mut self) {
let _ = self.cancel_on_client_drop.take();
self.conn_task.await.unwrap();
}
pub async fn getpage(

View File

@@ -8,7 +8,6 @@ license.workspace = true
[dependencies]
anyhow.workspace = true
camino.workspace = true
clap.workspace = true
futures.workspace = true
hdrhistogram.workspace = true
@@ -19,8 +18,8 @@ serde.workspace = true
serde_json.workspace = true
tracing.workspace = true
tokio.workspace = true
tokio-util.workspace = true
pageserver = { path = ".." }
pageserver_client.workspace = true
pageserver_api.workspace = true
utils = { path = "../../libs/utils/" }

View File

@@ -1,5 +1,4 @@
use anyhow::Context;
use pageserver_client::mgmt_api::ForceAwaitLogicalSize;
use pageserver_client::page_service::BasebackupRequest;
use utils::id::TenantTimelineId;
@@ -93,12 +92,10 @@ async fn main_impl(
for timeline in &timelines {
js.spawn({
let timeline = *timeline;
// FIXME: this triggers initial logical size calculation
// https://github.com/neondatabase/neon/issues/6168
let info = mgmt_api_client
.timeline_info(
timeline.tenant_id,
timeline.timeline_id,
ForceAwaitLogicalSize::No,
)
.timeline_info(timeline.tenant_id, timeline.timeline_id)
.await
.unwrap();
async move {

View File

@@ -1,11 +1,11 @@
use anyhow::Context;
use camino::Utf8PathBuf;
use futures::future::join_all;
use pageserver_api::key::{is_rel_block_key, key_to_rel_block, Key};
use pageserver::pgdatadir_mapping::key_to_rel_block;
use pageserver::repository;
use pageserver_api::key::is_rel_block_key;
use pageserver_api::keyspace::KeySpaceAccum;
use pageserver_api::models::PagestreamGetPageRequest;
use tokio_util::sync::CancellationToken;
use utils::id::TenantTimelineId;
use utils::lsn::Lsn;
@@ -14,7 +14,7 @@ use tokio::sync::Barrier;
use tokio::task::JoinSet;
use tracing::{info, instrument};
use std::collections::{HashMap, HashSet};
use std::collections::HashMap;
use std::future::Future;
use std::num::NonZeroUsize;
use std::pin::Pin;
@@ -45,12 +45,6 @@ pub(crate) struct Args {
req_latest_probability: f64,
#[clap(long)]
limit_to_first_n_targets: Option<usize>,
/// For large pageserver installations, enumerating the keyspace takes a lot of time.
/// If specified, the specified path is used to maintain a cache of the keyspace enumeration result.
/// The cache is tagged and auto-invalidated by the tenant/timeline ids only.
/// It doesn't get invalidated if the keyspace changes under the hood, e.g., due to new ingested data or compaction.
#[clap(long)]
keyspace_cache: Option<Utf8PathBuf>,
targets: Option<Vec<TenantTimelineId>>,
}
@@ -65,7 +59,7 @@ impl LiveStats {
}
}
#[derive(Clone, serde::Serialize, serde::Deserialize)]
#[derive(Clone)]
struct KeyRange {
timeline: TenantTimelineId,
timeline_lsn: Lsn,
@@ -113,107 +107,63 @@ async fn main_impl(
)
.await?;
#[derive(serde::Deserialize)]
struct KeyspaceCacheDe {
tag: Vec<TenantTimelineId>,
data: Vec<KeyRange>,
}
#[derive(serde::Serialize)]
struct KeyspaceCacheSer<'a> {
tag: &'a [TenantTimelineId],
data: &'a [KeyRange],
}
let cache = args
.keyspace_cache
.as_ref()
.map(|keyspace_cache_file| {
let contents = match std::fs::read(keyspace_cache_file) {
Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
return anyhow::Ok(None);
}
x => x.context("read keyspace cache file")?,
};
let cache: KeyspaceCacheDe =
serde_json::from_slice(&contents).context("deserialize cache file")?;
let tag_ok = HashSet::<TenantTimelineId>::from_iter(cache.tag.into_iter())
== HashSet::from_iter(timelines.iter().cloned());
info!("keyspace cache file matches tag: {tag_ok}");
anyhow::Ok(if tag_ok { Some(cache.data) } else { None })
})
.transpose()?
.flatten();
let all_ranges: Vec<KeyRange> = if let Some(cached) = cache {
info!("using keyspace cache file");
cached
} else {
let mut js = JoinSet::new();
for timeline in &timelines {
js.spawn({
let mgmt_api_client = Arc::clone(&mgmt_api_client);
let timeline = *timeline;
async move {
let partitioning = mgmt_api_client
.keyspace(timeline.tenant_id, timeline.timeline_id)
.await?;
let lsn = partitioning.at_lsn;
let start = Instant::now();
let mut filtered = KeySpaceAccum::new();
// let's hope this is inlined and vectorized...
// TODO: turn this loop into a is_rel_block_range() function.
for r in partitioning.keys.ranges.iter() {
let mut i = r.start;
while i != r.end {
if is_rel_block_key(&i) {
filtered.add_key(i);
}
i = i.next();
let mut js = JoinSet::new();
for timeline in &timelines {
js.spawn({
let mgmt_api_client = Arc::clone(&mgmt_api_client);
let timeline = *timeline;
async move {
let partitioning = mgmt_api_client
.keyspace(timeline.tenant_id, timeline.timeline_id)
.await?;
let lsn = partitioning.at_lsn;
let start = Instant::now();
let mut filtered = KeySpaceAccum::new();
// let's hope this is inlined and vectorized...
// TODO: turn this loop into a is_rel_block_range() function.
for r in partitioning.keys.ranges.iter() {
let mut i = r.start;
while i != r.end {
if is_rel_block_key(&i) {
filtered.add_key(i);
}
i = i.next();
}
let filtered = filtered.to_keyspace();
let filter_duration = start.elapsed();
anyhow::Ok((
filter_duration,
filtered.ranges.into_iter().map(move |r| KeyRange {
timeline,
timeline_lsn: lsn,
start: r.start.to_i128(),
end: r.end.to_i128(),
}),
))
}
});
}
let mut total_filter_duration = Duration::from_secs(0);
let mut all_ranges: Vec<KeyRange> = Vec::new();
while let Some(res) = js.join_next().await {
let (filter_duration, range) = res.unwrap().unwrap();
all_ranges.extend(range);
total_filter_duration += filter_duration;
}
info!("filter duration: {}", total_filter_duration.as_secs_f64());
if let Some(cachefile) = args.keyspace_cache.as_ref() {
let cache = KeyspaceCacheSer {
tag: &timelines,
data: &all_ranges,
};
let bytes = serde_json::to_vec(&cache).context("serialize keyspace for cache file")?;
std::fs::write(cachefile, bytes).context("write keyspace cache file to disk")?;
info!("successfully wrote keyspace cache file");
}
all_ranges
};
let filtered = filtered.to_keyspace();
let filter_duration = start.elapsed();
anyhow::Ok((
filter_duration,
filtered.ranges.into_iter().map(move |r| KeyRange {
timeline,
timeline_lsn: lsn,
start: r.start.to_i128(),
end: r.end.to_i128(),
}),
))
}
});
}
let mut total_filter_duration = Duration::from_secs(0);
let mut all_ranges: Vec<KeyRange> = Vec::new();
while let Some(res) = js.join_next().await {
let (filter_duration, range) = res.unwrap().unwrap();
all_ranges.extend(range);
total_filter_duration += filter_duration;
}
info!("filter duration: {}", total_filter_duration.as_secs_f64());
let live_stats = Arc::new(LiveStats::default());
let num_client_tasks = timelines.len();
let num_live_stats_dump = 1;
let num_work_sender_tasks = 1;
let num_main_impl = 1;
let start_work_barrier = Arc::new(tokio::sync::Barrier::new(
num_client_tasks + num_live_stats_dump + num_work_sender_tasks + num_main_impl,
num_client_tasks + num_live_stats_dump + num_work_sender_tasks,
));
let all_work_done_barrier = Arc::new(tokio::sync::Barrier::new(num_client_tasks));
tokio::spawn({
let stats = Arc::clone(&live_stats);
@@ -233,143 +183,126 @@ async fn main_impl(
}
});
let cancel = CancellationToken::new();
let mut work_senders: HashMap<TenantTimelineId, _> = HashMap::new();
let mut work_senders = HashMap::new();
let mut tasks = Vec::new();
for tl in &timelines {
let (sender, receiver) = tokio::sync::mpsc::channel(10); // TODO: not sure what the implications of this are
work_senders.insert(*tl, sender);
work_senders.insert(tl, sender);
tasks.push(tokio::spawn(client(
args,
*tl,
Arc::clone(&start_work_barrier),
receiver,
Arc::clone(&all_work_done_barrier),
Arc::clone(&live_stats),
cancel.clone(),
)));
}
let work_sender: Pin<Box<dyn Send + Future<Output = ()>>> = {
let start_work_barrier = start_work_barrier.clone();
let cancel = cancel.clone();
match args.per_target_rate_limit {
None => Box::pin(async move {
let work_sender: Pin<Box<dyn Send + Future<Output = ()>>> = match args.per_target_rate_limit {
None => Box::pin(async move {
let weights = rand::distributions::weighted::WeightedIndex::new(
all_ranges.iter().map(|v| v.len()),
)
.unwrap();
start_work_barrier.wait().await;
loop {
let (timeline, req) = {
let mut rng = rand::thread_rng();
let r = &all_ranges[weights.sample(&mut rng)];
let key: i128 = rng.gen_range(r.start..r.end);
let key = repository::Key::from_i128(key);
let (rel_tag, block_no) =
key_to_rel_block(key).expect("we filter non-rel-block keys out above");
(
r.timeline,
PagestreamGetPageRequest {
latest: rng.gen_bool(args.req_latest_probability),
lsn: r.timeline_lsn,
rel: rel_tag,
blkno: block_no,
},
)
};
let sender = work_senders.get(&timeline).unwrap();
// TODO: what if this blocks?
sender.send(req).await.ok().unwrap();
}
}),
Some(rps_limit) => Box::pin(async move {
let period = Duration::from_secs_f64(1.0 / (rps_limit as f64));
let make_timeline_task: &dyn Fn(
TenantTimelineId,
)
-> Pin<Box<dyn Send + Future<Output = ()>>> = &|timeline| {
let sender = work_senders.get(&timeline).unwrap();
let ranges: Vec<KeyRange> = all_ranges
.iter()
.filter(|r| r.timeline == timeline)
.cloned()
.collect();
let weights = rand::distributions::weighted::WeightedIndex::new(
all_ranges.iter().map(|v| v.len()),
ranges.iter().map(|v| v.len()),
)
.unwrap();
start_work_barrier.wait().await;
while !cancel.is_cancelled() {
let (timeline, req) = {
let mut rng = rand::thread_rng();
let r = &all_ranges[weights.sample(&mut rng)];
let key: i128 = rng.gen_range(r.start..r.end);
let key = Key::from_i128(key);
let (rel_tag, block_no) =
key_to_rel_block(key).expect("we filter non-rel-block keys out above");
(
r.timeline,
Box::pin(async move {
let mut ticker = tokio::time::interval(period);
ticker.set_missed_tick_behavior(
/* TODO review this choice */
tokio::time::MissedTickBehavior::Burst,
);
loop {
ticker.tick().await;
let req = {
let mut rng = rand::thread_rng();
let r = &ranges[weights.sample(&mut rng)];
let key: i128 = rng.gen_range(r.start..r.end);
let key = repository::Key::from_i128(key);
assert!(is_rel_block_key(&key));
let (rel_tag, block_no) = key_to_rel_block(key)
.expect("we filter non-rel-block keys out above");
PagestreamGetPageRequest {
latest: rng.gen_bool(args.req_latest_probability),
lsn: r.timeline_lsn,
rel: rel_tag,
blkno: block_no,
},
)
};
let sender = work_senders.get(&timeline).unwrap();
// TODO: what if this blocks?
if sender.send(req).await.is_err() {
assert!(cancel.is_cancelled(), "client has gone away unexpectedly");
}
}
}),
Some(rps_limit) => Box::pin(async move {
let period = Duration::from_secs_f64(1.0 / (rps_limit as f64));
let make_timeline_task: &dyn Fn(
TenantTimelineId,
)
-> Pin<Box<dyn Send + Future<Output = ()>>> = &|timeline| {
let sender = work_senders.get(&timeline).unwrap();
let ranges: Vec<KeyRange> = all_ranges
.iter()
.filter(|r| r.timeline == timeline)
.cloned()
.collect();
let weights = rand::distributions::weighted::WeightedIndex::new(
ranges.iter().map(|v| v.len()),
)
.unwrap();
let cancel = cancel.clone();
Box::pin(async move {
let mut ticker = tokio::time::interval(period);
ticker.set_missed_tick_behavior(
/* TODO review this choice */
tokio::time::MissedTickBehavior::Burst,
);
while !cancel.is_cancelled() {
ticker.tick().await;
let req = {
let mut rng = rand::thread_rng();
let r = &ranges[weights.sample(&mut rng)];
let key: i128 = rng.gen_range(r.start..r.end);
let key = Key::from_i128(key);
assert!(is_rel_block_key(&key));
let (rel_tag, block_no) = key_to_rel_block(key)
.expect("we filter non-rel-block keys out above");
PagestreamGetPageRequest {
latest: rng.gen_bool(args.req_latest_probability),
lsn: r.timeline_lsn,
rel: rel_tag,
blkno: block_no,
}
};
if sender.send(req).await.is_err() {
assert!(cancel.is_cancelled(), "client has gone away unexpectedly");
}
}
})
};
};
sender.send(req).await.ok().unwrap();
}
})
};
let tasks: Vec<_> = work_senders
.keys()
.map(|tl| make_timeline_task(*tl))
.collect();
let tasks: Vec<_> = work_senders
.keys()
.map(|tl| make_timeline_task(**tl))
.collect();
start_work_barrier.wait().await;
start_work_barrier.wait().await;
join_all(tasks).await;
}),
}
join_all(tasks).await;
}),
};
let work_sender_task = tokio::spawn(work_sender);
info!("waiting for everything to become ready");
start_work_barrier.wait().await;
info!("work started");
if let Some(runtime) = args.runtime {
tokio::time::sleep(runtime.into()).await;
info!("runtime over, signalling cancellation");
cancel.cancel();
work_sender_task.await.unwrap();
info!("work sender exited");
match tokio::time::timeout(runtime.into(), work_sender).await {
Ok(()) => unreachable!("work sender never terminates"),
Err(_timeout) => {
// this implicitly drops the work_senders, making all the clients exit
}
}
} else {
work_sender_task.await.unwrap();
work_sender.await;
unreachable!("work sender never terminates");
}
info!("joining clients");
for t in tasks {
t.await.unwrap();
}
info!("all clients stopped");
let output = Output {
total: {
let mut agg_stats = request_stats::Stats::new();
@@ -393,9 +326,11 @@ async fn client(
timeline: TenantTimelineId,
start_work_barrier: Arc<Barrier>,
mut work: tokio::sync::mpsc::Receiver<PagestreamGetPageRequest>,
all_work_done_barrier: Arc<Barrier>,
live_stats: Arc<LiveStats>,
cancel: CancellationToken,
) {
start_work_barrier.wait().await;
let client = pageserver_client::page_service::Client::new(args.page_service_connstring.clone())
.await
.unwrap();
@@ -404,27 +339,19 @@ async fn client(
.await
.unwrap();
let do_requests = async {
start_work_barrier.wait().await;
while let Some(req) = work.recv().await {
let start = Instant::now();
client
.getpage(req)
.await
.with_context(|| format!("getpage for {timeline}"))
.unwrap();
let elapsed = start.elapsed();
live_stats.inc();
STATS.with(|stats| {
stats.borrow().lock().unwrap().observe(elapsed).unwrap();
});
}
};
tokio::select! {
res = do_requests => { res },
_ = cancel.cancelled() => {
client.shutdown().await;
return;
}
while let Some(req) = work.recv().await {
let start = Instant::now();
client
.getpage(req)
.await
.with_context(|| format!("getpage for {timeline}"))
.unwrap();
let elapsed = start.elapsed();
live_stats.inc();
STATS.with(|stats| {
stats.borrow().lock().unwrap().observe(elapsed).unwrap();
});
}
all_work_done_barrier.wait().await;
}
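// Illustrative sketch of the per-target rate limiting pattern visible in
// main_impl above: one interval ticker per timeline, firing every
// 1.0 / rps_limit seconds, bursting missed ticks, and stopping on
// cancellation. The function name and the `send_one_request` closure are
// hypothetical.
async fn rate_limited_sender(
    rps_limit: usize,
    cancel: tokio_util::sync::CancellationToken,
    mut send_one_request: impl FnMut(),
) {
    let period = std::time::Duration::from_secs_f64(1.0 / rps_limit as f64);
    let mut ticker = tokio::time::interval(period);
    ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Burst);
    while !cancel.is_cancelled() {
        ticker.tick().await;
        send_one_request();
    }
}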

View File

@@ -4,8 +4,6 @@ use humantime::Duration;
use tokio::task::JoinSet;
use utils::id::TenantTimelineId;
use pageserver_client::mgmt_api::ForceAwaitLogicalSize;
#[derive(clap::Parser)]
pub(crate) struct Args {
#[clap(long, default_value = "http://localhost:9898")]
@@ -58,15 +56,14 @@ async fn main_impl(args: Args) -> anyhow::Result<()> {
for tl in timelines {
let mgmt_api_client = Arc::clone(&mgmt_api_client);
js.spawn(async move {
// TODO: API to explicitly trigger initial logical size computation.
// Should probably also avoid making it a side effect of timeline details to trigger initial logical size calculation.
// => https://github.com/neondatabase/neon/issues/6168
let info = mgmt_api_client
.timeline_info(tl.tenant_id, tl.timeline_id, ForceAwaitLogicalSize::Yes)
.timeline_info(tl.tenant_id, tl.timeline_id)
.await
.unwrap();
// Polling should not be strictly required here since we await
// for the initial logical size, however it's possible for the request
// to land before the timeline is initialised. This results in an approximate
// logical size.
if let Some(period) = args.poll_for_completion {
let mut ticker = tokio::time::interval(period.into());
ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);
@@ -74,7 +71,7 @@ async fn main_impl(args: Args) -> anyhow::Result<()> {
while !info.current_logical_size_is_accurate {
ticker.tick().await;
info = mgmt_api_client
.timeline_info(tl.tenant_id, tl.timeline_id, ForceAwaitLogicalSize::Yes)
.timeline_info(tl.tenant_id, tl.timeline_id)
.await
.unwrap();
}

View File

@@ -35,7 +35,6 @@ fn main() {
logging::Output::Stderr,
)
.unwrap();
logging::replace_panic_hook_with_tracing_panic_hook().forget();
let args = Args::parse();
match args {

View File

@@ -527,7 +527,6 @@ fn start_pageserver(
conf,
remote_storage.clone(),
disk_usage_eviction_state.clone(),
tenant_manager.clone(),
background_jobs_barrier.clone(),
)?;
}

View File

@@ -1126,12 +1126,11 @@ mod tests {
};
use camino_tempfile::{tempdir, Utf8TempDir};
use pageserver_api::models::EvictionPolicy;
use remote_storage::{RemoteStorageKind, S3Config};
use utils::serde_percent::Percent;
use super::*;
use crate::DEFAULT_PG_VERSION;
use crate::{tenant::config::EvictionPolicy, DEFAULT_PG_VERSION};
const ALL_BASE_VALUES_TOML: &str = r#"
# Initial configuration file created by 'pageserver --init'

View File

@@ -267,7 +267,7 @@ async fn calculate_synthetic_size_worker(
}
};
for (tenant_shard_id, tenant_state, _gen) in tenants {
for (tenant_shard_id, tenant_state) in tenants {
if tenant_state != TenantState::Active {
continue;
}

View File

@@ -196,7 +196,7 @@ pub(super) async fn collect_all_metrics(
}
};
let tenants = futures::stream::iter(tenants).filter_map(|(id, state, _)| async move {
let tenants = futures::stream::iter(tenants).filter_map(|(id, state)| async move {
if state != TenantState::Active || !id.is_zero() {
None
} else {

View File

@@ -47,24 +47,21 @@ use std::{
};
use anyhow::Context;
use pageserver_api::shard::TenantShardId;
use camino::Utf8Path;
use remote_storage::GenericRemoteStorage;
use serde::{Deserialize, Serialize};
use tokio::time::Instant;
use tokio_util::sync::CancellationToken;
use tracing::{debug, error, info, instrument, warn, Instrument};
use utils::completion;
use utils::serde_percent::Percent;
use utils::{completion, id::TimelineId};
use crate::{
config::PageServerConf,
task_mgr::{self, TaskKind, BACKGROUND_RUNTIME},
tenant::{
self,
mgr::TenantManager,
remote_timeline_client::LayerFileMetadata,
secondary::SecondaryTenant,
storage_layer::{AsLayerDesc, EvictionError, Layer, LayerFileName},
storage_layer::{AsLayerDesc, EvictionError, Layer},
Timeline,
},
};
@@ -128,7 +125,6 @@ pub fn launch_disk_usage_global_eviction_task(
conf: &'static PageServerConf,
storage: GenericRemoteStorage,
state: Arc<State>,
tenant_manager: Arc<TenantManager>,
background_jobs_barrier: completion::Barrier,
) -> anyhow::Result<()> {
let Some(task_config) = &conf.disk_usage_based_eviction else {
@@ -154,7 +150,8 @@ pub fn launch_disk_usage_global_eviction_task(
_ = background_jobs_barrier.wait() => { }
};
disk_usage_eviction_task(&state, task_config, &storage, tenant_manager, cancel).await;
disk_usage_eviction_task(&state, task_config, &storage, &conf.tenants_path(), cancel)
.await;
Ok(())
},
);
@@ -167,7 +164,7 @@ async fn disk_usage_eviction_task(
state: &State,
task_config: &DiskUsageEvictionTaskConfig,
storage: &GenericRemoteStorage,
tenant_manager: Arc<TenantManager>,
tenants_dir: &Utf8Path,
cancel: CancellationToken,
) {
scopeguard::defer! {
@@ -194,7 +191,7 @@ async fn disk_usage_eviction_task(
state,
task_config,
storage,
&tenant_manager,
tenants_dir,
&cancel,
)
.await;
@@ -229,17 +226,15 @@ async fn disk_usage_eviction_task_iteration(
state: &State,
task_config: &DiskUsageEvictionTaskConfig,
storage: &GenericRemoteStorage,
tenant_manager: &Arc<TenantManager>,
tenants_dir: &Utf8Path,
cancel: &CancellationToken,
) -> anyhow::Result<()> {
let tenants_dir = tenant_manager.get_conf().tenants_path();
let usage_pre = filesystem_level_usage::get(&tenants_dir, task_config)
let usage_pre = filesystem_level_usage::get(tenants_dir, task_config)
.context("get filesystem-level disk usage before evictions")?;
let res = disk_usage_eviction_task_iteration_impl(
state,
storage,
usage_pre,
tenant_manager,
task_config.eviction_order,
cancel,
)
@@ -253,7 +248,7 @@ async fn disk_usage_eviction_task_iteration(
}
IterationOutcome::Finished(outcome) => {
// Verify with statvfs whether we made any real progress
let after = filesystem_level_usage::get(&tenants_dir, task_config)
let after = filesystem_level_usage::get(tenants_dir, task_config)
// It's quite unlikely to hit the error here. Keep the code simple and bail out.
.context("get filesystem-level disk usage after evictions")?;
@@ -329,7 +324,6 @@ pub(crate) async fn disk_usage_eviction_task_iteration_impl<U: Usage>(
state: &State,
_storage: &GenericRemoteStorage,
usage_pre: U,
tenant_manager: &Arc<TenantManager>,
eviction_order: EvictionOrder,
cancel: &CancellationToken,
) -> anyhow::Result<IterationOutcome<U>> {
@@ -350,29 +344,29 @@ pub(crate) async fn disk_usage_eviction_task_iteration_impl<U: Usage>(
"running disk usage based eviction due to pressure"
);
let candidates =
match collect_eviction_candidates(tenant_manager, eviction_order, cancel).await? {
EvictionCandidates::Cancelled => {
return Ok(IterationOutcome::Cancelled);
}
EvictionCandidates::Finished(partitioned) => partitioned,
};
let candidates = match collect_eviction_candidates(eviction_order, cancel).await? {
EvictionCandidates::Cancelled => {
return Ok(IterationOutcome::Cancelled);
}
EvictionCandidates::Finished(partitioned) => partitioned,
};
// Debug-log the list of candidates
let now = SystemTime::now();
for (i, (partition, candidate)) in candidates.iter().enumerate() {
let nth = i + 1;
let desc = candidate.layer.layer_desc();
let total_candidates = candidates.len();
let size = candidate.layer.get_file_size();
let size = desc.file_size;
let rel = candidate.relative_last_activity;
debug!(
"cand {nth}/{total_candidates}: size={size}, rel_last_activity={rel}, no_access_for={}us, partition={partition:?}, {}/{}/{}",
now.duration_since(candidate.last_activity_ts)
.unwrap()
.as_micros(),
candidate.layer.get_tenant_shard_id(),
candidate.layer.get_timeline_id(),
candidate.layer.get_name(),
desc.tenant_shard_id,
desc.timeline_id,
candidate.layer,
);
}
@@ -386,56 +380,39 @@ pub(crate) async fn disk_usage_eviction_task_iteration_impl<U: Usage>(
// If we get far enough in the list that we start to evict layers that are below
// the tenant's min-resident-size threshold, print a warning, and memorize the disk
// usage at that point, in 'usage_planned_min_resident_size_respecting'.
let mut warned = None;
let mut usage_planned = usage_pre;
let mut evicted_amount = 0;
let selection = select_victims(&candidates, usage_pre);
let mut candidates = candidates;
let selection = if matches!(eviction_order, EvictionOrder::RelativeAccessed { .. }) {
// we currently have the layers ordered by AbsoluteAccessed so that we can get the summary
// for comparison here. this is a temporary measure to develop alternatives.
use std::fmt::Write;
let mut summary_buf = String::with_capacity(256);
{
let absolute_summary = candidates
.iter()
.take(selection.amount)
.map(|(_, candidate)| candidate)
.collect::<summary::EvictionSummary>();
write!(summary_buf, "{absolute_summary}").expect("string grows");
info!("absolute accessed selection summary: {summary_buf}");
for (i, (partition, candidate)) in candidates.iter().enumerate() {
if !usage_planned.has_pressure() {
debug!(
no_candidates_evicted = i,
"took enough candidates for pressure to be relieved"
);
break;
}
candidates.sort_unstable_by_key(|(partition, candidate)| {
(*partition, candidate.relative_last_activity)
});
let selection = select_victims(&candidates, usage_pre);
{
summary_buf.clear();
let relative_summary = candidates
.iter()
.take(selection.amount)
.map(|(_, candidate)| candidate)
.collect::<summary::EvictionSummary>();
write!(summary_buf, "{relative_summary}").expect("string grows");
info!("relative accessed selection summary: {summary_buf}");
if partition == &MinResidentSizePartition::Below && warned.is_none() {
warn!(?usage_pre, ?usage_planned, candidate_no=i, "tenant_min_resident_size-respecting LRU would not relieve pressure, evicting more following global LRU policy");
warned = Some(usage_planned);
}
selection
} else {
selection
usage_planned.add_available_bytes(candidate.layer.layer_desc().file_size);
evicted_amount += 1;
}
let usage_planned = match warned {
Some(respecting_tenant_min_resident_size) => PlannedUsage {
respecting_tenant_min_resident_size,
fallback_to_global_lru: Some(usage_planned),
},
None => PlannedUsage {
respecting_tenant_min_resident_size: usage_planned,
fallback_to_global_lru: None,
},
};
let (evicted_amount, usage_planned) = selection.into_amount_and_planned();
debug!(?usage_planned, "usage planned");
// phase2: evict layers
@@ -486,30 +463,19 @@ pub(crate) async fn disk_usage_eviction_task_iteration_impl<U: Usage>(
continue;
};
match candidate.layer {
EvictionLayer::Attached(layer) => {
let file_size = layer.layer_desc().file_size;
js.spawn(async move {
layer
.evict_and_wait()
.await
.map(|()| file_size)
.map_err(|e| (file_size, e))
});
}
EvictionLayer::Secondary(layer) => {
let file_size = layer.metadata.file_size();
let tenant_manager = tenant_manager.clone();
js.spawn(async move {
let rtc = candidate.timeline.remote_client.as_ref().expect(
"holding the witness, all timelines must have a remote timeline client",
);
let file_size = candidate.layer.layer_desc().file_size;
candidate
.layer
.evict_and_wait(rtc)
.await
.map(|()| file_size)
.map_err(|e| (file_size, e))
});
js.spawn(async move {
layer
.secondary_tenant
.evict_layer(tenant_manager.get_conf(), layer.timeline_id, layer.name)
.await;
Ok(file_size)
});
}
}
tokio::task::yield_now().await;
}
@@ -536,100 +502,11 @@ pub(crate) async fn disk_usage_eviction_task_iteration_impl<U: Usage>(
}
#[derive(Clone)]
pub(crate) struct EvictionSecondaryLayer {
pub(crate) secondary_tenant: Arc<SecondaryTenant>,
pub(crate) timeline_id: TimelineId,
pub(crate) name: LayerFileName,
pub(crate) metadata: LayerFileMetadata,
}
/// Full [`Layer`] objects are specific to tenants in attached mode. This type is a layer
/// of indirection to store either a `Layer`, or a reference to a secondary tenant and a layer name.
#[derive(Clone)]
pub(crate) enum EvictionLayer {
Attached(Layer),
#[allow(dead_code)]
Secondary(EvictionSecondaryLayer),
}
impl From<Layer> for EvictionLayer {
fn from(value: Layer) -> Self {
Self::Attached(value)
}
}
impl EvictionLayer {
pub(crate) fn get_tenant_shard_id(&self) -> &TenantShardId {
match self {
Self::Attached(l) => &l.layer_desc().tenant_shard_id,
Self::Secondary(sl) => sl.secondary_tenant.get_tenant_shard_id(),
}
}
pub(crate) fn get_timeline_id(&self) -> &TimelineId {
match self {
Self::Attached(l) => &l.layer_desc().timeline_id,
Self::Secondary(sl) => &sl.timeline_id,
}
}
pub(crate) fn get_name(&self) -> LayerFileName {
match self {
Self::Attached(l) => l.layer_desc().filename(),
Self::Secondary(sl) => sl.name.clone(),
}
}
pub(crate) fn get_file_size(&self) -> u64 {
match self {
Self::Attached(l) => l.layer_desc().file_size,
Self::Secondary(sl) => sl.metadata.file_size(),
}
}
}
#[derive(Clone)]
pub(crate) struct EvictionCandidate {
pub(crate) layer: EvictionLayer,
pub(crate) last_activity_ts: SystemTime,
pub(crate) relative_last_activity: finite_f32::FiniteF32,
}
impl std::fmt::Display for EvictionLayer {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
Self::Attached(l) => l.fmt(f),
Self::Secondary(sl) => {
write!(f, "{}/{}", sl.timeline_id, sl.name)
}
}
}
}
pub(crate) struct DiskUsageEvictionInfo {
/// Timeline's largest layer (remote or resident)
pub max_layer_size: Option<u64>,
/// Timeline's resident layers
pub resident_layers: Vec<EvictionCandidate>,
}
impl std::fmt::Debug for EvictionCandidate {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
// format the tv_sec, tv_nsec into rfc3339 in case someone is looking at it
// having to allocate a string to this is bad, but it will rarely be formatted
let ts = chrono::DateTime::<chrono::Utc>::from(self.last_activity_ts);
let ts = ts.to_rfc3339_opts(chrono::SecondsFormat::Nanos, true);
struct DisplayIsDebug<'a, T>(&'a T);
impl<'a, T: std::fmt::Display> std::fmt::Debug for DisplayIsDebug<'a, T> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.0)
}
}
f.debug_struct("LocalLayerInfoForDiskUsageEviction")
.field("layer", &DisplayIsDebug(&self.layer))
.field("last_activity", &ts)
.finish()
}
struct EvictionCandidate {
timeline: Arc<Timeline>,
layer: Layer,
last_activity_ts: SystemTime,
relative_last_activity: finite_f32::FiniteF32,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
@@ -746,7 +623,6 @@ enum EvictionCandidates {
/// - tenant B 1 layer
/// - tenant C 8 layers
async fn collect_eviction_candidates(
tenant_manager: &Arc<TenantManager>,
eviction_order: EvictionOrder,
cancel: &CancellationToken,
) -> anyhow::Result<EvictionCandidates> {
@@ -755,16 +631,13 @@ async fn collect_eviction_candidates(
.await
.context("get list of tenants")?;
// TODO: avoid listing every layer in every tenant: this loop can block the executor,
// and the resulting data structure can be huge.
// (https://github.com/neondatabase/neon/issues/6224)
let mut candidates = Vec::new();
for (tenant_id, _state, _gen) in tenants {
for (tenant_id, _state) in &tenants {
if cancel.is_cancelled() {
return Ok(EvictionCandidates::Cancelled);
}
let tenant = match tenant::mgr::get_tenant(tenant_id, true) {
let tenant = match tenant::mgr::get_tenant(*tenant_id, true) {
Ok(tenant) => tenant,
Err(e) => {
// this can happen if tenant has lifecycle transition after we fetched it
@@ -792,7 +665,11 @@ async fn collect_eviction_candidates(
}
let info = tl.get_local_layers_for_disk_usage_eviction().await;
debug!(tenant_id=%tl.tenant_shard_id.tenant_id, shard_id=%tl.tenant_shard_id.shard_slug(), timeline_id=%tl.timeline_id, "timeline resident layers count: {}", info.resident_layers.len());
tenant_candidates.extend(info.resident_layers.into_iter());
tenant_candidates.extend(
info.resident_layers
.into_iter()
.map(|layer_infos| (tl.clone(), layer_infos)),
);
max_layer_size = max_layer_size.max(info.max_layer_size.unwrap_or(0));
if cancel.is_cancelled() {
@@ -813,16 +690,14 @@ async fn collect_eviction_candidates(
// A default override can be put in the default tenant conf in the pageserver.toml.
let min_resident_size = if let Some(s) = tenant.get_min_resident_size_override() {
debug!(
tenant_id=%tenant.tenant_shard_id().tenant_id,
shard_id=%tenant.tenant_shard_id().shard_slug(),
tenant_id=%tenant.tenant_id(),
overridden_size=s,
"using overridden min resident size for tenant"
);
s
} else {
debug!(
tenant_id=%tenant.tenant_shard_id().tenant_id,
shard_id=%tenant.tenant_shard_id().shard_slug(),
tenant_id=%tenant.tenant_id(),
max_layer_size,
"using max layer size as min_resident_size for tenant",
);
@@ -832,7 +707,7 @@ async fn collect_eviction_candidates(
// Sort layers most-recently-used first, then partition by
// cumsum above/below min_resident_size.
tenant_candidates
.sort_unstable_by_key(|layer_info| std::cmp::Reverse(layer_info.last_activity_ts));
.sort_unstable_by_key(|(_, layer_info)| std::cmp::Reverse(layer_info.last_activity_ts));
let mut cumsum: i128 = 0;
// keeping the -1 or not decides if every tenant should lose their least recently accessed
@@ -866,10 +741,12 @@ async fn collect_eviction_candidates(
.unwrap_or(1);
let divider = total as f32;
for (i, mut candidate) in tenant_candidates.into_iter().enumerate() {
for (i, (timeline, layer_info)) in tenant_candidates.into_iter().enumerate() {
let file_size = layer_info.file_size();
// as we iterate this reverse sorted list, the most recently accessed layer will always
// be 1.0; this is for us to evict it last.
candidate.relative_last_activity = if matches!(
let relative_last_activity = if matches!(
eviction_order,
EvictionOrder::RelativeAccessed { .. }
) {
@@ -884,123 +761,41 @@ async fn collect_eviction_candidates(
finite_f32::FiniteF32::ZERO
};
let candidate = EvictionCandidate {
timeline,
last_activity_ts: layer_info.last_activity_ts,
layer: layer_info.layer,
relative_last_activity,
};
let partition = if cumsum > min_resident_size as i128 {
MinResidentSizePartition::Above
} else {
MinResidentSizePartition::Below
};
cumsum += i128::from(candidate.layer.get_file_size());
candidates.push((partition, candidate));
cumsum += i128::from(file_size);
}
}
// Note: the same tenant ID might be hit twice, if it transitions from attached to
// secondary while we run. That is okay: when we eventually try and run the eviction,
// the `Gate` on the object will ensure that whichever one has already been shut down
// will not delete anything.
let mut secondary_tenants = Vec::new();
tenant_manager.foreach_secondary_tenants(
|_tenant_shard_id: &TenantShardId, state: &Arc<SecondaryTenant>| {
secondary_tenants.push(state.clone());
},
);
for secondary_tenant in secondary_tenants {
let mut layer_info = secondary_tenant.get_layers_for_eviction();
layer_info
.resident_layers
.sort_unstable_by_key(|layer_info| std::cmp::Reverse(layer_info.last_activity_ts));
candidates.extend(layer_info.resident_layers.into_iter().map(|candidate| {
(
// Secondary locations' layers are always considered above the min resident size,
// i.e. secondary locations are permitted to be trimmed to zero layers if all
// the layers have sufficiently old access times.
MinResidentSizePartition::Above,
candidate,
)
}));
}
debug_assert!(MinResidentSizePartition::Above < MinResidentSizePartition::Below,
"as explained in the function's doc comment, layers that aren't in the tenant's min_resident_size are evicted first");
// always behave as if AbsoluteAccessed was selected. if RelativeAccessed is in use, we
// will sort later by candidate.relative_last_activity to get comparable evictions.
candidates
.sort_unstable_by_key(|(partition, candidate)| (*partition, candidate.last_activity_ts));
match eviction_order {
EvictionOrder::AbsoluteAccessed => {
candidates.sort_unstable_by_key(|(partition, candidate)| {
(*partition, candidate.last_activity_ts)
});
}
EvictionOrder::RelativeAccessed { .. } => {
candidates.sort_unstable_by_key(|(partition, candidate)| {
(*partition, candidate.relative_last_activity)
});
}
}
Ok(EvictionCandidates::Finished(candidates))
}
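The ordering above sorts candidates first by partition and then by either the absolute access timestamp or the relative 0..1 score. A self-contained sketch of that two-level sort, using simplified stand-ins rather than the pageserver's real MinResidentSizePartition/EvictionCandidate types:
use std::time::{Duration, SystemTime};

// Simplified stand-ins for MinResidentSizePartition and EvictionCandidate.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
enum Partition {
    Above, // outside the tenant's min_resident_size: evicted first
    Below,
}

#[derive(Debug, Clone, Copy)]
struct Candidate {
    last_activity_ts: SystemTime,
    relative_last_activity: u32, // stand-in for FiniteF32, scaled to 0..=1000
}

fn main() {
    let now = SystemTime::now();
    let old = now - Duration::from_secs(3600);
    let mut candidates = vec![
        (Partition::Below, Candidate { last_activity_ts: old, relative_last_activity: 100 }),
        (Partition::Above, Candidate { last_activity_ts: now, relative_last_activity: 900 }),
        (Partition::Above, Candidate { last_activity_ts: old, relative_last_activity: 200 }),
    ];
    // "AbsoluteAccessed" order: partition first, then least recently accessed first.
    candidates.sort_unstable_by_key(|(p, c)| (*p, c.last_activity_ts));
    // "RelativeAccessed" order would instead key on (*p, c.relative_last_activity).
    for (p, c) in &candidates {
        println!("{:?} rel={}", p, c.relative_last_activity);
    }
}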
/// Given a pre-sorted vec of all layers in the system, select the first N which are enough to
/// relieve pressure.
///
/// Returns the amount of candidates selected, with the planned usage.
fn select_victims<U: Usage>(
candidates: &[(MinResidentSizePartition, EvictionCandidate)],
usage_pre: U,
) -> VictimSelection<U> {
let mut usage_when_switched = None;
let mut usage_planned = usage_pre;
let mut evicted_amount = 0;
for (i, (partition, candidate)) in candidates.iter().enumerate() {
if !usage_planned.has_pressure() {
break;
}
if partition == &MinResidentSizePartition::Below && usage_when_switched.is_none() {
usage_when_switched = Some((usage_planned, i));
}
usage_planned.add_available_bytes(candidate.layer.get_file_size());
evicted_amount += 1;
}
VictimSelection {
amount: evicted_amount,
usage_pre,
usage_when_switched,
usage_planned,
}
}
struct VictimSelection<U> {
amount: usize,
usage_pre: U,
usage_when_switched: Option<(U, usize)>,
usage_planned: U,
}
impl<U: Usage> VictimSelection<U> {
fn into_amount_and_planned(self) -> (usize, PlannedUsage<U>) {
debug!(
evicted_amount=%self.amount,
"took enough candidates for pressure to be relieved"
);
if let Some((usage_planned, candidate_no)) = self.usage_when_switched.as_ref() {
warn!(usage_pre=?self.usage_pre, ?usage_planned, candidate_no, "tenant_min_resident_size-respecting LRU would not relieve pressure, evicting more following global LRU policy");
}
let planned = match self.usage_when_switched {
Some((respecting_tenant_min_resident_size, _)) => PlannedUsage {
respecting_tenant_min_resident_size,
fallback_to_global_lru: Some(self.usage_planned),
},
None => PlannedUsage {
respecting_tenant_min_resident_size: self.usage_planned,
fallback_to_global_lru: None,
},
};
(self.amount, planned)
}
}
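A minimal sketch of the greedy walk that select_victims performs: take candidates in sorted order until enough bytes would be freed, and note whether the Below partition had to be touched. The byte-target pressure check here is a simplification standing in for the Usage trait:
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
enum Partition { Above, Below }

/// Returns (number of layers to evict, whether we had to dip below min_resident_size).
fn select_victims(candidates: &[(Partition, u64)], mut bytes_to_free: u64) -> (usize, bool) {
    let mut evicted = 0;
    let mut crossed_into_below = false;
    for (partition, file_size) in candidates {
        if bytes_to_free == 0 {
            break;
        }
        if *partition == Partition::Below {
            crossed_into_below = true;
        }
        bytes_to_free = bytes_to_free.saturating_sub(*file_size);
        evicted += 1;
    }
    (evicted, crossed_into_below)
}

fn main() {
    let candidates = vec![
        (Partition::Above, 10),
        (Partition::Above, 20),
        (Partition::Below, 40),
    ];
    assert_eq!(select_victims(&candidates, 25), (2, false));
    assert_eq!(select_victims(&candidates, 50), (3, true));
}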
struct TimelineKey(Arc<Timeline>);
impl PartialEq for TimelineKey {
@@ -1026,7 +821,7 @@ impl std::ops::Deref for TimelineKey {
}
/// A totally ordered f32 subset we can use with sorting functions.
pub(crate) mod finite_f32 {
mod finite_f32 {
/// A totally ordered f32 subset we can use with sorting functions.
#[derive(Clone, Copy, PartialEq)]
@@ -1085,137 +880,6 @@ pub(crate) mod finite_f32 {
}
}
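The finite_f32 module exists because plain f32 is not Ord (NaN breaks total ordering). A minimal sketch of the same idea, assuming construction simply rejects non-finite values:
use std::cmp::Ordering;

#[derive(Clone, Copy, PartialEq, Debug)]
struct FiniteF32(f32);

impl FiniteF32 {
    fn new(x: f32) -> Result<Self, f32> {
        if x.is_finite() { Ok(FiniteF32(x)) } else { Err(x) }
    }
}

impl Eq for FiniteF32 {}

impl PartialOrd for FiniteF32 {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for FiniteF32 {
    fn cmp(&self, other: &Self) -> Ordering {
        // Safe because NaN (the only incomparable value) is rejected at construction.
        self.0.partial_cmp(&other.0).expect("finite floats are totally ordered")
    }
}

fn main() {
    let mut xs = vec![FiniteF32::new(0.5).unwrap(), FiniteF32::new(0.1).unwrap()];
    xs.sort(); // legal now, because FiniteF32 is Ord
    assert_eq!(xs[0], FiniteF32::new(0.1).unwrap());
    assert!(FiniteF32::new(f32::NAN).is_err());
}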
mod summary {
use super::finite_f32::FiniteF32;
use super::{EvictionCandidate, LayerCount};
use pageserver_api::shard::TenantShardId;
use std::collections::{BTreeMap, HashMap};
use std::time::SystemTime;
#[derive(Debug, Default)]
pub(super) struct EvictionSummary {
evicted_per_tenant: HashMap<TenantShardId, LayerCount>,
total: LayerCount,
last_absolute: Option<SystemTime>,
last_relative: Option<FiniteF32>,
}
impl<'a> FromIterator<&'a EvictionCandidate> for EvictionSummary {
fn from_iter<T: IntoIterator<Item = &'a EvictionCandidate>>(iter: T) -> Self {
let mut summary = EvictionSummary::default();
for item in iter {
let counts = summary
.evicted_per_tenant
.entry(*item.layer.get_tenant_shard_id())
.or_default();
let sz = item.layer.get_file_size();
counts.file_sizes += sz;
counts.count += 1;
summary.total.file_sizes += sz;
summary.total.count += 1;
summary.last_absolute = Some(item.last_activity_ts);
summary.last_relative = Some(item.relative_last_activity);
}
summary
}
}
struct SiBytesAmount(u64);
impl std::fmt::Display for SiBytesAmount {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
if self.0 < 1024 {
return write!(f, "{}B", self.0);
}
let mut tmp = self.0;
let mut ch = 0;
let suffixes = b"KMGTPE";
while tmp > 1024 * 1024 && ch < suffixes.len() - 1 {
tmp /= 1024;
ch += 1;
}
let ch = suffixes[ch] as char;
write!(f, "{:.1}{ch}iB", tmp as f64 / 1024.0)
}
}
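The SiBytesAmount formatter above picks a binary suffix and prints one decimal place. An equivalent standalone helper (illustrative only, not part of the pageserver API) behaves like this:
fn format_si_bytes(n: u64) -> String {
    if n < 1024 {
        return format!("{n}B");
    }
    let suffixes = b"KMGTPE";
    let mut tmp = n;
    let mut idx = 0;
    // Keep dividing until the value fits under 1024 units of the chosen suffix.
    while tmp > 1024 * 1024 && idx < suffixes.len() - 1 {
        tmp /= 1024;
        idx += 1;
    }
    format!("{:.1}{}iB", tmp as f64 / 1024.0, suffixes[idx] as char)
}

fn main() {
    assert_eq!(format_si_bytes(512), "512B");
    assert_eq!(format_si_bytes(2048), "2.0KiB");
    assert_eq!(format_si_bytes(3 * 1024 * 1024), "3.0MiB");
}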
impl std::fmt::Display for EvictionSummary {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
// wasteful, but it's for testing
let mut sorted: BTreeMap<usize, Vec<(TenantShardId, u64)>> = BTreeMap::new();
for (tenant_shard_id, count) in &self.evicted_per_tenant {
sorted
.entry(count.count)
.or_default()
.push((*tenant_shard_id, count.file_sizes));
}
let total_file_sizes = SiBytesAmount(self.total.file_sizes);
writeln!(
f,
"selected {} layers of {total_file_sizes} up to ({:?}, {:.2?}):",
self.total.count, self.last_absolute, self.last_relative,
)?;
for (count, per_tenant) in sorted.iter().rev().take(10) {
write!(f, "- {count} layers: ")?;
if per_tenant.len() < 3 {
for (i, (tenant_shard_id, bytes)) in per_tenant.iter().enumerate() {
if i > 0 {
write!(f, ", ")?;
}
let bytes = SiBytesAmount(*bytes);
write!(f, "{tenant_shard_id} ({bytes})")?;
}
} else {
let num_tenants = per_tenant.len();
let total_bytes = per_tenant.iter().map(|(_id, bytes)| bytes).sum::<u64>();
let total_bytes = SiBytesAmount(total_bytes);
let layers = num_tenants * count;
write!(
f,
"{num_tenants} tenants {total_bytes} in total {layers} layers",
)?;
}
writeln!(f)?;
}
if sorted.len() > 10 {
let (rem_count, rem_bytes) = sorted
.iter()
.rev()
.map(|(count, per_tenant)| {
(
count,
per_tenant.iter().map(|(_id, bytes)| bytes).sum::<u64>(),
)
})
.fold((0, 0), |acc, next| (acc.0 + next.0, acc.1 + next.1));
let rem_bytes = SiBytesAmount(rem_bytes);
writeln!(f, "- rest of tenants ({}) not shown ({rem_count} layers or {:.1}%, {rem_bytes} or {:.1}% bytes)", sorted.len() - 10, 100.0 * rem_count as f64 / self.total.count as f64, 100.0 * rem_bytes.0 as f64 / self.total.file_sizes as f64)?;
}
Ok(())
}
}
}
mod filesystem_level_usage {
use anyhow::Context;
use camino::Utf8Path;

View File

@@ -14,8 +14,6 @@ use hyper::header;
use hyper::StatusCode;
use hyper::{Body, Request, Response, Uri};
use metrics::launch_timestamp::LaunchTimestamp;
use pageserver_api::models::LocationConfigListResponse;
use pageserver_api::models::ShardParameters;
use pageserver_api::models::TenantDetails;
use pageserver_api::models::TenantState;
use pageserver_api::models::{
@@ -40,11 +38,11 @@ use crate::pgdatadir_mapping::LsnForTimestamp;
use crate::task_mgr::TaskKind;
use crate::tenant::config::{LocationConf, TenantConfOpt};
use crate::tenant::mgr::GetActiveTenantError;
use crate::tenant::mgr::UpsertLocationError;
use crate::tenant::mgr::{
GetTenantError, SetNewTenantConfigError, TenantManager, TenantMapError, TenantMapInsertError,
TenantSlotError, TenantSlotUpsertError, TenantStateError,
};
use crate::tenant::mgr::{TenantSlot, UpsertLocationError};
use crate::tenant::secondary::SecondaryController;
use crate::tenant::size::ModelInputs;
use crate::tenant::storage_layer::LayerAccessStatsReset;
@@ -267,7 +265,7 @@ impl From<SetNewTenantConfigError> for ApiError {
SetNewTenantConfigError::GetTenant(tid) => {
ApiError::NotFound(anyhow!("tenant {}", tid).into())
}
e @ (SetNewTenantConfigError::Persist(_) | SetNewTenantConfigError::Other(_)) => {
e @ SetNewTenantConfigError::Persist(_) => {
ApiError::InternalServerError(anyhow::Error::new(e))
}
}
@@ -325,21 +323,11 @@ impl From<crate::tenant::delete::DeleteTenantError> for ApiError {
async fn build_timeline_info(
timeline: &Arc<Timeline>,
include_non_incremental_logical_size: bool,
force_await_initial_logical_size: bool,
ctx: &RequestContext,
) -> anyhow::Result<TimelineInfo> {
crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id();
if force_await_initial_logical_size {
timeline.clone().await_initial_logical_size().await
}
let mut info = build_timeline_info_common(
timeline,
ctx,
tenant::timeline::GetLogicalSizePriority::Background,
)
.await?;
let mut info = build_timeline_info_common(timeline, ctx).await?;
if include_non_incremental_logical_size {
// XXX we should be using spawn_ondemand_logical_size_calculation here.
// Otherwise, if someone deletes the timeline / detaches the tenant while
@@ -356,7 +344,6 @@ async fn build_timeline_info(
async fn build_timeline_info_common(
timeline: &Arc<Timeline>,
ctx: &RequestContext,
logical_size_task_priority: tenant::timeline::GetLogicalSizePriority,
) -> anyhow::Result<TimelineInfo> {
crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id();
let initdb_lsn = timeline.initdb_lsn;
@@ -379,7 +366,8 @@ async fn build_timeline_info_common(
Lsn(0) => None,
lsn @ Lsn(_) => Some(lsn),
};
let current_logical_size = timeline.get_current_logical_size(logical_size_task_priority, ctx);
let current_logical_size =
timeline.get_current_logical_size(tenant::timeline::GetLogicalSizePriority::User, ctx);
let current_physical_size = Some(timeline.layer_size_sum().await);
let state = timeline.current_state();
let remote_consistent_lsn_projected = timeline
@@ -490,7 +478,7 @@ async fn timeline_create_handler(
.await {
Ok(new_timeline) => {
// Created. Construct a TimelineInfo for it.
let timeline_info = build_timeline_info_common(&new_timeline, &ctx, tenant::timeline::GetLogicalSizePriority::User)
let timeline_info = build_timeline_info_common(&new_timeline, &ctx)
.await
.map_err(ApiError::InternalServerError)?;
json_response(StatusCode::CREATED, timeline_info)
@@ -526,8 +514,6 @@ async fn timeline_list_handler(
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
let include_non_incremental_logical_size: Option<bool> =
parse_query_param(&request, "include-non-incremental-logical-size")?;
let force_await_initial_logical_size: Option<bool> =
parse_query_param(&request, "force-await-initial-logical-size")?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
@@ -541,7 +527,6 @@ async fn timeline_list_handler(
let timeline_info = build_timeline_info(
&timeline,
include_non_incremental_logical_size.unwrap_or(false),
force_await_initial_logical_size.unwrap_or(false),
&ctx,
)
.instrument(info_span!("build_timeline_info", timeline_id = %timeline.timeline_id))
@@ -569,8 +554,6 @@ async fn timeline_detail_handler(
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
let include_non_incremental_logical_size: Option<bool> =
parse_query_param(&request, "include-non-incremental-logical-size")?;
let force_await_initial_logical_size: Option<bool> =
parse_query_param(&request, "force-await-initial-logical-size")?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
// Logical size calculation needs downloading.
@@ -586,7 +569,6 @@ async fn timeline_detail_handler(
let timeline_info = build_timeline_info(
&timeline,
include_non_incremental_logical_size.unwrap_or(false),
force_await_initial_logical_size.unwrap_or(false),
&ctx,
)
.await
@@ -706,9 +688,7 @@ async fn tenant_attach_handler(
}
let tenant_shard_id = TenantShardId::unsharded(tenant_id);
let shard_params = ShardParameters::default();
let location_conf = LocationConf::attached_single(tenant_conf, generation, &shard_params);
let location_conf = LocationConf::attached_single(tenant_conf, generation);
let tenant = state
.tenant_manager
.upsert_location(
@@ -878,12 +858,11 @@ async fn tenant_list_handler(
ApiError::ResourceUnavailable("Tenant map is initializing or shutting down".into())
})?
.iter()
.map(|(id, state, gen)| TenantInfo {
.map(|(id, state)| TenantInfo {
id: *id,
state: state.clone(),
current_physical_size: None,
attachment_status: state.attachment_status(),
generation: (*gen).into(),
})
.collect::<Vec<TenantInfo>>();
@@ -913,7 +892,6 @@ async fn tenant_status(
state: state.clone(),
current_physical_size: Some(current_physical_size),
attachment_status: state.attachment_status(),
generation: tenant.generation().into(),
},
timelines: tenant.list_timeline_ids(),
})
@@ -1198,8 +1176,7 @@ async fn tenant_create_handler(
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Warn);
let location_conf =
LocationConf::attached_single(tenant_conf, generation, &request_data.shard_parameters);
let location_conf = LocationConf::attached_single(tenant_conf, generation);
let new_tenant = state
.tenant_manager
@@ -1218,6 +1195,7 @@ async fn tenant_create_handler(
"Upsert succeeded but didn't return tenant!"
)));
};
// We created the tenant. Existing API semantics are that the tenant
// is Active when this function returns.
if let res @ Err(_) = new_tenant
@@ -1236,7 +1214,7 @@ async fn tenant_create_handler(
json_response(
StatusCode::CREATED,
TenantCreateResponse(new_tenant.tenant_shard_id().tenant_id),
TenantCreateResponse(new_tenant.tenant_id()),
)
}
@@ -1355,28 +1333,6 @@ async fn put_tenant_location_config_handler(
json_response(StatusCode::OK, ())
}
async fn list_location_config_handler(
request: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let state = get_state(&request);
let slots = state.tenant_manager.list();
let result = LocationConfigListResponse {
tenant_shards: slots
.into_iter()
.map(|(tenant_shard_id, slot)| {
let v = match slot {
TenantSlot::Attached(t) => Some(t.get_location_conf()),
TenantSlot::Secondary(s) => Some(s.get_location_conf()),
TenantSlot::InProgress(_) => None,
};
(tenant_shard_id, v)
})
.collect(),
};
json_response(StatusCode::OK, result)
}
/// Testing helper to transition a tenant to [`crate::tenant::TenantState::Broken`].
async fn handle_tenant_break(
r: Request<Body>,
@@ -1678,13 +1634,12 @@ async fn disk_usage_eviction_run(
)));
};
let eviction_state = state.disk_usage_eviction_state.clone();
let state = state.disk_usage_eviction_state.clone();
let res = crate::disk_usage_eviction_task::disk_usage_eviction_task_iteration_impl(
&eviction_state,
&state,
storage,
usage,
&state.tenant_manager,
config.eviction_order,
&cancel,
)
@@ -1919,9 +1874,6 @@ pub fn make_router(
.put("/v1/tenant/:tenant_shard_id/location_config", |r| {
api_handler(r, put_tenant_location_config_handler)
})
.get("/v1/location_config", |r| {
api_handler(r, list_location_config_handler)
})
.get("/v1/tenant/:tenant_shard_id/timeline", |r| {
api_handler(r, timeline_list_handler)
})

View File

@@ -11,7 +11,7 @@ use once_cell::sync::Lazy;
use pageserver_api::shard::TenantShardId;
use strum::{EnumCount, IntoEnumIterator, VariantNames};
use strum_macros::{EnumVariantNames, IntoStaticStr};
use utils::id::TimelineId;
use utils::id::{TenantId, TimelineId};
/// Prometheus histogram buckets (in seconds) for operations in the critical
/// path. In other words, operations that directly affect the latency of user
@@ -59,7 +59,7 @@ pub(crate) static STORAGE_TIME_SUM_PER_TIMELINE: Lazy<CounterVec> = Lazy::new(||
register_counter_vec!(
"pageserver_storage_operations_seconds_sum",
"Total time spent on storage operations with operation, tenant and timeline dimensions",
&["operation", "tenant_id", "shard_id", "timeline_id"],
&["operation", "tenant_id", "timeline_id"],
)
.expect("failed to define a metric")
});
@@ -68,7 +68,7 @@ pub(crate) static STORAGE_TIME_COUNT_PER_TIMELINE: Lazy<IntCounterVec> = Lazy::n
register_int_counter_vec!(
"pageserver_storage_operations_seconds_count",
"Count of storage operations with operation, tenant and timeline dimensions",
&["operation", "tenant_id", "shard_id", "timeline_id"],
&["operation", "tenant_id", "timeline_id"],
)
.expect("failed to define a metric")
});
@@ -337,6 +337,15 @@ pub(crate) mod page_cache_eviction_metrics {
}
}
pub(crate) static PAGE_CACHE_ACQUIRE_PINNED_SLOT_TIME: Lazy<Histogram> = Lazy::new(|| {
register_histogram!(
"pageserver_page_cache_acquire_pinned_slot_seconds",
"Time spent acquiring a pinned slot in the page cache",
CRITICAL_OP_BUCKETS.into(),
)
.expect("failed to define a metric")
});
static PAGE_CACHE_ERRORS: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
"page_cache_errors_total",
@@ -373,7 +382,7 @@ static LAST_RECORD_LSN: Lazy<IntGaugeVec> = Lazy::new(|| {
register_int_gauge_vec!(
"pageserver_last_record_lsn",
"Last record LSN grouped by timeline",
&["tenant_id", "shard_id", "timeline_id"]
&["tenant_id", "timeline_id"]
)
.expect("failed to define a metric")
});
@@ -382,7 +391,7 @@ static RESIDENT_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
register_uint_gauge_vec!(
"pageserver_resident_physical_size",
"The size of the layer files present in the pageserver's filesystem.",
&["tenant_id", "shard_id", "timeline_id"]
&["tenant_id", "timeline_id"]
)
.expect("failed to define a metric")
});
@@ -400,7 +409,7 @@ static REMOTE_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
"pageserver_remote_physical_size",
"The size of the layer files present in the remote storage that are listed in the remote index_part.json",
// Corollary: If any files are missing from the index part, they won't be included here.
&["tenant_id", "shard_id", "timeline_id"]
&["tenant_id", "timeline_id"]
)
.expect("failed to define a metric")
});
@@ -433,7 +442,7 @@ static CURRENT_LOGICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
register_uint_gauge_vec!(
"pageserver_current_logical_size",
"Current logical size grouped by timeline",
&["tenant_id", "shard_id", "timeline_id"]
&["tenant_id", "timeline_id"]
)
.expect("failed to define current logical size metric")
});
@@ -582,7 +591,7 @@ pub(crate) static BROKEN_TENANTS_SET: Lazy<UIntGaugeVec> = Lazy::new(|| {
register_uint_gauge_vec!(
"pageserver_broken_tenants_count",
"Set of broken tenants",
&["tenant_id", "shard_id"]
&["tenant_id"]
)
.expect("Failed to register pageserver_tenant_states_count metric")
});
@@ -602,7 +611,7 @@ static NUM_PERSISTENT_FILES_CREATED: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
"pageserver_created_persistent_files_total",
"Number of files created that are meant to be uploaded to cloud storage",
&["tenant_id", "shard_id", "timeline_id"]
&["tenant_id", "timeline_id"]
)
.expect("failed to define a metric")
});
@@ -611,7 +620,7 @@ static PERSISTENT_BYTES_WRITTEN: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
"pageserver_written_persistent_bytes_total",
"Total bytes written that are meant to be uploaded to cloud storage",
&["tenant_id", "shard_id", "timeline_id"]
&["tenant_id", "timeline_id"]
)
.expect("failed to define a metric")
});
@@ -630,7 +639,7 @@ static EVICTIONS: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
"pageserver_evictions",
"Number of layers evicted from the pageserver",
&["tenant_id", "shard_id", "timeline_id"]
&["tenant_id", "timeline_id"]
)
.expect("failed to define a metric")
});
@@ -927,7 +936,7 @@ pub(crate) static STORAGE_IO_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
register_int_gauge_vec!(
"pageserver_io_operations_bytes_total",
"Total amount of bytes read/written in IO operations",
&["operation", "tenant_id", "shard_id", "timeline_id"]
&["operation", "tenant_id", "timeline_id"]
)
.expect("failed to define a metric")
});
@@ -1002,7 +1011,7 @@ static SMGR_QUERY_TIME_PER_TENANT_TIMELINE: Lazy<HistogramVec> = Lazy::new(|| {
register_histogram_vec!(
"pageserver_smgr_query_seconds",
"Time spent on smgr query handling, aggregated by query type and tenant/timeline.",
&["smgr_query_type", "tenant_id", "shard_id", "timeline_id"],
&["smgr_query_type", "tenant_id", "timeline_id"],
CRITICAL_OP_BUCKETS.into(),
)
.expect("failed to define a metric")
@@ -1069,9 +1078,8 @@ static SMGR_QUERY_TIME_GLOBAL: Lazy<HistogramVec> = Lazy::new(|| {
});
impl SmgrQueryTimePerTimeline {
pub(crate) fn new(tenant_shard_id: &TenantShardId, timeline_id: &TimelineId) -> Self {
let tenant_id = tenant_shard_id.tenant_id.to_string();
let shard_slug = format!("{}", tenant_shard_id.shard_slug());
pub(crate) fn new(tenant_id: &TenantId, timeline_id: &TimelineId) -> Self {
let tenant_id = tenant_id.to_string();
let timeline_id = timeline_id.to_string();
let metrics = std::array::from_fn(|i| {
let op = SmgrQueryType::from_repr(i).unwrap();
@@ -1079,7 +1087,7 @@ impl SmgrQueryTimePerTimeline {
.get_metric_with_label_values(&[op.into()])
.unwrap();
let per_tenant_timeline = SMGR_QUERY_TIME_PER_TENANT_TIMELINE
.get_metric_with_label_values(&[op.into(), &tenant_id, &shard_slug, &timeline_id])
.get_metric_with_label_values(&[op.into(), &tenant_id, &timeline_id])
.unwrap();
GlobalAndPerTimelineHistogram {
global,
@@ -1099,7 +1107,6 @@ impl SmgrQueryTimePerTimeline {
#[cfg(test)]
mod smgr_query_time_tests {
use pageserver_api::shard::TenantShardId;
use strum::IntoEnumIterator;
use utils::id::{TenantId, TimelineId};
@@ -1126,10 +1133,7 @@ mod smgr_query_time_tests {
for op in &ops {
let tenant_id = TenantId::generate();
let timeline_id = TimelineId::generate();
let metrics = super::SmgrQueryTimePerTimeline::new(
&TenantShardId::unsharded(tenant_id),
&timeline_id,
);
let metrics = super::SmgrQueryTimePerTimeline::new(&tenant_id, &timeline_id);
let get_counts = || {
let global: u64 = ops
@@ -1210,13 +1214,7 @@ static REMOTE_TIMELINE_CLIENT_CALLS_UNFINISHED_GAUGE: Lazy<IntGaugeVec> = Lazy::
"Number of ongoing calls to remote timeline client. \
Used to populate pageserver_remote_timeline_client_calls_started. \
This metric is not useful for sampling from Prometheus, but useful in tests.",
&[
"tenant_id",
"shard_id",
"timeline_id",
"file_kind",
"op_kind"
],
&["tenant_id", "timeline_id", "file_kind", "op_kind"],
)
.expect("failed to define a metric")
});
@@ -1237,23 +1235,22 @@ static REMOTE_TIMELINE_CLIENT_CALLS_STARTED_HIST: Lazy<HistogramVec> = Lazy::new
.expect("failed to define a metric")
});
static REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER: Lazy<IntCounterVec> =
Lazy::new(|| {
register_int_counter_vec!(
static REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
"pageserver_remote_timeline_client_bytes_started",
"Incremented by the number of bytes associated with a remote timeline client operation. \
The increment happens when the operation is scheduled.",
&["tenant_id", "shard_id", "timeline_id", "file_kind", "op_kind"],
&["tenant_id", "timeline_id", "file_kind", "op_kind"],
)
.expect("failed to define a metric")
});
.expect("failed to define a metric")
});
static REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
"pageserver_remote_timeline_client_bytes_finished",
"Incremented by the number of bytes associated with a remote timeline client operation. \
The increment happens when the operation finishes (regardless of success/failure/shutdown).",
&["tenant_id", "shard_id", "timeline_id", "file_kind", "op_kind"],
&["tenant_id", "timeline_id", "file_kind", "op_kind"],
)
.expect("failed to define a metric")
});
@@ -1699,19 +1696,14 @@ pub(crate) struct StorageTimeMetrics {
}
impl StorageTimeMetrics {
pub fn new(
operation: StorageTimeOperation,
tenant_id: &str,
shard_id: &str,
timeline_id: &str,
) -> Self {
pub fn new(operation: StorageTimeOperation, tenant_id: &str, timeline_id: &str) -> Self {
let operation: &'static str = operation.into();
let timeline_sum = STORAGE_TIME_SUM_PER_TIMELINE
.get_metric_with_label_values(&[operation, tenant_id, shard_id, timeline_id])
.get_metric_with_label_values(&[operation, tenant_id, timeline_id])
.unwrap();
let timeline_count = STORAGE_TIME_COUNT_PER_TIMELINE
.get_metric_with_label_values(&[operation, tenant_id, shard_id, timeline_id])
.get_metric_with_label_values(&[operation, tenant_id, timeline_id])
.unwrap();
let global_histogram = STORAGE_TIME_GLOBAL
.get_metric_with_label_values(&[operation])
@@ -1763,66 +1755,40 @@ impl TimelineMetrics {
let tenant_id = tenant_shard_id.tenant_id.to_string();
let shard_id = format!("{}", tenant_shard_id.shard_slug());
let timeline_id = timeline_id.to_string();
let flush_time_histo = StorageTimeMetrics::new(
StorageTimeOperation::LayerFlush,
&tenant_id,
&shard_id,
&timeline_id,
);
let compact_time_histo = StorageTimeMetrics::new(
StorageTimeOperation::Compact,
&tenant_id,
&shard_id,
&timeline_id,
);
let create_images_time_histo = StorageTimeMetrics::new(
StorageTimeOperation::CreateImages,
&tenant_id,
&shard_id,
&timeline_id,
);
let logical_size_histo = StorageTimeMetrics::new(
StorageTimeOperation::LogicalSize,
&tenant_id,
&shard_id,
&timeline_id,
);
let flush_time_histo =
StorageTimeMetrics::new(StorageTimeOperation::LayerFlush, &tenant_id, &timeline_id);
let compact_time_histo =
StorageTimeMetrics::new(StorageTimeOperation::Compact, &tenant_id, &timeline_id);
let create_images_time_histo =
StorageTimeMetrics::new(StorageTimeOperation::CreateImages, &tenant_id, &timeline_id);
let logical_size_histo =
StorageTimeMetrics::new(StorageTimeOperation::LogicalSize, &tenant_id, &timeline_id);
let imitate_logical_size_histo = StorageTimeMetrics::new(
StorageTimeOperation::ImitateLogicalSize,
&tenant_id,
&shard_id,
&timeline_id,
);
let load_layer_map_histo = StorageTimeMetrics::new(
StorageTimeOperation::LoadLayerMap,
&tenant_id,
&shard_id,
&timeline_id,
);
let garbage_collect_histo = StorageTimeMetrics::new(
StorageTimeOperation::Gc,
&tenant_id,
&shard_id,
&timeline_id,
);
let load_layer_map_histo =
StorageTimeMetrics::new(StorageTimeOperation::LoadLayerMap, &tenant_id, &timeline_id);
let garbage_collect_histo =
StorageTimeMetrics::new(StorageTimeOperation::Gc, &tenant_id, &timeline_id);
let last_record_gauge = LAST_RECORD_LSN
.get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
.unwrap();
let resident_physical_size_gauge = RESIDENT_PHYSICAL_SIZE
.get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
.unwrap();
// TODO: we shouldn't expose this metric
let current_logical_size_gauge = CURRENT_LOGICAL_SIZE
.get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
.unwrap();
let num_persistent_files_created = NUM_PERSISTENT_FILES_CREATED
.get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
.unwrap();
let persistent_bytes_written = PERSISTENT_BYTES_WRITTEN
.get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
.unwrap();
let evictions = EVICTIONS
.get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
.unwrap();
let evictions_with_low_residence_duration = evictions_with_low_residence_duration_builder
.build(&tenant_id, &shard_id, &timeline_id);
@@ -1876,17 +1842,15 @@ impl Drop for TimelineMetrics {
let tenant_id = &self.tenant_id;
let timeline_id = &self.timeline_id;
let shard_id = &self.shard_id;
let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, &shard_id, timeline_id]);
let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, timeline_id]);
{
RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(self.resident_physical_size_get());
let _ =
RESIDENT_PHYSICAL_SIZE.remove_label_values(&[tenant_id, &shard_id, timeline_id]);
let _ = RESIDENT_PHYSICAL_SIZE.remove_label_values(&[tenant_id, timeline_id]);
}
let _ = CURRENT_LOGICAL_SIZE.remove_label_values(&[tenant_id, &shard_id, timeline_id]);
let _ =
NUM_PERSISTENT_FILES_CREATED.remove_label_values(&[tenant_id, &shard_id, timeline_id]);
let _ = PERSISTENT_BYTES_WRITTEN.remove_label_values(&[tenant_id, &shard_id, timeline_id]);
let _ = EVICTIONS.remove_label_values(&[tenant_id, &shard_id, timeline_id]);
let _ = CURRENT_LOGICAL_SIZE.remove_label_values(&[tenant_id, timeline_id]);
let _ = NUM_PERSISTENT_FILES_CREATED.remove_label_values(&[tenant_id, timeline_id]);
let _ = PERSISTENT_BYTES_WRITTEN.remove_label_values(&[tenant_id, timeline_id]);
let _ = EVICTIONS.remove_label_values(&[tenant_id, timeline_id]);
self.evictions_with_low_residence_duration
.write()
@@ -1899,42 +1863,29 @@ impl Drop for TimelineMetrics {
// outlive an individual smgr connection, but not the timeline.
for op in StorageTimeOperation::VARIANTS {
let _ = STORAGE_TIME_SUM_PER_TIMELINE.remove_label_values(&[
op,
tenant_id,
shard_id,
timeline_id,
]);
let _ = STORAGE_TIME_COUNT_PER_TIMELINE.remove_label_values(&[
op,
tenant_id,
shard_id,
timeline_id,
]);
let _ =
STORAGE_TIME_SUM_PER_TIMELINE.remove_label_values(&[op, tenant_id, timeline_id]);
let _ =
STORAGE_TIME_COUNT_PER_TIMELINE.remove_label_values(&[op, tenant_id, timeline_id]);
}
for op in STORAGE_IO_SIZE_OPERATIONS {
let _ = STORAGE_IO_SIZE.remove_label_values(&[op, tenant_id, shard_id, timeline_id]);
let _ = STORAGE_IO_SIZE.remove_label_values(&[op, tenant_id, timeline_id]);
}
for op in SmgrQueryType::iter() {
let _ = SMGR_QUERY_TIME_PER_TENANT_TIMELINE.remove_label_values(&[
op.into(),
tenant_id,
shard_id,
timeline_id,
]);
}
}
}
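The Drop impls above remove the per-timeline label sets from the metric vecs when a timeline's metrics are dropped, so stale series don't linger in /metrics. A reduced sketch of that pattern with the prometheus crate; the metric name and labels here are made up for illustration:
use once_cell::sync::Lazy;
use prometheus::{register_int_gauge_vec, IntGaugeVec};

static EXAMPLE_GAUGE: Lazy<IntGaugeVec> = Lazy::new(|| {
    register_int_gauge_vec!(
        "example_per_timeline_gauge",
        "Illustrative per-timeline gauge",
        &["tenant_id", "timeline_id"]
    )
    .expect("failed to define a metric")
});

struct PerTimelineMetrics {
    tenant_id: String,
    timeline_id: String,
}

impl PerTimelineMetrics {
    fn new(tenant_id: &str, timeline_id: &str) -> Self {
        EXAMPLE_GAUGE
            .with_label_values(&[tenant_id, timeline_id])
            .set(1);
        Self { tenant_id: tenant_id.into(), timeline_id: timeline_id.into() }
    }
}

impl Drop for PerTimelineMetrics {
    fn drop(&mut self) {
        // Remove the label set so the series disappears from the /metrics output.
        let _ = EXAMPLE_GAUGE.remove_label_values(&[&self.tenant_id, &self.timeline_id]);
    }
}

fn main() {
    let m = PerTimelineMetrics::new("tenant-a", "timeline-1");
    drop(m); // the ("tenant-a", "timeline-1") series is gone
}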
pub(crate) fn remove_tenant_metrics(tenant_shard_id: &TenantShardId) {
// Only shard zero deals in synthetic sizes
if tenant_shard_id.is_zero() {
let tid = tenant_shard_id.tenant_id.to_string();
let _ = TENANT_SYNTHETIC_SIZE_METRIC.remove_label_values(&[&tid]);
}
pub fn remove_tenant_metrics(tenant_id: &TenantId) {
let tid = tenant_id.to_string();
let _ = TENANT_SYNTHETIC_SIZE_METRIC.remove_label_values(&[&tid]);
// we leave the BROKEN_TENANTS_SET entry if any
}
@@ -1984,7 +1935,6 @@ impl Drop for PerTimelineRemotePhysicalSizeGauge {
pub(crate) struct RemoteTimelineClientMetrics {
tenant_id: String,
shard_id: String,
timeline_id: String,
remote_physical_size_gauge: Mutex<Option<PerTimelineRemotePhysicalSizeGauge>>,
calls_unfinished_gauge: Mutex<HashMap<(&'static str, &'static str), IntGauge>>,
@@ -1996,7 +1946,6 @@ impl RemoteTimelineClientMetrics {
pub fn new(tenant_shard_id: &TenantShardId, timeline_id: &TimelineId) -> Self {
RemoteTimelineClientMetrics {
tenant_id: tenant_shard_id.tenant_id.to_string(),
shard_id: format!("{}", tenant_shard_id.shard_slug()),
timeline_id: timeline_id.to_string(),
calls_unfinished_gauge: Mutex::new(HashMap::default()),
bytes_started_counter: Mutex::new(HashMap::default()),
@@ -2011,9 +1960,8 @@ impl RemoteTimelineClientMetrics {
PerTimelineRemotePhysicalSizeGauge::new(
REMOTE_PHYSICAL_SIZE
.get_metric_with_label_values(&[
&self.tenant_id,
&self.shard_id,
&self.timeline_id,
&self.tenant_id.to_string(),
&self.timeline_id.to_string(),
])
.unwrap(),
)
@@ -2048,9 +1996,8 @@ impl RemoteTimelineClientMetrics {
let metric = guard.entry(key).or_insert_with(move || {
REMOTE_TIMELINE_CLIENT_CALLS_UNFINISHED_GAUGE
.get_metric_with_label_values(&[
&self.tenant_id,
&self.shard_id,
&self.timeline_id,
&self.tenant_id.to_string(),
&self.timeline_id.to_string(),
key.0,
key.1,
])
@@ -2080,9 +2027,8 @@ impl RemoteTimelineClientMetrics {
let metric = guard.entry(key).or_insert_with(move || {
REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER
.get_metric_with_label_values(&[
&self.tenant_id,
&self.shard_id,
&self.timeline_id,
&self.tenant_id.to_string(),
&self.timeline_id.to_string(),
key.0,
key.1,
])
@@ -2101,9 +2047,8 @@ impl RemoteTimelineClientMetrics {
let metric = guard.entry(key).or_insert_with(move || {
REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER
.get_metric_with_label_values(&[
&self.tenant_id,
&self.shard_id,
&self.timeline_id,
&self.tenant_id.to_string(),
&self.timeline_id.to_string(),
key.0,
key.1,
])
@@ -2247,7 +2192,6 @@ impl Drop for RemoteTimelineClientMetrics {
fn drop(&mut self) {
let RemoteTimelineClientMetrics {
tenant_id,
shard_id,
timeline_id,
remote_physical_size_gauge,
calls_unfinished_gauge,
@@ -2257,7 +2201,6 @@ impl Drop for RemoteTimelineClientMetrics {
for ((a, b), _) in calls_unfinished_gauge.get_mut().unwrap().drain() {
let _ = REMOTE_TIMELINE_CLIENT_CALLS_UNFINISHED_GAUGE.remove_label_values(&[
tenant_id,
shard_id,
timeline_id,
a,
b,
@@ -2266,7 +2209,6 @@ impl Drop for RemoteTimelineClientMetrics {
for ((a, b), _) in bytes_started_counter.get_mut().unwrap().drain() {
let _ = REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER.remove_label_values(&[
tenant_id,
shard_id,
timeline_id,
a,
b,
@@ -2275,7 +2217,6 @@ impl Drop for RemoteTimelineClientMetrics {
for ((a, b), _) in bytes_finished_counter.get_mut().unwrap().drain() {
let _ = REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER.remove_label_values(&[
tenant_id,
shard_id,
timeline_id,
a,
b,
@@ -2283,7 +2224,7 @@ impl Drop for RemoteTimelineClientMetrics {
}
{
let _ = remote_physical_size_gauge; // used to avoid 'unused' warning in destructuring above
let _ = REMOTE_PHYSICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
let _ = REMOTE_PHYSICAL_SIZE.remove_label_values(&[tenant_id, timeline_id]);
}
}
}
@@ -2293,6 +2234,8 @@ impl Drop for RemoteTimelineClientMetrics {
pub(crate) trait MeasureRemoteOp: Sized {
fn measure_remote_op(
self,
tenant_id: TenantId,
timeline_id: TimelineId,
file_kind: RemoteOpFileKind,
op: RemoteOpKind,
metrics: Arc<RemoteTimelineClientMetrics>,
@@ -2300,6 +2243,8 @@ pub(crate) trait MeasureRemoteOp: Sized {
let start = Instant::now();
MeasuredRemoteOp {
inner: self,
tenant_id,
timeline_id,
file_kind,
op,
start,
@@ -2315,6 +2260,8 @@ pin_project! {
{
#[pin]
inner: F,
tenant_id: TenantId,
timeline_id: TimelineId,
file_kind: RemoteOpFileKind,
op: RemoteOpKind,
start: Instant,
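MeasuredRemoteOp appears to wrap a future together with a start timestamp so the elapsed time can be recorded once the operation resolves. A hedged, self-contained sketch of that wrapper shape using pin_project_lite (the real code also carries file/op kinds and a metrics handle, omitted here):
use pin_project_lite::pin_project;
use std::future::Future;
use std::pin::Pin;
use std::task::{Context, Poll};
use std::time::Instant;

pin_project! {
    struct Timed<F> {
        #[pin]
        inner: F,
        start: Instant,
    }
}

impl<F: Future> Future for Timed<F> {
    type Output = F::Output;

    fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
        let this = self.project();
        match this.inner.poll(cx) {
            Poll::Ready(output) => {
                // In the real metrics code this observation would feed a histogram.
                println!("operation took {:?}", this.start.elapsed());
                Poll::Ready(output)
            }
            Poll::Pending => Poll::Pending,
        }
    }
}

#[tokio::main]
async fn main() {
    let fut = async { 42u32 };
    let timed = Timed { inner: fut, start: Instant::now() };
    assert_eq!(timed.await, 42);
}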

View File

@@ -550,6 +550,7 @@ impl PageCache {
// not require changes.
async fn try_get_pinned_slot_permit(&self) -> anyhow::Result<PinnedSlotsPermit> {
let timer = crate::metrics::PAGE_CACHE_ACQUIRE_PINNED_SLOT_TIME.start_timer();
match tokio::time::timeout(
// Choose small timeout, neon_smgr does its own retries.
// https://neondb.slack.com/archives/C04DGM6SMTM/p1694786876476869
@@ -562,6 +563,7 @@ impl PageCache {
res.expect("this semaphore is never closed"),
)),
Err(_timeout) => {
timer.stop_and_discard();
crate::metrics::page_cache_errors_inc(
crate::metrics::PageCacheErrorKind::AcquirePinnedSlotTimeout,
);
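The hunk above starts a histogram timer before trying to pin a page-cache slot and calls stop_and_discard on timeout, presumably so timed-out attempts don't skew the latency distribution. A rough standalone sketch of that pattern; the metric name and timeout value are illustrative:
use once_cell::sync::Lazy;
use prometheus::{register_histogram, Histogram};
use std::time::Duration;
use tokio::sync::Semaphore;

static ACQUIRE_TIME: Lazy<Histogram> = Lazy::new(|| {
    register_histogram!(
        "example_acquire_pinned_slot_seconds",
        "Time spent acquiring a pinned slot"
    )
    .expect("failed to define a metric")
});

async fn try_get_permit(slots: &Semaphore) -> Option<tokio::sync::SemaphorePermit<'_>> {
    let timer = ACQUIRE_TIME.start_timer();
    match tokio::time::timeout(Duration::from_millis(100), slots.acquire()).await {
        // The timer records its sample when it is dropped on this path.
        Ok(res) => Some(res.expect("this semaphore is never closed")),
        Err(_timeout) => {
            // Don't record timed-out attempts; they would distort the histogram.
            timer.stop_and_discard();
            None
        }
    }
}

#[tokio::main]
async fn main() {
    let slots = Semaphore::new(1);
    assert!(try_get_permit(&slots).await.is_some());
}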

View File

@@ -13,10 +13,7 @@ use anyhow::Context;
use async_compression::tokio::write::GzipEncoder;
use bytes::Buf;
use bytes::Bytes;
use futures::stream::FuturesUnordered;
use futures::Stream;
use futures::StreamExt;
use pageserver_api::key::Key;
use pageserver_api::models::TenantState;
use pageserver_api::models::{
PagestreamBeMessage, PagestreamDbSizeRequest, PagestreamDbSizeResponse,
@@ -24,14 +21,11 @@ use pageserver_api::models::{
PagestreamFeMessage, PagestreamGetPageRequest, PagestreamGetPageResponse,
PagestreamNblocksRequest, PagestreamNblocksResponse,
};
use pageserver_api::shard::ShardIndex;
use pageserver_api::shard::{ShardCount, ShardNumber};
use postgres_backend::{self, is_expected_io_error, AuthType, PostgresBackend, QueryError};
use pq_proto::framed::ConnectionError;
use pq_proto::FeStartupPacket;
use pq_proto::{BeMessage, FeMessage, RowDescriptor};
use std::borrow::Cow;
use std::collections::HashMap;
use std::io;
use std::net::TcpListener;
use std::pin::pin;
@@ -46,7 +40,6 @@ use tokio_util::sync::CancellationToken;
use tracing::field;
use tracing::*;
use utils::id::ConnectionId;
use utils::sync::gate::GateGuard;
use utils::{
auth::{Claims, Scope, SwappableJwtAuth},
id::{TenantId, TimelineId},
@@ -281,13 +274,6 @@ async fn page_service_conn_main(
}
}
/// While a handler holds a reference to a Timeline, it also holds the
/// timeline's Gate open.
struct HandlerTimeline {
timeline: Arc<Timeline>,
_guard: GateGuard,
}
struct PageServerHandler {
_conf: &'static PageServerConf,
broker_client: storage_broker::BrokerClientChannel,
@@ -299,14 +285,6 @@ struct PageServerHandler {
/// For each query received over the connection,
/// `process_query` creates a child context from this one.
connection_ctx: RequestContext,
/// See [`Self::cache_timeline`] for usage.
///
/// Note on size: the typical size of this map is 1. The largest size we expect
/// to see is the number of shards divided by the number of pageservers (typically < 2),
/// or the ratio used when splitting shards (i.e. how many children are created from one
/// parent shard), where a "large" number might be ~8.
shard_timelines: HashMap<ShardIndex, HandlerTimeline>,
}
#[derive(thiserror::Error, Debug)]
@@ -380,57 +358,13 @@ impl PageServerHandler {
auth,
claims: None,
connection_ctx,
shard_timelines: HashMap::new(),
}
}
/// Future that completes when we need to shut down the connection.
///
/// Reasons for need to shut down are:
/// - any of the timelines we hold GateGuards for in `shard_timelines` is cancelled
/// - task_mgr requests shutdown of the connection
///
/// The need to check for `task_mgr` cancellation arises mainly from `handle_pagerequests`
/// where, at first, `shard_timelines` is empty, see <https://github.com/neondatabase/neon/pull/6388>
///
/// NB: keep in sync with [`Self::is_connection_cancelled`]
async fn await_connection_cancelled(&self) {
// A short wait before we expend the cycles to walk our timeline map. This avoids incurring
// that cost every time we check for cancellation.
tokio::time::sleep(Duration::from_millis(10)).await;
// This function is never called concurrently with code that adds timelines to shard_timelines,
// which is enforced by the borrow checker (the future returned by this function carries the
// immutable &self). So it's fine to evaluate shard_timelines after the sleep; we don't risk
// missing any inserts to the map.
let mut futs = self
.shard_timelines
.values()
.map(|ht| ht.timeline.cancel.cancelled())
.collect::<FuturesUnordered<_>>();
tokio::select! {
_ = task_mgr::shutdown_watcher() => { }
_ = futs.next() => {}
}
}
/// Checking variant of [`Self::await_connection_cancelled`].
fn is_connection_cancelled(&self) -> bool {
task_mgr::is_shutdown_requested()
|| self
.shard_timelines
.values()
.any(|ht| ht.timeline.cancel.is_cancelled() || ht.timeline.is_stopping())
}
/// This function always respects cancellation of any timeline in [`Self::shard_timelines`]. Pass in
/// a cancellation token at the next scope up (such as a tenant cancellation token) to ensure we respect
/// cancellation if there aren't any timelines in the cache.
///
/// If calling from a function that doesn't use the [`Self::shard_timelines`] cache, then pass in the
/// timeline cancellation token.
/// Wrap PostgresBackend::flush to respect our CancellationToken: it is important to use
/// this rather than naked flush() in order to shut down promptly. Without this, we would
/// block shutdown of a tenant if a postgres client was failing to consume bytes we send
/// in the flush.
async fn flush_cancellable<IO>(
&self,
pgb: &mut PostgresBackend<IO>,
@@ -443,9 +377,6 @@ impl PageServerHandler {
flush_r = pgb.flush() => {
Ok(flush_r?)
},
_ = self.await_connection_cancelled() => {
Err(QueryError::Shutdown)
}
_ = cancel.cancelled() => {
Err(QueryError::Shutdown)
}
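flush_cancellable races the flush against cancellation so a client that stops consuming bytes cannot block shutdown. The shape of that select, sketched against a generic tokio writer (names here are illustrative):
use tokio::io::{AsyncWrite, AsyncWriteExt};
use tokio_util::sync::CancellationToken;

#[derive(Debug)]
enum FlushError {
    Io(std::io::Error),
    Shutdown,
}

async fn flush_cancellable<W: AsyncWrite + Unpin>(
    writer: &mut W,
    cancel: &CancellationToken,
) -> Result<(), FlushError> {
    tokio::select! {
        res = writer.flush() => res.map_err(FlushError::Io),
        _ = cancel.cancelled() => Err(FlushError::Shutdown),
    }
}

#[tokio::main]
async fn main() {
    let mut sink = Vec::new(); // Vec<u8> implements AsyncWrite in tokio
    let cancel = CancellationToken::new();
    flush_cancellable(&mut sink, &cancel).await.unwrap();
}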
@@ -521,7 +452,7 @@ impl PageServerHandler {
#[instrument(skip_all)]
async fn handle_pagerequests<IO>(
&mut self,
&self,
pgb: &mut PostgresBackend<IO>,
tenant_id: TenantId,
timeline_id: TimelineId,
@@ -532,6 +463,10 @@ impl PageServerHandler {
{
debug_assert_current_span_has_tenant_and_timeline_id();
// Note that since one connection may contain getpage requests that target different
// shards (e.g. during splitting when the compute is not yet aware of the split), the tenant
// that we look up here may not be the one that serves all the actual requests: we will double
// check the mapping of key->shard later before calling into Timeline for getpage requests.
let tenant = mgr::get_active_tenant_with_timeout(
tenant_id,
ShardSelector::First,
@@ -552,15 +487,27 @@ impl PageServerHandler {
None
};
// Check that the timeline exists
let timeline = tenant
.get_timeline(timeline_id, true)
.map_err(|e| QueryError::NotFound(format!("{e}").into()))?;
// Avoid starting new requests if the timeline has already started shutting down,
// and block timeline shutdown until this request is complete, or drops out due
// to cancellation.
let _timeline_guard = timeline.gate.enter().map_err(|_| QueryError::Shutdown)?;
// switch client to COPYBOTH
pgb.write_message_noflush(&BeMessage::CopyBothResponse)?;
self.flush_cancellable(pgb, &tenant.cancel).await?;
self.flush_cancellable(pgb, &timeline.cancel).await?;
let metrics = metrics::SmgrQueryTimePerTimeline::new(&tenant_id, &timeline_id);
loop {
let msg = tokio::select! {
biased;
_ = self.await_connection_cancelled() => {
_ = timeline.cancel.cancelled() => {
// We were requested to shut down.
info!("shutdown request received in page handler");
return Err(QueryError::Shutdown)
@@ -594,36 +541,40 @@ impl PageServerHandler {
let (response, span) = match neon_fe_msg {
PagestreamFeMessage::Exists(req) => {
let _timer = metrics.start_timer(metrics::SmgrQueryType::GetRelExists);
let span = tracing::info_span!("handle_get_rel_exists_request", rel = %req.rel, req_lsn = %req.lsn);
(
self.handle_get_rel_exists_request(tenant_id, timeline_id, &req, &ctx)
self.handle_get_rel_exists_request(&timeline, &req, &ctx)
.instrument(span.clone())
.await,
span,
)
}
PagestreamFeMessage::Nblocks(req) => {
let _timer = metrics.start_timer(metrics::SmgrQueryType::GetRelSize);
let span = tracing::info_span!("handle_get_nblocks_request", rel = %req.rel, req_lsn = %req.lsn);
(
self.handle_get_nblocks_request(tenant_id, timeline_id, &req, &ctx)
self.handle_get_nblocks_request(&timeline, &req, &ctx)
.instrument(span.clone())
.await,
span,
)
}
PagestreamFeMessage::GetPage(req) => {
let _timer = metrics.start_timer(metrics::SmgrQueryType::GetPageAtLsn);
let span = tracing::info_span!("handle_get_page_at_lsn_request", rel = %req.rel, blkno = %req.blkno, req_lsn = %req.lsn);
(
self.handle_get_page_at_lsn_request(tenant_id, timeline_id, &req, &ctx)
self.handle_get_page_at_lsn_request(&timeline, &req, &ctx)
.instrument(span.clone())
.await,
span,
)
}
PagestreamFeMessage::DbSize(req) => {
let _timer = metrics.start_timer(metrics::SmgrQueryType::GetDbSize);
let span = tracing::info_span!("handle_db_size_request", dbnode = %req.dbnode, req_lsn = %req.lsn);
(
self.handle_db_size_request(tenant_id, timeline_id, &req, &ctx)
self.handle_db_size_request(&timeline, &req, &ctx)
.instrument(span.clone())
.await,
span,
@@ -643,7 +594,7 @@ impl PageServerHandler {
span.in_scope(|| info!("handler requested reconnect: {reason}"));
return Err(QueryError::Reconnect);
}
Err(e) if self.is_connection_cancelled() => {
Err(e) if timeline.cancel.is_cancelled() || timeline.is_stopping() => {
// This branch accommodates code within request handlers that returns an anyhow::Error instead of a clean
// shutdown error; this may be buried inside a PageReconstructError::Other, for example.
//
@@ -666,7 +617,7 @@ impl PageServerHandler {
});
pgb.write_message_noflush(&BeMessage::CopyData(&response_msg.serialize()))?;
self.flush_cancellable(pgb, &tenant.cancel).await?;
self.flush_cancellable(pgb, &timeline.cancel).await?;
}
}
}
@@ -863,17 +814,11 @@ impl PageServerHandler {
}
async fn handle_get_rel_exists_request(
&mut self,
tenant_id: TenantId,
timeline_id: TimelineId,
&self,
timeline: &Timeline,
req: &PagestreamExistsRequest,
ctx: &RequestContext,
) -> Result<PagestreamBeMessage, PageStreamError> {
let timeline = self.get_timeline_shard_zero(tenant_id, timeline_id).await?;
let _timer = timeline
.query_metrics
.start_timer(metrics::SmgrQueryType::GetRelExists);
let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
let lsn =
Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest, &latest_gc_cutoff_lsn, ctx)
@@ -889,18 +834,11 @@ impl PageServerHandler {
}
async fn handle_get_nblocks_request(
&mut self,
tenant_id: TenantId,
timeline_id: TimelineId,
&self,
timeline: &Timeline,
req: &PagestreamNblocksRequest,
ctx: &RequestContext,
) -> Result<PagestreamBeMessage, PageStreamError> {
let timeline = self.get_timeline_shard_zero(tenant_id, timeline_id).await?;
let _timer = timeline
.query_metrics
.start_timer(metrics::SmgrQueryType::GetRelSize);
let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
let lsn =
Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest, &latest_gc_cutoff_lsn, ctx)
@@ -916,18 +854,11 @@ impl PageServerHandler {
}
async fn handle_db_size_request(
&mut self,
tenant_id: TenantId,
timeline_id: TimelineId,
&self,
timeline: &Timeline,
req: &PagestreamDbSizeRequest,
ctx: &RequestContext,
) -> Result<PagestreamBeMessage, PageStreamError> {
let timeline = self.get_timeline_shard_zero(tenant_id, timeline_id).await?;
let _timer = timeline
.query_metrics
.start_timer(metrics::SmgrQueryType::GetDbSize);
let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
let lsn =
Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest, &latest_gc_cutoff_lsn, ctx)
@@ -949,164 +880,16 @@ impl PageServerHandler {
}))
}
/// For most getpage requests, we will already have a Timeline to serve the request: this function
/// looks up such a Timeline synchronously and without touching any global state.
fn get_cached_timeline_for_page(
&mut self,
req: &PagestreamGetPageRequest,
) -> Result<&Arc<Timeline>, Key> {
let key = if let Some((first_idx, first_timeline)) = self.shard_timelines.iter().next() {
// Fastest path: single sharded case
if first_idx.shard_count < ShardCount(2) {
return Ok(&first_timeline.timeline);
}
let key = rel_block_to_key(req.rel, req.blkno);
let shard_num = first_timeline
.timeline
.get_shard_identity()
.get_shard_number(&key);
// Fast path: matched the first timeline in our local handler map. This case is common if
// only one shard per tenant is attached to this pageserver.
if first_timeline.timeline.get_shard_identity().number == shard_num {
return Ok(&first_timeline.timeline);
}
let shard_index = ShardIndex {
shard_number: shard_num,
shard_count: first_timeline.timeline.get_shard_identity().count,
};
// Fast-ish path: timeline is in the connection handler's local cache
if let Some(found) = self.shard_timelines.get(&shard_index) {
return Ok(&found.timeline);
}
key
} else {
rel_block_to_key(req.rel, req.blkno)
};
Err(key)
}
/// Having looked up the [`Timeline`] instance for a particular shard, cache it to enable
/// use in future requests without having to traverse [`crate::tenant::mgr::TenantManager`]
/// again.
///
/// Note that all the Timelines in this cache are for the same timeline_id: they differ
/// in which shard they belong to. When we serve a getpage@lsn request, we choose a shard
/// based on the key.
///
/// The typical size of this cache is 1, as we generally create shards to distribute work
/// across pageservers, so we don't tend to have multiple shards for the same tenant on the
/// same pageserver.
fn cache_timeline(
&mut self,
timeline: Arc<Timeline>,
) -> Result<&Arc<Timeline>, GetActiveTimelineError> {
let gate_guard = timeline
.gate
.enter()
.map_err(|_| GetActiveTimelineError::Tenant(GetActiveTenantError::Cancelled))?;
let shard_index = timeline.tenant_shard_id.to_index();
let entry = self
.shard_timelines
.entry(shard_index)
.or_insert(HandlerTimeline {
timeline,
_guard: gate_guard,
});
Ok(&entry.timeline)
}
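cache_timeline relies on the HashMap entry API so the gate guard and timeline are inserted at most once per shard and reused afterwards. A reduced sketch of that entry-or-insert pattern, with a plain value standing in for HandlerTimeline:
use std::collections::HashMap;

#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
struct ShardIndex(u8);

struct Handler {
    shard_timelines: HashMap<ShardIndex, String>, // value is a stand-in for HandlerTimeline
}

impl Handler {
    fn cache_timeline(&mut self, shard: ShardIndex, timeline: String) -> &String {
        // Insert only if this shard was not cached yet; otherwise keep the existing entry.
        self.shard_timelines.entry(shard).or_insert(timeline)
    }
}

fn main() {
    let mut h = Handler { shard_timelines: HashMap::new() };
    assert_eq!(h.cache_timeline(ShardIndex(0), "tl-a".into()).as_str(), "tl-a");
    // A second insert for the same shard does not overwrite the cached value.
    assert_eq!(h.cache_timeline(ShardIndex(0), "tl-b".into()).as_str(), "tl-a");
}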
/// If [`Self::get_cached_timeline_for_page`] missed, then this function is used to populate the cache with
/// a Timeline to serve requests for this key, if such a Timeline is present on this pageserver. If no such
/// Timeline is found, then we will return an error (this indicates that the client is talking to the wrong node).
async fn load_timeline_for_page(
&mut self,
tenant_id: TenantId,
timeline_id: TimelineId,
key: Key,
) -> anyhow::Result<&Arc<Timeline>, GetActiveTimelineError> {
// Slow path: we must call out to the TenantManager to find the timeline for this Key
let timeline = self
.get_active_tenant_timeline(tenant_id, timeline_id, ShardSelector::Page(key))
.await?;
self.cache_timeline(timeline)
}
async fn get_timeline_shard_zero(
&mut self,
tenant_id: TenantId,
timeline_id: TimelineId,
) -> anyhow::Result<&Arc<Timeline>, GetActiveTimelineError> {
// This is a borrow-checker workaround: we can't return from inside of the `if let Some` because
// that would be an immutable-borrow-self return, whereas later in the function we will use a mutable
// ref to self. So instead, we first build a bool, and then return while not borrowing self.
let have_cached = if let Some((idx, _tl)) = self.shard_timelines.iter().next() {
idx.shard_number == ShardNumber(0)
} else {
false
};
if have_cached {
let entry = self.shard_timelines.iter().next().unwrap();
Ok(&entry.1.timeline)
} else {
let timeline = self
.get_active_tenant_timeline(tenant_id, timeline_id, ShardSelector::Zero)
.await?;
Ok(self.cache_timeline(timeline)?)
}
}
async fn handle_get_page_at_lsn_request(
&mut self,
tenant_id: TenantId,
timeline_id: TimelineId,
async fn do_handle_get_page_at_lsn_request(
&self,
timeline: &Timeline,
req: &PagestreamGetPageRequest,
ctx: &RequestContext,
) -> Result<PagestreamBeMessage, PageStreamError> {
let timeline = match self.get_cached_timeline_for_page(req) {
Ok(tl) => tl,
Err(key) => {
match self
.load_timeline_for_page(tenant_id, timeline_id, key)
.await
{
Ok(t) => t,
Err(GetActiveTimelineError::Tenant(GetActiveTenantError::NotFound(_))) => {
// We already know this tenant exists in general, because we resolved it at
// start of connection. Getting a NotFound here indicates that the shard containing
// the requested page is not present on this node: the client's knowledge of shard->pageserver
// mapping is out of date.
//
// Closing the connection by returning `::Reconnect` has the side effect of rate-limiting the above message, via
// the client's reconnect backoff, as well as hopefully prompting the client to load its updated configuration
// and talk to a different pageserver.
return Err(PageStreamError::Reconnect(
"getpage@lsn request routed to wrong shard".into(),
));
}
Err(e) => return Err(e.into()),
}
}
};
let _timer = timeline
.query_metrics
.start_timer(metrics::SmgrQueryType::GetPageAtLsn);
let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
let lsn =
Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest, &latest_gc_cutoff_lsn, ctx)
.await?;
let page = timeline
.get_rel_page_at_lsn(req.rel, req.blkno, Version::Lsn(lsn), req.latest, ctx)
.await?;
@@ -1116,6 +899,60 @@ impl PageServerHandler {
}))
}
async fn handle_get_page_at_lsn_request(
&self,
timeline: &Timeline,
req: &PagestreamGetPageRequest,
ctx: &RequestContext,
) -> Result<PagestreamBeMessage, PageStreamError> {
let key = rel_block_to_key(req.rel, req.blkno);
if timeline.get_shard_identity().is_key_local(&key) {
self.do_handle_get_page_at_lsn_request(timeline, req, ctx)
.await
} else {
// The Tenant shard we looked up at connection start does not hold this particular
// key: look for other shards in this tenant. This scenario occurs if a pageserver
// has multiple shards for the same tenant.
//
// TODO: optimize this (https://github.com/neondatabase/neon/pull/6037)
let timeline = match self
.get_active_tenant_timeline(
timeline.tenant_shard_id.tenant_id,
timeline.timeline_id,
ShardSelector::Page(key),
)
.await
{
Ok(t) => t,
Err(GetActiveTimelineError::Tenant(GetActiveTenantError::NotFound(_))) => {
// We already know this tenant exists in general, because we resolved it at
// start of connection. Getting a NotFound here indicates that the shard containing
// the requested page is not present on this node: the client's knowledge of shard->pageserver
// mapping is out of date.
tracing::info!("Page request routed to wrong shard: my identity {:?}, should go to shard {}, key {}",
timeline.get_shard_identity(), timeline.get_shard_identity().get_shard_number(&key).0, key);
// Closing the connection by returning `::Reconnect` has the side effect of rate-limiting the above message, via
// the client's reconnect backoff, as well as hopefully prompting the client to load its updated configuration
// and talk to a different pageserver.
return Err(PageStreamError::Reconnect(
"getpage@lsn request routed to wrong shard".into(),
));
}
Err(e) => return Err(e.into()),
};
// Take a GateGuard for the duration of this request. If we were using our main Timeline object,
// its GateGuard would already be held for the whole connection.
let _timeline_guard = timeline
.gate
.enter()
.map_err(|_| PageStreamError::Shutdown)?;
self.do_handle_get_page_at_lsn_request(&timeline, req, ctx)
.await
}
}
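The handler above decides whether the connection's timeline owns the requested page by mapping the key to a shard number and comparing it with its own identity, re-routing otherwise. A toy version of that check; the hash-modulo mapping below is an illustrative assumption, not Neon's actual key-to-shard function:
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
struct PageKey {
    rel: u32,
    blkno: u32,
}

#[derive(Debug, Clone, Copy)]
struct ShardIdentity {
    number: u32,
    count: u32,
}

impl ShardIdentity {
    fn shard_for(&self, key: &PageKey) -> u32 {
        // Illustrative only: hash the key and take it modulo the shard count.
        let mut h = DefaultHasher::new();
        key.hash(&mut h);
        (h.finish() % self.count as u64) as u32
    }

    fn is_key_local(&self, key: &PageKey) -> bool {
        self.count <= 1 || self.shard_for(key) == self.number
    }
}

fn main() {
    let me = ShardIdentity { number: 0, count: 4 };
    let key = PageKey { rel: 16384, blkno: 42 };
    if me.is_key_local(&key) {
        println!("serve locally");
    } else {
        println!("re-route to shard {}", me.shard_for(&key));
    }
}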
#[allow(clippy::too_many_arguments)]
#[instrument(skip_all, fields(?lsn, ?prev_lsn, %full_backup))]
async fn handle_basebackup_request<IO>(
@@ -1663,8 +1500,7 @@ impl From<GetActiveTenantError> for QueryError {
GetActiveTenantError::WaitForActiveTimeout { .. } => QueryError::Disconnected(
ConnectionError::Io(io::Error::new(io::ErrorKind::TimedOut, e.to_string())),
),
GetActiveTenantError::Cancelled
| GetActiveTenantError::WillNotBecomeActive(TenantState::Stopping { .. }) => {
GetActiveTenantError::WillNotBecomeActive(TenantState::Stopping { .. }) => {
QueryError::Shutdown
}
e => QueryError::Other(anyhow::anyhow!(e)),

View File

@@ -14,7 +14,7 @@ use crate::walrecord::NeonWalRecord;
use anyhow::{ensure, Context};
use bytes::{Buf, Bytes};
use pageserver_api::key::is_rel_block_key;
use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind};
use pageserver_api::reltag::{RelTag, SlruKind};
use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
use postgres_ffi::BLCKSZ;
use postgres_ffi::{Oid, TimestampTz, TransactionId};
@@ -27,6 +27,9 @@ use tracing::{debug, trace, warn};
use utils::bin_ser::DeserializeError;
use utils::{bin_ser::BeSer, lsn::Lsn};
/// Block number within a relation or SLRU. This matches PostgreSQL's BlockNumber type.
pub type BlockNumber = u32;
#[derive(Debug)]
pub enum LsnForTimestamp {
/// Found commits both before and after the given timestamp
@@ -1860,6 +1863,21 @@ pub fn is_inherited_key(key: Key) -> bool {
key != AUX_FILES_KEY
}
/// Guaranteed to return `Ok()` if [[is_rel_block_key]] returns `true` for `key`.
pub fn key_to_rel_block(key: Key) -> anyhow::Result<(RelTag, BlockNumber)> {
Ok(match key.field1 {
0x00 => (
RelTag {
spcnode: key.field2,
dbnode: key.field3,
relnode: key.field4,
forknum: key.field5,
},
key.field6,
),
_ => anyhow::bail!("unexpected value kind 0x{:02x}", key.field1),
})
}
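As a usage sketch of the field mapping in `key_to_rel_block` above, the following self-contained program mirrors the decoding with local stand-in types; the real `Key` and `RelTag` live in `pageserver_api` and are not reproduced exactly, and the field values are examples only.

// Stand-in types; field names follow the mapping shown above.
struct Key { field1: u8, field2: u32, field3: u32, field4: u32, field5: u8, field6: u32 }

#[derive(Debug)]
struct RelTag { spcnode: u32, dbnode: u32, relnode: u32, forknum: u8 }

type BlockNumber = u32;

fn key_to_rel_block(key: &Key) -> Result<(RelTag, BlockNumber), String> {
    match key.field1 {
        0x00 => Ok((
            RelTag { spcnode: key.field2, dbnode: key.field3, relnode: key.field4, forknum: key.field5 },
            key.field6,
        )),
        other => Err(format!("unexpected value kind 0x{other:02x}")),
    }
}

fn main() {
    // Example field values only; they do not correspond to a real tenant's keyspace.
    let key = Key { field1: 0x00, field2: 1663, field3: 5, field4: 16384, field5: 0, field6: 42 };
    let (rel, blkno) = key_to_rel_block(&key).expect("kind 0x00 decodes");
    println!("{rel:?} at block {blkno}");
}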
pub fn is_rel_fsm_block_key(key: Key) -> bool {
key.field1 == 0x00 && key.field4 != 0 && key.field5 == FSM_FORKNUM && key.field6 != 0xffffffff
}

View File

@@ -13,12 +13,10 @@
use anyhow::{bail, Context};
use camino::Utf8Path;
use camino::Utf8PathBuf;
use enumset::EnumSet;
use futures::stream::FuturesUnordered;
use futures::FutureExt;
use futures::StreamExt;
use pageserver_api::models;
use pageserver_api::models::TimelineState;
use pageserver_api::shard::ShardIdentity;
use pageserver_api::shard::TenantShardId;
@@ -74,7 +72,6 @@ use crate::tenant::config::LocationMode;
use crate::tenant::config::TenantConfOpt;
use crate::tenant::metadata::load_metadata;
pub use crate::tenant::remote_timeline_client::index::IndexPart;
use crate::tenant::remote_timeline_client::remote_initdb_archive_path;
use crate::tenant::remote_timeline_client::MaybeDeletedIndexPart;
use crate::tenant::remote_timeline_client::INITDB_PATH;
use crate::tenant::storage_layer::DeltaLayer;
@@ -112,7 +109,7 @@ use toml_edit;
use utils::{
crashsafe,
generation::Generation,
id::TimelineId,
id::{TenantId, TimelineId},
lsn::{Lsn, RecordLsn},
};
@@ -133,13 +130,6 @@ macro_rules! pausable_failpoint {
.expect("spawn_blocking");
}
};
($name:literal, $cond:expr) => {
if cfg!(feature = "testing") {
if $cond {
pausable_failpoint!($name)
}
}
};
}
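For readers unfamiliar with the two-argument form added to `pausable_failpoint!` above: it simply gates the one-argument form behind a compile-time feature check and a runtime condition. A minimal, std-only re-creation of that gating pattern follows; the macro name, the failpoint name, and the `debug_assertions` stand-in for `feature = "testing"` are purely illustrative.

macro_rules! pausable_failpoint_demo {
    ($name:literal) => {
        // In the real macro this spawns a blocking task that can pause on the failpoint.
        println!("would pause at failpoint {}", $name);
    };
    ($name:literal, $cond:expr) => {
        // Conditional form: only expands to the one-argument form when both the
        // build-time gate and the runtime condition hold.
        if cfg!(debug_assertions) {
            if $cond {
                pausable_failpoint_demo!($name);
            }
        }
    };
}

fn main() {
    pausable_failpoint_demo!("initial-size-calculation-permit-pause", 2 + 2 == 4); // fires
    pausable_failpoint_demo!("initial-size-calculation-permit-pause", false);      // no-op
}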
pub mod blob_io;
@@ -371,13 +361,13 @@ impl WalRedoManager {
pub enum GetTimelineError {
#[error("Timeline {tenant_id}/{timeline_id} is not active, state: {state:?}")]
NotActive {
tenant_id: TenantShardId,
tenant_id: TenantId,
timeline_id: TimelineId,
state: TimelineState,
},
#[error("Timeline {tenant_id}/{timeline_id} was not found")]
NotFound {
tenant_id: TenantShardId,
tenant_id: TenantId,
timeline_id: TimelineId,
},
}
@@ -1517,6 +1507,10 @@ impl Tenant {
.map_err(LoadLocalTimelineError::Load)
}
pub(crate) fn tenant_id(&self) -> TenantId {
self.tenant_shard_id.tenant_id
}
pub(crate) fn tenant_shard_id(&self) -> TenantShardId {
self.tenant_shard_id
}
@@ -1532,13 +1526,13 @@ impl Tenant {
let timeline = timelines_accessor
.get(&timeline_id)
.ok_or(GetTimelineError::NotFound {
tenant_id: self.tenant_shard_id,
tenant_id: self.tenant_shard_id.tenant_id,
timeline_id,
})?;
if active_only && !timeline.is_active() {
Err(GetTimelineError::NotActive {
tenant_id: self.tenant_shard_id,
tenant_id: self.tenant_shard_id.tenant_id,
timeline_id,
state: timeline.current_state(),
})
@@ -1929,10 +1923,6 @@ impl Tenant {
self.current_state() == TenantState::Active
}
pub fn generation(&self) -> Generation {
self.generation
}
/// Changes tenant status to active, unless shutdown was already requested.
///
/// `background_jobs_can_start` is an optional barrier set to a value during pageserver startup
@@ -2322,32 +2312,6 @@ impl Tenant {
.clone()
}
/// For API access: generate a LocationConfig equivalent to the one that would be used to
/// create a Tenant in the same state. Do not use this in hot paths: it's for relatively
/// rare external API calls, like a reconciliation at startup.
pub(crate) fn get_location_conf(&self) -> models::LocationConfig {
let conf = self.tenant_conf.read().unwrap();
let location_config_mode = match conf.location.attach_mode {
AttachmentMode::Single => models::LocationConfigMode::AttachedSingle,
AttachmentMode::Multi => models::LocationConfigMode::AttachedMulti,
AttachmentMode::Stale => models::LocationConfigMode::AttachedStale,
};
// We have a pageserver TenantConf, we need the API-facing TenantConfig.
let tenant_config: models::TenantConfig = conf.tenant_conf.into();
models::LocationConfig {
mode: location_config_mode,
generation: self.generation.into(),
secondary_conf: None,
shard_number: self.shard_identity.number.0,
shard_count: self.shard_identity.count.0,
shard_stripe_size: self.shard_identity.stripe_size.0,
tenant_conf: tenant_config,
}
}
pub(crate) fn get_tenant_shard_id(&self) -> &TenantShardId {
&self.tenant_shard_id
}
@@ -2593,9 +2557,7 @@ impl Tenant {
let (state, mut rx) = watch::channel(state);
tokio::spawn(async move {
// Strings for metric labels
let tid = tenant_shard_id.to_string();
let shard_id_str = format!("{}", tenant_shard_id.shard_slug());
fn inspect_state(state: &TenantState) -> ([&'static str; 1], bool) {
([state.into()], matches!(state, TenantState::Broken { .. }))
@@ -2608,15 +2570,13 @@ impl Tenant {
// the tenant might be ignored and reloaded, so first remove any previous set
// element. it most likely has already been scraped, as these are manual operations
// right now. most likely we will add it back very soon.
drop(
crate::metrics::BROKEN_TENANTS_SET.remove_label_values(&[&tid, &shard_id_str]),
);
drop(crate::metrics::BROKEN_TENANTS_SET.remove_label_values(&[&tid]));
false
} else {
// add the id to the set right away, there should not be any updates on the channel
// after
crate::metrics::BROKEN_TENANTS_SET
.with_label_values(&[&tid, &shard_id_str])
.with_label_values(&[&tid])
.set(1);
true
};
@@ -2642,7 +2602,7 @@ impl Tenant {
counted_broken = true;
// insert the tenant_id (back) into the set
crate::metrics::BROKEN_TENANTS_SET
.with_label_values(&[&tid, &shard_id_str])
.with_label_values(&[&tid])
.inc();
}
}
@@ -2702,11 +2662,10 @@ impl Tenant {
}
}
// Legacy configs are implicitly in attached state, and do not support sharding
// Legacy configs are implicitly in attached state
Ok(LocationConf::attached_single(
tenant_conf,
Generation::none(),
&models::ShardParameters::default(),
))
} else {
// FIXME If the config file is not found, assume that we're attaching
@@ -3211,55 +3170,6 @@ impl Tenant {
.await
}
async fn upload_initdb(
&self,
timelines_path: &Utf8PathBuf,
pgdata_path: &Utf8PathBuf,
timeline_id: &TimelineId,
) -> anyhow::Result<()> {
let Some(storage) = &self.remote_storage else {
// No remote storage? No upload.
return Ok(());
};
let temp_path = timelines_path.join(format!(
"{INITDB_PATH}.upload-{timeline_id}.{TEMP_FILE_SUFFIX}"
));
scopeguard::defer! {
if let Err(e) = fs::remove_file(&temp_path) {
error!("Failed to remove temporary initdb archive '{temp_path}': {e}");
}
}
let (pgdata_zstd, tar_zst_size) =
import_datadir::create_tar_zst(pgdata_path, &temp_path).await?;
pausable_failpoint!("before-initdb-upload");
backoff::retry(
|| async {
self::remote_timeline_client::upload_initdb_dir(
storage,
&self.tenant_shard_id.tenant_id,
timeline_id,
pgdata_zstd.try_clone().await?,
tar_zst_size,
&self.cancel,
)
.await
},
|_| false,
3,
u32::MAX,
"persist_initdb_tar_zst",
backoff::Cancel::new(self.cancel.clone(), || anyhow::anyhow!("Cancelled")),
)
.await?;
Ok(())
}
/// - run initdb to init temporary instance and get bootstrap data
/// - after initialization completes, tar up the temp dir and upload it to S3.
///
@@ -3299,18 +3209,6 @@ impl Tenant {
let Some(storage) = &self.remote_storage else {
bail!("no storage configured but load_existing_initdb set to {existing_initdb_timeline_id}");
};
if existing_initdb_timeline_id != timeline_id {
let source_path = &remote_initdb_archive_path(
&self.tenant_shard_id.tenant_id,
&existing_initdb_timeline_id,
);
let dest_path =
&remote_initdb_archive_path(&self.tenant_shard_id.tenant_id, &timeline_id);
storage
.copy_object(source_path, dest_path)
.await
.context("copy initdb tar")?;
}
let (initdb_tar_zst_path, initdb_tar_zst) =
self::remote_timeline_client::download_initdb_tar_zst(
self.conf,
@@ -3321,26 +3219,66 @@ impl Tenant {
)
.await
.context("download initdb tar")?;
scopeguard::defer! {
if let Err(e) = fs::remove_file(&initdb_tar_zst_path) {
error!("Failed to remove temporary initdb archive '{initdb_tar_zst_path}': {e}");
}
}
let buf_read =
BufReader::with_capacity(remote_timeline_client::BUFFER_SIZE, initdb_tar_zst);
import_datadir::extract_tar_zst(&pgdata_path, buf_read)
.await
.context("extract initdb tar")?;
tokio::fs::remove_file(&initdb_tar_zst_path)
.await
.or_else(|e| {
if e.kind() == std::io::ErrorKind::NotFound {
// If something else already removed the file, ignore the error
Ok(())
} else {
Err(e)
}
})
.with_context(|| format!("tempfile removal {initdb_tar_zst_path}"))?;
} else {
// Init a temporary repo to get bootstrap data; this creates a directory in the `pgdata_path` path
// Init a temporary repo to get bootstrap data; this creates a directory in the `initdb_path` path
run_initdb(self.conf, &pgdata_path, pg_version, &self.cancel).await?;
// Upload the created data dir to S3
if self.tenant_shard_id().is_zero() {
self.upload_initdb(&timelines_path, &pgdata_path, &timeline_id)
.await?;
if let Some(storage) = &self.remote_storage {
let temp_path = timelines_path.join(format!(
"{INITDB_PATH}.upload-{timeline_id}.{TEMP_FILE_SUFFIX}"
));
let (pgdata_zstd, tar_zst_size) =
import_datadir::create_tar_zst(&pgdata_path, &temp_path).await?;
backoff::retry(
|| async {
self::remote_timeline_client::upload_initdb_dir(
storage,
&self.tenant_shard_id.tenant_id,
&timeline_id,
pgdata_zstd.try_clone().await?,
tar_zst_size,
&self.cancel,
)
.await
},
|_| false,
3,
u32::MAX,
"persist_initdb_tar_zst",
backoff::Cancel::new(self.cancel.clone(), || anyhow::anyhow!("Cancelled")),
)
.await?;
tokio::fs::remove_file(&temp_path)
.await
.or_else(|e| {
if e.kind() == std::io::ErrorKind::NotFound {
// If something else already removed the file, ignore the error
Ok(())
} else {
Err(e)
}
})
.with_context(|| format!("tempfile removal {temp_path}"))?;
}
}
let pgdata_lsn = import_datadir::get_lsn_from_controlfile(&pgdata_path)?.align();
@@ -3629,9 +3567,6 @@ impl Tenant {
self.cached_synthetic_tenant_size
.store(size, Ordering::Relaxed);
// Only shard zero should be calculating synthetic sizes
debug_assert!(self.shard_identity.is_zero());
TENANT_SYNTHETIC_SIZE_METRIC
.get_metric_with_label_values(&[&self.tenant_shard_id.tenant_id.to_string()])
.unwrap()
@@ -3783,7 +3718,7 @@ async fn run_initdb(
impl Drop for Tenant {
fn drop(&mut self) {
remove_tenant_metrics(&self.tenant_shard_id);
remove_tenant_metrics(&self.tenant_shard_id.tenant_id);
}
}
/// Dump contents of a layer file to stdout.
@@ -3822,7 +3757,6 @@ pub(crate) mod harness {
use bytes::{Bytes, BytesMut};
use camino::Utf8PathBuf;
use once_cell::sync::OnceCell;
use pageserver_api::models::ShardParameters;
use pageserver_api::shard::ShardIndex;
use std::fs;
use std::sync::Arc;
@@ -4007,7 +3941,6 @@ pub(crate) mod harness {
AttachedTenantConf::try_from(LocationConf::attached_single(
TenantConfOpt::from(self.tenant_conf),
self.generation,
&ShardParameters::default(),
))
.unwrap(),
// This is a legacy/test code path: sharding isn't supported here.
@@ -5211,7 +5144,7 @@ mod tests {
assert_eq!(
e,
GetTimelineError::NotFound {
tenant_id: tenant.tenant_shard_id,
tenant_id: tenant.tenant_shard_id.tenant_id,
timeline_id: TIMELINE_ID,
}
)

View File

@@ -10,7 +10,6 @@
//!
use anyhow::bail;
use pageserver_api::models;
use pageserver_api::models::EvictionPolicy;
use pageserver_api::shard::{ShardCount, ShardIdentity, ShardNumber, ShardStripeSize};
use serde::de::IntoDeserializer;
use serde::{Deserialize, Serialize};
@@ -168,17 +167,14 @@ impl LocationConf {
/// For use when loading from a legacy configuration: presence of a tenant
/// implies it is in AttachmentMode::Single, which used to be the only
/// possible state. This function should eventually be removed.
pub(crate) fn attached_single(
tenant_conf: TenantConfOpt,
generation: Generation,
shard_params: &models::ShardParameters,
) -> Self {
pub(crate) fn attached_single(tenant_conf: TenantConfOpt, generation: Generation) -> Self {
Self {
mode: LocationMode::Attached(AttachedLocationConfig {
generation,
attach_mode: AttachmentMode::Single,
}),
shard: ShardIdentity::from_params(ShardNumber(0), shard_params),
// Legacy configuration loads are always from tenants created before sharding existed.
shard: ShardIdentity::unsharded(),
tenant_conf,
}
}
@@ -432,6 +428,30 @@ pub struct TenantConfOpt {
pub heatmap_period: Option<Duration>,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "kind")]
pub enum EvictionPolicy {
NoEviction,
LayerAccessThreshold(EvictionPolicyLayerAccessThreshold),
}
impl EvictionPolicy {
pub fn discriminant_str(&self) -> &'static str {
match self {
EvictionPolicy::NoEviction => "NoEviction",
EvictionPolicy::LayerAccessThreshold(_) => "LayerAccessThreshold",
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub struct EvictionPolicyLayerAccessThreshold {
#[serde(with = "humantime_serde")]
pub period: Duration,
#[serde(with = "humantime_serde")]
pub threshold: Duration,
}
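Since `EvictionPolicy` is internally tagged with `kind` and the threshold fields use `humantime_serde`, the config round-trips as a flat object with human-readable durations. A minimal sketch of that round trip, assuming the `serde`, `humantime-serde`, and `serde_json` crates are available (`serde_json` is an assumption made only for this demo):

use serde::{Deserialize, Serialize};
use std::time::Duration;

#[derive(Debug, PartialEq, Serialize, Deserialize)]
#[serde(tag = "kind")]
enum EvictionPolicy {
    NoEviction,
    LayerAccessThreshold(EvictionPolicyLayerAccessThreshold),
}

#[derive(Debug, PartialEq, Serialize, Deserialize)]
struct EvictionPolicyLayerAccessThreshold {
    #[serde(with = "humantime_serde")]
    period: Duration,
    #[serde(with = "humantime_serde")]
    threshold: Duration,
}

fn main() -> Result<(), serde_json::Error> {
    let policy = EvictionPolicy::LayerAccessThreshold(EvictionPolicyLayerAccessThreshold {
        period: Duration::from_secs(60),
        threshold: Duration::from_secs(20 * 60),
    });
    // Internally tagged + humantime gives something along the lines of
    // {"kind":"LayerAccessThreshold","period":"1m","threshold":"20m"}
    let json = serde_json::to_string(&policy)?;
    println!("{json}");
    assert_eq!(serde_json::from_str::<EvictionPolicy>(&json)?, policy);
    Ok(())
}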
impl TenantConfOpt {
pub fn merge(&self, global_conf: TenantConf) -> TenantConf {
TenantConf {
@@ -556,38 +576,6 @@ impl TryFrom<toml_edit::Item> for TenantConfOpt {
}
}
/// This is a conversion from our internal tenant config object to the one used
/// in external APIs.
impl From<TenantConfOpt> for models::TenantConfig {
fn from(value: TenantConfOpt) -> Self {
fn humantime(d: Duration) -> String {
format!("{}s", d.as_secs())
}
Self {
checkpoint_distance: value.checkpoint_distance,
checkpoint_timeout: value.checkpoint_timeout.map(humantime),
compaction_target_size: value.compaction_target_size,
compaction_period: value.compaction_period.map(humantime),
compaction_threshold: value.compaction_threshold,
gc_horizon: value.gc_horizon,
gc_period: value.gc_period.map(humantime),
image_creation_threshold: value.image_creation_threshold,
pitr_interval: value.pitr_interval.map(humantime),
walreceiver_connect_timeout: value.walreceiver_connect_timeout.map(humantime),
lagging_wal_timeout: value.lagging_wal_timeout.map(humantime),
max_lsn_wal_lag: value.max_lsn_wal_lag,
trace_read_requests: value.trace_read_requests,
eviction_policy: value.eviction_policy,
min_resident_size_override: value.min_resident_size_override,
evictions_low_residence_duration_metric_threshold: value
.evictions_low_residence_duration_metric_threshold
.map(humantime),
gc_feedback: value.gc_feedback,
heatmap_period: value.heatmap_period.map(humantime),
}
}
}
#[cfg(test)]
mod tests {
use super::*;

View File

@@ -542,7 +542,6 @@ impl DeleteTenantFlow {
)
.await?;
pausable_failpoint!("tenant-delete-before-cleanup-remaining-fs-traces-pausable");
fail::fail_point!("tenant-delete-before-cleanup-remaining-fs-traces", |_| {
Err(anyhow::anyhow!(
"failpoint: tenant-delete-before-cleanup-remaining-fs-traces"

View File

@@ -3,8 +3,7 @@
use camino::{Utf8DirEntry, Utf8Path, Utf8PathBuf};
use pageserver_api::key::Key;
use pageserver_api::models::ShardParameters;
use pageserver_api::shard::{ShardCount, ShardIdentity, ShardNumber, TenantShardId};
use pageserver_api::shard::{ShardIdentity, ShardNumber, TenantShardId};
use rand::{distributions::Alphanumeric, Rng};
use std::borrow::Cow;
use std::collections::{BTreeMap, HashMap};
@@ -57,7 +56,6 @@ use super::TenantSharedResources;
/// that way we avoid having to carefully switch a tenant's ingestion etc on and off during
/// its lifetime, and we can preserve some important safety invariants like `Tenant` always
/// having a properly acquired generation (Secondary doesn't need a generation)
#[derive(Clone)]
pub(crate) enum TenantSlot {
Attached(Arc<Tenant>),
Secondary(Arc<SecondaryTenant>),
@@ -478,8 +476,6 @@ pub async fn init_tenant_mgr(
tenant_shard_id,
TenantSlot::Secondary(SecondaryTenant::new(
tenant_shard_id,
location_conf.shard,
location_conf.tenant_conf,
secondary_config,
)),
);
@@ -764,8 +760,6 @@ pub(crate) enum SetNewTenantConfigError {
GetTenant(#[from] GetTenantError),
#[error(transparent)]
Persist(anyhow::Error),
#[error(transparent)]
Other(anyhow::Error),
}
pub(crate) async fn set_new_tenant_config(
@@ -779,21 +773,10 @@ pub(crate) async fn set_new_tenant_config(
info!("configuring tenant {tenant_id}");
let tenant = get_tenant(tenant_shard_id, true)?;
if tenant.tenant_shard_id().shard_count > ShardCount(0) {
// Note that we use ShardParameters::default below.
return Err(SetNewTenantConfigError::Other(anyhow::anyhow!(
"This API may only be used on single-sharded tenants, use the /location_config API for sharded tenants"
)));
}
// This is a legacy API that only operates on attached tenants: the preferred
// API to use is the location_config/ endpoint, which lets the caller provide
// the full LocationConf.
let location_conf = LocationConf::attached_single(
new_tenant_conf,
tenant.generation,
&ShardParameters::default(),
);
let location_conf = LocationConf::attached_single(new_tenant_conf, tenant.generation);
Tenant::persist_tenant_config(conf, &tenant_shard_id, &location_conf)
.await
@@ -847,13 +830,15 @@ impl TenantManager {
TenantState::Active => Ok(Arc::clone(tenant)),
_ => {
if active_only {
Err(GetTenantError::NotActive(tenant_shard_id))
Err(GetTenantError::NotActive(tenant_shard_id.tenant_id))
} else {
Ok(Arc::clone(tenant))
}
}
},
Some(TenantSlot::InProgress(_)) => Err(GetTenantError::NotActive(tenant_shard_id)),
Some(TenantSlot::InProgress(_)) => {
Err(GetTenantError::NotActive(tenant_shard_id.tenant_id))
}
None | Some(TenantSlot::Secondary(_)) => {
Err(GetTenantError::NotFound(tenant_shard_id.tenant_id))
}
@@ -922,7 +907,6 @@ impl TenantManager {
Some(TenantSlot::Secondary(secondary_tenant)),
) => {
secondary_tenant.set_config(secondary_conf);
secondary_tenant.set_tenant_conf(&new_location_config.tenant_conf);
Some(FastPathModified::Secondary(secondary_tenant.clone()))
}
_ => {
@@ -1055,36 +1039,16 @@ impl TenantManager {
let new_slot = match &new_location_config.mode {
LocationMode::Secondary(secondary_config) => {
let shard_identity = new_location_config.shard;
TenantSlot::Secondary(SecondaryTenant::new(
tenant_shard_id,
shard_identity,
new_location_config.tenant_conf,
secondary_config,
))
TenantSlot::Secondary(SecondaryTenant::new(tenant_shard_id, secondary_config))
}
LocationMode::Attached(_attach_config) => {
let shard_identity = new_location_config.shard;
// Testing hack: if we are configured with no control plane, then drop the generation
// from upserts. This enables creating generation-less tenants even though neon_local
// always uses generations when calling the location conf API.
let attached_conf = if cfg!(feature = "testing") {
let mut conf = AttachedTenantConf::try_from(new_location_config)?;
if self.conf.control_plane_api.is_none() {
conf.location.generation = Generation::none();
}
conf
} else {
AttachedTenantConf::try_from(new_location_config)?
};
let tenant = tenant_spawn(
self.conf,
tenant_shard_id,
&tenant_path,
self.resources.clone(),
attached_conf,
AttachedTenantConf::try_from(new_location_config)?,
shard_identity,
None,
self.tenants,
@@ -1225,17 +1189,6 @@ impl TenantManager {
}
}
/// Total list of all tenant slots: this includes attached, secondary, and InProgress.
pub(crate) fn list(&self) -> Vec<(TenantShardId, TenantSlot)> {
let locked = self.tenants.read().unwrap();
match &*locked {
TenantsMap::Initializing => Vec::new(),
TenantsMap::Open(map) | TenantsMap::ShuttingDown(map) => {
map.iter().map(|(k, v)| (*k, v.clone())).collect()
}
}
}
pub(crate) async fn delete_tenant(
&self,
tenant_shard_id: TenantShardId,
@@ -1304,13 +1257,10 @@ impl TenantManager {
#[derive(Debug, thiserror::Error)]
pub(crate) enum GetTenantError {
/// NotFound is a TenantId rather than TenantShardId, because this error type is used from
/// getters that use a TenantId and a ShardSelector, not just getters that target a specific shard.
#[error("Tenant {0} not found")]
NotFound(TenantId),
#[error("Tenant {0} is not active")]
NotActive(TenantShardId),
NotActive(TenantId),
/// Broken is logically a subset of NotActive, but a distinct error is useful as
/// NotActive is usually a retryable state for API purposes, whereas Broken
/// is a stuck error state
@@ -1343,13 +1293,15 @@ pub(crate) fn get_tenant(
TenantState::Active => Ok(Arc::clone(tenant)),
_ => {
if active_only {
Err(GetTenantError::NotActive(tenant_shard_id))
Err(GetTenantError::NotActive(tenant_shard_id.tenant_id))
} else {
Ok(Arc::clone(tenant))
}
}
},
Some(TenantSlot::InProgress(_)) => Err(GetTenantError::NotActive(tenant_shard_id)),
Some(TenantSlot::InProgress(_)) => {
Err(GetTenantError::NotActive(tenant_shard_id.tenant_id))
}
None | Some(TenantSlot::Secondary(_)) => {
Err(GetTenantError::NotFound(tenant_shard_id.tenant_id))
}
@@ -1425,7 +1377,7 @@ pub(crate) async fn get_active_tenant_with_timeout(
}
Some(TenantSlot::Secondary(_)) => {
return Err(GetActiveTenantError::NotFound(GetTenantError::NotActive(
tenant_shard_id,
tenant_id,
)))
}
Some(TenantSlot::InProgress(barrier)) => {
@@ -1464,7 +1416,7 @@ pub(crate) async fn get_active_tenant_with_timeout(
Some(TenantSlot::Attached(tenant)) => tenant.clone(),
_ => {
return Err(GetActiveTenantError::NotFound(GetTenantError::NotActive(
tenant_shard_id,
tenant_id,
)))
}
}
@@ -1492,7 +1444,7 @@ pub(crate) enum DeleteTimelineError {
#[derive(Debug, thiserror::Error)]
pub(crate) enum TenantStateError {
#[error("Tenant {0} is stopping")]
IsStopping(TenantShardId),
IsStopping(TenantId),
#[error(transparent)]
SlotError(#[from] TenantSlotError),
#[error(transparent)]
@@ -1677,8 +1629,8 @@ pub(crate) enum TenantMapListError {
///
/// Get list of tenants, for the mgmt API
///
pub(crate) async fn list_tenants(
) -> Result<Vec<(TenantShardId, TenantState, Generation)>, TenantMapListError> {
pub(crate) async fn list_tenants() -> Result<Vec<(TenantShardId, TenantState)>, TenantMapListError>
{
let tenants = TENANTS.read().unwrap();
let m = match &*tenants {
TenantsMap::Initializing => return Err(TenantMapListError::Initializing),
@@ -1686,9 +1638,7 @@ pub(crate) async fn list_tenants(
};
Ok(m.iter()
.filter_map(|(id, tenant)| match tenant {
TenantSlot::Attached(tenant) => {
Some((*id, tenant.current_state(), tenant.generation()))
}
TenantSlot::Attached(tenant) => Some((*id, tenant.current_state())),
TenantSlot::Secondary(_) => None,
TenantSlot::InProgress(_) => None,
})
@@ -2122,7 +2072,7 @@ where
// if pageserver shutdown or other detach/ignore is already ongoing, we don't want to
// wait for it but return an error right away because these are distinct requests.
slot_guard.revert();
return Err(TenantStateError::IsStopping(tenant_shard_id));
return Err(TenantStateError::IsStopping(tenant_shard_id.tenant_id));
}
}
Some(tenant)
@@ -2251,6 +2201,7 @@ pub(crate) async fn immediate_gc(
#[cfg(test)]
mod tests {
use pageserver_api::shard::TenantShardId;
use std::collections::BTreeMap;
use std::sync::Arc;
use tracing::{info_span, Instrument};
@@ -2271,7 +2222,7 @@ mod tests {
// harness loads it to active, which is forced and nothing is running on the tenant
let id = t.tenant_shard_id();
let id = TenantShardId::unsharded(t.tenant_id());
// tenant harness configures the logging and we cannot escape it
let _e = info_span!("testing", tenant_id = %id).entered();

View File

@@ -182,7 +182,7 @@
pub(crate) mod download;
pub mod index;
pub(crate) mod upload;
mod upload;
use anyhow::Context;
use camino::Utf8Path;
@@ -522,6 +522,8 @@ impl RemoteTimelineClient {
cancel,
)
.measure_remote_op(
self.tenant_shard_id.tenant_id,
self.timeline_id,
RemoteOpFileKind::Index,
RemoteOpKind::Download,
Arc::clone(&self.metrics),
@@ -564,6 +566,8 @@ impl RemoteTimelineClient {
cancel,
)
.measure_remote_op(
self.tenant_shard_id.tenant_id,
self.timeline_id,
RemoteOpFileKind::Layer,
RemoteOpKind::Download,
Arc::clone(&self.metrics),
@@ -687,10 +691,7 @@ impl RemoteTimelineClient {
.insert(layer.layer_desc().filename(), metadata.clone());
upload_queue.latest_files_changes_since_metadata_upload_scheduled += 1;
info!(
"scheduled layer file upload {layer} gen={:?} shard={:?}",
metadata.generation, metadata.shard
);
info!("scheduled layer file upload {layer}");
let op = UploadOp::UploadLayer(layer, metadata);
self.calls_unfinished_metric_begin(&op);
upload_queue.queued_operations.push_back(op);
@@ -1347,6 +1348,8 @@ impl RemoteTimelineClient {
&self.cancel,
)
.measure_remote_op(
self.tenant_shard_id.tenant_id,
self.timeline_id,
RemoteOpFileKind::Layer,
RemoteOpKind::Upload,
Arc::clone(&self.metrics),
@@ -1372,6 +1375,8 @@ impl RemoteTimelineClient {
&self.cancel,
)
.measure_remote_op(
self.tenant_shard_id.tenant_id,
self.timeline_id,
RemoteOpFileKind::Index,
RemoteOpKind::Upload,
Arc::clone(&self.metrics),

View File

@@ -3,36 +3,22 @@ pub mod heatmap;
mod heatmap_uploader;
mod scheduler;
use std::{sync::Arc, time::SystemTime};
use std::sync::Arc;
use crate::{
config::PageServerConf,
disk_usage_eviction_task::DiskUsageEvictionInfo,
task_mgr::{self, TaskKind, BACKGROUND_RUNTIME},
virtual_file::MaybeFatalIo,
};
use crate::task_mgr::{self, TaskKind, BACKGROUND_RUNTIME};
use self::{
downloader::{downloader_task, SecondaryDetail},
heatmap_uploader::heatmap_uploader_task,
};
use super::{
config::{SecondaryLocationConfig, TenantConfOpt},
mgr::TenantManager,
span::debug_assert_current_span_has_tenant_id,
storage_layer::LayerFileName,
};
use super::{config::SecondaryLocationConfig, mgr::TenantManager};
use pageserver_api::{
models,
shard::{ShardIdentity, TenantShardId},
};
use pageserver_api::shard::TenantShardId;
use remote_storage::GenericRemoteStorage;
use tokio_util::sync::CancellationToken;
use tracing::instrument;
use utils::{completion::Barrier, fs_ext, id::TimelineId, sync::gate::Gate};
use utils::{completion::Barrier, sync::gate::Gate};
enum DownloadCommand {
Download(TenantShardId),
@@ -89,20 +75,12 @@ pub(crate) struct SecondaryTenant {
pub(crate) gate: Gate,
// Secondary mode does not need the full shard identity or the TenantConfOpt. However,
// storing these enables us to report our full LocationConf, enabling convenient reconciliation
// by the control plane (see [`Self::get_location_conf`])
shard_identity: ShardIdentity,
tenant_conf: std::sync::Mutex<TenantConfOpt>,
detail: std::sync::Mutex<SecondaryDetail>,
}
impl SecondaryTenant {
pub(crate) fn new(
tenant_shard_id: TenantShardId,
shard_identity: ShardIdentity,
tenant_conf: TenantConfOpt,
config: &SecondaryLocationConfig,
) -> Arc<Self> {
Arc::new(Self {
@@ -114,9 +92,6 @@ impl SecondaryTenant {
cancel: CancellationToken::new(),
gate: Gate::new(format!("SecondaryTenant {tenant_shard_id}")),
shard_identity,
tenant_conf: std::sync::Mutex::new(tenant_conf),
detail: std::sync::Mutex::new(SecondaryDetail::new(config.clone())),
})
}
@@ -132,91 +107,9 @@ impl SecondaryTenant {
self.detail.lock().unwrap().config = config.clone();
}
pub(crate) fn set_tenant_conf(&self, config: &TenantConfOpt) {
*(self.tenant_conf.lock().unwrap()) = *config;
}
/// For API access: generate a LocationConfig equivalent to the one that would be used to
/// create a Tenant in the same state. Do not use this in hot paths: it's for relatively
/// rare external API calls, like a reconciliation at startup.
pub(crate) fn get_location_conf(&self) -> models::LocationConfig {
let conf = self.detail.lock().unwrap().config.clone();
let conf = models::LocationConfigSecondary { warm: conf.warm };
let tenant_conf = *self.tenant_conf.lock().unwrap();
models::LocationConfig {
mode: models::LocationConfigMode::Secondary,
generation: None,
secondary_conf: Some(conf),
shard_number: self.tenant_shard_id.shard_number.0,
shard_count: self.tenant_shard_id.shard_count.0,
shard_stripe_size: self.shard_identity.stripe_size.0,
tenant_conf: tenant_conf.into(),
}
}
pub(crate) fn get_tenant_shard_id(&self) -> &TenantShardId {
fn get_tenant_shard_id(&self) -> &TenantShardId {
&self.tenant_shard_id
}
pub(crate) fn get_layers_for_eviction(self: &Arc<Self>) -> DiskUsageEvictionInfo {
self.detail.lock().unwrap().get_layers_for_eviction(self)
}
#[instrument(skip_all, fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug(), timeline_id=%timeline_id, name=%name))]
pub(crate) async fn evict_layer(
&self,
conf: &PageServerConf,
timeline_id: TimelineId,
name: LayerFileName,
) {
debug_assert_current_span_has_tenant_id();
let _guard = match self.gate.enter() {
Ok(g) => g,
Err(_) => {
tracing::debug!("Dropping layer evictions, secondary tenant shutting down",);
return;
}
};
let now = SystemTime::now();
let path = conf
.timeline_path(&self.tenant_shard_id, &timeline_id)
.join(name.file_name());
// We tolerate ENOENT, because between planning eviction and executing
// it, the secondary downloader could have seen an updated heatmap that
// resulted in a layer being deleted.
// Other local I/O errors are process-fatal: these should never happen.
tokio::fs::remove_file(path)
.await
.or_else(fs_ext::ignore_not_found)
.fatal_err("Deleting layer during eviction");
// Update the timeline's state. This does not have to be synchronized with
// the download process, because:
// - If downloader is racing with us to remove a file (e.g. because it is
// removed from heatmap), then our mutual .remove() operations will both
// succeed.
// - If downloader is racing with us to download the object (this would require
// multiple eviction iterations to race with multiple download iterations), then
// if we remove it from the state, the worst that happens is the downloader
// downloads it again before re-inserting, or we delete the file but it remains
// in the state map (in which case it will be downloaded if this secondary
// tenant transitions to attached and tries to access it)
//
// The important assumption here is that the secondary timeline state does not
// have to 100% match what is on disk, because it's a best-effort warming
// of the cache.
let mut detail = self.detail.lock().unwrap();
if let Some(timeline_detail) = detail.timelines.get_mut(&timeline_id) {
timeline_detail.on_disk_layers.remove(&name);
timeline_detail.evicted_at.insert(name, now);
}
}
}
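The ENOENT-tolerant delete in `evict_layer` leans on the crate's `fs_ext::ignore_not_found` helper; what that handling boils down to can be sketched with plain tokio/std. This is not the helper's actual implementation, and the tokio runtime features (`rt`, `macros`) are assumed for the demo.

use std::io;
use std::path::Path;

// Treat "file already gone" as success; surface every other I/O error.
async fn remove_if_present(path: &Path) -> io::Result<()> {
    match tokio::fs::remove_file(path).await {
        Err(e) if e.kind() == io::ErrorKind::NotFound => Ok(()),
        other => other,
    }
}

#[tokio::main]
async fn main() -> io::Result<()> {
    // Racing with a concurrent deleter (here: a path that never existed) is not an error.
    remove_if_present(Path::new("/tmp/definitely-not-there.layer")).await
}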
/// The SecondaryController is a pseudo-rpc client for administrative control of secondary mode downloads,

View File

@@ -8,9 +8,6 @@ use std::{
use crate::{
config::PageServerConf,
disk_usage_eviction_task::{
finite_f32, DiskUsageEvictionInfo, EvictionCandidate, EvictionLayer, EvictionSecondaryLayer,
},
metrics::SECONDARY_MODE,
tenant::{
config::SecondaryLocationConfig,
@@ -145,46 +142,6 @@ impl SecondaryDetail {
timelines: HashMap::new(),
}
}
pub(super) fn get_layers_for_eviction(
&self,
parent: &Arc<SecondaryTenant>,
) -> DiskUsageEvictionInfo {
let mut result = DiskUsageEvictionInfo {
max_layer_size: None,
resident_layers: Vec::new(),
};
for (timeline_id, timeline_detail) in &self.timelines {
result
.resident_layers
.extend(timeline_detail.on_disk_layers.iter().map(|(name, ods)| {
EvictionCandidate {
layer: EvictionLayer::Secondary(EvictionSecondaryLayer {
secondary_tenant: parent.clone(),
timeline_id: *timeline_id,
name: name.clone(),
metadata: ods.metadata.clone(),
}),
last_activity_ts: ods.access_time,
relative_last_activity: finite_f32::FiniteF32::ZERO,
}
}));
}
result.max_layer_size = result
.resident_layers
.iter()
.map(|l| l.layer.get_file_size())
.max();
tracing::debug!(
"eviction: secondary tenant {} found {} timelines, {} layers",
parent.get_tenant_shard_id(),
self.timelines.len(),
result.resident_layers.len()
);
result
}
}
struct PendingDownload {

View File

@@ -15,7 +15,7 @@ use utils::sync::heavier_once_cell;
use crate::config::PageServerConf;
use crate::context::RequestContext;
use crate::repository::Key;
use crate::tenant::{remote_timeline_client::LayerFileMetadata, Timeline};
use crate::tenant::{remote_timeline_client::LayerFileMetadata, RemoteTimelineClient, Timeline};
use super::delta_layer::{self, DeltaEntry};
use super::image_layer;
@@ -204,14 +204,17 @@ impl Layer {
///
/// Technically cancellation safe, but cancelling might shift which generation of the
/// download-evict cycle is observed on retry.
pub(crate) async fn evict_and_wait(&self) -> Result<(), EvictionError> {
self.0.evict_and_wait().await
pub(crate) async fn evict_and_wait(
&self,
rtc: &RemoteTimelineClient,
) -> Result<(), EvictionError> {
self.0.evict_and_wait(rtc).await
}
/// Delete the layer file when the `self` gets dropped, also try to schedule a remote index upload
/// then.
///
/// On drop, this will cause a call to [`crate::tenant::remote_timeline_client::RemoteTimelineClient::schedule_deletion_of_unlinked`].
/// On drop, this will cause a call to [`RemoteTimelineClient::schedule_deletion_of_unlinked`].
/// This means that the unlinking by [gc] or [compaction] must have happened strictly before
/// the value this is called on gets dropped.
///
@@ -603,7 +606,10 @@ impl LayerInner {
/// Cancellation safe, however dropping the future and calling this method again might result
/// in a new attempt to evict OR join the previously started attempt.
pub(crate) async fn evict_and_wait(&self) -> Result<(), EvictionError> {
pub(crate) async fn evict_and_wait(
&self,
_: &RemoteTimelineClient,
) -> Result<(), EvictionError> {
use tokio::sync::broadcast::error::RecvError;
assert!(self.have_remote_client);
@@ -939,18 +945,8 @@ impl LayerInner {
Ok((Err(e), _permit)) => {
// sleep already happened in the spawned task, if it was not cancelled
let consecutive_failures = self.consecutive_failures.load(Ordering::Relaxed);
match e.downcast_ref::<remote_storage::DownloadError>() {
// If the download failed due to its cancellation token,
// propagate the cancellation error upstream.
Some(remote_storage::DownloadError::Cancelled) => {
Err(DownloadError::DownloadCancelled)
}
_ => {
tracing::error!(consecutive_failures, "layer file download failed: {e:#}");
Err(DownloadError::DownloadFailed)
}
}
tracing::error!(consecutive_failures, "layer file download failed: {e:#}");
Err(DownloadError::DownloadFailed)
}
Err(_gone) => Err(DownloadError::DownloadCancelled),
}

View File

@@ -65,11 +65,6 @@ pub(crate) async fn concurrent_background_tasks_rate_limit_permit(
.with_label_values(&[loop_kind.as_static_str()])
.guard();
pausable_failpoint!(
"initial-size-calculation-permit-pause",
loop_kind == BackgroundLoopKind::InitialLogicalSizeCalculation
);
match CONCURRENT_BACKGROUND_TASKS.acquire().await {
Ok(permit) => permit,
Err(_closed) => unreachable!("we never close the semaphore"),

View File

@@ -15,10 +15,9 @@ use fail::fail_point;
use itertools::Itertools;
use pageserver_api::{
models::{
DownloadRemoteLayersTaskInfo, DownloadRemoteLayersTaskSpawnRequest, EvictionPolicy,
LayerMapInfo, TimelineState,
DownloadRemoteLayersTaskInfo, DownloadRemoteLayersTaskSpawnRequest, LayerMapInfo,
TimelineState,
},
reltag::BlockNumber,
shard::{ShardIdentity, TenantShardId},
};
use rand::Rng;
@@ -43,38 +42,33 @@ use std::{
ops::ControlFlow,
};
use crate::context::{
AccessStatsBehavior, DownloadBehavior, RequestContext, RequestContextBuilder,
};
use crate::tenant::storage_layer::delta_layer::DeltaEntry;
use crate::tenant::storage_layer::{
AsLayerDesc, DeltaLayerWriter, EvictionError, ImageLayerWriter, InMemoryLayer, Layer,
LayerAccessStatsReset, LayerFileName, ResidentLayer, ValueReconstructResult,
ValueReconstructState,
};
use crate::tenant::tasks::BackgroundLoopKind;
use crate::tenant::timeline::logical_size::CurrentLogicalSize;
use crate::tenant::{
layer_map::{LayerMap, SearchResult},
metadata::{save_metadata, TimelineMetadata},
par_fsync,
};
use crate::{
context::{AccessStatsBehavior, DownloadBehavior, RequestContext, RequestContextBuilder},
disk_usage_eviction_task::DiskUsageEvictionInfo,
};
use crate::{deletion_queue::DeletionQueueClient, tenant::remote_timeline_client::StopError};
use crate::{
disk_usage_eviction_task::finite_f32,
tenant::storage_layer::{
AsLayerDesc, DeltaLayerWriter, EvictionError, ImageLayerWriter, InMemoryLayer, Layer,
LayerAccessStatsReset, LayerFileName, ResidentLayer, ValueReconstructResult,
ValueReconstructState,
},
};
use crate::{
disk_usage_eviction_task::EvictionCandidate, tenant::storage_layer::delta_layer::DeltaEntry,
};
use crate::{pgdatadir_mapping::LsnForTimestamp, tenant::tasks::BackgroundLoopKind};
use crate::config::PageServerConf;
use crate::keyspace::{KeyPartitioning, KeySpace, KeySpaceRandomAccum};
use crate::metrics::{
TimelineMetrics, MATERIALIZED_PAGE_CACHE_HIT, MATERIALIZED_PAGE_CACHE_HIT_DIRECT,
};
use crate::pgdatadir_mapping::CalculateLogicalSizeError;
use crate::pgdatadir_mapping::LsnForTimestamp;
use crate::pgdatadir_mapping::{is_inherited_key, is_rel_fsm_block_key, is_rel_vm_block_key};
use crate::tenant::config::TenantConfOpt;
use crate::pgdatadir_mapping::{BlockNumber, CalculateLogicalSizeError};
use crate::tenant::config::{EvictionPolicy, TenantConfOpt};
use pageserver_api::reltag::RelTag;
use pageserver_api::shard::ShardIndex;
@@ -252,10 +246,6 @@ pub struct Timeline {
pub(super) metrics: TimelineMetrics,
// `Timeline` doesn't write these metrics itself, but it manages the lifetime. Code
// in `crate::page_service` writes these metrics.
pub(crate) query_metrics: crate::metrics::SmgrQueryTimePerTimeline,
/// Ensures layers aren't frozen by checkpointer between
/// [`Timeline::get_layer_for_write`] and layer reads.
/// Locked automatically by [`TimelineWriter`] and checkpointer.
@@ -1144,7 +1134,12 @@ impl Timeline {
return Ok(None);
};
match local_layer.evict_and_wait().await {
let rtc = self
.remote_client
.as_ref()
.ok_or_else(|| anyhow::anyhow!("remote storage not configured; cannot evict"))?;
match local_layer.evict_and_wait(rtc).await {
Ok(()) => Ok(Some(true)),
Err(EvictionError::NotFound) => Ok(Some(false)),
Err(EvictionError::Downloaded) => Ok(Some(false)),
@@ -1319,11 +1314,6 @@ impl Timeline {
),
),
query_metrics: crate::metrics::SmgrQueryTimePerTimeline::new(
&tenant_shard_id,
&timeline_id,
),
flush_loop_state: Mutex::new(FlushLoopState::NotStarted),
layer_flush_start_tx,
@@ -2113,7 +2103,7 @@ impl Timeline {
let layer_file_names = eviction_info
.resident_layers
.iter()
.map(|l| l.layer.get_name())
.map(|l| l.layer.layer_desc().filename())
.collect::<Vec<_>>();
let decorated = match remote_client.get_layers_metadata(layer_file_names) {
@@ -2131,7 +2121,7 @@ impl Timeline {
.filter_map(|(layer, remote_info)| {
remote_info.map(|remote_info| {
HeatMapLayer::new(
layer.layer.get_name(),
layer.layer.layer_desc().filename(),
IndexLayerMetadata::from(remote_info),
layer.last_activity_ts,
)
@@ -4434,6 +4424,43 @@ impl Timeline {
}
}
pub(crate) struct DiskUsageEvictionInfo {
/// Timeline's largest layer (remote or resident)
pub max_layer_size: Option<u64>,
/// Timeline's resident layers
pub resident_layers: Vec<LocalLayerInfoForDiskUsageEviction>,
}
pub(crate) struct LocalLayerInfoForDiskUsageEviction {
pub layer: Layer,
pub last_activity_ts: SystemTime,
}
impl std::fmt::Debug for LocalLayerInfoForDiskUsageEviction {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
// format the tv_sec, tv_nsec into rfc3339 in case someone is looking at it;
// allocating a string for this is unfortunate, but it will rarely be formatted
let ts = chrono::DateTime::<chrono::Utc>::from(self.last_activity_ts);
let ts = ts.to_rfc3339_opts(chrono::SecondsFormat::Nanos, true);
struct DisplayIsDebug<'a, T>(&'a T);
impl<'a, T: std::fmt::Display> std::fmt::Debug for DisplayIsDebug<'a, T> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.0)
}
}
f.debug_struct("LocalLayerInfoForDiskUsageEviction")
.field("layer", &DisplayIsDebug(&self.layer))
.field("last_activity", &ts)
.finish()
}
}
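`DisplayIsDebug` above is a small newtype trick for feeding a type's `Display` output into `debug_struct` without going through (and escaping via) a `Debug` impl. A standalone sketch, with an illustrative `LayerName` stand-in type:

use std::fmt;

// Forwards Debug to the wrapped value's Display impl.
struct DisplayIsDebug<'a, T>(&'a T);

impl<'a, T: fmt::Display> fmt::Debug for DisplayIsDebug<'a, T> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.0)
    }
}

// Illustrative stand-in for a layer file name.
struct LayerName(String);

impl fmt::Display for LayerName {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.0)
    }
}

fn main() {
    let layer = LayerName("000000000000-0000002000__delta".to_string());
    // Printed without the quotes and escaping that a derived Debug on String would add.
    assert_eq!(format!("{:?}", DisplayIsDebug(&layer)), "000000000000-0000002000__delta");
}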
impl LocalLayerInfoForDiskUsageEviction {
pub fn file_size(&self) -> u64 {
self.layer.layer_desc().file_size
}
}
impl Timeline {
/// Returns non-remote layers for eviction.
pub(crate) async fn get_local_layers_for_disk_usage_eviction(&self) -> DiskUsageEvictionInfo {
@@ -4467,10 +4494,9 @@ impl Timeline {
SystemTime::now()
});
resident_layers.push(EvictionCandidate {
layer: l.drop_eviction_guard().into(),
resident_layers.push(LocalLayerInfoForDiskUsageEviction {
layer: l.drop_eviction_guard(),
last_activity_ts,
relative_last_activity: finite_f32::FiniteF32::ZERO,
});
}
@@ -4627,6 +4653,11 @@ mod tests {
.await
.unwrap();
let rtc = timeline
.remote_client
.clone()
.expect("just configured this");
let layer = find_some_layer(&timeline).await;
let layer = layer
.keep_resident()
@@ -4635,8 +4666,8 @@ mod tests {
.expect("should had been resident")
.drop_eviction_guard();
let first = async { layer.evict_and_wait().await };
let second = async { layer.evict_and_wait().await };
let first = async { layer.evict_and_wait(&rtc).await };
let second = async { layer.evict_and_wait(&rtc).await };
let (first, second) = tokio::join!(first, second);

View File

@@ -20,7 +20,6 @@ use std::{
time::{Duration, SystemTime},
};
use pageserver_api::models::{EvictionPolicy, EvictionPolicyLayerAccessThreshold};
use tokio::time::Instant;
use tokio_util::sync::CancellationToken;
use tracing::{debug, error, info, info_span, instrument, warn, Instrument};
@@ -30,7 +29,10 @@ use crate::{
pgdatadir_mapping::CollectKeySpaceError,
task_mgr::{self, TaskKind, BACKGROUND_RUNTIME},
tenant::{
tasks::BackgroundLoopKind, timeline::EvictionError, LogicalSizeCalculationCause, Tenant,
config::{EvictionPolicy, EvictionPolicyLayerAccessThreshold},
tasks::BackgroundLoopKind,
timeline::EvictionError,
LogicalSizeCalculationCause, Tenant,
},
};
@@ -213,10 +215,13 @@ impl Timeline {
// So, we just need to deal with this.
if self.remote_client.is_none() {
error!("no remote storage configured, cannot evict layers");
return ControlFlow::Continue(());
}
let remote_client = match self.remote_client.as_ref() {
Some(c) => c,
None => {
error!("no remote storage configured, cannot evict layers");
return ControlFlow::Continue(());
}
};
let mut js = tokio::task::JoinSet::new();
{
@@ -269,8 +274,9 @@ impl Timeline {
};
let layer = guard.drop_eviction_guard();
if no_activity_for > p.threshold {
let remote_client = remote_client.clone();
// this could cause a lot of allocations in some cases
js.spawn(async move { layer.evict_and_wait().await });
js.spawn(async move { layer.evict_and_wait(&remote_client).await });
stats.candidates += 1;
}
}
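The eviction pass above fans out one task per candidate layer into a `tokio::task::JoinSet` and later drains the results. A minimal, self-contained sketch of that fan-out/collect shape; the closure body stands in for `layer.evict_and_wait(&remote_client)`, and tokio's `rt` and `macros` features are assumed.

use tokio::task::JoinSet;

#[tokio::main]
async fn main() {
    let mut js: JoinSet<Result<(), String>> = JoinSet::new();
    for candidate in 0..4u32 {
        js.spawn(async move {
            // Stand-in for `layer.evict_and_wait(&remote_client).await`.
            if candidate % 2 == 0 { Ok(()) } else { Err(format!("layer {candidate} still in use")) }
        });
    }

    let (mut evicted, mut errors) = (0usize, 0usize);
    while let Some(joined) = js.join_next().await {
        match joined {
            Ok(Ok(())) => evicted += 1,
            Ok(Err(_)) | Err(_) => errors += 1, // task error, or join error (panic/cancel)
        }
    }
    println!("evicted={evicted} errors={errors}");
}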

View File

@@ -14,7 +14,6 @@ use crate::metrics::{StorageIoOperation, STORAGE_IO_SIZE, STORAGE_IO_TIME_METRIC
use crate::tenant::TENANTS_SEGMENT_NAME;
use camino::{Utf8Path, Utf8PathBuf};
use once_cell::sync::OnceCell;
use pageserver_api::shard::TenantShardId;
use std::fs::{self, File, OpenOptions};
use std::io::{Error, ErrorKind, Seek, SeekFrom};
use std::os::unix::fs::FileExt;
@@ -61,7 +60,6 @@ pub struct VirtualFile {
// It makes no sense for us to constantly turn the `TimelineId` and `TenantId` into
// strings.
tenant_id: String,
shard_id: String,
timeline_id: String,
}
@@ -303,24 +301,15 @@ impl VirtualFile {
) -> Result<VirtualFile, std::io::Error> {
let path_str = path.to_string();
let parts = path_str.split('/').collect::<Vec<&str>>();
let (tenant_id, shard_id, timeline_id) =
if parts.len() > 5 && parts[parts.len() - 5] == TENANTS_SEGMENT_NAME {
let tenant_shard_part = parts[parts.len() - 4];
let (tenant_id, shard_id) = match tenant_shard_part.parse::<TenantShardId>() {
Ok(tenant_shard_id) => (
tenant_shard_id.tenant_id.to_string(),
format!("{}", tenant_shard_id.shard_slug()),
),
Err(_) => {
// Malformed path: this ID is just for observability, so tolerate it
// and pass through
(tenant_shard_part.to_string(), "*".to_string())
}
};
(tenant_id, shard_id, parts[parts.len() - 2].to_string())
} else {
("*".to_string(), "*".to_string(), "*".to_string())
};
let tenant_id;
let timeline_id;
if parts.len() > 5 && parts[parts.len() - 5] == TENANTS_SEGMENT_NAME {
tenant_id = parts[parts.len() - 4].to_string();
timeline_id = parts[parts.len() - 2].to_string();
} else {
tenant_id = "*".to_string();
timeline_id = "*".to_string();
}
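The branch above derives metric labels purely from path position. As a standalone sketch of that convention, with an illustrative path (real layer file names are longer):

const TENANTS_SEGMENT_NAME: &str = "tenants";

// ".../tenants/<tenant_id>/timelines/<timeline_id>/<layer file>" -> label pair,
// with "*" as the fallback for paths outside the tenants directory.
fn ids_for_metrics(path: &str) -> (String, String) {
    let parts: Vec<&str> = path.split('/').collect();
    if parts.len() > 5 && parts[parts.len() - 5] == TENANTS_SEGMENT_NAME {
        (parts[parts.len() - 4].to_string(), parts[parts.len() - 2].to_string())
    } else {
        ("*".to_string(), "*".to_string())
    }
}

fn main() {
    let p = "/data/.neon/tenants/1234abcd/timelines/5678ef90/some-layer-file";
    assert_eq!(ids_for_metrics(p), ("1234abcd".to_string(), "5678ef90".to_string()));
    assert_eq!(ids_for_metrics("/tmp/unrelated"), ("*".to_string(), "*".to_string()));
}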
let (handle, mut slot_guard) = get_open_files().find_victim_slot().await;
// NB: there is also StorageIoOperation::OpenAfterReplace which is for the case
@@ -344,7 +333,6 @@ impl VirtualFile {
path: path.to_path_buf(),
open_options: reopen_options,
tenant_id,
shard_id,
timeline_id,
};
@@ -586,7 +574,7 @@ impl VirtualFile {
.read_at(buf, offset));
if let Ok(size) = result {
STORAGE_IO_SIZE
.with_label_values(&["read", &self.tenant_id, &self.shard_id, &self.timeline_id])
.with_label_values(&["read", &self.tenant_id, &self.timeline_id])
.add(size as i64);
}
result
@@ -598,7 +586,7 @@ impl VirtualFile {
.write_at(buf, offset));
if let Ok(size) = result {
STORAGE_IO_SIZE
.with_label_values(&["write", &self.tenant_id, &self.shard_id, &self.timeline_id])
.with_label_values(&["write", &self.tenant_id, &self.timeline_id])
.add(size as i64);
}
result

View File

@@ -38,7 +38,7 @@ use crate::tenant::PageReconstructError;
use crate::tenant::Timeline;
use crate::walrecord::*;
use crate::ZERO_PAGE;
use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind};
use pageserver_api::reltag::{RelTag, SlruKind};
use postgres_ffi::pg_constants;
use postgres_ffi::relfile_utils::{FSM_FORKNUM, INIT_FORKNUM, MAIN_FORKNUM, VISIBILITYMAP_FORKNUM};
use postgres_ffi::v14::nonrelfile_utils::mx_offset_to_member_segment;
@@ -102,9 +102,7 @@ impl WalIngest {
buf.advance(decoded.main_data_offset);
assert!(!self.checkpoint_modified);
if decoded.xl_xid != pg_constants::INVALID_TRANSACTION_ID
&& self.checkpoint.update_next_xid(decoded.xl_xid)
{
if self.checkpoint.update_next_xid(decoded.xl_xid) {
self.checkpoint_modified = true;
}
@@ -332,13 +330,8 @@ impl WalIngest {
< 0
{
self.checkpoint.oldestXid = xlog_checkpoint.oldestXid;
self.checkpoint_modified = true;
}
// Write a new checkpoint key-value pair on every checkpoint record, even
// if nothing really changed. Not strictly required, but it seems nice to
// have some trace of the checkpoint records in the layer files at the same
// LSNs.
self.checkpoint_modified = true;
}
}
pg_constants::RM_LOGICALMSG_ID => {
@@ -2208,8 +2201,7 @@ mod tests {
let harness = TenantHarness::create("test_ingest_real_wal").unwrap();
let (tenant, ctx) = harness.load().await;
let remote_initdb_path =
remote_initdb_archive_path(&tenant.tenant_shard_id().tenant_id, &TIMELINE_ID);
let remote_initdb_path = remote_initdb_archive_path(&tenant.tenant_id(), &TIMELINE_ID);
let initdb_path = harness.remote_fs_dir.join(remote_initdb_path.get_path());
std::fs::create_dir_all(initdb_path.parent().unwrap())

View File

@@ -47,11 +47,9 @@ use crate::metrics::{
WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM, WAL_REDO_RECORDS_HISTOGRAM,
WAL_REDO_RECORD_COUNTER, WAL_REDO_TIME,
};
use crate::pgdatadir_mapping::key_to_slru_block;
use crate::pgdatadir_mapping::{key_to_rel_block, key_to_slru_block};
use crate::repository::Key;
use crate::walrecord::NeonWalRecord;
use pageserver_api::key::key_to_rel_block;
use pageserver_api::reltag::{RelTag, SlruKind};
use postgres_ffi::pg_constants;
use postgres_ffi::relfile_utils::VISIBILITYMAP_FORKNUM;

View File

@@ -308,13 +308,13 @@ lfc_change_limit_hook(int newval, void *extra)
Assert(victim->access_count == 0);
#ifdef FALLOC_FL_PUNCH_HOLE
if (fallocate(lfc_desc, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, (off_t) victim->offset * BLOCKS_PER_CHUNK * BLCKSZ, BLOCKS_PER_CHUNK * BLCKSZ) < 0)
neon_log(LOG, "Failed to punch hole in file: %m");
elog(LOG, "Failed to punch hole in file: %m");
#endif
hash_search_with_hash_value(lfc_hash, &victim->key, victim->hash, HASH_REMOVE, NULL);
lfc_ctl->used -= 1;
}
lfc_ctl->limit = new_size;
neon_log(DEBUG1, "set local file cache limit to %d", new_size);
elog(DEBUG1, "set local file cache limit to %d", new_size);
LWLockRelease(lfc_lock);
}
@@ -327,7 +327,7 @@ lfc_init(void)
* shared_preload_libraries.
*/
if (!process_shared_preload_libraries_in_progress)
neon_log(ERROR, "Neon module should be loaded via shared_preload_libraries");
elog(ERROR, "Neon module should be loaded via shared_preload_libraries");
DefineCustomIntVariable("neon.max_file_cache_size",
@@ -643,7 +643,7 @@ lfc_write(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, const void
Assert(victim->access_count == 0);
entry->offset = victim->offset; /* grab victim's chunk */
hash_search_with_hash_value(lfc_hash, &victim->key, victim->hash, HASH_REMOVE, NULL);
neon_log(DEBUG2, "Swap file cache page");
elog(DEBUG2, "Swap file cache page");
}
else
{
@@ -846,10 +846,10 @@ local_cache_pages(PG_FUNCTION_ARGS)
* wrong) function definition though.
*/
if (get_call_result_type(fcinfo, NULL, &expected_tupledesc) != TYPEFUNC_COMPOSITE)
neon_log(ERROR, "return type must be a row type");
elog(ERROR, "return type must be a row type");
if (expected_tupledesc->natts != NUM_LOCALCACHE_PAGES_ELEM)
neon_log(ERROR, "incorrect number of output arguments");
elog(ERROR, "incorrect number of output arguments");
/* Construct a tuple descriptor for the result rows. */
tupledesc = CreateTemplateTupleDesc(expected_tupledesc->natts);

View File

@@ -990,7 +990,7 @@ nm_pack_request(NeonRequest *msg)
case T_NeonErrorResponse:
case T_NeonDbSizeResponse:
default:
neon_log(ERROR, "unexpected neon message tag 0x%02x", msg->tag);
elog(ERROR, "unexpected neon message tag 0x%02x", msg->tag);
break;
}
return s;
@@ -1085,7 +1085,7 @@ nm_unpack_response(StringInfo s)
case T_NeonGetPageRequest:
case T_NeonDbSizeRequest:
default:
neon_log(ERROR, "unexpected neon message tag 0x%02x", tag);
elog(ERROR, "unexpected neon message tag 0x%02x", tag);
break;
}
@@ -1277,7 +1277,7 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, co
XLogFlush(recptr);
lsn = recptr;
ereport(SmgrTrace,
(errmsg(NEON_TAG "Page %u of relation %u/%u/%u.%u was force logged. Evicted at lsn=%X/%X",
(errmsg("Page %u of relation %u/%u/%u.%u was force logged. Evicted at lsn=%X/%X",
blocknum,
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forknum, LSN_FORMAT_ARGS(lsn))));
@@ -1305,7 +1305,7 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, co
if (PageIsNew((Page) buffer))
{
ereport(SmgrTrace,
(errmsg(NEON_TAG "Page %u of relation %u/%u/%u.%u is all-zeros",
(errmsg("Page %u of relation %u/%u/%u.%u is all-zeros",
blocknum,
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forknum)));
@@ -1313,7 +1313,7 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, co
else if (PageIsEmptyHeapPage((Page) buffer))
{
ereport(SmgrTrace,
(errmsg(NEON_TAG "Page %u of relation %u/%u/%u.%u is an empty heap page with no LSN",
(errmsg("Page %u of relation %u/%u/%u.%u is an empty heap page with no LSN",
blocknum,
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forknum)));
@@ -1321,7 +1321,7 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, co
else
{
ereport(PANIC,
(errmsg(NEON_TAG "Page %u of relation %u/%u/%u.%u is evicted with zero LSN",
(errmsg("Page %u of relation %u/%u/%u.%u is evicted with zero LSN",
blocknum,
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forknum)));
@@ -1330,7 +1330,7 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, co
else
{
ereport(SmgrTrace,
(errmsg(NEON_TAG "Page %u of relation %u/%u/%u.%u is already wal logged at lsn=%X/%X",
(errmsg("Page %u of relation %u/%u/%u.%u is already wal logged at lsn=%X/%X",
blocknum,
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forknum, LSN_FORMAT_ARGS(lsn))));
@@ -1430,7 +1430,7 @@ neon_get_request_lsn(bool *latest, NRelFileInfo rinfo, ForkNumber forknum, Block
lsn = GetLastWrittenLSN(rinfo, forknum, blkno);
lsn = nm_adjust_lsn(lsn);
neon_log(DEBUG1, "neon_get_request_lsn GetXLogReplayRecPtr %X/%X request lsn 0 ",
elog(DEBUG1, "neon_get_request_lsn GetXLogReplayRecPtr %X/%X request lsn 0 ",
(uint32) ((lsn) >> 32), (uint32) (lsn));
}
else
@@ -1445,7 +1445,7 @@ neon_get_request_lsn(bool *latest, NRelFileInfo rinfo, ForkNumber forknum, Block
*latest = true;
lsn = GetLastWrittenLSN(rinfo, forknum, blkno);
Assert(lsn != InvalidXLogRecPtr);
neon_log(DEBUG1, "neon_get_request_lsn GetLastWrittenLSN lsn %X/%X ",
elog(DEBUG1, "neon_get_request_lsn GetLastWrittenLSN lsn %X/%X ",
(uint32) ((lsn) >> 32), (uint32) (lsn));
lsn = nm_adjust_lsn(lsn);
@@ -1465,7 +1465,7 @@ neon_get_request_lsn(bool *latest, NRelFileInfo rinfo, ForkNumber forknum, Block
#endif
if (lsn > flushlsn)
{
neon_log(DEBUG5, "last-written LSN %X/%X is ahead of last flushed LSN %X/%X",
elog(DEBUG5, "last-written LSN %X/%X is ahead of last flushed LSN %X/%X",
(uint32) (lsn >> 32), (uint32) lsn,
(uint32) (flushlsn >> 32), (uint32) flushlsn);
XLogFlush(lsn);
@@ -1509,7 +1509,7 @@ neon_exists(SMgrRelation reln, ForkNumber forkNum)
return mdexists(reln, forkNum);
default:
neon_log(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
}
if (get_cached_relsize(InfoFromSMgrRel(reln), forkNum, &n_blocks))
@@ -1561,7 +1561,7 @@ neon_exists(SMgrRelation reln, ForkNumber forkNum)
case T_NeonErrorResponse:
ereport(ERROR,
(errcode(ERRCODE_IO_ERROR),
errmsg(NEON_TAG "could not read relation existence of rel %u/%u/%u.%u from page server at lsn %X/%08X",
errmsg("could not read relation existence of rel %u/%u/%u.%u from page server at lsn %X/%08X",
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forkNum,
(uint32) (request_lsn >> 32), (uint32) request_lsn),
@@ -1570,7 +1570,7 @@ neon_exists(SMgrRelation reln, ForkNumber forkNum)
break;
default:
neon_log(ERROR, "unexpected response from page server with tag 0x%02x", resp->tag);
elog(ERROR, "unexpected response from page server with tag 0x%02x", resp->tag);
}
pfree(resp);
return exists;
@@ -1587,7 +1587,7 @@ neon_create(SMgrRelation reln, ForkNumber forkNum, bool isRedo)
switch (reln->smgr_relpersistence)
{
case 0:
neon_log(ERROR, "cannot call smgrcreate() on rel with unknown persistence");
elog(ERROR, "cannot call smgrcreate() on rel with unknown persistence");
case RELPERSISTENCE_PERMANENT:
break;
@@ -1598,10 +1598,10 @@ neon_create(SMgrRelation reln, ForkNumber forkNum, bool isRedo)
return;
default:
neon_log(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
}
neon_log(SmgrTrace, "Create relation %u/%u/%u.%u",
elog(SmgrTrace, "Create relation %u/%u/%u.%u",
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forkNum);
@@ -1696,7 +1696,7 @@ neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
switch (reln->smgr_relpersistence)
{
case 0:
neon_log(ERROR, "cannot call smgrextend() on rel with unknown persistence");
elog(ERROR, "cannot call smgrextend() on rel with unknown persistence");
case RELPERSISTENCE_PERMANENT:
break;
@@ -1707,7 +1707,7 @@ neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
return;
default:
neon_log(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
}
/*
@@ -1745,7 +1745,7 @@ neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
set_cached_relsize(InfoFromSMgrRel(reln), forkNum, blkno + 1);
lsn = PageGetLSN((Page) buffer);
neon_log(SmgrTrace, "smgrextend called for %u/%u/%u.%u blk %u, page LSN: %X/%08X",
elog(SmgrTrace, "smgrextend called for %u/%u/%u.%u blk %u, page LSN: %X/%08X",
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forkNum, blkno,
(uint32) (lsn >> 32), (uint32) lsn);
@@ -1785,7 +1785,7 @@ neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blocknum,
switch (reln->smgr_relpersistence)
{
case 0:
neon_log(ERROR, "cannot call smgrextend() on rel with unknown persistence");
elog(ERROR, "cannot call smgrextend() on rel with unknown persistence");
case RELPERSISTENCE_PERMANENT:
break;
@@ -1796,7 +1796,7 @@ neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blocknum,
return;
default:
neon_log(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
}
if (max_cluster_size > 0 &&
@@ -1808,7 +1808,7 @@ neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blocknum,
if (current_size >= ((uint64) max_cluster_size) * 1024 * 1024)
ereport(ERROR,
(errcode(ERRCODE_DISK_FULL),
errmsg("could not extend file because project size limit (%d MB) has been exceeded",
errmsg("could not extend file because cluster size limit (%d MB) has been exceeded",
max_cluster_size),
errhint("This limit is defined by neon.max_cluster_size GUC")));
}
@@ -1821,7 +1821,7 @@ neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blocknum,
if ((uint64) blocknum + nblocks >= (uint64) InvalidBlockNumber)
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg(NEON_TAG "cannot extend file \"%s\" beyond %u blocks",
errmsg("cannot extend file \"%s\" beyond %u blocks",
relpath(reln->smgr_rlocator, forkNum),
InvalidBlockNumber)));
@@ -1882,7 +1882,7 @@ neon_open(SMgrRelation reln)
mdopen(reln);
/* no work */
neon_log(SmgrTrace, "open noop");
elog(SmgrTrace, "[NEON_SMGR] open noop");
}
/*
@@ -1919,7 +1919,7 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
return mdprefetch(reln, forknum, blocknum);
default:
neon_log(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
}
if (lfc_cache_contains(InfoFromSMgrRel(reln), forknum, blocknum))
@@ -1964,11 +1964,11 @@ neon_writeback(SMgrRelation reln, ForkNumber forknum,
return;
default:
neon_log(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
}
/* not implemented */
neon_log(SmgrTrace, "writeback noop");
elog(SmgrTrace, "[NEON_SMGR] writeback noop");
#ifdef DEBUG_COMPARE_LOCAL
if (IS_LOCAL_REL(reln))
@@ -2098,7 +2098,7 @@ neon_read_at_lsn(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
case T_NeonErrorResponse:
ereport(ERROR,
(errcode(ERRCODE_IO_ERROR),
errmsg(NEON_TAG "could not read block %u in rel %u/%u/%u.%u from page server at lsn %X/%08X",
errmsg("could not read block %u in rel %u/%u/%u.%u from page server at lsn %X/%08X",
blkno,
RelFileInfoFmt(rinfo),
forkNum,
@@ -2107,7 +2107,7 @@ neon_read_at_lsn(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
((NeonErrorResponse *) resp)->message)));
break;
default:
neon_log(ERROR, "unexpected response from page server with tag 0x%02x", resp->tag);
elog(ERROR, "unexpected response from page server with tag 0x%02x", resp->tag);
}
/* buffer was used, clean up for later reuse */
@@ -2131,7 +2131,7 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer
switch (reln->smgr_relpersistence)
{
case 0:
neon_log(ERROR, "cannot call smgrread() on rel with unknown persistence");
elog(ERROR, "cannot call smgrread() on rel with unknown persistence");
case RELPERSISTENCE_PERMANENT:
break;
@@ -2142,7 +2142,7 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer
return;
default:
neon_log(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
}
/* Try to read from local file cache */
@@ -2170,7 +2170,7 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer
{
if (!PageIsNew((Page) pageserver_masked))
{
neon_log(PANIC, "page is new in MD but not in Page Server at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n%s\n",
elog(PANIC, "page is new in MD but not in Page Server at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n%s\n",
blkno,
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forkNum,
@@ -2180,7 +2180,7 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer
}
else if (PageIsNew((Page) buffer))
{
neon_log(PANIC, "page is new in Page Server but not in MD at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n%s\n",
elog(PANIC, "page is new in Page Server but not in MD at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n%s\n",
blkno,
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forkNum,
@@ -2195,7 +2195,7 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer
if (memcmp(mdbuf_masked, pageserver_masked, BLCKSZ) != 0)
{
neon_log(PANIC, "heap buffers differ at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n------ MD ------\n%s\n------ Page Server ------\n%s\n",
elog(PANIC, "heap buffers differ at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n------ MD ------\n%s\n------ Page Server ------\n%s\n",
blkno,
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forkNum,
@@ -2214,7 +2214,7 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer
if (memcmp(mdbuf_masked, pageserver_masked, BLCKSZ) != 0)
{
neon_log(PANIC, "btree buffers differ at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n------ MD ------\n%s\n------ Page Server ------\n%s\n",
elog(PANIC, "btree buffers differ at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n------ MD ------\n%s\n------ Page Server ------\n%s\n",
blkno,
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forkNum,
@@ -2294,13 +2294,13 @@ neon_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const vo
return;
default:
neon_log(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
}
neon_wallog_page(reln, forknum, blocknum, buffer, false);
lsn = PageGetLSN((Page) buffer);
neon_log(SmgrTrace, "smgrwrite called for %u/%u/%u.%u blk %u, page LSN: %X/%08X",
elog(SmgrTrace, "smgrwrite called for %u/%u/%u.%u blk %u, page LSN: %X/%08X",
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forknum, blocknum,
(uint32) (lsn >> 32), (uint32) lsn);
@@ -2327,7 +2327,7 @@ neon_nblocks(SMgrRelation reln, ForkNumber forknum)
switch (reln->smgr_relpersistence)
{
case 0:
neon_log(ERROR, "cannot call smgrnblocks() on rel with unknown persistence");
elog(ERROR, "cannot call smgrnblocks() on rel with unknown persistence");
break;
case RELPERSISTENCE_PERMANENT:
@@ -2338,12 +2338,12 @@ neon_nblocks(SMgrRelation reln, ForkNumber forknum)
return mdnblocks(reln, forknum);
default:
neon_log(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
}
if (get_cached_relsize(InfoFromSMgrRel(reln), forknum, &n_blocks))
{
neon_log(SmgrTrace, "cached nblocks for %u/%u/%u.%u: %u blocks",
elog(SmgrTrace, "cached nblocks for %u/%u/%u.%u: %u blocks",
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forknum, n_blocks);
return n_blocks;
@@ -2371,7 +2371,7 @@ neon_nblocks(SMgrRelation reln, ForkNumber forknum)
case T_NeonErrorResponse:
ereport(ERROR,
(errcode(ERRCODE_IO_ERROR),
errmsg(NEON_TAG "could not read relation size of rel %u/%u/%u.%u from page server at lsn %X/%08X",
errmsg("could not read relation size of rel %u/%u/%u.%u from page server at lsn %X/%08X",
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forknum,
(uint32) (request_lsn >> 32), (uint32) request_lsn),
@@ -2380,11 +2380,11 @@ neon_nblocks(SMgrRelation reln, ForkNumber forknum)
break;
default:
neon_log(ERROR, "unexpected response from page server with tag 0x%02x", resp->tag);
elog(ERROR, "unexpected response from page server with tag 0x%02x", resp->tag);
}
update_cached_relsize(InfoFromSMgrRel(reln), forknum, n_blocks);
neon_log(SmgrTrace, "neon_nblocks: rel %u/%u/%u fork %u (request LSN %X/%08X): %u blocks",
elog(SmgrTrace, "neon_nblocks: rel %u/%u/%u fork %u (request LSN %X/%08X): %u blocks",
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forknum,
(uint32) (request_lsn >> 32), (uint32) request_lsn,
@@ -2427,7 +2427,7 @@ neon_dbsize(Oid dbNode)
case T_NeonErrorResponse:
ereport(ERROR,
(errcode(ERRCODE_IO_ERROR),
errmsg(NEON_TAG "could not read db size of db %u from page server at lsn %X/%08X",
errmsg("could not read db size of db %u from page server at lsn %X/%08X",
dbNode,
(uint32) (request_lsn >> 32), (uint32) request_lsn),
errdetail("page server returned error: %s",
@@ -2435,10 +2435,10 @@ neon_dbsize(Oid dbNode)
break;
default:
neon_log(ERROR, "unexpected response from page server with tag 0x%02x", resp->tag);
elog(ERROR, "unexpected response from page server with tag 0x%02x", resp->tag);
}
neon_log(SmgrTrace, "neon_dbsize: db %u (request LSN %X/%08X): %ld bytes",
elog(SmgrTrace, "neon_dbsize: db %u (request LSN %X/%08X): %ld bytes",
dbNode,
(uint32) (request_lsn >> 32), (uint32) request_lsn,
db_size);
@@ -2458,7 +2458,7 @@ neon_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
switch (reln->smgr_relpersistence)
{
case 0:
neon_log(ERROR, "cannot call smgrtruncate() on rel with unknown persistence");
elog(ERROR, "cannot call smgrtruncate() on rel with unknown persistence");
break;
case RELPERSISTENCE_PERMANENT:
@@ -2470,7 +2470,7 @@ neon_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
return;
default:
neon_log(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
}
set_cached_relsize(InfoFromSMgrRel(reln), forknum, nblocks);
@@ -2526,7 +2526,7 @@ neon_immedsync(SMgrRelation reln, ForkNumber forknum)
switch (reln->smgr_relpersistence)
{
case 0:
neon_log(ERROR, "cannot call smgrimmedsync() on rel with unknown persistence");
elog(ERROR, "cannot call smgrimmedsync() on rel with unknown persistence");
break;
case RELPERSISTENCE_PERMANENT:
@@ -2538,10 +2538,10 @@ neon_immedsync(SMgrRelation reln, ForkNumber forknum)
return;
default:
neon_log(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
}
neon_log(SmgrTrace, "[NEON_SMGR] immedsync noop");
elog(SmgrTrace, "[NEON_SMGR] immedsync noop");
#ifdef DEBUG_COMPARE_LOCAL
if (IS_LOCAL_REL(reln))
@@ -2566,17 +2566,17 @@ neon_start_unlogged_build(SMgrRelation reln)
* progress at a time. That's enough for the current usage.
*/
if (unlogged_build_phase != UNLOGGED_BUILD_NOT_IN_PROGRESS)
neon_log(ERROR, "unlogged relation build is already in progress");
elog(ERROR, "unlogged relation build is already in progress");
Assert(unlogged_build_rel == NULL);
ereport(SmgrTrace,
(errmsg(NEON_TAG "starting unlogged build of relation %u/%u/%u",
(errmsg("starting unlogged build of relation %u/%u/%u",
RelFileInfoFmt(InfoFromSMgrRel(reln)))));
switch (reln->smgr_relpersistence)
{
case 0:
neon_log(ERROR, "cannot call smgr_start_unlogged_build() on rel with unknown persistence");
elog(ERROR, "cannot call smgr_start_unlogged_build() on rel with unknown persistence");
break;
case RELPERSISTENCE_PERMANENT:
@@ -2589,11 +2589,11 @@ neon_start_unlogged_build(SMgrRelation reln)
return;
default:
neon_log(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
}
if (smgrnblocks(reln, MAIN_FORKNUM) != 0)
neon_log(ERROR, "cannot perform unlogged index build, index is not empty ");
elog(ERROR, "cannot perform unlogged index build, index is not empty ");
unlogged_build_rel = reln;
unlogged_build_phase = UNLOGGED_BUILD_PHASE_1;
@@ -2620,7 +2620,7 @@ neon_finish_unlogged_build_phase_1(SMgrRelation reln)
Assert(unlogged_build_rel == reln);
ereport(SmgrTrace,
(errmsg(NEON_TAG "finishing phase 1 of unlogged build of relation %u/%u/%u",
(errmsg("finishing phase 1 of unlogged build of relation %u/%u/%u",
RelFileInfoFmt(InfoFromSMgrRel(reln)))));
if (unlogged_build_phase == UNLOGGED_BUILD_NOT_PERMANENT)
@@ -2649,7 +2649,7 @@ neon_end_unlogged_build(SMgrRelation reln)
Assert(unlogged_build_rel == reln);
ereport(SmgrTrace,
(errmsg(NEON_TAG "ending unlogged build of relation %u/%u/%u",
(errmsg("ending unlogged build of relation %u/%u/%u",
RelFileInfoFmt(InfoFromNInfoB(rinfob)))));
if (unlogged_build_phase != UNLOGGED_BUILD_NOT_PERMANENT)
@@ -2664,7 +2664,7 @@ neon_end_unlogged_build(SMgrRelation reln)
rinfob = InfoBFromSMgrRel(reln);
for (int forknum = 0; forknum <= MAX_FORKNUM; forknum++)
{
neon_log(SmgrTrace, "forgetting cached relsize for %u/%u/%u.%u",
elog(SmgrTrace, "forgetting cached relsize for %u/%u/%u.%u",
RelFileInfoFmt(InfoFromNInfoB(rinfob)),
forknum);
@@ -2707,7 +2707,7 @@ AtEOXact_neon(XactEvent event, void *arg)
unlogged_build_phase = UNLOGGED_BUILD_NOT_IN_PROGRESS;
ereport(ERROR,
(errcode(ERRCODE_INTERNAL_ERROR),
(errmsg(NEON_TAG "unlogged index build was not properly finished"))));
(errmsg("unlogged index build was not properly finished"))));
}
break;
}
@@ -2806,14 +2806,14 @@ neon_extend_rel_size(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
set_cached_relsize(rinfo, forknum, relsize);
SetLastWrittenLSNForRelation(end_recptr, rinfo, forknum);
neon_log(SmgrTrace, "Set length to %d", relsize);
elog(SmgrTrace, "Set length to %d", relsize);
}
}
#define FSM_TREE_DEPTH ((SlotsPerFSMPage >= 1626) ? 3 : 4)
/*
* TODO: May be it is better to make correspondent function from freespace.c public?
* TODO: May be it is better to make correspondent fgunctio from freespace.c public?
*/
static BlockNumber
get_fsm_physical_block(BlockNumber heapblk)
@@ -2894,7 +2894,7 @@ neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id)
#if PG_VERSION_NUM < 150000
if (!XLogRecGetBlockTag(record, block_id, &rinfo, &forknum, &blkno))
neon_log(PANIC, "failed to locate backup block with ID %d", block_id);
elog(PANIC, "failed to locate backup block with ID %d", block_id);
#else
XLogRecGetBlockTag(record, block_id, &rinfo, &forknum, &blkno);
#endif


@@ -40,23 +40,11 @@ typedef struct
{
RelTag tag;
BlockNumber size;
dlist_node lru_node; /* LRU list node */
} RelSizeEntry;
typedef struct
{
size_t size;
uint64 hits;
uint64 misses;
uint64 writes;
dlist_head lru; /* double linked list for LRU replacement
* algorithm */
} RelSizeHashControl;
static HTAB *relsize_hash;
static LWLockId relsize_lock;
static int relsize_hash_size;
static RelSizeHashControl* relsize_ctl;
static shmem_startup_hook_type prev_shmem_startup_hook = NULL;
#if PG_VERSION_NUM >= 150000
static shmem_request_hook_type prev_shmem_request_hook = NULL;
@@ -64,7 +52,7 @@ static void relsize_shmem_request(void);
#endif
/*
* Size of a cache entry is 36 bytes. So this default will take about 2.3 MB,
* Size of a cache entry is 20 bytes. So this default will take about 1.2 MB,
* which seems reasonable.
*/
#define DEFAULT_RELSIZE_HASH_SIZE (64 * 1024)
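(Arithmetic check on the two per-entry figures above: 64 * 1024 entries * 36 bytes = 2,359,296 bytes ≈ 2.25 MiB, while 64 * 1024 * 20 bytes = 1,310,720 bytes ≈ 1.25 MiB, so both comments are consistent with this default table size.)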
@@ -73,29 +61,19 @@ static void
neon_smgr_shmem_startup(void)
{
static HASHCTL info;
bool found;
if (prev_shmem_startup_hook)
prev_shmem_startup_hook();
LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
relsize_ctl = (RelSizeHashControl *) ShmemInitStruct("relsize_hash", sizeof(RelSizeHashControl), &found);
if (!found)
{
relsize_lock = (LWLockId) GetNamedLWLockTranche("neon_relsize");
info.keysize = sizeof(RelTag);
info.entrysize = sizeof(RelSizeEntry);
relsize_hash = ShmemInitHash("neon_relsize",
relsize_hash_size, relsize_hash_size,
&info,
HASH_ELEM | HASH_BLOBS);
LWLockRelease(AddinShmemInitLock);
relsize_ctl->size = 0;
relsize_ctl->hits = 0;
relsize_ctl->misses = 0;
relsize_ctl->writes = 0;
dlist_init(&relsize_ctl->lru);
}
relsize_lock = (LWLockId) GetNamedLWLockTranche("neon_relsize");
info.keysize = sizeof(RelTag);
info.entrysize = sizeof(RelSizeEntry);
relsize_hash = ShmemInitHash("neon_relsize",
relsize_hash_size, relsize_hash_size,
&info,
HASH_ELEM | HASH_BLOBS);
LWLockRelease(AddinShmemInitLock);
}
bool
@@ -115,15 +93,7 @@ get_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber *size)
if (entry != NULL)
{
*size = entry->size;
relsize_ctl->hits += 1;
found = true;
/* Move entry to the LRU list tail */
dlist_delete(&entry->lru_node);
dlist_push_tail(&relsize_ctl->lru, &entry->lru_node);
}
else
{
relsize_ctl->misses += 1;
}
LWLockRelease(relsize_lock);
}
@@ -137,43 +107,12 @@ set_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber size)
{
RelTag tag;
RelSizeEntry *entry;
bool found = false;
tag.rinfo = rinfo;
tag.forknum = forknum;
LWLockAcquire(relsize_lock, LW_EXCLUSIVE);
/*
* This should actually never happen! Below we check whether the hash is full and delete the least recently used item in that case.
* But for further safety we also perform the check here.
*/
while ((entry = hash_search(relsize_hash, &tag, HASH_ENTER_NULL, &found)) == NULL)
{
RelSizeEntry *victim = dlist_container(RelSizeEntry, lru_node, dlist_pop_head_node(&relsize_ctl->lru));
hash_search(relsize_hash, &victim->tag, HASH_REMOVE, NULL);
Assert(relsize_ctl->size > 0);
relsize_ctl->size -= 1;
}
entry = hash_search(relsize_hash, &tag, HASH_ENTER, NULL);
entry->size = size;
if (!found)
{
if (++relsize_ctl->size == relsize_hash_size)
{
/*
* Remove the least recently used element from the hash.
* The hash size after this becomes `relsize_hash_size-1`.
* That is not considered a problem, because this hash is expected to be large enough that +-1 doesn't matter.
*/
RelSizeEntry *victim = dlist_container(RelSizeEntry, lru_node, dlist_pop_head_node(&relsize_ctl->lru));
hash_search(relsize_hash, &victim->tag, HASH_REMOVE, NULL);
relsize_ctl->size -= 1;
}
}
else
{
dlist_delete(&entry->lru_node);
}
dlist_push_tail(&relsize_ctl->lru, &entry->lru_node);
relsize_ctl->writes += 1;
LWLockRelease(relsize_lock);
}
}
@@ -193,21 +132,6 @@ update_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber size)
entry = hash_search(relsize_hash, &tag, HASH_ENTER, &found);
if (!found || entry->size < size)
entry->size = size;
if (!found)
{
if (++relsize_ctl->size == relsize_hash_size)
{
RelSizeEntry *victim = dlist_container(RelSizeEntry, lru_node, dlist_pop_head_node(&relsize_ctl->lru));
hash_search(relsize_hash, &victim->tag, HASH_REMOVE, NULL);
relsize_ctl->size -= 1;
}
}
else
{
dlist_delete(&entry->lru_node);
}
relsize_ctl->writes += 1;
dlist_push_tail(&relsize_ctl->lru, &entry->lru_node);
LWLockRelease(relsize_lock);
}
}
@@ -218,16 +142,11 @@ forget_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum)
if (relsize_hash_size > 0)
{
RelTag tag;
RelSizeEntry *entry;
tag.rinfo = rinfo;
tag.forknum = forknum;
LWLockAcquire(relsize_lock, LW_EXCLUSIVE);
entry = hash_search(relsize_hash, &tag, HASH_REMOVE, NULL);
if (entry)
{
dlist_delete(&entry->lru_node);
relsize_ctl->size -= 1;
}
hash_search(relsize_hash, &tag, HASH_REMOVE, NULL);
LWLockRelease(relsize_lock);
}
}
@@ -272,7 +191,7 @@ relsize_shmem_request(void)
if (prev_shmem_request_hook)
prev_shmem_request_hook();
RequestAddinShmemSpace(sizeof(RelSizeHashControl) + hash_estimate_size(relsize_hash_size, sizeof(RelSizeEntry)));
RequestAddinShmemSpace(hash_estimate_size(relsize_hash_size, sizeof(RelSizeEntry)));
RequestNamedLWLockTranche("neon_relsize", 1);
}
#endif
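One side of this file's diff keeps the shared relation-size hash bounded by threading every entry onto an LRU list and evicting the least recently used entry whenever an insert would overflow `relsize_hash_size`; the other side drops that machinery. The eviction policy is easy to see in isolation. Below is a minimal Rust sketch of the same idea, not the extension's C code: the shared-memory hash, LWLocks and intrusive dlist are replaced by a plain HashMap plus an access counter, and every name is invented for illustration.

use std::collections::HashMap;

/// A bounded map that evicts the least recently touched key when full.
struct BoundedRelsizeCache<K, V> {
    capacity: usize,
    tick: u64,                     // monotonically increasing "time"
    entries: HashMap<K, (V, u64)>, // value plus last-access tick
}

impl<K: std::hash::Hash + Eq + Clone, V> BoundedRelsizeCache<K, V> {
    fn new(capacity: usize) -> Self {
        Self { capacity, tick: 0, entries: HashMap::new() }
    }

    fn get(&mut self, key: &K) -> Option<&V> {
        self.tick += 1;
        let tick = self.tick;
        self.entries.get_mut(key).map(|(value, last_used)| {
            *last_used = tick; // refresh recency on every hit
            &*value
        })
    }

    fn insert(&mut self, key: K, value: V) {
        self.tick += 1;
        if !self.entries.contains_key(&key) && self.entries.len() >= self.capacity {
            // Evict the entry with the oldest access tick. This scan is O(n);
            // the C code keeps an intrusive dlist so the same choice is O(1).
            let victim = self
                .entries
                .iter()
                .min_by_key(|(_, (_, last_used))| *last_used)
                .map(|(k, _)| k.clone());
            if let Some(victim) = victim {
                self.entries.remove(&victim);
            }
        }
        self.entries.insert(key, (value, self.tick));
    }
}

fn main() {
    let mut cache = BoundedRelsizeCache::new(2);
    cache.insert("rel_a", 10u32);
    cache.insert("rel_b", 20);
    cache.get(&"rel_a");       // rel_a is now the most recently used
    cache.insert("rel_c", 30); // evicts rel_b, the least recently used
    assert!(cache.get(&"rel_b").is_none());
    assert_eq!(cache.get(&"rel_a"), Some(&10));
}

The intrusive list node is also the 16-byte difference between the 36- and 20-byte entry sizes quoted near the top of this file.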


@@ -959,8 +959,8 @@ DetermineEpochStartLsn(WalProposer *wp)
}
/*
* If propEpochStartLsn is 0, it means flushLsn is 0 everywhere, we are bootstrapping
* and nothing was committed yet. Start streaming then from the basebackup LSN.
* If propEpochStartLsn is 0 everywhere, we are bootstrapping -- nothing
* was committed yet. Start streaming then from the basebackup LSN.
*/
if (wp->propEpochStartLsn == InvalidXLogRecPtr && !wp->config->syncSafekeepers)
{
@@ -973,13 +973,12 @@ DetermineEpochStartLsn(WalProposer *wp)
}
/*
* Safekeepers are setting truncateLsn after timelineStartLsn is known, so it
* should never be zero at this point, if we know timelineStartLsn.
*
* timelineStartLsn can be zero only on the first syncSafekeepers run.
* If propEpochStartLsn is not 0, at least one msg with WAL was sent to
* some connected safekeeper; it must have carried truncateLsn pointing to
* the first record.
*/
Assert((wp->truncateLsn != InvalidXLogRecPtr) ||
(wp->config->syncSafekeepers && wp->truncateLsn == wp->timelineStartLsn));
(wp->config->syncSafekeepers && wp->truncateLsn == wp->propEpochStartLsn));
/*
* We will be generating WAL since propEpochStartLsn, so we should set

poetry.lock generated

@@ -1118,13 +1118,13 @@ files = [
[[package]]
name = "jinja2"
version = "3.1.3"
version = "3.1.2"
description = "A very fast and expressive template engine."
optional = false
python-versions = ">=3.7"
files = [
{file = "Jinja2-3.1.3-py3-none-any.whl", hash = "sha256:7d6d50dd97d52cbc355597bd845fabfbac3f551e1f99619e39a35ce8c370b5fa"},
{file = "Jinja2-3.1.3.tar.gz", hash = "sha256:ac8bd6544d4bb2c9792bf3a159e80bba8fda7f07e81bc3aed565432d5925ba90"},
{file = "Jinja2-3.1.2-py3-none-any.whl", hash = "sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61"},
{file = "Jinja2-3.1.2.tar.gz", hash = "sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852"},
]
[package.dependencies]
@@ -2421,16 +2421,6 @@ files = [
{file = "wrapt-1.14.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8ad85f7f4e20964db4daadcab70b47ab05c7c1cf2a7c1e51087bfaa83831854c"},
{file = "wrapt-1.14.1-cp310-cp310-win32.whl", hash = "sha256:a9a52172be0b5aae932bef82a79ec0a0ce87288c7d132946d645eba03f0ad8a8"},
{file = "wrapt-1.14.1-cp310-cp310-win_amd64.whl", hash = "sha256:6d323e1554b3d22cfc03cd3243b5bb815a51f5249fdcbb86fda4bf62bab9e164"},
{file = "wrapt-1.14.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ecee4132c6cd2ce5308e21672015ddfed1ff975ad0ac8d27168ea82e71413f55"},
{file = "wrapt-1.14.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2020f391008ef874c6d9e208b24f28e31bcb85ccff4f335f15a3251d222b92d9"},
{file = "wrapt-1.14.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2feecf86e1f7a86517cab34ae6c2f081fd2d0dac860cb0c0ded96d799d20b335"},
{file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:240b1686f38ae665d1b15475966fe0472f78e71b1b4903c143a842659c8e4cb9"},
{file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9008dad07d71f68487c91e96579c8567c98ca4c3881b9b113bc7b33e9fd78b8"},
{file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6447e9f3ba72f8e2b985a1da758767698efa72723d5b59accefd716e9e8272bf"},
{file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:acae32e13a4153809db37405f5eba5bac5fbe2e2ba61ab227926a22901051c0a"},
{file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:49ef582b7a1152ae2766557f0550a9fcbf7bbd76f43fbdc94dd3bf07cc7168be"},
{file = "wrapt-1.14.1-cp311-cp311-win32.whl", hash = "sha256:358fe87cc899c6bb0ddc185bf3dbfa4ba646f05b1b0b9b5a27c2cb92c2cea204"},
{file = "wrapt-1.14.1-cp311-cp311-win_amd64.whl", hash = "sha256:26046cd03936ae745a502abf44dac702a5e6880b2b01c29aea8ddf3353b68224"},
{file = "wrapt-1.14.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:43ca3bbbe97af00f49efb06e352eae40434ca9d915906f77def219b88e85d907"},
{file = "wrapt-1.14.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:6b1a564e6cb69922c7fe3a678b9f9a3c54e72b469875aa8018f18b4d1dd1adf3"},
{file = "wrapt-1.14.1-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:00b6d4ea20a906c0ca56d84f93065b398ab74b927a7a3dbd470f6fc503f95dc3"},
@@ -2668,4 +2658,4 @@ cffi = ["cffi (>=1.11)"]
[metadata]
lock-version = "2.0"
python-versions = "^3.9"
content-hash = "9cf2734cafd5b6963165d398f1b24621193d5284d0bc7cc26a720a014f523860"
content-hash = "35c237fe6a9278b2dc65b06ed96bde5afb9e393d52c01b00c59acf1df3a8d482"


@@ -5,7 +5,7 @@ edition.workspace = true
license.workspace = true
[features]
default = []
default = ["testing"]
testing = []
[dependencies]
@@ -27,7 +27,6 @@ hex.workspace = true
hmac.workspace = true
hostname.workspace = true
humantime.workspace = true
hyper-tungstenite.workspace = true
hyper.workspace = true
ipnet.workspace = true
itertools.workspace = true
@@ -66,11 +65,13 @@ tls-listener.workspace = true
tokio-postgres.workspace = true
tokio-rustls.workspace = true
tokio-util.workspace = true
tokio-tungstenite.workspace = true
tokio = { workspace = true, features = ["signal"] }
tracing-opentelemetry.workspace = true
tracing-subscriber.workspace = true
tracing-utils.workspace = true
tracing.workspace = true
tungstenite.workspace = true
url.workspace = true
utils.workspace = true
uuid.workspace = true
@@ -89,4 +90,3 @@ camino-tempfile.workspace = true
rcgen.workspace = true
rstest.workspace = true
tokio-postgres-rustls.workspace = true
walkdir.workspace = true


@@ -10,7 +10,6 @@ use crate::auth::credentials::check_peer_addr_is_in_list;
use crate::auth::validate_password_and_exchange;
use crate::cache::Cached;
use crate::console::errors::GetAuthInfoError;
use crate::console::provider::ConsoleBackend;
use crate::console::AuthSecret;
use crate::context::RequestMonitoring;
use crate::proxy::connect_compute::handle_try_wake;
@@ -44,8 +43,11 @@ use tracing::{error, info, warn};
/// this helps us provide the credentials only to those auth
/// backends which require them for the authentication process.
pub enum BackendType<'a, T> {
/// Cloud API (V2).
Console(Cow<'a, ConsoleBackend>, T),
/// Current Cloud API (V2).
Console(Cow<'a, console::provider::neon::Api>, T),
/// Local mock of Cloud API (V2).
#[cfg(feature = "testing")]
Postgres(Cow<'a, console::provider::mock::Api>, T),
/// Authentication via a web browser.
Link(Cow<'a, url::ApiUrl>),
#[cfg(test)]
@@ -62,15 +64,9 @@ impl std::fmt::Display for BackendType<'_, ()> {
fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
use BackendType::*;
match self {
Console(api, _) => match &**api {
ConsoleBackend::Console(endpoint) => {
fmt.debug_tuple("Console").field(&endpoint.url()).finish()
}
#[cfg(feature = "testing")]
ConsoleBackend::Postgres(endpoint) => {
fmt.debug_tuple("Postgres").field(&endpoint.url()).finish()
}
},
Console(endpoint, _) => fmt.debug_tuple("Console").field(&endpoint.url()).finish(),
#[cfg(feature = "testing")]
Postgres(endpoint, _) => fmt.debug_tuple("Postgres").field(&endpoint.url()).finish(),
Link(url) => fmt.debug_tuple("Link").field(&url.as_str()).finish(),
#[cfg(test)]
Test(_) => fmt.debug_tuple("Test").finish(),
@@ -85,6 +81,8 @@ impl<T> BackendType<'_, T> {
use BackendType::*;
match self {
Console(c, x) => Console(Cow::Borrowed(c), x),
#[cfg(feature = "testing")]
Postgres(c, x) => Postgres(Cow::Borrowed(c), x),
Link(c) => Link(Cow::Borrowed(c)),
#[cfg(test)]
Test(x) => Test(*x),
@@ -100,6 +98,8 @@ impl<'a, T> BackendType<'a, T> {
use BackendType::*;
match self {
Console(c, x) => Console(c, f(x)),
#[cfg(feature = "testing")]
Postgres(c, x) => Postgres(c, f(x)),
Link(c) => Link(c),
#[cfg(test)]
Test(x) => Test(x),
@@ -114,6 +114,8 @@ impl<'a, T, E> BackendType<'a, Result<T, E>> {
use BackendType::*;
match self {
Console(c, x) => x.map(|x| Console(c, x)),
#[cfg(feature = "testing")]
Postgres(c, x) => x.map(|x| Postgres(c, x)),
Link(c) => Ok(Link(c)),
#[cfg(test)]
Test(x) => Ok(Test(x)),
@@ -323,6 +325,8 @@ impl<'a> BackendType<'a, ComputeUserInfoMaybeEndpoint> {
match self {
Console(_, user_info) => user_info.project.clone(),
#[cfg(feature = "testing")]
Postgres(_, user_info) => user_info.project.clone(),
Link(_) => Some("link".into()),
#[cfg(test)]
Test(_) => Some("test".into()),
@@ -335,6 +339,8 @@ impl<'a> BackendType<'a, ComputeUserInfoMaybeEndpoint> {
match self {
Console(_, user_info) => &user_info.user,
#[cfg(feature = "testing")]
Postgres(_, user_info) => &user_info.user,
Link(_) => "link",
#[cfg(test)]
Test(_) => "test",
@@ -365,6 +371,19 @@ impl<'a> BackendType<'a, ComputeUserInfoMaybeEndpoint> {
.await?;
(cache_info, BackendType::Console(api, user_info))
}
#[cfg(feature = "testing")]
Postgres(api, user_info) => {
info!(
user = &*user_info.user,
project = user_info.project(),
"performing authentication using a local postgres instance"
);
let (cache_info, user_info) =
auth_and_wake_compute(ctx, &*api, user_info, client, allow_cleartext, config)
.await?;
(cache_info, BackendType::Postgres(api, user_info))
}
// NOTE: this auth backend doesn't use client credentials.
Link(url) => {
info!("performing link authentication");
@@ -395,6 +414,8 @@ impl BackendType<'_, ComputeUserInfo> {
use BackendType::*;
match self {
Console(api, user_info) => api.get_allowed_ips(ctx, user_info).await,
#[cfg(feature = "testing")]
Postgres(api, user_info) => api.get_allowed_ips(ctx, user_info).await,
Link(_) => Ok(Cached::new_uncached(Arc::new(vec![]))),
#[cfg(test)]
Test(x) => Ok(Cached::new_uncached(Arc::new(x.get_allowed_ips()?))),
@@ -411,6 +432,8 @@ impl BackendType<'_, ComputeUserInfo> {
match self {
Console(api, user_info) => api.wake_compute(ctx, user_info).map_ok(Some).await,
#[cfg(feature = "testing")]
Postgres(api, user_info) => api.wake_compute(ctx, user_info).map_ok(Some).await,
Link(_) => Ok(None),
#[cfg(test)]
Test(x) => x.wake_compute().map(Some),
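Both shapes of this refactor funnel every console call through a match on the backend enum; the difference is only whether the mock lives behind a nested `ConsoleBackend::Postgres` variant or a top-level `BackendType::Postgres` variant compiled in with the `testing` feature. A stripped-down, hypothetical sketch of the cfg-gated-variant pattern (all names invented; this is not the proxy's actual type):

/// Which control-plane implementation serves API calls.
enum Backend {
    /// Real HTTP console API.
    Console(String),
    /// Local mock, only compiled when the `testing` feature is enabled.
    #[cfg(feature = "testing")]
    Postgres(String),
}

impl Backend {
    /// Every operation dispatches the same way; the mock arm simply
    /// disappears from release builds together with its variant.
    fn describe(&self) -> String {
        match self {
            Backend::Console(url) => format!("console at {url}"),
            #[cfg(feature = "testing")]
            Backend::Postgres(url) => format!("mock console backed by {url}"),
        }
    }
}

fn main() {
    let b = Backend::Console("https://console.example.invalid".into());
    println!("{}", b.describe());
}

The appeal of the nested-enum shape on the other side of the diff is that the #[cfg] noise appears once, in the wrapper enum, instead of at every call site.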


@@ -57,31 +57,24 @@ pub(super) async fn authenticate(
link_uri: &reqwest::Url,
client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,
) -> auth::Result<NodeInfo> {
// registering waiter can fail if we get unlucky with rng.
// just try again.
let (psql_session_id, waiter) = loop {
let psql_session_id = new_psql_session_id();
match console::mgmt::get_waiter(&psql_session_id) {
Ok(waiter) => break (psql_session_id, waiter),
Err(_e) => continue,
}
};
let psql_session_id = new_psql_session_id();
let span = info_span!("link", psql_session_id = &psql_session_id);
let greeting = hello_message(link_uri, &psql_session_id);
// Give user a URL to spawn a new database.
info!(parent: &span, "sending the auth URL to the user");
client
.write_message_noflush(&Be::AuthenticationOk)?
.write_message_noflush(&Be::CLIENT_ENCODING)?
.write_message(&Be::NoticeResponse(&greeting))
.await?;
let db_info = console::mgmt::with_waiter(psql_session_id, |waiter| async {
// Give user a URL to spawn a new database.
info!(parent: &span, "sending the auth URL to the user");
client
.write_message_noflush(&Be::AuthenticationOk)?
.write_message_noflush(&Be::CLIENT_ENCODING)?
.write_message(&Be::NoticeResponse(&greeting))
.await?;
// Wait for web console response (see `mgmt`).
info!(parent: &span, "waiting for console's reply...");
let db_info = waiter.await.map_err(LinkAuthError::from)?;
// Wait for web console response (see `mgmt`).
info!(parent: &span, "waiting for console's reply...");
waiter.await?.map_err(LinkAuthError::AuthFailed)
})
.await?;
client.write_message_noflush(&Be::NoticeResponse("Connecting to database."))?;
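One side of this hunk draws the `psql_session_id` in a loop and simply retries whenever the randomly chosen id is already registered; the other registers once inside `with_waiter`. The retry shape itself is generic; a toy sketch with a HashSet-backed registry and a fixed sequence of "draws" standing in for the real `Waiters` registry and RNG (all names invented):

use std::collections::HashSet;

/// Minimal stand-in for the waiter registry: `register` fails when the id
/// is already taken, which is exactly the case the retry loop guards against.
struct Registry {
    taken: HashSet<u32>,
}

impl Registry {
    fn register(&mut self, id: u32) -> Result<u32, ()> {
        if self.taken.insert(id) { Ok(id) } else { Err(()) }
    }
}

fn main() {
    let mut reg = Registry { taken: HashSet::from([7]) };
    // Pretend these are random draws; the first one collides.
    let mut draws = [7u32, 42].into_iter();

    let session_id = loop {
        let candidate = draws.next().expect("ran out of draws");
        match reg.register(candidate) {
            Ok(id) => break id,
            // Unlucky draw: the id was already registered, so try again.
            Err(()) => continue,
        }
    };
    assert_eq!(session_id, 42);
}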


@@ -56,16 +56,16 @@ impl ComputeUserInfoMaybeEndpoint {
pub fn endpoint_sni<'a>(
sni: &'a str,
common_names: &HashSet<String>,
) -> Result<&'a str, ComputeUserInfoParseError> {
) -> Result<(&'a str, &'a str), ComputeUserInfoParseError> {
let Some((subdomain, common_name)) = sni.split_once('.') else {
return Err(ComputeUserInfoParseError::UnknownCommonName { cn: sni.into() });
return Ok((sni, ""));
};
if !common_names.contains(common_name) {
return Err(ComputeUserInfoParseError::UnknownCommonName {
cn: common_name.into(),
});
}
Ok(subdomain)
Ok((subdomain, common_name))
}
impl ComputeUserInfoMaybeEndpoint {
@@ -102,7 +102,7 @@ impl ComputeUserInfoMaybeEndpoint {
let project_from_domain = if let Some(sni_str) = sni {
if let Some(cn) = common_names {
Some(SmolStr::from(endpoint_sni(sni_str, cn)?))
Some(SmolStr::from(endpoint_sni(sni_str, cn)?.0))
} else {
None
}
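The hunks above change what the caller gets back from `endpoint_sni`: one side returns only the endpoint subdomain and rejects an SNI value with no dot, the other also returns the matched common name and tolerates the dot-less case. The parsing itself is a single `split_once` plus a set lookup; here is a self-contained sketch of that logic with an invented hostname and a simplified error type (not the proxy's `ComputeUserInfoParseError`):

use std::collections::HashSet;

/// Split an SNI hostname like "ep-purple-haze-123456.cloud.example.com"
/// into (endpoint, common_name) and check the common name against the
/// configured set.
fn endpoint_from_sni<'a>(
    sni: &'a str,
    common_names: &HashSet<String>,
) -> Result<(&'a str, &'a str), String> {
    let Some((subdomain, common_name)) = sni.split_once('.') else {
        // No dot at all: treat the whole value as the endpoint.
        return Ok((sni, ""));
    };
    if !common_names.contains(common_name) {
        return Err(format!("unknown common name: {common_name}"));
    }
    Ok((subdomain, common_name))
}

fn main() {
    let cns: HashSet<String> = ["cloud.example.com".to_string()].into();
    let (ep, cn) = endpoint_from_sni("ep-purple-haze-123456.cloud.example.com", &cns).unwrap();
    assert_eq!(ep, "ep-purple-haze-123456");
    assert_eq!(cn, "cloud.example.com");
}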


@@ -249,19 +249,12 @@ async fn main() -> anyhow::Result<()> {
}
if let auth::BackendType::Console(api, _) = &config.auth_backend {
match &**api {
proxy::console::provider::ConsoleBackend::Console(api) => {
let cache = api.caches.project_info.clone();
if let Some(url) = args.redis_notifications {
info!("Starting redis notifications listener ({url})");
maintenance_tasks
.spawn(notifications::task_main(url.to_owned(), cache.clone()));
}
maintenance_tasks.spawn(async move { cache.clone().gc_worker().await });
}
#[cfg(feature = "testing")]
proxy::console::provider::ConsoleBackend::Postgres(_) => {}
let cache = api.caches.project_info.clone();
if let Some(url) = args.redis_notifications {
info!("Starting redis notifications listener ({url})");
maintenance_tasks.spawn(notifications::task_main(url.to_owned(), cache.clone()));
}
maintenance_tasks.spawn(async move { cache.clone().gc_worker().await });
}
let maintenance = loop {
@@ -358,15 +351,13 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
let endpoint = http::Endpoint::new(url, http::new_client(rate_limiter_config));
let api = console::provider::neon::Api::new(endpoint, caches, locks);
let api = console::provider::ConsoleBackend::Console(api);
auth::BackendType::Console(Cow::Owned(api), ())
}
#[cfg(feature = "testing")]
AuthBackend::Postgres => {
let url = args.auth_endpoint.parse()?;
let api = console::provider::mock::Api::new(url);
let api = console::provider::ConsoleBackend::Postgres(api);
auth::BackendType::Console(Cow::Owned(api), ())
auth::BackendType::Postgres(Cow::Owned(api), ())
}
AuthBackend::Link => {
let url = args.uri.parse()?;


@@ -13,10 +13,16 @@ use tracing::{error, info, info_span, Instrument};
static CPLANE_WAITERS: Lazy<Waiters<ComputeReady>> = Lazy::new(Default::default);
/// Give caller an opportunity to wait for the cloud's reply.
pub fn get_waiter(
pub async fn with_waiter<R, T, E>(
psql_session_id: impl Into<String>,
) -> Result<Waiter<'static, ComputeReady>, waiters::RegisterError> {
CPLANE_WAITERS.register(psql_session_id.into())
action: impl FnOnce(Waiter<'static, ComputeReady>) -> R,
) -> Result<T, E>
where
R: std::future::Future<Output = Result<T, E>>,
E: From<waiters::RegisterError>,
{
let waiter = CPLANE_WAITERS.register(psql_session_id.into())?;
action(waiter).await
}
pub fn notify(psql_session_id: &str, msg: ComputeReady) -> Result<(), waiters::NotifyError> {
@@ -71,7 +77,7 @@ async fn handle_connection(socket: TcpStream) -> Result<(), QueryError> {
}
/// A message received by `mgmt` when a compute node is ready.
pub type ComputeReady = DatabaseInfo;
pub type ComputeReady = Result<DatabaseInfo, String>;
// TODO: replace with an http-based protocol.
struct MgmtHandler;
@@ -96,7 +102,7 @@ fn try_process_query(pgb: &mut PostgresBackendTCP, query: &str) -> Result<(), Qu
let _enter = span.enter();
info!("got response: {:?}", resp.result);
match notify(resp.session_id, resp.result) {
match notify(resp.session_id, Ok(resp.result)) {
Ok(()) => {
pgb.write_message_noflush(&SINGLE_COL_ROWDESC)?
.write_message_noflush(&BeMessage::DataRow(&[Some(b"ok")]))?
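Whichever shape the code takes, the mgmt flow is the same rendezvous: the link-auth path registers a waiter keyed by the psql session id and suspends, and `notify` (called from `try_process_query` when the console replies) wakes exactly that waiter with a `ComputeReady`. A minimal sketch of the pattern with a tokio oneshot channel standing in for the `Waiter` type (assumes the `tokio` crate with the `sync`, `rt` and `macros` features; this is not the proxy's actual `Waiters` implementation):

use std::collections::HashMap;
use std::sync::{Arc, Mutex};
use tokio::sync::oneshot;

type ComputeReady = Result<String, String>; // stand-in for DatabaseInfo

#[derive(Default)]
struct Waiters {
    pending: Mutex<HashMap<String, oneshot::Sender<ComputeReady>>>,
}

impl Waiters {
    /// Register a waiter for `session_id`; the caller awaits the receiver.
    fn register(&self, session_id: String) -> oneshot::Receiver<ComputeReady> {
        let (tx, rx) = oneshot::channel();
        self.pending.lock().unwrap().insert(session_id, tx);
        rx
    }

    /// Called by the mgmt side when the console reports the compute is ready.
    fn notify(&self, session_id: &str, msg: ComputeReady) -> Result<(), ComputeReady> {
        match self.pending.lock().unwrap().remove(session_id) {
            Some(tx) => tx.send(msg),
            None => Err(msg), // nobody is waiting for this session id
        }
    }
}

#[tokio::main(flavor = "current_thread")]
async fn main() {
    let waiters = Arc::new(Waiters::default());

    let waiter = waiters.register("psql-session-1".into());

    // Simulate the mgmt connection delivering the console's reply.
    let w2 = waiters.clone();
    tokio::spawn(async move {
        w2.notify("psql-session-1", Ok("db=neondb host=...".into())).unwrap();
    });

    let db_info = waiter.await.expect("sender dropped").expect("auth failed");
    println!("compute ready: {db_info}");
}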


@@ -248,75 +248,23 @@ pub trait Api {
async fn get_role_secret(
&self,
ctx: &mut RequestMonitoring,
user_info: &ComputeUserInfo,
creds: &ComputeUserInfo,
) -> Result<Option<CachedRoleSecret>, errors::GetAuthInfoError>;
async fn get_allowed_ips(
&self,
ctx: &mut RequestMonitoring,
user_info: &ComputeUserInfo,
creds: &ComputeUserInfo,
) -> Result<CachedAllowedIps, errors::GetAuthInfoError>;
/// Wake up the compute node and return the corresponding connection info.
async fn wake_compute(
&self,
ctx: &mut RequestMonitoring,
user_info: &ComputeUserInfo,
creds: &ComputeUserInfo,
) -> Result<CachedNodeInfo, errors::WakeComputeError>;
}
#[derive(Clone)]
pub enum ConsoleBackend {
/// Current Cloud API (V2).
Console(neon::Api),
/// Local mock of Cloud API (V2).
#[cfg(feature = "testing")]
Postgres(mock::Api),
}
#[async_trait]
impl Api for ConsoleBackend {
async fn get_role_secret(
&self,
ctx: &mut RequestMonitoring,
user_info: &ComputeUserInfo,
) -> Result<Option<CachedRoleSecret>, errors::GetAuthInfoError> {
use ConsoleBackend::*;
match self {
Console(api) => api.get_role_secret(ctx, user_info).await,
#[cfg(feature = "testing")]
Postgres(api) => api.get_role_secret(ctx, user_info).await,
}
}
async fn get_allowed_ips(
&self,
ctx: &mut RequestMonitoring,
user_info: &ComputeUserInfo,
) -> Result<CachedAllowedIps, errors::GetAuthInfoError> {
use ConsoleBackend::*;
match self {
Console(api) => api.get_allowed_ips(ctx, user_info).await,
#[cfg(feature = "testing")]
Postgres(api) => api.get_allowed_ips(ctx, user_info).await,
}
}
async fn wake_compute(
&self,
ctx: &mut RequestMonitoring,
user_info: &ComputeUserInfo,
) -> Result<CachedNodeInfo, errors::WakeComputeError> {
use ConsoleBackend::*;
match self {
Console(api) => api.wake_compute(ctx, user_info).await,
#[cfg(feature = "testing")]
Postgres(api) => api.wake_compute(ctx, user_info).await,
}
}
}
/// Various caches for [`console`](super).
pub struct ApiCaches {
/// Cache for the `wake_compute` API method.


@@ -179,18 +179,17 @@ impl super::Api for Api {
return Ok(Some(role_secret));
}
let auth_info = self.do_get_auth_info(ctx, user_info).await?;
if let Some(project_id) = auth_info.project_id {
if let Some(secret) = &auth_info.secret {
self.caches
.project_info
.insert_role_secret(&project_id, ep, user, secret.clone())
}
self.caches.project_info.insert_allowed_ips(
&project_id,
ep,
Arc::new(auth_info.allowed_ips),
);
let project_id = auth_info.project_id.unwrap_or(ep.clone());
if let Some(secret) = &auth_info.secret {
self.caches
.project_info
.insert_role_secret(&project_id, ep, user, secret.clone())
}
self.caches.project_info.insert_allowed_ips(
&project_id,
ep,
Arc::new(auth_info.allowed_ips),
);
// When we just got a secret, we don't need to invalidate it.
Ok(auth_info.secret.map(Cached::new_uncached))
}
@@ -213,16 +212,15 @@ impl super::Api for Api {
let auth_info = self.do_get_auth_info(ctx, user_info).await?;
let allowed_ips = Arc::new(auth_info.allowed_ips);
let user = &user_info.user;
if let Some(project_id) = auth_info.project_id {
if let Some(secret) = &auth_info.secret {
self.caches
.project_info
.insert_role_secret(&project_id, ep, user, secret.clone())
}
let project_id = auth_info.project_id.unwrap_or(ep.clone());
if let Some(secret) = &auth_info.secret {
self.caches
.project_info
.insert_allowed_ips(&project_id, ep, allowed_ips.clone());
.insert_role_secret(&project_id, ep, user, secret.clone())
}
self.caches
.project_info
.insert_allowed_ips(&project_id, ep, allowed_ips.clone());
Ok(Cached::new_uncached(allowed_ips))
}


@@ -32,7 +32,6 @@ pub struct RequestMonitoring {
user: Option<SmolStr>,
application: Option<SmolStr>,
error_kind: Option<ErrorKind>,
success: bool,
// extra
// This sender is here to keep the request monitoring channel open while requests are taking place.
@@ -60,7 +59,6 @@ impl RequestMonitoring {
user: None,
application: None,
error_kind: None,
success: false,
sender: LOG_CHAN.get().and_then(|tx| tx.upgrade()),
latency_timer: LatencyTimer::new(protocol),
@@ -98,10 +96,6 @@ impl RequestMonitoring {
self.user = Some(user);
}
pub fn set_success(&mut self) {
self.success = true;
}
pub fn log(&mut self) {
if let Some(tx) = self.sender.take() {
let _: Result<(), _> = tx.send(self.clone());


@@ -1,8 +1,7 @@
use std::{sync::Arc, time::SystemTime};
use std::sync::Arc;
use anyhow::Context;
use bytes::BytesMut;
use chrono::{Datelike, Timelike};
use futures::{Stream, StreamExt};
use parquet::{
basic::Compression,
@@ -87,12 +86,6 @@ struct RequestData {
project: Option<String>,
branch: Option<String>,
error: Option<&'static str>,
/// Success is counted if we form a HTTP response with sql rows inside
/// Or if we make it to proxy_pass
success: bool,
/// Tracks time from session start (HTTP request/libpq TCP handshake)
/// Through to success/failure
duration_us: u64,
}
impl From<RequestMonitoring> for RequestData {
@@ -109,11 +102,6 @@ impl From<RequestMonitoring> for RequestData {
protocol: value.protocol,
region: value.region,
error: value.error_kind.as_ref().map(|e| e.to_str()),
success: value.success,
duration_us: SystemTime::from(value.first_packet)
.elapsed()
.unwrap_or_default()
.as_micros() as u64, // 584 millenia... good enough
}
}
}
@@ -278,13 +266,7 @@ async fn upload_parquet(
let compression = len as f64 / len_uncompressed as f64;
let size = data.len();
let now = chrono::Utc::now();
let id = uuid::Uuid::new_v7(uuid::Timestamp::from_unix(
uuid::NoContext,
// we won't be running this in 1970. this cast is ok
now.timestamp() as u64,
now.timestamp_subsec_nanos(),
));
let id = uuid::Uuid::now_v7();
info!(
%id,
@@ -292,14 +274,7 @@ async fn upload_parquet(
size, compression, "uploading request parquet file"
);
let year = now.year();
let month = now.month();
let day = now.day();
let hour = now.hour();
// segment files by time for S3 performance
let path = RemotePath::from_string(&format!(
"{year:04}/{month:02}/{day:02}/{hour:02}/requests_{id}.parquet"
))?;
let path = RemotePath::from_string(&format!("requests_{id}.parquet"))?;
backoff::retry(
|| async {
let stream = futures::stream::once(futures::future::ready(Ok(data.clone())));
@@ -357,7 +332,6 @@ mod tests {
DEFAULT_MAX_KEYS_PER_LIST_RESPONSE, DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT,
};
use tokio::{sync::mpsc, time};
use walkdir::WalkDir;
use super::{worker_inner, ParquetConfig, ParquetUploadArgs, RequestData};
@@ -446,8 +420,6 @@ mod tests {
protocol: ["tcp", "ws", "http"][rng.gen_range(0..3)],
region: "us-east-1",
error: None,
success: rng.gen(),
duration_us: rng.gen_range(0..30_000_000),
}
}
@@ -470,11 +442,9 @@ mod tests {
worker_inner(storage, rx, config).await.unwrap();
let mut files = WalkDir::new(tmpdir.as_std_path())
.into_iter()
.filter_map(|entry| entry.ok())
.filter(|entry| entry.file_type().is_file())
.map(|entry| entry.path().to_path_buf())
let mut files = std::fs::read_dir(tmpdir.as_std_path())
.unwrap()
.map(|entry| entry.unwrap().path())
.collect_vec();
files.sort();
@@ -515,15 +485,15 @@ mod tests {
assert_eq!(
file_stats,
[
(1087635, 3, 6000),
(1087288, 3, 6000),
(1087444, 3, 6000),
(1087572, 3, 6000),
(1087468, 3, 6000),
(1087500, 3, 6000),
(1087533, 3, 6000),
(1087566, 3, 6000),
(362671, 1, 2000)
(1029153, 3, 6000),
(1029075, 3, 6000),
(1029216, 3, 6000),
(1029129, 3, 6000),
(1029250, 3, 6000),
(1029017, 3, 6000),
(1029175, 3, 6000),
(1029247, 3, 6000),
(343124, 1, 2000)
],
);
@@ -553,11 +523,11 @@ mod tests {
assert_eq!(
file_stats,
[
(1028637, 5, 10000),
(1031969, 5, 10000),
(1019900, 5, 10000),
(1020365, 5, 10000),
(1025010, 5, 10000)
(1166201, 6, 12000),
(1163577, 6, 12000),
(1164641, 6, 12000),
(1168772, 6, 12000),
(196761, 1, 2000)
],
);
@@ -589,11 +559,11 @@ mod tests {
assert_eq!(
file_stats,
[
(1210770, 6, 12000),
(1211036, 6, 12000),
(1210990, 6, 12000),
(1210861, 6, 12000),
(202073, 1, 2000)
(1144934, 6, 12000),
(1144941, 6, 12000),
(1144735, 6, 12000),
(1144936, 6, 12000),
(191035, 1, 2000)
],
);
@@ -618,15 +588,15 @@ mod tests {
assert_eq!(
file_stats,
[
(1087635, 3, 6000),
(1087288, 3, 6000),
(1087444, 3, 6000),
(1087572, 3, 6000),
(1087468, 3, 6000),
(1087500, 3, 6000),
(1087533, 3, 6000),
(1087566, 3, 6000),
(362671, 1, 2000)
(1029153, 3, 6000),
(1029075, 3, 6000),
(1029216, 3, 6000),
(1029129, 3, 6000),
(1029250, 3, 6000),
(1029017, 3, 6000),
(1029175, 3, 6000),
(1029247, 3, 6000),
(343124, 1, 2000)
],
);
@@ -663,7 +633,7 @@ mod tests {
// files are smaller than the size threshold, but they took too long to fill so were flushed early
assert_eq!(
file_stats,
[(545264, 2, 3001), (545025, 2, 3000), (544857, 2, 2999)],
[(515807, 2, 3001), (515585, 2, 3000), (515425, 2, 2999)],
);
tmpdir.close().unwrap();
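Setting the test numbers aside, the substantive change in this file is the object key: one side writes every file as `requests_{id}.parquet` at the bucket root, the other segments keys by UTC hour (the comment gives S3 performance as the motivation) and derives the id from a UUIDv7 so names sort by creation time. A small sketch of building such a key (assumes the `chrono` crate and the `uuid` crate with its `v7` feature enabled):

use chrono::{Datelike, Timelike, Utc};
use uuid::Uuid;

/// Build an object key segmented by UTC hour, e.g.
/// "2024/01/11/15/requests_<uuidv7>.parquet".
fn parquet_key(now: chrono::DateTime<Utc>, id: Uuid) -> String {
    format!(
        "{:04}/{:02}/{:02}/{:02}/requests_{}.parquet",
        now.year(),
        now.month(),
        now.day(),
        now.hour(),
        id
    )
}

fn main() {
    // Uuid::now_v7 embeds the current unix timestamp in the high bits,
    // so ids generated later sort lexicographically after earlier ones.
    let key = parquet_key(Utc::now(), Uuid::now_v7());
    println!("uploading to {key}");
}

Hour-level prefixes make it cheap to list or expire a bounded time range without scanning the whole bucket.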


@@ -356,7 +356,6 @@ pub async fn proxy_pass(
compute: impl AsyncRead + AsyncWrite + Unpin,
aux: MetricsAuxInfo,
) -> anyhow::Result<()> {
ctx.set_success();
ctx.log();
let usage = USAGE_METRICS.register(Ids {


@@ -160,6 +160,8 @@ where
let node_info = loop {
let wake_res = match user_info {
auth::BackendType::Console(api, user_info) => api.wake_compute(ctx, user_info).await,
#[cfg(feature = "testing")]
auth::BackendType::Postgres(api, user_info) => api.wake_compute(ctx, user_info).await,
// nothing to do?
auth::BackendType::Link(_) => return Err(err.into()),
// test backend


@@ -46,11 +46,14 @@ enum Notification {
}
#[derive(Clone, Debug, Deserialize, Eq, PartialEq)]
struct AllowedIpsUpdate {
#[serde(rename = "project")]
project_id: SmolStr,
}
#[derive(Clone, Debug, Deserialize, Eq, PartialEq)]
struct PasswordUpdate {
#[serde(rename = "project")]
project_id: SmolStr,
#[serde(rename = "role")]
role_name: SmolStr,
}
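The only change to these two structs is mapping the wire field names (`project`, `role`) onto the internal field names with `#[serde(rename = ...)]`, which is what the updated tests further down exercise. A self-contained illustration of that mapping (assumes `serde` with the `derive` feature plus `serde_json`):

use serde::Deserialize;

#[derive(Debug, Deserialize, PartialEq)]
struct PasswordUpdate {
    #[serde(rename = "project")]
    project_id: String,
    #[serde(rename = "role")]
    role_name: String,
}

fn main() {
    let data = r#"{"project": "new_project", "role": "new_role"}"#;
    let update: PasswordUpdate = serde_json::from_str(data).unwrap();
    assert_eq!(
        update,
        PasswordUpdate {
            project_id: "new_project".into(),
            role_name: "new_role".into(),
        }
    );
}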
fn deserialize_json_string<'de, D, T>(deserializer: D) -> Result<T, D::Error>
@@ -148,7 +151,7 @@ mod tests {
#[test]
fn parse_allowed_ips() -> anyhow::Result<()> {
let project_id = "new_project".to_string();
let data = format!("{{\"project_id\": \"{project_id}\"}}");
let data = format!("{{\"project\": \"{project_id}\"}}");
let text = json!({
"type": "message",
"topic": "/allowed_ips_updated",
@@ -174,7 +177,7 @@ mod tests {
fn parse_password_updated() -> anyhow::Result<()> {
let project_id = "new_project".to_string();
let role_name = "new_role".to_string();
let data = format!("{{\"project_id\": \"{project_id}\", \"role_name\": \"{role_name}\"}}");
let data = format!("{{\"project\": \"{project_id}\", \"role\": \"{role_name}\"}}");
let text = json!({
"type": "message",
"topic": "/password_updated",


@@ -77,7 +77,11 @@ pub async fn task_main(
return Ok(());
}
};
let tls_acceptor: tokio_rustls::TlsAcceptor = tls_config.to_server_config().into();
let mut tls_server_config = rustls::ServerConfig::clone(&tls_config.to_server_config());
// prefer http2, but support http/1.1
tls_server_config.alpn_protocols = vec![b"h2".to_vec(), b"http/1.1".to_vec()];
let tls_acceptor: tokio_rustls::TlsAcceptor = Arc::new(tls_server_config).into();
let mut addr_incoming = AddrIncoming::from_listener(ws_listener)?;
let _ = addr_incoming.set_nodelay(true);
@@ -150,6 +154,7 @@ pub async fn task_main(
);
hyper::Server::builder(accept::from_stream(tls_listener))
.http2_enable_connect_protocol()
.serve(make_svc)
.with_graceful_shutdown(cancellation_token.cancelled())
.await?;
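Two things happen in the hunks above: the TLS acceptor starts advertising `h2` ahead of `http/1.1` via ALPN, and the hyper server enables the extended CONNECT protocol so websockets can be carried over HTTP/2 streams (RFC 8441). A small fragment showing just the ALPN half, reusing the same `rustls`/`tokio-rustls` APIs the diff already calls (certificate setup elided, so this is a helper rather than a full program):

use std::sync::Arc;

/// Prefer HTTP/2 but keep HTTP/1.1 as a fallback; ALPN protocol ids are raw
/// byte strings and are offered to the client in this order.
fn acceptor_with_h2_alpn(base: &rustls::ServerConfig) -> tokio_rustls::TlsAcceptor {
    let mut cfg = rustls::ServerConfig::clone(base);
    cfg.alpn_protocols = vec![b"h2".to_vec(), b"http/1.1".to_vec()];
    tokio_rustls::TlsAcceptor::from(Arc::new(cfg))
}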
@@ -213,11 +218,13 @@ async fn request_handler(
.and_then(|h| h.split(':').next())
.map(|s| s.to_string());
let ws_config = None;
// Check if the request is a websocket upgrade request.
if hyper_tungstenite::is_upgrade_request(&request) {
if websocket::is_upgrade_request(&request) {
info!(session_id = ?session_id, "performing websocket upgrade");
let (response, websocket) = hyper_tungstenite::upgrade(&mut request, None)
let (response, websocket) = websocket::upgrade(&mut request, ws_config)
.map_err(|e| ApiError::BadRequest(e.into()))?;
ws_connections.spawn(
@@ -240,6 +247,34 @@ async fn request_handler(
.in_current_span(),
);
// Return the response so the spawned future can continue.
Ok(response)
} else if websocket::is_connect_request(&request) {
info!(session_id = ?session_id, "performing http2 websocket upgrade");
let (response, websocket) = websocket::connect(&mut request, ws_config)
.map_err(|e| ApiError::BadRequest(e.into()))?;
ws_connections.spawn(
async move {
let mut ctx = RequestMonitoring::new(session_id, peer_addr, "ws2", &config.region);
if let Err(e) = websocket::serve_websocket(
config,
&mut ctx,
websocket,
&cancel_map,
host,
endpoint_rate_limiter,
)
.await
{
error!(session_id = ?session_id, "error in http2 websocket connection: {e:#}");
}
}
.in_current_span(),
);
// Return the response so the spawned future can continue.
Ok(response)
} else if request.uri().path() == "/sql" && request.method() == Method::POST {
@@ -256,7 +291,7 @@ async fn request_handler(
.await
} else if request.uri().path() == "/sql" && request.method() == Method::OPTIONS {
Response::builder()
.header("Allow", "OPTIONS, POST")
.header("Allow", "OPTIONS, POST, CONNECT")
.header("Access-Control-Allow-Origin", "*")
.header(
"Access-Control-Allow-Headers",


@@ -26,7 +26,7 @@ use tokio_postgres::{AsyncMessage, ReadyForQueryStatus};
use crate::{
auth::{self, backend::ComputeUserInfo, check_peer_addr_is_in_list},
console::{self, messages::MetricsAuxInfo},
console,
context::RequestMonitoring,
metrics::NUM_DB_CONNECTIONS_GAUGE,
proxy::connect_compute::ConnectMechanism,
@@ -362,7 +362,6 @@ impl GlobalConnPool {
// ok return cached connection if found and establish a new one otherwise
let new_client = if let Some(client) = client {
ctx.set_project(client.aux.clone());
if client.inner.is_closed() {
let conn_id = uuid::Uuid::new_v4();
info!(%conn_id, "pool: cached connection '{conn_info}' is closed, opening a new one");
@@ -594,6 +593,10 @@ async fn connect_to_compute_once(
span.in_scope(|| {
info!(%conn_info, %session, "new connection");
});
let ids = Ids {
endpoint_id: node_info.aux.endpoint_id.clone(),
branch_id: node_info.aux.branch_id.clone(),
};
let db_user = conn_info.db_and_user();
tokio::spawn(
@@ -661,7 +664,7 @@ async fn connect_to_compute_once(
Ok(ClientInner {
inner: client,
session: tx,
aux: node_info.aux.clone(),
ids,
conn_id,
})
}
@@ -669,17 +672,13 @@ async fn connect_to_compute_once(
struct ClientInner {
inner: tokio_postgres::Client,
session: tokio::sync::watch::Sender<uuid::Uuid>,
aux: MetricsAuxInfo,
ids: Ids,
conn_id: uuid::Uuid,
}
impl Client {
pub fn metrics(&self) -> Arc<MetricCounter> {
let aux = &self.inner.as_ref().unwrap().aux;
USAGE_METRICS.register(Ids {
endpoint_id: aux.endpoint_id.clone(),
branch_id: aux.branch_id.clone(),
})
USAGE_METRICS.register(self.inner.as_ref().unwrap().ids.clone())
}
}


@@ -15,7 +15,6 @@ use serde_json::Map;
use serde_json::Value;
use smol_str::SmolStr;
use tokio_postgres::error::DbError;
use tokio_postgres::error::ErrorPosition;
use tokio_postgres::types::Kind;
use tokio_postgres::types::Type;
use tokio_postgres::GenericClient;
@@ -60,7 +59,6 @@ enum Payload {
const MAX_RESPONSE_SIZE: usize = 10 * 1024 * 1024; // 10 MiB
const MAX_REQUEST_SIZE: u64 = 10 * 1024 * 1024; // 10 MiB
const SERVERLESS_DRIVER_SNI_HOSTNAME_FIRST_PART: &str = "api";
static RAW_TEXT_OUTPUT: HeaderName = HeaderName::from_static("neon-raw-text-output");
static ARRAY_MODE: HeaderName = HeaderName::from_static("neon-array-mode");
@@ -172,23 +170,21 @@ fn get_conn_info(
let hostname = connection_url
.host_str()
.ok_or(anyhow::anyhow!("no host"))?;
let (endpoint, common_name) = endpoint_sni(hostname, &tls.common_names)?;
let host_header = headers
.get("host")
.and_then(|h| h.to_str().ok())
.and_then(|h| h.split(':').next());
// sni_hostname has to be either the same as hostname or the one used in serverless driver.
if !check_matches(&sni_hostname, hostname)? {
if !sni_hostname.ends_with(common_name) {
return Err(anyhow::anyhow!("mismatched SNI hostname and hostname"));
} else if let Some(h) = host_header {
if h != sni_hostname {
if !h.ends_with(common_name) {
return Err(anyhow::anyhow!("mismatched host header and hostname"));
}
}
let endpoint = endpoint_sni(hostname, &tls.common_names)?;
let endpoint: SmolStr = endpoint.into();
ctx.set_endpoint_id(Some(endpoint.clone()));
@@ -216,20 +212,6 @@ fn get_conn_info(
})
}
fn check_matches(sni_hostname: &str, hostname: &str) -> Result<bool, anyhow::Error> {
if sni_hostname == hostname {
return Ok(true);
}
let (sni_hostname_first, sni_hostname_rest) = sni_hostname
.split_once('.')
.ok_or_else(|| anyhow::anyhow!("Unexpected sni format."))?;
let (_, hostname_rest) = hostname
.split_once('.')
.ok_or_else(|| anyhow::anyhow!("Unexpected hostname format."))?;
Ok(sni_hostname_rest == hostname_rest
&& sni_hostname_first == SERVERLESS_DRIVER_SNI_HOSTNAME_FIRST_PART)
}
// TODO: return different http error codes
pub async fn handle(
tls: &'static TlsConfig,
@@ -248,7 +230,7 @@ pub async fn handle(
Ok(r) => match r {
Ok(r) => r,
Err(e) => {
let mut message = format!("{:?}", e);
let message = format!("{:?}", e);
let db_error = e
.downcast_ref::<tokio_postgres::Error>()
.and_then(|e| e.as_db_error());
@@ -261,25 +243,7 @@ pub async fn handle(
.unwrap_or_default()
}
if let Some(db_error) = db_error {
db_error.message().clone_into(&mut message);
}
let position = db_error.and_then(|db| db.position());
let (position, internal_position, internal_query) = match position {
Some(ErrorPosition::Original(position)) => (
Value::String(position.to_string()),
Value::Null,
Value::Null,
),
Some(ErrorPosition::Internal { position, query }) => (
Value::Null,
Value::String(position.to_string()),
Value::String(query.clone()),
),
None => (Value::Null, Value::Null, Value::Null),
};
// TODO(conrad): db_error.position()
let code = get(db_error, |db| db.code().code());
let severity = get(db_error, |db| db.severity());
let detail = get(db_error, |db| db.detail());
@@ -291,7 +255,7 @@ pub async fn handle(
let datatype = get(db_error, |db| db.datatype());
let constraint = get(db_error, |db| db.constraint());
let file = get(db_error, |db| db.file());
let line = get(db_error, |db| db.line().map(|l| l.to_string()));
let line = get(db_error, |db| db.line());
let routine = get(db_error, |db| db.routine());
error!(
@@ -306,15 +270,12 @@ pub async fn handle(
"code": code,
"detail": detail,
"hint": hint,
"position": position,
"internalPosition": internal_position,
"internalQuery": internal_query,
"severity": severity,
"where": where_,
"table": table,
"column": column,
"schema": schema,
"dataType": datatype,
"datatype": datatype,
"constraint": constraint,
"file": file,
"line": line,
@@ -497,7 +458,6 @@ async fn handle_inner(
}
};
ctx.set_success();
ctx.log();
let metrics = client.metrics();
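One side of the error-body change above folds the optional error position from the database into three JSON fields: `position` for an error located in the user's own query, and `internalPosition`/`internalQuery` when it came from an internally generated statement. The mapping is small enough to show on its own; the sketch below uses a locally defined `ErrorPosition` with the same shape as `tokio_postgres::error::ErrorPosition` rather than the real type (assumes `serde_json`):

use serde_json::{json, Value};

/// Same shape as tokio_postgres::error::ErrorPosition.
enum ErrorPosition {
    /// Character position in the original query.
    Original(u32),
    /// Position in an internally generated query, plus that query's text.
    Internal { position: u32, query: String },
}

fn position_fields(position: Option<&ErrorPosition>) -> (Value, Value, Value) {
    match position {
        Some(ErrorPosition::Original(p)) => {
            (Value::String(p.to_string()), Value::Null, Value::Null)
        }
        Some(ErrorPosition::Internal { position, query }) => (
            Value::Null,
            Value::String(position.to_string()),
            Value::String(query.clone()),
        ),
        None => (Value::Null, Value::Null, Value::Null),
    }
}

fn main() {
    for pos in [
        Some(ErrorPosition::Original(17)),
        Some(ErrorPosition::Internal { position: 5, query: "SELECT ...".into() }),
        None,
    ] {
        let (position, internal_position, internal_query) = position_fields(pos.as_ref());
        let body = json!({
            "position": position,
            "internalPosition": internal_position,
            "internalQuery": internal_query,
        });
        println!("{body}");
    }
}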


@@ -8,9 +8,15 @@ use crate::{
};
use bytes::{Buf, Bytes};
use futures::{Sink, Stream};
use hyper::upgrade::Upgraded;
use hyper_tungstenite::{tungstenite::Message, HyperWebsocket, WebSocketStream};
use hyper::{ext::Protocol, upgrade::Upgraded, Body, Method, Request, Response};
use pin_project_lite::pin_project;
use tokio_tungstenite::WebSocketStream;
use tungstenite::{
error::{Error as WSError, ProtocolError},
handshake::derive_accept_key,
protocol::{Role, WebSocketConfig},
Message,
};
use std::{
pin::Pin,
@@ -150,19 +156,202 @@ pub async fn serve_websocket(
Ok(())
}
/// Try to upgrade a received `hyper::Request` to a websocket connection.
///
/// The function returns a HTTP response and a future that resolves to the websocket stream.
/// The response body *MUST* be sent to the client before the future can be resolved.
///
/// This function checks the `Sec-WebSocket-Key` and `Sec-WebSocket-Version` headers.
/// It does not inspect the `Origin`, `Sec-WebSocket-Protocol` or `Sec-WebSocket-Extensions` headers.
/// You can inspect the headers manually before calling this function,
/// and modify the response headers appropriately.
///
/// This function also does not look at the `Connection` or `Upgrade` headers.
/// To check if a request is a websocket upgrade request, you can use [`is_upgrade_request`].
/// Alternatively you can inspect the `Connection` and `Upgrade` headers manually.
///
pub fn upgrade<B>(
mut request: impl std::borrow::BorrowMut<Request<B>>,
config: Option<WebSocketConfig>,
) -> Result<(Response<Body>, HyperWebsocket), ProtocolError> {
let request = request.borrow_mut();
let key = request
.headers()
.get("Sec-WebSocket-Key")
.ok_or(ProtocolError::MissingSecWebSocketKey)?;
if request
.headers()
.get("Sec-WebSocket-Version")
.map(|v| v.as_bytes())
!= Some(b"13")
{
return Err(ProtocolError::MissingSecWebSocketVersionHeader);
}
let response = Response::builder()
.status(hyper::StatusCode::SWITCHING_PROTOCOLS)
.header(hyper::header::CONNECTION, "upgrade")
.header(hyper::header::UPGRADE, "websocket")
.header("Sec-WebSocket-Accept", &derive_accept_key(key.as_bytes()))
.body(Body::from("switching to websocket protocol"))
.expect("bug: failed to build response");
let stream = HyperWebsocket {
inner: hyper::upgrade::on(request),
config,
};
Ok((response, stream))
}
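The `Sec-WebSocket-Accept` header built above is the standard RFC 6455 handshake proof: the server appends a fixed GUID to the client's `Sec-WebSocket-Key`, SHA-1 hashes the result and base64-encodes the digest, which is what `derive_accept_key` computes. A quick check against the worked example from RFC 6455, section 1.3 (assumes the `tungstenite` crate, whose `derive_accept_key` takes the key bytes and returns the accept string):

use tungstenite::handshake::derive_accept_key;

fn main() {
    // Sample key/accept pair from RFC 6455, section 1.3.
    let key = "dGhlIHNhbXBsZSBub25jZQ==";
    let accept = derive_accept_key(key.as_bytes());
    assert_eq!(accept, "s3pPLMBiTxaQ9kYGzzhZRbK+xOo=");
    println!("Sec-WebSocket-Accept: {accept}");
}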
/// Check if a request is a websocket upgrade request.
///
/// If the `Upgrade` header lists multiple protocols,
/// this function returns true if of them are `"websocket"`,
/// If the server supports multiple upgrade protocols,
/// it would be more appropriate to try each listed protocol in order.
pub fn is_upgrade_request<B>(request: &hyper::Request<B>) -> bool {
header_contains_value(request.headers(), hyper::header::CONNECTION, "Upgrade")
&& header_contains_value(request.headers(), hyper::header::UPGRADE, "websocket")
}
/// Check if there is a header of the given name containing the wanted value.
fn header_contains_value(
headers: &hyper::HeaderMap,
header: impl hyper::header::AsHeaderName,
value: impl AsRef<[u8]>,
) -> bool {
let value = value.as_ref();
for header in headers.get_all(header) {
if header
.as_bytes()
.split(|&c| c == b',')
.any(|x| trim(x).eq_ignore_ascii_case(value))
{
return true;
}
}
false
}
fn trim(data: &[u8]) -> &[u8] {
trim_end(trim_start(data))
}
fn trim_start(data: &[u8]) -> &[u8] {
if let Some(start) = data.iter().position(|x| !x.is_ascii_whitespace()) {
&data[start..]
} else {
b""
}
}
fn trim_end(data: &[u8]) -> &[u8] {
if let Some(last) = data.iter().rposition(|x| !x.is_ascii_whitespace()) {
&data[..last + 1]
} else {
b""
}
}
/// Try to upgrade a received `hyper::Request` to a websocket connection.
///
/// The function returns a HTTP response and a future that resolves to the websocket stream.
/// The response body *MUST* be sent to the client before the future can be resolved.
///
/// This function checks the `Sec-WebSocket-Version` header.
/// It does not inspect the `Origin`, `Sec-WebSocket-Protocol` or `Sec-WebSocket-Extensions` headers.
/// You can inspect the headers manually before calling this function,
/// and modify the response headers appropriately.
///
/// This function also does not look at the `Connection` or `Upgrade` headers.
/// To check if a request is a websocket connect request, you can use [`is_connect_request`].
/// Alternatively you can inspect the `Connection` and `Upgrade` headers manually.
///
pub fn connect<B>(
mut request: impl std::borrow::BorrowMut<Request<B>>,
config: Option<WebSocketConfig>,
) -> Result<(Response<Body>, HyperWebsocket), ProtocolError> {
let request = request.borrow_mut();
if request
.headers()
.get("Sec-WebSocket-Version")
.map(|v| v.as_bytes())
!= Some(b"13")
{
return Err(ProtocolError::MissingSecWebSocketVersionHeader);
}
let response = Response::builder()
.status(hyper::StatusCode::OK)
.body(Body::from("switching to websocket protocol"))
.expect("bug: failed to build response");
let stream = HyperWebsocket {
inner: hyper::upgrade::on(request),
config,
};
Ok((response, stream))
}
/// Check if a request is a websocket connect request.
pub fn is_connect_request<B>(request: &hyper::Request<B>) -> bool {
request.method() == Method::CONNECT
&& request
.extensions()
.get::<Protocol>()
.is_some_and(|protocol| protocol.as_str() == "websocket")
}
pin_project_lite::pin_project! {
/// A future that resolves to a websocket stream when the associated connection completes.
#[derive(Debug)]
pub struct HyperWebsocket {
#[pin]
inner: hyper::upgrade::OnUpgrade,
config: Option<WebSocketConfig>
}
}
impl std::future::Future for HyperWebsocket {
type Output = Result<WebSocketStream<hyper::upgrade::Upgraded>, WSError>;
fn poll(self: Pin<&mut Self>, cx: &mut std::task::Context) -> Poll<Self::Output> {
let this = self.project();
let upgraded = match this.inner.poll(cx) {
Poll::Pending => return Poll::Pending,
Poll::Ready(x) => x,
};
let upgraded =
upgraded.map_err(|_| WSError::Protocol(ProtocolError::HandshakeIncomplete))?;
let stream = WebSocketStream::from_raw_socket(upgraded, Role::Server, None);
tokio::pin!(stream);
// The future returned by `from_raw_socket` is always ready.
// Not sure why it is a future in the first place.
match stream.as_mut().poll(cx) {
Poll::Pending => unreachable!("from_raw_socket should always be created ready"),
Poll::Ready(x) => Poll::Ready(Ok(x)),
}
}
}
#[cfg(test)]
mod tests {
use std::pin::pin;
use futures::{SinkExt, StreamExt};
use hyper_tungstenite::{
tungstenite::{protocol::Role, Message},
WebSocketStream,
};
use tokio::{
io::{duplex, AsyncReadExt, AsyncWriteExt},
task::JoinSet,
};
use tokio_tungstenite::WebSocketStream;
use tungstenite::{protocol::Role, Message};
use super::WebSocketRw;

View File

@@ -14,7 +14,7 @@ requests = "^2.31.0"
pytest-xdist = "^3.3.1"
asyncpg = "^0.29.0"
aiopg = "^1.4.0"
Jinja2 = "^3.1.3"
Jinja2 = "^3.0.2"
types-requests = "^2.31.0.0"
types-psycopg2 = "^2.9.21.10"
boto3 = "^1.34.11"

View File

@@ -13,16 +13,13 @@ use std::time::Instant;
use crate::control_file_upgrade::upgrade_control_file;
use crate::metrics::PERSIST_CONTROL_FILE_SECONDS;
use crate::state::TimelinePersistentState;
use crate::safekeeper::{SafeKeeperState, SK_FORMAT_VERSION, SK_MAGIC};
use utils::{bin_ser::LeSer, id::TenantTimelineId};
use crate::SafeKeeperConf;
use std::convert::TryInto;
pub const SK_MAGIC: u32 = 0xcafeceefu32;
pub const SK_FORMAT_VERSION: u32 = 7;
// contains persistent metadata for safekeeper
const CONTROL_FILE_NAME: &str = "safekeeper.control";
// needed to atomically update the state using `rename`
@@ -32,9 +29,9 @@ pub const CHECKSUM_SIZE: usize = std::mem::size_of::<u32>();
/// Storage should keep actual state inside of it. It should implement Deref
/// trait to access state fields and have persist method for updating that state.
#[async_trait::async_trait]
pub trait Storage: Deref<Target = TimelinePersistentState> {
pub trait Storage: Deref<Target = SafeKeeperState> {
/// Persist safekeeper state on disk and update internal state.
async fn persist(&mut self, s: &TimelinePersistentState) -> Result<()>;
async fn persist(&mut self, s: &SafeKeeperState) -> Result<()>;
/// Timestamp of last persist.
fn last_persist_at(&self) -> Instant;
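// Hedged sketch (not part of this diff): a minimal in-memory implementation of
// the Storage contract described above, assuming the trait has only the two
// methods shown in this hunk. It uses the new `TimelinePersistentState` name
// from this commit range (the old side of the diff calls it `SafeKeeperState`)
// and the `Deref`, `Instant`, `Result` and `async_trait` names already imported
// by control_file.rs.
struct MemStorage {
    state: TimelinePersistentState,
    last_persist_at: Instant,
}

impl Deref for MemStorage {
    type Target = TimelinePersistentState;
    fn deref(&self) -> &Self::Target {
        &self.state
    }
}

#[async_trait::async_trait]
impl Storage for MemStorage {
    async fn persist(&mut self, s: &TimelinePersistentState) -> Result<()> {
        // Nothing durable here: just record the new state and the persist time.
        self.state = s.clone();
        self.last_persist_at = Instant::now();
        Ok(())
    }

    fn last_persist_at(&self) -> Instant {
        self.last_persist_at
    }
}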
@@ -47,7 +44,7 @@ pub struct FileStorage {
conf: SafeKeeperConf,
/// Last state persisted to disk.
state: TimelinePersistentState,
state: SafeKeeperState,
/// Not preserved across restarts.
last_persist_at: Instant,
}
@@ -71,7 +68,7 @@ impl FileStorage {
pub fn create_new(
timeline_dir: Utf8PathBuf,
conf: &SafeKeeperConf,
state: TimelinePersistentState,
state: SafeKeeperState,
) -> Result<FileStorage> {
let store = FileStorage {
timeline_dir,
@@ -84,7 +81,7 @@ impl FileStorage {
}
/// Check the magic/version in the on-disk data and deserialize it, if possible.
fn deser_sk_state(buf: &mut &[u8]) -> Result<TimelinePersistentState> {
fn deser_sk_state(buf: &mut &[u8]) -> Result<SafeKeeperState> {
// Read the version independent part
let magic = ReadBytesExt::read_u32::<LittleEndian>(buf)?;
if magic != SK_MAGIC {
@@ -96,7 +93,7 @@ impl FileStorage {
}
let version = ReadBytesExt::read_u32::<LittleEndian>(buf)?;
if version == SK_FORMAT_VERSION {
let res = TimelinePersistentState::des(buf)?;
let res = SafeKeeperState::des(buf)?;
return Ok(res);
}
// try to upgrade
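// Sketch of the on-disk framing implied above: a little-endian magic word, a
// little-endian format version, then the serialized state (plus, per
// CHECKSUM_SIZE in the surrounding file, what appears to be a trailing u32
// checksum). Purely illustrative; SK_MAGIC and SK_FORMAT_VERSION are the
// constants from this file, and `WriteBytesExt` is an extra byteorder import.
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};

fn write_header(buf: &mut Vec<u8>) -> std::io::Result<()> {
    buf.write_u32::<LittleEndian>(SK_MAGIC)?;
    buf.write_u32::<LittleEndian>(SK_FORMAT_VERSION)?;
    Ok(())
}

fn read_header(mut buf: &[u8]) -> std::io::Result<(u32, u32)> {
    let magic = buf.read_u32::<LittleEndian>()?;
    let version = buf.read_u32::<LittleEndian>()?;
    Ok((magic, version))
}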
@@ -107,15 +104,13 @@ impl FileStorage {
pub fn load_control_file_conf(
conf: &SafeKeeperConf,
ttid: &TenantTimelineId,
) -> Result<TimelinePersistentState> {
) -> Result<SafeKeeperState> {
let path = conf.timeline_dir(ttid).join(CONTROL_FILE_NAME);
Self::load_control_file(path)
}
/// Read in the control file.
pub fn load_control_file<P: AsRef<Path>>(
control_file_path: P,
) -> Result<TimelinePersistentState> {
pub fn load_control_file<P: AsRef<Path>>(control_file_path: P) -> Result<SafeKeeperState> {
let mut control_file = std::fs::OpenOptions::new()
.read(true)
.write(true)
@@ -158,7 +153,7 @@ impl FileStorage {
}
impl Deref for FileStorage {
type Target = TimelinePersistentState;
type Target = SafeKeeperState;
fn deref(&self) -> &Self::Target {
&self.state
@@ -170,7 +165,7 @@ impl Storage for FileStorage {
/// Persists state durably to the underlying storage.
///
/// For a description, see <https://lwn.net/Articles/457667/>.
async fn persist(&mut self, s: &TimelinePersistentState) -> Result<()> {
async fn persist(&mut self, s: &SafeKeeperState) -> Result<()> {
let _timer = PERSIST_CONTROL_FILE_SECONDS.start_timer();
// write data to safekeeper.control.partial
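// Hedged sketch of the durable-update pattern the comments above refer to:
// write the new bytes to a ".partial" sibling, fsync it, rename it over the
// real control file, then fsync the directory so the rename itself survives a
// crash. File names are illustrative; the real persist is async and uses
// CONTROL_FILE_NAME plus its partial counterpart.
use std::io::Write;

fn durable_replace(dir: &std::path::Path, name: &str, data: &[u8]) -> std::io::Result<()> {
    let partial = dir.join(format!("{name}.partial"));
    let final_path = dir.join(name);

    let mut f = std::fs::File::create(&partial)?;
    f.write_all(data)?;
    f.sync_all()?; // flush file contents and metadata to disk

    std::fs::rename(&partial, &final_path)?;

    // fsync the directory entry so the rename is durable as well
    std::fs::File::open(dir)?.sync_all()?;
    Ok(())
}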
@@ -247,7 +242,7 @@ impl Storage for FileStorage {
mod test {
use super::FileStorage;
use super::*;
use crate::SafeKeeperConf;
use crate::{safekeeper::SafeKeeperState, SafeKeeperConf};
use anyhow::Result;
use utils::{id::TenantTimelineId, lsn::Lsn};
@@ -262,7 +257,7 @@ mod test {
async fn load_from_control_file(
conf: &SafeKeeperConf,
ttid: &TenantTimelineId,
) -> Result<(FileStorage, TimelinePersistentState)> {
) -> Result<(FileStorage, SafeKeeperState)> {
fs::create_dir_all(conf.timeline_dir(ttid))
.await
.expect("failed to create timeline dir");
@@ -275,11 +270,11 @@ mod test {
async fn create(
conf: &SafeKeeperConf,
ttid: &TenantTimelineId,
) -> Result<(FileStorage, TimelinePersistentState)> {
) -> Result<(FileStorage, SafeKeeperState)> {
fs::create_dir_all(conf.timeline_dir(ttid))
.await
.expect("failed to create timeline dir");
let state = TimelinePersistentState::empty();
let state = SafeKeeperState::empty();
let timeline_dir = conf.timeline_dir(ttid);
let storage = FileStorage::create_new(timeline_dir, conf, state.clone())?;
Ok((storage, state))

View File

@@ -1,7 +1,6 @@
//! Code to deal with safekeeper control file upgrades
use crate::{
safekeeper::{AcceptorState, PgUuid, ServerInfo, Term, TermHistory, TermLsn},
state::{PersistedPeers, TimelinePersistentState},
use crate::safekeeper::{
AcceptorState, PersistedPeers, PgUuid, SafeKeeperState, ServerInfo, Term, TermHistory, TermLsn,
};
use anyhow::{bail, Result};
use pq_proto::SystemId;
@@ -138,7 +137,7 @@ pub struct SafeKeeperStateV4 {
pub peers: PersistedPeers,
}
pub fn upgrade_control_file(buf: &[u8], version: u32) -> Result<TimelinePersistentState> {
pub fn upgrade_control_file(buf: &[u8], version: u32) -> Result<SafeKeeperState> {
// migrate to storing full term history
if version == 1 {
info!("reading safekeeper control file version {}", version);
@@ -150,7 +149,7 @@ pub fn upgrade_control_file(buf: &[u8], version: u32) -> Result<TimelinePersiste
lsn: Lsn(0),
}]),
};
return Ok(TimelinePersistentState {
return Ok(SafeKeeperState {
tenant_id: oldstate.server.tenant_id,
timeline_id: oldstate.server.timeline_id,
acceptor_state: ac,
@@ -177,7 +176,7 @@ pub fn upgrade_control_file(buf: &[u8], version: u32) -> Result<TimelinePersiste
system_id: oldstate.server.system_id,
wal_seg_size: oldstate.server.wal_seg_size,
};
return Ok(TimelinePersistentState {
return Ok(SafeKeeperState {
tenant_id: oldstate.server.tenant_id,
timeline_id: oldstate.server.timeline_id,
acceptor_state: oldstate.acceptor_state,
@@ -200,7 +199,7 @@ pub fn upgrade_control_file(buf: &[u8], version: u32) -> Result<TimelinePersiste
system_id: oldstate.server.system_id,
wal_seg_size: oldstate.server.wal_seg_size,
};
return Ok(TimelinePersistentState {
return Ok(SafeKeeperState {
tenant_id: oldstate.server.tenant_id,
timeline_id: oldstate.server.timeline_id,
acceptor_state: oldstate.acceptor_state,
@@ -223,7 +222,7 @@ pub fn upgrade_control_file(buf: &[u8], version: u32) -> Result<TimelinePersiste
system_id: oldstate.server.system_id,
wal_seg_size: oldstate.server.wal_seg_size,
};
return Ok(TimelinePersistentState {
return Ok(SafeKeeperState {
tenant_id: oldstate.tenant_id,
timeline_id: oldstate.timeline_id,
acceptor_state: oldstate.acceptor_state,
@@ -239,7 +238,7 @@ pub fn upgrade_control_file(buf: &[u8], version: u32) -> Result<TimelinePersiste
});
} else if version == 5 {
info!("reading safekeeper control file version {}", version);
let mut oldstate = TimelinePersistentState::des(&buf[..buf.len()])?;
let mut oldstate = SafeKeeperState::des(&buf[..buf.len()])?;
if oldstate.timeline_start_lsn != Lsn(0) {
return Ok(oldstate);
}
@@ -252,7 +251,7 @@ pub fn upgrade_control_file(buf: &[u8], version: u32) -> Result<TimelinePersiste
return Ok(oldstate);
} else if version == 6 {
info!("reading safekeeper control file version {}", version);
let mut oldstate = TimelinePersistentState::des(&buf[..buf.len()])?;
let mut oldstate = SafeKeeperState::des(&buf[..buf.len()])?;
if oldstate.server.pg_version != 0 {
return Ok(oldstate);
}

View File

@@ -14,7 +14,7 @@ use utils::{id::TenantTimelineId, lsn::Lsn};
use crate::{
control_file::{FileStorage, Storage},
pull_timeline::{create_temp_timeline_dir, load_temp_timeline, validate_temp_timeline},
state::TimelinePersistentState,
safekeeper::SafeKeeperState,
timeline::{Timeline, TimelineError},
wal_backup::copy_s3_segments,
wal_storage::{wal_file_paths, WalReader},
@@ -137,7 +137,7 @@ pub async fn handle_request(request: Request) -> Result<()> {
)
.await?;
let mut new_state = TimelinePersistentState::new(
let mut new_state = SafeKeeperState::new(
&request.destination_ttid,
state.server.clone(),
vec![],
@@ -160,7 +160,7 @@ pub async fn handle_request(request: Request) -> Result<()> {
async fn copy_disk_segments(
conf: &SafeKeeperConf,
persisted_state: &TimelinePersistentState,
persisted_state: &SafeKeeperState,
wal_seg_size: usize,
source_ttid: &TenantTimelineId,
start_lsn: Lsn,

View File

@@ -22,13 +22,14 @@ use utils::id::TenantTimelineId;
use utils::id::{TenantId, TimelineId};
use utils::lsn::Lsn;
use crate::safekeeper::SafeKeeperState;
use crate::safekeeper::SafekeeperMemState;
use crate::safekeeper::TermHistory;
use crate::SafeKeeperConf;
use crate::send_wal::WalSenderState;
use crate::state::TimelineMemState;
use crate::state::TimelinePersistentState;
use crate::wal_storage::WalReader;
use crate::GlobalTimelines;
use crate::SafeKeeperConf;
/// Various filters that influence the resulting JSON output.
#[derive(Debug, Serialize, Deserialize, Clone)]
@@ -142,7 +143,7 @@ pub struct Config {
pub struct Timeline {
pub tenant_id: TenantId,
pub timeline_id: TimelineId,
pub control_file: Option<TimelinePersistentState>,
pub control_file: Option<SafeKeeperState>,
pub memory: Option<Memory>,
pub disk_content: Option<DiskContent>,
}
@@ -157,7 +158,7 @@ pub struct Memory {
pub num_computes: u32,
pub last_removed_segno: XLogSegNo,
pub epoch_start_lsn: Lsn,
pub mem_state: TimelineMemState,
pub mem_state: SafekeeperMemState,
// PhysicalStorage state.
pub write_lsn: Lsn,

View File

@@ -160,7 +160,7 @@ async fn timeline_status_handler(request: Request<Body>) -> Result<Response<Body
commit_lsn: inmem.commit_lsn,
backup_lsn: inmem.backup_lsn,
peer_horizon_lsn: inmem.peer_horizon_lsn,
remote_consistent_lsn: inmem.remote_consistent_lsn,
remote_consistent_lsn: tli.get_walsenders().get_remote_consistent_lsn(),
peers: tli.get_peers(conf).await,
walsenders: tli.get_walsenders().get_all(),
walreceivers: tli.get_walreceivers().get_all(),
@@ -288,32 +288,34 @@ async fn timeline_files_handler(request: Request<Body>) -> Result<Response<Body>
}
/// Deactivates the timeline and removes its data directory.
async fn timeline_delete_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
async fn timeline_delete_force_handler(
mut request: Request<Body>,
) -> Result<Response<Body>, ApiError> {
let ttid = TenantTimelineId::new(
parse_request_param(&request, "tenant_id")?,
parse_request_param(&request, "timeline_id")?,
);
let only_local = parse_query_param(&request, "only_local")?.unwrap_or(false);
check_permission(&request, Some(ttid.tenant_id))?;
ensure_no_body(&mut request).await?;
// FIXME: `delete_force` can fail from both internal errors and bad requests. Add better
// error handling here when we're able to.
let resp = GlobalTimelines::delete(&ttid, only_local)
let resp = GlobalTimelines::delete_force(&ttid)
.await
.map_err(ApiError::InternalServerError)?;
json_response(StatusCode::OK, resp)
}
/// Deactivates all timelines for the tenant and removes its data directory.
/// See `timeline_delete_handler`.
async fn tenant_delete_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
/// See `timeline_delete_force_handler`.
async fn tenant_delete_force_handler(
mut request: Request<Body>,
) -> Result<Response<Body>, ApiError> {
let tenant_id = parse_request_param(&request, "tenant_id")?;
let only_local = parse_query_param(&request, "only_local")?.unwrap_or(false);
check_permission(&request, Some(tenant_id))?;
ensure_no_body(&mut request).await?;
// FIXME: `delete_force_all_for_tenant` can return an error for multiple different reasons;
// Using an `InternalServerError` should be fixed when the types support it
let delete_info = GlobalTimelines::delete_force_all_for_tenant(&tenant_id, only_local)
let delete_info = GlobalTimelines::delete_force_all_for_tenant(&tenant_id)
.await
.map_err(ApiError::InternalServerError)?;
json_response(
@@ -510,10 +512,10 @@ pub fn make_router(conf: SafeKeeperConf) -> RouterBuilder<hyper::Body, ApiError>
request_span(r, timeline_status_handler)
})
.delete("/v1/tenant/:tenant_id/timeline/:timeline_id", |r| {
request_span(r, timeline_delete_handler)
request_span(r, timeline_delete_force_handler)
})
.delete("/v1/tenant/:tenant_id", |r| {
request_span(r, tenant_delete_handler)
request_span(r, tenant_delete_force_handler)
})
.post("/v1/pull_timeline", |r| {
request_span(r, timeline_pull_handler)
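// Hedged sketch (not part of the diff) of calling the delete route registered
// above from a client, using reqwest; the address, any auth setup, and the
// `only_local` value are illustrative assumptions, not taken from this repo.
async fn delete_timeline(
    client: &reqwest::Client,
    tenant_id: &str,
    timeline_id: &str,
) -> reqwest::Result<reqwest::StatusCode> {
    let url = format!(
        "http://127.0.0.1:7676/v1/tenant/{tenant_id}/timeline/{timeline_id}?only_local=false"
    );
    Ok(client.delete(url).send().await?.status())
}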

Some files were not shown because too many files have changed in this diff.