Merge pull request #7378 from neondatabase/rc/2024-04-15

Release 2024-04-15
Merge pull request #7356 from neondatabase/rc/2024-04-11-#7348
2026-07-18 11:30:36 +00:00 · 2024-04-15 15:48:14 +03:00 · 2024-04-11 09:46:34 -07:00 · 2024-04-10 13:13:08 -07:00 · 2024-04-08 13:10:24 +01:00 · 2024-04-02 18:15:28 +03:00
92 changed files with 2458 additions and 3565 deletions
--- a/.cargo/config.toml
+++ b/.cargo/config.toml
@@ -2,7 +2,6 @@
 # This is only present for local builds, as it will be overridden
 # by the RUSTDOCFLAGS env var in CI.
 rustdocflags = ["-Arustdoc::private_intra_doc_links"]
-rustflags = ["--cfg", "tokio_unstable"]

 [alias]
 build_testing = ["build", "--features", "testing"]
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -214,7 +214,6 @@ jobs:
      BUILD_TYPE: ${{ matrix.build_type }}
      GIT_VERSION: ${{ github.event.pull_request.head.sha || github.sha }}
      BUILD_TAG: ${{ needs.tag.outputs.build-tag }}
-      RUSTFLAGS: "--cfg=tokio_unstable"

    steps:
      - name: Fix git ownership
@@ -1134,6 +1133,8 @@ jobs:
              -f deployPreprodRegion=true

            gh workflow --repo neondatabase/aws run deploy-prod.yml --ref main \
+              -f deployPgSniRouter=false \
+              -f deployProxy=false \
              -f deployStorage=true \
              -f deployStorageBroker=true \
              -f deployStorageController=true \
--- a/.github/workflows/check-build-tools-image.yml
+++ b/.github/workflows/check-build-tools-image.yml
@@ -28,9 +28,7 @@ jobs:
      - name: Get build-tools image tag for the current commit
        id: get-build-tools-tag
        env:
-          # Usually, for COMMIT_SHA, we use `github.event.pull_request.head.sha || github.sha`, but here, even for PRs,
-          # we want to use `github.sha` i.e. point to a phantom merge commit to determine the image tag correctly.
-          COMMIT_SHA: ${{ github.sha }}
+          COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          LAST_BUILD_TOOLS_SHA=$(
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1240,43 +1240,6 @@ dependencies = [
 "crossbeam-utils",
 ]

-[[package]]
-name = "console-api"
-version = "0.6.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fd326812b3fd01da5bb1af7d340d0d555fd3d4b641e7f1dfcf5962a902952787"
-dependencies = [
- "futures-core",
- "prost 0.12.4",
- "prost-types 0.12.4",
- "tonic 0.10.2",
- "tracing-core",
-]
-
-[[package]]
-name = "console-subscriber"
-version = "0.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7481d4c57092cd1c19dd541b92bdce883de840df30aa5d03fd48a3935c01842e"
-dependencies = [
- "console-api",
- "crossbeam-channel",
- "crossbeam-utils",
- "futures-task",
- "hdrhistogram",
- "humantime",
- "prost-types 0.12.4",
- "serde",
- "serde_json",
- "thread_local",
- "tokio",
- "tokio-stream",
- "tonic 0.10.2",
- "tracing",
- "tracing-core",
- "tracing-subscriber",
-]
-
 [[package]]
 name = "const-oid"
 version = "0.9.5"
@@ -3445,11 +3408,11 @@ dependencies = [
 "opentelemetry-semantic-conventions",
 "opentelemetry_api",
 "opentelemetry_sdk",
- "prost 0.11.9",
+ "prost",
 "reqwest",
 "thiserror",
 "tokio",
- "tonic 0.9.2",
+ "tonic",
 ]

 [[package]]
@@ -3460,8 +3423,8 @@ checksum = "b1e3f814aa9f8c905d0ee4bde026afd3b2577a97c10e1699912e3e44f0c4cbeb"
 dependencies = [
 "opentelemetry_api",
 "opentelemetry_sdk",
- "prost 0.11.9",
- "tonic 0.9.2",
+ "prost",
+ "tonic",
 ]

 [[package]]
@@ -4046,6 +4009,17 @@ dependencies = [
 "tokio-postgres",
 ]

+[[package]]
+name = "postgres-native-tls"
+version = "0.5.0"
+source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
+dependencies = [
+ "native-tls",
+ "tokio",
+ "tokio-native-tls",
+ "tokio-postgres",
+]
+
 [[package]]
 name = "postgres-protocol"
 version = "0.6.4"
@@ -4260,17 +4234,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0b82eaa1d779e9a4bc1c3217db8ffbeabaae1dca241bf70183242128d48681cd"
 dependencies = [
 "bytes",
- "prost-derive 0.11.9",
-]
-
-[[package]]
-name = "prost"
-version = "0.12.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d0f5d036824e4761737860779c906171497f6d55681139d8312388f8fe398922"
-dependencies = [
- "bytes",
- "prost-derive 0.12.4",
+ "prost-derive",
 ]

 [[package]]
@@ -4287,8 +4251,8 @@ dependencies = [
 "multimap",
 "petgraph",
 "prettyplease 0.1.25",
- "prost 0.11.9",
- "prost-types 0.11.9",
+ "prost",
+ "prost-types",
 "regex",
 "syn 1.0.109",
 "tempfile",
@@ -4308,35 +4272,13 @@ dependencies = [
 "syn 1.0.109",
 ]

-[[package]]
-name = "prost-derive"
-version = "0.12.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "19de2de2a00075bf566bee3bd4db014b11587e84184d3f7a791bc17f1a8e9e48"
-dependencies = [
- "anyhow",
- "itertools",
- "proc-macro2",
- "quote",
- "syn 2.0.52",
-]
-
 [[package]]
 name = "prost-types"
 version = "0.11.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "213622a1460818959ac1181aaeb2dc9c7f63df720db7d788b3e24eacd1983e13"
 dependencies = [
- "prost 0.11.9",
-]
-
-[[package]]
-name = "prost-types"
-version = "0.12.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3235c33eb02c1f1e212abdbe34c78b264b038fb58ca612664343271e36e55ffe"
-dependencies = [
- "prost 0.12.4",
+ "prost",
 ]

 [[package]]
@@ -4358,7 +4300,6 @@ dependencies = [
 "camino-tempfile",
 "chrono",
 "clap",
- "console-subscriber",
 "consumption_metrics",
 "dashmap",
 "env_logger",
@@ -4383,6 +4324,7 @@ dependencies = [
 "md5",
 "measured",
 "metrics",
+ "native-tls",
 "once_cell",
 "opentelemetry",
 "parking_lot 0.12.1",
@@ -4390,6 +4332,7 @@ dependencies = [
 "parquet_derive",
 "pbkdf2",
 "pin-project-lite",
+ "postgres-native-tls",
 "postgres-protocol",
 "postgres_backend",
 "pq_proto",
@@ -4408,7 +4351,6 @@ dependencies = [
 "rstest",
 "rustc-hash",
 "rustls 0.22.2",
- "rustls-native-certs 0.7.0",
 "rustls-pemfile 2.1.1",
 "scopeguard",
 "serde",
@@ -5775,10 +5717,10 @@ dependencies = [
 "metrics",
 "once_cell",
 "parking_lot 0.12.1",
- "prost 0.11.9",
+ "prost",
 "tokio",
 "tokio-stream",
- "tonic 0.9.2",
+ "tonic",
 "tonic-build",
 "tracing",
 "utils",
@@ -6170,7 +6112,6 @@ dependencies = [
 "signal-hook-registry",
 "socket2 0.5.5",
 "tokio-macros",
- "tracing",
 "windows-sys 0.48.0",
 ]

@@ -6401,7 +6342,7 @@ dependencies = [
 "hyper-timeout",
 "percent-encoding",
 "pin-project",
- "prost 0.11.9",
+ "prost",
 "rustls-native-certs 0.6.2",
 "rustls-pemfile 1.0.2",
 "tokio",
@@ -6413,33 +6354,6 @@ dependencies = [
 "tracing",
 ]

-[[package]]
-name = "tonic"
-version = "0.10.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d560933a0de61cf715926b9cac824d4c883c2c43142f787595e48280c40a1d0e"
-dependencies = [
- "async-stream",
- "async-trait",
- "axum",
- "base64 0.21.1",
- "bytes",
- "h2 0.3.26",
- "http 0.2.9",
- "http-body 0.4.5",
- "hyper 0.14.26",
- "hyper-timeout",
- "percent-encoding",
- "pin-project",
- "prost 0.12.4",
- "tokio",
- "tokio-stream",
- "tower",
- "tower-layer",
- "tower-service",
- "tracing",
-]
-
 [[package]]
 name = "tonic-build"
 version = "0.9.2"
@@ -7420,7 +7334,6 @@ dependencies = [
 "futures-util",
 "getrandom 0.2.11",
 "hashbrown 0.14.0",
- "hdrhistogram",
 "hex",
 "hmac",
 "hyper 0.14.26",
@@ -7435,7 +7348,7 @@ dependencies = [
 "num-traits",
 "once_cell",
 "parquet",
- "prost 0.11.9",
+ "prost",
 "rand 0.8.5",
 "regex",
 "regex-automata 0.4.3",
@@ -7454,11 +7367,10 @@ dependencies = [
 "time-macros",
 "tokio",
 "tokio-rustls 0.24.0",
- "tokio-stream",
 "tokio-util",
 "toml_datetime",
 "toml_edit",
- "tonic 0.9.2",
+ "tonic",
 "tower",
 "tracing",
 "tracing-core",
--- a/2
+++ b/2
@@ -47,7 +47,7 @@ COPY --chown=nonroot . .
 # Show build caching stats to check if it was used in the end.
 # Has to be the part of the same RUN since cachepot daemon is killed in the end of this RUN, losing the compilation stats.
 RUN set -e \
-    && RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment --cfg=tokio_unstable" cargo build  \
+    && RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment" cargo build  \
      --bin pg_sni_router  \
      --bin pageserver  \
      --bin pagectl  \
--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -818,15 +818,9 @@ impl ComputeNode {
                        Client::connect(zenith_admin_connstr.as_str(), NoTls)
                            .context("broken cloud_admin credential: tried connecting with cloud_admin but could not authenticate, and zenith_admin does not work either")?;
                    // Disable forwarding so that users don't get a cloud_admin role
-
-                    let mut func = || {
-                        client.simple_query("SET neon.forward_ddl = false")?;
-                        client.simple_query("CREATE USER cloud_admin WITH SUPERUSER")?;
-                        client.simple_query("GRANT zenith_admin TO cloud_admin")?;
-                        Ok::<_, anyhow::Error>(())
-                    };
-                    func().context("apply_config setup cloud_admin")?;
-
+                    client.simple_query("SET neon.forward_ddl = false")?;
+                    client.simple_query("CREATE USER cloud_admin WITH SUPERUSER")?;
+                    client.simple_query("GRANT zenith_admin TO cloud_admin")?;
                    drop(client);

                    // reconnect with connstring with expected name
@@ -838,29 +832,24 @@ impl ComputeNode {
        };

        // Disable DDL forwarding because control plane already knows about these roles/databases.
-        client
-            .simple_query("SET neon.forward_ddl = false")
-            .context("apply_config SET neon.forward_ddl = false")?;
+        client.simple_query("SET neon.forward_ddl = false")?;

        // Proceed with post-startup configuration. Note, that order of operations is important.
        let spec = &compute_state.pspec.as_ref().expect("spec must be set").spec;
-        create_neon_superuser(spec, &mut client).context("apply_config create_neon_superuser")?;
-        cleanup_instance(&mut client).context("apply_config cleanup_instance")?;
-        handle_roles(spec, &mut client).context("apply_config handle_roles")?;
-        handle_databases(spec, &mut client).context("apply_config handle_databases")?;
-        handle_role_deletions(spec, connstr.as_str(), &mut client)
-            .context("apply_config handle_role_deletions")?;
+        create_neon_superuser(spec, &mut client)?;
+        cleanup_instance(&mut client)?;
+        handle_roles(spec, &mut client)?;
+        handle_databases(spec, &mut client)?;
+        handle_role_deletions(spec, connstr.as_str(), &mut client)?;
        handle_grants(
            spec,
            &mut client,
            connstr.as_str(),
            self.has_feature(ComputeFeature::AnonExtension),
-        )
-        .context("apply_config handle_grants")?;
-        handle_extensions(spec, &mut client).context("apply_config handle_extensions")?;
-        handle_extension_neon(&mut client).context("apply_config handle_extension_neon")?;
-        create_availability_check_data(&mut client)
-            .context("apply_config create_availability_check_data")?;
+        )?;
+        handle_extensions(spec, &mut client)?;
+        handle_extension_neon(&mut client)?;
+        create_availability_check_data(&mut client)?;

        // 'Close' connection
        drop(client);
@@ -868,7 +857,7 @@ impl ComputeNode {
        // Run migrations separately to not hold up cold starts
        thread::spawn(move || {
            let mut client = Client::connect(connstr.as_str(), NoTls)?;
-            handle_migrations(&mut client).context("apply_config handle_migrations")
+            handle_migrations(&mut client)
        });
        Ok(())
    }
--- a/compute_tools/src/spec.rs
+++ b/compute_tools/src/spec.rs
@@ -2,7 +2,7 @@ use std::fs::File;
 use std::path::Path;
 use std::str::FromStr;

-use anyhow::{anyhow, bail, Context, Result};
+use anyhow::{anyhow, bail, Result};
 use postgres::config::Config;
 use postgres::{Client, NoTls};
 use reqwest::StatusCode;
@@ -698,8 +698,7 @@ pub fn handle_grants(

        // it is important to run this after all grants
        if enable_anon_extension {
-            handle_extension_anon(spec, &db.owner, &mut db_client, false)
-                .context("handle_grants handle_extension_anon")?;
+            handle_extension_anon(spec, &db.owner, &mut db_client, false)?;
        }
    }

@@ -814,36 +813,28 @@ $$;"#,
        // Add new migrations below.
    ];

-    let mut func = || {
-        let query = "CREATE SCHEMA IF NOT EXISTS neon_migration";
-        client.simple_query(query)?;
+    let mut query = "CREATE SCHEMA IF NOT EXISTS neon_migration";
+    client.simple_query(query)?;

-        let query = "CREATE TABLE IF NOT EXISTS neon_migration.migration_id (key INT NOT NULL PRIMARY KEY, id bigint NOT NULL DEFAULT 0)";
-        client.simple_query(query)?;
+    query = "CREATE TABLE IF NOT EXISTS neon_migration.migration_id (key INT NOT NULL PRIMARY KEY, id bigint NOT NULL DEFAULT 0)";
+    client.simple_query(query)?;

-        let query = "INSERT INTO neon_migration.migration_id VALUES (0, 0) ON CONFLICT DO NOTHING";
-        client.simple_query(query)?;
+    query = "INSERT INTO neon_migration.migration_id VALUES (0, 0) ON CONFLICT DO NOTHING";
+    client.simple_query(query)?;

-        let query = "ALTER SCHEMA neon_migration OWNER TO cloud_admin";
-        client.simple_query(query)?;
+    query = "ALTER SCHEMA neon_migration OWNER TO cloud_admin";
+    client.simple_query(query)?;

-        let query = "REVOKE ALL ON SCHEMA neon_migration FROM PUBLIC";
-        client.simple_query(query)?;
-        Ok::<_, anyhow::Error>(())
-    };
-    func().context("handle_migrations prepare")?;
+    query = "REVOKE ALL ON SCHEMA neon_migration FROM PUBLIC";
+    client.simple_query(query)?;

-    let query = "SELECT id FROM neon_migration.migration_id";
-    let row = client
-        .query_one(query, &[])
-        .context("handle_migrations get migration_id")?;
+    query = "SELECT id FROM neon_migration.migration_id";
+    let row = client.query_one(query, &[])?;
    let mut current_migration: usize = row.get::<&str, i64>("id") as usize;
    let starting_migration_id = current_migration;

-    let query = "BEGIN";
-    client
-        .simple_query(query)
-        .context("handle_migrations begin")?;
+    query = "BEGIN";
+    client.simple_query(query)?;

    while current_migration < migrations.len() {
        let migration = &migrations[current_migration];
@@ -851,9 +842,7 @@ $$;"#,
            info!("Skip migration id={}", current_migration);
        } else {
            info!("Running migration:\n{}\n", migration);
-            client.simple_query(migration).with_context(|| {
-                format!("handle_migrations current_migration={}", current_migration)
-            })?;
+            client.simple_query(migration)?;
        }
        current_migration += 1;
    }
@@ -861,14 +850,10 @@ $$;"#,
        "UPDATE neon_migration.migration_id SET id={}",
        migrations.len()
    );
-    client
-        .simple_query(&setval)
-        .context("handle_migrations update id")?;
+    client.simple_query(&setval)?;

-    let query = "COMMIT";
-    client
-        .simple_query(query)
-        .context("handle_migrations commit")?;
+    query = "COMMIT";
+    client.simple_query(query)?;

    info!(
        "Ran {} migrations",
--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -1417,7 +1417,6 @@ fn cli() -> Command {
        .subcommand(
            Command::new("timeline")
            .about("Manage timelines")
-            .arg_required_else_help(true)
            .subcommand(Command::new("list")
                .about("List all timelines, available to this pageserver")
                .arg(tenant_id_arg.clone()))
--- a/control_plane/src/local_env.rs
+++ b/control_plane/src/local_env.rs
@@ -156,7 +156,6 @@ pub struct SafekeeperConf {
    pub remote_storage: Option<String>,
    pub backup_threads: Option<u32>,
    pub auth_enabled: bool,
-    pub listen_addr: Option<String>,
 }

 impl Default for SafekeeperConf {
@@ -170,7 +169,6 @@ impl Default for SafekeeperConf {
            remote_storage: None,
            backup_threads: None,
            auth_enabled: false,
-            listen_addr: None,
        }
    }
 }
--- a/control_plane/src/safekeeper.rs
+++ b/control_plane/src/safekeeper.rs
@@ -70,31 +70,24 @@ pub struct SafekeeperNode {
    pub pg_connection_config: PgConnectionConfig,
    pub env: LocalEnv,
    pub http_client: reqwest::Client,
-    pub listen_addr: String,
    pub http_base_url: String,
 }

 impl SafekeeperNode {
    pub fn from_env(env: &LocalEnv, conf: &SafekeeperConf) -> SafekeeperNode {
-        let listen_addr = if let Some(ref listen_addr) = conf.listen_addr {
-            listen_addr.clone()
-        } else {
-            "127.0.0.1".to_string()
-        };
        SafekeeperNode {
            id: conf.id,
            conf: conf.clone(),
-            pg_connection_config: Self::safekeeper_connection_config(&listen_addr, conf.pg_port),
+            pg_connection_config: Self::safekeeper_connection_config(conf.pg_port),
            env: env.clone(),
            http_client: reqwest::Client::new(),
-            http_base_url: format!("http://{}:{}/v1", listen_addr, conf.http_port),
-            listen_addr,
+            http_base_url: format!("http://127.0.0.1:{}/v1", conf.http_port),
        }
    }

    /// Construct libpq connection string for connecting to this safekeeper.
-    fn safekeeper_connection_config(addr: &str, port: u16) -> PgConnectionConfig {
-        PgConnectionConfig::new_host_port(url::Host::parse(addr).unwrap(), port)
+    fn safekeeper_connection_config(port: u16) -> PgConnectionConfig {
+        PgConnectionConfig::new_host_port(url::Host::parse("127.0.0.1").unwrap(), port)
    }

    pub fn datadir_path_by_id(env: &LocalEnv, sk_id: NodeId) -> PathBuf {
@@ -118,8 +111,8 @@ impl SafekeeperNode {
        );
        io::stdout().flush().unwrap();

-        let listen_pg = format!("{}:{}", self.listen_addr, self.conf.pg_port);
-        let listen_http = format!("{}:{}", self.listen_addr, self.conf.http_port);
+        let listen_pg = format!("127.0.0.1:{}", self.conf.pg_port);
+        let listen_http = format!("127.0.0.1:{}", self.conf.http_port);
        let id = self.id;
        let datadir = self.datadir_path();

@@ -146,7 +139,7 @@ impl SafekeeperNode {
            availability_zone,
        ];
        if let Some(pg_tenant_only_port) = self.conf.pg_tenant_only_port {
-            let listen_pg_tenant_only = format!("{}:{}", self.listen_addr, pg_tenant_only_port);
+            let listen_pg_tenant_only = format!("127.0.0.1:{}", pg_tenant_only_port);
            args.extend(["--listen-pg-tenant-only".to_owned(), listen_pg_tenant_only]);
        }
        if !self.conf.sync {
--- a/control_plane/storcon_cli/src/main.rs
+++ b/control_plane/storcon_cli/src/main.rs
@@ -1,15 +1,15 @@
-use std::{collections::HashMap, str::FromStr, time::Duration};
+use std::{collections::HashMap, str::FromStr};

 use clap::{Parser, Subcommand};
-use hyper::{Method, StatusCode};
+use hyper::Method;
 use pageserver_api::{
    controller_api::{
        NodeAvailabilityWrapper, NodeDescribeResponse, ShardSchedulingPolicy,
        TenantDescribeResponse, TenantPolicyRequest,
    },
    models::{
-        LocationConfigSecondary, ShardParameters, TenantConfig, TenantConfigRequest,
-        TenantCreateRequest, TenantShardSplitRequest, TenantShardSplitResponse,
+        ShardParameters, TenantConfig, TenantConfigRequest, TenantCreateRequest,
+        TenantShardSplitRequest, TenantShardSplitResponse,
    },
    shard::{ShardStripeSize, TenantShardId},
 };
@@ -120,12 +120,6 @@ enum Command {
        #[arg(long)]
        tenant_id: TenantId,
    },
-    /// For a tenant which hasn't been onboarded to the storage controller yet, add it in secondary
-    /// mode so that it can warm up content on a pageserver.
-    TenantWarmup {
-        #[arg(long)]
-        tenant_id: TenantId,
-    },
 }

 #[derive(Parser)]
@@ -587,94 +581,6 @@ async fn main() -> anyhow::Result<()> {
            }
            println!("{table}");
        }
-        Command::TenantWarmup { tenant_id } => {
-            let describe_response = storcon_client
-                .dispatch::<(), TenantDescribeResponse>(
-                    Method::GET,
-                    format!("control/v1/tenant/{tenant_id}"),
-                    None,
-                )
-                .await;
-            match describe_response {
-                Ok(describe) => {
-                    if matches!(describe.policy, PlacementPolicy::Secondary) {
-                        // Fine: it's already known to controller in secondary mode: calling
-                        // again to put it into secondary mode won't cause problems.
-                    } else {
-                        anyhow::bail!("Tenant already present with policy {:?}", describe.policy);
-                    }
-                }
-                Err(mgmt_api::Error::ApiError(StatusCode::NOT_FOUND, _)) => {
-                    // Fine: this tenant isn't know to the storage controller yet.
-                }
-                Err(e) => {
-                    // Unexpected API error
-                    return Err(e.into());
-                }
-            }
-
-            vps_client
-                .location_config(
-                    TenantShardId::unsharded(tenant_id),
-                    pageserver_api::models::LocationConfig {
-                        mode: pageserver_api::models::LocationConfigMode::Secondary,
-                        generation: None,
-                        secondary_conf: Some(LocationConfigSecondary { warm: true }),
-                        shard_number: 0,
-                        shard_count: 0,
-                        shard_stripe_size: ShardParameters::DEFAULT_STRIPE_SIZE.0,
-                        tenant_conf: TenantConfig::default(),
-                    },
-                    None,
-                    true,
-                )
-                .await?;
-
-            let describe_response = storcon_client
-                .dispatch::<(), TenantDescribeResponse>(
-                    Method::GET,
-                    format!("control/v1/tenant/{tenant_id}"),
-                    None,
-                )
-                .await?;
-
-            let secondary_ps_id = describe_response
-                .shards
-                .first()
-                .unwrap()
-                .node_secondary
-                .first()
-                .unwrap();
-
-            println!("Tenant {tenant_id} warming up on pageserver {secondary_ps_id}");
-            loop {
-                let (status, progress) = vps_client
-                    .tenant_secondary_download(
-                        TenantShardId::unsharded(tenant_id),
-                        Some(Duration::from_secs(10)),
-                    )
-                    .await?;
-                println!(
-                    "Progress: {}/{} layers, {}/{} bytes",
-                    progress.layers_downloaded,
-                    progress.layers_total,
-                    progress.bytes_downloaded,
-                    progress.bytes_total
-                );
-                match status {
-                    StatusCode::OK => {
-                        println!("Download complete");
-                        break;
-                    }
-                    StatusCode::ACCEPTED => {
-                        // Loop
-                    }
-                    _ => {
-                        anyhow::bail!("Unexpected download status: {status}");
-                    }
-                }
-            }
-        }
    }

    Ok(())
--- a/docs/storage_controller.md
+++ b/docs/storage_controller.md
@@ -1,150 +0,0 @@
-# Storage Controller
-
-## Concepts
-
-The storage controller sits between administrative API clients and pageservers, and handles the details of mapping tenants to pageserver tenant shards. For example, creating a tenant is one API call to the storage controller,
-which is mapped into many API calls to many pageservers (for multiple shards, and for secondary locations).
-
-It implements a pageserver-compatible API that may be used for CRUD operations on tenants and timelines, translating these requests into appropriate operations on the shards within a tenant, which may be on many different pageservers. Using this API, the storage controller may be used in the same way as the pageserver's administrative HTTP API, hiding
-the underlying details of how data is spread across multiple nodes.
-
-The storage controller also manages generations, high availability (via secondary locations) and live migrations for tenants under its management. This is done with a reconciliation loop pattern, where tenants have an “intent” state and a “reconcile” task that tries to make the outside world match the intent.
-
-## APIs
-
-The storage controller’s HTTP server implements four logically separate APIs:
-
- `/v1/...` path is the pageserver-compatible API. This has to be at the path root because that’s where clients expect to find it on a pageserver.
- `/control/v1/...` path is the storage controller’s API, which enables operations such as registering and management pageservers, or executing shard splits.
- `/debug/v1/...` path contains endpoints which are either exclusively used in tests, or are for use by engineers when supporting a deployed system.
- `/upcall/v1/...` path contains endpoints that are called by pageservers. This includes the `/re-attach` and `/validate` APIs used by pageservers
-  to ensure data safety with generation numbers.
-
-The API is authenticated with a JWT token, and tokens must have scope `pageserverapi` (i.e. the same scope as pageservers’ APIs).
-
-See the `http.rs` file in the source for where the HTTP APIs are implemented.
-
-## Database
-
-The storage controller uses a postgres database to persist a subset of its state. Note that the storage controller does _not_ keep all its state in the database: this is a design choice to enable most operations to be done efficiently in memory, rather than having to read from the database. See `persistence.rs` for a more comprehensive comment explaining what we do and do not persist: a useful metaphor is that we persist objects like tenants and nodes, but we do not
-persist the _relationships_ between them: the attachment state of a tenant's shards to nodes is kept in memory and
-rebuilt on startup.
-
-The file `[persistence.rs](http://persistence.rs)` contains all the code for accessing the database, and has a large doc comment that goes into more detail about exactly what we persist and why.
-
-The `diesel` crate is used for defining models & migrations.
-
-Running a local cluster with `cargo neon` automatically starts a vanilla postgress process to host the storage controller’s database.
-
-### Diesel tip: migrations
-
-If you need to modify the database schema, here’s how to create a migration:
-
- Install the diesel CLI with `cargo install diesel_cli`
- Use `diesel migration generate <name>` to create a new migration
- Populate the SQL files in the `migrations/` subdirectory
- Use `DATABASE_URL=... diesel migration run` to apply the migration you just wrote: this will update the `[schema.rs](http://schema.rs)` file automatically.
-  - This requires a running database: the easiest way to do that is to just run `cargo neon init ; cargo neon start`, which will leave a database available at `postgresql://localhost:1235/attachment_service`
- Commit the migration files and the changes to schema.rs
- If you need to iterate, you can rewind migrations with `diesel migration revert -a` and then `diesel migration run` again.
- The migrations are build into the storage controller binary, and automatically run at startup after it is deployed, so once you’ve committed a migration no further steps are needed.
-
-## storcon_cli
-
-The `storcon_cli` tool enables interactive management of the storage controller. This is usually
-only necessary for debug, but may also be used to manage nodes (e.g. marking a node as offline).
-
-`storcon_cli --help` includes details on commands.
-
-# Deploying
-
-This section is aimed at engineers deploying the storage controller outside of Neon's cloud platform, as
-part of a self-hosted system.
-
-_General note: since the default `neon_local` environment includes a storage controller, this is a useful
-reference when figuring out deployment._
-
-## Database
-
-It is **essential** that the database used by the storage controller is durable (**do not store it on ephemeral
-local disk**). This database contains pageserver generation numbers, which are essential to data safety on the pageserver.
-
-The resource requirements for the database are very low: a single CPU core and 1GiB of memory should work well for most deployments. The physical size of the database is typically under a gigabyte.
-
-Set the URL to the database using the `--database-url` CLI option.
-
-There is no need to run migrations manually: the storage controller automatically applies migrations
-when it starts up.
-
-## Configure pageservers to use the storage controller
-
-1. The pageserver `control_plane_api` and `control_plane_api_token` should be set in the `pageserver.toml` file. The API setting should
-   point to the "upcall" prefix, for example `http://127.0.0.1:1234/upcall/v1/` is used in neon_local clusters.
-2. Create a `metadata.json` file in the same directory as `pageserver.toml`: this enables the pageserver to automatically register itself
-   with the storage controller when it starts up. See the example below for the format of this file.
-
-### Example `metadata.json`
-
-```
-{"host":"acmehost.localdomain","http_host":"acmehost.localdomain","http_port":9898,"port":64000}
-```
-
- `port` and `host` refer to the _postgres_ port and host, and these must be accessible from wherever
-  postgres runs.
- `http_port` and `http_host` refer to the pageserver's HTTP api, this must be accessible from where
-  the storage controller runs.
-
-## Handle compute notifications.
-
-The storage controller independently moves tenant attachments between pageservers in response to
-changes such as a pageserver node becoming unavailable, or the tenant's shard count changing. To enable
-postgres clients to handle such changes, the storage controller calls an API hook when a tenant's pageserver
-location changes.
-
-The hook is configured using the storage controller's `--compute-hook-url` CLI option. If the hook requires
-JWT auth, the token may be provided with `--control-plane-jwt-token`. The hook will be invoked with a `PUT` request.
-
-In the Neon cloud service, this hook is implemented by Neon's internal cloud control plane. In `neon_local` systems
-the storage controller integrates directly with neon_local to reconfigure local postgres processes instead of calling
-the compute hook.
-
-When implementing an on-premise Neon deployment, you must implement a service that handles the compute hook. This is not complicated:
-the request body has format of the `ComputeHookNotifyRequest` structure, provided below for convenience.
-
-```
-struct ComputeHookNotifyRequestShard {
-    node_id: NodeId,
-    shard_number: ShardNumber,
-}
-
-struct ComputeHookNotifyRequest {
-    tenant_id: TenantId,
-    stripe_size: Option<ShardStripeSize>,
-    shards: Vec<ComputeHookNotifyRequestShard>,
-}
-```
-
-When a notification is received:
-
-1. Modify postgres configuration for this tenant:
-
-   - set `neon.pageserver_connstr` to a comma-separated list of postgres connection strings to pageservers according to the `shards` list. The
-     shards identified by `NodeId` must be converted to the address+port of the node.
-   - if stripe_size is not None, set `neon.stripe_size` to this value
-
-2. Send SIGHUP to postgres to reload configuration
-3. Respond with 200 to the notification request. Do not return success if postgres was not updated: if an error is returned, the controller
-   will retry the notification until it succeeds..
-
-### Example notification body
-
-```
-{
-  "tenant_id": "1f359dd625e519a1a4e8d7509690f6fc",
-  "stripe_size": 32768,
-  "shards": [
-      {"node_id": 344, "shard_number": 0},
-      {"node_id": 722, "shard_number": 1},
-  ],
-}
-```
--- a/libs/pageserver_api/src/models.rs
+++ b/libs/pageserver_api/src/models.rs
@@ -747,18 +747,10 @@ pub struct TimelineGcRequest {
    pub gc_horizon: Option<u64>,
 }

-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct WalRedoManagerProcessStatus {
-    pub pid: u32,
-    /// The strum-generated `into::<&'static str>()` for `pageserver::walredo::ProcessKind`.
-    /// `ProcessKind` are a transitory thing, so, they have no enum representation in `pageserver_api`.
-    pub kind: Cow<'static, str>,
-}
-
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct WalRedoManagerStatus {
    pub last_redo_at: Option<chrono::DateTime<chrono::Utc>>,
-    pub process: Option<WalRedoManagerProcessStatus>,
+    pub pid: Option<u32>,
 }

 /// The progress of a secondary tenant is mostly useful when doing a long running download: e.g. initiating
--- a/libs/pageserver_api/src/shard.rs
+++ b/libs/pageserver_api/src/shard.rs
@@ -8,89 +8,12 @@ use hex::FromHex;
 use serde::{Deserialize, Serialize};
 use utils::id::TenantId;

-/// See docs/rfcs/031-sharding-static.md for an overview of sharding.
-///
-/// This module contains a variety of types used to represent the concept of sharding
-/// a Neon tenant across multiple physical shards.  Since there are quite a few of these,
-/// we provide an summary here.
-///
-/// Types used to describe shards:
-/// - [`ShardCount`] describes how many shards make up a tenant, plus the magic `unsharded` value
-///   which identifies a tenant which is not shard-aware.  This means its storage paths do not include
-///   a shard suffix.
-/// - [`ShardNumber`] is simply the zero-based index of a shard within a tenant.
-/// - [`ShardIndex`] is the 2-tuple of `ShardCount` and `ShardNumber`, it's just like a `TenantShardId`
-///   without the tenant ID.  This is useful for things that are implicitly scoped to a particular
-///   tenant, such as layer files.
-/// - [`ShardIdentity`]` is the full description of a particular shard's parameters, in sufficient
-///   detail to convert a [`Key`] to a [`ShardNumber`] when deciding where to write/read.
-/// - The [`ShardSlug`] is a terse formatter for ShardCount and ShardNumber, written as
-///   four hex digits.  An unsharded tenant is `0000`.
-/// - [`TenantShardId`] is the unique ID of a particular shard within a particular tenant
-///
-/// Types used to describe the parameters for data distribution in a sharded tenant:
-/// - [`ShardStripeSize`] controls how long contiguous runs of [`Key`]s (stripes) are when distributed across
-///   multiple shards.  Its value is given in 8kiB pages.
-/// - [`ShardLayout`] describes the data distribution scheme, and at time of writing is
-///   always zero: this is provided for future upgrades that might introduce different
-///   data distribution schemes.
-///
-/// Examples:
-/// - A legacy unsharded tenant has one shard with ShardCount(0), ShardNumber(0), and its slug is 0000
-/// - A single sharded tenant has one shard with ShardCount(1), ShardNumber(0), and its slug is 0001
-/// - In a tenant with 4 shards, each shard has ShardCount(N), ShardNumber(i) where i in 0..N-1 (inclusive),
-///   and their slugs are 0004, 0104, 0204, and 0304.
-
 #[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Copy, Serialize, Deserialize, Debug, Hash)]
 pub struct ShardNumber(pub u8);

 #[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Copy, Serialize, Deserialize, Debug, Hash)]
 pub struct ShardCount(u8);

-/// Combination of ShardNumber and ShardCount.  For use within the context of a particular tenant,
-/// when we need to know which shard we're dealing with, but do not need to know the full
-/// ShardIdentity (because we won't be doing any page->shard mapping), and do not need to know
-/// the fully qualified TenantShardId.
-#[derive(Eq, PartialEq, PartialOrd, Ord, Clone, Copy, Hash)]
-pub struct ShardIndex {
-    pub shard_number: ShardNumber,
-    pub shard_count: ShardCount,
-}
-
-/// The ShardIdentity contains enough information to map a [`Key`] to a [`ShardNumber`],
-/// and to check whether that [`ShardNumber`] is the same as the current shard.
-#[derive(Clone, Copy, Serialize, Deserialize, Eq, PartialEq, Debug)]
-pub struct ShardIdentity {
-    pub number: ShardNumber,
-    pub count: ShardCount,
-    pub stripe_size: ShardStripeSize,
-    layout: ShardLayout,
-}
-
-/// Formatting helper, for generating the `shard_id` label in traces.
-struct ShardSlug<'a>(&'a TenantShardId);
-
-/// TenantShardId globally identifies a particular shard in a particular tenant.
-///
-/// These are written as `<TenantId>-<ShardSlug>`, for example:
-///   # The second shard in a two-shard tenant
-///   072f1291a5310026820b2fe4b2968934-0102
-///
-/// If the `ShardCount` is _unsharded_, the `TenantShardId` is written without
-/// a shard suffix and is equivalent to the encoding of a `TenantId`: this enables
-/// an unsharded [`TenantShardId`] to be used interchangably with a [`TenantId`].
-///
-/// The human-readable encoding of an unsharded TenantShardId, such as used in API URLs,
-/// is both forward and backward compatible with TenantId: a legacy TenantId can be
-/// decoded as a TenantShardId, and when re-encoded it will be parseable
-/// as a TenantId.
-#[derive(Eq, PartialEq, PartialOrd, Ord, Clone, Copy, Hash)]
-pub struct TenantShardId {
-    pub tenant_id: TenantId,
-    pub shard_number: ShardNumber,
-    pub shard_count: ShardCount,
-}
-
 impl ShardCount {
    pub const MAX: Self = Self(u8::MAX);

@@ -115,7 +38,6 @@ impl ShardCount {
        self.0
    }

-    ///
    pub fn is_unsharded(&self) -> bool {
        self.0 == 0
    }
@@ -131,6 +53,33 @@ impl ShardNumber {
    pub const MAX: Self = Self(u8::MAX);
 }

+/// TenantShardId identify the units of work for the Pageserver.
+///
+/// These are written as `<tenant_id>-<shard number><shard-count>`, for example:
+///
+///   # The second shard in a two-shard tenant
+///   072f1291a5310026820b2fe4b2968934-0102
+///
+/// Historically, tenants could not have multiple shards, and were identified
+/// by TenantId.  To support this, TenantShardId has a special legacy
+/// mode where `shard_count` is equal to zero: this represents a single-sharded
+/// tenant which should be written as a TenantId with no suffix.
+///
+/// The human-readable encoding of TenantShardId, such as used in API URLs,
+/// is both forward and backward compatible: a legacy TenantId can be
+/// decoded as a TenantShardId, and when re-encoded it will be parseable
+/// as a TenantId.
+///
+/// Note that the binary encoding is _not_ backward compatible, because
+/// at the time sharding is introduced, there are no existing binary structures
+/// containing TenantId that we need to handle.
+#[derive(Eq, PartialEq, PartialOrd, Ord, Clone, Copy, Hash)]
+pub struct TenantShardId {
+    pub tenant_id: TenantId,
+    pub shard_number: ShardNumber,
+    pub shard_count: ShardCount,
+}
+
 impl TenantShardId {
    pub fn unsharded(tenant_id: TenantId) -> Self {
        Self {
@@ -162,13 +111,10 @@ impl TenantShardId {
    }

    /// Convenience for code that has special behavior on the 0th shard.
-    pub fn is_shard_zero(&self) -> bool {
+    pub fn is_zero(&self) -> bool {
        self.shard_number == ShardNumber(0)
    }

-    /// The "unsharded" value is distinct from simply having a single shard: it represents
-    /// a tenant which is not shard-aware at all, and whose storage paths will not include
-    /// a shard suffix.
    pub fn is_unsharded(&self) -> bool {
        self.shard_number == ShardNumber(0) && self.shard_count.is_unsharded()
    }
@@ -204,6 +150,9 @@ impl TenantShardId {
    }
 }

+/// Formatting helper
+struct ShardSlug<'a>(&'a TenantShardId);
+
 impl<'a> std::fmt::Display for ShardSlug<'a> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
@@ -273,6 +222,16 @@ impl From<[u8; 18]> for TenantShardId {
    }
 }

+/// For use within the context of a particular tenant, when we need to know which
+/// shard we're dealing with, but do not need to know the full ShardIdentity (because
+/// we won't be doing any page->shard mapping), and do not need to know the fully qualified
+/// TenantShardId.
+#[derive(Eq, PartialEq, PartialOrd, Ord, Clone, Copy, Hash)]
+pub struct ShardIndex {
+    pub shard_number: ShardNumber,
+    pub shard_count: ShardCount,
+}
+
 impl ShardIndex {
    pub fn new(number: ShardNumber, count: ShardCount) -> Self {
        Self {
@@ -287,9 +246,6 @@ impl ShardIndex {
        }
    }

-    /// The "unsharded" value is distinct from simply having a single shard: it represents
-    /// a tenant which is not shard-aware at all, and whose storage paths will not include
-    /// a shard suffix.
    pub fn is_unsharded(&self) -> bool {
        self.shard_number == ShardNumber(0) && self.shard_count == ShardCount(0)
    }
@@ -357,8 +313,6 @@ impl Serialize for TenantShardId {
        if serializer.is_human_readable() {
            serializer.collect_str(self)
        } else {
-            // Note: while human encoding of [`TenantShardId`] is backward and forward
-            // compatible, this binary encoding is not.
            let mut packed: [u8; 18] = [0; 18];
            packed[0..16].clone_from_slice(&self.tenant_id.as_arr());
            packed[16] = self.shard_number.0;
@@ -436,6 +390,16 @@ const LAYOUT_BROKEN: ShardLayout = ShardLayout(255);
 /// Default stripe size in pages: 256MiB divided by 8kiB page size.
 const DEFAULT_STRIPE_SIZE: ShardStripeSize = ShardStripeSize(256 * 1024 / 8);

+/// The ShardIdentity contains the information needed for one member of map
+/// to resolve a key to a shard, and then check whether that shard is ==self.
+#[derive(Clone, Copy, Serialize, Deserialize, Eq, PartialEq, Debug)]
+pub struct ShardIdentity {
+    pub number: ShardNumber,
+    pub count: ShardCount,
+    pub stripe_size: ShardStripeSize,
+    layout: ShardLayout,
+}
+
 #[derive(thiserror::Error, Debug, PartialEq, Eq)]
 pub enum ShardConfigError {
    #[error("Invalid shard count")]
@@ -475,9 +439,6 @@ impl ShardIdentity {
        }
    }

-    /// The "unsharded" value is distinct from simply having a single shard: it represents
-    /// a tenant which is not shard-aware at all, and whose storage paths will not include
-    /// a shard suffix.
    pub fn is_unsharded(&self) -> bool {
        self.number == ShardNumber(0) && self.count == ShardCount(0)
    }
@@ -526,8 +487,6 @@ impl ShardIdentity {
    }

    /// Return true if the key should be ingested by this shard
-    ///
-    /// Shards must ingest _at least_ keys which return true from this check.
    pub fn is_key_local(&self, key: &Key) -> bool {
        assert!(!self.is_broken());
        if self.count < ShardCount(2) || (key_is_shard0(key) && self.number == ShardNumber(0)) {
@@ -538,9 +497,7 @@ impl ShardIdentity {
    }

    /// Return true if the key should be discarded if found in this shard's
-    /// data store, e.g. during compaction after a split.
-    ///
-    /// Shards _may_ drop keys which return false here, but are not obliged to.
+    /// data store, e.g. during compaction after a split
    pub fn is_key_disposable(&self, key: &Key) -> bool {
        if key_is_shard0(key) {
            // Q: Why can't we dispose of shard0 content if we're not shard 0?
@@ -566,7 +523,7 @@ impl ShardIdentity {

    /// Convenience for checking if this identity is the 0th shard in a tenant,
    /// for special cases on shard 0 such as ingesting relation sizes.
-    pub fn is_shard_zero(&self) -> bool {
+    pub fn is_zero(&self) -> bool {
        self.number == ShardNumber(0)
    }
 }
--- a/libs/utils/src/lib.rs
+++ b/libs/utils/src/lib.rs
@@ -92,8 +92,6 @@ pub mod zstd;

 pub mod env;

-pub mod poison;
-
 /// This is a shortcut to embed git sha into binaries and avoid copying the same build script to all packages
 ///
 /// we have several cases:
--- a/libs/utils/src/poison.rs
+++ b/libs/utils/src/poison.rs
@@ -1,121 +0,0 @@
-//!  Protect a piece of state from reuse after it is left in an inconsistent state.
-//!
-//!  # Example
-//!
-//!  ```
-//!  # tokio_test::block_on(async {
-//!  use utils::poison::Poison;
-//!  use std::time::Duration;
-//!
-//!  struct State {
-//!    clean: bool,
-//!  }
-//!  let state = tokio::sync::Mutex::new(Poison::new("mystate", State { clean: true }));
-//!
-//!  let mut mutex_guard = state.lock().await;
-//!  let mut poison_guard = mutex_guard.check_and_arm()?;
-//!  let state = poison_guard.data_mut();
-//!  state.clean = false;
-//!  // If we get cancelled at this await point, subsequent check_and_arm() calls will fail.
-//!  tokio::time::sleep(Duration::from_secs(10)).await;
-//!  state.clean = true;
-//!  poison_guard.disarm();
-//!  # Ok::<(), utils::poison::Error>(())
-//!  # });
-//!  ```
-
-use tracing::warn;
-
-pub struct Poison<T> {
-    what: &'static str,
-    state: State,
-    data: T,
-}
-
-#[derive(Clone, Copy)]
-enum State {
-    Clean,
-    Armed,
-    Poisoned { at: chrono::DateTime<chrono::Utc> },
-}
-
-impl<T> Poison<T> {
-    /// We log `what` `warning!` level if the [`Guard`] gets dropped without being [`Guard::disarm`]ed.
-    pub fn new(what: &'static str, data: T) -> Self {
-        Self {
-            what,
-            state: State::Clean,
-            data,
-        }
-    }
-
-    /// Check for poisoning and return a [`Guard`] that provides access to the wrapped state.
-    pub fn check_and_arm(&mut self) -> Result<Guard<T>, Error> {
-        match self.state {
-            State::Clean => {
-                self.state = State::Armed;
-                Ok(Guard(self))
-            }
-            State::Armed => unreachable!("transient state"),
-            State::Poisoned { at } => Err(Error::Poisoned {
-                what: self.what,
-                at,
-            }),
-        }
-    }
-}
-
-/// Use [`Self::data`] and [`Self::data_mut`] to access the wrapped state.
-/// Once modifications are done, use [`Self::disarm`].
-/// If [`Guard`] gets dropped instead of calling [`Self::disarm`], the state is poisoned
-/// and subsequent calls to [`Poison::check_and_arm`] will fail with an error.
-pub struct Guard<'a, T>(&'a mut Poison<T>);
-
-impl<'a, T> Guard<'a, T> {
-    pub fn data(&self) -> &T {
-        &self.0.data
-    }
-    pub fn data_mut(&mut self) -> &mut T {
-        &mut self.0.data
-    }
-
-    pub fn disarm(self) {
-        match self.0.state {
-            State::Clean => unreachable!("we set it to Armed in check_and_arm()"),
-            State::Armed => {
-                self.0.state = State::Clean;
-            }
-            State::Poisoned { at } => {
-                unreachable!("we fail check_and_arm() if it's in that state: {at}")
-            }
-        }
-    }
-}
-
-impl<'a, T> Drop for Guard<'a, T> {
-    fn drop(&mut self) {
-        match self.0.state {
-            State::Clean => {
-                // set by disarm()
-            }
-            State::Armed => {
-                // still armed => poison it
-                let at = chrono::Utc::now();
-                self.0.state = State::Poisoned { at };
-                warn!(at=?at, "poisoning {}", self.0.what);
-            }
-            State::Poisoned { at } => {
-                unreachable!("we fail check_and_arm() if it's in that state: {at}")
-            }
-        }
-    }
-}
-
-#[derive(thiserror::Error, Debug)]
-pub enum Error {
-    #[error("poisoned at {at}: {what}")]
-    Poisoned {
-        what: &'static str,
-        at: chrono::DateTime<chrono::Utc>,
-    },
-}
--- a/libs/utils/src/sync/heavier_once_cell.rs
+++ b/libs/utils/src/sync/heavier_once_cell.rs
@@ -192,14 +192,6 @@ impl<T> OnceCell<T> {
        }
    }

-    /// Like [`Guard::take_and_deinit`], but will return `None` if this OnceCell was never
-    /// initialized.
-    pub fn take_and_deinit(&mut self) -> Option<(T, InitPermit)> {
-        let inner = self.inner.get_mut().unwrap();
-
-        inner.take_and_deinit()
-    }
-
    /// Return the number of [`Self::get_or_init`] calls waiting for initialization to complete.
    pub fn initializer_count(&self) -> usize {
        self.initializers.load(Ordering::Relaxed)
@@ -254,23 +246,15 @@ impl<'a, T> Guard<'a, T> {
    /// The permit will be on a semaphore part of the new internal value, and any following
    /// [`OnceCell::get_or_init`] will wait on it to complete.
    pub fn take_and_deinit(mut self) -> (T, InitPermit) {
-        self.0
-            .take_and_deinit()
-            .expect("guard is not created unless value has been initialized")
-    }
-}
-
-impl<T> Inner<T> {
-    pub fn take_and_deinit(&mut self) -> Option<(T, InitPermit)> {
-        let value = self.value.take()?;
-
        let mut swapped = Inner::default();
        let sem = swapped.init_semaphore.clone();
        // acquire and forget right away, moving the control over to InitPermit
        sem.try_acquire().expect("we just created this").forget();
-        let permit = InitPermit(sem);
-        std::mem::swap(self, &mut swapped);
-        Some((value, permit))
+        std::mem::swap(&mut *self.0, &mut swapped);
+        swapped
+            .value
+            .map(|v| (v, InitPermit(sem)))
+            .expect("guard is not created unless value has been initialized")
    }
 }

@@ -279,13 +263,6 @@ impl<T> Inner<T> {
 /// On drop, this type will return the permit.
 pub struct InitPermit(Arc<tokio::sync::Semaphore>);

-impl std::fmt::Debug for InitPermit {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let ptr = Arc::as_ptr(&self.0) as *const ();
-        f.debug_tuple("InitPermit").field(&ptr).finish()
-    }
-}
-
 impl Drop for InitPermit {
    fn drop(&mut self) {
        assert_eq!(
@@ -582,22 +559,4 @@ mod tests {

        assert_eq!(*target.get().unwrap(), 11);
    }
-
-    #[tokio::test]
-    async fn take_and_deinit_on_mut() {
-        use std::convert::Infallible;
-
-        let mut target = OnceCell::<u32>::default();
-        assert!(target.take_and_deinit().is_none());
-
-        target
-            .get_or_init(|permit| async move { Ok::<_, Infallible>((42, permit)) })
-            .await
-            .unwrap();
-
-        let again = target.take_and_deinit();
-        assert!(matches!(again, Some((42, _))), "{again:?}");
-
-        assert!(target.take_and_deinit().is_none());
-    }
 }
--- a/pageserver/benches/bench_walredo.rs
+++ b/pageserver/benches/bench_walredo.rs
@@ -27,50 +27,30 @@
 //!
 //! # Reference Numbers
 //!
-//! 2024-04-15 on i3en.3xlarge
+//! 2024-04-04 on i3en.3xlarge
 //!
 //! ```text
-//! async-short/1           time:   [24.584 µs 24.737 µs 24.922 µs]
-//! async-short/2           time:   [33.479 µs 33.660 µs 33.888 µs]
-//! async-short/4           time:   [42.713 µs 43.046 µs 43.440 µs]
-//! async-short/8           time:   [71.814 µs 72.478 µs 73.240 µs]
-//! async-short/16          time:   [132.73 µs 134.45 µs 136.22 µs]
-//! async-short/32          time:   [258.31 µs 260.73 µs 263.27 µs]
-//! async-short/64          time:   [511.61 µs 514.44 µs 517.51 µs]
-//! async-short/128         time:   [992.64 µs 998.23 µs 1.0042 ms]
-//! async-medium/1          time:   [110.11 µs 110.50 µs 110.96 µs]
-//! async-medium/2          time:   [153.06 µs 153.85 µs 154.99 µs]
-//! async-medium/4          time:   [317.51 µs 319.92 µs 322.85 µs]
-//! async-medium/8          time:   [638.30 µs 644.68 µs 652.12 µs]
-//! async-medium/16         time:   [1.2651 ms 1.2773 ms 1.2914 ms]
-//! async-medium/32         time:   [2.5117 ms 2.5410 ms 2.5720 ms]
-//! async-medium/64         time:   [4.8088 ms 4.8555 ms 4.9047 ms]
-//! async-medium/128        time:   [8.8311 ms 8.9849 ms 9.1263 ms]
-//! sync-short/1            time:   [25.503 µs 25.626 µs 25.771 µs]
-//! sync-short/2            time:   [30.850 µs 31.013 µs 31.208 µs]
-//! sync-short/4            time:   [45.543 µs 45.856 µs 46.193 µs]
-//! sync-short/8            time:   [84.114 µs 84.639 µs 85.220 µs]
-//! sync-short/16           time:   [185.22 µs 186.15 µs 187.13 µs]
-//! sync-short/32           time:   [377.43 µs 378.87 µs 380.46 µs]
-//! sync-short/64           time:   [756.49 µs 759.04 µs 761.70 µs]
-//! sync-short/128          time:   [1.4825 ms 1.4874 ms 1.4923 ms]
-//! sync-medium/1           time:   [105.66 µs 106.01 µs 106.43 µs]
-//! sync-medium/2           time:   [153.10 µs 153.84 µs 154.72 µs]
-//! sync-medium/4           time:   [327.13 µs 329.44 µs 332.27 µs]
-//! sync-medium/8           time:   [654.26 µs 658.73 µs 663.63 µs]
-//! sync-medium/16          time:   [1.2682 ms 1.2748 ms 1.2816 ms]
-//! sync-medium/32          time:   [2.4456 ms 2.4595 ms 2.4731 ms]
-//! sync-medium/64          time:   [4.6523 ms 4.6890 ms 4.7256 ms]
-//! sync-medium/128         time:   [8.7215 ms 8.8323 ms 8.9344 ms]
+//! short/1                 time:   [25.925 µs 26.060 µs 26.209 µs]
+//! short/2                 time:   [31.277 µs 31.483 µs 31.722 µs]
+//! short/4                 time:   [45.496 µs 45.831 µs 46.182 µs]
+//! short/8                 time:   [84.298 µs 84.920 µs 85.566 µs]
+//! short/16                time:   [185.04 µs 186.41 µs 187.88 µs]
+//! short/32                time:   [385.01 µs 386.77 µs 388.70 µs]
+//! short/64                time:   [770.24 µs 773.04 µs 776.04 µs]
+//! short/128               time:   [1.5017 ms 1.5064 ms 1.5113 ms]
+//! medium/1                time:   [106.65 µs 107.20 µs 107.85 µs]
+//! medium/2                time:   [153.28 µs 154.24 µs 155.56 µs]
+//! medium/4                time:   [325.67 µs 327.01 µs 328.71 µs]
+//! medium/8                time:   [646.82 µs 650.17 µs 653.91 µs]
+//! medium/16               time:   [1.2645 ms 1.2701 ms 1.2762 ms]
+//! medium/32               time:   [2.4409 ms 2.4550 ms 2.4692 ms]
+//! medium/64               time:   [4.6814 ms 4.7114 ms 4.7408 ms]
+//! medium/128              time:   [8.7790 ms 8.9037 ms 9.0282 ms]
 //! ```

 use bytes::{Buf, Bytes};
 use criterion::{BenchmarkId, Criterion};
-use pageserver::{
-    config::PageServerConf,
-    walrecord::NeonWalRecord,
-    walredo::{PostgresRedoManager, ProcessKind},
-};
+use pageserver::{config::PageServerConf, walrecord::NeonWalRecord, walredo::PostgresRedoManager};
 use pageserver_api::{key::Key, shard::TenantShardId};
 use std::{
    sync::Arc,
@@ -80,39 +60,33 @@ use tokio::{sync::Barrier, task::JoinSet};
 use utils::{id::TenantId, lsn::Lsn};

 fn bench(c: &mut Criterion) {
-    for process_kind in &[ProcessKind::Async, ProcessKind::Sync] {
-        {
-            let nclients = [1, 2, 4, 8, 16, 32, 64, 128];
-            for nclients in nclients {
-                let mut group = c.benchmark_group(format!("{process_kind}-short"));
-                group.bench_with_input(
-                    BenchmarkId::from_parameter(nclients),
-                    &nclients,
-                    |b, nclients| {
-                        let redo_work = Arc::new(Request::short_input());
-                        b.iter_custom(|iters| {
-                            bench_impl(*process_kind, Arc::clone(&redo_work), iters, *nclients)
-                        });
-                    },
-                );
-            }
+    {
+        let nclients = [1, 2, 4, 8, 16, 32, 64, 128];
+        for nclients in nclients {
+            let mut group = c.benchmark_group("short");
+            group.bench_with_input(
+                BenchmarkId::from_parameter(nclients),
+                &nclients,
+                |b, nclients| {
+                    let redo_work = Arc::new(Request::short_input());
+                    b.iter_custom(|iters| bench_impl(Arc::clone(&redo_work), iters, *nclients));
+                },
+            );
        }
+    }

-        {
-            let nclients = [1, 2, 4, 8, 16, 32, 64, 128];
-            for nclients in nclients {
-                let mut group = c.benchmark_group(format!("{process_kind}-medium"));
-                group.bench_with_input(
-                    BenchmarkId::from_parameter(nclients),
-                    &nclients,
-                    |b, nclients| {
-                        let redo_work = Arc::new(Request::medium_input());
-                        b.iter_custom(|iters| {
-                            bench_impl(*process_kind, Arc::clone(&redo_work), iters, *nclients)
-                        });
-                    },
-                );
-            }
+    {
+        let nclients = [1, 2, 4, 8, 16, 32, 64, 128];
+        for nclients in nclients {
+            let mut group = c.benchmark_group("medium");
+            group.bench_with_input(
+                BenchmarkId::from_parameter(nclients),
+                &nclients,
+                |b, nclients| {
+                    let redo_work = Arc::new(Request::medium_input());
+                    b.iter_custom(|iters| bench_impl(Arc::clone(&redo_work), iters, *nclients));
+                },
+            );
        }
    }
 }
@@ -120,16 +94,10 @@ criterion::criterion_group!(benches, bench);
 criterion::criterion_main!(benches);

 // Returns the sum of each client's wall-clock time spent executing their share of the n_redos.
-fn bench_impl(
-    process_kind: ProcessKind,
-    redo_work: Arc<Request>,
-    n_redos: u64,
-    nclients: u64,
-) -> Duration {
+fn bench_impl(redo_work: Arc<Request>, n_redos: u64, nclients: u64) -> Duration {
    let repo_dir = camino_tempfile::tempdir_in(env!("CARGO_TARGET_TMPDIR")).unwrap();

-    let mut conf = PageServerConf::dummy_conf(repo_dir.path().to_path_buf());
-    conf.walredo_process_kind = process_kind;
+    let conf = PageServerConf::dummy_conf(repo_dir.path().to_path_buf());
    let conf = Box::leak(Box::new(conf));
    let tenant_shard_id = TenantShardId::unsharded(TenantId::generate());

@@ -145,40 +113,25 @@ fn bench_impl(
    let manager = PostgresRedoManager::new(conf, tenant_shard_id);
    let manager = Arc::new(manager);

-    // divide the amount of work equally among the clients.
-    let nredos_per_client = n_redos / nclients;
    for _ in 0..nclients {
        rt.block_on(async {
            tasks.spawn(client(
                Arc::clone(&manager),
                Arc::clone(&start),
                Arc::clone(&redo_work),
-                nredos_per_client,
+                // divide the amount of work equally among the clients
+                n_redos / nclients,
            ))
        });
    }

-    let elapsed = rt.block_on(async move {
-        let mut total_wallclock_time = Duration::ZERO;
+    rt.block_on(async move {
+        let mut total_wallclock_time = std::time::Duration::from_millis(0);
        while let Some(res) = tasks.join_next().await {
            total_wallclock_time += res.unwrap();
        }
        total_wallclock_time
-    });
-
-    // consistency check to ensure process kind setting worked
-    if nredos_per_client > 0 {
-        assert_eq!(
-            manager
-                .status()
-                .process
-                .map(|p| p.kind)
-                .expect("the benchmark work causes a walredo process to be spawned"),
-            std::borrow::Cow::Borrowed(process_kind.into())
-        );
-    }
-
-    elapsed
+    })
 }

 async fn client(
--- a/pageserver/src/bin/pageserver.rs
+++ b/pageserver/src/bin/pageserver.rs
@@ -285,7 +285,6 @@ fn start_pageserver(
    ))
    .unwrap();
    pageserver::preinitialize_metrics();
-    pageserver::metrics::wal_redo::set_process_kind_metric(conf.walredo_process_kind);

    // If any failpoints were set from FAILPOINTS environment variable,
    // print them to the log for debugging purposes
--- a/pageserver/src/config.rs
+++ b/pageserver/src/config.rs
@@ -97,8 +97,6 @@ pub mod defaults {

    pub const DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB: usize = 0;

-    pub const DEFAULT_WALREDO_PROCESS_KIND: &str = "sync";
-
    ///
    /// Default built-in configuration file.
    ///
@@ -142,8 +140,6 @@ pub mod defaults {

 #validate_vectored_get = '{DEFAULT_VALIDATE_VECTORED_GET}'

-#walredo_process_kind = '{DEFAULT_WALREDO_PROCESS_KIND}'
-
 [tenant_config]
 #checkpoint_distance = {DEFAULT_CHECKPOINT_DISTANCE} # in bytes
 #checkpoint_timeout = {DEFAULT_CHECKPOINT_TIMEOUT}
@@ -294,8 +290,6 @@ pub struct PageServerConf {
    ///
    /// Setting this to zero disables limits on total ephemeral layer size.
    pub ephemeral_bytes_per_memory_kb: usize,
-
-    pub walredo_process_kind: crate::walredo::ProcessKind,
 }

 /// We do not want to store this in a PageServerConf because the latter may be logged
@@ -419,8 +413,6 @@ struct PageServerConfigBuilder {
    validate_vectored_get: BuilderValue<bool>,

    ephemeral_bytes_per_memory_kb: BuilderValue<usize>,
-
-    walredo_process_kind: BuilderValue<crate::walredo::ProcessKind>,
 }

 impl PageServerConfigBuilder {
@@ -508,8 +500,6 @@ impl PageServerConfigBuilder {
            )),
            validate_vectored_get: Set(DEFAULT_VALIDATE_VECTORED_GET),
            ephemeral_bytes_per_memory_kb: Set(DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB),
-
-            walredo_process_kind: Set(DEFAULT_WALREDO_PROCESS_KIND.parse().unwrap()),
        }
    }
 }
@@ -693,10 +683,6 @@ impl PageServerConfigBuilder {
        self.ephemeral_bytes_per_memory_kb = BuilderValue::Set(value);
    }

-    pub fn get_walredo_process_kind(&mut self, value: crate::walredo::ProcessKind) {
-        self.walredo_process_kind = BuilderValue::Set(value);
-    }
-
    pub fn build(self) -> anyhow::Result<PageServerConf> {
        let default = Self::default_values();

@@ -753,7 +739,6 @@ impl PageServerConfigBuilder {
                max_vectored_read_bytes,
                validate_vectored_get,
                ephemeral_bytes_per_memory_kb,
-                walredo_process_kind,
            }
            CUSTOM LOGIC
            {
@@ -1047,9 +1032,6 @@ impl PageServerConf {
                "ephemeral_bytes_per_memory_kb" => {
                    builder.get_ephemeral_bytes_per_memory_kb(parse_toml_u64("ephemeral_bytes_per_memory_kb", item)? as usize)
                }
-                "walredo_process_kind" => {
-                    builder.get_walredo_process_kind(parse_toml_from_str("walredo_process_kind", item)?)
-                }
                _ => bail!("unrecognized pageserver option '{key}'"),
            }
        }
@@ -1132,7 +1114,6 @@ impl PageServerConf {
            ),
            validate_vectored_get: defaults::DEFAULT_VALIDATE_VECTORED_GET,
            ephemeral_bytes_per_memory_kb: defaults::DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB,
-            walredo_process_kind: defaults::DEFAULT_WALREDO_PROCESS_KIND.parse().unwrap(),
        }
    }
 }
@@ -1370,8 +1351,7 @@ background_task_maximum_delay = '334 s'
                        .expect("Invalid default constant")
                ),
                validate_vectored_get: defaults::DEFAULT_VALIDATE_VECTORED_GET,
-                ephemeral_bytes_per_memory_kb: defaults::DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB,
-                walredo_process_kind: defaults::DEFAULT_WALREDO_PROCESS_KIND.parse().unwrap(),
+                ephemeral_bytes_per_memory_kb: defaults::DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB
            },
            "Correct defaults should be used when no config values are provided"
        );
@@ -1443,8 +1423,7 @@ background_task_maximum_delay = '334 s'
                        .expect("Invalid default constant")
                ),
                validate_vectored_get: defaults::DEFAULT_VALIDATE_VECTORED_GET,
-                ephemeral_bytes_per_memory_kb: defaults::DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB,
-                walredo_process_kind: defaults::DEFAULT_WALREDO_PROCESS_KIND.parse().unwrap(),
+                ephemeral_bytes_per_memory_kb: defaults::DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB
            },
            "Should be able to parse all basic config values correctly"
        );
--- a/pageserver/src/consumption_metrics.rs
+++ b/pageserver/src/consumption_metrics.rs
@@ -304,7 +304,7 @@ async fn calculate_synthetic_size_worker(
                continue;
            }

-            if !tenant_shard_id.is_shard_zero() {
+            if !tenant_shard_id.is_zero() {
                // We only send consumption metrics from shard 0, so don't waste time calculating
                // synthetic size on other shards.
                continue;
--- a/pageserver/src/consumption_metrics/metrics.rs
+++ b/pageserver/src/consumption_metrics/metrics.rs
@@ -199,7 +199,7 @@ pub(super) async fn collect_all_metrics(
    };

    let tenants = futures::stream::iter(tenants).filter_map(|(id, state, _)| async move {
-        if state != TenantState::Active || !id.is_shard_zero() {
+        if state != TenantState::Active || !id.is_zero() {
            None
        } else {
            tenant_manager
--- a/pageserver/src/http/openapi_spec.yml
+++ b/pageserver/src/http/openapi_spec.yml
@@ -58,6 +58,24 @@ paths:
      responses:
        "200":
          description: The reload completed successfully.
+        "401":
+          description: Unauthorized Error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/UnauthorizedError"
+        "403":
+          description: Forbidden Error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ForbiddenError"
+        "500":
+          description: Generic operation error (also hits if no keys were found)
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"

  /v1/tenant/{tenant_id}:
    parameters:
@@ -75,14 +93,62 @@ paths:
            application/json:
              schema:
                $ref: "#/components/schemas/TenantInfo"
+        "400":
+          description: Error when no tenant id found in path or no timeline id
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"
+        "401":
+          description: Unauthorized Error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/UnauthorizedError"
+        "403":
+          description: Forbidden Error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ForbiddenError"
+        "500":
+          description: Generic operation error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"
+        "503":
+          description: Temporarily unavailable, please retry.
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ServiceUnavailableError"

    delete:
      description: |
        Attempts to delete specified tenant. 500, 503 and 409 errors should be retried until 404 is retrieved.
        404 means that deletion successfully finished"
      responses:
+        "400":
+          description: Error when no tenant id found in path
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"
+        "401":
+          description: Unauthorized Error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/UnauthorizedError"
+        "403":
+          description: Forbidden Error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ForbiddenError"
        "404":
-          description: Tenant not found. This is the success path.
+          description: Tenant not found
          content:
            application/json:
              schema:
@@ -99,6 +165,18 @@ paths:
            application/json:
              schema:
                $ref: "#/components/schemas/PreconditionFailedError"
+        "500":
+          description: Generic operation error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"
+        "503":
+          description: Temporarily unavailable, please retry.
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ServiceUnavailableError"

  /v1/tenant/{tenant_id}/time_travel_remote_storage:
    parameters:
@@ -128,6 +206,36 @@ paths:
            application/json:
              schema:
                type: string
+        "400":
+          description: Error when no tenant id found in path or invalid timestamp
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"
+        "401":
+          description: Unauthorized Error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/UnauthorizedError"
+        "403":
+          description: Forbidden Error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ForbiddenError"
+        "500":
+          description: Generic operation error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"
+        "503":
+          description: Temporarily unavailable, please retry.
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ServiceUnavailableError"

  /v1/tenant/{tenant_id}/timeline:
    parameters:
@@ -147,6 +255,36 @@ paths:
                type: array
                items:
                  $ref: "#/components/schemas/TimelineInfo"
+        "400":
+          description: Error when no tenant id found in path
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"
+        "401":
+          description: Unauthorized Error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/UnauthorizedError"
+        "403":
+          description: Forbidden Error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ForbiddenError"
+        "500":
+          description: Generic operation error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"
+        "503":
+          description: Temporarily unavailable, please retry.
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ServiceUnavailableError"


  /v1/tenant/{tenant_id}/timeline/{timeline_id}:
@@ -171,12 +309,60 @@ paths:
            application/json:
              schema:
                $ref: "#/components/schemas/TimelineInfo"
+        "400":
+          description: Error when no tenant id found in path or no timeline id
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"
+        "401":
+          description: Unauthorized Error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/UnauthorizedError"
+        "403":
+          description: Forbidden Error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ForbiddenError"
+        "500":
+          description: Generic operation error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"
+        "503":
+          description: Temporarily unavailable, please retry.
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ServiceUnavailableError"

    delete:
      description: "Attempts to delete specified timeline. 500 and 409 errors should be retried"
      responses:
+        "400":
+          description: Error when no tenant id found in path or no timeline id
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"
+        "401":
+          description: Unauthorized Error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/UnauthorizedError"
+        "403":
+          description: Forbidden Error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ForbiddenError"
        "404":
-          description: Timeline not found. This is the success path.
+          description: Timeline not found
          content:
            application/json:
              schema:
@@ -193,6 +379,18 @@ paths:
            application/json:
              schema:
                $ref: "#/components/schemas/PreconditionFailedError"
+        "500":
+          description: Generic operation error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"
+        "503":
+          description: Temporarily unavailable, please retry.
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ServiceUnavailableError"

  /v1/tenant/{tenant_id}/timeline/{timeline_id}/get_timestamp_of_lsn:
    parameters:
@@ -225,6 +423,36 @@ paths:
              schema:
                type: string
                format: date-time
+        "400":
+          description: Error when no tenant id found in path, no timeline id or invalid timestamp
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"
+        "401":
+          description: Unauthorized Error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/UnauthorizedError"
+        "403":
+          description: Forbidden Error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ForbiddenError"
+        "404":
+          description: Timeline not found, or there is no timestamp information for the given lsn
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/NotFoundError"
+        "500":
+          description: Generic operation error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"

  /v1/tenant/{tenant_id}/timeline/{timeline_id}/get_lsn_by_timestamp:
    parameters:
@@ -256,6 +484,36 @@ paths:
            application/json:
              schema:
                $ref: "#/components/schemas/LsnByTimestampResponse"
+        "400":
+          description: Error when no tenant id found in path, no timeline id or invalid timestamp
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"
+        "401":
+          description: Unauthorized Error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/UnauthorizedError"
+        "403":
+          description: Forbidden Error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ForbiddenError"
+        "500":
+          description: Generic operation error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"
+        "503":
+          description: Temporarily unavailable, please retry.
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ServiceUnavailableError"

  /v1/tenant/{tenant_id}/timeline/{timeline_id}/do_gc:
    parameters:
@@ -279,6 +537,36 @@ paths:
            application/json:
              schema:
                type: string
+        "400":
+          description: Error when no tenant id found in path, no timeline id or invalid timestamp
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"
+        "401":
+          description: Unauthorized Error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/UnauthorizedError"
+        "403":
+          description: Forbidden Error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ForbiddenError"
+        "500":
+          description: Generic operation error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"
+        "503":
+          description: Temporarily unavailable, please retry.
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ServiceUnavailableError"
  /v1/tenant/{tenant_shard_id}/location_config:
    parameters:
      - name: tenant_shard_id
@@ -340,6 +628,24 @@ paths:
            application/json:
              schema:
                $ref: "#/components/schemas/TenantLocationConfigResponse"
+        "503":
+          description: Tenant's state cannot be changed right now.  Wait a few seconds and retry.
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"
+        "401":
+          description: Unauthorized Error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/UnauthorizedError"
+        "403":
+          description: Forbidden Error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ForbiddenError"
        "409":
          description: |
            The tenant is already known to Pageserver in some way,
@@ -356,6 +662,12 @@ paths:
            application/json:
              schema:
                $ref: "#/components/schemas/ConflictError"
+        "500":
+          description: Generic operation error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"
  /v1/tenant/{tenant_id}/ignore:
    parameters:
      - name: tenant_id
@@ -372,6 +684,36 @@ paths:
      responses:
        "200":
          description: Tenant ignored
+        "400":
+          description: Error when no tenant id found in path parameters
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"
+        "401":
+          description: Unauthorized Error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/UnauthorizedError"
+        "403":
+          description: Forbidden Error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ForbiddenError"
+        "500":
+          description: Generic operation error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"
+        "503":
+          description: Temporarily unavailable, please retry.
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ServiceUnavailableError"


  /v1/tenant/{tenant_id}/load:
@@ -398,6 +740,36 @@ paths:
      responses:
        "202":
          description: Tenant scheduled to load successfully
+        "400":
+          description: Error when no tenant id found in path parameters
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"
+        "401":
+          description: Unauthorized Error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/UnauthorizedError"
+        "403":
+          description: Forbidden Error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ForbiddenError"
+        "500":
+          description: Generic operation error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"
+        "503":
+          description: Temporarily unavailable, please retry.
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ServiceUnavailableError"

  /v1/tenant/{tenant_id}/{timeline_id}/preserve_initdb_archive:
    parameters:
@@ -418,6 +790,37 @@ paths:
      responses:
        "202":
          description: Tenant scheduled to load successfully
+        "404":
+          description: No tenant or timeline found for the specified ids
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"
+        "401":
+          description: Unauthorized Error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/UnauthorizedError"
+        "403":
+          description: Forbidden Error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ForbiddenError"
+        "500":
+          description: Generic operation error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"
+        "503":
+          description: Temporarily unavailable, please retry.
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ServiceUnavailableError"
+

  /v1/tenant/{tenant_id}/synthetic_size:
    parameters:
@@ -436,8 +839,31 @@ paths:
            application/json:
              schema:
                $ref: "#/components/schemas/SyntheticSizeResponse"
+        "401":
+          description: Unauthorized Error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/UnauthorizedError"
+        "403":
+          description: Forbidden Error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ForbiddenError"
+        "500":
+          description: Generic operation error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"
+        "503":
+          description: Temporarily unavailable, please retry.
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ServiceUnavailableError"

-  # This route has no handler. TODO: remove?
  /v1/tenant/{tenant_id}/size:
    parameters:
      - name: tenant_id
@@ -519,6 +945,18 @@ paths:
      responses:
        "200":
          description: Success
+        "500":
+          description: Generic operation error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"
+        "503":
+          description: Temporarily unavailable, please retry.
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ServiceUnavailableError"

  /v1/tenant/{tenant_shard_id}/secondary/download:
    parameters:
@@ -549,6 +987,20 @@ paths:
            application/json:
              schema:
                $ref: "#/components/schemas/SecondaryProgress"
+        "500":
+          description: Generic operation error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"
+        "503":
+          description: Temporarily unavailable, please retry.
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ServiceUnavailableError"
+
+

  /v1/tenant/{tenant_id}/timeline/:
    parameters:
@@ -591,6 +1043,24 @@ paths:
            application/json:
              schema:
                $ref: "#/components/schemas/TimelineInfo"
+        "400":
+          description: Malformed timeline create request
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"
+        "401":
+          description: Unauthorized Error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/UnauthorizedError"
+        "403":
+          description: Forbidden Error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ForbiddenError"
        "406":
          description: Permanently unsatisfiable request, don't retry.
          content:
@@ -609,6 +1079,18 @@ paths:
            application/json:
              schema:
                $ref: "#/components/schemas/Error"
+        "500":
+          description: Generic operation error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"
+        "503":
+          description: Temporarily unavailable, please retry.
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ServiceUnavailableError"

  /v1/tenant/:
    get:
@@ -622,6 +1104,30 @@ paths:
                type: array
                items:
                  $ref: "#/components/schemas/TenantInfo"
+        "401":
+          description: Unauthorized Error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/UnauthorizedError"
+        "403":
+          description: Forbidden Error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ForbiddenError"
+        "500":
+          description: Generic operation error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"
+        "503":
+          description: Temporarily unavailable, please retry.
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ServiceUnavailableError"

    post:
      description: |
@@ -642,12 +1148,43 @@ paths:
            application/json:
              schema:
                type: string
+        "400":
+          description: Malformed tenant create request
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"
+        "401":
+          description: Unauthorized Error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/UnauthorizedError"
+        "403":
+          description: Forbidden Error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ForbiddenError"
        "409":
          description: Tenant already exists, creation skipped
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ConflictError"
+        "500":
+          description: Generic operation error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"
+        "503":
+          description: Temporarily unavailable, please retry.
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ServiceUnavailableError"
+

  /v1/tenant/config:
    put:
@@ -669,6 +1206,36 @@ paths:
                type: array
                items:
                  $ref: "#/components/schemas/TenantInfo"
+        "400":
+          description: Malformed tenant config request
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"
+        "401":
+          description: Unauthorized Error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/UnauthorizedError"
+        "403":
+          description: Forbidden Error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ForbiddenError"
+        "500":
+          description: Generic operation error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"
+        "503":
+          description: Temporarily unavailable, please retry.
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ServiceUnavailableError"

  /v1/tenant/{tenant_id}/config/:
    parameters:
@@ -688,6 +1255,42 @@ paths:
            application/json:
              schema:
                $ref: "#/components/schemas/TenantConfigResponse"
+        "400":
+          description: Malformed get tenanant config request
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"
+        "401":
+          description: Unauthorized Error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/UnauthorizedError"
+        "403":
+          description: Forbidden Error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ForbiddenError"
+        "404":
+          description: Tenand or timeline were not found
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/NotFoundError"
+        "500":
+          description: Generic operation error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"
+        "503":
+          description: Temporarily unavailable, please retry.
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ServiceUnavailableError"

  /v1/utilization:
    get:
@@ -701,6 +1304,12 @@ paths:
              application/json:
                schema:
                  $ref: "#/components/schemas/PageserverUtilization"
+        "500":
+          description: Generic operation error
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Error"

 components:
  securitySchemes:
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -457,12 +457,8 @@ async fn reload_auth_validation_keys_handler(
            json_response(StatusCode::OK, ())
        }
        Err(e) => {
-            let err_msg = "Error reloading public keys";
            warn!("Error reloading public keys from {key_path:?}: {e:}");
-            json_response(
-                StatusCode::INTERNAL_SERVER_ERROR,
-                HttpErrorBody::from_msg(err_msg.to_string()),
-            )
+            json_response(StatusCode::INTERNAL_SERVER_ERROR, ())
        }
    }
 }
@@ -700,7 +696,7 @@ async fn get_lsn_by_timestamp_handler(
    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
    let state = get_state(&request);

-    if !tenant_shard_id.is_shard_zero() {
+    if !tenant_shard_id.is_zero() {
        // Requires SLRU contents, which are only stored on shard zero
        return Err(ApiError::BadRequest(anyhow!(
            "Size calculations are only available on shard zero"
@@ -751,7 +747,7 @@ async fn get_timestamp_of_lsn_handler(
    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
    let state = get_state(&request);

-    if !tenant_shard_id.is_shard_zero() {
+    if !tenant_shard_id.is_zero() {
        // Requires SLRU contents, which are only stored on shard zero
        return Err(ApiError::BadRequest(anyhow!(
            "Size calculations are only available on shard zero"
@@ -776,9 +772,7 @@ async fn get_timestamp_of_lsn_handler(
            let time = format_rfc3339(postgres_ffi::from_pg_timestamp(time)).to_string();
            json_response(StatusCode::OK, time)
        }
-        None => Err(ApiError::NotFound(
-            anyhow::anyhow!("Timestamp for lsn {} not found", lsn).into(),
-        )),
+        None => json_response(StatusCode::NOT_FOUND, ()),
    }
 }

@@ -1092,7 +1086,7 @@ async fn tenant_size_handler(
    let headers = request.headers();
    let state = get_state(&request);

-    if !tenant_shard_id.is_shard_zero() {
+    if !tenant_shard_id.is_zero() {
        return Err(ApiError::BadRequest(anyhow!(
            "Size calculations are only available on shard zero"
        )));
--- a/pageserver/src/metrics.rs
+++ b/pageserver/src/metrics.rs
@@ -1518,8 +1518,7 @@ pub(crate) struct SecondaryModeMetrics {
    pub(crate) download_heatmap: IntCounter,
    pub(crate) download_layer: IntCounter,
 }
-pub(crate) static SECONDARY_MODE: Lazy<SecondaryModeMetrics> = Lazy::new(|| {
-    SecondaryModeMetrics {
+pub(crate) static SECONDARY_MODE: Lazy<SecondaryModeMetrics> = Lazy::new(|| SecondaryModeMetrics {
    upload_heatmap: register_int_counter!(
        "pageserver_secondary_upload_heatmap",
        "Number of heatmaps written to remote storage by attached tenants"
@@ -1537,7 +1536,7 @@ pub(crate) static SECONDARY_MODE: Lazy<SecondaryModeMetrics> = Lazy::new(|| {
    .expect("failed to define a metric"),
    download_heatmap: register_int_counter!(
        "pageserver_secondary_download_heatmap",
-        "Number of downloads of heatmaps by secondary mode locations, including when it hasn't changed"
+        "Number of downloads of heatmaps by secondary mode locations"
    )
    .expect("failed to define a metric"),
    download_layer: register_int_counter!(
@@ -1545,7 +1544,6 @@ pub(crate) static SECONDARY_MODE: Lazy<SecondaryModeMetrics> = Lazy::new(|| {
        "Number of downloads of layers by secondary mode locations"
    )
    .expect("failed to define a metric"),
-}
 });

 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
@@ -1821,29 +1819,6 @@ impl Default for WalRedoProcessCounters {
 pub(crate) static WAL_REDO_PROCESS_COUNTERS: Lazy<WalRedoProcessCounters> =
    Lazy::new(WalRedoProcessCounters::default);

-#[cfg(not(test))]
-pub mod wal_redo {
-    use super::*;
-
-    static PROCESS_KIND: Lazy<std::sync::Mutex<UIntGaugeVec>> = Lazy::new(|| {
-        std::sync::Mutex::new(
-            register_uint_gauge_vec!(
-                "pageserver_wal_redo_process_kind",
-                "The configured process kind for walredo",
-                &["kind"],
-            )
-            .unwrap(),
-        )
-    });
-
-    pub fn set_process_kind_metric(kind: crate::walredo::ProcessKind) {
-        // use guard to avoid races around the next two steps
-        let guard = PROCESS_KIND.lock().unwrap();
-        guard.reset();
-        guard.with_label_values(&[&format!("{kind}")]).set(1);
-    }
-}
-
 /// Similar to `prometheus::HistogramTimer` but does not record on drop.
 pub(crate) struct StorageTimeMetricsTimer {
    metrics: StorageTimeMetrics,
@@ -2114,7 +2089,7 @@ impl TimelineMetrics {

 pub(crate) fn remove_tenant_metrics(tenant_shard_id: &TenantShardId) {
    // Only shard zero deals in synthetic sizes
-    if tenant_shard_id.is_shard_zero() {
+    if tenant_shard_id.is_zero() {
        let tid = tenant_shard_id.tenant_id.to_string();
        let _ = TENANT_SYNTHETIC_SIZE_METRIC.remove_label_values(&[&tid]);
    }
--- a/pageserver/src/repository.rs
+++ b/pageserver/src/repository.rs
@@ -33,52 +33,6 @@ impl Value {
    }
 }

-#[cfg(test)]
-#[derive(Debug, PartialEq)]
-pub(crate) enum InvalidInput {
-    TooShortValue,
-    TooShortPostgresRecord,
-}
-
-/// We could have a ValueRef where everything is `serde(borrow)`. Before implementing that, lets
-/// use this type for querying if a slice looks some particular way.
-#[cfg(test)]
-pub(crate) struct ValueBytes;
-
-#[cfg(test)]
-impl ValueBytes {
-    pub(crate) fn will_init(raw: &[u8]) -> Result<bool, InvalidInput> {
-        if raw.len() < 12 {
-            return Err(InvalidInput::TooShortValue);
-        }
-
-        let value_discriminator = &raw[0..4];
-
-        if value_discriminator == [0, 0, 0, 0] {
-            // Value::Image always initializes
-            return Ok(true);
-        }
-
-        if value_discriminator != [0, 0, 0, 1] {
-            // not a Value::WalRecord(..)
-            return Ok(false);
-        }
-
-        let walrecord_discriminator = &raw[4..8];
-
-        if walrecord_discriminator != [0, 0, 0, 0] {
-            // only NeonWalRecord::Postgres can have will_init
-            return Ok(false);
-        }
-
-        if raw.len() < 17 {
-            return Err(InvalidInput::TooShortPostgresRecord);
-        }
-
-        Ok(raw[8] == 1)
-    }
-}
-
 #[cfg(test)]
 mod test {
    use super::*;
@@ -116,8 +70,6 @@ mod test {
        ];

        roundtrip!(image, expected);
-
-        assert!(ValueBytes::will_init(&expected).unwrap());
    }

    #[test]
@@ -141,96 +93,6 @@ mod test {
        ];

        roundtrip!(rec, expected);
-
-        assert!(ValueBytes::will_init(&expected).unwrap());
-    }
-
-    #[test]
-    fn bytes_inspection_too_short_image() {
-        let rec = Value::Image(Bytes::from_static(b""));
-
-        #[rustfmt::skip]
-        let expected = [
-            // top level discriminator of 4 bytes
-            0x00, 0x00, 0x00, 0x00,
-            // 8 byte length
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-        ];
-
-        roundtrip!(rec, expected);
-
-        assert!(ValueBytes::will_init(&expected).unwrap());
-        assert_eq!(expected.len(), 12);
-        for len in 0..12 {
-            assert_eq!(
-                ValueBytes::will_init(&expected[..len]).unwrap_err(),
-                InvalidInput::TooShortValue
-            );
-        }
-    }
-
-    #[test]
-    fn bytes_inspection_too_short_postgres_record() {
-        let rec = NeonWalRecord::Postgres {
-            will_init: false,
-            rec: Bytes::from_static(b""),
-        };
-        let rec = Value::WalRecord(rec);
-
-        #[rustfmt::skip]
-        let expected = [
-            // flattened discriminator of total 8 bytes
-            0x00, 0x00, 0x00, 0x01,
-            0x00, 0x00, 0x00, 0x00,
-            // will_init
-            0x00,
-            // 8 byte length
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-        ];
-
-        roundtrip!(rec, expected);
-
-        assert!(!ValueBytes::will_init(&expected).unwrap());
-        assert_eq!(expected.len(), 17);
-        for len in 12..17 {
-            assert_eq!(
-                ValueBytes::will_init(&expected[..len]).unwrap_err(),
-                InvalidInput::TooShortPostgresRecord
-            )
-        }
-        for len in 0..12 {
-            assert_eq!(
-                ValueBytes::will_init(&expected[..len]).unwrap_err(),
-                InvalidInput::TooShortValue
-            )
-        }
-    }
-
-    #[test]
-    fn clear_visibility_map_flags_example() {
-        let rec = NeonWalRecord::ClearVisibilityMapFlags {
-            new_heap_blkno: Some(0x11),
-            old_heap_blkno: None,
-            flags: 0x03,
-        };
-        let rec = Value::WalRecord(rec);
-
-        #[rustfmt::skip]
-        let expected = [
-            // discriminators
-            0x00, 0x00, 0x00, 0x01,
-            0x00, 0x00, 0x00, 0x01,
-            // Some == 1 followed by 4 bytes
-            0x01, 0x00, 0x00, 0x00, 0x11,
-            // None == 0
-            0x00,
-            // flags
-            0x03
-        ];
-
-        roundtrip!(rec, expected);
-
-        assert!(!ValueBytes::will_init(&expected).unwrap());
    }
 }

--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -386,7 +386,7 @@ impl WalRedoManager {

    pub(crate) fn status(&self) -> Option<WalRedoManagerStatus> {
        match self {
-            WalRedoManager::Prod(m) => Some(m.status()),
+            WalRedoManager::Prod(m) => m.status(),
            #[cfg(test)]
            WalRedoManager::Test(_) => None,
        }
@@ -3190,7 +3190,7 @@ impl Tenant {
            run_initdb(self.conf, &pgdata_path, pg_version, &self.cancel).await?;

            // Upload the created data dir to S3
-            if self.tenant_shard_id().is_shard_zero() {
+            if self.tenant_shard_id().is_zero() {
                self.upload_initdb(&timelines_path, &pgdata_path, &timeline_id)
                    .await?;
            }
@@ -3437,7 +3437,7 @@ impl Tenant {
            .store(size, Ordering::Relaxed);

        // Only shard zero should be calculating synthetic sizes
-        debug_assert!(self.shard_identity.is_shard_zero());
+        debug_assert!(self.shard_identity.is_zero());

        TENANT_SYNTHETIC_SIZE_METRIC
            .get_metric_with_label_values(&[&self.tenant_shard_id.tenant_id.to_string()])
@@ -3848,8 +3848,6 @@ pub(crate) mod harness {

 #[cfg(test)]
 mod tests {
-    use std::collections::BTreeMap;
-
    use super::*;
    use crate::keyspace::KeySpaceAccum;
    use crate::repository::{Key, Value};
@@ -3860,7 +3858,7 @@ mod tests {
    use hex_literal::hex;
    use pageserver_api::keyspace::KeySpace;
    use rand::{thread_rng, Rng};
-    use tests::timeline::{GetVectoredError, ShutdownMode};
+    use tests::timeline::ShutdownMode;

    static TEST_KEY: Lazy<Key> =
        Lazy::new(|| Key::from_slice(&hex!("010000000033333333444444445500000001")));
@@ -4796,166 +4794,6 @@ mod tests {
        Ok(())
    }

-    // Test that vectored get descends into ancestor timelines correctly and
-    // does not return an image that's newer than requested.
-    //
-    // The diagram below ilustrates an interesting case. We have a parent timeline
-    // (top of the Lsn range) and a child timeline. The request key cannot be reconstructed
-    // from the child timeline, so the parent timeline must be visited. When advacing into
-    // the child timeline, the read path needs to remember what the requested Lsn was in
-    // order to avoid returning an image that's too new. The test below constructs such
-    // a timeline setup and does a few queries around the Lsn of each page image.
-    // ```
-    //    LSN
-    //     ^
-    //     |
-    //     |
-    // 500 | --------------------------------------> branch point
-    // 400 |        X
-    // 300 |        X
-    // 200 | --------------------------------------> requested lsn
-    // 100 |        X
-    //     |---------------------------------------> Key
-    //              |
-    //              ------> requested key
-    //
-    // Legend:
-    // * X - page images
-    // ```
-    #[tokio::test]
-    async fn test_get_vectored_ancestor_descent() -> anyhow::Result<()> {
-        let harness = TenantHarness::create("test_get_vectored_on_lsn_axis")?;
-        let (tenant, ctx) = harness.load().await;
-
-        let start_key = Key::from_hex("010000000033333333444444445500000000").unwrap();
-        let end_key = start_key.add(1000);
-        let child_gap_at_key = start_key.add(500);
-        let mut parent_gap_lsns: BTreeMap<Lsn, String> = BTreeMap::new();
-
-        let mut current_lsn = Lsn(0x10);
-
-        let timeline_id = TimelineId::generate();
-        let parent_timeline = tenant
-            .create_test_timeline(timeline_id, current_lsn, DEFAULT_PG_VERSION, &ctx)
-            .await?;
-
-        current_lsn += 0x100;
-
-        for _ in 0..3 {
-            let mut key = start_key;
-            while key < end_key {
-                current_lsn += 0x10;
-
-                let image_value = format!("{} at {}", child_gap_at_key, current_lsn);
-
-                let mut writer = parent_timeline.writer().await;
-                writer
-                    .put(
-                        key,
-                        current_lsn,
-                        &Value::Image(test_img(&image_value)),
-                        &ctx,
-                    )
-                    .await?;
-                writer.finish_write(current_lsn);
-
-                if key == child_gap_at_key {
-                    parent_gap_lsns.insert(current_lsn, image_value);
-                }
-
-                key = key.next();
-            }
-
-            parent_timeline.freeze_and_flush().await?;
-        }
-
-        let child_timeline_id = TimelineId::generate();
-
-        let child_timeline = tenant
-            .branch_timeline_test(&parent_timeline, child_timeline_id, Some(current_lsn), &ctx)
-            .await?;
-
-        let mut key = start_key;
-        while key < end_key {
-            if key == child_gap_at_key {
-                key = key.next();
-                continue;
-            }
-
-            current_lsn += 0x10;
-
-            let mut writer = child_timeline.writer().await;
-            writer
-                .put(
-                    key,
-                    current_lsn,
-                    &Value::Image(test_img(&format!("{} at {}", key, current_lsn))),
-                    &ctx,
-                )
-                .await?;
-            writer.finish_write(current_lsn);
-
-            key = key.next();
-        }
-
-        child_timeline.freeze_and_flush().await?;
-
-        let lsn_offsets: [i64; 5] = [-10, -1, 0, 1, 10];
-        let mut query_lsns = Vec::new();
-        for image_lsn in parent_gap_lsns.keys().rev() {
-            for offset in lsn_offsets {
-                query_lsns.push(Lsn(image_lsn
-                    .0
-                    .checked_add_signed(offset)
-                    .expect("Shouldn't overflow")));
-            }
-        }
-
-        for query_lsn in query_lsns {
-            let results = child_timeline
-                .get_vectored_impl(
-                    KeySpace {
-                        ranges: vec![child_gap_at_key..child_gap_at_key.next()],
-                    },
-                    query_lsn,
-                    &ctx,
-                )
-                .await;
-
-            let expected_item = parent_gap_lsns
-                .iter()
-                .rev()
-                .find(|(lsn, _)| **lsn <= query_lsn);
-
-            info!(
-                "Doing vectored read at LSN {}. Expecting image to be: {:?}",
-                query_lsn, expected_item
-            );
-
-            match expected_item {
-                Some((_, img_value)) => {
-                    let key_results = results.expect("No vectored get error expected");
-                    let key_result = &key_results[&child_gap_at_key];
-                    let returned_img = key_result
-                        .as_ref()
-                        .expect("No page reconstruct error expected");
-
-                    info!(
-                        "Vectored read at LSN {} returned image {}",
-                        query_lsn,
-                        std::str::from_utf8(returned_img)?
-                    );
-                    assert_eq!(*returned_img, test_img(img_value));
-                }
-                None => {
-                    assert!(matches!(results, Err(GetVectoredError::MissingKey(_))));
-                }
-            }
-        }
-
-        Ok(())
-    }
-
    #[tokio::test]
    async fn test_random_updates() -> anyhow::Result<()> {
        let harness = TenantHarness::create("test_random_updates")?;
--- a/pageserver/src/tenant/delete.rs
+++ b/pageserver/src/tenant/delete.rs
@@ -436,11 +436,6 @@ impl DeleteTenantFlow {
        .await
    }

-    /// Check whether background deletion of this tenant is currently in progress
-    pub(crate) fn is_in_progress(tenant: &Tenant) -> bool {
-        tenant.delete_progress.try_lock().is_err()
-    }
-
    async fn prepare(
        tenant: &Arc<Tenant>,
    ) -> Result<tokio::sync::OwnedMutexGuard<Self>, DeleteTenantError> {
--- a/pageserver/src/tenant/mgr.rs
+++ b/pageserver/src/tenant/mgr.rs
@@ -678,19 +678,12 @@ pub async fn init_tenant_mgr(
                    }
                }
            }
-            LocationMode::Secondary(secondary_conf) => {
-                info!(
-                    tenant_id = %tenant_shard_id.tenant_id,
-                    shard_id = %tenant_shard_id.shard_slug(),
-                    "Starting secondary tenant"
-                );
-                TenantSlot::Secondary(SecondaryTenant::new(
-                    tenant_shard_id,
-                    shard_identity,
-                    location_conf.tenant_conf,
-                    &secondary_conf,
-                ))
-            }
+            LocationMode::Secondary(secondary_conf) => TenantSlot::Secondary(SecondaryTenant::new(
+                tenant_shard_id,
+                shard_identity,
+                location_conf.tenant_conf,
+                &secondary_conf,
+            )),
        };

        tenants.insert(tenant_shard_id, slot);
@@ -1417,15 +1410,9 @@ impl TenantManager {

        match tenant.current_state() {
            TenantState::Broken { .. } | TenantState::Stopping { .. } => {
-                // If deletion is already in progress, return success (the semantics of this
-                // function are to rerturn success afterr deletion is spawned in background).
-                // Otherwise fall through and let [`DeleteTenantFlow`] handle this state.
-                if DeleteTenantFlow::is_in_progress(&tenant) {
-                    // The `delete_progress` lock is held: deletion is already happening
-                    // in the bacckground
-                    slot_guard.revert();
-                    return Ok(());
-                }
+                // If a tenant is broken or stopping, DeleteTenantFlow can
+                // handle it: broken tenants proceed to delete, stopping tenants
+                // are checked for deletion already in progress.
            }
            _ => {
                tenant
--- a/pageserver/src/tenant/remote_timeline_client/upload.rs
+++ b/pageserver/src/tenant/remote_timeline_client/upload.rs
@@ -167,7 +167,7 @@ pub(crate) async fn time_travel_recover_tenant(
    let warn_after = 3;
    let max_attempts = 10;
    let mut prefixes = Vec::with_capacity(2);
-    if tenant_shard_id.is_shard_zero() {
+    if tenant_shard_id.is_zero() {
        // Also recover the unsharded prefix for a shard of zero:
        // - if the tenant is totally unsharded, the unsharded prefix contains all the data
        // - if the tenant is sharded, we still want to recover the initdb data, but we only
--- a/pageserver/src/tenant/secondary/downloader.rs
+++ b/pageserver/src/tenant/secondary/downloader.rs
@@ -312,7 +312,7 @@ impl JobGenerator<PendingDownload, RunningDownload, CompleteDownload, DownloadCo
                    (detail.last_download, detail.next_download.unwrap())
                };

-                if now > next_download {
+                if now < next_download {
                    Some(PendingDownload {
                        secondary_state: secondary_tenant,
                        last_download,
@@ -647,12 +647,6 @@ impl<'a> TenantDownloader<'a> {
                progress.bytes_downloaded += layer_byte_count;
                progress.layers_downloaded += layer_count;
            }
-
-            for delete_timeline in &delete_timelines {
-                // We haven't removed from disk yet, but optimistically remove from in-memory state: if removal
-                // from disk fails that will be a fatal error.
-                detail.timelines.remove(delete_timeline);
-            }
        }

        // Execute accumulated deletions
@@ -716,14 +710,13 @@ impl<'a> TenantDownloader<'a> {
                    .await
                    .map_err(UpdateError::from)?;

-                SECONDARY_MODE.download_heatmap.inc();
-
                if Some(&download.etag) == prev_etag {
                    Ok(HeatMapDownload::Unmodified)
                } else {
                    let mut heatmap_bytes = Vec::new();
                    let mut body = tokio_util::io::StreamReader::new(download.download_stream);
                    let _size = tokio::io::copy_buf(&mut body, &mut heatmap_bytes).await?;
+                    SECONDARY_MODE.download_heatmap.inc();
                    Ok(HeatMapDownload::Modified(HeatMapModified {
                        etag: download.etag,
                        last_modified: download.last_modified,
--- a/pageserver/src/tenant/storage_layer/delta_layer.rs
+++ b/pageserver/src/tenant/storage_layer/delta_layer.rs
@@ -20,8 +20,8 @@
 //!    000000067F000032BE0000400000000020B6-000000067F000032BE0000400000000030B6__000000578C6B29-0000000057A50051
 //! ```
 //!
-//! Every delta file consists of three parts: "summary", "values", and
-//! "index". The summary is a fixed size header at the beginning of the file,
+//! Every delta file consists of three parts: "summary", "index", and
+//! "values". The summary is a fixed size header at the beginning of the file,
 //! and it contains basic information about the layer, and offsets to the other
 //! parts. The "index" is a B-tree, mapping from Key and LSN to an offset in the
 //! "values" part.  The actual page images and WAL records are stored in the
@@ -863,7 +863,7 @@ impl DeltaLayerInner {
                .into(),
        );

-        let data_end_offset = self.index_start_offset();
+        let data_end_offset = self.index_start_blk as u64 * PAGE_SZ as u64;

        let reads = Self::plan_reads(
            keyspace,
@@ -939,7 +939,7 @@ impl DeltaLayerInner {
            }

            if !range_end_handled {
-                tracing::debug!("Handling range end fallback at {}", data_end_offset);
+                tracing::info!("Handling range end fallback at {}", data_end_offset);
                planner.handle_range_end(data_end_offset);
            }
        }
@@ -1103,195 +1103,11 @@ impl DeltaLayerInner {
        if let Some(last) = all_keys.last_mut() {
            // Last key occupies all space till end of value storage,
            // which corresponds to beginning of the index
-            last.size = self.index_start_offset() - last.size;
+            last.size = self.index_start_blk as u64 * PAGE_SZ as u64 - last.size;
        }
        Ok(all_keys)
    }

-    /// Using the given writer, write out a truncated version, where LSNs higher than the
-    /// truncate_at are missing.
-    #[cfg(test)]
-    pub(super) async fn copy_prefix(
-        &self,
-        writer: &mut DeltaLayerWriter,
-        truncate_at: Lsn,
-        ctx: &RequestContext,
-    ) -> anyhow::Result<()> {
-        use crate::tenant::vectored_blob_io::{
-            BlobMeta, VectoredReadBuilder, VectoredReadExtended,
-        };
-        use futures::stream::TryStreamExt;
-
-        #[derive(Debug)]
-        enum Item {
-            Actual(Key, Lsn, BlobRef),
-            Sentinel,
-        }
-
-        impl From<Item> for Option<(Key, Lsn, BlobRef)> {
-            fn from(value: Item) -> Self {
-                match value {
-                    Item::Actual(key, lsn, blob) => Some((key, lsn, blob)),
-                    Item::Sentinel => None,
-                }
-            }
-        }
-
-        impl Item {
-            fn offset(&self) -> Option<BlobRef> {
-                match self {
-                    Item::Actual(_, _, blob) => Some(*blob),
-                    Item::Sentinel => None,
-                }
-            }
-
-            fn is_last(&self) -> bool {
-                matches!(self, Item::Sentinel)
-            }
-        }
-
-        let block_reader = FileBlockReader::new(&self.file, self.file_id);
-        let tree_reader = DiskBtreeReader::<_, DELTA_KEY_SIZE>::new(
-            self.index_start_blk,
-            self.index_root_blk,
-            block_reader,
-        );
-
-        let stream = self.stream_index_forwards(&tree_reader, &[0u8; DELTA_KEY_SIZE], ctx);
-        let stream = stream.map_ok(|(key, lsn, pos)| Item::Actual(key, lsn, pos));
-        // put in a sentinel value for getting the end offset for last item, and not having to
-        // repeat the whole read part
-        let stream = stream.chain(futures::stream::once(futures::future::ready(Ok(
-            Item::Sentinel,
-        ))));
-        let mut stream = std::pin::pin!(stream);
-
-        let mut prev: Option<(Key, Lsn, BlobRef)> = None;
-
-        let mut read_builder: Option<VectoredReadBuilder> = None;
-
-        let max_read_size = self
-            .max_vectored_read_bytes
-            .map(|x| x.0.get())
-            .unwrap_or(8192);
-
-        let mut buffer = Some(BytesMut::with_capacity(max_read_size));
-
-        // FIXME: buffering of DeltaLayerWriter
-        let mut per_blob_copy = Vec::new();
-
-        while let Some(item) = stream.try_next().await? {
-            tracing::debug!(?item, "popped");
-            let offset = item
-                .offset()
-                .unwrap_or(BlobRef::new(self.index_start_offset(), false));
-
-            let actionable = if let Some((key, lsn, start_offset)) = prev.take() {
-                let end_offset = offset;
-
-                Some((BlobMeta { key, lsn }, start_offset..end_offset))
-            } else {
-                None
-            };
-
-            let is_last = item.is_last();
-
-            prev = Option::from(item);
-
-            let actionable = actionable.filter(|x| x.0.lsn < truncate_at);
-
-            let builder = if let Some((meta, offsets)) = actionable {
-                // extend or create a new builder
-                if read_builder
-                    .as_mut()
-                    .map(|x| x.extend(offsets.start.pos(), offsets.end.pos(), meta))
-                    .unwrap_or(VectoredReadExtended::No)
-                    == VectoredReadExtended::Yes
-                {
-                    None
-                } else {
-                    read_builder.replace(VectoredReadBuilder::new(
-                        offsets.start.pos(),
-                        offsets.end.pos(),
-                        meta,
-                        max_read_size,
-                    ))
-                }
-            } else {
-                // nothing to do, except perhaps flush any existing for the last element
-                None
-            };
-
-            // flush the possible older builder and also the new one if the item was the last one
-            let builders = builder.into_iter();
-            let builders = if is_last {
-                builders.chain(read_builder.take())
-            } else {
-                builders.chain(None)
-            };
-
-            for builder in builders {
-                let read = builder.build();
-
-                let reader = VectoredBlobReader::new(&self.file);
-
-                let mut buf = buffer.take().unwrap();
-
-                buf.clear();
-                buf.reserve(read.size());
-                let res = reader.read_blobs(&read, buf).await?;
-
-                for blob in res.blobs {
-                    let key = blob.meta.key;
-                    let lsn = blob.meta.lsn;
-                    let data = &res.buf[blob.start..blob.end];
-
-                    #[cfg(debug_assertions)]
-                    Value::des(data)
-                        .with_context(|| {
-                            format!(
-                                "blob failed to deserialize for {}@{}, {}..{}: {:?}",
-                                blob.meta.key,
-                                blob.meta.lsn,
-                                blob.start,
-                                blob.end,
-                                utils::Hex(data)
-                            )
-                        })
-                        .unwrap();
-
-                    // is it an image or will_init walrecord?
-                    // FIXME: this could be handled by threading the BlobRef to the
-                    // VectoredReadBuilder
-                    let will_init = crate::repository::ValueBytes::will_init(data)
-                        .inspect_err(|_e| {
-                            #[cfg(feature = "testing")]
-                            tracing::error!(data=?utils::Hex(data), err=?_e, "failed to parse will_init out of serialized value");
-                        })
-                        .unwrap_or(false);
-
-                    per_blob_copy.clear();
-                    per_blob_copy.extend_from_slice(data);
-
-                    let (tmp, res) = writer
-                        .put_value_bytes(key, lsn, std::mem::take(&mut per_blob_copy), will_init)
-                        .await;
-                    per_blob_copy = tmp;
-                    res?;
-                }
-
-                buffer = Some(res.buf);
-            }
-        }
-
-        assert!(
-            read_builder.is_none(),
-            "with the sentinel above loop should had handled all"
-        );
-
-        Ok(())
-    }
-
    pub(super) async fn dump(&self, ctx: &RequestContext) -> anyhow::Result<()> {
        println!(
            "index_start_blk: {}, root {}",
@@ -1361,44 +1177,6 @@ impl DeltaLayerInner {

        Ok(())
    }
-
-    #[cfg(test)]
-    fn stream_index_forwards<'a, R>(
-        &'a self,
-        reader: &'a DiskBtreeReader<R, DELTA_KEY_SIZE>,
-        start: &'a [u8; DELTA_KEY_SIZE],
-        ctx: &'a RequestContext,
-    ) -> impl futures::stream::Stream<
-        Item = Result<(Key, Lsn, BlobRef), crate::tenant::disk_btree::DiskBtreeError>,
-    > + 'a
-    where
-        R: BlockReader,
-    {
-        use futures::stream::TryStreamExt;
-        let stream = reader.get_stream_from(start, ctx);
-        stream.map_ok(|(key, value)| {
-            let key = DeltaKey::from_slice(&key);
-            let (key, lsn) = (key.key(), key.lsn());
-            let offset = BlobRef(value);
-
-            (key, lsn, offset)
-        })
-    }
-
-    /// The file offset to the first block of index.
-    ///
-    /// The file structure is summary, values, and index. We often need this for the size of last blob.
-    fn index_start_offset(&self) -> u64 {
-        let offset = self.index_start_blk as u64 * PAGE_SZ as u64;
-        let bref = BlobRef(offset);
-        tracing::debug!(
-            index_start_blk = self.index_start_blk,
-            offset,
-            pos = bref.pos(),
-            "index_start_offset"
-        );
-        offset
-    }
 }

 /// A set of data associated with a delta layer key and its value
@@ -1760,7 +1538,7 @@ mod test {

        let resident = writer.finish(entries_meta.key_range.end, &timeline).await?;

-        let inner = resident.as_delta(&ctx).await?;
+        let inner = resident.get_inner_delta(&ctx).await?;

        let file_size = inner.file.metadata().await?.len();
        tracing::info!(
@@ -1816,217 +1594,4 @@ mod test {

        Ok(())
    }
-
-    #[tokio::test]
-    async fn copy_delta_prefix_smoke() {
-        use crate::walrecord::NeonWalRecord;
-        use bytes::Bytes;
-
-        let h = crate::tenant::harness::TenantHarness::create("truncate_delta_smoke").unwrap();
-        let (tenant, ctx) = h.load().await;
-        let ctx = &ctx;
-        let timeline = tenant
-            .create_test_timeline(TimelineId::generate(), Lsn(0x10), 14, ctx)
-            .await
-            .unwrap();
-
-        let initdb_layer = timeline
-            .layers
-            .read()
-            .await
-            .likely_resident_layers()
-            .next()
-            .unwrap();
-
-        {
-            let mut writer = timeline.writer().await;
-
-            let data = [
-                (0x20, 12, Value::Image(Bytes::from_static(b"foobar"))),
-                (
-                    0x30,
-                    12,
-                    Value::WalRecord(NeonWalRecord::Postgres {
-                        will_init: false,
-                        rec: Bytes::from_static(b"1"),
-                    }),
-                ),
-                (
-                    0x40,
-                    12,
-                    Value::WalRecord(NeonWalRecord::Postgres {
-                        will_init: true,
-                        rec: Bytes::from_static(b"2"),
-                    }),
-                ),
-                // build an oversized value so we cannot extend and existing read over
-                // this
-                (
-                    0x50,
-                    12,
-                    Value::WalRecord(NeonWalRecord::Postgres {
-                        will_init: true,
-                        rec: {
-                            let mut buf =
-                                vec![0u8; tenant.conf.max_vectored_read_bytes.0.get() + 1024];
-                            buf.iter_mut()
-                                .enumerate()
-                                .for_each(|(i, slot)| *slot = (i % 256) as u8);
-                            Bytes::from(buf)
-                        },
-                    }),
-                ),
-                // because the oversized read cannot be extended further, we are sure to exercise the
-                // builder created on the last round with this:
-                (
-                    0x60,
-                    12,
-                    Value::WalRecord(NeonWalRecord::Postgres {
-                        will_init: true,
-                        rec: Bytes::from_static(b"3"),
-                    }),
-                ),
-                (
-                    0x60,
-                    9,
-                    Value::Image(Bytes::from_static(b"something for a different key")),
-                ),
-            ];
-
-            let mut last_lsn = None;
-
-            for (lsn, key, value) in data {
-                let key = Key::from_i128(key);
-                writer.put(key, Lsn(lsn), &value, ctx).await.unwrap();
-                last_lsn = Some(lsn);
-            }
-
-            writer.finish_write(Lsn(last_lsn.unwrap()));
-        }
-        timeline.freeze_and_flush().await.unwrap();
-
-        let new_layer = timeline
-            .layers
-            .read()
-            .await
-            .likely_resident_layers()
-            .find(|x| x != &initdb_layer)
-            .unwrap();
-
-        // create a copy for the timeline, so we don't overwrite the file
-        let branch = tenant
-            .branch_timeline_test(&timeline, TimelineId::generate(), None, ctx)
-            .await
-            .unwrap();
-
-        assert_eq!(branch.get_ancestor_lsn(), Lsn(0x60));
-
-        // truncating at 0x61 gives us a full copy, otherwise just go backwards until there's just
-        // a single key
-
-        for truncate_at in [0x61, 0x51, 0x41, 0x31, 0x21] {
-            let truncate_at = Lsn(truncate_at);
-
-            let mut writer = DeltaLayerWriter::new(
-                tenant.conf,
-                branch.timeline_id,
-                tenant.tenant_shard_id,
-                Key::MIN,
-                Lsn(0x11)..truncate_at,
-            )
-            .await
-            .unwrap();
-
-            let new_layer = new_layer.download_and_keep_resident().await.unwrap();
-
-            new_layer
-                .copy_delta_prefix(&mut writer, truncate_at, ctx)
-                .await
-                .unwrap();
-
-            let copied_layer = writer.finish(Key::MAX, &branch).await.unwrap();
-
-            copied_layer.as_delta(ctx).await.unwrap();
-
-            assert_keys_and_values_eq(
-                new_layer.as_delta(ctx).await.unwrap(),
-                copied_layer.as_delta(ctx).await.unwrap(),
-                truncate_at,
-                ctx,
-            )
-            .await;
-        }
-    }
-
-    async fn assert_keys_and_values_eq(
-        source: &DeltaLayerInner,
-        truncated: &DeltaLayerInner,
-        truncated_at: Lsn,
-        ctx: &RequestContext,
-    ) {
-        use futures::future::ready;
-        use futures::stream::TryStreamExt;
-
-        let start_key = [0u8; DELTA_KEY_SIZE];
-
-        let source_reader = FileBlockReader::new(&source.file, source.file_id);
-        let source_tree = DiskBtreeReader::<_, DELTA_KEY_SIZE>::new(
-            source.index_start_blk,
-            source.index_root_blk,
-            &source_reader,
-        );
-        let source_stream = source.stream_index_forwards(&source_tree, &start_key, ctx);
-        let source_stream = source_stream.filter(|res| match res {
-            Ok((_, lsn, _)) => ready(lsn < &truncated_at),
-            _ => ready(true),
-        });
-        let mut source_stream = std::pin::pin!(source_stream);
-
-        let truncated_reader = FileBlockReader::new(&truncated.file, truncated.file_id);
-        let truncated_tree = DiskBtreeReader::<_, DELTA_KEY_SIZE>::new(
-            truncated.index_start_blk,
-            truncated.index_root_blk,
-            &truncated_reader,
-        );
-        let truncated_stream = truncated.stream_index_forwards(&truncated_tree, &start_key, ctx);
-        let mut truncated_stream = std::pin::pin!(truncated_stream);
-
-        let mut scratch_left = Vec::new();
-        let mut scratch_right = Vec::new();
-
-        loop {
-            let (src, truncated) = (source_stream.try_next(), truncated_stream.try_next());
-            let (src, truncated) = tokio::try_join!(src, truncated).unwrap();
-
-            if src.is_none() {
-                assert!(truncated.is_none());
-                break;
-            }
-
-            let (src, truncated) = (src.unwrap(), truncated.unwrap());
-
-            // because we've filtered the source with Lsn, we should always have the same keys from both.
-            assert_eq!(src.0, truncated.0);
-            assert_eq!(src.1, truncated.1);
-
-            // if this is needed for something else, just drop this assert.
-            assert!(
-                src.2.pos() >= truncated.2.pos(),
-                "value position should not go backwards {} vs. {}",
-                src.2.pos(),
-                truncated.2.pos()
-            );
-
-            scratch_left.clear();
-            let src_cursor = source_reader.block_cursor();
-            let left = src_cursor.read_blob_into_buf(src.2.pos(), &mut scratch_left, ctx);
-            scratch_right.clear();
-            let trunc_cursor = truncated_reader.block_cursor();
-            let right = trunc_cursor.read_blob_into_buf(truncated.2.pos(), &mut scratch_right, ctx);
-
-            tokio::try_join!(left, right).unwrap();
-
-            assert_eq!(utils::Hex(&scratch_left), utils::Hex(&scratch_right));
-        }
-    }
 }
--- a/pageserver/src/tenant/storage_layer/layer.rs
+++ b/pageserver/src/tenant/storage_layer/layer.rs
@@ -116,12 +116,6 @@ impl AsLayerDesc for Layer {
    }
 }

-impl PartialEq for Layer {
-    fn eq(&self, other: &Self) -> bool {
-        Arc::as_ptr(&self.0) == Arc::as_ptr(&other.0)
-    }
-}
-
 impl Layer {
    /// Creates a layer value for a file we know to not be resident.
    pub(crate) fn for_evicted(
@@ -610,17 +604,9 @@ enum Status {

 impl Drop for LayerInner {
    fn drop(&mut self) {
-        // if there was a pending eviction, mark it cancelled here to balance metrics
-        if let Some((ResidentOrWantedEvicted::WantedEvicted(..), _)) = self.inner.take_and_deinit()
-        {
-            // eviction has already been started
-            LAYER_IMPL_METRICS.inc_eviction_cancelled(EvictionCancelled::LayerGone);
-
-            // eviction request is intentionally not honored as no one is present to wait for it
-            // and we could be delaying shutdown for nothing.
-        }
-
        if !*self.wanted_deleted.get_mut() {
+            // should we try to evict if the last wish was for eviction? seems more like a hazard
+            // than a clear win.
            return;
        }

@@ -1566,8 +1552,8 @@ impl Drop for DownloadedLayer {
        if let Some(owner) = self.owner.upgrade() {
            owner.on_downloaded_layer_drop(self.version);
        } else {
-            // Layer::drop will handle cancelling the eviction; because of drop order and
-            // `DownloadedLayer` never leaking, we cannot know here if eviction was requested.
+            // no need to do anything, we are shutting down
+            LAYER_IMPL_METRICS.inc_eviction_cancelled(EvictionCancelled::LayerGone);
        }
    }
 }
@@ -1766,28 +1752,6 @@ impl ResidentLayer {
        }
    }

-    /// FIXME: truncate is bad name because we are not truncating anything, but copying the
-    /// filtered parts.
-    #[cfg(test)]
-    pub(super) async fn copy_delta_prefix(
-        &self,
-        writer: &mut super::delta_layer::DeltaLayerWriter,
-        truncate_at: Lsn,
-        ctx: &RequestContext,
-    ) -> anyhow::Result<()> {
-        use LayerKind::*;
-
-        let owner = &self.owner.0;
-
-        match self.downloaded.get(owner, ctx).await? {
-            Delta(ref d) => d
-                .copy_prefix(writer, truncate_at, ctx)
-                .await
-                .with_context(|| format!("truncate {self}")),
-            Image(_) => anyhow::bail!(format!("cannot truncate image layer {self}")),
-        }
-    }
-
    pub(crate) fn local_path(&self) -> &Utf8Path {
        &self.owner.0.path
    }
@@ -1797,14 +1761,14 @@ impl ResidentLayer {
    }

    #[cfg(test)]
-    pub(crate) async fn as_delta(
-        &self,
+    pub(crate) async fn get_inner_delta<'a>(
+        &'a self,
        ctx: &RequestContext,
-    ) -> anyhow::Result<&delta_layer::DeltaLayerInner> {
-        use LayerKind::*;
-        match self.downloaded.get(&self.owner.0, ctx).await? {
-            Delta(ref d) => Ok(d),
-            Image(_) => Err(anyhow::anyhow!("image layer")),
+    ) -> anyhow::Result<&'a delta_layer::DeltaLayerInner> {
+        let owner = &self.owner.0;
+        match self.downloaded.get(owner, ctx).await? {
+            LayerKind::Delta(d) => Ok(d),
+            LayerKind::Image(_) => Err(anyhow::anyhow!("Expected a delta layer")),
        }
    }
 }
--- a/pageserver/src/tenant/storage_layer/layer/tests.rs
+++ b/pageserver/src/tenant/storage_layer/layer/tests.rs
@@ -721,103 +721,6 @@ async fn evict_and_wait_does_not_wait_for_download() {
    layer.evict_and_wait(FOREVER).await.unwrap();
 }

-/// Asserts that there is no miscalculation when Layer is dropped while it is being kept resident,
-/// which is the last value.
-///
-/// Also checks that the same does not happen on a non-evicted layer (regression test).
-#[tokio::test(start_paused = true)]
-async fn eviction_cancellation_on_drop() {
-    use crate::repository::Value;
-    use bytes::Bytes;
-
-    // this is the runtime on which Layer spawns the blocking tasks on
-    let handle = tokio::runtime::Handle::current();
-
-    let h = TenantHarness::create("eviction_cancellation_on_drop").unwrap();
-    utils::logging::replace_panic_hook_with_tracing_panic_hook().forget();
-    let (tenant, ctx) = h.load().await;
-
-    let timeline = tenant
-        .create_test_timeline(TimelineId::generate(), Lsn(0x10), 14, &ctx)
-        .await
-        .unwrap();
-
-    {
-        // create_test_timeline wrote us one layer, write another
-        let mut writer = timeline.writer().await;
-        writer
-            .put(
-                Key::from_i128(5),
-                Lsn(0x20),
-                &Value::Image(Bytes::from_static(b"this does not matter either")),
-                &ctx,
-            )
-            .await
-            .unwrap();
-
-        writer.finish_write(Lsn(0x20));
-    }
-
-    timeline.freeze_and_flush().await.unwrap();
-
-    // wait for the upload to complete so our Arc::strong_count assertion holds
-    timeline
-        .remote_client
-        .as_ref()
-        .unwrap()
-        .wait_completion()
-        .await
-        .unwrap();
-
-    let (evicted_layer, not_evicted) = {
-        let mut layers = {
-            let mut guard = timeline.layers.write().await;
-            let layers = guard.likely_resident_layers().collect::<Vec<_>>();
-            // remove the layers from layermap
-            guard.finish_gc_timeline(&layers);
-
-            layers
-        };
-
-        assert_eq!(layers.len(), 2);
-
-        (layers.pop().unwrap(), layers.pop().unwrap())
-    };
-
-    let victims = [(evicted_layer, true), (not_evicted, false)];
-
-    for (victim, evict) in victims {
-        let resident = victim.keep_resident().await.unwrap();
-        drop(victim);
-
-        assert_eq!(Arc::strong_count(&resident.owner.0), 1);
-
-        if evict {
-            let evict_and_wait = resident.owner.evict_and_wait(FOREVER);
-
-            // drive the future to await on the status channel, and then drop it
-            tokio::time::timeout(ADVANCE, evict_and_wait)
-                .await
-                .expect_err("should had been a timeout since we are holding the layer resident");
-        }
-
-        // 1 == we only evict one of the layers
-        assert_eq!(1, LAYER_IMPL_METRICS.started_evictions.get());
-
-        drop(resident);
-
-        // run any spawned
-        tokio::time::sleep(ADVANCE).await;
-
-        SpawnBlockingPoolHelper::consume_and_release_all_of_spawn_blocking_threads(&handle).await;
-
-        assert_eq!(
-            1,
-            LAYER_IMPL_METRICS.cancelled_evictions[EvictionCancelled::LayerGone].get()
-        );
-    }
-}
-
 #[test]
 fn layer_size() {
    assert_eq!(std::mem::size_of::<LayerAccessStats>(), 2040);
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -1344,7 +1344,7 @@ impl Timeline {
        background_jobs_can_start: Option<&completion::Barrier>,
        ctx: &RequestContext,
    ) {
-        if self.tenant_shard_id.is_shard_zero() {
+        if self.tenant_shard_id.is_zero() {
            // Logical size is only maintained accurately on shard zero.
            self.spawn_initial_logical_size_computation_task(ctx);
        }
@@ -2237,7 +2237,7 @@ impl Timeline {
        priority: GetLogicalSizePriority,
        ctx: &RequestContext,
    ) -> logical_size::CurrentLogicalSize {
-        if !self.tenant_shard_id.is_shard_zero() {
+        if !self.tenant_shard_id.is_zero() {
            // Logical size is only accurately maintained on shard zero: when called elsewhere, for example
            // when HTTP API is serving a GET for timeline zero, return zero
            return logical_size::CurrentLogicalSize::Approximate(logical_size::Approximate::zero());
@@ -2533,7 +2533,7 @@ impl Timeline {
        crate::span::debug_assert_current_span_has_tenant_and_timeline_id();
        // We should never be calculating logical sizes on shard !=0, because these shards do not have
        // accurate relation sizes, and they do not emit consumption metrics.
-        debug_assert!(self.tenant_shard_id.is_shard_zero());
+        debug_assert!(self.tenant_shard_id.is_zero());

        let guard = self
            .gate
@@ -2968,8 +2968,7 @@ impl Timeline {
                break;
            }

-            // Take the min to avoid reconstructing a page with data newer than request Lsn.
-            cont_lsn = std::cmp::min(Lsn(request_lsn.0 + 1), Lsn(timeline.ancestor_lsn.0 + 1));
+            cont_lsn = Lsn(timeline.ancestor_lsn.0 + 1);
            timeline_owned = timeline
                .get_ready_ancestor_timeline(ctx)
                .await
--- a/pageserver/src/tenant/timeline/eviction_task.rs
+++ b/pageserver/src/tenant/timeline/eviction_task.rs
@@ -378,7 +378,7 @@ impl Timeline {
        gate: &GateGuard,
        ctx: &RequestContext,
    ) -> ControlFlow<()> {
-        if !self.tenant_shard_id.is_shard_zero() {
+        if !self.tenant_shard_id.is_zero() {
            // Shards !=0 do not maintain accurate relation sizes, and do not need to calculate logical size
            // for consumption metrics (consumption metrics are only sent from shard 0).  We may therefore
            // skip imitating logical size accesses for eviction purposes.
--- a/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs
+++ b/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs
@@ -427,7 +427,7 @@ pub(super) async fn handle_walreceiver_connection(

            // Send the replication feedback message.
            // Regular standby_status_update fields are put into this message.
-            let current_timeline_size = if timeline.tenant_shard_id.is_shard_zero() {
+            let current_timeline_size = if timeline.tenant_shard_id.is_zero() {
                timeline
                    .get_current_logical_size(
                        crate::tenant::timeline::GetLogicalSizePriority::User,
--- a/pageserver/src/tenant/vectored_blob_io.rs
+++ b/pageserver/src/tenant/vectored_blob_io.rs
@@ -61,18 +61,18 @@ pub struct VectoredRead {
 }

 impl VectoredRead {
-    pub(crate) fn size(&self) -> usize {
+    pub fn size(&self) -> usize {
        (self.end - self.start) as usize
    }
 }

 #[derive(Eq, PartialEq)]
-pub(crate) enum VectoredReadExtended {
+enum VectoredReadExtended {
    Yes,
    No,
 }

-pub(crate) struct VectoredReadBuilder {
+struct VectoredReadBuilder {
    start: u64,
    end: u64,
    blobs_at: VecMap<u64, BlobMeta>,
@@ -80,17 +80,7 @@ pub(crate) struct VectoredReadBuilder {
 }

 impl VectoredReadBuilder {
-    /// Start building a new vectored read.
-    ///
-    /// Note that by design, this does not check against reading more than `max_read_size` to
-    /// support reading larger blobs than the configuration value. The builder will be single use
-    /// however after that.
-    pub(crate) fn new(
-        start_offset: u64,
-        end_offset: u64,
-        meta: BlobMeta,
-        max_read_size: usize,
-    ) -> Self {
+    fn new(start_offset: u64, end_offset: u64, meta: BlobMeta, max_read_size: usize) -> Self {
        let mut blobs_at = VecMap::default();
        blobs_at
            .append(start_offset, meta)
@@ -107,8 +97,7 @@ impl VectoredReadBuilder {
    /// Attempt to extend the current read with a new blob if the start
    /// offset matches with the current end of the vectored read
    /// and the resuting size is below the max read size
-    pub(crate) fn extend(&mut self, start: u64, end: u64, meta: BlobMeta) -> VectoredReadExtended {
-        tracing::trace!(start, end, "trying to extend");
+    fn extend(&mut self, start: u64, end: u64, meta: BlobMeta) -> VectoredReadExtended {
        let size = (end - start) as usize;
        if self.end == start && self.size() + size <= self.max_read_size {
            self.end = end;
@@ -122,11 +111,11 @@ impl VectoredReadBuilder {
        VectoredReadExtended::No
    }

-    pub(crate) fn size(&self) -> usize {
+    fn size(&self) -> usize {
        (self.end - self.start) as usize
    }

-    pub(crate) fn build(self) -> VectoredRead {
+    fn build(self) -> VectoredRead {
        VectoredRead {
            start: self.start,
            end: self.end,
--- a/pageserver/src/walingest.rs
+++ b/pageserver/src/walingest.rs
@@ -403,7 +403,7 @@ impl WalIngest {
            );

            if !key_is_local {
-                if self.shard.is_shard_zero() {
+                if self.shard.is_zero() {
                    // Shard 0 tracks relation sizes.  Although we will not store this block, we will observe
                    // its blkno in case it implicitly extends a relation.
                    self.observe_decoded_block(modification, blk, ctx).await?;
--- a/pageserver/src/walrecord.rs
+++ b/pageserver/src/walrecord.rs
@@ -55,7 +55,6 @@ impl NeonWalRecord {
    /// Does replaying this WAL record initialize the page from scratch, or does
    /// it need to be applied over the previous image of the page?
    pub fn will_init(&self) -> bool {
-        // If you change this function, you'll also need to change ValueBytes::will_init
        match self {
            NeonWalRecord::Postgres { will_init, rec: _ } => *will_init,

--- a/pageserver/src/walredo.rs
+++ b/pageserver/src/walredo.rs
@@ -20,7 +20,6 @@

 /// Process lifecycle and abstracction for the IPC protocol.
 mod process;
-pub use process::Kind as ProcessKind;

 /// Code to apply [`NeonWalRecord`]s.
 pub(crate) mod apply_neon;
@@ -35,7 +34,7 @@ use crate::walrecord::NeonWalRecord;
 use anyhow::Context;
 use bytes::{Bytes, BytesMut};
 use pageserver_api::key::key_to_rel_block;
-use pageserver_api::models::{WalRedoManagerProcessStatus, WalRedoManagerStatus};
+use pageserver_api::models::WalRedoManagerStatus;
 use pageserver_api::shard::TenantShardId;
 use std::sync::Arc;
 use std::time::Duration;
@@ -55,7 +54,7 @@ pub struct PostgresRedoManager {
    tenant_shard_id: TenantShardId,
    conf: &'static PageServerConf,
    last_redo_at: std::sync::Mutex<Option<Instant>>,
-    /// The current [`process::Process`] that is used by new redo requests.
+    /// The current [`process::WalRedoProcess`] that is used by new redo requests.
    /// We use [`heavier_once_cell`] for coalescing the spawning, but the redo
    /// requests don't use the [`heavier_once_cell::Guard`] to keep ahold of the
    /// their process object; we use [`Arc::clone`] for that.
@@ -67,7 +66,7 @@ pub struct PostgresRedoManager {
    /// still be using the old redo process. But, those other tasks will most likely
    /// encounter an error as well, and errors are an unexpected condition anyway.
    /// So, probably we could get rid of the `Arc` in the future.
-    redo_process: heavier_once_cell::OnceCell<Arc<process::Process>>,
+    redo_process: heavier_once_cell::OnceCell<Arc<process::WalRedoProcess>>,
 }

 ///
@@ -140,8 +139,8 @@ impl PostgresRedoManager {
        }
    }

-    pub fn status(&self) -> WalRedoManagerStatus {
-        WalRedoManagerStatus {
+    pub(crate) fn status(&self) -> Option<WalRedoManagerStatus> {
+        Some(WalRedoManagerStatus {
            last_redo_at: {
                let at = *self.last_redo_at.lock().unwrap();
                at.and_then(|at| {
@@ -150,14 +149,8 @@ impl PostgresRedoManager {
                    chrono::Utc::now().checked_sub_signed(chrono::Duration::from_std(age).ok()?)
                })
            },
-            process: self
-                .redo_process
-                .get()
-                .map(|p| WalRedoManagerProcessStatus {
-                    pid: p.id(),
-                    kind: std::borrow::Cow::Borrowed(p.kind().into()),
-                }),
-        }
+            pid: self.redo_process.get().map(|p| p.id()),
+        })
    }
 }

@@ -215,33 +208,37 @@ impl PostgresRedoManager {
        const MAX_RETRY_ATTEMPTS: u32 = 1;
        let mut n_attempts = 0u32;
        loop {
-            let proc: Arc<process::Process> = match self.redo_process.get_or_init_detached().await {
-                Ok(guard) => Arc::clone(&guard),
-                Err(permit) => {
-                    // don't hold poison_guard, the launch code can bail
-                    let start = Instant::now();
-                    let proc = Arc::new(
-                        process::Process::launch(self.conf, self.tenant_shard_id, pg_version)
+            let proc: Arc<process::WalRedoProcess> =
+                match self.redo_process.get_or_init_detached().await {
+                    Ok(guard) => Arc::clone(&guard),
+                    Err(permit) => {
+                        // don't hold poison_guard, the launch code can bail
+                        let start = Instant::now();
+                        let proc = Arc::new(
+                            process::WalRedoProcess::launch(
+                                self.conf,
+                                self.tenant_shard_id,
+                                pg_version,
+                            )
                            .context("launch walredo process")?,
-                    );
-                    let duration = start.elapsed();
-                    WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM.observe(duration.as_secs_f64());
-                    info!(
-                        duration_ms = duration.as_millis(),
-                        pid = proc.id(),
-                        "launched walredo process"
-                    );
-                    self.redo_process.set(Arc::clone(&proc), permit);
-                    proc
-                }
-            };
+                        );
+                        let duration = start.elapsed();
+                        WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM.observe(duration.as_secs_f64());
+                        info!(
+                            duration_ms = duration.as_millis(),
+                            pid = proc.id(),
+                            "launched walredo process"
+                        );
+                        self.redo_process.set(Arc::clone(&proc), permit);
+                        proc
+                    }
+                };

            let started_at = std::time::Instant::now();

            // Relational WAL records are applied using wal-redo-postgres
            let result = proc
                .apply_wal_records(rel, blknum, &base_img, records, wal_redo_timeout)
-                .await
                .context("apply_wal_records");

            let duration = started_at.elapsed();
--- a/pageserver/src/walredo/process.rs
+++ b/pageserver/src/walredo/process.rs
@@ -1,67 +1,186 @@
-use std::time::Duration;
-
+use self::no_leak_child::NoLeakChild;
+use crate::{
+    config::PageServerConf,
+    metrics::{WalRedoKillCause, WAL_REDO_PROCESS_COUNTERS, WAL_REDO_RECORD_COUNTER},
+    walrecord::NeonWalRecord,
+};
+use anyhow::Context;
 use bytes::Bytes;
+use nix::poll::{PollFd, PollFlags};
 use pageserver_api::{reltag::RelTag, shard::TenantShardId};
-use utils::lsn::Lsn;
-
-use crate::{config::PageServerConf, walrecord::NeonWalRecord};
+use postgres_ffi::BLCKSZ;
+use std::os::fd::AsRawFd;
+#[cfg(feature = "testing")]
+use std::sync::atomic::AtomicUsize;
+use std::{
+    collections::VecDeque,
+    io::{Read, Write},
+    process::{ChildStdin, ChildStdout, Command, Stdio},
+    sync::{Mutex, MutexGuard},
+    time::Duration,
+};
+use tracing::{debug, error, instrument, Instrument};
+use utils::{lsn::Lsn, nonblock::set_nonblock};

 mod no_leak_child;
 /// The IPC protocol that pageserver and walredo process speak over their shared pipe.
 mod protocol;

-mod process_impl {
-    pub(super) mod process_async;
-    pub(super) mod process_std;
+pub struct WalRedoProcess {
+    #[allow(dead_code)]
+    conf: &'static PageServerConf,
+    tenant_shard_id: TenantShardId,
+    // Some() on construction, only becomes None on Drop.
+    child: Option<NoLeakChild>,
+    stdout: Mutex<ProcessOutput>,
+    stdin: Mutex<ProcessInput>,
+    /// Counter to separate same sized walredo inputs failing at the same millisecond.
+    #[cfg(feature = "testing")]
+    dump_sequence: AtomicUsize,
 }

-#[derive(
-    Clone,
-    Copy,
-    Debug,
-    PartialEq,
-    Eq,
-    strum_macros::EnumString,
-    strum_macros::Display,
-    strum_macros::IntoStaticStr,
-    serde_with::DeserializeFromStr,
-    serde_with::SerializeDisplay,
-)]
-#[strum(serialize_all = "kebab-case")]
-#[repr(u8)]
-pub enum Kind {
-    Sync,
-    Async,
+struct ProcessInput {
+    stdin: ChildStdin,
+    n_requests: usize,
 }

-pub(crate) enum Process {
-    Sync(process_impl::process_std::WalRedoProcess),
-    Async(process_impl::process_async::WalRedoProcess),
+struct ProcessOutput {
+    stdout: ChildStdout,
+    pending_responses: VecDeque<Option<Bytes>>,
+    n_processed_responses: usize,
 }

-impl Process {
-    #[inline(always)]
-    pub fn launch(
+impl WalRedoProcess {
+    //
+    // Start postgres binary in special WAL redo mode.
+    //
+    #[instrument(skip_all,fields(pg_version=pg_version))]
+    pub(crate) fn launch(
        conf: &'static PageServerConf,
        tenant_shard_id: TenantShardId,
        pg_version: u32,
    ) -> anyhow::Result<Self> {
-        Ok(match conf.walredo_process_kind {
-            Kind::Sync => Self::Sync(process_impl::process_std::WalRedoProcess::launch(
-                conf,
-                tenant_shard_id,
-                pg_version,
-            )?),
-            Kind::Async => Self::Async(process_impl::process_async::WalRedoProcess::launch(
-                conf,
-                tenant_shard_id,
-                pg_version,
-            )?),
+        crate::span::debug_assert_current_span_has_tenant_id();
+
+        let pg_bin_dir_path = conf.pg_bin_dir(pg_version).context("pg_bin_dir")?; // TODO these should be infallible.
+        let pg_lib_dir_path = conf.pg_lib_dir(pg_version).context("pg_lib_dir")?;
+
+        use no_leak_child::NoLeakChildCommandExt;
+        // Start postgres itself
+        let child = Command::new(pg_bin_dir_path.join("postgres"))
+            // the first arg must be --wal-redo so the child process enters into walredo mode
+            .arg("--wal-redo")
+            // the child doesn't process this arg, but, having it in the argv helps indentify the
+            // walredo process for a particular tenant when debugging a pagserver
+            .args(["--tenant-shard-id", &format!("{tenant_shard_id}")])
+            .stdin(Stdio::piped())
+            .stderr(Stdio::piped())
+            .stdout(Stdio::piped())
+            .env_clear()
+            .env("LD_LIBRARY_PATH", &pg_lib_dir_path)
+            .env("DYLD_LIBRARY_PATH", &pg_lib_dir_path)
+            // NB: The redo process is not trusted after we sent it the first
+            // walredo work. Before that, it is trusted. Specifically, we trust
+            // it to
+            // 1. close all file descriptors except stdin, stdout, stderr because
+            //    pageserver might not be 100% diligent in setting FD_CLOEXEC on all
+            //    the files it opens, and
+            // 2. to use seccomp to sandbox itself before processing the first
+            //    walredo request.
+            .spawn_no_leak_child(tenant_shard_id)
+            .context("spawn process")?;
+        WAL_REDO_PROCESS_COUNTERS.started.inc();
+        let mut child = scopeguard::guard(child, |child| {
+            error!("killing wal-redo-postgres process due to a problem during launch");
+            child.kill_and_wait(WalRedoKillCause::Startup);
+        });
+
+        let stdin = child.stdin.take().unwrap();
+        let stdout = child.stdout.take().unwrap();
+        let stderr = child.stderr.take().unwrap();
+        let stderr = tokio::process::ChildStderr::from_std(stderr)
+            .context("convert to tokio::ChildStderr")?;
+        macro_rules! set_nonblock_or_log_err {
+        ($file:ident) => {{
+            let res = set_nonblock($file.as_raw_fd());
+            if let Err(e) = &res {
+                error!(error = %e, file = stringify!($file), pid = child.id(), "set_nonblock failed");
+            }
+            res
+        }};
+    }
+        set_nonblock_or_log_err!(stdin)?;
+        set_nonblock_or_log_err!(stdout)?;
+
+        // all fallible operations post-spawn are complete, so get rid of the guard
+        let child = scopeguard::ScopeGuard::into_inner(child);
+
+        tokio::spawn(
+        async move {
+            scopeguard::defer! {
+                debug!("wal-redo-postgres stderr_logger_task finished");
+                crate::metrics::WAL_REDO_PROCESS_COUNTERS.active_stderr_logger_tasks_finished.inc();
+            }
+            debug!("wal-redo-postgres stderr_logger_task started");
+            crate::metrics::WAL_REDO_PROCESS_COUNTERS.active_stderr_logger_tasks_started.inc();
+
+            use tokio::io::AsyncBufReadExt;
+            let mut stderr_lines = tokio::io::BufReader::new(stderr);
+            let mut buf = Vec::new();
+            let res = loop {
+                buf.clear();
+                // TODO we don't trust the process to cap its stderr length.
+                // Currently it can do unbounded Vec allocation.
+                match stderr_lines.read_until(b'\n', &mut buf).await {
+                    Ok(0) => break Ok(()), // eof
+                    Ok(num_bytes) => {
+                        let output = String::from_utf8_lossy(&buf[..num_bytes]);
+                        error!(%output, "received output");
+                    }
+                    Err(e) => {
+                        break Err(e);
+                    }
+                }
+            };
+            match res {
+                Ok(()) => (),
+                Err(e) => {
+                    error!(error=?e, "failed to read from walredo stderr");
+                }
+            }
+        }.instrument(tracing::info_span!(parent: None, "wal-redo-postgres-stderr", pid = child.id(), tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %pg_version))
+    );
+
+        Ok(Self {
+            conf,
+            tenant_shard_id,
+            child: Some(child),
+            stdin: Mutex::new(ProcessInput {
+                stdin,
+                n_requests: 0,
+            }),
+            stdout: Mutex::new(ProcessOutput {
+                stdout,
+                pending_responses: VecDeque::new(),
+                n_processed_responses: 0,
+            }),
+            #[cfg(feature = "testing")]
+            dump_sequence: AtomicUsize::default(),
        })
    }

-    #[inline(always)]
-    pub(crate) async fn apply_wal_records(
+    pub(crate) fn id(&self) -> u32 {
+        self.child
+            .as_ref()
+            .expect("must not call this during Drop")
+            .id()
+    }
+
+    // Apply given WAL records ('records') over an old page image. Returns
+    // new page image.
+    //
+    #[instrument(skip_all, fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug(), pid=%self.id()))]
+    pub(crate) fn apply_wal_records(
        &self,
        rel: RelTag,
        blknum: u32,
@@ -69,29 +188,221 @@ impl Process {
        records: &[(Lsn, NeonWalRecord)],
        wal_redo_timeout: Duration,
    ) -> anyhow::Result<Bytes> {
-        match self {
-            Process::Sync(p) => {
-                p.apply_wal_records(rel, blknum, base_img, records, wal_redo_timeout)
-                    .await
+        let tag = protocol::BufferTag { rel, blknum };
+        let input = self.stdin.lock().unwrap();
+
+        // Serialize all the messages to send the WAL redo process first.
+        //
+        // This could be problematic if there are millions of records to replay,
+        // but in practice the number of records is usually so small that it doesn't
+        // matter, and it's better to keep this code simple.
+        //
+        // Most requests start with a before-image with BLCKSZ bytes, followed by
+        // by some other WAL records. Start with a buffer that can hold that
+        // comfortably.
+        let mut writebuf: Vec<u8> = Vec::with_capacity((BLCKSZ as usize) * 3);
+        protocol::build_begin_redo_for_block_msg(tag, &mut writebuf);
+        if let Some(img) = base_img {
+            protocol::build_push_page_msg(tag, img, &mut writebuf);
+        }
+        for (lsn, rec) in records.iter() {
+            if let NeonWalRecord::Postgres {
+                will_init: _,
+                rec: postgres_rec,
+            } = rec
+            {
+                protocol::build_apply_record_msg(*lsn, postgres_rec, &mut writebuf);
+            } else {
+                anyhow::bail!("tried to pass neon wal record to postgres WAL redo");
            }
-            Process::Async(p) => {
-                p.apply_wal_records(rel, blknum, base_img, records, wal_redo_timeout)
-                    .await
+        }
+        protocol::build_get_page_msg(tag, &mut writebuf);
+        WAL_REDO_RECORD_COUNTER.inc_by(records.len() as u64);
+
+        let res = self.apply_wal_records0(&writebuf, input, wal_redo_timeout);
+
+        if res.is_err() {
+            // not all of these can be caused by this particular input, however these are so rare
+            // in tests so capture all.
+            self.record_and_log(&writebuf);
+        }
+
+        res
+    }
+
+    fn apply_wal_records0(
+        &self,
+        writebuf: &[u8],
+        input: MutexGuard<ProcessInput>,
+        wal_redo_timeout: Duration,
+    ) -> anyhow::Result<Bytes> {
+        let mut proc = { input }; // TODO: remove this legacy rename, but this keep the patch small.
+        let mut nwrite = 0usize;
+
+        while nwrite < writebuf.len() {
+            let mut stdin_pollfds = [PollFd::new(&proc.stdin, PollFlags::POLLOUT)];
+            let n = loop {
+                match nix::poll::poll(&mut stdin_pollfds[..], wal_redo_timeout.as_millis() as i32) {
+                    Err(nix::errno::Errno::EINTR) => continue,
+                    res => break res,
+                }
+            }?;
+
+            if n == 0 {
+                anyhow::bail!("WAL redo timed out");
            }
+
+            // If 'stdin' is writeable, do write.
+            let in_revents = stdin_pollfds[0].revents().unwrap();
+            if in_revents & (PollFlags::POLLERR | PollFlags::POLLOUT) != PollFlags::empty() {
+                nwrite += proc.stdin.write(&writebuf[nwrite..])?;
+            }
+            if in_revents.contains(PollFlags::POLLHUP) {
+                // We still have more data to write, but the process closed the pipe.
+                anyhow::bail!("WAL redo process closed its stdin unexpectedly");
+            }
+        }
+        let request_no = proc.n_requests;
+        proc.n_requests += 1;
+        drop(proc);
+
+        // To improve walredo performance we separate sending requests and receiving
+        // responses. Them are protected by different mutexes (output and input).
+        // If thread T1, T2, T3 send requests D1, D2, D3 to walredo process
+        // then there is not warranty that T1 will first granted output mutex lock.
+        // To address this issue we maintain number of sent requests, number of processed
+        // responses and ring buffer with pending responses. After sending response
+        // (under input mutex), threads remembers request number. Then it releases
+        // input mutex, locks output mutex and fetch in ring buffer all responses until
+        // its stored request number. The it takes correspondent element from
+        // pending responses ring buffer and truncate all empty elements from the front,
+        // advancing processed responses number.
+
+        let mut output = self.stdout.lock().unwrap();
+        let n_processed_responses = output.n_processed_responses;
+        while n_processed_responses + output.pending_responses.len() <= request_no {
+            // We expect the WAL redo process to respond with an 8k page image. We read it
+            // into this buffer.
+            let mut resultbuf = vec![0; BLCKSZ.into()];
+            let mut nresult: usize = 0; // # of bytes read into 'resultbuf' so far
+            while nresult < BLCKSZ.into() {
+                let mut stdout_pollfds = [PollFd::new(&output.stdout, PollFlags::POLLIN)];
+                // We do two things simultaneously: reading response from stdout
+                // and forward any logging information that the child writes to its stderr to the page server's log.
+                let n = loop {
+                    match nix::poll::poll(
+                        &mut stdout_pollfds[..],
+                        wal_redo_timeout.as_millis() as i32,
+                    ) {
+                        Err(nix::errno::Errno::EINTR) => continue,
+                        res => break res,
+                    }
+                }?;
+
+                if n == 0 {
+                    anyhow::bail!("WAL redo timed out");
+                }
+
+                // If we have some data in stdout, read it to the result buffer.
+                let out_revents = stdout_pollfds[0].revents().unwrap();
+                if out_revents & (PollFlags::POLLERR | PollFlags::POLLIN) != PollFlags::empty() {
+                    nresult += output.stdout.read(&mut resultbuf[nresult..])?;
+                }
+                if out_revents.contains(PollFlags::POLLHUP) {
+                    anyhow::bail!("WAL redo process closed its stdout unexpectedly");
+                }
+            }
+            output
+                .pending_responses
+                .push_back(Some(Bytes::from(resultbuf)));
+        }
+        // Replace our request's response with None in `pending_responses`.
+        // Then make space in the ring buffer by clearing out any seqence of contiguous
+        // `None`'s from the front of `pending_responses`.
+        // NB: We can't pop_front() because other requests' responses because another
+        // requester might have grabbed the output mutex before us:
+        // T1: grab input mutex
+        // T1: send request_no 23
+        // T1: release input mutex
+        // T2: grab input mutex
+        // T2: send request_no 24
+        // T2: release input mutex
+        // T2: grab output mutex
+        // T2: n_processed_responses + output.pending_responses.len() <= request_no
+        //            23                                0                   24
+        // T2: enters poll loop that reads stdout
+        // T2: put response for 23 into pending_responses
+        // T2: put response for 24 into pending_resposnes
+        // pending_responses now looks like this: Front Some(response_23) Some(response_24) Back
+        // T2: takes its response_24
+        // pending_responses now looks like this: Front Some(response_23) None Back
+        // T2: does the while loop below
+        // pending_responses now looks like this: Front Some(response_23) None Back
+        // T2: releases output mutex
+        // T1: grabs output mutex
+        // T1: n_processed_responses + output.pending_responses.len() > request_no
+        //            23                                2                   23
+        // T1: skips poll loop that reads stdout
+        // T1: takes its response_23
+        // pending_responses now looks like this: Front None None Back
+        // T2: does the while loop below
+        // pending_responses now looks like this: Front Back
+        // n_processed_responses now has value 25
+        let res = output.pending_responses[request_no - n_processed_responses]
+            .take()
+            .expect("we own this request_no, nobody else is supposed to take it");
+        while let Some(front) = output.pending_responses.front() {
+            if front.is_none() {
+                output.pending_responses.pop_front();
+                output.n_processed_responses += 1;
+            } else {
+                break;
+            }
+        }
+        Ok(res)
+    }
+
+    #[cfg(feature = "testing")]
+    fn record_and_log(&self, writebuf: &[u8]) {
+        use std::sync::atomic::Ordering;
+
+        let millis = std::time::SystemTime::now()
+            .duration_since(std::time::SystemTime::UNIX_EPOCH)
+            .unwrap()
+            .as_millis();
+
+        let seq = self.dump_sequence.fetch_add(1, Ordering::Relaxed);
+
+        // these files will be collected to an allure report
+        let filename = format!("walredo-{millis}-{}-{seq}.walredo", writebuf.len());
+
+        let path = self.conf.tenant_path(&self.tenant_shard_id).join(&filename);
+
+        let res = std::fs::OpenOptions::new()
+            .write(true)
+            .create_new(true)
+            .read(true)
+            .open(path)
+            .and_then(|mut f| f.write_all(writebuf));
+
+        // trip up allowed_errors
+        if let Err(e) = res {
+            tracing::error!(target=%filename, length=writebuf.len(), "failed to write out the walredo errored input: {e}");
+        } else {
+            tracing::error!(filename, "erroring walredo input saved");
        }
    }

-    pub(crate) fn id(&self) -> u32 {
-        match self {
-            Process::Sync(p) => p.id(),
-            Process::Async(p) => p.id(),
-        }
-    }
+    #[cfg(not(feature = "testing"))]
+    fn record_and_log(&self, _: &[u8]) {}
+}

-    pub(crate) fn kind(&self) -> Kind {
-        match self {
-            Process::Sync(_) => Kind::Sync,
-            Process::Async(_) => Kind::Async,
-        }
+impl Drop for WalRedoProcess {
+    fn drop(&mut self) {
+        self.child
+            .take()
+            .expect("we only do this once")
+            .kill_and_wait(WalRedoKillCause::WalRedoProcessDrop);
+        // no way to wait for stderr_logger_task from Drop because that is async only
    }
 }
--- a/pageserver/src/walredo/process/process_impl/process_async.rs
+++ b/pageserver/src/walredo/process/process_impl/process_async.rs
@@ -1,374 +0,0 @@
-use self::no_leak_child::NoLeakChild;
-use crate::{
-    config::PageServerConf,
-    metrics::{WalRedoKillCause, WAL_REDO_PROCESS_COUNTERS, WAL_REDO_RECORD_COUNTER},
-    walrecord::NeonWalRecord,
-    walredo::process::{no_leak_child, protocol},
-};
-use anyhow::Context;
-use bytes::Bytes;
-use pageserver_api::{reltag::RelTag, shard::TenantShardId};
-use postgres_ffi::BLCKSZ;
-#[cfg(feature = "testing")]
-use std::sync::atomic::AtomicUsize;
-use std::{
-    collections::VecDeque,
-    process::{Command, Stdio},
-    time::Duration,
-};
-use tokio::io::{AsyncReadExt, AsyncWriteExt};
-use tracing::{debug, error, instrument, Instrument};
-use utils::{lsn::Lsn, poison::Poison};
-
-pub struct WalRedoProcess {
-    #[allow(dead_code)]
-    conf: &'static PageServerConf,
-    tenant_shard_id: TenantShardId,
-    // Some() on construction, only becomes None on Drop.
-    child: Option<NoLeakChild>,
-    stdout: tokio::sync::Mutex<Poison<ProcessOutput>>,
-    stdin: tokio::sync::Mutex<Poison<ProcessInput>>,
-    /// Counter to separate same sized walredo inputs failing at the same millisecond.
-    #[cfg(feature = "testing")]
-    dump_sequence: AtomicUsize,
-}
-
-struct ProcessInput {
-    stdin: tokio::process::ChildStdin,
-    n_requests: usize,
-}
-
-struct ProcessOutput {
-    stdout: tokio::process::ChildStdout,
-    pending_responses: VecDeque<Option<Bytes>>,
-    n_processed_responses: usize,
-}
-
-impl WalRedoProcess {
-    //
-    // Start postgres binary in special WAL redo mode.
-    //
-    #[instrument(skip_all,fields(pg_version=pg_version))]
-    pub(crate) fn launch(
-        conf: &'static PageServerConf,
-        tenant_shard_id: TenantShardId,
-        pg_version: u32,
-    ) -> anyhow::Result<Self> {
-        crate::span::debug_assert_current_span_has_tenant_id();
-
-        let pg_bin_dir_path = conf.pg_bin_dir(pg_version).context("pg_bin_dir")?; // TODO these should be infallible.
-        let pg_lib_dir_path = conf.pg_lib_dir(pg_version).context("pg_lib_dir")?;
-
-        use no_leak_child::NoLeakChildCommandExt;
-        // Start postgres itself
-        let child = Command::new(pg_bin_dir_path.join("postgres"))
-            // the first arg must be --wal-redo so the child process enters into walredo mode
-            .arg("--wal-redo")
-            // the child doesn't process this arg, but, having it in the argv helps indentify the
-            // walredo process for a particular tenant when debugging a pagserver
-            .args(["--tenant-shard-id", &format!("{tenant_shard_id}")])
-            .stdin(Stdio::piped())
-            .stderr(Stdio::piped())
-            .stdout(Stdio::piped())
-            .env_clear()
-            .env("LD_LIBRARY_PATH", &pg_lib_dir_path)
-            .env("DYLD_LIBRARY_PATH", &pg_lib_dir_path)
-            // NB: The redo process is not trusted after we sent it the first
-            // walredo work. Before that, it is trusted. Specifically, we trust
-            // it to
-            // 1. close all file descriptors except stdin, stdout, stderr because
-            //    pageserver might not be 100% diligent in setting FD_CLOEXEC on all
-            //    the files it opens, and
-            // 2. to use seccomp to sandbox itself before processing the first
-            //    walredo request.
-            .spawn_no_leak_child(tenant_shard_id)
-            .context("spawn process")?;
-        WAL_REDO_PROCESS_COUNTERS.started.inc();
-        let mut child = scopeguard::guard(child, |child| {
-            error!("killing wal-redo-postgres process due to a problem during launch");
-            child.kill_and_wait(WalRedoKillCause::Startup);
-        });
-
-        let stdin = child.stdin.take().unwrap();
-        let stdout = child.stdout.take().unwrap();
-        let stderr = child.stderr.take().unwrap();
-        let stderr = tokio::process::ChildStderr::from_std(stderr)
-            .context("convert to tokio::ChildStderr")?;
-        let stdin =
-            tokio::process::ChildStdin::from_std(stdin).context("convert to tokio::ChildStdin")?;
-        let stdout = tokio::process::ChildStdout::from_std(stdout)
-            .context("convert to tokio::ChildStdout")?;
-
-        // all fallible operations post-spawn are complete, so get rid of the guard
-        let child = scopeguard::ScopeGuard::into_inner(child);
-
-        tokio::spawn(
-            async move {
-                scopeguard::defer! {
-                    debug!("wal-redo-postgres stderr_logger_task finished");
-                    crate::metrics::WAL_REDO_PROCESS_COUNTERS.active_stderr_logger_tasks_finished.inc();
-                }
-                debug!("wal-redo-postgres stderr_logger_task started");
-                crate::metrics::WAL_REDO_PROCESS_COUNTERS.active_stderr_logger_tasks_started.inc();
-
-                use tokio::io::AsyncBufReadExt;
-                let mut stderr_lines = tokio::io::BufReader::new(stderr);
-                let mut buf = Vec::new();
-                let res = loop {
-                    buf.clear();
-                    // TODO we don't trust the process to cap its stderr length.
-                    // Currently it can do unbounded Vec allocation.
-                    match stderr_lines.read_until(b'\n', &mut buf).await {
-                        Ok(0) => break Ok(()), // eof
-                        Ok(num_bytes) => {
-                            let output = String::from_utf8_lossy(&buf[..num_bytes]);
-                            error!(%output, "received output");
-                        }
-                        Err(e) => {
-                            break Err(e);
-                        }
-                    }
-                };
-                match res {
-                    Ok(()) => (),
-                    Err(e) => {
-                        error!(error=?e, "failed to read from walredo stderr");
-                    }
-                }
-            }.instrument(tracing::info_span!(parent: None, "wal-redo-postgres-stderr", pid = child.id(), tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %pg_version))
-        );
-
-        Ok(Self {
-            conf,
-            tenant_shard_id,
-            child: Some(child),
-            stdin: tokio::sync::Mutex::new(Poison::new(
-                "stdin",
-                ProcessInput {
-                    stdin,
-                    n_requests: 0,
-                },
-            )),
-            stdout: tokio::sync::Mutex::new(Poison::new(
-                "stdout",
-                ProcessOutput {
-                    stdout,
-                    pending_responses: VecDeque::new(),
-                    n_processed_responses: 0,
-                },
-            )),
-            #[cfg(feature = "testing")]
-            dump_sequence: AtomicUsize::default(),
-        })
-    }
-
-    pub(crate) fn id(&self) -> u32 {
-        self.child
-            .as_ref()
-            .expect("must not call this during Drop")
-            .id()
-    }
-
-    /// Apply given WAL records ('records') over an old page image. Returns
-    /// new page image.
-    ///
-    /// # Cancel-Safety
-    ///
-    /// Cancellation safe.
-    #[instrument(skip_all, fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug(), pid=%self.id()))]
-    pub(crate) async fn apply_wal_records(
-        &self,
-        rel: RelTag,
-        blknum: u32,
-        base_img: &Option<Bytes>,
-        records: &[(Lsn, NeonWalRecord)],
-        wal_redo_timeout: Duration,
-    ) -> anyhow::Result<Bytes> {
-        let tag = protocol::BufferTag { rel, blknum };
-
-        // Serialize all the messages to send the WAL redo process first.
-        //
-        // This could be problematic if there are millions of records to replay,
-        // but in practice the number of records is usually so small that it doesn't
-        // matter, and it's better to keep this code simple.
-        //
-        // Most requests start with a before-image with BLCKSZ bytes, followed by
-        // by some other WAL records. Start with a buffer that can hold that
-        // comfortably.
-        let mut writebuf: Vec<u8> = Vec::with_capacity((BLCKSZ as usize) * 3);
-        protocol::build_begin_redo_for_block_msg(tag, &mut writebuf);
-        if let Some(img) = base_img {
-            protocol::build_push_page_msg(tag, img, &mut writebuf);
-        }
-        for (lsn, rec) in records.iter() {
-            if let NeonWalRecord::Postgres {
-                will_init: _,
-                rec: postgres_rec,
-            } = rec
-            {
-                protocol::build_apply_record_msg(*lsn, postgres_rec, &mut writebuf);
-            } else {
-                anyhow::bail!("tried to pass neon wal record to postgres WAL redo");
-            }
-        }
-        protocol::build_get_page_msg(tag, &mut writebuf);
-        WAL_REDO_RECORD_COUNTER.inc_by(records.len() as u64);
-
-        let Ok(res) =
-            tokio::time::timeout(wal_redo_timeout, self.apply_wal_records0(&writebuf)).await
-        else {
-            anyhow::bail!("WAL redo timed out");
-        };
-
-        if res.is_err() {
-            // not all of these can be caused by this particular input, however these are so rare
-            // in tests so capture all.
-            self.record_and_log(&writebuf);
-        }
-
-        res
-    }
-
-    /// # Cancel-Safety
-    ///
-    /// When not polled to completion (e.g. because in `tokio::select!` another
-    /// branch becomes ready before this future), concurrent and subsequent
-    /// calls may fail due to [`utils::poison::Poison::check_and_arm`] calls.
-    /// Dispose of this process instance and create a new one.
-    async fn apply_wal_records0(&self, writebuf: &[u8]) -> anyhow::Result<Bytes> {
-        let request_no = {
-            let mut lock_guard = self.stdin.lock().await;
-            let mut poison_guard = lock_guard.check_and_arm()?;
-            let input = poison_guard.data_mut();
-            input
-                .stdin
-                .write_all(writebuf)
-                .await
-                .context("write to walredo stdin")?;
-            let request_no = input.n_requests;
-            input.n_requests += 1;
-            poison_guard.disarm();
-            request_no
-        };
-
-        // To improve walredo performance we separate sending requests and receiving
-        // responses. Them are protected by different mutexes (output and input).
-        // If thread T1, T2, T3 send requests D1, D2, D3 to walredo process
-        // then there is not warranty that T1 will first granted output mutex lock.
-        // To address this issue we maintain number of sent requests, number of processed
-        // responses and ring buffer with pending responses. After sending response
-        // (under input mutex), threads remembers request number. Then it releases
-        // input mutex, locks output mutex and fetch in ring buffer all responses until
-        // its stored request number. The it takes correspondent element from
-        // pending responses ring buffer and truncate all empty elements from the front,
-        // advancing processed responses number.
-
-        let mut lock_guard = self.stdout.lock().await;
-        let mut poison_guard = lock_guard.check_and_arm()?;
-        let output = poison_guard.data_mut();
-        let n_processed_responses = output.n_processed_responses;
-        while n_processed_responses + output.pending_responses.len() <= request_no {
-            // We expect the WAL redo process to respond with an 8k page image. We read it
-            // into this buffer.
-            let mut resultbuf = vec![0; BLCKSZ.into()];
-            output
-                .stdout
-                .read_exact(&mut resultbuf)
-                .await
-                .context("read walredo stdout")?;
-            output
-                .pending_responses
-                .push_back(Some(Bytes::from(resultbuf)));
-        }
-        // Replace our request's response with None in `pending_responses`.
-        // Then make space in the ring buffer by clearing out any seqence of contiguous
-        // `None`'s from the front of `pending_responses`.
-        // NB: We can't pop_front() because other requests' responses because another
-        // requester might have grabbed the output mutex before us:
-        // T1: grab input mutex
-        // T1: send request_no 23
-        // T1: release input mutex
-        // T2: grab input mutex
-        // T2: send request_no 24
-        // T2: release input mutex
-        // T2: grab output mutex
-        // T2: n_processed_responses + output.pending_responses.len() <= request_no
-        //            23                                0                   24
-        // T2: enters poll loop that reads stdout
-        // T2: put response for 23 into pending_responses
-        // T2: put response for 24 into pending_resposnes
-        // pending_responses now looks like this: Front Some(response_23) Some(response_24) Back
-        // T2: takes its response_24
-        // pending_responses now looks like this: Front Some(response_23) None Back
-        // T2: does the while loop below
-        // pending_responses now looks like this: Front Some(response_23) None Back
-        // T2: releases output mutex
-        // T1: grabs output mutex
-        // T1: n_processed_responses + output.pending_responses.len() > request_no
-        //            23                                2                   23
-        // T1: skips poll loop that reads stdout
-        // T1: takes its response_23
-        // pending_responses now looks like this: Front None None Back
-        // T2: does the while loop below
-        // pending_responses now looks like this: Front Back
-        // n_processed_responses now has value 25
-        let res = output.pending_responses[request_no - n_processed_responses]
-            .take()
-            .expect("we own this request_no, nobody else is supposed to take it");
-        while let Some(front) = output.pending_responses.front() {
-            if front.is_none() {
-                output.pending_responses.pop_front();
-                output.n_processed_responses += 1;
-            } else {
-                break;
-            }
-        }
-        poison_guard.disarm();
-        Ok(res)
-    }
-
-    #[cfg(feature = "testing")]
-    fn record_and_log(&self, writebuf: &[u8]) {
-        use std::sync::atomic::Ordering;
-
-        let millis = std::time::SystemTime::now()
-            .duration_since(std::time::SystemTime::UNIX_EPOCH)
-            .unwrap()
-            .as_millis();
-
-        let seq = self.dump_sequence.fetch_add(1, Ordering::Relaxed);
-
-        // these files will be collected to an allure report
-        let filename = format!("walredo-{millis}-{}-{seq}.walredo", writebuf.len());
-
-        let path = self.conf.tenant_path(&self.tenant_shard_id).join(&filename);
-
-        use std::io::Write;
-        let res = std::fs::OpenOptions::new()
-            .write(true)
-            .create_new(true)
-            .read(true)
-            .open(path)
-            .and_then(|mut f| f.write_all(writebuf));
-
-        // trip up allowed_errors
-        if let Err(e) = res {
-            tracing::error!(target=%filename, length=writebuf.len(), "failed to write out the walredo errored input: {e}");
-        } else {
-            tracing::error!(filename, "erroring walredo input saved");
-        }
-    }
-
-    #[cfg(not(feature = "testing"))]
-    fn record_and_log(&self, _: &[u8]) {}
-}
-
-impl Drop for WalRedoProcess {
-    fn drop(&mut self) {
-        self.child
-            .take()
-            .expect("we only do this once")
-            .kill_and_wait(WalRedoKillCause::WalRedoProcessDrop);
-        // no way to wait for stderr_logger_task from Drop because that is async only
-    }
-}
--- a/pageserver/src/walredo/process/process_impl/process_std.rs
+++ b/pageserver/src/walredo/process/process_impl/process_std.rs
@@ -1,405 +0,0 @@
-use self::no_leak_child::NoLeakChild;
-use crate::{
-    config::PageServerConf,
-    metrics::{WalRedoKillCause, WAL_REDO_PROCESS_COUNTERS, WAL_REDO_RECORD_COUNTER},
-    walrecord::NeonWalRecord,
-    walredo::process::{no_leak_child, protocol},
-};
-use anyhow::Context;
-use bytes::Bytes;
-use nix::poll::{PollFd, PollFlags};
-use pageserver_api::{reltag::RelTag, shard::TenantShardId};
-use postgres_ffi::BLCKSZ;
-use std::os::fd::AsRawFd;
-#[cfg(feature = "testing")]
-use std::sync::atomic::AtomicUsize;
-use std::{
-    collections::VecDeque,
-    io::{Read, Write},
-    process::{ChildStdin, ChildStdout, Command, Stdio},
-    sync::{Mutex, MutexGuard},
-    time::Duration,
-};
-use tracing::{debug, error, instrument, Instrument};
-use utils::{lsn::Lsn, nonblock::set_nonblock};
-
-pub struct WalRedoProcess {
-    #[allow(dead_code)]
-    conf: &'static PageServerConf,
-    tenant_shard_id: TenantShardId,
-    // Some() on construction, only becomes None on Drop.
-    child: Option<NoLeakChild>,
-    stdout: Mutex<ProcessOutput>,
-    stdin: Mutex<ProcessInput>,
-    /// Counter to separate same sized walredo inputs failing at the same millisecond.
-    #[cfg(feature = "testing")]
-    dump_sequence: AtomicUsize,
-}
-
-struct ProcessInput {
-    stdin: ChildStdin,
-    n_requests: usize,
-}
-
-struct ProcessOutput {
-    stdout: ChildStdout,
-    pending_responses: VecDeque<Option<Bytes>>,
-    n_processed_responses: usize,
-}
-
-impl WalRedoProcess {
-    //
-    // Start postgres binary in special WAL redo mode.
-    //
-    #[instrument(skip_all,fields(pg_version=pg_version))]
-    pub(crate) fn launch(
-        conf: &'static PageServerConf,
-        tenant_shard_id: TenantShardId,
-        pg_version: u32,
-    ) -> anyhow::Result<Self> {
-        crate::span::debug_assert_current_span_has_tenant_id();
-
-        let pg_bin_dir_path = conf.pg_bin_dir(pg_version).context("pg_bin_dir")?; // TODO these should be infallible.
-        let pg_lib_dir_path = conf.pg_lib_dir(pg_version).context("pg_lib_dir")?;
-
-        use no_leak_child::NoLeakChildCommandExt;
-        // Start postgres itself
-        let child = Command::new(pg_bin_dir_path.join("postgres"))
-            // the first arg must be --wal-redo so the child process enters into walredo mode
-            .arg("--wal-redo")
-            // the child doesn't process this arg, but, having it in the argv helps indentify the
-            // walredo process for a particular tenant when debugging a pagserver
-            .args(["--tenant-shard-id", &format!("{tenant_shard_id}")])
-            .stdin(Stdio::piped())
-            .stderr(Stdio::piped())
-            .stdout(Stdio::piped())
-            .env_clear()
-            .env("LD_LIBRARY_PATH", &pg_lib_dir_path)
-            .env("DYLD_LIBRARY_PATH", &pg_lib_dir_path)
-            // NB: The redo process is not trusted after we sent it the first
-            // walredo work. Before that, it is trusted. Specifically, we trust
-            // it to
-            // 1. close all file descriptors except stdin, stdout, stderr because
-            //    pageserver might not be 100% diligent in setting FD_CLOEXEC on all
-            //    the files it opens, and
-            // 2. to use seccomp to sandbox itself before processing the first
-            //    walredo request.
-            .spawn_no_leak_child(tenant_shard_id)
-            .context("spawn process")?;
-        WAL_REDO_PROCESS_COUNTERS.started.inc();
-        let mut child = scopeguard::guard(child, |child| {
-            error!("killing wal-redo-postgres process due to a problem during launch");
-            child.kill_and_wait(WalRedoKillCause::Startup);
-        });
-
-        let stdin = child.stdin.take().unwrap();
-        let stdout = child.stdout.take().unwrap();
-        let stderr = child.stderr.take().unwrap();
-        let stderr = tokio::process::ChildStderr::from_std(stderr)
-            .context("convert to tokio::ChildStderr")?;
-        macro_rules! set_nonblock_or_log_err {
-        ($file:ident) => {{
-            let res = set_nonblock($file.as_raw_fd());
-            if let Err(e) = &res {
-                error!(error = %e, file = stringify!($file), pid = child.id(), "set_nonblock failed");
-            }
-            res
-        }};
-    }
-        set_nonblock_or_log_err!(stdin)?;
-        set_nonblock_or_log_err!(stdout)?;
-
-        // all fallible operations post-spawn are complete, so get rid of the guard
-        let child = scopeguard::ScopeGuard::into_inner(child);
-
-        tokio::spawn(
-            async move {
-                scopeguard::defer! {
-                    debug!("wal-redo-postgres stderr_logger_task finished");
-                    crate::metrics::WAL_REDO_PROCESS_COUNTERS.active_stderr_logger_tasks_finished.inc();
-                }
-                debug!("wal-redo-postgres stderr_logger_task started");
-                crate::metrics::WAL_REDO_PROCESS_COUNTERS.active_stderr_logger_tasks_started.inc();
-
-                use tokio::io::AsyncBufReadExt;
-                let mut stderr_lines = tokio::io::BufReader::new(stderr);
-                let mut buf = Vec::new();
-                let res = loop {
-                    buf.clear();
-                    // TODO we don't trust the process to cap its stderr length.
-                    // Currently it can do unbounded Vec allocation.
-                    match stderr_lines.read_until(b'\n', &mut buf).await {
-                        Ok(0) => break Ok(()), // eof
-                        Ok(num_bytes) => {
-                            let output = String::from_utf8_lossy(&buf[..num_bytes]);
-                            error!(%output, "received output");
-                        }
-                        Err(e) => {
-                            break Err(e);
-                        }
-                    }
-                };
-                match res {
-                    Ok(()) => (),
-                    Err(e) => {
-                        error!(error=?e, "failed to read from walredo stderr");
-                    }
-                }
-            }.instrument(tracing::info_span!(parent: None, "wal-redo-postgres-stderr", pid = child.id(), tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %pg_version))
-        );
-
-        Ok(Self {
-            conf,
-            tenant_shard_id,
-            child: Some(child),
-            stdin: Mutex::new(ProcessInput {
-                stdin,
-                n_requests: 0,
-            }),
-            stdout: Mutex::new(ProcessOutput {
-                stdout,
-                pending_responses: VecDeque::new(),
-                n_processed_responses: 0,
-            }),
-            #[cfg(feature = "testing")]
-            dump_sequence: AtomicUsize::default(),
-        })
-    }
-
-    pub(crate) fn id(&self) -> u32 {
-        self.child
-            .as_ref()
-            .expect("must not call this during Drop")
-            .id()
-    }
-
-    // Apply given WAL records ('records') over an old page image. Returns
-    // new page image.
-    //
-    #[instrument(skip_all, fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug(), pid=%self.id()))]
-    pub(crate) async fn apply_wal_records(
-        &self,
-        rel: RelTag,
-        blknum: u32,
-        base_img: &Option<Bytes>,
-        records: &[(Lsn, NeonWalRecord)],
-        wal_redo_timeout: Duration,
-    ) -> anyhow::Result<Bytes> {
-        let tag = protocol::BufferTag { rel, blknum };
-        let input = self.stdin.lock().unwrap();
-
-        // Serialize all the messages to send the WAL redo process first.
-        //
-        // This could be problematic if there are millions of records to replay,
-        // but in practice the number of records is usually so small that it doesn't
-        // matter, and it's better to keep this code simple.
-        //
-        // Most requests start with a before-image with BLCKSZ bytes, followed by
-        // by some other WAL records. Start with a buffer that can hold that
-        // comfortably.
-        let mut writebuf: Vec<u8> = Vec::with_capacity((BLCKSZ as usize) * 3);
-        protocol::build_begin_redo_for_block_msg(tag, &mut writebuf);
-        if let Some(img) = base_img {
-            protocol::build_push_page_msg(tag, img, &mut writebuf);
-        }
-        for (lsn, rec) in records.iter() {
-            if let NeonWalRecord::Postgres {
-                will_init: _,
-                rec: postgres_rec,
-            } = rec
-            {
-                protocol::build_apply_record_msg(*lsn, postgres_rec, &mut writebuf);
-            } else {
-                anyhow::bail!("tried to pass neon wal record to postgres WAL redo");
-            }
-        }
-        protocol::build_get_page_msg(tag, &mut writebuf);
-        WAL_REDO_RECORD_COUNTER.inc_by(records.len() as u64);
-
-        let res = self.apply_wal_records0(&writebuf, input, wal_redo_timeout);
-
-        if res.is_err() {
-            // not all of these can be caused by this particular input, however these are so rare
-            // in tests so capture all.
-            self.record_and_log(&writebuf);
-        }
-
-        res
-    }
-
-    fn apply_wal_records0(
-        &self,
-        writebuf: &[u8],
-        input: MutexGuard<ProcessInput>,
-        wal_redo_timeout: Duration,
-    ) -> anyhow::Result<Bytes> {
-        let mut proc = { input }; // TODO: remove this legacy rename, but this keep the patch small.
-        let mut nwrite = 0usize;
-
-        while nwrite < writebuf.len() {
-            let mut stdin_pollfds = [PollFd::new(&proc.stdin, PollFlags::POLLOUT)];
-            let n = loop {
-                match nix::poll::poll(&mut stdin_pollfds[..], wal_redo_timeout.as_millis() as i32) {
-                    Err(nix::errno::Errno::EINTR) => continue,
-                    res => break res,
-                }
-            }?;
-
-            if n == 0 {
-                anyhow::bail!("WAL redo timed out");
-            }
-
-            // If 'stdin' is writeable, do write.
-            let in_revents = stdin_pollfds[0].revents().unwrap();
-            if in_revents & (PollFlags::POLLERR | PollFlags::POLLOUT) != PollFlags::empty() {
-                nwrite += proc.stdin.write(&writebuf[nwrite..])?;
-            }
-            if in_revents.contains(PollFlags::POLLHUP) {
-                // We still have more data to write, but the process closed the pipe.
-                anyhow::bail!("WAL redo process closed its stdin unexpectedly");
-            }
-        }
-        let request_no = proc.n_requests;
-        proc.n_requests += 1;
-        drop(proc);
-
-        // To improve walredo performance we separate sending requests and receiving
-        // responses. Them are protected by different mutexes (output and input).
-        // If thread T1, T2, T3 send requests D1, D2, D3 to walredo process
-        // then there is not warranty that T1 will first granted output mutex lock.
-        // To address this issue we maintain number of sent requests, number of processed
-        // responses and ring buffer with pending responses. After sending response
-        // (under input mutex), threads remembers request number. Then it releases
-        // input mutex, locks output mutex and fetch in ring buffer all responses until
-        // its stored request number. The it takes correspondent element from
-        // pending responses ring buffer and truncate all empty elements from the front,
-        // advancing processed responses number.
-
-        let mut output = self.stdout.lock().unwrap();
-        let n_processed_responses = output.n_processed_responses;
-        while n_processed_responses + output.pending_responses.len() <= request_no {
-            // We expect the WAL redo process to respond with an 8k page image. We read it
-            // into this buffer.
-            let mut resultbuf = vec![0; BLCKSZ.into()];
-            let mut nresult: usize = 0; // # of bytes read into 'resultbuf' so far
-            while nresult < BLCKSZ.into() {
-                let mut stdout_pollfds = [PollFd::new(&output.stdout, PollFlags::POLLIN)];
-                // We do two things simultaneously: reading response from stdout
-                // and forward any logging information that the child writes to its stderr to the page server's log.
-                let n = loop {
-                    match nix::poll::poll(
-                        &mut stdout_pollfds[..],
-                        wal_redo_timeout.as_millis() as i32,
-                    ) {
-                        Err(nix::errno::Errno::EINTR) => continue,
-                        res => break res,
-                    }
-                }?;
-
-                if n == 0 {
-                    anyhow::bail!("WAL redo timed out");
-                }
-
-                // If we have some data in stdout, read it to the result buffer.
-                let out_revents = stdout_pollfds[0].revents().unwrap();
-                if out_revents & (PollFlags::POLLERR | PollFlags::POLLIN) != PollFlags::empty() {
-                    nresult += output.stdout.read(&mut resultbuf[nresult..])?;
-                }
-                if out_revents.contains(PollFlags::POLLHUP) {
-                    anyhow::bail!("WAL redo process closed its stdout unexpectedly");
-                }
-            }
-            output
-                .pending_responses
-                .push_back(Some(Bytes::from(resultbuf)));
-        }
-        // Replace our request's response with None in `pending_responses`.
-        // Then make space in the ring buffer by clearing out any seqence of contiguous
-        // `None`'s from the front of `pending_responses`.
-        // NB: We can't pop_front() because other requests' responses because another
-        // requester might have grabbed the output mutex before us:
-        // T1: grab input mutex
-        // T1: send request_no 23
-        // T1: release input mutex
-        // T2: grab input mutex
-        // T2: send request_no 24
-        // T2: release input mutex
-        // T2: grab output mutex
-        // T2: n_processed_responses + output.pending_responses.len() <= request_no
-        //            23                                0                   24
-        // T2: enters poll loop that reads stdout
-        // T2: put response for 23 into pending_responses
-        // T2: put response for 24 into pending_resposnes
-        // pending_responses now looks like this: Front Some(response_23) Some(response_24) Back
-        // T2: takes its response_24
-        // pending_responses now looks like this: Front Some(response_23) None Back
-        // T2: does the while loop below
-        // pending_responses now looks like this: Front Some(response_23) None Back
-        // T2: releases output mutex
-        // T1: grabs output mutex
-        // T1: n_processed_responses + output.pending_responses.len() > request_no
-        //            23                                2                   23
-        // T1: skips poll loop that reads stdout
-        // T1: takes its response_23
-        // pending_responses now looks like this: Front None None Back
-        // T2: does the while loop below
-        // pending_responses now looks like this: Front Back
-        // n_processed_responses now has value 25
-        let res = output.pending_responses[request_no - n_processed_responses]
-            .take()
-            .expect("we own this request_no, nobody else is supposed to take it");
-        while let Some(front) = output.pending_responses.front() {
-            if front.is_none() {
-                output.pending_responses.pop_front();
-                output.n_processed_responses += 1;
-            } else {
-                break;
-            }
-        }
-        Ok(res)
-    }
-
-    #[cfg(feature = "testing")]
-    fn record_and_log(&self, writebuf: &[u8]) {
-        use std::sync::atomic::Ordering;
-
-        let millis = std::time::SystemTime::now()
-            .duration_since(std::time::SystemTime::UNIX_EPOCH)
-            .unwrap()
-            .as_millis();
-
-        let seq = self.dump_sequence.fetch_add(1, Ordering::Relaxed);
-
-        // these files will be collected to an allure report
-        let filename = format!("walredo-{millis}-{}-{seq}.walredo", writebuf.len());
-
-        let path = self.conf.tenant_path(&self.tenant_shard_id).join(&filename);
-
-        let res = std::fs::OpenOptions::new()
-            .write(true)
-            .create_new(true)
-            .read(true)
-            .open(path)
-            .and_then(|mut f| f.write_all(writebuf));
-
-        // trip up allowed_errors
-        if let Err(e) = res {
-            tracing::error!(target=%filename, length=writebuf.len(), "failed to write out the walredo errored input: {e}");
-        } else {
-            tracing::error!(filename, "erroring walredo input saved");
-        }
-    }
-
-    #[cfg(not(feature = "testing"))]
-    fn record_and_log(&self, _: &[u8]) {}
-}
-
-impl Drop for WalRedoProcess {
-    fn drop(&mut self) {
-        self.child
-            .take()
-            .expect("we only do this once")
-            .kill_and_wait(WalRedoKillCause::WalRedoProcessDrop);
-        // no way to wait for stderr_logger_task from Drop because that is async only
-    }
-}
--- a/poetry.lock
+++ b/poetry.lock
@@ -2,87 +2,87 @@

 [[package]]
 name = "aiohttp"
-version = "3.9.4"
+version = "3.9.2"
 description = "Async http client/server framework (asyncio)"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "aiohttp-3.9.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:76d32588ef7e4a3f3adff1956a0ba96faabbdee58f2407c122dd45aa6e34f372"},
-    {file = "aiohttp-3.9.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:56181093c10dbc6ceb8a29dfeea1e815e1dfdc020169203d87fd8d37616f73f9"},
-    {file = "aiohttp-3.9.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c7a5b676d3c65e88b3aca41816bf72831898fcd73f0cbb2680e9d88e819d1e4d"},
-    {file = "aiohttp-3.9.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d1df528a85fb404899d4207a8d9934cfd6be626e30e5d3a5544a83dbae6d8a7e"},
-    {file = "aiohttp-3.9.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f595db1bceabd71c82e92df212dd9525a8a2c6947d39e3c994c4f27d2fe15b11"},
-    {file = "aiohttp-3.9.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9c0b09d76e5a4caac3d27752027fbd43dc987b95f3748fad2b924a03fe8632ad"},
-    {file = "aiohttp-3.9.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:689eb4356649ec9535b3686200b231876fb4cab4aca54e3bece71d37f50c1d13"},
-    {file = "aiohttp-3.9.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a3666cf4182efdb44d73602379a66f5fdfd5da0db5e4520f0ac0dcca644a3497"},
-    {file = "aiohttp-3.9.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:b65b0f8747b013570eea2f75726046fa54fa8e0c5db60f3b98dd5d161052004a"},
-    {file = "aiohttp-3.9.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:a1885d2470955f70dfdd33a02e1749613c5a9c5ab855f6db38e0b9389453dce7"},
-    {file = "aiohttp-3.9.4-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:0593822dcdb9483d41f12041ff7c90d4d1033ec0e880bcfaf102919b715f47f1"},
-    {file = "aiohttp-3.9.4-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:47f6eb74e1ecb5e19a78f4a4228aa24df7fbab3b62d4a625d3f41194a08bd54f"},
-    {file = "aiohttp-3.9.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c8b04a3dbd54de6ccb7604242fe3ad67f2f3ca558f2d33fe19d4b08d90701a89"},
-    {file = "aiohttp-3.9.4-cp310-cp310-win32.whl", hash = "sha256:8a78dfb198a328bfb38e4308ca8167028920fb747ddcf086ce706fbdd23b2926"},
-    {file = "aiohttp-3.9.4-cp310-cp310-win_amd64.whl", hash = "sha256:e78da6b55275987cbc89141a1d8e75f5070e577c482dd48bd9123a76a96f0bbb"},
-    {file = "aiohttp-3.9.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c111b3c69060d2bafc446917534150fd049e7aedd6cbf21ba526a5a97b4402a5"},
-    {file = "aiohttp-3.9.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:efbdd51872cf170093998c87ccdf3cb5993add3559341a8e5708bcb311934c94"},
-    {file = "aiohttp-3.9.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7bfdb41dc6e85d8535b00d73947548a748e9534e8e4fddd2638109ff3fb081df"},
-    {file = "aiohttp-3.9.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2bd9d334412961125e9f68d5b73c1d0ab9ea3f74a58a475e6b119f5293eee7ba"},
-    {file = "aiohttp-3.9.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:35d78076736f4a668d57ade00c65d30a8ce28719d8a42471b2a06ccd1a2e3063"},
-    {file = "aiohttp-3.9.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:824dff4f9f4d0f59d0fa3577932ee9a20e09edec8a2f813e1d6b9f89ced8293f"},
-    {file = "aiohttp-3.9.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:52b8b4e06fc15519019e128abedaeb56412b106ab88b3c452188ca47a25c4093"},
-    {file = "aiohttp-3.9.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eae569fb1e7559d4f3919965617bb39f9e753967fae55ce13454bec2d1c54f09"},
-    {file = "aiohttp-3.9.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:69b97aa5792428f321f72aeb2f118e56893371f27e0b7d05750bcad06fc42ca1"},
-    {file = "aiohttp-3.9.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:4d79aad0ad4b980663316f26d9a492e8fab2af77c69c0f33780a56843ad2f89e"},
-    {file = "aiohttp-3.9.4-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:d6577140cd7db19e430661e4b2653680194ea8c22c994bc65b7a19d8ec834403"},
-    {file = "aiohttp-3.9.4-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:9860d455847cd98eb67897f5957b7cd69fbcb436dd3f06099230f16a66e66f79"},
-    {file = "aiohttp-3.9.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:69ff36d3f8f5652994e08bd22f093e11cfd0444cea310f92e01b45a4e46b624e"},
-    {file = "aiohttp-3.9.4-cp311-cp311-win32.whl", hash = "sha256:e27d3b5ed2c2013bce66ad67ee57cbf614288bda8cdf426c8d8fe548316f1b5f"},
-    {file = "aiohttp-3.9.4-cp311-cp311-win_amd64.whl", hash = "sha256:d6a67e26daa686a6fbdb600a9af8619c80a332556245fa8e86c747d226ab1a1e"},
-    {file = "aiohttp-3.9.4-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:c5ff8ff44825736a4065d8544b43b43ee4c6dd1530f3a08e6c0578a813b0aa35"},
-    {file = "aiohttp-3.9.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d12a244627eba4e9dc52cbf924edef905ddd6cafc6513849b4876076a6f38b0e"},
-    {file = "aiohttp-3.9.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:dcad56c8d8348e7e468899d2fb3b309b9bc59d94e6db08710555f7436156097f"},
-    {file = "aiohttp-3.9.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f7e69a7fd4b5ce419238388e55abd220336bd32212c673ceabc57ccf3d05b55"},
-    {file = "aiohttp-3.9.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c4870cb049f10d7680c239b55428916d84158798eb8f353e74fa2c98980dcc0b"},
-    {file = "aiohttp-3.9.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3b2feaf1b7031ede1bc0880cec4b0776fd347259a723d625357bb4b82f62687b"},
-    {file = "aiohttp-3.9.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:939393e8c3f0a5bcd33ef7ace67680c318dc2ae406f15e381c0054dd658397de"},
-    {file = "aiohttp-3.9.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7d2334e387b2adcc944680bebcf412743f2caf4eeebd550f67249c1c3696be04"},
-    {file = "aiohttp-3.9.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:e0198ea897680e480845ec0ffc5a14e8b694e25b3f104f63676d55bf76a82f1a"},
-    {file = "aiohttp-3.9.4-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:e40d2cd22914d67c84824045861a5bb0fb46586b15dfe4f046c7495bf08306b2"},
-    {file = "aiohttp-3.9.4-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:aba80e77c227f4234aa34a5ff2b6ff30c5d6a827a91d22ff6b999de9175d71bd"},
-    {file = "aiohttp-3.9.4-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:fb68dc73bc8ac322d2e392a59a9e396c4f35cb6fdbdd749e139d1d6c985f2527"},
-    {file = "aiohttp-3.9.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:f3460a92638dce7e47062cf088d6e7663adb135e936cb117be88d5e6c48c9d53"},
-    {file = "aiohttp-3.9.4-cp312-cp312-win32.whl", hash = "sha256:32dc814ddbb254f6170bca198fe307920f6c1308a5492f049f7f63554b88ef36"},
-    {file = "aiohttp-3.9.4-cp312-cp312-win_amd64.whl", hash = "sha256:63f41a909d182d2b78fe3abef557fcc14da50c7852f70ae3be60e83ff64edba5"},
-    {file = "aiohttp-3.9.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:c3770365675f6be220032f6609a8fbad994d6dcf3ef7dbcf295c7ee70884c9af"},
-    {file = "aiohttp-3.9.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:305edae1dea368ce09bcb858cf5a63a064f3bff4767dec6fa60a0cc0e805a1d3"},
-    {file = "aiohttp-3.9.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6f121900131d116e4a93b55ab0d12ad72573f967b100e49086e496a9b24523ea"},
-    {file = "aiohttp-3.9.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b71e614c1ae35c3d62a293b19eface83d5e4d194e3eb2fabb10059d33e6e8cbf"},
-    {file = "aiohttp-3.9.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:419f009fa4cfde4d16a7fc070d64f36d70a8d35a90d71aa27670bba2be4fd039"},
-    {file = "aiohttp-3.9.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7b39476ee69cfe64061fd77a73bf692c40021f8547cda617a3466530ef63f947"},
-    {file = "aiohttp-3.9.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b33f34c9c7decdb2ab99c74be6443942b730b56d9c5ee48fb7df2c86492f293c"},
-    {file = "aiohttp-3.9.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c78700130ce2dcebb1a8103202ae795be2fa8c9351d0dd22338fe3dac74847d9"},
-    {file = "aiohttp-3.9.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:268ba22d917655d1259af2d5659072b7dc11b4e1dc2cb9662fdd867d75afc6a4"},
-    {file = "aiohttp-3.9.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:17e7c051f53a0d2ebf33013a9cbf020bb4e098c4bc5bce6f7b0c962108d97eab"},
-    {file = "aiohttp-3.9.4-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:7be99f4abb008cb38e144f85f515598f4c2c8932bf11b65add0ff59c9c876d99"},
-    {file = "aiohttp-3.9.4-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:d58a54d6ff08d2547656356eea8572b224e6f9bbc0cf55fa9966bcaac4ddfb10"},
-    {file = "aiohttp-3.9.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:7673a76772bda15d0d10d1aa881b7911d0580c980dbd16e59d7ba1422b2d83cd"},
-    {file = "aiohttp-3.9.4-cp38-cp38-win32.whl", hash = "sha256:e4370dda04dc8951012f30e1ce7956a0a226ac0714a7b6c389fb2f43f22a250e"},
-    {file = "aiohttp-3.9.4-cp38-cp38-win_amd64.whl", hash = "sha256:eb30c4510a691bb87081192a394fb661860e75ca3896c01c6d186febe7c88530"},
-    {file = "aiohttp-3.9.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:84e90494db7df3be5e056f91412f9fa9e611fbe8ce4aaef70647297f5943b276"},
-    {file = "aiohttp-3.9.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7d4845f8501ab28ebfdbeab980a50a273b415cf69e96e4e674d43d86a464df9d"},
-    {file = "aiohttp-3.9.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:69046cd9a2a17245c4ce3c1f1a4ff8c70c7701ef222fce3d1d8435f09042bba1"},
-    {file = "aiohttp-3.9.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b73a06bafc8dcc508420db43b4dd5850e41e69de99009d0351c4f3007960019"},
-    {file = "aiohttp-3.9.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:418bb0038dfafeac923823c2e63226179976c76f981a2aaad0ad5d51f2229bca"},
-    {file = "aiohttp-3.9.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:71a8f241456b6c2668374d5d28398f8e8cdae4cce568aaea54e0f39359cd928d"},
-    {file = "aiohttp-3.9.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:935c369bf8acc2dc26f6eeb5222768aa7c62917c3554f7215f2ead7386b33748"},
-    {file = "aiohttp-3.9.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:74e4e48c8752d14ecfb36d2ebb3d76d614320570e14de0a3aa7a726ff150a03c"},
-    {file = "aiohttp-3.9.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:916b0417aeddf2c8c61291238ce25286f391a6acb6f28005dd9ce282bd6311b6"},
-    {file = "aiohttp-3.9.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9b6787b6d0b3518b2ee4cbeadd24a507756ee703adbac1ab6dc7c4434b8c572a"},
-    {file = "aiohttp-3.9.4-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:221204dbda5ef350e8db6287937621cf75e85778b296c9c52260b522231940ed"},
-    {file = "aiohttp-3.9.4-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:10afd99b8251022ddf81eaed1d90f5a988e349ee7d779eb429fb07b670751e8c"},
-    {file = "aiohttp-3.9.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:2506d9f7a9b91033201be9ffe7d89c6a54150b0578803cce5cb84a943d075bc3"},
-    {file = "aiohttp-3.9.4-cp39-cp39-win32.whl", hash = "sha256:e571fdd9efd65e86c6af2f332e0e95dad259bfe6beb5d15b3c3eca3a6eb5d87b"},
-    {file = "aiohttp-3.9.4-cp39-cp39-win_amd64.whl", hash = "sha256:7d29dd5319d20aa3b7749719ac9685fbd926f71ac8c77b2477272725f882072d"},
-    {file = "aiohttp-3.9.4.tar.gz", hash = "sha256:6ff71ede6d9a5a58cfb7b6fffc83ab5d4a63138276c771ac91ceaaddf5459644"},
+    {file = "aiohttp-3.9.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:772fbe371788e61c58d6d3d904268e48a594ba866804d08c995ad71b144f94cb"},
+    {file = "aiohttp-3.9.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:edd4f1af2253f227ae311ab3d403d0c506c9b4410c7fc8d9573dec6d9740369f"},
+    {file = "aiohttp-3.9.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cfee9287778399fdef6f8a11c9e425e1cb13cc9920fd3a3df8f122500978292b"},
+    {file = "aiohttp-3.9.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3cc158466f6a980a6095ee55174d1de5730ad7dec251be655d9a6a9dd7ea1ff9"},
+    {file = "aiohttp-3.9.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:54ec82f45d57c9a65a1ead3953b51c704f9587440e6682f689da97f3e8defa35"},
+    {file = "aiohttp-3.9.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:abeb813a18eb387f0d835ef51f88568540ad0325807a77a6e501fed4610f864e"},
+    {file = "aiohttp-3.9.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc91d07280d7d169f3a0f9179d8babd0ee05c79d4d891447629ff0d7d8089ec2"},
+    {file = "aiohttp-3.9.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b65e861f4bebfb660f7f0f40fa3eb9f2ab9af10647d05dac824390e7af8f75b7"},
+    {file = "aiohttp-3.9.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:04fd8ffd2be73d42bcf55fd78cde7958eeee6d4d8f73c3846b7cba491ecdb570"},
+    {file = "aiohttp-3.9.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:3d8d962b439a859b3ded9a1e111a4615357b01620a546bc601f25b0211f2da81"},
+    {file = "aiohttp-3.9.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:8ceb658afd12b27552597cf9a65d9807d58aef45adbb58616cdd5ad4c258c39e"},
+    {file = "aiohttp-3.9.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:0e4ee4df741670560b1bc393672035418bf9063718fee05e1796bf867e995fad"},
+    {file = "aiohttp-3.9.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:2dec87a556f300d3211decf018bfd263424f0690fcca00de94a837949fbcea02"},
+    {file = "aiohttp-3.9.2-cp310-cp310-win32.whl", hash = "sha256:3e1a800f988ce7c4917f34096f81585a73dbf65b5c39618b37926b1238cf9bc4"},
+    {file = "aiohttp-3.9.2-cp310-cp310-win_amd64.whl", hash = "sha256:ea510718a41b95c236c992b89fdfc3d04cc7ca60281f93aaada497c2b4e05c46"},
+    {file = "aiohttp-3.9.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6aaa6f99256dd1b5756a50891a20f0d252bd7bdb0854c5d440edab4495c9f973"},
+    {file = "aiohttp-3.9.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a27d8c70ad87bcfce2e97488652075a9bdd5b70093f50b10ae051dfe5e6baf37"},
+    {file = "aiohttp-3.9.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:54287bcb74d21715ac8382e9de146d9442b5f133d9babb7e5d9e453faadd005e"},
+    {file = "aiohttp-3.9.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5bb3d05569aa83011fcb346b5266e00b04180105fcacc63743fc2e4a1862a891"},
+    {file = "aiohttp-3.9.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c8534e7d69bb8e8d134fe2be9890d1b863518582f30c9874ed7ed12e48abe3c4"},
+    {file = "aiohttp-3.9.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4bd9d5b989d57b41e4ff56ab250c5ddf259f32db17159cce630fd543376bd96b"},
+    {file = "aiohttp-3.9.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa6904088e6642609981f919ba775838ebf7df7fe64998b1a954fb411ffb4663"},
+    {file = "aiohttp-3.9.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bda42eb410be91b349fb4ee3a23a30ee301c391e503996a638d05659d76ea4c2"},
+    {file = "aiohttp-3.9.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:193cc1ccd69d819562cc7f345c815a6fc51d223b2ef22f23c1a0f67a88de9a72"},
+    {file = "aiohttp-3.9.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:b9f1cb839b621f84a5b006848e336cf1496688059d2408e617af33e3470ba204"},
+    {file = "aiohttp-3.9.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:d22a0931848b8c7a023c695fa2057c6aaac19085f257d48baa24455e67df97ec"},
+    {file = "aiohttp-3.9.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4112d8ba61fbd0abd5d43a9cb312214565b446d926e282a6d7da3f5a5aa71d36"},
+    {file = "aiohttp-3.9.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c4ad4241b52bb2eb7a4d2bde060d31c2b255b8c6597dd8deac2f039168d14fd7"},
+    {file = "aiohttp-3.9.2-cp311-cp311-win32.whl", hash = "sha256:ee2661a3f5b529f4fc8a8ffee9f736ae054adfb353a0d2f78218be90617194b3"},
+    {file = "aiohttp-3.9.2-cp311-cp311-win_amd64.whl", hash = "sha256:4deae2c165a5db1ed97df2868ef31ca3cc999988812e82386d22937d9d6fed52"},
+    {file = "aiohttp-3.9.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:6f4cdba12539215aaecf3c310ce9d067b0081a0795dd8a8805fdb67a65c0572a"},
+    {file = "aiohttp-3.9.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:84e843b33d5460a5c501c05539809ff3aee07436296ff9fbc4d327e32aa3a326"},
+    {file = "aiohttp-3.9.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8008d0f451d66140a5aa1c17e3eedc9d56e14207568cd42072c9d6b92bf19b52"},
+    {file = "aiohttp-3.9.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:61c47ab8ef629793c086378b1df93d18438612d3ed60dca76c3422f4fbafa792"},
+    {file = "aiohttp-3.9.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bc71f748e12284312f140eaa6599a520389273174b42c345d13c7e07792f4f57"},
+    {file = "aiohttp-3.9.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a1c3a4d0ab2f75f22ec80bca62385db2e8810ee12efa8c9e92efea45c1849133"},
+    {file = "aiohttp-3.9.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a87aa0b13bbee025faa59fa58861303c2b064b9855d4c0e45ec70182bbeba1b"},
+    {file = "aiohttp-3.9.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e2cc0d04688b9f4a7854c56c18aa7af9e5b0a87a28f934e2e596ba7e14783192"},
+    {file = "aiohttp-3.9.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:1956e3ac376b1711c1533266dec4efd485f821d84c13ce1217d53e42c9e65f08"},
+    {file = "aiohttp-3.9.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:114da29f39eccd71b93a0fcacff178749a5c3559009b4a4498c2c173a6d74dff"},
+    {file = "aiohttp-3.9.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:3f17999ae3927d8a9a823a1283b201344a0627272f92d4f3e3a4efe276972fe8"},
+    {file = "aiohttp-3.9.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:f31df6a32217a34ae2f813b152a6f348154f948c83213b690e59d9e84020925c"},
+    {file = "aiohttp-3.9.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:7a75307ffe31329928a8d47eae0692192327c599113d41b278d4c12b54e1bd11"},
+    {file = "aiohttp-3.9.2-cp312-cp312-win32.whl", hash = "sha256:972b63d589ff8f305463593050a31b5ce91638918da38139b9d8deaba9e0fed7"},
+    {file = "aiohttp-3.9.2-cp312-cp312-win_amd64.whl", hash = "sha256:200dc0246f0cb5405c80d18ac905c8350179c063ea1587580e3335bfc243ba6a"},
+    {file = "aiohttp-3.9.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:158564d0d1020e0d3fe919a81d97aadad35171e13e7b425b244ad4337fc6793a"},
+    {file = "aiohttp-3.9.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:da1346cd0ccb395f0ed16b113ebb626fa43b7b07fd7344fce33e7a4f04a8897a"},
+    {file = "aiohttp-3.9.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:eaa9256de26ea0334ffa25f1913ae15a51e35c529a1ed9af8e6286dd44312554"},
+    {file = "aiohttp-3.9.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1543e7fb00214fb4ccead42e6a7d86f3bb7c34751ec7c605cca7388e525fd0b4"},
+    {file = "aiohttp-3.9.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:186e94570433a004e05f31f632726ae0f2c9dee4762a9ce915769ce9c0a23d89"},
+    {file = "aiohttp-3.9.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d52d20832ac1560f4510d68e7ba8befbc801a2b77df12bd0cd2bcf3b049e52a4"},
+    {file = "aiohttp-3.9.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c45e4e815ac6af3b72ca2bde9b608d2571737bb1e2d42299fc1ffdf60f6f9a1"},
+    {file = "aiohttp-3.9.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aa906b9bdfd4a7972dd0628dbbd6413d2062df5b431194486a78f0d2ae87bd55"},
+    {file = "aiohttp-3.9.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:68bbee9e17d66f17bb0010aa15a22c6eb28583edcc8b3212e2b8e3f77f3ebe2a"},
+    {file = "aiohttp-3.9.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:4c189b64bd6d9a403a1a3f86a3ab3acbc3dc41a68f73a268a4f683f89a4dec1f"},
+    {file = "aiohttp-3.9.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:8a7876f794523123bca6d44bfecd89c9fec9ec897a25f3dd202ee7fc5c6525b7"},
+    {file = "aiohttp-3.9.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:d23fba734e3dd7b1d679b9473129cd52e4ec0e65a4512b488981a56420e708db"},
+    {file = "aiohttp-3.9.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b141753be581fab842a25cb319f79536d19c2a51995d7d8b29ee290169868eab"},
+    {file = "aiohttp-3.9.2-cp38-cp38-win32.whl", hash = "sha256:103daf41ff3b53ba6fa09ad410793e2e76c9d0269151812e5aba4b9dd674a7e8"},
+    {file = "aiohttp-3.9.2-cp38-cp38-win_amd64.whl", hash = "sha256:328918a6c2835861ff7afa8c6d2c70c35fdaf996205d5932351bdd952f33fa2f"},
+    {file = "aiohttp-3.9.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5264d7327c9464786f74e4ec9342afbbb6ee70dfbb2ec9e3dfce7a54c8043aa3"},
+    {file = "aiohttp-3.9.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:07205ae0015e05c78b3288c1517afa000823a678a41594b3fdc870878d645305"},
+    {file = "aiohttp-3.9.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ae0a1e638cffc3ec4d4784b8b4fd1cf28968febc4bd2718ffa25b99b96a741bd"},
+    {file = "aiohttp-3.9.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d43302a30ba1166325974858e6ef31727a23bdd12db40e725bec0f759abce505"},
+    {file = "aiohttp-3.9.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:16a967685907003765855999af11a79b24e70b34dc710f77a38d21cd9fc4f5fe"},
+    {file = "aiohttp-3.9.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6fa3ee92cd441d5c2d07ca88d7a9cef50f7ec975f0117cd0c62018022a184308"},
+    {file = "aiohttp-3.9.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b500c5ad9c07639d48615a770f49618130e61be36608fc9bc2d9bae31732b8f"},
+    {file = "aiohttp-3.9.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c07327b368745b1ce2393ae9e1aafed7073d9199e1dcba14e035cc646c7941bf"},
+    {file = "aiohttp-3.9.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:cc7d6502c23a0ec109687bf31909b3fb7b196faf198f8cff68c81b49eb316ea9"},
+    {file = "aiohttp-3.9.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:07be2be7071723c3509ab5c08108d3a74f2181d4964e869f2504aaab68f8d3e8"},
+    {file = "aiohttp-3.9.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:122468f6fee5fcbe67cb07014a08c195b3d4c41ff71e7b5160a7bcc41d585a5f"},
+    {file = "aiohttp-3.9.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:00a9abcea793c81e7f8778ca195a1714a64f6d7436c4c0bb168ad2a212627000"},
+    {file = "aiohttp-3.9.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7a9825fdd64ecac5c670234d80bb52bdcaa4139d1f839165f548208b3779c6c6"},
+    {file = "aiohttp-3.9.2-cp39-cp39-win32.whl", hash = "sha256:5422cd9a4a00f24c7244e1b15aa9b87935c85fb6a00c8ac9b2527b38627a9211"},
+    {file = "aiohttp-3.9.2-cp39-cp39-win_amd64.whl", hash = "sha256:7d579dcd5d82a86a46f725458418458fa43686f6a7b252f2966d359033ffc8ab"},
+    {file = "aiohttp-3.9.2.tar.gz", hash = "sha256:b0ad0a5e86ce73f5368a164c10ada10504bf91869c05ab75d982c6048217fbf7"},
 ]

 [package.dependencies]
@@ -2900,4 +2900,4 @@ cffi = ["cffi (>=1.11)"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.9"
-content-hash = "b3452b50901123fd5f2c385ce8a0c1c492296393b8a7926a322b6df0ea3ac572"
+content-hash = "df7161da4fdc3cba0a445176fc9dda2a0e8a53e13a7aa8a864385ca259381b41"
--- a/proxy/Cargo.toml
+++ b/proxy/Cargo.toml
@@ -67,7 +67,6 @@ routerify.workspace = true
 rustc-hash.workspace = true
 rustls-pemfile.workspace = true
 rustls.workspace = true
-rustls-native-certs = "0.7.0"
 scopeguard.workspace = true
 serde.workspace = true
 serde_json.workspace = true
@@ -82,10 +81,9 @@ thiserror.workspace = true
 tikv-jemallocator.workspace = true
 tikv-jemalloc-ctl = { workspace = true, features = ["use_std"] }
 tokio-postgres.workspace = true
-tokio-postgres-rustls.workspace = true
 tokio-rustls.workspace = true
 tokio-util.workspace = true
-tokio = { workspace = true, features = ["signal", "tracing"] }
+tokio = { workspace = true, features = ["signal"] }
 tracing-opentelemetry.workspace = true
 tracing-subscriber.workspace = true
 tracing-utils.workspace = true
@@ -96,11 +94,11 @@ utils.workspace = true
 uuid.workspace = true
 webpki-roots.workspace = true
 x509-parser.workspace = true
+native-tls.workspace = true
+postgres-native-tls.workspace = true
 postgres-protocol.workspace = true
 redis.workspace = true

-console-subscriber = "0.2.0"
-
 workspace_hack.workspace = true

 [dev-dependencies]
@@ -108,5 +106,6 @@ camino-tempfile.workspace = true
 fallible-iterator.workspace = true
 rcgen.workspace = true
 rstest.workspace = true
+tokio-postgres-rustls.workspace = true
 walkdir.workspace = true
 rand_distr = "0.4"
--- a/proxy/src/auth/backend.rs
+++ b/proxy/src/auth/backend.rs
@@ -2,15 +2,8 @@ mod classic;
 mod hacks;
 mod link;

-use std::net::IpAddr;
-use std::sync::Arc;
-use std::time::Duration;
-
-use ipnet::{Ipv4Net, Ipv6Net};
 pub use link::LinkAuthError;
-use tokio::io::{AsyncRead, AsyncWrite};
 use tokio_postgres::config::AuthKeys;
-use tracing::{info, warn};

 use crate::auth::credentials::check_peer_addr_is_in_list;
 use crate::auth::validate_password_and_exchange;
@@ -23,7 +16,6 @@ use crate::intern::EndpointIdInt;
 use crate::metrics::Metrics;
 use crate::proxy::connect_compute::ComputeConnectBackend;
 use crate::proxy::NeonOptions;
-use crate::rate_limiter::{BucketRateLimiter, RateBucketInfo};
 use crate::stream::Stream;
 use crate::{
    auth::{self, ComputeUserInfoMaybeEndpoint},
@@ -36,6 +28,9 @@ use crate::{
    stream, url,
 };
 use crate::{scram, EndpointCacheKey, EndpointId, Normalize, RoleName};
+use std::sync::Arc;
+use tokio::io::{AsyncRead, AsyncWrite};
+use tracing::{info, warn};

 /// Alternative to [`std::borrow::Cow`] but doesn't need `T: ToOwned` as we don't need that functionality
 pub enum MaybeOwned<'a, T> {
@@ -181,45 +176,11 @@ impl TryFrom<ComputeUserInfoMaybeEndpoint> for ComputeUserInfo {
    }
 }

-#[derive(PartialEq, PartialOrd, Hash, Eq, Ord, Debug, Copy, Clone)]
-pub struct MaskedIp(IpAddr);
-
-impl MaskedIp {
-    fn new(value: IpAddr, prefix: u8) -> Self {
-        match value {
-            IpAddr::V4(v4) => Self(IpAddr::V4(
-                Ipv4Net::new(v4, prefix).map_or(v4, |x| x.trunc().addr()),
-            )),
-            IpAddr::V6(v6) => Self(IpAddr::V6(
-                Ipv6Net::new(v6, prefix).map_or(v6, |x| x.trunc().addr()),
-            )),
-        }
-    }
-}
-
-// This can't be just per IP because that would limit some PaaS that share IP addresses
-pub type AuthRateLimiter = BucketRateLimiter<(EndpointIdInt, MaskedIp)>;
-
-impl RateBucketInfo {
-    /// All of these are per endpoint-maskedip pair.
-    /// Context: 4096 rounds of pbkdf2 take about 1ms of cpu time to execute (1 milli-cpu-second or 1mcpus).
-    ///
-    /// First bucket: 1000mcpus total per endpoint-ip pair
-    /// * 4096000 requests per second with 1 hash rounds.
-    /// * 1000 requests per second with 4096 hash rounds.
-    /// * 6.8 requests per second with 600000 hash rounds.
-    pub const DEFAULT_AUTH_SET: [Self; 3] = [
-        Self::new(1000 * 4096, Duration::from_secs(1)),
-        Self::new(600 * 4096, Duration::from_secs(60)),
-        Self::new(300 * 4096, Duration::from_secs(600)),
-    ];
-}
-
 impl AuthenticationConfig {
    pub fn check_rate_limit(
        &self,
+
        ctx: &mut RequestMonitoring,
-        config: &AuthenticationConfig,
        secret: AuthSecret,
        endpoint: &EndpointId,
        is_cleartext: bool,
@@ -240,13 +201,9 @@ impl AuthenticationConfig {
            1
        };

-        let limit_not_exceeded = self.rate_limiter.check(
-            (
-                endpoint_int,
-                MaskedIp::new(ctx.peer_addr, config.rate_limit_ip_subnet),
-            ),
-            password_weight,
-        );
+        let limit_not_exceeded = self
+            .rate_limiter
+            .check((endpoint_int, ctx.peer_addr), password_weight);

        if !limit_not_exceeded {
            warn!(
@@ -314,7 +271,6 @@ async fn auth_quirks(
    let secret = match secret {
        Some(secret) => config.check_rate_limit(
            ctx,
-            config,
            secret,
            &info.endpoint,
            unauthenticated_password.is_some() || allow_cleartext,
@@ -517,7 +473,7 @@ impl ComputeConnectBackend for BackendType<'_, ComputeCredentials, &()> {

 #[cfg(test)]
 mod tests {
-    use std::{net::IpAddr, sync::Arc, time::Duration};
+    use std::sync::Arc;

    use bytes::BytesMut;
    use fallible_iterator::FallibleIterator;
@@ -530,7 +486,7 @@ mod tests {
    use tokio::io::{AsyncRead, AsyncReadExt, AsyncWriteExt};

    use crate::{
-        auth::{backend::MaskedIp, ComputeUserInfoMaybeEndpoint, IpPattern},
+        auth::{ComputeUserInfoMaybeEndpoint, IpPattern},
        config::AuthenticationConfig,
        console::{
            self,
@@ -539,12 +495,12 @@ mod tests {
        },
        context::RequestMonitoring,
        proxy::NeonOptions,
-        rate_limiter::RateBucketInfo,
+        rate_limiter::{AuthRateLimiter, RateBucketInfo},
        scram::ServerSecret,
        stream::{PqStream, Stream},
    };

-    use super::{auth_quirks, AuthRateLimiter};
+    use super::auth_quirks;

    struct Auth {
        ips: Vec<IpPattern>,
@@ -585,7 +541,6 @@ mod tests {
        scram_protocol_timeout: std::time::Duration::from_secs(5),
        rate_limiter_enabled: true,
        rate_limiter: AuthRateLimiter::new(&RateBucketInfo::DEFAULT_AUTH_SET),
-        rate_limit_ip_subnet: 64,
    });

    async fn read_message(r: &mut (impl AsyncRead + Unpin), b: &mut BytesMut) -> PgMessage {
@@ -597,51 +552,6 @@ mod tests {
        }
    }

-    #[test]
-    fn masked_ip() {
-        let ip_a = IpAddr::V4([127, 0, 0, 1].into());
-        let ip_b = IpAddr::V4([127, 0, 0, 2].into());
-        let ip_c = IpAddr::V4([192, 168, 1, 101].into());
-        let ip_d = IpAddr::V4([192, 168, 1, 102].into());
-        let ip_e = IpAddr::V6("abcd:abcd:abcd:abcd:abcd:abcd:abcd:abcd".parse().unwrap());
-        let ip_f = IpAddr::V6("abcd:abcd:abcd:abcd:1234:abcd:abcd:abcd".parse().unwrap());
-
-        assert_ne!(MaskedIp::new(ip_a, 64), MaskedIp::new(ip_b, 64));
-        assert_ne!(MaskedIp::new(ip_a, 32), MaskedIp::new(ip_b, 32));
-        assert_eq!(MaskedIp::new(ip_a, 30), MaskedIp::new(ip_b, 30));
-        assert_eq!(MaskedIp::new(ip_c, 30), MaskedIp::new(ip_d, 30));
-
-        assert_ne!(MaskedIp::new(ip_e, 128), MaskedIp::new(ip_f, 128));
-        assert_eq!(MaskedIp::new(ip_e, 64), MaskedIp::new(ip_f, 64));
-    }
-
-    #[test]
-    fn test_default_auth_rate_limit_set() {
-        // these values used to exceed u32::MAX
-        assert_eq!(
-            RateBucketInfo::DEFAULT_AUTH_SET,
-            [
-                RateBucketInfo {
-                    interval: Duration::from_secs(1),
-                    max_rpi: 1000 * 4096,
-                },
-                RateBucketInfo {
-                    interval: Duration::from_secs(60),
-                    max_rpi: 600 * 4096 * 60,
-                },
-                RateBucketInfo {
-                    interval: Duration::from_secs(600),
-                    max_rpi: 300 * 4096 * 600,
-                }
-            ]
-        );
-
-        for x in RateBucketInfo::DEFAULT_AUTH_SET {
-            let y = x.to_string().parse().unwrap();
-            assert_eq!(x, y);
-        }
-    }
-
    #[tokio::test]
    async fn auth_quirks_scram() {
        let (mut client, server) = tokio::io::duplex(1024);
--- a/proxy/src/auth/backend/link.rs
+++ b/proxy/src/auth/backend/link.rs
@@ -121,5 +121,6 @@ pub(super) async fn authenticate(
    Ok(NodeInfo {
        config,
        aux: db_info.aux,
+        allow_self_signed_compute: false, // caller may override
    })
 }
--- a/proxy/src/bin/pg_sni_router.rs
+++ b/proxy/src/bin/pg_sni_router.rs
@@ -9,13 +9,15 @@ use futures::future::Either;
 use itertools::Itertools;
 use proxy::config::TlsServerEndPoint;
 use proxy::context::RequestMonitoring;
-use proxy::proxy::{copy_bidirectional_client_compute, run_until_cancelled};
+use proxy::proxy::run_until_cancelled;
+use proxy::{BranchId, EndpointId, ProjectId};
 use rustls::pki_types::PrivateKeyDer;
 use tokio::net::TcpListener;

 use anyhow::{anyhow, bail, ensure, Context};
 use clap::Arg;
 use futures::TryFutureExt;
+use proxy::console::messages::MetricsAuxInfo;
 use proxy::stream::{PqStream, Stream};

 use tokio::io::{AsyncRead, AsyncWrite};
@@ -202,7 +204,6 @@ async fn task_main(
 const ERR_INSECURE_CONNECTION: &str = "connection is insecure (try using `sslmode=require`)";

 async fn ssl_handshake<S: AsyncRead + AsyncWrite + Unpin>(
-    ctx: &mut RequestMonitoring,
    raw_stream: S,
    tls_config: Arc<rustls::ServerConfig>,
    tls_server_end_point: TlsServerEndPoint,
@@ -232,10 +233,7 @@ async fn ssl_handshake<S: AsyncRead + AsyncWrite + Unpin>(
            }

            Ok(Stream::Tls {
-                tls: Box::new(
-                    raw.upgrade(tls_config, !ctx.has_private_peer_addr())
-                        .await?,
-                ),
+                tls: Box::new(raw.upgrade(tls_config).await?),
                tls_server_end_point,
            })
        }
@@ -258,7 +256,7 @@ async fn handle_client(
    tls_server_end_point: TlsServerEndPoint,
    stream: impl AsyncRead + AsyncWrite + Unpin,
 ) -> anyhow::Result<()> {
-    let mut tls_stream = ssl_handshake(&mut ctx, stream, tls_config, tls_server_end_point).await?;
+    let tls_stream = ssl_handshake(stream, tls_config, tls_server_end_point).await?;

    // Cut off first part of the SNI domain
    // We receive required destination details in the format of
@@ -275,15 +273,18 @@ async fn handle_client(

    info!("destination: {}", destination);

-    let mut client = tokio::net::TcpStream::connect(destination).await?;
+    let client = tokio::net::TcpStream::connect(destination).await?;
+
+    let metrics_aux: MetricsAuxInfo = MetricsAuxInfo {
+        endpoint_id: (&EndpointId::from("")).into(),
+        project_id: (&ProjectId::from("")).into(),
+        branch_id: (&BranchId::from("")).into(),
+        cold_start_info: proxy::console::messages::ColdStartInfo::Unknown,
+    };

    // doesn't yet matter as pg-sni-router doesn't report analytics logs
    ctx.set_success();
    ctx.log();

-    // Starting from here we only proxy the client's traffic.
-    info!("performing the proxy pass...");
-    let _ = copy_bidirectional_client_compute(&mut tls_stream, &mut client).await?;
-
-    Ok(())
+    proxy::proxy::passthrough::proxy_pass(tls_stream, client, metrics_aux).await
 }
--- a/proxy/src/bin/proxy.rs
+++ b/proxy/src/bin/proxy.rs
@@ -7,7 +7,6 @@ use aws_config::provider_config::ProviderConfig;
 use aws_config::web_identity_token::WebIdentityTokenCredentialsProvider;
 use futures::future::Either;
 use proxy::auth;
-use proxy::auth::backend::AuthRateLimiter;
 use proxy::auth::backend::MaybeOwned;
 use proxy::cancellation::CancelMap;
 use proxy::cancellation::CancellationHandler;
@@ -21,8 +20,10 @@ use proxy::context::parquet::ParquetUploadArgs;
 use proxy::http;
 use proxy::http::health_server::AppMetrics;
 use proxy::metrics::Metrics;
+use proxy::rate_limiter::AuthRateLimiter;
 use proxy::rate_limiter::EndpointRateLimiter;
 use proxy::rate_limiter::RateBucketInfo;
+use proxy::rate_limiter::RateLimiterConfig;
 use proxy::redis::cancellation_publisher::RedisPublisherClient;
 use proxy::redis::connection_with_credentials_provider::ConnectionWithCredentialsProvider;
 use proxy::redis::elasticache;
@@ -35,8 +36,6 @@ use proxy::config::{self, ProxyConfig};
 use proxy::serverless;
 use std::net::SocketAddr;
 use std::pin::pin;
-use std::sync::atomic::AtomicUsize;
-use std::sync::atomic::Ordering;
 use std::sync::Arc;
 use tokio::net::TcpListener;
 use tokio::sync::Mutex;
@@ -44,7 +43,6 @@ use tokio::task::JoinSet;
 use tokio_util::sync::CancellationToken;
 use tracing::info;
 use tracing::warn;
-use tracing::Instrument;
 use utils::{project_build_tag, project_git_version, sentry_init::init_sentry};

 project_git_version!(GIT_VERSION);
@@ -122,6 +120,9 @@ struct ProxyCliArgs {
    /// lock for `wake_compute` api method. example: "shards=32,permits=4,epoch=10m,timeout=1s". (use `permits=0` to disable).
    #[clap(long, default_value = config::WakeComputeLockOptions::DEFAULT_OPTIONS_WAKE_COMPUTE_LOCK)]
    wake_compute_lock: String,
+    /// Allow self-signed certificates for compute nodes (for testing)
+    #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
+    allow_self_signed_compute: bool,
    #[clap(flatten)]
    sql_over_http: SqlOverHttpArgs,
    /// timeout for scram authentication protocol
@@ -131,8 +132,14 @@ struct ProxyCliArgs {
    #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
    require_client_ip: bool,
    /// Disable dynamic rate limiter and store the metrics to ensure its production behaviour.
-    #[clap(long, default_value_t = true, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
+    #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
    disable_dynamic_rate_limiter: bool,
+    /// Rate limit algorithm. Makes sense only if `disable_rate_limiter` is `false`.
+    #[clap(value_enum, long, default_value_t = proxy::rate_limiter::RateLimitAlgorithm::Aimd)]
+    rate_limit_algorithm: proxy::rate_limiter::RateLimitAlgorithm,
+    /// Timeout for rate limiter. If it didn't manage to aquire a permit in this time, it will return an error.
+    #[clap(long, default_value = "15s", value_parser = humantime::parse_duration)]
+    rate_limiter_timeout: tokio::time::Duration,
    /// Endpoint rate limiter max number of requests per second.
    ///
    /// Provided in the form '<Requests Per Second>@<Bucket Duration Size>'.
@@ -145,12 +152,14 @@ struct ProxyCliArgs {
    /// Authentication rate limiter max number of hashes per second.
    #[clap(long, default_values_t = RateBucketInfo::DEFAULT_AUTH_SET)]
    auth_rate_limit: Vec<RateBucketInfo>,
-    /// The IP subnet to use when considering whether two IP addresses are considered the same.
-    #[clap(long, default_value_t = 64)]
-    auth_rate_limit_ip_subnet: u8,
    /// Redis rate limiter max number of requests per second.
    #[clap(long, default_values_t = RateBucketInfo::DEFAULT_ENDPOINT_SET)]
    redis_rps_limit: Vec<RateBucketInfo>,
+    /// Initial limit for dynamic rate limiter. Makes sense only if `rate_limit_algorithm` is *not* `None`.
+    #[clap(long, default_value_t = 100)]
+    initial_limit: usize,
+    #[clap(flatten)]
+    aimd_config: proxy::rate_limiter::AimdConfig,
    /// cache for `allowed_ips` (use `size=0` to disable)
    #[clap(long, default_value = config::CacheOptions::CACHE_DEFAULT_OPTIONS)]
    allowed_ips_cache: String,
@@ -235,21 +244,8 @@ struct SqlOverHttpArgs {
    sql_over_http_pool_shards: usize,
 }

-fn main() -> anyhow::Result<()> {
-    let rt = tokio::runtime::Builder::new_multi_thread()
-        .enable_all()
-        .thread_name_fn(|| {
-            static ATOMIC_ID: AtomicUsize = AtomicUsize::new(0);
-            let id = ATOMIC_ID.fetch_add(1, Ordering::SeqCst);
-            format!("worker-{}", id)
-        })
-        .build()
-        .unwrap();
-
-    rt.block_on(main2())
-}
-
-async fn main2() -> anyhow::Result<()> {
+#[tokio::main]
+async fn main() -> anyhow::Result<()> {
    let _logging_guard = proxy::logging::init().await?;
    let _panic_hook_guard = utils::logging::replace_panic_hook_with_tracing_panic_hook();
    let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]);
@@ -343,6 +339,7 @@ async fn main2() -> anyhow::Result<()> {
    let proxy_listener = TcpListener::bind(proxy_address).await?;
    let cancellation_token = CancellationToken::new();

+    let endpoint_rate_limiter = Arc::new(EndpointRateLimiter::new(&config.endpoint_rps_limit));
    let cancel_map = CancelMap::default();

    let redis_publisher = match &regional_redis_client {
@@ -364,16 +361,13 @@ async fn main2() -> anyhow::Result<()> {
    // client facing tasks. these will exit on error or on cancellation
    // cancellation returns Ok(())
    let mut client_tasks = JoinSet::new();
-    client_tasks
-        .build_task()
-        .name("tcp main")
-        .spawn(proxy::proxy::task_main(
-            config,
-            proxy_listener,
-            cancellation_token.clone(),
-            cancellation_handler.clone(),
-        ))
-        .unwrap();
+    client_tasks.spawn(proxy::proxy::task_main(
+        config,
+        proxy_listener,
+        cancellation_token.clone(),
+        endpoint_rate_limiter.clone(),
+        cancellation_handler.clone(),
+    ));

    // TODO: rename the argument to something like serverless.
    // It now covers more than just websockets, it also covers SQL over HTTP.
@@ -382,98 +376,58 @@ async fn main2() -> anyhow::Result<()> {
        info!("Starting wss on {serverless_address}");
        let serverless_listener = TcpListener::bind(serverless_address).await?;

-        client_tasks
-            .build_task()
-            .name("serverless main")
-            .spawn(serverless::task_main(
-                config,
-                serverless_listener,
-                cancellation_token.clone(),
-                cancellation_handler.clone(),
-            ))
-            .unwrap();
+        client_tasks.spawn(serverless::task_main(
+            config,
+            serverless_listener,
+            cancellation_token.clone(),
+            endpoint_rate_limiter.clone(),
+            cancellation_handler.clone(),
+        ));
    }

-    client_tasks
-        .build_task()
-        .name("parquet worker")
-        .spawn(proxy::context::parquet::worker(
-            cancellation_token.clone(),
-            args.parquet_upload,
-        ))
-        .unwrap();
+    client_tasks.spawn(proxy::context::parquet::worker(
+        cancellation_token.clone(),
+        args.parquet_upload,
+    ));

    // maintenance tasks. these never return unless there's an error
    let mut maintenance_tasks = JoinSet::new();
-    maintenance_tasks
-        .build_task()
-        .name("signal handler")
-        .spawn(proxy::handle_signals(cancellation_token.clone()))
-        .unwrap();
-    maintenance_tasks
-        .build_task()
-        .name("health server")
-        .spawn(http::health_server::task_main(
-            http_listener,
-            AppMetrics {
-                jemalloc,
-                neon_metrics,
-                proxy: proxy::metrics::Metrics::get(),
-            },
-        ))
-        .unwrap();
-    maintenance_tasks
-        .build_task()
-        .name("mangement main")
-        .spawn(console::mgmt::task_main(mgmt_listener))
-        .unwrap();
+    maintenance_tasks.spawn(proxy::handle_signals(cancellation_token.clone()));
+    maintenance_tasks.spawn(http::health_server::task_main(
+        http_listener,
+        AppMetrics {
+            jemalloc,
+            neon_metrics,
+            proxy: proxy::metrics::Metrics::get(),
+        },
+    ));
+    maintenance_tasks.spawn(console::mgmt::task_main(mgmt_listener));

    if let Some(metrics_config) = &config.metric_collection {
        // TODO: Add gc regardles of the metric collection being enabled.
-        maintenance_tasks
-            .build_task()
-            .name("")
-            .spawn(usage_metrics::task_main(metrics_config))
-            .unwrap();
-        client_tasks
-            .build_task()
-            .name("")
-            .spawn(usage_metrics::task_backup(
-                &metrics_config.backup_metric_collection_config,
-                cancellation_token,
-            ))
-            .unwrap();
+        maintenance_tasks.spawn(usage_metrics::task_main(metrics_config));
+        client_tasks.spawn(usage_metrics::task_backup(
+            &metrics_config.backup_metric_collection_config,
+            cancellation_token,
+        ));
    }

    if let auth::BackendType::Console(api, _) = &config.auth_backend {
        if let proxy::console::provider::ConsoleBackend::Console(api) = &**api {
            if let Some(redis_notifications_client) = redis_notifications_client {
                let cache = api.caches.project_info.clone();
-                maintenance_tasks
-                    .build_task()
-                    .name("redis notifications")
-                    .spawn(notifications::task_main(
-                        redis_notifications_client,
-                        cache.clone(),
-                        cancel_map.clone(),
-                        args.region.clone(),
-                    ))
-                    .unwrap();
-                maintenance_tasks
-                    .build_task()
-                    .name("proj info cache gc")
-                    .spawn(async move { cache.clone().gc_worker().await })
-                    .unwrap();
+                maintenance_tasks.spawn(notifications::task_main(
+                    redis_notifications_client,
+                    cache.clone(),
+                    cancel_map.clone(),
+                    args.region.clone(),
+                ));
+                maintenance_tasks.spawn(async move { cache.clone().gc_worker().await });
            }
            if let Some(regional_redis_client) = regional_redis_client {
                let cache = api.caches.endpoints_cache.clone();
                let con = regional_redis_client;
-                let span = tracing::info_span!("endpoints_cache");
-                maintenance_tasks
-                    .build_task()
-                    .name("redis endpoints cache read")
-                    .spawn(async move { cache.do_read(con).await }.instrument(span))
-                    .unwrap();
+                maintenance_tasks.spawn(async move { cache.do_read(con).await });
            }
        }
    }
@@ -514,6 +468,9 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
        _ => bail!("either both or neither tls-key and tls-cert must be specified"),
    };

+    if args.allow_self_signed_compute {
+        warn!("allowing self-signed compute certificates");
+    }
    let backup_metric_collection_config = config::MetricBackupCollectionConfig {
        interval: args.metric_backup_collection_interval,
        remote_storage_config: remote_storage_from_toml(
@@ -537,9 +494,13 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
             and metric-collection-interval must be specified"
        ),
    };
-    if !args.disable_dynamic_rate_limiter {
-        bail!("dynamic rate limiter should be disabled");
-    }
+    let rate_limiter_config = RateLimiterConfig {
+        disable: args.disable_dynamic_rate_limiter,
+        algorithm: args.rate_limit_algorithm,
+        timeout: args.rate_limiter_timeout,
+        initial_limit: args.initial_limit,
+        aimd_config: Some(args.aimd_config),
+    };

    let auth_backend = match &args.auth_backend {
        AuthBackend::Console => {
@@ -578,19 +539,12 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
                )
                .unwrap(),
            ));
-            tokio::task::Builder::new()
-                .name("wake compute lock gc")
-                .spawn(locks.garbage_collect_worker())
-                .unwrap();
+            tokio::spawn(locks.garbage_collect_worker());

            let url = args.auth_endpoint.parse()?;
-            let endpoint = http::Endpoint::new(url, http::new_client());
+            let endpoint = http::Endpoint::new(url, http::new_client(rate_limiter_config));

-            let mut endpoint_rps_limit = args.endpoint_rps_limit.clone();
-            RateBucketInfo::validate(&mut endpoint_rps_limit)?;
-            let endpoint_rate_limiter = Arc::new(EndpointRateLimiter::new(endpoint_rps_limit));
-            let api =
-                console::provider::neon::Api::new(endpoint, caches, locks, endpoint_rate_limiter);
+            let api = console::provider::neon::Api::new(endpoint, caches, locks);
            let api = console::provider::ConsoleBackend::Console(api);
            auth::BackendType::Console(MaybeOwned::Owned(api), ())
        }
@@ -621,9 +575,10 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
        scram_protocol_timeout: args.scram_protocol_timeout,
        rate_limiter_enabled: args.auth_rate_limit_enabled,
        rate_limiter: AuthRateLimiter::new(args.auth_rate_limit.clone()),
-        rate_limit_ip_subnet: args.auth_rate_limit_ip_subnet,
    };

+    let mut endpoint_rps_limit = args.endpoint_rps_limit.clone();
+    RateBucketInfo::validate(&mut endpoint_rps_limit)?;
    let mut redis_rps_limit = args.redis_rps_limit.clone();
    RateBucketInfo::validate(&mut redis_rps_limit)?;

@@ -631,10 +586,12 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
        tls_config,
        auth_backend,
        metric_collection,
+        allow_self_signed_compute: args.allow_self_signed_compute,
        http_config,
        authentication_config,
        require_client_ip: args.require_client_ip,
        disable_ip_check_for_http: args.disable_ip_check_for_http,
+        endpoint_rps_limit,
        redis_rps_limit,
        handshake_timeout: args.handshake_timeout,
        region: args.region.clone(),
--- a/proxy/src/cache/endpoints.rs
+++ b/proxy/src/cache/endpoints.rs
@@ -13,7 +13,6 @@ use redis::{
 };
 use serde::Deserialize;
 use tokio::sync::Mutex;
-use tracing::info;

 use crate::{
    config::EndpointCacheConfig,
@@ -70,14 +69,17 @@ impl EndpointsCache {
        if !self.ready.load(Ordering::Acquire) {
            return true;
        }
-        let rejected = self.should_reject(endpoint);
-        ctx.set_rejected(rejected);
-        info!(?rejected, "check endpoint is valid, disabled cache");
-        // If cache is disabled, just collect the metrics and return or
-        // If the limiter allows, we don't need to check the cache.
-        if self.config.disable_cache || self.limiter.lock().await.check() {
+        // If cache is disabled, just collect the metrics and return.
+        if self.config.disable_cache {
+            ctx.set_rejected(self.should_reject(endpoint));
            return true;
        }
+        // If the limiter allows, we don't need to check the cache.
+        if self.limiter.lock().await.check() {
+            return true;
+        }
+        let rejected = self.should_reject(endpoint);
+        ctx.set_rejected(rejected);
        !rejected
    }
    fn should_reject(&self, endpoint: &EndpointId) -> bool {
@@ -169,9 +171,6 @@ impl EndpointsCache {

            if res.keys.is_empty() {
                if return_when_finish {
-                    if total != 0 {
-                        break;
-                    }
                    anyhow::bail!(
                        "Redis stream {} is empty, cannot be used to filter endpoints",
                        self.config.stream_name
--- a/proxy/src/compute.rs
+++ b/proxy/src/compute.rs
@@ -10,12 +10,7 @@ use crate::{
 use futures::{FutureExt, TryFutureExt};
 use itertools::Itertools;
 use pq_proto::StartupMessageParams;
-use std::{
-    io,
-    net::SocketAddr,
-    sync::{Arc, OnceLock},
-    time::Duration,
-};
+use std::{io, net::SocketAddr, time::Duration};
 use thiserror::Error;
 use tokio::net::TcpStream;
 use tokio_postgres::tls::MakeTlsConnect;
@@ -33,6 +28,9 @@ pub enum ConnectionError {
    #[error("{COULD_NOT_CONNECT}: {0}")]
    CouldNotConnect(#[from] io::Error),

+    #[error("{COULD_NOT_CONNECT}: {0}")]
+    TlsError(#[from] native_tls::Error),
+
    #[error("{COULD_NOT_CONNECT}: {0}")]
    WakeComputeError(#[from] WakeComputeError),
 }
@@ -71,6 +69,7 @@ impl ReportableError for ConnectionError {
            }
            ConnectionError::Postgres(_) => crate::error::ErrorKind::Compute,
            ConnectionError::CouldNotConnect(_) => crate::error::ErrorKind::Compute,
+            ConnectionError::TlsError(_) => crate::error::ErrorKind::Compute,
            ConnectionError::WakeComputeError(e) => e.get_error_kind(),
        }
    }
@@ -240,7 +239,7 @@ pub struct PostgresConnection {
    /// Socket connected to a compute node.
    pub stream: tokio_postgres::maybe_tls_stream::MaybeTlsStream<
        tokio::net::TcpStream,
-        tokio_postgres_rustls::RustlsStream<tokio::net::TcpStream>,
+        postgres_native_tls::TlsStream<tokio::net::TcpStream>,
    >,
    /// PostgreSQL connection parameters.
    pub params: std::collections::HashMap<String, String>,
@@ -252,39 +251,22 @@ pub struct PostgresConnection {
    _guage: NumDbConnectionsGuard<'static>,
 }

-static ROOT_STORE: OnceLock<Arc<rustls::RootCertStore>> = OnceLock::new();
-
 impl ConnCfg {
    /// Connect to a corresponding compute node.
    pub async fn connect(
        &self,
        ctx: &mut RequestMonitoring,
+        allow_self_signed_compute: bool,
        aux: MetricsAuxInfo,
        timeout: Duration,
    ) -> Result<PostgresConnection, ConnectionError> {
        let (socket_addr, stream, host) = self.connect_raw(timeout).await?;

-        let root_store = ROOT_STORE.get_or_init(|| {
-            let mut roots = rustls::RootCertStore::empty();
-
-            let certs = match rustls_native_certs::load_native_certs() {
-                Ok(certs) => certs,
-                Err(e) => {
-                    error!("could not load native ssl certs: {e:?}");
-                    return Arc::new(roots);
-                }
-            };
-
-            let (added, ignored) = roots.add_parsable_certificates(certs);
-            info!(added, ignored, "loaded native ssl certifications");
-
-            Arc::new(roots)
-        });
-
-        let client_config = rustls::ClientConfig::builder()
-            .with_root_certificates(root_store.clone())
-            .with_no_client_auth();
-        let mut mk_tls = tokio_postgres_rustls::MakeRustlsConnect::new(client_config);
+        let tls_connector = native_tls::TlsConnector::builder()
+            .danger_accept_invalid_certs(allow_self_signed_compute)
+            .build()
+            .unwrap();
+        let mut mk_tls = postgres_native_tls::MakeTlsConnector::new(tls_connector);
        let tls = MakeTlsConnect::<tokio::net::TcpStream>::make_tls_connect(&mut mk_tls, host)?;

        // connect_raw() will not use TLS if sslmode is "disable"
--- a/proxy/src/config.rs
+++ b/proxy/src/config.rs
@@ -1,6 +1,6 @@
 use crate::{
-    auth::{self, backend::AuthRateLimiter},
-    rate_limiter::RateBucketInfo,
+    auth,
+    rate_limiter::{AuthRateLimiter, RateBucketInfo},
    serverless::GlobalConnPoolOptions,
 };
 use anyhow::{bail, ensure, Context, Ok};
@@ -24,10 +24,12 @@ pub struct ProxyConfig {
    pub tls_config: Option<TlsConfig>,
    pub auth_backend: auth::BackendType<'static, (), ()>,
    pub metric_collection: Option<MetricCollectionConfig>,
+    pub allow_self_signed_compute: bool,
    pub http_config: HttpConfig,
    pub authentication_config: AuthenticationConfig,
    pub require_client_ip: bool,
    pub disable_ip_check_for_http: bool,
+    pub endpoint_rps_limit: Vec<RateBucketInfo>,
    pub redis_rps_limit: Vec<RateBucketInfo>,
    pub region: String,
    pub handshake_timeout: Duration,
@@ -56,7 +58,6 @@ pub struct AuthenticationConfig {
    pub scram_protocol_timeout: tokio::time::Duration,
    pub rate_limiter_enabled: bool,
    pub rate_limiter: AuthRateLimiter,
-    pub rate_limit_ip_subnet: u8,
 }

 impl TlsConfig {
--- a/proxy/src/console/mgmt.rs
+++ b/proxy/src/console/mgmt.rs
@@ -40,31 +40,28 @@ pub async fn task_main(listener: TcpListener) -> anyhow::Result<Infallible> {

        let span = info_span!("mgmt", peer = %peer_addr);

-        tokio::task::Builder::new()
-            .name("mgmt handler")
-            .spawn(
-                async move {
-                    info!("serving a new console management API connection");
+        tokio::task::spawn(
+            async move {
+                info!("serving a new console management API connection");

-                    // these might be long running connections, have a separate logging for cancelling
-                    // on shutdown and other ways of stopping.
-                    let cancelled = scopeguard::guard(tracing::Span::current(), |span| {
-                        let _e = span.entered();
-                        info!("console management API task cancelled");
-                    });
+                // these might be long running connections, have a separate logging for cancelling
+                // on shutdown and other ways of stopping.
+                let cancelled = scopeguard::guard(tracing::Span::current(), |span| {
+                    let _e = span.entered();
+                    info!("console management API task cancelled");
+                });

-                    if let Err(e) = handle_connection(socket).await {
-                        error!("serving failed with an error: {e}");
-                    } else {
-                        info!("serving completed");
-                    }
-
-                    // we can no longer get dropped
-                    scopeguard::ScopeGuard::into_inner(cancelled);
+                if let Err(e) = handle_connection(socket).await {
+                    error!("serving failed with an error: {e}");
+                } else {
+                    info!("serving completed");
                }
-                .instrument(span),
-            )
-            .unwrap();
+
+                // we can no longer get dropped
+                scopeguard::ScopeGuard::into_inner(cancelled);
+            }
+            .instrument(span),
+        );
    }
 }

--- a/proxy/src/console/provider.rs
+++ b/proxy/src/console/provider.rs
@@ -208,9 +208,6 @@ pub mod errors {
        #[error(transparent)]
        ApiError(ApiError),

-        #[error("Too many connections attempts")]
-        TooManyConnections,
-
        #[error("Timeout waiting to acquire wake compute lock")]
        TimeoutError,
    }
@@ -243,8 +240,6 @@ pub mod errors {
                // However, API might return a meaningful error.
                ApiError(e) => e.to_string_client(),

-                TooManyConnections => self.to_string(),
-
                TimeoutError => "timeout while acquiring the compute resource lock".to_owned(),
            }
        }
@@ -255,7 +250,6 @@ pub mod errors {
            match self {
                WakeComputeError::BadComputeAddress(_) => crate::error::ErrorKind::ControlPlane,
                WakeComputeError::ApiError(e) => e.get_error_kind(),
-                WakeComputeError::TooManyConnections => crate::error::ErrorKind::RateLimit,
                WakeComputeError::TimeoutError => crate::error::ErrorKind::ServiceRateLimit,
            }
        }
@@ -293,6 +287,9 @@ pub struct NodeInfo {

    /// Labels for proxy's metrics.
    pub aux: MetricsAuxInfo,
+
+    /// Whether we should accept self-signed certificates (for testing)
+    pub allow_self_signed_compute: bool,
 }

 impl NodeInfo {
@@ -301,9 +298,17 @@ impl NodeInfo {
        ctx: &mut RequestMonitoring,
        timeout: Duration,
    ) -> Result<compute::PostgresConnection, compute::ConnectionError> {
-        self.config.connect(ctx, self.aux.clone(), timeout).await
+        self.config
+            .connect(
+                ctx,
+                self.allow_self_signed_compute,
+                self.aux.clone(),
+                timeout,
+            )
+            .await
    }
    pub fn reuse_settings(&mut self, other: Self) {
+        self.allow_self_signed_compute = other.allow_self_signed_compute;
        self.config.reuse_password(other.config);
    }

--- a/proxy/src/console/provider/mock.rs
+++ b/proxy/src/console/provider/mock.rs
@@ -63,10 +63,7 @@ impl Api {
            let (client, connection) =
                tokio_postgres::connect(self.endpoint.as_str(), tokio_postgres::NoTls).await?;

-            tokio::task::Builder::new()
-                .name("mock conn")
-                .spawn(connection)
-                .unwrap();
+            tokio::spawn(connection);
            let secret = match get_execute_postgres_query(
                &client,
                "select rolpassword from pg_catalog.pg_authid where rolname = $1",
@@ -129,6 +126,7 @@ impl Api {
                branch_id: (&BranchId::from("branch")).into(),
                cold_start_info: crate::console::messages::ColdStartInfo::Warm,
            },
+            allow_self_signed_compute: false,
        };

        Ok(node)
--- a/proxy/src/console/provider/neon.rs
+++ b/proxy/src/console/provider/neon.rs
@@ -12,7 +12,6 @@ use crate::{
    console::messages::ColdStartInfo,
    http,
    metrics::{CacheOutcome, Metrics},
-    rate_limiter::EndpointRateLimiter,
    scram, Normalize,
 };
 use crate::{cache::Cached, context::RequestMonitoring};
@@ -26,7 +25,6 @@ pub struct Api {
    endpoint: http::Endpoint,
    pub caches: &'static ApiCaches,
    pub locks: &'static ApiLocks,
-    pub endpoint_rate_limiter: Arc<EndpointRateLimiter>,
    jwt: String,
 }

@@ -36,7 +34,6 @@ impl Api {
        endpoint: http::Endpoint,
        caches: &'static ApiCaches,
        locks: &'static ApiLocks,
-        endpoint_rate_limiter: Arc<EndpointRateLimiter>,
    ) -> Self {
        let jwt: String = match std::env::var("NEON_PROXY_TO_CONTROLPLANE_TOKEN") {
            Ok(v) => v,
@@ -46,7 +43,6 @@ impl Api {
            endpoint,
            caches,
            locks,
-            endpoint_rate_limiter,
            jwt,
        }
    }
@@ -175,6 +171,7 @@ impl Api {
            let node = NodeInfo {
                config,
                aux: body.aux,
+                allow_self_signed_compute: false,
            };

            Ok(node)
@@ -280,14 +277,6 @@ impl super::Api for Api {
            return Ok(cached);
        }

-        // check rate limit
-        if !self
-            .endpoint_rate_limiter
-            .check(user_info.endpoint.normalize().into(), 1)
-        {
-            return Err(WakeComputeError::TooManyConnections);
-        }
-
        let permit = self.locks.get_wake_compute_permit(&key).await?;

        // after getting back a permit - it's possible the cache was filled
--- a/proxy/src/context.rs
+++ b/proxy/src/context.rs
@@ -5,7 +5,7 @@ use once_cell::sync::OnceCell;
 use smol_str::SmolStr;
 use std::net::IpAddr;
 use tokio::sync::mpsc;
-use tracing::{field::display, info, info_span, Span};
+use tracing::{field::display, info_span, Span};
 use uuid::Uuid;

 use crate::{
@@ -51,7 +51,7 @@ pub struct RequestMonitoring {
    sender: Option<mpsc::UnboundedSender<RequestData>>,
    pub latency_timer: LatencyTimer,
    // Whether proxy decided that it's not a valid endpoint end rejected it before going to cplane.
-    rejected: Option<bool>,
+    rejected: bool,
 }

 #[derive(Clone, Debug)]
@@ -76,7 +76,6 @@ impl RequestMonitoring {
            ?session_id,
            %peer_addr,
            ep = tracing::field::Empty,
-            role = tracing::field::Empty,
        );

        Self {
@@ -96,7 +95,7 @@ impl RequestMonitoring {
            error_kind: None,
            auth_method: None,
            success: false,
-            rejected: None,
+            rejected: false,
            cold_start_info: ColdStartInfo::Unknown,

            sender: LOG_CHAN.get().and_then(|tx| tx.upgrade()),
@@ -118,7 +117,7 @@ impl RequestMonitoring {
    }

    pub fn set_rejected(&mut self, rejected: bool) {
-        self.rejected = Some(rejected);
+        self.rejected = rejected;
    }

    pub fn set_cold_start_info(&mut self, info: ColdStartInfo) {
@@ -158,7 +157,6 @@ impl RequestMonitoring {
    }

    pub fn set_user(&mut self, user: RoleName) {
-        self.span.record("role", display(&user));
        self.user = Some(user);
    }

@@ -166,18 +164,8 @@ impl RequestMonitoring {
        self.auth_method = Some(auth_method);
    }

-    pub fn has_private_peer_addr(&self) -> bool {
-        match self.peer_addr {
-            IpAddr::V4(ip) => ip.is_private(),
-            _ => false,
-        }
-    }
-
    pub fn set_error_kind(&mut self, kind: ErrorKind) {
-        // Do not record errors from the private address to metrics.
-        if !self.has_private_peer_addr() {
-            Metrics::get().proxy.errors_total.inc(kind);
-        }
+        Metrics::get().proxy.errors_total.inc(kind);
        if let Some(ep) = &self.endpoint_id {
            let metric = &Metrics::get().proxy.endpoints_affected_by_errors;
            let label = metric.with_labels(kind);
@@ -200,28 +188,14 @@ impl Drop for RequestMonitoring {
        } else {
            ConnectOutcome::Failed
        };
-        if let Some(rejected) = self.rejected {
-            let ep = self
-                .endpoint_id
-                .as_ref()
-                .map(|x| x.as_str())
-                .unwrap_or_default();
-            // This makes sense only if cache is disabled
-            info!(
-                ?outcome,
-                ?rejected,
-                ?ep,
-                "check endpoint is valid with outcome"
-            );
-            Metrics::get()
-                .proxy
-                .invalid_endpoints_total
-                .inc(InvalidEndpointsGroup {
-                    protocol: self.protocol,
-                    rejected: rejected.into(),
-                    outcome,
-                });
-        }
+        Metrics::get()
+            .proxy
+            .invalid_endpoints_total
+            .inc(InvalidEndpointsGroup {
+                protocol: self.protocol,
+                rejected: self.rejected.into(),
+                outcome,
+            });
        if let Some(tx) = self.sender.take() {
            let _: Result<(), _> = tx.send(RequestData::from(&*self));
        }
--- a/proxy/src/context/parquet.rs
+++ b/proxy/src/context/parquet.rs
@@ -141,15 +141,12 @@ pub async fn worker(
    LOG_CHAN.set(tx.downgrade()).unwrap();

    // setup row stream that will close on cancellation
-    tokio::task::Builder::new()
-        .name("drop parquet conn")
-        .spawn(async move {
-            cancellation_token.cancelled().await;
-            // dropping this sender will cause the channel to close only once
-            // all the remaining inflight requests have been completed.
-            drop(tx);
-        })
-        .unwrap();
+    tokio::spawn(async move {
+        cancellation_token.cancelled().await;
+        // dropping this sender will cause the channel to close only once
+        // all the remaining inflight requests have been completed.
+        drop(tx);
+    });
    let rx = futures::stream::poll_fn(move |cx| rx.poll_recv(cx));
    let rx = rx.map(RequestData::from);

--- a/proxy/src/http.rs
+++ b/proxy/src/http.rs
@@ -15,6 +15,7 @@ use tracing::trace;

 use crate::{
    metrics::{ConsoleRequest, Metrics},
+    rate_limiter,
    url::ApiUrl,
 };
 use reqwest_middleware::RequestBuilder;
@@ -22,7 +23,7 @@ use reqwest_middleware::RequestBuilder;
 /// This is the preferred way to create new http clients,
 /// because it takes care of observability (OpenTelemetry).
 /// We deliberately don't want to replace this with a public static.
-pub fn new_client() -> ClientWithMiddleware {
+pub fn new_client(rate_limiter_config: rate_limiter::RateLimiterConfig) -> ClientWithMiddleware {
    let client = reqwest::ClientBuilder::new()
        .dns_resolver(Arc::new(GaiResolver::default()))
        .connection_verbose(true)
@@ -31,6 +32,7 @@ pub fn new_client() -> ClientWithMiddleware {

    reqwest_middleware::ClientBuilder::new(client)
        .with(reqwest_tracing::TracingMiddleware::default())
+        .with(rate_limiter::Limiter::new(rate_limiter_config))
        .build()
 }

--- a/proxy/src/http/health_server.rs
+++ b/proxy/src/http/health_server.rs
@@ -75,10 +75,6 @@ async fn prometheus_metrics_handler(

    let span = info_span!("blocking");
    let body = tokio::task::spawn_blocking(move || {
-        // there are situations where we lose scraped metrics under load, try to gather some clues
-        // since all nodes are queried this, keep the message count low.
-        let spawned_at = std::time::Instant::now();
-
        let _span = span.entered();

        let mut state = state.lock().unwrap();
@@ -88,19 +84,11 @@ async fn prometheus_metrics_handler(
            .collect_group_into(&mut *encoder)
            .unwrap_or_else(|infallible| match infallible {});

-        let encoded_at = std::time::Instant::now();
-
        let body = encoder.finish();

-        let spawned_in = spawned_at - started_at;
-        let encoded_in = encoded_at - spawned_at;
-        let total = encoded_at - started_at;
-
        tracing::info!(
            bytes = body.len(),
-            total_ms = total.as_millis(),
-            spawning_ms = spawned_in.as_millis(),
-            encoding_ms = encoded_in.as_millis(),
+            elapsed_ms = started_at.elapsed().as_millis(),
            "responded /metrics"
        );

--- a/proxy/src/logging.rs
+++ b/proxy/src/logging.rs
@@ -26,12 +26,7 @@ pub async fn init() -> anyhow::Result<LoggingGuard> {
        .await
        .map(OpenTelemetryLayer::new);

-    // spawn the console server in the background,
-    // returning a `Layer`:
-    let console_layer = console_subscriber::spawn();
-
    tracing_subscriber::registry()
-        .with(console_layer)
        .with(env_filter)
        .with(otlp_layer)
        .with(fmt_layer)
--- a/proxy/src/metrics.rs
+++ b/proxy/src/metrics.rs
@@ -4,8 +4,8 @@ use lasso::ThreadedRodeo;
 use measured::{
    label::StaticLabelSet,
    metric::{histogram::Thresholds, name::MetricName},
-    Counter, CounterVec, FixedCardinalityLabel, Gauge, Histogram, HistogramVec, LabelGroup,
-    MetricGroup,
+    Counter, CounterVec, FixedCardinalityLabel, Gauge, GaugeVec, Histogram, HistogramVec,
+    LabelGroup, MetricGroup,
 };
 use metrics::{CounterPairAssoc, CounterPairVec, HyperLogLog, HyperLogLogVec};

@@ -20,6 +20,9 @@ pub struct Metrics {

    #[metric(namespace = "wake_compute_lock")]
    pub wake_compute_lock: ApiLockMetrics,
+
+    // the one metric not called proxy_....
+    pub semaphore_control_plane_limit: GaugeVec<StaticLabelSet<RateLimit>>,
 }

 impl Metrics {
@@ -28,6 +31,7 @@ impl Metrics {
        SELF.get_or_init(|| Metrics {
            proxy: ProxyMetrics::default(),
            wake_compute_lock: ApiLockMetrics::new(),
+            semaphore_control_plane_limit: GaugeVec::default(),
        })
    }
 }
@@ -282,6 +286,13 @@ pub enum LatencyExclusions {
    ClientAndCplane,
 }

+#[derive(FixedCardinalityLabel, Copy, Clone)]
+#[label(singleton = "limit")]
+pub enum RateLimit {
+    Actual,
+    Expected,
+}
+
 #[derive(FixedCardinalityLabel, Copy, Clone)]
 #[label(singleton = "kind")]
 pub enum SniKind {
--- a/proxy/src/proxy.rs
+++ b/proxy/src/proxy.rs
@@ -7,7 +7,6 @@ pub mod handshake;
 pub mod passthrough;
 pub mod retry;
 pub mod wake_compute;
-pub use copy_bidirectional::copy_bidirectional_client_compute;

 use crate::{
    auth,
@@ -19,8 +18,9 @@ use crate::{
    metrics::{Metrics, NumClientConnectionsGuard},
    protocol2::WithClientIp,
    proxy::handshake::{handshake, HandshakeData},
+    rate_limiter::EndpointRateLimiter,
    stream::{PqStream, Stream},
-    EndpointCacheKey,
+    EndpointCacheKey, Normalize,
 };
 use futures::TryFutureExt;
 use itertools::Itertools;
@@ -60,6 +60,7 @@ pub async fn task_main(
    config: &'static ProxyConfig,
    listener: tokio::net::TcpListener,
    cancellation_token: CancellationToken,
+    endpoint_rate_limiter: Arc<EndpointRateLimiter>,
    cancellation_handler: Arc<CancellationHandlerMain>,
 ) -> anyhow::Result<()> {
    scopeguard::defer! {
@@ -84,10 +85,11 @@ pub async fn task_main(

        let session_id = uuid::Uuid::new_v4();
        let cancellation_handler = Arc::clone(&cancellation_handler);
+        let endpoint_rate_limiter = endpoint_rate_limiter.clone();

        tracing::info!(protocol = "tcp", %session_id, "accepted new TCP connection");

-        tokio::task::Builder::new().name("tcp client connection").spawn(connections.track_future(async move {
+        connections.spawn(async move {
            let mut socket = WithClientIp::new(socket);
            let mut peer_addr = peer_addr.ip();
            match socket.wait_for_addr().await {
@@ -125,6 +127,7 @@ pub async fn task_main(
                cancellation_handler,
                socket,
                ClientMode::Tcp,
+                endpoint_rate_limiter,
                conn_gauge,
            )
            .instrument(span.clone())
@@ -152,7 +155,7 @@ pub async fn task_main(
                    }
                }
            }
-        })).unwrap();
+        });
    }

    connections.close();
@@ -178,6 +181,13 @@ impl ClientMode {
        }
    }

+    pub fn allow_self_signed_compute(&self, config: &ProxyConfig) -> bool {
+        match self {
+            ClientMode::Tcp => config.allow_self_signed_compute,
+            ClientMode::Websockets { .. } => false,
+        }
+    }
+
    fn hostname<'a, S>(&'a self, s: &'a Stream<S>) -> Option<&'a str> {
        match self {
            ClientMode::Tcp => s.sni_hostname(),
@@ -231,6 +241,7 @@ pub async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
    cancellation_handler: Arc<CancellationHandlerMain>,
    stream: S,
    mode: ClientMode,
+    endpoint_rate_limiter: Arc<EndpointRateLimiter>,
    conn_gauge: NumClientConnectionsGuard<'static>,
 ) -> Result<Option<ProxyPassthrough<CancellationHandlerMainInternal, S>>, ClientRequestError> {
    info!(
@@ -245,9 +256,8 @@ pub async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(

    let tls = config.tls_config.as_ref();

-    let record_handshake_error = !ctx.has_private_peer_addr();
    let pause = ctx.latency_timer.pause(crate::metrics::Waiting::Client);
-    let do_handshake = handshake(stream, mode.handshake_tls(tls), record_handshake_error);
+    let do_handshake = handshake(stream, mode.handshake_tls(tls));
    let (mut stream, params) =
        match tokio::time::timeout(config.handshake_timeout, do_handshake).await?? {
            HandshakeData::Startup(stream, params) => (stream, params),
@@ -276,6 +286,15 @@ pub async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
        Err(e) => stream.throw_error(e).await?,
    };

+    // check rate limit
+    if let Some(ep) = user_info.get_endpoint() {
+        if !endpoint_rate_limiter.check(ep.normalize(), 1) {
+            return stream
+                .throw_error(auth::AuthError::too_many_connections())
+                .await?;
+        }
+    }
+
    let user = user_info.get_user().to_owned();
    let user_info = match user_info
        .authenticate(
@@ -296,9 +315,14 @@ pub async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
        }
    };

-    let mut node = connect_to_compute(ctx, &TcpMechanism { params: &params }, &user_info)
-        .or_else(|e| stream.throw_error(e))
-        .await?;
+    let mut node = connect_to_compute(
+        ctx,
+        &TcpMechanism { params: &params },
+        &user_info,
+        mode.allow_self_signed_compute(config),
+    )
+    .or_else(|e| stream.throw_error(e))
+    .await?;

    let session = cancellation_handler.get_session();
    prepare_client_connection(&node, &session, &mut stream).await?;
--- a/proxy/src/proxy/connect_compute.rs
+++ b/proxy/src/proxy/connect_compute.rs
@@ -92,6 +92,7 @@ pub async fn connect_to_compute<M: ConnectMechanism, B: ComputeConnectBackend>(
    ctx: &mut RequestMonitoring,
    mechanism: &M,
    user_info: &B,
+    allow_self_signed_compute: bool,
 ) -> Result<M::Connection, M::Error>
 where
    M::ConnectError: ShouldRetry + std::fmt::Debug,
@@ -102,6 +103,8 @@ where
    if let Some(keys) = user_info.get_keys() {
        node_info.set_keys(keys);
    }
+    node_info.allow_self_signed_compute = allow_self_signed_compute;
+    // let mut node_info = credentials.get_node_info(ctx, user_info).await?;
    mechanism.update_connect_config(&mut node_info.config);

    // try once
--- a/proxy/src/proxy/copy_bidirectional.rs
+++ b/proxy/src/proxy/copy_bidirectional.rs
@@ -41,7 +41,7 @@ where
 }

 #[tracing::instrument(skip_all)]
-pub async fn copy_bidirectional_client_compute<Client, Compute>(
+pub(super) async fn copy_bidirectional_client_compute<Client, Compute>(
    client: &mut Client,
    compute: &mut Compute,
 ) -> Result<(u64, u64), std::io::Error>
--- a/proxy/src/proxy/handshake.rs
+++ b/proxy/src/proxy/handshake.rs
@@ -63,7 +63,6 @@ pub enum HandshakeData<S> {
 pub async fn handshake<S: AsyncRead + AsyncWrite + Unpin>(
    stream: S,
    mut tls: Option<&TlsConfig>,
-    record_handshake_error: bool,
 ) -> Result<HandshakeData<S>, HandshakeError> {
    // Client may try upgrading to each protocol only once
    let (mut tried_ssl, mut tried_gss) = (false, false);
@@ -96,9 +95,7 @@ pub async fn handshake<S: AsyncRead + AsyncWrite + Unpin>(
                        if !read_buf.is_empty() {
                            return Err(HandshakeError::EarlyData);
                        }
-                        let tls_stream = raw
-                            .upgrade(tls.to_server_config(), record_handshake_error)
-                            .await?;
+                        let tls_stream = raw.upgrade(tls.to_server_config()).await?;

                        let (_, tls_server_end_point) = tls
                            .cert_resolver
--- a/proxy/src/proxy/tests.rs
+++ b/proxy/src/proxy/tests.rs
@@ -175,7 +175,7 @@ async fn dummy_proxy(
    auth: impl TestAuth + Send,
 ) -> anyhow::Result<()> {
    let client = WithClientIp::new(client);
-    let mut stream = match handshake(client, tls.as_ref(), false).await? {
+    let mut stream = match handshake(client, tls.as_ref()).await? {
        HandshakeData::Startup(stream, _) => stream,
        HandshakeData::Cancel(_) => bail!("cancellation not supported"),
    };
@@ -519,6 +519,7 @@ fn helper_create_cached_node_info(cache: &'static NodeInfoCache) -> CachedNodeIn
            branch_id: (&BranchId::from("branch")).into(),
            cold_start_info: crate::console::messages::ColdStartInfo::Warm,
        },
+        allow_self_signed_compute: false,
    };
    let (_, node) = cache.insert("key".into(), node);
    node
@@ -548,7 +549,7 @@ async fn connect_to_compute_success() {
    let mut ctx = RequestMonitoring::test();
    let mechanism = TestConnectMechanism::new(vec![Wake, Connect]);
    let user_info = helper_create_connect_info(&mechanism);
-    connect_to_compute(&mut ctx, &mechanism, &user_info)
+    connect_to_compute(&mut ctx, &mechanism, &user_info, false)
        .await
        .unwrap();
    mechanism.verify();
@@ -561,7 +562,7 @@ async fn connect_to_compute_retry() {
    let mut ctx = RequestMonitoring::test();
    let mechanism = TestConnectMechanism::new(vec![Wake, Retry, Wake, Connect]);
    let user_info = helper_create_connect_info(&mechanism);
-    connect_to_compute(&mut ctx, &mechanism, &user_info)
+    connect_to_compute(&mut ctx, &mechanism, &user_info, false)
        .await
        .unwrap();
    mechanism.verify();
@@ -575,7 +576,7 @@ async fn connect_to_compute_non_retry_1() {
    let mut ctx = RequestMonitoring::test();
    let mechanism = TestConnectMechanism::new(vec![Wake, Retry, Wake, Fail]);
    let user_info = helper_create_connect_info(&mechanism);
-    connect_to_compute(&mut ctx, &mechanism, &user_info)
+    connect_to_compute(&mut ctx, &mechanism, &user_info, false)
        .await
        .unwrap_err();
    mechanism.verify();
@@ -589,7 +590,7 @@ async fn connect_to_compute_non_retry_2() {
    let mut ctx = RequestMonitoring::test();
    let mechanism = TestConnectMechanism::new(vec![Wake, Fail, Wake, Connect]);
    let user_info = helper_create_connect_info(&mechanism);
-    connect_to_compute(&mut ctx, &mechanism, &user_info)
+    connect_to_compute(&mut ctx, &mechanism, &user_info, false)
        .await
        .unwrap();
    mechanism.verify();
@@ -607,7 +608,7 @@ async fn connect_to_compute_non_retry_3() {
        Retry, Retry, Retry, Retry, Retry, /* the 17th time */ Retry,
    ]);
    let user_info = helper_create_connect_info(&mechanism);
-    connect_to_compute(&mut ctx, &mechanism, &user_info)
+    connect_to_compute(&mut ctx, &mechanism, &user_info, false)
        .await
        .unwrap_err();
    mechanism.verify();
@@ -621,7 +622,7 @@ async fn wake_retry() {
    let mut ctx = RequestMonitoring::test();
    let mechanism = TestConnectMechanism::new(vec![WakeRetry, Wake, Connect]);
    let user_info = helper_create_connect_info(&mechanism);
-    connect_to_compute(&mut ctx, &mechanism, &user_info)
+    connect_to_compute(&mut ctx, &mechanism, &user_info, false)
        .await
        .unwrap();
    mechanism.verify();
@@ -635,7 +636,7 @@ async fn wake_non_retry() {
    let mut ctx = RequestMonitoring::test();
    let mechanism = TestConnectMechanism::new(vec![WakeRetry, WakeFail]);
    let user_info = helper_create_connect_info(&mechanism);
-    connect_to_compute(&mut ctx, &mechanism, &user_info)
+    connect_to_compute(&mut ctx, &mechanism, &user_info, false)
        .await
        .unwrap_err();
    mechanism.verify();
--- a/proxy/src/proxy/tests/mitm.rs
+++ b/proxy/src/proxy/tests/mitm.rs
@@ -34,10 +34,7 @@ async fn proxy_mitm(
    tokio::spawn(async move {
        // begin handshake with end_server
        let end_server = connect_tls(server2, client_config2.make_tls_connect().unwrap()).await;
-        let (end_client, startup) = match handshake(client1, Some(&server_config1), false)
-            .await
-            .unwrap()
-        {
+        let (end_client, startup) = match handshake(client1, Some(&server_config1)).await.unwrap() {
            HandshakeData::Startup(stream, params) => (stream, params),
            HandshakeData::Cancel(_) => panic!("cancellation not supported"),
        };
--- a/proxy/src/proxy/wake_compute.rs
+++ b/proxy/src/proxy/wake_compute.rs
@@ -90,7 +90,6 @@ fn report_error(e: &WakeComputeError, retry: bool) {
        WakeComputeError::ApiError(ApiError::Console { .. }) => {
            WakeupFailureKind::ApiConsoleOtherError
        }
-        WakeComputeError::TooManyConnections => WakeupFailureKind::ApiConsoleLocked,
        WakeComputeError::TimeoutError => WakeupFailureKind::TimeoutError,
    };
    Metrics::get()
--- a/proxy/src/rate_limiter.rs
+++ b/proxy/src/rate_limiter.rs
@@ -1,2 +1,7 @@
+mod aimd;
+mod limit_algorithm;
 mod limiter;
-pub use limiter::{BucketRateLimiter, EndpointRateLimiter, GlobalRateLimiter, RateBucketInfo};
+pub use aimd::Aimd;
+pub use limit_algorithm::{AimdConfig, Fixed, RateLimitAlgorithm, RateLimiterConfig};
+pub use limiter::Limiter;
+pub use limiter::{AuthRateLimiter, EndpointRateLimiter, GlobalRateLimiter, RateBucketInfo};
--- a/proxy/src/rate_limiter/aimd.rs
+++ b/proxy/src/rate_limiter/aimd.rs
@@ -0,0 +1,166 @@
+use std::usize;
+
+use async_trait::async_trait;
+
+use super::limit_algorithm::{AimdConfig, LimitAlgorithm, Sample};
+
+use super::limiter::Outcome;
+
+/// Loss-based congestion avoidance.
+///
+/// Additive-increase, multiplicative decrease.
+///
+/// Adds available currency when:
+/// 1. no load-based errors are observed, and
+/// 2. the utilisation of the current limit is high.
+///
+/// Reduces available concurrency by a factor when load-based errors are detected.
+pub struct Aimd {
+    min_limit: usize,
+    max_limit: usize,
+    decrease_factor: f32,
+    increase_by: usize,
+    min_utilisation_threshold: f32,
+}
+
+impl Aimd {
+    pub fn new(config: AimdConfig) -> Self {
+        Self {
+            min_limit: config.aimd_min_limit,
+            max_limit: config.aimd_max_limit,
+            decrease_factor: config.aimd_decrease_factor,
+            increase_by: config.aimd_increase_by,
+            min_utilisation_threshold: config.aimd_min_utilisation_threshold,
+        }
+    }
+}
+
+#[async_trait]
+impl LimitAlgorithm for Aimd {
+    async fn update(&mut self, old_limit: usize, sample: Sample) -> usize {
+        use Outcome::*;
+        match sample.outcome {
+            Success => {
+                let utilisation = sample.in_flight as f32 / old_limit as f32;
+
+                if utilisation > self.min_utilisation_threshold {
+                    let limit = old_limit + self.increase_by;
+                    limit.clamp(self.min_limit, self.max_limit)
+                } else {
+                    old_limit
+                }
+            }
+            Overload => {
+                let limit = old_limit as f32 * self.decrease_factor;
+
+                // Floor instead of round, so the limit reduces even with small numbers.
+                // E.g. round(2 * 0.9) = 2, but floor(2 * 0.9) = 1
+                let limit = limit.floor() as usize;
+
+                limit.clamp(self.min_limit, self.max_limit)
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::sync::Arc;
+
+    use tokio::sync::Notify;
+
+    use super::*;
+
+    use crate::rate_limiter::{Limiter, RateLimiterConfig};
+
+    #[tokio::test]
+    async fn should_decrease_limit_on_overload() {
+        let config = RateLimiterConfig {
+            initial_limit: 10,
+            aimd_config: Some(AimdConfig {
+                aimd_decrease_factor: 0.5,
+                ..Default::default()
+            }),
+            disable: false,
+            ..Default::default()
+        };
+
+        let release_notifier = Arc::new(Notify::new());
+
+        let limiter = Limiter::new(config).with_release_notifier(release_notifier.clone());
+
+        let token = limiter.try_acquire().unwrap();
+        limiter.release(token, Some(Outcome::Overload)).await;
+        release_notifier.notified().await;
+        assert_eq!(limiter.state().limit(), 5, "overload: decrease");
+    }
+
+    #[tokio::test]
+    async fn should_increase_limit_on_success_when_using_gt_util_threshold() {
+        let config = RateLimiterConfig {
+            initial_limit: 4,
+            aimd_config: Some(AimdConfig {
+                aimd_decrease_factor: 0.5,
+                aimd_min_utilisation_threshold: 0.5,
+                aimd_increase_by: 1,
+                ..Default::default()
+            }),
+            disable: false,
+            ..Default::default()
+        };
+
+        let limiter = Limiter::new(config);
+
+        let token = limiter.try_acquire().unwrap();
+        let _token = limiter.try_acquire().unwrap();
+        let _token = limiter.try_acquire().unwrap();
+
+        limiter.release(token, Some(Outcome::Success)).await;
+        assert_eq!(limiter.state().limit(), 5, "success: increase");
+    }
+
+    #[tokio::test]
+    async fn should_not_change_limit_on_success_when_using_lt_util_threshold() {
+        let config = RateLimiterConfig {
+            initial_limit: 4,
+            aimd_config: Some(AimdConfig {
+                aimd_decrease_factor: 0.5,
+                aimd_min_utilisation_threshold: 0.5,
+                ..Default::default()
+            }),
+            disable: false,
+            ..Default::default()
+        };
+
+        let limiter = Limiter::new(config);
+
+        let token = limiter.try_acquire().unwrap();
+
+        limiter.release(token, Some(Outcome::Success)).await;
+        assert_eq!(
+            limiter.state().limit(),
+            4,
+            "success: ignore when < half limit"
+        );
+    }
+
+    #[tokio::test]
+    async fn should_not_change_limit_when_no_outcome() {
+        let config = RateLimiterConfig {
+            initial_limit: 10,
+            aimd_config: Some(AimdConfig {
+                aimd_decrease_factor: 0.5,
+                aimd_min_utilisation_threshold: 0.5,
+                ..Default::default()
+            }),
+            disable: false,
+            ..Default::default()
+        };
+
+        let limiter = Limiter::new(config);
+
+        let token = limiter.try_acquire().unwrap();
+        limiter.release(token, None).await;
+        assert_eq!(limiter.state().limit(), 10, "ignore");
+    }
+}
--- a/proxy/src/rate_limiter/limit_algorithm.rs
+++ b/proxy/src/rate_limiter/limit_algorithm.rs
@@ -0,0 +1,98 @@
+//! Algorithms for controlling concurrency limits.
+use async_trait::async_trait;
+use std::time::Duration;
+
+use super::{limiter::Outcome, Aimd};
+
+/// An algorithm for controlling a concurrency limit.
+#[async_trait]
+pub trait LimitAlgorithm: Send + Sync + 'static {
+    /// Update the concurrency limit in response to a new job completion.
+    async fn update(&mut self, old_limit: usize, sample: Sample) -> usize;
+}
+
+/// The result of a job (or jobs), including the [Outcome] (loss) and latency (delay).
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct Sample {
+    pub(crate) latency: Duration,
+    /// Jobs in flight when the sample was taken.
+    pub(crate) in_flight: usize,
+    pub(crate) outcome: Outcome,
+}
+
+#[derive(Clone, Copy, Debug, Default, clap::ValueEnum)]
+pub enum RateLimitAlgorithm {
+    Fixed,
+    #[default]
+    Aimd,
+}
+
+pub struct Fixed;
+
+#[async_trait]
+impl LimitAlgorithm for Fixed {
+    async fn update(&mut self, old_limit: usize, _sample: Sample) -> usize {
+        old_limit
+    }
+}
+
+#[derive(Clone, Copy, Debug)]
+pub struct RateLimiterConfig {
+    pub disable: bool,
+    pub algorithm: RateLimitAlgorithm,
+    pub timeout: Duration,
+    pub initial_limit: usize,
+    pub aimd_config: Option<AimdConfig>,
+}
+
+impl RateLimiterConfig {
+    pub fn create_rate_limit_algorithm(self) -> Box<dyn LimitAlgorithm> {
+        match self.algorithm {
+            RateLimitAlgorithm::Fixed => Box::new(Fixed),
+            RateLimitAlgorithm::Aimd => Box::new(Aimd::new(self.aimd_config.unwrap())), // For aimd algorithm config is mandatory.
+        }
+    }
+}
+
+impl Default for RateLimiterConfig {
+    fn default() -> Self {
+        Self {
+            disable: true,
+            algorithm: RateLimitAlgorithm::Aimd,
+            timeout: Duration::from_secs(1),
+            initial_limit: 100,
+            aimd_config: Some(AimdConfig::default()),
+        }
+    }
+}
+
+#[derive(clap::Parser, Clone, Copy, Debug)]
+pub struct AimdConfig {
+    /// Minimum limit for AIMD algorithm. Makes sense only if `rate_limit_algorithm` is `Aimd`.
+    #[clap(long, default_value_t = 1)]
+    pub aimd_min_limit: usize,
+    /// Maximum limit for AIMD algorithm. Makes sense only if `rate_limit_algorithm` is `Aimd`.
+    #[clap(long, default_value_t = 1500)]
+    pub aimd_max_limit: usize,
+    /// Increase AIMD increase by value in case of success. Makes sense only if `rate_limit_algorithm` is `Aimd`.
+    #[clap(long, default_value_t = 10)]
+    pub aimd_increase_by: usize,
+    /// Decrease AIMD decrease by value in case of timout/429. Makes sense only if `rate_limit_algorithm` is `Aimd`.
+    #[clap(long, default_value_t = 0.9)]
+    pub aimd_decrease_factor: f32,
+    /// A threshold below which the limit won't be increased. Makes sense only if `rate_limit_algorithm` is `Aimd`.
+    #[clap(long, default_value_t = 0.8)]
+    pub aimd_min_utilisation_threshold: f32,
+}
+
+impl Default for AimdConfig {
+    fn default() -> Self {
+        Self {
+            aimd_min_limit: 1,
+            aimd_max_limit: 1500,
+            aimd_increase_by: 10,
+            aimd_decrease_factor: 0.9,
+            aimd_min_utilisation_threshold: 0.8,
+        }
+    }
+}
--- a/proxy/src/rate_limiter/limiter.rs
+++ b/proxy/src/rate_limiter/limiter.rs
@@ -2,9 +2,10 @@ use std::{
    borrow::Cow,
    collections::hash_map::RandomState,
    hash::{BuildHasher, Hash},
+    net::IpAddr,
    sync::{
        atomic::{AtomicUsize, Ordering},
-        Mutex,
+        Arc, Mutex,
    },
 };

@@ -12,10 +13,22 @@ use anyhow::bail;
 use dashmap::DashMap;
 use itertools::Itertools;
 use rand::{rngs::StdRng, Rng, SeedableRng};
-use tokio::time::{Duration, Instant};
+use tokio::sync::{Mutex as AsyncMutex, Semaphore, SemaphorePermit};
+use tokio::time::{timeout, Duration, Instant};
 use tracing::info;

-use crate::intern::EndpointIdInt;
+use crate::{
+    intern::EndpointIdInt,
+    {
+        metrics::{Metrics, RateLimit},
+        EndpointId,
+    },
+};
+
+use super::{
+    limit_algorithm::{LimitAlgorithm, Sample},
+    RateLimiterConfig,
+};

 pub struct GlobalRateLimiter {
    data: Vec<RateBucket>,
@@ -61,7 +74,15 @@ impl GlobalRateLimiter {
 // Purposefully ignore user name and database name as clients can reconnect
 // with different names, so we'll end up sending some http requests to
 // the control plane.
-pub type EndpointRateLimiter = BucketRateLimiter<EndpointIdInt, StdRng, RandomState>;
+//
+// We also may save quite a lot of CPU (I think) by bailing out right after we
+// saw SNI, before doing TLS handshake. User-side error messages in that case
+// does not look very nice (`SSL SYSCALL error: Undefined error: 0`), so for now
+// I went with a more expensive way that yields user-friendlier error messages.
+pub type EndpointRateLimiter = BucketRateLimiter<EndpointId, StdRng, RandomState>;
+
+// This can't be just per IP because that would limit some PaaS that share IP addresses
+pub type AuthRateLimiter = BucketRateLimiter<(EndpointIdInt, IpAddr), StdRng, RandomState>;

 pub struct BucketRateLimiter<Key, Rand = StdRng, Hasher = RandomState> {
    map: DashMap<Key, Vec<RateBucket>, Hasher>,
@@ -134,6 +155,19 @@ impl RateBucketInfo {
        Self::new(100, Duration::from_secs(600)),
    ];

+    /// All of these are per endpoint-ip pair.
+    /// Context: 4096 rounds of pbkdf2 take about 1ms of cpu time to execute (1 milli-cpu-second or 1mcpus).
+    ///
+    /// First bucket: 300mcpus total per endpoint-ip pair
+    /// * 1228800 requests per second with 1 hash rounds. (endpoint rate limiter will catch this first)
+    /// * 300 requests per second with 4096 hash rounds.
+    /// * 2 requests per second with 600000 hash rounds.
+    pub const DEFAULT_AUTH_SET: [Self; 3] = [
+        Self::new(300 * 4096, Duration::from_secs(1)),
+        Self::new(200 * 4096, Duration::from_secs(60)),
+        Self::new(100 * 4096, Duration::from_secs(600)),
+    ];
+
    pub fn validate(info: &mut [Self]) -> anyhow::Result<()> {
        info.sort_unstable_by_key(|info| info.interval);
        let invalid = info
@@ -231,16 +265,423 @@ impl<K: Hash + Eq, R: Rng, S: BuildHasher + Clone> BucketRateLimiter<K, R, S> {
    }
 }

+/// Limits the number of concurrent jobs.
+///
+/// Concurrency is limited through the use of [Token]s. Acquire a token to run a job, and release the
+/// token once the job is finished.
+///
+/// The limit will be automatically adjusted based on observed latency (delay) and/or failures
+/// caused by overload (loss).
+pub struct Limiter {
+    limit_algo: AsyncMutex<Box<dyn LimitAlgorithm>>,
+    semaphore: std::sync::Arc<Semaphore>,
+    config: RateLimiterConfig,
+
+    // ONLY WRITE WHEN LIMIT_ALGO IS LOCKED
+    limits: AtomicUsize,
+
+    // ONLY USE ATOMIC ADD/SUB
+    in_flight: Arc<AtomicUsize>,
+
+    #[cfg(test)]
+    notifier: Option<std::sync::Arc<tokio::sync::Notify>>,
+}
+
+/// A concurrency token, required to run a job.
+///
+/// Release the token back to the [Limiter] after the job is complete.
+#[derive(Debug)]
+pub struct Token<'t> {
+    permit: Option<tokio::sync::SemaphorePermit<'t>>,
+    start: Instant,
+    in_flight: Arc<AtomicUsize>,
+}
+
+/// A snapshot of the state of the [Limiter].
+///
+/// Not guaranteed to be consistent under high concurrency.
+#[derive(Debug, Clone, Copy)]
+pub struct LimiterState {
+    limit: usize,
+    in_flight: usize,
+}
+
+/// Whether a job succeeded or failed as a result of congestion/overload.
+///
+/// Errors not considered to be caused by overload should be ignored.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum Outcome {
+    /// The job succeeded, or failed in a way unrelated to overload.
+    Success,
+    /// The job failed because of overload, e.g. it timed out or an explicit backpressure signal
+    /// was observed.
+    Overload,
+}
+
+impl Outcome {
+    fn from_reqwest_error(error: &reqwest_middleware::Error) -> Self {
+        match error {
+            reqwest_middleware::Error::Middleware(_) => Outcome::Success,
+            reqwest_middleware::Error::Reqwest(e) => {
+                if let Some(status) = e.status() {
+                    if status.is_server_error()
+                        || reqwest::StatusCode::TOO_MANY_REQUESTS.as_u16() == status
+                    {
+                        Outcome::Overload
+                    } else {
+                        Outcome::Success
+                    }
+                } else {
+                    Outcome::Success
+                }
+            }
+        }
+    }
+    fn from_reqwest_response(response: &reqwest::Response) -> Self {
+        if response.status().is_server_error()
+            || response.status() == reqwest::StatusCode::TOO_MANY_REQUESTS
+        {
+            Outcome::Overload
+        } else {
+            Outcome::Success
+        }
+    }
+}
+
+impl Limiter {
+    /// Create a limiter with a given limit control algorithm.
+    pub fn new(config: RateLimiterConfig) -> Self {
+        assert!(config.initial_limit > 0);
+        Self {
+            limit_algo: AsyncMutex::new(config.create_rate_limit_algorithm()),
+            semaphore: Arc::new(Semaphore::new(config.initial_limit)),
+            config,
+            limits: AtomicUsize::new(config.initial_limit),
+            in_flight: Arc::new(AtomicUsize::new(0)),
+            #[cfg(test)]
+            notifier: None,
+        }
+    }
+    // pub fn new(limit_algorithm: T, timeout: Duration, initial_limit: usize) -> Self {
+    //     assert!(initial_limit > 0);
+
+    //     Self {
+    //         limit_algo: AsyncMutex::new(limit_algorithm),
+    //         semaphore: Arc::new(Semaphore::new(initial_limit)),
+    //         timeout,
+    //         limits: AtomicUsize::new(initial_limit),
+    //         in_flight: Arc::new(AtomicUsize::new(0)),
+    //         #[cfg(test)]
+    //         notifier: None,
+    //     }
+    // }
+
+    /// In some cases [Token]s are acquired asynchronously when updating the limit.
+    #[cfg(test)]
+    pub fn with_release_notifier(mut self, n: std::sync::Arc<tokio::sync::Notify>) -> Self {
+        self.notifier = Some(n);
+        self
+    }
+
+    /// Try to immediately acquire a concurrency [Token].
+    ///
+    /// Returns `None` if there are none available.
+    pub fn try_acquire(&self) -> Option<Token> {
+        let result = if self.config.disable {
+            // If the rate limiter is disabled, we can always acquire a token.
+            Some(Token::new(None, self.in_flight.clone()))
+        } else {
+            self.semaphore
+                .try_acquire()
+                .map(|permit| Token::new(Some(permit), self.in_flight.clone()))
+                .ok()
+        };
+        if result.is_some() {
+            self.in_flight.fetch_add(1, Ordering::AcqRel);
+        }
+        result
+    }
+
+    /// Try to acquire a concurrency [Token], waiting for `duration` if there are none available.
+    ///
+    /// Returns `None` if there are none available after `duration`.
+    pub async fn acquire_timeout(&self, duration: Duration) -> Option<Token<'_>> {
+        info!("acquiring token: {:?}", self.semaphore.available_permits());
+        let result = if self.config.disable {
+            // If the rate limiter is disabled, we can always acquire a token.
+            Some(Token::new(None, self.in_flight.clone()))
+        } else {
+            match timeout(duration, self.semaphore.acquire()).await {
+                Ok(maybe_permit) => maybe_permit
+                    .map(|permit| Token::new(Some(permit), self.in_flight.clone()))
+                    .ok(),
+                Err(_) => None,
+            }
+        };
+        if result.is_some() {
+            self.in_flight.fetch_add(1, Ordering::AcqRel);
+        }
+        result
+    }
+
+    /// Return the concurrency [Token], along with the outcome of the job.
+    ///
+    /// The [Outcome] of the job, and the time taken to perform it, may be used
+    /// to update the concurrency limit.
+    ///
+    /// Set the outcome to `None` to ignore the job.
+    pub async fn release(&self, mut token: Token<'_>, outcome: Option<Outcome>) {
+        tracing::info!("outcome is {:?}", outcome);
+        let in_flight = self.in_flight.load(Ordering::Acquire);
+        let old_limit = self.limits.load(Ordering::Acquire);
+        let available = if self.config.disable {
+            0 // This is not used in the algorithm and can be anything. If the config disable it makes sense to set it to 0.
+        } else {
+            self.semaphore.available_permits()
+        };
+        let total = in_flight + available;
+
+        let mut algo = self.limit_algo.lock().await;
+
+        let new_limit = if let Some(outcome) = outcome {
+            let sample = Sample {
+                latency: token.start.elapsed(),
+                in_flight,
+                outcome,
+            };
+            algo.update(old_limit, sample).await
+        } else {
+            old_limit
+        };
+        tracing::info!("new limit is {}", new_limit);
+        let actual_limit = if new_limit < total {
+            token.forget();
+            total.saturating_sub(1)
+        } else {
+            if !self.config.disable {
+                self.semaphore.add_permits(new_limit.saturating_sub(total));
+            }
+            new_limit
+        };
+        let metric = &Metrics::get().semaphore_control_plane_limit;
+        metric.set(RateLimit::Expected, new_limit as i64);
+        metric.set(RateLimit::Actual, actual_limit as i64);
+        self.limits.store(new_limit, Ordering::Release);
+        #[cfg(test)]
+        if let Some(n) = &self.notifier {
+            n.notify_one();
+        }
+    }
+
+    /// The current state of the limiter.
+    pub fn state(&self) -> LimiterState {
+        let limit = self.limits.load(Ordering::Relaxed);
+        let in_flight = self.in_flight.load(Ordering::Relaxed);
+        LimiterState { limit, in_flight }
+    }
+}
+
+impl<'t> Token<'t> {
+    fn new(permit: Option<SemaphorePermit<'t>>, in_flight: Arc<AtomicUsize>) -> Self {
+        Self {
+            permit,
+            start: Instant::now(),
+            in_flight,
+        }
+    }
+
+    pub fn forget(&mut self) {
+        if let Some(permit) = self.permit.take() {
+            permit.forget();
+        }
+    }
+}
+
+impl Drop for Token<'_> {
+    fn drop(&mut self) {
+        self.in_flight.fetch_sub(1, Ordering::AcqRel);
+    }
+}
+
+impl LimiterState {
+    /// The current concurrency limit.
+    pub fn limit(&self) -> usize {
+        self.limit
+    }
+    /// The number of jobs in flight.
+    pub fn in_flight(&self) -> usize {
+        self.in_flight
+    }
+}
+
+#[async_trait::async_trait]
+impl reqwest_middleware::Middleware for Limiter {
+    async fn handle(
+        &self,
+        req: reqwest::Request,
+        extensions: &mut task_local_extensions::Extensions,
+        next: reqwest_middleware::Next<'_>,
+    ) -> reqwest_middleware::Result<reqwest::Response> {
+        let timer = Metrics::get()
+            .proxy
+            .control_plane_token_acquire_seconds
+            .start_timer();
+        let token = self
+            .acquire_timeout(self.config.timeout)
+            .await
+            .ok_or_else(|| {
+                reqwest_middleware::Error::Middleware(
+                    // TODO: Should we map it into user facing errors?
+                    crate::console::errors::ApiError::Console {
+                        status: crate::http::StatusCode::TOO_MANY_REQUESTS,
+                        text: "Too many requests".into(),
+                    }
+                    .into(),
+                )
+            })?;
+        let duration = timer.observe();
+        info!(
+            ?duration,
+            "waiting for token to connect to the control plane"
+        );
+
+        match next.run(req, extensions).await {
+            Ok(response) => {
+                self.release(token, Some(Outcome::from_reqwest_response(&response)))
+                    .await;
+                Ok(response)
+            }
+            Err(e) => {
+                self.release(token, Some(Outcome::from_reqwest_error(&e)))
+                    .await;
+                Err(e)
+            }
+        }
+    }
+}
+
 #[cfg(test)]
 mod tests {
-    use std::{hash::BuildHasherDefault, time::Duration};
+    use std::{hash::BuildHasherDefault, pin::pin, task::Context, time::Duration};

+    use futures::{task::noop_waker_ref, Future};
    use rand::SeedableRng;
    use rustc_hash::FxHasher;
    use tokio::time;

-    use super::{BucketRateLimiter, EndpointRateLimiter};
-    use crate::{intern::EndpointIdInt, rate_limiter::RateBucketInfo, EndpointId};
+    use super::{BucketRateLimiter, EndpointRateLimiter, Limiter, Outcome};
+    use crate::{
+        rate_limiter::{RateBucketInfo, RateLimitAlgorithm},
+        EndpointId,
+    };
+
+    #[tokio::test]
+    async fn it_works() {
+        let config = super::RateLimiterConfig {
+            algorithm: RateLimitAlgorithm::Fixed,
+            timeout: Duration::from_secs(1),
+            initial_limit: 10,
+            disable: false,
+            ..Default::default()
+        };
+        let limiter = Limiter::new(config);
+
+        let token = limiter.try_acquire().unwrap();
+
+        limiter.release(token, Some(Outcome::Success)).await;
+
+        assert_eq!(limiter.state().limit(), 10);
+    }
+
+    #[tokio::test]
+    async fn is_fair() {
+        let config = super::RateLimiterConfig {
+            algorithm: RateLimitAlgorithm::Fixed,
+            timeout: Duration::from_secs(1),
+            initial_limit: 1,
+            disable: false,
+            ..Default::default()
+        };
+        let limiter = Limiter::new(config);
+
+        // === TOKEN 1 ===
+        let token1 = limiter.try_acquire().unwrap();
+
+        let mut token2_fut = pin!(limiter.acquire_timeout(Duration::from_secs(1)));
+        assert!(
+            token2_fut
+                .as_mut()
+                .poll(&mut Context::from_waker(noop_waker_ref()))
+                .is_pending(),
+            "token is acquired by token1"
+        );
+
+        let mut token3_fut = pin!(limiter.acquire_timeout(Duration::from_secs(1)));
+        assert!(
+            token3_fut
+                .as_mut()
+                .poll(&mut Context::from_waker(noop_waker_ref()))
+                .is_pending(),
+            "token is acquired by token1"
+        );
+
+        limiter.release(token1, Some(Outcome::Success)).await;
+        // === END TOKEN 1 ===
+
+        // === TOKEN 2 ===
+        assert!(
+            limiter.try_acquire().is_none(),
+            "token is acquired by token2"
+        );
+
+        assert!(
+            token3_fut
+                .as_mut()
+                .poll(&mut Context::from_waker(noop_waker_ref()))
+                .is_pending(),
+            "token is acquired by token2"
+        );
+
+        let token2 = token2_fut.await.unwrap();
+
+        limiter.release(token2, Some(Outcome::Success)).await;
+        // === END TOKEN 2 ===
+
+        // === TOKEN 3 ===
+        assert!(
+            limiter.try_acquire().is_none(),
+            "token is acquired by token3"
+        );
+
+        let token3 = token3_fut.await.unwrap();
+        limiter.release(token3, Some(Outcome::Success)).await;
+        // === END TOKEN 3 ===
+
+        // === TOKEN 4 ===
+        let token4 = limiter.try_acquire().unwrap();
+        limiter.release(token4, Some(Outcome::Success)).await;
+    }
+
+    #[tokio::test]
+    async fn disable() {
+        let config = super::RateLimiterConfig {
+            algorithm: RateLimitAlgorithm::Fixed,
+            timeout: Duration::from_secs(1),
+            initial_limit: 1,
+            disable: true,
+            ..Default::default()
+        };
+        let limiter = Limiter::new(config);
+
+        // === TOKEN 1 ===
+        let token1 = limiter.try_acquire().unwrap();
+        let token2 = limiter.try_acquire().unwrap();
+        let state = limiter.state();
+        assert_eq!(state.limit(), 1);
+        assert_eq!(state.in_flight(), 2); // For disabled limiter, it's expected.
+        limiter.release(token1, None).await;
+        limiter.release(token2, None).await;
+    }

    #[test]
    fn rate_bucket_rpi() {
@@ -290,40 +731,39 @@ mod tests {
        let limiter = EndpointRateLimiter::new(rates);

        let endpoint = EndpointId::from("ep-my-endpoint-1234");
-        let endpoint = EndpointIdInt::from(endpoint);

        time::pause();

        for _ in 0..100 {
-            assert!(limiter.check(endpoint, 1));
+            assert!(limiter.check(endpoint.clone(), 1));
        }
        // more connections fail
-        assert!(!limiter.check(endpoint, 1));
+        assert!(!limiter.check(endpoint.clone(), 1));

        // fail even after 500ms as it's in the same bucket
        time::advance(time::Duration::from_millis(500)).await;
-        assert!(!limiter.check(endpoint, 1));
+        assert!(!limiter.check(endpoint.clone(), 1));

        // after a full 1s, 100 requests are allowed again
        time::advance(time::Duration::from_millis(500)).await;
        for _ in 1..6 {
            for _ in 0..50 {
-                assert!(limiter.check(endpoint, 2));
+                assert!(limiter.check(endpoint.clone(), 2));
            }
            time::advance(time::Duration::from_millis(1000)).await;
        }

        // more connections after 600 will exceed the 20rps@30s limit
-        assert!(!limiter.check(endpoint, 1));
+        assert!(!limiter.check(endpoint.clone(), 1));

        // will still fail before the 30 second limit
        time::advance(time::Duration::from_millis(30_000 - 6_000 - 1)).await;
-        assert!(!limiter.check(endpoint, 1));
+        assert!(!limiter.check(endpoint.clone(), 1));

        // after the full 30 seconds, 100 requests are allowed again
        time::advance(time::Duration::from_millis(1)).await;
        for _ in 0..100 {
-            assert!(limiter.check(endpoint, 1));
+            assert!(limiter.check(endpoint.clone(), 1));
        }
    }

@@ -343,4 +783,31 @@ mod tests {
        }
        assert!(limiter.map.len() < 150_000);
    }
+
+    #[test]
+    fn test_default_auth_set() {
+        // these values used to exceed u32::MAX
+        assert_eq!(
+            RateBucketInfo::DEFAULT_AUTH_SET,
+            [
+                RateBucketInfo {
+                    interval: Duration::from_secs(1),
+                    max_rpi: 300 * 4096,
+                },
+                RateBucketInfo {
+                    interval: Duration::from_secs(60),
+                    max_rpi: 200 * 4096 * 60,
+                },
+                RateBucketInfo {
+                    interval: Duration::from_secs(600),
+                    max_rpi: 100 * 4096 * 600,
+                }
+            ]
+        );
+
+        for x in RateBucketInfo::DEFAULT_AUTH_SET {
+            let y = x.to_string().parse().unwrap();
+            assert_eq!(x, y);
+        }
+    }
 }
--- a/proxy/src/redis/connection_with_credentials_provider.rs
+++ b/proxy/src/redis/connection_with_credentials_provider.rs
@@ -77,14 +77,10 @@ impl ConnectionWithCredentialsProvider {
        }
    }

-    async fn ping(con: &mut MultiplexedConnection) -> RedisResult<()> {
-        redis::cmd("PING").query_async(con).await
-    }
-
    pub async fn connect(&mut self) -> anyhow::Result<()> {
        let _guard = self.mutex.lock().await;
        if let Some(con) = self.con.as_mut() {
-            match Self::ping(con).await {
+            match redis::cmd("PING").query_async(con).await {
                Ok(()) => {
                    return Ok(());
                }
@@ -100,7 +96,7 @@ impl ConnectionWithCredentialsProvider {
        if let Some(f) = self.refresh_token_task.take() {
            f.abort()
        }
-        let mut con = self
+        let con = self
            .get_client()
            .await?
            .get_multiplexed_tokio_connection()
@@ -108,20 +104,10 @@ impl ConnectionWithCredentialsProvider {
        if let Credentials::Dynamic(credentials_provider, _) = &self.credentials {
            let credentials_provider = credentials_provider.clone();
            let con2 = con.clone();
-            let f = tokio::task::Builder::new()
-                .name("redis keep connection")
-                .spawn(async move {
-                    let _ = Self::keep_connection(con2, credentials_provider).await;
-                });
-            self.refresh_token_task = Some(f.unwrap());
-        }
-        match Self::ping(&mut con).await {
-            Ok(()) => {
-                info!("Connection succesfully established");
-            }
-            Err(e) => {
-                error!("Connection is broken. Error during PING: {e:?}");
-            }
+            let f = tokio::spawn(async move {
+                let _ = Self::keep_connection(con2, credentials_provider).await;
+            });
+            self.refresh_token_task = Some(f);
        }
        self.con = Some(con);
        Ok(())
--- a/proxy/src/redis/notifications.rs
+++ b/proxy/src/redis/notifications.rs
@@ -142,13 +142,10 @@ impl<C: ProjectInfoCache + Send + Sync + 'static> MessageHandler<C> {
                // To make sure that the entry is invalidated, let's repeat the invalidation in INVALIDATION_LAG seconds.
                // TODO: include the version (or the timestamp) in the message and invalidate only if the entry is cached before the message.
                let cache = self.cache.clone();
-                tokio::task::Builder::new()
-                    .name("invalidate cache lazy")
-                    .spawn(async move {
-                        tokio::time::sleep(INVALIDATION_LAG).await;
-                        invalidate_cache(cache, msg);
-                    })
-                    .unwrap();
+                tokio::spawn(async move {
+                    tokio::time::sleep(INVALIDATION_LAG).await;
+                    invalidate_cache(cache, msg);
+                });
            }
        }

--- a/proxy/src/serverless.rs
+++ b/proxy/src/serverless.rs
@@ -35,6 +35,7 @@ use crate::context::RequestMonitoring;
 use crate::metrics::Metrics;
 use crate::protocol2::WithClientIp;
 use crate::proxy::run_until_cancelled;
+use crate::rate_limiter::EndpointRateLimiter;
 use crate::serverless::backend::PoolingBackend;
 use crate::serverless::http_util::{api_error_into_response, json_response};

@@ -52,6 +53,7 @@ pub async fn task_main(
    config: &'static ProxyConfig,
    ws_listener: TcpListener,
    cancellation_token: CancellationToken,
+    endpoint_rate_limiter: Arc<EndpointRateLimiter>,
    cancellation_handler: Arc<CancellationHandlerMain>,
 ) -> anyhow::Result<()> {
    scopeguard::defer! {
@@ -61,28 +63,22 @@ pub async fn task_main(
    let conn_pool = conn_pool::GlobalConnPool::new(&config.http_config);
    {
        let conn_pool = Arc::clone(&conn_pool);
-        tokio::task::Builder::new()
-            .name("serverless pool gc")
-            .spawn(async move {
-                conn_pool.gc_worker(StdRng::from_entropy()).await;
-            })
-            .unwrap();
+        tokio::spawn(async move {
+            conn_pool.gc_worker(StdRng::from_entropy()).await;
+        });
    }

    // shutdown the connection pool
-    tokio::task::Builder::new()
-        .name("serverless pool shutdown")
-        .spawn({
-            let cancellation_token = cancellation_token.clone();
-            let conn_pool = conn_pool.clone();
-            async move {
-                cancellation_token.cancelled().await;
-                tokio::task::spawn_blocking(move || conn_pool.shutdown())
-                    .await
-                    .unwrap();
-            }
-        })
-        .unwrap();
+    tokio::spawn({
+        let cancellation_token = cancellation_token.clone();
+        let conn_pool = conn_pool.clone();
+        async move {
+            cancellation_token.cancelled().await;
+            tokio::task::spawn_blocking(move || conn_pool.shutdown())
+                .await
+                .unwrap();
+        }
+    });

    let backend = Arc::new(PoolingBackend {
        pool: Arc::clone(&conn_pool),
@@ -115,25 +111,21 @@ pub async fn task_main(
        let conn_id = uuid::Uuid::new_v4();
        let http_conn_span = tracing::info_span!("http_conn", ?conn_id);

-        tokio::task::Builder::new()
-            .name("serverless conn handler")
-            .spawn(
-                connections.track_future(
-                    connection_handler(
-                        config,
-                        backend.clone(),
-                        connections.clone(),
-                        cancellation_handler.clone(),
-                        cancellation_token.clone(),
-                        server.clone(),
-                        tls_acceptor.clone(),
-                        conn,
-                        peer_addr,
-                    )
-                    .instrument(http_conn_span),
-                ),
+        connections.spawn(
+            connection_handler(
+                config,
+                backend.clone(),
+                connections.clone(),
+                cancellation_handler.clone(),
+                endpoint_rate_limiter.clone(),
+                cancellation_token.clone(),
+                server.clone(),
+                tls_acceptor.clone(),
+                conn,
+                peer_addr,
            )
-            .unwrap();
+            .instrument(http_conn_span),
+        );
    }

    connections.wait().await;
@@ -155,6 +147,7 @@ async fn connection_handler(
    backend: Arc<PoolingBackend>,
    connections: TaskTracker,
    cancellation_handler: Arc<CancellationHandlerMain>,
+    endpoint_rate_limiter: Arc<EndpointRateLimiter>,
    cancellation_token: CancellationToken,
    server: Builder<TokioExecutor>,
    tls_acceptor: TlsAcceptor,
@@ -179,10 +172,6 @@ async fn connection_handler(
    };

    let peer_addr = peer.unwrap_or(peer_addr).ip();
-    let has_private_peer_addr = match peer_addr {
-        IpAddr::V4(ip) => ip.is_private(),
-        _ => false,
-    };
    info!(?session_id, %peer_addr, "accepted new TCP connection");

    // try upgrade to TLS, but with a timeout.
@@ -193,17 +182,13 @@ async fn connection_handler(
        }
        // The handshake failed
        Ok(Err(e)) => {
-            if !has_private_peer_addr {
-                Metrics::get().proxy.tls_handshake_failures.inc();
-            }
+            Metrics::get().proxy.tls_handshake_failures.inc();
            warn!(?session_id, %peer_addr, "failed to accept TLS connection: {e:?}");
            return;
        }
        // The handshake timed out
        Err(e) => {
-            if !has_private_peer_addr {
-                Metrics::get().proxy.tls_handshake_failures.inc();
-            }
+            Metrics::get().proxy.tls_handshake_failures.inc();
            warn!(?session_id, %peer_addr, "failed to accept TLS connection: {e:?}");
            return;
        }
@@ -229,25 +214,21 @@ async fn connection_handler(

            // `request_handler` is not cancel safe. It expects to be cancelled only at specific times.
            // By spawning the future, we ensure it never gets cancelled until it decides to.
-            let handler = tokio::task::Builder::new()
-                .name("serverless request handler")
-                .spawn(
-                    connections.track_future(
-                        request_handler(
-                            req,
-                            config,
-                            backend.clone(),
-                            connections.clone(),
-                            cancellation_handler.clone(),
-                            session_id,
-                            peer_addr,
-                            http_request_token,
-                        )
-                        .in_current_span()
-                        .map_ok_or_else(api_error_into_response, |r| r),
-                    ),
+            let handler = connections.spawn(
+                request_handler(
+                    req,
+                    config,
+                    backend.clone(),
+                    connections.clone(),
+                    cancellation_handler.clone(),
+                    session_id,
+                    peer_addr,
+                    endpoint_rate_limiter.clone(),
+                    http_request_token,
                )
-                .unwrap();
+                .in_current_span()
+                .map_ok_or_else(api_error_into_response, |r| r),
+            );

            async move {
                let res = handler.await;
@@ -281,6 +262,7 @@ async fn request_handler(
    cancellation_handler: Arc<CancellationHandlerMain>,
    session_id: uuid::Uuid,
    peer_addr: IpAddr,
+    endpoint_rate_limiter: Arc<EndpointRateLimiter>,
    // used to cancel in-flight HTTP requests. not used to cancel websockets
    http_cancellation_token: CancellationToken,
 ) -> Result<Response<Full<Bytes>>, ApiError> {
@@ -306,27 +288,23 @@ async fn request_handler(
        let (response, websocket) = hyper_tungstenite::upgrade(&mut request, None)
            .map_err(|e| ApiError::BadRequest(e.into()))?;

-        tokio::task::Builder::new()
-            .name("websocket client conn")
-            .spawn(
-                ws_connections.track_future(
-                    async move {
-                        if let Err(e) = websocket::serve_websocket(
-                            config,
-                            ctx,
-                            websocket,
-                            cancellation_handler,
-                            host,
-                        )
-                        .await
-                        {
-                            error!("error in websocket connection: {e:#}");
-                        }
-                    }
-                    .instrument(span),
-                ),
-            )
-            .unwrap();
+        ws_connections.spawn(
+            async move {
+                if let Err(e) = websocket::serve_websocket(
+                    config,
+                    ctx,
+                    websocket,
+                    cancellation_handler,
+                    host,
+                    endpoint_rate_limiter,
+                )
+                .await
+                {
+                    error!("error in websocket connection: {e:#}");
+                }
+            }
+            .instrument(span),
+        );

        // Return the response so the spawned future can continue.
        Ok(response)
--- a/proxy/src/serverless/backend.rs
+++ b/proxy/src/serverless/backend.rs
@@ -6,7 +6,7 @@ use tracing::{field::display, info};
 use crate::{
    auth::{backend::ComputeCredentials, check_peer_addr_is_in_list, AuthError},
    compute,
-    config::{AuthenticationConfig, ProxyConfig},
+    config::ProxyConfig,
    console::{
        errors::{GetAuthInfoError, WakeComputeError},
        CachedNodeInfo,
@@ -27,7 +27,6 @@ impl PoolingBackend {
    pub async fn authenticate(
        &self,
        ctx: &mut RequestMonitoring,
-        config: &AuthenticationConfig,
        conn_info: &ConnInfo,
    ) -> Result<ComputeCredentials, AuthError> {
        let user_info = conn_info.user_info.clone();
@@ -44,7 +43,6 @@ impl PoolingBackend {
        let secret = match cached_secret.value.clone() {
            Some(secret) => self.config.authentication_config.check_rate_limit(
                ctx,
-                config,
                secret,
                &user_info.endpoint,
                true,
@@ -107,6 +105,7 @@ impl PoolingBackend {
                pool: self.pool.clone(),
            },
            &backend,
+            false, // do not allow self signed compute for http flow
        )
        .await
    }
--- a/proxy/src/serverless/conn_pool.rs
+++ b/proxy/src/serverless/conn_pool.rs
@@ -492,7 +492,7 @@ pub fn poll_client<C: ClientInnerExt>(
    let cancel = CancellationToken::new();
    let cancelled = cancel.clone().cancelled_owned();

-    tokio::task::Builder::new().name("pooled conn").spawn(
+    tokio::spawn(
    async move {
        let _conn_gauge = conn_gauge;
        let mut idle_timeout = pin!(tokio::time::sleep(idle));
@@ -565,7 +565,7 @@ pub fn poll_client<C: ClientInnerExt>(
        }).await;

    }
-    .instrument(span)).unwrap();
+    .instrument(span));
    let inner = ClientInner {
        inner: client,
        session: tx,
--- a/proxy/src/serverless/sql_over_http.rs
+++ b/proxy/src/serverless/sql_over_http.rs
@@ -541,9 +541,7 @@ async fn handle_inner(
    .map_err(SqlOverHttpError::from);

    let authenticate_and_connect = async {
-        let keys = backend
-            .authenticate(ctx, &config.authentication_config, &conn_info)
-            .await?;
+        let keys = backend.authenticate(ctx, &conn_info).await?;
        let client = backend
            .connect_to_compute(ctx, conn_info, keys, !allow_pool)
            .await?;
--- a/proxy/src/serverless/websocket.rs
+++ b/proxy/src/serverless/websocket.rs
@@ -5,6 +5,7 @@ use crate::{
    error::{io_error, ReportableError},
    metrics::Metrics,
    proxy::{handle_client, ClientMode},
+    rate_limiter::EndpointRateLimiter,
 };
 use bytes::{Buf, Bytes};
 use futures::{Sink, Stream};
@@ -135,6 +136,7 @@ pub async fn serve_websocket(
    websocket: HyperWebsocket,
    cancellation_handler: Arc<CancellationHandlerMain>,
    hostname: Option<String>,
+    endpoint_rate_limiter: Arc<EndpointRateLimiter>,
 ) -> anyhow::Result<()> {
    let websocket = websocket.await?;
    let conn_gauge = Metrics::get()
@@ -148,6 +150,7 @@ pub async fn serve_websocket(
        cancellation_handler,
        WebSocketRw::new(websocket),
        ClientMode::Websockets { hostname },
+        endpoint_rate_limiter,
        conn_gauge,
    )
    .await;
--- a/proxy/src/stream.rs
+++ b/proxy/src/stream.rs
@@ -223,20 +223,12 @@ pub enum StreamUpgradeError {

 impl<S: AsyncRead + AsyncWrite + Unpin> Stream<S> {
    /// If possible, upgrade raw stream into a secure TLS-based stream.
-    pub async fn upgrade(
-        self,
-        cfg: Arc<ServerConfig>,
-        record_handshake_error: bool,
-    ) -> Result<TlsStream<S>, StreamUpgradeError> {
+    pub async fn upgrade(self, cfg: Arc<ServerConfig>) -> Result<TlsStream<S>, StreamUpgradeError> {
        match self {
            Stream::Raw { raw } => Ok(tokio_rustls::TlsAcceptor::from(cfg)
                .accept(raw)
                .await
-                .inspect_err(|_| {
-                    if record_handshake_error {
-                        Metrics::get().proxy.tls_handshake_failures.inc()
-                    }
-                })?),
+                .inspect_err(|_| Metrics::get().proxy.tls_handshake_failures.inc())?),
            Stream::Tls { .. } => Err(StreamUpgradeError::AlreadyTls),
        }
    }
--- a/proxy/src/usage_metrics.rs
+++ b/proxy/src/usage_metrics.rs
@@ -495,7 +495,7 @@ mod tests {
    use url::Url;

    use super::*;
-    use crate::{http, BranchId, EndpointId};
+    use crate::{http, rate_limiter::RateLimiterConfig, BranchId, EndpointId};

    #[tokio::test]
    async fn metrics() {
@@ -525,7 +525,7 @@ mod tests {
        tokio::spawn(server);

        let metrics = Metrics::default();
-        let client = http::new_client();
+        let client = http::new_client(RateLimiterConfig::default());
        let endpoint = Url::parse(&format!("http://{addr}")).unwrap();
        let now = Utc::now();

--- a/pyproject.toml
+++ b/pyproject.toml
@@ -33,7 +33,7 @@ psutil = "^5.9.4"
 types-psutil = "^5.9.5.12"
 types-toml = "^0.10.8.6"
 pytest-httpserver = "^1.0.8"
-aiohttp = "3.9.4"
+aiohttp = "3.9.2"
 pytest-rerunfailures = "^13.0"
 types-pytest-lazy-fixture = "^0.6.3.3"
 pytest-split = "^0.8.1"
--- a/storage_controller/src/scheduler.rs
+++ b/storage_controller/src/scheduler.rs
@@ -84,20 +84,6 @@ impl std::ops::Add for AffinityScore {
    }
 }

-/// Hint for whether this is a sincere attempt to schedule, or a speculative
-/// check for where we _would_ schedule (done during optimization)
-#[derive(Debug)]
-pub(crate) enum ScheduleMode {
-    Normal,
-    Speculative,
-}
-
-impl Default for ScheduleMode {
-    fn default() -> Self {
-        Self::Normal
-    }
-}
-
 // For carrying state between multiple calls to [`TenantShard::schedule`], e.g. when calling
 // it for many shards in the same tenant.
 #[derive(Debug, Default)]
@@ -107,8 +93,6 @@ pub(crate) struct ScheduleContext {

    /// Specifically how many _attached_ locations are on each node
    pub(crate) attached_nodes: HashMap<NodeId, usize>,
-
-    pub(crate) mode: ScheduleMode,
 }

 impl ScheduleContext {
@@ -345,34 +329,27 @@ impl Scheduler {
        scores.sort_by_key(|i| (i.1, i.2, i.0));

        if scores.is_empty() {
-            // After applying constraints, no pageservers were left.
-            if !matches!(context.mode, ScheduleMode::Speculative) {
-                // If this was not a speculative attempt, log details to understand why we couldn't
-                // schedule: this may help an engineer understand if some nodes are marked offline
-                // in a way that's preventing progress.
+            // After applying constraints, no pageservers were left.  We log some detail about
+            // the state of nodes to help understand why this happened.  This is not logged as an error because
+            // it is legitimately possible for enough nodes to be Offline to prevent scheduling a shard.
+            tracing::info!("Scheduling failure, while excluding {hard_exclude:?}, node states:");
+            for (node_id, node) in &self.nodes {
                tracing::info!(
-                    "Scheduling failure, while excluding {hard_exclude:?}, node states:"
+                    "Node {node_id}: may_schedule={} shards={}",
+                    node.may_schedule != MaySchedule::No,
+                    node.shard_count
                );
-                for (node_id, node) in &self.nodes {
-                    tracing::info!(
-                        "Node {node_id}: may_schedule={} shards={}",
-                        node.may_schedule != MaySchedule::No,
-                        node.shard_count
-                    );
-                }
            }
+
            return Err(ScheduleError::ImpossibleConstraint);
        }

        // Lowest score wins
        let node_id = scores.first().unwrap().0;
-
-        if !matches!(context.mode, ScheduleMode::Speculative) {
-            tracing::info!(
+        tracing::info!(
            "scheduler selected node {node_id} (elegible nodes {:?}, hard exclude: {hard_exclude:?}, soft exclude: {context:?})",
            scores.iter().map(|i| i.0 .0).collect::<Vec<_>>()
        );
-        }

        // Note that we do not update shard count here to reflect the scheduling: that
        // is IntentState's job when the scheduled location is used.
--- a/storage_controller/src/service.rs
+++ b/storage_controller/src/service.rs
@@ -11,7 +11,7 @@ use crate::{
    id_lock_map::IdLockMap,
    persistence::{AbortShardSplitStatus, TenantFilter},
    reconciler::ReconcileError,
-    scheduler::{ScheduleContext, ScheduleMode},
+    scheduler::ScheduleContext,
 };
 use anyhow::Context;
 use control_plane::storage_controller::{
@@ -2744,7 +2744,7 @@ impl Service {
        let mut describe_shards = Vec::new();

        for shard in shards {
-            if shard.tenant_shard_id.is_shard_zero() {
+            if shard.tenant_shard_id.is_zero() {
                shard_zero = Some(shard);
            }

@@ -4084,7 +4084,7 @@ impl Service {

        let mut reconciles_spawned = 0;
        for (tenant_shard_id, shard) in tenants.iter_mut() {
-            if tenant_shard_id.is_shard_zero() {
+            if tenant_shard_id.is_zero() {
                schedule_context = ScheduleContext::default();
            }

@@ -4134,10 +4134,9 @@ impl Service {
        let mut work = Vec::new();

        for (tenant_shard_id, shard) in tenants.iter() {
-            if tenant_shard_id.is_shard_zero() {
+            if tenant_shard_id.is_zero() {
                // Reset accumulators on the first shard in a tenant
                schedule_context = ScheduleContext::default();
-                schedule_context.mode = ScheduleMode::Speculative;
                tenant_shards.clear();
            }

--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -2449,12 +2449,10 @@ class NeonPageserver(PgProtocol):
                if cur_line_no < skip_until_line_no:
                    cur_line_no += 1
                    continue
-                elif contains_re.search(line):
+                if contains_re.search(line):
                    # found it!
                    cur_line_no += 1
                    return (line, LogCursor(cur_line_no))
-                else:
-                    cur_line_no += 1
        return None

    def tenant_attach(
--- a/test_runner/regress/test_pageserver_config.py
+++ b/test_runner/regress/test_pageserver_config.py
@@ -1,35 +0,0 @@
-import pytest
-from fixtures.neon_fixtures import (
-    NeonEnvBuilder,
-    last_flush_lsn_upload,
-)
-
-
-@pytest.mark.parametrize("kind", ["sync", "async"])
-def test_walredo_process_kind_config(neon_env_builder: NeonEnvBuilder, kind: str):
-    neon_env_builder.pageserver_config_override = f"walredo_process_kind = '{kind}'"
-    # ensure it starts
-    env = neon_env_builder.init_start()
-    # ensure the metric is set
-    ps_http = env.pageserver.http_client()
-    metrics = ps_http.get_metrics()
-    samples = metrics.query_all("pageserver_wal_redo_process_kind")
-    assert [(s.labels, s.value) for s in samples] == [({"kind": kind}, 1)]
-    # ensure default tenant's config kind matches
-    # => write some data to force-spawn walredo
-    ep = env.endpoints.create_start("main")
-    with ep.connect() as conn:
-        with conn.cursor() as cur:
-            cur.execute("create table foo(bar text)")
-            cur.execute("insert into foo select from generate_series(1, 100)")
-    last_flush_lsn_upload(env, ep, env.initial_tenant, env.initial_timeline)
-    ep.stop()
-    ep.start()
-    with ep.connect() as conn:
-        with conn.cursor() as cur:
-            cur.execute("select count(*) from foo")
-            [(count,)] = cur.fetchall()
-            assert count == 100
-
-    status = ps_http.tenant_status(env.initial_tenant)
-    assert status["walredo"]["process"]["kind"] == kind
--- a/test_runner/regress/test_pageserver_secondary.py
+++ b/test_runner/regress/test_pageserver_secondary.py
@@ -1,7 +1,6 @@
 import json
 import os
 import random
-import time
 from pathlib import Path
 from typing import Any, Dict, Optional

@@ -583,91 +582,6 @@ def test_secondary_downloads(neon_env_builder: NeonEnvBuilder):
    )


-def test_secondary_background_downloads(neon_env_builder: NeonEnvBuilder):
-    """
-    Slow test that runs in realtime, checks that the background scheduling of secondary
-    downloads happens as expected.
-    """
-    neon_env_builder.num_pageservers = 2
-    env = neon_env_builder.init_configs()
-    env.start()
-
-    # Create this many tenants, each with two timelines
-    tenant_count = 4
-    tenant_timelines = {}
-
-    # This mirrors a constant in `downloader.rs`
-    freshen_interval_secs = 60
-
-    for _i in range(0, tenant_count):
-        tenant_id = TenantId.generate()
-        timeline_a = TimelineId.generate()
-        timeline_b = TimelineId.generate()
-        env.neon_cli.create_tenant(
-            tenant_id,
-            timeline_a,
-            placement_policy='{"Attached":1}',
-            # Run with a low heatmap period so that we can avoid having to do synthetic API calls
-            # to trigger the upload promptly.
-            conf={"heatmap_period": "1s"},
-        )
-        env.neon_cli.create_timeline("main2", tenant_id, timeline_b)
-
-        tenant_timelines[tenant_id] = [timeline_a, timeline_b]
-
-    t_start = time.time()
-
-    # Wait long enough that the background downloads should happen; we expect all the inital layers
-    # of all the initial timelines to show up on the secondary location of each tenant.
-    time.sleep(freshen_interval_secs * 1.5)
-
-    for tenant_id, timelines in tenant_timelines.items():
-        attached_to_id = env.storage_controller.locate(tenant_id)[0]["node_id"]
-        ps_attached = env.get_pageserver(attached_to_id)
-        # We only have two: the other one must be secondary
-        ps_secondary = next(p for p in env.pageservers if p != ps_attached)
-
-        for timeline_id in timelines:
-            log.info(f"Checking for secondary timeline {timeline_id} on node {ps_secondary.id}")
-            # One or more layers should be present for all timelines
-            assert list_layers(ps_secondary, tenant_id, timeline_id)
-
-        # Delete the second timeline: this should be reflected later on the secondary
-        env.storage_controller.pageserver_api().timeline_delete(tenant_id, timelines[1])
-
-    # Wait long enough for the secondary locations to see the deletion
-    time.sleep(freshen_interval_secs * 1.5)
-
-    for tenant_id, timelines in tenant_timelines.items():
-        attached_to_id = env.storage_controller.locate(tenant_id)[0]["node_id"]
-        ps_attached = env.get_pageserver(attached_to_id)
-        # We only have two: the other one must be secondary
-        ps_secondary = next(p for p in env.pageservers if p != ps_attached)
-
-        # This one was not deleted
-        assert list_layers(ps_secondary, tenant_id, timelines[0])
-
-        # This one was deleted
-        assert not list_layers(ps_secondary, tenant_id, timelines[1])
-
-    t_end = time.time()
-
-    # Measure how many heatmap downloads we did in total: this checks that we succeeded with
-    # proper scheduling, and not some bug that just runs downloads in a loop.
-    total_heatmap_downloads = 0
-    for ps in env.pageservers:
-        v = ps.http_client().get_metric_value("pageserver_secondary_download_heatmap_total")
-        assert v is not None
-        total_heatmap_downloads += int(v)
-
-    download_rate = (total_heatmap_downloads / tenant_count) / (t_end - t_start)
-
-    expect_download_rate = 1.0 / freshen_interval_secs
-    log.info(f"Download rate: {download_rate * 60}/min vs expected {expect_download_rate * 60}/min")
-
-    assert download_rate < expect_download_rate * 2
-
-
@pytest.mark.skipif(os.environ.get("BUILD_TYPE") == "debug", reason="only run with release build")
@pytest.mark.parametrize("via_controller", [True, False])
 def test_slow_secondary_downloads(neon_env_builder: NeonEnvBuilder, via_controller: bool):
--- a/test_runner/regress/test_storage_controller.py
+++ b/test_runner/regress/test_storage_controller.py
@@ -273,8 +273,7 @@ def test_storage_controller_onboarding(neon_env_builder: NeonEnvBuilder, warm_up
    but imports the generation number.
    """

-    # One pageserver to simulate legacy environment, two to be managed by storage controller
-    neon_env_builder.num_pageservers = 3
+    neon_env_builder.num_pageservers = 2

    # Start services by hand so that we can skip registration on one of the pageservers
    env = neon_env_builder.init_configs()
@@ -289,10 +288,10 @@ def test_storage_controller_onboarding(neon_env_builder: NeonEnvBuilder, warm_up
    )
    origin_ps = env.pageservers[0]

-    # These are the pageservers managed by the sharding service, where the tenant
+    # This is the pageserver managed by the sharding service, where the tenant
    # will be attached after onboarding
    env.pageservers[1].start()
-    env.pageservers[2].start()
+    dest_ps = env.pageservers[1]
    virtual_ps_http = PageserverHttpClient(env.storage_controller_port, lambda: True)

    for sk in env.safekeepers:
@@ -331,9 +330,6 @@ def test_storage_controller_onboarding(neon_env_builder: NeonEnvBuilder, warm_up
        )

        virtual_ps_http.tenant_secondary_download(tenant_id)
-        warm_up_ps = env.storage_controller.tenant_describe(tenant_id)["shards"][0][
-            "node_secondary"
-        ][0]

    # Call into storage controller to onboard the tenant
    generation += 1
@@ -348,18 +344,6 @@ def test_storage_controller_onboarding(neon_env_builder: NeonEnvBuilder, warm_up
    )
    assert len(r["shards"]) == 1

-    describe = env.storage_controller.tenant_describe(tenant_id)["shards"][0]
-    dest_ps_id = describe["node_attached"]
-    dest_ps = env.get_pageserver(dest_ps_id)
-    if warm_up:
-        # The storage controller should have attached the tenant to the same placce
-        # it had a secondary location, otherwise there was no point warming it up
-        assert dest_ps_id == warm_up_ps
-
-        # It should have been given a new secondary location as well
-        assert len(describe["node_secondary"]) == 1
-        assert describe["node_secondary"][0] != warm_up_ps
-
    # As if doing a live migration, detach the original pageserver
    origin_ps.http_client().tenant_location_conf(
        tenant_id,
@@ -431,9 +415,6 @@ def test_storage_controller_onboarding(neon_env_builder: NeonEnvBuilder, warm_up
        dest_tenant_after_conf_change["generation"] == dest_tenant_before_conf_change["generation"]
    )
    dest_tenant_conf_after = dest_ps.http_client().tenant_config(tenant_id)
-
-    # Storage controller auto-sets heatmap period, ignore it for the comparison
-    del dest_tenant_conf_after.tenant_specific_overrides["heatmap_period"]
    assert dest_tenant_conf_after.tenant_specific_overrides == modified_tenant_conf

    env.storage_controller.consistency_check()
--- a/test_runner/regress/test_tenant_delete.py
+++ b/test_runner/regress/test_tenant_delete.py
@@ -469,8 +469,7 @@ def test_tenant_delete_concurrent(
 ):
    """
    Validate that concurrent delete requests to the same tenant behave correctly:
-    exactly one should execute: the rest should give 202 responses but not start
-    another deletion.
+    exactly one should succeed.

    This is a reproducer for https://github.com/neondatabase/neon/issues/5936
    """
@@ -485,10 +484,14 @@ def test_tenant_delete_concurrent(
        run_pg_bench_small(pg_bin, endpoint.connstr())
        last_flush_lsn_upload(env, endpoint, tenant_id, timeline_id)

+    CONFLICT_MESSAGE = "Precondition failed: Invalid state Stopping. Expected Active or Broken"
+
    env.pageserver.allowed_errors.extend(
        [
            # lucky race with stopping from flushing a layer we fail to schedule any uploads
            ".*layer flush task.+: could not flush frozen layer: update_metadata_file",
+            # Errors logged from our 4xx requests
+            f".*{CONFLICT_MESSAGE}.*",
        ]
    )

@@ -504,7 +507,7 @@ def test_tenant_delete_concurrent(
        return ps_http.tenant_delete(tenant_id)

    def hit_remove_failpoint():
-        return env.pageserver.assert_log_contains(f"at failpoint {BEFORE_REMOVE_FAILPOINT}")[1]
+        env.pageserver.assert_log_contains(f"at failpoint {BEFORE_REMOVE_FAILPOINT}")

    def hit_run_failpoint():
        env.pageserver.assert_log_contains(f"at failpoint {BEFORE_RUN_FAILPOINT}")
@@ -515,14 +518,11 @@ def test_tenant_delete_concurrent(

        # Wait until the first request completes its work and is blocked on removing
        # the TenantSlot from tenant manager.
-        log_cursor = wait_until(100, 0.1, hit_remove_failpoint)
-        assert log_cursor is not None
+        wait_until(100, 0.1, hit_remove_failpoint)

-        # Start another request: this should succeed without actually entering the deletion code
-        ps_http.tenant_delete(tenant_id)
-        assert not env.pageserver.log_contains(
-            f"at failpoint {BEFORE_RUN_FAILPOINT}", offset=log_cursor
-        )
+        # Start another request: this should fail when it sees a tenant in Stopping state
+        with pytest.raises(PageserverApiException, match=CONFLICT_MESSAGE):
+            ps_http.tenant_delete(tenant_id)

        # Start another background request, which will pause after acquiring a TenantSlotGuard
        # but before completing.
@@ -539,10 +539,8 @@ def test_tenant_delete_concurrent(

        # Permit the duplicate background request to run to completion and fail.
        ps_http.configure_failpoints((BEFORE_RUN_FAILPOINT, "off"))
-        background_4xx_req.result(timeout=10)
-        assert not env.pageserver.log_contains(
-            f"at failpoint {BEFORE_RUN_FAILPOINT}", offset=log_cursor
-        )
+        with pytest.raises(PageserverApiException, match=CONFLICT_MESSAGE):
+            background_4xx_req.result(timeout=10)

    # Physical deletion should have happened
    assert_prefix_empty(
--- a/workspace_hack/Cargo.toml
+++ b/workspace_hack/Cargo.toml
@@ -38,7 +38,6 @@ futures-sink = { version = "0.3" }
 futures-util = { version = "0.3", features = ["channel", "io", "sink"] }
 getrandom = { version = "0.2", default-features = false, features = ["std"] }
 hashbrown = { version = "0.14", features = ["raw"] }
-hdrhistogram = { version = "7" }
 hex = { version = "0.4", features = ["serde"] }
 hmac = { version = "0.12", default-features = false, features = ["reset"] }
 hyper = { version = "0.14", features = ["full"] }
@@ -67,9 +66,8 @@ sha2 = { version = "0.10", features = ["asm"] }
 smallvec = { version = "1", default-features = false, features = ["const_new", "write"] }
 subtle = { version = "2" }
 time = { version = "0.3", features = ["local-offset", "macros", "serde-well-known"] }
-tokio = { version = "1", features = ["fs", "io-std", "io-util", "macros", "net", "process", "rt-multi-thread", "signal", "test-util", "tracing"] }
+tokio = { version = "1", features = ["fs", "io-std", "io-util", "macros", "net", "process", "rt-multi-thread", "signal", "test-util"] }
 tokio-rustls = { version = "0.24" }
-tokio-stream = { version = "0.1", features = ["net"] }
 tokio-util = { version = "0.7", features = ["codec", "compat", "io", "rt"] }
 toml_datetime = { version = "0.6", default-features = false, features = ["serde"] }
 toml_edit = { version = "0.19", features = ["serde"] }